Retrieve bulk data from YAML using Python

Retrieve bulk data from YAML using Python - python

I have a yaml file of the form below:
Solution:
- number of solutions: 1
number of solutions displayed: 1
- Gap: None
Status: optimal
Message: bonmin\x3a Optimal
Objective:
objective:
Value: 0.010981105395
Variable:
battery_E[b1,1,1]:
Value: 0.25
battery_E[b1,1,2]:
Value: 0.259912707017
battery_E[b1,2,1]:
Value: 0.120758408109
battery_E[b2,1,1]:
Value: 0.0899999972181
battery_E[b2,2,3]:
Value: 0.198967393893
windfarm_L[w1,2,3]:
Value: 1
windfarm_L[w1,3,1]:
Value: 1
windfarm_L[w1,3,2]:
Value: 1
Using Python27, I would like to import all battery_E values from this YAML file. I know I can iterate over the keys of battery_E dictionary to retrieve them one by one (I am already doing it using PyYAML) but I would like to avoid iterating and do it in one go!

It's not possible "in one go" - there will still be some kind of iteration either way, and that's completely OK.
However, if the memory is a concern, you can load only values of the keys of interest during YAML loading:
from __future__ import print_function
import yaml
KEY = 'battery_E'
class Loader(yaml.SafeLoader):
    """SafeLoader that collects the values stored under ``KEY[...]`` keys.

    While a mapping node is composed, every entry whose scalar key starts
    with ``KEY`` immediately followed by ``[`` is constructed on the spot and
    appended to ``self.values``. Such entries are not added to the composed
    node, so they are absent from the document the loader returns.
    """

    def __init__(self, stream):
        super(Loader, self).__init__(stream)
        # Constructed values of all matching entries, in document order.
        self.values = []

    def compose_mapping_node(self, anchor):
        """Compose one mapping node, diverting matching entries.

        Args:
            anchor: anchor name attached to this mapping, or None.

        Returns:
            The composed ``yaml.MappingNode`` holding only the entries that
            did not match ``KEY[...]``.
        """
        # Match on the prefix including the bracket. Indexing
        # ``value[len(KEY)]`` raised IndexError for a key that is exactly KEY.
        wanted_prefix = KEY + '['
        start_event = self.get_event()
        tag = start_event.tag
        if tag is None or tag == '!':
            tag = self.resolve(yaml.MappingNode, None, start_event.implicit)
        node = yaml.MappingNode(tag, [],
                                start_event.start_mark, None,
                                flow_style=start_event.flow_style)
        if anchor is not None:
            self.anchors[anchor] = node
        while not self.check_event(yaml.MappingEndEvent):
            item_key = self.compose_node(node, None)
            item_value = self.compose_node(node, item_key)
            if (isinstance(item_key, yaml.ScalarNode)
                    and item_key.value.startswith(wanted_prefix)):
                # Build the Python value now and keep it out of the node tree.
                self.values.append(
                    self.construct_object(item_value, deep=True))
            else:
                node.value.append((item_key, item_value))
        end_event = self.get_event()
        node.end_mark = end_event.end_mark
        return node
# Drive the loader by hand so the collected values stay reachable on the
# loader object once parsing has finished.
with open('test.yaml') as stream:
    loader = Loader(stream)
    try:
        # The returned document is discarded; parsing fills loader.values.
        loader.get_single_data()
    finally:
        loader.dispose()
print(loader.values)
Note however, that this code does not assume anything about the position of battery_E keys in the tree inside the YAML file - it will just load all of their values.

There is no need to retrieve each entry using PyYAML; you can load the data once and then use a Python dict comprehension to select the key-value pairs with the following code:
# Load the whole document once. The context manager closes the file handle,
# which a bare open() call would leave dangling.
with open('input.yaml') as stream:
    data = yaml.safe_load(stream)
# Keep only the battery_E entries from the second item of 'Solution',
# mapping each key to its 'Value' field.
kv = {
    k: v['Value']
    for k, v in data['Solution'][1]['Variable'].items()
    if k.startswith('battery_E')
}
after that kv contains:
{'battery_E[b2,2,3]': 0.198967393893, 'battery_E[b1,1,1]': 0.25, 'battery_E[b1,1,2]': 0.259912707017, 'battery_E[b2,1,1]': 0.0899999972181, 'battery_E[b1,2,1]': 0.120758408109}

Related

Modify a loop based on key and value in dictionary

I'm new to Python.
I wrote the code below to search in a dictionary, do something, clear the old items from the dictionary, and update it with new keys and values, stopping when there is nothing left to add (the dictionary is empty). How can I modify my code to do this?
#since_id - Returns results with an ID greater than
#(that is, more recent than) the specified ID. There are limits to the
#number of Tweets which can be accessed through the API.
# If the limit of Tweets has occured since the since_id,
# the since_id will be forced to the oldest ID available.
# max_id - Returns results with an ID less than (that is, older than)
#or equal to the specified ID.
Dict2 = dict({'#TweeetLorraine':1392217841680764931})
d2 = {}
rep=[]
from tqdm import tqdm
for key, value in tqdm(Dict2.items()):
for i in tweepy.Cursor(api.search,
q='to:{} -filter:retweets"'.format(key),lang="en"
,since_id=value,tweet_mode='extended',
wait_on_rate_limit=True,
wait_on_rate_limit_notify=True).items(50):
if (i.in_reply_to_status_id == value):
rep.append(i)
from pandas.io.json import json_normalize
dfflat = pd.DataFrame()
for tweet in rep:
df_for_tweet = json_normalize(tweet._json)
dfflat=dfflat.append(df_for_tweet,ignore_index=True,sort=True)
d2.update(zip(dfflat["user.screen_name"].tolist(), dfflat["id"].tolist()))
d2 ```

You can use a while loop for that :
#since_id - Returns results with an ID greater than
#(that is, more recent than) the specified ID. There are limits to the
#number of Tweets which can be accessed through the API.
# If the limit of Tweets has occured since the since_id,
# the since_id will be forced to the oldest ID available.
# max_id - Returns results with an ID less than (that is, older than)
#or equal to the specified ID.
Dict2 = dict({'#TweeetLorraine':1392217841680764931})
d2 = {}
rep=[]
from tqdm import tqdm
for key, value in tqdm(Dict2.items()):
for i in tweepy.Cursor(api.search,
q='to:{} -filter:retweets"'.format(key),lang="en"
,since_id=value,tweet_mode='extended',
wait_on_rate_limit=True,
wait_on_rate_limit_notify=True).items(50):
if (i.in_reply_to_status_id == value):
rep.append(i)
from pandas.io.json import json_normalize
dfflat = pd.DataFrame()
for tweet in rep:
df_for_tweet = json_normalize(tweet._json)
dfflat=dfflat.append(df_for_tweet,ignore_index=True,sort=True)
d2.update(zip(dfflat["user.screen_name"].tolist(), dfflat["id"].tolist()))
d2
For your use case, here is roughly the code that does what you describe. There are better ways to do this using map; I'll let you look that up if you want to know more.
Also, I'm not sure whether you want to completely clear the dict or only clear the current "i", but I think you can modify the following snippet to your true needs
mydict = initial_dict
# while there is something in the dictionary
while mydict:
value_searched = None
for key, value in mydict.items():
for i in tweepy.Cursor(api.search,
q='to:{} -filter:retweets"'.format(key),lang="en"
,since_id=value,tweet_mode='extended',
wait_on_rate_limit=True,
wait_on_rate_limit_notify=True).items(50):
if (i.in_reply_to_status_id == value):
replies3.append(i)
value_searched = i
break
break
# create new dict from value retrieved
mydict = {"#" +value_searched.user.screen_name : value_searched.id_str}
Edit2 :
Using recursion
def tweepy_stub(key, value):
    """Return the canned ``(screen_name, tweet_id)`` replies for one tweet.

    Args:
        key: screen name of the tweet's author.
        value: id of the tweet.

    Returns:
        A list of ``(screen_name, tweet_id)`` tuples; empty for a tweet
        without canned replies.
    """
    if key == "TweeetLorraine" and value == 1392217841680764931:
        return [
            ("AlexBC997", 1392385334155956226),
            ("ChapinDolores", 1392432099945238529),
        ]
    if key == "AlexBC997" and value == 1392385334155956226:
        return [("test", 139238533415595852)]
    # The original last branch tested a bare tuple literal, which is always
    # truthy, so every remaining input (including "ChapinDolores" /
    # 1392432099945238529) produced an empty list. Say so explicitly.
    return []
def recursive(list_values, nb_recursion):
    """Build a nested dict of replies, following each reply in turn.

    Args:
        list_values: iterable of ``(name_user, tweet_id)`` pairs, or None.
        nb_recursion: number of levels still allowed; 0 stops the descent.

    Returns:
        A dict mapping each ``(name_user, tweet_id)`` pair to the dict built
        from that tweet's direct replies. Empty when ``list_values`` is None
        or the depth budget is used up.
    """
    # Identity check for None; a depth of 0 ends the recursion.
    if list_values is None or nb_recursion == 0:
        return {}
    return {
        (name_user, tweet_id): recursive(
            retrieve_direct_reply_stub(name_user, tweet_id), nb_recursion - 1
        )
        for name_user, tweet_id in list_values
    }
class stub_tweepy_answer:
    """Stub object exposing a single ``in_reply_to_status_id`` attribute."""

    def __init__(self, status_id) -> None:
        # Store the id exactly as given; no validation is performed.
        self.in_reply_to_status_id = status_id
def retrieve_direct_reply_stub(name_user, tweepy_id):
    """Return the canned replies for a tweet by delegating to ``tweepy_stub``.

    Args:
        name_user: screen name of the tweet's author.
        tweepy_id: id of the tweet.

    Returns:
        Whatever ``tweepy_stub(name_user, tweepy_id)`` returns.
    """
    # The unused ``rep`` and ``d2`` locals of the original were dropped.
    return tweepy_stub(name_user, tweepy_id)
def retrieve_direct_reply(name_user, tweet_id):
rep = []
d2 = []
for i in tweepy_stub(name_user, tweet_id):
val = i
if (i.in_reply_to_status_id == tweet_id):
rep.append(i)
from pandas.io.json import json_normalize
dfflat = pd.DataFrame()
for tweet in rep:
df_for_tweet = json_normalize(tweet._json)
dfflat=dfflat.append(df_for_tweet,ignore_index=True,sort=True)
d2.append(zip(dfflat["user.screen_name"].tolist(), dfflat["id"].tolist()))
return d2
#print(retrieve_direct_reply_stub("TweeetLorraine", 1392217841680764931))
elem = [("TweeetLorraine", 1392217841680764931)]
print(recursive(elem, 3))

XML find all attribute values of a tag of a child

I want to get the text value of every child that has one and every attribute value of every child that has one. I can get the text values but I am having trouble getting the attribute values one by one and assigning each to a variable.
I have the following XML file:
<Transactions>
<CardAuthorisation xmlns:xsi="http://...">
<RecType>ADV</RecType>
<AuthId>60874046</AuthId>
<LocalDate>202008010000</LocalDate>
<SettlementDate>202008</SettlementDate>
<Card productid="16" PAN="64256700991593" product="MC" programid="AUST" branchcode="" />
</CardAuthorisation>
</Transactions>
I have the following code:
import xml.etree.ElementTree as et
xFile = "test.XML"
xtree = et.parse(xFile)
xRoot = xtree.getroot()
for cardAuthorisation in xRoot.findall('CardAuthorisation'):
recType = cardAuthorisation.find('./RecType').text
authId = cardAuthorisation.find('./AuthId').text
localDate = cardAuthorisation.find('./LocalDate').text
settlementDate = cardAuthorisation.find('./SettlementDate').text
#here is where I am having trouble with
#pseudocode
for every attribute in Card:
card_productid = #the value of productid if not None else None
.
.
.
branchcode = #the value of branchcode if not None else None
This is my first time working with XML files, I have done a lot of research but none of them matches my use case. Any help would be highly appreciated, thanks in advance.

To get all <Card> tags and each attribute/value of <Card>, you can do:
for c in cardAuthorisation.findall('Card'):
for k, v in c.items():
print(k, v)
Prints:
productid 16
PAN 64256700991593
product MC
programid AUST
branchcode

You can access attributes of the "Card" element as follows:
card = cardAuthorisation.find('./Card')
for key in card.keys():
print(key, card.get(key))

Pytest -Get one item from multiple returned values

I have an e2e_te_data.json file that contains my 2 different test points. That is, it holds the data for 2 test cases; I pass it to pytest, which should execute 2 different test cases.
`e2e_te_data.json
[{ "dataSource":"dataSource1",
"machineName":"MachineName_X",
},
{` "dataSource":"dataSource2",
"machineName":"MachineName_Y",
}]
This is my code:
def read_test_data_from_json(path='..\\e2eTestData.json'):
    """Load the test-case records and return them as two parallel lists.

    Args:
        path: JSON file containing a list of objects, each with a
            "machineName" key. Defaults to the path that was previously
            hard-coded, so existing zero-argument calls behave as before.

    Returns:
        A ``(convertedJsonStr, h)`` tuple: each record re-serialised as a
        JSON string, and the matching "machineName" values in the same order.
    """
    # The context manager closes the file; the original never closed it.
    with open(path, 'r') as json_file:
        records = json.load(json_file)
    converted = [json.dumps(record) for record in records]
    machine_names = [record['machineName'] for record in records]
    return converted, machine_names
#pytest.mark.parametrize("convertedJsonStr,h", (read_test_data_from_json()[0],read_test_data_from_json()[1]))
def test_GetFrequencyOfAllToolUsage(convertedJsonStr,h):
objAPI=HTTPMethods()
frequencyOfToolResultFromAPIRequest=objAPI.getFrequencyOfTools(read_test_data_from_json[0])
print(h)
Value of convertedJsonstr variable
I want to get one item of convertedJsonStr and h returned from read_test_data_from_json method when it comes into test_GetFrequencyOfAllToolUsage method. But I see all items of convertedJsonStr and h as image above.

First Item
def read_test_data_from_json():
JsonFile = json.load(open('..\\e2eTestData.json','r'))
# First item
return JsonFile[0], JsonFile[0]["machineName"]
Last item
return JsonFile[-1], JsonFile[-1]["machineName"]
Random item
item = random.choice(JsonFile)
return item, item["machineName"]

get comment during iteration in ruamel.yaml

How can I get the comments when I iterate through the YAML object
yaml = YAML()
with open(path, 'r') as f:
yaml_data = yaml.load(f)
for obj in yaml_data:
# how to get the comments here?
This is the source data (an ansible playbook)
---
- name: gather all complex custom facts using the custom module
hosts: switches
gather_facts: False
connection: local
tasks:
# There is a bug in ansible 2.4.1 which prevents it loading
# playbook/group_vars
- name: ensure we're running a known working version
assert:
that:
- 'ansible_version.major == 2'
- 'ansible_version.minor == 4'
After Anthon comments, this is the way I found to access the comments in the child nodes (needs to be refined):
for idx, obj in enumerate(yaml_data):
for i, item in enumerate(obj.items()):
pprint(yaml_data[i].ca.items)

You did not specify your input, but since your code expects an obj and
not a key, I assume the root level of your YAML is a sequence and not mapping.
If you want to get the comments after each element (i.e nr 1 and the last) you can do:
import ruamel.yaml
yaml_str = """\
- one # nr 1
- two
- three # the last
"""
yaml = ruamel.yaml.YAML()
data = yaml.load(yaml_str)
for idx, obj in enumerate(data):
comment_token = data.ca.items.get(idx)
if comment_token is None:
continue
print(repr(comment_token[0].value))
which gives:
'# nr 1\n'
'# the last\n'
You might want to strip off the leading octothorpe and trailing newline.
Please note that this works with the current version (0.15.61), but
there is no guarantee that it will not change.

Using the example from Anthon as well as an issue in ruamel.yaml on sourceforge, here's a set of methods which should allow you to retrieve (almost - see below) all the comments in your documents:
from ruamel.yaml import YAML
from ruamel.yaml.comments import CommentedMap, CommentedSeq
# set attributes
def _flatten_comment_tokens(comments):
    """Flatten one ``ca.items`` entry into a flat list of comment tokens."""
    tokens = []
    if comments is None:
        return tokens
    for token in comments:
        if token is None:
            # Empty slot: nothing was recorded in this position.
            continue
        if isinstance(token, list):
            # A slot may hold several tokens at once.
            tokens.extend(token)
        else:
            tokens.append(token)
    return tokens


def get_comments_map(self, key):
    """Return the comment tokens recorded for *key*.

    Looks *key* up in ``self.ca.items`` and returns its entry flattened into
    one list, with None slots skipped; an absent key gives an empty list.
    """
    return _flatten_comment_tokens(self.ca.items.get(key))


def get_comments_seq(self, idx):
    """Return the comment tokens recorded for index *idx*.

    Looks *idx* up in ``self.ca.items`` and returns its entry flattened into
    one list, with None slots skipped; an absent index gives an empty list.
    """
    return _flatten_comment_tokens(self.ca.items.get(idx))
setattr(CommentedMap, 'get_comments', get_comments_map)
setattr(CommentedSeq, 'get_comments', get_comments_seq)
# load string
yaml_str = """\
- name: gather all complex custom facts using the custom module
hosts: switches
gather_facts: False
connection: local
tasks:
# There is a bug in ansible 2.4.1 which prevents it loading
# playbook/group_vars
- name: ensure we're running a known working version
assert:
that:
- 'ansible_version.major == 2'
- 'ansible_version.minor == 4'
"""
yml = YAML(typ='rt')
data = yml.load(yaml_str)
def walk_data(data):
    """Recursively print every key or index with its comment texts.

    For a ``CommentedMap`` the entries are its key/value pairs; for a
    ``CommentedSeq`` they are its index/item pairs. Each entry is printed as
    the key (or index) followed by the list of ``comment.value`` strings that
    ``data.get_comments`` returns for it. Nested maps and sequences are then
    walked the same way. Any other kind of value is ignored.
    """
    if isinstance(data, CommentedMap):
        entries = data.items()
    elif isinstance(data, CommentedSeq):
        entries = enumerate(data)
    else:
        return
    for key, child in entries:
        print(key, [comment.value for comment in data.get_comments(key)])
        # Only containers can carry further comments, so descend into those.
        if isinstance(child, (CommentedMap, CommentedSeq)):
            walk_data(child)
walk_data(data)
Here's the output:
0 []
name []
hosts []
gather_facts []
connection []
tasks ['# There is a bug in ansible 2.4.1 which prevents it loading\n', '# playbook/group_vars\n']
0 []
name []
assert []
that []
0 []
1 []
Unfortunately, there are two problems that I have encountered which are not covered by this method:
You will notice that there is no leading \n in the comments for tasks. As a result, it is not possible with this method to differentiate between comments which start on the same line as tasks or on the next line. Since the CommentToken.start_mark.line only contains the absolute line of the comment, it might be able to be compared to the line of tasks. But, I have not yet found a way to retrieve the line associated with tasks inside the loaded data.
There does not seem to be a way that I have found yet to retrieve comments at the head of the document with this method. So, any initial comments would need to be discovered some other way, for example by retrieving them outside the YAML reader. But, related to problem #1, these head comments are included in the absolute line count of other comments. To add the comments at the head of the document, you need to use [comment.value for comment in data.ca.comment[1]] as per this explanation by Anthon.

Python - Getting Attributes From A File of Constants

I have a file of constant variables that I need to query and I am not sure how to go about it.
I have a database query which is returning user names and I need to find the matching user name in the file of constant variables.
The file looks like this:
SALES_MANAGER_01 = {"user_name": "BO01", "password": "password", "attend_password": "BO001",
"csm_password": "SM001", "employee_num": "BOSM001"}
There is just a bunch of users just like the one above.
My function looks like this:
#attr("user_test")
def test_get_user_for_login(self):
application_code = 'BO'
user_from_view = self.select_user_for_login(application_code=application_code)
users = [d['USER'] for d in user_from_view]
user_with_ent = choice(users)
user_wo_ent = user_with_ent[-4:]
password = ""
global_users = dir(gum)
for item in global_users:
if user_wo_ent not in item.__getattr__("user_name"):
user_with_ent = choice(users)
user_wo_ent = user_with_ent[-4:]
else:
password = item.__getattr__("password")
print(user_wo_ent, password)
global_users = dir(gum) is my file of constants. So I know I am doing something wrong since I am getting an attribute error AttributeError: 'str' object has no attribute '__getattr__', I am just not sure how to go about resolving it.

You should reverse your looping as you want to compare each item to your match condition. Also, you have a dictionary, so use it to do some heavy lifting.
You need to add some imports
import re
from ast import literal_eval
I've changed the dir(gum) bit to be this function.
def get_global_users(filename):
    """Read a Python constants file and return its dict-valued constants.

    Args:
        filename: path of a Python source file containing top-level
            assignments such as ``SALES_MANAGER_01 = {"user_name": ...}``.

    Returns:
        A dict mapping each assigned name to its literal dict value. Other
        top-level statements (comments, imports, non-dict constants) are
        skipped.

    Raises:
        SyntaxError: if the file is not valid Python source.
        ValueError: if a dict contains something other than literals.
    """
    import ast  # local import: the surrounding file only imports literal_eval

    # Parse the whole file instead of matching line by line. A dict that
    # spans several lines (as in the sample file) has no closing brace on its
    # first line, so the per-line regex found no match and crashed. The
    # ``ur'...'`` patterns were also a SyntaxError on Python 3.
    with open(filename) as f:
        tree = ast.parse(f.read())

    gusers = {}
    for node in tree.body:
        if not (isinstance(node, ast.Assign)
                and isinstance(node.value, ast.Dict)):
            continue
        # literal_eval accepts only literals, so nothing from the file is
        # executed.
        value = ast.literal_eval(node.value)
        for target in node.targets:
            if isinstance(target, ast.Name):
                gusers[target.id] = value
    return gusers
The bottom of your existing code is replaced with this.
global_users = get_global_users(gum) # assign return to global_users
for key, value in global_users.iteritems(): # walk through all key, value pairs
if value['user_name'] != user_wo_ent:
user_with_ent = choice(users)
user_wo_ent = user_with_ent[-4:]
else:
password = value['password']

So a very simple answer was get the dir of the constants file then parsing over it like so:
global_users = dir(gum)
for item in global_users:
o = gum.__dict__[item]
if type(o) is not dict:
continue
if gum.__dict__[item].get("user_name") == user_wo_ent:
print(user_wo_ent, o.get("password"))
else:
print("User was not in global_user_mappings")

I was able to find the answer by doing the following:
def get_user_for_login(application_code='BO'):
user_from_view = BaseServiceTest().select_user_for_login(application_code=application_code)
users = [d['USER'] for d in user_from_view]
user_with_ent = choice(users)
user_wo_ent = user_with_ent[4:]
global_users = dir(gum)
user_dict = {'user_name': '', 'password': ''}
for item in global_users:
o = gum.__dict__[item]
if type(o) is not dict:
continue
if user_wo_ent == o.get("user_name"):
user_dict['user_name'] = user_wo_ent
user_dict['password'] = o.get("password")
return user_dict

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Retrieve bulk data from YAML using Python - python

Related

Modify a loop based on key and value in dictionary

XML find all attribute values of a tag of a child

Pytest -Get one item from multiple returned values

get comment during iteration in ruamel.yaml

Python - Getting Attributes From A File of Constants

Categories

Resources