Unique string to Python Dictionary and then dump as YAML - python

The following output string is from a specific program and gets dumped into a text file. It more or less looks like a Python dictionary but isn't. (BTW, this is just a basic example; the real data can be more complex, with multi-layer dicts.)
p_d: '{a:3, what:3.6864e-05, s:"lion", sst:'{c:-20, b:6, p:"panther"}}
First I wanted to convert this string output to a Python dictionary, so that I can then dump it into a YAML output file.
I tried ast.literal_eval, json.loads and yaml.load, but wasn't successful. Wondering if there is a better custom way to convert this to a Python dictionary.
Update:
I tried the approach that @Anthon provided and it worked fine for one of the case scenarios. Thanks a lot for that.
When I tried a slightly more complex scenario, I faced the error below.
import sys
import json
import ruamel.yaml
#Case 1
#dit = "{p_d: '{a:3, what:3.6864e-05, s:lion, sst:'{c:-20, b:6, p:panther}}}"
#Case 2
dit = "'{p_d: '{a:3, what:3.6864e-05, s:lion, vec_mode:'{2.5, -2.9, 3.4, 5.6, -8.9, -5.67, 2, 2, 2, 2, 5.4, 2, 2, 6.545, 2, 2}, sst:'{c:-20, b:6, p:panther}}}"
print(dit)
yaml_str = dit.replace('"', '').replace("'",'').replace(':', ': ')
print(yaml_str)
print('#### full block style')
yaml = ruamel.yaml.YAML(typ='safe') #
yaml.default_flow_style = False
data = yaml.load(yaml_str)
yaml.dump(data, sys.stdout)
Output:
'{p_d: '{a:3, what:3.6864e-05, s:lion, vec_mode:'{2.5, -2.9, 3.4, 5.6, -8.9, -5.67, 2, 2, 2, 2, 5.4, 2, 2, 6.545, 2, 2}, sst:'{c:-20, b:6, p:panther}}}
{p_d: {a: 3, what: 3.6864e-05, s: lion, vec_mode: {2.5, -2.9, 3.4, 5.6, -8.9, -5.67, 2, 2, 2, 2, 5.4, 2, 2, 6.545, 2, 2}, sst: {c: -20, b: 6, p: panther}}}
#### full block style
Traceback (most recent call last):
  File "./ditoyaml_ruamel.py", line 24, in <module>
    data = yaml.load(yaml_str)
  File "python3.6/site-packages/ruamel/yaml/main.py", line 434, in load
    return constructor.get_single_data()
  File "python3.6/site-packages/ruamel/yaml/constructor.py", line 122, in get_single_data
    return self.construct_document(node)
  File "python3.6/site-packages/ruamel/yaml/constructor.py", line 132, in construct_document
    for _dummy in generator:
  File "python3.6/site-packages/ruamel/yaml/constructor.py", line 722, in construct_yaml_map
    value = self.construct_mapping(node)
  File "python3.6/site-packages/ruamel/yaml/constructor.py", line 446, in construct_mapping
    return BaseConstructor.construct_mapping(self, node, deep=deep)
  File "python3.6/site-packages/ruamel/yaml/constructor.py", line 264, in construct_mapping
    if self.check_mapping_key(node, key_node, mapping, key, value):
  File "python3.6/site-packages/ruamel/yaml/constructor.py", line 295, in check_mapping_key
    raise DuplicateKeyError(*args)
ruamel.yaml.constructor.DuplicateKeyError: while constructing a mapping
  in "<unicode string>", line 1, column 52
found duplicate key "2" with value "None" (original value: "None")
  in "<unicode string>", line 1, column 90
To suppress this check see:
    http://yaml.readthedocs.io/en/latest/api.html#duplicate-keys
Update:
I tried to suppress the error as suggested in the link, with the code below:
yaml.allow_duplicate_keys = True
Output:
'{p_d: '{a:3, what:3.6864e-05, s:lion, vec_mode:'{2.5, -2.9, 3.4, 5.6, -8.9, -5.67, 2, 2, 2, 2, 5.4, 2, 2, 6.545, 2, 2}, sst:'{c:-20, b:6, p:panther}}}
{p_d: {a: 3, what: 3.6864e-05, s: lion, vec_mode: {2.5, -2.9, 3.4, 5.6, -8.9, -5.67, 2, 2, 2, 2, 5.4, 2, 2, 6.545, 2, 2}, sst: {c: -20, b: 6, p: panther}}}
#### full block style
p_d:
  a: 3
  s: lion
  sst:
    b: 6
    c: -20
    p: panther
  vec_mode:
    -8.9: null
    -5.67: null
    -2.9: null
    2: null
    2.5: null
    3.4: null
    5.4: null
    5.6: null
    6.545: null
  what: 3.6864e-05
It added a ": null" pair to each value, but I was expecting something more like arrays, as shown below. I'm trying further at my end.
'{p_d: '{a:3, what:3.6864e-05, s:lion, vec_mode:'{2.5, -2.9, 3.4, 5.6, -8.9, -5.67, 2, 2, 2, 2, 5.4, 2, 2, 6.545, 2, 2}, sst:'{c:-20, b:6, p:panther}}}
{p_d: {a: 3, what: 3.6864e-05, s: lion, vec_mode: {2.5, -2.9, 3.4, 5.6, -8.9, -5.67, 2, 2, 2, 2, 5.4, 2, 2, 6.545, 2, 2}, sst: {c: -20, b: 6, p: panther}}}
#### full block style
p_d:
  a: 3
  s: lion
  sst:
    b: 6
    c: -20
    p: panther
  vec_mode:
    [-8.9,
     -5.67,
     -2.9,
     2,
     2.5,
     3.4,
     5.4,
     5.6,
     6.545]
  what: 3.6864e-05

The second single quote is very strangely placed, making the curly braces before and after it unbalanced.
So this is not a case of loading the input as if it were YAML and then (recursively) parsing the values that look like a mapping/sequence.
On the other hand, YAML doesn't need quoting unless a scalar starts with, or in some cases contains, special characters (e.g. ! for a tag, colon+space as the value indicator, & for an anchor, etc.), and your input doesn't seem to have any of those. So you can try to remove all quotes and make sure a space follows the value indicator (necessary in YAML unless both key and value are quoted):
import sys
import json
import ruamel.yaml
input_str = """\
p_d: '{a:3, what:3.6864e-05, s:"lion", sst:'{c:-20, b:6, p:"panther"}}
"""
yaml_str = input_str.replace('"', '').replace("'", '').replace(':', ': ')
print('#### preserved flow/block style')
yaml = ruamel.yaml.YAML()
data = yaml.load(yaml_str)
yaml.dump(data, sys.stdout)
print('#### flow/block style not preserved, with leaf-nodes in flow style')
yaml = ruamel.yaml.YAML(typ='safe') # doesn't preserve the flow/block style
data = yaml.load(yaml_str)
yaml.dump(data, sys.stdout)
print('#### full block style')
yaml = ruamel.yaml.YAML(typ='safe') #
yaml.default_flow_style = False
data = yaml.load(yaml_str)
yaml.dump(data, sys.stdout)
print('#### json output, all quoted')
json.dump(data, sys.stdout)
which gives:
#### preserved flow/block style
p_d: {a: 3, what: 3.6864e-05, s: lion, sst: {c: -20, b: 6, p: panther}}
#### flow/block style not preserved, with leaf-nodes in flow style
p_d:
  a: 3
  s: lion
  sst: {b: 6, c: -20, p: panther}
  what: 3.6864e-05
#### full block style
p_d:
  a: 3
  s: lion
  sst:
    b: 6
    c: -20
    p: panther
  what: 3.6864e-05
#### json output, all quoted
{"p_d": {"a": 3, "what": 3.6864e-05, "s": "lion", "sst": {"c": -20, "b": 6, "p": "panther"}}}
If your real input does have scalars that start with special characters, it might be easier to re-add the quotes for those special cases than to try not to remove them in the first place.
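For the vec_mode case from the update, one possible follow-up (not part of the original answer, just a minimal sketch, assuming that any '{...}' group containing only comma-separated values and no colon is meant to be a list) is to rewrite those groups as YAML flow sequences before loading:
import re
import sys
import ruamel.yaml

dit = "'{p_d: '{a:3, what:3.6864e-05, s:lion, vec_mode:'{2.5, -2.9, 3.4, 5.6, -8.9, -5.67, 2, 2, 2, 2, 5.4, 2, 2, 6.545, 2, 2}, sst:'{c:-20, b:6, p:panther}}}"

yaml_str = dit.replace('"', '').replace("'", '').replace(':', ': ')
# Turn every innermost {...} group that contains no colon into a [...] flow
# sequence, so it loads as a list rather than a mapping of keys to null.
yaml_str = re.sub(r'\{([^{}:]*)\}', r'[\1]', yaml_str)

yaml = ruamel.yaml.YAML(typ='safe')
yaml.default_flow_style = False
data = yaml.load(yaml_str)
yaml.dump(data, sys.stdout)
With this transformation vec_mode loads and dumps as a block-style list, and the duplicate-key error does not occur, since a list may contain repeated values.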

Related

Convert a frozenset to a dictionary in python

I have the following frozenset:
f_set = [frozenset({8, 14, 15, 18}), frozenset({1, 2, 3, 7, 8}), frozenset({0, 4, 5})]
I need to convert f_set into a dictionary as follows:
For the first set, I need the dictionary values to be 0.
For the second set, I need the dictionary values to be 1.
For the third set, I need the dictionary values to be 2.
Now, in case some keys exist in multiple sets, assign a new value to them. In this case 8 exists in both set 1 and set 2, so assign it a value of 3.
dict1 = {8:3, 14:0, 15:0, 18:0, 1:1, 2:1, 3:1, 7:1, 0:2, 4:2, 5:2}
Note: my actual f_set contains more than three sets, so I'd like to avoid doing this manually.
You can use dict comprehension with enumerate:
f_set = [frozenset({8, 14, 15, 18}), frozenset({1, 2, 3, 7, 8}), frozenset({0, 4, 5})]
dict1 = {x: i for i, s in enumerate(f_set) for x in s}
print(dict1)
# {8: 1, 18: 0, 14: 0, 15: 0, 1: 1, 2: 1, 3: 1, 7: 1, 0: 2, 4: 2, 5: 2}
Note that if the sets are not mutually disjoint, keys appearing in more than one set keep only the value from the last set that contains them, since a dict cannot have duplicate keys.
You can simply loop over the frozensets and record each element in an output dictionary:
output = dict()
for i in range(len(f_set)):
    for s in f_set[i]:
        output[s] = i
Note that although the order may differ from yours, order shouldn't matter in a dictionary.
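Neither snippet implements the rule from the question that keys appearing in multiple sets get a new value. A minimal sketch of one way to do that, assuming the new value is simply the next unused index (3 for the first duplicate, as in the dict1 example; further duplicates would get 4, 5, ...):
f_set = [frozenset({8, 14, 15, 18}), frozenset({1, 2, 3, 7, 8}), frozenset({0, 4, 5})]

dict1 = {}
next_value = len(f_set)  # first "new" value handed out to duplicated keys (3 here)
for i, s in enumerate(f_set):
    for x in s:
        if x in dict1:
            # key already seen in an earlier set: give it a fresh value
            dict1[x] = next_value
            next_value += 1
        else:
            dict1[x] = i

print(dict1)  # e.g. {8: 3, 14: 0, 15: 0, 18: 0, 1: 1, 2: 1, 3: 1, 7: 1, 0: 2, 4: 2, 5: 2}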

How to write a nested python dict to csv with row being key, value, key, value

I have a nested dict that looks like:
{'KeyA': {'ItemA': 1, 'ItemB': 2, 'ItemC': 3, 'ItemD': 4, 'ItemE': 5, 'ItemF': 6},
 'KeyB': {'ItemR': 2, 'ItemQ': 3, 'ItemG': 4, 'ItemZ': 5, 'ItemX': 6, 'ItemY': 7}}
I would like to output this to a csv where the desired row format is:
ItemA, 1, ItemB, 2, ItemC, 3, ItemD, 4, ItemE, 5, ItemF, 6
I've managed to get one row of keys and then another row below it with the associated values, using the code below:
for item in myDict:
    item = myDict[x]
    itemVals = item.values()
    wr.writerow(item)
    wr.writerow(itemVals)
    x += 1
I've tried a number of ways of reformatting this and keep running into "not subscriptable" errors every which way I try.
The length of the top-level dict could be large, up to 30k nested dicts. The nested dicts are currently a constant length of 6 key:value pairs.
What's a clean way to achieve this?
Here is an implementation with loops:
myDict = {'KeyA': {'ItemA': 1, 'ItemB': 2, 'ItemC': 3, 'ItemD': 4, 'ItemE': 5, 'ItemF': 6},
          'KeyB': {'ItemR': 2, 'ItemQ': 3, 'ItemG': 4, 'ItemZ': 5, 'ItemX': 6, 'ItemY': 7}}
with open("output.csv", "w") as file:
    for key in myDict:
        for nestedKey in myDict[key]:
            file.write(nestedKey + "," + str(myDict[key][nestedKey]) + ",")
        file.write("\n")
output.csv:
ItemA,1,ItemB,2,ItemC,3,ItemD,4,ItemE,5,ItemF,6,
ItemR,2,ItemQ,3,ItemG,4,ItemZ,5,ItemX,6,ItemY,7,
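If you are already using csv.writer (as the wr.writerow calls in the question suggest), a minimal sketch of the same idea that flattens each inner dict into a single key, value, key, value row might look like this; it also avoids the trailing comma and lets the csv module handle quoting:
import csv

myDict = {'KeyA': {'ItemA': 1, 'ItemB': 2, 'ItemC': 3, 'ItemD': 4, 'ItemE': 5, 'ItemF': 6},
          'KeyB': {'ItemR': 2, 'ItemQ': 3, 'ItemG': 4, 'ItemZ': 5, 'ItemX': 6, 'ItemY': 7}}

with open("output.csv", "w", newline="") as f:
    wr = csv.writer(f)
    for inner in myDict.values():
        row = []
        for k, v in inner.items():  # flatten to key, value, key, value, ...
            row.extend([k, v])
        wr.writerow(row)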

Updating key values in dictionaries

I am trying to write code for the following:
The idea is to have a storage/inventory dictionary and then have its values reduced by certain household tasks, e.g. cleaning, cooking, etc.
This would be the storage dictionary:
cupboard = {"cookies": 30,
            "coffee": 3,
            "washingpowder": 5,
            "cleaningspray": 5,
            'Pasta': 0.5,
            'Tomato': 4,
            'Beef': 2,
            'Potato': 2,
            'Flour': 0.2,
            'Milk': 1,
            "Burger buns": 6}
Now this is the code that I wrote to try to reduce a single key's value (the idea is that the action "cleaning" reduces the key "cleaningspray" by one cleaning unit = 0.5):
cleaning_amount = 0.5
def cleaning(room):
    while cupboard["cleaningspray"] < 0.5:
        cleaned = {key: cupboard.get(key) - cleaning_amount for key in cupboard}
    return cupboard
livingroom = 1*cleaning_amount
cleaning(livingroom)
print(cupboard)
but it returns this, which is the same dictionary as before, with no updated values
{'cookies': 30, 'coffee': 3, 'washingpowder': 5, 'cleaningspray': 5, 'Pasta': 0.5, 'Tomato': 4, 'Beef': 2, 'Potato': 2, 'Flour': 0.2, 'Milk': 1, 'Burger buns': 6}
Can anybody help?
Thank you!!
I guess you want to decrease the "cleaningspray" amount depending on the room size (or other factors). I would do it like this:
cleaning_amount = 0.5

def cleaning(cleaning_factor):
    if cupboard["cleaningspray"] > 0.5:
        # reduce the amount of cleaning spray depending on the cleaning_factor and the global cleaning_amount
        cupboard["cleaningspray"] -= cleaning_factor * cleaning_amount

livingroom_cleaning_factor = 1
cleaning(livingroom_cleaning_factor)
print(cupboard)
Output:
{'cookies': 30, 'coffee': 3, 'washingpowder': 5, 'cleaningspray': 4.5, 'Pasta': 0.5, 'Tomato': 4, 'Beef': 2, 'Potato': 2, 'Flour': 0.2, 'Milk': 1, 'Burger buns': 6}
So I believe the reason the values don't change is that the update happens in a comprehension, which builds a new dictionary instead of modifying the original.
e.g.
list_values = [1, 2, 3, 4, 5]
new_variable = [num + 1 for num in list_values]
print("list_values", list_values) # The original list_values variable doesn't change
print("new_variable", new_variable) # This new variable holds the required value
This returns:
list_values [1, 2, 3, 4, 5]
new_variable [2, 3, 4, 5, 6]
So to fix the problem, you can use the 'new_variable'.
Now that the concept is clear (I hope), in your case it would be something like this:
def cleaning():
    # Also here, I believe you intend to have `>` and not `<` as in the original code
    while cupboard["cleaningspray"] > 0.5:
        cleaned = {key: cupboard.get(key) - cleaning_amount for key in cupboard}
        return cleaned
We return the 'new_variable' (here, cleaned) so it can be assigned to the original dictionary variable as follows, if required:
cupboard = cleaning()
EDIT:
Also, as @d-k-bo commented, if you intend to have the operation carried out only once, an if statement would also do the job:
if cupboard["cleaningspray"] > 0.5:  # Again assuming you intended '>' and not '<'
Otherwise, you should keep the return statement outside the while loop.
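Putting the two answers together (mutating the dict in place and using the `>` check), a trimmed-down sketch with hypothetical inventory values might look like this:
cupboard = {"cleaningspray": 5, "washingpowder": 5}  # example inventory, not the asker's full dict
cleaning_amount = 0.5

def cleaning(cleaning_factor):
    # only spend spray if at least the requested amount is left
    if cupboard["cleaningspray"] >= cleaning_factor * cleaning_amount:
        cupboard["cleaningspray"] -= cleaning_factor * cleaning_amount

cleaning(1)      # e.g. the living room uses one cleaning unit
cleaning(2)      # a bigger room uses two units
print(cupboard)  # {'cleaningspray': 3.5, 'washingpowder': 5}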

Python- parse .txt files with multiple dictionaries

I have the following as a .txt file
{"a": 1, "b": 2, "c": 3}
{"d": 4, "e": 5, "f": 6}
{"g": 7, "h": 8, "i": 9}
How can I use python to open the file, and write a comma to separate each dictionary?
I.e. what regular expression can find every instance of "} {" and put a comma there?
(the real file is much larger (~10GB), and this issue prevents the file from being a syntactically correct JSON object that I can parse with json.loads())
You can use str.join with ',' as the delimiter, reading the file line by line in a generator expression. Then put [] around the contents to make it valid JSON.
import json
with open(filename, 'r') as f:
    contents = '[' + ','.join(line for line in f) + ']'
data = json.loads(contents)
This results in data being:
[
    {'a': 1, 'b': 2, 'c': 3},
    {'d': 4, 'e': 5, 'f': 6},
    {'g': 7, 'h': 8, 'i': 9}
]
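Since the file is ~10GB, building the whole bracketed string in memory may not be practical. Because each line is already a complete JSON object, a line-by-line (JSON Lines-style) parse is a possible alternative; this is only a sketch, and the file name and the processing step are placeholders:
import json

def iter_records(filename):
    # Yield one dict per non-empty line; assumes each line holds a complete JSON object.
    with open(filename, 'r') as f:
        for line in f:
            line = line.strip()
            if line:
                yield json.loads(line)

# Example usage ('data.txt' is a placeholder name):
# for record in iter_records('data.txt'):
#     ...  # process one record at a time without loading the whole file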

Is it possible to use part of dict key as column name for table?

I'm working on getting quite a big set of data analyzed. It is too big to be processed manually, so I need to grab and parse it automatically. The problem is that I'm not a programmer at all and this is my first piece of code ever, so I may be missing something obvious.
The set is 123 fields for each of 2,700 items; not all items have data for all the fields, and some fields have more than one value. The data is accessible via an API responding with a JSON file limited to 50 items max per call, and I pay for each call. OK, on to the problem itself:
I managed to flatten the nested JSON with the flatten module. The keys in the JSON (and in the resulting dict) are not unique per item, so the resulting dict looks like {'item_1_param_1': 'X', 'item_1_param_2': 'Y', ..., 'item_2700_param_123': 'Z'}.
I'm stuck at this point: I need this dataset to be analyzed as a table (in Excel or probably SPSS), but I'm only able to produce a table with 1 column and 2,700 rows out of it.
What I would be absolutely happy to have is a table like:
            item_1   item_2      ...   item_2700
param_1     X        Y           ...   K
param_2     L        [M, N, O]   ...   P
...         ...      ...         ...   ...
param_123   N/A      Q           ...   Z
The idea of what was done is as follows (pseudocode, just to illustrate the logic):
response = requests.get(url)
output = json.loads(response.text)
flat_json = flatten(output)
(The full code is way bigger, as this script is meant to be reused later by people who understand programming even less than me, so it is full of checks and warnings, plus some workarounds to correct mistakes in the JSON generated on the API side.)
So is there a way to extract a part of a dict key (like 'item_1' from 'item_1_param_1') and transform the 1-column table into a multi-column one, using this part as a column name and assigning the correct values to it? Thanks a lot in advance for helping a newbie!
This is possible, yes. You can get the keys with key_list = list(some_dictionary.keys()), then you can say:
for key in key_list:
    separated_key_names = key.split('_')  # separate key name at "_"
    variable1 = " ".join(separated_key_names[:2])
    variable2 = " ".join(separated_key_names[2:4])
This will split the key and make a variable for each name, like (item 1, param 1).
You may also be interested in the some_dictionary.items() method. You could use it like:
for key, value in some_dictionary.items():
    separated_key_names = key.split('_')  # separate key name at "_"
    variable1 = " ".join(separated_key_names[:2])
    variable2 = " ".join(separated_key_names[2:4])
    # Now you have separated the variable names and the key. You can process them however you want
    my_info = {(variable1, variable2): value}  # for example
I'm not sure how you'd like to reassemble the data, but you should be able to manipulate it however you'd like from here. Leave a comment if something is unclear to you in my post!
How about this:
import re
from itertools import groupby

def identify_item(pair):
    return re.search(r'item_\d+', pair[0]).group()

gb = groupby(flat_json.items(), key=identify_item)

result = {item_id: {param.replace(item_id, '')[1:]: value
                    for param, value in param_pair}
          for item_id, param_pair in gb}
This assumes that your data will be of the format specified in the question; otherwise, some tweaking might be necessary.
Example:
>>> import numpy as np
>>> flat_json = {'item_{}_param_{}'.format(i, j): np.random.randint(0, 10) for i in range(1, 11) for j in range(1, 5)}
>>> # apply transformation
>>> result
{'item_1': {'param_1': 2, 'param_2': 2, 'param_3': 8, 'param_4': 9},
'item_2': {'param_1': 0, 'param_2': 2, 'param_3': 8, 'param_4': 7},
'item_3': {'param_1': 3, 'param_2': 7, 'param_3': 6, 'param_4': 7},
'item_4': {'param_1': 0, 'param_2': 9, 'param_3': 0, 'param_4': 4},
'item_5': {'param_1': 5, 'param_2': 1, 'param_3': 2, 'param_4': 9},
'item_6': {'param_1': 9, 'param_2': 5, 'param_3': 0, 'param_4': 0},
'item_7': {'param_1': 4, 'param_2': 7, 'param_3': 4, 'param_4': 2},
'item_8': {'param_1': 2, 'param_2': 8, 'param_3': 5, 'param_4': 7},
'item_9': {'param_1': 5, 'param_2': 4, 'param_3': 1, 'param_4': 8},
'item_10': {'param_1': 4, 'param_2': 0, 'param_3': 3, 'param_4': 0}}
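If the goal is a params-by-items table for Excel, a small follow-up sketch (assuming pandas is available; the file names are just examples) could build it directly from result, since pandas.DataFrame turns the outer keys of a dict of dicts into columns and the inner keys into rows:
import pandas as pd

# result is the {'item_1': {'param_1': ..., ...}, ...} dict built above
df = pd.DataFrame(result)  # columns: item_1, item_2, ...; index: param_1, param_2, ...
df = df[sorted(df.columns, key=lambda c: int(c.split('_')[1]))]  # optional: keep items in numeric order
df.to_csv('items_table.csv')       # example output file
# df.to_excel('items_table.xlsx')  # works if openpyxl is installed
Missing parameters come out as NaN, which matches the N/A cells in the desired table.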
