Creating nested Json structure with multiple key values in Python from Json - python

My code is as follows:
import json
# reformat: builds a {'name': ..., 'children': [...]} container from one
# mapping of the loaded JSON (code as posted; the indentation was lost).
# NOTE(review): there is no return statement, so callers receive None.
def reformat(importscompanies):
#print importscompanies
container={}
child=[]
item_dict={}
# iteritems() exists on Python 2 dicts only; every key/value pair is visited.
for name, imports in importscompanies.iteritems():
item_dict['name'] = imports
item_dict['size'] = '500'
# dict(item_dict) appends a copy, so later overwrites of item_dict do not
# change entries that are already in the list.
child.append(dict(item_dict))
container['name'] = name
container['children'] = child
# Script entry point (code as posted; the indentation was lost).
# NOTE(review): in this order run() is called before its `def` below has
# been executed — confirm the real file defines run() first.
if __name__ == '__main__':
raw_data = json.load(open('data/bricsinvestorsfirst.json'))
run(raw_data)
def run(raw_data):
# Only the first element of the loaded JSON is handed to reformat(); the
# remaining elements are never looked at.
raw_data2 = raw_data[0]
the_output = reformat(raw_data2)
My issue is, the code isn't going through the whole file. It's only outputting one entry. Why is this? Am I rewriting something and do I need another dict that appends with every loop?
Also, it seems as though the for loop is going through the iteritems for each dict key. Is there a way to make it pass only once?
The issue is indeed
raw_data2 = raw_data[0]
I ended up creating an iterator to access the dict values.
Thanks.
Lastly, I'm hoping my final Json file looks this way, using the data I provided above:
{'name': u'name', 'children': [{'name': u'500 Startups', 'size': '500'}, {'name': u'AffinityChina', 'size': '500'}]}

Try this. Though your sample input and output data don't really give many clues as to where the "name" fields should come from. I've assumed you wanted the name of the original item in your list.
import json

# The file mode is an argument of open(), not of json.load(); the context
# manager closes the file as soon as parsing is done.
with open('data/bricsinvestorsfirst.json', 'r') as source_file:
    original_json = json.load(source_file)

response_json = {}
response_json["name"] = "analytics"

# where your children list will go
children = []
size = 500  # or whatever else you want

# For each item in your original list
for item in original_json:
    children.append({"name": item["name"],
                     "size": size})

response_json["children"] = children

# Parenthesised so the line is valid on both Python 2 and Python 3.
print(json.dumps(response_json, indent=2))

"It's only outputting one entry" because you only select the first dictionary in the JSON file when you say raw_data2 = raw_data[0]
Try something like this as a starting point (I haven't tested/ran it):
import json
def run():
    """Build the name/children container from the investors file.

    Reads ``data/bricsinvestorsfirst.json``, which is expected to hold a
    list of objects that each have a ``name`` key, and returns the container
    serialized as a JSON string.
    """
    with open('data/bricsinvestorsfirst.json') as input_file:
        raw_data = json.load(input_file)

    # One child per entry in the file, not just the first entry.
    children = [{'name': item['name'], 'size': '500'} for item in raw_data]

    container = {'name': 'name', 'children': children}
    return json.dumps(container)


if __name__ == '__main__':
    # Parenthesised so the call is valid on both Python 2 and Python 3.
    print(run())

Related

How to recreate the tree organization in nested dictionaries

I've a problem I have been struggling on for some time now. What I need to do is to check things for a large amount of data inside many folders. To keep track of what has been done I wanted to create a yaml file containing the tree organization of my data structure. Thus, the objective is to create nested dictionaries of the folders containing data.
The script I made works, but it duplicates each folder, and I don't know how to call the function recursively to avoid this. Here is the code:
# Version from the question (indentation lost): maps every directory key of
# `current_dict` to a dict describing its sub-directories, recursively.
def load_tree_structure_as_dictionnary(current_dict):
for dir_name in current_dict.keys():
# Full paths of the immediate sub-directories of dir_name.
lst_sub_dir = [f.path for f in os.scandir(dir_name) if f.is_dir()]
if lst_sub_dir == []:
# No sub-directories: only this placeholder entry is stored.
current_dict[dir_name]['correct_calibration'] = None
else:
for sub_dir in lst_sub_dir:
# The recursive call returns {sub_dir: {...}}; storing that result under the
# sub_dir key again is what nests every folder inside an entry of its own name.
current_dict[dir_name][sub_dir] = load_tree_structure_as_dictionnary( {sub_dir: {}} )
return current_dict
# NOTE(review): data_path is not defined in this snippet — presumably the
# root folder of the data; confirm.
init_dict = {data_path : {} }
full_dict = load_tree_structure_as_dictionnary(init_dict)
I know the error is in the recursive call, but I can't create a new 'sub_dir' key if there isn't a dictionary initialized (hence the {sub_dir: {}}).
Also, I am new to writing Stack Overflow questions; let me know if something needs to be improved in the syntax.
After changing current_dict[dir_name][sub_dir] = load_tree_structure_as_dictionnary( {sub_dir: {}} ) to current_dict[dir_name].update(load_tree_structure_as_dictionnary( {sub_dir: {}} )) your code will not duplicate the sub_dir.
def load_tree_structure_as_dictionnary(current_dict):
    """Fill ``current_dict`` with the directory tree below each of its keys.

    Every key of ``current_dict`` is a directory path mapped to a dict. That
    dict receives one entry per immediate sub-directory, keyed by the
    sub-directory's path and filled in recursively. A directory without
    sub-directories gets a single ``'correct_calibration': None`` placeholder
    instead.

    The dictionary is modified in place and is also returned.
    """
    for dir_name, content in current_dict.items():
        sub_dirs = [entry.path for entry in os.scandir(dir_name) if entry.is_dir()]
        if not sub_dirs:
            content['correct_calibration'] = None
            continue
        for sub_dir in sub_dirs:
            # The recursive call returns {sub_dir: {...}}; merging it (rather
            # than assigning it under sub_dir again) avoids nesting each
            # folder inside an entry with its own name.
            content.update(load_tree_structure_as_dictionnary({sub_dir: {}}))
    return current_dict
# Start from the top-level directory (here "venv") mapped to an empty dict
# and let the function fill it in.
init_dict = {"venv" : {} }
full_dict = load_tree_structure_as_dictionnary(init_dict)

How parse XML into list of dicts?

Given the sample xml below:
<_Document>
<_Data1> 'foo'
<_SubData1> 'bar1' </_SubData1>
<_SubData2> 'bar2' </_SubData2>
<_SubData3> 'bar3' </_SubData3>
</_Data1>
</_Document>
I want to capture each SubData value and update it with the Data1 value in a dictionary and then append that value to a list. Such that the output would look something like:
[{Data1: 'foo', SubData1: 'bar1'}, {Data1: 'foo', SubData2: 'bar2'}, {Data1: 'foo', SubData3: 'bar3'}]
My code is:
from lxml import etree
import re
# Attempt from the question (indentation lost): collect records built from
# the children of every "Data" element below each _Document element.
# NOTE(review): `root` is not defined in this snippet — presumably the root
# of the parsed lxml document; confirm.
new_records = []
for child in root.iter('_Document'): #finding all children with each 'Document' string
for top_data in child.iter(): #iterating through the entirety of each 'Document' sections tags and text.
# NOTE(review): this is a substring test, so it also matches tags such as
# _SubData1 — confirm that is intended.
if "Data" in top_data.tag:
for data in top_data:
rec = {}
if data.text is not None and data.text.isspace() is False: #avoiding NoneTypes and empty data.
g = data.tag.strip("_") #cleaning up the tag
rec[g] = data.text.replace("\n", " ") #cleaning up the value
for b in re.finditer(r'^_SubData', data.tag): #searching through each 'SubData' contained in a given tag.
for subdata in data:
subdict = {}
if subdata.text is not None: #again preventing NoneTypes
z = subdata.tag.strip("_") #tag cleaning
subdict[z] = subdata.text.replace("\n", " ") #text cleaning
rec.update(subdict) #update the data record dictionary with the subdata
new_records.append(rec) #appending to the list
This, unfortunately, outputs:
[{Data1: 'foo', SubData3: 'bar3'}]
As it only updates and appends the final update of the dictionary.
I've tried different varieties of this including initializing a list after the first 'if' statement in the second for loop to append after each loop pass, but that required quite a bit of clean up at the end to get through the nesting it would cause.
I've also tried initializing empty dictionaries outside of the loops to update to preserve the previous updates and append that way.
I'm curious if there is some functionality of lxml that I've missed or a more pythonic approach to get the desired output.
I offered what I think of as a declarative approach in another solution. If you're more comfortable explicitly defining the structure with loops, here's an imperative approach:
from xml.etree import ElementTree as ET
import pprint
new_records = []

document = ET.parse('input.xml').getroot()

# One record per (_Data, _SubData) pair, in document order.
for data in document:
    if not data.tag.startswith('_Data'):
        continue
    data_name = data.tag[1:]  # skip leading '_'
    # .text is None when an element has no leading text; treat that as ''.
    data_val = (data.text or '').strip()

    # A separate loop variable keeps the outer element from being rebound.
    for subdata in data:
        if not subdata.tag.startswith('_SubData'):
            continue
        subdata_name = subdata.tag[1:]
        subdata_val = (subdata.text or '').strip()
        new_records.append(
            {data_name: data_val, subdata_name: subdata_val}
        )

pprint.pprint(new_records)
The input and output is the same as in my other solution.
You can do this with Python's built-in ElementTree module and its iterparse() function, which walks an XML tree and produces a pair of event and element for every step through the tree. We listen for when it starts parsing an element, and if it's _Data... or _SubData... we act.
This is a declarative approach, and relies on the fact that _SubData is only a child of _Data, that is, that your very small and simple sample is exactly representative of what you're actually dealing with.
You'll need to manage a little state for the _Data elements, but that's it:
from xml.etree import ElementTree as ET
import pprint
new_records = []

# State for the _Data element that is currently being parsed.
data_elem = None
data_name = None
data_val = None

# On a 'start' event only the tag and attributes of an element are
# guaranteed to be available; its text may not have been read yet. Names
# are therefore recorded on 'start' and values are read on 'end'.
for event, elem in ET.iterparse('input.xml', ['start', 'end']):
    tag_name = elem.tag.lstrip('_')  # drop the leading '_' if there is one

    if event == 'start':
        if tag_name.startswith('Data'):
            data_elem = elem
            data_name = tag_name
        continue

    # event == 'end'
    if tag_name.startswith('SubData') and data_elem is not None:
        # Once a child has ended, the parent's leading text (everything
        # before its first child) has been stored on the parent.
        data_val = (data_elem.text or '').strip()
        subdata_name = tag_name
        subdata_val = (elem.text or '').strip()
        record = {
            data_name: data_val, subdata_name: subdata_val
        }
        new_records.append(record)

pprint.pprint(new_records)
I modified your sample, my input.xml:
<_Document>
<_Data1>foo
<_SubData1>bar1</_SubData1>
<_SubData2>bar2</_SubData2>
<_SubData3>bar3</_SubData3>
</_Data1>
<_Data2>FOO
<_SubData1>BAR1</_SubData1>
<_SubData2>BAR2</_SubData2>
<_SubData3>BAR3</_SubData3>
</_Data2>
</_Document>
When I run my script on that input, I get:
[{'Data1': 'foo', 'SubData1': 'bar1'},
{'Data1': 'foo', 'SubData2': 'bar2'},
{'Data1': 'foo', 'SubData3': 'bar3'},
{'Data2': 'FOO', 'SubData1': 'BAR1'},
{'Data2': 'FOO', 'SubData2': 'BAR2'},
{'Data2': 'FOO', 'SubData3': 'BAR3'}]
Consider dictionary comprehension using dictionary merge:
# One record per (parent, child) pair: the parent's tag/text merged with the
# child's tag/text into a single dict.
# NOTE(review): `root` is not defined in this snippet — presumably the parsed
# document root; confirm.
new_records = [
{
# Underscores are removed from the tag; the text is stripped and its single
# quotes are removed.
**{doc.tag.replace('_', ''): doc.text.strip().replace("'", "")},
**{data.tag.replace('_', ''): data.text.strip().replace("'", "")}
}
for doc in root.iterfind('*')
for data in doc.iterfind('*')
]
new_records
[{'Data1': 'foo', 'SubData1': 'bar1'},
{'Data1': 'foo', 'SubData2': 'bar2'},
{'Data1': 'foo', 'SubData3': 'bar3'}]

Creating a tree structure with dict and lists in Python for QTreeView

First of all, I'm out of practice, and I'm trying to create a tree structure in Python 3.8 to feed my QTreeView.
I read the output of zfs (subprocess.check_output) and split the whole stuff to have a list with pool names:
Example list:
pools = ['deadpool','deadpool/Backup','deadpool/Photos','deadpool/Photos/Foobar']
Now I've to convert and sort the whole list by parent/child and it should look like this at the end so I can use it in a QTreeView:
{'deadpool':
{
'Backup': {},
'Photos': {'Foobar': {}},
}
}
I tried it with two for loops but Im just too stupid..
Can someone show me an easy example?
Or is there an easier way in QtTreeView/QTreeWidget itself?
If I understood your question, you want to build a tree widget from a dict? If so, here is a very quick example with PySide2 (I think it works the same way with PyQt5 for the tree widget).
# NOTE(review): the Qt widget imports and the QApplication / event-loop
# setup are not part of this snippet.
data = {
    'deadpool': {
        'Backup': {},
        'Photos': {
            'Foobar': {},
        },
    },
}


def build_tree(data=None, parent=None):
    """Add one QTreeWidgetItem under ``parent`` for every key of ``data``.

    ``data`` is a nested dict whose keys become the item labels (column 0).
    Dict values are added recursively as children of the item created for
    their key. ``parent`` is the QTreeWidget or QTreeWidgetItem that
    receives the new items.
    """
    if not data:
        # Covers the ``data=None`` default as well as empty sub-dicts.
        return
    for key, value in data.items():
        item = QTreeWidgetItem(parent)
        item.setText(0, key)
        if isinstance(value, dict):
            build_tree(data=value, parent=item)


window = QWidget()
layout = QVBoxLayout()
window.setLayout(layout)

treewidget = QTreeWidget()
build_tree(data=data, parent=treewidget)
layout.addWidget(treewidget)

window.show()
If you just want convert your list to the dict in order to use it in the treewidget you can try this:
data = ['deadpool', 'deadpool/Backup', 'deadpool/Photos', 'deadpool/Photos/Foobar']

# Nested dict in which every path component is a key of its parent's dict.
tree = {}
for path in data:
    node = tree  # every path is inserted starting from the root
    for level in path.split('/'):
        if not level:
            continue  # ignore empty names (leading, trailing or doubled '/')
        # Step into the child, creating it the first time it is seen.
        node = node.setdefault(level, {})

print(tree)
The code above is from this topic: Stackoverflow
And now you have a dict to use with the first block of code!
I hope it helps!

serializing a json file from list of list

So i have a list of elements:
elements = [room1, room2, room3]
I also have a list of key/value attributes that each room has:
keys = ["level", "finish1", "finish2"]
values = [["ground", "paint1", "carpet1"],["ground", "paint1", "paint2"], ["second level", "paint1", "paint2"]]
Is there a way to serialize these two lists into a JSON file structured like this:
{'room1': [{'level': 'ground', 'finish1': 'paint1', 'finish2': 'carpet1'}],'room2': [{'level': 'ground', 'finish1': 'paint1', 'finish2': 'paint2'}],'room3': [{'level': 'second level', 'finish1': 'paint1', 'finish2': 'paint2'}]}
I am on this weird platform that doesn't support dictionaries, so I created a class for them:
class collection():
    """A named holder for key/value attributes.

    ``name`` labels the collection. ``dict`` holds its attributes and starts
    with the single ``key``/``value`` pair given to the constructor.
    """

    def __init__(self, name, key, value):
        self.name = name
        self.dict = {key: value}

    def __str__(self):
        """Return the name header followed by one ' key= value ' line per attribute."""
        # items() works on both Python 2 (IronPython 2.7) and Python 3,
        # unlike iteritems(); join avoids repeated string concatenation.
        lines = [str(self.name) + " collection"]
        lines.extend(' %s= %s ' % (key, value) for key, value in self.dict.items())
        return '\n'.join(lines)
Then I found a piece of code that would allow me to create basic JSON from two parallel lists:
def json_list(keys, values):
    """Serialize two parallel lists as a JSON array of one-pair objects.

    The i-th object maps ``keys[i]`` to ``values[i]``; pairing stops at the
    end of the shorter list.
    """
    return json.dumps([{key: value} for value, key in zip(values, keys)])
But this code doesn't give me the {room1: [{ ... structure.
Any ideas would be great. The software I am working with is based on IronPython 2.7.
OK, so the above worked great, and I got great feedback from the comments. There is one more variation that I didn't account for. Sometimes, when I try to mix more than a single element type (rooms, columns, etc.), they might not have the same number of attributes. For example, a room can have (level, finish and finish), while a column might have only thickness and material. If I kept it all organized in parallel key/value lists, is it possible to modify the definition below:
keys = [[thickness, material],[level,finish,finish]]
values = [[100,paint],[ground,paint,paint]]
elements = [column,room]
How would i need to modify the definition below to make it work? Again I want to export a json file.
I don't know how Python can even work without dictionaries, so please just test this and tell me the error it shows you:
import json
elements = ['r1', 'r2', 'r3']
keys = ["level", "finish1", "finish2"]
values = [["ground", "paint1", "carpet1"], ["ground", "paint1", "paint2"], ["second level", "paint1", "paint2"]]

# Map every room to a dict of its attributes. `elements` and `values` are
# parallel lists, and each row of `values` is parallel to `keys`.
d = dict()
for room, room_values in zip(elements, values):
    d[room] = dict(zip(keys, room_values))

# Parenthesised so the line is valid on both Python 2 and Python 3.
print(json.dumps(d))
This may work.
#-*- encoding: utf-8 -*-
import json
elements = ["room1", "room2", "room3"]
keys = ["level", "finish1", "finish2"]
values = [
    ["ground", "paint1", "carpet1"],
    ["ground", "paint1", "paint2"],
    ["second level", "paint1", "paint2"],
]

# Each room maps to a one-element list holding its attribute dict.
what_i_want = {
    room: [dict(zip(keys, row))]
    for room, row in zip(elements, values)
}

print(json.dumps(what_i_want))

Python json dumps syntax error when appending list of dict

I have two functions that each return a list of dictionaries, and I'm trying to get json to encode them. It works with my first function, but when I add the second one I get a syntax error of ": expected". I will eventually be combining a total of 7 functions that each output a list of dicts. Is there a better way of accomplishing this?
import dmidecode
import simplejson as json
def get_bios_specs():
    """Return one spec dict per BIOS entry reported by ``dmidecode.bios()``.

    Only dict-valued entries whose ``dmi_type`` is 0 are used.
    """
    BIOSlist = []
    for v in dmidecode.bios().values():
        if isinstance(v, dict) and v['dmi_type'] == 0:
            data = v['data']
            # A fresh dict per entry: reusing one dict would make every list
            # element the same object, all showing the last entry's values.
            BIOSlist.append({
                "Name": str(data['Vendor']),
                "Description": str(data['Vendor']),
                "BuildNumber": str(data['Version']),
                "SoftwareElementID": str(data['BIOS Revision']),
                "primaryBIOS": "True",
            })
    return BIOSlist
def get_board_specs():
    """Return one spec dict per baseboard entry reported by ``dmidecode.baseboard()``.

    Only dict-valued entries whose ``dmi_type`` is 2 are used.
    """
    MOBOlist = []
    for v in dmidecode.baseboard().values():
        if isinstance(v, dict) and v['dmi_type'] == 2:
            data = v['data']
            # A fresh dict per entry keeps the list elements independent.
            MOBOlist.append({
                "Manufacturer": str(data['Manufacturer']),
                "Model": str(data['Product Name']),
            })
    return MOBOlist
# Version from the question: serializes the BIOS and motherboard specs under
# a single 'HardwareSpec' key.
def get_json_dumps():
jsonOBJ = json
#Syntax error is here, i can't use comma to continue adding more, nor + to append.
# The outer braces open a dict, so after the first key: value pair the parser
# expects another key: value pair, not a bare {...} — hence ": expected".
return jsonOBJ.dumps({'HardwareSpec':{'BIOS': get_bios_specs()},{'Motherboard': get_board_specs()}})
Use multiple items within your nested dictionary.
# Both spec lists are values of the one inner 'HardwareSpec' dict.
jsonOBJ.dumps({
'HardwareSpec': {
'BIOS': get_bios_specs(),
'Motherboard': get_board_specs()
}
})
And if you want multiple BIOS items or Motherboard items, just use a list.
...
'HardwareSpec': {
'BIOS': [
get_bios_specs(),
get_uefi_specs()
]
...
}
If you want a more convenient lookup of specs, you can just embed a dict:
# 'BIOS' and 'Motherboard' are separate keys of the embedded dict, so each
# spec list can be looked up by name.
jsonOBJ.dumps({'HardwareSpec':{'BIOS': get_bios_specs(),
'Motherboard': get_board_specs()
}
})

Categories

Resources