convert xml which has 'children' into dictionary

convert xml which has 'children' into dictionary - python

I have an XML file with nested child elements and I want to convert it into a dict.
<people>
<type>
<name>lo_123</name>
<country>AUS</country>
<note>
<name>joe</name>
<gender>m</gender>
<age>26</age>
<spouse>
<name>lisa</name>
<gender>f</gender>
</spouse>
</note>
</type>
</people>
This is my code to convert it
# xml.etree.cElementTree was removed in Python 3.9; ElementTree uses the
# C accelerator automatically when it is available.
import xml.etree.ElementTree as ET

xml = 'xmltest.xml'
crif_tree = ET.parse(xml)
crif_root = crif_tree.getroot()

# One flat {tag: text} dict per matched element, collected level by level:
# first every <type>, then every <note>, then every <spouse>.
data = []
for path in ('.//type', './/type/note', './/type/note/spouse'):
    for element in crif_root.findall(path):
        # Iterate the matched element's own children (the original referenced
        # an undefined name here, which raised NameError).
        reg = {child.tag: child.text for child in element}
        data.append(reg)
print(data)
Here is the output of data
[{'name': 'lo_123', 'country': 'AUS', 'note': '\n '}, {'name': 'joe', 'gender': 'm', 'age': '26', 'spouse': '\n '}, {'name': 'lisa', 'gender': 'f'}]
My desired output would be
[{'name': 'lo_123', 'country': 'AUS', 'note': '\n ', 'name': 'joe', 'gender': 'm', 'age': '26', 'spouse': '\n ', 'name': 'lisa', 'gender': 'f'}]

Related

add key value in nested dictionary

datainput = {'thissong-fav-user:type1-chan-44-John': [{'Song': 'Rock',
'Type': 'Hard',
'Price': '10'}],
'thissong-fav-user:type1-chan-45-kelly-md': [{'Song': 'Rock',
'Type': 'Soft',
'Price': '5'}]}
Output required:
{'thissong-fav-user:type1-chan-44-John': [{'Key': 'Song', 'Value': 'Rock'},
{'Key': 'Type', 'Value': 'Hard'},
{'Key': 'Price', 'Value': '10'}],
'thissong-fav-user:type1-chan-45-kelly-md': [{'Key': 'Song', 'Value': 'Rock'},
{'Key': 'Type', 'Value': 'Soft'},
{'Key': 'Price', 'Value': '5'}]}
I started with the code below, which only gives me a nested structure; I am not sure how to get the desired output.
temps = [{'Key': key, 'Value': value} for (key, value) in datainput.items()]

Here is how:
# Input: each key maps to a list holding flat {field: value} dicts.
datainput = {
    'thissong-fav-user:type1-chan-44-John': [
        {'Song': 'Rock', 'Type': 'Hard', 'Price': '10'},
    ],
    'thissong-fav-user:type1-chan-45-kelly-md': [
        {'Song': 'Rock', 'Type': 'Soft', 'Price': '5'},
    ],
}

# For every outer key, expand each inner dict into one
# {'Key': ..., 'Value': ...} entry per field. Iterating over all records
# (rather than only records[0]) also copes with empty or longer lists.
temps = {
    outer_key: [
        {'Key': field, 'Value': value}
        for record in records
        for field, value in record.items()
    ]
    for outer_key, records in datainput.items()
}

# Print the converted structure, not the untouched input.
print(temps)
Output:
{'thissong-fav-user:type1-chan-44-John': [{'Key': 'Song', 'Value': 'Rock'},
{'Key': 'Type', 'Value': 'Hard'},
{'Key': 'Price', 'Value': '10'}],
'thissong-fav-user:type1-chan-45-kelly-md': [{'Key': 'Song', 'Value': 'Rock'},
{'Key': 'Type', 'Value': 'Soft'},
{'Key': 'Price', 'Value': '5'}]}

The input structure is fine as it is. To get the desired output, iterate over the outer dictionary and, for each key, expand the key-value pairs of its inner dictionary into separate {'Key': ..., 'Value': ...} entries:
# Source data: every key maps to a one-element list wrapping a flat dict.
datainput = {
    'thissong-fav-user:type1-chan-44-John': [
        {'Song': 'Rock', 'Type': 'Hard', 'Price': '10'},
    ],
    'thissong-fav-user:type1-chan-45-kelly-md': [
        {'Song': 'Rock', 'Type': 'Soft', 'Price': '5'},
    ],
}

# Rebind datainput to the converted form: the first inner dict of each list
# becomes a list of {'Key': ..., 'Value': ...} pairs.
datainput = dict(
    (
        outer_key,
        [{'Key': field, 'Value': value} for field, value in records[0].items()],
    )
    for outer_key, records in datainput.items()
)
print(datainput)
Most probably, you'll get the desired output in this fashion.

Merge two dictionaries based on similarity excluding a key

I have the following three dictionaries in an array:
items = [
{
'FirstName': 'David',
'LastName': 'Smith',
'Language': set(['en'])
},
{
'FirstName': 'David',
'LastName': 'Smith',
'Language': set(['fr'])
},
{
'FirstName': 'Bob',
'LastName': 'Jones',
'Language': set(['en'])
} ]
I want to merge these dictionaries whenever two of them are identical apart from one specified key, combining (taking the union of) the values of that key. For example, merging on the "Language" key would turn the array into the following:
[ {
'FirstName': 'David',
'LastName': 'Smith',
'Language': set(['en','fr'])
},{
'FirstName': 'Bob',
'LastName': 'Jones',
'Language': set(['en'])
} ]
Here is what I'm currently doing:
from copy import deepcopy
def _merge_items_on_field(items, field):
'''Given an array of dicts, merge the
dicts together if they are the same except for the 'field'.
If merging dicts, add the unique values of that field together.'''
items = deepcopy(items)
items_merged_on_field = []
for num, item in enumerate(items):
# Remove that key/value from the dict
field_value = item.pop(field)
# Get an array of items *without* that field to compare against
items_without_field = deepcopy(items_merged_on_field)
map(lambda d: d.pop(field), items_without_field)
# If the dict item is found ("else"), add the fields together
# If not ("except"), then add in the dict item to the array
try:
index = items_without_field.index(item)
except ValueError:
item[field] = field_value
items_merged_on_field.append(item)
else:
items_merged_on_field[index][field] = items_merged_on_field[index][field].union(field_value)
return items_merged_on_field
>>> items = [{'LastName': 'Smith', 'Language': set(['en']), 'FirstName': 'David'}, {'LastName': 'Smith', 'Language': set(['fr']), 'FirstName': 'David'}, {'LastName': 'Jones', 'Language': set(['en']), 'FirstName': 'Bob'}]
>>> _merge_items_on_field(items, 'Language')
[{'LastName': 'Smith', 'Language': set(['fr', 'en']), 'FirstName': 'David'}, {'LastName': 'Jones', 'Language': set(['en']), 'FirstName': 'Bob'}]
This seems a bit complicated -- is there a better way to do this?

There are a couple of ways of doing this. The most painless method to my knowledge utilises the pandas library—in particular, a groupby + apply.
import pandas as pd
# Group rows by the identifying columns (keeping first-appearance order),
# union the Language sets inside each group, then convert the result back
# to a list of plain dicts.
merged = (
    pd.DataFrame(items)
    .groupby(['FirstName', 'LastName'], sort=False)['Language']
    .apply(lambda languages: set.union(*languages))
    .reset_index()
    .to_dict(orient='records')
)
print(merged)
[
{'FirstName': 'David', 'LastName': 'Smith', 'Language': {'en', 'fr'}},
{'FirstName': 'Bob', 'LastName': 'Jones', 'Language': {'en'}}
]
The other method (that I mentioned) uses itertools.groupby, but seeing as you have 30 columns to group on, I'd just recommend sticking to pandas.
If you want to turn this into a function,
def merge(items, field):
    """Merge records that agree on every column except ``field``.

    Builds a DataFrame from ``items``, groups on all other columns (in
    first-appearance order) and unions the ``field`` sets of each group.
    Returns a list of dicts, one per group.
    """
    frame = pd.DataFrame(items)
    # Every column other than the merge field identifies a group.
    group_columns = frame.columns.difference([field]).tolist()

    def union_all(values):
        return set.union(*values)

    per_group = frame.groupby(group_columns, sort=False)[field]
    combined = per_group.apply(union_all).reset_index()
    return combined.to_dict(orient='records')
merged = merge(items, 'Language')
print(merged)
[
{'FirstName': 'David', 'LastName': 'Smith', 'Language': {'en', 'fr'}},
{'FirstName': 'Bob', 'LastName': 'Jones', 'Language': {'en'}}
]

You can use itertools.groupby:
import itertools

d = [
    {'FirstName': 'David', 'LastName': 'Smith', 'Language': {'en'}},
    {'FirstName': 'David', 'LastName': 'Smith', 'Language': {'fr'}},
    {'FirstName': 'Bob', 'LastName': 'Jones', 'Language': {'en'}},
]

# itertools.groupby only merges adjacent items, so sort by the grouping key
# (FirstName) before grouping on it.
v = [
    [first_name, list(members)]
    for first_name, members in itertools.groupby(
        sorted(d, key=lambda x: x['FirstName']),
        key=lambda x: x['FirstName'],
    )
]

final_dict = []
for first_name, members in v:
    last_names = [member['LastName'] for member in members]
    unique_last_names = set(last_names)
    final_dict.append({
        'FirstName': first_name,
        # A single shared last name stays a plain string; differing last
        # names within the group are reported as a list.
        'LastName': (
            last_names[0]
            if len(unique_last_names) == 1
            else list(unique_last_names)
        ),
        # Union the complete Language sets. The original kept only one
        # arbitrary element of each set, dropping the other languages.
        'Language': set().union(*(member['Language'] for member in members)),
    })
Output:
[{'FirstName': 'Bob', 'LastName': 'Jones', 'Language': {'en'}}, {'FirstName': 'David', 'LastName': 'Smith', 'Language': {'en', 'fr'}}]

If pandas is not an option:
from itertools import groupby
from functools import reduce
arr = [
{'FirstName': 'David', 'LastName': 'Smith', 'Language': set(['en'])},
{'FirstName': 'David', 'LastName': 'Smith', 'Language': set(['fr'])},
{'FirstName': 'David', 'LastName': 'Jones', 'Language': set(['sp'])}
]
def reduce_field(items, field, op=set.union, sort=False):
    """Collapse consecutive dicts that differ only in ``field``.

    Runs of adjacent items whose other key/value pairs are equal are merged
    into one dict; their ``field`` values are folded together with ``op``.
    Pass ``sort=True`` to sort by the remaining pairs first, so that
    non-adjacent duplicates are merged as well.
    """
    def without_field(record):
        # Grouping key: the record's (key, value) pairs minus ``field``.
        return tuple(pair for pair in record.items() if pair[0] != field)

    def collapse(group_key, members):
        combined = dict(group_key)
        combined[field] = reduce(op, (member[field] for member in members))
        return combined

    ordered = sorted(items, key=without_field) if sort else items
    return [
        collapse(group_key, members)
        for group_key, members in groupby(ordered, key=without_field)
    ]
reduce_field(arr, 'Language')

You can try it manually :
# Merged records, keyed by the (FirstName, LastName) pair that identifies
# a person.
new_dict = {}

d = [
    {'FirstName': 'David', 'LastName': 'Smith', 'Language': {'en'}},
    {'FirstName': 'David', 'LastName': 'Smith', 'Language': {'fr'}},
    {'FirstName': 'Bob', 'LastName': 'Jones', 'Language': {'en'}},
]

for person in d:
    key = (person['FirstName'], person['LastName'])
    if key not in new_dict:
        # Store a copy of the record and of its language set; storing the
        # record itself would make later merges rewrite entries of ``d``.
        new_dict[key] = dict(person, Language=set(person['Language']))
    else:
        new_dict[key]['Language'] |= person['Language']
print(new_dict.values())
output:
# dict_values([{'FirstName': 'Bob',
# 'LastName': 'Jones',
# 'Language': {'en'}},
# {'FirstName': 'David',
# 'LastName': 'Smith',
# 'Language': {'fr', 'en'}}])

python: combine lists to dictionary with header

I want to combine two lists in to one dictionary type.
Name = ['John','Mary','Serena','Felicia']
Data = ['26','179','25','164','29','149','29','167']
desirable output in Json format
{"people":[{'Name': 'John',
'Age': '26',
'Height': '179'},
{'Name': 'Mary',
'Age': '25',
'Height': '164'},
{'Name': 'Serena',
'Age': '29',
'Height': '149'},
{'Name': 'Felicia',
'Age': '29',
'Height': '167'} ]
}
I tried list1 = {k: 'Name' for k in Name}, but the resulting dictionary maps 'John' to 'Name',
which is the reverse of what I need.

a couple of nested zips & slices do the trick to build dicts in a list comprehension, as value of the outer dict:
Name = ['John', 'Mary', 'Serena', 'Felicia']
Data = ['26', '179', '25', '164', '29', '149', '29', '167']

# Data alternates age, height, age, height, ...: even positions hold ages
# and odd positions hold heights. Pair them up, then attach each pair to
# the name at the same position.
result = {
    'people': [
        dict(zip(('Name', 'Age', 'Height'), (name,) + age_height))
        for name, age_height in zip(Name, zip(Data[0::2], Data[1::2]))
    ]
}
print(result)
prints:
{'people': [{'Name': 'John', 'Height': '179', 'Age': '26'}, {'Name': 'Mary', 'Height': '164', 'Age': '25'}, {'Name': 'Serena', 'Height': '149', 'Age': '29'}, {'Name': 'Felicia', 'Height': '167', 'Age': '29'}]}
If you don't want to create "hard" slices (which copy the list), use itertools.islice. It is also worth converting the numeric strings to integers, which avoids the easy trap of comparing numbers lexicographically as strings later on:
from itertools import islice

# islice walks Data lazily instead of copying it: start 0 / step 2 yields
# the ages, start 1 / step 2 yields the heights. Both are converted to int.
result = {
    'people': [
        dict(Name=name, Age=int(age), Height=int(height))
        for name, age, height in zip(
            Name,
            islice(Data, 0, None, 2),
            islice(Data, 1, None, 2),
        )
    ]
}
(and also thanks to comments, no need to nest zip statements)
result:
{'people': [{'Height': 179, 'Age': 26, 'Name': 'John'},
{'Height': 164, 'Age': 25, 'Name': 'Mary'},
{'Height': 149, 'Age': 29, 'Name': 'Serena'},
{'Height': 167, 'Age': 29, 'Name': 'Felicia'}]}

Using zip:
Name = ['John', 'Mary', 'Serena', 'Felicia']
Data = ['26', '179', '25', '164', '29', '149', '29', '167']

# Walk names, ages (even positions of Data) and heights (odd positions) in
# lockstep and label each triple with its field names.
dct = {
    "people": [
        dict(zip(('Name', 'Age', 'Height'), row))
        for row in zip(Name, Data[::2], Data[1::2])
    ]
}
print(dct)
Output:
{'people': [{'Name': 'John', 'Age': '26', 'Height': '179'}, {'Name': 'Mary', 'Age': '25', 'Height': '164'}, {'Name': 'Serena', 'Age': '29', 'Height': '149'}, {'Name': 'Felicia', 'Age': '29', 'Height': '167'}]}

Here's an interesting approach.
>>> Name = ['John','Mary','Serena','Felicia']
>>> Data = ['26','179','25','164','29','149','29','167']
>>> keys = ['Name', 'Age', 'Height']
>>> it = iter(Data)
>>> {'people':[dict(zip(keys,i)) for i in zip(Name, it, it)]}
Can't make it too much more compact than this.
Output
{'people': [{'Name': 'John', 'Age': '26', 'Height': '179'}, {'Name': 'Mary', 'Age': '25', 'Height': '164'}, {'Name': 'Serena', 'Age': '29', 'Height': '149'}, {'Name': 'Felicia', 'Age': '29', 'Height': '167'}]}

How to create a list of dictionaries using for loop?

Textfile:
VIP Room, 10, 250
Executive Room,30, 500
Pool Site, 50, 850
Banquet Hall, 200, 1000
Chamber Hall, 500, 2000
Concert Hall, 1000, 3500
My code so far to read the file and create a list:
# Asker's attempt: read venue.txt and build a list of venue dicts.
# NOTE(review): indentation was lost in this listing, so the nesting of the
# loops below cannot be confirmed from the text shown.
def readVenueList():
dic={}  # a single dict object, created once before any loop
venueList=[]
f=open("venue.txt","r")  # no matching close() appears in this function
for line in f:
line = line.split(",")  # line is now a list of comma-separated fields
print(line)
for i in line:
i.split()  # return value is discarded; i remains one field string
dic["name"]=i[0]  # i is a string, so i[0]..i[2] are single characters
dic["num"]=i[1]
dic["cost"]=i[2]
venueList.append(dic)  # appends the same dic object created above
return(venueList)
How do I create a list of dictionaries with the following output?
venueList = [{'cost': '250', 'name': 'VIP Room', 'num': '10'},
{'cost': '500', 'name': 'Executive Room', 'num': '30'},
# and so on and so forth...
]

You can simply use the csv reader library to handle this.
import csv

# Column names, in the order the fields appear on each line of venue.txt.
headers = ['name', 'num', 'cost']
with open('venue.txt', 'r') as f:
    reader = csv.reader(f)
    # One dict per CSV row; fields are stripped because the file has spaces
    # after some commas.
    needed_list = [
        {key: row[pos].strip() for pos, key in enumerate(headers)}
        for row in reader
    ]

It is very similar to earlier answer by #N M
datablob = u"""VIP Room,10,250
Executive Room,30,500
Pool Site,50,850
Banquet Hall,200,1000
Chamber Hall,500,2000
Concert Hall,1000,3500
"""
from csv import reader
from io import StringIO
def readVenueList(fd):
    """Yield one dict per CSV row read from the file-like object ``fd``.

    Fields are labelled positionally as "name", "num" and "cost". A row with
    more than three fields raises IndexError.
    """
    hdr = ["name", "num", "cost"]
    for row in reader(fd):
        yield {hdr[pos]: value for pos, value in enumerate(row)}
if __name__ == '__main__':
# replace with file object
# file = open("archive1.csv")
file = StringIO(datablob)
print(list(readVenueList(file)))
# Output
[{'name': 'VIP Room', 'num': '10', 'cost': '250'},
{'name': 'Executive Room', 'num': '30', 'cost': '500'},
{'name': 'Pool Site', 'num': '50', 'cost': '850'},
{'name': 'Banquet Hall', 'num': '200', 'cost': '1000'},
{'name': 'Chamber Hall', 'num': '500', 'cost': '2000'},
{'name': 'Concert Hall', 'num': '1000', 'cost': '3500'}]

If you don't want to use a CSV reader (though that's probably the best idea), you could also do this using list/dictionary comprehensions
with open('venue.txt', 'r') as f:
    # Lazily split each line on commas; the generator must be consumed
    # while the file is still open.
    lines = (line.split(',') for line in f)
    # int() tolerates the surrounding spaces and the trailing newline.
    venues = [
        dict(name=name.strip(), number=int(num), cost=int(cost))
        for name, num, cost in lines
    ]

Here's how to modify your code do it properly (and follow the PEP 8 - Style Guide for Python Code recommendations more closely) :
from pprint import pprint
def readVenueList(path="venue.txt"):
    """Read a comma-separated venue file into a list of dicts.

    Each non-blank line must have the form ``name, num, cost``. Whitespace
    around each field is stripped and the values are kept as strings.

    :param path: file to read; defaults to "venue.txt" as before.
    :returns: list of ``{"name": ..., "num": ..., "cost": ...}`` dicts, in
        file order.
    :raises IndexError: if a non-blank line has fewer than three fields.
    """
    venueList = []
    with open(path, "r") as f:
        for line in f:
            # Skip blank lines (e.g. a trailing newline at the end of the
            # file) instead of failing on the missing fields.
            if not line.strip():
                continue
            items = [item.strip() for item in line.split(",")]
            venueList.append(
                {"name": items[0], "num": items[1], "cost": items[2]}
            )
    return venueList
venueList = readVenueList()
pprint(venueList)
Output:
[{'cost': '250', 'name': 'VIP Room', 'num': '10'},
{'cost': '500', 'name': 'Executive Room', 'num': '30'},
{'cost': '850', 'name': 'Pool Site', 'num': '50'},
{'cost': '1000', 'name': 'Banquet Hall', 'num': '200'},
{'cost': '2000', 'name': 'Chamber Hall', 'num': '500'},
{'cost': '3500', 'name': 'Concert Hall', 'num': '1000'}]

Create a key in a nested dict without using update function

Input :
{'Name': 'A','Blood Group': 'O +ve', 'Age': '1', 'Sex': 'M','Phone Number': '01234567', 'Mobile Number': '9876543210', 'Date of Birth': '01-01-95'}
1.
d.update({'Contact Info': {'Mobile Number':d['Mobile Number'],'Phone Number':d['Phone Number'] }})
2.
d['Contact Info']={}
d['Contact Info']['Mobile Number']=d['Mobile Number']
Is there a better or different way to create a dictionary key whose value is itself a dict built from existing items?
Original Code:
import csv
import copy
from collections import namedtuple
d={}
ls=[]
# Reads details.csv and appends a snapshot of the accumulated row data to ls.
# NOTE(review): indentation was lost in this listing, so the extent of the
# with/for bodies cannot be confirmed from the text shown.
def nest():
with open ("details.csv", 'r') as f:
reader=csv.DictReader(f)  # no fieldnames given, so the first CSV row supplies the keys
for row in reader:
d.update(row)  # merges the row's fields into the module-level dict d
# The two namedtuple types below are defined but not used in the visible code.
PersonalDetails = namedtuple('PersonalDetails','blood_group age sex')
ContactInfo = namedtuple('ContactInfo','phone_number mobile_number')
d1=copy.deepcopy(d)  # independent copy, so later changes to d do not alter the stored entry
ls.append(d1)
print ls  # Python 2 print statement
nest()

This is how I would update my dict of dicts:
I would create a function that takes three arguments: the key of the sub-dict, the sub-key within that sub-dict, and the new value. The function looks up the sub-dict to be updated and then sets that value.
# Record with two nested sections that updateInfo() can modify.
d = {
    'Name': 'A',
    'Personal Details': {'Blood Group': 'O +ve', 'Age': '1', 'Sex': 'M'},
    'Contact Info': {'Phone Number': '01234567', 'Mobile Number': '9876543210'},
    'Date of Birth': '01-01-95'
}


def updateInfo(toBeUpdated, subkey, ValueToUpdate):
    """Set ``d[toBeUpdated][subkey]`` to ``ValueToUpdate`` and print ``d``.

    If ``toBeUpdated`` is not a key of the module-level dict ``d``, nothing
    is changed and a message is printed instead.
    """
    if toBeUpdated not in d:
        print ("No %s to update" % (toBeUpdated))
        return
    section = d[toBeUpdated]
    section[subkey] = ValueToUpdate
    d[toBeUpdated] = section
    print (d)


updateInfo('Contact Info','Mobile Number','999 999 9999')
the result I get from this:
{'Name': 'A', 'Personal Details': {'Blood Group': 'O +ve', 'Age': '1', 'Sex': 'M'}, 'Contact Info': {'Phone Number': '01234567', 'Mobile Number': '999 999 9999'}, 'Date of Birth': '01-01-95'}

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

convert xml which has 'children' into dictionary - python

Related

add key value in nested dictionary

Merge two dictionaries based on similarity excluding a key

python: combine lists to dictionary with header

How to create a list of dictionaries using for loop?

Create a key in a nested dict without using update function

Categories

Resources