How to remove # prefix while converting xml to json in Python

How to remove # prefix while converting xml to json in Python - python

I'm trying to convert xml to json in python using xmltodict library. Though, the xml is getting converted to json, before every key in dict, '#' is getting prefixed. Below is the code snippet and sample output:
import xmltodict
import json
with open('response.xml','r') as res_file:
doc = xmltodict.parse(res_file.read())
xml_json_str = json.dumps(doc)
final_json = json.loads(xml_json_str)
Output:
"CustomerInfo": {
"#address": "Bangalore, Karnataka 560034",
"#email": "abc#gmail.com",
"#name": "Sam",
}
How to remove # from all key's at one go?

Finally I found a solution which works like charm. While parsing the xml, set attr_prefix='' to remove all # from keys.
Below changes worked for me:
with open('response.xml','r') as res_file:
doc = xmltodict.parse(res_file.read(), attr_prefix='')

Check this out:
It will remove all the # from all keys be it in any node: I have added one extra note just to show you the example:
def removeAtTheRate(jsonFile,final_json_edited):
if jsonFile != {} and type(jsonFile) == dict:
for i in jsonFile.keys():
final_json_values = {}
for j in jsonFile[i]:
if j[:1] == '#':
final_json_values[j[1:]] = jsonFile[i][j]
if i[:1] == '#':
final_json_edited[i[1:]] = final_json_values
else:
final_json_edited[i] = final_json_values
print(final_json_edited)
doc = {"#CustomerInfo":{"#address": "Bangalore, Karnataka 560034","#email": "abc#gmail.com","#name": "Sam"},"Location":{"#Loc":"Mum"}}
removeAtTheRate(doc,{})
Result:
>> {'Location': {'Loc': 'Mum'}, 'CustomerInfo': {'name': 'Sam', 'address':
'Bangalore, Karnataka 560034', 'email': 'abc#gmail.com'}}

Related

How to add sub item to json in for loop

I am trying to achieve the following json output:
My current code:
#!/usr/bin/env python3.9
import json
complete_lst = []
url_lst = ['https://', 'https://', 'https://']
complete_lst.append({'title': 'Hello'})
for url in url_lst:
complete_lst.append({'watch': {'Season1': {'url': url}}
})
with open("Hello.json", "w") as file:
json.dump(complete_lst, file)
the output json file looks like this :
I want all the urls to be nested under watch->Season1->url key

Try this:
import json
complete_lst = []
url_lst = ['https://', 'https://', 'https://']
complete_lst.append({
'title': 'Hello',
'watch': {'Season1':{"url":[]}}
})
for url in url_lst:
complete_lst[0]["watch"]["Season1"]["url"].append(url)
print(complete_lst)
If your data is static then just do that:
import json
complete_lst = [{
'title': 'Hello',
'watch': {'Season1':{"url":['https://', 'https://', 'https://']}}
}]
print(complete_lst)

Another way of doing this would be to build a dictionary instead of list:
#!/usr/bin/env python3.9
import json
url_lst = ['https://', 'https://', 'https://']
complete_list = {}
complete_list['title'] = "Hello"
complete_list['watch'] = {}
complete_list['watch']['Season1'] = {}
complete_list['watch']['Season1']['urls'] = []
for url in url_lst:
complete_list['watch']['Season1']['urls'].append(url)
with open("Hello.json", "w") as file:
json.dump(complete_list, file)
Note: Here you don't need to access item by their indices and can directly use keys

json to dictionary in python

This is my json file input.
{"Report":{"id":101,"type":"typeA","Replist":[{"rptid":"r001","subrpt":{"subid":74,"subname":"name1","subval":113},"RelsubList":[{"Relid":8,"Relsubdetails":{"Rel_subname":"name8","Rel_Subval":65}},{"Relid":5,"Relsubdetails":{"Rel_subname":"name5","Rel_Subval":40}}],"fldA":30,"fldB":23}]}}
...
I am writing python program to convert the input into the below format in my dictionary.
I am new to python.
Expected output:
out: {"id": "101", "type": "typeA", "rptid": "r001", "subrpt_subid": "74", "subrpt_subname": "name1", "subrpt_subval":"113","Relid":"8","Rel_subname":"name8","Rel_Subval":"65","Relid":"5","Rel_subname":"name5","Rel_Subval":"40","fldA":"30","fldB":"23"
I used the following logic to convert the output till subrpt.
Current output:
out: {'id': '101', 'type': 'typeA', 'rptid': 'r001', 'subrpt_subid': '74', 'subrpt_subname': 'name1', 'subrpt_subval': '113'}
But I am struggling to get the logic of RelsubList(it looks like it has both list and dictionary[{}] ).
please help me to get the logic for the same.
import json
list1 = []
dict1 = {}
dict2 = {}
data_file = "samp1.json"
file = open(data_file)
for line in file:
json_line = json.loads(line)
json_line = json_line["Report"]
dict1["id"]=str(json_line["id"])
dict1["type"] = str(json_line["type"])
json_line = json_line["Replist"]
dict1["rptid"]= str(json_line[0]["rptid"])
dict1["subrpt_subid"] = str(json_line[0]["subrpt"]["subid"])
dict1["subrpt_subname"] = str(json_line[0]["subrpt"]["subname"])
dict1["subrpt_subval"] = str(json_line[0]["subrpt"]["subval"])
print("out:", dict1)

Some of your logic is confusing to me, i.e. why are you doing json.loads(line) in every loop?
Anyway, the following should get you the logic for RealsubList:
import json
f = open("data.json")
data = json.load(f)
for line in data:
relsublist = data["Report"]["Replist"][0]["RelsubList"]
print(relsublist)
Results in:
[{'Relid': 8, 'Relsubdetails': {'Rel_subname': 'name8', 'Rel_Subval': 65}}, {'Relid': 5, 'Relsubdetails': {'Rel_subname': 'name5', 'Rel_Subval': 40}}]
The reason for the [0] index after ["Replist"] is Replist contains an array of nested dictionaries, so you need to call it out by index. In this case its only a single array, so it would be 0

Dictionary key name from variable

I am trying to create a nested dictionary, whereby the key to each nested dictionary is named from the value from a variable. My end result should look something like this:
data_dict = {
'jane': {'name': 'jane', 'email': 'jane#example.com'},
'jim': {'name': 'jim', 'email': 'jim#example.com'}
}
Here is what I am trying:
data_dict = {}
s = "jane"
data_dict[s][name] = 'jane'
To my surprise, this does not work. Is this possible?

You want something like:
data_dict = {}
s = "jane"
data_dict[s] = {}
data_dict[s]['name'] = s
That should work, though I would recommend instead of a nested dictionary that you use a dictionary of names to either namedtuples or instances of a class.

Try this:
data_dict = {}
s = ["jane", "jim"]
for name in s:
data_dict[name] = {}
data_dict[name]['name'] = name
data_dict[name]['email'] = name + '#example.com'

as #Milad in the comment mentioned, you first need to initialize s as empty dictionary first
data={}
data['Tom']={}
data['Tom']['name'] = 'Tom Marvolo Riddle'
data['Tom']['email'] = 'iamlordvoldermort.com'

For existing dictionaries you can do dict[key] = value although if there is no dict that would raise an error. I think this is the code you want to have:
data_dict = {}
s = "jane"
data_dict[s] = {"name": s, "email": f"{s}#example.com"}
print(data_dict)
I just realized when I got a notification about this question:
data_dict = defaultdict(dict)
data_dict["jane"]["name"] = "jane"
Would be a better answer I think.

Turn a simple dictionary into dictionary with nested lists

Given the following data received from a web form:
for key in request.form.keys():
print key, request.form.getlist(key)
group_name [u'myGroup']
category [u'social group']
creation_date [u'03/07/2013']
notes [u'Here are some notes about the group']
members[0][name] [u'Adam']
members[0][location] [u'London']
members[0][dob] [u'01/01/1981']
members[1][name] [u'Bruce']
members[1][location] [u'Cardiff']
members[1][dob] [u'02/02/1982']
How can I turn it into a dictionary like this? It's eventually going to be used as JSON but as JSON and dictionaries are easily interchanged my goal is just to get to the following structure.
event = {
group_name : 'myGroup',
notes : 'Here are some notes about the group,
category : 'social group',
creation_date : '03/07/2013',
members : [
{
name : 'Adam',
location : 'London',
dob : '01/01/1981'
}
{
name : 'Bruce',
location : 'Cardiff',
dob : '02/02/1982'
}
]
}
Here's what I have managed so far. Using the following list comprehension I can easily make sense of the ordinary fields:
event = [ (key, request.form.getlist(key)[0]) for key in request.form.keys() if key[0:7] != "catches" ]
but I'm struggling with the members list. There can be any number of members. I think I need to separately create a list for them and add that to a dictionary with the non-iterative records. I can get the member data like this:
tmp_members = [(key, request.form.getlist(key)) for key in request.form.keys() if key[0:7]=="members"]
Then I can pull out the list index and field name:
member_arr = []
members_orig = [ (key, request.form.getlist(key)[0]) for key in request.form.keys() if key[0:7] ==
"members" ]
for i in members_orig:
p1 = i[0].index('[')
p2 = i[0].index(']')
members_index = i[0][p1+1:p2]
p1 = i[0].rfind('[')
members_field = i[0][p1+1:-1]
But how do I add this to my data structure. The following won't work because I could be trying to process members[1][name] before members[0][name].
members_arr[int(members_index)] = {members_field : i[1]}
This seems very convoluted. Is there a simper way of doing this, and if not how can I get this working?

You could store the data in a dictionary and then use the json library.
import json
json_data = json.dumps(dict)
print(json_data)
This will print a json string.
Check out the json library here

Yes, convert it to a dictionary, then use json.dumps(), with some optional parameters, to print out the JSON in the format you need:
eventdict = {
'group_name': 'myGroup',
'notes': 'Here are some notes about the group',
'category': 'social group',
'creation_date': '03/07/2013',
'members': [
{'name': 'Adam',
'location': 'London',
'dob': '01/01/1981'},
{'name': 'Bruce',
'location': 'Cardiff',
'dob': '02/02/1982'}
]
}
import json
print json.dumps(eventdict, indent=4)
The order of the key:value pairs is not always consistent, but if you're just looking for pretty-looking JSON that can be parsed by a script, while remaining human-readable, this should work. You can also sort the keys alphabetically, using:
print json.dumps(eventdict, indent=4, sort_keys=True)

The following python functions can be used to create a nested dictionary from the flat dictionary. Just pass in the html form output to decode().
def get_key_name(str):
first_pos = str.find('[')
return str[:first_pos]
def get_subkey_name(str):
'''Used with lists of dictionaries only'''
first_pos = str.rfind('[')
last_pos = str.rfind(']')
return str[first_pos:last_pos+1]
def get_key_index(str):
first_pos = str.find('[')
last_pos = str.find(']')
return str[first_pos:last_pos+1]
def decode(idic):
odic = {} # Initialise an empty dictionary
# Scan all the top level keys
for key in idic:
# Nested entries have [] in their key
if '[' in key and ']' in key:
if key.rfind('[') == key.find('[') and key.rfind(']') == key.find(']'):
print key, 'is a nested list'
key_name = get_key_name(key)
key_index = int(get_key_index(key).replace('[','',1).replace(']','',1))
# Append can't be used because we may not get the list in the correct order.
try:
odic[key_name][key_index] = idic[key][0]
except KeyError: # List doesn't yet exist
odic[key_name] = [None] * (key_index + 1)
odic[key_name][key_index] = idic[key][0]
except IndexError: # List is too short
odic[key_name] = odic[key_name] + ([None] * (key_index - len(odic[key_name]) + 1 ))
# TO DO: This could be a function
odic[key_name][key_index] = idic[key][0]
else:
key_name = get_key_name(key)
key_index = int(get_key_index(key).replace('[','',1).replace(']','',1))
subkey_name = get_subkey_name(key).replace('[','',1).replace(']','',1)
try:
odic[key_name][key_index][subkey_name] = idic[key][0]
except KeyError: # Dictionary doesn't yet exist
print "KeyError"
# The dictionaries must not be bound to the same object
odic[key_name] = [{} for _ in range(key_index+1)]
odic[key_name][key_index][subkey_name] = idic[key][0]
except IndexError: # List is too short
# The dictionaries must not be bound to the same object
odic[key_name] = odic[key_name] + [{} for _ in range(key_index - len(odic[key_name]) + 1)]
odic[key_name][key_index][subkey_name] = idic[key][0]
else:
# This can be added to the output dictionary directly
print key, 'is a simple key value pair'
odic[key] = idic[key][0]
return odic

Ordering with default dict append

I have a Python code, see below, which takes a JSON file in the structure:
{
"name":"Winking Entertainment",
"imports":"Translink Capital"
},
{
"name":"Wochacha",
"imports":"Sequoia Capital"
},
{
"name":"Wuhan Kindstar Diagnostics",
"imports":"Baird Venture Partners"
},
And aggregates repeat values in "imports" and turns the matching strings into a single array for that entry. (see snippet below)
import json
from collections import defaultdict
def map_names_to_imports(raw_data):
name_to_imports = defaultdict(list)
for row in raw_data:
name_to_imports[row['imports']].append(row['name'])
return name_to_imports
def reformat(name_to_imports):
output = []
for name, imports in name_to_imports.items():
new_dict = {
'name': name,
'imports': list(set(imports))
}
output.append(new_dict)
return output
def run(raw_data):
name_to_imports = map_names_to_imports(raw_data)
output = reformat(name_to_imports)
with open('clean-data2.json','wb') as f:
f.write(json.dumps(output))
if __name__ == '__main__':
raw_data = json.load(open('bricinvestors.json'))
run(raw_data)
The issue I am having is my Json file is not coming out the right way.
For some reason, name and imports are getting reversed. So my output looks like:
{"imports": ["SinoHub"], "name": "Iroquois Capital"}, {"imports": ["Qunar.com", "Lashou.com"], "name": "Tenaya Capital"}
In fact, I want to keep the {"name": "string", "imports": "string"} format -- and not the other way around.
What should I do?
Thanks.

If you're using Python 2.7+, you could use collections.OrderedDict as your input to json.loads(), instead of the standard Python dict. The standard library dict class doesn't guarantee the ordering of keys.

Building on dano's answer, you could use the OrderedDict.setdefault method instead of using a defaultdict:
import json
import collections
OrderedDict = collections.OrderedDict
def map_names_to_imports(raw_data):
name_to_imports = OrderedDict()
for row in raw_data:
name_to_imports.setdefault(row['imports'], []).append(row['name'])
return name_to_imports
def reformat(name_to_imports):
output = []
for name, imports in name_to_imports.items():
new_dict = OrderedDict([('name', name),
('imports', list(set(imports)))])
output.append(new_dict)
return output
def run(raw_data):
name_to_imports = map_names_to_imports(raw_data)
output = reformat(name_to_imports)
with open('clean-data2.json', 'wb') as f:
f.write(json.dumps(output))
if __name__ == '__main__':
raw_data = json.load(open('bricinvestors.json'),
object_pairs_hook=OrderedDict)
run(raw_data)

Final version, which is based in large part on #unutbu's answer.
import json
import collections
OrderedDict = collections.OrderedDict
def map_names_to_imports(raw_data):
name_to_imports = OrderedDict()
for row in raw_data:
name_to_imports.setdefault(row['imports'], []).append(row['name'])
return name_to_imports
def reformat(name_to_imports):
the_output = []
for name, imports in name_to_imports.items():
new_dict = OrderedDict([('name', name),
('imports', list(set(imports)))])
the_output.append(new_dict)
return the_output
def run(raw_data):
name_to_imports = map_names_to_imports(raw_data)
the_output = reformat(name_to_imports)
with open('data/clean-data2.json', 'w+', encoding='utf8') as f:
f.write(json.dumps(the_output))
if __name__ == '__main__':
raw_data = json.load(open('data/bricsinvestorsfirst.json'), object_pairs_hook=OrderedDict)
run(raw_data)

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

How to remove # prefix while converting xml to json in Python - python

Finally I found a solution which works like charm. While parsing the xml, set attr_prefix='' to remove all # from keys. Below changes worked for me: with open('response.xml','r') as res_file: doc = xmltodict.parse(res_file.read(), attr_prefix='')

Related

How to add sub item to json in for loop

json to dictionary in python

Dictionary key name from variable

Turn a simple dictionary into dictionary with nested lists

Ordering with default dict append

Categories

Resources