How do I make json.loads turn str into int? - python

I need '-40' to be -40 in the output. How can I do this?
In [2]: foo
Out[2]: '{\n\t"rssiIntentRange":"-40"\n}'
In [3]: import json
In [4]: json.loads(foo)
Out[4]: {u'rssiIntentRange': u'-40'}
I'm searching for something clean and generic. I'm already post-processing the result of json.loads, and it is very messy because of all the different data types and indexes. If json.loads is not the best option, I'm open to other approaches to handling JSON as well.

If you want to influence what happens when you call json.loads(), you need to write an object hook:
import json
def int_please_object_hook(obj):
    """Convert integer-like string values of a decoded JSON object to int.

    Meant to be passed as the ``object_hook`` argument of ``json.loads``,
    which calls it with the dict built for every JSON object it decodes
    (nested objects included).

    :param obj: The dict produced by the decoder for one JSON object.
    :return: A new dict with the same keys. String values accepted by
        ``int()`` are replaced by that integer; every other value is
        copied unchanged.
    """
    rv = {}
    for k, v in obj.items():
        # ``basestring`` exists only on Python 2; on Python 3 the text type
        # is ``str`` and referencing ``basestring`` raises NameError.
        if isinstance(v, str):
            try:
                rv[k] = int(v)
            except ValueError:
                # Not an integer literal: keep the original string.
                rv[k] = v
        else:
            rv[k] = v
    return rv
j = '{"key1": "value1", "key2": "10", "key3": "-10"}'
print(json.loads(j))
# {'key1': 'value1', 'key2': '10', 'key3': '-10'}
print(json.loads(j, object_hook=int_please_object_hook))
# {'key1': 'value1', 'key2': 10, 'key3': -10}

One way to do this would be to convert the -40 string to int after loading the json.
Example -
>>> foo = '{\n\t"rssiIntentRange":"-40"\n}'
>>> import json
>>> d = json.loads(foo)
>>> d['rssiIntentRange'] = int(d['rssiIntentRange'])
>>> d
{'rssiIntentRange': -40}
For changing all such values inside the loaded dictionary, you can do -
# Convert every top-level string value that looks like an integer to int.
d = json.loads(foo)
for k, v in d.items():
    # Only strings are candidates: int() on a float would silently truncate
    # it, and int() on a JSON boolean would turn true/false into 1/0.
    if not isinstance(v, str):
        continue
    try:
        # Re-assigning an existing key while iterating is safe because the
        # dict does not change size.
        d[k] = int(v)
    except ValueError:
        # Not an integer literal: leave the original string in place.
        pass
Example/Demo -
>>> foo = '{\n\t"rssiIntentRange":"-40",\n\t"blah":"abcd",\n\t"anothernumber":"-10"\n}'
>>> d = json.loads(foo)
>>> for k,v in d.items():
... try:
... d[k] = int(v)
... except (ValueError, TypeError):
... pass
...
>>> d
{'blah': 'abcd', 'anothernumber': -10, 'rssiIntentRange': -40}

Related

Nested and escaped JSON payload to flattened dictionary - python

I'm looking for any suggestions to resolve an issue I'm facing. It might seem as a simple problem, but after a few days trying to find an answer - I think it is not anymore.
I'm receiving data (StringType) in a following JSON-like format, and there is a requirement to turn it into flat key-value pair dictionary. Here is a payload sample:
s = """{"status": "active", "name": "{\"first\": \"John\", \"last\": \"Smith\"}", "street_address": "100 \"Y\" Street"}"""
and the desired output should look like this:
{'status': 'active', 'name_first': 'John', 'name_last': 'Smith', 'street_address': '100 "Y" Street'}
The issue is I can't find a way to turn original string (s) into a dictionary. If I can achieve that the flattening part is working perfectly fine.
import json
import collections
import ast
#############################################################
# Flatten complex structure into a flat dictionary
#############################################################
def flatten_dictionary(dictionary, parent_key=False, separator='_', value_to_str=True):
    """
    Turn a nested complex json into a flattened dictionary

    Every value is first passed through ``json.loads``, so a string that
    itself contains JSON (for example an escaped nested object) is decoded
    and flattened as well. Nested mappings contribute
    ``parent<separator>child`` keys and list elements contribute
    ``parent<separator>index`` keys. Empty mappings and empty lists are
    reported as ``None``.

    :param dictionary: The dictionary to flatten
    :param parent_key: The string to prepend to dictionary's keys
    :param separator: The string used to separate flattened keys
    :param value_to_str: Force all returned values to string type
    :return: A flattened dictionary
    """
    # Imported locally: the ABC aliases on the ``collections`` package itself
    # (``collections.MutableMapping``) were removed in Python 3.10.
    from collections.abc import MutableMapping

    items = []
    for key, value in dictionary.items():
        new_key = str(parent_key) + separator + key if parent_key else key
        try:
            value = json.loads(value)
        except (TypeError, ValueError):
            # TypeError: value is not str/bytes (already decoded).
            # ValueError: value is text but not valid JSON. Keep it as is.
            pass
        if isinstance(value, MutableMapping):
            if not value:
                items.append((new_key, None))
            else:
                # Forward separator and value_to_str so nested levels follow
                # the caller's settings instead of the defaults.
                items.extend(
                    flatten_dictionary(value, new_key, separator, value_to_str).items()
                )
        elif isinstance(value, list):
            if value:
                for index, element in enumerate(value):
                    wrapped = {str(index): (str(element) if value_to_str else element)}
                    items.extend(
                        flatten_dictionary(wrapped, new_key, separator, value_to_str).items()
                    )
            else:
                items.append((new_key, None))
        else:
            items.append((new_key, (str(value) if value_to_str else value)))
    return dict(items)
# Data sample; string and dictionary
s = """{"status": "active", "name": "{\"first\": \"John\", \"last\": \"Smith\"}", "street_address": "100 \"Y\" Street"}"""
d = {"status": "active", "name": "{\"first\": \"John\", \"last\": \"Smith\"}", "street_address": "100 \"Y\" Street"}
# Works for dictionary type
print(flatten_dictionary(d))
# Doesn't work for string type, for any of the below methods
e = eval(s)
# a = ast.literal_eval(s)
# j = json.loads(s)
Try:
import json
import re
def jsonify(s):
    """Parse the JSON-like payload *s*, dropping the inner quotes of the address.

    The payload embeds an unescaped JSON object inside a string
    (``"name": "{...}"``) and unescaped double quotes inside the
    ``street_address`` value. Both are repaired textually before parsing:

    * the quotes wrapping an embedded object are removed, so it parses as a
      nested object;
    * the two inner quotes of ``street_address`` are removed.

    :param s: The malformed JSON text.
    :return: The decoded dictionary.
    :raises ValueError: If the repaired text is still not valid JSON.
    """
    s = s.replace('"{', '{').replace('}"', '}')
    # ``[^"]+`` rather than a greedy ``.+`` keeps the match inside the
    # street_address value when further fields follow it.
    s = re.sub(
        r'street_address":\s+"([^"]+)"([^"]+)"([^"]+)"',
        r'street_address": "\1\2\3"',
        s,
    )
    return json.loads(s)
If you must keep the quotes around Y, try:
def jsonify(s):
    """Parse the JSON-like payload *s*, keeping the inner quotes of the address.

    The quotes wrapping an embedded object (``"name": "{...}"``) are removed
    so it parses as a nested object. The unescaped inner quotes of
    ``street_address`` are removed for parsing and then put back into the
    decoded value.

    :param s: The malformed JSON text.
    :return: The decoded dictionary, with ``street_address`` containing its
        original inner double quotes.
    :raises ValueError: If the repaired text is still not valid JSON.
    """
    s = s.replace('"{', '{').replace('}"', '}')
    # ``[^"]+`` rather than a greedy ``.+`` keeps the match inside the
    # street_address value when further fields follow it.
    pattern = re.compile(r'street_address":\s+"([^"]+)"([^"]+)"([^"]+)"')
    search = pattern.search(s)
    if search is None:
        # Nothing to repair in the address; parse the text as it stands.
        return json.loads(s)
    s = pattern.sub(r'street_address": "\1\2\3"', s, count=1)
    dict_version = json.loads(s)
    before, quoted, after = search.groups()
    # Rebuild the value from the captured pieces. A str.replace() on the
    # quoted text would also re-quote any other occurrence of it (for
    # example the "S" in "Street").
    dict_version['street_address'] = before + '"' + quoted + '"' + after
    return dict_version
A more generalized attempt:
def jsonify(s):
    """Parse the JSON-like payload *s*, repairing any value with inner quotes.

    The quotes wrapping an embedded object (``"name": "{...}"``) are removed
    so it parses as a nested object. Any ``"key": "a "b" c"`` pair that
    follows a ``,`` or ``}`` has its inner quotes removed for parsing and
    restored afterwards in the decoded value.

    Only top-level keys get their quotes restored; a repaired pair nested
    inside another object still parses, but keeps its value without the
    inner quotes.

    :param s: The malformed JSON text.
    :return: The decoded dictionary.
    :raises ValueError: If the repaired text is still not valid JSON.
    """
    pattern = r'(?<=[,}])\s*"(.[^\{\}:,]+?)":\s+"([^\{\}:,]+?)"([^\{\}:,]+?)"([^\{\}:,]+?)"([,\}])'
    s = s.replace('"{','{').replace('}"','}')
    # findall() alone tells us whether anything needs repairing.
    matches = re.findall(pattern, s)
    if matches:
        s = re.sub(pattern, r'"\1": "\2\3\4"\5', s)
    dict_version = json.loads(s)
    for key, before, quoted, after, _terminator in matches:
        if key in dict_version:
            # Rebuild from the captured pieces. A str.replace() on the
            # quoted text would also re-quote any other occurrence of it.
            dict_version[key] = before + '"' + quoted + '"' + after
    return dict_version

Parse nested json to csv using Python Pandas

I have a json in below format:
{"MainName":[{"col1":"12345","col2":"False","col3":"190809","SubName1":{"col4":30.00,"SubName2":{"col5":"19703","col6":"USD"}},"col7":"7372267","SubName3":[{"col8":"345337","col9":"PC"}],"col10":"10265","col11":"29889004","col12":"calculated","col13":"9218","SubName4":{"col14":1,"SubName5":{"col15":"1970324","col16":"integer"}},"col17":"434628","col18":"2020-02-06T13:47:40.000-0800","col19":"754878037","SubName6":{"col20":30.00,"SubName7":{"col21":"19703248","col22":"USD"}}},{"col1":"12345","col2":"False","col3":"190809","SubName1":{"col4":30.00,"SubName2":{"col5":"19703","col6":"USD"}},"col7":"7372267","SubName3":[{"col8":"345337","col9":"PC"}],"col10":"10265","col11":"29889004","col12":"calculated","col13":"9218","SubName4":{"col14":1,"SubName5":{"col15":"1970324","col16":"integer"}},"col17":"434628","col18":"2020-02-06T13:47:40.000-0800","col19":"754878037","SubName6":{"col20":30.00,"SubName7":{"col21":"19703248","col22":"USD"}}}],"skip":0,"top":2,"next":"/v1/APIName?skip=2&top=2"}
I want to convert it into csv with below format:
MainName_col1,MainName_col2,MainName_col3,MainName_SubName1_col4,MainName_SubName1_SubName2_col5,MainName_SubName1_SubName2_col6,MainName_col7,MainName_SubName3_col8,MainName_SubName3_col9,MainName_col10,MainName_col11,MainName_col12,MainName_col13,MainName_SubName4_col14,MainName_SubName4_SubName5_col15,MainName_SubName4_SubName5_col16,MainName_col17,MainName_col18,MainName_col19,MainName_SubName6_col20,MainName_SubName6_SubName7_col21,MainName_SubName6_SubName7_col22
12345,False,190809,30.0,19703,USD,7372267,345337,PC,10265,29889004,calculated,9218,1,1970324,integer,434628,2020-02-06T13:47:40.000-0800,754878037,30.0,19703248,USD
12345,False,190809,30.0,19703,USD,7372267,345337,PC,10265,29889004,calculated,9218,2,123453,integer,434628,2020-02-06T13:47:40.000-0800,754878037,30.0,19703248,USD
Kindly help me out in this.
Use below function to flatten your JSON data.
dc = {"MainName":[{"col1":"12345","col2":False,"col3":"190809","SubName1":{"col4":30.00,"SubName2":{"col5":"19703","col6":"USD"}},"col7":"7372267","SubName3":[{"col8":"345337","col9":"PC"}],"col10":"10265","col11":"29889004","col12":"calculated","col13":"9218","SubName4":{"col14":1,"SubName5":{"col15":"1970324","col16":"integer"}},"col17":"434628","col18":"2020-02-06T13:47:40.000-0800","col19":"754878037","SubName6":{"col20":30.00,"SubName7":{"col21":"19703248","col22":"USD"}}}],"skip":0,"top":1,"next":"/v1/APIName?skip=1&top=1"}
def flatten(root: str, dict_obj: dict) -> dict:
    """Flatten a nested dict into a single-level dict with prefixed keys.

    Each key is prefixed with ``root`` and the keys of its ancestors,
    joined by ``_``. A list value is represented by its last element only;
    when that element is a dict it is flattened under the list's key (no
    index is added to the key).

    :param root: Prefix for every generated key.
    :param dict_obj: The (possibly nested) dictionary to flatten.
    :return: A new flat dictionary. Lists that are empty, or whose last
        element is not a dict, are stored unchanged under their key.
    """
    flat = {}
    for key, val in dict_obj.items():
        flat_key = f'{root}_{key}'
        if isinstance(val, list):
            if not val or not isinstance(val[-1], dict):
                # Nothing to descend into: an empty list would raise
                # IndexError on val[-1], and a scalar element has no keys.
                flat[flat_key] = val
                continue
            val = val[-1]
        if isinstance(val, dict):
            flat.update(flatten(flat_key, val))
        else:
            flat[flat_key] = val
    return flat
flatten('MainName', dc['MainName'][0])
It will give you expected output. Then use it the way you want.
{'MainName_col1': '12345',
'MainName_col2': False,
'MainName_col3': '190809',
'MainName_SubName1_col4': 30.0,
'MainName_SubName1_SubName2_col5': '19703',
'MainName_SubName1_SubName2_col6': 'USD',
'MainName_col7': '7372267',
'MainName_SubName3_col8': '345337',
'MainName_SubName3_col9': 'PC',
'MainName_col10': '10265',
'MainName_col11': '29889004',
'MainName_col12': 'calculated',
'MainName_col13': '9218',
'MainName_SubName4_col14': 1,
'MainName_SubName4_SubName5_col15': '1970324',
'MainName_SubName4_SubName5_col16': 'integer',
'MainName_col17': '434628',
'MainName_col18': '2020-02-06T13:47:40.000-0800',
'MainName_col19': '754878037',
'MainName_SubName6_col20': 30.0,
'MainName_SubName6_SubName7_col21': '19703248',
'MainName_SubName6_SubName7_col22': 'USD'}
As I understand it, your dc will look like the one below:
dc = {"MainName":[{"col1":"12345","col2":"False","col3":"190809","SubName1":{"col4":30.00,"SubName2":{"col5":"19703","col6":"USD"}},"col7":"7372267","SubName3":[{"col8":"345337","col9":"PC"}],"col10":"10265","col11":"29889004","col12":"calculated","col13":"9218","SubName4":{"col14":1,"SubName5":{"col15":"1970324","col16":"integer"}},"col17":"434628","col18":"2020-02-06T13:47:40.000-0800","col19":"754878037","SubName6":{"col20":30.00,"SubName7":{"col21":"19703248","col22":"USD"}}},{"col1_a":"12345XX","col2_b":"False","col3_c":"190809","SubName1":{"col4_d":30.00,"SubName2":{"col5_e":"19703","col6_f":"USD"}},"col7_g":"7372267","SubName3":[{"col8_h":"345337","col9":"PC"}],"col10_i":"10265","col11_j":"29889004","col12_k":"calculated","col13_l":"9218","SubName4":{"col14_m":1,"SubName5":{"col15_n":"1970324","col16_o":"integer"}},"col17_p":"434628","col18_q":"2020-02-06T13:47:40.000-0800","col19_r":"754878037","SubName6":{"col20_s":30.00,"SubName7":{"col21_t":"19703248","col22_u":"USDZZ"}}}],"skip":0,"top":2,"next":"/v1/APIName?skip=2&top=2"}
I used the above answer to flatten everything into single object
def flatten(root: str, dict_obj: dict) -> dict:
    """Flatten a nested dict into a single-level dict with prefixed keys.

    Each key is prefixed with ``root`` and the keys of its ancestors,
    joined by ``_``. A list value is represented by its last element only;
    when that element is a dict it is flattened under the list's key (no
    index is added to the key).

    :param root: Prefix for every generated key.
    :param dict_obj: The (possibly nested) dictionary to flatten.
    :return: A new flat dictionary. Lists that are empty, or whose last
        element is not a dict, are stored unchanged under their key.
    """
    flat = {}
    for key, val in dict_obj.items():
        flat_key = f'{root}_{key}'
        if isinstance(val, list):
            if not val or not isinstance(val[-1], dict):
                # Nothing to descend into: an empty list would raise
                # IndexError on val[-1], and a scalar element has no keys.
                flat[flat_key] = val
                continue
            val = val[-1]
        if isinstance(val, dict):
            flat.update(flatten(flat_key, val))
        else:
            flat[flat_key] = val
    return flat
# Write one CSV row per record of dc['MainName'] to sample.csv.
import csv

rows = [flatten('MainName', record) for record in dc['MainName']]

# Records may not share the same keys, so the header is the union of all
# keys in first-seen order (dict.fromkeys de-duplicates and keeps order).
fieldnames = list(dict.fromkeys(key for row in rows for key in row))

# Open the file once. newline="" lets the csv module control line endings.
# "w" replaces any previous content so the header is written exactly once.
with open("sample.csv", "w", newline="") as guestFile:
    writer = csv.DictWriter(guestFile, fieldnames=fieldnames, restval="")
    writer.writeheader()
    # Columns missing from a record are filled with restval ("").
    writer.writerows(rows)
Check out my code at https://repl.it/#TamilselvanLaks/jsontocsvmul
Note: Use the 'run' button to run the program; on the left side you can see sample.csv,
where you can see all the keys in the format you want.
Please let me know whether my answer meets your expectations.

parse empty string using json

I was wondering if there was a way to use json.loads in order to automatically convert an empty string in something else, such as None.
For example, given:
data = json.loads('{"foo":"5", "bar":""}')
I would like to have:
data = {"foo":"5", "bar":None}
Instead of:
data = {"foo":"5", "bar":""}
You can use a dictionary comprehension:
data = json.loads('{"foo":"5", "bar":""}')
res = {k: v if v != '' else None for k, v in data.items()}
{'foo': '5', 'bar': None}
This will only deal with the first level of a nested dictionary. You can use a recursive function to deal with the more generalised nested dictionary case:
def updater(d, inval, outval):
    """Replace every value equal to *inval* with *outval*, recursing into dicts.

    The dictionary is modified in place; nested dictionaries are processed
    recursively. Values inside lists are not visited.

    :param d: The (possibly nested) dictionary to update.
    :param inval: The value to look for.
    :param outval: The value to put in its place.
    :return: The same dictionary object *d*, after modification.
    """
    for k, v in d.items():
        if isinstance(v, dict):
            updater(v, inval, outval)
        elif v == inval:
            # Honour the arguments instead of hard-coding '' and None.
            # Re-assigning an existing key during iteration is safe because
            # the dict does not change size.
            d[k] = outval
    return d
data = json.loads('{"foo":"5", "bar":"", "nested": {"test": "", "test2": "5"}}')
res = updater(data, '', None)
{'foo': '5', 'bar': None,
'nested': {'test': None, 'test2': '5'}}
You can also accomplish this with the json.loads object_hook parameter. For example:
import json
import six
def empty_string2none(obj):
    """Replace empty-string values of a decoded JSON object with ``None``.

    Meant to be passed as the ``object_hook`` argument of ``json.loads``,
    which calls it for every decoded JSON object, so nested objects are
    covered without explicit recursion here.

    :param obj: The dict produced by the decoder for one JSON object.
    :return: The same dict, modified in place.
    """
    # dict.items() is the Python 3 equivalent of six.iteritems(); using it
    # means this function needs no third-party helper.
    for k, v in obj.items():
        if v == '':
            # Re-assigning an existing key during iteration is safe because
            # the dict does not change size.
            obj[k] = None
    return obj
print(json.loads('{"foo":"5", "bar":"", "hello": {"world": ""}}',
object_hook=empty_string2none))
This will print
{'foo': '5', 'bar': None, 'hello': {'world': None}}
This way, you don't need additional recursion.
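I did some trial and error: formatting None into the JSON text (for example with "%r" % None) does not work, because json.loads() then just returns the string 'None'. To get a real None you have to round-trip it through json.dumps() and json.loads(), as I do in this example: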
import json

# '%r' % None renders the text None, and the surrounding quotes make it an
# ordinary JSON string, so data['bar'] is the string 'None', not None.
data = json.loads('{"foo":"5", "bar":"%r"}' % (None))

# json.dumps() serialises None as JSON null, which json.loads() decodes back
# to None.
data2 = json.loads(json.dumps({'foo': 5, 'bar': None}))

if data2['bar'] is None:
    print('worked')
    print(data['bar'])
else:
    print('did not work')

Construct python dict from DeepDiff result

I have a DeepDiff result which is obtained by comparing two JSON files. I have to construct a python dictionary from the deepdiff result as follows.
json1 = {"spark": {"ttl":3, "poll":34}}
json2 = {"spark": {"ttl":3, "poll":34, "toll":23}, "cion": 34}
deepdiffresult = {'dictionary_item_added': {"root['spark']['toll']", "root['cion']"}}
expecteddict = {"spark" : {"toll":23}, "cion":34}
How can this be achieved?
There is probably a better way to do this. But you can parse the returned strings and chain together a new dictionary with the result you want.
json1 = {"spark": {"ttl":3, "poll":34}}
json2 = {"spark": {"ttl":3, "poll":34, "toll":23}, "cion": 34}
deepdiffresult = {'dictionary_item_added': {"root['spark']['toll']", "root['cion']"}}
added = deepdiffresult['dictionary_item_added']
def convert(s, j):
    """Build a nested dict holding the value that a DeepDiff path points at.

    For example ``convert("root['spark']['toll']", j)`` returns
    ``{'spark': {'toll': j['spark']['toll']}}``.

    :param s: A DeepDiff path string such as ``"root['a']['b']"``. Quoted
        segments (single or double quotes) become string keys and bare
        integer segments such as ``[0]`` become integer keys/indices.
    :param j: The object the path refers to (normally the second of the two
        compared documents).
    :return: A nested dict with one key per path segment, whose innermost
        value is the value found in *j*, or ``None`` if the path cannot be
        followed. An empty dict if *s* contains no path segments.
    """
    import re

    # Parse the bracketed segments rather than stripping "root", "[" and "'"
    # everywhere, which would corrupt keys containing those characters.
    segment = re.compile(r"""\[(?:'([^']*)'|"([^"]*)"|(-?\d+))\]""")
    keys = []
    for match in segment.finditer(s):
        text = match.group(match.lastindex)
        # Group 3 is a bare integer index; groups 1 and 2 are quoted keys.
        keys.append(int(text) if match.lastindex == 3 else text)

    # Walk down j. Each step is an explicit lookup, so falsy intermediate
    # values (0, '', {}) do not restart the lookup from the top.
    value = j
    for key in keys:
        try:
            value = value[key]
        except (KeyError, IndexError, TypeError):
            value = None
            break

    # Wrap the value from the innermost key outwards.
    nested = value if keys else {}
    for key in reversed(keys):
        nested = {key: nested}
    return nested
def _merge_nested(target, source):
    """Recursively merge *source* into *target* in place."""
    for key, value in source.items():
        if isinstance(value, dict) and isinstance(target.get(key), dict):
            _merge_nested(target[key], value)
        else:
            target[key] = value


# Combine the per-path dictionaries. A plain dict.update() is shallow, so two
# added items under the same parent (for example root['spark']['a'] and
# root['spark']['b']) would overwrite each other.
added_dict = {}
for added_str in added:
    _merge_nested(added_dict, convert(added_str, json2))
added_dict
#returns:
{'cion': 34, 'spark': {'toll': 23}}
Simple answer:
There is a third-party package called dictdiffer (it is not part of the standard library) that computes the difference between two dictionaries. You can try it:
$ pip install dictdiffer
Examples:
from dictdiffer import diff

# diff() returns a lazy iterator of (action, path, changes) tuples rather
# than a dict, so comparing it directly to the expected dict is always
# False. Materialise it and inspect the reported changes instead.
result = list(diff(json1, json2))
print(result)
References:
DictDiffer

how to convert python dictionary into parent-child relation?

input_dictionary= {'A': [{'B1': [{'C1': ['D2']}, {'C2': ['D3']}]}, {'B2': [{'C1': ['D2']}, {'C2': ['D3']}]}]}
Output:
['A','A_B1','A_B2','A_B1_C1','A_B1_C2',.....so on]
So, the generic format is parent_child_subchild and so on.
Any advice/guideline would be appreciated.
def stringify(d, prefix=None):
    """List every node of a nested dict/list tree as a ``parent_child`` path.

    Each dict maps a node name to a list of children; a child is either
    another such dict or a leaf string. Paths are returned depth-first,
    each parent before its children.

    :param d: A dict of ``{name: [children]}`` or a leaf string.
    :param prefix: Names of the ancestors of *d*; callers normally omit it.
    :return: A list of ``'_'``-joined paths, empty for an empty dict.
    """
    # Avoid a mutable default argument.
    prefix = [] if prefix is None else prefix
    if not isinstance(d, dict):
        # Leaf: its path is the ancestors plus the leaf itself.
        return ['_'.join(prefix + [d])]
    paths = []
    # Visit every key; returning from inside the loop would stop after the
    # first one.
    for name, children in d.items():
        branch = prefix + [name]
        paths.append('_'.join(branch))
        for child in children:
            paths.extend(stringify(child, branch))
    return paths
>>> stringify(input_dictionary)
['A', 'A_B1', 'A_B1_C1', 'A_B1_C1_D2', 'A_B1_C2', 'A_B1_C2_D3', 'A_B2', 'A_B2_C1', 'A_B2_C1_D2', 'A_B2_C2', 'A_B2_C2_D3']

Categories

Resources