Convert an uneven nested dictionary into tabular form

Convert an uneven nested dictionary into tabular form - python

res = {'Head': {'Ide': 'GLE', 'ID': '7b', 'Source': 'CARS', 'Target': 'TULUM', 'Country': 'GL'},
'Load': {'Stat': {'Code': '21', 'Reason': 'invalid'}, 'SrcFilePath': '/path.xls'}}
res is the nested dictionary that needs to be converted into a tabular form.
With the following columns and respective values:
Ide ID Source Target Country Code Reason SrcFilePath
Code:
for col,data in res.items():
final_data = dict(data.items())
df = pd.DataFrame(final_data)
print(df)
Error:
ValueError: If using all scalar values, you must pass an index

You can try:
pd.DataFrame.from_dict(res, orient='index')

You could try using:
pd.json_normalize(res)
Although the output can be a bit "ugly", it actually works.

I assume that res isn't the only record and there's data like:
data = [
{'Head': {'Ide': 'GLE', 'ID': '7b', 'Source': 'CARS', 'Target': 'TULUM', 'Country': 'GL'}, 'Load': {'Stat': {'Code': '21', 'Reason': 'invalid'}, 'SrcFilePath': '/path.xls'}}
, {'Head': {'Ide': 'ABC', 'ID': '8b', 'Source': 'CARS', 'Target': 'TULUM', 'Country': 'AB'}, 'Load': {'Stat': {'Code': '21', 'Reason': 'invalid'}, 'SrcFilePath': '/path.xls'}}
, {'Head': {'Ide': 'EFG', 'ID': '9b', 'Source': 'CARS', 'Target': 'TULUM', 'Country': 'EF'}, 'Load': {'Stat': {'Code': '21', 'Reason': 'invalid'}, 'SrcFilePath': '/path.xls'}}
]
So we have to write a procedure to flatten records and apply it by map to the data before transforming records into a frame:
def flatten_dict(d: dict) -> dict:
    """Collapse a nested dictionary into a single-level dictionary.

    Nested dictionaries are walked recursively and only their leaf
    key/value pairs are kept; the keys of the intermediate levels are
    dropped.  When the same leaf key occurs in more than one branch, the
    value encountered last (in iteration order) wins.

    Args:
        d: Dictionary whose values may themselves be dictionaries.

    Returns:
        A new dictionary mapping each leaf key to its value.
    """
    res = {}
    for k, v in d.items():
        # isinstance (rather than an exact type check) also flattens dict
        # subclasses such as OrderedDict or defaultdict.
        if isinstance(v, dict):
            res.update(flatten_dict(v))
        else:
            res[k] = v
    return res
output = pd.DataFrame(map(flatten_dict, data))
The output:
Ide ID Source Target Country Code Reason SrcFilePath
0 GLE 7b CARS TULUM GL 21 invalid /path.xls
1 ABC 8b CARS TULUM AB 21 invalid /path.xls
2 EFG 9b CARS TULUM EF 21 invalid /path.xls

Related

python according to the same value combining dictionary

I have a list of dicts like this:
[
{'id': 'A123',
'feature': {'name': 'jack', 'age' : '18' },
'create_time': '2022-5-17 10:29:47',
'is_fast': False},
{'id': 'A123',
'feature': {'gender': 'male'},
'create_time': '2022-5-17 10:29:47',
'is_fast': False},
{'id': 'A123',
'habit': {'name': 'read'},
'create_time': '2022-5-15 10:29:45',
'is_fast': False},
{'id': 'A456',
'feature': {'name': 'rose'},
'create_time': '2022-4-15 10:29:45',
'is_fast': False},
{'id': 'A456',
'habit': {'name': 'sport'},
'create_time': '2022-3-15 10:29:45',
'is_fast': False}
]
But I want to merge the entries that share the same "id" value using some function.
The desired output is as follows
[
{'id': 'A123',
'feature': {'name': 'jack', 'age' : '18' ,'gender': 'male'},
'habit': {'name': 'read'},
'create_time': '2022-5-17 10:29:47', #Get the latest time based on the same id
'is_fast': False},
{'id': 'A456',
'feature': {'name': 'rose'},
'habit': {'name': 'sport'},
'create_time': '2022-4-15 10:29:45',
'is_fast': False},
]
How can I merge the same "id" values according to these dictionaries..

This should get you started... I put some inline notes to explain what the code is doing. You still need to implement a date time comparison.
def merge_dicts(lst):
    """Merge dictionaries that share the same ``'id'`` into one record each.

    Rows are processed in order.  The first row seen for an id seeds the
    record; every later row with that id is folded into it:

    * a key the record does not have yet is added,
    * dictionary values are merged with ``dict.update`` (later rows win on
      clashing sub-keys),
    * values that both parse as ``'%Y-%m-%d %H:%M:%S'`` timestamps keep the
      later of the two,
    * any other differing value keeps what was stored first.

    The input rows and their nested dictionaries are never modified.

    Args:
        lst: Iterable of dictionaries, each containing an ``'id'`` key.

    Returns:
        A list with one merged dictionary per distinct id, ordered by the
        first appearance of each id.

    Raises:
        KeyError: If a row has no ``'id'`` key.
    """
    from datetime import datetime

    timestamp_format = '%Y-%m-%d %H:%M:%S'

    def is_later(new, old):
        """Return True only when both values are timestamps and new > old."""
        try:
            return (datetime.strptime(new, timestamp_format)
                    > datetime.strptime(old, timestamp_format))
        except (TypeError, ValueError):
            # Not a pair of timestamps: keep the value already stored.
            return False

    final = {}  # id -> merged record
    for row in lst:
        # Start from an empty record instead of aliasing the row, so that
        # merging never writes into the caller's data.
        record = final.setdefault(row['id'], {})
        for k, v in row.items():
            if k not in record:
                # Copy nested dicts: they are updated in place further down.
                record[k] = dict(v) if isinstance(v, dict) else v
            elif isinstance(v, dict) and isinstance(record[k], dict):
                record[k].update(v)
            elif is_later(v, record[k]):
                # Parsed comparison: '2022-10-1' sorts after '2022-5-17',
                # which plain string comparison would get wrong.
                record[k] = v
    return list(final.values())
print(merge_dicts(lst))
output:
[
{
'id': 'A123',
'feature': {'name': 'jack', 'age': '18', 'gender': 'male'},
'create_time': '2022-5-17 10:29:47',
'is_fast': False,
'habit': {'name': 'read'}
},
{
'id': 'A456',
'feature': {'name': 'rose'},
'create_time': '2022-4-15 10:29:45',
'is_fast': False,
'habit': {'name': 'sport'}
}
]

You can use the dict.setdefault method to initialize sub-dicts under keys that don't already exist to avoid cluttering up your code with conditional statements that test the existence of keys:
from datetime import datetime


def _is_later(new, old):
    """Return True when ``new`` should replace ``old``.

    Values that both parse as '%Y-%m-%d %H:%M:%S' timestamps are compared as
    datetimes, because non-zero-padded dates do not sort correctly as
    strings ('2022-5-17' > '2022-10-01' lexically).  Anything else falls
    back to a plain ``>`` comparison.
    """
    fmt = '%Y-%m-%d %H:%M:%S'
    try:
        return datetime.strptime(new, fmt) > datetime.strptime(old, fmt)
    except (TypeError, ValueError):
        return new > old


# Merge the rows of ``lst`` (a list of dicts with an 'id' key) per id.
merged = {}
for d in lst:
    # Seed with a fresh dict rather than the row itself so the input rows
    # are not modified while merging.
    s = merged.setdefault(d['id'], {})
    for k, v in d.items():
        if isinstance(v, dict):
            # Sub-dicts are merged into a copy owned by the record.
            s.setdefault(k, {}).update(v)
        elif k not in s or _is_later(v, s[k]):
            s[k] = v  # first value seen, or a later date/time takes precedence
print(list(merged.values()))
Demo: https://replit.com/#blhsing/BlandCarelessPolygons#main.py

Converting nested list into data frame using pandas

I have the dictionary below, which is stored in data:
{'id': 255719,
'display_name': 'Off Broadway Apartments',
'access_right': {'status': 'OWNED', 'api_enabled': True},
'creation_time': '2021-04-26T15:53:29+00:00',
'status': {'value': 'OFFLINE', 'last_change': '2021-07-10T17:26:50+00:00'},
'licence': {'id': 74213,
'code': '23AF-15A8-0514-2E4B-04DE-5C19-A574-B20B',
'bound_mac_address': '00:11:32:C2:FE:6A',
'activation_time': '2021-04-26T15:53:29+00:00',
'type': 'SUBSCRIPTION'},
'timezone': 'America/Chicago',
'version': {'agent': '3.7.0-b001', 'package': '2.5.1-0022'},
'location': {'latitude': '41.4126', 'longitude': '-99.6345'}}
I would like to convert it into a data frame. Can anyone advise?
I tried below code
df = pd.DataFrame(data)
but it's not coming out properly because there are many nested dictionaries. Can anyone advise?

from pandas.io.json import json_normalize
# load json
json = {'id': 255719,
'display_name': 'Off Broadway Apartments',
'access_right': {'status': 'OWNED', 'api_enabled': True},
'creation_time': '2021-04-26T15:53:29+00:00',
'status': {'value': 'OFFLINE', 'last_change': '2021-07-10T17:26:50+00:00'},
'licence': {'id': 74213,
'code': '23AF-15A8-0514-2E4B-04DE-5C19-A574-B20B',
'bound_mac_address': '00:11:32:C2:FE:6A',
'activation_time': '2021-04-26T15:53:29+00:00',
'type': 'SUBSCRIPTION'},
'timezone': 'America/Chicago',
'version': {'agent': '3.7.0-b001', 'package': '2.5.1-0022'},
'location': {'latitude': '41.4126', 'longitude': '-99.6345'}}
Create a function to flatten nested JSON:
def flatten_json(y):
    """Flatten a nested JSON-like structure into a single-level dictionary.

    Dictionaries and lists are walked recursively.  Every leaf value is
    stored under a key built from the path that leads to it, with the path
    components (dictionary keys and list indices) joined by ``'_'``, e.g.
    ``{'a': {'b': [10, 20]}}`` becomes ``{'a_b_0': 10, 'a_b_1': 20}``.

    Empty dictionaries and empty lists contribute no entries.  A scalar
    passed at the top level is stored under the empty key ``''``.

    Args:
        y: The structure to flatten (nested dicts/lists with scalar leaves).

    Returns:
        A new flat dictionary mapping path strings to leaf values.
    """
    out = {}

    def flatten(x, name=''):
        # ``name`` is the path prefix so far, always ending in '_' unless empty.
        if isinstance(x, dict):
            for key, value in x.items():
                # str() lets non-string keys (e.g. ints) take part in the path.
                flatten(value, name + str(key) + '_')
        elif isinstance(x, list):
            for index, item in enumerate(x):
                flatten(item, name + str(index) + '_')
        else:
            # Drop the trailing '_' separator from the accumulated path.
            out[name[:-1]] = x

    flatten(y)
    return out
You can now use that function on your original json file:
flat = flatten_json(json)
df = json_normalize(flat)
Results:
id display_name ... location_latitude location_longitude
0 255719 Off Broadway Apartments ... 41.4126 -99.6345

Convert multiline "tabular" string to dictionary

I have a string that looks something like this:
name1 pass blue n/a
name-6t56-yt6 fail red n/a
name-45 pass blue n/a
name-6t567-yt6 fail red n/a
I want to extract data from the first 2 columns and would ideally store it in a dictionary in the following manner:
[{'type': 'name1', 'status': 'pass'}, {'type': 'name-6t56-yt6', 'status': 'fail'}, {'type': 'name-45', 'status': 'pass'}, {'type': 'name-6t567-yt6', 'status': 'fail'}]
Any ideas of how to approach this?
Note that this is a multi-line string(in utf-8 format).

Assuming you want a list:
Setup:
>>> s = '''name1 pass blue n/a
... name-6t56-yt6 fail red n/a
... name-45 pass blue n/a
... name-6t567-yt6 fail red n/a'''
Construct result:
>>> [dict(zip(('type', 'status'), line.split(maxsplit=2)[:2])) for line in s.splitlines()]
[{'type': 'name1', 'status': 'pass'}, {'type': 'name-6t56-yt6', 'status': 'fail'}, {'type': 'name-45', 'status': 'pass'}, {'type': 'name-6t567-yt6', 'status': 'fail'}]

In your code you are using a set of dictionaries, which is not the best idea; here I am using a list of dictionaries:
s = """name1 pass blue n/a
name-6t56-yt6 fail red n/a
name-45 pass blue n/a
name-6t567-yt6 fail red n/a"""

# Build one {'type': ..., 'status': ...} dict from the first two
# whitespace-separated columns of every line of the table.
d = []
for line in s.splitlines():
    columns = line.split()
    if len(columns) < 2:
        # Skip blank or truncated lines instead of failing on unpacking.
        continue
    # Named ``kind`` rather than ``type`` to avoid shadowing the builtin.
    kind, status = columns[:2]
    d.append({'type': kind, 'status': status})
content of d:
[{'type': 'name1', 'status': 'pass'},
{'type': 'name-6t56-yt6', 'status': 'fail'},
{'type': 'name-45', 'status': 'pass'},
{'type': 'name-6t567-yt6', 'status': 'fail'}]

from pprint import pprint

# Read the table from disk; the question states the text is UTF-8.
with open('file.txt', encoding='utf-8') as f:
    data = f.readlines()

# Keep the first two whitespace-separated columns of every line.
result = []
for line in data:
    columns = line.split()
    if len(columns) < 2:
        # Blank or truncated line: nothing to extract.
        continue
    # Take the status from its own column; a substring test on the whole
    # line would report 'pass' for a failing row named e.g. 'bypass-1'.
    result.append({
        'type': columns[0],
        'status': columns[1]
    })
pprint(result)
# [{'status': 'pass', 'type': 'name1'},
# {'status': 'fail', 'type': 'name-6t56-yt6'},
# {'status': 'pass', 'type': 'name-45'},
# {'status': 'fail', 'type': 'name-6t567-yt6'}]

With the text input defined as a multiline string, text, you can read its content into the desired dictionary structure like this:
# from collections import defaultdict
from pprint import pprint as pp

text = """name1 pass blue n/a
name-6t56-yt6 fail red n/a
name-45 pass blue n/a
name-6t567-yt6 fail red n/a"""

# Collect the first two whitespace-separated columns of each line as
# {"type": ..., "status": ...} dictionaries.
d = []
for line in text.splitlines():
    columns = line.split()
    if len(columns) < 2:
        # Skip blank or truncated lines instead of failing on unpacking.
        continue
    # Named ``kind`` rather than ``type`` to avoid shadowing the builtin.
    kind, status = columns[:2]
    d.append({"type": kind, "status": status})
pp(d)
Which will output:
[{'status': 'pass', 'type': 'name1'},
{'status': 'fail', 'type': 'name-6t56-yt6'},
{'status': 'pass', 'type': 'name-45'},
{'status': 'fail', 'type': 'name-6t567-yt6'}]

Have two non identical list of dictionaries and need to merge them with a non unique key

d1 = [{'del':True, 'Name':'tbl_n','node':'3'},{'del':True, 'Name':'src_n','node':'5'}]
d2 = [{'items':'23', 'column_name':'tbl_n','created':'3.34','count':0,'valid':'yes'},
{'items':'43', 'column_name':'src_n','created':'3.34','count':40,'valid':'yes'},
{'items':'22', 'column_name':'mod_n','created':'3.34','count':13,'valid':'no'}
I would like to merge d1 into d2, matching the 'Name' key in d1 against the 'column_name' key in d2.
Below is one of the steps I tried:
from collections import Counter
summed = sum((Counter({elem['column_name']: elem['val_count']}) for elem in my_dict1 + my_dict2), Counter())
print(summed)
The expected output I'm looking for is:
d3 = [{'items':23, 'Name': 'tbl_n','node':3,'created':3.34,'count':0,'valid':'yes'},{'del':True,'items':43,'Name':'src_n','node':5.'created':3.34,'count':40,'valid':'yes'},{'items':22,'column_name:'mod_n','created':3.34,'count':14,'valid':'no'}

I tried to understand your expected output and suggest the following solution. Tell me if you expect something else. Regarding your code, I replaced the d1 and d2 identifiers with ld1 and ld2 respectively ('ld' standing for 'list of dictionaries') for better readability:
ld1 = [{'del':True, 'Name':'tbl_n','node':'3'},
{'del':True, 'Name':'src_n','node':'5'}]
ld2 = [{'items':'23', 'column_name':'tbl_n','created':'3.34','count':0, 'valid':'yes'},
{'items':'43', 'column_name':'src_n','created':'3.34','count':40,'valid':'yes'},
{'items':'22', 'column_name':'mod_n','created':'3.34','count':13,'valid':'no'}]
def mergeTwoListOfDicts(ld1, ld2):
    """Merge each dictionary of ``ld2`` with its matching entry from ``ld1``.

    An entry of ``ld1`` matches an entry of ``ld2`` when its ``'Name'``
    value equals the other's ``'column_name'`` value.  Only the first match
    in ``ld1`` is used.  On a match, ``'column_name'`` is dropped and the
    ``ld1`` entry's items are added, overriding clashing keys.  Entries of
    ``ld2`` without a match are kept unchanged.

    Neither input list nor any dictionary inside them is modified; the
    result holds new dictionaries.

    Args:
        ld1: List of dictionaries, matched on their ``'Name'`` key.
        ld2: List of dictionaries, matched on their ``'column_name'`` key.

    Returns:
        A new list with one dictionary per entry of ``ld2``, in the same
        order.
    """
    result = []
    for d2 in ld2:
        # Work on a copy so the caller's dictionaries stay untouched.
        merged = dict(d2)
        if 'column_name' in d2:
            match = next(
                (d1 for d1 in ld1
                 if 'Name' in d1 and d1['Name'] == d2['column_name']),
                None,
            )
            if match is not None:
                # 'Name' from the match replaces the redundant 'column_name'.
                del merged['column_name']
                merged.update(match)
        result.append(merged)
    return result
print(mergeTwoListOfDicts(ld1, ld2))
# [{'items': '23', 'created': '3.34', 'count': 0, 'valid': 'yes', 'del': True, 'Name': 'tbl_n', 'node': '3'},
# {'items': '43', 'created': '3.34', 'count': 40, 'valid': 'yes', 'del': True, 'Name': 'src_n', 'node': '5'},
# {'items': '22', 'created': '3.34', 'count': 13, 'valid': 'no', 'column_name': 'mod_n'}]

combining two JSON {key1:value1 , key2:value2} to single key (i e {key3:value1,value2}) Python

I want to combine longitude and latitude to
{latlon: '40.33333,-79.34343'}
the entire JSON is in variable data = jsonData
I want to remove original key-value pair
{
'locale': 'en_US',
'timezone': '-7',
'id': '13',
'agerangemin': '21',
'verified': 'true',
'coverimageurl': 'scontent.xx.fbcdn/t31.0-0/p480x480/13063482_1183967848280764_1411489384515766669_o.jpg',
'tagline': 'Veggien',
'lastupdated': '1462341401',
'fbupdated_time': '2016-03-30T00:38:48+0000',
'lname': 'Kulkarni',
'fname': 'Nikhil',
'email': 'nikhilhk.usa#gmail.com',
'latitude': '40.333333',
'longitude': '-79.34343',
'displayname': 'Nikhil Kulkarni',
'fbprofileid': '1121344884543061',
'profileimageurl': 'scontent.xx.fbcdn/hprofile-xft1/v/t1.0-1/p100x100/10423743_952350738109144_964810479230145631_n.jpg?oh=71f7e953dbbf8e2f1d9f22418f7888b2&oe=579F4A36',
'link': 'facebook/app_scoped_user_id/1121344884543061/',
'diet': 'Vegetarian',
'dietsinceyear': '1966',
'gender': 'M',
'vegstory': '',
'shortdescription': 'Just like that',
'categoryids': '',
'reasonforveg': 'Religious'
}

# Combine the two coordinates into a single "latitude,longitude" string
# stored under 'latlon' (the key name requested in the question), then
# remove the original key-value pairs.
data['latlon'] = data['latitude'] + ',' + data['longitude']
del data['latitude']
del data['longitude']

Can be done in one line.
>>> dic = {'latitude': '40.333333', 'longitude': '-79.34343'}
>>>
>>> dic['latlon'] = "{0},{1}".format(dic.pop('latitude'),dic.pop('longitude'))
>>> dic
{'latlon': '40.333333,-79.34343'}
To understand how dic.pop() works, see this.

>>> json_data['latlon'] = ','.join(json_data[k] for k in ('latitude', 'longitude'))
>>> json_data['latlon']
'40.333333,-79.34343'
Note that this will preserve the original key-value pair.
UPDATE:
If you want to remove the original key-value pair use pop method:
>>> json_data['latlon'] = ','.join(json_data.pop(k) for k in ('latitude', 'longitude'))
>>> json_data['latlon']
'40.333333,-79.34343'

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Convert an uneven nested dictionary into tabular form - python

You can try: pd.DataFrame.from_dict(res, orient='index')

You could try using: pd.json_normalize(res) Although the output can be a bit "ugly", it actually works.

Related

python according to the same value combining dictionary

Converting nested list into data frame using pandas

Convert multiline "tabular" string to dictionary

Have two non identical list of dictionaries and need to merge them with a non unique key

combining two JSON {key1:value1 , key2:value2} to single key (i e {key3:value1,value2}) Python

Categories

Resources