Convert an uneven nested dictionary into tabular form - python

res = {'Head': {'Ide': 'GLE', 'ID': '7b', 'Source': 'CARS', 'Target': 'TULUM', 'Country': 'GL'},
'Load': {'Stat': {'Code': '21', 'Reason': 'invalid'}, 'SrcFilePath': '/path.xls'}}
res is the nested dictionary that needs to be converted into a tabular form.
With the following columns and respective values:
Ide ID Source Target Country Code Reason SrcFilePath
Code:
for col,data in res.items():
final_data = dict(data.items())
df = pd.DataFrame(final_data)
print(df)
Error:
ValueError: If using all scalar values, you must pass an index

You can try:
pd.DataFrame.from_dict(res, orient='index')

You could try using:
pd.json_normalize(res)
The output can look a bit "ugly" (the column names are dotted paths such as Head.Ide and Load.Stat.Code), but it does work.

I assume that res isn't the only record and there's data like:
data = [
{'Head': {'Ide': 'GLE', 'ID': '7b', 'Source': 'CARS', 'Target': 'TULUM', 'Country': 'GL'}, 'Load': {'Stat': {'Code': '21', 'Reason': 'invalid'}, 'SrcFilePath': '/path.xls'}}
, {'Head': {'Ide': 'ABC', 'ID': '8b', 'Source': 'CARS', 'Target': 'TULUM', 'Country': 'AB'}, 'Load': {'Stat': {'Code': '21', 'Reason': 'invalid'}, 'SrcFilePath': '/path.xls'}}
, {'Head': {'Ide': 'EFG', 'ID': '9b', 'Source': 'CARS', 'Target': 'TULUM', 'Country': 'EF'}, 'Load': {'Stat': {'Code': '21', 'Reason': 'invalid'}, 'SrcFilePath': '/path.xls'}}
]
So we have to write a procedure to flatten records and apply it by map to the data before transforming records into a frame:
def flatten_dict(d: dict) -> dict:
    """Collapse a nested dictionary into a single-level dictionary.

    Every non-dict value is copied to the result under its own (innermost)
    key; the keys of the enclosing dictionaries are discarded.

    Args:
        d: Dictionary whose values may themselves be dictionaries, nested
            to any depth.

    Returns:
        A new flat dictionary. ``d`` itself is not modified.

    Note:
        Parent keys are not kept, so when the same leaf key occurs in more
        than one branch, the value visited last overwrites the earlier one.
    """
    flat = {}
    for key, value in d.items():
        # isinstance (rather than an exact type check) also descends into
        # dict subclasses such as OrderedDict or defaultdict.
        if isinstance(value, dict):
            flat.update(flatten_dict(value))
        else:
            flat[key] = value
    return flat
output = pd.DataFrame(map(flatten_dict, data))
The output:
Ide ID Source Target Country Code Reason SrcFilePath
0 GLE 7b CARS TULUM GL 21 invalid /path.xls
1 ABC 8b CARS TULUM AB 21 invalid /path.xls
2 EFG 9b CARS TULUM EF 21 invalid /path.xls

Related

python according to the same value combining dictionary

i have a list of dict like this
[
{'id': 'A123',
'feature': {'name': 'jack', 'age' : '18' },
'create_time': '2022-5-17 10:29:47',
'is_fast': False},
{'id': 'A123',
'feature': {'gender': 'male'},
'create_time': '2022-5-17 10:29:47',
'is_fast': False},
{'id': 'A123',
'habit': {'name': 'read'},
'create_time': '2022-5-15 10:29:45',
'is_fast': False},
{'id': 'A456',
'feature': {'name': 'rose'},
'create_time': '2022-4-15 10:29:45',
'is_fast': False},
{'id': 'A456',
'habit': {'name': 'sport'},
'create_time': '2022-3-15 10:29:45',
'is_fast': False}
]
But I want to merge the entries that share the same "id" value, using some function.
The desired output is as follows
[
{'id': 'A123',
'feature': {'name': 'jack', 'age' : '18' ,'gender': 'male'},
'habit': {'name': 'read'},
'create_time': '2022-5-17 10:29:47', # the latest time among the entries with the same id
'is_fast': False},
{'id': 'A456',
'feature': {'name': 'rose'},
'habit': {'name': 'sport'},
'create_time': '2022-4-15 10:29:45',
'is_fast': False},
]
How can I merge the same "id" values ​​according to these dictionaries..
This should get you started... I put some inline notes to explain what the code is doing. You still need to implement a date time comparison.
def merge_dicts(lst):
    """Merge dictionaries that share the same ``'id'`` into one record each.

    Records are combined key by key:

    * a key seen for the first time is added to the record;
    * dictionary values are merged with ``dict.update``;
    * two differing ``'YYYY-M-D H:M:S'`` timestamp strings keep the later
      one, compared chronologically rather than as text;
    * any other differing value keeps the one seen first.

    Args:
        lst: Iterable of dictionaries, each containing an ``'id'`` key.

    Returns:
        A list with one merged dictionary per distinct id, in order of first
        appearance. The input dictionaries are left unmodified.

    Raises:
        KeyError: If an element of ``lst`` has no ``'id'`` key.
    """
    import copy
    from datetime import datetime

    def parse_timestamp(value):
        """Return ``value`` as a datetime, or None if it is not a timestamp."""
        if not isinstance(value, str):
            return None
        try:
            # strptime accepts unpadded fields, e.g. '2022-5-17 10:29:47'.
            return datetime.strptime(value, '%Y-%m-%d %H:%M:%S')
        except ValueError:
            return None

    final = {}  # id -> merged record
    for row in lst:
        if row['id'] not in final:
            # Deep copy so later merges never write into the caller's data.
            final[row['id']] = copy.deepcopy(row)
            continue

        record = final[row['id']]
        for k, v in row.items():
            if k not in record:
                record[k] = copy.deepcopy(v)
            elif record[k] == v:
                continue
            elif isinstance(v, dict):
                record[k].update(copy.deepcopy(v))
            else:
                new_time = parse_timestamp(v)
                old_time = parse_timestamp(record[k])
                # Only a pair of valid timestamps is reordered; any other
                # conflicting scalar keeps the value seen first.
                if new_time is not None and old_time is not None and new_time > old_time:
                    record[k] = v
    return list(final.values())
print(merge_dicts(lst))
output:
[
{
'id': 'A123',
'feature': {'name': 'jack', 'age': '18', 'gender': 'male'},
'create_time': '2022-5-17 10:29:47',
'is_fast': False,
'habit': {'name': 'read'}
},
{
'id': 'A456',
'feature': {'name': 'rose'},
'create_time': '2022-4-15 10:29:45',
'is_fast': False,
'habit': {'name': 'sport'}
}
]
You can use the dict.setdefault method to initialize sub-dicts under keys that don't already exist to avoid cluttering up your code with conditional statements that test the existence of keys:
from datetime import datetime


def _is_later(new, old):
    """Return True when ``new`` should replace ``old`` as the kept value."""
    fmt = '%Y-%m-%d %H:%M:%S'
    try:
        # Compare timestamps chronologically: as plain text the unpadded
        # '2022-5-9 ...' would sort after '2022-5-17 ...'.
        return datetime.strptime(new, fmt) > datetime.strptime(old, fmt)
    except (TypeError, ValueError):
        # Not a pair of timestamps: fall back to the values' own ordering.
        return new > old


# Merge the rows of ``lst`` that share an 'id' into one dictionary per id.
merged = {}
for d in lst:
    # Start every id from a fresh dict so the input rows are never modified.
    s = merged.setdefault(d['id'], {})
    for k, v in d.items():
        if isinstance(v, dict):
            # Sub-dictionaries are combined key by key.
            s.setdefault(k, {}).update(v)
        elif k not in s or _is_later(v, s[k]):
            # First occurrence of a scalar key, or a later/greater value.
            s[k] = v
print(list(merged.values()))
Demo: https://replit.com/#blhsing/BlandCarelessPolygons#main.py

Converting nested list into data frame using pandas

I have the nested dictionary below, which is stored in data:
{'id': 255719,
'display_name': 'Off Broadway Apartments',
'access_right': {'status': 'OWNED', 'api_enabled': True},
'creation_time': '2021-04-26T15:53:29+00:00',
'status': {'value': 'OFFLINE', 'last_change': '2021-07-10T17:26:50+00:00'},
'licence': {'id': 74213,
'code': '23AF-15A8-0514-2E4B-04DE-5C19-A574-B20B',
'bound_mac_address': '00:11:32:C2:FE:6A',
'activation_time': '2021-04-26T15:53:29+00:00',
'type': 'SUBSCRIPTION'},
'timezone': 'America/Chicago',
'version': {'agent': '3.7.0-b001', 'package': '2.5.1-0022'},
'location': {'latitude': '41.4126', 'longitude': '-99.6345'}}
I would like to convert it into a data frame. Can anyone advise?
I tried below code
df = pd.DataFrame(data)
but the result does not come out properly because of the many nested dictionaries. Can anyone advise?
from pandas.io.json import json_normalize
# load json
json = {'id': 255719,
'display_name': 'Off Broadway Apartments',
'access_right': {'status': 'OWNED', 'api_enabled': True},
'creation_time': '2021-04-26T15:53:29+00:00',
'status': {'value': 'OFFLINE', 'last_change': '2021-07-10T17:26:50+00:00'},
'licence': {'id': 74213,
'code': '23AF-15A8-0514-2E4B-04DE-5C19-A574-B20B',
'bound_mac_address': '00:11:32:C2:FE:6A',
'activation_time': '2021-04-26T15:53:29+00:00',
'type': 'SUBSCRIPTION'},
'timezone': 'America/Chicago',
'version': {'agent': '3.7.0-b001', 'package': '2.5.1-0022'},
'location': {'latitude': '41.4126', 'longitude': '-99.6345'}}
Create a function to flatten nested JSON:
def flatten_json(y):
    """Flatten nested dicts/lists into one dict with underscore-joined keys.

    Each leaf value is stored under the path that leads to it, with the path
    components joined by ``'_'``. Dictionary keys and list positions both
    count as components, e.g. ``{'a': {'b': [10]}}`` becomes ``{'a_b_0': 10}``.

    Args:
        y: A JSON-like structure built from dicts, lists and scalar values.

    Returns:
        A new flat dictionary mapping each path to its leaf value.

    Note:
        Empty dicts and empty lists contain no leaves and therefore produce
        no entry. A bare scalar passed as ``y`` is stored under the key ``''``.
    """
    out = {}

    def flatten(x, name=''):
        # ``name`` is the path so far, always ending in '_' once non-empty.
        if isinstance(x, dict):
            for key, value in x.items():
                # str() lets non-string keys (e.g. ints) join the path.
                flatten(value, name + str(key) + '_')
        elif isinstance(x, list):
            for index, item in enumerate(x):
                flatten(item, name + str(index) + '_')
        else:
            # Strip the trailing '_' that the last path component added.
            out[name[:-1]] = x

    flatten(y)
    return out
You can now use that function on your original json file:
flat = flatten_json(json)
df = json_normalize(flat)
Results:
id display_name ... location_latitude location_longitude
0 255719 Off Broadway Apartments ... 41.4126 -99.6345

Convert multiline "tabular" string to dictionary

I have a string that looks something like this:
name1 pass blue n/a
name-6t56-yt6 fail red n/a
name-45 pass blue n/a
name-6t567-yt6 fail red n/a
I want to extract data from the first 2 columns and would ideally store it in a dictionary in the following manner:
[{'type': 'name1', 'status': 'pass'}, {'type': 'name-6t56-yt6', 'status': 'fail'}, {'type': 'name-45', 'status': 'pass'}, {'type': 'name-6t567-yt6', 'status': 'fail'}]
Any ideas of how to approach this?
Note that this is a multi-line string(in utf-8 format).
Assuming you want a list:
Setup:
>>> s = '''name1 pass blue n/a
... name-6t56-yt6 fail red n/a
... name-45 pass blue n/a
... name-6t567-yt6 fail red n/a'''
Construct result:
>>> [dict(zip(('type', 'status'), line.split(maxsplit=2)[:2])) for line in s.splitlines()]
[{'type': 'name1', 'status': 'pass'}, {'type': 'name-6t56-yt6', 'status': 'fail'}, {'type': 'name-45', 'status': 'pass'}, {'type': 'name-6t567-yt6', 'status': 'fail'}]
In your code you are using a set of dictionaries, which is not the best idea; here I am using a list of dictionaries instead:
s = """name1 pass blue n/a
name-6t56-yt6 fail red n/a
name-45 pass blue n/a
name-6t567-yt6 fail red n/a"""

# Build one {'type': ..., 'status': ...} dict from the first two
# whitespace-separated columns of every row.
d = []
for line in s.splitlines():
    if not line.strip():
        # A blank row (e.g. from a trailing newline) has no columns to unpack.
        continue
    # ``kind`` rather than ``type`` so the builtin is not shadowed.
    kind, status = line.split()[:2]
    d.append({'type': kind, 'status': status})
content of d:
[{'type': 'name1', 'status': 'pass'},
{'type': 'name-6t56-yt6', 'status': 'fail'},
{'type': 'name-45', 'status': 'pass'},
{'type': 'name-6t567-yt6', 'status': 'fail'}]
from pprint import pprint
# Read the table from disk; the encoding is stated explicitly instead of
# relying on the platform default.
with open('file.txt', encoding='utf-8') as f:
    data = f.readlines()

# Take the type and status from the first two whitespace-separated columns.
# Reading the status from its own column avoids misclassifying a row whose
# name or later columns merely contain the text "pass".
result = []
for line in data:
    if not line.strip():
        # Blank rows carry no columns.
        continue
    kind, status = line.split()[:2]
    result.append({'type': kind, 'status': status})
pprint(result)
# [{'status': 'pass', 'type': 'name1'},
# {'status': 'fail', 'type': 'name-6t56-yt6'},
# {'status': 'pass', 'type': 'name-45'},
# {'status': 'fail', 'type': 'name-6t567-yt6'}]
With the text input defined as a multiline string, text, you can read its content into the desired list-of-dictionaries structure like this:
# from collections import defaultdict
from pprint import pprint as pp
text = """name1 pass blue n/a
name-6t56-yt6 fail red n/a
name-45 pass blue n/a
name-6t567-yt6 fail red n/a"""

# Collect the first two whitespace-separated columns of every row.
d = []
for line in text.splitlines():
    if not line.strip():
        # A blank row (e.g. from a trailing newline) has no columns to unpack.
        continue
    # ``kind`` rather than ``type`` so the builtin is not shadowed.
    kind, status = line.split()[:2]
    d.append({"type": kind, "status": status})
pp(d)
Which will output:
[{'status': 'pass', 'type': 'name1'},
{'status': 'fail', 'type': 'name-6t56-yt6'},
{'status': 'pass', 'type': 'name-45'},
{'status': 'fail', 'type': 'name-6t567-yt6'}]

Have two non identical list of dictionaries and need to merge them with a non unique key

d1 = [{'del':True, 'Name':'tbl_n','node':'3'},{'del':True, 'Name':'src_n','node':'5'}]
d2 = [{'items':'23', 'column_name':'tbl_n','created':'3.34','count':0,'valid':'yes'},
{'items':'43', 'column_name':'src_n','created':'3.34','count':40,'valid':'yes'},
{'items':'22', 'column_name':'mod_n','created':'3.34','count':13,'valid':'no'}
I would like to merge d1 to d2 with respect to 'Name' key in d1 and 'column_name' in d2
below is one of the tried step
from collections import Counter
summed = sum((Counter({elem['column_name']: elem['val_count']}) for elem in my_dict1 + my_dict2), Counter())
print(summed)
the expected output im looking for is
d3 = [{'items':23, 'Name': 'tbl_n','node':3,'created':3.34,'count':0,'valid':'yes'},{'del':True,'items':43,'Name':'src_n','node':5.'created':3.34,'count':40,'valid':'yes'},{'items':22,'column_name:'mod_n','created':3.34,'count':14,'valid':'no'}
I tried to understand your expected output and suggest the following solution. Tell me if you expect something else. Regarding your code, I replaced the d1 and d2 identifiers with ld1 and ld2 respectively ('ld' standing for 'list of dictionaries') for better readability:
ld1 = [{'del':True, 'Name':'tbl_n','node':'3'},
{'del':True, 'Name':'src_n','node':'5'}]
ld2 = [{'items':'23', 'column_name':'tbl_n','created':'3.34','count':0, 'valid':'yes'},
{'items':'43', 'column_name':'src_n','created':'3.34','count':40,'valid':'yes'},
{'items':'22', 'column_name':'mod_n','created':'3.34','count':13,'valid':'no'}]
def mergeTwoListOfDicts(ld1, ld2):
    """Join two lists of dictionaries on ``ld1['Name'] == ld2['column_name']``.

    For every dictionary in ``ld2``, the first dictionary in ``ld1`` whose
    ``'Name'`` equals its ``'column_name'`` is merged into it: the
    ``'column_name'`` entry is dropped and all entries of the ``ld1``
    dictionary are added (overriding equal keys). Dictionaries of ``ld2``
    without a match are passed through unchanged.

    Args:
        ld1: List of dictionaries, matched by their ``'Name'`` key.
        ld2: List of dictionaries, matched by their ``'column_name'`` key.

    Returns:
        A new list with one dictionary per element of ``ld2``, in the same
        order. Neither input list nor any of its dictionaries is modified.
    """
    result = []
    for d2 in ld2:
        # Merge into a copy so the caller's dictionaries stay intact.
        merged = dict(d2)
        if 'column_name' in d2:
            for d1 in ld1:
                if 'Name' in d1 and d1['Name'] == d2['column_name']:
                    del merged['column_name']
                    merged.update(d1)
                    break  # only the first match is used
        result.append(merged)
    return result
print(mergeTwoListOfDicts(ld1, ld2))
# [{'items': '23', 'created': '3.34', 'count': 0, 'valid': 'yes', 'del': True, 'Name': 'tbl_n', 'node': '3'},
# {'items': '43', 'created': '3.34', 'count': 40, 'valid': 'yes', 'del': True, 'Name': 'src_n', 'node': '5'},
# {'items': '22', 'created': '3.34', 'count': 13, 'valid': 'no', 'column_name': 'mod_n'}]

combining two JSON {key1:value1 , key2:value2} to single key (i e {key3:value1,value2}) Python

I want to combine longitude and latitude to
{latlon: '40.33333,-79.34343'}
the entire JSON is in variable data = jsonData
I want to remove original key-value pair
{
'locale': 'en_US',
'timezone': '-7',
'id': '13',
'agerangemin': '21',
'verified': 'true',
'coverimageurl': 'scontent.xx.fbcdn/t31.0-0/p480x480/13063482_1183967848280764_1411489384515766669_o.jpg',
'tagline': 'Veggien',
'lastupdated': '1462341401',
'fbupdated_time': '2016-03-30T00:38:48+0000',
'lname': 'Kulkarni',
'fname': 'Nikhil',
'email': 'nikhilhk.usa#gmail.com',
'latitude': '40.333333',
'longitude': '-79.34343',
'displayname': 'Nikhil Kulkarni',
'fbprofileid': '1121344884543061',
'profileimageurl': 'scontent.xx.fbcdn/hprofile-xft1/v/t1.0-1/p100x100/10423743_952350738109144_964810479230145631_n.jpg?oh=71f7e953dbbf8e2f1d9f22418f7888b2&oe=579F4A36',
'link': 'facebook/app_scoped_user_id/1121344884543061/',
'diet': 'Vegetarian',
'dietsinceyear': '1966',
'gender': 'M',
'vegstory': '',
'shortdescription': 'Just like that',
'categoryids': '',
'reasonforveg': 'Religious'
}
# Combine the two coordinates into a single "latitude,longitude" string under
# the requested 'latlon' key, then drop the original entries. Both values are
# read before anything is changed, so a missing key leaves ``data`` untouched.
latitude, longitude = data['latitude'], data['longitude']
data['latlon'] = latitude + ',' + longitude
del data['latitude'], data['longitude']
Can be done in one line.
>>> dic = {'latitude': '40.333333', 'longitude': '-79.34343'}
>>>
>>> dic['latlon'] = "{0},{1}".format(dic.pop('latitude'),dic.pop('longitude'))
>>> dic
{'latlon': '40.333333,-79.34343'}
To understand how dic.pop() work, see this.
>>> json_data['latlon'] = ','.join(json_data[k] for k in ('latitude', 'longitude'))
>>> json_data['latlon']
'40.333333,-79.34343'
Note that this will preserve the original key-value pair.
UPDATE:
If you want to remove the original key-value pair use pop method:
>>> json_data['latlon'] = ','.join(json_data.pop(k) for k in ('latitude', 'longitude'))
>>> json_data['latlon']
'40.333333,-79.34343'

Categories

Resources