I want to rename the dict keys in Python.
There are two keys, 'Curry_Vegetables_Set1 (59).JPG131850' and 'Curry_Vegetables_Set1 (62).JPG104359', which I want to rename with replace1 value. How can I rename them?
Below is the dict sample:
file = {'Curry_Vegetables_Set1 (59).JPG131850': {'filename': '1.5_Curry_Vegetables_59.jpg',
'size': 131850,
'regions': [{'shape_attributes': {'name': 'polygon',
'all_points_x': [510, 563,622,],
'all_points_y': [459, 523, 505,]},
'region_attributes': {'food': 'curry_vegetable'}}],
'file_attributes': {}},
'Curry_Vegetables_Set1 (62).JPG104359': {'filename': '1.5_Curry_Vegetables_62.jpg',
'size': 104359,
'regions': [{'shape_attributes': {'name': 'polygon',
'all_points_x': [471,490,528,],
'all_points_y': [496,476,493]},
'region_attributes': {'food': 'curry_vegetable'}}],
'file_attributes': {}},}
I tried the code below,
for key,value in file.items():
name = key.split('.')
num = name[0].split('(')
image_num = num[1][:-1]
replace1 = '1.5_Curry_Vegetables_'+image_num+'.'+name[1]
# replace old keys with replace1
file[replace1] = file[key]
but it gives error as:
RuntimeError: dictionary changed size during iteration
The reason to get RunTimeError is you are updating the dict which is already loaded using file.items(). Casting the dictionary items to list creates a list of its items, so you can iterate over it and avoid the RunTimeError.
file = {'Curry_Vegetables_Set1 (59).JPG131850': {'filename': '1.5_Curry_Vegetables_59.jpg',
'size': 131850,
'regions': [{'shape_attributes': {'name': 'polygon', 'all_points_x': [510, 563,622,], 'all_points_y': [459, 523, 505,]},
'region_attributes': {'food': 'curry_vegetable'}}],
'file_attributes': {}
},
'Curry_Vegetables_Set1 (62).JPG104359': {'filename': '1.5_Curry_Vegetables_62.jpg',
'size': 104359,
'regions': [{'shape_attributes': {'name': 'polygon', 'all_points_x': [471,490,528,], 'all_points_y': [496,476,493]},
'region_attributes': {'food': 'curry_vegetable'}}],
'file_attributes': {}},
}
for key,value in list(file.items()):
name = key.split('.')
num = name[0].split('(')
image_num = num[1][:-1]
replace1 = '1.5_Curry_Vegetables_'+image_num+'.'+name[1]
file[replace1] = file[key]
del file[key]
print (file)
Output:
{'1.5_Curry_Vegetables_59.JPG131850': {'filename': '1.5_Curry_Vegetables_59.jpg', 'size': 131850, 'regions': [{'shape_attributes': {'name': 'polygon', 'all_points_x': [510, 563, 622], 'all_points_y': [459, 523, 505]}, 'region_attributes': {'food': 'curry_vegetable'}}], 'file_attributes': {}}, '1.5_Curry_Vegetables_62.JPG104359': {'filename': '1.5_Curry_Vegetables_62.jpg', 'size': 104359, 'regions': [{'shape_attributes': {'name': 'polygon', 'all_points_x': [471, 490, 528], 'all_points_y': [496, 476, 493]}, 'region_attributes': {'food': 'curry_vegetable'}}], 'file_attributes': {}}}
You're not really changing the old keys; you're adding new ones to the same dictionary. You should create a new empty dictionary and add the new key/value pairs to that in the loop.
file2 = {}
for key, value in file.items():
name = key.split('.')
num = name[0].split('(')
image_num = num[1][:-1]
replace1 = '1.5_Curry_Vegetables_' + image_num + '.' + name[1]
# replace old keys with replace1
file2[replace1] = file[key]
print(file2)
Inplace:
oldKeys = list(dictionary.keys())
for oldKey in oldKeys:
name = oldKey .split('.')
num = name[0].split('(')
image_num = num[1][:-1]
newKey = '1.5_Curry_Vegetables_' + image_num + '.' + name[1]
dictionary[newKey] = dictionary.pop(oldKey)
Or via creating a new dict:
def newKeyFromOld(oldKey):
name = oldKey .split('.')
num = name[0].split('(')
image_num = num[1][:-1]
newKey = '1.5_Curry_Vegetables_' + image_num + '.' + name[1]
return newKey
{newKeyFromOld(oldKey): value for oldKey, value in dictionary.items()}
Related
I have a dictionary which contains some key-value pairs as strings, but some key-values are dictionaries.
The data looks like this:
{'amount': 123,
'baseUnit': 'test',
'currency': {'code': 'EUR'},
'dimensions': {'height': {'iri': 'http://www.example.com/data/measurement-height-12345',
'unitOfMeasure': 'm',
'value': 23},
'length': {'iri': 'http://www.example.com/data/measurement-length-12345',
'unitOfMeasure': 'm',
'value': 8322},
'volume': {'unitOfMeasure': '', 'value': 0},
'weight': {'iri': 'http://www.example.com/data/measurement-weight-12345',
'unitOfMeasure': 'KG',
'value': 23},
'width': {'iri': 'http://www.example.com/data/measurement-width-12345',
'unitOfMeasure': 'm',
'value': 1}},
'exportListNumber': '1234',
'iri': 'http://www.example.com/data/material-12345',
'number': '12345',
'orderUnit': 'sdf',
'producerFormattedPID': '12345',
'producerID': 'example',
'producerNonFormattedPID': '12345',
'stateID': 'm70',
'typeID': 'FERT'}
for the dimensions and price keys, there are some nested dictionaries as values. How can I extract that data so that the final variable is a dictionary with only keys-values as strings. For the price, I would need something like:
{'pricecurrencycode':'EUR','priceamount':123} instead of 'price': {'currency': {'code': 'EUR'}, 'amount': 123}.
and the same happening to dimensions key->to extract all the nested dictionaries so that it could be easier to transform into a final dataframe.
You can define a recursive flatten function that gets called whenever the dictionary value is a dictionary.
Assuming python>=3.9:
def flatten(my_dict, prefix=""):
res = {}
for k, v in my_dict.items():
if isinstance(v, dict):
res |= flatten(v, prefix+k)
else:
res[prefix+k] = v
return res
A slightly more verbose option for older python versions:
def flatten(my_dict, prefix=""):
res = {}
for k, v in my_dict.items():
if isinstance(v, dict):
for k_flat, v_flat in flatten(v, prefix+k).items():
res[k_flat] = v_flat
else:
res[prefix+k] = v
return res
I have this list of dictionary and I would like to get those with the same exact value of 'name' and 'school' into a new list and also getting their 'age' merged into a list as well and the rest of the dictionary that is not identical to just add into the list as per usual..
Here is an example of the list of dictionary
[{'name': 'Jane', 'age':12, 'school': 'SIT'}, {'name': 'John', 'age':13, 'school': 'SMU'},{'name': 'Jane', 'age':14, 'school': 'SIT'}, {'name': 'Jane', 'age':16, 'school': 'SIT'}, {'name': 'John', 'age':13, 'school': 'NUS'}]
and I would like it to make it into something like this..
[{'name': 'Jane', 'age': [12,14,16], 'school': 'SIT'}, {'name': 'John', 'age': 13, 'school': 'SMU'}, {'name': 'John', 'age':13, 'school': 'NUS'}]
using Python.. please help!
tried using counter, loops but still can't get it to work..
You could use itertools.groupby().
Example:
import itertools
from pprint import pprint
data = [{'name': 'Jane', 'age':12, 'school': 'SIT'}, {'name': 'John', 'age':13, 'school': 'SMU'},{'name': 'Jane', 'age':14, 'school': 'SIT'}, {'name': 'Jane', 'age':16, 'school': 'SIT'}, {'name': 'John', 'age':13, 'school': 'NUS'}]
keyfunc = lambda x: (x["name"], x["school"])
# needs to be sorted to use groupby
data.sort(key=keyfunc)
output = []
for k,v in itertools.groupby(data, key=keyfunc):
this_group = {
"name": k[0],
"school": k[1],
"age": [i["age"] for i in v],
}
output.append(this_group)
pprint(output)
The output is:
[{'age': [12, 14, 16], 'name': 'Jane', 'school': 'SIT'},
{'age': [13], 'name': 'John', 'school': 'NUS'},
{'age': [13], 'name': 'John', 'school': 'SMU'}]
If you wish to go with the solution based on a buffer dictionary, please check out the dict.setdefault() method.
Example:
buffer = {}
for i in data:
buffer.setdefault((i["name"], i["school"]), []).append(i["age"])
For reference:
https://docs.python.org/3/library/itertools.html#itertools.groupby
https://docs.python.org/3/library/stdtypes.html#dict.setdefault
x = [{'name': 'Jane', 'age':12, 'school': 'SIT'}, {'name': 'John', 'age':13, 'school': 'SMU'},{'name': 'Jane', 'age':14, 'school': 'SIT'}, {'name': 'Jane', 'age':16, 'school': 'SIT'}, {'name': 'John', 'age':13, 'school': 'NUS'}]
new_x = {}
for r in x:
if r['name'] in new_x.keys():
if not isinstance(new_x[r['name']]['age'], list):
new_x[r['name']]['age'] = [new_x[r['name']]['age']]
if r['age'] not in new_x[r['name']]['age']:
new_x[r['name']]['age'].append(r['age'])
else:
new_x[r['name']] = {'age': r['age'], 'school': r['school']}
z = [v.update(name=k) for k, v in new_x.items()]
z = [v for k, v in new_x.items()]
Here is a universal solution to your problem. Only name and school are considered "special". All other keys, like age are converted to list when a new value has to be added.
l = [
{"name": "Jane", "age": 12, "school": "SIT"},
{"name": "John", "age": 13, "school": "SMU"},
{"name": "Jane", "age": 14, "school": "SIT"},
{"name": "Jane", "age": 16, "school": "SIT"},
{"name": "John", "age": 13, "school": "NUS"},
]
r = {}
for x in l:
id = f"{x['name']}-{x['school']}"
if id in r:
for k,v in x.items():
if k not in ["name", "school"]:
if k in r[id]:
if isinstance(r[id][k], list):
r[id][k].append(v)
else:
r[id][k] = [r[id][k], v]
else:
r[id][k] = v
else:
r[id] = x
result = [x for x in r.values()]
i have a list of dict like this
[
{'id': 'A123',
'feature': {'name': 'jack', 'age' : '18' },
'create_time': '2022-5-17 10:29:47',
'is_fast': False},
{'id': 'A123',
'feature': {'gender': 'male'},
'create_time': '2022-5-17 10:29:47',
'is_fast': False},
{'id': 'A123',
'habit': {'name': 'read'},
'create_time': '2022-5-15 10:29:45',
'is_fast': False},
{'id': 'A456',
'feature': {'name': 'rose'},
'create_time': '2022-4-15 10:29:45',
'is_fast': False},
{'id': 'A456',
'habit': {'name': 'sport'},
'create_time': '2022-3-15 10:29:45',
'is_fast': False}
]
But I want to merge the same "id" values together using something function
The desired output is as follows
[
{'id': 'A123',
'feature': {'name': 'jack', 'age' : '18' ,'gender': 'male'},
'habit': {'name': 'read'},
'create_time': '2022-5-19 10:29:47', #Get the latest time based on the same id
'is_fast': False},
{'id': 'A456',
'feature': {'name': 'rose'},
'habit': {'name': 'sport'},
'create_time': '2022-4-15 10:29:45',
'is_fast': False},
]
How can I merge the same "id" values according to these dictionaries..
This should get you started... I put some inline notes to explain what the code is doing. You still need to implement a date time comparison.
def merge_dicts(lst):
final = {} # results
for row in lst: # iterate through list
if row['id'] not in final: # if current item id hasnt been seen
final[row['id']] = row # assign it to results with id as the key
else:
record = final[row['id']] # otherwise compare to data already stored
for k,v in row.items(): #iterate through dictionary items
if k not in record: # if key not in results
record[k] = v # add the key and value
continue
if record[k] == v: continue # if they are already equal move on
if isinstance(v, dict): # if its a dictionary
record[k].update(v) # update the dictionary
else: # must be date time sequence so do some datetime comparison
"""Do some date comparison and assign correct date"""
return [v for k,v in final.items()] # convert to list
print(merge_dicts(lst))
output:
[
{
'id': 'A123',
'feature': {'name': 'jack', 'age': '18', 'gender': 'male'},
'create_time': '2022-5-17 10:29:47',
'is_fast': False,
'habit': {'name': 'read'}
},
{
'id': 'A456',
'feature': {'name': 'rose'},
'create_time': '2022-4-15 10:29:45',
'is_fast': False,
'habit': {'name': 'sport'}
}
]
You can use the dict.setdefault method to initialize sub-dicts under keys that don't already exist to avoid cluttering up your code with conditional statements that test the existence of keys:
merged = {}
for d in lst:
s = merged.setdefault(d['id'], d)
for k, v in d.items():
if isinstance(v, dict):
s.setdefault(k, v).update(v)
elif v > s[k]: # the dates/times in the input follow alphabetical order
s[k] = v # later dates/times takes precedence
print(list(merged.values()))
Demo: https://replit.com/#blhsing/BlandCarelessPolygons#main.py
I have below list which stored in data
{'id': 255719,
'display_name': 'Off Broadway Apartments',
'access_right': {'status': 'OWNED', 'api_enabled': True},
'creation_time': '2021-04-26T15:53:29+00:00',
'status': {'value': 'OFFLINE', 'last_change': '2021-07-10T17:26:50+00:00'},
'licence': {'id': 74213,
'code': '23AF-15A8-0514-2E4B-04DE-5C19-A574-B20B',
'bound_mac_address': '00:11:32:C2:FE:6A',
'activation_time': '2021-04-26T15:53:29+00:00',
'type': 'SUBSCRIPTION'},
'timezone': 'America/Chicago',
'version': {'agent': '3.7.0-b001', 'package': '2.5.1-0022'},
'location': {'latitude': '41.4126', 'longitude': '-99.6345'}}
I would like to convert into data frame.can anyone advise?
I tried below code
df = pd.DataFrame(data)
but it's not coming properly as many nested lists. can anyone advise?
from pandas.io.json import json_normalize
# load json
json = {'id': 255719,
'display_name': 'Off Broadway Apartments',
'access_right': {'status': 'OWNED', 'api_enabled': True},
'creation_time': '2021-04-26T15:53:29+00:00',
'status': {'value': 'OFFLINE', 'last_change': '2021-07-10T17:26:50+00:00'},
'licence': {'id': 74213,
'code': '23AF-15A8-0514-2E4B-04DE-5C19-A574-B20B',
'bound_mac_address': '00:11:32:C2:FE:6A',
'activation_time': '2021-04-26T15:53:29+00:00',
'type': 'SUBSCRIPTION'},
'timezone': 'America/Chicago',
'version': {'agent': '3.7.0-b001', 'package': '2.5.1-0022'},
'location': {'latitude': '41.4126', 'longitude': '-99.6345'}}
Create a fuction to flat nested jsons:
def flatten_json(y):
out = {}
def flatten(x, name=''):
if type(x) is dict:
for a in x:
flatten(x[a], name + a + '_')
elif type(x) is list:
i = 0
for a in x:
flatten(a, name + str(i) + '_')
i += 1
else:
out[name[:-1]] = x
flatten(y)
return out
You can now use that function on your original json file:
flat = flatten_json(json)
df = json_normalize(flat)
Results:
id display_name ... location_latitude location_longitude
0 255719 Off Broadway Apartments ... 41.4126 -99.6345
I have a complex situation which I hope to solve and which might profit us all. I collected data from my API, added a pagination and inserted the complete data package in a tuple named q1 and finally I have made a dictionary named dict_1of that tuple which looks like this:
dict_1 = {100: {'ID': 100, 'DKSTGFase': None, 'DK': False, 'KM': None,
'Country: {'Name': GE', 'City': {'Name': 'Berlin'}},
'Type': {'Name': '219'}, 'DKObject': {'Name': '8555', 'Object': {'Name': 'Car'}},
'Order': {'OrderId': 101, 'CreatedOn': '2018-07-06T16:54:36.783+02:00',
'ModifiedOn': '2018-07-06T16:54:36.783+02:00',
'Name': Audi, 'Client': {‘1’ }}, 'DKComponent': {'Name': ‘John’}},
{200: {'ID': 200, 'DKSTGFase': None, 'DK': False, ' KM ': None,
'Country: {'Name': ES', 'City': {'Name': 'Madrid'}}, 'Type': {'Name': '220'},
'DKObject': {'Name': '8556', 'Object': {'Name': 'Car'}},
'Order': {'OrderId': 102, 'CreatedOn': '2018-07-06T16:54:36.783+02:00',
'ModifiedOn': '2018-07-06T16:54:36.783+02:00',
'Name': Mercedes, 'Client': {‘2’ }}, 'DKComponent': {'Name': ‘Sergio’}},
Please note that in the above dictionary I have just stated 2 records. The actual dictionary has 1400 records till it reaches ID 1500.
Now I want to 2 things:
I want to change some keys for all the records. key DK has to become DK1. Key Name in Country has to become Name1 and Name in Object has to become 'Name2'
The second thing I want is to make a dataFrame of the whole bunch of data. My expected outcome is:
This is my code:
q1 = response_2.json()
next_link = q1['#odata.nextLink']
q1 = [tuple(q1.values())]
while next_link:
new_response = requests.get(next_link, headers=headers, proxies=proxies)
new_data = new_response.json()
q1.append(tuple(new_data.values()))
next_link = new_data.get('#odata.nextLink', None)
dict_1 = {
record['ID']: record
for tup in q1
for record in tup[2]
}
#print(dict_1)
for x in dict_1.values():
x['DK1'] = x['DK']
x['Country']['Name1'] = x['Country']['Name']
x['Object']['Name2'] = x['Object']['Name']
df = pd.DataFrame(dict_1)
When i run this I receive the following Error:
Traceback (most recent call last):
File "c:\data\FF\Desktop\Python\PythongMySQL\Talky.py", line 57, in <module>
x['Country']['Name1'] = x['Country']['Name']
TypeError: 'NoneType' object is not subscriptable
working code
lists=[]
alldict=[{100: {'ID': 100, 'DKSTGFase': None, 'DK': False, 'KM': None,
'Country': {'Name': 'GE', 'City': {'Name': 'Berlin'}},
'Type': {'Name': '219'}, 'DKObject': {'Name': '8555', 'Object': {'Name': 'Car'}},
'Order': {'OrderId': 101, 'CreatedOn': '2018-07-06T16:54:36.783+02:00',
'ModifiedOn': '2018-07-06T16:54:36.783+02:00',
'Name': 'Audi', 'Client': {'1' }}, 'DKComponent': {'Name': 'John'}}}]
for eachdict in alldict:
key=list(eachdict.keys())[0]
eachdict[key]['DK1']=eachdict[key]['DK']
del eachdict[key]['DK']
eachdict[key]['Country']['Name1']=eachdict[key]['Country']['Name']
del eachdict[key]['Country']['Name']
eachdict[key]['DKObject']['Object']['Name2']=eachdict[key]['DKObject']['Object']['Name']
del eachdict[key]['DKObject']['Object']['Name']
lists.append([key, eachdict[key]['DK1'], eachdict[key]['KM'], eachdict[key]['Country']['Name1'],
eachdict[key]['Country']['City']['Name'], eachdict[key]['DKObject']['Object']['Name2'], eachdict[key]['Order']['Client']])
pd.DataFrame(lists, columns=[<columnNamesHere>])
Output:
{100: {'ID': 100,
'DKSTGFase': None,
'KM': None,
'Country': {'City': {'Name': 'Berlin'}, 'Name1': 'GE'},
'Type': {'Name': '219'},
'DKObject': {'Name': '8555', 'Object': {'Name2': 'Car'}},
'Order': {'OrderId': 101,
'CreatedOn': '2018-07-06T16:54:36.783+02:00',
'ModifiedOn': '2018-07-06T16:54:36.783+02:00',
'Name': 'Audi',
'Client': {'1'}},
'DKComponent': {'Name': 'John'},
'DK1': False}}