From the nested JSON below I have extracted the "width" and "height" values, because I want to append them to an empty list/array and use them later for calculations. The JSON is as follows:
[
{
"frame_id":1,
"filename":"bake/IMG_20210930_090024.jpg",
"objects": [
{"class_id":0, "name":"brick", "relative_coordinates":{"left_x":1279, "top_y": 991, "width": 922, "height":1164},"relevant":true}
]
},
{
"frame_id":2,
"filename":"bake/IMG_20210930_090017.jpg",
"objects": [
{"class_id":1, "name":"limestone", "relative_coordinates":{"left_x":1672, "top_y":1536, "width": 651, "height": 623},"relevant":true}
]
}
]
My code and result:
with open('/home/pan/output/result.json') as json_data:
data = json.load(json_data)
for item in data:
for row in item.get('objects', []):
print(row['class_id'], row['relative_coordinates']['width'],row['relative_coordinates']['height'])
0 922 1164
1 651 623
My main question: how can I show the width and height results only for objects with "class_id": 0?
Also, what is the best way to keep those values (printed to the console, or appended to an empty array []) so that I can make calculations with them later?
You can try this:
# Gather the width and height of every detected object whose class_id is 0.
value_list = []
with open('result.json') as json_data:
    data = json.load(json_data)

for item in data:
    # Frames without an 'objects' key are treated as having no detections.
    for row in item.get('objects', []):
        if row['class_id'] != 0:
            continue
        coords = row['relative_coordinates']
        print(row['class_id'], coords['width'], coords['height'])
        value_list.extend([coords['width'], coords['height']])
print(value_list)
Output:
0 922 1164
[922, 1164]
You get to your data by slogging through the dicts and lists one by one.
# Find the first detected object with class_id 0 and remember its size.
# -1 is kept as the "nothing found" marker for both values.
width = -1
height = -1
for item in data:
    # Look at every object of the frame (not only the first one) and
    # tolerate frames whose 'objects' list is missing or empty.
    match = next(
        (obj for obj in item.get('objects', []) if obj['class_id'] == 0),
        None,
    )
    if match is not None:
        width = match['relative_coordinates']['width']
        height = match['relative_coordinates']['height']
        break
Just adding a if condition in your code will give the expected results as below:
data = None
with open('/home/pan/output/result.json') as json_data:
data = json.load(json_data)
co_ordinates = []
for item in data:
for row in item.get('objects', []):
if row['class_id'] == 0:
co_ordinates.append(row['relative_coordinates']['width'])
co_ordinates.append(row['relative_coordinates']['height'])
break
print (co_ordinates)
Output:
[922, 1164]
Related
I put together a python script to clean CSV files. The reformatting works, but the data rows the writer writes to the new CSV file are wrong. I am constructing a dictionary of all rows of data before writing using writer.writerows(). When I check the dictionary using print statements, the correct data is appending to the list. However, after appending, the incorrect values are in the dictionary.
import csv
data = []
with open(r'C:\\Data\\input.csv', 'r') as csv_file:
csv_reader = csv.reader(csv_file, delimiter=',')
line_count = 0
street_fields = [] # Store new field names in list
street_fields.append("startdate")
street_fields.append("starttime")
street_fields.append("sitecode")
street_fields.append("recordtime")
street_fields.append("direction")
street_fields.append("turnright")
street_fields.append("wentthrough")
street_fields.append("turnleft")
street_fields.append("pedestrians")
for row in csv_reader: # Read input rows
if line_count == 0:
startdate = row[1] # Get Start Date from B1
line_count += 1
elif line_count == 1:
starttime = row[1] # Get Start Time from B2
line_count += 1
elif line_count == 2:
sitecode = str(row[1]) # Get Site code from B3
line_count += 1
elif line_count == 3:
street_count = len(row) - 3 # Determine number of streets in report
streetnames = []
i = 1
while i < street_count:
streetnames.append(row[i]) # Add streets to list
i += 4
line_count += 1
elif line_count > 4:
street_values = {} # Create dictionary to store new row values
n = 1
for street in streetnames:
turnright = 0 + n
wentthrough = 1 + n
turnleft = 2 + n
pedestrians = 3 + n
street_values["startdate"] = startdate
street_values["starttime"] = starttime
street_values["sitecode"] = sitecode
street_values["recordtime"] = row[0]
street_values["direction"] = street
street_values["turnright"] = int(row[turnright])
street_values["wentthrough"] = int(row[wentthrough])
street_values["turnleft"] = int(row[turnleft])
street_values["pedestrians"] = int(row[pedestrians])
data.append(street_values) # Append row dictionary to list
#print(street_values) ### UNCOMMENT TO SEE CORRECT ROW DATA ###
#print(data) ### UNCOMMENT TO SEE INCORRECT ROW DATA ###
n += 4
line_count += 1
else:
line_count += 1
with open(r'C:\\Data\\output.csv', 'w', newline='', encoding="utf-8") as w_scv_file:
writer = csv.DictWriter(w_scv_file,fieldnames=street_fields)
writer.writerow(dict((fn,fn) for fn in street_fields)) # Write headers to new CSV
writer.writerows(data) # Write data from list of dictionaries
An example of the list of dictionaries created (JSON):
[
{
"startdate":"11/9/2017",
"starttime":"7:00",
"sitecode":"012345",
"recordtime":"7:00",
"direction":"Cloud Dr. From North",
"turnright":0,
"wentthrough":2,
"turnleft":11,
"pedestrians":0
},
{
"startdate":"11/9/2017",
"starttime":"7:00",
"sitecode":"012345",
"recordtime":"7:00",
"direction":"Florida Blvd. From East",
"turnright":4,
"wentthrough":433,
"turnleft":15,
"pedestrians":0
},
{
"startdate":"11/9/2017",
"starttime":"7:00",
"sitecode":"012345",
"recordtime":"7:00",
"direction":"Cloud Dr. From South",
"turnright":15,
"wentthrough":4,
"turnleft":6,
"pedestrians":0
},
{
"startdate":"11/9/2017",
"starttime":"7:00",
"sitecode":"012345",
"recordtime":"7:00",
"direction":"Florida Blvd. From West",
"turnright":2,
"wentthrough":219,
"turnleft":2,
"pedestrians":0
},
{
"startdate":"11/9/2017",
"starttime":"7:00",
"sitecode":"012345",
"recordtime":"7:15",
"direction":"Cloud Dr. From North",
"turnright":1,
"wentthrough":3,
"turnleft":8,
"pedestrians":0
}
]
What actually writes to the CSV:
Note the Direction field and data rows are incorrect. For some reason when it loops through the streetnames list, the last street name and the corresponding row values persist for the individual record time.
Do I need to delete my variables before re-assigning them values?
It looks like you are appending the same dictionary to the list over and over.
In general, when appending a number of separate dictionaries to a list, I would use mylist.append(mydict.copy()). Otherwise, when you later assign new values within a dictionary of the same name, you are really just updating your old dictionary, including the entries in your list that point to that same dictionary (see mutable vs immutable objects in Python).
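In short: if you want the dictionary in the list to be a separate entity from the new one, append a copy made with dict.copy(). Note that dict.copy() is a shallow copy, which is enough here because the values are plain strings and integers; use copy.deepcopy() when the values are themselves mutable containers.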
I have the following code:
import json
data_sample = [{
"name":"John",
"age":30,
"cars":[ {
"temp":{
"sum":"20",
"for":12,
}
,
"id":30,
"element":[ {"model":"Taurus1", "doors":{"id":"1", "id2":101}}, {"model":"T1", "doors":{"id":"2", "id2":12}}, {"model":"As", "doors":{"id":"Mo", "id2":4}} ]
}, {
"temp":{
"sum":"10",
"for":12,
}
,
"id":31,
"element":[ {"model":"Taurus2", "doors":{"id":"2", "id2":102}}, {"model":"T2", "doors":{"id":"5", "id2":12}}, {"model":"Thing", "doors":{"id":"Fo", "id2":4}} ]
}, {
"temp":{
"sum":"20",
"for":10,
}
,
"id":32,
"element":[ {"model":"Taurus3", "doors":{"id":"3", "id2":103}}, {"model":"T3", "doors":{"id":"15", "id2":62}}, {"model":"By", "doors":{"id":"Log", "id2":4}} ]
} ]
}]
def flat_list(z):
    """Return a new list in which every dict or list element of *z* has
    been replaced by ``flatten_data(element)``; other elements are kept
    unchanged and the original order is preserved.
    """
    return [
        flatten_data(entry) if type(entry) in (dict, list) else entry
        for entry in z
    ]
def flatten_data(y):
    """Flatten nested dicts in *y* into a single-level dict.

    Keys of nested dicts are joined with '_' (``{'a': {'b': 1}}`` becomes
    ``{'a_b': 1}``). Lists are not merged into the parent; they are stored
    under their joined key after being passed through ``flat_list``.
    """
    out = {}

    def flatten(node, prefix=''):
        kind = type(node)
        if kind is dict:
            for key, child in node.items():
                flatten(child, prefix + key + '_')
            return
        # prefix always ends with the '_' separator; strip it for the key.
        out[prefix[:-1]] = flat_list(node) if kind is list else node

    flatten(y)
    return out
def generatejson(response2):
    """Flatten every data set in *response2* and serialise the result.

    *response2* is a sequence ``[first data set, second data set, ...]``.
    The returned JSON object maps each data set's position in that
    sequence to its flattened form, with keys sorted.
    """
    flat = {}
    for position, data_response in enumerate(response2):
        flat[position] = flatten_data(data_response)
    return json.dumps(flat, sort_keys=True)
print generatejson(data_sample)
This code takes data from the following format:
[(first data set), (second data set)]
and begins to look for nested dicts. If a nested dict is detected, the code flattens it into the parent level.
For example the code detects this:
doors is nested dict so it converts it to:
Note that it doesn't change the lists/arrays. They are not being flattened.
My issue:
With a small amount of data the code works great; however, when handling a large number of data sets (1000+) the performance is very low, and sometimes it even crashes.
How can I improve and optimize the performance of this code?
The data_sample contains only 1 data set (I assume that's enough for checking).
I have a json file something of this sort
{"label" :
[
{"confidence": 1.0, "Arm_upVector": "(0.108535, 0.987291, 0.116085)", "bone_direction": ["(0, 0, 0)", "(0.354117, -0.111147, 0.928573)", "(0.144538, -0.00496286, 0.989487)", "(0.446597, -0.15941, 0.88042)", "(-0.145324, -0.134126, 0.980251)", "(0.0181324, 0.250534, 0.967938)", "(0.0234257, 0.321893, 0.946486)", "(0.0270345, 0.370523, 0.92843)", "(-0.278899, -0.118777, 0.952947)", "(-0.233781, 0.223357, 0.946287)", "(-0.202379, 0.307555, 0.92976)", "(-0.179014, 0.365886, 0.913281)", "(-0.419468, -0.0960966, 0.902669)", "(-0.311356, 0.246008, 0.917898)", "(-0.270254, 0.328053, 0.905176)", "(-0.239766, 0.384412, 0.891482)", "(-0.545443, -0.112047, 0.830625)", "(-0.571996, 0.254741, 0.779697)", "(-0.541193, 0.297035, 0.78669)", "(-0.517904, 0.327198, 0.79039)"], "handtype": "Right hand", "hand": 1, "finger": 5, "FrameId": 132251}
]
}
I am trying to match the handtype present in the json file with the handtype in my dictionary.
my dictionary is as follows:
data1={
'FrameId':frame.id,
'hand' : len(frame.hands),
'handtype': handType,
'Arm_upVector': str(basis.y_basis),
'confidence': confidence,
'finger': len(frame.fingers),
'bone_direction' : list1
# 'pinch_strength': pinch,
# 'grab_strength' : strength,
# 'vector_direction' : str(fingerDirection)
}
if confidence==1:
with open('data.json') as f:
s=json.load(f)
for row in s['label']:
if data1['handtype'] == s['handtype']:
print "match found"
I am trying to do something of this sort. Please help
You need to compare row['handtype'] with data1['handtype'] like following:
if confidence==1:
with open('data.json') as f:
s=json.load(f)
for row in s['label']:
if data1['handtype'] == row['handtype']:
print "match found"
Search for a value and get the parent dictionary names (keys):
Dictionary = {dict1:{
'part1': {
'.wbxml': 'application/vnd.wap.wbxml',
'.rl': 'application/resource-lists+xml',
},
'part2':
{'.wsdl': 'application/wsdl+xml',
'.rs': 'application/rls-services+xml',
'.xop': 'application/xop+xml',
'.svg': 'image/svg+xml',
},
'part3':{...}, ...
dict2:{
'part1': { '.dotx': 'application/vnd.openxmlformats-..'
'.zaz': 'application/vnd.zzazz.deck+xml',
'.xer': 'application/patch-ops-error+xml',}
},
'part2':{...},
'part3':{...},...
},...
In above dictionary I need to search values like: "image/svg+xml". Where, none of the values are repeated in the dictionary. How to search the "image/svg+xml"? so that it should return the parent keys in a dictionary { dict1:"part2" }.
Please note: Solutions should work unmodified for both Python 2.7 and Python 3.3.
Here's a simple recursive version:
def getpath(nested_dict, value, prepath=()):
    """Return the tuple of keys leading to the first occurrence of *value*.

    The search is depth-first in the dict's iteration order. *prepath* is
    the key path already walked to reach *nested_dict*. Returns None when
    *value* does not occur anywhere.
    """
    for key, child in nested_dict.items():
        here = prepath + (key,)
        if child == value:
            return here
        if not hasattr(child, 'items'):
            continue  # leaf that is not the value we want
        found = getpath(child, value, here)
        if found is not None:
            return found
    return None
Example:
print(getpath(dictionary, 'image/svg+xml'))
# -> ('dict1', 'part2', '.svg')
To yield multiple paths (Python 3 only solution):
def find_paths(nested_dict, value, prepath=()):
    """Yield the key path (a tuple) of every occurrence of *value*.

    Paths are produced depth-first in the dict's iteration order;
    *prepath* is the key path already walked to reach *nested_dict*.
    """
    for key, child in nested_dict.items():
        trail = prepath + (key,)
        if child == value:
            yield trail
            continue
        if hasattr(child, 'items'):
            # Re-yield everything found below this nested dict.
            for deeper in find_paths(child, value, trail):
                yield deeper
print(*find_paths(dictionary, 'image/svg+xml'))
This is an iterative traversal of your nested dicts that additionally keeps track of all the keys leading up to a particular point. Therefore as soon as you find the correct value inside your dicts, you also already have the keys needed to get to that value.
The code below will run as-is if you put it in a .py file. The find_mime_type(...) function returns the sequence of keys that will get you from the original dictionary to the value you want. The demo() function shows how to use it.
d = {'dict1':
{'part1':
{'.wbxml': 'application/vnd.wap.wbxml',
'.rl': 'application/resource-lists+xml'},
'part2':
{'.wsdl': 'application/wsdl+xml',
'.rs': 'application/rls-services+xml',
'.xop': 'application/xop+xml',
'.svg': 'image/svg+xml'}},
'dict2':
{'part1':
{'.dotx': 'application/vnd.openxmlformats-..',
'.zaz': 'application/vnd.zzazz.deck+xml',
'.xer': 'application/patch-ops-error+xml'}}}
def demo():
    """Look up one MIME type in the module-level dict ``d`` and print the
    key chain that leads to it, then confirm the chain by walking it.

    Prints a message and exits if the MIME type is not present.
    """
    mime_type = 'image/svg+xml'
    try:
        key_chain = find_mime_type(d, mime_type)
    except KeyError:
        print ('Could not find this mime type: {0}'.format(mime_type))
        exit()
    print ('Found {0} mime type here: {1}'.format(mime_type, key_chain))

    # Follow the returned keys from the top-level dict down to the value.
    cursor = d
    for step in key_chain:
        cursor = cursor[step]
    print ('Confirmation lookup: {0}'.format(cursor))
def find_mime_type(d, mime_type):
    """Return the list of keys that leads from *d* to *mime_type*.

    Iterative traversal driven by an explicit stack of
    ``(keys_so_far, value)`` pairs. Raises KeyError when *mime_type* is
    not found anywhere in the nested dicts.
    """
    pending = [([], d)]
    while pending:
        keys_so_far, candidate = pending.pop()
        if candidate == mime_type:
            return keys_so_far
        try:
            children = candidate.items()
        except AttributeError:
            # Not a nested dict, so there is nothing below it to search.
            continue
        pending.extend((keys_so_far + [k], v) for k, v in children)
    # The stack ran dry without a match.
    raise KeyError
if __name__ == '__main__':
demo()
Output:
Found image/svg+xml mime type here: ['dict1', 'part2', '.svg']
Confirmation lookup: image/svg+xml
Here is a solution that works for a complex data structure of nested lists and dicts
import pprint
def search(d, search_pattern, prev_datapoint_path=''):
    """Return the access paths of everything in *d* that contains
    *search_pattern*.

    *d* may be any nesting of dicts and lists. A match is a dict key, a
    list index, or a leaf value whose ``str()`` contains *search_pattern*.
    Each result is a string such as ``"d['items'][0]['id']"`` built by
    appending subscripts to *prev_datapoint_path* (normally the variable
    name of the structure being searched). Empty paths are dropped, so a
    matching top-level scalar with an empty prefix yields no result.
    """
    output = []
    if isinstance(d, dict):
        for dkey, dvalue in d.items():
            try:
                child_path = prev_datapoint_path + "['" + dkey + "']"
            except TypeError:
                # Non-string key (e.g. int): subscript with its repr
                # instead of failing on the string concatenation.
                child_path = prev_datapoint_path + "[" + repr(dkey) + "]"
            if search_pattern in str(dkey):
                output.append(child_path)
            output.extend(search(dvalue, search_pattern, child_path))
    elif isinstance(d, list):
        for index, element in enumerate(d):
            child_path = prev_datapoint_path + "[" + str(index) + "]"
            if search_pattern in str(index):
                # Record the path of the matching index, not the bare
                # integer (index 0 used to be filtered out as falsy).
                output.append(child_path)
            output.extend(search(element, search_pattern, child_path))
    elif search_pattern in str(d):
        output.append(prev_datapoint_path)
    return [path for path in output if path]
if __name__ == "__main__":
d = {'dict1':
{'part1':
{'.wbxml': 'application/vnd.wap.wbxml',
'.rl': 'application/resource-lists+xml'},
'part2':
{'.wsdl': 'application/wsdl+xml',
'.rs': 'application/rls-services+xml',
'.xop': 'application/xop+xml',
'.svg': 'image/svg+xml'}},
'dict2':
{'part1':
{'.dotx': 'application/vnd.openxmlformats-..',
'.zaz': 'application/vnd.zzazz.deck+xml',
'.xer': 'application/patch-ops-error+xml'}}}
d2 = {
"items":
{
"item":
[
{
"id": "0001",
"type": "donut",
"name": "Cake",
"ppu": 0.55,
"batters":
{
"batter":
[
{"id": "1001", "type": "Regular"},
{"id": "1002", "type": "Chocolate"},
{"id": "1003", "type": "Blueberry"},
{"id": "1004", "type": "Devil's Food"}
]
},
"topping":
[
{"id": "5001", "type": "None"},
{"id": "5002", "type": "Glazed"},
{"id": "5005", "type": "Sugar"},
{"id": "5007", "type": "Powdered Sugar"},
{"id": "5006", "type": "Chocolate with Sprinkles"},
{"id": "5003", "type": "Chocolate"},
{"id": "5004", "type": "Maple"}
]
},
...
]
}
}
pprint.pprint(search(d,'svg+xml','d'))
>> ["d['dict1']['part2']['.svg']"]
pprint.pprint(search(d2,'500','d2'))
>> ["d2['items']['item'][0]['topping'][0]['id']",
"d2['items']['item'][0]['topping'][1]['id']",
"d2['items']['item'][0]['topping'][2]['id']",
"d2['items']['item'][0]['topping'][3]['id']",
"d2['items']['item'][0]['topping'][4]['id']",
"d2['items']['item'][0]['topping'][5]['id']",
"d2['items']['item'][0]['topping'][6]['id']"]
Here are two similar quick and dirty ways of doing this type of operation. The function find_parent_dict1 uses list comprehension but if you are uncomfortable with that then find_parent_dict2 uses the infamous nested for loops.
Dictionary = {'dict1':{'part1':{'.wbxml':'1','.rl':'2'},'part2':{'.wbdl':'3','.rs':'4'}},'dict2':{'part3':{'.wbxml':'5','.rl':'6'},'part4':{'.wbdl':'1','.rs':'10'}}}
value = '3'
def find_parent_dict1(Dictionary):
    """Return ``{outer_key: inner_key}`` for the first outer key that has
    an inner dict containing the module-level ``value``.

    If several inner dicts under that outer key contain ``value``, the
    last one wins. Returns None when nothing matches.
    """
    for outer_key, parts in Dictionary.items():
        hit = {}
        for inner_key in parts.keys():
            if value in parts[inner_key].values():
                hit[outer_key] = inner_key
        if hit:
            return hit
find_parent_dict1(Dictionary)
def find_parent_dict2(Dictionary):
    """Print ``{outer_key: inner_key}`` for every inner dict that contains
    the module-level ``value``.

    *Dictionary* is expected to be a dict of dicts of dicts; nothing is
    returned.
    """
    for key1 in Dictionary.keys():
        for key2 in Dictionary[key1].keys():
            if value in Dictionary[key1][key2].values():
                # Parenthesised so the same line runs on Python 2.7 and 3.x.
                print({key1: key2})
find_parent_dict2(Dictionary)
Traverses a nested dict looking for a particular value. When success is achieved the full key path to the value is printed. I left all the comments and print statements for pedagogical purposes (this isn't production code!)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Jan 24 17:16:46 2022
#author: wellington
"""
class Tree(dict):
    """
    dict subclass with autovivification, as in Perl hashes: reading a
    missing key creates, stores and returns a new empty Tree.
    """

    def __missing__(self, key):
        child = type(self)()
        self[key] = child
        return child
# tracking the key sequence when seeking the target
key_list = Tree()
# dict storing the target success result
success = Tree()
# example nested dict of dicts and lists
E = {
'AA':
{
'BB':
{'CC':
{
'DD':
{
'ZZ':'YY',
'WW':'PP'
},
'QQ':
{
'RR':'SS'
},
},
'II':
{
'JJ':'KK'
},
'LL':['MM', 'GG', 'TT']
}
}
}
def find_keys_from_value(data, target):
"""
recursive function -
given a value it returns all the keys in the path to that value within
the dict "data"
there are many paths and many false routes
at the end of a given path if success has not been achieved
the function discards keys to get back to the next possible path junction
"""
print(f"the number of keys in the local dict is {len(data)}")
key_counter = 0
for key in data:
key_counter += 1
# if target has been located stop iterating through keys
if success[target] == 1:
break
else:
# eliminate prior key from path that did not lead to success
if key_counter > 1:
k_list.pop()
# add key to new path
k_list.append(key)
print(f"printing k_list after append{k_list}")
# if target located set success[target] = 1 and exit
if key == target or data[key] == target:
key_list[target] = k_list
success[target] = 1
break
# if the target has not been located check to see if the value
# associated with the new key is a dict and if so return to the
# recursive function with the new dict as "data"
elif isinstance(data[key], dict):
print(f"\nvalue is dict\n {data[key]}")
find_keys_from_value(data[key], target)
# check to see if the value associated with the new key is a list
elif isinstance(data[key], list):
# print("\nv is list\n")
# search through the list
for i in data[key]:
# check to see if the list element is a dict
# and if so return to the recursive function with
# the new dict as "data
if isinstance(i, dict):
find_keys_from_value(i, target)
# check to see if each list element is the target
elif i == target:
print(f"list entry {i} is target")
success[target] = 1
key_list[target] = k_list
elif i != target:
print(f"list entry {i} is not target")
print(f"printing k_list before pop_b {k_list}")
print(f"popping off key_b {key}")
# so if value is not a key and not a list and not the target then
# discard the key from the key list
elif data[key] != target:
print(f"value {data[key]} is not target")
print(f"printing k_list before removing key_before {k_list}")
print(f"removing key_c {key}")
k_list.remove(key)
# select target values
values = ["PP", "SS", "KK", "TT"]
success = {}
for target in values:
print(f"\nlooking for target {target}")
success[target] = 0
k_list = []
find_keys_from_value(E, target)
print(f"\nprinting key_list for target {target}")
print(f"{key_list[target]}\n")
print("\n****************\n\n")
I have result set of rows in a database that all relate to each other through a parent child relationship
Each row is represented as follows objectid, id, parent, child, name, level so when I read an example from the database in my program it looks like this
Organization1
Component1
Department1
Sections1
Sections2
Department2
Sections3
Component2
Department3
Sections4
Sections5
Department4
Sections6
Where an organization has many components, a component has many departments, and a department has many sections
my code thus far looks like this and that works but I need to put it into json format and the json format has to look like the below
for v in result:
level = v[5]
child = v[3]
parent = v[2]
if level == 0:
OrgDic['InstID'] = v[4]
OrgDic['Child'] = v[3]
OrgDic['Parent'] = v[2]
Organizations.append(InstDic)
OrgDic = {}
if level == 1:
ComponentsDic['CollegeID'] = v[4]
ComponentsDic['Child'] = v[3]
ComponentsDic['Parent'] = v[2]
Components.append(CollegeDic)
ComponentsDic = {}
if level == 2:
DepartmentDic['DepartmentID'] = v[4]
DepartmentDic['Child'] = v[3]
DepartmentDic['Parent'] = v[2]
Departments.append(DepartmentDic)
DepartmentDic = {}
if level == 3:
SectionDic['SubjectID'] = v[4]
SectionDic['Child'] = v[3]
SectionDic['Parent'] = v[2]
Sections.append(SubjectDic)
SectionDic = {}
for w in :
print w['Organization']
for x in Components:
if w['Child'] == x['Parent']:
print x['Components']
for y in Departments:
if x['Child'] == y['Parent']:
print y['Deparments']
for z in Sections:
if y['Child'] == z['Parent']:
print z['Sections']
JSON FORMAT
{
"Eff_Date": "08/02/2013",
"Tree":
[
{
"OrganizationID": "Organization1",
"Components":
[
{"ComponentID": "Component1",
"Departments":
[
{"DepartmentID": "Dep1",
"Sections":
[
{"SectionID": "Section1"},
{"SectionID": "Section2"}
]},
{"DepartmentID": "Dep2",
"Sections":
[
{"SectionID": "Section3"}
]}
]}
]
}
basically, all you have to do is dump the json after your first snippet (given that snippet does correctly create the tree you exposed, I did not thoroughly check it, but it looks coherent):
import json
print json.dumps({"Eff_Date": "08/02/2013", "Tree":Organizations})
and tada!
I was able to do it the following way
# Sample rows in the order listToDict unpacks them:
# [parent_id, id, name, attr]; parent_id -1 marks the root row.
data = [
    [-1, 0, "name1", 0],
    [0, 1, "name2", 1],
    [1, 2, "name3", 1],
    [2, 3, "name4", 2],
    [2, 4, "name5", 2],
    [1, 5, "name6", 2],
    [5, 6, "name7", 3],
    [5, 7, "name8", 1],
    [5, 7, "name9", 2],
]
def listToDict(input):
    """Build a nested ``{'name': ..., 'children': [...]}`` tree from rows.

    Each row of *input* is ``(parent_id, id, name, attr)``; *attr* is not
    used. A row whose parent_id is -1 names the root. Every other row is
    appended to the 'children' list of its parent, which must already
    have appeared earlier in *input* (otherwise KeyError is raised).
    Returns the root dict.
    """
    root = {}
    lookup = {}
    for parent_id, node_id, name, _attr in input:
        if parent_id != -1:
            node = {'name': name}
            siblings = lookup[parent_id].setdefault('children', [])
            siblings.append(node)
        else:
            root['name'] = name
            node = root
        # Remember the node so later rows can attach children to it.
        lookup[node_id] = node
    return root
result = listToDict(data)
print result
print json.dumps(result)
In my case my data was a result set from a database so I had to loop through it as follows
for v in result:
values = [v[2], v[3], v[4], v[5]]
pc.append(values)