Create hierarchical json dump from list of dictionary in python - python

The table:
# Self-referential category table: a row may point at another row of the same
# table through parent_id.
categories = Table("categories", metadata,
Column("id", Integer, primary_key=True),
Column("name", String),
# parent_id references categories.id; it is nullable, so a row without a parent stores NULL.
Column("parent_id", Integer, ForeignKey("categories.id"),
# Reject rows that name themselves as their own parent.
CheckConstraint('id!=parent_id'), nullable=True),
)
A category can have many children, but only one parent. Using a CTE, I get a list of dictionaries like the one below. For example, for id 14 the parent is 13, and path_info records the traversal from the topmost ancestor down to the category itself: 8->10->12->13->14, where category 8 has no parent id.
[
{
"id": 14,
"name": "cat14",
"parent_id": 13,
"path_info": [
8,
10,
12,
13,
14
]
},
{
"id": 15,
"name": "cat15",
"parent_id": 13,
"path_info": [
8,
10,
12,
13,
15
]
}
]
I would like the attributes of every category on the path (the parents as well as the category itself) to be embedded as subcategories in each entry of the list, like this:
[
{
"id": 14,
"name": "cat14",
"parent_id": 13,
"subcats": [
{
"id": 8,
"name": "cat8",
"parent_id":null
},
{
"id": 10,
"name": "cat10",
"parent_id":8
},
{
"id": 12,
"name": "cat12",
"parent_id":10
},
and similarly for ids 13 and 14.....
]
},
{
"id": 15,
"name": "cat15",
"parent_id": 13,
"subcats": [
{
"id": 8,
"name": "cat8",
"parent_id":null
},
{
"id": 10,
"name": "cat10",
"parent_id":8
},
{
"id": 12,
"name": "cat12",
"parent_id":10
},
and similarly for ids 13 and 15.....
]
}
]
Notice that 'path_info' has been removed from each dictionary and that every id on the path is now shown with its details. I want the JSON dump to be indented as shown above. How should I go about this? I am using Flask 0.10 and Python 2.7.

There is a tolerable way to do this with a few list/dict comprehensions.
import json

lst = [
    {"id": 14, "name": "cat14", "parent_id": 13, "path_info": [8, 10, 12, 13, 14]},
    {"id": 15, "name": "cat15", "parent_id": 13, "path_info": [8, 10, 12, 13, 15]},
]

# Only these attributes are copied into each "subcats" entry.
fields = ('id', 'name', 'parent_id')

# Lookup table: category id -> category dictionary.
master_dct = {d['id']: d for d in lst}

for d in lst:
    # Remove path_info from the record (the requested output does not contain
    # it) and use it to build the list of embedded categories.
    path = d.pop('path_info')
    # NOTE: ids on the path that are not present in lst are skipped, because
    # their name/parent_id are unknown here. With the two-element sample above
    # only the category itself is embedded; add the ancestor rows to lst to get
    # the complete chain.
    d['subcats'] = [
        {field: master_dct[i][field] for field in fields}
        for i in path
        if i in master_dct
    ]

# indent=4 produces the indented layout asked for in the question.
with open('out.json', 'w') as f:
    json.dump(lst, f, indent=4)

You can do this in plain Python code.
Suppose we have the following JSON document. I've modified the data slightly: I added the missing ancestor nodes and wrapped the array in an object, as required by the specification:
{
"array": [
{
"id": 14,
"name": "cat14",
"parent_id": 13,
"path_info": [
8,
10,
12,
13,
14
]
},
{
"id": 15,
"name": "cat15",
"parent_id": 13,
"path_info": [
8,
10,
12,
13,
15
]
},
{
"id": 13,
"name": "cat13",
"parent_id": 12,
"path_info": [
8,
10,
12,
13
]
},
{
"id": 12,
"name": "cat12",
"parent_id": 10,
"path_info": [
8,
10,
12
]
},
{
"id": 10,
"name": "cat10",
"parent_id": 8,
"path_info": [
8,
10
]
},
{
"id": 8,
"name": "cat8",
"parent_id": null,
"path_info": [
8
]
}
]
}
Then you may use following code:
import json

# Load the document shown above; the context manager closes the file handle
# as soon as parsing is done.
with open('json_file_above.json') as f:
    j = json.load(f)
# the array with the real data we need
a = j['array']
# auxiliary dict: node id -> node
d = {x['id']: x for x in a}
for x in a:
    # path_info lists the ids from the topmost ancestor down to x itself.
    # The [1:] slice skips the FIRST id, i.e. the topmost ancestor (not x),
    # so that ancestor never appears in subcats; iterate over the whole
    # list instead if it should be included.
    # Every remaining id must be present in d, otherwise d[i] raises KeyError.
    x['subcats'] = [
        dict(id=i, name=d[i]['name'], parent_id=d[i]['parent_id'])
        for i in x['path_info'][1:]
    ]
    # path_info is not wanted in the final output.
    del x['path_info']
To be sure you are getting the thing you need:
>>> print(json.dumps(a, indent=True))
[
{
"name": "cat14",
"subcats": [
{
"name": "cat10",
"id": 10,
"parent_id": 8
},
{
"name": "cat12",
"id": 12,
"parent_id": 10
},
{
"name": "cat13",
"id": 13,
"parent_id": 12
},
{
"name": "cat14",
"id": 14,
"parent_id": 13
}
],
"id": 14,
"parent_id": 13
},
{
"name": "cat15",
"subcats": [
{
"name": "cat10",
"id": 10,
"parent_id": 8
},
{
"name": "cat12",
"id": 12,
"parent_id": 10
},
{
"name": "cat13",
"id": 13,
"parent_id": 12
},
{
"name": "cat15",
"id": 15,
"parent_id": 13
}
],
"id": 15,
"parent_id": 13
},
{
"name": "cat13",
"subcats": [
{
"name": "cat10",
"id": 10,
"parent_id": 8
},
{
"name": "cat12",
"id": 12,
"parent_id": 10
},
{
"name": "cat13",
"id": 13,
"parent_id": 12
}
],
"id": 13,
"parent_id": 12
},
{
"name": "cat12",
"subcats": [
{
"name": "cat10",
"id": 10,
"parent_id": 8
},
{
"name": "cat12",
"id": 12,
"parent_id": 10
}
],
"id": 12,
"parent_id": 10
},
{
"name": "cat10",
"subcats": [
{
"name": "cat10",
"id": 10,
"parent_id": 8
}
],
"id": 10,
"parent_id": 8
},
{
"name": "cat8",
"subcats": [],
"id": 8,
"parent_id": null
}
]
>>>

The pythonic code for this: Simple and straightforward
import json

categories = []  # input


def transform(category, child_node_id, parent_id=None):
    """Append the entry for one id of the path to category['subcats'].

    category      -- dictionary that already holds a 'subcats' list
    child_node_id -- id of the category to embed
    parent_id     -- id of that category's parent; None for the first id of
                     the path, which has no parent

    The name is synthesised as 'cat<id>', which matches the sample data only;
    look the real name up if the names do not follow that pattern.
    """
    category['subcats'].append({
        'id': child_node_id,
        'name': 'cat%s' % child_node_id,
        'parent_id': parent_id,
    })


def embed_ancestors(categories):
    """Replace 'path_info' by a 'subcats' list in every category, in place.

    Each id on the path gets the previous id on the path as its parent_id
    (None for the first one). A category without 'path_info' ends up with an
    empty 'subcats' list. Returns the same list for convenience.
    """
    for category in categories:
        category['subcats'] = []
        parent_id = None
        for node_id in category.pop('path_info', None) or []:
            transform(category, node_id, parent_id)
            # the current node is the parent of the next one on the path
            parent_id = node_id
    return categories


embed_ancestors(categories)
print(json.dumps(categories, indent=4))

Related

Join nested list to ID value

I retrieve data from my DB for a Python app and it comes in the following format (as a list, tbl):
[
{
"id": "rec2fiwnTQewTv9HC",
"createdTime": "2022-06-27T08:25:47.000Z",
"fields": {
"Num": 19,
"latitude": 31.101405,
"longitude": 36.391831,
"State": 2,
"Label": "xyz",
"Red": 0,
"Green": 255,
"Blue": 0
}
},
{
"id": "rec4y7vhgZVDHrhrQ",
"createdTime": "2022-06-27T08:25:47.000Z",
"fields": {
"Num": 30,
"latitude": 31.101405,
"longitude": 36.391831,
"State": 2,
"Label": "abc",
"Red": 0,
"Green": 255,
"Blue": 0
}
}
]
I can retrieve the values in the fields nested list by doing this:
pd.DataFrame([d['fields'] for d in tbl])
I would like to add the id field to each row of the dataframe but I can't figure out how to do this.
Try:
# Sample records: every record carries a top-level "id" and a nested "fields" mapping.
data = [
    {
        "id": "rec2fiwnTQewTv9HC",
        "createdTime": "2022-06-27T08:25:47.000Z",
        "fields": {
            "Num": 19,
            "latitude": 31.101405,
            "longitude": 36.391831,
            "State": 2,
            "Label": "xyz",
            "Red": 0,
            "Green": 255,
            "Blue": 0,
        },
    },
    {
        "id": "rec4y7vhgZVDHrhrQ",
        "createdTime": "2022-06-27T08:25:47.000Z",
        "fields": {
            "Num": 30,
            "latitude": 31.101405,
            "longitude": 36.391831,
            "State": 2,
            "Label": "abc",
            "Red": 0,
            "Green": 255,
            "Blue": 0,
        },
    },
]

# Flatten each record into one row: the record id first, then every nested field.
rows = []
for record in data:
    row = {"id": record["id"]}
    row.update(record["fields"])
    rows.append(row)

df = pd.DataFrame(rows)
print(df)
Prints:
id Num latitude longitude State Label Red Green Blue
0 rec2fiwnTQewTv9HC 19 31.101405 36.391831 2 xyz 0 255 0
1 rec4y7vhgZVDHrhrQ 30 31.101405 36.391831 2 abc 0 255 0

How to return the last 5 values that exist before the last and penultimate in a JSON?

At the moment for me to be able to do this, I get the last 7 values, then I create a list with the first 5:
# Take the trailing seven entries of the "graphPoints" list.
last_seven = response['graphPoints'][-7:]
# Keep the first five of those seven by explicit index, leaving out the final two.
only_five = [last_seven[0],last_seven[1],last_seven[2],last_seven[3],last_seven[4]]
As I'm still learning, I did it in this clumsy way because I couldn't work out how to get these 5 values directly, i.e. the last seven ([-7:]) minus the last and the penultimate one. I would like some help doing this properly.
For this example, the values I expect to collect are:
{
"minute": 33,
"value": 42
},
{
"minute": 34,
"value": 28
},
{
"minute": 35,
"value": 16
},
{
"minute": 36,
"value": -30
},
{
"minute": 37,
"value": -22
}
To make it easier, I leave here an example JSON in case you want to test it yourself:
{
"graphPoints": [
{
"minute": 1,
"value": 0
},
{
"minute": 2,
"value": 0
},
{
"minute": 3,
"value": 5
},
{
"minute": 4,
"value": 8
},
{
"minute": 5,
"value": 25
},
{
"minute": 6,
"value": 65
},
{
"minute": 7,
"value": 39
},
{
"minute": 8,
"value": 23
},
{
"minute": 9,
"value": -25
},
{
"minute": 10,
"value": -9
},
{
"minute": 11,
"value": -39
},
{
"minute": 12,
"value": -24
},
{
"minute": 13,
"value": -14
},
{
"minute": 14,
"value": -7
},
{
"minute": 15,
"value": 60
},
{
"minute": 16,
"value": 36
},
{
"minute": 17,
"value": 22
},
{
"minute": 18,
"value": 8
},
{
"minute": 19,
"value": 10
},
{
"minute": 20,
"value": 7
},
{
"minute": 21,
"value": 4
},
{
"minute": 22,
"value": 8
},
{
"minute": 23,
"value": 5
},
{
"minute": 24,
"value": 3
},
{
"minute": 25,
"value": 2
},
{
"minute": 26,
"value": 61
},
{
"minute": 27,
"value": 41
},
{
"minute": 28,
"value": 35
},
{
"minute": 29,
"value": 51
},
{
"minute": 30,
"value": 40
},
{
"minute": 31,
"value": 20
},
{
"minute": 32,
"value": 72
},
{
"minute": 33,
"value": 42
},
{
"minute": 34,
"value": 28
},
{
"minute": 35,
"value": 16
},
{
"minute": 36,
"value": -30
},
{
"minute": 37,
"value": -22
},
{
"minute": 38,
"value": -43
},
{
"minute": 39,
"value": -26
}
],
"periodTime": null,
"periodCount": 2
}
You want to get the first 5 values of the last seven values.
This can be done in two ways:
response['graphPoints'][-7:][:5]
Explaining the code above: first you get the last 7 values as a list. Then, by slicing that result with [:5], you keep its first 5 values.
Better way
BUT there is a better way. You can do this in one indexing:
response['graphPoints'][-7:-2]
This way you tell Python to give you the values at indexes -7, -6, -5, -4 and -3. Note that index -2 is not included, because the number after the colon is an exclusive stop index, so the slice ends at index -3.
I tested all these ways on your data and it works perfectly.

DataFrame of DataFrames Python

I have this Json File,
{
"timezone": "UTC",
"serverTime": 1565246363776,
"rateLimits": […… ],
"exchangeFilters": [….. ],
"symbols": [
{
"symbol": "ETHBTC",
"status": "TRADING",
"baseAsset": "ETH",
"baseAssetPrecision": 8,
"quoteAsset": "BTC",
"quotePrecision": 8,
"quoteAssetPrecision": 8,
"baseCommissionPrecision": 8,
"quoteCommissionPrecision": 8,
"filters": [
{"filterType": "PRICE_FILTER",
"minPrice": "0.00000100",
"maxPrice": "100000.00000000",
"tickSize": "0.00000100"},
{"filterType": "PERCENT_PRICE",
"multiplierUp": "1.3000",
"multiplierDown": "0.7000",
"avgPriceMins": 5},
{"filterType": "LOT_SIZE",
"minQty": "0.00100000",
"maxQty": "100000.00000000",
"stepSize": "0.00100000"}]
}
I have transformed 'symbols' from the Json File into a DataFrame:
# Parse the exchange-info document from disk.
with open('exchangeInfo.json', 'r') as fp:
    exchangeInfo = json.load(fp)
# Build the frame from the "symbols" list of the document that was just
# loaded: one row per symbol, with the nested "filters" list kept as a single column.
symbolsDF = pd.DataFrame(exchangeInfo['symbols'])
I would like to transform the column 'filters' from 'symbols' to columns like this
"symbols": [
{
"symbol": "ETHBTC",
"status": "TRADING",
"baseAsset": "ETH",
"baseAssetPrecision": 8,
"quoteAsset": "BTC",
"quotePrecision": 8,
"quoteAssetPrecision": 8,
"baseCommissionPrecision": 8,
"quoteCommissionPrecision": 8,
"minPrice": "0.00000100",
"maxPrice": "100000.00000000",
"tickSize": "0.00000100",
"minQty": "0.00100000",
"maxQty": "100000.00000000",
"stepSize": "0.00100000"}]
}
So my final DataFrame will consist of the symbols, and its columns will be
["symbol", "status", "baseAsset", "baseAssetPrecision", "quoteAsset","quotePrecision", "quoteAssetPrecision", "baseCommissionPrecision", "quoteCommissionPrecision", "minPrice", "maxPrice", "tickSize", "minQty", "maxQty", "stepSize"]
Thank You
Try:
# Sample exchange-info document with a single symbol and three filters.
exchangeInfo = {
    "timezone": "UTC",
    "serverTime": 1565246363776,
    "rateLimits": [],
    "exchangeFilters": [],
    "symbols": [
        {
            "symbol": "ETHBTC",
            "status": "TRADING",
            "baseAsset": "ETH",
            "baseAssetPrecision": 8,
            "quoteAsset": "BTC",
            "quotePrecision": 8,
            "quoteAssetPrecision": 8,
            "baseCommissionPrecision": 8,
            "quoteCommissionPrecision": 8,
            "filters": [
                {
                    "filterType": "PRICE_FILTER",
                    "minPrice": "0.00000100",
                    "maxPrice": "100000.00000000",
                    "tickSize": "0.00000100",
                },
                {
                    "filterType": "PERCENT_PRICE",
                    "multiplierUp": "1.3000",
                    "multiplierDown": "0.7000",
                    "avgPriceMins": 5,
                },
                {
                    "filterType": "LOT_SIZE",
                    "minQty": "0.00100000",
                    "maxQty": "100000.00000000",
                    "stepSize": "0.00100000",
                },
            ],
        },
    ],
}


def _merge_filters(filter_list):
    """Fold a list of filter dictionaries into one; later keys overwrite earlier ones."""
    merged = {}
    for entry in filter_list:
        merged.update(entry)
    return merged


# One row per symbol; "filters" is still a column holding a list of dictionaries.
df = pd.json_normalize(exchangeInfo["symbols"])
# Take the filters column out of the frame and spread the merged keys over columns.
filter_columns = df.pop("filters").apply(_merge_filters).apply(pd.Series)
# Glue the new columns to the right of the symbol columns; filterType is
# dropped because after merging it only holds the last filter's type.
df = pd.concat([df, filter_columns], axis=1).drop(columns="filterType")
print(df)
Prints:
symbol status baseAsset baseAssetPrecision quoteAsset quotePrecision quoteAssetPrecision baseCommissionPrecision quoteCommissionPrecision minPrice maxPrice tickSize multiplierUp multiplierDown avgPriceMins minQty maxQty stepSize
0 ETHBTC TRADING ETH 8 BTC 8 8 8 8 0.00000100 100000.00000000 0.00000100 1.3000 0.7000 5 0.00100000 100000.00000000 0.00100000

csv to json with column data that needs to be grouped

I have a CSV file in a format similar to this
order_id, customer_name, item_1_id, item_1_quantity, Item_2_id, Item_2_quantity, Item_3_id, Item_3_quantity
1, John, 4, 1, 24, 4, 16, 1
2, Paul, 8, 3, 41, 1, 33, 1
3, Andrew, 1, 1, 34, 4, 8, 2
I want to export to json, currently I am doing this.
df = pd.read_csv('simple.csv')
print ( df.to_json(orient = 'records') )
And the output is
[
{
"Item_2_id": 24,
"Item_2_quantity": 4,
"Item_3_id": 16,
"Item_3_quantity": 1,
"customer_name": "John",
"item_1_id": 4,
"item_1_quantity": 1,
"order_id": 1
},
......
However, I would like the output to be
[
{
"customer_name": "John",
"order_id": 1,
"items": [
{ "id": 4, "quantity": 1 },
{ "id": 24, "quantity": 4 },
{ "id": 16, "quantity": 1 },
]
},
......
Any suggestions on a good way to do this?
In this particular project, there will not be more than 5 items per order.
Try the following:
import pandas as pd
import json


def build_orders(df):
    """Turn a wide order table into a list of nested order dictionaries.

    The first two columns are copied into each order as they are (string
    values are stripped of surrounding whitespace). Every later column whose
    name contains "_id" starts an item; the column immediately to its right
    is taken as that item's quantity, so an "_id" column must never be the
    last column (that would raise IndexError).

    Column names are used verbatim as keys, so whitespace that follows the
    commas in the CSV header ends up in the key (e.g. " customer_name");
    read the file with skipinitialspace=True if that is not wanted.
    """
    # column_list is the array of column headers; it is the same for every row
    column_list = df.columns.values
    output_lst = []
    # iterate through all the rows
    for _, row in df.iterrows():
        order = {}
        items_lst = []
        for i, col_name in enumerate(column_list):
            if i < 2:
                # for the first 2 columns simply copy the value into the dictionary
                element = row[col_name]
                if isinstance(element, str):
                    # strip if it is a string type value
                    element = element.strip()
                order[col_name] = element
            elif "_id" in col_name:
                # i+1 assumes the item quantity comes right after the
                # corresponding item id for each item
                items_lst.append(
                    {"id": row[col_name], "quantity": row[column_list[i + 1]]}
                )
        order["items"] = items_lst
        output_lst.append(order)
    return output_lst


if __name__ == "__main__":
    # specify the first row as header
    df = pd.read_csv('simple.csv', header=0)
    output_lst = build_orders(df)
    print(json.dumps(output_lst))
If you run the above file with the simple.csv described in the question, you get the following output (pretty-printed here for readability):
[
{
"order_id": 1,
"items": [
{
"id": 4,
"quantity": 1
},
{
"id": 24,
"quantity": 4
},
{
"id": 16,
"quantity": 1
}
],
" customer_name": "John"
},
{
"order_id": 2,
"items": [
{
"id": 8,
"quantity": 3
},
{
"id": 41,
"quantity": 1
},
{
"id": 33,
"quantity": 1
}
],
" customer_name": "Paul"
},
{
"order_id": 3,
"items": [
{
"id": 1,
"quantity": 1
},
{
"id": 34,
"quantity": 4
},
{
"id": 8,
"quantity": 2
}
],
" customer_name": "Andrew"
}
]
Source DF:
In [168]: df
Out[168]:
order_id customer_name item_1_id item_1_quantity Item_2_id Item_2_quantity Item_3_id Item_3_quantity
0 1 John 4 1 24 4 16 1
1 2 Paul 8 3 41 1 33 1
2 3 Andrew 1 1 34 4 8 2
Solution:
In [169]: %paste
import re
# Start from the two identifying columns only.
x = df[['order_id','customer_name']].copy()
# For each target column, select the item columns whose name matches the
# (case-insensitive) pattern and store their values row by row as a list.
for target, pattern in (('id', r'item_.*?_id'), ('quantity', r'item_.*?_quantity')):
    matching = df.columns.str.contains(pattern, flags=re.I)
    x[target] = pd.Series(df.loc[:, matching].values.tolist(), index=df.index)
x.to_json(orient='records')
## -- End pasted text --
Out[169]: '[{"order_id":1,"customer_name":"John","id":[4,24,16],"quantity":[1,4,1]},{"order_id":2,"customer_name":"Paul","id":[8,41,33],"qua
ntity":[3,1,1]},{"order_id":3,"customer_name":"Andrew","id":[1,34,8],"quantity":[1,4,2]}]'
Intermediate helper DF:
In [82]: x
Out[82]:
order_id customer_name id quantity
0 1 John [4, 24, 16] [1, 4, 1]
1 2 Paul [8, 41, 33] [3, 1, 1]
2 3 Andrew [1, 34, 8] [1, 4, 2]
# Move the two identifying columns into the index so that only item columns remain,
# group those columns by the text after their last underscore ("id" / "quantity"),
# collect each group's values into a list, then turn the index back into
# columns and serialise one JSON object per row.
# NOTE(review): groupby(..., axis=1) is deprecated in recent pandas releases - confirm the installed version still supports it.
j = df.set_index(['order_id','customer_name']) \
.groupby(lambda x: x.split('_')[-1], axis=1) \
.agg(lambda x: x.values.tolist()) \
.reset_index() \
.to_json(orient='records')
import json
Beautified result:
In [122]: print(json.dumps(json.loads(j), indent=2))
[
{
"order_id": 1,
"customer_name": "John",
"id": [
4,
24,
16
],
"quantity": [
1,
4,
1
]
},
{
"order_id": 2,
"customer_name": "Paul",
"id": [
8,
41,
33
],
"quantity": [
3,
1,
1
]
},
{
"order_id": 3,
"customer_name": "Andrew",
"id": [
1,
34,
8
],
"quantity": [
1,
4,
2
]
}
]

Take the first n dictionaries of a specific key in a sorted list

I am writing a script that calculates the distance in miles between an order's shipping address and each store location for a specific chain of stores. So far, I have created a sorted list of dictionaries (sorted by order_id and then by distance). It looks like this:
[
{
"order_id": 1,
"distance": 10,
"storeID": 1112
},
{
"order_id": 1,
"distance": 20,
"storeID": 1116
},
{
"order_id": 1,
"distance": 30,
"storeID": 1134
},
{
"order_id": 1,
"distance": 40,
"storeID": 1133
},
{
"order_id": 2,
"distance": 6,
"storeID": 1112
},
{
"order_id": 2,
"distance": 12,
"storeID": 1116
},
{
"order_id": 2,
"distance": 18,
"storeID": 1134
},
{
"order_id": 2,
"distance": 24,
"storeID": 1133
}
]
From here, I would like to find the two closest stores for each order_id, as well as their distances.
What I'd ultimately want to end up with is a list that looks like this:
[
{
"order_id": 1,
"closet_store_distance": 10,
"closest_store_id": 1112,
"second_closet_store_distance": 20,
"second_closest_store_id": 1116
},
{
"order_id": 2,
"closet_store_distance": 6,
"closest_store_id": 1112,
"second_closet_store_distance": 12,
"second_closest_store_id": 1116
}
]
I am unsure of how to loop through each order_id in this list and select the two closest stores. Any help is appreciated.
Try something like this, I made the assumption that the initial data was in a file called sample.txt.
import json
from operator import itemgetter


def make_order(stores, id):
    """Build the summary dictionary for one order.

    stores -- sequence of (storeID, distance) pairs, nearest store first;
              it must hold at least two pairs, otherwise IndexError is raised
    id     -- the order_id the pairs belong to
    """
    return {
        "order_id": id,
        "closet_store_distance": stores[0][1],
        "closest_store_id": stores[0][0],
        "second_closet_store_distance": stores[1][1],
        "second_closest_store_id": stores[1][0]
    }


def group_closest_stores(data):
    """Return one make_order() summary per distinct order_id found in data.

    data is an iterable of dictionaries with the keys "order_id", "storeID"
    and "distance". Any number of orders is supported; the result is sorted
    by order_id. If a store occurs more than once within an order, its last
    distance wins.
    """
    # order_id -> {storeID: distance}
    by_order = {}
    for entry in data:
        by_order.setdefault(entry["order_id"], {})[entry["storeID"]] = entry["distance"]
    return [
        make_order(sorted(by_order[order_id].items(), key=itemgetter(1)), order_id)
        for order_id in sorted(by_order)
    ]


def main():
    """Read the distances from sample.txt and write the summaries to results.json."""
    with open('sample.txt', 'r') as data_file:
        data = json.load(data_file)
    orders = group_closest_stores(data)
    with open('results.json', 'w') as result_file:
        json.dump(orders, result_file, indent=3, separators=(',', ': '))


if __name__ == '__main__':
    main()
The resulting file looks like:
[
{
"second_closest_store_id": 1116,
"closet_store_distance": 10,
"closest_store_id": 1112,
"order_id": 1,
"second_closet_store_distance": 20
},
{
"second_closest_store_id": 1116,
"closet_store_distance": 6,
"closest_store_id": 1112,
"order_id": 2,
"second_closet_store_distance": 12
}
]
A nice readable answer (but using one of my free libraries.):
from PLOD import PLOD
order_store_list = [
{
"order_id": 1,
"distance": 10,
"storeID": 1112
},
{
"order_id": 1,
"distance": 20,
"storeID": 1116
},
{
"order_id": 1,
"distance": 30,
"storeID": 1134
},
{
"order_id": 1,
"distance": 40,
"storeID": 1133
},
{
"order_id": 2,
"distance": 6,
"storeID": 1112
},
{
"order_id": 2,
"distance": 12,
"storeID": 1116
},
{
"order_id": 2,
"distance": 18,
"storeID": 1134
},
{
"order_id": 2,
"distance": 24,
"storeID": 1133
}
]
#
# step 1: collect the distinct order_ids (dictionary keys cannot repeat)
#
order_id_keys = {entry["order_id"]: True for entry in order_store_list}
#
# step 2: for every order_id keep its two nearest stores
#
closest_stores = []
for order_id in order_id_keys:
    # filter to this order, sort by distance and keep at most two entries
    matching = PLOD(order_store_list).eq("order_id", order_id)
    top_two = matching.sort("distance").returnList(limit=2)
    nearest = top_two[0]
    runner_up = top_two[1]
    summary = {
        "order_id": order_id,
        "closet_store_distance": nearest["distance"],
        "closest_store_id": nearest["storeID"],
        "second_closet_store_distance": runner_up["distance"],
        "second_closest_store_id": runner_up["storeID"]
    }
    closest_stores.append(summary)
#
# step 3: put the summaries back in order_id order (if that is important)
#
closest_stores = PLOD(closest_stores).sort("order_id").returnList()
This example assumes the production order_store_list will fit in memory. If you are using a larger dataset, I strongly recommend using a database and python library for that database.
My PLOD library is free and open source (MIT), but requires Python 2.7. I'm about two weeks away from a Python 3.5 release. See https://pypi.python.org/pypi/PLOD/0.1.7

Categories

Resources