Parsing Nested loops and converting to Dataframe

Parsing Nested loops and converting to Dataframe - python

I have queried device information from MongoDB and the output looks like this. I need to put it in a dataframe with the following information:
[{'message': {'obj': [{'time': '2022-06-03 00:00:00',
'temp': 33.96,
'humidty': 91.44,
'x0': -543,
'y0': 93,
'z0': -790,
'dmac': 'DD340206D4C6'},
{'time': '2022-06-03 00:00:00',
'temp': 29.86,
'humidty': 80.92,
'x0': 178,
'y0': 774,
'z0': -527,
'dmac': 'DD340206D4C6'},
{'time': '2022-06-03 00:00:00',
'temp': 30.33,
'humidty': 85.11,
'x0': 94,
'y0': -701,
'z0': -737,
'dmac': 'DD340206D4C6'}]}},
{'message': {'obj': [{'time': '2022-06-03 00:00:01',
'temp': 28.82,
'humidty': 85.77,
'x0': -193,
'y0': 423,
'z0': -820,
'dmac': 'DD340206D4C6'},
{'time': '2022-06-03 00:00:01',
'temp': 30.33,
'humidty': 85.11,
'x0': 64,
'y0': -705,
'z0': -744,
'dmac': 'DD340206D4C6'},
{'time': '2022-06-03 00:00:02',
'temp': 33.96,
'humidty': 91.44,
'x0': -541,
'y0': 95,
'z0': -798,
'dmac': 'DD340206D4C6'}]}}
Expected like this:
dmac
temp
humidity
x0
y0
z0
time
DD340206D4C6
29.86
91.44
-543
93
-790
2022-06-03 00:00:00
It is a list of nested dictionaries. Each "obj" array contains 3 dictionaries, so I need to put each one in a different row.

data_in:
in_ = [
{
"message": {
"obj": [
{
"time": "2022-06-03 00:00:00",
"temp": 33.96,
"humidty": 91.44,
"x0": -543,
"y0": 93,
"z0": -790,
"dmac": "DD340206D4C6"
},
{
"time": "2022-06-03 00:00:00",
"temp": 29.86,
"humidty": 80.92,
"x0": 178,
"y0": 774,
"z0": -527,
"dmac": "DD340206D4C6"
},
{
"time": "2022-06-03 00:00:00",
"temp": 30.33,
"humidty": 85.11,
"x0": 94,
"y0": -701,
"z0": -737,
"dmac": "DD340206D4C6"
}
]
}
},
{
"message": {
"obj": [
{
"time": "2022-06-03 00:00:01",
"temp": 28.82,
"humidty": 85.77,
"x0": -193,
"y0": 423,
"z0": -820,
"dmac": "DD340206D4C6"
},
{
"time": "2022-06-03 00:00:01",
"temp": 30.33,
"humidty": 85.11,
"x0": 64,
"y0": -705,
"z0": -744,
"dmac": "DD340206D4C6"
},
{
"time": "2022-06-03 00:00:02",
"temp": 33.96,
"humidty": 91.44,
"x0": -541,
"y0": 95,
"z0": -798,
"dmac": "DD340206D4C6"
}
]
}
}
]
Code:
import pandas as pd

# Column order wanted in the final frame.
columns = ["dmac", "temp", "humidty", "x0", "y0", "z0", "time"]
# Each dict found under message -> obj becomes one row.
df = pd.json_normalize(in_, record_path=["message", "obj"])[columns]
Output:
dmac
temp
humidty
x0
y0
z0
time
DD340206D4C6
33.96
91.44
-543
93
-790
2022-06-03 00:00:00
DD340206D4C6
29.86
80.92
178
774
-527
2022-06-03 00:00:00
DD340206D4C6
30.33
85.11
94
-701
-737
2022-06-03 00:00:00
DD340206D4C6
28.82
85.77
-193
423
-820
2022-06-03 00:00:01
DD340206D4C6
30.33
85.11
64
-705
-744
2022-06-03 00:00:01
DD340206D4C6
33.96
91.44
-541
95
-798
2022-06-03 00:00:02

Related

Python read from JSON file value and write to csv modify some values

I have a JSON file loaded and written to CSV in this way:
import csv
import json

# Opening JSON file and loading the data
# into the variable data
with open('personal.json') as json_file:
    data = json.load(json_file)

employee_data = data['results']
# NOTE: the former `print (inventory_data)` was removed; the assignment of
# inventory_data is commented out, so the print raised NameError.
#inventory_data = data['inventory']
#print(f'Total users: {res.json().get("total")}')

# now we will open a file for writing; newline='' is what the csv module
# expects so that no blank lines are inserted between rows on Windows
with open('data_file.csv', 'a', newline='') as data_file:
    # create the csv writer object
    csv_writer = csv.writer(data_file)
    for count, emp in enumerate(employee_data):
        if count == 0:
            # Writing headers of CSV file (taken from the first record)
            csv_writer.writerow(emp.keys())
        # Writing data of CSV file
        csv_writer.writerow(emp.values())
Works perfect there is no problem at all. This is the example of some JSON file created by this:
{
"count": 1100,
"next": "https://any.com/?limit=10&offset=10",
"previous": null,
"results": [
{
"list_no": "00011",
"item_no": "MZVL2256HC",
"upc_code": "",
"manufacturer_no": "MZVL2256HCHQ-00",
"manufacturer": "SAMSUNGOEM",
"category": "SSD",
"product_name": "Samsung PM9A1 - SSD - 256 GB - PCIe 4.0 x4 (NVMe)",
"price": 56.65,
"instant_rebate": "",
"instant_rebate_item_no": "",
"weight": 0.06,
"unit": 300,
"length": 14.75,
"width": 12.0,
"height": 3.75,
"package": "BULK",
"specorder": "Y",
"is_domestic_only": "N",
"inventory": {
"": 50
}
},
{
"list_no": "00012",
"item_no": "MZVL2512HC",
"upc_code": "",
"manufacturer_no": "MZVL2512HCJQ-00",
"manufacturer": "SAMSUNGOEM",
"category": "SSD",
"product_name": "Samsung PM9A1 MZVL2512HCJQ - SSD - 512 GB - PCIe 4.0 x4 (NVMe)",
"price": 70.04,
"instant_rebate": "",
"instant_rebate_item_no": "",
"weight": 0.06,
"unit": 300,
"length": 23.0,
"width": 17.0,
"height": 10.0,
"package": "BULK",
"specorder": "Y",
"is_domestic_only": "N",
"inventory": {
"": 55
}
},
{
"list_no": "00013",
"item_no": "MZVL21T0HC",
"upc_code": "",
"manufacturer_no": "MZVL21T0HCLR-00A00",
"manufacturer": "SAMSUNGOEM",
"category": "SSD",
"product_name": "Samsung PM9A1 MZVL21T0HCLR - SSD - 1 TB - PCIe 4.0 x4 (NVMe)",
"price": 105.06,
"instant_rebate": "",
"instant_rebate_item_no": "",
"weight": 0.06,
"unit": 300,
"length": 23.0,
"width": 17.0,
"height": 10.0,
"package": "BULK",
"specorder": "Y",
"is_domestic_only": "N",
"inventory": {
"": 52,
"M": 0
}
},
{
"list_no": "00014",
"item_no": "MZVL22T0HB",
"upc_code": "",
"manufacturer_no": "MZVL22T0HBLB-00A00",
"manufacturer": "SAMSUNGOEM",
"category": "SSD",
"product_name": "Samsung PM9A1 - SSD - 2 TB - PCIe 4.0 x4 (NVMe)",
"price": 187.46,
"instant_rebate": "",
"instant_rebate_item_no": "",
"weight": 0.06,
"unit": 300,
"length": 23.0,
"width": 17.0,
"height": 10.0,
"package": "BULK",
"specorder": "Y",
"is_domestic_only": "N",
"inventory": {
"": 58
}
},
{
"list_no": "00102",
"item_no": "MT48RD64A1",
"upc_code": "649528921666",
"manufacturer_no": "MTC40F2046S1RC48BA1R",
"manufacturer": "MICRON",
"category": "MEM",
"product_name": "Micron - DDR5 - module - 64 GB - DIMM 288-pin - 4800 MHz / PC5-38400 - registered",
"price": 297.67,
"instant_rebate": "",
"instant_rebate_item_no": "",
"weight": 0.09,
"unit": 100,
"length": 14.75,
"width": 12.0,
"height": 3.75,
"package": "BULK",
"specorder": "Y",
"is_domestic_only": "Y",
"inventory": {
"": 36,
"L": 0
}
},
{
"list_no": "00104",
"item_no": "MT48RD32A1",
"upc_code": "649528921598",
"manufacturer_no": "MTC20F2085S1RC48BA1R",
"manufacturer": "MICRON",
"category": "MEM",
"product_name": "Micron - DDR5 - module - 32 GB - DIMM 288-pin - 4800 MHz / PC5-38400 - registered",
"price": 164.8,
"instant_rebate": "",
"instant_rebate_item_no": "",
"weight": 0.1,
"unit": 100,
"length": 14.75,
"width": 12.0,
"height": 3.75,
"package": "BULK",
"specorder": "N",
"is_domestic_only": "N",
"inventory": {
"": 53
}
},
{
"list_no": "00105",
"item_no": "MT48RS32A2",
"upc_code": "649528921529",
"manufacturer_no": "MTC20F1045S1RC48BA2R",
"manufacturer": "MICRON",
"category": "MEM",
"product_name": "Micron - DDR5 - module - 32 GB - DIMM 288-pin - 4800 MHz / PC5-38400 - registered",
"price": 164.8,
"instant_rebate": "",
"instant_rebate_item_no": "",
"weight": 0.1,
"unit": 100,
"length": 14.75,
"width": 12.0,
"height": 3.75,
"package": "BULK",
"specorder": "N",
"is_domestic_only": "Y",
"inventory": {
"": 38
}
},
{
"list_no": "00108",
"item_no": "MT48RS16A1",
"upc_code": "649528921376",
"manufacturer_no": "MTC10F1084S1RC48BA1R",
"manufacturer": "MICRON",
"category": "MEM",
"product_name": "Micron - DDR5 - module - 16 GB - DIMM 288-pin - 4800 MHz / PC5-38400",
"price": 95.79,
"instant_rebate": "",
"instant_rebate_item_no": "",
"weight": 0.09,
"unit": 100,
"length": 14.75,
"width": 12.0,
"height": 3.75,
"package": "BULK",
"specorder": "N",
"is_domestic_only": "Y",
"inventory": {
"": 364,
"L": 50
}
},
{
"list_no": "00116",
"item_no": "MT48E32A1",
"upc_code": "649528931481",
"manufacturer_no": "MTC20C2085S1EC48BA1R",
"manufacturer": "MICRON",
"category": "MEM",
"product_name": "Micron - DDR5 - module - 32 GB - DIMM 288-pin - 4800 MHz / PC5-38400 - unbuffered",
"price": 164.8,
"instant_rebate": "",
"instant_rebate_item_no": "",
"weight": 0.09,
"unit": 100,
"length": 14.75,
"width": 12.0,
"height": 3.65,
"package": "BULK",
"specorder": "N",
"is_domestic_only": "Y",
"inventory": {
"": 32,
"N": 0,
"L": 0
}
},
{
"list_no": "00118",
"item_no": "MT48E16A1",
"upc_code": "649528931429",
"manufacturer_no": "MTC10C1084S1EC48BA1R",
"manufacturer": "MICRON",
"category": "MEM",
"product_name": "Micron - DDR5 - module - 16 GB - DIMM 288-pin - 4800 MHz / PC5-38400 - unbuffered",
"price": 95.79,
"instant_rebate": "",
"instant_rebate_item_no": "",
"weight": 0.09,
"unit": 100,
"length": 14.75,
"width": 12.0,
"height": 3.75,
"package": "BULK",
"specorder": "N",
"is_domestic_only": "Y",
"inventory": {
"": 21
}
}
]
}
The issue I have is with one column created in the csv called inventory I have some values as:
"inventory": {"": 52, "N": 57, "L": 34, "M": 15}
or
"inventory": {"": 21}
This can be clearly seen in the JSON text. What I want is to sum all the values that appear in inventory and write that sum to the inventory column. Why? Because at the moment the dictionary is written to the CSV literally, as copied above, and that doesn't work for my purpose. In the first example the value in inventory would be 52+57+34+15=158, in the second example 21, and so on for all values. How can I modify the code to do that?
Thank you
I tried to investigate how to call values. For example:
employee_data = data['results']
This variable reads the top-level values in the JSON file such as count, results, etc., but below results I don't know how to access values such as list_no, inventory, etc.

You can calculate the sum of the inventory by summing all the values of the inventory dict of each entry in data['results']. To read list_no, iterate over data['results'] and, for each entry i, use i['list_no']. This code should work:
import csv
import json

# Opening JSON file and loading the data
# into the variable data
with open('personal.json') as json_file:
    data = json.load(json_file)

employee_data = data['results']
for i in employee_data:
    # calculate sum of Inventory data
    sumOfInventory = sum(i['inventory'].values())
    # print a few data
    print('List_No :', i['list_no'])
    print('Inventory :', i['inventory'])
    print('Sum of Inventory :', sumOfInventory)
    print('----------------------')
    # insert that sum into the json data
    i['sumOfInventory'] = sumOfInventory

#print new json data
print('new json data :')
print(employee_data)
print('')

#inventory_data = data['inventory']
# print (inventory_data)
#print(f'Total users: {res.json().get("total")}')

# now we will open a file for writing; the csv module needs newline=''
# to avoid blank lines between rows on Windows, and the with-block closes
# the file even if a row fails to write
with open('data_file.csv', 'a', newline='') as data_file:
    # create the csv writer object
    csv_writer = csv.writer(data_file)
    for count, emp in enumerate(employee_data):
        if count == 0:
            # Writing headers of CSV file (taken from the first record)
            csv_writer.writerow(emp.keys())
        # Writing data of CSV file
        csv_writer.writerow(emp.values())
The console output must be like this :
List_No : 00011
Inventory : {'': 50}
Sum of Inventory : 50
----------------------
List_No : 00012
Inventory : {'': 55}
Sum of Inventory : 55
----------------------
List_No : 00013
Inventory : {'': 52, 'M': 0}
Sum of Inventory : 52
----------------------
List_No : 00014
Inventory : {'': 58}
Sum of Inventory : 58
----------------------
List_No : 00102
Inventory : {'': 36, 'L': 0}
Sum of Inventory : 36
----------------------
List_No : 00104
Inventory : {'': 53}
Sum of Inventory : 53
----------------------
List_No : 00105
Inventory : {'': 38}
Sum of Inventory : 38
----------------------
List_No : 00108
Inventory : {'': 364, 'L': 50}
Sum of Inventory : 414
----------------------
List_No : 00116
Inventory : {'': 32, 'N': 0, 'L': 0}
Sum of Inventory : 32
----------------------
List_No : 00118
Inventory : {'': 21}
Sum of Inventory : 21
----------------------
new json data :
[{'list_no': '00011', 'item_no': 'MZVL2256HC', 'upc_code': '', 'manufacturer_no': 'MZVL2256HCHQ-00', 'manufacturer': 'SAMSUNGOEM', 'category': 'SSD', 'product_name': 'Samsung PM9A1 - SSD - 256 GB - PCIe 4.0 x4 (NVMe)', 'price': 56.65, 'instant_rebate': '', 'instant_rebate_item_no': '', 'weight': 0.06, 'unit': 300, 'length': 14.75, 'width': 12.0, 'height': 3.75, 'package': 'BULK', 'specorder': 'Y', 'is_domestic_only': 'N', 'inventory': {'': 50}, 'sumOfInventory': 50}, {'list_no': '00012', 'item_no': 'MZVL2512HC', 'upc_code': '', 'manufacturer_no': 'MZVL2512HCJQ-00', 'manufacturer': 'SAMSUNGOEM', 'category': 'SSD', 'product_name': 'Samsung PM9A1 MZVL2512HCJQ - SSD - 512 GB - PCIe 4.0 x4 (NVMe)', 'price': 70.04, 'instant_rebate': '', 'instant_rebate_item_no': '', 'weight': 0.06, 'unit': 300, 'length': 23.0, 'width': 17.0, 'height': 10.0, 'package': 'BULK', 'specorder': 'Y', 'is_domestic_only': 'N', 'inventory': {'': 55}, 'sumOfInventory': 55}, {'list_no': '00013', 'item_no': 'MZVL21T0HC', 'upc_code': '', 'manufacturer_no': 'MZVL21T0HCLR-00A00', 'manufacturer': 'SAMSUNGOEM', 'category': 'SSD', 'product_name': 'Samsung PM9A1 MZVL21T0HCLR - SSD - 1 TB - PCIe 4.0 x4 (NVMe)', 'price': 105.06, 'instant_rebate': '', 'instant_rebate_item_no': '', 'weight': 0.06, 'unit': 300, 'length': 23.0, 'width': 17.0, 'height': 10.0, 'package': 'BULK', 'specorder': 'Y', 'is_domestic_only': 'N', 'inventory': {'': 52, 'M': 0}, 'sumOfInventory': 52}, {'list_no': '00014', 'item_no': 'MZVL22T0HB', 'upc_code': '', 'manufacturer_no': 'MZVL22T0HBLB-00A00', 'manufacturer': 'SAMSUNGOEM', 'category': 'SSD', 'product_name': 'Samsung PM9A1 - SSD - 2 TB - PCIe 4.0 x4 (NVMe)', 'price': 187.46, 'instant_rebate': '', 'instant_rebate_item_no': '', 'weight': 0.06, 'unit': 300, 'length': 23.0, 'width': 17.0, 'height': 10.0, 'package': 'BULK', 'specorder': 'Y', 'is_domestic_only': 'N', 'inventory': {'': 58}, 'sumOfInventory': 58}, {'list_no': '00102', 'item_no': 'MT48RD64A1', 'upc_code': '649528921666', 
'manufacturer_no': 'MTC40F2046S1RC48BA1R', 'manufacturer': 'MICRON', 'category': 'MEM', 'product_name': 'Micron - DDR5 - module - 64 GB - DIMM 288-pin - 4800 MHz / PC5-38400 - registered', 'price': 297.67, 'instant_rebate': '', 'instant_rebate_item_no': '', 'weight': 0.09, 'unit': 100, 'length': 14.75, 'width': 12.0, 'height': 3.75, 'package': 'BULK', 'specorder': 'Y', 'is_domestic_only': 'Y', 'inventory': {'': 36, 'L': 0}, 'sumOfInventory': 36}, {'list_no': '00104', 'item_no': 'MT48RD32A1', 'upc_code': '649528921598', 'manufacturer_no': 'MTC20F2085S1RC48BA1R', 'manufacturer': 'MICRON', 'category': 'MEM', 'product_name': 'Micron - DDR5 - module - 32 GB - DIMM 288-pin - 4800 MHz / PC5-38400 - registered', 'price': 164.8, 'instant_rebate': '', 'instant_rebate_item_no': '', 'weight': 0.1, 'unit': 100, 'length': 14.75, 'width': 12.0, 'height': 3.75, 'package': 'BULK', 'specorder': 'N', 'is_domestic_only': 'N', 'inventory': {'': 53}, 'sumOfInventory': 53}, {'list_no': '00105', 'item_no': 'MT48RS32A2', 'upc_code': '649528921529', 'manufacturer_no': 'MTC20F1045S1RC48BA2R', 'manufacturer': 'MICRON', 'category': 'MEM', 'product_name': 'Micron - DDR5 - module - 32 GB - DIMM 288-pin - 4800 MHz / PC5-38400 - registered', 'price': 164.8, 'instant_rebate': '', 'instant_rebate_item_no': '', 'weight': 0.1, 'unit': 100, 'length': 14.75, 'width': 12.0, 'height': 3.75, 'package': 'BULK', 'specorder': 'N', 'is_domestic_only': 'Y', 'inventory': {'': 38}, 'sumOfInventory': 38}, {'list_no': '00108', 'item_no': 'MT48RS16A1', 'upc_code': '649528921376', 'manufacturer_no': 'MTC10F1084S1RC48BA1R', 'manufacturer': 'MICRON', 'category': 'MEM', 'product_name': 'Micron - DDR5 - module - 16 GB - DIMM 288-pin - 4800 MHz / PC5-38400', 'price': 95.79, 'instant_rebate': '', 'instant_rebate_item_no': '', 'weight': 0.09, 'unit': 100, 'length': 14.75, 'width': 12.0, 'height': 3.75, 'package': 'BULK', 'specorder': 'N', 'is_domestic_only': 'Y', 'inventory': {'': 364, 'L': 50}, 'sumOfInventory': 414}, 
{'list_no': '00116', 'item_no': 'MT48E32A1', 'upc_code': '649528931481', 'manufacturer_no': 'MTC20C2085S1EC48BA1R', 'manufacturer': 'MICRON', 'category': 'MEM', 'product_name': 'Micron - DDR5 - module - 32 GB - DIMM 288-pin - 4800 MHz / PC5-38400 - unbuffered', 'price': 164.8, 'instant_rebate': '', 'instant_rebate_item_no': '', 'weight': 0.09, 'unit': 100, 'length': 14.75, 'width': 12.0, 'height': 3.65, 'package': 'BULK', 'specorder': 'N', 'is_domestic_only': 'Y', 'inventory': {'': 32, 'N': 0, 'L': 0}, 'sumOfInventory': 32}, {'list_no': '00118', 'item_no': 'MT48E16A1', 'upc_code': '649528931429', 'manufacturer_no': 'MTC10C1084S1EC48BA1R', 'manufacturer': 'MICRON', 'category': 'MEM', 'product_name': 'Micron - DDR5 - module - 16 GB - DIMM 288-pin - 4800 MHz / PC5-38400 - unbuffered', 'price': 95.79, 'instant_rebate': '', 'instant_rebate_item_no': '', 'weight': 0.09, 'unit': 100, 'length': 14.75, 'width': 12.0, 'height': 3.75, 'package': 'BULK', 'specorder': 'N', 'is_domestic_only': 'Y', 'inventory': {'': 21}, 'sumOfInventory': 21}]

How to add data to dictionary from an array conditionally

I have dictionaries in a list that already have some data and I want to add a vin number to each brand in these dictionaries.
my_brand_dict = [
{"key": {"Brand": "Tesla", "Date": "20203"}, "Total": 56},
{"key": {"Brand": "Tesla", "Date": "20207"}, "Total": 88},
{"key": {"Brand": "Audi", "Date": "202014"}, "Total": 79},
{"key": {"Brand": "Mercedes", "Date": "20201"}, "Total": 49},
]
my_vins = ["f60a0a", "#2019c0", "#a81b1b", "#468650", "#21248a", "#ff7a00"]
When Brand is Tesla add '#468650'
When Brand is Mercedes add '#2019c0'
When Brand is Toyota add '#21248a'
When Brand is Audi add '#ff7a00'
My expected output:
my_brand_dict = [
{"key": {"Brand": "Tesla", "Date": "20203"}, "Total": 56, "my_vin": "#468650"},
{"key": {"Brand": "Toyota", "Date": "20207"}, "Total": 88, "my_vin": "#21248a"},
{"key": {"Brand": "Audi", "Date": "202014"}, "Total": 79, "my_vin": "#ff7a00"},
{"key": {"Brand": "Mercedes", "Date": "20201"}, "Total": 49, "my_vin": "#2019c0"},
]
Couldn't find anything that matches what I want to achieve
Conditionally add values to dictionary

I would suggest using a dictionary instead of a list for your my_vins so that it maps brands to vins. This way you can easily get corresponding vin.
my_brand_dict = [
    {"key": {"Brand": "Tesla", "Date": "20203"}, "Total": 56},
    {"key": {"Brand": "Tesla", "Date": "20207"}, "Total": 88},
    {"key": {"Brand": "Audi", "Date": "202014"}, "Total": 79},
    {"key": {"Brand": "Mercedes", "Date": "20201"}, "Total": 49},
]
# Maps each brand name to the vin that should be attached to it.
my_vins = {
    "Mercedes": "#2019c0",
    "Tesla": "#468650",
    "Toyota": "#21248a",
    "Audi": "#ff7a00",
}
for d in my_brand_dict:
    brand = d["key"]["Brand"]
    # Raises KeyError for a brand that is missing from my_vins.
    vin = my_vins[brand]
    d["my_vin"] = vin
print(my_brand_dict)
Then take care of what should happen if a brand doesn't have a vin: you can raise an exception or assign a default value.

You can define a dict that maps each Brand to its vin. Then use that dict to update the entries of my_brand_dict in place, as shown below.
# Maps each brand name to the vin that should be attached to it.
my_vins_dct = {
    'Tesla': '#468650',
    'Mercedes': '#2019c0',
    'Toyota': '#21248a',
    'Audi': '#ff7a00',
}
my_brand_dict = [
    {"key": {"Brand": "Tesla", "Date": "20203"}, "Total": 56},
    {"key": {"Brand": "Tesla", "Date": "20207"}, "Total": 88},
    {"key": {"Brand": "Audi", "Date": "202014"}, "Total": 79},
    {"key": {"Brand": "Mercedes", "Date": "20201"}, "Total": 49},
    {"key": {"Brand": "xxxx", "Date": "20201"}, "Total": 49},
]
for dct in my_brand_dict:
    # First approach: try/except and 'continue' -- entries whose brand is
    # unknown are left without a 'my_vin' key.
    try:
        dct['my_vin'] = my_vins_dct[dct['key']['Brand']]
    except KeyError:
        continue
    # Second approach for adding 'Not Found'
    # dct['my_vin'] = my_vins_dct.get(dct['key']['Brand'], 'Brand Not Found')
print(my_brand_dict)
Output:
[
{'key': {'Brand': 'Tesla', 'Date': '20203'}, 'Total': 56, 'my_vin': '#468650'},
{'key': {'Brand': 'Tesla', 'Date': '20207'}, 'Total': 88, 'my_vin': '#468650'},
{'key': {'Brand': 'Audi', 'Date': '202014'}, 'Total': 79, 'my_vin': '#ff7a00'},
{'key': {'Brand': 'Mercedes', 'Date': '20201'}, 'Total': 49, 'my_vin': '#2019c0'},
{'key': {'Brand': 'xxxx', 'Date': '20201'}, 'Total': 49}
]
# Output Second approach
# [
# {'key': {'Brand': 'Tesla', 'Date': '20203'}, 'Total': 56, 'my_vin': '#468650'},
# {'key': {'Brand': 'Tesla', 'Date': '20207'}, 'Total': 88, 'my_vin': '#468650'},
# {'key': {'Brand': 'Audi', 'Date': '202014'}, 'Total': 79, 'my_vin': '#ff7a00'},
# {'key': {'Brand': 'Mercedes', 'Date': '20201'}, 'Total': 49, 'my_vin': '#2019c0'},
# {'key': {'Brand': 'xxxx', 'Date': '20201'}, 'Total': 49, 'my_vin': 'Brand Not Found'}
# ]

my_brand_dict = [
    {'key': {'Brand': 'Tesla', 'Date': '20203'}, 'Total': 56},
    {'key': {'Brand': 'Tesla', 'Date': '20207'}, 'Total': 88},
    {'key': {'Brand': 'Audi', 'Date': '202014'}, 'Total': 79},
    {'key': {'Brand': 'Mercedes', 'Date': '20201'}, 'Total': 49},
]
my_vins = ['f60a0a', '#2019c0', '#a81b1b', '#468650', '#21248a', '#ff7a00']

# One lookup table replaces the chain of per-brand comparisons.
vin_by_brand = {
    'Tesla': '#468650',
    'Mercedes': '#2019c0',
    'Toyota': '#21248a',
    'Audi': '#ff7a00',
}

for item in my_brand_dict:
    brand = item['key']['Brand']
    # Entries with a brand outside the table are left untouched.
    if brand in vin_by_brand:
        item['my_vin'] = vin_by_brand[brand]
print(my_brand_dict)
This code works for me

pandas dataframe to custom nested json

I have a pandas dataframe that looks like this:
user_id cat_id prod_id score pref_prod
29762 9 3115 1.000000 335.0
29762 58 1335 1.000000 335.0
234894 58 1335 1.000000 335.0
413276 43 1388 1.000000 335.0
413276 58 1335 1.000000 335.0
413276 73 26 1.000000 335.0
9280593 9 137 1.000000 335.0
9280593 58 1335 1.000000 335.0
9280593 74 160 1.000000 335.0
4554542 66 1612 0.166667 197.0
4554542 66 1406 0.166767 197.0
4554542 66 2021 1.000000 197.0
I want to group this df by user_id & cat_id and convert it to json so that it looks something like this:
{
29762: {
'cat_id': {
9: [{
'prod_id': 3115,
'score': 1.0
}],
58: [{
'prod_id': 1335,
'score': 1.0
}]
},
'pref_prod': 335.0
}
234894: {
'cat_id': {
58: [{
'prod_id': 1335,
'score': 1.0
}]
},
'pref_prod': 335.0
}
413276: {
'cat_id': {
43: [{
'prod_id': 1388,
'score': 1.0,
'fav_provider': 335.0
}],
58: [{
'prod_id': 1335,
'score': 1.0,
'fav_provider': 335.0
}],
73: [{
'prod_id': 26,
'score': 1.0,
}]
},
'pref_prod': 335.0
}
4554542: {
'cat_id': {
66: [{
'prod_id': 1612,
'score': 0.166
}, {
'prod_id': 1406,
'score': 0.16
}, {
'prod_id': 2021,
'score': 1.0,
}]
},
'pref_prod': 197.0
}
}
As of now I can do
gb = df.groupby(['user_id', 'cat_id']).apply(lambda g: g.drop(['user_id', 'cat_id'], axis=1).to_dict(orient='records')).to_dict()
which gives me user_id and cat_id in tuple keys:
{
(29762, 9): [{
'prod_id': 3115,
'score': 1.0,
'pref_prod': 335.0
}],
(29762, 58): [{
'prod_id': 1335,
'score': 1.0,
'pref_prod': 335.0
}],
(234894, 58): [{
'prod_id': 1335,
'score': 1.0,
'pref_prod': 335.0
}],
(413276, 43): [{
'prod_id': 1388,
'score': 1.0,
'pref_prod': 335.0
}],
(413276, 58): [{
'prod_id': 1335,
'score': 1.0,
'pref_prod': 335.0
}],
(413276, 73): [{
'prod_id': 26,
'score': 1.0,
'pref_prod': 335.0
}],
(9280593, 9): [{
'prod_id': 137,
'score': 1.0,
'pref_prod': 335.0
}],
(9280593, 58): [{
'prod_id': 1335,
'score': 1.0,
'pref_prod': 335.0
}],
(9280593, 74): [{
'prod_id': 160,
'score': 1.0,
'pref_prod': 335.0
}],
(4554542,
66): [{
'prod_id': 1612,
'score': 0.16666666666666666,
'pref_prod': 197.0
}, {
'prod_id': 1406,
'score': 0.16676666666666665,
'pref_prod': 197.0
}, {
'prod_id': 2021,
'score': 1.0,
'pref_prod': 197.0
}]
}
How can I get the json in the desired format

I can't think of any direct way to do it with pandas only. But you can construct a new dictionary with the desired format based on gb, using a defaultdict
from collections import defaultdict
import json  # just to prettyprint the resulting dictionary

# One list of record dicts per (user_id, cat_id) pair.
gb = df.groupby(['user_id', 'cat_id']).apply(lambda g: g.drop(['user_id', 'cat_id'], axis=1).to_dict(orient='records')).to_dict()

# Every user starts out with an empty category mapping.
d = defaultdict(lambda: {'cat_id': {}})
for (user_id, cat_id), records in gb.items():
    for record in records:
        # drop 'pref_prod' key of each record
        # I'm assuming its unique for each (user_id, cat_id) group
        pref_prod = record.pop('pref_prod')
    # After the inner loop the records only hold 'prod_id' and 'score';
    # pref_prod keeps the value popped from the last record of the group.
    d[user_id]['cat_id'][cat_id] = records
    d[user_id]['pref_prod'] = pref_prod
>>> print(json.dumps(d, indent=4))
{
"29762": {
"cat_id": {
"9": [
{
"prod_id": 3115,
"score": 1.0
}
],
"58": [
{
"prod_id": 1335,
"score": 1.0
}
]
},
"pref_prod": 335.0
},
"234894": {
"cat_id": {
"58": [
{
"prod_id": 1335,
"score": 1.0
}
]
},
"pref_prod": 335.0
},
"413276": {
"cat_id": {
"43": [
{
"prod_id": 1388,
"score": 1.0
}
],
"58": [
{
"prod_id": 1335,
"score": 1.0
}
],
"73": [
{
"prod_id": 26,
"score": 1.0
}
]
},
"pref_prod": 335.0
},
"4554542": {
"cat_id": {
"66": [
{
"prod_id": 1612,
"score": 0.166667
},
{
"prod_id": 1406,
"score": 0.166767
},
{
"prod_id": 2021,
"score": 1.0
}
]
},
"pref_prod": 197.0
},
"9280593": {
"cat_id": {
"9": [
{
"prod_id": 137,
"score": 1.0
}
],
"58": [
{
"prod_id": 1335,
"score": 1.0
}
],
"74": [
{
"prod_id": 160,
"score": 1.0
}
]
},
"pref_prod": 335.0
}
}

I used a namedtuple built from the dataframe to create the JSON tree. If the tree had more than one level, I would use recursion to build it. The dataframe did not contain lists of lists, so recursion was not required.
from io import StringIO
import io
from collections import namedtuple

import pandas as pd  # pd.read_csv is used below but was never imported

data = """user_id,cat_id,prod_id,score,pref_prod
29762,9,3115,1.000000,335.0
29762,58,1335,1.000000,335.0
234894,58,1335,1.000000,335.0
413276,43,1388,1.000000,335.0
413276,58,335,1.000000,335.0
413276,73,26,1.000000,335.0
9280593,9,137,1.000000,335.0
9280593,58,1335,1.000000,335.0
9280593,74,160,1.000000,335.0
4554542,66,1612,0.166667,197.0
4554542,66,1406,0.166767,197.0
4554542,66,2021,1.000000,197.0"""
df = pd.read_csv(io.StringIO(data), sep=',')

Record = namedtuple('Generic', ['user_id', 'cat_id', 'prod_id', 'score', 'pref_prod'])


def map_to_record(row):
    """Copy the five data fields of one dataframe row into a Record."""
    return Record(row.user_id, row.cat_id, row.prod_id, row.score, row.pref_prod)


my_list = [map_to_record(row) for row in df.itertuples()]


def named_tuple_to_json(named_tuple):
    """
    convert a named tuple to a json tree structure

    ``named_tuple`` is an iterable of Record-like objects. The result is a
    single string of the form ``records:[{...},{...},]``. It uses single
    quotes and trailing commas, so it is JSON-like text rather than strict
    JSON; the format is kept unchanged.
    """
    row_template = "'user_id': {},'cat_id': {},'prod_id': {},'score': {},'pref_prod': {},"
    # Build all fragments first and join once instead of growing a string
    # with += inside the loop.
    rows = [
        "{" + row_template.format(
            record.user_id, record.cat_id, record.prod_id, record.score, record.pref_prod)
        + "},"
        for record in named_tuple
    ]
    return "records:[" + "".join(rows) + "]"


# convert the list of named tuples to a json tree structure
json_tree = named_tuple_to_json(my_list)
print(json_tree)
output
records:[{'user_id': 29762,'cat_id': 9,'prod_id': 3115,'score': 1.0,'pref_prod': 335.0,},{'user_id': 29762,'cat_id': 58,'prod_id': 1335,'score': 1.0,'pref_prod': 335.0,},{'user_id': 234894,'cat_id': 58,'prod_id': 1335,'score': 1.0,'pref_prod': 335.0,},{'user_id': 413276,'cat_id': 43,'prod_id': 1388,'score': 1.0,'pref_prod': 335.0,},{'user_id': 413276,'cat_id': 58,'prod_id': 335,'score': 1.0,'pref_prod': 335.0,},{'user_id': 413276,'cat_id': 73,'prod_id': 26,'score': 1.0,'pref_prod': 335.0,},{'user_id': 9280593,'cat_id': 9,'prod_id': 137,'score': 1.0,'pref_prod': 335.0,},{'user_id': 9280593,'cat_id': 58,'prod_id': 1335,'score': 1.0,'pref_prod': 335.0,},{'user_id': 9280593,'cat_id': 74,'prod_id': 160,'score': 1.0,'pref_prod': 335.0,},{'user_id': 4554542,'cat_id': 66,'prod_id': 1612,'score': 0.166667,'pref_prod': 197.0,},{'user_id': 4554542,'cat_id': 66,'prod_id': 1406,'score': 0.166767,'pref_prod': 197.0,},{'user_id': 4554542,'cat_id': 66,'prod_id': 2021,'score': 1.0,'pref_prod': 197.0,},]

drop selective columns pandas dataframe while flattening

I have a created a dataframe from a JSON but want to keep only the first 5 columns of the result.
Here is a part of the JSON:
{
"lat": 52.517,
"lon": 13.3889,
"timezone": "Europe/Berlin",
"timezone_offset": 7200,
"current": {
"dt": 1628156947,
"sunrise": 1628134359,
"sunset": 1628189532,
"temp": 295.54,
"feels_like": 295.43,
"pressure": 1009,
"humidity": 61,
"dew_point": 287.66,
"uvi": 4.53,
"clouds": 20,
"visibility": 10000,
"wind_speed": 3.58,
"wind_deg": 79,
"wind_gust": 4.92,
"weather": [
{
"id": 801,
"main": "Clouds",
"description": "few clouds",
"icon": "02d"
}
]
},
"hourly": [
{
"dt": 1628154000,
"temp": 295.26,
"feels_like": 295.09,
"pressure": 1009,
"humidity": 60,
"dew_point": 287.14,
"uvi": 4.01,
"clouds": 36,
"visibility": 10000,
"wind_speed": 3.6,
"wind_deg": 83,
"wind_gust": 4.76,
"weather": [
{
"id": 500,
"main": "Rain",
"description": "light rain",
"icon": "10d"
}
],
"pop": 0.49,
"rain": {
"1h": 0.52
}
},
{
"dt": 1628157600,
"temp": 295.54,
"feels_like": 295.43,
"pressure": 1009,
"humidity": 61,
"dew_point": 287.66,
"uvi": 4.53,
"clouds": 20,
"visibility": 10000,
"wind_speed": 3.76,
"wind_deg": 85,
"wind_gust": 4.91,
"weather": [
{
"id": 801,
"main": "Clouds",
"description": "few clouds",
"icon": "02d"
}
],
"pop": 0.55
},
{
"dt": 1628161200,
"temp": 295.58,
"feels_like": 295.42,
"pressure": 1009,
"humidity": 59,
"dew_point": 287.18,
"uvi": 4.9,
"clouds": 36,
"visibility": 10000,
"wind_speed": 3.58,
"wind_deg": 95,
"wind_gust": 4.73,
"weather": [
{
"id": 802,
"main": "Clouds",
"description": "scattered clouds",
"icon": "03d"
}
],
"pop": 0.59
}
]
}
I have flattened the JSON first like this:
df_history = pd.json_normalize(data_history, max_level=1)
That gave me this structure:
lat lon timezone timezone_offset hourly current.dt current.sunrise current.sunset current.temp current.feels_like ... current.humidity current.dew_point current.uvi current.clouds current.visibility current.wind_speed current.wind_deg current.wind_gust current.weather current.rain
0 52.517 13.3889 Europe/Berlin 7200 [{'dt': 1627776000, 'temp': 17.82, 'feels_like... 1627855200 1627874869 1627930649 16.36 16.4 ... 90 14.72 0 0 10000 3.13 254 11.18 [{'id': 500, 'main': 'Rain', 'description': 'l... {'1h': 0.17}
But I want to keep only the columns up to the column "hourly" and then flatten it.
I have tried this but to no avail:
df_history_small = pd.json_normalize(data_history, record_path='hourly',meta=['dt','temp', 'humidity'], errors='ignore')
What am I doing wrong? How can I achieve my goal?
My final goal is to have a dataframe that looks like this:
lat lon timezone timezone_offset timestamp temp feels_like humidity pressure
0 52.517 13.3889 Europe/Berlin 7200 08/01/2021 00:00:00 17.82 17.46 69 1005

Try:
# The first four names are top-level fields repeated on every hourly row;
# the remaining ones come from the hourly records themselves.
cols = [
    'lat', 'lon', 'timezone', 'timezone_offset',
    'dt', 'temp', 'feels_like', 'humidity',
]
out = pd.json_normalize(data_history, record_path=['hourly'], meta=cols[:4])[cols]
>>> out
lat lon timezone timezone_offset dt temp feels_like humidity
0 52.517 13.3889 Europe/Berlin 7200 1628154000 295.26 295.09 60
1 52.517 13.3889 Europe/Berlin 7200 1628157600 295.54 295.43 61
2 52.517 13.3889 Europe/Berlin 7200 1628161200 295.58 295.42 59
Feel free to convert dt to timestamp with:
# `out` is the frame built above; the epoch seconds in 'dt' become datetimes.
out['timestamp'] = pd.to_datetime(out['dt'], unit='s')

Data structure manipulation with Pandas

I have a list of dicts as follows :
[
{
"status": "BV",
"max_total_duration": null,
"min_total_duration": null,
"75th_percentile": 420,
"median": 240.0,
"25th_percentile": 180,
"avg_total_duration": null
},
{
"status": "CORR",
"max_total_duration": null,
"min_total_duration": null,
"75th_percentile": 1380,
"median": 720.0,
"25th_percentile": 420,
"avg_total_duration": null
},
{
"status": "FILL",
"max_total_duration": null,
"min_total_duration": null,
"75th_percentile": 1500,
"median": 840.0,
"25th_percentile": 480,
"avg_total_duration": null
},
{
"status": "INIT",
"max_total_duration": 11280,
"min_total_duration": 120,
"75th_percentile": 720,
"median": 360.0,
"25th_percentile": 180,
"avg_total_duration": 2061
},
]
As is evident, max_total_duration, min_total_duration and avg_total_duration are null for every status except "INIT". What I want is to remove all the entries with null values and, for INIT, where max_total_duration, min_total_duration and avg_total_duration have actual values, move them into a new dictionary appended to the list, as follows:
[
{
"status": "BV",
"75th_percentile": 420,
"median": 240.0,
"25th_percentile": 180,
},
{
"status": "CORR",
"75th_percentile": 1380,
"median": 720.0,
"25th_percentile": 420,
},
{
"status": "FILL",
"75th_percentile": 1500,
"median": 840.0,
"25th_percentile": 480,
},
{
"status": "INIT",
"75th_percentile": 720,
"median": 360.0,
"25th_percentile": 180,
},
{
"max_total_duration": 11280,
"min_total_duration": 120,
"avg_total_duration": 2061,
}
]
I have tried doing this by iterating over the list and it is computationally very expensive. Is there an easier way of doing this with pandas?

data = [
    {
        "status": "BV",
        "max_total_duration": None,
        "min_total_duration": None,
        "75th_percentile": 420,
        "median": 240.0,
        "25th_percentile": 180,
        "avg_total_duration": None,
    },
    {
        "status": "CORR",
        "max_total_duration": None,
        "min_total_duration": None,
        "75th_percentile": 1380,
        "median": 720.0,
        "25th_percentile": 420,
        "avg_total_duration": None,
    },
    {
        "status": "FILL",
        "max_total_duration": None,
        "min_total_duration": None,
        "75th_percentile": 1500,
        "median": 840.0,
        "25th_percentile": 480,
        "avg_total_duration": None,
    },
    {
        "status": "INIT",
        "max_total_duration": 11280,
        "min_total_duration": 120,
        "75th_percentile": 720,
        "median": 360.0,
        "25th_percentile": 180,
        "avg_total_duration": 2061,
    },
]

# Keys that are split off from the INIT entry into a dictionary of their own.
duration_keys = ('max_total_duration', 'min_total_duration', 'avg_total_duration')

# Drop only the null entries; testing `is not None` (instead of truthiness)
# keeps legitimate falsy values such as 0.
data = [{key: val for key, val in d.items() if val is not None} for d in data]

final = []
for d in data:
    final.append(d)
    if d.get('status') == 'INIT':
        # pop() with a default removes the key and cannot raise KeyError when
        # a duration was null and therefore already filtered out above.
        durations = {key: d.pop(key, None) for key in duration_keys}
        # Appended after the INIT entry so the order matches the expected output.
        final.append(durations)
print(final)

import pandas as pd
# Substituting your 'null' for 'None'
df = pd.DataFrame(data)
>>> df
25th_percentile 75th_percentile avg_total_duration max_total_duration \
0 180 420 NaN NaN
1 420 1380 NaN NaN
2 480 1500 NaN NaN
3 180 720 2061 11280
median min_total_duration status
0 240 NaN BV
1 720 NaN CORR
2 840 NaN FILL
3 360 120 INIT
Grabbing the percentiles part:
df_percentiles = df[['status','25th_percentile','median','75th_percentile']]
>>> df_percentiles
status 25th_percentile median 75th_percentile
0 BV 180 240 420
1 CORR 420 720 1380
2 FILL 480 840 1500
3 INIT 180 360 720
Grabbing the durations part:
df_durations = df[df['status'] == 'INIT'][['max_total_duration','min_total_duration','avg_total_duration']]
>>> df_durations
max_total_duration min_total_duration avg_total_duration
3 11280 120 2061
Loop and combine to list:
# orient='records' returns a real list with one dict per row, so it can be
# extended; on Python 3 dict.values() is a view without append().
summary = df_percentiles.to_dict(orient='records')
# extend (not append) keeps the result a flat list of dicts.
summary.extend(df_durations.to_dict(orient='records'))
>>> summary
[{'25th_percentile': 180,
'75th_percentile': 420,
'median': 240.0,
'status': 'BV'},
{'25th_percentile': 420,
'75th_percentile': 1380,
'median': 720.0,
'status': 'CORR'},
{'25th_percentile': 480,
'75th_percentile': 1500,
'median': 840.0,
'status': 'FILL'},
{'25th_percentile': 180,
'75th_percentile': 720,
'median': 360.0,
'status': 'INIT'},
{'avg_total_duration': 2061.0,
'max_total_duration': 11280.0,
'min_total_duration': 120.0}]

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Parsing Nested loops and converting to Dataframe - python

Related

Python read from JSON file value and write to csv modify some values

How to add data to dictionary from an array conditionally

pandas dataframe to custom nested json

drop selective columns pandas dataframe while flattening

Data structure manipulation with Pandas

Categories

Resources