From Python dataframe to nested dictionary

From Python dataframe to nested dictionary - python

I want to use an api and would need to put my dataframe in a dictionary format first.
The dataframe df that looks like this:
OrigC OrigZ OrigN Weigh DestC DestZ DestN
0 PL 97 TP 59 DE 63 SN
Expected output of the first row:
{"section":[
{"location":
{
"zipCode":
{"OrigC": "PL",
"OrigZ":"97"},
"location": {"id": "1"},
"OrigN": "TP"
},
"carriageParameter":
{"road":
{"truckLoad": "Auto"}
},
"load":
{"Weigh": "59",
"unit": "ton",
"showEmissionsAtResponse": "true"
}
},
{"location":
{
"zipCode":
{"DestC": "DE",
"DestZ":"63"},
"location": {"id": "2"},
"DestN": "SN"
},
"carriageParameter":
{"road":
{"truckLoad":"Auto"}
},
"unload":
{"WEIGHTTONS":"59",
"unit": "ton",
"showEmissionsAtResponse": "true"
}
}]}
Note that there is static information in the dictionary that doesn't require any change.
How can this be done in Python?

You can use iterrows.
# Build the request payload: one entry in dic['section'] per dataframe row.
# Key names follow the expected output shown in the question
# ("zipCode", "carriageParameter").
dic = {'section': []}
for ix, row in df.iterrows():
    in_dict = {
        'location': {
            'zipCode': {
                'OrigC': row['OrigC'],
                # The expected output carries the zip code as a string.
                'OrigZ': str(row['OrigZ']),
            },
            'location': {'id': ix + 1},  # I am guessing here.
            # Read the value from the row instead of hard-coding 'TP'.
            'OrigN': row['OrigN'],
        },
        'carriageParameter': {
            'road': {'truckLoad': 'Auto'},
        },
        'load': {
            'Weigh': str(row['Weigh']),
        },
    }
    dic['section'].append(in_dict)
Note that this is not the entire entry, but I think it is clear enough to illustrate the idea.

Related

Flattening Multi-Level Nested Object to DataFrame

I am trying to convert an object/dictionary to a Python DataFrame using the following code:
sr = pd.Series(object)
df = pd.DataFrame(sr.values.tolist())
display(df)
It works well but some of the output columns are of object/dictionary type, and I would like to break them up to multiple columns, for example, if column "Items" produces the following value in a cell:
obj = {
"item1": {
"id": "item1",
"relatedItems": [
{
"id": "1111",
"category": "electronics"
},
{
"id": "9999",
"category": "electronics",
"subcategory": "computers"
},
{
"id": "2222",
"category": "electronics",
"subcategory": "computers",
"additionalData": {
"createdBy": "Doron",
"inventory": 100
}
}
]
},
"item2": {
"id": "item2",
"relatedItems": [
{
"id": "4444",
"category": "furniture",
"subcategory": "sofas"
},
{
"id": "5555",
"category": "books",
},
{
"id": "6666",
"category": "electronics",
"subcategory": "computers",
"additionalData": {
"createdBy": "Joe",
"inventory": 5,
"condition": {
"name": "new",
"inspectedBy": "Doron"
}
}
}
]
}
}
The desired output is:
I tried using df.explode, but it multiplies the row to multiple rows, I am looking for a way to achieve the same but split into columns and retain a single row.
Any suggestions?

You can use the pd.json_normalize function to flatten the nested dictionary into multiple columns, with the keys joined with a dot (.).
sr = pd.Series({
'Items': {
'item_name': 'name',
'item_value': 'value'
}
})
df = pd.json_normalize(sr, sep='.')
display(df)
This will give you the following df
Items.item_name Items.item_value
0 name value
You can also specify the level of nesting by passing the record_path parameter to pd.json_normalize, for example, to only flatten the 'Items' key:
df = pd.json_normalize(sr, 'Items', sep='.')
display(df)

Seems like you're looking for pandas.json_normalize which has a (sep) parameter:
obj = {
    'name': 'Doron Barel',
    'items': {
        'item_name': 'name',
        'item_value': 'value',
        'another_item_prop': [
            {
                'subitem1_name': 'just_another_name',
                'subitem1_value': 'just_another_value',
            },
            {
                'subitem2_name': 'one_more_name',
                'subitem2_value': 'one_more_value',
            }
        ]
    }
}

# Flatten nested dicts into dot-separated columns; the list under
# 'items.another_item_prop' stays in a single cell at this point.
df = pd.json_normalize(obj, sep='.')

# Take the list-valued column out and give every list element its own row
# (the exploded rows keep the index of the row they came from).
ser = df.pop('items.another_item_prop').explode()

# Expand each element dict into columns prefixed with the original column
# name, join them back on the shared index, then collapse to one row per
# "name" by keeping the first non-null value of every column.
out = (df.join(pd.DataFrame(ser.tolist(), index=ser.index)
               .rename(columns=lambda x: ser.name + "." + x))
       .groupby("name", as_index=False).first()
       )
Output :
print(out)

name items.item_name items.item_value items.another_item_prop.subitem1_name items.another_item_prop.subitem1_value items.another_item_prop.subitem2_name items.another_item_prop.subitem2_value
0 Doron Barel name value just_another_name just_another_value one_more_name one_more_value

Python - having trouble selecting single value from json data

I have the following code from which I want to select a singular piece of data from the JSON.
j = {
"data": [
{
"astronomicalDawn": "2023-01-16T04:58:21+00:00",
"astronomicalDusk": "2023-01-16T17:00:31+00:00",
"civilDawn": "2023-01-16T06:38:18+00:00",
"civilDusk": "2023-01-16T15:20:34+00:00",
"moonFraction": 0.36248449454701365,
"moonPhase": {
"closest": {
"text": "Third quarter",
"time": "2023-01-14T22:34:00+00:00",
"value": 0.75
},
"current": {
"text": "Waning crescent",
"time": "2023-01-16T06:00:00+00:00",
"value": 0.7943440617174506
}
},
"moonrise": "2023-01-16T01:01:55+00:00",
"moonset": "2023-01-16T09:53:57+00:00",
"nauticalDawn": "2023-01-16T05:46:36+00:00",
"nauticalDusk": "2023-01-16T16:12:16+00:00",
"sunrise": "2023-01-16T07:28:07+00:00",
"sunset": "2023-01-16T14:30:45+00:00",
"time": "2023-01-16T06:00:00+00:00"
},
{
"astronomicalDawn": "2023-01-17T04:57:26+00:00",
"astronomicalDusk": "2023-01-17T17:02:07+00:00",
"civilDawn": "2023-01-17T06:37:07+00:00",
"civilDusk": "2023-01-17T15:22:26+00:00",
"moonFraction": 0.26001046334874545,
"moonPhase": {
"closest": {
"text": "Third quarter",
"time": "2023-01-14T21:31:00+00:00",
"value": 0.75
},
"current": {
"text": "Waning crescent",
"time": "2023-01-17T06:00:00+00:00",
"value": 0.8296778757434323
}
},
"moonrise": "2023-01-17T02:38:30+00:00",
"moonset": "2023-01-17T10:01:03+00:00",
"nauticalDawn": "2023-01-17T05:45:35+00:00",
"nauticalDusk": "2023-01-17T16:13:58+00:00",
"sunrise": "2023-01-17T07:26:40+00:00",
"sunset": "2023-01-17T14:32:54+00:00",
"time": "2023-01-17T06:00:00+00:00"
}
],
"meta": {
"cost": 1,
"dailyQuota": 10,
"lat": 58.7984,
"lng": 17.8081,
"requestCount": 1,
"start": "2023-01-16 06:00"
}
}
print(j['data']['moonPhase'])
Which gives me this error;
TypeError: list indices must be integers or slices, not str
That error is in regard to the very last line of the code. But changing the very very last line to print(j['data']) works.
What am I doing wrong - I am trying to select moonPhase data. It turns me on. Thank you.

Try:
print(j['data'][0]['moonPhase'])
or
print(j['data'][1]['moonPhase'])
Explanation: The data property of the json object contains a list. There are two items in the list (item 0 and item 1). You must first select an item using [0] or [1] before selecting the moonPhase property of one of the objects in the list.
edit: If you want to select only items where the moonPhase is in the future try:
from datetime import datetime

# Read the clock once so every item is compared against the same cutoff.
now_ts = datetime.now().timestamp()

# Keep the moonPhase of every entry whose "current" phase time is in the future.
print([
    item['moonPhase']
    for item in j['data']
    if datetime.fromisoformat(
        item['moonPhase']['current']['time']
    ).timestamp() > now_ts
])
output
[{'closest': {'text': 'Third quarter', 'time': '2023-01-14T21:31:00+00:00', 'value': 0.75}, 'current': {'text': 'Waning crescent', 'time': '2023-01-17T06:00:00+00:00', 'value': 0.8296778757434323}}]

Create dictionary of dictionary from each row and export each row as a json file in python

I have a pandas data frame that looks like below
I would like to create a dictionary of dictionaries like below for each row. traits inside a list of dictionaries.
#row1 example output. it should skip null attributes face and hat.
{
"name": "rv",
"image": "https://img0.png",
"attributes": [
{ "trait_type": "background", "value":"grey" },
{ "trait_type": "tshirt", "value":"yellow" },
{ "trait_type": "eagle", "value":"male" },
{ "trait_type": "hair", "value":"darktwists" }
]
}
#row3 example output. it should skip null attributes face and hair.
{
"name": "nv",
"image": "https://img2.png",
"attributes": [
{ "trait_type": "background", "value":"brown" },
{ "trait_type": "tshirt", "value":"americanflag" },
{ "trait_type": "eagle", "value":"male" },
{ "trait_type": "hat", "value":"policehat" }
]
}
Like this, each row output should be stored in a separate JSON file.
I tried this with lambda apply and _to_json and I encounter two problems:
1)not able to pack attributes as a separate dictionary inside dictionary in the required format.
2)JSON stores forward slashes "//" in HTTPS image link as "\/\/"
Any help is much appreciated. Thank You.

First you need to melt the df to structure it for "attribute" key in your dictionary and rename the columns accordingly:
# Reshape to long form: one row per (name, image, trait). The trait column is
# named "trait_type" directly, and rows whose value is null are dropped so
# missing attributes are skipped, as the question requires.
df = (df.melt(id_vars=['name', 'image'], var_name='trait_type')
      .dropna(subset=['value']))
Then we need to group them based on the name and image (and any other unique values you want to include in your JSON structure). Then iterate over the groups and construct the dictionary structure:
# One dictionary per (name, image) group; the group's rows become the
# "attributes" list of {"trait_type": ..., "value": ...} records.
results = [
    {
        "name": name,
        "image": image,
        "attributes": group[["trait_type", "value"]].to_dict("records"),
    }
    for (name, image), group in df.groupby(['name', 'image'])
]
This should give you your preferred results.
Compact Answer (Suggested by Timus):
You can perform the whole operation in one line:
dicts = (df.melt(id_vars=['name', 'image'], var_name='trait_type')
.dropna()
.groupby(['name', 'image'])[['trait_type', 'value']]
.apply(pd.DataFrame.to_dict, orient='records')
.reset_index(drop=False)
.rename(columns={0: 'attributes'})
.to_dict(orient='records'))

First thing: The JSON format you have is not a generic type format, so it can not be created directly using pandas to_json() and/or to_dict() methods of the dataframes, so you need to handle it manually
Second thing: pandas escapes forward slashes by default, which is why 'https://img0.png' gets replaced by https:\\/\\/img0.png
# For every row, build the list of {"trait_type", "value"} dicts from the
# non-null trait columns, then turn each row into a plain dict.
# Series.items() replaces Series.iteritems(), which was removed in pandas 2.0.
# NOTE(review): the question's expected output also has a "hair" trait, which
# is not in the column list below - confirm against the real dataframe.
out = (df.assign(attributes=df[['background', 'tshirt', 'eagle', 'face', 'hat']]
                 .apply(lambda x: [{'trait_type': index, 'value': value}
                                   for index, value in x[x.notna()].items()],
                        axis=1))
       [['name', 'image', 'attributes']]
       .apply(dict, axis=1)
       .tolist()
       )
OUTPUT:
[
{
'name': 'rv',
'image': 'https://img0.png',
'attributes': [
{'trait_type': 'background', 'value': 'gray'},
{'trait_type': 'tshirt', 'value': 'yellow'},
{'trait_type': 'eagle', 'value': 'male'}
]
},
{
'name': 'cv',
'image': 'https://img1.png',
'attributes': [
{'trait_type': 'background', 'value': 'yellow'},
{'trait_type': 'tshirt', 'value': 'green'},
{'trait_type': 'eagle', 'value': 'male'}
]
}
]
After this, you will have a list of dictionaries in the variable out, you can just pass it to json.dumps to create the json out of it, which looks like below:
>>> import json
>>> print(json.dumps(out, indent=4))
[
{
"name": "rv",
"image": "https://img0.png",
"attributes": [
{
"trait_type": "background",
"value": "gray"
},
{
"trait_type": "tshirt",
"value": "yellow"
},
{
"trait_type": "eagle",
"value": "male"
}
]
},
{
"name": "cv",
"image": "https://img1.png",
"attributes": [
{
"trait_type": "background",
"value": "yellow"
},
{
"trait_type": "tshirt",
"value": "green"
},
{
"trait_type": "eagle",
"value": "male"
}
]
}
]

Identify the nested object with min value in a JSON array

I have a JSON file with the following structure.
I want to identify the channel with the minimum fee and to print its details (channel_id, node1, node2 and the total fee)
The total fee of a channel is the sum of fees of both nodes:
total_fee = fee_node1 + fee_node2
{
"edges": [
{
"channel_id": "1",
"node1": "Alice",
"node2": "Bob",
"node1_policy": {
"fee": "1000"
},
"node2_policy": {
"fee": "1000"
}
},
{
"channel_id": "2",
"node1": "Caleb",
"node2": "Daniel",
"node1_policy": {
"fee": "500",
},
"node2_policy": {
"fee": "3000",
}
},
{
"channel_id": "3",
"node1": "Elen",
"node2": "Fatih",
"node1_policy": {
"fee": "2000"
},
"node2_policy": {
"fee": "5000"
}
}
]
}
What is the best method to accomplish this task?

It is quite simple to do that in a few lines of code. I should mention though that the format of your JSON file is invalid, because of the redundant commas right after the fee keys. Consider removing them in order to be valid:
import json

# Parse the channel graph straight from the file handle.
with open('channels.json', 'r') as inFile:
    jsonData = json.load(inFile)

# Fees are stored as strings, so convert them before summing. min() accepts
# the list of edges directly and returns the edge with the lowest total fee.
minimum = min(
    jsonData['edges'],
    key=lambda e: int(e['node1_policy']['fee']) + int(e['node2_policy']['fee']),
)
print(minimum)
Output:
{'channel_id': '1', 'node1': 'Alice', 'node2': 'Bob', 'node1_policy': {'fee': '1000'}, 'node2_policy': {'fee': '1000'}}

manipulating json in python using recursion

All,
I am trying to change the way some json looks by going through and formatting it in the following way:
1. flatten all of the fields lists
2. Then remove the fields lists and replace them with the name : flatten list
Example:
{
"name": "",
"fields": [{
"name": "keys",
"fields": [{
"node-name": "0/0/CPU0"
},
{
"interface-name": "TenGigE0/0/0/47"
},
{
"device-id": "ASR9K-H1902.corp.cisco.com"
}
]
},
{
"name": "content",
"fields": [{
"name": "lldp-neighbor",
"fields": [{
"receiving-interface-name": "TenGigE0/0/0/47"
},
{
"receiving-parent-interface-name": "Bundle-Ether403"
},
{
"device-id": "ASR9K-H1902.corp.cisco.com"
},
{
"chassis-id": "78ba.f975.a64f"
},
{
"port-id-detail": "Te0/1/0/4/0"
},
{
"header-version": 0
},
{
"hold-time": 120
},
{
"enabled-capabilities": "R"
},
{
"platform": ""
}
]
}]
}
]
}
Would turn into:
{
"": [{
"keys": [{
"node-name": "0/0/CPU0",
"interface-name": "TenGigE0/0/0/47",
"device-id": "ASR9K-H1902.corp.cisco.com"
}]
},
{
"content": [{
"lldp-neighbor": [{
"receiving-interface-name": "TenGigE0/0/0/47",
"receiving-parent-interface-name": "Bundle-Ether403",
"device-id": "ASR9K-H1902.corp.cisco.com",
"chassis-id": "78ba.f975.a64f",
"port-id-detail": "Te0/1/0/4/0",
"header-version": 0,
"hold-time": 120,
"enabled-capabilities": "R",
"platform": ""
}]
}]
}
]
}
I have tried the following to get the list flattened:
def _flatten_fields(self, fields_list):
    """Merge a list of dicts into a single dict.

    Falsy entries (empty dicts, None) are skipped. When several entries
    share a key, the value from the later entry wins.
    """
    merged = {}
    for entry in fields_list:
        if entry:
            merged.update(entry)
    return merged
This seems to work but I can't figure out a way to get into the sub levels using recursion, I am saving all flatten lists and names into a new dictionary, is there a way to do it by just manipulating the original dictionary?

This worked on the example you provided:
import json
def flatten(data):
    """Collapse the nested name/fields structure into plain nested dicts.

    A dict with a "name" key becomes {name: flatten(fields)}; any other dict
    is treated as a leaf and its key/value pairs are copied as they are.
    A list is flattened entry by entry and the results are merged into one
    dict, with later entries winning on duplicate keys.

    Raises KeyError if a dict has "name" but no "fields".
    """
    result = dict()
    if isinstance(data, dict):
        if 'name' in data:
            result[data['name']] = flatten(data['fields'])
        else:
            # Leaf dict. dict views are not subscriptable in Python 3, so
            # copy the pairs directly; this also handles an empty dict.
            result.update(data)
    else:
        for entry in data:
            result.update(flatten(entry))
    return result
print(json.dumps(flatten(data), indent=4))
Output
{
"": {
"keys": {
"node-name": "0/0/CPU0",
"interface-name": "TenGigE0/0/0/47",
"device-id": "ASR9K-H1902.corp.cisco.com"
},
"content": {
"lldp-neighbor": {
"receiving-interface-name": "TenGigE0/0/0/47",
"receiving-parent-interface-name": "Bundle-Ether403",
"header-version": 0,
"port-id-detail": "Te0/1/0/4/0",
"chassis-id": "78ba.f975.a64f",
"platform": "",
"device-id": "ASR9K-H1902.corp.cisco.com",
"hold-time": 120,
"enabled-capabilities": "R"
}
}
}
}
It doesn't have the extra list layers shown in your expected output, but I don't think you want those.

This worked on the example you provided:
def flatten_fields(fields_list):
    """Flatten a list of name/fields dicts into a one-element list.

    Every item that has a "fields" list is stored under its "name" as the
    recursively flattened result. All other key/value pairs (except "name")
    are copied as they are. The merged dict is wrapped in a list, so the top
    level is called as flatten_fields([data])[0].

    Raises KeyError if an item has "fields" but no "name".
    """
    merged = {}
    for item in fields_list:
        if "fields" in item:
            merged[item["name"]] = flatten_fields(item["fields"])
        # Copy the leaf pairs regardless of key order, so a leaf key that
        # comes before "fields" no longer cuts the scan short.
        merged.update(
            (key, value)
            for key, value in item.items()
            if key not in ("name", "fields")
        )
    return [merged]
But it works on a list of dictionaries, so you should call it like flatten_fields([data])[0].
The output is:
{
"": [{
"keys": [{
"node-name": "0/0/CPU0",
"interface-name": "TenGigE0/0/0/47",
"device-id": "ASR9K-H1902.corp.cisco.com"
}],
"content": [{
"lldp-neighbor": [{
"chassis-id": "78ba.f975.a64f",
"receiving-parent-interface-name": "Bundle-Ether403",
"enabled-capabilities": "R",
"device-id": "ASR9K-H1902.corp.cisco.com",
"hold-time": 120,
"receiving-interface-name": "TenGigE0/0/0/47",
"platform": "",
"header-version": 0,
"port-id-detail": "Te0/1/0/4/0"
}]
}]
}]
}

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

From Python dataframe to nested dictionary - python

Related

Flattening Multi-Level Nested Object to DataFrame

Python - having trouble selecting single value from json data

Create dictionary of dictionary from each row and export each row as a json file in python

Identify the nested object with min value in a JSON array

manipulating json in python using recursion

Categories

Resources