How to transform data from a dataframe
# Sample input: one row per event, with a timestamp plus a
# group/subgroup classification and a display name.
pd.DataFrame(
[
['2021-12-14 12:00:00','subgroup_1','group_1','Subgroup 1'],
['2021-12-14 12:15:00','subgroup_1','group_1','Subgroup 1'],
['2021-12-14 12:15:00','subgroup_1','group_1','Subgroup 1'],
['2021-12-14 12:30:00','subgroup_1','group_1','Subgroup 1'],
['2021-12-14 12:45:00','subgroup_1','group_1','Subgroup 1'],
['2021-12-14 13:00:00','subgroup_1','group_1','Subgroup 1'],
['2021-12-14 12:30:00','subgroup_3','group_2','Subgroup 3'],
['2021-12-14 12:45:00','subgroup_3','group_2','Subgroup 3'],
['2021-12-14 13:00:00','subgroup_3','group_2','Subgroup 3'],
], columns=['timestamp','subgroup','group','name']
)
to JSON
using pandas. Please help me.
My solution :
# Build {group: [{"name", "count", "timegroup"}, ...]} from the event frame.
# Parse timestamps once so grouping and sorting work on real datetimes.
df["timestamp"] = pd.to_datetime(df.timestamp)
out = {}
for group_key, df_group in df.groupby("group"):
    out[group_key] = []
    for _, df_subgroup in df_group.groupby("subgroup"):
        name = df_subgroup["name"].values[0]  # assumes `name` is unique within a subgroup -- TODO confirm
        count = len(df_subgroup)
        # Count rows per timestamp within this subgroup.
        timegroup = [
            {"index": ts, "value": n}
            for ts, n in df_subgroup.groupby("timestamp").name.count().to_dict().items()
        ]
        # Complete with missing timestamps (value 0). A set gives O(1)
        # membership tests instead of rebuilding a list for every timestamp.
        present = {t["index"] for t in timegroup}
        for ts in df.timestamp.unique():
            ts = pd.Timestamp(ts)  # unique() yields numpy datetime64; normalize before hashing
            if ts not in present:
                timegroup.append({"index": ts, "value": 0})
        # Sort chronologically, then render timestamps as strings.
        timegroup.sort(key=lambda item: item["index"])
        for t in timegroup:
            t["index"] = t["index"].strftime("%Y-%m-%d %H:%M:%S")
        out[group_key].append({"name": name, "count": count, "timegroup": timegroup})
Result in out :
{
"group_1": [
{
"name": "Subgroup 1",
"count": 6,
"timegroup": [
{
"index": "2021-12-14 12:00:00",
"value": 1
},
{
"index": "2021-12-14 12:15:00",
"value": 2
},
{
"index": "2021-12-14 12:30:00",
"value": 1
},
{
"index": "2021-12-14 12:45:00",
"value": 1
},
{
"index": "2021-12-14 13:00:00",
"value": 1
}
]
}
],
"group_2": [
{
"name": "Subgroup 3",
"count": 3,
"timegroup": [
{
"index": "2021-12-14 12:00:00",
"value": 0
},
{
"index": "2021-12-14 12:15:00",
"value": 0
},
{
"index": "2021-12-14 12:30:00",
"value": 1
},
{
"index": "2021-12-14 12:45:00",
"value": 1
},
{
"index": "2021-12-14 13:00:00",
"value": 1
}
]
}
]
}
Here, to get your desired output, I've applied 4 steps:
Code:
# STEP 1 -- group by (group, name): every remaining column is collapsed into
# a list per group; 'name' is moved back out of the index.
df = df.groupby(['group', 'name']).agg(list).reset_index('name')
# STEP 2 -- add a count column: total number of timestamp entries per group.
# (map(len) on the list column replaces the slower apply(..., axis=1).)
df['count'] = df['timestamp'].map(len)
# STEP 3 -- replace each timestamp list with [{'index': ts, 'value': n}, ...],
# one entry per distinct timestamp. Label-based df.at access avoids the
# deprecated positional fallback of df['timestamp'][r] on a string index.
for label in df.index:
    stamps = df.at[label, 'timestamp']
    df.at[label, 'timestamp'] = [
        {'index': ts, 'value': stamps.count(ts)} for ts in set(stamps)
    ]
# STEP 4 -- convert to JSON keyed by the index (the group name)
[json.loads(df[['name', 'count', 'timestamp']].to_json(orient="index"))]
Output:
[{'group_1': {'name': 'Subgroup 1',
'count': 6,
'timestamp': [{'index': '2021-12-14 12:00:00', 'value': 1},
{'index': '2021-12-14 12:30:00', 'value': 1},
{'index': '2021-12-14 13:00:00', 'value': 1},
{'index': '2021-12-14 12:15:00', 'value': 2},
{'index': '2021-12-14 12:45:00', 'value': 1}]},
'group_2': {'name': 'Subgroup 3',
'count': 3,
'timestamp': [{'index': '2021-12-14 12:30:00', 'value': 1},
{'index': '2021-12-14 12:45:00', 'value': 1},
{'index': '2021-12-14 13:00:00', 'value': 1}]}}]
Related
Let's say I have a Data Frame as follows:
id name dob
0 1 Joe 16 Jun 1999
1 2 John 04 Aug 1997
Now I want this data frame to be transformed as follows:
{
"items": [
{
"id": "1",
"attributes": [
{
"key": "name",
"value": "Joe"
},
{
"key": "dob",
"value": "16 Jun 1999"
}
]
},
{
"id": "2",
"attributes": [
{
"key": "name",
"value": "John"
},
{
"key": "dob",
"value": "04 Aug 1997"
}
]
}
]
}
I've tried the following code. It's working fine. I'm getting my desired output. But I'm trying to figure out if there's an easy way to do it, you know less code and more efficiency.
import pandas as pd
def transform(row):
    """Build an {'id', 'attributes'} record from one DataFrame row.

    `cols` (module-level) lists the columns to expose as attributes.
    """
    attributes = [{"key": col, "value": row[col]} for col in cols]
    return {"id": row["id"], "attributes": attributes}

data = [{"id":"1","name":"Joe","dob":"16 Jun 1999"},{"id":"2","name":"John","dob":"04 Aug 1997"}]
df = pd.DataFrame(data)
cols = df.columns.delete(0)  # every column except the leading id column
op = {"items": df.apply(transform, axis=1).tolist()}
print(op)
Does anyone have any ideas?
Here is a solution you can give it a try, using list comprehension
# One dict per id: {id: {column: value, ...}}
dict_ = df.set_index('id').to_dict(orient='index')
item_list = []
for rec_id, fields in dict_.items():
    attrs = [{"key": field, "value": val} for field, val in fields.items()]
    item_list.append({"id": rec_id, "attributes": attrs})
items = {"items": item_list}
print(items)
{'items': [{'attributes': [{'key': 'name', 'value': 'Joe'},
{'key': 'dob', 'value': '16 Jun 1999'}],
'id': '1'},
{'attributes': [{'key': 'name', 'value': 'John'},
{'key': 'dob', 'value': '04 Aug 1997'}],
'id': '2'}]}
My content inside a dictionary is below
I need to know the count of each value for: 1. BusinessArea,
2. Designation.
# Input records. NOTE: the original wrote ['L1' 'L2'] (adjacent string
# literals), which Python concatenates into the single string 'L1L2';
# commas are required so L1 and L2 are counted separately, matching the
# expected output below.
test = [
    {'masterid': '1', 'name': 'Group1',
     'BusinessArea': ['Accounting', 'Research'],
     'Designation': ['L1', 'L2']},
    {'masterid': '2', 'name': 'Group1',
     'BusinessArea': ['Research', 'Accounting'],
     'Role': [{'id': '5032', 'name': 'Tester'}, {'id': '5033', 'name': 'Developer'}],
     'Designation': ['L1', 'L2']},
    {'masterid': '3', 'name': 'Group1',
     'BusinessArea': ['Engineering'],
     'Role': [{'id': '5032', 'name': 'Developer'}, {'id': '5033', 'name': 'Developer', 'parentname': ''}],
     'Designation': ['L1']},
]
I want to get, for BusinessArea and for Designation, the count of masterids for each value name.
Expected out is below
[
{
"name": "BusinessArea",
"values": [
{
"name": "Accounting",
"count": "2"
},
{
"name": "Research",
"count": "2"
},
{
"name": "Engineering",
"count": "1"
}
]
},
{
"name": "Designation",
"values": [
{
"name": "L1",
"count": "3"
},
{
"name": "L2",
"count": "2"
}
]
}
]
Masterids 1, 2 and 3 all have L1, and masterids 1 and 2 have L2, so the counts are L1: 3 and L2: 2.
something like the below (not exactly the output you mentioned but quite close..)
from collections import defaultdict

test = [{'masterid': '1', 'name': 'Group1', 'BusinessArea': ['Accounting', 'Research'], 'Designation': ['L1', 'L2']},
        {'masterid': '2', 'name': 'Group1', 'BusinessArea': ['Research', 'Accounting'],
         'Role': [{'id': '5032', 'name': 'Tester'}, {'id': '5033', 'name': 'Developer'}], 'Designation': ['L1', 'L2']},
        {'masterid': '3', 'name': 'Group1', 'BusinessArea': ['Engineering'],
         'Role': [{'id': '5032', 'name': 'Developer'}, {'id': '5033', 'name': 'Developer', 'parentname': ''}],
         'Designation': ['L1']}]

# Tally how many records contain each BusinessArea / Designation value.
b_area = defaultdict(int)
des = defaultdict(int)
for record in test:
    for counter, field in ((b_area, 'BusinessArea'), (des, 'Designation')):
        for value in record[field]:
            counter[value] += 1
print(b_area)
print(des)
output
defaultdict(<class 'int'>, {'Accounting': 2, 'Research': 2, 'Engineering': 1})
defaultdict(<class 'int'>, {'L1': 3, 'L2': 2})
I have 2 lists, looking like:
temp_data:
{
"id": 1,
"name": "test (replaced)",
"code": "test",
"last_update": "2020-01-01",
"online": false,
"data": {
"temperature": [
{
"date": "2019-12-17",
"value": 23.652905748126333
},
...
]}
hum_data:
{
"id": 1,
"name": "test (replaced)",
"code": "test",
"last_update": "2020-01-01",
"online": false,
"data": {
"humidity": [
{
"date": "2019-12-17",
"value": 23.652905748126333
},
...
]}
I need to merge the 2 lists into 1 without duplicating data. What is the easiest/most efficient way? After merging, I want something like this:
{
"id": 1,
"name": "test",
"code": "test",
"last_update": "2020-01-01",
"online": false,
"data": {
"temperature": [
{
"date": "2019-12-17",
"value": 23.652905748126333
},
...
],
"humidity": [
{
"date": "2019-12-17",
"value": 23.652905748126333
},
...
Thanks for helping.
If your lists hum_data and temp_data are not sorted then first sort them and then concatenate the dictionaries pair-wise.
# Sort both lists by sensor id so matching entries line up pairwise.
def compare_function(value):
    return value['id']

temp_data.sort(key=compare_function)
hum_data.sort(key=compare_function)

# Shallow-copy the temperature list, then fold each humidity payload
# into the matching entry's 'data' dict.
combined_data = list(temp_data)
for hum_row, combined_row in zip(hum_data, combined_data):
    combined_row['data'].update(hum_row['data'])
combined_data  # merged hum_data and temp_data
If the lists are already sorted then you just need to concatenate dictionary by dictionary.
# Lists already sorted by id: merge pairwise without re-sorting.
combined_data = list(temp_data)
for hum_row, merged_row in zip(hum_data, combined_data):
    # Copy each humidity payload into the matching combined entry.
    merged_row['data'].update(hum_row['data'])
combined_data  # merged hum_data and temp_data
With that code I got the following result:
[
{
'id': 1,
'name': 'test (replaced)',
'code': 'test',
'last_update': '2020-01-01',
'online': False,
'data': {
'temperature': [{'date': '2019-12-17', 'value': 1}],
'humidity': [{'date': '2019-12-17', 'value': 1}]}
},
{
'id': 2,
'name': 'test (replaced)',
'code': 'test',
'last_update': '2020-01-01',
'online': False,
'data': {
'temperature': [{'date': '2019-12-17', 'value': 2}],
'humidity': [{'date': '2019-12-17', 'value': 2}]}
}
]
I'm building a python application which receives REST response in below format:
[
{
'metric': 'pass_status',
'history': [
{
'date': '2019-02-20T10:26:52+0000',
'value': 'OK'
},
{
'date': '2019-03-13T11:37:39+0000',
'value': 'FAIL'
},
{
'date': '2019-03-13T12:00:57+0000',
'value': 'OK'
}
]
},
{
'metric': 'bugs',
'history': [
{
'date': '2019-02-20T10:26:52+0000',
'value': '1'
},
{
'date': '2019-03-13T11:37:39+0000',
'value': '6'
},
{
'date': '2019-03-13T12:00:57+0000',
'value': '2'
}
]
},
{
'metric': 'code_smells',
'history': [
{
'date': '2019-02-20T10:26:52+0000',
'value': '0'
},
{
'date': '2019-03-13T11:37:39+0000',
'value': '1'
},
{
'date': '2019-03-13T12:00:57+0000',
'value': '2'
}
]
}
]
You can see the dates are the same across the metrics.
I want to collate this data date-wise, i.e. my result json/dictionary should look like:
[
'2019-02-20T10:26:52+0000' : {
'pass_status' : 'OK',
'bugs' : '1',
'code_smells' : '0'
},
'2019-03-13T11:37:39+0000' : {
'pass_status' : 'FAIL',
'bugs' : '6',
'code_smells' : '1'
},
'2019-03-13T12:00:57+0000' : {
'pass_status' : 'OK',
'bugs' : '2',
'code_smells' : '2'
}
]
What will be the suggested approach to do this?
Thanks
I tried some itertools.groupby magic, but it turned into a mess...
maybe iteration + defaultdict is just keeping it simple...
like this:
from collections import defaultdict

# Pivot the metric-major response into {date: {metric_name: value}}.
result = defaultdict(dict)
for series in data:
    series_name = series['metric']
    for point in series['history']:
        result[point['date']][series_name] = point['value']
print(dict(result))
or a full example with the data:
# REST response fixture: one entry per metric, each carrying a
# date-stamped history.
data = [
    {'metric': 'pass_status',
     'history': [{'date': '2019-02-20T10:26:52+0000', 'value': 'OK'},
                 {'date': '2019-03-13T11:37:39+0000', 'value': 'FAIL'},
                 {'date': '2019-03-13T12:00:57+0000', 'value': 'OK'}]},
    {'metric': 'bugs',
     'history': [{'date': '2019-02-20T10:26:52+0000', 'value': '1'},
                 {'date': '2019-03-13T11:37:39+0000', 'value': '6'},
                 {'date': '2019-03-13T12:00:57+0000', 'value': '2'}]},
    {'metric': 'code_smells',
     'history': [{'date': '2019-02-20T10:26:52+0000', 'value': '0'},
                 {'date': '2019-03-13T11:37:39+0000', 'value': '1'},
                 {'date': '2019-03-13T12:00:57+0000', 'value': '2'}]},
]

from collections import defaultdict

# Pivot the metric-major response into {date: {metric_name: value}}.
result = defaultdict(dict)
for series in data:
    series_name = series['metric']
    for point in series['history']:
        result[point['date']][series_name] = point['value']
print(result)
I'm a beginner in Python. How do I sum the counts for the same object id across many lists in Python? I have sample data below.
# Input: a list of lists of {'id', 'count'} records; counts for the same
# id must be summed across the inner lists.
data = [
[
{
'id': 1,
'count': 10
},
{
'id': 2,
'count': 20
},
],
[
{
'id': 1,
'count': 20
},
{
'id': 2,
'count': 30
},
]
]
How do I sum the counts of the same id, so that I can get:
# Desired result: one record per id with the counts summed.
data = [
{
'id': 1,
'count': 30
},
{
'id': 2,
'count': 50
},
]
Try using pandas:
import pandas as pd

# Flatten the list-of-lists into a single list of records; a comprehension
# avoids the quadratic `sum(data, [])` flatten anti-pattern.
df = pd.DataFrame([record for sublist in data for record in sublist])
df = df.groupby('id').sum()  # sum counts per id; id becomes the index
d = [{'id': index, 'count': row['count']} for index, row in df.iterrows()]
This isn't the optimal solution, but it works.
# Input: a list of lists of {'id', 'count'} records.
data = [
    [
        {'id': 1, 'count': 10},
        {'id': 2, 'count': 20},
    ],
    [
        {'id': 1, 'count': 20},
        {'id': 2, 'count': 30},
    ],
]

# Accumulate counts per id. The first record seen for an id is appended
# to sumofdata as-is (so it aliases the dict inside `data`); later records
# with the same id add their count into that dict in place.
sumofdata = []
doneids = []
for batch in data:
    for item in batch:
        if item["id"] not in doneids:
            doneids.append(item["id"])
            sumofdata.append(item)
        else:
            target = next(entry for entry in sumofdata if entry["id"] == item["id"])
            target["count"] += item["count"]
print(sumofdata)