I'm a beginner in Python. How can I sum the counts of objects that share the same id across several lists? Here is my sample data.
data = [
[
{
'id': 1,
'count': 10
},
{
'id': 2,
'count': 20
},
],
[
{
'id': 1,
'count': 20
},
{
'id': 2,
'count': 30
},
]
]
How can I sum the count of entries that have the same id, so that I get:
data = [
{
'id': 1,
'count': 30
},
{
'id': 2,
'count': 50
},
]
Try using pandas:
import pandas as pd

# Flatten the list of lists in a single linear pass; sum(data, []) copies the
# accumulated list on every addition and is quadratic in the number of groups.
flat = [item for group in data for item in group]
df = pd.DataFrame(flat)
# Add up every numeric column per id; 'id' becomes the index of the result.
df = df.groupby('id').sum()
# Turn 'id' back into a column and emit one {'id': ..., 'count': ...} dict per row.
d = df.reset_index().to_dict('records')
This isn't the optimal solution, but it works.
data = [
    [
        {
            'id': 1,
            'count': 10
        },
        {
            'id': 2,
            'count': 20
        },
    ],
    [
        {
            'id': 1,
            'count': 20
        },
        {
            'id': 2,
            'count': 30
        },
    ]
]

# Accumulate one dict per id, in first-seen order. A dict lookup replaces the
# rescan of the result list for every repeated id.
merged = {}
for group in data:
    for item in group:
        existing = merged.get(item["id"])
        if existing is None:
            # Copy the first occurrence so that accumulating into it does not
            # modify the dicts inside `data`.
            merged[item["id"]] = dict(item)
        else:
            existing["count"] += item["count"]

sumofdata = list(merged.values())
doneids = list(merged)  # ids in the order they were first encountered
print(sumofdata)
Related
How can I transform the data from this DataFrame
pd.DataFrame(
[
['2021-12-14 12:00:00','subgroup_1','group_1','Subgroup 1'],
['2021-12-14 12:15:00','subgroup_1','group_1','Subgroup 1'],
['2021-12-14 12:15:00','subgroup_1','group_1','Subgroup 1'],
['2021-12-14 12:30:00','subgroup_1','group_1','Subgroup 1'],
['2021-12-14 12:45:00','subgroup_1','group_1','Subgroup 1'],
['2021-12-14 13:00:00','subgroup_1','group_1','Subgroup 1'],
['2021-12-14 12:30:00','subgroup_3','group_2','Subgroup 3'],
['2021-12-14 12:45:00','subgroup_3','group_2','Subgroup 3'],
['2021-12-14 13:00:00','subgroup_3','group_2','Subgroup 3'],
], columns=['timestamp','subgroup','group','name']
)
to JSON
using pandas? Please help me.
My solution :
# Parse the timestamp column so its values sort and format as datetimes.
df["timestamp"] = pd.to_datetime(df["timestamp"])
# Every distinct timestamp in the whole frame, computed once. Each subgroup
# reports all of them, with 0 for the timestamps it has no rows for.
all_timestamps = [pd.Timestamp(ts) for ts in df["timestamp"].unique()]

out = {}
for group_key, df_group in df.groupby("group"):
    out[group_key] = []
    for _, df_subgroup in df_group.groupby("subgroup"):
        name = df_subgroup["name"].values[0]  # Assuming `name` is unique in this group
        count = len(df_subgroup)
        # Number of rows per timestamp inside this subgroup.
        counts = df_subgroup.groupby("timestamp")["name"].count().to_dict()
        # Complete with missing timestamps
        for ts in all_timestamps:
            counts.setdefault(ts, 0)
        # Sort by timestamp and convert each timestamp into a string
        timegroup = [
            {"index": ts.strftime("%Y-%m-%d %H:%M:%S"), "value": value}
            for ts, value in sorted(counts.items())
        ]
        out[group_key].append({"name": name, "count": count, "timegroup": timegroup})
Result in out :
{
"group_1": [
{
"name": "Subgroup 1",
"count": 6,
"timegroup": [
{
"index": "2021-12-14 12:00:00",
"value": 1
},
{
"index": "2021-12-14 12:15:00",
"value": 2
},
{
"index": "2021-12-14 12:30:00",
"value": 1
},
{
"index": "2021-12-14 12:45:00",
"value": 1
},
{
"index": "2021-12-14 13:00:00",
"value": 1
}
]
}
],
"group_2": [
{
"name": "Subgroup 3",
"count": 3,
"timegroup": [
{
"index": "2021-12-14 12:00:00",
"value": 0
},
{
"index": "2021-12-14 12:15:00",
"value": 0
},
{
"index": "2021-12-14 12:30:00",
"value": 1
},
{
"index": "2021-12-14 12:45:00",
"value": 1
},
{
"index": "2021-12-14 13:00:00",
"value": 1
}
]
}
]
}
To get your desired output I applied 4 steps:
Code:
import json
from collections import Counter

# STEP 1 -- group by group (and name); every other column becomes a list per group
df = df.groupby(['group', 'name']).agg(list).reset_index('name')
# STEP 2 -- add a count column holding the number of elements in each timestamp list
df['count'] = df['timestamp'].apply(len)
# STEP 3 -- replace each timestamp list with a list of {'index': ts, 'value': n}
# dicts, where n is how often ts occurs; sorted so the order is deterministic
df['timestamp'] = df['timestamp'].apply(
    lambda stamps: [
        {'index': ts, 'value': n} for ts, n in sorted(Counter(stamps).items())
    ]
)
# STEP 4 -- convert to JSON keyed by the index (the group)
[json.loads(df[['name','count','timestamp']].to_json(orient="index"))]
Output:
[{'group_1': {'name': 'Subgroup 1',
'count': 6,
'timestamp': [{'index': '2021-12-14 12:00:00', 'value': 1},
{'index': '2021-12-14 12:30:00', 'value': 1},
{'index': '2021-12-14 13:00:00', 'value': 1},
{'index': '2021-12-14 12:15:00', 'value': 2},
{'index': '2021-12-14 12:45:00', 'value': 1}]},
'group_2': {'name': 'Subgroup 3',
'count': 3,
'timestamp': [{'index': '2021-12-14 12:30:00', 'value': 1},
{'index': '2021-12-14 12:45:00', 'value': 1},
{'index': '2021-12-14 13:00:00', 'value': 1}]}}]
I have below list of dicts
[{
'NAV': 50,
'id': '61e6b2a1d0c32b744d3e3b2d'
}, {
'NAV': 25,
'id': '61e7fbe2d0c32b744d3e6ab4'
}, {
'NAV': 30,
'id': '61e801cbd0c32b744d3e7003'
}, {
'NAV': 30,
'id': '61e80663d0c32b744d3e7c51'
}, {
'NAV': 30,
'id': '61e80d9ad0c32b744d3e8da6'
}, {
'NAV': 30,
'id': '61e80f5fd0c32b744d3e93f0'
}, {
'NAV': 30,
'id': '61e90908d0c32b744d3ea967'
}, {
'NAV': 30,
'id': '61ea7cf3d0c32b744d3ed1b2'
}, {
'NAV': 50,
'id': '61fa387127e14670f3a67194'
}, {
'NAV': 30,
'id': '61fa3cea27e14670f3a6772c'
}, {
'Amount': 30,
'id': '61e6b373d0c32b744d3e3d14'
}, {
'Amount': 30,
'id': '61e6b49cd0c32b744d3e3ea0'
}, {
'Amount': 25,
'id': '61e7fe90d0c32b744d3e6ccd'
}, {
'Amount': 20,
'id': '61e80246d0c32b744d3e7242'
}, {
'Amount': 20,
'id': '61e80287d0c32b744d3e74ae'
}, {
'Amount': 20,
'id': '61e80253d0c32b744d3e733e'
}, {
'Amount': 34,
'id': '61e80697d0c32b744d3e7edd'
}, {
'Amount': 20,
'id': '61e806a3d0c32b744d3e7ff9'
}, {
'Amount': 30,
'id': '61e80e0ad0c32b744d3e906e'
}, {
'Amount': 30,
'id': '61e80e22d0c32b744d3e9198'
}, {
'Amount': 20,
'id': '61e81011d0c32b744d3e978e'
}, {
'Amount': 20,
'id': '61e8104bd0c32b744d3e9a92'
}, {
'Amount': 20,
'id': '61e81024d0c32b744d3e98cd'
}, {
'Amount': 20,
'id': '61e90994d0c32b744d3eac2b'
}, {
'Amount': 20,
'id': '61e909aad0c32b744d3ead76'
}, {
'Amount': 50,
'id': '61fa392a27e14670f3a67337'
}, {
'Amount': 50,
'id': '61fa393727e14670f3a67347'
}, {
'Amount': 50,
'id': '61fa3d6727e14670f3a67750'
}, {
'Amount': 150,
'id': '61fa3d7127e14670f3a67760'
}]
The list above contains dicts that have either NAV or Amount as a key. I need to find the maximum value of NAV and the maximum value of Amount separately, across all the dicts, so that the output is
NAV = 50
Amount = 150
I have tried some approach like:
max(outList, key=lambda x: x['NAV'])
But this is giving me keyerror of 'NAV'. What is the best way to do it?
I don't understand why you are calling Current NAV ($ M). It doesn't exist in the list you have provided. Anyway, I came up with the code below:
def getMax(value):
    """Return the number to rank ``value`` by when used as a ``max`` key.

    ``value`` is a dict. Its ``"NAV"`` entry is used when present, otherwise
    its ``"Amount"`` entry. A dict with neither key ranks below every number
    (negative infinity) instead of raising ``KeyError`` in the middle of a
    ``max(...)`` call.
    """
    if "NAV" in value:
        return value["NAV"]
    return value.get("Amount", float("-inf"))
max(outList, key= getMax)
If you are interested in finding the max value for NAV and Amount separately, you can filter the list down to the dicts that contain each key and take the max of the extracted values.
# Largest NAV among the dicts that have a "NAV" key.
print(max(x["NAV"] for x in outList if "NAV" in x))
# Largest Amount among the dicts that have an "Amount" key.
print(max(x["Amount"] for x in outList if "Amount" in x))
you could try something like this:
# print is a function in Python 3, so the call needs parentheses.
print(max(i["NAV"] for i in t if "NAV" in i))
print(max(i["Amount"] for i in t if "Amount" in i))
Result:
50
150
def max_from_list(t_list, key):
    """Return the largest ``key`` value among the dicts in ``t_list``.

    Dicts that do not contain ``key`` are skipped.

    Raises:
        ValueError: if no dict in ``t_list`` contains ``key``.
    """
    return max(item[key] for item in t_list if key in item)
if you are not only looking for NAV and Amount, maybe you can do as below:
from collections import defaultdict

# Largest value seen so far for each key other than 'id'. Starting every key
# at negative infinity keeps the result correct when all values of a key are
# negative; a starting value of 0 would wrongly report 0 in that case.
res = defaultdict(lambda: float("-inf"))
for record in d:
    for key, value in record.items():
        if key != 'id':
            res[key] = max(res[key], value)
You are on the right track with your max solution:
assuming your list is called outList (BTW, that is not a pythonic name, try out_list instead)
# Rank dicts lacking the key at negative infinity so they never win, then read
# the key from the winning dict. Raises KeyError if no dict in outList has the
# key, and ValueError if outList is empty.
nav = max(outList, key=lambda item: item.get("NAV", float("-inf")))['NAV']
# Same approach for the "Amount" key.
amount = max(outList, key=lambda item: item.get("Amount", float("-inf")))['Amount']
I want to eliminate repeated elements from a list and return the number of times each element is repeated.
Even though I'm using a list, Counter throws an error because the elements are dicts (JSON-style objects).
If dicts inside a list cannot be used here, please let me know of a fast alternative (len(all_response) will be a six-digit number).
here is my code:
from collections import Counter
all_response = [
{
"stock_id": 315,
"product_id": 315
},
{
"stock_id": 315,
"product_id": 315
},
{
"stock_id": 1,
"product_id": 1
},
{
"stock_id": 2,
"product_id": 2
},
{
"stock_id": 2,
"product_id": 2
},
{
"stock_id": 6,
"product_id": 6
}]
stock_ids = []
d = Counter(all_response)
for i in all_response:
if i['stock_id'] not in stock_ids:
stock_ids.append({'stock_id':i['stock_id']})
stock_ids.append({'product_count': d[i['stock_id']]})
print(stock_ids)
Expected Output:
[
{
"stock_id": 315,
"product_count": 2
},{
"stock_id": 1,
"product_count": 1
},
{
"stock_id": 2,
"product_count": 2
},
{
"stock_id": 6,
"product_count": 1
}]
from collections import Counter

original_list = [{'stock_id': 315, 'product_id': 315}, {'stock_id': 1, 'product_id': 1}, {'stock_id': 2, 'product_id': 2}, {'stock_id': 2, 'product_id': 2}, {'stock_id': 6, 'product_id': 6}]

# Tally how often each stock_id occurs. Counter is a dict, so ids keep the
# order in which they were first seen.
intermediate_result = Counter(item["stock_id"] for item in original_list)

# One {"stock_id", "count"} dict per distinct id.
result = [
    {"stock_id": stock_id, "count": count}
    for stock_id, count in intermediate_result.items()
]
print(result)
[{'stock_id': 315, 'count': 1}, {'stock_id': 1, 'count': 1}, {'stock_id': 2, 'count': 2}, {'stock_id': 6, 'count': 1}]
About your code:
stock_ids = []
d = Counter(all_response)
for i in all_response:
# here is not okay. stock_ids is list of dictionaries
# so you comparing integer id with one of dictionaries
if i['stock_id'] not in stock_ids:
# here is half okay (you appending only id key, without count key)
stock_ids.append({'stock_id':i['stock_id']})
# here is not okay, because you trying to append new element to list
# you should append only once if you want only one dictionary element
stock_ids.append({'product_count': d[i['stock_id']]})
Like the error message says, Counter can't accept a dict as input. But you can create a list of items from the key you are interested in.
# Count occurrences of each stock_id; the ids are hashable even though the
# dicts that hold them are not.
d = Counter(x['stock_id'] for x in all_response)
# The counted values are stock ids, so label them 'stock_id' as in the
# expected output.
stock_ids = [{'stock_id': stock_id, 'product_count': count} for stock_id, count in d.items()]
My data is the list of dictionaries below.
I need to know: 1. each BusinessArea value and how many times it occurs
2. each Designation value and how many times it occurs
# Sample records. The items of each 'Designation' list must be separated by
# commas: adjacent string literals such as 'L1' 'L2' are concatenated by
# Python into the single string 'L1L2'.
test = [
    {
        'masterid': '1',
        'name': 'Group1',
        'BusinessArea': ['Accounting', 'Research'],
        'Designation': ['L1', 'L2'],
    },
    {
        'masterid': '2',
        'name': 'Group1',
        'BusinessArea': ['Research', 'Accounting'],
        'Role': [
            {'id': '5032', 'name': 'Tester'},
            {'id': '5033', 'name': 'Developer'},
        ],
        'Designation': ['L1', 'L2'],
    },
    {
        'masterid': '3',
        'name': 'Group1',
        'BusinessArea': ['Engineering'],
        'Role': [
            {'id': '5032', 'name': 'Developer'},
            {'id': '5033', 'name': 'Developer', 'parentname': ''},
        ],
        'Designation': ['L1'],
    },
]
I want to get the count of masterid of BusinessArea and Designation which is all the names
Expected out is below
[
{
"name": "BusinessArea",
"values": [
{
"name": "Accounting",
"count": "2"
},
{
"name": "Research",
"count": "2"
},
{
"name": "Engineering",
"count": "1"
}
]
},
{
"name": "Designation",
"values": [
{
"name": "L1",
"count": "3"
},
{
"name": "L2",
"count": "2"
}
]
}
]
L1 appears under masterid 1, 2 and 3, and L2 appears under masterid 1 and 2, so the counts are L1: 3 and L2: 2.
something like the below (not exactly the output you mentioned but quite close..)
from collections import defaultdict

test = [{'masterid': '1', 'name': 'Group1', 'BusinessArea': ['Accounting', 'Research'], 'Designation': ['L1', 'L2']},
        {'masterid': '2', 'name': 'Group1', 'BusinessArea': ['Research', 'Accounting'],
         'Role': [{'id': '5032', 'name': 'Tester'}, {'id': '5033', 'name': 'Developer'}], 'Designation': ['L1', 'L2']},
        {'masterid': '3', 'name': 'Group1', 'BusinessArea': ['Engineering'],
         'Role': [{'id': '5032', 'name': 'Developer'}, {'id': '5033', 'name': 'Developer', 'parentname': ''}],
         'Designation': ['L1']}]

# Occurrence counts per value; a missing key starts at 0.
b_area = defaultdict(int)
des = defaultdict(int)
for entry in test:
    # .get(..., ()) lets an entry that lacks one of the lists be skipped
    # instead of raising KeyError.
    for val in entry.get('BusinessArea', ()):
        b_area[val] += 1
    for val in entry.get('Designation', ()):
        des[val] += 1
print(b_area)
print(des)
output
defaultdict(<class 'int'>, {'Accounting': 2, 'Research': 2, 'Engineering': 1})
defaultdict(<class 'int'>, {'L1': 3, 'L2': 2})
I'm building a python application which receives REST response in below format:
[
{
'metric': 'pass_status',
'history': [
{
'date': '2019-02-20T10:26:52+0000',
'value': 'OK'
},
{
'date': '2019-03-13T11:37:39+0000',
'value': 'FAIL'
},
{
'date': '2019-03-13T12:00:57+0000',
'value': 'OK'
}
]
},
{
'metric': 'bugs',
'history': [
{
'date': '2019-02-20T10:26:52+0000',
'value': '1'
},
{
'date': '2019-03-13T11:37:39+0000',
'value': '6'
},
{
'date': '2019-03-13T12:00:57+0000',
'value': '2'
}
]
},
{
'metric': 'code_smells',
'history': [
{
'date': '2019-02-20T10:26:52+0000',
'value': '0'
},
{
'date': '2019-03-13T11:37:39+0000',
'value': '1'
},
{
'date': '2019-03-13T12:00:57+0000',
'value': '2'
}
]
}
]
As you can see, the same dates appear in the history of every metric.
I want to collate this data by date, i.e. my resulting JSON/dictionary should look like:
[
'2019-02-20T10:26:52+0000' : {
'pass_status' : 'OK',
'bugs' : '1',
'code_smells' : '0'
},
'2019-03-13T11:37:39+0000' : {
'pass_status' : 'FAIL',
'bugs' : '6',
'code_smells' : '1'
},
'2019-03-13T12:00:57+0000' : {
'pass_status' : 'OK',
'bugs' : '2',
'code_smells' : '2'
}
]
What will be the suggested approach to do this?
Thanks
I tried some itertools.groupby magic, but it turned into a mess...
maybe iteration + defaultdict is just keeping it simple...
like this:
from collections import defaultdict

# Maps each date to a {metric name: value} dict; the inner dict is created
# automatically the first time a date is seen.
result = defaultdict(dict)
for metric_dict in data:
    metric_name = metric_dict['metric']
    for entry in metric_dict['history']:
        result[entry['date']][metric_name] = entry['value']
print(dict(result))
or a full example with the data:
from collections import defaultdict

data = [
    {
        'metric': 'pass_status',
        'history': [
            {
                'date': '2019-02-20T10:26:52+0000',
                'value': 'OK'
            },
            {
                'date': '2019-03-13T11:37:39+0000',
                'value': 'FAIL'
            },
            {
                'date': '2019-03-13T12:00:57+0000',
                'value': 'OK'
            }
        ]
    },
    {
        'metric': 'bugs',
        'history': [
            {
                'date': '2019-02-20T10:26:52+0000',
                'value': '1'
            },
            {
                'date': '2019-03-13T11:37:39+0000',
                'value': '6'
            },
            {
                'date': '2019-03-13T12:00:57+0000',
                'value': '2'
            }
        ]
    },
    {
        'metric': 'code_smells',
        'history': [
            {
                'date': '2019-02-20T10:26:52+0000',
                'value': '0'
            },
            {
                'date': '2019-03-13T11:37:39+0000',
                'value': '1'
            },
            {
                'date': '2019-03-13T12:00:57+0000',
                'value': '2'
            }
        ]
    }
]

# Maps each date to a {metric name: value} dict; the inner dict is created
# automatically the first time a date is seen.
result = defaultdict(dict)
for metric_dict in data:
    metric_name = metric_dict['metric']
    for entry in metric_dict['history']:
        result[entry['date']][metric_name] = entry['value']
# Print a plain dict so the output is not wrapped in the defaultdict repr.
print(dict(result))