Convert a nested list of strings into a data frame - python

I have a JSON file containing something like this:
[7500,
'29-Dec-2022',
{'strikePrice': 7500, 'expiryDate': '29-Dec-2022', 'underlying': 'NIFTY', 'identifier': 'OPTIDXNIFTY29-12-2022PE7500.00', 'openInterest': 21, 'changeinOpenInterest': 0, 'pchangeinOpenInterest': 0, 'totalTradedVolume': 0, 'impliedVolatility': 0, 'lastPrice': 8.6, 'change': 0, 'pChange': 0, 'totalBuyQuantity': 1800, 'totalSellQuantity': 0, 'bidQty': 1800, 'bidprice': 3.05, 'askQty': 0, 'askPrice': 0, 'underlyingValue': 17287.05},
8300,
'30-Jun-2022',
{'strikePrice': 8300, 'expiryDate': '30-Jun-2022', 'underlying': 'NIFTY', 'identifier': 'OPTIDXNIFTY30-06-2022PE8300.00', 'openInterest': 3, 'changeinOpenInterest': 0, 'pchangeinOpenInterest': 0, 'totalTradedVolume': 0, 'impliedVolatility': 0, 'lastPrice': 4.7, 'change': 0, 'pChange': 0, 'totalBuyQuantity': 1050, 'totalSellQuantity': 0, 'bidQty': 750, 'bidprice': 0.35, 'askQty': 0, 'askPrice': 0, 'underlyingValue': 17287.05},
8500,
'29-Jun-2023', {'strikePrice': 8500, 'expiryDate': '29-Jun-2023', 'underlying': 'NIFTY', 'identifier': 'OPTIDXNIFTY29-06-2023CE8500.00', 'openInterest': 319.5, 'changeinOpenInterest': 0, 'pchangeinOpenInterest': 0, 'totalTradedVolume': 0, 'impliedVolatility': 0, 'lastPrice': 1775, 'change': 0, 'pChange': 0, 'totalBuyQuantity': 0, 'totalSellQuantity': 50, 'bidQty': 0, 'bidprice': 0, 'askQty': 50, 'askPrice': 9970, 'underlyingValue': 17287.05},
8500,
'29-Dec-2022',
{'strikePrice': 8500, 'expiryDate': '29-Dec-2022', 'underlying': 'NIFTY', 'identifier': 'OPTIDXNIFTY29-12-2022PE8500.00', 'openInterest': 2254, 'changeinOpenInterest': 0, 'pchangeinOpenInterest': 0, 'totalTradedVolume': 0, 'impliedVolatility': 0, 'lastPrice': 22.9, 'change': 0, 'pChange': 0, 'totalBuyQuantity': 2700, 'totalSellQuantity': 0, 'bidQty': 1800, 'bidprice': 3.15, 'askQty': 0, 'askPrice': 0, 'underlyingValue': 17287.05}]
Code:
read_cont = []
new_list1 = []
new_list2 = []
for i in rjson:
for j in rjson[i]:
read_cont.append(rjson[i][j])
data_filter = read_cont[1]
for item in data_filter:
for j in item:
new_list1.append(item[j])
new_list1 = map(str,new_list1)
for i in new_list1:
if len(i) > 100:
new_list2.append(i)
header_names = ["STRIKE PRICE","EXPIRY","underlying", "identifier","OPENINTEREST","changeinOpenInterest","pchangeinOpenInterest", "totalTradedVolume","impliedVolatility","lastPrice","change","pChange", "totalBuyQuantity","totalSellQuantity","bidQty","bidprice","askQty","askPrice","underlyingValue"]
df = pd.DataFrame(columns=header_names)
In order to separate the strikePrice entries from the nested list, I converted all the items to strings:
["{'strikePrice': 7500, 'expiryDate': '29-Dec-2022', 'underlying': 'NIFTY', 'identifier': 'OPTIDXNIFTY29-12-2022PE7500.00', 'openInterest': 21, 'changeinOpenInterest': 0, 'pchangeinOpenInterest': 0, 'totalTradedVolume': 0, 'impliedVolatility': 0, 'lastPrice': 8.6, 'change': 0, 'pChange': 0, 'totalBuyQuantity': 1800, 'totalSellQuantity': 0, 'bidQty': 1800, 'bidprice': 3.05, 'askQty': 0, 'askPrice': 0, 'underlyingValue': 17287.05}",
"{'strikePrice': 8300, 'expiryDate': '30-Jun-2022', 'underlying': 'NIFTY', 'identifier': 'OPTIDXNIFTY30-06-2022PE8300.00', 'openInterest': 3, 'changeinOpenInterest': 0, 'pchangeinOpenInterest': 0, 'totalTradedVolume': 0, 'impliedVolatility': 0, 'lastPrice': 4.7, 'change': 0, 'pChange': 0, 'totalBuyQuantity': 1050, 'totalSellQuantity': 0, 'bidQty': 750, 'bidprice': 0.35, 'askQty': 0, 'askPrice': 0, 'underlyingValue': 17287.05}"
Now I want to transfer this content to a data frame containing the columns listed in header_names in the code above.

# Assumes input_list is laid out like the sample in the question:
# [strike, expiry, record_dict, strike, expiry, record_dict, ...],
# so the record dicts sit at offsets 2, 5, 8, ...
result_dict = input_list[2::3]
# One row per record, materialised as a plain list of values.
result_values = [list(record.values()) for record in result_dict]
# Column names come from the first record; an empty input yields an empty frame
# instead of an IndexError.
col_names = list(result_dict[0].keys()) if result_dict else []
result_df = pd.DataFrame(result_values, columns=col_names)

rjson = response.json()
# Flatten the response: collect every value stored under each top-level key.
# Assumes rjson is a mapping whose values are themselves mappings.
read_cont = [rjson[outer][inner] for outer in rjson for inner in rjson[outer]]
# The second flattened value is the collection that gets unpacked below.
data_filter = read_cont[1]
# Pull every value out of every item of that collection ...
new_list1 = [item[key] for item in data_filter for key in item]
# ... and keep only the dict values; the scalar values are dropped.
new_list2 = [value for value in new_list1 if isinstance(value, dict)]
# A list of dicts maps directly onto rows, with the dict keys as columns.
df = pd.DataFrame(new_list2)

Related

python nested dictionary to pandas DataFrame

main_dict = {
'NSE:ACC': {'average_price': 0,
'buy_quantity': 0,
'depth': {'buy': [{'orders': 0, 'price': 0, 'quantity': 0},
{'orders': 0, 'price': 0, 'quantity': 0},
{'orders': 0, 'price': 0, 'quantity': 0},
{'orders': 0, 'price': 0, 'quantity': 0},
{'orders': 0, 'price': 0, 'quantity': 0}],
'sell': [{'orders': 0, 'price': 0, 'quantity': 0},
{'orders': 0, 'price': 0, 'quantity': 0},
{'orders': 0, 'price': 0, 'quantity': 0},
{'orders': 0, 'price': 0, 'quantity': 0},
{'orders': 0, 'price': 0, 'quantity': 0}]},
'instrument_token': 5633,
'last_price': 2488.9,
'last_quantity': 0,
'last_trade_time': '2022-09-23 15:59:10',
'lower_circuit_limit': 2240.05,
'net_change': 0,
'ohlc': {'close': 2555.7,
'high': 2585.5,
'low': 2472.2,
'open': 2575},
'oi': 0,
'oi_day_high': 0,
'oi_day_low': 0,
'sell_quantity': 0,
'timestamp': '2022-09-23 18:55:17',
'upper_circuit_limit': 2737.75,
'volume': 0},
}
convert dict to pandas dataframe
for example:
symbol last_price net_change Open High Low Close
NSE:ACC 2488.9 0 2575 2585.5 2472.2 2555.7
I am trying pd.DataFrame.from_dict(main_dict), but it does not work.
Please suggest the best way to do this.
I would first select the necessary data from your dict and then pass that as input to pd.DataFrame()
import pandas as pd

# Build one flat row per symbol: the top-level price fields plus the four
# values nested under "ohlc".
df_input = []
for symbol, quote in main_dict.items():
    ohlc = quote.get("ohlc")
    df_input.append({
        "symbol": symbol,
        "last_price": quote.get("last_price"),
        "net_change": quote.get("net_change"),
        "open": ohlc.get("open"),
        "high": ohlc.get("high"),
        "low": ohlc.get("low"),
        "close": ohlc.get("close"),
    })

# Each row dict becomes one DataFrame row; its keys become the columns.
df = pd.DataFrame(df_input)

Cumulative Dictionary

I am trying to write a python function where for each key (the dates), the value would be the sum of that day's result and the previous day(s) (sort of following the same logic as the fibonacci sequence).
For example, I have:
{20200516: {'Level1': 0, 'Level2': 1, 'Level3': 0, 'Level4': 0}, 20200517: {'Level1': 0, 'Level2': 0, 'Level3': 0, 'Level4': 1}, 20200518: {'Level1': 1, 'Level2': 0, 'Level3': 0, 'Level4': 0}, 20200519: {'Level1': 0, 'Level2': 1, 'Level3': 0, 'Level4': 1}}
but I want to have:
{20200516: {'Level1': 0, 'Level2': 1, 'Level3': 0, 'Level4': 0}, 20200517: {'Level1': 0, 'Level2': 1, 'Level3': 0, 'Level4': 1}, 20200518: {'Level1': 1, 'Level2': 1, 'Level3': 0, 'Level4': 1}, 20200519: {'Level1': 1, 'Level2': 2, 'Level3': 0, 'Level4': 2}}
What I have done until now:
def summing(d):
'''
each key after the first one is the sum of the one before and its own result
>>> {20200516: {'Level1': 0, 'Level2': 1, 'Level3': 0, 'Level4': 0}, 20200517: {'Level1': 0,
'Level2': 0, 'Level3': 0, 'Level4': 1}, 20200518: {'Level1': 1, 'Level2': 0, 'Level3':
0, 'Level4': 0}, 20200519: {'Level1': 0, 'Level2': 1, 'Level3': 0, 'Level4': 1}}
{20200516: {'Level1': 0, 'Level2': 1, 'Level3': 0, 'Level4': 0}, 20200517: {'Level1': 0,
'Level2': 1, 'Level3': 0, 'Level4': 1}, 20200518: {'Level1': 1, 'Level2': 1, 'Level3': 0, '
Level4': 1}, 20200519: {'Level1': 1, 'Level2': 2, 'Level3': 0, 'Level4': 2}
'''
#STILL IN PROGRESS
c={}
for key in d:
if key == 20200516:
c[20200516]=d[20200516]
else:
c[key]=d[key-1]+d[key]
return c
You made a good effort, but you can't just add dicts like that. Here's a minimal change to get from your input to desired output, by using dict comprehension to add the value for each entry in the daily record:
from pprint import pprint
def summing_oneday(d1, d2):
    """Return a new dict holding ``d1[key] + d2[key]`` for every key of ``d2``.

    Raises:
        KeyError: if a key of ``d2`` is missing from ``d1``.
    """
    return {key: d1[key] + d2[key] for key in d2}


def summing(data):
    """Return the running totals of the daily records in ``data``.

    Days are processed in ascending key order. The first day is carried over
    unchanged; every later day holds the previous day's totals plus its own
    values.

    Args:
        data: mapping of day -> {name: number}.

    Returns:
        A new dict with the same day keys (in sorted order) mapping to the
        cumulative records. ``data`` itself is not modified and none of its
        inner dicts are shared with the result.
    """
    result = {}
    previous = None
    for day in sorted(data):
        if previous is None:
            # Copy the first record so the result never aliases the caller's dict.
            previous = dict(data[day])
        else:
            previous = summing_oneday(previous, data[day])
        result[day] = previous
    return result
data = {20200516: {'Level1': 0, 'Level2': 1, 'Level3': 0, 'Level4': 0}, 20200517: {'Level1': 0, 'Level2': 0, 'Level3': 0, 'Level4': 1}, 20200518: {'Level1': 1, 'Level2': 0, 'Level3': 0, 'Level4': 0}, 20200519: {'Level1': 0, 'Level2': 1, 'Level3': 0, 'Level4': 1}}
pprint(summing(data))
I'm assuming all the keys are present on all the daily records. Otherwise we'll have to deal with that.

Python read particular data from response JSON

I am new to Python and JSON. I am calling an API and as response body I am getting below :
{'product': 'Cycle', 'available': 20, 'blocked': 0, 'orderBooked': 0, 'transfer': 0, 'restock': 0, 'unavailable': 0, 'total': 0, 'lCode': '2000112', 'locationId': '745', 'locationCode': '425', 'stockType': 'IN STOCK', 'adminStock': {'rp': 0, 'management': 0, 'rc': 0, 'total': 0, 'default': 0}, 'isBlocked': False, 'plannedDate': None, 'plannedUpdate': True, 'bookedQuantity': 0}
{'product': 'Cooker', 'available': 958, 'blocked': 10, 'orderBooked': 10, 'transfer': 30, 'restock': 0, 'unavailable': 0, 'total': 0, 'lCode': '589620', 'locationId': '420', 'locationCode': '695', 'stockType': 'PRE ORDER', 'adminStock': {'rp': 0, 'management': 0, 'rc': 0, 'total': 0, 'default': 0}, 'isBlocked': False, 'plannedDate': None, 'plannedUpdate': True, 'bookedQuantity': 0}
{'product': 'Cycle', 'available': 96220, 'blocked': 0, 'orderBooked': 0, 'transfer': 0, 'restock': 0, 'unavailable': 0, 'total': 0, 'lCode': '2000112', 'locationId': '745', 'locationCode': '425', 'stockType': 'CONFIRMED', 'adminStock': {'rp': 0, 'management': 0, 'rc': 0, 'total': 0, 'default': 0}, 'isBlocked': False, 'plannedDate': None, 'plannedUpdate': True, 'bookedQuantity': 0}
{'product': 'Lapms', 'available': 89958, 'blocked': 1890, 'orderBooked': 1045, 'transfer': 230, 'restock': 0, 'unavailable': 0, 'total': 0, 'lCode': '78963', 'locationId': '896', 'locationCode': '463', 'stockType': 'TRANSIT', 'adminStock': {'rp': 0, 'management': 0, 'rc': 0, 'total': 0, 'default': 0}, 'isBlocked': False, 'plannedDate': None, 'plannedUpdate': True, 'bookedQuantity': 0}
The data I mentioned above will vary as per the API request. So Whatever be the response. Based on the Products, I need to Print Multi line data. My request is to read this Json and get the following Data :
Name:<'product'>, Code:<'lCode'>, Location:<'locationCode'>, Stock Type:<'stockType'>, Availability:<'available'>
So For the Above Json, the output should be like :
Name:Cycle, Code:2000112, Location:425, Stock Type:IN STOCK, Availability:20
Name:Cooker, Code:589620, Location:695, Stock Type:PRE ORDER, Availability:958
Name:Cycle, Code:2000112, Location:425, Stock Type:CONFIRMED, Availability:96220
Name:Lapms, Code:78963, Location:463, Stock Type:TRANSIT, Availability:89958
So the output should have one line for each time a product occurs in the response.
I don't have any idea how to parse JSON in Python. Please help me understand how I can get the data in the format above. I haven't tried anything yet, as I am stuck.
This is what I believe you want. As some comments say, indeed these outputs should be treated as dictionaries or lists, with dictionaries and/or lists nested within them. It's important to know the difference since the first should be addressed by its key whereas the latter by its index. You can find some extra information regarding how to read jsons/dictionaries here
import pandas as pd
json_1 = {'product': 'Cycle', 'available': 20, 'blocked': 0, 'orderBooked': 0, 'transfer': 0, 'restock': 0, 'unavailable': 0, 'total': 0, 'lCode': '2000112', 'locationId': '745', 'locationCode': '425', 'stockType': 'IN STOCK', 'adminStock': {'rp': 0, 'management': 0, 'rc': 0, 'total': 0, 'default': 0}, 'isBlocked': False, 'plannedDate': None, 'plannedUpdate': True, 'bookedQuantity': 0}
json_2 = {'product': 'Cooker', 'available': 958, 'blocked': 10, 'orderBooked': 10, 'transfer': 30, 'restock': 0, 'unavailable': 0, 'total': 0, 'lCode': '589620', 'locationId': '420', 'locationCode': '695', 'stockType': 'PRE ORDER', 'adminStock': {'rp': 0, 'management': 0, 'rc': 0, 'total': 0, 'default': 0}, 'isBlocked': False, 'plannedDate': None, 'plannedUpdate': True, 'bookedQuantity': 0}
json_3 = {'product': 'Cycle', 'available': 96220, 'blocked': 0, 'orderBooked': 0, 'transfer': 0, 'restock': 0, 'unavailable': 0, 'total': 0, 'lCode': '2000112', 'locationId': '745', 'locationCode': '425', 'stockType': 'CONFIRMED', 'adminStock': {'rp': 0, 'management': 0, 'rc': 0, 'total': 0, 'default': 0}, 'isBlocked': False, 'plannedDate': None, 'plannedUpdate': True, 'bookedQuantity': 0}
json_4 = {'product': 'Lapms', 'available': 89958, 'blocked': 1890, 'orderBooked': 1045, 'transfer': 230, 'restock': 0, 'unavailable': 0, 'total': 0, 'lCode': '78963', 'locationId': '896', 'locationCode': '463', 'stockType': 'TRANSIT', 'adminStock': {'rp': 0, 'management': 0, 'rc': 0, 'total': 0, 'default': 0}, 'isBlocked': False, 'plannedDate': None, 'plannedUpdate': True, 'bookedQuantity': 0}
# The four records are wrapped in an outer single-element list, so they are
# reached through support_list[0].
support_list = []
support_list.append([json_1, json_2, json_3, json_4])
# One list per output column; each record appends one value to every list.
support_dict = {'Name': [], 'Code': [], 'Location': [], 'Stock type': [], 'Availability': []}
for record in support_list[0]:
    support_dict['Name'].append(record['product'])
    support_dict['Code'].append(record['lCode'])
    support_dict['Location'].append(record['locationCode'])
    support_dict['Stock type'].append(record['stockType'])
    support_dict['Availability'].append(record['available'])
# A dict of equal-length lists becomes a frame with one column per key.
df = pd.DataFrame(support_dict)
print(df)
Output:
Name Code Location Stock type Availability
0 Cycle 2000112 425 IN STOCK 20
1 Cooker 589620 695 PRE ORDER 958
2 Cycle 2000112 425 CONFIRMED 96220
3 Lapms 78963 463 TRANSIT 89958
EDIT: The OP says the response is just a list with multiple JSON objects in it.
The same logic applies:
import pandas as pd

# The API response: a list with one record dict per product entry.
json_output = [
    {'product': 'Cycle', 'available': 20, 'blocked': 0, 'orderBooked': 0, 'transfer': 0, 'restock': 0, 'unavailable': 0, 'total': 0, 'lCode': '2000112', 'locationId': '745', 'locationCode': '425', 'stockType': 'IN STOCK', 'adminStock': {'rp': 0, 'management': 0, 'rc': 0, 'total': 0, 'default': 0}, 'isBlocked': False, 'plannedDate': None, 'plannedUpdate': True, 'bookedQuantity': 0},
    {'product': 'Cooker', 'available': 958, 'blocked': 10, 'orderBooked': 10, 'transfer': 30, 'restock': 0, 'unavailable': 0, 'total': 0, 'lCode': '589620', 'locationId': '420', 'locationCode': '695', 'stockType': 'PRE ORDER', 'adminStock': {'rp': 0, 'management': 0, 'rc': 0, 'total': 0, 'default': 0}, 'isBlocked': False, 'plannedDate': None, 'plannedUpdate': True, 'bookedQuantity': 0},
    {'product': 'Cycle', 'available': 96220, 'blocked': 0, 'orderBooked': 0, 'transfer': 0, 'restock': 0, 'unavailable': 0, 'total': 0, 'lCode': '2000112', 'locationId': '745', 'locationCode': '425', 'stockType': 'CONFIRMED', 'adminStock': {'rp': 0, 'management': 0, 'rc': 0, 'total': 0, 'default': 0}, 'isBlocked': False, 'plannedDate': None, 'plannedUpdate': True, 'bookedQuantity': 0},
    {'product': 'Lapms', 'available': 89958, 'blocked': 1890, 'orderBooked': 1045, 'transfer': 230, 'restock': 0, 'unavailable': 0, 'total': 0, 'lCode': '78963', 'locationId': '896', 'locationCode': '463', 'stockType': 'TRANSIT', 'adminStock': {'rp': 0, 'management': 0, 'rc': 0, 'total': 0, 'default': 0}, 'isBlocked': False, 'plannedDate': None, 'plannedUpdate': True, 'bookedQuantity': 0},
]

# One list per output column; each record appends one value to every list.
support_dict = {'Name': [], 'Code': [], 'Location': [], 'Stock type': [], 'Availability': []}
for record in json_output:
    support_dict['Name'].append(record['product'])
    support_dict['Code'].append(record['lCode'])
    support_dict['Location'].append(record['locationCode'])
    support_dict['Stock type'].append(record['stockType'])
    support_dict['Availability'].append(record['available'])
# A dict of equal-length lists becomes a frame with one column per key.
df = pd.DataFrame(support_dict)
print(df)
Output:
Name Code Location Stock type Availability
0 Cycle 2000112 425 IN STOCK 20
1 Cooker 589620 695 PRE ORDER 958
2 Cycle 2000112 425 CONFIRMED 96220
3 Lapms 78963 463 TRANSIT 89958
EDIT 2: If you want the output as lines:
json_output= [{'product': 'Cycle', 'available': 20, 'blocked': 0, 'orderBooked': 0, 'transfer': 0, 'restock': 0, 'unavailable': 0, 'total': 0, 'lCode': '2000112', 'locationId': '745', 'locationCode': '425', 'stockType': 'IN STOCK', 'adminStock': {'rp': 0, 'management': 0, 'rc': 0, 'total': 0, 'default': 0}, 'isBlocked': False, 'plannedDate': None, 'plannedUpdate': True, 'bookedQuantity': 0},{'product': 'Cooker', 'available': 958, 'blocked': 10, 'orderBooked': 10, 'transfer': 30, 'restock': 0, 'unavailable': 0, 'total': 0, 'lCode': '589620', 'locationId': '420', 'locationCode': '695', 'stockType': 'PRE ORDER', 'adminStock': {'rp': 0, 'management': 0, 'rc': 0, 'total': 0, 'default': 0}, 'isBlocked': False, 'plannedDate': None, 'plannedUpdate': True, 'bookedQuantity': 0},{'product': 'Cycle', 'available': 96220, 'blocked': 0, 'orderBooked': 0, 'transfer': 0, 'restock': 0, 'unavailable': 0, 'total': 0, 'lCode': '2000112', 'locationId': '745', 'locationCode': '425', 'stockType': 'CONFIRMED', 'adminStock': {'rp': 0, 'management': 0, 'rc': 0, 'total': 0, 'default': 0}, 'isBlocked': False, 'plannedDate': None, 'plannedUpdate': True, 'bookedQuantity': 0},{'product': 'Lapms', 'available': 89958, 'blocked': 1890, 'orderBooked': 1045, 'transfer': 230, 'restock': 0, 'unavailable': 0, 'total': 0, 'lCode': '78963', 'locationId': '896', 'locationCode': '463', 'stockType': 'TRANSIT', 'adminStock': {'rp': 0, 'management': 0, 'rc': 0, 'total': 0, 'default': 0}, 'isBlocked': False, 'plannedDate': None, 'plannedUpdate': True, 'bookedQuantity': 0}]
# Print one summary line per record in the response list.
for record in json_output:
    print(
        f"Name: {record['product']}, Code: {record['lCode']}, "
        f"Location: {record['locationCode']}, Stock type: {record['stockType']}, "
        f"Availability: {record['available']}"
    )
Output:
Name: Cycle, Code: 2000112, Location: 425, Stock type: IN STOCK, Availability: 20
Name: Cooker, Code: 589620, Location: 695, Stock type: PRE ORDER, Availability: 958
Name: Cycle, Code: 2000112, Location: 425, Stock type: CONFIRMED, Availability: 96220
Name: Lapms, Code: 78963, Location: 463, Stock type: TRANSIT, Availability: 89958
If you parse a JSON file, you will get a standard Python dictionary.
import json

# A JSON document held as text ...
json_data = '{"a": 1, "b": 2, "c": 3, "d": 4, "e": 5}'
# ... is decoded by json.loads into an ordinary dict.
parsed_json = json.loads(json_data)

Cleanest way to sum list of nested dicts

Is there a cleaner/more pythonic way of summing the contents of a list of nested dicts? Here's what I'm doing, but I suspect that there may be a better way:
list_of_nested_dicts = [
    {'class1': {'TP': 1, 'FP': 0, 'FN': 2}, 'class2': {'TP': 0, 'FP': 0, 'FN': 0}, 'class3': {'TP': 0, 'FP': 0, 'FN': 0}, 'class4': {'TP': 1, 'FP': 0, 'FN': 2}},
    {'class1': {'TP': 1, 'FP': 0, 'FN': 2}, 'class2': {'TP': 0, 'FP': 0, 'FN': 0}, 'class3': {'TP': 0, 'FP': 0, 'FN': 0}, 'class4': {'TP': 1, 'FP': 0, 'FN': 2}},
    {'class1': {'TP': 1, 'FP': 0, 'FN': 2}, 'class2': {'TP': 0, 'FP': 0, 'FN': 0}, 'class3': {'TP': 0, 'FP': 0, 'FN': 0}, 'class4': {'TP': 1, 'FP': 0, 'FN': 2}},
    {'class1': {'TP': 1, 'FP': 0, 'FN': 2}, 'class2': {'TP': 0, 'FP': 0, 'FN': 0}, 'class3': {'TP': 0, 'FP': 0, 'FN': 0}, 'class4': {'TP': 1, 'FP': 0, 'FN': 2}},
]
# Start every label of the first entry at zero for the three hard-coded counters.
total_counts = {k: {'TP': 0, 'FP': 0, 'FN': 0} for k in list_of_nested_dicts[0]}
# Add each entry's per-label counters onto the running totals.
for d in list_of_nested_dicts:
    for label, counts_dict in d.items():
        for k, v in counts_dict.items():
            total_counts[label][k] += v
print(total_counts)
(Assuming all keys are exactly the same, but values could be any integer)
You can get slightly tighter code using collections (similar result to @blhsing's answer):
import collections

# A missing label gets a fresh Counter on first access.
counts = collections.defaultdict(collections.Counter)
for d in list_of_nested_dicts:
    for k, v in d.items():
        # Counter.update adds the counts in v to those already stored.
        counts[k].update(v)
This will give you a defaultdict of counters instead of only dicts, but they behave similarly. You can also explicitly cast them to dicts at the end if you want.
{'class1': {'FN': 8, 'FP': 0, 'TP': 4},
'class2': {'FN': 0, 'FP': 0, 'TP': 0},
'class3': {'FN': 0, 'FP': 0, 'TP': 0},
'class4': {'FN': 8, 'FP': 0, 'TP': 4}}
vs
defaultdict(<class 'collections.Counter'>,
{'class1': Counter({'FN': 8, 'TP': 4, 'FP': 0}),
'class2': Counter({'TP': 0, 'FP': 0, 'FN': 0}),
'class3': Counter({'TP': 0, 'FP': 0, 'FN': 0}),
'class4': Counter({'FN': 8, 'TP': 4, 'FP': 0})})
One thing in your code that stands out as "unclean" is the fact that you are hard-coding the keys of the sub-dicts in the initialization of total_counts. You can avoid such hard-coding by using the dict.setdefault and dict.get methods as you iterate over the items of the sub-dicts instead:
total_counts = {}
for d in list_of_nested_dicts:
    for label, counts_dict in d.items():
        # Create the per-label totals on first sight, so no keys are hard-coded.
        label_totals = total_counts.setdefault(label, {})
        for k, v in counts_dict.items():
            # A counter not seen yet starts from 0.
            label_totals[k] = label_totals.get(k, 0) + v

Rows not appending to dataframe while on loop

I was working through a database and creating a dataframe of selected information. The database can be found at www.cricsheet.org.
The code for the same is:
bat = {'Name' : [], 'Runs' : [], 'Balls' : [], 'StrikeR' : []}
batsman = pd.DataFrame(bat)
batsman.head()
index = ['Name','Runs','Balls','StrikeR']
data = []
count = 0
for i in items[0]["1st innings"]["deliveries"]:
name = list(i.values())[0]["batsman"]
run = list(i.values())[0]["runs"]["batsman"]
if name in list(batsman['Name']):
batsman.loc[batsman.Name == name].Runs += run
batsman.loc[batsman.Name == name].Balls += 1
batsman.loc[batsman.Name == name].StrikeR = batsman.loc[batsman.Name == name].Runs/batsman.loc[batsman.Name == name].Balls
else:
data = [name,run,1,run]
print(b)
batsman.append(pd.Series(data, index = index), ignore_index=True)
To give a context the array data is of type:
['GC Smith', 0, 1, 0]
['HH Dippenaar', 0, 1, 0]
['HH Dippenaar', 0, 1, 0]
['HH Dippenaar', 2, 1, 2]
['HH Dippenaar', 0, 1, 0]
I was hoping to update this data in a pandas dataframe. However, the data is not being appended to the dataframe. Can anyone tell me why, and what the solution is?
Edit: I am adding a part of items[0] dataset.
{'1st innings': {'team': 'South Africa', 'deliveries': [{0.1: {'batsman': 'GC Smith', 'bowler': 'WPUJC Vaas', 'non_striker': 'HH Dippenaar', 'runs': {'batsman': 0, 'extras': 0, 'total': 0}}}, {0.2: {'batsman': 'GC Smith', 'bowler': 'WPUJC Vaas', 'non_striker': 'HH Dippenaar', 'runs': {'batsman': 0, 'extras': 0, 'total': 0}}}, {0.3: {'batsman': 'GC Smith', 'bowler': 'WPUJC Vaas', 'non_striker': 'HH Dippenaar', 'runs': {'batsman': 0, 'extras': 0, 'total': 0}}}, {0.4: {'batsman': 'GC Smith', 'bowler': 'WPUJC Vaas', 'non_striker': 'HH Dippenaar', 'runs': {'batsman': 0, 'extras': 0, 'total': 0}}}, {0.5: {'batsman': 'GC Smith', 'bowler': 'WPUJC Vaas', 'non_striker': 'HH Dippenaar', 'runs': {'batsman': 0, 'extras': 0, 'total': 0}}}, {0.6: {'batsman': 'GC Smith', 'bowler': 'WPUJC Vaas', 'non_striker': 'HH Dippenaar', 'runs': {'batsman': 0, 'extras': 0, 'total': 0}}}
Hei,
Appending to a dataframe doesn't happen in place. The append function will only return the new dataframe which contains the appended value, and will not modify the original dataframe
So,
batsman.append(pd.Series(data, index = index), ignore_index=True)
Should be
# DataFrame.append was removed in pandas 2.0. pd.concat likewise returns a new
# frame and leaves the original untouched, so the result must be rebound.
batsman = pd.concat([batsman, pd.DataFrame([data], columns=index)], ignore_index=True)

Categories

Resources