Merge data from list of dict - python

I think what I want to do is easy but I don't know the correct way to do this.
I have a list as:
[
{
"id": 16,
"condition": true,
"tags": 6,
},
{
"id": 16,
"condition": true,
"tags": 1,
},
{
"id": 16,
"condition": true,
"tags": 4,
},
{
"id": 3,
"condition": false,
"tags": 3,
}
]
And I want to group the elements of my list by id and condition; the output would be:
[
{
"id": 16,
"condition": true,
"tags": [6, 1, 4],
},
{
"id": 3,
"condition": false,
"tags": [3],
}
]
I can do this by looping on my list and creating another array but I was wondering about a better way to do this.
for now my code is like this:
def format(self):
    """Group the source records by ``(id, condition)`` and collect their tags.

    Reads the module-level list ``the_list_I_want_to_modify``.  Each record is
    a dict with at least the keys ``'id'``, ``'condition'`` and ``'tags'``.

    Returns:
        A new list with one dict per distinct ``(id, condition)`` pair, in
        first-seen order.  Each dict is a shallow copy of the first record of
        its group with ``'tags'`` replaced by the list of every tag in the
        group.

    The input records are never modified, so calling this repeatedly yields
    the same result.  ``id`` and ``condition`` values must be hashable.
    """
    list_assigned = the_list_I_want_to_modify
    # One dict lookup per record instead of rescanning the result list.
    grouped = {}
    for entry in list_assigned:
        key = (entry['id'], entry['condition'])
        if key not in grouped:
            # Copy so the caller's record keeps its scalar 'tags' value.
            merged = dict(entry)
            merged['tags'] = []
            grouped[key] = merged
        grouped[key]['tags'].append(entry['tags'])
    return list(grouped.values())
Thanks

There might be a prettier solution, but you could solve it by first building a temporary dictionary whose keys are tuples of all the (key, value) pairs from each original dict that you want to group by, and whose values are lists collecting the tags. I use the dict.setdefault(key, default) method so that each new key starts out with an empty list.
Then you can unpack that dictionary into a list again afterwards with a list comprehension.
a = [
    {
        "id": 16,
        "condition": True,
        "tags": 6,
    },
    {
        "id": 16,
        "condition": True,
        "tags": 1,
    },
    {
        "id": 16,
        "condition": True,
        "tags": 4,
    },
    {
        "verified_by": 3,
        "condition": False,
        "tags": 3,
    },
]

# Bucket the tags under a hashable key made of every non-'tags' field.
# Sorting the (name, value) pairs makes the key independent of field order.
tmp = {}
for record in a:
    group_key = tuple(sorted(pair for pair in record.items() if pair[0] != 'tags'))
    if group_key not in tmp:
        tmp[group_key] = []
    tmp[group_key].append(record['tags'])

# Expand every (key, tags) bucket back into a flat dict.
out = [dict(pairs + (('tags', tags),)) for pairs, tags in tmp.items()]
print(out)
Output:
>>> out
[{'id': 16, 'condition': True, 'tags': [6, 1, 4]}, {'verified_by': 3, 'condition': False, 'tags': [3]}]

Related

Split Python Array based on a property

I have a Python List like this:
myList = [
{
"key": 1,
"date": "2020-01-02"
},
{
"key": 2,
"date": "2020-02-02"
},
{
"key": 3,
"date": "2020-01-03"
},
{
"key": 4,
"date": "2020-01-02"
},
{
"key": 5,
"date": "2020-02-02"
},
]
Now I want to split the array based on the property "date". I want my list to look like this
myList = [
[
{
"key": 1,
"date": "2020-01-02"
},
{
"key": 4,
"date": "2020-01-02"
},
],
[
{
"key": 2,
"date": "2020-02-02"
},
{
"key": 5,
"date": "2020-02-02"
},
],
[
{
"key": 3,
"date": "2020-01-03"
},
]
]
So I want a new array for each specific date in the current list. Can someone help me to achieve that?
# Map every distinct date to the positions of the entries that carry it.
d = {}
for idx, entry in enumerate(myList):
    d.setdefault(entry['date'], []).append(idx)
# One sub-list per date; this rebinds `myList`, as the OP's example does.
myList = [[myList[idx] for idx in positions] for positions in d.values()]
Logic:
You want to group the data by 'date', which means you need a dictionary keyed by date. The rest is just implementation detail.

is there a way to check nested dictionary values and if they have 0 or null or empty string then delete them in python

this is json file that i want to convert to python
{
"UniqueId": "PO3589472",
"FareType": 2,
"BookedBy": "Api ",
"OrderBy": "Api ",
"ClientBalance": 0,
"Error": null,
"Success": true,
"TktTimeLimit": "2022-08-10T14:11:45",
"Category": 21,
"Status": 21,
"RefundMethod": 1,
"TravelItinerary": {
"ItineraryInfo": {
"ItineraryPricing": {
"BaseFare": 8469250,
"ServiceTax": 0,
"TotalTax": 993000,
"TotalFare": 9462250,
"TotalCommission": 0,
"Currency": "IRR"
},
"CustomerInfoes": [
{
"Customer": {
"Gender": 0,
"PassengerType": 1,
"PassportNumber": "",
"NationalId": "1829961233",
"Nationality": "IR",
"DateOfBirth": "1996-07-08T00:00:00",
"PassportExpireDate": "0001-01-01T00:00:00",
"PassportIssueCountry": "IR",
"PassportIssueDate": "2022-08-10T00:00:00",
"PaxName": {
"PassengerFirstName": "MAJID",
"PassengerMiddleName": null,
"PassengerLastName": "MAJIDIFAR",
"PassengerTitle": 0
}
},
"ETickets": "8151405444745",
"ETicketNumbers": [
{
"ETicketNumber": "8151405444745",
"EticketStatus": 1,
"IsRefunded": false,
"DateOfIssue": "2022-08-10T13:58:47",
"AirlinePnr": "TXNXM",
"TotalRefund": 0
}
]
}
],
"ReservationItems": [
{
"AirEquipmentType": "737",
"AirlinePnr": "TXNXM",
"ArrivalAirportLocationCode": "ABD",
"ArrivalDateTime": "2022-08-17T23:25:00",
"ArrivalTerminal": "",
"Baggage": "20KG",
"DepartureAirportLocationCode": "THR",
"DepartureDateTime": "2022-08-17T22:05:00",
"DepartureTerminal": "Terminal 4",
"FlightNumber": "3750",
"JourneyDuration": "01:20",
"JourneyDurationPerMinute": 0,
"MarketingAirlineCode": "EP",
"OperatingAirlineCode": "EP",
"ResBookDesigCode": "Y",
"StopQuantity": 0,
"IsCharter": false,
"TechnicalStops": [],
"IsReturn": false,
"CabinClassCode": 1
}
],
"TripDetailPtcFareBreakdowns": [
{
"PassengerTypeQuantity": {
"PassengerType": 1,
"Quantity": 1
},
"TripDetailPassengerFare": {
"BaseFare": 8469250,
"ServiceTax": 0,
"Tax": 993000,
"TotalFare": 9462250,
"Commission": 0,
"Currency": "IRR"
}
}
],
"PhoneNumber": "09359276735",
"Email": "info#iran-tech.com",
"ItineraryFareFamily": null
},
"BookingNotes": [],
"Services": []
},
"ValidatingAirlineCode": "EP",
"DirectionInd": 1,
"OnlineCheckIn": false,
"AirRemark": [],
"curl_error": false
}
As mentioned already, this begs for recursion. Here is an example:
import json
from collections.abc import Callable, Hashable
from typing import Any
def filter_dict(
    dictionary: dict[Hashable, Any],
    exclude_func: Callable[[Any], bool],
) -> None:
    """Delete, in place, every entry whose value ``exclude_func`` rejects.

    Values that are themselves dicts are filtered recursively and are never
    passed to ``exclude_func`` (so a nested dict stays even if it ends up
    empty).  Dicts stored inside lists are not visited.

    Args:
        dictionary: The mapping to prune; it is modified in place.
        exclude_func: Predicate returning true for values to remove.
    """
    doomed_keys = []
    for name in dictionary:
        item = dictionary[name]
        if isinstance(item, dict):
            filter_dict(item, exclude_func)
            continue
        if exclude_func(item):
            doomed_keys.append(name)
    # Deletion is deferred so the dict is not resized while being iterated.
    for name in doomed_keys:
        del dictionary[name]
def is_nothing(value: Any) -> bool:
    """Tell whether ``value`` is ``None``, equal to zero, or the empty string."""
    if value is None:
        return True
    return value == 0 or value == ""
def main() -> None:
    """Parse a JSON string, filter it with ``is_nothing`` and pretty-print it."""
    raw = "{}"  # Your JSON string here
    document = json.loads(raw)
    filter_dict(document, is_nothing)
    pretty = json.dumps(document, indent=4)
    print(pretty)


if __name__ == '__main__':
    main()
It does not handle JSON objects nested inside arrays (i.e. dictionaries nested inside lists), but I think you can build on that yourself.
If data contains your parsed Json string from the question you can do:
def change(o):
    """Recursively drop dict entries whose value is ``None``, ``0`` or ``""``.

    Works in place on nested dicts and lists.  Only dict entries are removed;
    list elements are descended into but never deleted.  Anything that is
    neither a dict nor a list is left untouched.
    """
    if isinstance(o, list):
        for element in o:
            change(element)
        return
    if not isinstance(o, dict):
        return
    # Iterate over a snapshot of the keys because entries are deleted as we go.
    for key in list(o):
        value = o[key]
        if value is None or value == 0 or value == "":
            del o[key]
            continue
        change(value)
# Prune `data` (the parsed JSON from the question) in place, then show what is left.
change(data)
print(data)
Prints:
{
"UniqueId": "PO3589472",
"FareType": 2,
"BookedBy": "Api ",
"OrderBy": "Api ",
"Success": True,
"TktTimeLimit": "2022-08-10T14:11:45",
"Category": 21,
"Status": 21,
"RefundMethod": 1,
"TravelItinerary": {
"ItineraryInfo": {
"ItineraryPricing": {
"BaseFare": 8469250,
"TotalTax": 993000,
"TotalFare": 9462250,
"Currency": "IRR",
},
"CustomerInfoes": [
{
"Customer": {
"PassengerType": 1,
"NationalId": "1829961233",
"Nationality": "IR",
"DateOfBirth": "1996-07-08T00:00:00",
"PassportExpireDate": "0001-01-01T00:00:00",
"PassportIssueCountry": "IR",
"PassportIssueDate": "2022-08-10T00:00:00",
"PaxName": {
"PassengerFirstName": "MAJID",
"PassengerLastName": "MAJIDIFAR",
},
},
"ETickets": "8151405444745",
"ETicketNumbers": [
{
"ETicketNumber": "8151405444745",
"EticketStatus": 1,
"DateOfIssue": "2022-08-10T13:58:47",
"AirlinePnr": "TXNXM",
}
],
}
],
"ReservationItems": [
{
"AirEquipmentType": "737",
"AirlinePnr": "TXNXM",
"ArrivalAirportLocationCode": "ABD",
"ArrivalDateTime": "2022-08-17T23:25:00",
"Baggage": "20KG",
"DepartureAirportLocationCode": "THR",
"DepartureDateTime": "2022-08-17T22:05:00",
"DepartureTerminal": "Terminal 4",
"FlightNumber": "3750",
"JourneyDuration": "01:20",
"MarketingAirlineCode": "EP",
"OperatingAirlineCode": "EP",
"ResBookDesigCode": "Y",
"TechnicalStops": [],
"CabinClassCode": 1,
}
],
"TripDetailPtcFareBreakdowns": [
{
"PassengerTypeQuantity": {
"PassengerType": 1,
"Quantity": 1,
},
"TripDetailPassengerFare": {
"BaseFare": 8469250,
"Tax": 993000,
"TotalFare": 9462250,
"Currency": "IRR",
},
}
],
"PhoneNumber": "09359276735",
"Email": "info#iran-tech.com",
},
"BookingNotes": [],
"Services": [],
},
"ValidatingAirlineCode": "EP",
"DirectionInd": 1,
"AirRemark": [],
}
This function will recursively iterate over dictionary and remove keys with empty values. However, there may be some issues with nested lists, I will try to finish it later.
def remove_keys_in_one_level(d):
    """Recursively delete dict entries whose value is falsy and return ``d``.

    Falsy covers ``None``, ``0``, ``False``, ``""`` and empty containers.
    Lists are descended into, but their elements are never removed.  The
    truthiness test happens before descending, so a container that only
    becomes empty through the recursion is kept.  ``d`` is modified in place.
    """
    if isinstance(d, list):
        for element in d:
            remove_keys_in_one_level(element)
    elif isinstance(d, dict):
        # Snapshot the keys: entries are removed during the walk.
        for key in tuple(d):
            value = d[key]
            if value:
                remove_keys_in_one_level(value)
            else:
                del d[key]
    return d
# Strip falsy entries from `jsn` in place; the returned reference is not used here.
remove_keys_in_one_level(jsn)

how to get a nested data from Concatenate values with same keys in a list of dictionaries?

I have all_writing_test_name_data
all_writing_test_name_data=
[
{
"id": 1,
"book_name": "Math",
"writing_test_description": "string",
"subject_id": 1,
"book_id": 2,
"writing_test": "string"
},
{
"id": 2,
"book_name": "Math-1",
"writing_test_description": "string-1",
"subject_id": 1,
"book_id": 2,
"writing_test": "string-1"
}
]
and I want to Concatenate all_writing_test_name_data like this
[
{
"subject_id": 1,
"writing_items": [
{
"id": 1,
"book_name": "Math",
"writing_test_description": "string",
"book_id": 2,
"writing_test": "string"
},
{
"id": 2,
"book_name": "Math-1",
"writing_test_description": "string-1",
"book_id": 2,
"writing_test": "string-1"
}
]
}
]
I have tried this, but I think something is lacking in the code, because I can't get the desired data:
# Exploratory attempt: view the records column-wise and print pieces of them.
import operator
from collections import Counter
from functools import reduce

x = all_writing_test_name_data
# printing original list
print("The original list is : " + str(x))

# Union of the keys used by any record.
all_keys = reduce(operator.or_, (d.keys() for d in x))
# Column view: field name -> list of that field's value in every record
# (None where a record lacks the field).
bar = {key: [d.get(key) for d in x] for key in all_keys}
print('bar', bar['writing_test'] + bar['writing_test_description'])

# NOTE(review): Counter is used here only as a mapping of each string to
# itself; nothing is actually counted.
result = Counter()
for d in x:
    result[d['writing_test']] = d['writing_test']
    result[d['writing_test_description']] = d['writing_test_description']
print(result)

z = bar['writing_test'] + bar['writing_test_description']
# Was the Python 2 statement `print bar`, a SyntaxError on Python 3 and
# inconsistent with the print() calls above.
print(bar)
But I can't get my desired data. How can I get exactly that output, and what is the mistake?
You can group the input data by the subject_id property of each dict, then re-format that data into the value you want:
from collections import defaultdict

# Bucket the records by subject.  pop() removes 'subject_id' from each record,
# so the input dicts are modified in place.
groups = defaultdict(list)
for test in all_writing_test_name_data:
    groups[test.pop('subject_id')].append(test)

# One output entry per subject, carrying the records stripped of the key.
result = [
    {'subject_id': key, 'writing_items': members}
    for key, members in groups.items()
]
Output:
[
{
"subject_id": 1,
"writing_items": [
{
"id": 1,
"book_name": "Math",
"writing_test_description": "string",
"book_id": 2,
"writing_test": "string"
},
{
"id": 2,
"book_name": "Math-1",
"writing_test_description": "string-1",
"book_id": 2,
"writing_test": "string-1"
}
]
}
]
Note that this will alter the value of all_writing_test_name_data (due to the test.pop()). If this is not desired, add
test = test.copy()
prior to the pop.

How to extract JSON from a nested JSON file?

I am calling an API and getting a response like the below.
{
"status": 200,
"errmsg": "OK",
"data": {
"total": 12,
"items": [{
"id": 11,
"name": "BBC",
"priority": 4,
"levelStr": "All",
"escalatingChainId": 3,
"escalatingChain": {
"inAlerting": false,
"throttlingAlerts": 20,
"enableThrottling": true,
"name": "Example123",
"destination": [],
"description": "",
"ccdestination": [],
"id": 3,
"throttlingPeriod": 10
}
},
{
"id": 21,
"name": "CNBC",
"priority": 4,
"levelStr": "All",
"escalatingChainId": 3,
"escalatingChain": {
"inAlerting": false,
"throttlingAlerts": 20,
"enableThrottling": true,
"name": "Example456",
"destination": [],
"description": "",
"ccdestination": [],
"id": 3,
"throttlingPeriod": 10
}
}
]
}
}
I need to clean-up this JSON a bit and produce a simple JSON like below where escalatingChainName is the name in the escalatingChain list so that I can write this into a CSV file.
{
"items": [{
"id": 11,
"name": "BBC",
"priority": 4,
"levelStr": "All",
"escalatingChainId": 3,
"escalatingChainName": "Example123"
},
{
"id": 21,
"name": "CNBC",
"priority": 4,
"levelStr": "All",
"escalatingChainId": 3,
"escalatingChainName": "Example456"
}
]
}
Is there a JSON function that I can use to copy only the necessary key-value or nested key-values to a new JSON object?
With the below code, I am able to get the details list.
# Decode the response body as JSON.
# NOTE(review): `response` is created outside this snippet — presumably a
# requests-style response object; confirm.
json_response = response.json()
# 'data' is the payload wrapper; its 'items' entry is the list of records.
items = json_response['data']
details = items['items']
I can print individual list items using
# Dump every record of `details`, one per line.
for record in details:
    print(record)
How do I take it from here to pull only the necessary fields like id, name, priority and the name from escalatingchain to create a new list or JSON?
There is no existing function that will do what you want, so you'll need to write one. Fortunately that's not too hard in this case — basically you just create a list of new items by extracting the pieces of data you want from the existing ones.
import json
json_response = """\
{
"status": 200,
"errmsg": "OK",
"data": {
"total": 12,
"items": [{
"id": 11,
"name": "BBC",
"priority": 4,
"levelStr": "All",
"escalatingChainId": 3,
"escalatingChain": {
"inAlerting": false,
"throttlingAlerts": 20,
"enableThrottling": true,
"name": "Example123",
"destination": [],
"description": "",
"ccdestination": [],
"id": 3,
"throttlingPeriod": 10
}
},
{
"id": 21,
"name": "CNBC",
"priority": 4,
"levelStr": "All",
"escalatingChainId": 3,
"escalatingChain": {
"inAlerting": false,
"throttlingAlerts": 20,
"enableThrottling": true,
"name": "Example456",
"destination": [],
"description": "",
"ccdestination": [],
"id": 3,
"throttlingPeriod": 10
}
}
]
}
}
"""
response = json.loads(json_response)

# Keep the flat fields of every item and lift the nested chain's name up to
# a top-level 'escalatingChainName' field.
cleaned = [
    {
        'id': item['id'],
        'name': item['name'],
        'priority': item['priority'],
        'levelStr': item['levelStr'],
        'escalatingChainId': item['escalatingChainId'],
        'escalatingChainName': item['escalatingChain']['name'],
    }
    for item in response['data']['items']
]

print('cleaned:')
print(json.dumps(cleaned, indent=4))
You can try:
data = {
"status": 200,
"errmsg": "OK",
"data": {
"total": 12,
"items": [{
"id": 11,
"name": "BBC",
"priority": 4,
"levelStr": "All",
"escalatingChainId": 3,
"escalatingChain": {
"inAlerting": False,
"throttlingAlerts": 20,
"enableThrottling": True,
"name": "Example123",
"destination": [],
"description": "",
"ccdestination": [],
"id": 3,
"throttlingPeriod": 10
}
},
{
"id": 21,
"name": "CNBC",
"priority": 4,
"levelStr": "All",
"escalatingChainId": 3,
"escalatingChain": {
"inAlerting": False,
"throttlingAlerts": 20,
"enableThrottling": True,
"name": "Example456",
"destination": [],
"description": "",
"ccdestination": [],
"id": 3,
"throttlingPeriod": 10
}
}
]
}
}
# Print a handful of fields from every record, one value per line.
for single_item in data["data"]["items"]:
    for field in ("id", "name", "priority", "levelStr"):
        print(single_item[field])
    # Nested lookup: a field of the embedded escalatingChain object.
    print(single_item["escalatingChain"]["inAlerting"])
    # and so on
There are two ways of approaching this, depending on whether you're dealing with a variable or a .json file; both use Python list and dictionary comprehensions:
Where the data variable (a nested dictionary) is already defined:
# keys you want
to_keep = ['id', 'name', 'priority', 'levelStr', 'escalatingChainId',
           'escalatingChainName']
# For every record, collect '<field>Name' -> nested['name'] for each field
# whose value is a dict (here: escalatingChain -> escalatingChainName).
# Exactly one dict is built per record, so this list stays aligned with the
# records even when a record has no nested dict or more than one.
escalations = [
    {field + 'Name': value['name']
     for field, value in record.items() if isinstance(value, dict)}
    for record in data['data']['items']
]
# Flat fields filtered by to_keep, merged with that record's escalation names
# to produce a flattened list of dictionaries.
new_data = [
    {**{k: v for k, v in record.items() if k in to_keep}, **escl}
    for record, escl in zip(data['data']['items'], escalations)
]
Or (since you refer to the json package), if you have saved the response as a .json file:
import json

# JSON files are UTF-8 by specification; do not rely on the platform default.
with open('response.json', 'r', encoding='utf-8') as handl:
    data = json.load(handl)

to_keep = ['id', 'name', 'priority', 'levelStr', 'escalatingChainId',
           'escalatingChainName']
# For every record, collect '<field>Name' -> nested['name'] for each field
# whose value is a dict (here: escalatingChain -> escalatingChainName).
# Exactly one dict is built per record, so this list stays aligned with the
# records even when a record has no nested dict or more than one.
escalations = [
    {field + 'Name': value['name']
     for field, value in record.items() if isinstance(value, dict)}
    for record in data['data']['items']
]
# Flat fields filtered by to_keep, merged with that record's escalation names.
new_data = [
    {**{k: v for k, v in record.items() if k in to_keep}, **escl}
    for record, escl in zip(data['data']['items'], escalations)
]
Both produce output:
[{'id': 11,
'name': 'BBC',
'priority': 4,
'levelStr': 'All',
'escalatingChainId': 3,
'escalatingChainName': 'Example123'},
{'id': 21,
'name': 'CNBC',
'priority': 4,
'levelStr': 'All',
'escalatingChainId': 3,
'escalatingChainName': 'Example456'}]

Extract data from deeply-nested JSON

I am new to JSON data and I am trying to get down to a very deeply nested dictionary that sits inside of 3 arrays and a dictionary of a JSON file. I am trying to get to the dictionary with the label "player". I am trying to extract the players for all teams to then move into a DataFrame so I can use for other analysis.
How do I write a loop to get to this data?
This is what I'm trying to do but it doesn't work:
# NOTE(review): json.dumps() serialises to a str, so the comprehension below
# iterates over single characters and the ['playerPoolEntry'] lookup raises
# TypeError.  Presumably `rostered_players` is already parsed Python data
# (nested lists/dicts) — confirm; if so it should be walked directly instead.
rostered = json.dumps(rostered_players)
player_value = [[
values['playerPoolEntry']['player']
]for values in rostered]
Here is a sample of data that I am working with:
rostered =
'''[
[
{
"appliedStatTotal": 260.05,
"entries": [
[
{
"injuryStatus": "NORMAL",
"lineupSlotId": 2,
"playerId": 2977644,
"playerPoolEntry": {
"appliedStatTotal": 37.6,
"id": 2977644,
"keeperValue": 56,
"keeperValueFuture": 56,
"lineupLocked": false,
"onTeamId": 1,
"player": {
"firstName": "Todd",
"fullName": "Todd Gurley II",
"id": 2977644,
"injuryStatus": "ACTIVE",
"lastName": "Gurley II"
},
"rosterLocked": false,
"tradeLocked": false
}
}
]
]
}
],
[
{
"appliedStatTotal": 260.05,
"entries": [
[
{
"injuryStatus": "NORMAL",
"lineupSlotId": 4,
"playerId": 3892889,
"playerPoolEntry": {
"appliedStatTotal": 6.6000000000000005,
"id": 3892889,
"keeperValue": 5,
"keeperValueFuture": 5,
"lineupLocked": false,
"onTeamId": 2,
"player": {
"defaultPositionId": 3,
"firstName": "Dede",
"fullName": "Dede Westbrook",
"id": 3892889,
"lastName": "Westbrook"
},
"rosterLocked": false,
"tradeLocked": false
}
}
]
]
}
]
]'''
One way to do this is to use recursion. This is a more functional approach.
from pprint import pprint
def iterate_deep(item):
    """Walk nested dicts and lists and pretty-print every ``'player'`` value.

    A value stored under the key ``'player'`` is printed with ``pprint`` and
    not searched any further; every other dict value and every list element
    is descended into.  Scalars are ignored.  Returns ``None``.
    """
    if isinstance(item, list):
        for element in item:
            iterate_deep(element)
        return
    if not isinstance(item, dict):
        return
    for name in item:
        payload = item[name]
        if name != 'player':
            iterate_deep(payload)
            continue
        pprint(payload)
# Walk the roster structure and print each 'player' dict that is found.
# NOTE(review): assumes `rostered_players` is already-parsed JSON (lists/dicts) — confirm.
iterate_deep(rostered_players)
A possible solution could be parsing the given JSON as it is.
# Parse the JSON text, then walk the fixed nesting:
# outer list -> list of team dicts -> 'entries' -> list of lists of entry dicts.
rostered = json.loads(rostered)
player_values = []
for team_group in rostered:
    for team in team_group:
        if 'entries' not in team:
            continue
        for entry_group in team['entries']:
            for entry in entry_group:
                if 'playerPoolEntry' not in entry:
                    continue
                pool_entry = entry['playerPoolEntry']
                # Entries without a 'player' object are skipped.
                if 'player' in pool_entry:
                    player_values.append(pool_entry['player'])

Categories

Resources