Extract data from deeply-nested JSON - python

I am new to JSON data and I am trying to get down to a very deeply nested dictionary that sits inside 3 arrays and a dictionary in a JSON file. I am trying to get to the dictionary with the label "player". I want to extract the players for all teams and then move them into a DataFrame that I can use for other analysis.
How do I write a loop to get to this data?
This is what I'm trying to do but it doesn't work:
rostered = json.dumps(rostered_players)
player_value = [[
values['playerPoolEntry']['player']
]for values in rostered]
Here is a sample of data that I am working with:
rostered =
'''[
[
{
"appliedStatTotal": 260.05,
"entries": [
[
{
"injuryStatus": "NORMAL",
"lineupSlotId": 2,
"playerId": 2977644,
"playerPoolEntry": {
"appliedStatTotal": 37.6,
"id": 2977644,
"keeperValue": 56,
"keeperValueFuture": 56,
"lineupLocked": false,
"onTeamId": 1,
"player": {
"firstName": "Todd",
"fullName": "Todd Gurley II",
"id": 2977644,
"injuryStatus": "ACTIVE",
"lastName": "Gurley II"
},
"rosterLocked": false,
"tradeLocked": false
}
}
]
]
}
],
[
{
"appliedStatTotal": 260.05,
"entries": [
[
{
"injuryStatus": "NORMAL",
"lineupSlotId": 4,
"playerId": 3892889,
"playerPoolEntry": {
"appliedStatTotal": 6.6000000000000005,
"id": 3892889,
"keeperValue": 5,
"keeperValueFuture": 5,
"lineupLocked": false,
"onTeamId": 2,
"player": {
"defaultPositionId": 3,
"firstName": "Dede",
"fullName": "Dede Westbrook",
"id": 3892889,
"lastName": "Westbrook"
},
"rosterLocked": false,
"tradeLocked": false
}
}
]
]
}
]
]'''

One way to do this is to use recursion. This is a more functional approach.
from pprint import pprint
def iterate_deep(item, key='player'):
    """Walk nested lists/dicts and collect every value stored under *key*.

    Each match is pretty-printed (the original behaviour) and also returned,
    so the caller can feed the results into further processing instead of
    only seeing them on stdout.

    Args:
        item: Any parsed JSON value. Lists and dicts are searched
            recursively; every other type is ignored.
        key: Dictionary key to look for. Defaults to ``'player'``.

    Returns:
        A list of the matched values, in traversal order. A matched value is
        not searched any further for nested occurrences of *key*.
    """
    found = []
    if isinstance(item, dict):
        for name, val in item.items():
            if name == key:
                pprint(val)
                found.append(val)
            else:
                found.extend(iterate_deep(val, key))
    elif isinstance(item, list):
        for deeper_item in item:
            found.extend(iterate_deep(deeper_item, key))
    return found
iterate_deep(rostered_players)

A possible solution could be parsing the given JSON as it is.
# Decode the JSON text; `rostered` is rebound from the raw string to the parsed structure.
rostered = json.loads(rostered)
player_values = []
for outer_group in rostered:
    for record in outer_group:
        # Records without an 'entries' key contribute nothing.
        if 'entries' not in record:
            continue
        for slot_list in record.get('entries'):
            for slot in slot_list:
                if 'playerPoolEntry' not in slot:
                    continue
                pool_entry = slot.get('playerPoolEntry')
                # Only keep entries that actually carry a 'player' dictionary.
                if 'player' in pool_entry:
                    player_values.append(pool_entry.get('player'))

Related

Split Python Array based on a property

I have a Python List like this:
myList = [
{
"key": 1,
"date": "2020-01-02"
},
{
"key": 2,
"date": "2020-02-02"
},
{
"key": 3,
"date": "2020-01-03"
},
{
"key": 4,
"date": "2020-01-02"
},
{
"key": 5,
"date": "2020-02-02"
},
]
Now I want to split the array based on the property "date". I want my list to look like this
myList = [
[
{
"key": 1,
"date": "2020-01-02"
},
{
"key": 4,
"date": "2020-01-02"
},
],
[
{
"key": 2,
"date": "2020-02-02"
},
{
"key": 5,
"date": "2020-02-02"
},
],
[
{
"key": 3,
"date": "2020-01-03"
},
]
]
So I want a new array for each specific date in the current list. Can someone help me to achieve that?
# Map each date to the indices of the items carrying it. Dicts preserve
# insertion order, so the groups come out in order of each date's first
# appearance in `myList`.
d = {}
for i, item in enumerate(myList):
    d.setdefault(item['date'], []).append(i)
# Replace the original `myList` with one sub-list per date, following the OP's requested behaviour.
myList = [[myList[i] for i in v] for v in d.values()]
Logic:
You want to group the data based on 'date', which means you need a dictionary data structure. The rest is just implementation details.

is there a way to check nested dictionary values and if they have 0 or null or empty string then delete them in python

This is the JSON file that I want to convert to Python:
{
"UniqueId": "PO3589472",
"FareType": 2,
"BookedBy": "Api ",
"OrderBy": "Api ",
"ClientBalance": 0,
"Error": null,
"Success": true,
"TktTimeLimit": "2022-08-10T14:11:45",
"Category": 21,
"Status": 21,
"RefundMethod": 1,
"TravelItinerary": {
"ItineraryInfo": {
"ItineraryPricing": {
"BaseFare": 8469250,
"ServiceTax": 0,
"TotalTax": 993000,
"TotalFare": 9462250,
"TotalCommission": 0,
"Currency": "IRR"
},
"CustomerInfoes": [
{
"Customer": {
"Gender": 0,
"PassengerType": 1,
"PassportNumber": "",
"NationalId": "1829961233",
"Nationality": "IR",
"DateOfBirth": "1996-07-08T00:00:00",
"PassportExpireDate": "0001-01-01T00:00:00",
"PassportIssueCountry": "IR",
"PassportIssueDate": "2022-08-10T00:00:00",
"PaxName": {
"PassengerFirstName": "MAJID",
"PassengerMiddleName": null,
"PassengerLastName": "MAJIDIFAR",
"PassengerTitle": 0
}
},
"ETickets": "8151405444745",
"ETicketNumbers": [
{
"ETicketNumber": "8151405444745",
"EticketStatus": 1,
"IsRefunded": false,
"DateOfIssue": "2022-08-10T13:58:47",
"AirlinePnr": "TXNXM",
"TotalRefund": 0
}
]
}
],
"ReservationItems": [
{
"AirEquipmentType": "737",
"AirlinePnr": "TXNXM",
"ArrivalAirportLocationCode": "ABD",
"ArrivalDateTime": "2022-08-17T23:25:00",
"ArrivalTerminal": "",
"Baggage": "20KG",
"DepartureAirportLocationCode": "THR",
"DepartureDateTime": "2022-08-17T22:05:00",
"DepartureTerminal": "Terminal 4",
"FlightNumber": "3750",
"JourneyDuration": "01:20",
"JourneyDurationPerMinute": 0,
"MarketingAirlineCode": "EP",
"OperatingAirlineCode": "EP",
"ResBookDesigCode": "Y",
"StopQuantity": 0,
"IsCharter": false,
"TechnicalStops": [],
"IsReturn": false,
"CabinClassCode": 1
}
],
"TripDetailPtcFareBreakdowns": [
{
"PassengerTypeQuantity": {
"PassengerType": 1,
"Quantity": 1
},
"TripDetailPassengerFare": {
"BaseFare": 8469250,
"ServiceTax": 0,
"Tax": 993000,
"TotalFare": 9462250,
"Commission": 0,
"Currency": "IRR"
}
}
],
"PhoneNumber": "09359276735",
"Email": "info#iran-tech.com",
"ItineraryFareFamily": null
},
"BookingNotes": [],
"Services": []
},
"ValidatingAirlineCode": "EP",
"DirectionInd": 1,
"OnlineCheckIn": false,
"AirRemark": [],
"curl_error": false
}
As mentioned already, this begs for recursion. Here is an example:
import json
from collections.abc import Callable, Hashable
from typing import Any
def filter_dict(
    dictionary: dict[Hashable, Any],
    exclude_func: Callable[[Any], bool],
    *,
    descend_lists: bool = False,
) -> None:
    """Delete, in place, every entry of *dictionary* whose value is excluded.

    Nested dictionaries are cleaned recursively and are themselves never
    passed to *exclude_func*, so a nested dictionary that ends up empty is
    kept.

    Args:
        dictionary: The mapping to clean; it is mutated.
        exclude_func: Predicate called with a value; a true result means the
            corresponding key is removed.
        descend_lists: When true, dictionaries found inside list values (at
            any list nesting depth) are cleaned as well. The list elements
            themselves are never removed. Defaults to ``False``, in which
            case lists are only tested as a whole with *exclude_func*.
    """
    # Keys are collected first because a dict must not change size while it
    # is being iterated.
    discard = set()
    for key, value in dictionary.items():
        if isinstance(value, dict):
            filter_dict(value, exclude_func, descend_lists=descend_lists)
            continue
        if descend_lists and isinstance(value, list):
            _filter_list(value, exclude_func)
        if exclude_func(value):
            discard.add(key)
    for key in discard:
        del dictionary[key]


def _filter_list(items: list, exclude_func: Callable[[Any], bool]) -> None:
    """Clean every dictionary reachable through *items* and its nested lists."""
    for item in items:
        if isinstance(item, dict):
            filter_dict(item, exclude_func, descend_lists=True)
        elif isinstance(item, list):
            _filter_list(item, exclude_func)
def is_nothing(value: Any) -> bool:
    """Tell whether *value* is None, equal to zero, or the empty string.

    Note that ``False == 0`` holds in Python, so ``False`` is reported as
    "nothing" as well.
    """
    if value is None:
        return True
    return value == 0 or value == ""
def main() -> None:
    """Parse a JSON string, drop its "nothing" values and print the result."""
    raw_json = "{}"  # Your JSON string here
    parsed = json.loads(raw_json)
    # filter_dict mutates `parsed` in place and returns nothing.
    filter_dict(parsed, is_nothing)
    rendered = json.dumps(parsed, indent=4)
    print(rendered)


if __name__ == '__main__':
    main()
It does not handle JSON objects nested inside arrays (i.e. dictionaries nested inside lists), but I think you can build on that yourself.
If data contains your parsed Json string from the question you can do:
def change(o):
    """Remove, in place, every dict entry whose value is None, 0 or "".

    Lists are walked so that dictionaries inside them are cleaned too; list
    elements themselves are never removed. Anything that is neither a dict
    nor a list is left untouched. Returns None.
    """
    if isinstance(o, list):
        for element in o:
            change(element)
        return
    if not isinstance(o, dict):
        return
    # Iterate over a snapshot: deleting from a dict while iterating over it
    # directly raises RuntimeError.
    for k, v in tuple(o.items()):
        if v is None or v == 0 or v == "":
            del o[k]
            continue
        change(v)
change(data)
print(data)
Prints:
{
"UniqueId": "PO3589472",
"FareType": 2,
"BookedBy": "Api ",
"OrderBy": "Api ",
"Success": True,
"TktTimeLimit": "2022-08-10T14:11:45",
"Category": 21,
"Status": 21,
"RefundMethod": 1,
"TravelItinerary": {
"ItineraryInfo": {
"ItineraryPricing": {
"BaseFare": 8469250,
"TotalTax": 993000,
"TotalFare": 9462250,
"Currency": "IRR",
},
"CustomerInfoes": [
{
"Customer": {
"PassengerType": 1,
"NationalId": "1829961233",
"Nationality": "IR",
"DateOfBirth": "1996-07-08T00:00:00",
"PassportExpireDate": "0001-01-01T00:00:00",
"PassportIssueCountry": "IR",
"PassportIssueDate": "2022-08-10T00:00:00",
"PaxName": {
"PassengerFirstName": "MAJID",
"PassengerLastName": "MAJIDIFAR",
},
},
"ETickets": "8151405444745",
"ETicketNumbers": [
{
"ETicketNumber": "8151405444745",
"EticketStatus": 1,
"DateOfIssue": "2022-08-10T13:58:47",
"AirlinePnr": "TXNXM",
}
],
}
],
"ReservationItems": [
{
"AirEquipmentType": "737",
"AirlinePnr": "TXNXM",
"ArrivalAirportLocationCode": "ABD",
"ArrivalDateTime": "2022-08-17T23:25:00",
"Baggage": "20KG",
"DepartureAirportLocationCode": "THR",
"DepartureDateTime": "2022-08-17T22:05:00",
"DepartureTerminal": "Terminal 4",
"FlightNumber": "3750",
"JourneyDuration": "01:20",
"MarketingAirlineCode": "EP",
"OperatingAirlineCode": "EP",
"ResBookDesigCode": "Y",
"TechnicalStops": [],
"CabinClassCode": 1,
}
],
"TripDetailPtcFareBreakdowns": [
{
"PassengerTypeQuantity": {
"PassengerType": 1,
"Quantity": 1,
},
"TripDetailPassengerFare": {
"BaseFare": 8469250,
"Tax": 993000,
"TotalFare": 9462250,
"Currency": "IRR",
},
}
],
"PhoneNumber": "09359276735",
"Email": "info#iran-tech.com",
},
"BookingNotes": [],
"Services": [],
},
"ValidatingAirlineCode": "EP",
"DirectionInd": 1,
"AirRemark": [],
}
This function will recursively iterate over dictionary and remove keys with empty values. However, there may be some issues with nested lists, I will try to finish it later.
def remove_keys_in_one_level(d):
    """Pop every falsy-valued key from *d* and from the containers inside it.

    Works in place and returns *d* itself. A value is tested before it is
    descended into, so a nested dict that only becomes empty after cleaning
    is kept. List elements are descended into but never removed.
    """
    if isinstance(d, dict):
        # Iterate over a snapshot of the keys so entries can be popped
        # during the loop.
        for key in list(d):
            value = d[key]
            if value:
                remove_keys_in_one_level(value)
            else:
                # Adjust the truthiness test above if empty lists,
                # dictionaries, etc. should be kept.
                d.pop(key)
    elif isinstance(d, list):
        for lst_item in d:
            remove_keys_in_one_level(lst_item)
    return d
remove_keys_in_one_level(jsn)

Searching for a string in a json file and extracting its section

I was wondering how to perform the following:
1. Search for strings in a JSON file and extract their nested components.
given:
"type": "blah",
"animals": [
{
"type": "dog1",
"name": "oscar",
}
},
{
"type": "dog2",
"name": "John",
}
},
{
"type": "cat1",
"name": "Fred",
"Colors": [
"Red"
],
"Contact_info": [
{
"Owner": "Jill",
"Owner_number": "123"
}
],
},
{
"type": "cat3",
"name": "Freddy",
"Colors": [
"Blue"
],
"Contact_info": [
{
"Owner": "Ann",
"Owner_number": "1323"
}
],
From this JSON, I would like to extract all of the animals whose type is a cat (like cat1 and cat3), as well as all of the information within each block. For example, if I search for cat it should return:
{
"type": "cat1",
"name": "Fred",
"Colors": [
"Red"
],
"Contact_info": [
{
"Owner": "Jill",
"Owner_number": "123"
}
],
},
{
"type": "cat3",
"name": "Freddy",
"Colors": [
"Blue"
],
"Contact_info": [
{
"Owner": "Ann",
"Owner_number": "1323"
}
],
Not necessarily in that format, but just all of the information for entries whose type is cat. I'm trying to search for objects in a JSON file and extract features from that search, as well as anything nested inside them.
Here is my code so far:
f = open('data.json')
# returns JSON object as
# a dictionary
data = json.load(f)
# Iterating through the json
# list
for i in data:
if i['type'] == 'cat':
print(i['name'])
print(i['colors'])
break
# Closing file
f.close()```
To begin with, I recommend using the with statement that creates a runtime context that allows you to run a group of statements under the control of a context manager.
It’s much more readable and allows you to skip closing the file since the context manager will do everything for you.
Moving to your problem
Suppose your file is called animals.json
# Import json library to work with json files
import json
# Use a context manager so the input file is closed automatically
with open("animals.json", "rb") as f:
    # Load animals list from json file
    animals = json.load(f)["animals"]
# Create a list of dictionaries if animal type contains "cat";
# an entry with a missing or null "type" simply does not match
cats = [animal for animal in animals if "cat" in (animal.get("type") or "")]
# Write data to cats.json, again through a context manager so the output is
# flushed and closed; UTF-8 is set explicitly because ensure_ascii=False
# writes non-ASCII characters as-is
with open("cats.json", "w", encoding="utf-8") as out:
    json.dump(cats, out, indent=4, sort_keys=False, ensure_ascii=False)
This code outputs the formatted cats.json file with all necessary elements:
[
{
"type": "cat1",
"name": "Fred",
"Colors": [
"Red"
],
"Contact_info": [
{
"Owner": "Jill",
"Owner_number": "123"
}
]
},
{
"type": "cat3",
"name": "Freddy",
"Colors": [
"Blue"
],
"Contact_info": [
{
"Owner": "Ann",
"Owner_number": "1323"
}
]
}
]

parsing nested JSON using python

I have a scenario where I have below JSON data, which I want to parse and store the results in a dict under these conditions :
Condition: parse through the JSON and, under data, wherever groupProperty is equal to Tests, return that groupValue and the corresponding value in a dict.
{
"dateFrom": "2020-03-26 07:35:00",
"dateTo": "2020-03-26 07:40:00",
"groupLabels": [
{
"groupProperty": "Tests",
"groupLabels": [
{
"groupId": "1053777",
"groupLabel": "testappzxco"
},
{
"groupId": "570009",
"groupLabel": "testappzkbo"
}
]
}
],
"binSize": 300,
"data": {
"points": [
{
"timestamp": 1585208100,
"numberOfDataPoints": 24,
"value": 0,
"groups": [
{
"groupProperty": "Tests",
"groupValue": "1053777"
},
{
"groupProperty": "Test Labels",
"groupValue": "61776"
}
]
},
{
"timestamp": 1585208100,
"numberOfDataPoints": 5,
"value": 4.888970,
"groups": [
{
"groupProperty": "Tests",
"groupValue": "1241460"
},
{
"groupProperty": "Test Labels",
"groupValue": "61710"
}
]
},
{
"timestamp": 1585208100,
"numberOfDataPoints": 96,
"value": 0,
"groups": [
{
"groupProperty": "Test Labels",
"groupValue": "61770"
}
]
},
{
"timestamp": 1585208100,
"numberOfDataPoints": 101,
"value": 0.01980198019801982,
"groups": [
{
"groupProperty": "Test Labels",
"groupValue": "61773"
}
]
},
{
"timestamp": 1585208100,
"numberOfDataPoints": 104,
"value": 0,
"groups": [
{
"groupProperty": "Test Labels",
"groupValue": "61776"
}
]
}
]
}
}
What I have tried and it doesn't even get the right details :
dat = json.loads(original_data)
testl=[]
for key in dat:
temp=key['data']['points']
for key1 in temp:
if key1['groups']['groupProperty'] == "Tests":
testl.append({key1['groupValue'], key['value']
})
Since the json is very complex I am not sure how to get the desired output.
Below is the desired O/P :
[{"tname":1241460, "tvalue":4.888970},{"tname":1053777, "tvalue":0}]
Any help would be great !
There is no special problem here: you just have to be very cautious in writing the comprehension:
# Builds one {'tname', 'tvalue'} record for every group whose groupProperty is
# 'Tests', pairing that group's groupValue with the 'value' of the point that
# contains it. `d` is presumably the parsed JSON document — confirm.
[{"tname": g['groupValue'], 'tvalue': da['value']}
for da in d['data']['points'] for g in da['groups'] if g['groupProperty'] == 'Tests']
with the provided sample, it gives as expected:
[{'tname': '1053777', 'tvalue': 0}, {'tname': '1241460', 'tvalue': 4.88897}]
You have not noticed that groups is also an array. This way it should work:
# `groups` is itself a list, so each point needs an inner loop over its groups.
points = dat['data']['points']
for point in points:
    value = point['value']
    for group in point['groups']:
        if group['groupProperty'] == 'Tests':
            # The record uses the 'tvalue' key so that it matches the
            # requested output format ({"tname": ..., "tvalue": ...}).
            testl.append({'tname': group['groupValue'], 'tvalue': value})
print(testl)
A recursive search making fewer assumptions on the structure of dat:
testl = []
# Recursively walk `obj` (nested lists and dicts) and append a
# {'tname': ..., 'tvalue': ...} record to the module-level list `testl` when a
# dict has groupProperty == 'Tests' and a 'groupValue' key while a value is
# being carried.
#   obj       -- current node; anything that is not a list or dict is ignored
#   seen_data -- flag that is set once a dict containing a 'data' key is visited
#   value     -- 'value' picked up from an enclosing dict, or None
# NOTE(review): the indentation of this listing was lost, so the nesting of the
# branches below has to be inferred — confirm against the linked demo before reuse.
def search(obj, seen_data, value):
if isinstance(obj, list):
# Lists carry no state of their own: visit each element with the same arguments.
for x in obj:
search(x, seen_data, value)
elif isinstance(obj, dict):
# Until a dict with a 'data' key has been seen, only look for that key.
if not seen_data:
if 'data' in obj:
seen_data = True
else:
if value is not None:
if 'groupProperty' in obj and obj['groupProperty'] == 'Tests':
if 'groupValue' in obj:
tests = obj['groupValue']
# Record the match in the module-level list `testl`.
testl.append({'tname': tests, 'tvalue': value})
# Clear the carried value (presumably so the same value is not reused
# further down — TODO confirm which branch this line belongs to).
value = None
# Presumably pairs with `if value is not None`: with no value carried yet,
# pick one up from this dict if it has a 'value' key — TODO confirm.
elif 'value' in obj:
value = obj['value']
# Descend into every value of the dict, passing the flag and carried value down.
for x in obj.values():
search(x, seen_data, value)
search(dat, False, None)
print(testl)
Prints:
[{'tname': '1053777', 'tvalue': 0}, {'tname': '1241460', 'tvalue': 4.88897}]
See Python Demo

Merge JSON data with Python

As part of a Python program, I want to merge JSON objects that contain identically structured data. For instance:
{
"responseStatus": "SUCCESS",
"responseDetails": {
"total": 5754,
},
"data": [
{
"id": 1324651
},
{
"id": 5686131
}
]
}
What I want to do is to add the content of the data array of my second object into the data array of my first object.
So, assuming:
thejson1 = json.loads({"responseStatus": "SUCCESS","responseDetails": {"total": 5754,},"data": [{"id": 1324651},{"id": 5686131}]})
thejson2 = json.loads({"responseStatus": "SUCCESS","responseDetails": {"total": 1234,},"data": [{"id": 2165735},{"id": 2133256}]})
I thought that executing:
thejson1["data"].append(thejson2["data"])
Would expand thejson1 into:
{
"responseStatus": "SUCCESS",
"responseDetails": {
"total": 5754,
},
"data": [
{
"id": 1324651
},
{
"id": 5686131
},
{
"id": 2165735
},
{
"id": 2133256
}
]
}
But what it does instead is add thejson2 data as an array within the data array of thejson1:
{
"responseStatus": "SUCCESS",
"responseDetails": {
"total": 5754,
},
"data": [
{
"id": 1324651
},
{
"id": 5686131
},
[
{
"id": 2165735
},
{
"id": 2133256
}
]
]
}
So, what am I doing wrong? It looks like append adds the data array of the second JSON object instead of its content, but note that I can't know in advance the contents of the "data" array in my JSON input, so I can't write code that specifically loops in the "id" objects to add them one by one.
Thanks in advance!
R.
You're looking for extend, not append.
thejson1["data"].extend(thejson2["data"])
append takes a single argument and inserts it at the end of the list as one element, while extend adds each individual value of its argument to the end of the list. Both modify the list in place and return None.
# example:
a = [1, 2, 3]
# append() and extend() modify the list in place and return None, so copy
# first and call the method on the copy instead of assigning its result.
b = a[:]
b.append([4, 5])
# b == [1, 2, 3, [4, 5]]
c = a[:]
c.extend([4, 5])
# c == [1, 2, 3, 4, 5]
thejson1 = {
    "responseStatus": "SUCCESS",
    "responseDetails": {"total": 5754},
    "data": [{"id": 1324651}, {"id": 5686131}],
}
thejson2 = {
    "responseStatus": "SUCCESS",
    "responseDetails": {"total": 1234},
    "data": [{"id": 2165735}, {"id": 2133256}],
}
# For lists, `+=` concatenates in place: each element of the right-hand list
# is added to the end of the left-hand list.
thejson1["data"] += thejson2["data"]
Output:
{'responseDetails': {'total': 5754}, 'data': [{'id': 1324651}, {'id': 5686131}, {'id': 2165735}, {'id': 2133256}], 'responseStatus': 'SUCCESS'}
You can also use += to extend.

Categories

Resources