json_normalize with meta data - Python

{
"generated_at": "2022-09-19T15:30:42+00:00",
"sport_schedule_sport_event_markets": [{
"sport_event": {
"id": "sr:sport_event:33623209",
"start_time": "2022-09-18T17:00:00+00:00",
"start_time_confirmed": true,
"competitors": [{
"id": "sr:competitor:4413",
"name": "Baltimore Ravens",
"country": "USA",
"country_code": "USA",
"abbreviation": "BAL",
"qualifier": "home",
"rotation_number": 264
}, {
"id": "sr:competitor:4287",
"name": "Miami Dolphins",
"country": "USA",
"country_code": "USA",
"abbreviation": "MIA",
"qualifier": "away",
"rotation_number": 263
}]
},
"markets": [{
"id": "sr:market:1",
"name": "1x2",
"books": [{
"id": "sr:book:18149",
"name": "DraftKings",
"removed": true,
"external_sport_event_id": "180327504",
"external_market_id": "120498143",
"outcomes": [{
"id": "sr:outcome:1",
"type": "home",
"odds_decimal": "1.13",
"odds_american": "-800",
"odds_fraction": "1\/8",
"open_odds_decimal": "1.37",
"open_odds_american": "-270",
"open_odds_fraction": "10\/27",
"external_outcome_id": "0QA120498143#1341135376_13L88808Q1468Q20",
"removed": true
}, {
"id": "sr:outcome:2",
"type": "draw",
"odds_decimal": "31.00",
"odds_american": "+3000",
"odds_fraction": "30\/1",
"open_odds_decimal": "36.00",
"open_odds_american": "+3500",
"open_odds_fraction": "35\/1",
"external_outcome_id": "0QA120498143#1341135377_13L88808Q10Q22",
"removed": true
}, {
"id": "sr:outcome:3",
"type": "away",
"odds_decimal": "6.00",
"odds_american": "+500",
"odds_fraction": "5\/1",
"open_odds_decimal": "2.95",
"open_odds_american": "+195",
"open_odds_fraction": "39\/20",
"external_outcome_id": "0QA120498143#1341135378_13L88808Q1329Q21",
"removed": true
}]
I'm trying to get the outcomes as the main table, with the sport event ID attached as meta. The line below is not working:
#df = pd.json_normalize(data, record_path=['sport_schedule_sport_event_markets', 'markets', 'books', 'outcomes'], meta=[['sport_schedule_sport_event_markets', 'sport_event']], meta_prefix='game_', errors='ignore')
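One possible workaround (a sketch, assuming data holds the parsed JSON above, and that the column name game_sport_event_id is just an illustrative choice): json_normalize's meta paths may not reach cleanly into the nested sport_event dict from a four-level record_path, so building the outcome rows explicitly and attaching the event ID yourself is a simple alternative:
import pandas as pd

# Build one row per outcome, carrying the parent sport event ID along.
rows = []
for event_markets in data["sport_schedule_sport_event_markets"]:
    event_id = event_markets["sport_event"]["id"]
    for market in event_markets["markets"]:
        for book in market["books"]:
            for outcome in book["outcomes"]:
                rows.append({**outcome, "game_sport_event_id": event_id})

df = pd.DataFrame(rows)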

Related

I need to loop over a big JSON - Pandas/Python

I have a JSON file that looks like this:
[ {
"id": 121,
"name": "Lebanon",
"iso3": "LBN",
"iso2": "LB",
"numeric_code": "422",
"phone_code": "961",
"capital": "Beirut",
"currency": "LBP",
"currency_name": "Lebanese pound",
"currency_symbol": "£",
"tld": ".lb",
"native": "لبنان",
"region": "Asia",
"subregion": "Western Asia",
"timezones": [
{
"zoneName": "Asia\/Beirut",
"gmtOffset": 7200,
"gmtOffsetName": "UTC+02:00",
"abbreviation": "EET",
"tzName": "Eastern European Time"
}
],
"translations": {
"kr": "레바논",
"br": "Líbano",
"pt": "Líbano",
"nl": "Libanon",
"hr": "Libanon",
"fa": "لبنان",
"de": "Libanon",
"es": "Líbano",
"fr": "Liban",
"ja": "レバノン",
"it": "Libano",
"cn": "黎巴嫩",
"tr": "Lübnan"
},
"latitude": "33.83333333",
"longitude": "35.83333333",
"emoji": "🇱🇧",
"emojiU": "U+1F1F1 U+1F1E7",
"states": [
{
"id": 2285,
"name": "Akkar Governorate",
"state_code": "AK",
"latitude": "34.53287630",
"longitude": "36.13281320",
"type": null,
"cities": [
{
"id": 65939,
"name": "Caza de Aakkar",
"latitude": "34.53333000",
"longitude": "36.16667000"
}
]
},
{
"id": 2283,
"name": "Baalbek-Hermel Governorate",
"state_code": "BH",
"latitude": "34.26585560",
"longitude": "36.34980970",
"type": null,
"cities": [
{
"id": 65933,
"name": "Baalbek",
"latitude": "34.00583000",
"longitude": "36.21806000"
},
{
"id": 65941,
"name": "Caza de Baalbek",
"latitude": "34.09822000",
"longitude": "36.27157000"
}
]
},
{
"id": 2286,
"name": "Beirut Governorate",
"state_code": "BA",
"latitude": "33.88861060",
"longitude": "35.49547720",
"type": null,
"cities": [
{
"id": 65936,
"name": "Beirut",
"latitude": "33.89332000",
"longitude": "35.50157000"
},
{
"id": 65951,
"name": "Ra’s Bayrūt",
"latitude": "33.90000000",
"longitude": "35.48333000"
}
]
},
{
"id": 2287,
"name": "Beqaa Governorate",
"state_code": "BI",
"latitude": "33.84626620",
"longitude": "35.90194890",
"type": null,
"cities": [
{
"id": 65930,
"name": "Aanjar",
"latitude": "33.72778000",
"longitude": "35.93111000"
},
{
"id": 65955,
"name": "Zahlé",
"latitude": "33.84675000",
"longitude": "35.90203000"
}
]
},
{
"id": 2282,
"name": "Mount Lebanon Governorate",
"state_code": "JL",
"latitude": "33.81008580",
"longitude": "35.59731390",
"type": null,
"cities": [
{
"id": 65932,
"name": "Baabda",
"latitude": "33.83389000",
"longitude": "35.54417000"
},
{
"id": 65937,
"name": "Bhamdoun",
"latitude": "33.79500000",
"longitude": "35.65111000"
},
{
"id": 65938,
"name": "Bhamdoûn el Mhatta",
"latitude": "33.80861000",
"longitude": "35.65972000"
},
{
"id": 65940,
"name": "Caza de Baabda",
"latitude": "33.84592000",
"longitude": "35.66791000"
},
{
"id": 65947,
"name": "Jbaïl",
"latitude": "34.12111000",
"longitude": "35.64806000"
},
{
"id": 65948,
"name": "Jounieh",
"latitude": "33.98083000",
"longitude": "35.61778000"
}
]
},
{
"id": 2288,
"name": "Nabatieh Governorate",
"state_code": "NA",
"latitude": "33.37716930",
"longitude": "35.48382930",
"type": null,
"cities": [
{
"id": 65931,
"name": "Ain Ebel",
"latitude": "33.11023000",
"longitude": "35.40251000"
},
{
"id": 65942,
"name": "Caza de Bent Jbaïl",
"latitude": "33.15964000",
"longitude": "35.41137000"
},
{
"id": 65943,
"name": "Caza de Nabatîyé",
"latitude": "33.39435000",
"longitude": "35.44483000"
},
{
"id": 65946,
"name": "Habboûch",
"latitude": "33.40729000",
"longitude": "35.48169000"
},
{
"id": 65949,
"name": "Marjayoûn",
"latitude": "33.36028000",
"longitude": "35.59111000"
},
{
"id": 65950,
"name": "Nabatîyé et Tahta",
"latitude": "33.37889000",
"longitude": "35.48389000"
}
]
},
{
"id": 2284,
"name": "North Governorate",
"state_code": "AS",
"latitude": "34.43806250",
"longitude": "35.83082330",
"type": null,
"cities": [
{
"id": 65934,
"name": "Batroûn",
"latitude": "34.25528000",
"longitude": "35.65806000"
},
{
"id": 65935,
"name": "Bcharré",
"latitude": "34.25083000",
"longitude": "36.01056000"
},
{
"id": 65953,
"name": "Tripoli",
"latitude": "34.43352000",
"longitude": "35.84415000"
}
]
},
{
"id": 2281,
"name": "South Governorate",
"state_code": "JA",
"latitude": "33.27214790",
"longitude": "35.20327780",
"type": null,
"cities": [
{
"id": 65944,
"name": "En Nâqoûra",
"latitude": "33.11806000",
"longitude": "35.13972000"
},
{
"id": 65945,
"name": "Ghazieh",
"latitude": "33.51750000",
"longitude": "35.36889000"
},
{
"id": 65952,
"name": "Sidon",
"latitude": "33.55751000",
"longitude": "35.37148000"
},
{
"id": 65954,
"name": "Tyre",
"latitude": "33.27333000",
"longitude": "35.19389000"
}
]
}
]
}
]
How can I access states > name ?
For example, I need Akkar Governorate from
"states": [
{
"id": 2285,
"name": "Akkar Governorate",
"state_code": "AK",
"latitude": "34.53287630",
"longitude": "36.13281320",
"type": null,
"cities": [
{
"id": 65939,
"name": "Caza de Aakkar",
"latitude": "34.53333000",
"longitude": "36.16667000"
}
]
}
]
How can I manage a loop to do that?
Use:
import json

your_json_obj = json.loads(your_json_string)  # parse the JSON text; the top level here is a list of countries
for country in your_json_obj:
    # access/manipulate the data here, e.g. country['states']
    ...
Here are more examples: JSON Read, write, parse
Good luck!
First you need to load the file:
import json

with open(r"C:\Users\Bar\AppData\Local\Programs\Python\Python310\Scripts\contries+states+cities.json", encoding="utf8") as f:
    data = json.loads(f.read())
You can then read a name by walking the hierarchy:
data[0]['states'][0]['name']  # 'Akkar Governorate' (first state of the first country)
You can also look a state up by name:
def get_data(name):
    for state in data[0]['states']:
        if state['name'] == name:
            print(state)

get_data('Akkar Governorate')
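Since the question mentions pandas, a hedged sketch with pandas.json_normalize can also flatten every state into one DataFrame, assuming data is the parsed list of countries loaded above:
import pandas as pd

# One row per state, keeping the parent country's name as a prefixed meta column.
states = pd.json_normalize(data, record_path="states", meta=["name"], meta_prefix="country_")
print(states[["country_name", "name", "state_code"]])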

Nested dictionary in a list received from google api not getting parsed

I have spent the past few days trying to retrieve all of the "dateTime" values from "start" and "end" in this nested dictionary inside nested lists, using every kind of loop, but I haven't been successful. Does anyone know how I should approach it?
AllCals = [
{
"accessRole": "reader",
"defaultReminders": [],
"etag": "\"\"",
"items": [],
"kind": "calendar#events",
"nextSyncToken": "=",
"summary": "Holidays in Canada",
"timeZone": "America/Toronto",
"updated": "2020-11-09T20:44:17.000Z"
},
{
"accessRole": "owner",
"defaultReminders": [],
"etag": "\"\"",
"items": [],
"kind": "calendar#events",
"nextSyncToken": "=",
"summary": "Business Sim",
"timeZone": "America/Toronto",
"updated": "2020-10-26T19:23:14.031Z"
},
{
"accessRole": "owner",
"defaultReminders": [],
"etag": "\"\"",
"items": [],
"kind": "calendar#events",
"nextSyncToken": "=",
"summary": "COMN",
"timeZone": "America/Toronto",
"updated": "2020-11-05T20:15:35.506Z"
},
{
"accessRole": "owner",
"defaultReminders": [],
"etag": "\"\"",
"items": [
{
"created": "2020-11-09T16:45:47.000Z",
"creator": {
"email": "#gmail.com"
},
"end": {
"dateTime": "2020-11-10T18:30:00-05:00"
},
"etag": "\"\"",
"htmlLink": "https://www.google.com/calendar",
"iCalUID": "#google.com",
"id": "",
"kind": "calendar#event",
"organizer": {
"displayName": "Predictive",
"email": "#group.calendar.google.com",
"self": true
},
"reminders": {
"useDefault": true
},
"sequence": 1,
"start": {
"dateTime": "2020-11-10T17:30:00-05:00"
},
"status": "confirmed",
"summary": "Group call",
"updated": "2020-11-10T00:13:33.387Z"
},
{
"created": "2020-11-11T02:02:03.000Z",
"creator": {
"email": "#gmail.com"
},
"end": {
"dateTime": "2020-11-10T22:30:00-05:00"
},
"etag": "\"\"",
"htmlLink": "https://www.google.com/calendar/event",
"iCalUID": "#google.com",
"id": "",
"kind": "calendar#event",
"organizer": {
"displayName": "Predictive",
"email": "#group.calendar.google.com",
"self": true
},
"reminders": {
"useDefault": true
},
"sequence": 0,
"start": {
"dateTime": "2020-11-10T21:30:00-05:00"
},
"status": "confirmed",
"summary": "predictive part",
"updated": "2020-11-11T02:02:03.410Z"
}
],
"kind": "calendar#",
"nextSyncToken": "-=",
"summary": "Predictive",
"timeZone": "America/Toronto",
"updated": "2020-11-11T02:02:03.410Z"
},
{
"accessRole": "owner",
"defaultReminders": [],
"description": "",
"etag": "\"\"",
"items": [
{
"created": "2020-09-23T02:48:00.000Z",
"creator": {
"email": "#gmail.com"
},
"end": {
"date": "2020-11-10"
},
"etag": "\"\"",
"htmlLink": "",
"iCalUID": "#google.com",
"id": "",
"kind": "calendar#event",
"organizer": {
"displayName": "Real Estate",
"email": "#group.calendar.google.com",
"self": true
},
"reminders": {
"useDefault": false
},
"sequence": 3,
"start": {
"date": "2020-11-09"
},
"status": "confirmed",
"summary": "Assignment 3",
"updated": "2020-09-26T17:36:50.714Z"
}
],
"kind": "#events",
"nextSyncToken": "=",
"summary": "Real Estate",
"timeZone": "America/Toronto",
"updated": "2020-11-06T06:24:35.930Z"
},
{
"accessRole": "owner",
"defaultReminders": [],
"etag": "\"\"",
"items": [
{
"created": "2020-09-30T19:24:53.000Z",
"creator": {
"email": "#gmail.com"
},
"end": {
"dateTime": "2020-11-10T02:00:00-05:00",
"timeZone": "America/Toronto"
},
"etag": "\"\"",
"htmlLink": "",
"iCalUID": "#google.com",
"id": "",
"kind": "calendar#event",
"organizer": {
"displayName": "Reading",
"email": "#group.calendar.google.com",
"self": true
},
"originalStartTime": {
"dateTime": "2020-11-10T01:00:00-05:00",
"timeZone": "America/Toronto"
},
"recurringEventId": "",
"reminders": {
"useDefault": true
},
"sequence": 0,
"start": {
"dateTime": "2020-11-10T01:00:00-05:00",
"timeZone": "America/Toronto"
},
"status": "confirmed",
"summary": "Reading",
"updated": "2020-09-30T19:24:53.912Z"
}
],
"kind": "calendar#events",
"nextSyncToken": "=",
"summary": "Reading",
"timeZone": "America/Toronto",
"updated": "2020-09-30T19:24:53.912Z"
},
{
"accessRole": "owner",
"defaultReminders": [],
"etag": "\"\"",
"items": [
{
"created": "2020-11-11T00:09:57.000Z",
"creator": {
"email": "#gmail.com"
},
"end": {
"dateTime": "2020-11-10T20:30:00-05:00"
},
"etag": "\"\"",
"htmlLink": "",
"iCalUID": "#google.com",
"id": "",
"kind": "calendar#event",
"organizer": {
"displayName": "ToDo",
"email": "#group.calendar.google.com",
"self": true
},
"reminders": {
"useDefault": true
},
"sequence": 0,
"start": {
"dateTime": "2020-11-10T19:30:00-05:00"
},
"status": "confirmed",
"summary": "",
"updated": "2020-11-11T00:09:57.152Z"
}
],
"kind": "calendar#events",
"nextSyncToken": "-=",
"summary": "ToDo",
"timeZone": "America/Toronto",
"updated": "2020-11-11T00:09:57.152Z"
},
{
"accessRole": "owner",
"defaultReminders": [
{
"method": "popup",
"minutes": 30
}
],
"etag": "\"\"",
"items": [
{
"created": "2020-09-29T03:29:51.000Z",
"creator": {
"email": "#gmail.com",
"self": true
},
"end": {
"dateTime": "2020-11-10T14:30:00-05:00",
"timeZone": "America/Toronto"
},
"etag": "\"\"",
"htmlLink": "",
"iCalUID": ".com",
"id": "",
"kind": "calendar#event",
"organizer": {
"email": "#gmail.com",
"self": true
},
"originalStartTime": {
"dateTime": "2020-11-10T11:30:00-05:00",
"timeZone": "America/Toronto"
},
"recurringEventId": "",
"reminders": {
"useDefault": true
},
"sequence": 0,
"start": {
"dateTime": "2020-11-10T11:30:00-05:00",
"timeZone": "America/Toronto"
},
"status": "confirmed",
"summary": "ENTR 4700",
"updated": "2020-09-29T03:29:51.740Z"
},
{
"created": "2020-11-10T05:39:09.000Z",
"creator": {
"email": "#gmail.com",
"self": true
},
"end": {
"dateTime": "2020-11-10T11:30:00-05:00"
},
"etag": "",
"htmlLink": "",
"iCalUID": "#google.com",
"id": "",
"kind": "calendar#event",
"organizer": {
"email": "#gmail.com",
"self": true
},
"reminders": {
"useDefault": true
},
"sequence": 1,
"start": {
"dateTime": "2020-11-10T10:30:00-05:00"
},
"status": "confirmed",
"summary": "Pick a term sheet ",
"updated": "2020-11-10T05:39:12.662Z"
}
],
"kind": "calendar#events",
"nextSyncToken": "=",
"summary": "ENTR 4700",
"timeZone": "America/Toronto",
"updated": "2020-11-10T06:12:07.093Z"
},
{
"accessRole": "owner",
"defaultReminders": [],
"etag": "\"\"",
"items": [],
"kind": "calendar#events",
"nextSyncToken": "=",
"summary": "Programming",
"timeZone": "America/Toronto",
"updated": "2020-11-08T01:45:49.847Z"
},
{
"accessRole": "owner",
"defaultReminders": [],
"etag": "\"\"",
"items": [],
"kind": "calendar#events",
"nextSyncToken": "",
"summary": "Alexa",
"timeZone": "America/Toronto",
"updated": "2020-11-08T16:37:12.291Z"
}
]
print(type(AllCals))
<class 'list'>
Unfortunately, Google's Calendar API doesn't return all of the user's events even when calendarId is set to 'primary', which has been a known issue among developers. Therefore, I have no choice but to fetch each of the user's calendars, merge them, and then parse the result.
The ultimate goal: Retrieve the user's duration of events from the calendar.
events = events_result.get('items', [])
cal_ids = [item['id'] for item in events]
AllCals = [
    service.events().list(
        calendarId=cal_id,
        timeMin=yesterdayy,
        timeMax=utc_dt,
        singleEvents=True,
        orderBy='updated',
    ).execute()
    for cal_id in cal_ids
]
The example data you showed seems to be JSON, but I'll assume you have a list of dicts, each with a key "items" holding an array of calendar items, each of which has keys "start" and "end", each containing a dict with either a key "date" or "dateTime" containing an ISO date string.
I still don't know what you are going to do with it, but if I understand you correctly, you want to retrieve the durations of every calendar's items.
from datetime import datetime

# AllCals = ...
for calendar in AllCals:
    for item in calendar["items"]:
        # This could probably be done more elegantly: you basically want
        # item["start"]["date"] or item["start"]["dateTime"], whichever is present.
        item_start = item["start"]
        item_start_string = item_start["date"] if "date" in item_start else item_start["dateTime"]
        start_datetime = datetime.fromisoformat(item_start_string)

        item_end = item["end"]
        item_end_string = item_end["date"] if "date" in item_end else item_end["dateTime"]
        end_datetime = datetime.fromisoformat(item_end_string)

        duration = end_datetime - start_datetime
        print(f"{item['summary']}: {duration}")
With your example data, this prints:
Group call: 1:00:00
predictive part: 1:00:00
Assignment 3: 1 day, 0:00:00
Reading: 1:00:00
: 1:00:00
ENTR 4700: 3:00:00
Pick a term sheet : 1:00:00
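If the ultimate goal is a total duration per calendar rather than per item, a small extension of the same loop (a sketch, reusing the AllCals structure above) could sum the durations:
from datetime import datetime, timedelta

totals = {}
for calendar in AllCals:
    total = timedelta()
    for item in calendar["items"]:
        start = item["start"].get("dateTime", item["start"].get("date"))
        end = item["end"].get("dateTime", item["end"].get("date"))
        total += datetime.fromisoformat(end) - datetime.fromisoformat(start)
    totals[calendar["summary"]] = total

for summary, total in totals.items():
    print(f"{summary}: {total}")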

Best way to build a denormalized dataframe with pandas from the Spotify API

I just downloaded some JSON from Spotify and took a look at pd.json_normalize().
But if I normalize the data I still have dictionaries within my dataframe. Setting the level doesn't help either.
The data I want to have in my dataframe:
{
"collaborative": false,
"description": "",
"external_urls": {
"spotify": "https://open.spotify.com/playlist/5"
},
"followers": {
"href": null,
"total": 0
},
"href": "https://api.spotify.com/v1/playlists/5?additional_types=track",
"id": "5",
"images": [
{
"height": 640,
"url": "https://i.scdn.co/image/a",
"width": 640
}
],
"name": "Another",
"owner": {
"display_name": "user",
"external_urls": {
"spotify": "https://open.spotify.com/user/user"
},
"href": "https://api.spotify.com/v1/users/user",
"id": "user",
"type": "user",
"uri": "spotify:user:user"
},
"primary_color": null,
"public": true,
"snapshot_id": "M2QxNTcyYTkMDc2",
"tracks": {
"href": "https://api.spotify.com/v1/playlists/100&additional_types=track",
"items": [
{
"added_at": "2020-12-13T18:34:09Z",
"added_by": {
"external_urls": {
"spotify": "https://open.spotify.com/user/user"
},
"href": "https://api.spotify.com/v1/users/user",
"id": "user",
"type": "user",
"uri": "spotify:user:user"
},
"is_local": false,
"primary_color": null,
"track": {
"album": {
"album_type": "album",
"artists": [
{
"external_urls": {
"spotify": "https://open.spotify.com/artist/1dfeR4Had"
},
"href": "https://api.spotify.com/v1/artists/1dfDbWqFHLkxsg1d",
"id": "1dfeR4HaWDbWqFHLkxsg1d",
"name": "Q",
"type": "artist",
"uri": "spotify:artist:1dfeRqFHLkxsg1d"
}
],
"available_markets": [
"CA",
"US"
],
"external_urls": {
"spotify": "https://open.spotify.com/album/6wPXmlLzZ5cCa"
},
"href": "https://api.spotify.com/v1/albums/6wPXUJ9LzZ5cCa",
"id": "6wPXUmYJ9zZ5cCa",
"images": [
{
"height": 640,
"url": "https://i.scdn.co/image/ab676620a47",
"width": 640
},
{
"height": 300,
"url": "https://i.scdn.co/image/ab67616d0620a47",
"width": 300
},
{
"height": 64,
"url": "https://i.scdn.co/image/ab603e6620a47",
"width": 64
}
],
"name": "The (Deluxe ",
"release_date": "1920-07-17",
"release_date_precision": "day",
"total_tracks": 15,
"type": "album",
"uri": "spotify:album:6m5cCa"
},
"artists": [
{
"external_urls": {
"spotify": "https://open.spotify.com/artist/1dg1d"
},
"href": "https://api.spotify.com/v1/artists/1dsg1d",
"id": "1dfeR4HaWDbWqFHLkxsg1d",
"name": "Q",
"type": "artist",
"uri": "spotify:artist:1dxsg1d"
}
],
"available_markets": [
"CA",
"US"
],
"disc_number": 1,
"duration_ms": 21453,
"episode": false,
"explicit": false,
"external_ids": {
"isrc": "GBU6015"
},
"external_urls": {
"spotify": "https://open.spotify.com/track/5716J"
},
"href": "https://api.spotify.com/v1/tracks/5716J",
"id": "5716J",
"is_local": false,
"name": "Another",
"popularity": 73,
"preview_url": null,
"track": true,
"track_number": 3,
"type": "track",
"uri": "spotify:track:516J"
},
"video_thumbnail": {
"url": null
}
}
],
"limit": 100,
"next": null,
"offset": 0,
"previous": null,
"total": 1
},
"type": "playlist",
"uri": "spotify:playlist:fek"
}
So what are best practices for reading nested data like this into one dataframe in pandas?
I'm grateful for any advice.
EDIT:
Basically I want all keys as columns in my dataframe. But normalizing stops at "tracks.items", and if I normalize that again I run into the same nesting problem recursively.
It depends on the information you are looking for. Take a look at pandas.read_json() to see if that can work. You can also select data like this:
json_output = {"collaborative": False, "description": "", "external_urls": {"spotify": "https://open.spotify.com/playlist/5"}}
df['collaborative'] = json_output['collaborative']  # set a value in your existing df to a value from the returned JSON
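A hedged sketch of how the playlist could be flattened further, assuming playlist holds the parsed JSON shown above: normalize the track items as the rows and keep a few playlist-level scalars as meta columns. Nested dicts inside each item become dotted columns, while list fields such as track.artists stay as lists and can be exploded and normalized again if needed.
import pandas as pd

tracks = pd.json_normalize(
    playlist,                              # the parsed playlist dict
    record_path=["tracks", "items"],       # one row per track item
    meta=["id", "name", "snapshot_id"],    # playlist-level scalars
    meta_prefix="playlist_",
    errors="ignore",
)
print(tracks.columns.tolist())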

Parsing JSON with multiple arrays and comparing values with Excel data using Python 3.6

I have a JSON like:
{
"results": [{
"data": {
"child": [{
"sex": "2",
"birthDateReliability": "0",
"applicationInternalIdentifier": "cmpclt",
"birthDate": "2016-07-04",
"firstName": "Anna"
}],
"consumerType": "PRIVATE",
"countryCode": "FR",
"initialAppSourceCode": "ABCDWEB",
"optin": [{
"optinSourceApplication": "ABCDWEB",
"acceptanceDate": "2017-02-10T10:14:55.037Z",
"marketingGroupService": "XYZXYX-ABC"
}, {
"optinSourceApplication": "ABCDWEB",
"acceptanceDate": "2017-02-10T10:14:55.037Z",
"marketingGroupService": "XYZXYX-DEF"
}, {
"optinSourceApplication": "ABCDWEB",
"acceptanceDate": "2017-02-10T10:14:55.037Z",
"marketingGroupService": "XYZXYX-GHI"
}, {
"optinSourceApplication": "ABCDWEB",
"acceptanceDate": "2017-02-10T10:14:55.037Z",
"marketingGroupService": "XYZXYX-JKL"
}, {
"optinSourceApplication": "ABCDWEB",
"acceptanceDate": "2017-02-10T10:14:55.037Z",
"marketingGroupService": "XYZXYX-MNO"
}],
"didsys_KGexample": true,
"addressLine1": "123 Street",
"marketCode": "10107"
},
"lastUpdatedTimestamp": 1486721887742,
"socialProviders": "site",
"password": {
"hashSettings": {
"rounds": 9504778,
"salt": "XXXXXXXXX",
"algorithm": "xyz"
},
"hash": "$AF$$$F$$$$$ZX$$$$$J$--"
},
"iRank": 0,
"created": "2017-02-10T10:15:36.814Z",
"lastLoginTimestamp": 1486721736970,
"oldestDataUpdated": "2017-02-10T10:15:36.861Z",
"isLockedOut": false,
"profile": {
"zip": "12345",
"lastName": "Shah",
"email": "abc#gmail.com",
"locale": "en",
"firstName": "Jiten",
"city": "London"
},
"isVerified": false,
"createdTimestamp": 1486721736814,
"identities": [{
"lastName": "Shah",
"zip": "12345",
"isLoginIdentity": true,
"locale": "en",
"lastUpdatedTimestamp": 1486721887742,
"lastUpdated": "2017-02-10T10:18:07.742Z",
"provider": "site",
"allowsLogin": true,
"isExpiredSession": false,
"providerUID": "jbx63a0ed2f9a1cfa8cgh7dsdsl3",
"city": "London",
"oldestDataUpdatedTimestamp": 1486721736861,
"email": "abc#gmail.com",
"oldestDataUpdated": "2017-02-10T10:15:36.861Z",
"firstName": "Jiten"
}],
"lastUpdated": "2017-02-10T10:18:07.742Z",
"emails": {
"unverified": ["abc#gmail.com"],
"verified": []
},
"isRegistered": true,
"regSource": "https://abcn.net/user/register",
"lastLoginLocation": {
"state": "H9",
"coordinates": {
"lon": -0.0930938720703125,
"lat": 51.51420593261719
},
"country": "GB",
"city": "London"
},
"isActive": true,
"lastLogin": "2017-02-10T10:15:36.970Z",
"oldestDataUpdatedTimestamp": 1486721736861,
"UID": "ed9af442a4a7bd63a08a1cfa8c9d02f9",
"registered": "2017-02-10T10:18:07.993Z",
"rbaPolicy": {
"riskPolicyLocked": false
},
"loginIDs": {
"unverifiedEmails": [],
"emails": ["abc#gmail.com"]
},
"registeredTimestamp": 1486721887993,
"loginProvider": "site"
}],
"objectsCount": 1,
"totalCount": 1,
"statusCode": 200,
"errorCode": 0,
"statusReason": "OK",
"callId": "5164e6c985ee4ed9bcd76ebd403cfaaa",
"time": "2017-02-14T14:24:26.487Z"
}
I maintain an Excel sheet that holds all of the above JSON keys and values in two columns.
Now I want to compare/validate all of the values from the Excel sheet against the key/value pairs of the above JSON file using Python 3.6.
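One hedged sketch of how such a comparison might work, not a definitive answer: flatten the JSON into dotted key/value pairs and check them against the spreadsheet. The file names "response.json" and "expected.xlsx" and the column names "key" and "value" are assumptions for illustration only.
import json
import pandas as pd

def flatten(obj, prefix=""):
    # Recursively turn nested dicts/lists into {"dotted.path": value} pairs.
    items = {}
    if isinstance(obj, dict):
        for k, v in obj.items():
            items.update(flatten(v, f"{prefix}{k}."))
    elif isinstance(obj, list):
        for i, v in enumerate(obj):
            items.update(flatten(v, f"{prefix}{i}."))
    else:
        items[prefix.rstrip(".")] = obj
    return items

with open("response.json", encoding="utf8") as f:   # assumed file name
    flat = flatten(json.load(f))

expected = pd.read_excel("expected.xlsx")           # assumed columns: key, value
for key, value in zip(expected["key"], expected["value"]):
    actual = flat.get(key)
    status = "OK" if str(actual) == str(value) else "MISMATCH"
    print(f"{key}: expected={value!r} actual={actual!r} -> {status}")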

Iterating over JSON object individually for processing each post

I have the following JSON object that I get from the Instagram API; it can contain any number of posts (depending on the count parameter provided).
{
"pagination": {
"next_url": "https:\/\/api.instagram.com\/v1\/users\/3\/media\/recent?access_token=184046392.f59def8.c5726b469ad2462f85c7cea5f72083c0&max_id=205140190233104928_3",
"next_max_id": "205140190233104928_3"
},
"meta": {
"code": 200
},
"data": [{
"attribution": null,
"tags": [],
"type": "image",
"location": {
"latitude": 37.798594362,
"name": "Presidio Bowling Center",
"longitude": -122.459878922,
"id": 27052
},
"comments": {
"count": 132,
"data": [{
"created_time": "1342734265",
"text": "Distinguishing!",
"from": {
"username": "naiicamilos",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_53690312_75sq_1336573463.jpg",
"id": "53690312",
"full_name": "Naii Camilos"
},
"id": "239194812924826175"
}, {
"created_time": "1342737428",
"text": "#kevin in Spanish Presidio means Jail",
"from": {
"username": "jm0426",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_25881992_75sq_1342156673.jpg",
"id": "25881992",
"full_name": "Juan Mayen"
},
"id": "239221343768285211"
}, {
"created_time": "1342768120",
"text": "Good imagination",
"from": {
"username": "kidloca",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_193133903_75sq_1342032241.jpg",
"id": "193133903",
"full_name": "Khaleda Noon"
},
"id": "239478811731694145"
}, {
"created_time": "1342775967",
"text": "Cwl!",
"from": {
"username": "awesomeath",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_179252164_75sq_1339745821.jpg",
"id": "179252164",
"full_name": "awesomeath"
},
"id": "239544638740894674"
}, {
"created_time": "1342796153",
"text": "\u597d\u7f8e\u263a",
"from": {
"username": "hidelau",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_47295330_75sq_1342763977.jpg",
"id": "47295330",
"full_name": "Hide Lau"
},
"id": "239713963951001995"
}, {
"created_time": "1343018007",
"text": "#mindfreak",
"from": {
"username": "info2021",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/anonymousUser.jpg",
"id": "27664191",
"full_name": "info2021"
},
"id": "241575017119224582"
}, {
"created_time": "1343068374",
"text": "#kevin please share and promote my last pic. This will be the new hype as instagram",
"from": {
"username": "thansy_mansy",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_189019343_75sq_1342951587.jpg",
"id": "189019343",
"full_name": "thansy_mansy"
},
"id": "241997523093295303"
}, {
"created_time": "1343068382",
"text": "#kevin :P",
"from": {
"username": "thansy_mansy",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_189019343_75sq_1342951587.jpg",
"id": "189019343",
"full_name": "thansy_mansy"
},
"id": "241997589589790922"
}]
},
"filter": "Rise",
"created_time": "1342676212",
"link": "http:\/\/instagr.am\/p\/NQD4KAABKF\/",
"likes": {
"count": 4810,
"data": [{
"username": "caitlyn_hammonds",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/anonymousUser.jpg",
"id": "198322184",
"full_name": "caitlyn_hammonds"
}, {
"username": "sophiafrancis",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_43092892_75sq_1340548333.jpg",
"id": "43092892",
"full_name": "Sophiaaa."
}, {
"username": "amna7861",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_175807260_75sq_1343135903.jpg",
"id": "175807260",
"full_name": "Amna Haroon"
}, {
"username": "yaya0318",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_74056_75sq_1287001004.jpg",
"id": "74056",
"full_name": "Mao Yaya"
}, {
"username": "jay_damage",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_197465040_75sq_1342932411.jpg",
"id": "197465040",
"full_name": "jay_damage"
}, {
"username": "reves",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_671833_75sq_1335966794.jpg",
"id": "671833",
"full_name": "Fernando D. Ramirez"
}, {
"username": "lizray1",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_198450407_75sq_1343144120.jpg",
"id": "198450407",
"full_name": "lizray1"
}, {
"username": "alivewtheglory",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_37907416_75sq_1341561441.jpg",
"id": "37907416",
"full_name": "Marilynn C"
}, {
"username": "mnforever55",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_29255977_75sq_1334833008.jpg",
"id": "29255977",
"full_name": "mnforever55"
}]
},
"images": {
"low_resolution": {
"url": "http:\/\/distilleryimage9.s3.amazonaws.com\/beb7f896d16311e19fe21231380f3636_6.jpg",
"width": 306,
"height": 306
},
"thumbnail": {
"url": "http:\/\/distilleryimage9.s3.amazonaws.com\/beb7f896d16311e19fe21231380f3636_5.jpg",
"width": 150,
"height": 150
},
"standard_resolution": {
"url": "http:\/\/distilleryimage9.s3.amazonaws.com\/beb7f896d16311e19fe21231380f3636_7.jpg",
"width": 612,
"height": 612
}
},
"caption": {
"created_time": "1342676255",
"text": "Happy birthday #amy !",
"from": {
"username": "kevin",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_3_75sq_1325536697.jpg",
"id": "3",
"full_name": "Kevin Systrom"
},
"id": "238708186813567655"
},
"user_has_liked": false,
"id": "238707833418289797_3",
"user": {
"username": "kevin",
"website": "",
"bio": "CEO & Co-founder of Instagram",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_3_75sq_1325536697.jpg",
"full_name": "Kevin Systrom",
"id": "3"
}
}, {
"attribution": null,
"tags": [],
"type": "image",
"location": {
"latitude": 38.503100608,
"name": "Goose & Gander",
"longitude": -122.468387538,
"id": 12059278
},
"comments": {
"count": 85,
"data": [{
"created_time": "1342555499",
"text": "Cheers !!! \ud83d\ude18\ud83d\ude18",
"from": {
"username": "kattiab",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_1345073_75sq_1340495505.jpg",
"id": "1345073",
"full_name": "kattia b"
},
"id": "237695212468572732"
}, {
"created_time": "1342558279",
"text": "happy birthday instagram!",
"from": {
"username": "alanasayshi",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_4235095_75sq_1341960681.jpg",
"id": "4235095",
"full_name": "Alana Boy\u00e9r"
},
"id": "237718535382504383"
}, {
"created_time": "1342567977",
"text": "Happy Natal Day Instagram!",
"from": {
"username": "cynrtst",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_12493918_75sq_1341538462.jpg",
"id": "12493918",
"full_name": "Cynthia L"
},
"id": "237799888639758668"
}, {
"created_time": "1342568896",
"text": "Happy Birthday \ud83c\udf89\ud83c\udf89\ud83c\udf89 was it a long labour \ud83d\ude02\ud83d\ude02\ud83d\ude02",
"from": {
"username": "relzie",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_15718874_75sq_1332290975.jpg",
"id": "15718874",
"full_name": "Relz"
},
"id": "237807595966960050"
}, {
"created_time": "1342579289",
"text": "Cheers #kevin and Happy Birthday #instagram thank you so much Kevin for creating instagram it's truly got me back out there taking more photos and falling in love with photography all over again...",
"from": {
"username": "bpphotographs",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_12149171_75sq_1339457436.jpg",
"id": "12149171",
"full_name": "bpphotographs"
},
"id": "237894779172557723"
}, {
"created_time": "1342652660",
"text": "#suz_h",
"from": {
"username": "ianyorke",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_1041088_75sq_1339844137.jpg",
"id": "1041088",
"full_name": "Ian Yorke"
},
"id": "238510264889118973"
}, {
"created_time": "1342667574",
"text": "Love your app\ud83d\udc97\ud83d\udc97\ud83d\udc97\ud83d\udc97\ud83d\udc97",
"from": {
"username": "gothangel1997",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_186799125_75sq_1342668825.jpg",
"id": "186799125",
"full_name": "Angel Mercado"
},
"id": "238635368570687940"
}, {
"created_time": "1342843274",
"text": "\ud83d\ude09\u2764",
"from": {
"username": "andescu",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_145554839_75sq_1337641309.jpg",
"id": "145554839",
"full_name": "andescu"
},
"id": "240109245637333685"
}]
},
"filter": "Sierra",
"created_time": "1342332400",
"link": "http:\/\/instagr.am\/p\/NF0G6bABA2\/",
"likes": {
"count": 3282,
"data": [{
"username": "caysondesigns",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_23464609_75sq_1329177054.jpg",
"id": "23464609",
"full_name": "Jasmine at Cayson Designs"
}, {
"username": "m_azooz16",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_198179404_75sq_1343096096.jpg",
"id": "198179404",
"full_name": "m_azooz16"
}, {
"username": "shulinghuang",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_144887495_75sq_1342460246.jpg",
"id": "144887495",
"full_name": "H\u3002"
}, {
"username": "caitlyn_hammonds",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/anonymousUser.jpg",
"id": "198322184",
"full_name": "caitlyn_hammonds"
}, {
"username": "sophiafrancis",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_43092892_75sq_1340548333.jpg",
"id": "43092892",
"full_name": "Sophiaaa."
}, {
"username": "beatle1234",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/anonymousUser.jpg",
"id": "197988834",
"full_name": "beatle1234"
}, {
"username": "yaya0318",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_74056_75sq_1287001004.jpg",
"id": "74056",
"full_name": "Mao Yaya"
}, {
"username": "lizray1",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_198450407_75sq_1343144120.jpg",
"id": "198450407",
"full_name": "lizray1"
}, {
"username": "rawr1234321",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_198492630_75sq_1343151765.jpg",
"id": "198492630",
"full_name": "rawr1234321"
}]
},
"images": {
"low_resolution": {
"url": "http:\/\/distilleryimage6.s3.amazonaws.com\/3ec59e18ce4311e1b8031231380702ee_6.jpg",
"width": 306,
"height": 306
},
"thumbnail": {
"url": "http:\/\/distilleryimage6.s3.amazonaws.com\/3ec59e18ce4311e1b8031231380702ee_5.jpg",
"width": 150,
"height": 150
},
"standard_resolution": {
"url": "http:\/\/distilleryimage6.s3.amazonaws.com\/3ec59e18ce4311e1b8031231380702ee_7.jpg",
"width": 612,
"height": 612
}
},
"caption": {
"created_time": "1342332465",
"text": "Mellivora capensis - eagle rare, peat, honey, lemon, pineapple, black cardamom, chili, coconut foam",
"from": {
"username": "kevin",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_3_75sq_1325536697.jpg",
"id": "3",
"full_name": "Kevin Systrom"
},
"id": "235824269324456712"
},
"user_has_liked": false,
"id": "235823728972271670_3",
"user": {
"username": "kevin",
"website": "",
"bio": "CEO & Co-founder of Instagram",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_3_75sq_1325536697.jpg",
"full_name": "Kevin Systrom",
"id": "3"
}
}, .....
So I'm looking to iterate over each post individually and extract the tags, id, and the image URLs. I'm having some trouble (since I'm a PHP developer and finding it really hard to work with Python as a beginner).
Here's the code that I'm using to iterate over each post and process the attributes. I don't want to store them in a list or dict; I just want to search through the tags.
(This is just attempted code, since I couldn't figure out which loop I should use.)
import simplejson

info = simplejson.load(info)
print type(info['data'])  # I get it as a list
for k, v in info['data']:
    print v
I could have done this easily in PHP with a foreach:
foreach ($info->data as $i) {
    $tags = $i->tags();
    $id = $i->id();
}
If info['data'] is a list, you should be able to iterate over it like so:
for post in info['data']:
    tags = post['tags']
    id = post['id']
    image_urls = []  # An empty list -- we'll fill it below
    for img_type in ['low_resolution', 'thumbnail', 'standard_resolution']:
        image_urls.append(post['images'][img_type]['url'])
    # Now image_urls has all the image urls in it
I think the part that's rather different from PHP is that where the key is "tags" in the JSON structure, you have to use the string "tags" in Python, whereas you would use the literal tags() in PHP.
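Since the stated goal is just to search through the tags, a short follow-up sketch could filter posts directly (the hashtag 'birthday' below is only an illustrative value):
wanted = 'birthday'
for post in info['data']:
    if wanted in post['tags']:
        # Print the id and standard-resolution URL of every matching post.
        print(post['id'], post['images']['standard_resolution']['url'])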
