Related
I have a JSON file that looks like this:
[ {
"id": 121,
"name": "Lebanon",
"iso3": "LBN",
"iso2": "LB",
"numeric_code": "422",
"phone_code": "961",
"capital": "Beirut",
"currency": "LBP",
"currency_name": "Lebanese pound",
"currency_symbol": "£",
"tld": ".lb",
"native": "لبنان",
"region": "Asia",
"subregion": "Western Asia",
"timezones": [
{
"zoneName": "Asia\/Beirut",
"gmtOffset": 7200,
"gmtOffsetName": "UTC+02:00",
"abbreviation": "EET",
"tzName": "Eastern European Time"
}
],
"translations": {
"kr": "레바논",
"br": "Líbano",
"pt": "Líbano",
"nl": "Libanon",
"hr": "Libanon",
"fa": "لبنان",
"de": "Libanon",
"es": "Líbano",
"fr": "Liban",
"ja": "レバノン",
"it": "Libano",
"cn": "黎巴嫩",
"tr": "Lübnan"
},
"latitude": "33.83333333",
"longitude": "35.83333333",
"emoji": "🇱🇧",
"emojiU": "U+1F1F1 U+1F1E7",
"states": [
{
"id": 2285,
"name": "Akkar Governorate",
"state_code": "AK",
"latitude": "34.53287630",
"longitude": "36.13281320",
"type": null,
"cities": [
{
"id": 65939,
"name": "Caza de Aakkar",
"latitude": "34.53333000",
"longitude": "36.16667000"
}
]
},
{
"id": 2283,
"name": "Baalbek-Hermel Governorate",
"state_code": "BH",
"latitude": "34.26585560",
"longitude": "36.34980970",
"type": null,
"cities": [
{
"id": 65933,
"name": "Baalbek",
"latitude": "34.00583000",
"longitude": "36.21806000"
},
{
"id": 65941,
"name": "Caza de Baalbek",
"latitude": "34.09822000",
"longitude": "36.27157000"
}
]
},
{
"id": 2286,
"name": "Beirut Governorate",
"state_code": "BA",
"latitude": "33.88861060",
"longitude": "35.49547720",
"type": null,
"cities": [
{
"id": 65936,
"name": "Beirut",
"latitude": "33.89332000",
"longitude": "35.50157000"
},
{
"id": 65951,
"name": "Ra’s Bayrūt",
"latitude": "33.90000000",
"longitude": "35.48333000"
}
]
},
{
"id": 2287,
"name": "Beqaa Governorate",
"state_code": "BI",
"latitude": "33.84626620",
"longitude": "35.90194890",
"type": null,
"cities": [
{
"id": 65930,
"name": "Aanjar",
"latitude": "33.72778000",
"longitude": "35.93111000"
},
{
"id": 65955,
"name": "Zahlé",
"latitude": "33.84675000",
"longitude": "35.90203000"
}
]
},
{
"id": 2282,
"name": "Mount Lebanon Governorate",
"state_code": "JL",
"latitude": "33.81008580",
"longitude": "35.59731390",
"type": null,
"cities": [
{
"id": 65932,
"name": "Baabda",
"latitude": "33.83389000",
"longitude": "35.54417000"
},
{
"id": 65937,
"name": "Bhamdoun",
"latitude": "33.79500000",
"longitude": "35.65111000"
},
{
"id": 65938,
"name": "Bhamdoûn el Mhatta",
"latitude": "33.80861000",
"longitude": "35.65972000"
},
{
"id": 65940,
"name": "Caza de Baabda",
"latitude": "33.84592000",
"longitude": "35.66791000"
},
{
"id": 65947,
"name": "Jbaïl",
"latitude": "34.12111000",
"longitude": "35.64806000"
},
{
"id": 65948,
"name": "Jounieh",
"latitude": "33.98083000",
"longitude": "35.61778000"
}
]
},
{
"id": 2288,
"name": "Nabatieh Governorate",
"state_code": "NA",
"latitude": "33.37716930",
"longitude": "35.48382930",
"type": null,
"cities": [
{
"id": 65931,
"name": "Ain Ebel",
"latitude": "33.11023000",
"longitude": "35.40251000"
},
{
"id": 65942,
"name": "Caza de Bent Jbaïl",
"latitude": "33.15964000",
"longitude": "35.41137000"
},
{
"id": 65943,
"name": "Caza de Nabatîyé",
"latitude": "33.39435000",
"longitude": "35.44483000"
},
{
"id": 65946,
"name": "Habboûch",
"latitude": "33.40729000",
"longitude": "35.48169000"
},
{
"id": 65949,
"name": "Marjayoûn",
"latitude": "33.36028000",
"longitude": "35.59111000"
},
{
"id": 65950,
"name": "Nabatîyé et Tahta",
"latitude": "33.37889000",
"longitude": "35.48389000"
}
]
},
{
"id": 2284,
"name": "North Governorate",
"state_code": "AS",
"latitude": "34.43806250",
"longitude": "35.83082330",
"type": null,
"cities": [
{
"id": 65934,
"name": "Batroûn",
"latitude": "34.25528000",
"longitude": "35.65806000"
},
{
"id": 65935,
"name": "Bcharré",
"latitude": "34.25083000",
"longitude": "36.01056000"
},
{
"id": 65953,
"name": "Tripoli",
"latitude": "34.43352000",
"longitude": "35.84415000"
}
]
},
{
"id": 2281,
"name": "South Governorate",
"state_code": "JA",
"latitude": "33.27214790",
"longitude": "35.20327780",
"type": null,
"cities": [
{
"id": 65944,
"name": "En Nâqoûra",
"latitude": "33.11806000",
"longitude": "35.13972000"
},
{
"id": 65945,
"name": "Ghazieh",
"latitude": "33.51750000",
"longitude": "35.36889000"
},
{
"id": 65952,
"name": "Sidon",
"latitude": "33.55751000",
"longitude": "35.37148000"
},
{
"id": 65954,
"name": "Tyre",
"latitude": "33.27333000",
"longitude": "35.19389000"
}
]
}
]
}
]
How can I access states > name ?
For example, I need Akkar Governorate from
"states": [
{
"id": 2285,
"name": "Akkar Governorate",
"state_code": "AK",
"latitude": "34.53287630",
"longitude": "36.13281320",
"type": null,
"cities": [
{
"id": 65939,
"name": "Caza de Aakkar",
"latitude": "34.53333000",
"longitude": "36.16667000"
}
]
}
]
How can I manage a loop to do that?
Use:
import json

# Parse the JSON text into Python objects; for the file shown in the question
# the top level is a list of country objects.
yourJsonObj = json.loads("your json string")
for country in yourJsonObj:
    # Each country carries a "states" list; every state is a dict with a "name".
    for state in country["states"]:
        # Access/Manipulate data
        print(state["name"])
Here are more examples:
JSON Read, write, parse
Good luck!
First you need to:
import json

# Open the file as UTF-8 and parse it straight from the file object;
# `data` then holds the decoded top-level JSON value.
with open(r"C:\Users\Bar\AppData\Local\Programs\Python\Python310\Scripts\contries+states+cities.json", encoding="utf8") as f:
    data = json.load(f)
You can read name like this:
# Walk the hierarchy: first country -> its "states" list -> state at index 1 -> "name".
# In the sample above index 1 is "Baalbek-Hermel Governorate"; index 0 is "Akkar Governorate".
data[0]['states'][1]['name'] #hierarchy way
You can also wrap the lookup in a function that searches by name:
def get_data(name, states=None):
    """Print every state record whose ``"name"`` equals *name*.

    Args:
        name: State name to look for (exact, case-sensitive comparison).
        states: Optional list of state dicts to search. When omitted, the
            states of the first country in the module-level ``data`` are used.

    Returns:
        None. Each matching state dict is printed; nothing is printed when
        no state matches.
    """
    if states is None:
        # Default kept for existing callers that rely on the global `data`.
        states = data[0]['states']
    for state in states:
        if state['name'] == name:
            print(state)
get_data('Akkar Governorate')
my data looks like this:
[
{
"id": "00f0bbe514dcaf262c8a",
"status": "CL",
"type": "opportunity",
"locations": [
{
"name": "New York, USA",
"lat": 99.0853,
"lng": 99.7818,
"id": "456",
"type": "CI"
},
{
"name": "Boston, USA",
"lat": 80.0853,
"lng": 80.7818,
"id": "555",
"type": "CI"
},
{
"name": "London, UK",
"lat": 10.0853,
"lng": 10.7818,
"id": "999",
"type": "CI"
}
]
},
{
"id": "sadl9asod01",
"status": "CL",
"type": "opportunity",
"locations": [
{
"name": "Boston, USA",
"lat": 80.0853,
"lng": 80.7818,
"id": "555",
"type": "CI"
},
]
},
{
"id": "13094ulk",
"status": "CL",
"type": "project", # has right location but not type
"locations": [
{
"name": "Boston, USA",
"lat": 80.0853,
"lng": 80.7818,
"id": "555",
"type": "CI"
},
]
}
]
I want to build a query that the type must be opportunity:
# Bool query with a single `must` clause: the `type` field has to match 'opportunity'.
type_q = ElasticQ('bool', must=[ElasticQ('match', type='opportunity')])
# Attach that query to a search created from this object's index.
query = self.index.search().query(type_q)
I know how to build an "in" query with the dsl, for example:
excluded_ids = self._excluded_jobs() # list
# exclude() negates the `terms` clause, i.e. drops documents whose `id` is in excluded_ids.
query = query.exclude('terms', id=excluded_ids)
But how can I add to the query what I would write in SQL like this:
WHERE type='opportunity'
AND
location.id in (1, 2, 3)
Here, location refers to an object inside the locations array of the doc.
Something like:
# Both clauses sit in `must`, so a document has to satisfy them together (logical AND).
type_q = ElasticQ('bool', must=[
ElasticQ('match', type='opportunity'),
# NOTE(review): `id=` targets the document's own `id` field; to match ids inside the
# `locations` array the field would presumably be `locations.id` — confirm against the mapping.
ElasticQ('terms', id=excluded_ids),
])
Or, if you actually wanted to exclude those IDs:
# `must` clauses are required; `must_not` rejects any document matching its clauses,
# so this keeps 'opportunity' documents whose `id` is NOT in excluded_ids.
type_q = ElasticQ(
    'bool',
    must=[ElasticQ('match', type='opportunity')],
    must_not=[ElasticQ('terms', id=excluded_ids)],
)
I just downloaded some JSON from Spotify and took a look at pd.json_normalize().
But if I normalise the data I still have dictionaries within my dataframe. Setting the level doesn't help either.
DATA I want to have in my dataframe:
{
"collaborative": false,
"description": "",
"external_urls": {
"spotify": "https://open.spotify.com/playlist/5"
},
"followers": {
"href": null,
"total": 0
},
"href": "https://api.spotify.com/v1/playlists/5?additional_types=track",
"id": "5",
"images": [
{
"height": 640,
"url": "https://i.scdn.co/image/a",
"width": 640
}
],
"name": "Another",
"owner": {
"display_name": "user",
"external_urls": {
"spotify": "https://open.spotify.com/user/user"
},
"href": "https://api.spotify.com/v1/users/user",
"id": "user",
"type": "user",
"uri": "spotify:user:user"
},
"primary_color": null,
"public": true,
"snapshot_id": "M2QxNTcyYTkMDc2",
"tracks": {
"href": "https://api.spotify.com/v1/playlists/100&additional_types=track",
"items": [
{
"added_at": "2020-12-13T18:34:09Z",
"added_by": {
"external_urls": {
"spotify": "https://open.spotify.com/user/user"
},
"href": "https://api.spotify.com/v1/users/user",
"id": "user",
"type": "user",
"uri": "spotify:user:user"
},
"is_local": false,
"primary_color": null,
"track": {
"album": {
"album_type": "album",
"artists": [
{
"external_urls": {
"spotify": "https://open.spotify.com/artist/1dfeR4Had"
},
"href": "https://api.spotify.com/v1/artists/1dfDbWqFHLkxsg1d",
"id": "1dfeR4HaWDbWqFHLkxsg1d",
"name": "Q",
"type": "artist",
"uri": "spotify:artist:1dfeRqFHLkxsg1d"
}
],
"available_markets": [
"CA",
"US"
],
"external_urls": {
"spotify": "https://open.spotify.com/album/6wPXmlLzZ5cCa"
},
"href": "https://api.spotify.com/v1/albums/6wPXUJ9LzZ5cCa",
"id": "6wPXUmYJ9zZ5cCa",
"images": [
{
"height": 640,
"url": "https://i.scdn.co/image/ab676620a47",
"width": 640
},
{
"height": 300,
"url": "https://i.scdn.co/image/ab67616d0620a47",
"width": 300
},
{
"height": 64,
"url": "https://i.scdn.co/image/ab603e6620a47",
"width": 64
}
],
"name": "The (Deluxe ",
"release_date": "1920-07-17",
"release_date_precision": "day",
"total_tracks": 15,
"type": "album",
"uri": "spotify:album:6m5cCa"
},
"artists": [
{
"external_urls": {
"spotify": "https://open.spotify.com/artist/1dg1d"
},
"href": "https://api.spotify.com/v1/artists/1dsg1d",
"id": "1dfeR4HaWDbWqFHLkxsg1d",
"name": "Q",
"type": "artist",
"uri": "spotify:artist:1dxsg1d"
}
],
"available_markets": [
"CA",
"US"
],
"disc_number": 1,
"duration_ms": 21453,
"episode": false,
"explicit": false,
"external_ids": {
"isrc": "GBU6015"
},
"external_urls": {
"spotify": "https://open.spotify.com/track/5716J"
},
"href": "https://api.spotify.com/v1/tracks/5716J",
"id": "5716J",
"is_local": false,
"name": "Another",
"popularity": 73,
"preview_url": null,
"track": true,
"track_number": 3,
"type": "track",
"uri": "spotify:track:516J"
},
"video_thumbnail": {
"url": null
}
}
],
"limit": 100,
"next": null,
"offset": 0,
"previous": null,
"total": 1
},
"type": "playlist",
"uri": "spotify:playlist:fek"
}
So what are best practices to read nested data like this into one dataframe in pandas?
I'm glad for any advice.
EDIT:
So basically I want all keys as columns in my dataframe. But normalising stops at "tracks.items", and if I normalise that column again I run into the same nesting problem one level down.
It depends on the information you are looking for. Take a look at pandas.read_json() to see if that can work. Also you can select data as such
# Hand-written subset of the playlist JSON; note that 'false' here is a string, not a boolean.
json_output = {"collaborative": 'false',"description": "", "external_urls": {"spotify": "https://open.spotify.com/playlist/5"}}
# NOTE(review): `df` is not created in this snippet — assumes an existing DataFrame; confirm.
df['collaborative'] = json_output['collaborative'] #set value of your df to value of returned json values
I have this json file which has some data like this:
[
{
"stats": [
{
"city": "'s",
"latitude": "51",
"longitude": "5",
"region": "Europe",
"date_range": "date_all_time",
},
{
"city": "'s",
"latitude": "5",
"longitude": "5.67",
"region": "Europe",
"date_range": "date_last_year",
},
{
"city": "Aalborg",
"latitude": "57.03",
"longitude": "9.007",
"region": "Europe",
"date_range": "date_all_time",
},
{
"city": "Aalborg",
"latitude": "57.033",
"longitude": "9.0007",
"region": "Europe",
"date_range": "date_last_year",
},
{
"city": "Aalborg",
"latitude": "57.0",
"longitude": "9.97",
"region": "Europe",
"date_range": "date_last_month",
},
{
"city": "Aarau",
"latitude": "47.32",
"longitude": "8.05",
"region": "Europe",
"date_range": "date_last_year",
},
And I want to group all the records that share the same date_range value, say "date_range": "date_last_year". I was trying this code, but when I print the result it shows "The filtered dictionary is : {}".
Please help me, I'm just a beginner learning JSON and Python. Thank you.
# Builds a dict holding only those keys from the list below that are present in
# finaljson, then prints it.
# NOTE(review): the sample data spells the key "date_range" (lowercase), so 'Date_Range'
# cannot match it, which would explain the empty result. `finaljson` is defined
# elsewhere — confirm its shape.
def convert_object():
res = dict((k, finaljson[k]) for k in ['Date_Range']
if k in finaljson)
print("The filtered dictionary is : " + str(res))
The actual result/output I want looks like this, with all the date_last_year values:
{
"city": "'s",
"latitude": "51",
"longitude": "5",
"region": "Europe",
"date_range": "date_last_year",
},
{
"city": "'s",
"latitude": "5",
"longitude": "5.67",
"region": "Europe",
"date_range": "date_last_year",
},
{
"city": "Aalborg",
"latitude": "57.03",
"longitude": "9.007",
"region": "Europe",
"date_range": "date_last_year",
},
{
"city": "Aalborg",
"latitude": "57.033",
"longitude": "9.0007",
"region": "Europe",
"date_range": "date_last_year",
},
{
"city": "Aalborg",
"latitude": "57.0",
"longitude": "9.97",
"region": "Europe",
"date_range": "date_last_year",
},
{
"city": "Aarau",
"latitude": "47.32",
"longitude": "8.05",
"region": "Europe",
"date_range": "date_last_year",
},
Assuming you have "data" as a list with your data.
import json

# Group the records by their "date_range" value: final_json maps each
# date_range string to the list of records that carry it.
# `data` must be the flat list of records (e.g. the "stats" list from the file).
final_json = dict()
for obj in data:
    # setdefault creates the empty list the first time a date_range is seen.
    final_json.setdefault(obj['date_range'], []).append(obj)

print(json.dumps(final_json, indent=4))
Think of JSON as a nested dictionary where each level is an index.
I have the following JSON object that I get from the Instagram API, it can have n number of posts (depending upon the count parameter provided).
{
"pagination": {
"next_url": "https:\/\/api.instagram.com\/v1\/users\/3\/media\/recent?access_token=184046392.f59def8.c5726b469ad2462f85c7cea5f72083c0&max_id=205140190233104928_3",
"next_max_id": "205140190233104928_3"
},
"meta": {
"code": 200
},
"data": [{
"attribution": null,
"tags": [],
"type": "image",
"location": {
"latitude": 37.798594362,
"name": "Presidio Bowling Center",
"longitude": -122.459878922,
"id": 27052
},
"comments": {
"count": 132,
"data": [{
"created_time": "1342734265",
"text": "Distinguishing!",
"from": {
"username": "naiicamilos",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_53690312_75sq_1336573463.jpg",
"id": "53690312",
"full_name": "Naii Camilos"
},
"id": "239194812924826175"
}, {
"created_time": "1342737428",
"text": "#kevin in Spanish Presidio means Jail",
"from": {
"username": "jm0426",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_25881992_75sq_1342156673.jpg",
"id": "25881992",
"full_name": "Juan Mayen"
},
"id": "239221343768285211"
}, {
"created_time": "1342768120",
"text": "Good imagination",
"from": {
"username": "kidloca",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_193133903_75sq_1342032241.jpg",
"id": "193133903",
"full_name": "Khaleda Noon"
},
"id": "239478811731694145"
}, {
"created_time": "1342775967",
"text": "Cwl!",
"from": {
"username": "awesomeath",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_179252164_75sq_1339745821.jpg",
"id": "179252164",
"full_name": "awesomeath"
},
"id": "239544638740894674"
}, {
"created_time": "1342796153",
"text": "\u597d\u7f8e\u263a",
"from": {
"username": "hidelau",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_47295330_75sq_1342763977.jpg",
"id": "47295330",
"full_name": "Hide Lau"
},
"id": "239713963951001995"
}, {
"created_time": "1343018007",
"text": "#mindfreak",
"from": {
"username": "info2021",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/anonymousUser.jpg",
"id": "27664191",
"full_name": "info2021"
},
"id": "241575017119224582"
}, {
"created_time": "1343068374",
"text": "#kevin please share and promote my last pic. This will be the new hype as instagram",
"from": {
"username": "thansy_mansy",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_189019343_75sq_1342951587.jpg",
"id": "189019343",
"full_name": "thansy_mansy"
},
"id": "241997523093295303"
}, {
"created_time": "1343068382",
"text": "#kevin :P",
"from": {
"username": "thansy_mansy",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_189019343_75sq_1342951587.jpg",
"id": "189019343",
"full_name": "thansy_mansy"
},
"id": "241997589589790922"
}]
},
"filter": "Rise",
"created_time": "1342676212",
"link": "http:\/\/instagr.am\/p\/NQD4KAABKF\/",
"likes": {
"count": 4810,
"data": [{
"username": "caitlyn_hammonds",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/anonymousUser.jpg",
"id": "198322184",
"full_name": "caitlyn_hammonds"
}, {
"username": "sophiafrancis",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_43092892_75sq_1340548333.jpg",
"id": "43092892",
"full_name": "Sophiaaa."
}, {
"username": "amna7861",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_175807260_75sq_1343135903.jpg",
"id": "175807260",
"full_name": "Amna Haroon"
}, {
"username": "yaya0318",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_74056_75sq_1287001004.jpg",
"id": "74056",
"full_name": "Mao Yaya"
}, {
"username": "jay_damage",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_197465040_75sq_1342932411.jpg",
"id": "197465040",
"full_name": "jay_damage"
}, {
"username": "reves",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_671833_75sq_1335966794.jpg",
"id": "671833",
"full_name": "Fernando D. Ramirez"
}, {
"username": "lizray1",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_198450407_75sq_1343144120.jpg",
"id": "198450407",
"full_name": "lizray1"
}, {
"username": "alivewtheglory",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_37907416_75sq_1341561441.jpg",
"id": "37907416",
"full_name": "Marilynn C"
}, {
"username": "mnforever55",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_29255977_75sq_1334833008.jpg",
"id": "29255977",
"full_name": "mnforever55"
}]
},
"images": {
"low_resolution": {
"url": "http:\/\/distilleryimage9.s3.amazonaws.com\/beb7f896d16311e19fe21231380f3636_6.jpg",
"width": 306,
"height": 306
},
"thumbnail": {
"url": "http:\/\/distilleryimage9.s3.amazonaws.com\/beb7f896d16311e19fe21231380f3636_5.jpg",
"width": 150,
"height": 150
},
"standard_resolution": {
"url": "http:\/\/distilleryimage9.s3.amazonaws.com\/beb7f896d16311e19fe21231380f3636_7.jpg",
"width": 612,
"height": 612
}
},
"caption": {
"created_time": "1342676255",
"text": "Happy birthday #amy !",
"from": {
"username": "kevin",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_3_75sq_1325536697.jpg",
"id": "3",
"full_name": "Kevin Systrom"
},
"id": "238708186813567655"
},
"user_has_liked": false,
"id": "238707833418289797_3",
"user": {
"username": "kevin",
"website": "",
"bio": "CEO & Co-founder of Instagram",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_3_75sq_1325536697.jpg",
"full_name": "Kevin Systrom",
"id": "3"
}
}, {
"attribution": null,
"tags": [],
"type": "image",
"location": {
"latitude": 38.503100608,
"name": "Goose & Gander",
"longitude": -122.468387538,
"id": 12059278
},
"comments": {
"count": 85,
"data": [{
"created_time": "1342555499",
"text": "Cheers !!! \ud83d\ude18\ud83d\ude18",
"from": {
"username": "kattiab",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_1345073_75sq_1340495505.jpg",
"id": "1345073",
"full_name": "kattia b"
},
"id": "237695212468572732"
}, {
"created_time": "1342558279",
"text": "happy birthday instagram!",
"from": {
"username": "alanasayshi",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_4235095_75sq_1341960681.jpg",
"id": "4235095",
"full_name": "Alana Boy\u00e9r"
},
"id": "237718535382504383"
}, {
"created_time": "1342567977",
"text": "Happy Natal Day Instagram!",
"from": {
"username": "cynrtst",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_12493918_75sq_1341538462.jpg",
"id": "12493918",
"full_name": "Cynthia L"
},
"id": "237799888639758668"
}, {
"created_time": "1342568896",
"text": "Happy Birthday \ud83c\udf89\ud83c\udf89\ud83c\udf89 was it a long labour \ud83d\ude02\ud83d\ude02\ud83d\ude02",
"from": {
"username": "relzie",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_15718874_75sq_1332290975.jpg",
"id": "15718874",
"full_name": "Relz"
},
"id": "237807595966960050"
}, {
"created_time": "1342579289",
"text": "Cheers #kevin and Happy Birthday #instagram thank you so much Kevin for creating instagram it's truly got me back out there taking more photos and falling in love with photography all over again...",
"from": {
"username": "bpphotographs",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_12149171_75sq_1339457436.jpg",
"id": "12149171",
"full_name": "bpphotographs"
},
"id": "237894779172557723"
}, {
"created_time": "1342652660",
"text": "#suz_h",
"from": {
"username": "ianyorke",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_1041088_75sq_1339844137.jpg",
"id": "1041088",
"full_name": "Ian Yorke"
},
"id": "238510264889118973"
}, {
"created_time": "1342667574",
"text": "Love your app\ud83d\udc97\ud83d\udc97\ud83d\udc97\ud83d\udc97\ud83d\udc97",
"from": {
"username": "gothangel1997",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_186799125_75sq_1342668825.jpg",
"id": "186799125",
"full_name": "Angel Mercado"
},
"id": "238635368570687940"
}, {
"created_time": "1342843274",
"text": "\ud83d\ude09\u2764",
"from": {
"username": "andescu",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_145554839_75sq_1337641309.jpg",
"id": "145554839",
"full_name": "andescu"
},
"id": "240109245637333685"
}]
},
"filter": "Sierra",
"created_time": "1342332400",
"link": "http:\/\/instagr.am\/p\/NF0G6bABA2\/",
"likes": {
"count": 3282,
"data": [{
"username": "caysondesigns",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_23464609_75sq_1329177054.jpg",
"id": "23464609",
"full_name": "Jasmine at Cayson Designs"
}, {
"username": "m_azooz16",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_198179404_75sq_1343096096.jpg",
"id": "198179404",
"full_name": "m_azooz16"
}, {
"username": "shulinghuang",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_144887495_75sq_1342460246.jpg",
"id": "144887495",
"full_name": "H\u3002"
}, {
"username": "caitlyn_hammonds",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/anonymousUser.jpg",
"id": "198322184",
"full_name": "caitlyn_hammonds"
}, {
"username": "sophiafrancis",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_43092892_75sq_1340548333.jpg",
"id": "43092892",
"full_name": "Sophiaaa."
}, {
"username": "beatle1234",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/anonymousUser.jpg",
"id": "197988834",
"full_name": "beatle1234"
}, {
"username": "yaya0318",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_74056_75sq_1287001004.jpg",
"id": "74056",
"full_name": "Mao Yaya"
}, {
"username": "lizray1",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_198450407_75sq_1343144120.jpg",
"id": "198450407",
"full_name": "lizray1"
}, {
"username": "rawr1234321",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_198492630_75sq_1343151765.jpg",
"id": "198492630",
"full_name": "rawr1234321"
}]
},
"images": {
"low_resolution": {
"url": "http:\/\/distilleryimage6.s3.amazonaws.com\/3ec59e18ce4311e1b8031231380702ee_6.jpg",
"width": 306,
"height": 306
},
"thumbnail": {
"url": "http:\/\/distilleryimage6.s3.amazonaws.com\/3ec59e18ce4311e1b8031231380702ee_5.jpg",
"width": 150,
"height": 150
},
"standard_resolution": {
"url": "http:\/\/distilleryimage6.s3.amazonaws.com\/3ec59e18ce4311e1b8031231380702ee_7.jpg",
"width": 612,
"height": 612
}
},
"caption": {
"created_time": "1342332465",
"text": "Mellivora capensis - eagle rare, peat, honey, lemon, pineapple, black cardamom, chili, coconut foam",
"from": {
"username": "kevin",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_3_75sq_1325536697.jpg",
"id": "3",
"full_name": "Kevin Systrom"
},
"id": "235824269324456712"
},
"user_has_liked": false,
"id": "235823728972271670_3",
"user": {
"username": "kevin",
"website": "",
"bio": "CEO & Co-founder of Instagram",
"profile_picture": "http:\/\/images.instagram.com\/profiles\/profile_3_75sq_1325536697.jpg",
"full_name": "Kevin Systrom",
"id": "3"
}
}, .....
So I'm looking to iterate over each post individually and extract the tags, id, and the image URLs. I'm having some trouble (since I'm a PHP developer and finding it really hard to work with Python as a beginner).
Here's the code that I'm using to iterate over each post and process the attributes provided. I don't want to store them in a list or dict. I just want to search through the tags.
(This is just attempted code, since I couldn't find which loop I should use.)
# Parse the JSON response into Python objects (Python 2 syntax below).
info= simplejson.load(info)
print type(info['data']) # I get it as a list
# NOTE(review): each element of info['data'] is a single post object, so unpacking it
# into two names (k, v) does not walk the posts the way the PHP foreach does.
for k, v in info['data']:
print v
I could have done this easily using php with a foreach :
// Visit each post in $info->data and read its tags and id.
foreach($info->data as $i) {
$tags = $i->tags();
$id = $i->id();
}
If info['data'] is a list, you should be able to iterate over it like so:
# info['data'] is a list of post dicts, so a plain for-loop visits one post at a time.
for post in info['data']:
    tags = post['tags']
    post_id = post['id']  # named post_id to avoid shadowing the built-in id()
    # Collect the URL of each image size offered for this post.
    image_urls = [
        post['images'][img_type]['url']
        for img_type in ['low_resolution', 'thumbnail', 'standard_resolution']
    ]
    # Now image_urls has all the image urls in it
I think the part that's rather different from PHP is that where the key is "tags" in the JSON structure, you have to use the string "tags" in Python, whereas you would use the literal tags() in PHP.