I have this JSON file, which contains information about pages that I retrieved using this
Python code:
import facebook # pip install facebook-sdk
import json
import codecs
# Create a connection to the Graph API with your access token
ACCESS_TOKEN = ''  # my access token
g = facebook.GraphAPI(ACCESS_TOKEN)
# Search for pages; the result is kept in ``s`` and also dumped to disk.
s = g.request('search', {'q': '&', 'type': 'page', 'limit': 5000, 'locale': 'ar_AR'})
# Use a context manager so the file is closed even if serialisation or the
# write fails part-way through.
with open("sampels.txt", 'w') as f:
    f.write(json.dumps(s, indent=1))
#########################################################
This is a snapshot of my samples JSON file:
{
"paging": {
"next": "https://graph.facebook.com/search?limit=5000&type=page&q=%26&locale=ar_AR&access_token=CAACEdEose0cBAIRlSOXkyk1lIMUIWViAoz5lf5t0pSdsu6lg5ZANJuYMIPZCy5N9KFQoLnpi1oxD8tNIaabWackCO31UYaAGkb38IPHxI33ldbRQDXJ02CtJrwE8NI4mZAz20OznLfuCpypDbxNYF3p9XauZCtoywoS9KJwAgW8NYgZCpD4ZBKfCBR5jjXnbcZD&offset=5000&__after_id=92240239536"
},
"data": [
{
"category": "\u0627\u0644\u062a\u0639\u0644\u064a\u0645",
"name": "The London School of Economics and Political Science - LSE",
"category_list": [
{
"id": "108051929285833",
"name": "\u0627\u0644\u0643\u0644\u064a\u0629 \u0648\u0627\u0644\u062c\u0627\u0645\u0639\u0629"
},
{
"id": "187751327923426",
"name": "\u0645\u0646\u0638\u0645\u0629 \u062a\u0639\u0644\u064a\u0645\u064a\u0629"
}
],
"id": "6127898346"
},
{
"category": "\u0628\u0636\u0627\u0626\u0639 \u0627\u0644\u0628\u064a\u0639 \u0628\u0627\u0644\u062a\u062c\u0632\u0626\u0629 \u0648\u0628\u0636\u0627\u0626\u0639 \u0627\u0644\u0645\u0633\u062a\u0647\u0644\u0643\u064a\u0646",
"name": "Stop & Shop",
"category_list": [
{
"id": "169207329791658",
"name": "\u0645\u062d\u0644 \u0628\u0642\u0627\u0644\u0629"
}
],
"id": "170000993071234"
},
{
"category": "\u0628\u0636\u0627\u0626\u0639 \u0627\u0644\u0628\u064a\u0639 \u0628\u0627\u0644\u062a\u062c\u0632\u0626\u0629 \u0648\u0628\u0636\u0627\u0626\u0639 \u0627\u0644\u0645\u0633\u062a\u0647\u0644\u0643\u064a\u0646",
"name": "C&A",
"category_list": [
{
"id": "186230924744328",
"name": "\u0645\u062a\u062c\u0631 \u0645\u0644\u0627\u0628\u0633"
}
],
"id": "109345009145382"
},
{
"category": "\u0645\u0646\u0638\u0645\u0629 \u063a\u064a\u0631 \u0631\u0628\u062d\u064a\u0629",
"name": "Rock and Roll Hall of Fame + Museum",
"category_list": [
{
"id": "396255327112122",
"name": "\u0645\u062a\u062c\u0631 \u0645\u0648\u0633\u064a\u0642\u0649"
},
Now what I want to do is get the "next" field in order to fetch my next 5000 pages into my database.
I have tried a lot, but I couldn't figure out a way to do this. Can anybody tell me how to take the next URL and how to pass it to the request function I have in my code?
g.request() returns a Python dict, which gives you access to the paging/next URL. Why do you dump it as JSON and write it to a file instead of just using it?
Related
This is the first time I'm working with JSON, and I'm trying to pull url out of the JSON below.
{
"name": "The_New11d112a_Company_Name",
"sections": [
{
"name": "Products",
"payload": [
{
"id": 1,
"name": "TERi Geriatric Patient Skills Trainer,
"type": "string"
}
]
},
{
"name": "Contact Info",
"payload": [
{
"id": 1,
"name": "contacts",
"url": "https://www.a3bs.com/catheterization-kits-8000892-3011958-3b-scientific,p_1057_31043.html",
"contacts": [
{
"name": "User",
"email": "Company Email",
"phone": "Company PhoneNumber"
}
],
"type": "contact"
}
]
}
],
"tags": [
"Male",
"Airway"
],
"_id": "0e4cd5c6-4d2f-48b9-acf2-5aa75ade36e1"
}
I have been able to access description and _id via
# Parse one line of JSON text into a Python object.
data = json.loads(line)
# NOTE(review): the original indentation was lost, so it is unclear whether
# only the next line or both following lines belong to this ``if`` -- confirm.
if 'xpath' in data:
xpath = data["_id"]
# Reads "description" from the first payload entry of the first section.
description = data["sections"][0]["payload"][0]["description"]
However, I can't seem to figure out a way to access url. Another issue is that there could be other items in sections, which makes indexing into Contact Info by position a non-starter.
Hope this helps:
import json
# Load the document and print every payload entry whose key contains "url".
with open("test.json", "r") as f:
    json_out = json.load(f)
for section in json_out["sections"]:
    # A section with no (or a null) "payload" simply contributes nothing.
    for item in section.get("payload") or []:
        for key in item:
            # Substring match, so keys such as "image_url" are reported too.
            if "url" in key:
                print(key, '->', item[key])
I think your JSON is malformed; it should look like this.
{
"name": "The_New11d112a_Company_Name",
"sections": [
{
"name": "Products",
"payload": [
{
"id": 1,
"name": "TERi Geriatric Patient Skills Trainer",
"type": "string"
}
]
},
{
"name": "Contact Info",
"payload": [
{
"id": 1,
"name": "contacts",
"url": "https://www.a3bs.com/catheterization-kits-8000892-3011958-3b-scientific,p_1057_31043.html",
"contacts": [
{
"name": "User",
"email": "Company Email",
"phone": "Company PhoneNumber"
}
],
"type": "contact"
}
]
}
],
"tags": [
"Male",
"Airway"
],
"_id": "0e4cd5c6-4d2f-48b9-acf2-5aa75ade36e1"
}
You can check it on http://json.parser.online.fr/.
And if you want to get the value of the url.
import json
# Open the file in a context manager so the handle is closed after parsing.
with open('yourJSONfile.json') as f:
    j = json.load(f)
# The url sits in the first payload entry of the second section.
print(j['sections'][1]['payload'][0]['url'])
I think it's worth writing a short function to get the URL(s), and then deciding whether to use the first URL found in the returned list or to skip processing if there's no URL available in your data.
The function should look like this:
def extract_urls(data):
    """Collect every ``url`` value found in the payloads of all sections.

    Args:
        data: Parsed JSON document (a dict) with an optional ``sections``
            list; each section may carry a ``payload`` list of dicts.

    Returns:
        A list of the ``url`` values in document order. The list is empty
        when no payload entry has a ``url`` key.
    """
    payloads = []
    # ``or []`` covers both a missing key and an explicit null value.
    for section in data.get('sections') or []:
        payloads.extend(section.get('payload') or [])
    return [entry['url'] for entry in payloads if 'url' in entry]
This should print out the URL
import json
# open json file to read
with open('test.json', 'r') as f:
    # load json, parameter as json text (file contents)
    data = json.loads(f.read())
# after observing format of JSON data, the location of the URL key
# is determined and the data variable is manipulated to extract the value
print(data['sections'][1]['payload'][0]['url'])
The exact location of the 'url' key:
1st (position) of the array which is the value of the key 'sections'
Inside the array value, there is a dict, and the key 'payload' contains an array
In the 0th (position) of the array is a dict with a key 'url'
While testing my solution, I noticed that the JSON provided is flawed; after fixing the three JSON flaws, I ended up with this.
{
"name": "The_New11d112a_Company_Name",
"sections": [
{
"name": "Products",
"payload": [
{
"id": 1,
"name": "TERi Geriatric Patient Skills Trainer",
"type": "string"
}
]
},
{
"name": "Contact Info",
"payload": [
{
"id": 1,
"name": "contacts",
"url": "https://www.a3bs.com/catheterization-kits-8000892-3011958-3b-scientific,p_1057_31043.html",
"contacts": [
{
"name": "User",
"email": "Company Email",
"phone": "Company PhoneNumber"
}
],
"type": "contact"
}
]
}
],
"tags": [
"Male",
"Airway"
],
"_id": "0e4cd5c6-4d2f-48b9-acf2-5aa75ade36e1"}
Using the JSON that was provided by Vincent55,
I wrote working code with exception handling, under certain assumptions.
Working Code:
## Assuming that the target data is always under sections[i].payload
from json import loads

# Read through a context manager so the file handle is not leaked.
with open("data.json") as f:
    line = f.read()
data = loads(line)["sections"]

for x in data:
    try:
        # With assumption that there is only one payload
        url = x["payload"][0]["url"]
    except (KeyError, IndexError):
        # No payload key, an empty payload list, or no url: skip the section.
        continue
    if url:
        print(url)
{
"meta": {
"code": 200
},
"response": {
"holidays": [
{
"name": "New Year's Day",
"description": "New Year\u2019s Day is celebrated many countries such as in India on the January 1 in the Gregorian calendar.",
"country": {
"id": "in",
"name": "India"
},
"date": {
"iso": "2021-01-01",
"datetime": {
"year": 2021,
"month": 1,
"day": 1
}
},
"type": [
"Optional holiday"
],
"locations": "All",
"states": "All"
},
{
"name": "Lohri",
"description": "Lohri is a restricted holiday in India",
"country": {
"id": "in",
"name": "India"
},
"date": {
"iso": "2021-01-13",
"datetime": {
"year": 2021,
"month": 1,
"day": 13
}
},
"type": [
"National holiday"
],
"locations": "All",
"states": "All"
}
]
}
}
This is my json file saved as dates.json.
I want to search for the value "New Year's Day" under the key name and then get the value of the iso key.
How can I do that?
I am a beginner in Python and JSON, so please tell me how to do this.
Is there any search algorithm or library that can help me?
One way to do it is using the modules json and jsonpath-rw-ext.
Use json module to read the json file and jsonpath-rw-ext to parse/filter.
https://github.com/sileht/python-jsonpath-rw-ext
I made it work for your example. Take a look at this.
#!/usr/bin/env python3
import json
import jsonpath_rw_ext
with open('dates.json') as json_file:
    data = json.load(json_file)
# JSONPath filter expressions refer to the current node with "@".
result = jsonpath_rw_ext.parse("$..holidays[?(@.name=='New Year\\'s Day')]").find(data)
# Each match wraps the matched object; ``.value`` is the holiday dict itself.
print([x.value for x in result])
To get the iso value use the code below.
#!/usr/bin/env python3
import json
import jsonpath_rw_ext
with open('dates.json') as json_file:
    data = json.load(json_file)
# JSONPath filter expressions refer to the current node with "@".
result = jsonpath_rw_ext.parse("$..holidays[?(@.name=='New Year\\'s Day')].date.iso").find(data)
# Prints the iso value of the first match.
print(result[0].value)
I'm trying to work with the Campaign Monitor API, posting JSON data through the API to update subscriber lists. I'm currently one change away from being able to send data.
Right now, my JSON data looks like this
{
"EmailAddress": "subscriber1@example.com",
"Name": "New Subscriber One",
"CustomFields": [
{
"Key": "website",
"Value": "http://example.com"
},
{
"Key": "interests",
"Value": "magic"
},
{
"Key": "interests",
"Value": "romantic walks"
},
{
"Key": "age",
"Value": "",
"Clear": true
}
],
},
{
"EmailAddress": "subscriber2@example.com",
"Name": "New Subscriber Two",
},
{
"EmailAddress": "subscriber3@example.com",
"Name": "New Subscriber Three",
}
}
I still need to add a new key value at the beginning of the JSON payload, incorporating the 'Subscribers' : my_json_data. How would I go about easily adding on the Subscribers key and placing my full and current json data into a list?
Final result should look like
{
'Subscribers' : [
{
"EmailAddress": "subscriber1@example.com",
"Name": "New Subscriber One",
"CustomFields": [
{
"Key": "website",
"Value": "http://example.com"
},
{
"Key": "interests",
"Value": "magic"
},
{
"Key": "interests",
"Value": "romantic walks"
},
{
"Key": "age",
"Value": "",
"Clear": true
}
],
},
{
"EmailAddress": "subscriber2@example.com",
"Name": "New Subscriber Two",
},
{
"EmailAddress": "subscriber3@example.com",
"Name": "New Subscriber Three",
}
}
]
}
I've tried to approach this with creating a new dictionary however when I convert that back to JSON I get more issues and headaches. Is there any easy way to keep everything as a JSON formatted dataset and add in the leading 'Subscribers' key?
This should do it, assuming you've got valid JSON.
# Wrap the existing payload in a one-element list under the 'Subscribers' key.
your_new_json = {'Subscribers': [your_current_json]}
I have a response object that I am receiving from an API call. The response contains several objects that are returned in a single call. What I want to do is grab information from each of the returned objects and store it in variables to use within the application. I know how to grab info from a JSON response when it returns a single object, but I am getting confused with multiple objects... I know how to automate the iteration process with something like a for loop, but it won't iterate.
here is a sample response that I am getting:
I want to grab the _id from both items.
{
'user':"<class 'synapse_pay_rest.models.users.user.User'>(id=..622d)",
'json':{
'_id':'..6e80',
'_links':{
'self':{
'href':'https://uat-api.synapsefi.com/v3.1/users/..22d/nodes/..56e80'
}
},
'allowed':'CREDIT-AND-DEBIT',
'client':{
'id':'..26a34',
'name':'Charlie Brown LLC'
},
'extra':{
'note':None,
'other':{
},
'supp_id':''
},
'info':{
'account_num':'8902',
'address':'PO BOX 85139, RICHMOND, VA, US',
'balance':{
'amount':'750.00',
'currency':'USD'
},
'bank_long_name':'CAPITAL ONE N.A.',
'bank_name':'CAPITAL ONE N.A.',
'class':'SAVINGS',
'match_info':{
'email_match':'not_found',
'name_match':'not_found',
'phonenumber_match':'not_found'
},
'name_on_account':' ',
'nickname':'SynapsePay Test Savings Account - 8902',
'routing_num':'6110',
'type':'BUSINESS'
},
<class 'synapse_pay_rest.models.nodes.ach_us_node.AchUsNode'>({
'user':"<class 'synapse_pay_rest.models.users.user.User'>(id=..622d)",
'json':{
'_id':'..56e83',
'_links':{
'self':{
'href':'https://uat-api.synapsefi.com/v3.1/users/..d622d/nodes/..6e83'
}
},
'allowed':'CREDIT-AND-DEBIT',
'client':{
'id':'599378ec6aef1b0021026a34',
'name':'Charlie Brown LLC'
},
'extra':{
'note':None,
'other':{
},
'supp_id':''
},
'info':{
'account_num':'8901',
'address':'PO BOX 85139, RICHMOND, VA, US',
'balance':{
'amount':'800.00',
'currency':'USD'
},
'bank_long_name':'CAPITAL ONE N.A.',
'bank_name':'CAPITAL ONE N.A.',
'class':'CHECKING',
'match_info':{
'email_match':'not_found',
'name_match':'not_found',
'phonenumber_match':'not_found'
},
'name_on_account':' ',
'nickname':'SynapsePay Test Checking Account - 8901',
'routing_num':'6110',
'type':'BUSINESS'
},
})
Here is the code that I have:
It won't grab any values...
The iteration needs to be done over the nodes variable, which is the JSON response object.
def listedLinkAccounts(request):
    """Fetch the logged-in user's ACH-US nodes and print each node's ``_id``.

    Args:
        request: The incoming request, passed to ``loggedInUser``.

    Returns:
        The node collection returned by ``Node.all``, unchanged.
    """
    currentUser = loggedInUser(request)
    currentProfile = Profile.objects.get(user=currentUser)
    user_id = currentProfile.synapse_id
    synapseUser = SynapseUser.by_id(client, str(user_id))
    options = {
        'page': 1,
        'per_page': 20,
        'type': 'ACH-US',
    }
    nodes = Node.all(synapseUser, **options)
    print(nodes)
    # ``nodes`` holds several node objects, so it cannot be indexed with
    # "_id" directly; read the id from each node in turn.
    # NOTE(review): assumes each node exposes its raw payload as ``.json``
    # (the printed repr shows a 'json' entry containing '_id') -- confirm
    # against the synapse_pay_rest version in use.
    node_ids = [node.json["_id"] for node in nodes]
    print(node_ids)
    return nodes
Here is a sample API response from the API documentation:
{
"error_code": "0",
"http_code": "200",
"limit": 20,
"node_count": 5,
"nodes": [
{
"_id": "594e5c694d1d62002f17e3dc",
"_links": {
"self": {
"href": "https://uat-api.synapsefi.com/v3.1/users/594e0fa2838454002ea317a0/nodes/594e5c694d1d62002f17e3dc"
}
},
"allowed": "CREDIT-AND-DEBIT",
"client": {
"id": "589acd9ecb3cd400fa75ac06",
"name": "SynapseFI"
},
"extra": {
"other": {},
"supp_id": "ABC124"
},
"info": {
"account_num": "7443",
"address": "PLACE DE LA REPUBLIQUE 4 CROIX 59170 FR",
"balance": {
"amount": "0.00",
"currency": "USD"
},
"bank_long_name": "3 SUISSES INTERNATIONAL",
"bank_name": "3 SUISSES INTERNATIONAL",
"name_on_account": " ",
"nickname": "Some Account"
},
"is_active": true,
"timeline": [
{
"date": 1498307689471,
"note": "Node created."
},
{
"date": 1498307690130,
"note": "Unable to send micro deposits as node type is not ACH-US."
}
],
"type": "WIRE-INT",
"user_id": "594e0fa2838454002ea317a0"
},
{
...
},
{
...
},
...
],
"page": 1,
"page_count": 1,
"success": true
}
I have python 2.7.6 installed and I need to parse the following json. I am bound to work with this version of python using no other external libraries.
{
"entries": [
{
"author": {
"value": "plugin-demo Administrator",
"origin": "http://localhost:8080/webservice/person/18"
},
"creator": {
"value": "plugin-demo Administrator",
"origin": "http://localhost:8080/webservice/person/18"
},
"creationDate": "2015-11-04T15:14:18.000+0600",
"lastModifiedDate": "2015-11-04T15:14:18.000+0600",
"model": "http://localhost:8080/plugin-editorial/model/281/football",
"payload": [
{
"name": "basic",
"value": "Real Madrid are through"
}
],
"publishDate": "2015-11-04T15:14:18.000+0600"
},
{
"author": {
"value": "plugin-demo Administrator",
"origin": "http://localhost:8080/webservice/person/18"
},
"creator": {
"value": "plugin-demo Administrator",
"origin": "http://localhost:8080/webservice/person/18"
},
"creationDate": "2015-11-04T15:14:18.000+0600",
"lastModifiedDate": "2015-11-04T15:14:18.000+0600",
"model": "http://localhost:8080/plugin-editorial/model/281/football",
"payload": [
{
"name": "basic",
"value": "Real Madrid are through"
}
],
"publishDate": "2015-11-04T15:14:18.000+0600"
}
]
}
I want to access each json object in the json array. (e.g. entries[0], entries[1] etc...)
How can I do that?
You can use the json module. It's been available since Python 2.6.
import json
# json.loads turns the JSON text into Python objects (a dict for this input).
jsonString = json.loads(string)
entries = jsonString['entries']
# Parenthesised single-argument print behaves the same on Python 2.7 and
# also parses on Python 3.
print(entries[0])
print(entries[1])
import json
# Parse the JSON text held in strJsonString into the corresponding Python object.
dictData = json.loads(strJsonString)