Paginate and collect posts and comments from Facebook (Python, Json) [duplicate] - python

I'm trying to pull posts and comments from Facebook for a project, but can't seem to pull everything. It seems that all I get is two links, previous and next.
Here's my code:
import facebook
import requests
def some_action(post):
#print posts['data']
print(post['created_time'])
access_token = INSERT ACCESS TOKEN HERE
user = 'walkers'
graph = facebook.GraphAPI(access_token)
profile = graph.get_object(user)
posts = graph.get_connections(profile['id'], 'posts')
x = 0
while x < 900000:
#while True:
try:
posts = requests.get(posts['paging']['next']).json()
print (posts)
except KeyError:
break
x = x+1
My results are something like this:
{u'paging': {u'next': u' https://graph.facebook.com/v2.0/53198517648/posts?limit=25&__paging_token=enc_AdDr64IO8892JzsoPWiKkMDcF4lTDosOcP0H0ZB1mIpIW5EYRrCylZAji6ZBSCVBAVUYiS80oNtWtAL9GazXxRf0yva&access_token=INSERT ACCESS TOKEN HERE, u'previous': u' https://graph.facebook.com/v2.0/53198517648/posts?limit=25&__paging_token=enc_AdCqTjKBhfOsNBoKe3CJbnM2gU2RvLEYLgAQt1pHEcERVeK4qiw1dQAFHjt2sSInSZAIioZCqotwLMx8azzfZClIuCN&since=1430997206&access_token=INSERT ACCESS TOKEN HERE'}, u'data': [{u'picture': u'https://scontent.xx.fbcdn.net/hphotos-xfp1/v/t1.0-9/p130x130/11200605_10153202855322649_2472649283560371030_n.jpg?oh=50a0b3998e7bae8bb10c3a5f0854af46&oe=56556974', u'story': u'Walkers added 8 new photos to the album: 20 Years of Gary.', u'likes': {u'paging': {u'cursors': {u'after': u'MzgxNzk1MDQ4NjQ3MTI3', u'before': u'MTAxNTI0OTY1MDIwNDIyODI='}}, u'data': [{u'id': u'10152496502042282', u'name': u'Aaron Hanson'}, {u'id': u'10203040513950876', u'name': u'Gary GazRico Hinchliffe'}, {u'id': u'10152934096109345', u'name': u'Stuart Collister'}, {u'id': u'10152297022606059', u'name': u'Helen Preston'}, {u'id': u'326285380900188', u'name': u'Rhys Edwards'}, {u'id': u'10204744346589601', u'name': u'Aaron Benfield'}, {u'id': u'10200910780691953', u'name': u'Mike S Q Wilkins'}, {u'id': u'10204902354187051', u'name': u'Paul Owen Davies'}, {u'id': u'10152784755311784', u'name': u'Dafydd Ifan'}, {u'id': u'1517704468487365', u'name': u'Stephen Collier'}, {u'id': u'10202198826115234', u'name': u'John McKellar'}, {u'id': u'10151949129487143', u'name': u'Lucy Morrison'}, {u'id': u'1474199509524133', u'name': u'Christine Leek'}, {u'id': u'381795048647127', u'name': u'Sandra Taylor'}]}, u'from': {u'category': u'Product/Service', u'name':
These are the two links with the missing information:
u'https://graph.facebook.com/v2.0/53198517648/posts?limit=25&__paging_token=enc_AdDr64IO8892JzsoPWiKkMDcF4lTDosOcP0H0ZB1mIpIW5EYRrCylZAji6ZBSCVBAVUYiS80oNtWtAL9GazXxRf0yva&access_token=INSERT ACCESS TOKEN HERE',
u'previous': u'https://graph.facebook.com/v2.0/53198517648/posts?limit=25&__paging_token=enc_AdCqTjKBhfOsNBoKe3CJbnM2gU2RvLEYLgAQt1pHEcERVeK4qiw1dQAFHjt2sSInSZAIioZCqotwLMx8azzfZClIuCN&since=1430997206&access_token=INSERT ACCESS TOKEN HERE'
Obviously where I've put "INSERT ACCESS TOKEN HERE" I've removed the access token. Is there any way of getting all the data?

Related

python json delete first element list or tuple

This is my part of code:
script = soup.find("script", {"id":"product-form-data"})
data = json.loads(script.text)
all_p = data["products"]
for x in all_p.items():
print(x)
I have a problem with this JSON data. I'm not able to remove everything outside the {}, and as it stands I don't know how to get id, name and unitsAvailable.
This is my output:
(u'193654', {u'unitsAvailable': 2, u'id': 193654, u'name': u'US 10,5'})
(u'193655', {u'unitsAvailable': 1, u'id': 193655, u'name': u'US 11'})
(u'193656', {u'unitsAvailable': 1, u'id': 193656, u'name': u'US 11,5'})
(u'193650', {u'unitsAvailable': 2, u'id': 193650, u'name': u'US 8,5'})
(u'193651', {u'unitsAvailable': 3, u'id': 193651, u'name': u'US 9'})
(u'193652', {u'unitsAvailable': 2, u'id': 193652, u'name': u'US 9,5'})
(u'193653', {u'unitsAvailable': 1, u'id': 193653, u'name': u'US 10'})
(u'193647', {u'unitsAvailable': 1, u'id': 193647, u'name': u'US 7'})
When you call the items() method, you ask the dictionary to return a list of tuples, each combining a key and its value as (key, value) (see: Python Dictionary | items() method).
To separate the key from the value, unpack each tuple instead:
for key, value in all_p.items():
print(value)

I am trying to get the answer in JSON format but it simply returns a string. What am I doing wrong?

I am trying to get the answer in JSON format but it simply returns a string. What am I doing wrong?
from flask import Flask, request, Response
import mysql.connector
import json
app = Flask(__name__)
def createConnection():
connection = mysql.connector.connect(user="akshay", password="akshay", host="localhost", database="BOOKS")
return connection
@app.route("/contact")
def getContacts():
    """Return every row of the BOOKS table in an HTTP response.

    Opens a connection through createConnection(), selects the ID, NAME,
    AUTHOR, SUBJECT and PRICE columns, collects each row into a dict and
    places the collected list in a Response whose content type is
    'application/json'.

    Returns:
        The populated flask Response object.
    """
    print(request)
    connection = createConnection()
    cursor = connection.cursor()
    cursor.execute("SELECT ID, NAME, AUTHOR, SUBJECT, PRICE FROM BOOKS;")
    contacts = []
    for (ID, NAME, AUTHOR, SUBJECT, PRICE) in cursor:
        contacts.append({
            'ID': ID,
            'NAME': NAME,
            "AUTHOR": AUTHOR,
            "SUBJECT": SUBJECT,
            "PRICE": PRICE
        })
    cursor.close()
    connection.close()
    response = Response()
    response.content_type ='application/json'
    # NOTE(review): json.loads() turns the JSON text produced by json.dumps()
    # back into a Python list, so the body is not JSON text. This line is kept
    # unchanged because it is the behaviour this question asks about.
    response.data = json.loads(json.dumps(contacts))
    return response
app.run('0.0.0.0', 4000)
And this is Output::
[{u'PRICE': 9999.999, u'AUTHOR': u'AKSHAY', u'NAME': u'AKSHAY', u'ID':
4, u'SUBJECT': u'BIG_DATA'}, {u'PRICE': 123.456, u'AUTHOR': u'Yashwant
Kanetkar', u'NAME': u'Exploring C', u'ID': 1001, u'SUBJECT': u'C
Programming'}, {u'PRICE': 371.019, u'AUTHOR': u'Yashwant Kanetkar',
u'NAME': u'Pointers in C', u'ID': 1002, u'SUBJECT': u'C Programming'},
{u'PRICE': 334.215, u'AUTHOR': u'E Balaguruswami', u'NAME': u'ANSI C
Programming', u'ID': 1003, u'SUBJECT': u'C Programming'}, {u'PRICE':
140.121, u'AUTHOR': u'Dennis Ritchie', u'NAME': u'ANSI C Programming', u'ID': 1004, u'SUBJECT': u'C Programming'}, {u'PRICE': 417.764,
u'AUTHOR': u'Herbert Schildt', u'NAME': u'C++ Complete Reference',
u'ID': 2001, u'SUBJECT': u'C++ Programming'}, {u'PRICE': 620.665,
u'AUTHOR': u'Stanley Lippman', u'NAME': u'C++ Primer', u'ID': 2002,
u'SUBJECT': u'C++ Programming'}, {u'PRICE': 987.213, u'AUTHOR':
u'Bjarne Stroustrup', u'NAME': u'C++ Programming Language', u'ID':
2003, u'SUBJECT': u'C++ Programming'}, {u'PRICE': 525.121, u'AUTHOR':
u'Herbert Schildt', u'NAME': u'Java Complete Reference', u'ID': 3001,
u'SUBJECT': u'Java Programming'}, {u'PRICE': 575.651, u'AUTHOR': u'Cay
Horstmann', u'NAME': u'Core Java Volume I', u'ID': 3002, u'SUBJECT':
u'Java Programming'}, {u'PRICE': 458.238, u'AUTHOR': u'James Gosling',
u'NAME': u'Java Programming Language', u'ID': 3003, u'SUBJECT': u'Java
Programming'}, {u'PRICE': 567.391, u'AUTHOR': u'Peter Galvin',
u'NAME': u'Operatig System Concepts', u'ID': 4001, u'SUBJECT':
u'Operating Systems'}, {u'PRICE': 421.938, u'AUTHOR': u'Mauris J
Bach', u'NAME': u'Design of UNIX Operating System', u'ID': 4002,
u'SUBJECT': u'Operating Systems'}, {u'PRICE': 352.822, u'AUTHOR':
u'Uresh Vahalia', u'NAME': u'UNIX Internals', u'ID': 4003, u'SUBJECT':
u'Operating Systems'}, {u'PRICE': 872.652, u'AUTHOR': u'Ayn Rand',
u'NAME': u'Atlas Shrugged', u'ID': 8001, u'SUBJECT': u'Novell'},
{u'PRICE': 9999.999, u'AUTHOR': u'AKSHAY', u'NAME': u'AKSHAY', u'ID':
9001, u'SUBJECT': u'BIG_DATA'}]
There is nothing particularly wrong with your code, just remove the call to json.loads() so that the JSON encoded data is used as the response data.
Calling json.loads() will recreate and pass a Python data structure (list of dictionaries in this case) to the response, which will not serialise as valid JSON. In Python 2 it will serialise using the Python repr() format as produced by str(), which will not always produce a JSON-compatible string (the u'' string prefix, for example, is not valid JSON). In Python 3 it will probably cause an exception because it won't be able to convert to a bytes object.
You could improve the code by using flask.json.jsonify() which will return a Response object with the correct content type header and a JSON body:
from flask.json import jsonify
@app.route("/contact")
def getContacts():
    """Return the contacts list as a JSON response built by jsonify()."""
    # create contacts...
    return jsonify(contacts)
Ok ,finally got the proper output::
/ 20171119101107
// http://localhost:4000/contact
[
{
"SUBJECT": "BIG_DATA",
"PRICE": 9999.999,
"AUTHOR": "AKSHAY",
"ID": 4,
"NAME": "AKSHAY"
},
{
"SUBJECT": "C Programming",
"PRICE": 123.456,
"AUTHOR": "Yashwant Kanetkar",
"ID": 1001,
"NAME": "Exploring C"
},
{
"SUBJECT": "C Programming",
"PRICE": 371.019,
"AUTHOR": "Yashwant Kanetkar",
"ID": 1002,
"NAME": "Pointers in C"
},
I removed json.loads() and installed a JSON viewer plugin. Thank you very much for the help.

Python :: nested JSON result in Spotify

I'm having a really hard time to get a track id in Spotify search endpoint.
It is deeply nested.
So, if I do this:
# Each field filter is its own string literal; the pieces are concatenated
# into a single query of the form "artist:<name> track:<title>".
results = sp.search(q='artist:' + 'Nirvana' + ' track:' + 'Milk it', type='track')
pprint.pprint(results)
I am able to get:
{u'tracks': {u'href': u'https://api.spotify.com/v1/search?query=artist%3ANirvana+track%3AMilk+it&type=track&offset=0&limit=10',
u'items': [{u'album': {u'album_type': u'album',
u'artists': [{u'external_urls': {u'spotify': u'https://open.spotify.com/artist/6olE6TJLqED3rqDCT0FyPh'},
u'href': u'https://api.spotify.com/v1/artists/6olE6TJLqED3rqDCT0FyPh',
u'id': u'6olE6TJLqED3rqDCT0FyPh',
u'name': u'Nirvana',
u'type': u'artist',
u'uri': u'spotify:artist:6olE6TJLqED3rqDCT0FyPh'}],
u'available_markets': [u'CA',
u'MX',
u'US'],
u'external_urls': {u'spotify': u'https://open.spotify.com/album/7wOOA7l306K8HfBKfPoafr'},
u'href': u'https://api.spotify.com/v1/albums/7wOOA7l306K8HfBKfPoafr',
u'id': u'7wOOA7l306K8HfBKfPoafr',
u'images': [{u'height': 640,
u'url': u'https://i.scdn.co/image/3dd2699f0fcf661c35d45745313b64e50f63f91f',
u'width': 640},
{u'height': 300,
u'url': u'https://i.scdn.co/image/a6c604a82d274e4728a8660603ef31ea35e9e1bd',
u'width': 300},
{u'height': 64,
u'url': u'https://i.scdn.co/image/f52728b0ecf5b6bfc998dfd0f6e5b6b5cdfe73f1',
u'width': 64}],
u'name': u'In Utero - 20th Anniversary Remaster',
u'type': u'album',
u'uri': u'spotify:album:7wOOA7l306K8HfBKfPoafr'},
u'artists': [{u'external_urls': {u'spotify': u'https://open.spotify.com/artist/6olE6TJLqED3rqDCT0FyPh'},
u'href': u'https://api.spotify.com/v1/artists/6olE6TJLqED3rqDCT0FyPh',
u'id': u'6olE6TJLqED3rqDCT0FyPh',
u'name': u'Nirvana',
u'type': u'artist',
u'uri': u'spotify:artist:6olE6TJLqED3rqDCT0FyPh'}],
u'available_markets': [u'CA', u'MX', u'US'],
u'disc_number': 1,
u'duration_ms': 234746,
u'explicit': False,
u'external_ids': {u'isrc': u'USGF19960708'},
u'external_urls': {u'spotify': u'https://open.spotify.com/track/4rtZtLpriBscg7zta3TZxp'},
u'href': u'https://api.spotify.com/v1/tracks/4rtZtLpriBscg7zta3TZxp',
u'id': u'4rtZtLpriBscg7zta3TZxp',
u'name': u'Milk It',
u'popularity': 43,
u'preview_url': None,
u'track_number': 8,
u'type': u'track',
-----> u'uri':u'spotify:track:4rtZtLpriBscg7zta3TZxp'},
QUESTION:
Now, how do I fetch the last 'uri' (u'uri': u'spotify:track:4rtZtLpriBscg7zta3TZxp'), under the name 'Milk It'?
>>> print results['tracks']['items'][0]['uri']
spotify:track:4rtZtLpriBscg7zta3TZxp

using twitter api to get Arabic trends , i get symbols instead of the actual trends?

I am using this part of the code to get trends for Egypt:
`Egypt_WOE_ID = 23424802
Egypt_trends = twitter_api.trends.place(_id=Egypt_WOE_ID)
print Egypt_trends`
The problem is that instead of getting the actual hashtags and trends, I get symbols that don't mean anything. This is part of the output:
[{u'created_at': u'2017-02-20T12:41:44Z', u'trends': [{u'url': u'http://twitter.com/search?q=%23%D9%85%D8%B0%D8%A8%D8%AD%D9%87_%D8%A8%D9%88%D8%B1%D8%B3%D8%B9%D9%8A%D8%AF', u'query': u'%23%D9%85%D8%B0%D8%A8%D8%AD%D9%87_%D8%A8%D9%88%D8%B1%D8%B3%D8%B9%D9%8A%D8%AF', u'tweet_volume': None, u'name': u'#\u0645\u0630\u0628\u062d\u0647_\u0628\u0648\u0631\u0633\u0639\u064a\u062f', u'promoted_content': None}, {u'url': u'/search?q=%23JFT74', u'query': u'%23JFT74', u'tweet_volume': None, u'name': u'#JFT74', u'promoted_content': None}, {u'url': u'/search?q=%23%D8%A8%D9%84%D8%A7%D9%87%D8%A7_%D9%84%D8%AD%D9%88%D9%85_%D9%81%D8%B1%D8%A7%D8%AE_%D8%B3%D9%85%D9%83', u'query': u'%23%D8%A8%D9%84%D8%A7%D9%87%D8%A7_%D9%84%D8%AD%D9%88%D9%85_%D9%81%D8%B1%D8%A7%D8%AE_%D8%B3%D9%85%D9%83', u'tweet_volume': None, u'name': u'#\u0628\u0644\u0627\u0647\u0627_\u0644\u062d\u0648\u0645_\u0641\u0631\u0627\u062e_\u0633\u0645\u0643', u'promoted_content': None}, {u'url': u'/search?q=%23%D8%A7%D9%85_%D8%AE%D8%AF%D8%A7%D8%B4_%D8%AA%D9%85%D8%A7%D8%B1%D8%B3_%D8%A7%D9%84%D8%AC%D9%86%D8%B3', u'query': u'%23%D8%A7%D9%85_%D8%AE%D8%AF%D8%A7%D8%B4_%D8%AA%D9%85%D8%A7%D8%B1%D8%B3_%D8%A7%D9%84%D8%AC%D9%86%D8%B3', u'tweet_volume': 14030, u'name': u'#\u0627\u0645_\u062e\u062f\u0627\u0634_\u062a\u0645\u0627\u0631\u0633_\u0627\u0644\u062c\u0646\u0633', u'promoted_content': None}]
Thanks in advance, and please forgive me if my English is bad. I will update the question with anything I find, or with any notes people give me, to make it clearer.
Your strings containing % are url encoded. You can convert them with:
# Python 3
import urllib.parse
s='%23%D9%85%D8%B0%D8%A8%D8%AD%D9%87_%D8%A8%D9%88%D8%B1%D8%B3%D8%B9%D9%8A%D8%AF'
urllib.parse.unquote(s)
# '#مذبحه_بورسعيد'
# Python 2
import urllib
s='%23%D9%85%D8%B0%D8%A8%D8%AD%D9%87_%D8%A8%D9%88%D8%B1%D8%B3%D8%B9%D9%8A%D8%AF'
urllib.unquote(s)
# '#مذبحه_بورسعيد'

Parsing complex and changing JSON data in Python, several levels deep

I am trying to parse changing JSON data; however, the JSON data is a bit complex and changes with each iteration.
The JSON data is being parsed inside a loop so each time the loop runs, the json data is different. I'm focused right now on the education data.
THE JSON DATA:
First one might look like this:
{u'gender': u'female', u'id': u'15394'}
Next one might be:
{
u'gender': u'male', u'birthday': u'12/10/1983', u'location': {u'id': '12', u'name': u'Mexico City, Mexico'}, u'hometown': {u'id': u'19', u'name': u'Mexico City, Mexico'},
u'education': [
{
u'school': {u'id': u'22', u'name': u'Institut Saint Dominique de Rome'},
u'type': u'High School',
u'year': {u'id': u'33', u'name': u'2002'}
},
{
u'school': {u'id': u'44', u'name': u'Instituto Cumbres'},
u'type': u'High School',
u'year': {u'id': u'55', u'name': u'1999'}
},
{
u'school': {u'id': u'66', u'name': u'Chantemerle International School'},
u'type': u'High School',
u'year': {u'id': u'77', u'name': u'1998'}
},
{
u'school': {u'id': u'88', u'name': u'Columbia University'},
u'type': u'College',
u'concentration':
[{u'id': u'91', u'name': u'Economics'},
{u'id': u'92', u'name': u'Film Studies'}]
}
],
u'id': u'100384'}
I am trying to return all the values for school name, school id and school type, so essentially I want [education][school][id], [education][school][name], [education][school][type] in one line. However, every person has a different number of schools listed and different types of schools or no schools at all. I want to return each school with its associated name, id and type on a new line within my existing loop.
IDEAL OUTPUT:
1 34 Boston Latin School High School
1 26 Harvard University College
1 22 University of Michigan Graduate School
The one in this case refers to a friend_id, which I have already set up to append to the list as the first item in each loop.
I've tried:
friend_data = response.read()
friend_json = json.loads(friend_data)
#This below is inside a loop pulling data for each friend:
try:
for school_id in friend_json['education']:
school_id = school_id['school']['id']
friendedu.append(school_id)
for school_name in friend_json['education']:
school_name = school_name['school']['name']
friendedu.append(school_name)
for school_type in friend_json['education']:
school_type = school_type['type']
friendedu.append(school_type)
except:
school_id = "NULL"
print friendedu
writer.writerow(friendedu)
CURRENT OUTPUT:
[u'22', u'44', u'66', u'88', u'Institut Saint Dominique de Rome', u'Instituto Cumbres', u'Chantemerle International School', u'Columbia University', u'High School', u'High School', u'High School', u'College']
This output is just a list of the values it has pulled, instead I'm trying to organize the output as shown above. I think that perhaps another for-loop is called for since for one person I want each school to be on its own line. Right now, the friendedu list is appending all the education info for one person into each line of the list. I want each education item in a new line and then move on to the next person and continue to write rows for the next person.
how about
friend_data = response.read()
friend_json = json.loads(friend_data)
# Some friends have no 'education' key at all; fall back to an empty list so
# the loop simply does not run for them.
for entry in friend_json.get('education', []):
    # Start a fresh row for every school so each one is printed on its own line.
    friendedu = []
    try:
        friendedu.append(entry['school']['id'])
        friendedu.append(entry['school']['name'])
        # 'type' is a key of the education entry itself, not of the nested
        # 'school' dict (see the sample data in the question).
        friendedu.append(entry['type'])
    except (KeyError, TypeError):
        friendedu.append('School ID, NAME, or type not found')
    print(" ".join(friendedu))
import csv
import json
import requests
def student_schools(student, fields=("id", "name", "type"), default=None):
    """Yield one row of field values per education entry of *student*.

    Args:
        student: dict describing one person; its optional "education" key
            holds a list of education entries (dicts).
        fields: names of the values to extract for every entry, in order.
        default: value used when a field cannot be found.

    Returns:
        A generator producing one list per education entry. Nothing is
        produced when the student has no "education" key.
    """
    def lookup(entry, field):
        # A key on the entry itself wins, which keeps the original lookup
        # behaviour; otherwise fall back to the nested "school" dict, where
        # the sample data stores "id" and "name".
        if field in entry:
            return entry[field]
        school = entry.get("school")
        if isinstance(school, dict):
            return school.get(field, default)
        return default

    schools = student.get("education", [])
    # Build each row eagerly as a list: a nested generator would read the
    # loop variable lazily and yield the wrong entry's values if the outer
    # generator were materialised before the rows are consumed.
    return ([lookup(entry, field) for field in fields] for entry in schools)
def main():
    """Download the student records and write one CSV row per listed school.

    Fetches STUDENT_URL, decodes the body as JSON, and writes the rows
    produced by student_schools() for every student to the OUTPUT file.
    """
    # A requests response exposes the raw body as `.content`; it has no
    # `.contents` attribute.
    res = requests.get(STUDENT_URL).content
    students = json.loads(res)
    # NOTE(review): "wb" is the Python 2 convention for csv files; on
    # Python 3 this would need open(OUTPUT, "w", newline="") - confirm target.
    with open(OUTPUT, "wb") as outf:
        outcsv = csv.writer(outf)
        for student in students["results"]:  # or whatever the root label is
            outcsv.writerows(student_schools(student))


if __name__ == "__main__":
    main()
You certainly don't need more for loops.
One will do:
# One formatted "id name type" string per education entry.
friendedu = []
for entry in friend_json['education']:
    friendedu.append("{id} {name} {type}".format(
        id=entry['school']['id'],
        name=entry['school']['name'],
        # 'type' is stored on the education entry, not inside 'school'.
        type=entry['type']))
Or a list comprehension:
# Same result as the loop form: one "id name type" string per education
# entry; 'type' is read from the entry itself, not from the nested 'school'.
friendedu = ["{id} {name} {type}".format(
    id=entry['school']['id'],
    name=entry['school']['name'],
    type=entry['type']) for entry in friend_json['education']]

Categories

Resources