Hi I have an assignment due using the Twitter Rest API. For some reason my get_user_profile function is not defined, and I was wondering why that is the case. Thanks in advance!
import twitter
def oauth_login():
    CONSUMER_KEY = '***'
    CONSUMER_SECRET = '**'
    OAUTH_TOKEN = '***'
    OAUTH_TOKEN_SECRET = '***'
    auth = twitter.oauth.OAuth(OAUTH_TOKEN, OAUTH_TOKEN_SECRET, CONSUMER_KEY, CONSUMER_SECRET)
    twitter_api = twitter.Twitter(auth=auth)

    def get_user_profile(twitter_api, screen_names=None, user_ids=None):
        assert (screen_names != None) != (user_ids != None), \
            "Must have screen_names or user_ids, but not both"
        items_to_info = {}
        items = screen_names or user_ids
        while len(items) > 0:
            items_str = ",".join([str(item) for item in items[:100]])
            items = items[100:]
            if screen_names:
                response = make_twitter_request(twitter_api.users.lookup, screen_name=items_str)
            else:
                response = make_twitter_request(twitter_api.users.lookup, user_id=items_str)
            for user_info in response:
                if screen_names:
                    items_to_info[user_info['screen_name']] = user_info
                else:
                    items_to_info[user_info['id']] = user_info
        return items_to_info
    # get user profile (9.17) get friends followers ids (9.19) 9.22S
    return twitter_api

twitter_api = oauth_login()
print twitter_api
print get_user_profile(twitter_api, screen_names=["max_herbowy"])
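Judging by the layout above, the likely cause is that get_user_profile is defined inside oauth_login, so the name is local to that function and never exists at module scope, which is why the call at the bottom raises NameError. A minimal sketch of the restructured module, assuming the lookup endpoint is called directly instead of through make_twitter_request (which is not shown in the snippet):

import twitter

def oauth_login():
    # fill in your real keys here
    auth = twitter.oauth.OAuth('OAUTH_TOKEN', 'OAUTH_TOKEN_SECRET',
                               'CONSUMER_KEY', 'CONSUMER_SECRET')
    return twitter.Twitter(auth=auth)

def get_user_profile(twitter_api, screen_names=None, user_ids=None):
    # same body as in the question, now defined at module scope
    assert (screen_names != None) != (user_ids != None), \
        "Must have screen_names or user_ids, but not both"
    items_to_info = {}
    items = screen_names or user_ids
    while items:
        items_str = ",".join(str(item) for item in items[:100])
        items = items[100:]
        kwargs = {'screen_name': items_str} if screen_names else {'user_id': items_str}
        for user_info in twitter_api.users.lookup(**kwargs):
            key = user_info['screen_name'] if screen_names else user_info['id']
            items_to_info[key] = user_info
    return items_to_info

twitter_api = oauth_login()
print get_user_profile(twitter_api, screen_names=["max_herbowy"])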
I am using statuses/filter and am trying to filter the tweets from the twitter stream based on the parameter "filter_level".
query = ["Donald Trump","Cristiano Ronaldo"]
numberOfTweets = 1000
dictOfTweets ={}
twitter_api = oauth_login()
twitter_stream = twitter.TwitterStream(auth=twitter_api.auth)
for q in query:
    stream = twitter_stream.statuses.filter(track=q, max_count=numberOfTweets, languages=['en'], filter_level=['medium'])
    for tweet in stream:
        if tweet.get('text', 0) == 0:
            continue
        dictOfTweets.setdefault(q, []).append(tweet['text'])
I am still getting tweets with filter_level="low". It would be really helpful if anyone could suggest what I am missing or doing wrong.
You need to pass languages=['en'] and filter_level=['medium'] when you create the authenticated stream, not in the statuses.filter call:
query = ["Donald Trump","Cristiano Ronaldo"]
numberOfTweets = 1000
dictOfTweets ={}
twitter_api = oauth_login()
twitter_stream = twitter.TwitterStream(auth=twitter_api.auth, languages=['en'], filter_level=['medium'])
for q in query:
    stream = twitter_stream.statuses.filter(track=q, max_count=numberOfTweets)
    for tweet in stream:
        if tweet.get('text', 0) == 0:
            continue
        dictOfTweets.setdefault(q, []).append(tweet['text'])
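If low-level tweets still slip through, another variant worth trying (an assumption on my part, based on how the streaming endpoint documents these parameters) is to pass filter_level and the language constraint as plain strings directly on the statuses/filter request, inside the same for q in query loop as above:

    # sketch: constraints as strings on the filter request itself
    stream = twitter_stream.statuses.filter(track=q,
                                            language='en',           # comma-separated codes, not a list
                                            filter_level='medium')   # 'none', 'low' or 'medium'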
I am a beginner with Python. How can I solve
AttributeError: module 'urllib' has no attribute 'Request'
I have looked at other posts but still can't understand how to solve the problem. Here is a screen capture of the error, and this is the code (adapted from https://github.com/minimaxir/facebook-page-post-scraper/blob/master/get_fb_posts_fb_page.py):
import urllib.request
import json, datetime, csv, time
app_id = "xxx"
app_secret = "xxx" # DO NOT SHARE WITH ANYONE!
access_token = "xxx"
page_id = 'xxx'
def testFacebookPageData(page_id, access_token):
    # construct the URL string
    base = "https://graph.facebook.com/v2.4"
    node = "/" + page_id + '/feed'
    parameters = "/?access_token=%s" % access_token
    url = base + node + parameters

    # retrieve data
    response = urllib.request.urlopen(url)
    data = json.loads(response.read().decode('utf-8'))
    print (data)

def request_until_succeed(url):
    req = urllib.request.urlopen(url)
    success = False
    while success is False:
        try:
            response = urllib.urlopen(req)
            if response.getcode() == 200:
                success = True
        except Exception as e:
            print (e)
            time.sleep(5)
            print (url, datetime.datetime.now())
    return response.read()

def getFacebookPageFeedData(page_id, access_token, num_statuses):
    # construct the URL string
    base = "https://graph.facebook.com"
    node = "/" + page_id + "/feed"
    parameters = "/?fields=message,link,created_time,type,name,id,likes.limit(1).summary(true),comments.limit(1).summary(true),shares&limit=%s&access_token=%s" % (num_statuses, access_token)  # changed
    url = base + node + parameters

    # retrieve data
    data = json.loads(request_until_succeed(url))
    return data

def processFacebookPageFeedStatus(status):
    # The status is now a Python dictionary, so for top-level items,
    # we can simply call the key.
    # Additionally, some items may not always exist,
    # so must check for existence first
    status_id = status['id']
    status_message = '' if 'message' not in status.keys() else status['message'].encode('utf-8')
    link_name = '' if 'name' not in status.keys() else status['name'].encode('utf-8')
    status_type = status['type']
    status_link = '' if 'link' not in status.keys() else status['link']

    # Time needs special care since a) it's in UTC and
    # b) it's not easy to use in statistical programs.
    status_published = datetime.datetime.strptime(status['created_time'], '%Y-%m-%dT%H:%M:%S+0000')
    status_published = status_published + datetime.timedelta(hours=-5)  # EST
    status_published = status_published.strftime('%Y-%m-%d %H:%M:%S')  # best time format for spreadsheet programs

    # Nested items require chaining dictionary keys.
    num_likes = 0 if 'likes' not in status.keys() else status['likes']['summary']['total_count']
    num_comments = 0 if 'comments' not in status.keys() else status['comments']['summary']['total_count']
    num_shares = 0 if 'shares' not in status.keys() else status['shares']['count']

    # return a tuple of all processed data
    return (status_id, status_message, link_name, status_type, status_link,
            status_published, num_likes, num_comments, num_shares)

def scrapeFacebookPageFeedStatus(page_id, access_token):
    with open('%s_facebook_statuses.csv' % page_id, 'w') as file:
        w = csv.writer(file)
        w.writerow(["status_id", "status_message", "link_name", "status_type", "status_link",
                    "status_published", "num_likes", "num_comments", "num_shares"])

        has_next_page = True
        num_processed = 0  # keep a count on how many we've processed
        scrape_starttime = datetime.datetime.now()
        print (page_id, scrape_starttime)

        statuses = getFacebookPageFeedData(page_id, access_token, 100)
        while has_next_page:
            for status in statuses['data']:
                w.writerow(processFacebookPageFeedStatus(status))
                # output progress occasionally to make sure code is not stalling
                num_processed += 1
                if num_processed % 1000 == 0:
                    print (num_processed, datetime.datetime.now())

            # if there is no next page, we're done.
            if 'paging' in statuses.keys():
                statuses = json.loads(request_until_succeed(statuses['paging']['next']))
            else:
                has_next_page = False

        print (num_processed, datetime.datetime.now() - scrape_starttime)

if __name__ == '__main__':
    scrapeFacebookPageFeedStatus(page_id, access_token)
There is no urllib.Request() in Python 3 - there is urllib.request.Request().
EDIT: the error message shows url = urllib.Request(url), but I don't see that line in your code - maybe you are running the wrong file.
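For reference, a minimal Python 3 sketch of the pattern that failing line was presumably aiming for (the URL is just a placeholder):

import urllib.request

url = "https://graph.facebook.com/v2.4/"       # placeholder URL
req = urllib.request.Request(url)              # Python 3: Request lives in urllib.request
response = urllib.request.urlopen(req)         # and so does urlopen
print(response.getcode())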
I am using Tweepy to get all tweets made by a given user. This is the code:
import urllib, json
import sys
import tweepy
from tweepy import OAuthHandler
def twitter_fetch(screen_name="prateek", maxnumtweets=10):
    consumer_token = ""  # Keys removed for security
    consumer_secret = ""
    access_token = ""
    access_secret = ""

    auth = tweepy.OAuthHandler(consumer_token, consumer_secret)
    auth.set_access_token(access_token, access_secret)
    api = tweepy.API(auth)

    for status in tweepy.Cursor(api.user_timeline, id=screen_name).items(1):
        print status['statuses_count']
        print '\n'

if __name__ == '__main__':
    twitter_fetch('BarackObama', 200)
How do I parse the JSON properly to read the number of statuses made by that particular user?
How about something that keeps track of how many statuses you've iterated through? I'm not positive how tweepy works, but try something like this:
statuses = 0
for status in tweepy.Cursor(api.user_timeline, id=screen_name).items(1):
    print status['statuses_count']
    statuses += 1
    print '\n'
return statuses
Usually JSON data has a nice, clearly formatted structure, which makes it easier to understand.
So when I want to iterate through such a list to check whether an item exists (an achievement, in this case), I use this function, which adds 1 to index on every iteration:
def achnamefdr(appid, mykey, steamid64, achname):
    playerachurl = 'http://api.steampowered.com/ISteamUserStats/GetPlayerAchievements/v0001/?appid=' + str(appid) + '&key=' + mykey + '&steamid=' + steamid64 + '&l=name'
    achjson = json.loads(urllib.request.urlopen(playerachurl).read().decode('utf-8'))
    achjsonr = achjson['playerstats']['achievements']
    index = 0
    for ach in achjsonr:
        if not ach['name'].lower() == achname.lower():
            index += 1
            continue
        else:
            achnamef = ach['name']
            return achnamef, index, True
    return 'Invalid Achievement!', index, False
It can be done by getting the JSON object from status._json and then parsing it:
print status._json["statuses_count"]
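A minimal sketch of that inside twitter_fetch, with the caveat that in the standard tweet payload statuses_count sits on the embedded user object, so the key path may need to include "user":

for status in tweepy.Cursor(api.user_timeline, id=screen_name).items(1):
    print status._json["user"]["statuses_count"]   # total number of tweets by the author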
I am trying to obtain the number of calendar resources per domain using the Calendar Resources API, and the code is returning bad results. Specifically, it insists that all domains have the same calendar count.
I have two functions to do that, both returning the same (bad) results:
def getCalendarCountFor(domain, userMail, password):
    client = CalendarResourceClient(domain=domain)
    client.ClientLogin(userMail, password, "test_app")
    calendar_resources = client.GetResourceFeed()
    return len(calendar_resources.entry)
The second version:
def GoogleQueryCalendars(dom, admin_id, admin_pwd):
    today = datetime.datetime.now().strftime("%Y-%m-%d %H:%S")
    calendarClient = CalendarResourceClient(domain=dom)
    calendarClient.ClientLogin(email=admin_id, password=admin_pwd, source='TheSource')
    resourceCount = 0
    # loop through all the calendar feeds
    try:
        moreCalendars = calendarClient.GetResourceFeed()
    except:
        print "Exception"
    calendars = {}
    while moreCalendars.entry is not None:
        for i, cal in enumerate(moreCalendars.entry):
            str = cal.GetResourceCommonName()
            pseudoDomain = re.sub("[^A-Z\d]", "", re.search("^[^-\s]*", str).group(0)).lower()
            if pseudoDomain in calendars:
                calendars[pseudoDomain] += 1
            else:
                calendars[pseudoDomain] = 1
            resourceCount += 1
        try:
            moreCalendars = calendarClient.GetNext(moreCalendars)
        except:
            break
    return resourceCount
Thanks.
Here's a method for counting calendar resources.
def count_resources(domain, email, password):
    client = CalendarResourceClient(domain=domain)
    client.ClientLogin(email=email,
                       password=password,
                       source='TheSource')
    count = 0
    uri = client.MakeResourceFeedUri()
    while uri:
        feed = client.GetResourceFeed(uri)
        count += len(feed.entry)
        next_link = feed.GetNextLink()
        uri = next_link.href if next_link else None
    return count
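The difference from getCalendarCountFor above is that this version follows the feed's next links, so it counts every page of resources instead of just the first one. A hypothetical call, with placeholder credentials:

print count_resources('example.com', 'admin@example.com', 'secret')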
I am trying to add entries to a YouTube playlist via the code below. When I pass the playlist URI (http://gdata.youtube.com/feeds/api/users/nashrafeeg/playlists/0F4EF4B14F514476?client=Reddit+playlist+maker), which I get from the get_playlists method, to the AddPlaylistVideoEntryToPlaylist method, I get an error saying "Invalid request URI". What is the best way to fix this?
import urllib, re
import gdata.youtube
import gdata.youtube.service

class reddit():
    def __init__(self, rssurl='http://www.reddit.com/r/chillmusic.rss'):
        self.URL = rssurl
        self._downloadrss()

    def _downloadrss(self):
        if self.URL.endswith('.rss'):
            # Download the RSS feed of the subreddit - save as "feed.rss"
            try:
                print "Downloading rss from reddit..."
                urllib.urlretrieve(URL, "feed.rss")
            except Exception as e:
                print e

    def clean(self):
        playList = open("feed.rss").read()
        links = re.findall(r'(http?://www.youtube.com\S+)', playList)
        for link in links:
            firstPass = link.replace('">[link]</a>', '')
            secondPass = firstPass.replace('&fmt=18', '')
            thirdpass = secondPass.replace('&feature=related', '')
            finalPass = thirdpass.replace('http://www.youtube.com/watch?v=', '')
            print thirdpass, "\t Extracted: ", finalPass
        return finalPass

class google():
    def __init__(self, username, password):
        self.Username = username
        self.password = password
        # do not change any of the following
        self.key = 'AI39si5DDjGYhG_1W-8n_amjgEjbOU27sa0aw2RQI5gOaoK5KqCD2Fzffbkh8oqGu7CqFQLLQ7N7wK0gz7lrTQbd70srC72Niw'
        self.appname = 'Reddit playlist maker'
        self.service = gdata.youtube.service.YouTubeService()

    def authenticate(self):
        self.service.email = self.Username
        self.service.password = self.password
        self.service.developer_key = self.key
        self.service.client_id = self.appname
        self.service.source = self.appname
        self.service.ssl = False
        self.service.ProgrammaticLogin()

    def get_playlists(self):
        y_playlist = self.service.GetYouTubePlaylistFeed(username='default')
        l = []
        k = []
        for p in y_playlist.entry:
            k = []
            k = [p.link[1].href, p.title.text]
            l.append(k)
        return l

    def get_playlist_id_from_url(self, href):
        # quick and dirty method to get the playList id's
        return href.replace('http://www.youtube.com/view_play_list?p=', '')

    def creat_playlist(self, name="Reddit list", disc="videos from reddit"):
        playlistentry = self.service.AddPlaylist(name, disc)
        if isinstance(playlistentry, gdata.youtube.YouTubePlaylistEntry):
            print 'New playlist added'
            return playlistentry.link[1].href

    def add_video_to_playlist(self, playlist_uri, video):
        video_entry = self.service.AddPlaylistVideoEntryToPlaylist(
            playlist_uri, video)
        if isinstance(video_entry, gdata.youtube.YouTubePlaylistVideoEntry):
            print 'Video added'

URL = "http://www.reddit.com/r/chillmusic.rss"
r = reddit(URL)
g = google('xxxxx#gmail.com', 'xxxx')
g.authenticate()

def search_playlist(playlist="Reddit list3"):
    pl_id = None
    for pl in g.get_playlists():
        if pl[1] == playlist:
            pl_id = pl[0]
            print pl_id
            break
    if pl_id == None:
        pl_id = g.creat_playlist(name=playlist)
    return pl_id

pls = search_playlist()
for video_id in r.clean():
    g.add_video_to_playlist(pls, video_id)
I don't know why, but if you strip the '/users/[username]' part out of your playlist_uri, it will work.
Example:
playlist_uri
http://gdata.youtube.com/feeds/api/users/[username]/playlists/[long_id]
Should become
http://gdata.youtube.com/feeds/api/playlists/[long_id]
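A small sketch of that rewrite (the helper name and regular expression are my own, just one way to drop the /users/<username> segment):

import re

def to_playlist_feed_uri(playlist_uri):
    # hypothetical helper: turn .../feeds/api/users/<username>/playlists/<id>
    # into .../feeds/api/playlists/<id>
    return re.sub(r'/users/[^/]+/playlists/', '/playlists/', playlist_uri)

pls = to_playlist_feed_uri(search_playlist())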