I am trying to extract 1000 unique, fully expanded URIs from Twitter using Tweepy and Python. Specifically, I am interested in links that point outside of Twitter (so not links back to other tweets, retweets, or duplicates).
The code I wrote keeps giving me a Key error for "entities."
It will give me a few urls before breaking; some are extended, some are not. I have no idea how to go about fixing this.
Help me please!
Note: I left my credentials out.
Here is my code:
# Import the necessary methods from different libraries
import tweepy
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
import json
# Variables that contains the user credentials to access Twitter API
access_token = "enter token here"
access_token_secret = "enter token here"
consumer_key = "enter key here"
consumer_secret = "enter key here"
# Accessing tweepy API
# api = tweepy.API(auth)
# This is a basic listener that just prints received tweets to stdout.
class StdOutListener(StreamListener):
    """Print unique, expanded, non-Twitter URLs found in streamed tweets.

    The listener stops the stream once ``limit`` distinct URLs have been
    printed. "Expanded" means the ``expanded_url`` value reported by Twitter.
    """

    def __init__(self, limit=1000):
        """Create the listener.

        limit -- number of unique external URLs to print before stopping.
        """
        super(StdOutListener, self).__init__()
        self.limit = limit
        # Kept on the instance so the tally survives between on_data() calls;
        # a local counter would be reset for every incoming message.
        self.seen = set()

    @staticmethod
    def _is_twitter_link(url):
        """Return True when ``url`` points back at twitter.com."""
        host = url.split("://", 1)[-1].split("/", 1)[0].lower()
        return host == "twitter.com" or host.endswith(".twitter.com")

    def on_data(self, data):
        """Handle one raw stream message.

        Returns False once ``limit`` URLs have been collected (Tweepy stops
        the stream when on_data returns False), True otherwise.
        """
        # resource: http://code.runnable.com/Us9rrMiTWf9bAAW3/how-to-stream-data-from-twitter-with-tweepy-for-python
        if len(self.seen) >= self.limit:
            return False
        # Twitter returns data in JSON format - we need to decode it first
        decoded = json.loads(data)
        # Not every message is a tweet: limit notices, deletions and similar
        # messages carry no "entities" key, so look it up defensively.
        entities = decoded.get("entities") or {}
        for url in entities.get("urls") or []:
            expanded = url.get("expanded_url") or url.get("url")
            if not expanded or expanded in self.seen:
                continue  # nothing usable, or a duplicate
            if self._is_twitter_link(expanded):
                continue  # link back into Twitter (tweet/retweet)
            self.seen.add(expanded)
            print(expanded + "\r\n\n")
            if len(self.seen) >= self.limit:
                return False
        return True

    def on_error(self, status):
        """Print the error status reported by the stream."""
        print(status)
if __name__ == '__main__':
    # Authenticate with the user credentials defined above.
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    # Attach the listener to a Streaming API connection.
    listener = StdOutListener()
    stream = Stream(auth, listener)
    # Only receive tweets that match the keyword: YouTube
    keywords = ['YouTube']
    stream.filter(track=keywords)
It seems the stream is hitting a rate limit: the messages that raise the KeyError are limit notices rather than tweets. One option is to catch the KeyError; when I print the keys of such a message I see [u'limit']. I also added a count display to verify that it does get to 1000:
count = 1000  # module-level so the tally is not reset on every on_data() call


class StdOutListener(StreamListener):
    """Print expanded URLs with a countdown; exit when the countdown hits 0."""

    def on_data(self, data):
        """Decode one stream message and print each URL it carries."""
        decoded = json.loads(data)
        global count  # get the count
        if count <= 0:
            import sys
            sys.exit()
        try:
            for url in decoded["entities"]["urls"]:
                count -= 1
                expanded = "%s" % url["expanded_url"] + "\r\n\n"
                print("%s : %s" % (count, expanded))
        except KeyError:
            # Message without the expected keys: show what it contains instead.
            print(decoded.keys())

    def on_error(self, status):
        """Print the error status reported by the stream."""
        print(status)
if __name__ == '__main__':
    # Authenticate, then stream tweets matching the keyword through the listener.
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    listener = StdOutListener()
    stream = Stream(auth, listener)
    keywords = ['YouTube']
    stream.filter(track=keywords)
Related
What do I put in my code so that I can force the program to stop printing data once the tweets date back to a certain point? For example, how can I get all tweets about Verratti from within a month of running this?
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
import json
access_token = the code
access_token_secret = the code
consumer_key = the code
consumer_secret = the code
#print
class StdOutListener(StreamListener):
    """Print the text of every tweet received from the stream."""

    def on_data(self, data):
        """Print the ``text`` field of one decoded stream message.

        Messages without a ``text`` key (for example limit or delete
        notices) are skipped instead of raising KeyError.
        Always returns True.
        """
        message = json.loads(data)
        text = message.get('text')
        if text is not None:
            print(text)
        return True

    def on_error(self, status):
        """Print the error status reported by the stream."""
        print(status)
#find
if __name__ == '__main__':
    # Authenticate and open the connection to the Twitter Streaming API.
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    listener = StdOutListener()
    stream = Stream(auth, listener)
    # Only receive tweets that match the keyword: 'Verratti'
    keywords = ['Verratti']
    stream.filter(track=keywords)
Nice question. It turns out that the Twitter API only lets you look back one week from the current date. There is a way around it though, someone made a github library that can search for any timeframe using twitter's advanced search function and you don't even have to bother with the whole authentication process.
Check it out: https://github.com/Jefferson-Henrique/GetOldTweets-python
I've been playing with the Twitter Streaming API using the Tweepy library. I started by following my own account and streaming my own tweets as I posted them, which worked fine.
I then attempted to stream a fairly large region's tweets ([30,-85,31,-84]), to which I initially seemed to receive no data. I then started receiving 'Location Deletion Notices', or 'scrub_geo' messages, and have only ever received those since. I changed my code back to the previously working follow code, but I continue to receive 'scrub_geo' messages and not statuses from my profile.
Here's the script I'm using:
# Import the necessary methods from tweepy library
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
# Other libs
import json
# Variables that contains the user credentials to access Twitter API
access_token = "<my_access_token>"
access_token_secret = "<my_secret_token>"
consumer_key = "<my_consumer_key>"
consumer_secret = "<my_consumer_secret>"
# This is a basic listener that just prints received tweets to stdout.
class StdOutListener(StreamListener):
    """Dump every raw stream message to stdout."""

    def on_data(self, data):
        """Print the raw message and keep listening."""
        print("Data " + str(data))
        return True

    def on_error(self, status):
        """Print the error status; return False only for status 420."""
        print("Error " + str(status))
        if status != 420:
            return None
        print("420 error.")
        return False
if __name__ == '__main__':
    # This handles Twitter authentication and the connection to Twitter Streaming API
    listener = StdOutListener()
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    stream = Stream(auth, listener)
    # Start streaming with right parameters.
    # Bounding boxes are longitude first: [west, south, east, north].
    # tallahassee = [-85, 30, -84, 31]
    # stream.filter(locations=tallahassee)   <---- previously used
    # `follow` expects a list of user-ID strings. Tweepy comma-joins whatever
    # sequence it is given, so a bare string would not be sent as one ID.
    stream.filter(follow=["<my_user_id>"])
Your coordinates are reversed. Since we're dealing with GeoJSON, always use (long, lat, alt) order, i.e. (x, y, z).
So you'll need to provide tallahassee=[-85,30,-84,31]. Always provide longitude first, the same way you would write (x,y) in math.
There are some places, like google maps, that do latitude first. You just have to be careful as to which proper format you're dealing with.
Using the code below, I'm trying to get a hash tag. It works fine for larger searches like #StarWars, but when i ask for smaller ones it doesn't seem to return anything.
Ideas?
'code' is used instead of the actual strings for authentication
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
from textwrap import TextWrapper
import json
access_token = "code"
access_token_secret = "code"
consumer_key = "code"
consumer_secret = "code"
class StdOutListener(StreamListener):
    ''' Handles data received from the stream. '''

    # Wraps the tweet text to 60 columns before it is printed.
    status_wrapper = TextWrapper(width=60, initial_indent=' ', subsequent_indent=' ')

    def on_status(self, status):
        """Print the wrapped tweet text followed by author, time and source."""
        try:
            print(self.status_wrapper.fill(status.text))
            print('\n %s %s via %s\n' % (status.author.screen_name, status.created_at, status.source))
        except UnicodeError:
            # Ignore only unicode errors raised while printing to the console;
            # anything else is a real bug and should surface, not be hidden.
            pass

    def on_error(self, status_code):
        """Report the error status code and keep the stream open."""
        print('Got an error with status code: ' + str(status_code))
        return True  # To continue listening

    def on_timeout(self):
        """Report a timeout and keep the stream open."""
        print('Timeout...')
        return True  # To continue listening
if __name__ == '__main__':
    # Authenticate, then stream tweets containing the hashtag.
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    stream = Stream(auth, StdOutListener())
    hashtags = ['#TestingPythonTweet']
    stream.filter(track=hashtags)
OK, so I found the answer: I was expecting it to work retroactively, which was a fundamental error on my part. What actually happens is that the stream delivers what is currently being tweeted, not what has been tweeted previously.
I'm doing Twitter sentiment research at the moment. For this reason, I'm using the Twitter API to download all tweets on certain keywords. But my current code is taking a lot of time to create a large data file, so I was wondering if there's a faster method.
This is what Im using right now:
__author__ = 'gerbuiker'
import time
#Import the necessary methods from tweepy library
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
#Variables that contains the user credentials to access Twitter API
access_token = "XXXXXXXXXXXXX"
access_token_secret = "XXXXXXXX"
consumer_key = "XXXXX"
consumer_secret = "XXXXXXXXXXXXXX"
#This is a basic listener that just prints received tweets to stdout.
class StdOutListener(StreamListener):
    """Append the text of every received tweet to ``twitiamsterdam.txt``."""

    def on_data(self, data):
        """Save one tweet as a ``<timestamp>::<text>`` line.

        The message is parsed as JSON instead of being split on raw
        substrings, so messages without a ``text`` field (limit or delete
        notices) are skipped quietly rather than triggering the 5-second
        error back-off. Always returns True.
        """
        # Local imports: this snippet's import block does not provide them.
        import io
        import json

        try:
            message = json.loads(data)
            tweet = message.get('text')
            if tweet is None:
                return True
            # Keep one tweet per line in the output file.
            tweet = tweet.replace('\r', ' ').replace('\n', ' ')
            print(tweet)
            # saves time + actual tweet
            line = u'%s::%s\n' % (time.time(), tweet)
            # The context manager closes the file even if the write fails.
            with io.open('twitiamsterdam.txt', 'a', encoding='utf-8') as save_file:
                save_file.write(line)
        except Exception as e:
            # Exception, not BaseException, so Ctrl-C still stops the script.
            print('failed ondata, ' + str(e))
            time.sleep(5)
        return True

    def on_error(self, status):
        """Print the error status reported by the stream."""
        print(status)
if __name__ == '__main__':
    # Authenticate and open the connection to the Twitter Streaming API.
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    listener = StdOutListener()
    stream = Stream(auth, listener)
    # Only receive tweets that match the tracked keyword(s).
    keywords = ['KEYWORD which i want to check']
    stream.filter(track=keywords)
This gets me about 1500 tweets in one hour, for a pretty popular keyword (Amsterdam). Does anyone know a faster method in Python?
To be clear: I want to download all tweets on a certain subject for last month/year for example. So the newest tweets don't have to keep coming in, the most recent ones for a period would be sufficient. Thanks!
I need something similar to this for an academic research.
Were you able to fix it?
Would it be possible to specify a custom range of time from which to pull the data?
Sorry for asking here, but couldn't send you private messages.
I am able to extract the mentioned details about a twitter user using Tweepy API.
I want to do it for a list of users. Can anyone help me to this?
import tweepy
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
CONSUMER_KEY = 'ABC'
CONSUMER_SECRET = 'ABC'
ACCESS_KEY = 'ABC'
ACCESS_SECRET = 'ABC'
class TweetListener(StreamListener):
    """Basic listener that prints whatever the stream delivers to stdout."""

    def on_data(self, data):
        """Print the raw message and keep listening."""
        print(data)
        return True

    def on_error(self, status):
        """Print the error status reported by the stream."""
        print(status)
auth = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_KEY, ACCESS_SECRET)
api = tweepy.API(auth)
twitterStream = Stream(auth, TweetListener())

# Fetch one profile and print the fields of interest, one per line.
user = api.get_user('User Name')
for field in ('screen_name', 'description', 'followers_count',
              'statuses_count', 'url'):
    print(getattr(user, field))
This code is ready to use anyone can use it with his/her own credentials for a single user profile.
Finally, after experimenting and reading a lot, I found the answer to my question. You can try this:
import tweepy
from tweepy import Stream
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
CONSUMER_KEY = 'ABC'
CONSUMER_SECRET = 'ABC'
ACCESS_KEY = 'ABC'
ACCESS_SECRET = 'ABC'
auth = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_KEY, ACCESS_SECRET)


class TweetListener(StreamListener):
    """Basic listener that prints whatever the stream delivers to stdout."""

    def on_data(self, data):
        """Print the raw message and keep listening."""
        print(data)
        return True

    def on_error(self, status):
        """Print the error status reported by the stream."""
        print(status)


# search
api = tweepy.API(auth)
twitterStream = Stream(auth, TweetListener())
test = api.lookup_users(user_ids=['17006157', '59145948', '157009365'])
# Print the fields of interest for every returned profile, one per line.
profile_fields = ('screen_name', 'name', 'description',
                  'followers_count', 'statuses_count', 'url')
for user in test:
    for field in profile_fields:
        print(getattr(user, field))
This code is ready to use: just put your valid keys in place of ABC to get the users' profiles. You need to get the IDs first.
Your code simply interacts with your twitter account; to find information on a specific user or group of users you should look them up using the api.lookup_users(user_ids=[]) query.
You'd do it like this:
#boring auth you already have
import tweepy
from tweepy import OAuthHandler
CONSUMER_KEY = 'ABC'
CONSUMER_SECRET = 'ABC'
ACCESS_KEY = 'ABC'
ACCESS_SECRET = 'ABC'
auth = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_KEY, ACCESS_SECRET)
# search
api = tweepy.API(auth)
# One list element per user ID, not a single comma-joined string.
test = api.lookup_users(user_ids=['1123728482', '5539932'])
This gives you a list of two tweepy.models.User objects:
[<tweepy.models.User object at 0x103995090>, <tweepy.models.User object at 0x1039950d0>]
You can replace the list in user_ids with a list of up to 100 ids, twitter won't let you search any more than that at once, though. Once you have your list of User objects, you can access different properties (for a list, check out the tweepy doc for the User class, line 113).