I am trying to stop the streaming after 100 tweets. Any help will be appreciated. I just want 100 tweets
class listener(StreamListener):
    """Stream listener that decodes each incoming tweet payload."""

    def on_data(self, data):
        """Parse one raw JSON message and pull out its text and author."""
        all_data = json.loads(data)
        tweet = all_data["text"]
        username = all_data["user"]["screen_name"]
        # NOTE(review): this always returns True, so nothing here ever asks
        # the stream to stop; no tweet counter is kept.
        return True

    def on_error(self, status):
        """Print the error status reported by the stream."""
        print(status)
Related
I need to get the tweets from a single user in a streaming format. However, it still displays all tweets that retweet this user or are a reply to the tweets.
topic = "tweets"
accounts = ['user_id1', 'user_id2']
class TwitterStreamer():
    """Small wrapper that authenticates and opens a filtered stream."""

    def __init__(self):
        pass

    def stream_tweets(self, topic, accounts):
        """Open a stream that follows the given accounts.

        topic is handed to StreamListener (used there as a file prefix);
        accounts is handed to Stream.filter as the ``follow`` list.
        """
        listener = StreamListener(topic)
        auth = tweepy.OAuthHandler(api_key, api_secret_key)
        auth.set_access_token(access_token, access_secret_token)
        stream = tweepy.Stream(auth, listener)
        stream.filter(follow=accounts)
class StreamListener(tweepy.StreamListener):
    """Listener that appends every raw stream message to a dated text file."""

    def __init__(self, file_prefix):
        self.prefix = file_prefix

    @property
    def fetched_tweets_filename(self):
        """Return the output file name, '<prefix>_<YYYY-MM-DD>.txt'."""
        date = datetime.datetime.now().strftime("%Y-%m-%d")
        return f"{self.prefix}_{date}.txt"

    def on_data(self, data):
        """Print the raw message and append it to the output file.

        Always returns True, including after an error has been printed.
        """
        try:
            print(data)
            with open(self.fetched_tweets_filename, 'a') as tf:
                tf.write(data)
            return True
        except BaseException as e:
            print("Error on_data %s" % str(e))
        return True

    def on_exception(self, exception):
        """Print the exception, then start streaming again."""
        print('exception', exception)
        stream_tweets(topic, accounts)

    def on_status(self, accounts, status):
        """Print the status text when the author id equals ``accounts``."""
        # NOTE(review): this signature takes an extra ``accounts`` parameter
        # and compares one id string against it with != ; this is the part
        # the question is asking about.
        if status.user.id_str != accounts:
            return
        print(status.text)
def stream_tweets(topic, accounts):
    """Authenticate and open a stream filtered with ``track=accounts``.

    topic is handed to StreamListener, which uses it as a file prefix.
    """
    listener = StreamListener(topic)
    auth = tweepy.OAuthHandler(api_key, api_secret_key)
    auth.set_access_token(access_token, access_secret_token)
    stream = tweepy.Stream(auth, listener)
    stream.filter(track=accounts)
if __name__ == '__main__':
    # Script entry point: build the streamer and start following the accounts.
    twitter_streamer = TwitterStreamer()
    twitter_streamer.stream_tweets(topic, accounts)
I don't know what I'm doing wrong but I feel like the on_status command does not work at all.
Thanks for your help!
Don't change the parameters for on_status. Your accounts variable is a global variable and you should use it as such. Also, status.user.id_str is a str but accounts is a List[str], so you need the `not in` membership operator rather than `!=`. In other words, try out the changes below:
def on_status(self, status):
    """Print the status text only when its author is one of ``accounts``."""
    # ``accounts`` is the module-level list of user-id strings.
    if status.user.id_str not in accounts:
        return
    print(status.text)
I have been learning Python for about a month now and after watching several tutorials, I decided to give Tweepy a go, to analyze the extracted data.
The thing though, is that Tweepy will not stop streaming tweets, no matter where I place the if statement. I am using Python 3.9 and Tweepy 3.10.
For instance, I would like it to stop once 10 tweets have been stored in the CSV.
Any thoughts?
# Stream Listener Class
class MyListener(tweepy.StreamListener):
    """Listener that appends each raw message to a CSV file."""

    def on_data(self, raw_data):
        """Handle one raw message.

        Sets num_tweets to 0 and file_name on every call, hands the message
        to process_data, and returns True regardless of its result.
        """
        self.num_tweets = 0
        self.file_name = 'path/tweet_stream.csv'
        self.process_data(raw_data)
        return True

    def process_data(self, raw_data):
        """Print and store one message; return True while num_tweets < 10."""
        print(raw_data)
        with open(self.file_name, mode='a') as f:
            writer = csv.writer(f)
            writer.writerow([raw_data, '\n'])
        self.num_tweets += 1
        if self.num_tweets < 10:
            return True
        else:
            return False
# Creating the Stream
class MyStream():
    """Holds a tweepy.Stream built from the given auth and listener."""

    def __init__(self, auth, listener):
        self.stream = tweepy.Stream(auth=auth, listener=listener)

    def start(self, keywords):
        """Start streaming tweets that match ``keywords``."""
        self.stream.filter(track=keywords)
# Starting
if __name__ == "__main__":
    listener = MyListener()
I think every time on_data is called it resets num_tweets to 0, so if you move that initialization out of on_data (for example into __init__), it might resolve your issue.
class MyListener(tweepy.StreamListener):
    """Listener that stores raw messages in a CSV file and stops after 10."""

    def __init__(self):
        # Initialise the base listener before adding our own state.
        super().__init__()
        self.num_tweets = 0
        self.file_name = 'path/tweet_stream.csv'

    def on_data(self, raw_data):
        """Store one message; return False once the limit is reached.

        The result of process_data is passed straight back, because the
        stream only stops when on_data returns False.
        """
        return self.process_data(raw_data)

    def process_data(self, raw_data):
        """Print and append one message; return True while num_tweets < 10."""
        print(raw_data)
        with open(self.file_name, mode='a') as f:
            writer = csv.writer(f)
            writer.writerow([raw_data, '\n'])
        self.num_tweets += 1
        return self.num_tweets < 10
After quite some time messing with my code, I figured a workaround. Ended up replacing the process_data with the on_data function (as it is more oriented towards what I am after anyway).
The workaround itself is keeping the constructor, creating an empty list for the tweets and writing the status json to the self.file, while appending the list. The append method is called right before the self.num_tweets counter.
I used a python script to stream tweets and store them in a Mongodb database using tweepy. Everything was working just fine, but when I tried to set a limit for the number of tweets, tweets are no longer retrieved, I can't see the error, could you please help?
class MyListener(StreamListener):
    """Listener that inserts incoming tweets into MongoDB up to a limit."""

    def __init__(self, num_tweets, max_tweets=60):
        self.numTweets = num_tweets
        self.limit = max_tweets

    def on_data(self, data):
        """Insert one tweet into twitter1_db.twitter1_collection.

        Returns True while numTweets is below the limit, whether or not the
        insert succeeded.
        """
        print("tweet")
        while self.numTweets < self.limit:
            try:
                client = MongoClient()
                db = client['twitter1_db']
                collection = db['twitter1_collection']
                tweet = json.loads(data)
                collection.insert_one(tweet)
                print(tweet)
                self.numTweets = self.numTweets + 1
                return True
            except BaseException as e:
                print("Error on_data: %s" % str(e))
            return True
        # NOTE(review): once the limit is reached, control falls through and
        # the method returns None rather than False.

    def on_error(self, status):
        """Print the error status reported by the stream."""
        print(status)
twitter_stream = Stream(auth, MyListener(num_tweets, max_tweets))
twitter_stream.filter(track=Keywords_list)
My application is to retweet a hashtag using Tweepy. Retweeting the hashtag works, I am having trouble getting errors 2 and 3 to work.
Errors
1. Your own id (done)
2. If tweet has already been RTd
3. If tweet to RT comes from protected source
Accessing api.retweet(doTweet) within the StdOutListener does not allow it to fall through to on_error(). How else can I do this? I am Python noob.
class StdOutListener(tweepy.StreamListener):
    """Listener that retweets every matching tweet not written by us."""

    def on_data(self, data):
        """Decode one message and retweet it unless we are its author."""
        all_data = json.loads(data)
        username = all_data["user"]["screen_name"]
        doTweet = all_data["id"]
        if username != our_own_id:
            #make sure you haven't already retweeted
            #make sure tweets aren't protected
            print(username)  # just so we know it's working
            api.retweet(doTweet)
        return True

    def on_error(self, status_code):
        """Print the error code and keep listening."""
        print('error')
        read_error = json.loads(status_code)
        print('Got an error with status code: ' + str(read_error))
        return True  # To continue listening

    def on_timeout(self):
        """Report a timeout and keep listening."""
        print('Timeout...')
        return True  # To continue listening
try:
    if __name__ == '__main__':
        listener = StdOutListener()
        stream = tweepy.Stream(auth, listener)
        stream.filter(track=['#love'])
except KeyboardInterrupt:
    # Exit quietly when interrupted from the keyboard.
    sys.exit()
You could modify your call to api.retweet in your on_data method as follows:
...
if username != our_own_id:
    print(username)  # just so we know it's working
    try:
        api.retweet(doTweet)
    except tweepy.TweepError as e:
        # add here a more complex error handling
        print(e)
Hope it helps.
I am trying to stream Twitter data for a period of time, say 5 minutes, using the Stream.filter() method. I am storing the retrieved tweets in a JSON file. The problem is that I am unable to stop the filter() method from within the program; I need to stop the execution manually. I tried stopping based on system time using the time package. I was able to stop writing tweets to the JSON file, but the stream method keeps running, so execution never continues to the next line of code.
I am using IPython notebook to write and execute the code.
Here's the code:
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)
api = tweepy.API(auth)
from tweepy import Stream
from tweepy.streaming import StreamListener
class MyListener(StreamListener):
    """Listener that appends raw messages to abcd.json for a limited time."""

    def __init__(self, start_time, time_limit=60):
        self.time = start_time
        self.limit = time_limit

    def on_data(self, data):
        """Append one message to abcd.json while the time limit has not passed.

        Always returns True, so this method never asks the stream to stop.
        """
        while (time.time() - self.time) < self.limit:
            try:
                with open('abcd.json', 'a') as save_file:
                    save_file.write(data)
                    save_file.write('\n')
                return True
            except BaseException as e:
                # Report the failure, wait, and retry until time runs out.
                print('failed ondata, %s' % str(e))
                time.sleep(5)
        return True

    def on_status(self, status):
        """Return False once the time limit has passed."""
        if (time.time() - self.time) >= self.limit:
            print('time is over')
            return False

    def on_error(self, status):
        """Return False after the time limit; otherwise print and continue."""
        if (time.time() - self.time) >= self.limit:
            print('time is over')
            return False
        else:
            print(status)
            return True
start_time = time.time()
stream_data = Stream(auth, MyListener(start_time,20))
stream_data.filter(track=['name1','name2',...list ...,'name n'])#list of the strings I want to track
These links are similar, but they do not answer my question directly:
Tweepy: Stream data for X minutes?
Stopping Tweepy steam after a duration parameter (# lines, seconds, #Tweets, etc)
Tweepy Streaming - Stop collecting tweets at x amount
I used this link as my reference,
http://stats.seandolinar.com/collecting-twitter-data-using-a-python-stream-listener/
In order to close the stream you need to return False from on_data(), or on_status().
Because tweepy.Stream() runs a while loop itself, you don't need the while loop in on_data().
When initializing MyListener, you didn't call the parent's class __init__ method, so it wasn't initialized properly.
So for what you're trying to do, the code should be something like:
class MyStreamListener(tweepy.StreamListener):
    """Listener that writes raw messages to abcd.json for a limited time."""

    def __init__(self, time_limit=60):
        self.start_time = time.time()
        self.limit = time_limit
        # Opened once here and closed in on_data when the limit is reached.
        self.saveFile = open('abcd.json', 'a')
        super(MyStreamListener, self).__init__()

    def on_data(self, data):
        """Write one message, or close the file and stop after the limit.

        Returns True while within the time limit and False afterwards;
        returning False is what closes the stream.
        """
        if (time.time() - self.start_time) < self.limit:
            self.saveFile.write(data)
            self.saveFile.write('\n')
            return True
        else:
            self.saveFile.close()
            return False
myStream = tweepy.Stream(auth=api.auth, listener=MyStreamListener(time_limit=20))
myStream.filter(track=['test'])
Access the variable myListener.running, but instead of passing MyListener() directly to Stream, first create a variable as follows:
myListener = MyListener()
timeout code here... such as time.sleep(20)
myListener.running = False
So, I was having this issue as well. Fortunately Tweepy is open source, so it's easy to dig into the problem.
Basically the important part is this here:
def _data(self, data):
    # The stream stops only when the listener returns exactly False;
    # None or any other value leaves it running.
    if self.listener.on_data(data) is False:
        self.running = False
On Stream class in streaming.py
That means that to close the connection you just have to return False from the listener's on_data() method.
For those who are trying with Twitter api V2 (StreamingClient class), here is the solution:
client.disconnect()