I've been following the tutorial available at Real Python to try to create a Twitter bot that retweets a specific user based on keywords within their tweets. By using the follow and track parameters, I should be able to retweet only those tweets from that user that contain both "goal" AND "assist", but the listener seems to retweet all tweets that contain these keywords, not just those from the specific user.
#!/usr/bin/env python
# tweepy-bots/bots/retweet.py
import tweepy
import logging
from config import create_api
import json
# Configure the root logger so records at INFO level and above are emitted.
logging.basicConfig(level=logging.INFO)
# getLogger() with no name returns the root logger; the code below logs through it.
logger = logging.getLogger()
class RetweetListener(tweepy.StreamListener):
    """Stream listener that retweets each status it receives, with a few exclusions.

    Replies, statuses written by the authenticated account, and statuses
    already flagged as retweeted are left alone.
    """

    def __init__(self, api):
        """Store the API client and the authenticated user's profile."""
        self.api = api
        # Fetched once so on_status can recognise our own tweets.
        self.me = api.me()

    def on_status(self, tweet):
        """Retweet ``tweet`` unless it is a reply, our own, or already retweeted."""
        logger.info(f"Processing tweet id {tweet.id}")

        is_reply = tweet.in_reply_to_status_id is not None
        if is_reply or tweet.user.id == self.me.id:
            # A reply, or authored by this account: nothing to do.
            return

        if tweet.retweeted:
            # Already retweeted earlier.
            return

        try:
            tweet.retweet()
        except Exception:
            # Best effort: log the failure (with traceback) and keep streaming.
            logger.error("Error on fav and retweet", exc_info=True)

    def on_error(self, status):
        """Log the error status reported by the stream."""
        logger.error(status)
def main(keywords, follow_ids=("761568335138058240",)):
    """Start a filtered stream and hand every matching status to RetweetListener.

    Args:
        keywords: Phrases passed to the stream's ``track`` filter.
        follow_ids: User ids (as strings) passed to the stream's ``follow``
            filter. Defaults to the single account that was previously
            hard-coded, so existing callers behave exactly as before.
    """
    api = create_api()
    tweets_listener = RetweetListener(api)
    stream = tweepy.Stream(api.auth, tweets_listener)
    # NOTE(review): per Twitter's statuses/filter documentation, `follow` and
    # `track` appear to be combined with OR rather than AND, so this stream may
    # deliver keyword matches from any user -- confirm, and filter on the
    # author inside the listener if only the followed users are wanted.
    stream.filter(follow=list(follow_ids), track=keywords, languages=["en"])


if __name__ == "__main__":
    main(["Goal Assist"])
Posting my own answer based on how I hacked it together, not sure if it's the best method but it works.
class RetweetListener(tweepy.StreamListener):
    """Stream listener that retweets statuses containing both required phrases.

    Replies, statuses written by the authenticated account, and statuses
    already flagged as retweeted are ignored.
    """

    def __init__(self, api):
        """Store the API client and the authenticated user's profile."""
        self.api = api
        # Fetched once so on_status can recognise our own tweets.
        self.me = api.me()

    def on_status(self, tweet):
        """Retweet ``tweet`` when it contains both 'Goal -' and 'Assist -'."""
        logger.info(f"Processing tweet id {tweet.id}")
        if tweet.in_reply_to_status_id is not None or \
                tweet.user.id == self.me.id:
            # This tweet is a reply or I'm its author so, ignore it
            return
        if tweet.retweeted:
            return
        # Each phrase needs its own membership test: the expression
        # `'Goal -' and 'Assist -' in text` only ever checks the second phrase,
        # because a non-empty string literal is always truthy.
        if 'Goal -' in tweet.text and 'Assist -' in tweet.text:
            try:
                # Retweet, since we have not retweeted it yet
                tweet.retweet()
            except Exception:
                # Best effort: log the failure (with traceback) and keep streaming.
                logger.error("Error on fav and retweet", exc_info=True)

    def on_error(self, status):
        """Log the error status reported by the stream."""
        logger.error(status)
Related
I have this code that retweets a tweet if that said tweet includes the mention of #oogabooga.
How would I change it so that it would retweet every tweet tweeted by #oogabooga without the need of a mention in those said tweets?
So basically I want to retweet everything that #oogabooga tweets, regardless of content. I tried modifying it myself and went through tweepy docs, API, and looked up some similar problems trying to build from there, but with no luck. Help a noob out!
import logging
import time
import random
from datetime import datetime, timedelta
# Configure the root logger so records at INFO level and above are emitted.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()
# NOTE(review): the four strings below are placeholders for the consumer key/secret
# and access token/secret; real credentials should not be committed in source.
auth = tweepy.OAuthHandler('hiddenkey1','hiddenkey2')
auth.set_access_token('hiddenkey3','hiddenkey4')
# NOTE(review): the wait_on_rate_limit* flags presumably make Tweepy sleep (and log)
# when the rate limit is hit instead of raising -- confirm against the Tweepy version in use.
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)
# Profile of the authenticated account.
user = api.me()
def fav_retweet_user(api, user_handle):
    """Like and retweet the tweets returned by a search for ``user_handle``.

    The search excludes retweets and is limited to English. Replies and tweets
    written by the authenticated account are skipped; every other result is
    favorited and retweeted unless it is already flagged as such.

    Args:
        api: Client exposing ``search(q=..., lang=...)`` and ``me()``.
        user_handle: Text to search for (e.g. a handle or hashtag).
    """
    search_query = f"{user_handle} -filter:retweets"
    logger.info(f'Retrieving tweets mentioning {user_handle}...')
    tweets = api.search(q=search_query, lang="en")
    # Look up our own id once instead of once per tweet (each call is an API request).
    my_id = api.me().id
    for tweet in tweets:
        if tweet.in_reply_to_status_id is not None or tweet.user.id == my_id:
            # Skip only this tweet; a `return` here would silently drop every
            # remaining search result after the first reply or own tweet.
            continue
        if not tweet.favorited:
            try:
                tweet.favorite()
                logger.info(f"Liked a tweet mentioning {user_handle}")
            except Exception:
                # Best effort: log with traceback and move on.
                logger.error("Error on fav", exc_info=True)
        if not tweet.retweeted:
            try:
                tweet.retweet()
                logger.info(f"Retweeted a tweet mentioning {user_handle}")
            except Exception:
                logger.error("Error on fav and retweet", exc_info=True)
# Poll forever: run one search/like/retweet pass, then pause 30 seconds before the next.
while True:
    fav_retweet_user(api, "#oogabooga")
    logger.info("Waiting...")
    time.sleep(30)
Changing your search query to the following will work:
# `from:<screen_name>` is the search operator for tweets written by an account;
# the operator takes the bare screen name, so any leading '@' or '#' is stripped.
# Retweets are still excluded, as in the original query.
search_query = f"from:{user_handle.lstrip('@#')} -filter:retweets"
I'm following a tutorial about analyzing twitter data. I'm wondering why I keep getting a syntax error on line 44: except BaseException as e:
from tweepy import API
from tweepy import Cursor
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
import twitter_credentials
# TWITTER AUTHENTICATOR
class TwitterAuthenticator():
    """Builds an OAuth handler from the values in ``twitter_credentials``."""

    def authenticate_twitter_app(self):
        """Return an OAuthHandler carrying the consumer and access credentials."""
        creds = twitter_credentials
        handler = OAuthHandler(creds.CONSUMER_KEY, creds.CONSUMER_SECRET)
        handler.set_access_token(creds.ACCESS_TOKEN, creds.ACCESS_TOKEN_SECRET)
        return handler
# TWITTER STREAMER
class TwitterStreamer():
    """Class for streaming and processing live tweets."""

    def __init__(self):
        self.twitter_authenticator = TwitterAuthenticator()

    def stream_tweets(self, fetched_tweets_filename, hash_tag_list):
        """Authenticate, connect to the streaming API and filter by keywords.

        Args:
            fetched_tweets_filename: Path handed to the listener, which appends
                each received message to it.
            hash_tag_list: Keywords passed to the stream's ``track`` filter.
        """
        # The listener's constructor requires the output filename; calling it
        # with no arguments raises TypeError before the stream is ever opened.
        listener = TwitterListener(fetched_tweets_filename)
        auth = self.twitter_authenticator.authenticate_twitter_app()
        stream = Stream(auth, listener)
        stream.filter(track=hash_tag_list)
class TwitterListener(StreamListener):
    """Basic listener that prints received tweets to stdout and appends them to a file."""

    def __init__(self, fetched_tweets_filename):
        """Remember the file that raw stream messages are appended to."""
        # Run the base initializer so inherited state is set up as well.
        super().__init__()
        self.fetched_tweets_filename = fetched_tweets_filename

    def on_data(self, data):
        """Print ``data`` and append it to the output file.

        Always returns True, so an error while handling one message does not
        stop the stream.
        """
        try:
            print(data)
            with open(self.fetched_tweets_filename, 'a') as tf:
                tf.write(data)
            return True
        except Exception as e:
            # Exception rather than BaseException: KeyboardInterrupt and
            # SystemExit must propagate so the process can still be stopped.
            print('Error on_data %s' % str(e))
        return True

    def on_error(self, status):
        """Print the error status reported by the stream."""
        print(status)
if __name__ == '__main__':
    # Keywords to track. The assignment operator was missing, which made this
    # line a subscript on an undefined name (NameError at startup).
    hash_tag_list = ['kevin durant', 'steph curry', 'clippers']
    # File that received tweets are appended to.
    fetched_tweets_filename = 'tweets.json'
    twitter_streamer = TwitterStreamer()
    twitter_streamer.stream_tweets(fetched_tweets_filename, hash_tag_list)
Your except is indented too much. Should be on the same level as try (in on_data()) and the code in except should be indented the same.
Btw said function is written wrong. There are potential cases where it returns nothing. You should have at least return False added at the end of function body.
Except should be indented as try, so try the following
def on_data(self, data):
    """Print ``data`` and append it to ``self.fetched_tweets_filename``.

    Always returns True, so an error while handling one message does not
    stop the stream.
    """
    try:
        print(data)
        with open(self.fetched_tweets_filename, 'a') as tf:
            tf.write(data)
        return True
    except Exception as e:
        # Exception rather than BaseException: KeyboardInterrupt and
        # SystemExit must propagate so the process can still be stopped.
        print('Error on_data %s' % str(e))
    return True
Am doing a project to find top 10 trending topics or hashtags on Twitter. Am creating a class with the code below:
class TweetsListener(StreamListener):
    """Listener that prints the author's screen name for each streamed message."""

    def __init__(self, csocket):
        """Keep a reference to the client socket supplied by the caller."""
        # Run the base initializer so inherited state is set up as well.
        super().__init__()
        self.client_socket = csocket

    def on_data(self, data):
        """Decode one JSON message and print its author's UTF-8 encoded screen name.

        Always returns True, so a malformed message or one without a ``user``
        entry is reported and the stream keeps running.
        """
        try:
            msg = json.loads(data)
            print(msg['user']['screen_name'].encode('utf-8'))
            return True
        except Exception as e:
            # Exception rather than BaseException: KeyboardInterrupt and
            # SystemExit must propagate so the process can still be stopped.
            print("Error on_data: %s" % str(e))
        return True

    def on_error(self, status):
        """Print the error status and return True."""
        print(status)
        return True
Below is the code for sending data:
def sendData(c_socket):
    """Open an authenticated stream whose listener is given ``c_socket``.

    Args:
        c_socket: Client socket handed to ``TweetsListener``.
    """
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_secret)
    twitter_stream = Stream(auth, TweetsListener(c_socket))
    # Closing parenthesis restored; without it the snippet is a syntax error.
    twitter_stream.filter(track=['india'])
Here twitter_stream.filter is filtering messages with tag India. I want to get all the messages from Twitter. In short, I do not want a filter to be applied. Is there a way to do the same?
Any help appreciated.
- P.S : Novice in Spark streaming and PySpark
Twitter now offers a sample stream: https://developer.twitter.com/en/docs/tweets/sample-realtime/overview/GET_statuse_sample.html
It's fairly new so I'm not sure if the wrappers (looks like you're using Tweepy) have implemented it yet, but it shouldn't be hard to interface with.
Is there a way for me to download all the tweets made by all twitter users in a particular region (say the USA) over a particular time period(say a week starting Nov. 15th and ending Nov 22nd) using Python? This is for an NLP task. Right now I am able to download the tweets related to certain topics which I search for and only the tweets being made while the program is running. I want to be able to get past tweets for a data mining/NLP task regardless of the topic.
Yes! You can.
Use Tweepy
import tweepy
# Twitter API credentials; left empty here and meant to be filled in before running.
consumer_key = ''
consumer_secret = ''
access_token_key = ''
access_token_secret = ''
# Build the OAuth handler from the consumer pair, then attach the access-token pair.
auth1 = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth1.set_access_token(access_token_key, access_token_secret)
class StreamListener(tweepy.StreamListener):
    """Minimal listener that only reports which callback was invoked."""

    def on_status(self, tweet):
        """Report that a status callback fired."""
        print('Ran on_status')

    def on_error(self, status_code):
        """Report the error code and return False."""
        print('Error: ' + repr(status_code))
        return False

    def on_data(self, data):
        """Report that raw data arrived.

        NOTE(review): in Tweepy the base ``on_data`` is what dispatches to
        ``on_status``; overriding it without calling the base implementation
        presumably means ``on_status`` is never invoked -- confirm.
        """
        print('Ok, this is actually running')
# Create a listener and attach it to a stream authenticated with auth1.
l = StreamListener()
streamer = tweepy.Stream(auth=auth1, listener=l)
# Terms passed to the stream's `track` filter.
setTerms = ['twitter']
streamer.filter(track = setTerms)
In stream.filter() you can specify the region through the locations parameter (see the streaming API documentation for more details):
stream.filter(locations=[ "here you can define a region by listing the lang/lat" ], track=terms)
If you have a specifically defined region, you can check for it in the listener:
def on_status(self, status):
if status.coordinates .. :
So far I have the following code that works and inserts the tweets into my mongodb but I had a few questions.
class CustomStreamListener(tweepy.StreamListener):
    """Listener that stores every raw stream message in MongoDB."""

    def __init__(self, api):
        """Initialise the base listener and open the ``test`` database."""
        # super(tweepy.StreamListener, self) starts the lookup *after*
        # StreamListener, so the base initializer was being skipped entirely.
        super().__init__(api)
        self.api = api
        self.db = pymongo.MongoClient().test

    def on_data(self, tweet):
        """Parse the raw JSON message and insert it into the ``tweets`` collection."""
        # NOTE(review): Collection.insert is deprecated in newer PyMongo releases
        # in favour of insert_one -- confirm against the installed version.
        self.db.tweets.insert(json.loads(tweet))

    def on_error(self, status_code):
        """Ignore the error and keep the stream alive."""
        return True  # Don't kill the stream

    def on_timeout(self):
        """Ignore the timeout and keep the stream alive."""
        return True  # Don't kill the stream
# Open a stream that hands each incoming message to a CustomStreamListener.
sapi = tweepy.streaming.Stream(auth, CustomStreamListener(api))
# Track the keyword 'arsenal', passing 'en' as the language filter.
sapi.filter(track=['arsenal'] , languages = ['en'])
Could someone explain how I can get only certain parts of the tweet inserted into the database ie. just the tweet text and location.
Does the Twitter streaming API allow displaying just original tweets, with no reply tweets?
json.loads(tweet) is just a dictionary, you can freely choose what parts of its key-values you process.
You can filter tweets by conditioning them either way you like:
# Parse the raw JSON message into a plain dict.
tweet_obj = json.loads(tweet)
if not tweet_obj['in_reply_to_user_id']: # true when the field is falsy (e.g. `None`), i.e. presumably for non-reply tweets
    pass # add some processing here