I'm trying to retrieve tweets from a single user_id as per the JSON data tweepy fetches.
The issue is im also retrieving all mentions of the user_id i.e. other people mentioning the user_id in any way (RT, Mentions etc)
I have quoted my script below. Please let me know if this is possible in tweepy at all.
the snippet streamer.filter(follow = ['25073877'], encoding = 'utf8') shows which user i am wanting to follow
Thank you in advance.
from __future__ import print_function
import tweepy
import json
import MySQLdb
from dateutil import parser
import Twitter_API
import DBConfig
Access_Token = ""
Access_Token_Secret = ""
Consumer_Key = ""
Consumer_Secret = ""
class StreamListener(tweepy.StreamListener):
def on_connect(self):
print("You are now connected to the streaming API.")
def on_error(self, status_code):
print('An Error has occured: ' + repr(status_code))
return False
def on_data(self, data):
try:
# Decode the JSON from Twitter
datajson = json.loads(data)
#grab the wanted data from the Tweet
text = datajson['extended_tweet']['full_text']
screen_name = datajson['user']['screen_name']
tweet_id = datajson['id']
created_at = parser.parse(datajson['created_at'])
replying_to = datajson['in_reply_to_screen_name']
#print out a message to the screen that we have collected a tweet
#print("Tweet collected at " + str(created_at))
print(text, screen_name, tweet_id, created_at, replying_to)
#insert the data into the MySQL database
#store_data(created_at, text, screen_name, tweet_id)
except Exception as e:
print(e)
auth = tweepy.OAuthHandler(Consumer_Key, Consumer_Secret)
auth.set_access_token(Access_Token, Access_Token_Secret)
#Set up the listener. The 'wait_on_rate_limit=True' is needed to help with Twitter API rate limiting.
listener = StreamListener(api=tweepy.API(wait_on_rate_limit=True))
streamer = tweepy.Stream(auth=auth, listener=listener)
#print("Tracking: " + str(WORDS))
streamer.filter(follow = ['25073877'], encoding = 'utf8')
Sorry for the bad indentation if any.
Related
Hi I have looked at many guides and tutorials on how to do this, but I am having trouble with being able to use tweepy to store the JSON data in a text file.
class StreamListener(tweepy.StreamListener):
def on_status(self, status):
print(status)
def on_error(self, status):
print status
if status == 420:
return False
if __name__ == '__main__':
stream_listener = StreamListener()
auth = tweepy.OAuthHandler(consumer_token, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
stream = tweepy.Stream(auth, stream_listener)
I have another python file which is supposed to read data into a list:
import pandas
import json
json_data = 'twitter_data.txt'
data_list = []
#load file
tweets_file = open(json_data, "r")
for line in tweets_file:
try:
tweet = json.loads(line) #this line causes problems
data_list.append(tweet)
except:
continue
print len(data_list)
I thought the data received from twitter comes in JSON format, and the guides I'm following all say it does, but it's actually in another object.
Should I just store everything in a list then json dump that list into the new text file?
It seems like you're on the right track. You can modify the stream listener to write tweets to a file directly.
Edit: this now writes out in JSON format.
#Import the necessary methods from tweepy library
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
from tweepy import API
#Variables that contains the user credentials to access Twitter API
CONSUMER_KEY = #YOUR CONSUMER KEY
CONSUMER_SECRET = #YOUR CONSUMER SECRET
ACCESS_TOKEN = #YOUR ACCESS TOKEN
ACCESS_TOKEN_SECRET = #YOUR ACCESS TOKEN SECRET
class FileWriteListener(StreamListener):
def __init__(self):
super(StreamListener, self).__init__()
self.save_file = open('tweets.json','w')
self.tweets = []
def on_data(self, tweet):
self.tweets.append(json.loads(tweet))
self.save_file.write(str(tweet))
def on_error(self, status):
print(status)
return True
auth = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
api = API(auth)
twitter_stream = Stream(auth, MyListener())
# Here you can filter the stream by:
# - keywords (as shown)
# - users
twitter_stream.filter(track=['hello'])
This code will run indefinitely, so you either need to exit the process after some time (Ctrl-C) or modify the code.
Then you can load the data:
import json
json_data = []
with open('tweets.json','r') as f:
json_data.append( json.loads(f.readline()) )
Hope this helps!
I think something like this may be what your looking for.
def on_status(self, tweet):
json_dumps = json.dumps(tweet._json)
tweet_json = json.loads(json_dumps)
print(tweet_json['created_at'])
These are all the keys you can use in the tweet_json[ ]
dict_keys(['created_at', 'id', 'id_str', 'text', 'source', 'truncated', 'in_reply_to_status_id', 'in_reply_to_status_id_str', 'in_reply_to_user_id', 'in_reply_to_user_id_str', 'in_reply_to_screen_name', 'user', 'geo', 'coordinates', 'place', 'contributors', 'retweeted_status', 'is_quote_status', 'quote_count', 'reply_count', 'retweet_count', 'favorite_count', 'entities', 'favorited', 'retweeted', 'filter_level', 'lang', 'timestamp_ms'])
I am trying to extract 1000 unique, fully extended URI's from Twitter using Tweepy and Python. Specifically, I am interested in links that direct me outside of Twitter (so not back to other tweets/ retweets/ duplicates).
The code I wrote keeps giving me a Key error for "entities."
It will give me a few urls before breaking; some are extended, some are not. I have no idea how to go about fixing this.
Help me please!
Note: I left my credentials out.
Here is my code:
# Import the necessary methods from different libraries
import tweepy
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
import json
# Variables that contains the user credentials to access Twitter API
access_token = "enter token here"
access_token_secret = "enter token here"
consumer_key = "enter key here"
consumer_secret = "enter key here"
# Accessing tweepy API
# api = tweepy.API(auth)
# This is a basic listener that just prints received tweets to stdout.
class StdOutListener(StreamListener):
def on_data(self, data):
# resource: http://code.runnable.com/Us9rrMiTWf9bAAW3/how-to- stream-data-from-twitter-with-tweepy-for-python
# Twitter returns data in JSON format - we need to decode it first
decoded = json.loads(data)
# resource: http://socialmedia-class.org/twittertutorial.html
# Print each tweet in the stream to the screen
# Here we set it to stop after getting 1000 tweets.
# You don't have to set it to stop, but can continue running
# the Twitter API to collect data for days or even longer.
count = 1000
for url in decoded["entities"]["urls"]:
count -= 1
print "%s" % url["expanded_url"] + "\r\n\n"
if count <= 0:
break
def on_error(self, status):
print status
if __name__ == '__main__':
# This handles Twitter authetification and the connection to Twitter Streaming API
l = StdOutListener()
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
stream = Stream(auth, l)
# This line filter Twitter Streams to capture data by the keyword: YouTube
stream.filter(track=['YouTube'])
It seems like the API is hitting a rate limit, so one option is to include an Exception when it gets a KeyError, I then see [u'limit']. I added a count display to verify it does get to 1000:
count = 1000 # moved outside of class definition to avoid getting reset
class StdOutListener(StreamListener):
def on_data(self, data):
decoded = json.loads(data)
global count # get the count
if count <= 0:
import sys
sys.exit()
else:
try:
for url in decoded["entities"]["urls"]:
count -= 1
print count,':', "%s" % url["expanded_url"] + "\r\n\n"
except KeyError:
print decoded.keys()
def on_error(self, status):
print status
if __name__ == '__main__':
l = StdOutListener()
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
stream = Stream(auth, l)
stream.filter(track=['YouTube'])
I have this code here and it work perfectly.
# encoding=utf8
#Import the necessary methods from tweepy library
import sys
from tweepy import OAuthHandler
from tweepy import Stream
from tweepy.streaming import StreamListener
reload(sys)
sys.setdefaultencoding('utf8')
#Variables that contains the user credentials to access Twitter API
access_token = ""
access_token_secret = ""
consumer_key = ""
consumer_secret = ""
#This is a basic listener that just prints received tweets to stdout.
class StdOutListener(StreamListener):
def on_data(self, data):
#save data
with open('debate_data.txt', 'a') as tf:
tf.write((data).decode('unicode-escape').encode('utf-8'))
return True
def on_error(self, status):
print status
if __name__ == '__main__':
#This handles Twitter authetification and the connection to Twitter Streaming API
l = StdOutListener()
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
stream = Stream(auth, l)
#This line filter Twitter Streams to capture data by the keywords: 'Bernier', 'Rossello', 'Bernabe'
stream.filter(track=['Bernier', 'Rosselló', 'Rossello', 'Bernabe', 'Lúgaro', 'Lugaro', 'María de Lourdes', 'Maria de Lourdes', 'Cidre'])
But when I run this other piece of code I get the wrong answer.
import json
import io
#save the tweets to this path
tweets_data_path = 'debate_data.txt'
tweets_data = []
with io.open(tweets_data_path, 'r') as tweets_file:
for line in tweets_file:
try:
tweet = json.loads(line)
tweets_data.append(tweet)
except:
continue
print len(tweets_data)
There are 42,188 Tweets on that file, but when I run the code Im only getting 291. I think is something with the encoding/decoding but I cant figure out what. Any help would be greatly appreciate.
I ran this example without any of the encoding/decoding and it worked perfectly.
http://adilmoujahid.com/posts/2014/07/twitter-analytics/
The reason of only getting 291 is the json.loads() throw some errors and except continue it.
I suggest you print the error just like:
except Exception as err:
print err
continue
now you know the error reason, and solve it.
Are you sure the format of data inside debate_data.txt are json ?
As agnewee said, I also recommend:
try:
tweet = json.loads(line)
except Exception as err:
print err # or use log to see what happen
else:
tweets_data.append(tweet)
Maybe you can help me. This following python code retrieves Twitter Streaming data and stops when 1000 tweet data are got. It works but returns the fields "created_at, screen_name, and text" separated by tab. Instead I'd like to get the data in JSON format. How can I set the code in order to get the data formatted in JSON?
# Import the necessary package to process data in JSON format
try:
import json
except ImportError:
import simplejson as json
# Import the necessary methods from "twitter" library
from twitter import Twitter, OAuth, TwitterHTTPError, TwitterStream
# Variables that contains the user credentials to access Twitter API
CONSUMER_KEY = '7pWHWtYlXM9ayJfUKv2F8v84B'
CONSUMER_SECRET = 'Dfcx10Px77Ggn0qGbCHc4TZC7M2IHsXpqk9CaGiCLzcr9VMX5n'
ACCESS_TOKEN = '245080367-zuLrIbxblOnocashgku9dsmDKgy3R7uU0VCTIRDx'
ACCESS_SECRET = 'wCx5ufD9Zft46hVjieLdv0af7p9DxUTsPgge9Zm2qelR9'
oauth = OAuth(ACCESS_TOKEN, ACCESS_SECRET, CONSUMER_KEY, CONSUMER_SECRET)
# Initiate the connection to Twitter Streaming API
twitter_stream = TwitterStream(auth=oauth)
# Get a sample of the public data following through Twitter
#iterator = twitter_stream.statuses.sample()
iterator = twitter_stream.statuses.filter(track="Euro2016", language="fr")
tweet_count = 1000
for tweet in iterator:
tweet_count -= 1
print (tweet['created_at'],"\t",tweet['user']['screen_name'],"\t",tweet['geo'], "\t",tweet['text'])
if tweet_count <= 0:
break
You can import tweepy (you need to install it first with pip) and override the listener class to be able to output the data in json format. Here is an example:
from tweepy import Stream
from tweepy.streaming import StreamListener
#Listener Class Override
class listener(StreamListener):
def on_data(self, data):
try:
tweet = json.loads(data)
with open('your_data.json', 'a') as my_file:
json.dump(tweet, my_file)
except BaseException:
print('Error')
pass
def on_error(self, status):
print(statuses)
my_listener=listener()
twitterStream = Stream(oauth, my_listener) #Inizialize Stream object
You can read more about tweepy here: http://docs.tweepy.org/en/v3.4.0/streaming_how_to.html
I am learning python and have started out a few weeks ago. I have tried to write a code to check for tweets with a particular hashtag in the streaming API and then reply to the tweet in case the a tweet has not been posted to the handle previously. While running the code, I have tried to avoid overstepping the rate limitations so as to not get any error. But there is an issue of duplicate status that Twitter raises once in a while. I would like the code to keep running and not stop on encountering an issue. Please help in this. The following is the code:
import tweepy
from tweepy import Stream
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
import json
import time
consumer_key =
consumer_secret =
access_token =
access_secret =
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)
def check(status):
datafile = file('C:\Users\User\Desktop\Growth Handles.txt', 'r')
found = False
for line in datafile:
if status.user.screen_name in line:
found = True
break
return found
class MyListener(StreamListener):
def on_status(self, status):
f=status.user.screen_name
if check(status) :
pass
else:
Append=open('Growth Handles.txt' , 'a' )
Append.write(f + "\n")
Append.close()
Reply='#' + f + ' Check out Tomorrowland 2014 Setlist . http://.... '
api = tweepy.API(auth)
api.update_status(status=Reply)
time.sleep(45)
return True
def on_error(self, status):
print(status)
return True
twitter_stream = Stream(auth, MyListener())
twitter_stream.filter(track=['#musiclovers'])
In case, update_status method throws an error
try:
api.update_status(status=Reply)
except:
pass
In case twitter_stream gets disconnected.
twitter_stream = Stream(auth, MyListener())
while True:
twitter_stream.filter(track=['#musiclovers'])
Warning - Your app may got banned if it reaches certain limits, or their system caught you spamming. Check Twitter Rules