How to json dump tweepy stream into text file? - python

Hi I have looked at many guides and tutorials on how to do this, but I am having trouble with being able to use tweepy to store the JSON data in a text file.
class StreamListener(tweepy.StreamListener):
    """Stream listener that prints each status and reacts to error codes."""

    def on_status(self, status):
        """Print the received status object as-is."""
        print(status)

    def on_error(self, status):
        """Print the error status code.

        Returns False when the code is 420 and None for any other code.
        """
        # The parenthesised form is valid on both Python 2 and Python 3 and
        # matches the print call used in on_status.
        print(status)
        if status == 420:
            # Per the tweepy streaming documentation, returning False from
            # on_error disconnects the stream.
            return False
if __name__ == '__main__':
    # Build the OAuth handler first, then attach a listener to a new stream.
    auth = tweepy.OAuthHandler(consumer_token, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    stream_listener = StreamListener()
    # NOTE(review): nothing after this line starts the stream in the snippet
    # as shown (no filter()/sample() call is visible).
    stream = tweepy.Stream(auth, stream_listener)
I have another python file which is supposed to read data into a list:
import pandas
import json
json_data = 'twitter_data.txt'
data_list = []

# Load the file line by line; each line is expected to hold one JSON document.
# The with-block guarantees the file handle is closed when reading is done.
with open(json_data, "r") as tweets_file:
    for line in tweets_file:
        try:
            tweet = json.loads(line)  # this line causes problems
        except ValueError:
            # The line is not valid JSON (json's decode errors are
            # ValueErrors): skip it and keep reading.
            continue
        data_list.append(tweet)

# Number of lines that were successfully parsed.
print(len(data_list))
I thought the data received from twitter comes in JSON format, and the guides I'm following all say it does, but it's actually in another object.
Should I just store everything in a list then json dump that list into the new text file?

It seems like you're on the right track. You can modify the stream listener to write tweets to a file directly.
Edit: this now writes out in JSON format.
#Import the necessary methods from tweepy library
import json

from tweepy import API
from tweepy import OAuthHandler
from tweepy import Stream
from tweepy.streaming import StreamListener
# Variables that contain the user credentials to access the Twitter API.
# Each value is a placeholder string: replace it with your own credential.
CONSUMER_KEY = 'YOUR CONSUMER KEY'
CONSUMER_SECRET = 'YOUR CONSUMER SECRET'
ACCESS_TOKEN = 'YOUR ACCESS TOKEN'
ACCESS_TOKEN_SECRET = 'YOUR ACCESS TOKEN SECRET'
class FileWriteListener(StreamListener):
    """Listener that keeps every received message in memory and on disk.

    Each message is parsed, appended to ``self.tweets`` and written to
    ``tweets.json`` as one JSON document per line.
    """

    def __init__(self):
        # Name this class (not the base class) in super() so that
        # StreamListener.__init__ itself is executed.
        super(FileWriteListener, self).__init__()
        self.save_file = open('tweets.json', 'w')
        self.tweets = []

    def on_data(self, tweet):
        """Parse one raw JSON message, remember it and write it to the file."""
        parsed = json.loads(tweet)
        self.tweets.append(parsed)
        # Re-serialise so every record occupies exactly one line, whatever
        # whitespace the raw payload carried.
        self.save_file.write(json.dumps(parsed) + '\n')
        # The script is normally stopped by interrupting the process, so flush
        # after every record to avoid losing buffered data.
        self.save_file.flush()

    def on_error(self, status):
        """Print the error status; return False on 420 and True otherwise."""
        print(status)
        if status == 420:
            # Per the tweepy streaming documentation, returning False
            # disconnects the stream instead of retrying while rate limited.
            return False
        return True
# Authenticate with the credentials defined above.
auth = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
api = API(auth)

# Use the listener class defined in this snippet (FileWriteListener).
twitter_stream = Stream(auth, FileWriteListener())

# Here you can filter the stream by:
# - keywords (as shown)
# - users
twitter_stream.filter(track=['hello'])
This code will run indefinitely, so you either need to exit the process after some time (Ctrl-C) or modify the code.
Then you can load the data:
import json
# Read every JSON document stored in tweets.json, not just the first one.
json_data = []
decoder = json.JSONDecoder()
with open('tweets.json', 'r') as f:
    content = f.read()

position = 0
while position < len(content):
    # Skip the whitespace (e.g. newlines) between consecutive documents.
    if content[position].isspace():
        position += 1
        continue
    try:
        # raw_decode returns the parsed object and the index where it ended,
        # so documents are read correctly with or without line separators.
        tweet, position = decoder.raw_decode(content, position)
    except ValueError:
        # Anything that cannot be parsed (for instance a record cut short
        # when the writer was interrupted) ends the load.
        break
    json_data.append(tweet)
Hope this helps!

I think something like this may be what you're looking for.
def on_status(self, tweet):
    """Print the 'created_at' field of a received status.

    ``tweet._json`` is used directly as a mapping; serialising it to a string
    and parsing it back would only reproduce the same data.
    """
    tweet_json = tweet._json
    print(tweet_json['created_at'])
These are all the keys you can use in the tweet_json[ ]
dict_keys(['created_at', 'id', 'id_str', 'text', 'source', 'truncated', 'in_reply_to_status_id', 'in_reply_to_status_id_str', 'in_reply_to_user_id', 'in_reply_to_user_id_str', 'in_reply_to_screen_name', 'user', 'geo', 'coordinates', 'place', 'contributors', 'retweeted_status', 'is_quote_status', 'quote_count', 'reply_count', 'retweet_count', 'favorite_count', 'entities', 'favorited', 'retweeted', 'filter_level', 'lang', 'timestamp_ms'])

Related

save the result of lookup_users method in json file

I am trying to fetch user metadata using tweepy by user screen name and save the result as JSON file. Here is my code
import tweepy
from tweepy import Stream
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
# Placeholder credentials; substitute your own values.
CONSUMER_KEY = 'xxx'
CONSUMER_SECRET = 'xxx'
ACCESS_KEY = 'xxx'
ACCESS_SECRET = 'xxx'

# Complete the OAuth handler before handing it to the API client.
auth = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_KEY, ACCESS_SECRET)
api = tweepy.API(auth)
class TweetListener(StreamListener):
    """Basic listener that prints whatever it receives to standard output."""

    def on_error(self, status):
        """Print the error status reported by the stream."""
        print(status)

    def on_data(self, data):
        """Print the received data and return True."""
        print(data)
        return True
# search
api = tweepy.API(auth)
twitterStream = Stream(auth, TweetListener())

# `name` is a list that contains user screen names.
test = api.lookup_users(screen_names=name)

# Attributes printed for every user, in this order.
profile_fields = (
    'screen_name',
    'name',
    'description',
    'followers_count',
    'statuses_count',
)
for user in test:
    for field in profile_fields:
        print(getattr(user, field))
My code runs without any errors and, as you can see, I am printing the data, but my intent is to save it in a JSON file.
I tried different code but nothing works for me. Can anyone please help?
JSON is just a way to save data in specific format.
In order to save your data, you first need to store it properly, and then just dump it.
The format is a dict that holds data under keys, where each key maps to a value.
In your case, I chose 'users' as the key.
Each user in 'users' has keys (name, description, etc.) whose values come from your list.
You need to do something like this:
import json
# Gather the selected attributes of every user under a single 'users' key.
data = {
    'users': [
        {
            'screen_name': user.screen_name,
            'name': user.name,
            'description': user.description,
            'followers_count': user.followers_count,
            'statuses_count': user.statuses_count,
        }
        for user in test
    ]
}

# Serialise the whole structure to data.txt as JSON.
with open('data.txt', 'w') as outfile:
    json.dump(data, outfile)

Avoid rate limitation (Error: 420) in twitter streaming api

I am trying to fetch streaming data from the twitter-streaming-api using the tweepy library in python. However, even after a lot of trials I am not able to get any data or print it as done in the on_data method. It's giving the 420 error message. How can I avoid it?
import io
import json
import time
import tweepy
# Twitter API credentials (placeholder values); replace with your own.
access_token = 'XXXXXX'
access_token_secret = 'XXXXXX'
consumer_key = 'XXXXXX'
consumer_secret = 'XXXXXX'
class MyListener(tweepy.StreamListener):
    """Listener that prints incoming stream messages and stops on error 420."""

    def on_status(self, status):
        """Print the text of a status."""
        # NOTE(review): on_data is overridden below; tweepy's default on_data
        # is presumably what dispatches to on_status, so this method may never
        # be called - confirm against the tweepy version in use.
        print(status.text)

    def on_data(self, tweetdata):
        """Decode one raw JSON message and print the decoded object."""
        data = json.loads(tweetdata)
        print(data)

    def on_error(self, status):
        """Print the error status; return False on 420 and None otherwise."""
        print(status)
        if status == 420:
            # Per the tweepy streaming documentation, returning False
            # disconnects the stream rather than reconnecting while the
            # client is being rate limited.
            return False
auth = tweepy.OAuthHandler(consumer_key=consumer_key, consumer_secret=consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)
myListener = MyListener()
myStream = tweepy.Stream(auth=api.auth, listener=myListener)
try:
    # Run the stream in the foreground: `async` is a reserved word from
    # Python 3.7 on and cannot be used as a keyword argument, and a
    # non-blocking start followed directly by disconnect() would close the
    # connection before any data arrives.
    # `languages` is given as a list of language codes, like `track`.
    myStream.filter(languages=['en'], track=['#NBA'])
except KeyboardInterrupt:
    # Ctrl-C is the expected way to stop the stream.
    pass
finally:
    myStream.disconnect()

How to get twitter data of tweets within a certain time frame?

What do I put in my code so that the program stops printing data once the tweets date back to a certain point? For example, how can I get all tweets about Verratti from within a month of running this?
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
import json
# Twitter API credentials. Each value is a placeholder string: replace it
# with your own credential.
access_token = 'YOUR ACCESS TOKEN'
access_token_secret = 'YOUR ACCESS TOKEN SECRET'
consumer_key = 'YOUR CONSUMER KEY'
consumer_secret = 'YOUR CONSUMER SECRET'
#print
class StdOutListener(StreamListener):
    """Listener that prints the text of each received tweet."""

    def on_data(self, data):
        """Print the 'text' field of one raw JSON message, if it has one.

        Always returns True.
        """
        message = json.loads(data)
        # Only print when the decoded message has a 'text' key, so a message
        # without one does not raise KeyError.
        if 'text' in message:
            print(message['text'])
        return True

    def on_error(self, status):
        """Print the error status reported by the stream."""
        print(status)
#find
if __name__ == '__main__':
    # Authenticate against Twitter and attach a listener to the Streaming API.
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    stream = Stream(auth, StdOutListener())
    # Restrict the stream to tweets matching the keyword 'Verratti'.
    stream.filter(track=['Verratti'])
Nice question. It turns out that the Twitter API only lets you look back one week from the current date. There is a way around it though, someone made a github library that can search for any timeframe using twitter's advanced search function and you don't even have to bother with the whole authentication process.
Check it out: https://github.com/Jefferson-Henrique/GetOldTweets-python

Twitter streaming formatting JSON Output

Maybe you can help me. The following Python code retrieves Twitter Streaming data and stops once 1000 tweets have been received. It works, but it returns the fields "created_at, screen_name, and text" separated by tabs. Instead I'd like to get the data in JSON format. How can I change the code to get the data formatted as JSON?
# Import the necessary package to process data in JSON format
try:
import json
except ImportError:
import simplejson as json
# Import the necessary methods from "twitter" library
from twitter import Twitter, OAuth, TwitterHTTPError, TwitterStream
# Variables that contain the user credentials to access the Twitter API.
# Placeholders only: keep real keys and secrets out of source files and
# replace these values with your own credentials at run time.
CONSUMER_KEY = 'YOUR CONSUMER KEY'
CONSUMER_SECRET = 'YOUR CONSUMER SECRET'
ACCESS_TOKEN = 'YOUR ACCESS TOKEN'
ACCESS_SECRET = 'YOUR ACCESS SECRET'
oauth = OAuth(ACCESS_TOKEN, ACCESS_SECRET, CONSUMER_KEY, CONSUMER_SECRET)
# Initiate the connection to Twitter Streaming API
twitter_stream = TwitterStream(auth=oauth)
# Get a sample of the public data flowing through Twitter
#iterator = twitter_stream.statuses.sample()
iterator = twitter_stream.statuses.filter(track="Euro2016", language="fr")
# Number of tweets still to print before stopping.
tweet_count = 1000
for tweet in iterator:
    # Skip anything that does not carry tweet text, so such a message neither
    # raises KeyError below nor counts towards the limit.
    if 'text' not in tweet:
        continue
    tweet_count -= 1
    print (tweet['created_at'],"\t",tweet['user']['screen_name'],"\t",tweet['geo'], "\t",tweet['text'])
    if tweet_count <= 0:
        break
You can import tweepy (you need to install it first with pip) and override the listener class to be able to output the data in json format. Here is an example:
from tweepy import Stream
from tweepy.streaming import StreamListener
#Listener Class Override
class listener(StreamListener):
    """Stream listener that appends every received message to your_data.json."""

    def on_data(self, data):
        """Parse one raw JSON message and append it to the file as one line."""
        try:
            tweet = json.loads(data)
            with open('your_data.json', 'a') as my_file:
                json.dump(tweet, my_file)
                # One document per line keeps the file readable line by line.
                my_file.write('\n')
        except (ValueError, OSError) as error:
            # Only parsing and file errors are reported here; catching
            # BaseException would also swallow KeyboardInterrupt.
            print('Error')
            print(error)

    def on_error(self, status):
        """Print the error status reported by the stream."""
        print(status)
from tweepy import OAuthHandler

# tweepy's Stream expects a tweepy auth handler; the `oauth` object built with
# the `twitter` package in the question is a different type, so build an
# OAuthHandler from the same credentials instead.
auth = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_TOKEN, ACCESS_SECRET)

my_listener = listener()
twitterStream = Stream(auth, my_listener)  # Initialize Stream object
# Start streaming with the same keyword and language as the question's code.
twitterStream.filter(track=['Euro2016'], languages=['fr'])
You can read more about tweepy here: http://docs.tweepy.org/en/v3.4.0/streaming_how_to.html

Twitter User Profile can be extracted by this

I am able to extract the mentioned details about a Twitter user using the Tweepy API.
I want to do it for a list of users. Can anyone help me do this?
import tweepy
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
# Twitter API credentials (placeholder values); replace with your own.
CONSUMER_KEY = 'ABC'
CONSUMER_SECRET = 'ABC'
ACCESS_KEY = 'ABC'
ACCESS_SECRET = 'ABC'
class TweetListener(StreamListener):
    """Basic listener that prints whatever it receives to standard output."""

    def on_data(self, data):
        """Print the received data and return True."""
        # print(...) with a single argument behaves the same on Python 2 and
        # Python 3, unlike the print statement.
        print(data)
        return True

    def on_error(self, status):
        """Print the error status reported by the stream."""
        print(status)
# Authenticate and create the API client.
auth = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_KEY, ACCESS_SECRET)
api = tweepy.API(auth)
twitterStream = Stream(auth, TweetListener())

# Fetch a single profile and print selected attributes.
# print(...) with a single argument works on both Python 2 and Python 3.
user = api.get_user('User Name')
print(user.screen_name)
print(user.description)
print(user.followers_count)
print(user.statuses_count)
print(user.url)
This code is ready to use; anyone can use it with his/her own credentials for a single user profile.
Finally, after experimenting and reading a lot, I found the answer to my question. You can try this:
import tweepy
from tweepy import Stream
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
# Placeholder credentials; substitute your own values.
CONSUMER_KEY = 'ABC'
CONSUMER_SECRET = 'ABC'
ACCESS_KEY = 'ABC'
ACCESS_SECRET = 'ABC'

# Complete the OAuth handler before handing it to the API client.
auth = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_KEY, ACCESS_SECRET)
api = tweepy.API(auth)
class TweetListener(StreamListener):
    """Basic listener that prints whatever it receives to standard output."""

    def on_data(self, data):
        """Print the received data and return True."""
        # print(...) with a single argument behaves the same on Python 2 and
        # Python 3, unlike the print statement.
        print(data)
        return True

    def on_error(self, status):
        """Print the error status reported by the stream."""
        print(status)
# search
api = tweepy.API(auth)
twitterStream = Stream(auth, TweetListener())

# Look up several profiles at once by their user ids.
test = api.lookup_users(user_ids=['17006157','59145948','157009365'])
for user in test:
    # print(...) with a single argument works on both Python 2 and Python 3.
    print(user.screen_name)
    print(user.name)
    print(user.description)
    print(user.followers_count)
    print(user.statuses_count)
    print(user.url)
This code is ready to use: just put your valid keys in place of ABC to get the users' profiles. You need to get the IDs first.
Your code simply interacts with your twitter account; to find information on a specific user or group of users you should look them up using the api.lookup_users(user_ids=[]) query.
You'd do it like this:
#boring auth you already have
import tweepy
from tweepy import OAuthHandler
# Placeholder credentials; substitute your own values.
CONSUMER_KEY = 'ABC'
CONSUMER_SECRET = 'ABC'
ACCESS_KEY = 'ABC'
ACCESS_SECRET = 'ABC'
auth = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_KEY, ACCESS_SECRET)

# search
api = tweepy.API(auth)
# Pass each user id as its own list element rather than one comma-joined
# string, so the list really holds the two ids being looked up.
test = api.lookup_users(user_ids=['1123728482', '5539932'])
This gives you a list of two tweepy.models.User objects:
[<tweepy.models.User object at 0x103995090>, <tweepy.models.User object at 0x1039950d0>]
You can replace the list in user_ids with a list of up to 100 ids, twitter won't let you search any more than that at once, though. Once you have your list of User objects, you can access different properties (for a list, check out the tweepy doc for the User class, line 113).

Categories

Resources