I am trying to fetch user metadata using tweepy by user screen name and save the result as JSON file. Here is my code
import tweepy
from tweepy import Stream
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
# Twitter API credentials (placeholders).
CONSUMER_KEY = 'xxx'
CONSUMER_SECRET = 'xxx'
ACCESS_KEY = 'xxx'
ACCESS_SECRET = 'xxx'

auth = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
api = tweepy.API(auth)
auth.set_access_token(ACCESS_KEY, ACCESS_SECRET)


class TweetListener(StreamListener):
    """Basic stream listener that prints whatever it receives to standard output."""

    def on_data(self, data):
        # Echo the raw payload exactly as it was handed to the listener.
        print(data)
        return True

    def on_error(self, status):
        # Echo the error status reported for the stream.
        print(status)


# search
api = tweepy.API(auth)
twitterStream = Stream(auth, TweetListener())
# `name` is expected to be a list of user screen names defined elsewhere.
test = api.lookup_users(screen_names=name)
for user in test:
    # Print the same five profile fields, one per line, in the same order.
    for attribute in ('screen_name', 'name', 'description',
                      'followers_count', 'statuses_count'):
        print(getattr(user, attribute))
My code runs without any errors and, as you can see, I am printing the data, but my intent is to save it in a JSON file.
I tried different code samples but nothing works for me. Could anyone please help?
JSON is just a way to save data in specific format.
In order to save your data, you first need to store it properly, and then just dump it.
The format is a dict, which holds data under keys, and each key maps to a value.
In your case, I chose 'users' as the key.
Each user in 'users' has keys (name, description, etc.) and the values taken from your list.
you need to do something like that:
import json
# Profile attributes copied from every user object into the output record.
fields = ('screen_name', 'name', 'description',
          'followers_count', 'statuses_count')
# One dict per user, all collected under the single 'users' key.
data = {
    'users': [
        {field: getattr(user, field) for field in fields}
        for user in test
    ]
}
# Serialise the whole structure to disk as one JSON document.
with open('data.txt', 'w') as outfile:
    json.dump(data, outfile)
Related
I'm trying to retrieve tweets from a single user_id as per the JSON data tweepy fetches.
The issue is that I'm also retrieving all mentions of the user_id, i.e. other people mentioning the user_id in any way (RTs, mentions, etc.).
I have quoted my script below. Please let me know if this is possible in tweepy at all.
the snippet streamer.filter(follow = ['25073877'], encoding = 'utf8') shows which user i am wanting to follow
Thank you in advance.
from __future__ import print_function
import tweepy
import json
import MySQLdb
from dateutil import parser
import Twitter_API
import DBConfig
# Twitter API credentials (left blank in the post).
Access_Token = ""
Access_Token_Secret = ""
Consumer_Key = ""
Consumer_Secret = ""


class StreamListener(tweepy.StreamListener):
    """Listener that prints selected fields of every tweet received from the stream."""

    def on_connect(self):
        print("You are now connected to the streaming API.")

    def on_error(self, status_code):
        print('An Error has occured: ' + repr(status_code))
        return False

    def on_data(self, data):
        """Decode one raw payload and print its text, author, id, time and reply target."""
        try:
            # Decode the JSON from Twitter
            datajson = json.loads(data)
            # grab the wanted data from the Tweet
            # Not every payload carries 'extended_tweet'; without this
            # fallback those tweets raised KeyError and were dropped.
            if 'extended_tweet' in datajson:
                text = datajson['extended_tweet']['full_text']
            else:
                text = datajson['text']
            screen_name = datajson['user']['screen_name']
            tweet_id = datajson['id']
            created_at = parser.parse(datajson['created_at'])
            replying_to = datajson['in_reply_to_screen_name']
            # print out a message to the screen that we have collected a tweet
            # print("Tweet collected at " + str(created_at))
            print(text, screen_name, tweet_id, created_at, replying_to)
            # insert the data into the MySQL database
            # store_data(created_at, text, screen_name, tweet_id)
        except Exception as e:
            # Payloads missing the expected keys end up here; report and carry on.
            print(e)


auth = tweepy.OAuthHandler(Consumer_Key, Consumer_Secret)
auth.set_access_token(Access_Token, Access_Token_Secret)
# Set up the listener. The 'wait_on_rate_limit=True' is needed to help with Twitter API rate limiting.
listener = StreamListener(api=tweepy.API(wait_on_rate_limit=True))
streamer = tweepy.Stream(auth=auth, listener=listener)
# print("Tracking: " + str(WORDS))
streamer.filter(follow=['25073877'], encoding='utf8')
Sorry for the bad indentation if any.
Hi I have looked at many guides and tutorials on how to do this, but I am having trouble with being able to use tweepy to store the JSON data in a text file.
class StreamListener(tweepy.StreamListener):
    """Listener that prints each status and returns False when the error status is 420."""

    def on_status(self, status):
        print(status)

    def on_error(self, status):
        # print() call form, consistent with on_status and valid on Python 2 and 3.
        print(status)
        if status == 420:
            return False


if __name__ == '__main__':
    stream_listener = StreamListener()
    auth = tweepy.OAuthHandler(consumer_token, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    stream = tweepy.Stream(auth, stream_listener)
I have another python file which is supposed to read data into a list:
import pandas
import json
json_data = 'twitter_data.txt'
data_list = []
# load file; the with-block closes the handle even if reading fails
with open(json_data, "r") as tweets_file:
    for line in tweets_file:
        try:
            tweet = json.loads(line)  # this line causes problems
        except ValueError:
            # The line is not valid JSON (json reports that as ValueError): skip it.
            continue
        data_list.append(tweet)
print(len(data_list))
I thought the data received from twitter comes in JSON format, and the guides I'm following all say it does, but it's actually in another object.
Should I just store everything in a list then json dump that list into the new text file?
It seems like you're on the right track. You can modify the stream listener to write tweets to a file directly.
Edit: this now writes out in JSON format.
#Import the necessary methods from tweepy library
import json

from tweepy import API
from tweepy import OAuthHandler
from tweepy import Stream
from tweepy.streaming import StreamListener
# Variables that contain the user credentials to access the Twitter API.
# Replace each placeholder string with your own value.
CONSUMER_KEY = 'YOUR CONSUMER KEY'
CONSUMER_SECRET = 'YOUR CONSUMER SECRET'
ACCESS_TOKEN = 'YOUR ACCESS TOKEN'
ACCESS_TOKEN_SECRET = 'YOUR ACCESS TOKEN SECRET'
class FileWriteListener(StreamListener):
    """Stream listener that keeps every received tweet in memory and on disk.

    Each raw payload is parsed, appended to ``self.tweets`` and written to
    ``tweets.json`` as one JSON document per line.
    """

    def __init__(self):
        # super() has to name this class: super(StreamListener, self) starts
        # the lookup after StreamListener and so skips its initialiser.
        super(FileWriteListener, self).__init__()
        self.save_file = open('tweets.json', 'w')
        self.tweets = []

    def on_data(self, tweet):
        """Parse one raw payload, remember it and append it to the save file."""
        parsed = json.loads(tweet)
        self.tweets.append(parsed)
        # Exactly one document per line, so the file can be read back line by line.
        self.save_file.write(json.dumps(parsed) + '\n')
        # The handle stays open for the listener's lifetime; flush so data
        # reaches the file even if the process is interrupted.
        self.save_file.flush()

    def on_error(self, status):
        print(status)
        return True
auth = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
api = API(auth)
# Use the listener defined above; no class named MyListener exists in this snippet.
twitter_stream = Stream(auth, FileWriteListener())
# Here you can filter the stream by:
# - keywords (as shown)
# - users
twitter_stream.filter(track=['hello'])
This code will run indefinitely, so you either need to exit the process after some time (Ctrl-C) or modify the code.
Then you can load the data:
import json
json_data = []
with open('tweets.json', 'r') as f:
    # Read every line, not just the first one; each non-blank line is
    # parsed as its own JSON document.
    for line in f:
        line = line.strip()
        if line:
            json_data.append(json.loads(line))
Hope this helps!
I think something like this may be what you're looking for.
def on_status(self, tweet):
    """Print the ``created_at`` field of a received status.

    ``tweet._json`` is read as a dict, so fields are looked up on it by key.
    """
    # No dumps()/loads() round trip: index the dict directly.
    tweet_json = tweet._json
    print(tweet_json['created_at'])
These are all the keys you can use in the tweet_json[ ]
dict_keys(['created_at', 'id', 'id_str', 'text', 'source', 'truncated', 'in_reply_to_status_id', 'in_reply_to_status_id_str', 'in_reply_to_user_id', 'in_reply_to_user_id_str', 'in_reply_to_screen_name', 'user', 'geo', 'coordinates', 'place', 'contributors', 'retweeted_status', 'is_quote_status', 'quote_count', 'reply_count', 'retweet_count', 'favorite_count', 'entities', 'favorited', 'retweeted', 'filter_level', 'lang', 'timestamp_ms'])
Maybe you can help me. This following python code retrieves Twitter Streaming data and stops when 1000 tweet data are got. It works but returns the fields "created_at, screen_name, and text" separated by tab. Instead I'd like to get the data in JSON format. How can I set the code in order to get the data formatted in JSON?
# Import the necessary package to process data in JSON format
try:
import json
except ImportError:
import simplejson as json
# Import the necessary methods from "twitter" library
from twitter import Twitter, OAuth, TwitterHTTPError, TwitterStream
# Variables that contains the user credentials to access Twitter API
# SECURITY: what looked like real API credentials were hard-coded here and
# have been redacted. Substitute your own values and keep them out of
# anything you publish.
CONSUMER_KEY = 'YOUR_CONSUMER_KEY'
CONSUMER_SECRET = 'YOUR_CONSUMER_SECRET'
ACCESS_TOKEN = 'YOUR_ACCESS_TOKEN'
ACCESS_SECRET = 'YOUR_ACCESS_SECRET'
oauth = OAuth(ACCESS_TOKEN, ACCESS_SECRET, CONSUMER_KEY, CONSUMER_SECRET)
# Initiate the connection to Twitter Streaming API
twitter_stream = TwitterStream(auth=oauth)
# Get a sample of the public data following through Twitter
# iterator = twitter_stream.statuses.sample()
iterator = twitter_stream.statuses.filter(track="Euro2016", language="fr")
# Stop after this many tweets have been printed.
tweet_count = 1000
for tweet in iterator:
    tweet_count -= 1
    print (tweet['created_at'],"\t",tweet['user']['screen_name'],"\t",tweet['geo'], "\t",tweet['text'])
    if tweet_count <= 0:
        break
You can import tweepy (you need to install it first with pip) and override the listener class to be able to output the data in json format. Here is an example:
from tweepy import Stream
from tweepy.streaming import StreamListener
#Listener Class Override
class listener(StreamListener):
    """Stream listener that appends every received tweet to ``your_data.json``."""

    def on_data(self, data):
        """Parse one raw payload and append it to the output file as one JSON line."""
        # Local import: the imports shown with this snippet do not bring json in.
        import json

        # Exception rather than BaseException, so Ctrl-C is not swallowed here.
        try:
            tweet = json.loads(data)
            with open('your_data.json', 'a') as my_file:
                json.dump(tweet, my_file)
                # Separate the documents; back-to-back JSON objects on one
                # line cannot be parsed back individually.
                my_file.write('\n')
        except Exception as error:
            print('Error', error)

    def on_error(self, status):
        # The parameter is `status`; `statuses` was an undefined name.
        print(status)


my_listener = listener()
# NOTE(review): `oauth` is not defined in this snippet; presumably Stream
# needs a tweepy OAuthHandler here. Confirm before running.
twitterStream = Stream(oauth, my_listener)  # Initialize Stream object
You can read more about tweepy here: http://docs.tweepy.org/en/v3.4.0/streaming_how_to.html
When I run it, 1) the terminal keeps typing "23851" on new rows, which is the number of followers of the first Twitter name in my file f; I believe this means that the pointer was not moving in file f, but I'm not sure how this should be done properly in Python; 2) when I check my file f1, there's nothing in it, i.e. the program is not writing to f1 as expected.
import tweepy
from tweepy import Stream
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
# Twitter API credentials (placeholders).
CONSUMER_KEY = 'xxx'
CONSUMER_SECRET = 'xxx'
ACCESS_KEY = 'xxx'
ACCESS_SECRET = 'xxx'

auth = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
api = tweepy.API(auth)
auth.set_access_token(ACCESS_KEY, ACCESS_SECRET)


# Create Class First
class TweetListener(StreamListener):
    """Basic stream listener that prints whatever it receives to standard output."""

    def on_data(self, data):  # indented inside the class
        print(data)
        return True

    def on_error(self, status):
        print(status)


# open both files outside the loop
with open('Twitternames.txt') as f, open('followers_number.txt', 'a') as f1:
    for x in f:
        # search
        api = tweepy.API(auth)
        twitterStream = Stream(auth, TweetListener())
        # NOTE(review): this passes the literal string 'x', not the line read
        # into the loop variable x, so every iteration sends the same request.
        test = api.lookup_users(screen_names=['x'])
        for user in test:
            print(user.followers_count)
            # print it out and also write it into a file
            s = user.followers_count
            f1.write(str(s) + "\n")  # add a newline with +
# end of stackoverflow
f.close()
Actually, there are a few things to consider, and the code contains some unneeded lines, so I will go through it step by step and explain the relevant parts. We don't need any streaming data to count followers, so we only need to import tweepy and OAuthHandler:
import tweepy
from tweepy import OAuthHandler
Now we need to set the 4 keys required for login so, This will go same as :
# The four credential values used to authenticate; the literals below are placeholders.
CONSUMER_KEY = 'xxxxxxxx' # Replace with your real consumer key.
CONSUMER_SECRET = 'xxx' # Replace with your real consumer secret.
ACCESS_KEY = 'xxx' # Replace with your real access token.
ACCESS_SECRET = 'xxx' # Replace with your real access token secret.
# Build the OAuth handler from the consumer pair, then attach the access pair.
auth = OAuthHandler(CONSUMER_KEY,CONSUMER_SECRET)
auth.set_access_token(ACCESS_KEY, ACCESS_SECRET)
# API client constructed from the handler above.
api = tweepy.API(auth)
I don't think you need a StreamListener just to log the followers_count of various users, so I am skipping that part. However, you can add that code snippet afterwards.
# Read every line of the names file; the with-block closes the handle afterwards.
with open('Twitternames.txt') as names_file:
    usernames_file = names_file.readlines()
I am assuming the contents of Twitternames.txt to be in the following format(every username without # symbol and separated by a new line):
user_name_1
user_name_2
user_name_3
...
now the usernames_file would be list of strings usernames_file= ['user_name_1\n', 'user_name_2\n', 'user_name_3\n'] so now we have extracted the various usernames from the text file, but we need to get rid of that \n character at the end of each name. So we can use .strip() method.
# Strip the surrounding whitespace (including the trailing newline) from every line.
usernames = [raw_line.strip() for raw_line in usernames_file]
>>> usernames = ['user_name_1', 'user_name_2', 'user_name_3']
Now we are ready to use the lookup_users method as this method takes a list of usernames as input.
So it may look something like this:
# Look up all the collected screen names in a single request.
test = api.lookup_users(screen_names=usernames)
for account in test:
    # Show the follower count of each returned user.
    print(account.followers_count)
If you want to log the results to a .txt file then you can use:
test = api.lookup_users(screen_names=usernames)
# The with-block closes log.txt even if a write or attribute access fails.
with open("log.txt", 'a') as log_file:
    for user in test:
        print(user.followers_count)
        log_file.write(user.name+" has "+str(user.followers_count)+" followers.\n")
So the short and final code would look something like this:
import tweepy
from tweepy import OAuthHandler
# Twitter API credentials (placeholders).
CONSUMER_KEY = 'xxx'
CONSUMER_SECRET = 'xxx'
ACCESS_KEY = 'xxx'
ACCESS_SECRET = 'xxx'

auth = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_KEY, ACCESS_SECRET)
api = tweepy.API(auth)

# Read the screen names, one per line; the with-block closes the file.
with open('Twitternames.txt') as names_file:
    usernames_file = names_file.readlines()

# Drop the trailing newline from each name and skip blank lines, which
# would otherwise be sent as empty screen names.
usernames = [line.strip() for line in usernames_file if line.strip()]

test = api.lookup_users(screen_names=usernames)
# The with-block closes log.txt even if a write fails part-way through.
with open("log.txt", 'a') as log_file:
    for user in test:
        print(user.followers_count)
        log_file.write(user.name+" has "+str(user.followers_count)+" followers.\n")
I am able to extract the mentioned details about a twitter user using Tweepy API.
I want to do it for a list of users. Can anyone help me to this?
import tweepy
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
# Twitter API credentials (placeholders).
CONSUMER_KEY = 'ABC'
CONSUMER_SECRET = 'ABC'
ACCESS_KEY = 'ABC'
ACCESS_SECRET = 'ABC'


class TweetListener(StreamListener):
    """Basic stream listener that prints whatever it receives to standard output."""

    def on_data(self, data):
        # print() call form: works on Python 3 and, for a single argument, on Python 2.
        print(data)
        return True

    def on_error(self, status):
        print(status)


auth = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
api = tweepy.API(auth)
auth.set_access_token(ACCESS_KEY, ACCESS_SECRET)
twitterStream = Stream(auth, TweetListener())
# Fetch one profile and print a few of its fields.
user = api.get_user('User Name')
print(user.screen_name)
print(user.description)
print(user.followers_count)
print(user.statuses_count)
print(user.url)
This code is ready to use anyone can use it with his/her own credentials for a single user profile.
Finally, after a lot of experimenting and reading, I found the answer to my question. You can try this:
import tweepy
from tweepy import Stream
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
# Twitter API credentials (placeholders).
CONSUMER_KEY = 'ABC'
CONSUMER_SECRET = 'ABC'
ACCESS_KEY = 'ABC'
ACCESS_SECRET = 'ABC'

auth = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
api = tweepy.API(auth)
auth.set_access_token(ACCESS_KEY, ACCESS_SECRET)


class TweetListener(StreamListener):
    """Basic stream listener that prints whatever it receives to standard output."""

    def on_data(self, data):
        # print() call form: works on Python 3 and, for a single argument, on Python 2.
        print(data)
        return True

    def on_error(self, status):
        print(status)


# search
api = tweepy.API(auth)
twitterStream = Stream(auth, TweetListener())
# Look up several profiles in one request by their numeric ids.
test = api.lookup_users(user_ids=['17006157', '59145948', '157009365'])
for user in test:
    print(user.screen_name)
    print(user.name)
    print(user.description)
    print(user.followers_count)
    print(user.statuses_count)
    print(user.url)
This code is ready to use: just put your valid keys in place of ABC to get the users' profiles. You need to get the IDs first.
Your code simply interacts with your twitter account; to find information on a specific user or group of users you should look them up using the api.lookup_users(user_ids=[]) query.
You'd do it like this:
#boring auth you already have
import tweepy
from tweepy import OAuthHandler
# Twitter API credentials (placeholders).
CONSUMER_KEY = 'ABC'
CONSUMER_SECRET = 'ABC'
ACCESS_KEY = 'ABC'
ACCESS_SECRET = 'ABC'
auth = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_KEY, ACCESS_SECRET)
# search
# Build the client once, after the access token has been attached.
api = tweepy.API(auth)
# One list element per id, rather than both ids packed into a single string.
test = api.lookup_users(user_ids=['1123728482', '5539932'])
This gives you a list of two tweepy.models.User objects:
[<tweepy.models.User object at 0x103995090>, <tweepy.models.User object at 0x1039950d0>]
You can replace the list in user_ids with a list of up to 100 ids, twitter won't let you search any more than that at once, though. Once you have your list of User objects, you can access different properties (for a list, check out the tweepy doc for the User class, line 113).