Problems collecting 280 characters using Tweepy - python

I'm trying to extract tweets using the following code, and I just realized I'm only getting the first 140 characters. I'm a bit new at this, and I gather I need to put tweet_mode=extended and full_text somewhere, so if someone could point out exactly where, I'd be very appreciative. Thank you!
#!/usr/bin/env python
# encoding: utf-8

import tweepy  # https://github.com/tweepy/tweepy
import csv

# Twitter API credentials
consumer_key = "5f55VEYRnHuBvVESy11OrBayI"
consumer_secret = "r0PcvNast4FLYD1HNQiJIsIDGtk72hhVFPzR3BfrIWfuSn2SWD"
access_key = "949748064985722880-Wpc3hErpGEeDC75MBfcDoo07X9WVcAo"
access_secret = "w02RdHMg1izgaFlKUJH3C5s9cDNue2h8XJv87E3TE0Whm"

def get_all_tweets(screen_name):
    # Twitter only allows access to a user's most recent 3240 tweets with this method

    # authorize twitter, initialize tweepy
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_key, access_secret)
    api = tweepy.API(auth)

    # initialize a list to hold all the tweepy Tweets
    alltweets = []

    # make initial request for most recent tweets (200 is the maximum allowed count)
    new_tweets = api.user_timeline(screen_name=screen_name, count=200)

    # save most recent tweets
    alltweets.extend(new_tweets)

    # save the id of the oldest tweet less one
    oldest = alltweets[-1].id - 1

    # keep grabbing tweets until there are no tweets left to grab
    while len(new_tweets) > 0:
        print "getting tweets before %s" % (oldest)

        # all subsequent requests use the max_id param to prevent duplicates
        new_tweets = api.user_timeline(screen_name=screen_name, count=200, max_id=oldest)

        # save most recent tweets
        alltweets.extend(new_tweets)

        # update the id of the oldest tweet less one
        oldest = alltweets[-1].id - 1

        print "...%s tweets downloaded so far" % (len(alltweets))

    # transform the tweepy tweets into a 2D array that will populate the csv
    outtweets = [[tweet.id_str, tweet.created_at, tweet.text.encode("utf-8"), tweet.retweet_count, tweet.favorite_count] for tweet in alltweets]

    # write the csv
    with open('%s_tweets.csv' % screen_name, 'wb') as f:
        writer = csv.writer(f)
        writer.writerow(["id", "created_at", "full_text", "retweet_count", "favorite_count"])
        writer.writerows(outtweets)

    pass

if __name__ == '__main__':
    # pass in the username of the account you want to download
    get_all_tweets("realdonaldtrump")

Put "tweet_mode=extended" here:
new_tweets = api.user_timeline(screen_name = screen_name,
count=200,
tweet_mode=extended)
And here:
while len(new_tweets) > 0:
new_tweets = api.user_timeline(screen_name = screen_name,
count=200,
max_id=oldest,
tweet_mode=extended)
Put "full_tweet" here:
outtweets = [[tweet.id_str,
tweet.created_at,
tweet.full_tweet.encode("utf-8"),
tweet.retweet_count,
tweet.favorite_count] for tweet in alltweets]
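If retweets still come back truncated even with tweet_mode='extended': a retweet's own full_text is cut short by the API, and the complete text lives on retweeted_status.full_text. A minimal sketch (the tweet_text helper below is only an illustration, not part of the original script):

# Sketch: fall back to the retweeted status when present, since its full_text
# holds the untruncated original text.
def tweet_text(tweet):
    if hasattr(tweet, "retweeted_status"):
        return tweet.retweeted_status.full_text
    return tweet.full_text

outtweets = [[tweet.id_str,
              tweet.created_at,
              tweet_text(tweet).encode("utf-8"),
              tweet.retweet_count,
              tweet.favorite_count] for tweet in alltweets]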

Related

Why do I keep getting a line 93 error saying TwitterClient not defined when it is defined in a class above?

This is for tweepy. It says TwitterClient is not defined.
import re
import tweepy
from tweepy import OAuthHandler
from textblob import TextBlob

class TwitterClient(object):
    '''
    Generic Twitter Class for sentiment analysis.
    '''
    def __init__(self):
        '''
        Class constructor or initialization method.
        '''
        # keys and tokens from the Twitter Dev Console
        consumer_key = 'remove'
        consumer_secret = 'remove'
        access_token = 'remove-remove'
        access_token_secret = 'remove'

        # attempt authentication
        try:
            # create OAuthHandler object
            self.auth = OAuthHandler(consumer_key, consumer_secret)
            # set access token and secret
            self.auth.set_access_token(access_token, access_token_secret)
            # create tweepy API object to fetch tweets
            self.api = tweepy.API(self.auth)
        except:
            print("Error: Authentication Failed")

    def clean_tweet(self, tweet):
        '''
        Utility function to clean tweet text by removing links, special characters
        using simple regex statements.
        '''
        return ' '.join(re.sub("(#[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)", " ", tweet).split())

    def get_tweet_sentiment(self, tweet):
        '''
        Utility function to classify sentiment of passed tweet
        using textblob's sentiment method
        '''
        # create TextBlob object of passed tweet text
        analysis = TextBlob(self.clean_tweet(tweet))
        # set sentiment
        if analysis.sentiment.polarity > 0:
            return 'positive'
        elif analysis.sentiment.polarity == 0:
            return 'neutral'
        else:
            return 'negative'

    def get_tweets(self, query, count = 10):
        '''
        Main function to fetch tweets and parse them.
        '''
        # empty list to store parsed tweets
        tweets = []
        try:
            # call twitter api to fetch tweets
            fetched_tweets = self.api.search(q = query, count = count)
            # parsing tweets one by one
            for tweet in fetched_tweets:
                # empty dictionary to store required params of a tweet
                parsed_tweet = {}
                # saving text of tweet
                parsed_tweet['text'] = tweet.text
                # saving sentiment of tweet
                parsed_tweet['sentiment'] = self.get_tweet_sentiment(tweet.text)
                # appending parsed tweet to tweets list
                if tweet.retweet_count > 0:
                    # if tweet has retweets, ensure that it is appended only once
                    if parsed_tweet not in tweets:
                        tweets.append(parsed_tweet)
                else:
                    tweets.append(parsed_tweet)
            # return parsed tweets
            return tweets
        except tweepy.TweepError as e:
            # print error (if any)
            print("Error : " + str(e))

    def main():
        # creating object of TwitterClient Class
        api = TwitterClient()
        # calling function to get tweets
        tweets = api.get_tweets(query = 'ADF', count = 200)
        # picking positive tweets from tweets
        ptweets = [tweet for tweet in tweets if tweet['sentiment'] == 'positive']
        # percentage of positive tweets
        print("Positive tweets percentage: {} %".format(100*len(ptweets)/len(tweets)))
        # picking negative tweets from tweets
        ntweets = [tweet for tweet in tweets if tweet['sentiment'] == 'negative']
        # percentage of negative tweets
        print("Negative tweets percentage: {} %".format(100*len(ntweets)/len(tweets)))
        # percentage of neutral tweets
        netweets = [tweet for tweet in tweets if tweet['sentiment'] == 'neutral']
        print("Neutral tweets percentage: {} %".format(100*(len(netweets)/len(tweets))))
        # printing first 5 positive tweets
        print("\n\nPositive tweets:")
        for tweet in ptweets[:10]:
            print(tweet['text'])
        # printing first 5 negative tweets
        print("\n\nNegative tweets:")
        for tweet in ntweets[:10]:
            print(tweet['text'])

    if __name__ == "__main__":
        # calling main function
        main()
Here is a cut-down version of your code which demonstrates the problem.
class TwitterClient(object):
    None

    def main():
        api = TwitterClient()
        print("main()")

    if __name__ == "__main__":
        main()
Note the indentation of both main() and if __name__ == "__main__": it places them under the definition of TwitterClient itself. Hence the error, in Python 3:
Traceback (most recent call last):
  File "twitter-55610165.py", line 2, in <module>
    class TwitterClient(object):
  File "twitter-55610165.py", line 11, in TwitterClient
    main()
  File "twitter-55610165.py", line 6, in main
    api = TwitterClient()
NameError: name 'TwitterClient' is not defined
TwitterClient is not defined because the class definition of TwitterClient has not finished - you're still inside it. The if is at class scope, so it runs while the class is being defined. Indentation determines a lot about scope in Python.
With small but important changes in whitespace, to take main() and if __name__ ... out of the TwitterClient scope and put them back at the main scope, the problem goes away.
class TwitterClient(object):
    None

def main():
    api = TwitterClient()
    print("main()")

if __name__ == "__main__":
    main()
i.e. these constructs are now at the same indent level as TwitterClient, one level of indentation further left.
$ python3 twitter-55610165.py
main()
An easy solution is to remove main() from the TwitterClient definition.
The exact problem is that main() is inside TwitterClient, so in other words you haven't finished defining TwitterClient when main() is called, and Python throws the error.
How to fix
The easiest solution is to move the main() and if __name__... lines out of the TwitterClient definition. That will get rid of your current error. This code should work:
import re
import tweepy
from tweepy import OAuthHandler
from textblob import TextBlob

class TwitterClient(object):
    '''
    Generic Twitter Class for sentiment analysis.
    '''
    def __init__(self):
        '''
        Class constructor or initialization method.
        '''
        # keys and tokens from the Twitter Dev Console
        consumer_key = 'remove'
        consumer_secret = 'remove'
        access_token = 'remove-remove'
        access_token_secret = 'remove'

        # attempt authentication
        try:
            # create OAuthHandler object
            self.auth = OAuthHandler(consumer_key, consumer_secret)
            # set access token and secret
            self.auth.set_access_token(access_token, access_token_secret)
            # create tweepy API object to fetch tweets
            self.api = tweepy.API(self.auth)
        except:
            print("Error: Authentication Failed")

    def clean_tweet(self, tweet):
        '''
        Utility function to clean tweet text by removing links, special characters
        using simple regex statements.
        '''
        return ' '.join(re.sub("(#[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)", " ", tweet).split())

    def get_tweet_sentiment(self, tweet):
        '''
        Utility function to classify sentiment of passed tweet
        using textblob's sentiment method
        '''
        # create TextBlob object of passed tweet text
        analysis = TextBlob(self.clean_tweet(tweet))
        # set sentiment
        if analysis.sentiment.polarity > 0:
            return 'positive'
        elif analysis.sentiment.polarity == 0:
            return 'neutral'
        else:
            return 'negative'

    def get_tweets(self, query, count = 10):
        '''
        Main function to fetch tweets and parse them.
        '''
        # empty list to store parsed tweets
        tweets = []
        try:
            # call twitter api to fetch tweets
            fetched_tweets = self.api.search(q = query, count = count)
            # parsing tweets one by one
            for tweet in fetched_tweets:
                # empty dictionary to store required params of a tweet
                parsed_tweet = {}
                # saving text of tweet
                parsed_tweet['text'] = tweet.text
                # saving sentiment of tweet
                parsed_tweet['sentiment'] = self.get_tweet_sentiment(tweet.text)
                # appending parsed tweet to tweets list
                if tweet.retweet_count > 0:
                    # if tweet has retweets, ensure that it is appended only once
                    if parsed_tweet not in tweets:
                        tweets.append(parsed_tweet)
                else:
                    tweets.append(parsed_tweet)
            # return parsed tweets
            return tweets
        except tweepy.TweepError as e:
            # print error (if any)
            print("Error : " + str(e))

def main():
    # creating object of TwitterClient Class
    api = TwitterClient()
    # calling function to get tweets
    tweets = api.get_tweets(query = 'ADF', count = 200)
    # picking positive tweets from tweets
    ptweets = [tweet for tweet in tweets if tweet['sentiment'] == 'positive']
    # percentage of positive tweets
    print("Positive tweets percentage: {} %".format(100*len(ptweets)/len(tweets)))
    # picking negative tweets from tweets
    ntweets = [tweet for tweet in tweets if tweet['sentiment'] == 'negative']
    # percentage of negative tweets
    print("Negative tweets percentage: {} %".format(100*len(ntweets)/len(tweets)))
    # percentage of neutral tweets
    netweets = [tweet for tweet in tweets if tweet['sentiment'] == 'neutral']
    print("Neutral tweets percentage: {} %".format(100*(len(netweets)/len(tweets))))
    # printing first 5 positive tweets
    print("\n\nPositive tweets:")
    for tweet in ptweets[:10]:
        print(tweet['text'])
    # printing first 5 negative tweets
    print("\n\nNegative tweets:")
    for tweet in ntweets[:10]:
        print(tweet['text'])

if __name__ == "__main__":
    # calling main function
    main()
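One thing to watch out for with this version (my observation, not part of the answer above): get_tweets() returns None if a TweepError is raised, and main() then divides by len(tweets). A small guard inside main(), sketched under that assumption:

# Sketch: bail out of main() early if the fetch failed or came back empty,
# so the percentage calculations below never divide by zero or touch None.
tweets = api.get_tweets(query='ADF', count=200)
if not tweets:
    print("No tweets fetched; nothing to analyse.")
    return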

Tweepy still not returning full text despite using extended text feature

I am using tweepy to download tweets about a particular topic, but no matter which tutorial I follow I cannot get the tweet to output as a full tweet. There is always an ellipsis that cuts it off after a certain number of characters.
Here is the code I am using
import json
import tweepy
from tweepy import OAuthHandler
import csv
import sys
from twython import Twython

nonBmpMap = dict.fromkeys(range(0x10000, sys.maxunicode + 1), 0xfffd)

with open('Twitter_Credentials.json') as cred_data:
    info = json.load(cred_data)
    consumer_Key = info['Consumer_Key']
    consumer_Secret = info['Consumer_Secret']
    access_Key = info['Access_Key']
    access_Secret = info['Access_Secret']

maxTweets = int(input('Enter the Number of tweets that you want to extract '))
userTopic = input('What topic do you want to search for ')
topic = ('"' + userTopic + '"')
tweetCount = 0

auth = OAuthHandler(consumer_Key, consumer_Secret)
auth.set_access_token(access_Key, access_Secret)
api = tweepy.API(auth, wait_on_rate_limit=True)

tweets = api.search(q=topic, count=maxTweets, tweet_mode='extended')
for tweet in tweets:
    tweetCount = (tweetCount + 1)
    with open('TweetsAbout' + userTopic, 'a', encoding='utf-8') as the_File:
        print(tweet.full_text.translate(nonBmpMap))
        tweet = (str(tweet.full_text).translate(nonBmpMap).replace(',', '').replace('|', '').replace('\n', '').replace('’', '\'').replace('…', "end"))
        the_File.write(tweet + "\n")

print('Extracted ' + str(tweetCount) + ' tweets about ' + topic)
Try this, see if it works!
try:
    specific_tweets = tweepy.Cursor(api.search, tweet_mode='extended', q=<your_query_string> + " -filter:retweets", lang='en').items(500)
except tweepy.error.TweepError:
    pass

for tweet in specific_tweets:
    extracted_text = tweet.full_text
All the text you're trying to extract should be in extracted_text. Good luck!!
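One note on the snippet above (my reading of it, not something the answer states): the Cursor call itself rarely raises; errors usually surface while iterating over the results, so the try/except is more useful around the loop. A sketch, assuming tweepy 3.x and a placeholder query string:

# Sketch: iterate inside try/except so rate-limit or HTTP errors raised during
# pagination are caught; 'python' below is a stand-in for your query string.
cursor = tweepy.Cursor(api.search,
                       q='python -filter:retweets',
                       tweet_mode='extended',
                       lang='en').items(500)
extracted = []
try:
    for tweet in cursor:
        extracted.append(tweet.full_text)
except tweepy.TweepError as e:
    print("Stopped early:", e)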

extract tweets with some special keywords from twitter using tweepy in python

Here is my code. I want to extract tweets from Twitter with some keywords. My code doesn't give any errors, but the output file is not generated. Please help me.
import re
import csv
import tweepy
from tweepy import OAuthHandler
# TextBlob perform simple natural language processing tasks.
from textblob import TextBlob

def search():
    # text = e.get() **************************
    consumer_key = ''
    consumer_secret = ''
    access_token = ' '
    access_token_secret = ' '
    # create OAuthHandler object
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    # set access token and secret
    auth.set_access_token(access_token, access_token_secret)
    # create tweepy API object to fetch tweets
    api = tweepy.API(auth)

    def get_tweets(query, count = 300):
        # empty list to store parsed tweets
        tweets = []
        target = open("tweets.txt", 'w', encoding="utf-8")
        t1 = open("review.txt", 'w', encoding="utf-8")
        # call twitter api to fetch tweets
        q = str(query)
        a = str(q + " sarcasm")
        b = str(q + " sarcastic")
        c = str(q + " irony")
        fetched_tweets = api.search(a, count = count) + api.search(b, count = count) + api.search(c, count = count)
        # parsing tweets one by one
        print(len(fetched_tweets))
        for tweet in fetched_tweets:
            # empty dictionary to store required params of a tweet
            parsed_tweet = {}
            # saving text of tweet
            parsed_tweet['text'] = tweet.text
            if "http" not in tweet.text:
                line = re.sub("[^A-Za-z]", " ", tweet.text)
                target.write(line + "\n")
                t1.write(line + "\n")
        return tweets

    # creating object of TwitterClient Class
    # calling function to get tweets
    tweets = get_tweets(query = text, count = 20000)

root.mainloop()
With this code I am not getting the output file generated. Can anyone tell me what I am doing wrong?
Thanks in advance!
I just made some slight changes and it was working perfectly for me. I removed or commented out some unnecessary statements (like the review file) and changed the open function to io.open since I have Python 2.7. Here is the running code, hope it helps!!
import re
import io
import csv
import tweepy
from tweepy import OAuthHandler
# TextBlob perform simple natural language processing tasks.
# from textblob import TextBlob

consumer_key = 'sz6x0nvL0ls9wacR64MZu23z4'
consumer_secret = 'ofeGnzduikcHX6iaQMqBCIJ666m6nXAQACIAXMJaFhmC6rjRmT'
access_token = '854004678127910913-PUPfQYxIjpBWjXOgE25kys8kmDJdY0G'
access_token_secret = 'BC2TxbhKXkdkZ91DXofF7GX8p2JNfbpHqhshW1bwQkgxN'

# create OAuthHandler object
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
# set access token and secret
auth.set_access_token(access_token, access_token_secret)
# create tweepy API object to fetch tweets
api = tweepy.API(auth)

def get_tweets(query, count = 300):
    # empty list to store parsed tweets
    tweets = []
    target = io.open("mytweets.txt", 'w', encoding='utf-8')
    # call twitter api to fetch tweets
    q = str(query)
    a = str(q + " sarcasm")
    b = str(q + " sarcastic")
    c = str(q + " irony")
    fetched_tweets = api.search(a, count = count) + api.search(b, count = count) + api.search(c, count = count)
    # parsing tweets one by one
    print(len(fetched_tweets))
    for tweet in fetched_tweets:
        # empty dictionary to store required params of a tweet
        parsed_tweet = {}
        # saving text of tweet
        parsed_tweet['text'] = tweet.text
        if "http" not in tweet.text:
            line = re.sub("[^A-Za-z]", " ", tweet.text)
            target.write(line + "\n")
    return tweets

# creating object of TwitterClient Class
# calling function to get tweets
tweets = get_tweets(query ="", count = 20000)
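If the output file still comes out empty, one more thing worth checking (my guess, not something the answer above claims): neither version ever closes the target file, so buffered writes may not be flushed to disk. A small sketch of the same loop with a context manager, reusing the api object set up above:

# Sketch: the with-block flushes and closes mytweets.txt even if an API call
# raises partway through; the search terms mirror the answer above.
def get_tweets(query, count=300):
    terms = [str(query) + suffix for suffix in (" sarcasm", " sarcastic", " irony")]
    with io.open("mytweets.txt", "w", encoding="utf-8") as target:
        for term in terms:
            for tweet in api.search(term, count=count):
                if "http" not in tweet.text:
                    target.write(re.sub("[^A-Za-z]", " ", tweet.text) + "\n")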

How to export data from tweepy, to xlsx?

I need your help. I am trying to export the data mined by tweepy to an xlsx file with xlsxwriter, but it is giving an error. How can I solve it?
EDIT 1:
(One solution, by user Eli Lopez)
It worked, but only wrote one line... How do I solve this?
Or are there other solutions?
(There are many tweets caught; I need to export them to xlsx.)
This is my code:
import json
import csv
import xlsxwriter
import tweepy
from tweepy import OAuthHandler

consumer_key = "my_key"
consumer_secret = "my_key"
access_token = "my_key"
access_token_secret = "my_key"

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)

def tweet_to_xlsx(tweet):
    tweet_list = []
    tweet_list.append([tweet.user.screen_name, tweet.text])
    # tweet_list.append(tweet.text)
    workbook = xlsxwriter.Workbook('tweet.xlsx')
    worksheet = workbook.add_worksheet()
    row = 0
    col = 0
    for user, tweet in tweet_list:
        worksheet.write(row, col, user)
        worksheet.write(row, col + 1, tweet)
        row += 1
    workbook.close()

results = api.search(q=name, lang=lang, count=tweetCount)
for tweet in results:
    print(tweet.user.screen_name, "Twittou:", tweet.text)
    tweet_to_xlsx(tweet)
Error:
Traceback (most recent call last):
  File "extTwitter.py", line 113, in <module>
    tweet_to_xlsx(tweet)
  File "extTwitter.py", line 60, in tweet_to_xlsx
    for user, tweet in tweet_list:
ValueError: too many values to unpack (expected 2)
When you are appending, you're appending items, not lists:

tweet_list = [USER, TWEET, USER, TWEET]

What you want is a list of lists:

tweet_list = [[USER, TWEET], [USER, TWEET]]

What your code should be:

tweet_list.append([tweet.user.screen_name, tweet.text])
# you could also use () instead of [] as tuples are faster
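On the EDIT 1 follow-up (only one row ends up in the file): tweet_to_xlsx creates and closes a fresh tweet.xlsx on every call, so each tweet overwrites the previous one. A sketch that collects everything first and writes the workbook once; name, lang and tweetCount are taken from the question and assumed to be defined elsewhere:

import xlsxwriter

def tweets_to_xlsx(tweets, path='tweet.xlsx'):
    # write one workbook for the whole result set, one row per tweet
    workbook = xlsxwriter.Workbook(path)
    worksheet = workbook.add_worksheet()
    for row, tweet in enumerate(tweets):
        worksheet.write(row, 0, tweet.user.screen_name)
        worksheet.write(row, 1, tweet.text)
    workbook.close()

results = api.search(q=name, lang=lang, count=tweetCount)
tweets_to_xlsx(results)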

How to use since_id and max_id in tweepy?

I am trying to pull tweets using since_id and max_id. The problem with since_id is that it correctly prints all the tweets in my Eclipse console, but when I try to store them line by line in a csv file it does not give me all the tweets. I tried running it 4-5 times, and every time I get a different number of tweets. The problem with max_id is that it does not run at all. My code is as follows, and the traceback is included after the code (for since_id I am just replacing max_id with since_id).
#!/usr/bin/python
import tweepy
import csv
from datetime import *
import time

access_token = ''
access_secret = ''
consumer_key = ''
consumer_secret = ''

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)
api = tweepy.API(auth)

# data = api.get_user('abhi1868sharma')  # 'mishra1_P_K'
csvFile = open('a.csv', 'a')
csvWriter = csv.writer(csvFile, delimiter=',')  # , tweet.favourited

i = 1
tweets = tweepy.Cursor(api.user_timeline, id = '', max_id = 510064587115225000).items()
while True:
    try:
        for tweet in tweets:
            csvWriter.writerow([i, tweet.retweet_count, tweet.favorite_count, str(tweet.id), tweet.created_at.hour, tweet.created_at.minute, tweet.created_at.weekday(), tweet.created_at.day, tweet.created_at.month, tweet.created_at.year, tweet.created_at, tweet.text.encode('utf8'), tweet.user.id, tweet.geo, tweet.in_reply_to_user_id, tweet.in_reply_to_status_id_str, tweet.place, tweet.retweeted, tweet.truncated, tweet.source])
            print i
            i += 1
    except tweepy.TweepError:
        time.sleep(60 * 15)
        continue
    except StopIteration:
        break

csvFile.close()
This is my traceback for the max_id (for since_id it is not throwing any error)
    for tweet in tweets:
  File "C:\Python27\lib\site-packages\tweepy\cursor.py", line 181, in next
    self.current_page = self.page_iterator.next()
  File "C:\Python27\lib\site-packages\tweepy\cursor.py", line 99, in next
    data = self.method(max_id=self.max_id, parser=RawParser(), *self.args, **self.kargs)
TypeError: _call() got multiple values for keyword argument 'max_id'
One more thing: as I am storing the data in a csv file, my tweet IDs get rounded off. Is there any way to get around this? The tweet IDs that I am using for since_id and max_id are also rounded-off tweet IDs.
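A likely explanation for the rounding (an observation, not an answer from the thread): the csv file stores the digits exactly, but spreadsheet programs open an 18-19 digit ID as a floating-point number and keep only about 15 significant digits. Keeping the ID as a string, e.g. via tweepy's id_str attribute, and checking the raw file in a text editor avoids it. A minimal sketch:

# Sketch: write id_str instead of the integer id so the value stays an exact
# string in the csv; the rest of the row is trimmed here for brevity.
csvWriter.writerow([i, tweet.retweet_count, tweet.favorite_count,
                    tweet.id_str, tweet.created_at,
                    tweet.text.encode('utf8')])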
