I am using tweepy and python to gather tweets based on certain keywords and then writing those status updates (tweets) to a CSV file. I do not consider myself a programmer and I am really lost on this.
Here is the Error:
> Traceback (most recent call last):
File "./combined-tweepy.py", line 58, in <module>
sapi.filter(track=[topics])
File "/usr/local/lib/python2.7/dist-packages/tweepy/streaming.py", line 286, in filter
encoded_track = [s.encode(encoding) for s in track]
AttributeError: 'tuple' object has no attribute 'encode'
Here is the script:
#!/usr/bin/python
import sys
import re
import tweepy
import codecs
import datetime
consumer_key = ""
consumer_secret = ""
access_key = ""
access_secret = ""
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_key, access_secret)
api = tweepy.API(auth)
# Create a list of topics
with open('termList.txt', 'r') as f:
topics = [line.strip() for line in f]
stamp = datetime.datetime.now().strftime('%Y-%m-%d-%H%M%S')
topicFile = open(stamp + '.csv', 'w+')
sapi = tweepy.streaming.Stream(auth, CustomStreamListener(topicFile))
sapi.filter(track=[topics])
class CustomStreamListener(tweepy.StreamListener):
def __init__(self, output_file, api=None):
super(CustomStreamListener, self).__init__()
self.num_tweets = 0
self.output_file = output_file
def on_status(self, status):
### Writes one tweet per line in the CSV file
cleaned = status.text.replace('\'','').replace('&','').replace('>','').replace(',','').replace("\n",'')
self.num_tweets = self.num_tweets + 1
if self.num_tweets < 500:
self.output_file.write(status.user.location.encode("UTF-8") + ',' + cleaned.encode("UTF-8") + "\n")
print ("capturing tweet from list")
# print status.user.location
return True
else:
return False
sys.exit("terminating")
def on_error(self, status_code):
print >> sys.stderr, 'Encountered error with status code:', status_code
return True # Don't kill the stream
def on_timeout(self):
print >> sys.stderr, 'Timeout...'
return True #Don't kill the stream
f.close()
Here's the definition of a tuple according to Python's documentation. It seems like one of the words in topics is a tuple.
I see other little errors. First, the way you wrote your code, you should call your functions after you have defined them. For example, these two lines
sapi = tweepy.streaming.Stream(auth, CustomStreamListener(topicFile))
sapi.filter(track=[topics])
should come after you have defined all the functions in
class CustomStreamListener(tweepy.StreamListener):
Also, there's no need to put topics in braces
sapi.filter(track=[topics])
since it's already a list according to this line
topics = [line.strip() for line in f]
Can you show us the content of termList.txt?
Related
After reading streaming with Tweepy and going through this example. I tried to write a tweepy app to crawl live stream data with the tweepy Api and save it to .csv file. When I run my code, it returns empty csv file ('OutputStreaming.csv') with column names['Date', 'Text', 'Location','Number_Follower','User_Name', 'Friends_count','Hash_Tag], not the stream tweets. I also tried to do it in this way also this one, but I am getting the same out put with my code:-
def on_status(self, status):
with open('OutputStreaming.csv', 'w') as f:
f.write(['Author,Date,Text')
writer = csv.writer(f)
writer.writerow([status.created_at.strftime("%Y-%m-%d \
%H:%M:%S")status.text.encode,
status.location,
status.Number_of_follwers,
status.author.screen_name,
status.friends_count])
I got stuck. I can’t figure out where is the problem with the code, my code look like this:-
import tweepy
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
import json #data
#Variables that contains the user credentials to access Twitter API
access_token = "***"
access_token_secret = "***"
consumer_key = "***"
consumer_key_secret = "***"
auth = tweepy.OAuthHandler(consumer_key, consumer_key_secret)
auth.set_access_token(access_token, access_token_secret)
#setup api
api = tweepy.API(auth)
class CustomStreamListener(tweepy.StreamListener):
def on_data(self,data):
if data:
tweet_json = json.loads(data)
if tweet_json:
if not tweet_json['text'].strip().startswith('RT '):
Created = data.created_at.strftime("%Y-%m-%d-%H:%M:%S")`
Text = data.text.encode('utf8')
Location = data.location('utf8')
Follower = data.Number_of_follwers('utf8')
Name = data.author.screen_name('utf8')
Friend = data.friends_count('utf8')
with open('OutputStreaming.csv', 'a') as f:
writer = csv.writer(f)
writer.writerow([Created, Text ,Loaction\
,Follower ,Name ,Friend,status.entities.get('hashtags')])
Time.sleep(10)
return True
def on_error(self, status_code):
if status_code == 420:
return False
else:
print >> sys.stderr, 'Encountered error with status code:',\
status_code
def on_timeout(self):
print >> sys.stderr, 'Timeout...'
return True
# Writing csv titles
with open('OutputStreaming.csv', 'a') as f:
writer = csv.writer(f)
writer.writerow(['Date', 'Text', 'Location','Number_Follower',
'User_Name', 'Friends_count','Hash_Tag'])
if __name__ == '__main__':
l = CustomStreamListener()
streamingAPI = tweepy.streaming.Stream(api.auth, l)
streamingAPI.filter(track=['#Yoga','#Meditation'])
Here is a working code :
#!/usr/bin/python3
# coding=utf-8
import tweepy
SEP = ';'
csv = open('OutputStreaming.csv','a')
csv.write('Date' + SEP + 'Text' + SEP + 'Location' + SEP + 'Number_Follower' + SEP + 'User_Name' + SEP + 'Friends_count\n')
class MyStreamListener(tweepy.StreamListener):
def on_status(self, status):
Created = status.created_at.strftime("%Y-%m-%d-%H:%M:%S")
Text = status.text.replace('\n', ' ').replace('\r', '').replace(SEP, ' ')
Location = ''
if status.coordinates is not None:
lon = status.coordinates['coordinates'][0]
lat = status.coordinates['coordinates'][1]
Location = lat + ',' + lon
Follower = str(status.user.followers_count)
Name = status.user.screen_name
Friend = str(status.user.friends_count)
csv.write(Created + SEP + Text + SEP + Location + SEP + Follower + SEP + Name + SEP + Friend + '\n')
def on_error(self, status_code):
print(status_code)
consumer_key = '***'
consumer_secret = '***'
access_token = '***'
access_token_secret = '***'
# stream
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
myStream = tweepy.Stream(auth, MyStreamListener())
myStream.filter(track=['#Yoga','#Meditation'])
I am currently trying to stream tweets for a project using Python, Elasticsearch and Kibana.
While running my Python script, I have an IndentationError and I don't understand why, can anyone help me through this problem ?
Thanks in advance.
My Python script :
import json
import tweepy
import textblob
import elasticsearch
from tweepy import OAuthHandler, Stream
from tweepy.streaming import StreamListener
from textblob import TextBlob
from elasticsearch import Elasticsearch
consumer_key = '...'
consumer_secret = '...'
access_token = '...'
access_token_secret = '...'
elastic_search = Elasticsearch()
class MyStreamListener(StreamListener):
def on_data(self, data):
dict_data = json.loads(data)
tweet = TextBlob(dict_data["text"])
print(tweet.sentiment.polarity)
if tweet.sentiment.polarity < 0:
sentiment = "negative"
elif tweet.sentiment.polarity == 0:
sentiment = "neutral"
else:
sentiment = "positive"
print(sentiment)
elastic_search.index(index="sentiment",
doc_type="test-type",
body={"author": dict_data["user"]["screen_name"],
"date": dict_data["created_at"],
"message": dict_data["text"],
"polarity": tweet.sentiment.polarity,
"subjectivity": tweet.sentiment.subjectivity,
"sentiment": sentiment})
return True
def on_failure(self, status):
print(status)
if __name__ == '__main__':
listener = MyStreamListener()
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
stream = Stream(auth, listener)
stream.filter(track=['congress'])
# user_choice = input("Please choose a Hashtag... : ")
# retrieve_tweets = api.search(user_choice)
The error message :
File "sentiment.py", line 21
tweet = TextBlob(dict_data["text"])
^
IndentationError: unindent does not match any outer indentation level
You do have tabs there.
def on_data(self, data):
dict_data = json.loads(data)
# ^ tab and 4 spaces here
tweet = TextBlob(dict_data["text"])
# ^ 8 spaces here
print(tweet.sentiment.polarity)
# ^ ^ two tabs here (equal 16 spaces)
Note that the representation in SO site translates the tabs to spaces, but if you copy the source into a code editor, it reveals the tabs:
I'm new to python programming and Twitter API.
I tired to collect tweets with a hashtag from a specific time period(say 11/24/216-11/27/2017), my goal is to get coordinates from those extracted tweets and save the coordinates and the tweet text into a csv file.
But my problem is that i don't know how to set the time filter and save them into a file. What's more, only a few tweets contained the coordinates, was that common?
Here are the python scripts that i found online.
import json
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
#Enter Twitter API Key information
consumer_key = ''
consumer_secret = ''
access_token = ''
access_secret = ''
file = open("C:\\Output.csv", "w") #This script didn't work on my Mac#
strong text
file.write("X,Y\n")
data_list = []
count = 0
class listener(StreamListener):
def on_data(self, data):
global count
#How many tweets you want to find, could change to time based
if count <= 2000:
json_data = json.loads(data)
coords = json_data["coordinates"]
if coords is not None:
print coords["coordinates"]
lon = coords["coordinates"][0]
lat = coords["coordinates"][1]
data_list.append(json_data)
file.write(str(lon) + ",")
file.write(str(lat) + "\n")
count += 1
return True
else:
file.close()
return False
def on_error(self, status):
print status
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)
twitterStream = Stream(auth, listener())
#What you want to search for here
twitterStream.filter(track=[""])
I am super new to Python so forgive me for my lack of knowledge haha but for some reason I cannot get Python to insert rows in my database. Here is what I have:
import sys, arcpy, datetime, tweepy
consumer_key = " "
consumer_secret = " "
access_token = " "
access_token_secret = " "
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)
table = r"C:\....dbf"
rows = arcpy.InsertCursor(table)
class CustomStreamListener(tweepy.StreamListener):
def on_status(self, status):
try:
user = status.user.screen_name
tweet = status.text
coord_x = status.coordinates['coordinates'][0]
coord_y = status.coordinates['coordinates'][1]
date_utc = status.created_at
h_m_s_utc = (str(status.created_at.hour))+':'+(str(status.created_at.minute))+':'+(str(status.created_at.second))
date_est = datetime.datetime.now()
h_m_s_est = (str(date_est.hour))+':'+(str(date_est.minute))+':'+(str(date_est.second))
row.user_name=user
row.tweet=tweet
row.coord_x=coord_x
row.coord_y=coord_y
row.date_utc=date_utc
row.h_m_s_utc=h_m_s_utc
row.date_est=date_est
rows.insertRow(row)
del row, rows
insert_table= r"C:\....dbf"
insert_row(insert_table)
print user
print tweet
except:
# If there are no coordinates for a tweet, then pass
pass
def on_error(self, status_code):
print >> sys.stderr, 'Encountered error with status code:', status_code
return True # Don't kill the stream
def on_timeout(self):
print >> sys.stderr, 'Timeout...'
return True # Don't kill the stream
# ----------------Script execution----------------
listener = tweepy.streaming.Stream(auth, CustomStreamListener())
listener.filter(track=[' love ', '#love'])
I am pretty sure it has something to do with the row.rowID thing.
Sorry if it is a disaster! Any help is much appreciated!
I looks like you're forgetting to call the data access (.da) method for the insert cursor.
with arcpy.da.InsertCursor(in_table, field_names) as inCursor:
for row in rows:
inCursor.insertRow(row) # example
-or-
inCursor = arcpy.da.InsertCursor(in_table, field_names)
for row in rows:
cursor.insertRow(row) # example
del inCursor # make sure to delete cursor if you do it this way as to avoid data lock.
Also, if you just want the Insert Cursor method, you can
from arcpy import da
For more info, check out:
http://resources.arcgis.com/en/help/main/10.2/index.html#//018w0000000t000000
I am a Python newbie and am trying to print error messages when using Tweepy to stream tweets. I used an endless loop in my streaming code because it generates InComplete Read errors otherwise. My aim is to print all the error messages I get while continuing to stream tweets, so that I am aware of errors other than the InComplete Read errors.
My streamListerner is:
# Code from http://badhessian.org/2012/10/collecting-real-time-twitter-data- with-the-streaming-api/ with minor modifications
import json, time, sys
from tweepy import StreamListener
# create an instance of a tweepy StreamListener to handle the incoming data.
class SListener(StreamListener):
def __init__(self, fprefix = 'streamer'):
# self.api = api or API()
self.counter = 0
self.fprefix = fprefix
self.output = open('../Dissertation/stream_3_data/' + fprefix + '.' + time.strftime('%Y%m%d-%H%M%S') + '.json', 'w')
self.delout = open('delete.txt', 'a')
def on_data(self, data):
if 'in_reply_to_status' in data:
self.on_status(data)
elif 'delete' in data:
delete = json.loads(data)['delete']['status']
if self.on_delete(delete['id'], delete['user_id']) is False:
return False
elif 'limit' in data:
if self.on_limit(json.loads(data)['limit']['track']) is False:
return False
elif 'warning' in data:
warning = json.loads(data)['warnings']
print warning['message']
return False
def on_status(self, status):
self.output.write(status)
self.counter += 1
if self.counter >= 5000: # New file is started every 5,000 tweets, tagged with prefix and a timestamp.
self.output.close()
self.output = open('../Dissertation/stream_3_data/' + self.fprefix + '.'
+ time.strftime('%Y%m%d-%H%M%S') + '.json', 'w')
self.counter = 0
return
def on_delete(self, status_id, user_id):
self.delout.write( str(status_id) + "\n")
return
def on_limit(self, track):
sys.stderr.write(track + "\n")
return
def on_error(self, status_code):
sys.stderr.write('Error: ' + str(status_code) + "\n")
return True # Don't kill the stream
def on_timeout(self):
sys.stderr.write("Timeout, sleeping for 60 seconds...\n")
time.sleep(60)
return True # Don't kill the stream
The part that seems to generate problems is when I try to use the streamlistener:
twitter_api = tweepy_oauth()
Q = "twitter.com"
locations = [101.615161,3.08115,101.753663,3.167507,
115.421372,39.43277,117.501099,41.05999,
120.858322,30.69094,121.9733,31.86889]
# Create a streaming API and set a timeout value of 60 seconds.
streaming_api = tweepy.streaming.Stream(twitter_api, SListener(), timeout=60)
# Used infinite loop from https://github.com/ryanmcgrath/twython/issues/288 cause
# I kept getting InComplete Read Error. Probably due to high volumes of tweets being sent to me at once
#Endless loop
while True:
try:
streaming_api.filter(follow=None, track=None, locations=locations, stall_warnings=True)
except:
e = sys.exc_info()[0] #Get exception info
print 'ERROR:',e #Print exception info
continue
My code does run and works, but I encounter the following error occasionally, which stops my entire stream:
---------------------------------------------------------------------------
IOError Traceback (most recent call last)
<ipython-input-4-fb45fa5d8307> in <module>()
34 streaming_api.filter(follow=None, track=None, locations=locations, stall_warnings=True)
35 except:
36 e = sys.exc_info()[0] #Get exception info
---> 37 print 'ERROR:',e #Print exception info
38 continue
IOError: [Errno 22] Invalid argument
The timing when the error appears is inconsistent - it ranges from 1h into the stream to an entire day into the stream.
I concluded that the issue is with the print statement because I replaced line 37 with
print 'Error'
and the same error message appears. I am not sure how to proceed when even the basic print statement does not work - any help would be great.