After reading the Tweepy streaming documentation and going through this example, I tried to write a Tweepy app that crawls live stream data with the Tweepy API and saves it to a .csv file. When I run my code, it produces an empty csv file ('OutputStreaming.csv') containing only the column names ['Date', 'Text', 'Location', 'Number_Follower', 'User_Name', 'Friends_count', 'Hash_Tag'], not the streamed tweets. I also tried doing it this way and this one, but I get the same output with my code:
def on_status(self, status):
    with open('OutputStreaming.csv', 'w') as f:
        f.write('Author,Date,Text')
        writer = csv.writer(f)
        writer.writerow([status.created_at.strftime("%Y-%m-%d %H:%M:%S"),
                         status.text.encode,
                         status.location,
                         status.Number_of_follwers,
                         status.author.screen_name,
                         status.friends_count])
I got stuck. I can't figure out where the problem is. My full code looks like this:
import tweepy
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
import json  # data

# Variables that contain the user credentials to access the Twitter API
access_token = "***"
access_token_secret = "***"
consumer_key = "***"
consumer_key_secret = "***"

auth = tweepy.OAuthHandler(consumer_key, consumer_key_secret)
auth.set_access_token(access_token, access_token_secret)
# setup api
api = tweepy.API(auth)

class CustomStreamListener(tweepy.StreamListener):
    def on_data(self, data):
        if data:
            tweet_json = json.loads(data)
            if tweet_json:
                if not tweet_json['text'].strip().startswith('RT '):
                    Created = data.created_at.strftime("%Y-%m-%d-%H:%M:%S")
                    Text = data.text.encode('utf8')
                    Location = data.location('utf8')
                    Follower = data.Number_of_follwers('utf8')
                    Name = data.author.screen_name('utf8')
                    Friend = data.friends_count('utf8')
                    with open('OutputStreaming.csv', 'a') as f:
                        writer = csv.writer(f)
                        writer.writerow([Created, Text, Loaction, Follower,
                                         Name, Friend, status.entities.get('hashtags')])
                    Time.sleep(10)
        return True

    def on_error(self, status_code):
        if status_code == 420:
            return False
        else:
            print >> sys.stderr, 'Encountered error with status code:', \
                status_code

    def on_timeout(self):
        print >> sys.stderr, 'Timeout...'
        return True

# Writing csv titles
with open('OutputStreaming.csv', 'a') as f:
    writer = csv.writer(f)
    writer.writerow(['Date', 'Text', 'Location', 'Number_Follower',
                     'User_Name', 'Friends_count', 'Hash_Tag'])

if __name__ == '__main__':
    l = CustomStreamListener()
    streamingAPI = tweepy.streaming.Stream(api.auth, l)
    streamingAPI.filter(track=['#Yoga', '#Meditation'])
Here is a working version of the code:
#!/usr/bin/python3
# coding=utf-8
import tweepy

SEP = ';'
csv = open('OutputStreaming.csv', 'a')
csv.write('Date' + SEP + 'Text' + SEP + 'Location' + SEP + 'Number_Follower' + SEP + 'User_Name' + SEP + 'Friends_count\n')

class MyStreamListener(tweepy.StreamListener):
    def on_status(self, status):
        Created = status.created_at.strftime("%Y-%m-%d-%H:%M:%S")
        # Strip newlines and the separator so each tweet stays on one csv line
        Text = status.text.replace('\n', ' ').replace('\r', '').replace(SEP, ' ')
        Location = ''
        if status.coordinates is not None:
            lon = status.coordinates['coordinates'][0]
            lat = status.coordinates['coordinates'][1]
            Location = str(lat) + ',' + str(lon)  # the coordinates are floats, so cast to str
        Follower = str(status.user.followers_count)
        Name = status.user.screen_name
        Friend = str(status.user.friends_count)
        csv.write(Created + SEP + Text + SEP + Location + SEP + Follower + SEP + Name + SEP + Friend + '\n')

    def on_error(self, status_code):
        print(status_code)

consumer_key = '***'
consumer_secret = '***'
access_token = '***'
access_token_secret = '***'

# stream
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
myStream = tweepy.Stream(auth, MyStreamListener())
myStream.filter(track=['#Yoga','#Meditation'])
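A note on the design: joining fields with a manual separator works, but the standard csv module handles quoting of embedded separators and newlines for you. As a minimal sketch of the same listener using csv.writer (the class name and file path are just placeholders, not part of the original answer):

import csv
import tweepy

class CsvStreamListener(tweepy.StreamListener):
    def __init__(self, path='OutputStreaming.csv'):
        super().__init__()
        # newline='' lets the csv module manage line endings itself
        self.file = open(path, 'a', newline='', encoding='utf-8')
        self.writer = csv.writer(self.file)

    def on_status(self, status):
        location = ''
        if status.coordinates is not None:
            lon, lat = status.coordinates['coordinates']
            location = '{},{}'.format(lat, lon)
        # csv.writer quotes commas and newlines, so no manual escaping is needed
        self.writer.writerow([
            status.created_at.strftime("%Y-%m-%d-%H:%M:%S"),
            status.text,
            location,
            status.user.followers_count,
            status.user.screen_name,
            status.user.friends_count,
        ])
        self.file.flush()  # so rows appear in the file while the stream runs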
I'm using Tweepy and I can't manage to reply to a tweet with an image. When I run my code, the console says "tweepy.errors.HTTPException: 413 Payload Too Large". Can anyone help? Thanks. Here is my code:
#!/usr/bin/env python
from msilib.schema import Media
from urllib import response
import tweepy, random
import time
import pickle

##########################################################################################
CONSUMER_KEY = "xxxxxxxxxxxxxxxxxx"
CONSUMER_SECRET = "xxxxxxxxxxxxxxxxxxx"
ACCESS_TOKEN = "xxxxxxxxxxxxxxxxxxxxxxxxxxx"
ACCESS_TOKEN_SECRET = "xxxxxxxxxxxxxxxxxxxxx"
##########################################################################################

processed_tweets = []
try:
    with open('twts.pkl', 'rb') as f:
        processed_tweets = pickle.load(f)
except FileNotFoundError:
    pass

auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
api = tweepy.API(auth)

twt = api.search_tweets("#NFT", result_type="Drop", count=1)
for s in twt:
    if s.id not in processed_tweets:
        time.sleep(3)
        sn = s.user.screen_name
        m = "#%s " % sn + random.choice(open('tweets.txt').readlines()).strip("\n")
        media = api.media_upload("devil.png")
        post_result = api.update_status(status=m, media_ids=[media.media_id], in_reply_to_status_id=s.id)
        processed_tweets.append(s.id)
        print(s.id)
        with open('twts.pkl', 'wb') as f:
            pickle.dump(processed_tweets, f)
print("Done replying!")
I have the code below, but there is something wrong in my main function and I can't spot the problem. I am literally stuck on this indentation error: it says an IndentationError occurred at the except: in the main block.
import tweepy
from datetime import datetime, timedelta
import csv

def get_all_tweets(screen_name):
    consumer_key = *
    consumer_secret = *
    access_key = *
    access_secret = *

    # authorize twitter, initialize tweepy
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_key, access_secret)
    api = tweepy.API(auth, wait_on_rate_limit_notify=True)

    noRT = []
    search_terms = 'superbowl ', 'super bowl ', '#superbowl'

    for page in tweepy.Cursor(api.user_timeline,
                              screen_name=screen_name,
                              tweet_mode="extended",
                              wait_on_rate_limit=True,
                              include_retweets=False,
                              count=100).pages(20):
        for status in page:
            noRT.append([status.id_str, status.created_at, status.full_text.encode("utf-8")])
            # do your process on status

    with open('{}_tweets.csv'.format(screen_name), 'w') as f:
        writer = csv.writer(f)
        writer.writerow(["id", "created_at", "text"])
        writer.writerows(noRT)

    print('{}_tweets.csv was successfully created.'.format(screen_name))
    pass

if __name__ == '__main__':
    usernames = ["GregoryBlakley","Minihova","TheAMRCentre","throse_gd","CDCgov","TheAMRCentre","GuyFema","EndGameWW3","ABC","Childishnegrit0","WorldPeace24_7","byetofi","mumabear13"]
    for x in usernames:
        try:
            get_all_tweets(x)
        except:
            print "%s does not exist" % (twitter_id)
        pass
What's wrong with this code?
if __name__ == '__main__':
    usernames = ["GregoryBlakley","Minihova","TheAMRCentre","throse_gd","CDCgov","TheAMRCentre","GuyFema","EndGameWW3","ABC","Childishnegrit0","WorldPeace24_7","byetofi","mumabear13"]
    for x in usernames:
        try:
            get_all_tweets(x)
        except:
            print "%s does not exist" % (twitter_id)
        pass
You're mixing tabs and spaces inside your for loops. Python 3 disallows mixing tabs and spaces for indentation, so re-indent the block consistently (convert tabs to spaces in your editor). See the sketch below.
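For illustration, here is a hedged sketch of the main block re-indented with spaces only. Two incidental fixes are included: the Python 2 print statement is rewritten as a Python 3 call (the original form would raise its own SyntaxError under Python 3), and the undefined name twitter_id is replaced with the loop variable x. Catching tweepy.TweepError (tweepy v3) instead of a bare except is a suggested change, not part of the original:

if __name__ == '__main__':
    usernames = ["GregoryBlakley", "Minihova", "TheAMRCentre", "throse_gd",
                 "CDCgov", "TheAMRCentre", "GuyFema", "EndGameWW3", "ABC",
                 "Childishnegrit0", "WorldPeace24_7", "byetofi", "mumabear13"]
    for x in usernames:
        try:
            get_all_tweets(x)
        except tweepy.TweepError:
            # x is the username being processed; twitter_id was never defined
            print("%s does not exist" % x)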
I'm new to Python programming and the Twitter API.
I tried to collect tweets with a hashtag from a specific time period (say 11/24/2016-11/27/2017). My goal is to get coordinates from those extracted tweets and save the coordinates and the tweet text into a csv file.
But my problem is that I don't know how to set the time filter or save the results to a file. What's more, only a few tweets contained coordinates; is that common?
Here is the Python script I found online:
import json
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener

# Enter Twitter API Key information
consumer_key = ''
consumer_secret = ''
access_token = ''
access_secret = ''

file = open("C:\\Output.csv", "w")  # This script didn't work on my Mac
file.write("X,Y\n")
data_list = []
count = 0
class listener(StreamListener):
def on_data(self, data):
global count
#How many tweets you want to find, could change to time based
if count <= 2000:
json_data = json.loads(data)
coords = json_data["coordinates"]
if coords is not None:
print coords["coordinates"]
lon = coords["coordinates"][0]
lat = coords["coordinates"][1]
data_list.append(json_data)
file.write(str(lon) + ",")
file.write(str(lat) + "\n")
count += 1
return True
else:
file.close()
return False
def on_error(self, status):
print status
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)
twitterStream = Stream(auth, listener())
#What you want to search for here
twitterStream.filter(track=[""])
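Two notes on the question itself. First, the Streaming API only delivers tweets in real time, so a historical window like 11/24/2016-11/27/2017 cannot be pulled from a stream; the standard REST search endpoint accepts an until date but only reaches back about a week. A hedged sketch of that approach (the hashtag, date, and output path are placeholders, and the credentials are assumed to be set as above):

import csv
import tweepy

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)
api = tweepy.API(auth, wait_on_rate_limit=True)

with open("output.csv", "w") as f:
    writer = csv.writer(f)
    writer.writerow(["X", "Y", "text"])
    # until= takes 'YYYY-MM-DD'; the standard search index only goes back ~7 days
    for status in tweepy.Cursor(api.search, q="#Yoga",
                                until="2017-11-27").items(2000):
        if status.coordinates is not None:
            lon, lat = status.coordinates["coordinates"]
            writer.writerow([lon, lat, status.text])

Second, yes, sparse coordinates are expected: only a small fraction of tweets (commonly cited as 1-2%) carry GPS coordinates.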
With the following piece of code I open a csv file and write the text of each tweet in the first column:
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
import time

ckey = ''
csecret = ''
atoken = ''
asecret = ''

class listener(StreamListener):
    def on_data(self, data):
        try:
            # print data
            text = data.split(',"text":"')[1].split('","source')[0]
            print text
            saveThis = str(time.time()) + '::' + text
            saveFile = open('tweets3.csv', 'a')
            saveFile.write(saveThis)
            saveFile.write('\n')
            saveFile.close()
        except BaseException, e:
            print 'failed on data', str(e)
            time.sleep(5)
        return True

    def on_error(self, status):
        print status

auth = OAuthHandler(ckey, csecret)
auth.set_access_token(atoken, asecret)
twitterStream = Stream(auth, listener())
result = twitterStream.filter(track=["zika"], languages=['en'])
My output is one line per tweet: a timestamp, '::', then the tweet text.
I want to store, for each tweet, the username of the author in column B and their number of followers in column C. Can anybody help?
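No answer appears above, but since on_data receives the raw status JSON, one option is to parse it with json.loads instead of string splitting; the status JSON's user object carries screen_name and followers_count. A hedged sketch of a replacement on_data method for the listener, written in Python 3 syntax rather than the question's Python 2:

import csv
import json
import time

def on_data(self, data):
    try:
        tweet = json.loads(data)  # parse the raw status JSON
        with open('tweets3.csv', 'a', newline='') as f:
            writer = csv.writer(f)
            writer.writerow([
                str(time.time()) + '::' + tweet['text'],  # column A: time::text
                tweet['user']['screen_name'],             # column B: username
                tweet['user']['followers_count'],         # column C: followers
            ])
    except BaseException as e:
        print('failed on data', str(e))
        time.sleep(5)
    return True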
I am super new to Python, so forgive my lack of knowledge, haha, but for some reason I cannot get Python to insert rows into my database. Here is what I have:
import sys, arcpy, datetime, tweepy

consumer_key = " "
consumer_secret = " "
access_token = " "
access_token_secret = " "

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)

table = r"C:\....dbf"
rows = arcpy.InsertCursor(table)

class CustomStreamListener(tweepy.StreamListener):
    def on_status(self, status):
        try:
            user = status.user.screen_name
            tweet = status.text
            coord_x = status.coordinates['coordinates'][0]
            coord_y = status.coordinates['coordinates'][1]
            date_utc = status.created_at
            h_m_s_utc = (str(status.created_at.hour)) + ':' + (str(status.created_at.minute)) + ':' + (str(status.created_at.second))
            date_est = datetime.datetime.now()
            h_m_s_est = (str(date_est.hour)) + ':' + (str(date_est.minute)) + ':' + (str(date_est.second))
            row.user_name = user
            row.tweet = tweet
            row.coord_x = coord_x
            row.coord_y = coord_y
            row.date_utc = date_utc
            row.h_m_s_utc = h_m_s_utc
            row.date_est = date_est
            rows.insertRow(row)
            del row, rows
            insert_table = r"C:\....dbf"
            insert_row(insert_table)
            print user
            print tweet
        except:
            # If there are no coordinates for a tweet, then pass
            pass

    def on_error(self, status_code):
        print >> sys.stderr, 'Encountered error with status code:', status_code
        return True  # Don't kill the stream

    def on_timeout(self):
        print >> sys.stderr, 'Timeout...'
        return True  # Don't kill the stream

# ----------------Script execution----------------
listener = tweepy.streaming.Stream(auth, CustomStreamListener())
listener.filter(track=[' love ', '#love'])
I am pretty sure it has something to do with the row.rowID thing.
Sorry if it is a disaster! Any help is much appreciated!
It looks like you're forgetting to use the data access (.da) module's insert cursor.
with arcpy.da.InsertCursor(in_table, field_names) as inCursor:
    for row in rows:
        inCursor.insertRow(row)  # example

-or-

inCursor = arcpy.da.InsertCursor(in_table, field_names)
for row in rows:
    inCursor.insertRow(row)  # example
del inCursor  # make sure to delete the cursor if you do it this way, to avoid a data lock
Also, if you just want the InsertCursor method, you can import the data access module directly:
from arcpy import da
For more info, check out:
http://resources.arcgis.com/en/help/main/10.2/index.html#//018w0000000t000000
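Applied to the question's listener, a hedged sketch might create the cursor inside on_status and insert a tuple matching an explicit field list. The field names below are taken from the question's code (they must match the actual .dbf columns), and the table path stays redacted as in the original:

import datetime
import arcpy

FIELDS = ["user_name", "tweet", "coord_x", "coord_y",
          "date_utc", "h_m_s_utc", "date_est", "h_m_s_est"]
table = r"C:\....dbf"  # path redacted in the original question

def on_status(self, status):
    if status.coordinates is None:
        return  # skip tweets without coordinates
    coord_x, coord_y = status.coordinates['coordinates']
    now = datetime.datetime.now()
    # The with-block closes the cursor after each insert, releasing the data lock
    with arcpy.da.InsertCursor(table, FIELDS) as cursor:
        cursor.insertRow((status.user.screen_name, status.text,
                          coord_x, coord_y, status.created_at,
                          status.created_at.strftime('%H:%M:%S'),
                          now, now.strftime('%H:%M:%S')))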