Tweepy stream ignoring some twitter accounts - python

I am putting the finishing touches on my tweepy Twitter bot for my LED, and while testing it with different accounts something weird is going on. When I tweet from my personal account, nothing happens: the filter doesn't detect the hashtags and just ignores everything. My friends have been able to use it, though, and when I log onto a separate Twitter account and tweet from there, the bot sees the tweet and recognizes it.
I really have no clue what could cause this. Is it a problem with my bot, or with the accounts?
import tweepy
import requests
import json

consumer_key = 'nein'
consumer_secret = 'das'
access_token = 'ist'
access_token_secret = 'böse'

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, wait_on_rate_limit=True)

user = api.me()
print(user.name)

counter = 0

class LEDStreamListener(tweepy.StreamListener):
    def on_data(self, raw_data):
        # Keep a copy of the latest raw payload for debugging, then process it
        with open('tweets.json', 'w') as write_file:
            write_file.write(raw_data)
        data = json.loads(raw_data)
        variable_checker(data)

    def on_error(self, status_code):
        if status_code == 420:
            print('Stream disconnected because of a rate limit error')
        else:
            # str() is needed here: concatenating an int to a str raises a TypeError
            print('Unknown error ' + str(status_code))
        return False

def retweet_tweet(tweet_id):
    api.retweet(tweet_id)
    api.create_favorite(tweet_id)

def tag_checker(tag_list):
    # Return the first recognized hashtag, or ' ' if none match
    for tag in tag_list:
        iterated_tag = tag['text']
        if iterated_tag in ('HUNTER_LED_ON', 'HUNTER_LED_OFF', 'led_test'):
            return iterated_tag
    return ' '

def variable_checker(json_file):
    if 'delete' in json_file:
        # The tweet was deleted, so do nothing
        print('Delete')
    else:
        usr = json_file['user']['screen_name']
        tweet_id = json_file['id_str']
        print(tweet_id)
        text = json_file['text']
        tag_list = json_file['entities']['hashtags']
        tag = tag_checker(tag_list)
        data_check(usr, tweet_id, tag, text)

def data_check(twitter_user, tweet, tag, text):
    if tag == 'HUNTER_LED_OFF' and not text.startswith('RT'):
        requests.get('http://192.168.1.175/off')
        retweet_tweet(tweet)
        api.update_status('I turned the led off for you', in_reply_to_status_id=tweet)
        print('off')
    elif tag == 'HUNTER_LED_ON' and not text.startswith('RT'):
        requests.get('http://192.168.1.175/on')
        retweet_tweet(tweet)
        api.update_status('I turned the led on for you', in_reply_to_status_id=tweet)
        print('on')
    elif tag == 'led_test' and not text.startswith('RT'):
        retweet_tweet(tweet)
        reply = 'Nice test bro *highfives* keep up the good work'
        api.update_status('#%s %s' % (twitter_user, reply), in_reply_to_status_id=tweet)
        print('tested')
    elif twitter_user == 'realDonaldTrump':
        api.create_favorite(tweet)
        requests.get('http://192.168.1.175/trump')
        print('Make America Great Again!')

ledStreamListener = LEDStreamListener()
ledStream = tweepy.Stream(auth=api.auth, listener=ledStreamListener)
ledStream.filter(track=['#HUNTER_LED_OFF', '#HUNTER_LED_ON', '#led_test'])

It could have something to do with Twitter only making about 1% of tweets available for streaming to most users. Twitter does offer a "Firehose" account that gives you everything, but it's rather expensive.
You could poll your user's timeline every few seconds rather than using the streaming API:
new_tweets = api.user_timeline(user_id=user_id, count=1)
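For example, a minimal polling loop along those lines might look like this (a sketch, assuming api is the authenticated tweepy.API object from the question and user_id is the account to watch; since_id avoids processing the same tweet twice):

import time

def poll_timeline(api, user_id, interval=10):
    since_id = None
    while True:
        # Only fetch tweets newer than the last one we processed
        kwargs = {'user_id': user_id, 'count': 5}
        if since_id is not None:
            kwargs['since_id'] = since_id
        for status in reversed(api.user_timeline(**kwargs)):
            print(status.text)  # replace with the hashtag handling above
            since_id = status.id
        time.sleep(interval)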

Related

TabError: inconsistent use of tabs and spaces in indentation (except: ^)

I have code like the below, but there is something wrong in my main code. Can anyone help me spot the problem? I am literally stuck on this indentation error: it says an indentation error occurred at the except: in the main function.
import tweepy
from datetime import datetime, timedelta
import csv

def get_all_tweets(screen_name):
    consumer_key = *
    consumer_secret = *
    access_key = *
    access_secret = *

    # authorize twitter, initialize tweepy
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_key, access_secret)
    api = tweepy.API(auth, wait_on_rate_limit_notify=True)

    noRT = []
    search_terms = 'superbowl ', 'super bowl ', '#superbowl'
    for page in tweepy.Cursor(api.user_timeline,
                              screen_name=screen_name,
                              tweet_mode="extended",
                              wait_on_rate_limit=True,
                              include_retweets=False,
                              count=100).pages(20):
        for status in page:
            noRT.append([status.id_str, status.created_at, status.full_text.encode("utf-8")])
            # do your process on status

    with open('{}_tweets.csv'.format(screen_name), 'w') as f:
        writer = csv.writer(f)
        writer.writerow(["id","created_at","text"])
        writer.writerows(noRT)
    print('{}_tweets.csv was successfully created.'.format(screen_name))
    pass

if __name__ == '__main__':
    usernames = ["GregoryBlakley","Minihova","TheAMRCentre","throse_gd","CDCgov","TheAMRCentre","GuyFema","EndGameWW3","ABC","Childishnegrit0","WorldPeace24_7","byetofi","mumabear13"]
    for x in usernames:
        try:
            get_all_tweets(x)
        except:
            print "%s does not exist" % (twitter_id)
            pass
What's wrong with this code?
if __name__ == '__main__':
    usernames = ["GregoryBlakley","Minihova","TheAMRCentre","throse_gd","CDCgov","TheAMRCentre","GuyFema","EndGameWW3","ABC","Childishnegrit0","WorldPeace24_7","byetofi","mumabear13"]
    for x in usernames:
        try:
            get_all_tweets(x)
        except:
            print "%s does not exist" % (twitter_id)
            pass
You're mixing tabs and spaces inside your for loops. Python 3 disallows mixing the use of tabs and spaces for indentation, so re-indent the whole block consistently with one or the other.
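If the offending lines are hard to spot by eye, the standard library ships a checker for exactly this; running it on the script (here assumed to be saved as tweets.py) reports the first place where the indentation is ambiguous:

python -m tabnanny -v tweets.py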

Get tweets with hashtag from specific time period

I'm new to Python programming and the Twitter API.
I tried to collect tweets with a hashtag from a specific time period (say 11/24/2016-11/27/2017); my goal is to get the coordinates from those extracted tweets and save the coordinates and the tweet text into a CSV file.
But my problem is that I don't know how to set the time filter or how to save the results to a file. What's more, only a few tweets contained coordinates; is that common?
Here is the Python script that I found online.
import json
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener

# Enter Twitter API key information
consumer_key = ''
consumer_secret = ''
access_token = ''
access_secret = ''

file = open("C:\\Output.csv", "w")  # This path didn't work on my Mac
file.write("X,Y\n")
data_list = []
count = 0

class listener(StreamListener):
    def on_data(self, data):
        global count
        # How many tweets you want to find; could change to time-based
        if count <= 2000:
            json_data = json.loads(data)
            coords = json_data["coordinates"]
            if coords is not None:
                print(coords["coordinates"])
                lon = coords["coordinates"][0]
                lat = coords["coordinates"][1]
                data_list.append(json_data)
                file.write(str(lon) + ",")
                file.write(str(lat) + "\n")
            count += 1
            return True
        else:
            file.close()
            return False

    def on_error(self, status):
        print(status)

auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)

twitterStream = Stream(auth, listener())
# What you want to search for here
twitterStream.filter(track=[""])
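Two notes on the question itself. The streaming API only delivers tweets in real time, so a past date range can't be collected this way; historical tweets have to come from the search API instead, which on the standard tier typically only reaches back about a week. For the saving part, the csv module handles commas and quoting inside the tweet text better than raw file.write calls; here is a sketch of a small helper (the output.csv filename and column names are just placeholders) that could be called from on_data above for each decoded tweet:

import csv

csv_file = open('output.csv', 'w')
writer = csv.writer(csv_file)
writer.writerow(['X', 'Y', 'text'])

def save_tweet(json_data):
    # Write one row of longitude, latitude and tweet text
    coords = json_data['coordinates']
    if coords is not None:
        lon, lat = coords['coordinates']
        writer.writerow([lon, lat, json_data['text']])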

Twitter streaming stop collecting data

I've got the following code that retrieves Twitter streaming data and creates a JSON file. What I'd like is to stop collecting data after, for example, 1000 tweets. How can I set that up in the code?
# Import the necessary methods from the tweepy library
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
# Other libs
import json

# Variables that contain the user credentials to access the Twitter API
access_token = "XXX"
access_token_secret = "XXX"
consumer_key = "XXX"
consumer_secret = "XXX"

# This is a basic listener that just prints received tweets to stdout.
class StdOutListener(StreamListener):
    def on_data(self, data):
        try:
            tweet = json.loads(data)
            with open('your_data.json', 'a') as my_file:
                json.dump(tweet, my_file)
        except BaseException:
            print('Error')
            pass

    def on_error(self, status):
        print("Error " + str(status))
        if status == 420:
            print("Rate Limited")
            return False

if __name__ == '__main__':
    # This handles Twitter authentication and the connection to the Twitter Streaming API
    l = StdOutListener()
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    stream = Stream(auth, l)
    stream.filter(track=['Euro2016', 'FRA', 'POR'], languages=['en'])
Here is a possible solution:
class StdOutListener(StreamListener):
    tweet_number = 0  # class variable

    def __init__(self, max_tweets):
        self.max_tweets = max_tweets  # max number of tweets

    def on_data(self, data):
        self.tweet_number += 1
        try:
            tweet = json.loads(data)
            with open('your_data.json', 'a') as my_file:
                json.dump(tweet, my_file)
        except BaseException:
            print('Error')
            pass
        if self.tweet_number >= self.max_tweets:
            sys.exit('Limit of ' + str(self.max_tweets) + ' tweets reached.')

    def on_error(self, status):
        print("Error " + str(status))
        if status == 420:
            print("Rate Limited")
            return False

l = StdOutListener(1000)  # Here you can set your maximum number of tweets (1000 in this example)
After defining the class variable tweet_number, I used the __init__() method to initialize a new StdOutListener object with the maximum number of tweets you want to collect. tweet_number is increased by 1 each time the on_data(data) method is called, causing the program to terminate once tweet_number >= max_tweets.
P.S. You need to import sys for the code to work.
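As an aside, returning False from on_data also disconnects the stream in tweepy, which stops collecting without killing the whole interpreter the way sys.exit does; a sketch of on_data written that way:

    def on_data(self, data):
        self.tweet_number += 1
        try:
            tweet = json.loads(data)
            with open('your_data.json', 'a') as my_file:
                json.dump(tweet, my_file)
        except BaseException:
            print('Error')
        # Returning False tells tweepy to disconnect the stream cleanly
        return self.tweet_number < self.max_tweets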
This is the 2.7 code I would use -- sorry, I do not know 3.0 as well... I think you want what is on my second line, the .items(1000) part.
Stack Overflow messed up the indentation in my code. I am also using tweepy.
CODE:
results = []
# The 1000 in .items(1000) is where you say to search for 1000 tweets.
for tweet in tweepy.Cursor(api.search, q='%INSERT_SEARCH_VARIABLE HERE').items(1000):
    results.append(tweet)

print type(results)
print len(results)

def toDataFrame(tweets):
    DataSet = pd.DataFrame()
    DataSet['tweetID'] = [tweet.id for tweet in tweets]
    DataSet['tweetText'] = [tweet.text for tweet in tweets]
    DataSet['tweetRetweetCt'] = [tweet.retweet_count for tweet in tweets]
    DataSet['tweetFavoriteCt'] = [tweet.favorite_count for tweet in tweets]
    DataSet['tweetSource'] = [tweet.source for tweet in tweets]
    DataSet['tweetCreated'] = [tweet.created_at for tweet in tweets]
    DataSet['userID'] = [tweet.user.id for tweet in tweets]
    DataSet['userScreen'] = [tweet.user.screen_name for tweet in tweets]
    DataSet['userName'] = [tweet.user.name for tweet in tweets]
    DataSet['userCreateDt'] = [tweet.user.created_at for tweet in tweets]
    DataSet['userDesc'] = [tweet.user.description for tweet in tweets]
    DataSet['userFollowerCt'] = [tweet.user.followers_count for tweet in tweets]
    DataSet['userFriendsCt'] = [tweet.user.friends_count for tweet in tweets]
    DataSet['userLocation'] = [tweet.user.location for tweet in tweets]
    DataSet['userTimezone'] = [tweet.user.time_zone for tweet in tweets]
    return DataSet

# Pass the tweets list to the above function to create a DataFrame
tweet_frame = toDataFrame(results)
tweet_frame[0:999]
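For a Python 3 version, only the print statements need changing; the Cursor call and the DataFrame function work as-is (a sketch, assuming the same authenticated api object):

results = []
for tweet in tweepy.Cursor(api.search, q='your search terms').items(1000):
    results.append(tweet)
print(type(results))
print(len(results))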

Using Python, Tweepy, Insert Cursor and Arcpy

I am super new to Python, so forgive my lack of knowledge, haha, but for some reason I cannot get Python to insert rows into my database. Here is what I have:
import sys, arcpy, datetime, tweepy

consumer_key = " "
consumer_secret = " "
access_token = " "
access_token_secret = " "

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)

table = r"C:\....dbf"
rows = arcpy.InsertCursor(table)

class CustomStreamListener(tweepy.StreamListener):
    def on_status(self, status):
        try:
            user = status.user.screen_name
            tweet = status.text
            coord_x = status.coordinates['coordinates'][0]
            coord_y = status.coordinates['coordinates'][1]
            date_utc = status.created_at
            h_m_s_utc = (str(status.created_at.hour)) + ':' + (str(status.created_at.minute)) + ':' + (str(status.created_at.second))
            date_est = datetime.datetime.now()
            h_m_s_est = (str(date_est.hour)) + ':' + (str(date_est.minute)) + ':' + (str(date_est.second))
            row.user_name = user
            row.tweet = tweet
            row.coord_x = coord_x
            row.coord_y = coord_y
            row.date_utc = date_utc
            row.h_m_s_utc = h_m_s_utc
            row.date_est = date_est
            rows.insertRow(row)
            del row, rows
            insert_table = r"C:\....dbf"
            insert_row(insert_table)
            print user
            print tweet
        except:
            # If there are no coordinates for a tweet, then pass
            pass

    def on_error(self, status_code):
        print >> sys.stderr, 'Encountered error with status code:', status_code
        return True  # Don't kill the stream

    def on_timeout(self):
        print >> sys.stderr, 'Timeout...'
        return True  # Don't kill the stream

# ----------------Script execution----------------
listener = tweepy.streaming.Stream(auth, CustomStreamListener())
listener.filter(track=[' love ', '#love'])
I am pretty sure it has something to do with the row.rowID thing.
Sorry if it is a disaster! Any help is much appreciated!
It looks like you're forgetting to use the data access (.da) module for the insert cursor.
with arcpy.da.InsertCursor(in_table, field_names) as inCursor:
    for row in rows:
        inCursor.insertRow(row)  # example
-or-
inCursor = arcpy.da.InsertCursor(in_table, field_names)
for row in rows:
    inCursor.insertRow(row)  # example
del inCursor  # make sure to delete the cursor if you do it this way, to avoid a data lock
Also, if you just want the insert cursor functionality, you can:
from arcpy import da
For more info, check out:
http://resources.arcgis.com/en/help/main/10.2/index.html#//018w0000000t000000
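Applied to the question's listener, the insert could look roughly like this (a sketch, assuming the .dbf table really has these eight fields; note that da cursors take a plain tuple per row instead of the old row-object pattern):

fields = ['user_name', 'tweet', 'coord_x', 'coord_y',
          'date_utc', 'h_m_s_utc', 'date_est', 'h_m_s_est']

class CustomStreamListener(tweepy.StreamListener):
    def on_status(self, status):
        try:
            coord_x, coord_y = status.coordinates['coordinates']
            date_utc = status.created_at
            date_est = datetime.datetime.now()
            with arcpy.da.InsertCursor(table, fields) as cursor:
                # One value per field, in the same order as the fields list
                cursor.insertRow((status.user.screen_name, status.text,
                                  coord_x, coord_y, date_utc,
                                  date_utc.strftime('%H:%M:%S'),
                                  date_est, date_est.strftime('%H:%M:%S')))
        except Exception:
            pass  # no coordinates on this tweet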

Python script to harvest tweets to a MongoDb works with users but not hashtags. Any ideas why not?

I'm playing around with the Twitter API and am in the process of developing a script to pull all tweets with a certain hashtag down to a local MongoDB. It works fine when I'm downloading tweets from users, but when downloading tweets for a hashtag I get:
return loads(fp.read(),
AttributeError: 'int' object has no attribute 'read'
Can anyone offer their infinite wisdom on how I could get this script to work?
To run, save it as a .py file, cd to the folder and run:
python twitter.py
Code:
__author__ = 'Tom Cusack'
import pymongo
import oauth2 as oauth
import urllib2, json
import sys, argparse, time

def oauth_header(url, consumer, token):
    params = {'oauth_version': '1.0',
              'oauth_nonce': oauth.generate_nonce(),
              'oauth_timestamp': int(time.time()),
             }
    req = oauth.Request(method='GET', url=url, parameters=params)
    req.sign_request(oauth.SignatureMethod_HMAC_SHA1(), consumer, token)
    return req.to_header()['Authorization'].encode('utf-8')

def main():
    ### Twitter Settings
    numtweets = '32000'
    verbose = 'store_true'
    retweet = 'store_false'
    CONSUMER_KEY = 'M7Xu9Wte0eIZvqhb4G9HnIn3G'
    CONSUMER_SECRET = 'c8hB4Qwps2aODQUx7UsyzQuCRifEp3PKu6hPQll8wnJGIhbKgZ'
    ACCESS_TOKEN = '3213221313-APuXuNjVMbRbZpu6sVbETbgqkponGsZJVT53QmG'
    ACCESS_SECRET = 'BJHrqWC9ed3pA5oDstSMCYcUcz2pYF3DmJ7jcuDe7yxvi'
    base_url = url = 'https://api.twitter.com/1.1/search/tweets.json?include_entities=true&count=200&q=#mongodb&include_rts=%s' % (retweet)
    oauth_consumer = oauth.Consumer(key=CONSUMER_KEY, secret=CONSUMER_SECRET)
    oauth_token = oauth.Token(key=ACCESS_TOKEN, secret=ACCESS_SECRET)

    ### Mongodb Settings
    uri = 'mongodb://127.0.0.1:27017/SARKY'
    if uri != None:
        try:
            conn = pymongo.MongoClient(uri)
            print 'Pulling Tweets..'
        except:
            print 'Error: Unable to connect to DB. Check uri variable.'
            return
    uri_parts = pymongo.uri_parser.parse_uri(uri)
    db = conn[uri_parts['database']]
    db['twitter-harvest'].ensure_index('id_str')

    ### Helper Variables for Harvest
    max_id = -1
    tweet_count = 0
    stream = 0

    ### Begin Harvesting
    while True:
        auth = oauth_header(url, oauth_consumer, oauth_token)
        headers = {"Authorization": auth}
        request = urllib2.Request(url, headers=headers)
        try:
            stream = urllib2.urlopen(request)
        except urllib2.HTTPError, err:
            if err.code == 404:
                print 'Error: Unknown user. Check --user arg'
                return
            if err.code == 401:
                print 'Error: Unauthorized. Check Twitter credentials'
                return
        tweet_list = json.load(stream)
        if len(tweet_list) == 0:
            print 'No tweets to harvest!'
            return
        if 'errors' in tweet_list:
            print 'Hit rate limit, code: %s, message: %s' % (tweets['errors']['code'], tweets['errors']['message'])
            return
        if max_id == -1:
            tweets = tweet_list
        else:
            tweets = tweet_list[1:]
        if len(tweets) == 0:
            print 'Finished Harvest!'
            return
        for tweet in tweets:
            max_id = id_str = tweet['id_str']
            try:
                if tweet_count == numtweets:
                    print 'Finished Harvest- hit numtweets!'
                    return
                if uri != None:
                    db[user].update({'id_str': id_str}, tweet, upsert=True)
                else:
                    print tweet['text']
                tweet_count += 1
                if verbose == True and uri != None:
                    print tweet['text']
            except Exception, err:
                print 'Unexpected error encountered: %s' % (err)
                return
        url = base_url + '&max_id=' + max_id

if __name__ == '__main__':
    try:
        main()
    except SystemExit as e:
        if e.code == 0:
            pass
You initially set stream = 0. When your try...except block catches an HTTP error whose code isn't 404 or 401, stream is still equal to 0, but your except block doesn't break out of the function. Execution then falls through to json.load(stream), and calling .read() on the integer 0 is exactly what produces the AttributeError.
I'd look more closely at what that response says.
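A minimal way to guard against that in the question's loop is to handle every other error code explicitly, so stream can never be left as 0 (a sketch of just the try/except, in the same Python 2 style as the script):

try:
    stream = urllib2.urlopen(request)
except urllib2.HTTPError, err:
    if err.code == 404:
        print 'Error: Unknown user. Check --user arg'
        return
    if err.code == 401:
        print 'Error: Unauthorized. Check Twitter credentials'
        return
    # Anything else: show the body so you can see why the request failed,
    # and bail out instead of falling through with stream still equal to 0
    print 'HTTP error %s: %s' % (err.code, err.read())
    return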
