Check if data is returned Twitter API - python

class StdOutListener(StreamListener):
    """Stream listener that records every incoming tweet in the ``Tweets`` table."""

    def on_data(self, data):
        """Decode one raw JSON payload and insert its author, text and place name.

        The ``["place"]["name"]`` lookup is unguarded, so a tweet that carries
        no location makes this method raise.
        """
        payload = json.loads(data)
        row = (
            payload["user"]["screen_name"],
            payload["text"],
            payload["place"]["name"],
        )
        c.execute("INSERT INTO Tweets (Username, Tweet, Location) VALUES (?,?,?)", row)
I have set up a StreamListener to follow a specified Twitter account. Whenever a Tweet is posted, the username, tweet and location are retrieved and stored in an SQL database. The problem I'm encountering is that if no location is present, the program fails. Is there any way to check that a location value is present?

just use a try/except
# Twitter sends `"place": null` for tweets without a location, so the lookup
# can fail with TypeError (indexing None) as well as KeyError (key missing).
try:
    Location = all_data["place"]["name"]
except (KeyError, TypeError):
    Location = "anywhere"

Related

Stream a twitter list using Tweepy

I have a problem while I want to stream a specific public Twitter list using Tweepy. I can stream a specific user, but the filter follow doesn't work in this case. I have quite a long list of accounts I would like to stream to do further analysis, so I prepared a list with all of them on twitter. Does anyone know how to handle that?
My code is as follows:
import tweepy
import sys
class MyStreamListener(tweepy.StreamListener):
    """Stream listener that appends one CSV row per received status to ``out.csv``."""

    def on_status(self, status):
        """Write date, author, retweet/quote flags, text and quoted text of *status*."""
        print(status.id_str)
        # if "retweeted_status" attribute exists, flag this tweet as a retweet.
        is_retweet = hasattr(status, "retweeted_status")
        # check if text has been truncated
        if hasattr(status, "extended_tweet"):
            text = status.extended_tweet["full_text"]
        else:
            text = status.text
        # check if this is a quote tweet.
        is_quote = hasattr(status, "quoted_status")
        quoted_text = ""
        if is_quote:
            # check if quoted tweet's text has been truncated before recording it
            if hasattr(status.quoted_status, "extended_tweet"):
                quoted_text = status.quoted_status.extended_tweet["full_text"]
            else:
                quoted_text = status.quoted_status.text
        # remove characters that might cause problems with csv encoding.
        # str.replace returns a new string (strings are immutable), so the
        # result has to be assigned back or the text stays unchanged.
        remove_characters = [",", "\n"]
        for ch in remove_characters:
            text = text.replace(ch, " ")
            quoted_text = quoted_text.replace(ch, " ")
        with open("out.csv", "a", encoding='utf-8') as f:
            f.write("%s,%s,%s,%s,%s,%s\n" % (status.created_at,status.user.screen_name,is_retweet,is_quote,text,quoted_text))

    def on_error(self, status_code):
        """Report the streaming error code and terminate the program."""
        print("Encountered streaming error (", status_code, ")")
        sys.exit()
# Twitter application credentials (placeholders).
consumer_key = "..."
consumer_secret = "..."
access_token = "..."
access_token_secret = "..."

# Build the OAuth handler and the API object used by the stream.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)
if not api:
    print("Authentication failed!")
    sys.exit(-1)

myStreamListener = MyStreamListener()
myStream = tweepy.Stream(
    auth=api.auth,
    listener=myStreamListener,
    tweet_mode='extended',
)

# Start every run with a CSV file that contains only the header row.
with open("out.csv", "w", encoding='utf-8') as f:
    f.write("date,user,is_retweet,is_quote,text,quoted_text\n")

# Blocks while statuses for the followed user id are delivered to the listener.
myStream.filter(follow=['52286608'])
You should be able to use the follow parameter with a comma-separated list of user IDs. From the Twitter API documentation:
follow
A comma-separated list of user IDs, indicating the users whose Tweets should be delivered on the stream. Following protected users is not supported. For each user specified, the stream will contain:
- Tweets created by the user.
- Tweets which are retweeted by the user.
- Replies to any Tweet created by the user.
- Retweets of any Tweet created by the user.
- Manual replies, created without pressing a reply button (e.g. “#twitterapi I agree”).
The stream will not contain:
- Tweets mentioning the user (e.g. “Hello #twitterapi!”).
- Manual Retweets created without pressing a Retweet button (e.g. “RT #twitterapi The API is great”).
- Tweets by protected users.
You can follow up to 5000 IDs this way.
Note that the API you are connecting to has been superseded by the v2 filtered stream API, but Tweepy does not currently support that.

Python: tweepy/psycopg2 not inserting data into tables

I'm streaming Twitter data from the API into a Postgres database by modeling this script. Using those exact methods, I'm able to stream the data successfully into the two tables (one containing user_id/user_name, and the other containing data). I've been able to make minor changes to extract a few other bits of information, but using these methods I'm only collecting retweets given a keyword list, and I would like to collect all tweets given the list. Based on the way the original script collects and stores retweet user_ids and user_names, I changed the code and tried to stream into a new table without making any references to retweets. Unfortunately, the result was two empty tables. The code otherwise ran fine and printed statements to the terminal; there was just no data. Why would this be? Below is my code:
import psycopg2
import tweepy
import json
import numpy as np
# Importing postgres credentials
import postgres_credentials
# Importing twitter credentials
import twitter_credentials
# Accessing Twitter from the app created in my account
def autorize_twitter_api():
    """Build a Tweepy OAuth handler from the keys in ``twitter_credentials``.

    Returns:
        The ``tweepy.OAuthHandler`` with the access token already set.
    """
    creds = twitter_credentials
    # Consumer key/secret identify the app; the access token pair is set afterwards
    handler = tweepy.OAuthHandler(creds.CONSUMER_KEY, creds.CONSUMER_SECRET)
    handler.set_access_token(creds.ACCESS_TOKEN, creds.ACCESS_TOKEN_SECRET)
    return handler
def create_tweets_table(term_to_search):
    """Create the user table and the tweet table if they do not exist yet.

    Opens a connection to the already created Postgres database, issues both
    ``CREATE TABLE IF NOT EXISTS`` statements, commits and closes again.

    Args:
        term_to_search: Unused; kept so existing callers keep working.
    """
    # Connect to Twitter Database created in Postgres
    conn_twitter = psycopg2.connect(
        dbname=postgres_credentials.dbname,
        user=postgres_credentials.user,
        password=postgres_credentials.password,
        host=postgres_credentials.host,
        port=postgres_credentials.port,
    )
    # try/finally so the connection and cursor are released even when a
    # statement fails.
    try:
        cursor_twitter = conn_twitter.cursor()
        try:
            cursor_twitter.execute(
                "CREATE TABLE IF NOT EXISTS test_twitter_users "
                "(user_id VARCHAR PRIMARY KEY, user_name VARCHAR);"
            )
            # The foreign key has to point at the user table created above
            # (test_twitter_users), which is also the table that
            # store_tweets_in_table inserts users into.
            cursor_twitter.execute(
                "CREATE TABLE IF NOT EXISTS test_tweet_text "
                "(id SERIAL, created_at_utc timestamp, tweet text NOT NULL, "
                "user_id VARCHAR, user_name VARCHAR, PRIMARY KEY(id), "
                "FOREIGN KEY(user_id) REFERENCES test_twitter_users(user_id));"
            )
            # Commit changes
            conn_twitter.commit()
        finally:
            cursor_twitter.close()
    finally:
        conn_twitter.close()
def store_tweets_in_table(term_to_search, created_at_utc, tweet, user_id, user_name):
    """Insert one tweet and its author into the Postgres tables.

    The author goes into ``test_twitter_users`` (skipped when the ``user_id``
    already exists) and the tweet into ``test_tweet_text``; both inserts are
    committed together.

    Args:
        term_to_search: Unused; kept so existing callers keep working.
        created_at_utc: Value for the ``created_at_utc`` column.
        tweet: Value for the ``tweet`` column.
        user_id: Author id, stored in both tables.
        user_name: Author name, stored in both tables.
    """
    # Connect to Twitter Database created in Postgres
    conn_twitter = psycopg2.connect(
        dbname=postgres_credentials.dbname,
        user=postgres_credentials.user,
        password=postgres_credentials.password,
        host=postgres_credentials.host,
        port=postgres_credentials.port,
    )
    # try/finally: a failing INSERT must not leak the connection. The caller
    # catches and prints the exception, so without this every failed tweet
    # would leave one more connection open.
    try:
        cursor_twitter = conn_twitter.cursor()
        try:
            cursor_twitter.execute(
                "INSERT INTO test_twitter_users (user_id, user_name) VALUES (%s, %s) ON CONFLICT(user_id) DO NOTHING;",
                (user_id, user_name))
            cursor_twitter.execute(
                "INSERT INTO test_tweet_text (created_at_utc, tweet, user_id, user_name) VALUES (%s, %s, %s, %s);",
                (created_at_utc, tweet, user_id, user_name))
            # Commit changes
            conn_twitter.commit()
        finally:
            cursor_twitter.close()
    finally:
        conn_twitter.close()
class MyStreamListener(tweepy.StreamListener):
    """Listener that stores each streamed message through ``store_tweets_in_table``."""

    def on_data(self, raw_data):
        """Parse one raw JSON message and persist its user and tweet fields.

        Any exception raised while parsing or storing is printed, not propagated.
        """
        global term_to_search
        try:
            message = json.loads(raw_data)
            author = message['user']
            # Pull out the values for each column
            user_id = author['id']
            user_name = author['name']
            created_at_utc = message['created_at']
            tweet = message['text']
            # Hand them to the database helper
            store_tweets_in_table(term_to_search, created_at_utc, tweet, user_id, user_name)
        except Exception as e:
            print(e)

    def on_error(self, status_code):
        """Return False for status 420, which disconnects the stream; otherwise None."""
        # returning False in on_error disconnects the stream
        return False if status_code == 420 else None
########################################################################
# The guard encloses the loop, so importing this module no longer spins forever.
if __name__ == "__main__":
    term_to_search = ["donald trump", "trump"]
    # Creates the tables for storing the tweets. The statements use
    # CREATE TABLE IF NOT EXISTS, so doing this once before the loop is enough.
    create_tweets_table(term_to_search)
    # Connect to the streaming twitter API
    api = tweepy.API(wait_on_rate_limit_notify=True)
    # Reconnect whenever the stream ends or fails. Only ordinary exceptions
    # trigger a retry, so Ctrl-C (KeyboardInterrupt) and SystemExit still stop
    # the script; the error is printed instead of being silently dropped.
    while True:
        try:
            streamer = tweepy.Stream(auth=autorize_twitter_api(), listener=MyStreamListener(api=api), tweet_mode='extended')
            streamer.filter(track=term_to_search)
        except Exception as e:
            print(e)
            continue
What happens if you print the values in this function? Do you have values there?
def on_data(self, raw_data):
    """Parse one raw JSON message and persist its user and tweet fields.

    Any exception raised while parsing or storing is printed, not propagated.
    """
    global term_to_search
    try:
        message = json.loads(raw_data)
        author = message['user']
        # Pull out the values for each column
        user_id = author['id']
        user_name = author['name']
        created_at_utc = message['created_at']
        tweet = message['text']
        # Hand them to the database helper
        store_tweets_in_table(term_to_search, created_at_utc, tweet, user_id, user_name)
    except Exception as e:
        print(e)
When you print the sql statements, can you see the inserts without data?
I discovered the issue - I was creating two new tables, but inserting data into two different tables.

count parameter is ignored when querying user_timeline in tweepy

I'm trying to use the tweepy library in one of my python projects. When I try the following code that creates a tweepy cursor to fetch a user's timeline status messages, the count parameter is always ignored.
def search(self, username, keyword, consumer_key, consumer_secret, access_token, access_token_secret):
#start twitter auth
try:
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)
user = api.get_user(username)
except Exception as e:
print(str(e))
self.error = str(e)
return
self.followercount = user.followers_count
self.screenname = user.screen_name
results = []
for status in tweepy.Cursor(api.user_timeline, id=username, count=2).items():
try:
tweet = status._json
In this instance, the count is set to 2 in the Cursor object, yet it receives all of them. What am I doing wrong?
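tweepy.Cursor() does not treat count as a limit on the total number of results. In fact, count is not mentioned anywhere in tweepy/cursor.py, the module where tweepy.Cursor is defined; keyword arguments are simply forwarded to the API method on every request, where count only sets how many statuses are fetched per page. Instead, it looks like you might want to use: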
for status in tweepy.Cursor(api.user_timeline, id=username).items(2):
passing the limit to items() instead of as the count keyword argument. See this section in the tweepy Cursor tutorial.

Insert latitude and longitude from Python mysql.connector into MySQL using Tweepy

I'm attempting to use the Twitter Streaming API to put some Tweet objects from geo-encoded Tweets into columns in a MySQL database. Everything was going okay, but I somehow can't get the latitude and longitude into the database.
Here is my code:
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
import mysql.connector
from mysql.connector import errorcode
import json
import datetime
# Open the MySQL connection; utf8mb4 is requested as the connection charset.
cnx = mysql.connector.connect(
    user='root',
    password='',
    host='localhost',
    database='twitterdb',
    charset='utf8mb4',
)
cursor = cnx.cursor()

# Twitter API credentials (left blank here).
ckey = ""
csecret = ""
atoken = ""
asecret = ""
class listener(StreamListener):
    """Stream listener that stores statuses carrying coordinates in ``tweettablegeo``."""

    def on_status(self, status):
        """Insert the status when it has coordinates; ignore it otherwise."""
        if status.coordinates is None:
            return
        created_at = status.created_at
        username = status.user.screen_name
        tweet = str(status.text)
        # Element 0 is stored as longitude, element 1 as latitude
        point = status.coordinates['coordinates']
        lon = str(point[0])
        lat = str(point[1])
        print((str(created_at), ascii(username), ascii(tweet), lon, lat))
        row = (created_at, username, tweet, lon, lat)
        cursor.execute("INSERT into tweettablegeo (created_at, username, tweet, long, lat) VALUES (%s,%s,%s,%s,%s)", row)
        cnx.commit()
        return

    def on_error(self, status):
        """Print the error status reported by the stream."""
        print(status)
# Authenticate and attach a listener instance to the stream.
auth = OAuthHandler(ckey, csecret)
auth.set_access_token(atoken, asecret)
twitterStream = Stream(auth, listener())

# The bounding box spans the whole globe.
twitterStream.filter(
    locations=[-180, -90, 180, 90],
    stall_warnings=True,
)
I'm getting the error
"mysql.connector.errors.ProgrammingError: 1064 (42000): You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'long, lat) VALUES (".
If I comment out the cursor.execute and cnx.commit lines, everything prints out to the console just fine.
I don't really have any coding background so thanks for any insight you can give me!
long is a reserved word in MySQL. To use it as a column name in a query you must quote it:
# `long` is wrapped in backticks because it is a reserved word in MySQL.
insert_sql = (
    "INSERT into tweettablegeo (created_at, username, tweet, "
    "`long`, lat) VALUES (%s, %s, %s, %s, %s)"
)
cursor.execute(insert_sql, (created_at, username, tweet, long, lat))

Trying to use Tweepy/Twitters Streaming API and psycopg2 to populate a PostgreSQL database. Very close, one line off

I've been working on trying to populate a table in a PostgreSQL database using Tweepy and Twitter's Streaming API. I'm extremely close, I believe I'm just one line away from getting it. I've looked at many examples including:
http://andrewbrobinson.com/2011/07/15/using-tweepy-to-access-the-twitter-stream/
http://blog.creapptives.com/post/14062057061/the-key-value-store-everyone-ignored-postgresql
Python tweepy writing to sqlite3 db
tweepy stream to sqlite database - invalid synatx
Using tweepy to access Twitter's Streaming API
etc, etc
I'm at the point where I can stream tweets quite easily using Tweepy, so I know my consumer key, consumer secret, access key and access secret are correct. I also have Postgres set up, and am successfully connecting to the database I created. I tested hard-coded values into the table in my database using psycopg2 from a .py file, and that is also working. I am getting tweets streamed in based on keywords I select, and am successfully connected to a table in a database. Now I just need the tweets to stream into the table in my Postgres database. Like I said, I am so close and any help would be so greatly appreciated.
This stripped down script inserts data into my desired table:
import psycopg2
# Connect first. Nothing below can work without a connection, so report the
# failure and re-raise instead of carrying on with `conn` undefined.
try:
    conn = psycopg2.connect("dbname=teststreamtweets user=postgres password=x host=localhost")
    print("connected")
except psycopg2.Error:
    print("unable to connect")
    raise

# One dict per row; the keys match the named placeholders in the INSERT below.
namedict = (
    {"first_name": "Joshua", "last_name": "Drake"},
    {"first_name": "Steven", "last_name": "Foo"},
    {"first_name": "David", "last_name": "Bar"},
)
cur = conn.cursor()
cur.executemany("""INSERT INTO testdata(first_name, last_name) VALUES (%(first_name)s, %(last_name)s)""", namedict)
conn.commit()
Below is the script I have been editing for a while now trying to get it to work:
import psycopg2
import time
import json
from getpass import getpass
import tweepy
# Twitter API credentials (placeholders)
consumer_key = 'x'
consumer_secret = 'x'
access_key = 'x'
access_secret = 'x'
# Module-level connection and cursor, opened once when the script starts
connection = psycopg2.connect("dbname=teststreamtweets user=postgres password=x host=localhost")
cursor = connection.cursor()
# always use this step to begin clean
def reset_cursor():
    """Replace the module-level ``cursor`` with a fresh one from ``connection``."""
    # Without the ``global`` declaration the assignment would only bind a
    # function-local name and the shared cursor would stay unchanged.
    global cursor
    cursor = connection.cursor()
class StreamWatcherListener(tweepy.StreamListener):
    """Stream listener that tries to insert every received payload into ``tweets``."""

    def on_data(self, data):
        """Open a new connection, attempt the insert and commit.

        On any exception the connection is rolled back, ``reset_cursor()`` is
        called, "not saving" is printed and the method returns early.
        """
        # NOTE(review): `data` is handed to executemany exactly as received,
        # unparsed - presumably the reason nothing is saved; confirm.
        try:
            print('before cursor' + data)
            connection = psycopg2.connect("dbname=teststreamtweets user=postgres password=x host=localhost")
            db_cursor = connection.cursor()
            print('status is: ' + str(connection.status))
            #cur.execute("INSERT INTO tweet_list VALUES (%s)" % (data.text))
            db_cursor.executemany("""INSERT INTO tweets(tweet) VALUES (%(text)s)""", data)
            connection.commit()
            print('---------')
            print(type(data))
            #print data
        except Exception:
            connection.rollback()
            reset_cursor()
            print("not saving")
            return
        # This inspects the module-level cursor, not the one created above
        if cursor.lastrowid is None:
            print("Unable to save")

    def on_error(self, status_code):
        """Print the error code and return True."""
        print('Error code = %s' % status_code)
        return True

    def on_timeout(self):
        """Print a notice that the stream timed out."""
        print('timed out.....')
print('welcome')

# Authenticate with the credentials defined at the top of the script
auth1 = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth1.set_access_token(access_key, access_secret)
api = tweepy.API(auth1)

l = StreamWatcherListener()
print('about to stream')
stream = tweepy.Stream(auth=auth1, listener=l)

# Keywords to track on the filtered stream
setTerms = ['microsoft']
#stream.sample()
stream.filter(track=setTerms)
Sorry if it's a bit messy of code, but have been trying many options. Like I said any suggestions, links to helpful examples, etc would be greatly appreciated as I've tried everything I can think of and am now resorting to a long walk. Thanks a ton.
Well, I'm not sure why you are using classes for this, and then why you don't have __init__ defined in your class. Seems complicated.
Here is a basic version of the functions I use to do this stuff. I've only ever used sqlite for it, but the syntax looks basically the same. Maybe you can get something from this.
def retrieve_tweets(numtweets=10, *args):
"""
This function optionally takes one or more arguments as keywords to filter tweets.
It iterates through tweets from the stream that meet the given criteria and sends them
to the database population function on a per-instance basis, so as to avoid disaster
if the stream is disconnected.
Both SampleStream and FilterStream methods access Twitter's stream of status elements.
"""
filters = []
for key in args:
filters.append(str(key))
if len(filters) == 0:
stream = tweetstream.SampleStream(username, password)
else:
stream = tweetstream.FilterStream(username, password, track=filters)
try:
count = 0
while count < numtweets:
for tweet in stream:
# a check is needed on text as some "tweets" are actually just API operations
# the language selection doesn't really work but it's better than nothing(?)
if tweet.get('text') and tweet['user']['lang'] == 'en':
if tweet['retweet_count'] == 0:
# bundle up the features I want and send them to the db population function
bundle = (tweet['id'], tweet['user']['screen_name'], tweet['retweet_count'], tweet['text'])
db_initpop(bundle)
break
else:
# a RT has a different structure. This bundles the original tweet. Getting the
# retweets comes later, after the stream is de-accessed.
bundle = (tweet['retweeted_status']['id'], tweet['retweeted_status']['user']['screen_name'], \
tweet['retweet_count'], tweet['retweeted_status']['text'])
db_initpop(bundle)
break
count += 1
except tweetstream.ConnectionError, e:
print 'Disconnected from Twitter at '+time.strftime("%d %b %Y %H:%M:%S", time.localtime()) \
+'. Reason: ', e.reason
def db_initpop(bundle):
    """
    Insert one tweet bundle into tblTweets, commit, and print a confirmation.

    bundle is a 4-tuple (tweet_id, user_sn, retweet_count, tweet_text). The last
    two columns receive fixed placeholder strings, which can serve as a check
    that no further expansion was available.
    """
    # unpack the bundle
    tweet_id, user_sn, retweet_count, tweet_text = bundle
    row = (tweet_id, user_sn, retweet_count, tweet_text, 'cleaned text', 'cleaned retweet text')
    curs.execute("""INSERT INTO tblTweets VALUES (null,?,?,?,?,?,?)""", row)
    conn.commit()
    stamp = time.strftime("%d %b %Y %H:%M:%S", time.localtime())
    print('Database populated with tweet '+str(tweet_id)+' at '+stamp)
Good luck!

Categories

Resources