Inserting unicode into MySQL database from python script - python

I am trying to retrieve the text, longitude, and latitude from a tweepy StreamListener and then store the data in my SQL database. I am able to store the coordinates fine, but for some reason the unicode is not working.
For the SQL I have:
mysql> CREATE TABLE tweets (tweet nvarchar(140), lat float(10,6) not null, lng float(10,6) not null) engine=myisam;
For my python script I have (not including the main()):
import mysql.connector
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
from authenticator import Authenticator
import json
# Connect to the MySQL database; the cursor is shared by the listener below.
conn = mysql.connector.connect(user='root', password='arlenyu123',host='localhost',database='twitter')
mycursor = conn.cursor()
class MyStreamListener(StreamListener):
    """Stream listener that inserts statuses carrying coordinates into `tweets`."""
    def on_status(self, status):
        """Insert the status text and its two coordinate values, then return True."""
        if status.coordinates is not None:
            #for some reason the status text isn't being fed into the database properly... not sure why.
            # NOTE(review): the values are pasted into the SQL text with str.format,
            # so the tweet text reaches the server unquoted and unescaped.
            # NOTE(review): coordinates look like GeoJSON [longitude, latitude] -
            # confirm; if so, index 0 is being written to the `lat` column.
            mycursor.execute('INSERT INTO tweets (tweet, lat, lng) VALUES ({},{},{})'.
            format(status.text, status.coordinates['coordinates'][0], status.coordinates['coordinates'][1]))
        return True
    def on_error(self, status_code):
        """Print a message and return False when the status code is 403."""
        if status_code == 403:
            print("The request is understood, but it has been refused or access is not allowed. Limit is maybe reached")
            return False
Please note that I am a beginner so any advice is appreciated.

You should never use string interpolation to create SQL commands. Use the parameter substitution that is provided by the SQL connector.
# Point coordinates arrive in GeoJSON order, [longitude, latitude], so index 0
# is the longitude and index 1 is the latitude.
point = status.coordinates['coordinates']
lng = point[0]
lat = point[1]
# The connector quotes and escapes each %s value itself, which is what makes
# arbitrary (unicode) tweet text safe to insert.
mycursor.execute('INSERT INTO tweets (tweet, lat, lng) VALUES (%s,%s,%s)',
                 (status.text, lat, lng))

Related

fastapi snowflake connection only pulling 1 record

I am trying to read data from snowflake database using FASTAPI. I was able to create the connection which is able to pull data from snowflake.
The issue which I am facing right now is that I am only getting 1 record (instead of 10 records).
I suspect I am not using the correct keyword when returning the data. I'd appreciate any help.
Here is my code :-
from fastapi import FastAPI
import snowflake.connector as sf
import configparser
# Snowflake connection settings (placeholder values).
username='username_value'
password='password_value'
account= 'account_value'
warehouse= 'test_wh'
database= 'test_db'
# One connection is opened at import time and shared by every call.
ctx=sf.connect(user=username,password=password,account=account,warehouse=warehouse,database=database)
app = FastAPI()
# NOTE(review): the line below is a comment, so no route is registered for
# fetchdata here - presumably it was meant to be an `@app.get(...)` decorator; confirm.
#app.get('/test API')
async def fetchdata():
    """Select the warehouse, database and schema, run the query, and return a row."""
    cursor = ctx.cursor()
    cursor.execute("USE WAREHOUSE test_WH ")
    cursor.execute("USE DATABASE test_db")
    cursor.execute("USE SCHEMA test_schema")
    sql = cursor.execute ("SELECT DISTINCT ID,NAME,AGE,CITY FROM TEST_TABLE WHERE AGE > 60")
    for data in sql:
        # The return sits inside the loop, so the function exits on the first
        # row and the remaining rows are never read.
        return data
You use return in your inner for-loop. This will return the first row encountered.
If you want to return all rows as a list, you can probably do (I'm not familiar with the snowflake connector):
return list(data)
instead of the for-loop, or return sql.fetchall().

Python: tweepy/psycopg2 not inserting data into tables

I'm streaming Twitter data from the API into a Postgres database by modeling this script. Using those exact methods, I'm able to stream the data successfully into the two tables (one containing user_id/user_name, and the other containing data). I've been able to make minor changes to extract a few other bits of information, but using these methods I'm only collecting retweets given a keyword list and I would like to collect all tweets given the list. Based on the way the original script is collecting/storing retweet user_ids and user_names, I changed the code and tried to stream into a new table without making any references to retweets. Unfortunately, the result was two empty tables. The code ran fine otherwise and was printing statements to the terminal; there was just no data. Why would this be? Below is my code:
import psycopg2
import tweepy
import json
import numpy as np
# Importing postgres credentials
import postgres_credentials
# Importing twitter credentials
import twitter_credentials
# Accesing twitter from the App created in my account
def autorize_twitter_api():
    """
    Build the Tweepy OAuth handler for the app registered in your Twitter account.

    Reads the consumer key/secret and the access token/secret from the
    ``twitter_credentials`` module and returns the configured handler.
    """
    creds = twitter_credentials
    # Consumer key pair first, then the access token pair
    handler = tweepy.OAuthHandler(creds.CONSUMER_KEY, creds.CONSUMER_SECRET)
    handler.set_access_token(creds.ACCESS_TOKEN, creds.ACCESS_TOKEN_SECRET)
    return handler
def create_tweets_table(term_to_search):
    """
    Create the two tables that receive streamed tweets, if they do not exist yet.

    Opens a connection using ``postgres_credentials``, creates
    ``test_twitter_users`` and ``test_tweet_text``, commits, and always closes
    the connection, even when a statement fails.

    ``term_to_search`` is kept for existing callers but is not used: the table
    names are fixed.
    """
    # Connect to Twitter Database created in Postgres
    conn_twitter = psycopg2.connect(dbname=postgres_credentials.dbname, user=postgres_credentials.user, password=postgres_credentials.password, host=postgres_credentials.host,
                                    port=postgres_credentials.port)
    try:
        # The cursor context manager closes the cursor on success and on error
        with conn_twitter.cursor() as cursor_twitter:
            cursor_twitter.execute("CREATE TABLE IF NOT EXISTS test_twitter_users (user_id VARCHAR PRIMARY KEY, user_name VARCHAR);")
            # The foreign key must point at the users table created just above.
            # It used to reference `twitter_users`, a table this script neither
            # creates nor fills, so tweet rows could not match a user row.
            query_create = "CREATE TABLE IF NOT EXISTS %s (id SERIAL, created_at_utc timestamp, tweet text NOT NULL, user_id VARCHAR, user_name VARCHAR, PRIMARY KEY(id), FOREIGN KEY(user_id) REFERENCES test_twitter_users(user_id));" % (
                "test_tweet_text")
            cursor_twitter.execute(query_create)
        # Commit changes
        conn_twitter.commit()
    finally:
        # Closing without a commit discards any half-finished transaction
        conn_twitter.close()
    return
def store_tweets_in_table(term_to_search, created_at_utc, tweet, user_id, user_name):
    """
    Insert one tweet and its author into the Postgres tables.

    The author goes into ``test_twitter_users`` (skipped if the user_id is
    already present) and the tweet into ``test_tweet_text``. Both inserts are
    committed together. The connection is always closed, so a failing insert
    no longer leaks a connection for every tweet received.

    ``term_to_search`` is kept for existing callers but is not used.
    Any database error propagates to the caller.
    """
    # Connect to Twitter Database created in Postgres
    conn_twitter = psycopg2.connect(dbname=postgres_credentials.dbname, user=postgres_credentials.user, password=postgres_credentials.password, host=postgres_credentials.host,
                                    port=postgres_credentials.port)
    try:
        # The cursor context manager closes the cursor on success and on error
        with conn_twitter.cursor() as cursor_twitter:
            # The user row goes first so the tweet's foreign key can be satisfied
            cursor_twitter.execute(
                "INSERT INTO test_twitter_users (user_id, user_name) VALUES (%s, %s) ON CONFLICT(user_id) DO NOTHING;",
                (user_id, user_name))
            cursor_twitter.execute(
                "INSERT INTO test_tweet_text (created_at_utc, tweet, user_id, user_name) VALUES (%s, %s, %s, %s);",
                (created_at_utc, tweet, user_id, user_name))
        # Commit changes
        conn_twitter.commit()
    finally:
        # Closing without a commit discards the failed transaction
        conn_twitter.close()
    return
class MyStreamListener(tweepy.StreamListener):
    """Stream listener that stores every received tweet via store_tweets_in_table."""

    def on_data(self, raw_data):
        """Decode one raw stream message and store its tweet; print any error."""
        global term_to_search
        try:
            payload = json.loads(raw_data)
            # Pull out the value for each column, author fields first
            author = payload['user']
            author_id = author['id']
            author_name = author['name']
            posted_at = payload['created_at']
            body = payload['text']
            # Hand the row to the database helper
            store_tweets_in_table(term_to_search, posted_at, body, author_id, author_name)
        except Exception as e:
            print(e)

    def on_error(self, status_code):
        """Return False for status 420, otherwise None."""
        # returning False in on_error disconnects the stream
        return False if status_code == 420 else None
########################################################################
# The guard wraps the loop (not the other way round) so that importing this
# module does not spin forever in an empty `while True`.
if __name__ == "__main__":
    # Terms to track; kept at module level because MyStreamListener.on_data
    # reads `term_to_search` as a global.
    term_to_search = ["donald trump","trump"]
    # Creates the tables for storing the tweets (a no-op when they already exist)
    create_tweets_table(term_to_search)
    # Connect to the streaming twitter API
    api = tweepy.API(wait_on_rate_limit_notify=True)
    # Stream the tweets, reconnecting whenever the stream ends or fails
    while True:
        try:
            streamer = tweepy.Stream(auth=autorize_twitter_api(), listener=MyStreamListener(api=api),tweet_mode='extended')
            streamer.filter(track=term_to_search)
        except KeyboardInterrupt:
            # Let Ctrl-C stop the script; the old bare `except` swallowed it
            break
        except Exception as e:
            # Report the failure instead of hiding it, then reconnect
            print(e)
            continue
What happens if you print the values in this function? Do you have values there?
def on_data(self, raw_data):
    """Decode one raw stream message and store its tweet; print any error."""
    global term_to_search
    try:
        payload = json.loads(raw_data)
        # Pull out the value for each column, author fields first
        author = payload['user']
        author_id = author['id']
        author_name = author['name']
        posted_at = payload['created_at']
        body = payload['text']
        # Hand the row to the database helper
        store_tweets_in_table(term_to_search, posted_at, body, author_id, author_name)
    except Exception as e:
        print(e)
When you print the sql statements, can you see the inserts without data?
I discovered the issue - I was creating two new tables, but inserting data into two different tables.

Python - How to parse and save JSON to MYSQL database

As the title indicates, how does one use python to elegantly access an API and parse and save the JSON contents onto a relational database (MYSQL) for later access?
Here, I saved the data onto a pandas object. But how do I create a mysql database, save the json contents onto it, and access the contents for later use?
# Libraries
import json, requests
import pandas as pd
from pandas.io.json import json_normalize
# Set URL
url = 'https://api-v2.themuse.com/jobs'
# Request pages 0-99; each iteration rebinds `data` to that page's 'results',
# so only the most recent page is held after an iteration.
for i in range(100):
    data = json.loads(requests.get(
        url=url,
        params={'page': i}
    ).text)['results']
# NOTE(review): the original indentation was lost; this line is assumed to run
# once after the loop (i.e. on the last page only) - confirm.
data_norm = pd.read_json(json.dumps(data))
You create your MySQL table on your server using something like MySQL Workbench CE, then in Python you do this. I wasn't sure if you want to use data in the for loop or data_norm, so for ease of use, here are some functions. insertDb() can be put in your for loop, since data will be overwritten in every iteration.
import MySQLdb
def dbconnect():
    """Open and return a MySQLdb connection to the local database.

    Exits the process with an error message, including the driver's reason,
    when the connection cannot be made.
    """
    # Imported here because the snippet's import list only has MySQLdb; without
    # it the error path would raise NameError instead of exiting.
    import sys
    try:
        db = MySQLdb.connect(
            host='localhost',
            user='root',
            passwd='password',
            db='nameofdb'
        )
    except MySQLdb.Error as e:
        sys.exit("Can't connect to database: %s" % e)
    return db
def insertDb(payload=None):
    """Store one JSON document in `nameoftable.nameofcolumn`.

    payload: the object to store. When omitted, the module-level `data`
        variable is used, which is what the zero-argument call did before.

    The payload is serialised with json.dumps and passed as a single bound
    parameter. Errors are printed rather than raised (best effort, as before);
    the connection is always closed.
    """
    # Local import: the snippet's import list only has MySQLdb
    import json
    if payload is None:
        payload = data
    db = None
    try:
        db = dbconnect()
        cursor = db.cursor()
        # Parameters must be a sequence: `(x)` is just `x`, so a list payload
        # was being unpacked as many parameters. `(x,)` binds it as one value.
        cursor.execute(
            "INSERT INTO nameoftable(nameofcolumn) VALUES (%s)",
            (json.dumps(payload),))
        # Without a commit the insert is discarded when the connection closes
        db.commit()
        cursor.close()
    except Exception as e:
        print(e)
    finally:
        if db is not None:
            db.close()
If this is merely for storage for processing later, kind of like a cache, a varchar field is enough. If, however, you need to retrieve some structured data, a JSON field is what you need.

Insert latitude and longitude from Python mysql.connector into MySQL using Tweepy

I'm attempting to use the Twitter Streaming API to put some Tweet objects from geo-encoded Tweets into columns in a MySQL database. Everything was going okay, but I somehow can't get the latitude and longitude into the database.
Here is my code:
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
import mysql.connector
from mysql.connector import errorcode
import json
import datetime
# Open the MySQL connection, requesting utf8mb4 as the connection charset.
cnx = mysql.connector.connect(user='root', password='',
                              host='localhost',
                              database='twitterdb',
                              charset = 'utf8mb4')
cursor=cnx.cursor()
# Twitter API credentials (left blank here).
ckey=""
csecret=""
atoken=""
asecret=""
class listener(StreamListener):
    """Stream listener that inserts statuses carrying coordinates into `tweettablegeo`."""
    def on_status(self, status):
        """Insert one status with coordinates and commit; other statuses are skipped."""
        if status.coordinates is not None:
            created_at = status.created_at
            username = status.user.screen_name
            tweet = str(status.text)
            # Index 0 is treated as the longitude and index 1 as the latitude
            long = str(status.coordinates['coordinates'][0])
            lat = str(status.coordinates['coordinates'][1])
        else:
            return
        print((str(created_at),ascii(username),ascii(tweet),long,lat))
        # NOTE(review): `long` is used as an unquoted column name in this
        # statement; the reported 1064 syntax error points at exactly that spot.
        cursor.execute("INSERT into tweettablegeo (created_at, username, tweet, long, lat) VALUES (%s,%s,%s,%s,%s)",(created_at, username, tweet, long, lat))
        cnx.commit()
        return
    def on_error(self, status):
        """Print the error status reported by the stream."""
        print(status)
# Authenticate and start streaming with a bounding box covering the whole globe.
auth = OAuthHandler(ckey, csecret)
auth.set_access_token(atoken, asecret)
twitterStream = Stream(auth, listener())
twitterStream.filter(locations=[-180,-90,180,90], stall_warnings = True)
I'm getting the error
"mysql.connector.errors.ProgrammingError: 1064 (42000): You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'long, lat) VALUES (".
If I comment out the cursor.execute and cnx.commit lines, everything prints out to the console just fine.
I don't really have any coding background so thanks for any insight you can give me!
long is a reserved word in MySQL. To use it as a column name in a query you must quote it:
# Backticks around `long` let MySQL read the reserved word as a column name.
insert_sql = (
    "INSERT into tweettablegeo "
    "(created_at, username, tweet, `long`, lat) "
    "VALUES (%s, %s, %s, %s, %s)"
)
row = (created_at, username, tweet, long, lat)
cursor.execute(insert_sql, row)

Trying to use Tweepy/Twitters Streaming API and psycopg2 to populate a PostgreSQL database. Very close, one line off

I've been working on trying to populate a table in a PostgreSQL database using Tweepy and Twitter's Streaming API. I'm extremely close; I believe I'm just one line away from getting it. I've looked at many examples including:
http://andrewbrobinson.com/2011/07/15/using-tweepy-to-access-the-twitter-stream/
http://blog.creapptives.com/post/14062057061/the-key-value-store-everyone-ignored-postgresql
Python tweepy writing to sqlite3 db
tweepy stream to sqlite database - invalid synatx
Using tweepy to access Twitter's Streaming API
etc, etc
I'm at the point where I can stream tweets quite easily using Tweepy, so I know my consumer key, consumer secret, access key and access secret are correct. I also have Postgres set up, and am successfully connecting to the database I created. I tested inserting hard-coded values into the table in my database using psycopg2 from a .py file, and that is also working. I am getting tweets streamed in based on keywords I select, and am successfully connected to a table in a database. Now I just need the tweets to stream into the table in my Postgres database. Like I said, I am so close and any help would be so greatly appreciated.
This stripped down script inserts data into my desired table:
import psycopg2
# Connect first; a failed connection is reported and then re-raised, because
# continuing would only fail later with a NameError on `conn`.
try:
    conn = psycopg2.connect("dbname=teststreamtweets user=postgres password=x host=localhost")
    print("connected")
except psycopg2.Error:
    print("unable to connect")
    raise
# One dict per row; the keys match the named placeholders in the INSERT below.
namedict = (
    {"first_name":"Joshua", "last_name":"Drake"},
    {"first_name":"Steven", "last_name":"Foo"},
    {"first_name":"David", "last_name":"Bar"}
)
cur = conn.cursor()
cur.executemany("""INSERT INTO testdata(first_name, last_name) VALUES (%(first_name)s, %(last_name)s)""", namedict)
conn.commit()
Below is the script I have been editing for a while now trying to get it to work:
import psycopg2
import time
import json
from getpass import getpass
import tweepy
consumer_key = 'x'
consumer_secret = 'x'
access_key = 'x'
access_secret = 'x'
connection = psycopg2.connect("dbname=teststreamtweets user=postgres password=x host=localhost")
cursor = connection.cursor()
#always use this step to begin clean
def reset_cursor():
    """Replace the module-level `cursor` with a fresh one from `connection`."""
    # Without this declaration the assignment only binds a local name and the
    # shared cursor is never replaced.
    global cursor
    cursor = connection.cursor()
class StreamWatcherListener(tweepy.StreamListener):
    """Stream listener that stores the text of each received tweet in `tweets`."""

    def on_data(self, data):
        """Insert the tweet carried by one raw stream message.

        `data` is the raw JSON string handed over by the stream. It is decoded
        first; messages that are not valid JSON or have no 'text' field are
        reported with "not saving" and skipped. Database errors roll back the
        transaction and are reported the same way. Always returns None, so
        the stream keeps running.
        """
        print('before cursor' + data)
        try:
            status = json.loads(data)
        except ValueError:
            print("not saving")
            return
        # Some stream messages are not tweets and carry no text to store
        if not isinstance(status, dict) or 'text' not in status:
            print("not saving")
            return
        try:
            # Reuse the module-level connection; opening a new one per tweet
            # leaked a connection for every message received.
            cur = connection.cursor()
            print('status is: ' + str(connection.status))
            # One row per message: execute() with a mapping for the named
            # placeholder. executemany() on the raw string iterated over it
            # character by character.
            cur.execute("""INSERT INTO tweets(tweet) VALUES (%(text)s)""",
                        {'text': status['text']})
            connection.commit()
            # rowcount of the cursor that ran the INSERT tells whether a row
            # was written; the module-level cursor never executed anything.
            saved = cur.rowcount == 1
            cur.close()
        except psycopg2.Error:
            connection.rollback()
            reset_cursor()
            print("not saving")
            return
        if not saved:
            print("Unable to save")

    def on_error(self, status_code):
        """Print the error code and return True."""
        print('Error code = %s' % status_code)
        return True

    def on_timeout(self):
        """Report that the stream timed out."""
        print('timed out.....')
print 'welcome'
# Build the OAuth handler from the four credentials defined earlier in the script.
auth1 = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth1.set_access_token(access_key, access_secret)
# `api` is created here but not used by the lines that follow.
api = tweepy.API(auth1)
l = StreamWatcherListener()
print 'about to stream'
stream = tweepy.Stream(auth = auth1, listener = l)
# Keywords to track on the filtered stream.
setTerms = ['microsoft']
#stream.sample()
stream.filter(track = setTerms)
Sorry if it's a bit messy of code, but have been trying many options. Like I said any suggestions, links to helpful examples, etc would be greatly appreciated as I've tried everything I can think of and am now resorting to a long walk. Thanks a ton.
Well, I'm not sure why you are using classes for this, and then why you don't have __init__ defined in your class. Seems complicated.
Here is a basic version of the functions I use to do this stuff. I've only ever used sqlite for it, but the syntax looks basically the same. Maybe you can get something from this.
def retrieve_tweets(numtweets=10, *args):
    """
    Pull tweets from the stream and store them one at a time.

    numtweets: how many passes over the stream to make; each pass stores at
        most one tweet.
    *args: optional keywords used to filter the stream. With no keywords the
        sample stream is used, otherwise the filter stream.

    Each qualifying tweet is sent to the database population function
    immediately, so as to avoid disaster if the stream is disconnected.
    A tweetstream.ConnectionError is reported on stdout and ends the function.
    """
    filters = [str(key) for key in args]
    if not filters:
        stream = tweetstream.SampleStream(username, password)
    else:
        stream = tweetstream.FilterStream(username, password, track=filters)
    try:
        count = 0
        while count < numtweets:
            for tweet in stream:
                # a check is needed on text as some "tweets" are actually just API operations
                # the language selection doesn't really work but it's better than nothing(?)
                if not (tweet.get('text') and tweet['user']['lang'] == 'en'):
                    continue
                if tweet['retweet_count'] == 0:
                    # bundle up the features I want and send them to the db population function
                    bundle = (tweet['id'], tweet['user']['screen_name'], tweet['retweet_count'], tweet['text'])
                else:
                    # a RT has a different structure. This bundles the original tweet. Getting the
                    # retweets comes later, after the stream is de-accessed.
                    source = tweet['retweeted_status']
                    bundle = (source['id'], source['user']['screen_name'],
                              tweet['retweet_count'], source['text'])
                db_initpop(bundle)
                # one stored tweet per pass of the outer loop
                break
            count += 1
    except tweetstream.ConnectionError as e:
        stamp = time.strftime("%d %b %Y %H:%M:%S", time.localtime())
        # The two spaces after "Reason:" reproduce the output of the original
        # print statement, which put a space between its two items.
        print('Disconnected from Twitter at ' + stamp + '. Reason:  ' + str(e.reason))
def db_initpop(bundle):
    """
    Write the basic features of one tweet to tblTweets and commit.

    `bundle` is a 4-tuple: (tweet id, screen name, retweet count, text).
    The last two columns receive placeholder strings; these can act as a
    check to verify that no further expansion was available for that method.
    """
    # unpack the bundle
    tweet_id, user_sn, retweet_count, tweet_text = bundle
    row = (tweet_id, user_sn, retweet_count, tweet_text, 'cleaned text', 'cleaned retweet text')
    curs.execute("""INSERT INTO tblTweets VALUES (null,?,?,?,?,?,?)""", row)
    conn.commit()
    message = 'Database populated with tweet '+str(tweet_id)
    message = message+' at '+time.strftime("%d %b %Y %H:%M:%S", time.localtime())
    print(message)
Good luck!

Categories

Resources