insert data into mysql table using python

I'm a beginner in Python, and I'm trying to insert my JSON file data into my database table using Python. The problem is that I get no errors, I just get:
tweet number 49634 is uploading to the server
I don't see where the problem is. I would appreciate any help.
import mysql.connector
import json

mydb = mysql.connector.connect(host='localhost', port='3306', user='root', password='nihad147', database='tweets')
mycursor = mydb.cursor()
sql_request = 'insert ignore into tweet_location (latitude, longitude, tweet_id) values (%s,%s,%s)'

myJsonFile = open('tweet.json', encoding="utf-8")
c = 0
for line in myJsonFile:
    c = c + 1
    print("tweet number ", c, " is uploading to the server")
    data = json.loads(line)
    tweet = "SELECT * FROM tweet WHERE tweet_id = '" + str(data['tweet_id']) + "'"
    mycursor.execute(tweet)
    myresult = mycursor.fetchall()
    row_count = mycursor.rowcount
    if row_count == 0:
        if 'location' in data.keys() and data['location'] != None and 'address' in data['location']:
            latitude = data['location']['lat']
            longitude = data['location']['lon']
        mycursor.execute(sql_request, (latitude, longitude, data['tweet_id']))
        print('------------')
        mydb.commit()
Here's an example of my JSON file data:
{"tweet_id":"1261276320878788609",
"date":"Fri May 15 12:44:42 +0000 2020",
"raw_text":"برنامج وطني لدعم المبدعين في مواجهة #كورو"
"geo_source":"user_location",
"location":{"address":
{"country":"Tunisia","country_code":"tn","state_district":"غزالة","county":"العرب","state":"Bizerte"},
"response":
"{'place_id': 235309103, 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright', 'osm_type': 'relation', 'osm_id': 7124228, 'boundingbox': ['37.105957', '37.2033466', '9.4739053', '9.6124953'], 'lat': '37.1551868', 'lon': '9.54834183807249', 'display_name': 'العرب, غزالة, Bizerte, Tunisia', 'class': 'boundary', 'type': 'administrative', 'importance': 0.45, 'icon': '/data/nominatimimages/mapicons/poi_boundary_administrative.p.20.png','address':{'county': 'العرب', 'state_district': 'غزالة', 'state': 'Bizerte', 'country': 'Tunisia', 'country_code': 'tn'}}",
"geohash":"snwg37buskzd","query_term":"arab","lon":9.54834183807249,"lat":37.1551868},
"user_friends_count":61,"user_description":"I love UAE and his great leadership",
"user_created_at":"Wed Oct 09 11:41:41 +0000 2013",
"user_screen_name":"SikandarMirani",
"user_id_str":"706377881",
"user_verified":false,
"user_statuses_count":50804,
"user_followers_count":946,
"user_location":"Dubai United Arab Emirates"}
And these are my tweet table's attributes:
tweet_id,
id_user,
text,
tweet_location,
created_at,
name_screen,
categorie_id,

Don't read the JSON file one line at a time. Use json.load() to load the entire file into a dictionary.
Use a parameter in the query that selects the tweet, rather than concatenating data['tweet_id'] into the SQL.
The code that inserts the new row should be inside all the if statements that set latitude and longitude from the data. In fact, you might as well put all the database code inside the if statement that checks whether location is set in the JSON.
import mysql.connector
import json

mydb = mysql.connector.connect(host='localhost', port='3306', user='root', password='nihad147', database='tweets')
mycursor = mydb.cursor()
sql_request = 'insert ignore into tweet_location (latitude, longitude, tweet_id) values (%s,%s,%s)'

with open('tweet.json', encoding="utf-8") as myJsonFile:
    data = json.load(myJsonFile)

if data.get('location') and 'address' in data['location']:
    tweet = "SELECT 1 FROM tweet WHERE tweet_id = %s"
    mycursor.execute(tweet, (data['tweet_id'],))
    myresult = mycursor.fetchall()
    row_count = len(myresult)
    if row_count == 0:
        print(f"Inserting {data['tweet_id']} to the database")
        latitude = data['location']['lat']
        longitude = data['location']['lon']
        mycursor.execute(sql_request, (latitude, longitude, data['tweet_id']))
        mydb.commit()
        print('------------')
    else:
        print(f"Tweet {data['tweet_id']} is already in the database")

Related

add API currency convert to postgres table in python

I have an exercise to do:
Using the psycopg2 library, connect to my database and get the top 10 customers along with their credit limits:
import psycopg2

connection = psycopg2.connect(
    host='host',
    user='user',
    password='password',
    dbname='db_name',
)
cursor = connection.cursor()
cursor.execute('SELECT customername, creditlimit FROM customers order by 2 desc limit 10')
for row in cursor:
    print(f"{row[0]}: {row[1]} USD")
This is the result of the query above:
As the next step, I need to connect to an API and convert this credit limit to different currencies (EUR, JPY, GBP), so the final output will look like this:
Euro+ Shopping Channel: 227600.00 USD / 187183.93 EUR / 24017262.40 JPY / 160578.63 GBP
...
This is the code for API (the key is valid, feel free to test):
headers = {
    "apikey": "b6iGXz0JZcr2iyV0SeLcd0dEpqOr8ngK"
}
currency = ['EUR', 'JPY', 'GBP']
params = {
    "from": "USD",
    "to": 'EUR',
    "amount": row[1]
}
url = "https://api.apilayer.com/exchangerates_data/convert"
response = requests.get(url, headers=headers, params=params)
if response.status_code == 200:
    data = response.json()
I connected to the API, but I'm having difficulty joining the conversions to my SQL data. Please advise.
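A minimal sketch of one way to join the two pieces is to loop over the query results and call the /convert endpoint once per target currency. The connection values are the placeholders from the question, and the "result" field read from the API response is an assumption, since the response body isn't shown above:

import psycopg2
import requests

API_URL = "https://api.apilayer.com/exchangerates_data/convert"
HEADERS = {"apikey": "b6iGXz0JZcr2iyV0SeLcd0dEpqOr8ngK"}
CURRENCIES = ['EUR', 'JPY', 'GBP']

connection = psycopg2.connect(host='host', user='user', password='password', dbname='db_name')
cursor = connection.cursor()
cursor.execute('SELECT customername, creditlimit FROM customers ORDER BY 2 DESC LIMIT 10')

for name, credit_limit in cursor.fetchall():
    parts = [f"{credit_limit} USD"]
    for currency in CURRENCIES:
        params = {"from": "USD", "to": currency, "amount": credit_limit}
        response = requests.get(API_URL, headers=HEADERS, params=params)
        if response.status_code == 200:
            converted = response.json().get("result")  # assumed field name in the response
            if converted is not None:
                parts.append(f"{converted:.2f} {currency}")
    print(f"{name}: " + " / ".join(parts))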

connect a json file data to mysql database (empty table)

I'm a beginner in Python. I'm trying to connect a database to Python, with a JSON file loaded in my program, so I can read it and eventually analyze it for certain things. But I can't connect to it, and I've tried different approaches and still get the same error.
import mysql.connector
import json
# create the key
from mysql.connector import cursor

mydb = mysql.connector.connect(host='localhost', port='3306', user='root', password='nihad147', database='tweets')
mycursor = mydb.cursor()

sql_tweet = """INSERT INTO tweet ( tweet_id,
                                   id_user,
                                   text,
                                   tweet_location,
                                   created_at,
                                   name_screen,
                                   categorie_id,
                                   )
               VALUES (%s,%s,%s,%s,%s,%s,%s)"""

sql_user = """INSERT INTO tweetuser (
                                   id_user,
                                   name_screen,
                                   location_user,
                                   count_followers,
                                   friends_count,
                                   statuse_count)
              VALUES (%s,%s,%s,%s,%s,%s)"""

sql_location = """"insert into tweet_location (
                                   location_id,
                                   latitude,
                                   longitude
                                   tweet_id
                  VALUES(%s,%s,%s,%s)"""

myJsonFile = open('tweets.json', encoding="utf-8")

mycursor.execute("DELETE FROM tweet")
mycursor.execute("DELETE FROM tweetuser")
mycursor.execute("DELETE FROM tweet_location")

c = 0
for line in myJsonFile:
    c = c + 1
    print("tweet number ", c, " is uploading to the server")
    data = json.loads(line)

    # insert into tweet
    val_tweet = (
        data['tweet_id'], data['user_id_str'], data['raw_text'], data['location']['address']['city'], data['date'], data['user_screen_name'])
    mycursor.execute(sql_tweet, sql_location, val_tweet)
    mydb.commit()

    # testing if the user already exists
    user = "SELECT * FROM tweetuser WHERE id_user = '" + str(data['user_id_str']) + "'"
    mycursor.execute(user)
    myresult = mycursor.fetchall()
    row_count = mycursor.rowcount
    if row_count == 0:
        val_user = (data['user_id_str'], data['user_screen_name'], data['location']['address']['city'], data['user_followers_count'],
                    data['user_friends_count'], data['user_statuses_count'])
        mycursor.execute(sql_user, val_user)
        mydb.commit()
print('done')
Here's an example of the JSON file data:
{
"tweet_id":"1261276320878788609",
"date":"Fri May 15 12:44:42 +0000 2020",
"raw_text":"برنامج وطني لدعم المبدعين في مواجهة #كورون",
"geo_source":"user_location",
"location":{
"address":{
"country":"Tunisia",
"country_code":"tn",
"state_district":"غزالة",
"county":"العرب",
"state":"Bizerte"
},
"response":"{'place_id': 235309103, 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright', 'osm_type': 'relation', 'osm_id': 7124228, 'boundingbox': ['37.105957', '37.2033466', '9.4739053', '9.6124953'], 'lat': '37.1551868', 'lon': '9.54834183807249', 'display_name': 'العرب, غزالة, Bizerte, Tunisia', 'class': 'boundary', 'type': 'administrative', 'importance': 0.45, 'icon': '/data/nominatimimages/mapicons/poi_boundary_administrative.p.20.png','address':{'county': 'العرب', 'state_district': 'غزالة', 'state': 'Bizerte', 'country': 'Tunisia', 'country_code': 'tn'}}",
"geohash":"snwg37buskzd",
"query_term":"arab",
"lon":9.54834183807249,
"lat":37.1551868
},
"user_friends_count":61,
"user_description":"I love UAE and his great leadership",
"user_created_at":"Wed Oct 09 11:41:41 +0000 2013",
"user_screen_name":"SikandarMirani",
"user_id_str":"706377881",
"user_verified":false,
"user_statuses_count":50804,
"user_followers_count":946,
"user_location":"Dubai United Arab Emirates"
}
Thanks to you guys, I was able to solve the previous error: I hadn't checked the data type of the user ID, which has to be BIGINT, not INT, since the values are large.
I had no problem connecting my JSON file to my database, but the data got inserted only into the tweetuser table, not into the tweet table.
The tweet table is empty.
I would appreciate any kind of help. Thank you.
The error
mysql.connector.errors.DataError: 1264 (22003): Out of range value for column 'id_user' at row 1
suggests that the value you are trying to use as the id_user is numerically too large.
Since you haven't posted the table definitions, my guess is you are using MEDIUMINT or SMALLINT or TINYINT for id_user and the actual user ID that you are trying to write into the database is too large for that data type.
In your example user_id_str is 706377881; however, the maximum values for MEDIUMINT are 8388607 (signed) and 16777215 (unsigned), respectively.
Check the data types in the table definitions.
You are connecting to your DB, that is not the problem.
The problem is that the user ID you are trying to insert is larger than the maximum value MySQL allows for that column's data type.
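To make that concrete, here is a small hedged sketch (assuming the tweets database and tweetuser table from the question) of how the column's type could be inspected and widened; the ALTER TABLE target type follows the asker's own BIGINT fix:

import mysql.connector

mydb = mysql.connector.connect(host='localhost', port='3306', user='root', password='nihad147', database='tweets')
mycursor = mydb.cursor()

# inspect the current data type of id_user
mycursor.execute("""
    SELECT COLUMN_TYPE
    FROM INFORMATION_SCHEMA.COLUMNS
    WHERE TABLE_SCHEMA = 'tweets' AND TABLE_NAME = 'tweetuser' AND COLUMN_NAME = 'id_user'
""")
print(mycursor.fetchone())  # e.g. ('int(11)',)

# widen the column so large user IDs fit
mycursor.execute("ALTER TABLE tweetuser MODIFY id_user BIGINT")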

Dynamically write mysql table schema into Avro format

I want to collect the data from a MySQL table and convert it to Avro format using Python.
Consider this table in mysql
dept_no, dept_name
'd001', 'Marketing'
'd002', 'Finance'
'd003', 'Human Resources'
'd004', 'Production'
'd005', 'Development'
'd006', 'Quality Management'
'd007', 'Sales'
'd008', 'Research'
'd009', 'Customer Service'
mycursor.execute('select * from employees')
results = mycursor.fetchall()
When I fetch the results using the above query, I get them back as a list of tuples.
Whereas to convert to Avro format, the schema has to be defined in the following format.
By hard-coding we can achieve the format below; the following code generates the Avro file.
schema = {
    'doc': 'A weather reading.',
    'name': 'Weather',
    'namespace': 'test',
    'type': 'record',
    'fields': [
        {'name': 'dept_no', 'type': 'string'},
        {'name': 'dept_name', 'type': 'string'},
    ],
}
And the records as
records = [
    {u'dept_no': u'd001', u'dept_name': 'Marketing'},
    {u'dept_no': u'd002', u'dept_name': 'Finance'},
    {u'dept_no': u'd003', u'dept_name': 'Human Resources'},
    {u'dept_no': u'd004', u'dept_name': 'Production'},
]
The question here is: how do I map the schema and the data into the above format dynamically, using Python?
import mysql.connector

mydb = mysql.connector.connect(
    host="********************",
    user="***********",
    passwd="**********",
    database='***********'
)

def byte_to_string(x):
    temp_table_list = []
    for row in x:
        table = row[0].decode()
        temp_table_list.append(table)
    return temp_table_list

mycursor = mydb.cursor()

# Query to list all the tables
mycursor.execute("show tables")
r = mycursor.fetchall()
r = byte_to_string(r)
print(r)
x = len(r)

# Fetch all the records from table EMPLOYEES using SELECT *
mycursor.execute('select * from employees')
results = mycursor.fetchall()
print(type(results))
print(results)

# Displays data of table employees record by record
for i in results:
    print(i)
    print(type(i))

# Fetching data from 2nd table departments
mycursor.execute('select * from departments')
data = [i[0] for i in mycursor.fetchall()]
mycursor.execute('select * from departments')
data1 = [i[1] for i in mycursor.fetchall()]
print(data)
print(data1)

#zipbObj = zip(data,data1)
#dictOfWords = dict(zipbObj)
#print(dictOfWords)

mycursor.execute('SELECT `COLUMN_NAME`\
    FROM `INFORMATION_SCHEMA`.`COLUMNS`\
    WHERE `TABLE_SCHEMA`="triggerdb1"\
    AND `TABLE_NAME`="departments"')

# Fetching the column names of the table as keys
keys = [i[0] for i in mycursor.fetchall()]
print(keys)

'''
zipbObj = zip(column_schema,data1)
dictOfWords = dict(zipbObj)
print(dictOfWords)
'''

# Finally we get the header as key and records as values in a dict format
abc = {}
abc[keys[0]] = data1
abc[keys[1]] = data
print(abc)
The result is in the form of:
#print(data)
>>> ['d001', 'd002', 'd003', 'd004', 'd005', 'd006', 'd007', 'd008', 'd009']
#print(data1)
>>>['Marketing', 'Finance', 'Human Resources', 'Production', 'Development', 'Quality Management', 'Sales', 'Research', 'Customer Service']
#print(keys)
>>>['dept_no', 'dept_name']
#print(abc)
>>>{'dept_no': ['Marketing', 'Finance', 'Human Resources', 'Production', 'Development', 'Quality Management', 'Sales', 'Research', 'Customer Service'], 'dept_name': ['d001', 'd002', 'd003', 'd004', 'd005', 'd006', 'd007', 'd008', 'd009']}
The question here is: how do I dynamically map the schema and the data from the resulting tuples/dictionary and convert them to Avro using Python?
Thanks in advance!
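A hedged sketch of one way to do this dynamically is to take the column names from cursor.description and build both the Avro schema fields and the record dicts from them. It assumes the fastavro package, treats every column as a string for simplicity, and masks the connection values as in the question:

import mysql.connector
from fastavro import parse_schema, writer

# connection values masked as in the question
mydb = mysql.connector.connect(host="****", user="****", passwd="****", database="triggerdb1")
mycursor = mydb.cursor()

mycursor.execute('select * from departments')
rows = mycursor.fetchall()

# column names come straight from the cursor, so this adapts to any table
columns = [desc[0] for desc in mycursor.description]

schema = {
    'doc': 'departments table dump',
    'name': 'Department',
    'namespace': 'test',
    'type': 'record',
    # every column treated as a string here; a fuller version would map MySQL types to Avro types
    'fields': [{'name': col, 'type': 'string'} for col in columns],
}

# one dict per row, keyed by column name
records = [dict(zip(columns, (str(value) for value in row))) for row in rows]

with open('departments.avro', 'wb') as out:
    writer(out, parse_schema(schema), records)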

What are the possible ways for JSON data processing using SQL, elastic search or preprocessing using python

I have a case study where I need to take data from a REST API, do some analysis on it using aggregate functions, joins, etc., and use the response data in JSON format to plot some retail graphs.
Approaches followed so far:
Read the data from JSON, store it in Python variables, and insert it with SQL queries. Obviously this is a costly operation, because it performs an insert for every JSON line read. For 33k rows it takes more than 20 minutes, which is inefficient.
This could be handled in Elasticsearch for faster processing, but complex operations like joins are not available in Elasticsearch.
If anybody can suggest the best approach (like preprocessing or post-processing in Python) for handling such scenarios, it would be helpful.
Thanks in advance
SQL script:
def store_data(AccountNo):
    db = MySQLdb.connect(host=HOST, user=USER, passwd=PASSWD, db=DATABASE, charset="utf8")
    cursor = db.cursor()
    insert_query = "INSERT INTO cstore (AccountNo) VALUES (%s)"
    cursor.execute(insert_query, (AccountNo))
    db.commit()
    cursor.close()
    db.close()
    return

def on_data(file_path):
    # This is the meat of the script...it connects to your mongoDB and stores the tweet
    try:
        # Decode the JSON from Twitter
        testFile = open(file_path)
        datajson = json.load(testFile)
        #print (len(datajson))
        # grab the wanted data from the Tweet
        for i in range(len(datajson)):
            for cosponsor in datajson[i]:
                AccountNo = cosponsor['AccountNo']
                store_data(AccountNo)
Edit 1: JSON added
{
"StartDate": "1/1/18",
"EndDate": "3/30/18",
"Transactions": [
{
"CSPAccountNo": "41469300",
"ZIP": "60098",
"ReportDate": "2018-03-08T00:00:00",
"POSCode": "00980030003",
"POSCodeModifier": "0",
"Description": "TIC TAC GUM WATERMEL",
"ActualSalesPrice": 1.59,
"TotalCount": 1,
"Totalsales": 1.59,
"DiscountAmount": 0,
"DiscountCount": 0,
"PromotionAmount": 0,
"PromotionCount": 0,
"RefundAmount": 0,
"RefundCount": 0
},
{
"CSPAccountNo": "41469378",
"ZIP": "60098",
"ReportDate": "2018-03-08T00:00:00",
"POSCode": "01070080727",
"POSCodeModifier": "0",
"Description": "PAYDAY KS",
"ActualSalesPrice": 2.09,
"TotalCount": 1,
"Totalsales": 2.09,
"DiscountAmount": 0,
"DiscountCount": 0,
"PromotionAmount": 0,
"PromotionCount": 0,
"RefundAmount": 0,
"RefundCount": 0
}
]
}
I do not have your JSON file, so I don't know whether this is runnable, but I would try something like the code below: read just the account info into a list and then write it to the DB all at once with executemany. I expect it to have a better (lower) execution time than 20 minutes.
def store_data(AccountNo):
    db = MySQLdb.connect(host=HOST, user=USER, passwd=PASSWD, db=DATABASE, charset="utf8")
    cursor = db.cursor()
    # MySQLdb expects pyformat placeholders, i.e. %(key)s rather than :key
    insert_query = "INSERT INTO cstore (AccountNo, ZIP, ReportDate) VALUES (%(AccountNo)s, %(ZIP)s, %(ReportDate)s)"
    cursor.executemany(insert_query, AccountNo)
    db.commit()
    cursor.close()
    db.close()
    return

def on_data(file_path):
    # This is the meat of the script...it connects to your DB and stores the records
    try:
        # declare an empty list for all the account numbers
        accountno_list = list()
        # Decode the JSON
        testFile = open(file_path)
        datajson = json.load(testFile)
        # print (len(datajson))
        # grab the wanted data from each transaction
        for row in datajson['Transactions']:
            values = dict()
            values['AccountNo'] = row['CSPAccountNo']
            values['ZIP'] = row['ZIP']
            values['ReportDate'] = row['ReportDate']
            # from here on you can populate the attributes you need in a similar way...
            accountno_list.append(values)
    except:
        pass
    store_data(accountno_list)

Iterating rows with Pyodbc

I am using pyodbc to return a number of rows which are dumped into JSON and sent to a server. I would like to iterate over my SQL table and return all records. I am using cursor.fetchall() now, and the program returns one record, as shown below. When I use fetchone an error is returned: AttributeError: 'unicode' object has no attribute 'SRNUMBER', and fetchmany returns one record as well. How do I successfully return all records? I am using Python 2.6.7.
Code:
import pyodbc
import json
import collections
import requests

connstr = 'DRIVER={SQL Server};SERVER=server;DATABASE=ServiceRequest; UID=SA;PWD=pwd'
conn = pyodbc.connect(connstr)
cursor = conn.cursor()

cursor.execute("""
    SELECT SRNUMBER, FirstName, LastName, ParentNumber
    FROM MYLA311 """)

rows = cursor.fetchone()

objects_list = []
for row in rows:
    d = collections.OrderedDict()
    d['SRNUMBER'] = row.SRNUMBER
    d['FirstName'] = row.FirstName
    d['LastName'] = row.LastName
    d['ParentNumber'] = row.ParentNumber

objects_list.append(d)

output = {"MetaData": {},
          "SRData": d}

print output
j = json.dumps(output)
print json.dumps(output, sort_keys=True, indent=4)
Output for fetchall and fetchmany:
{
"MetaData": {},
"SRData": {
"FirstName": "MyLAG",
"LastName": "ThreeEleven",
"ParentNumber": "021720151654176723",
"SRNUMBER": "1-3580171"
}
}
Use code from my answer here to build a list of dictionaries for the value of output['SRData'], then JSON encode the output dict as normal.
import pyodbc
import json

connstr = 'DRIVER={SQL Server};SERVER=server;DATABASE=ServiceRequest; UID=SA;PWD=pwd'
conn = pyodbc.connect(connstr)
cursor = conn.cursor()

cursor.execute("""SELECT SRNUMBER, FirstName, LastName, ParentNumber FROM MYLA311""")

# build list of column names to use as dictionary keys from sql results
columns = [column[0] for column in cursor.description]

results = []
for row in cursor.fetchall():
    results.append(dict(zip(columns, row)))

output = {"MetaData": {}, "SRData": results}
print(json.dumps(output, sort_keys=True, indent=4))
For starters, the line
objects_list.append(d)
needs to be inside the for loop, not outside.
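A minimal sketch of the question's code with that fix applied, keeping the OrderedDict approach and using fetchall so every row is processed (only a sketch, reusing the connection string from the question):

import pyodbc
import json
import collections

connstr = 'DRIVER={SQL Server};SERVER=server;DATABASE=ServiceRequest; UID=SA;PWD=pwd'
conn = pyodbc.connect(connstr)
cursor = conn.cursor()
cursor.execute("""SELECT SRNUMBER, FirstName, LastName, ParentNumber FROM MYLA311""")

objects_list = []
for row in cursor.fetchall():
    d = collections.OrderedDict()
    d['SRNUMBER'] = row.SRNUMBER
    d['FirstName'] = row.FirstName
    d['LastName'] = row.LastName
    d['ParentNumber'] = row.ParentNumber
    objects_list.append(d)  # append once per row, inside the loop

# the whole list, not just the last row, goes into the output
output = {"MetaData": {}, "SRData": objects_list}
print json.dumps(output, sort_keys=True, indent=4)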
