Speed up record inserts into SQL Server database - Python

I am trying to load data from a REST API into a SQL Server table using a Python script. The script below works, but it takes too long to upload the data into the database: a few minutes to load 8000 records.
import requests
import pandas as pd
import pyodbc
import sys
from requests.auth import HTTPBasicAuth

#SQL Connection
conn_str = (
    r'DRIVER={SQL Server Native Client 11.0};'
    r'SERVER=tcp:abcd.database.windows.net;'
    r'DATABASE=DEF;'
    r'UID=xxxxx;'
    r'PWD=yyyy;'
    r'Trusted_Connection=no;'
)
sqlconn = pyodbc.connect(conn_str)
cursor = sqlconn.cursor()
cursor.execute("TRUNCATE TABLE [dbo].[RM_Approved_Room_State]")
sqlconn.commit()

#API Connection
baseURL = 'https://testing.org/api/fdf'
appRoomsResponse = requests.get(baseURL, verify=False)
appRoomsJson = appRoomsResponse.json()
appRoomsDF = pd.json_normalize(appRoomsJson)
appRoomsDF = appRoomsDF.fillna('')

try:
    cursor = sqlconn.cursor()
    for index, row in appRoomsDF.iterrows():
        cursor.execute(
            "INSERT INTO RM_Approved_Room_State"
            "(APPROVED_ROOM_STATEID, SOURCE_ROOMID, DEST_ROOMID, ENTITY_TYPEID) "
            "VALUES (?, ?, ?, ?)",
            row['id'],
            row['sourceRoomRefId'],
            row['destinationRoomRefId'],
            row['entityRefId']
        )
    sqlconn.commit()
except Exception:
    pass

#Load main Dim_appRooms table
cursor = sqlconn.cursor()
sqlconn.commit()
cursor.close()
sqlconn.close()
Is there something I am missing that would increase the speed of the inserts here? This is my first script in Python, so any help is greatly appreciated.
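One commonly suggested way to speed this up (a sketch, not from the original post) is to stop issuing one INSERT per row and instead send the whole DataFrame in a single batch with pyodbc's fast_executemany (available in pyodbc 4.0.19 and later), reusing the table and columns from the script above:

params = appRoomsDF[['id', 'sourceRoomRefId',
                     'destinationRoomRefId', 'entityRefId']].values.tolist()

cursor = sqlconn.cursor()
cursor.fast_executemany = True  # requires pyodbc >= 4.0.19
cursor.executemany(
    "INSERT INTO RM_Approved_Room_State"
    "(APPROVED_ROOM_STATEID, SOURCE_ROOMID, DEST_ROOMID, ENTITY_TYPEID) "
    "VALUES (?, ?, ?, ?)",
    params
)
sqlconn.commit()

With fast_executemany the driver sends the rows in bulk rather than making a round trip per record, which is usually the dominant cost for a few thousand inserts.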

Related

Launch SQL stored procedures from Python with SQLAlchemy?

I can successfully connect to my SQL Server instance from my Jupyter notebook with this script:
from sqlalchemy import create_engine
import pyodbc
import csv
import time
import urllib
params = urllib.parse.quote_plus('''DRIVER={SQL Server Native Client 11.0};
SERVER=SV;
DATABASE=DB;
TRUSTED_CONNECTION=YES;''')
engine = create_engine("mssql+pyodbc:///?odbc_connect=%s" % params)
I managed to execute some SQL scripts like this:
engine.execute("delete from table_name_X")
However, I can't execute stored procedures. I tried the following scripts based on what I've seen about stored procedures with SQLAlchemy. They return something like "sqlalchemy.engine.result.ResultProxy at 0x173ed18e470", but the procedure isn't actually executed (nothing happens):
# test 1
engine.execute('stored_procedure_name')
# test 2
from sqlalchemy import func
from sqlalchemy.orm import sessionmaker
session = sessionmaker(bind=engine)()
session.execute(func.upper('stored_procedure_name'))
Could you please give me the correct way to execute stored procedures?
The way you can call a stored procedure using pyodbc is:
cursor.execute("{CALL usp_StoreProcedure}")
I found a solution by referring to this link: https://github.com/mkleehammer/pyodbc/wiki/Calling-Stored-Procedures
Here is an example:
import pyodbc
import urllib
import sqlalchemy as sa

params = urllib.parse.quote_plus("DRIVER={SQL Server Native Client 11.0};"
                                 "SERVER=xxx.xxx.xxx.xxx;"
                                 "DATABASE=DB;"
                                 "UID=user;"
                                 "PWD=pass")
engine = sa.create_engine("mssql+pyodbc:///?odbc_connect={}".format(params))
connection = engine.raw_connection()
try:
    cursor = connection.cursor()
    cursor.execute("{CALL stored_procedure_name}")
    result = cursor.fetchall()
    print(result)
    connection.commit()
finally:
    connection.close()
I finally solved my problem with the following function:
def execute_stored_procedure(engine, procedure_name):
    res = {}
    connection = engine.raw_connection()
    try:
        cursor = connection.cursor()
        cursor.execute("EXEC " + procedure_name)
        cursor.close()
        connection.commit()
        res['status'] = 'OK'
    except Exception as e:
        res['status'] = 'ERROR'
        res['error'] = e
    finally:
        connection.close()
    return res
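For example, a usage sketch with a hypothetical procedure name:

result = execute_stored_procedure(engine, "usp_refresh_reporting")  # hypothetical name
print(result['status'])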

Access tables from Impala through Python

I need to access tables from Impala through the CLI using Python on the same Cloudera server.
I have tried the code below to establish the connection:
from impala.dbapi import connect  # assuming impyla, given the Impala port 21050

def query_impala(sql):
    cursor = query_impala_cursor(sql)
    result = cursor.fetchall()
    field_names = [f[0] for f in cursor.description]
    return result, field_names

def query_impala_cursor(sql, params=None):
    conn = connect(host='xx.xx.xx.xx', port=21050, database='am_playbook',
                   user='xxxxxxxx', password='xxxxxxxx')
    cursor = conn.cursor()
    cursor.execute(sql.encode('utf-8'), params)
    return cursor
However, since I am on the same Cloudera server, I should not need to provide the host name. Could you please provide the correct code to access Impala/Hive tables that exist on the same server through Python?
You can use PyHive to connect to Hive and access your Hive tables.
from pyhive import hive
import pandas as pd
import datetime

# a single connection; the Tez execution-engine setting is passed via `configuration`
conn = hive.Connection(host="hostname", port=10000, username="XXXX",
                       configuration={'hive.execution.engine': 'tez'})

query = "select col1, col2, col3, col4 from db.yourhiveTable"

start_time = datetime.datetime.now()
data = pd.read_sql(query, conn)
print(data)
end_time = datetime.datetime.now()
print('Finished reading from Hive table in',
      (end_time - start_time).seconds / 60.0, 'minutes')
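If you need Impala itself rather than Hive, here is a minimal sketch using impyla against the local Impala daemon; it assumes the default impalad port 21050 and that connecting through localhost is acceptable since you are on the same node:

from impala.dbapi import connect
from impala.util import as_pandas

conn = connect(host='localhost', port=21050, database='am_playbook')
cursor = conn.cursor()
cursor.execute("select col1, col2 from yourImpalaTable limit 10")
df = as_pandas(cursor)  # convert the result set to a pandas DataFrame
print(df)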

Getting an error in Python while transferring data from SQL Server to Snowflake

I am getting the error below:
query = command % processed_params
TypeError: not all arguments converted during string formatting
I am trying to pull data from SQL Server and then insert it into Snowflake.
My code is below:
import pyodbc
import sqlalchemy
import snowflake.connector

driver = 'SQL Server'
server = 'tanmay'
db1 = 'testing'
tcon = 'no'
uname = 'sa'
pword = '123'

cnxn = pyodbc.connect(driver='{SQL Server}',
                      host=server, database=db1, trusted_connection=tcon,
                      user=uname, password=pword)
cursor = cnxn.cursor()
cursor.execute("select * from Admin_tbldbbackupdetails")
rows = cursor.fetchall()
#for row in rows:
#    #data = [(row[0], row[1],row[2], row[3],row[4], row[5],row[6], row[7])]
print(rows[0])
cnxn.commit()
cnxn.close()

connection = snowflake.connector.connect(user='****', password='****', account='*****')
cursor2 = connection.cursor()
cursor2.execute("USE WAREHOUSE FOOD_WH")
cursor2.execute("USE DATABASE Test")
sql1="INSERT INTO CN_RND.Admin_tbldbbackupdetails_ip"
"(id,dbname, dbpath, backupdate, backuptime, backupStatus, FaildMsg, Backupsource)"
"values (?,?,?,?,?,?,?,?)"
cursor2.execute(sql1, *rows[0])
It's clearly a string formatting error: the parameters you pass are not matching the placeholders in the command string.
If you cannot fix it, step back and try another approach. Use another script to achieve the same thing and get back to your bug tomorrow :-)
My script is doing pretty much the same:

1. Connect to SQL Server
2. fetchmany
3. Multipart upload to S3
4. COPY INTO the Snowflake table

Details are here: Snowpipe-for-SQLServer
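For reference, a possible fix for the original INSERT (a sketch, not tested against your schema): the Snowflake connector's default paramstyle is pyformat, so the placeholders should be %s rather than ?, and the multi-line SQL string needs parentheses so the pieces are actually concatenated. executemany can then load every fetched row in one call:

sql1 = ("INSERT INTO CN_RND.Admin_tbldbbackupdetails_ip "
        "(id, dbname, dbpath, backupdate, backuptime, backupStatus, FaildMsg, Backupsource) "
        "VALUES (%s, %s, %s, %s, %s, %s, %s, %s)")
cursor2.executemany(sql1, [tuple(r) for r in rows])  # rows fetched from SQL Server
connection.commit()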

Python to SQL Server Insert

I'm trying to follow the method for inserting a pandas data frame into SQL Server that is mentioned here, as it appears to be the fastest way to import lots of rows.
However, I am struggling to figure out the connection parameters.
I am not using a DSN; I have a server name, a database name, and I'm using a trusted connection (i.e. Windows login).
import sqlalchemy
import urllib
server = 'MYServer'
db = 'MyDB'
cxn_str = "DRIVER={SQL Server Native Client 11.0};SERVER=" + server +",1433;DATABASE="+db+";Trusted_Connection='Yes'"
#cxn_str = "Trusted_Connection='Yes',Driver='{ODBC Driver 13 for SQL Server}',Server="+server+",Database="+db
params = urllib.parse.quote_plus(cxn_str)
engine = sqlalchemy.create_engine("mssql+pyodbc:///?odbc_connect=%s" % params)
conn = engine.connect().connection
cursor = conn.cursor()
I'm just not sure what the correct way to specify my connection string is. Any suggestions?
I have been working with pandas and SQL Server for a while, and the fastest way I found to insert a lot of data into a table is this:
You can create a temporary CSV using:
df.to_csv('new_file_name.csv', sep=',', encoding='utf-8')
Then use pyodbc and the BULK INSERT Transact-SQL statement:
import pyodbc
conn = pyodbc.connect(DRIVER='{SQL Server}', Server='server_name', Database='Database_name', trusted_connection='yes')
cur = conn.cursor()
cur.execute("""BULK INSERT table_name
FROM 'C:\\Users\\folders path\\new_file_name.csv'
WITH
(
CODEPAGE = 'ACP',
FIRSTROW = 2,
FIELDTERMINATOR = ',',
ROWTERMINATOR = '\n'
)""")
conn.commit()
cur.close()
conn.close()
Then you can delete the file:
import os
os.remove('new_file_name.csv')
It took only a second to load a lot of data at once into SQL Server. I hope this gives you an idea.
Note: don't forget that to_csv writes the DataFrame index as an extra column, so the target table needs a field for it. That was my mistake when I started using this, lol.
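Alternatively, a one-line option (sketch) is to skip writing the index column entirely, so the staging table does not need the extra field:

df.to_csv('new_file_name.csv', sep=',', encoding='utf-8', index=False)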
Connection string parameter values should not be enclosed in quotes so you should use Trusted_Connection=Yes instead of Trusted_Connection='Yes'.
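Putting that together, here is a sketch of a trusted-connection engine combined with pandas' to_sql and pyodbc's fast_executemany (the target table name is hypothetical, and the fast_executemany flag assumes SQLAlchemy 1.3+):

import urllib
import sqlalchemy

server = 'MYServer'
db = 'MyDB'
cxn_str = ("DRIVER={SQL Server Native Client 11.0};"
           "SERVER=" + server + ",1433;"
           "DATABASE=" + db + ";"
           "Trusted_Connection=Yes")  # no quotes around Yes
params = urllib.parse.quote_plus(cxn_str)
engine = sqlalchemy.create_engine(
    "mssql+pyodbc:///?odbc_connect=%s" % params,
    fast_executemany=True)  # batch the INSERTs (SQLAlchemy 1.3+)

# df is the DataFrame to load; 'my_table' is a hypothetical target table
df.to_sql('my_table', engine, if_exists='append', index=False)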

SSH tunnel won't close after downloading SQL server tables from PythonAnywhere

I'm downloading the tables from my MySQL database on PythonAnywhere.com using an SSH tunnel, following their description. With the following code everything works fine in terms of downloading the tables, but the code then hangs at tunnel.close(). Any suggestions on how to stop it from hanging?
from __future__ import print_function
from mysql.connector import connect as sql_connect
import sshtunnel
from sshtunnel import SSHTunnelForwarder
from copy import deepcopy
import cPickle as pickle
import os
import datetime

sshtunnel.SSH_TIMEOUT = 5.0
sshtunnel.TUNNEL_TIMEOUT = 5.0

remote_bind_address = ('{}.mysql.pythonanywhere-services.com'.format(SSH_USERNAME), 3306)
tunnel = SSHTunnelForwarder(('ssh.pythonanywhere.com'),
                            ssh_username=SSH_USERNAME, ssh_password=SSH_PASSWORD,
                            remote_bind_address=remote_bind_address)
tunnel.start()

connection = sql_connect(user=SSH_USERNAME, password=DATABASE_PASSWORD,
                         host='127.0.0.1', port=tunnel.local_bind_port,
                         database=DATABASE_NAME)
print("Connection successful!")

cursor = connection.cursor()  # get the cursor
cursor.execute("USE {}".format(DATABASE_NAME))  # select the database

# fetch all tables
cursor.execute("SHOW TABLES")
tables = deepcopy(cursor.fetchall())  # return data from last query
for (table_name,) in tables:
    if 'contribute' in table_name:
        print(table_name)

# may hang
connection.close()
tunnel.close()
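One thing worth trying (an untested sketch): run the tunnel as a context manager and make sure the MySQL connection is closed before the tunnel shuts down, so nothing is still holding the forwarded port open:

with SSHTunnelForwarder(('ssh.pythonanywhere.com'),
                        ssh_username=SSH_USERNAME, ssh_password=SSH_PASSWORD,
                        remote_bind_address=remote_bind_address) as tunnel:
    connection = sql_connect(user=SSH_USERNAME, password=DATABASE_PASSWORD,
                             host='127.0.0.1', port=tunnel.local_bind_port,
                             database=DATABASE_NAME)
    try:
        # ... run the same queries as above ...
        pass
    finally:
        connection.close()  # close the MySQL connection first
# the tunnel is stopped automatically when the with-block exits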
