Using variable in Snowflake SQL in Python script - python

I am trying to create a view that contains a variable in Snowflake SQL. The whole thing is being done in Python script. Initially, I tried the binding variable approach but binding does not work in view creation SQL. Is there any other way I can proceed with this? I have given the code below.
Code:
import snowflake.connector as sf
import pandas
ctx = sf.connect (
user = 'floatinginthecloud89',
password = '',
account = 'nq13914.southeast-asia.azure',
warehouse = 'compute_wh',
database = 'util_db',
schema = 'public'
)
print("Got the context object")
cs = ctx.cursor()
print("Got the cursor object")
column1 = 'attr_name';
try:
row = cs.execute("select listagg(('''' || attr_name || ''''), ',') from util_db.public.TBL_DIM;")
rows = cs.fetchall()
for row in rows:
print(row)
print(rows)
row1 = cs.execute("""CREATE OR REPLACE table util_db.public.HIERARCHY_VIEW_2 AS SELECT * FROM (SELECT MSTR.PROD_CODE AS PROD_CODE,DIM.ATTR_NAME AS ATTR_NAME,MSTR.ATTR_VALUE AS ATTR_VALUE FROM TBL_DIM DIM INNER JOIN TBL_MSTR MSTR ON DIM.ATTR_KEY=MSTR.ATTR_KEY ) Q
PIVOT (MAX (Q.ATTR_VALUE) FOR Q.ATTR_NAME IN (*row))
AS P
ORDER BY P.PROD_CODE;""")
rows1 = cs.fetchall()
for row1 in rows1:
print(row1)
finally:
cs.close()
ctx.close()
Error:
File "C:\Users\Anand Singh\anaconda3\lib\site-packages\snowflake\connector\errors.py", line 179, in default_errorhandler
raise error_class(
ProgrammingError: 001003 (42000): SQL compilation error:
syntax error line 2 at position 65 unexpected 'row'.

Looking at the Python binding example
and your code it appears, you need
row1 = cs.execute("""CREATE OR REPLACE table util_db.public.HIERARCHY_VIEW_2 AS
SELECT * FROM (
SELECT MSTR.PROD_CODE AS PROD_CODE,DIM.ATTR_NAME AS ATTR_NAME,MSTR.ATTR_VALUE AS ATTR_VALUE
FROM TBL_DIM DIM
INNER JOIN TBL_MSTR MSTR
ON DIM.ATTR_KEY=MSTR.ATTR_KEY
) Q
PIVOT (MAX (Q.ATTR_VALUE) FOR Q.ATTR_NAME IN (%s))
AS P
ORDER BY P.PROD_CODE;""", row)
but *row will pass the many argugments to I have changed to build the string or comman seperated as a single string.

More pythonic way to implement this is using f-string
row1 = cs.execute(f"""CREATE OR REPLACE table util_db.public.HIERARCHY_VIEW_2 AS
SELECT * FROM (
SELECT MSTR.PROD_CODE AS PROD_CODE,DIM.ATTR_NAME AS ATTR_NAME,MSTR.ATTR_VALUE AS ATTR_VALUE
FROM TBL_DIM DIM
INNER JOIN TBL_MSTR MSTR
ON DIM.ATTR_KEY=MSTR.ATTR_KEY
) Q
PIVOT (MAX (Q.ATTR_VALUE) FOR Q.ATTR_NAME IN ({row}))
AS P
ORDER BY P.PROD_CODE;""")
It is also more readable especially if you have multiple parameters in the f-string

Issue resolved! Thanks a lot, Simeon for your help.
import snowflake.connector as sf
import pandas
ctx = sf.connect (
user = 'floatinginthecloud89',
password = 'AzureSn0flake#123',
account = 'nq13914.southeast-asia.azure',
warehouse = 'compute_wh',
database = 'util_db',
schema = 'public'
)
print("Got the context object")
cs = ctx.cursor()
print("Got the cursor object")
column1 = 'attr_name';
try:
row = cs.execute("select listagg(('''' || attr_name || ''''), ',') from util_db.public.TBL_DIM;")
rows = cs.fetchall()
for row in rows:
print(row)
print(rows)
row1 = cs.execute("""CREATE OR REPLACE table util_db.public.HIERARCHY_VIEW_2 AS
SELECT * FROM (
SELECT MSTR.PROD_CODE AS PROD_CODE,DIM.ATTR_NAME AS ATTR_NAME,MSTR.ATTR_VALUE AS ATTR_VALUE
FROM TBL_DIM DIM
INNER JOIN TBL_MSTR MSTR
ON DIM.ATTR_KEY=MSTR.ATTR_KEY
) Q
PIVOT (MAX (Q.ATTR_VALUE) FOR Q.ATTR_NAME IN (%s))
AS P
ORDER BY P.PROD_CODE;""", ','.join(row))
rows1 = cs.fetchall()
for row1 in rows1:
print(row1)

Related

Does pandas support reading data from multiple tables into a dataframe?

I'm using pandas to read SQLl output into a dataframe. I'm calling a stored procedure which returns a table output. Following code works fine.If my stored procedure return more than one table outputs[1], How can I read those from dataframe. I want to write different table outputs into different excel sheets
query='exec [aa].[dbo].[sp_cc]?,?'
df = pd.read_sql(query, cnxn, params=[start,end)
writer = pd.ExcelWriter('output.xlsx')
df.to_excel(writer, index=False, sheet_name='customers')
writer.save()
[1]
CREATE procedure [dbo].[usp_vvvv] (....)
BEGIN
SET NOCOUNT ON
.....
select *
FROM #_temp_client_details
select *
FROM #_temp_address_details
select *
FROM #_temp_invoice_details
drop table #_temp_client_details
drop table #_temp_address_details
drop table #_temp_invoice_details
....
END TRY
BEGIN CATCH
..
END CATCH
END
I hope this can help you :
import pandas as pd
import pyodbc
conn = pyodbc.connect('driver={SQL Server};server=xxx.xxx.x.xxx;uid=myuser;pwd=mypass;database=mybd;autocommit=True')
cursor = conn.cursor()
cursor.execute('exec usp_with_2_select')
writer = pd.ExcelWriter('pandas_simple.xlsx', engine='xlsxwriter')
column_names = [col[0] for col in cursor.description]
df1_data = []
for row in cursor.fetchall():
df1_data.append({name: row[i] for i, name in enumerate(column_names)})
df1 = pd.DataFrame(df1_data)
print(df1)
df1.to_excel(writer,'sheet1')
# this for pass the next result
cursor.nextset ()
df2_data = []
for row in cursor.fetchall():
df2_data.append({name: row[i] for i, name in enumerate(column_names)})
df2 = pd.DataFrame(df2_data)
print(df2)
df2.to_excel(writer,'sheet2')
writer.save()
Why do you need Pandas for this? You can go from SQL Server directly to Excel many different ways. Here is one concept that will work for you. There are many ways to skin this cat...
Sub ADOExcelSQLServer()
' Carl SQL Server Connection
'
' FOR THIS CODE TO WORK
' In VBE you need to go Tools References and check Microsoft Active X Data Objects 2.x library
'
Dim Cn As ADODB.Connection
Dim Server_Name As String
Dim Database_Name As String
Dim User_ID As String
Dim Password As String
Dim SQLStr As String
Dim rs As ADODB.Recordset
Set rs = New ADODB.Recordset
Server_Name = "your_server_name" ' Enter your server name here
Database_Name = "NORTHWND" ' Enter your database name here
User_ID = "" ' enter your user ID here
Password = "" ' Enter your password here
SQLStr = "SELECT * FROM [Customers]" ' Enter your SQL here
Set Cn = New ADODB.Connection
Cn.Open "Driver={SQL Server};Server=" & Server_Name & ";Database=" & Database_Name & _
";Uid=" & User_ID & ";Pwd=" & Password & ";"
rs.Open SQLStr, Cn, adOpenStatic
' Dump to spreadsheet
For iCols = 0 To rs.Fields.Count - 1
Worksheets("Sheet1").Cells(1, iCols + 1).Value = rs.Fields(iCols).Name
Next
With Worksheets("sheet1").Range("a2:z500") ' Enter your sheet name and range here
'.ClearContents
.CopyFromRecordset rs
End With
' Tidy up
rs.Close
Set rs = Nothing
Cn.Close
Set Cn = Nothing
End Sub

Python code not creating tables on the database but able to query the results postgres

My usecase is to write create a temp table in the postgres database and fetch records from it and insert into a different table.
The code i used is:
import psycopg2
import sys
import pprint
from __future__ import print_function
from os.path import join,dirname,abspath
import xlrd
import os.path
newlist = []
itemidlist = []
def main():
conn_string = "host='prod-dump.cvv9i14mrv4k.us-east-1.rds.amazonaws.com' dbname='ebdb' user='ebroot' password='*********'"
# print the connection string we will use to connect
# print "Connecting to database" % (conn_string)
# get a connection, if a connect cannot be made an exception will be raised here
conn = psycopg2.connect(conn_string)
# conn.cursor will return a cursor object, you can use this cursor to perform queries
cursor = conn.cursor()
dealer_id = input("Please enter dealer_id: ")
group_id = input("Please enter group_id: ")
scriptpath = os.path.dirname('__file__')
filename = os.path.join(scriptpath, 'Winco - Gusti.xlsx')
xl_workbook = xlrd.open_workbook(filename, "rb")
xl_sheet = xl_workbook.sheet_by_index(0)
print('Sheet Name: %s' % xl_sheet.name)
row=xl_sheet.row(0)
from xlrd.sheet import ctype_text
print('(Column #) type:value')
for idx, cell_obj in enumerate(row):
cell_type_str = ctype_text.get(cell_obj.ctype, 'unknown type')
#print('(%s) %s %s' % (idx, cell_type_str, cell_obj.value))
num_cols = xl_sheet.ncols
for row_idx in range(0, xl_sheet.nrows): # Iterate through rows
num_cols = xl_sheet.ncols
id_obj = xl_sheet.cell(row_idx, 1) # Get cell object by row, col
itemid = id_obj.value
#if itemid not in itemidlist:
itemidlist.append(itemid)
# execute our Query
'''
cursor.execute("""
if not exists(SELECT 1 FROM model_enable AS c WHERE c.name = %s);
BEGIN;
INSERT INTO model_enable (name) VALUES (%s)
END;
""" %(itemid,itemid))
'''
cursor.execute("drop table temp_mbp1")
try:
cursor.execute("SELECT p.model_no, pc.id as PCid, g.id AS GROUPid into public.temp_mbp1 FROM products p, \
model_enable me, products_clients pc, groups g WHERE p.model_no = me.name \
and p.id = pc.product_id and pc.client_id = %s and pc.client_id = g.client_id and g.id = %s"\
% (dealer_id,group_id)
except (Exception, psycopg2.DatabaseError) as error:
print(error)
cursor.execute("select count(*) from public.temp_mbp1")
# retrieve the records from the database
records = cursor.fetchall()
# print out the records using pretty print
# note that the NAMES of the columns are not shown, instead just indexes.
# for most people this isn't very useful so we'll show you how to return
# columns as a dictionary (hash) in the next example.
pprint.pprint(records)
if __name__ == "__main__":
main()
The try except block in between the program is not throwing any error but the table is not getting created in the postgres database as i see in the data admin.
The output shown is:
Please enter dealer_id: 90
Please enter group_id: 13
Sheet Name: Winco Full 8_15_17
(Column #) type:value
[(3263,)]
Thanks,
Santosh
You didn't commit the changes, so they aren't saved in the database. Add to the bottom, just below the pprint statement:
conn.commit()

Loop not working for sql update statement (mysqldb)

I have a folder called 'testfolder' that includes two files -- 'Sigurdlogfile' and '2004ADlogfile'. Each file has a list of strings called entries. I need to run my code on both of them and am using glob to do this. My code creates a dictionary for each file and stores data extracted using regex where the dictionary keys are stored in commonterms below. Then it inserts each dictionary into a mysql table. It does all of this successfully, but my second sql statement is not inserting how it should (per file).
import glob
import re
files = glob.glob('/home/user/testfolder/*logfile*')
commonterms = (["freq", "\s?(\d+e?\d*)\s?"],
["tx", "#txpattern"],
["rx", "#rxpattern"], ...)
terms = [commonterms[i][0] for i in range(len(commonterms))]
patterns = [commonterms[i][1] for i in range(len(commonterms))]
def getTerms(entry):
for i in range(len(terms)):
term = re.search(patterns[i], entry)
if term:
term = term.groups()[0] if term.groups()[0] is not None else term.groups()[1]
else:
term = 'NULL'
d[terms[i]] += [term]
return d
for filename in files:
#code to create 'entries'
objkey = re.match(r'/home/user/testfolder/(.+?)logfile', filename).group(1)
d = {t: [] for t in terms}
for entry in entries:
d = getTerms(entry)
import MySQLdb
db = MySQLdb.connect(host='', user='', passwd='', db='')
cursor = db.cursor()
cols = d.keys()
vals = d.values()
for i in range(len(entries)):
lst = [item[i] for item in vals]
csv = "'{}'".format("','".join(lst))
sql1 = "INSERT INTO table (%s) VALUES (%s);" % (','.join(cols), csv.replace("'NULL'", "NULL"))
cursor.execute(sql1)
#now in my 2nd sql statement I need to update the table with data from an old table, which is where I have the problem...
sql2 = "UPDATE table, oldtable SET table.key1 = oldtable.key1,
table.key2 = oldtable.key2 WHERE oldtable.obj = %s;" % repr(objkey)
cursor.execute(sql2)
db.commit()
db.close()
The problem is that in the second sql statement, it ends up inserting that data into all columns of the table from only one of the objkeys, but I need it to insert different data depending on which file the code is currently running on. I can't figure out why this is, since I've defined objkey inside my for filename in files loop. How can I fix this?
Instead of doing separate INSERT and UPDATE, do them together to incorporate the fields from the old table.
for i in range(len(entries)):
lst = [item[i] for item in vals]
csv = "'{}'".format("','".join(lst))
sql1 = """INSERT INTO table (key1, key2, %s)
SELECT o.key1, o.key2, a.*
FROM (SELECT %s) AS a
LEFT JOIN oldtable AS o ON o.obj = %s""" % (','.join(cols), csv.replace("'NULL'", "NULL"), repr(objkey))
cursor.execute(sql1)

Postgresql: how to copy multiple columns from one table to another?

I am trying to copy some columns from a table called temporarytable to another one called scalingData using psycopg2 in python.
scalingData is a pandas dataframe. The dataframe contains data from cities such as: nameOfCities, population, etc.
scalingData = pd.read_csv('myFile.csv') ## 'myFile.csv' is the datasource
each column of the dataframe has a different kind of data, such as 'int64', 'float64' or 'O'.
Here a screen shot from Jupyter
import psycopg2 as ps
## Populate table scalingData
tmp = scalingData.dtypes
con = None
con = ps.connect(dbname = 'mydb', user='postgres', host='localhost', password='mypd')
con.autocommit = True
cur = con.cursor()
for i in range(0,5):
j = header[i]
stat = """ ALTER TABLE "scalingData" ADD COLUMN "%s" """%j
if tmp[i] == 'int64':
stat = stat+'bigint'
if tmp[i] == 'float64':
stat = stat+'double precision'
if tmp[i] == 'O':
stat = stat+'text'
### Add Column
cur.execute(stat)
stat1 = """INSERT INTO "scalingData" ("%s") SELECT "%s" FROM temporarytable"""%(j,j)
### Copy Column
cur.execute(stat1)
cur.close()
con.close()
My problem is that if I look at scalingData only the first column is copied while the others are empty.
Here a screenshot of the table from pgAdmin afer the query:
Also if I copy for instance the second column as first column it works, but then it fails with the others as well.
This happens because you add 1 field to your new table, than insert data only with that field set up, and you do it 5 times. So you should actually see 5 copies of your original table with only 1 field set up.
You need to first set up the structure for your scalingData table, then insert all the records with all fields.
Here is the code (not a Python developer):
import psycopg2 as ps
## Populate table scalingData
tmp = scalingData.dtypes
con = None
con = ps.connect(dbname = 'mydb', user='postgres', host='localhost', password='mypd')
con.autocommit = True
cur = con.cursor()
for i in range(0,5):
j = header[i]
stat = """ ALTER TABLE "scalingData" ADD COLUMN "%s" """%j
if tmp[i] == 'int64':
stat = stat+'bigint'
if tmp[i] == 'float64':
stat = stat+'double precision'
if tmp[i] == 'O':
stat = stat+'text'
### Add Column
cur.execute(stat)
fieldsStr = '"' + '", "'.join([header]) + '"' ### will return "header1", "header2", ... , "header5"
stat1 = """INSERT INTO "scalingData" (%s) SELECT %s FROM temporarytable"""%(fieldsStr,fieldsStr)
### Copy Table
cur.execute(stat1)
cur.close()
con.close()
I'm not familiar with Python, but just a guess as to where the issue might be coming from:
"""INSERT INTO "scalingData" ("%s") SELECT "%s" FROM temporarytable"""
... will transform the "%s" bit into "foo, bar, baz" rather than "foo", "bar", "baz".
Put another way you should remove the unneeded double quotes in your statement and escape the individual column names instead.
Double quotes are used in PG to quote identifiers. You can literally have an table or column called "foo, bar, baz" and PG will work just fine when you do - provided it's always in-between double quotes when you use it in a statement.

how to insert variables in read_sql_query using python

I am trying to retrieve data from sqlite3 with the help of variables. It is working fine with execute() statement but i would like to retrieve columns also and for that purpose i am using read_sql_query() but i am unable to pass variables in read_sql_query(), please follow below code:
def cal():
tab = ['LCOLOutput']
column_name = 'CUSTOMER_EMAIL_ID'
xyz = '**AVarma1#ra.rockwell.com'
for index, m in enumerate(tab):
table_name = m
sq = "SELECT * FROM ? where ?=?;" , (table_name, column_name, xyz,)
df = pandas.read_sql_query(sq,conn)
writer =
pandas.ExcelWriter('D:\pandas_simple.xlsx',engine='xlsxwriter')
df.to_excel(writer, sheet_name='Sheet1')
writer.save()
You need to change the syntax with the method read_sql_query() from pandas, check the doc.
For sqlite, it should work with :
sq = "SELECT * FROM ? where ?=?;"
param = (table_name, column_name, xyz,)
df = pandas.read_sql_query(sq,conn, params=param)
EDIT :
otherwise try with the following formatting for the table :
sq = "SELECT * FROM {} where ?=?;".format(table_name)
param = (column_name, xyz,)
df = pandas.read_sql_query(sq,conn, params=param)
Check this answer explaining why table cannot be passed as parameter directly.

Categories

Resources