Currently i can use the following code to get a query from excel, run it and export the result to another excel. However, i cant setup a loop to take all the value from "Query" column and run them all, and save the result in the result without overwriting the result of the first query. so i need to use temp1 to take the first value from "Query"
dsn_tns = cx_Oracle.makedsn('xxxxxxxxxx', service_name='xxxxxx') # if needed, place an 'r' before any parameter in order to address special characters such as '\'.
conn = cx_Oracle.connect(user=r'xxxxxxx', password='xxxxxx', dsn=dsn_tns)
excel_data_df = pandas.read_excel('C:\\Python\Excel\sqlinput.xlsx', sheet_name='Sheet2')
sql1 = list(excel_data_df['Query'])
temp1=(sql1[1])
cur = conn.cursor()
cur.execute(temp1)
res1 = cur.fetchall()
res1=pd.DataFrame(res1)
writer=pd.ExcelWriter('C:\\Python\Excel\output.xlsx')
writer.save()
I would do something like this
dsn_tns = cx_Oracle.makedsn('xxxxxxxxxx', service_name='xxxxxx') # if needed, place an 'r' before any parameter in order to address special characters such as '\'.
conn = cx_Oracle.connect(user=r'xxxxxxx', password='xxxxxx', dsn=dsn_tns)
excel_data_df = pandas.read_excel('C:\\Python\Excel\sqlinput.xlsx', sheet_name='Sheet2')
sql1 = list(excel_data_df['Query'])
i=0
while i < len(sql1)
temp1=(sql1[i])
cur = conn.cursor()
cur.execute(temp1)
res1 = cur.fetchall()
res1=pd.DataFrame(res1)
writer=pd.ExcelWriter('C:\\Python\Excel\output.xlsx')
writer.save()
i += 1
Here one example in my case
import cx_Oracle
import pandas
import xlrd
connection = cx_Oracle.connect('myuser', 'mypassword', "mydns:myport/myservicename" , encoding= 'UTF-8' )
excel_data_df = pandas.read_excel('C:\\python\myqueries.xlsx', sheet_name='queries')
print(excel_data_df.columns.ravel())
print(excel_data_df['COLUM_QUERY'].tolist())
print(excel_data_df['ENABLE'].tolist())
sql = excel_data_df['COLUM_QUERY'].tolist()
i = 0
while i < len(sql):
cursor = connection.cursor()
stmn = (sql[i])
print(stmn)
cursor.execute(stmn)
while True:
row = cursor.fetchone()
if row is None:
break
print(row)
i += 1
Running my example where the queries are all the same
Does the following
C:\python>python mypython.py
['COLUM_QUERY' 'ENABLE']
['select 1,2 from dual', 'select 1,2 from dual', 'select 1,2 from dual']
[1, 1, 0]
select 1,2 from dual
(1, 2)
select 1,2 from dual
(1, 2)
select 1,2 from dual
(1, 2)
select 1,2 from dual
(1, 2)
select 1,2 from dual
(1, 2)
select 1,2 from dual
(1, 2)
C:\python>
Related
I have managed to update database with several databases located in specific directory like this:
path2 = r'C://Users//samor//Session//KOPIA BAZ'
paths = []
try:
for file in os.listdir(path2):
if file.endswith('.db'):
paths.append(file)
except:
print("Ścieżka nie istnieje")
df = pd.DataFrame(paths)
print(str(df.iloc[0, 0]))
db_conn = sqlite3.connect(os.path.join(path2, str(df.iloc[0, 0])))
c = db_conn.cursor()
c.executescript("""ATTACH DATABASE 'baza kalkulacji.db' AS other;
INSERT INTO other.baza_kalkulacji (Numer_Kalkulacji,Sciezka_Kalkulacji,Opis_Kalkulacji)
SELECT Numer_Kalkulacji,Sciezka_Kalkulacji,Opis_Kalkulacji
FROM baza_kalkulacji;""")
db_conn.commit()
db_conn.close()
db_conn = sqlite3.connect('baza kalkulacji.db')
c = db_conn.cursor()
c.execute("""
delete from baza_kalkulacji
where rowid not in (select min(rowid)
from baza_kalkulacji
group by Numer_Kalkulacji,Sciezka_Kalkulacji,Opis_Kalkulacji) """)
db_conn.commit()
db_conn.close()
But the problem is...the number of databases will grow in time so i should add copy of the above code hundreds of time while changing value of row in :
db_conn = sqlite3.connect(os.path.join(path2, str(df.iloc[0, 0])))
So the question is:
IS there any solution to add this for loop or something that will connect to next and next database that is in that directory and will keep on inserting these tables like i did in code? Like automatically changing that value for a row in df.iloc ????
I figure it out like this:
path2 = r'C://Users//samor//Session//KOPIA BAZ'
paths = []
try:
for file in os.listdir(path2):
if file.endswith('.db'):
paths.append(file)
except:
print("Ścieżka nie istnieje")
df = pd.DataFrame(paths)
# print(df)
# print(str(df.iloc[0, 0]))
index = df.index
number_of_rows = len(index)
print(number_of_rows)
for bazy in range(number_of_rows):
db_conn = sqlite3.connect(os.path.join(path2, str(df.iloc[bazy, 0])))
c = db_conn.cursor()
c.executescript("""ATTACH DATABASE 'baza kalkulacji.db' AS other;
INSERT INTO other.baza_kalkulacji (Numer_Kalkulacji,Sciezka_Kalkulacji,Opis_Kalkulacji)
SELECT Numer_Kalkulacji,Sciezka_Kalkulacji,Opis_Kalkulacji
FROM baza_kalkulacji;""")
c.execute("DETACH DATABASE 'other'")
db_conn.commit()
db_conn.close()
db_conn = sqlite3.connect('baza kalkulacji.db')
c = db_conn.cursor()
c.execute("""
delete from baza_kalkulacji
where rowid not in (select max(rowid)
from baza_kalkulacji
group by Numer_Kalkulacji) """)
db_conn.commit()
db_conn.close()
I am trying to create a view that contains a variable in Snowflake SQL. The whole thing is being done in Python script. Initially, I tried the binding variable approach but binding does not work in view creation SQL. Is there any other way I can proceed with this? I have given the code below.
Code:
import snowflake.connector as sf
import pandas
ctx = sf.connect (
user = 'floatinginthecloud89',
password = '',
account = 'nq13914.southeast-asia.azure',
warehouse = 'compute_wh',
database = 'util_db',
schema = 'public'
)
print("Got the context object")
cs = ctx.cursor()
print("Got the cursor object")
column1 = 'attr_name';
try:
row = cs.execute("select listagg(('''' || attr_name || ''''), ',') from util_db.public.TBL_DIM;")
rows = cs.fetchall()
for row in rows:
print(row)
print(rows)
row1 = cs.execute("""CREATE OR REPLACE table util_db.public.HIERARCHY_VIEW_2 AS SELECT * FROM (SELECT MSTR.PROD_CODE AS PROD_CODE,DIM.ATTR_NAME AS ATTR_NAME,MSTR.ATTR_VALUE AS ATTR_VALUE FROM TBL_DIM DIM INNER JOIN TBL_MSTR MSTR ON DIM.ATTR_KEY=MSTR.ATTR_KEY ) Q
PIVOT (MAX (Q.ATTR_VALUE) FOR Q.ATTR_NAME IN (*row))
AS P
ORDER BY P.PROD_CODE;""")
rows1 = cs.fetchall()
for row1 in rows1:
print(row1)
finally:
cs.close()
ctx.close()
Error:
File "C:\Users\Anand Singh\anaconda3\lib\site-packages\snowflake\connector\errors.py", line 179, in default_errorhandler
raise error_class(
ProgrammingError: 001003 (42000): SQL compilation error:
syntax error line 2 at position 65 unexpected 'row'.
Looking at the Python binding example
and your code it appears, you need
row1 = cs.execute("""CREATE OR REPLACE table util_db.public.HIERARCHY_VIEW_2 AS
SELECT * FROM (
SELECT MSTR.PROD_CODE AS PROD_CODE,DIM.ATTR_NAME AS ATTR_NAME,MSTR.ATTR_VALUE AS ATTR_VALUE
FROM TBL_DIM DIM
INNER JOIN TBL_MSTR MSTR
ON DIM.ATTR_KEY=MSTR.ATTR_KEY
) Q
PIVOT (MAX (Q.ATTR_VALUE) FOR Q.ATTR_NAME IN (%s))
AS P
ORDER BY P.PROD_CODE;""", row)
but *row will pass the many argugments to I have changed to build the string or comman seperated as a single string.
More pythonic way to implement this is using f-string
row1 = cs.execute(f"""CREATE OR REPLACE table util_db.public.HIERARCHY_VIEW_2 AS
SELECT * FROM (
SELECT MSTR.PROD_CODE AS PROD_CODE,DIM.ATTR_NAME AS ATTR_NAME,MSTR.ATTR_VALUE AS ATTR_VALUE
FROM TBL_DIM DIM
INNER JOIN TBL_MSTR MSTR
ON DIM.ATTR_KEY=MSTR.ATTR_KEY
) Q
PIVOT (MAX (Q.ATTR_VALUE) FOR Q.ATTR_NAME IN ({row}))
AS P
ORDER BY P.PROD_CODE;""")
It is also more readable especially if you have multiple parameters in the f-string
Issue resolved! Thanks a lot, Simeon for your help.
import snowflake.connector as sf
import pandas
ctx = sf.connect (
user = 'floatinginthecloud89',
password = 'AzureSn0flake#123',
account = 'nq13914.southeast-asia.azure',
warehouse = 'compute_wh',
database = 'util_db',
schema = 'public'
)
print("Got the context object")
cs = ctx.cursor()
print("Got the cursor object")
column1 = 'attr_name';
try:
row = cs.execute("select listagg(('''' || attr_name || ''''), ',') from util_db.public.TBL_DIM;")
rows = cs.fetchall()
for row in rows:
print(row)
print(rows)
row1 = cs.execute("""CREATE OR REPLACE table util_db.public.HIERARCHY_VIEW_2 AS
SELECT * FROM (
SELECT MSTR.PROD_CODE AS PROD_CODE,DIM.ATTR_NAME AS ATTR_NAME,MSTR.ATTR_VALUE AS ATTR_VALUE
FROM TBL_DIM DIM
INNER JOIN TBL_MSTR MSTR
ON DIM.ATTR_KEY=MSTR.ATTR_KEY
) Q
PIVOT (MAX (Q.ATTR_VALUE) FOR Q.ATTR_NAME IN (%s))
AS P
ORDER BY P.PROD_CODE;""", ','.join(row))
rows1 = cs.fetchall()
for row1 in rows1:
print(row1)
So currently when I execute SELECT query and retrieve data I have to get results like this:
connection = psycopg2.connect(user="admin",
password="admin",
host="127.0.0.1",
port="5432",
database="postgres_db")
cursor = connection.cursor()
cursor.execute("SELECT * FROM user")
users = cursor.fetchall()
for row in users:
print(row[0])
print(row[1])
print(row[2])
What I want to do is, use column names instead of integers, like this:
for row in users:
print(row["id"])
print(row["first_name"])
print(row["last_name"])
Is this possible, and if it is, then how to do it?
You need to use RealDictCursor, then you can access the results like a dictionary:
import psycopg2
from psycopg2.extras import RealDictCursor
connection = psycopg2.connect(user="...",
password="...",
host="...",
port="...",
database="...",
cursor_factory=RealDictCursor)
cursor = connection.cursor()
cursor.execute("SELECT * FROM user")
users = cursor.fetchall()
print(users)
print(users[0]['user'])
Output:
[RealDictRow([('user', 'dbAdmin')])]
dbAdmin
no need to call fetchall() method, the psycopg2 cursor is an iterable object you can directly do:
cursor.execute("SELECT * FROM user")
for buff in cursor:
row = {}
c = 0
for col in cursor.description:
row.update({str(col[0]): buff[c]})
c += 1
print(row["id"])
print(row["first_name"])
print(row["last_name"])
When I want to make ETL from SQL Server to Oracle I use the code below. Actually i used process and thread but my code throws error.
After execute the error is cx_Oracle.NotSupportedError: Python value of type pyodbc.Row not supported.
Can you help me ?
import cx_Oracle
import sys
import pyodbc
BYTES_PER_BATCH = 1024 *256
# Oracle connection
connection_12c=cx_Oracle.connect()
cur_12c = connection_12c.cursor()
# SQL Server Connection
conn_str = (
'DRIVER={driver;'
'SERVER=IP:PORT;'
'DATABASE=DBNAME;'
'UID=user;'
'PWD=pass'
)
cnxn = pyodbc.connect(conn_str)
cur = cnxn.cursor()
query = """SELECT Id, OpTime, GidenData , Action FROM table """
cur.execute(query)
# Id number
# Optime Datetime
# gidendata clob
cur_batch = []
cur_size = 0
for result in cur.fetchall():
cur_batch.append(result)
cur_size += sys.getsizeof(result)
if cur_size > BYTES_PER_BATCH:
cur_12c.execute("""insert into oracle_table (Id,OpTime,GidenData,Action) values (: 1 ,: 2 ,: 3 ,: 4 ) """,cur_batch)
cur_batch = []
cur_size = 0
In your code you are creating a batch of rows, but then calling cursor.execute() which only inserts a single row! Since Oracle also supports PL/SQL with arrays of data, cx_Oracle is assuming that you are passing an array of pyodbc.Row objects, which it doesn't know how to deal with. Instead, you simply need to change cur_12c.execute() to cur_12c.executemany(). Then cx_Oracle will process an array of rows -- which is I believe what you want.
import cx_Oracle
import sys
import pyodbc
BYTES_PER_BATCH = 1024 *256
# Oracle connection
connection_12c=cx_Oracle.connect()
cur_12c = connection_12c.cursor()
# SQL Server Connection
conn_str = (
'DRIVER={driver;'
'SERVER=IP:PORT;'
'DATABASE=DBNAME;'
'UID=user;'
'PWD=pass'
)
cnxn = pyodbc.connect(conn_str)
cur = cnxn.cursor()
query = """SELECT Id, OpTime, GidenData , Action FROM table """
cur.execute(query)
# Id number
# Optime Datetime
# gidendata clob
cur_batch = []
cur_size = 0
for result in cur.fetchall():
cur_batch.append(result)
cur_size += sys.getsizeof(result)
if cur_size > BYTES_PER_BATCH:
cur_12c.executemany("""insert into oracle_table (Id,OpTime,GidenData,Action) values (: 1 ,: 2 ,: 3 ,: 4 ) """,cur_batch)
cur_batch = []
cur_size = 0
Writing a script to clean up some data. Super unoptimized but this cursor is
returning the number of results in the like query rather than the rows what am I doing wrong.
#!/usr/bin/python
import re
import MySQLdb
import collections
db = MySQLdb.connect(host="localhost", # your host, usually localhost
user="admin", # your username
passwd="", # your password
db="test") # name of the data base
# you must create a Cursor object. It will let
# you execute all the query you need
cur = db.cursor()
# Use all the SQL you like
cur.execute("SELECT * FROM vendor")
seen = []
# print all the first cell of all the rows
for row in cur.fetchall() :
for word in row[1].split(' '):
seen.append(word)
_digits = re.compile('\d')
def contains_digits(d):
return bool(_digits.search(d))
count_word = collections.Counter(seen)
found_multi = [i for i in count_word if count_word[i] > 1 and not contains_digits(i) and len(i) > 1]
unique_multiples = list(found_multi)
groups = dict()
for word in unique_multiples:
like_str = '%' + word + '%'
res = cur.execute("""SELECT * FROM vendor where name like %s""", like_str)
You are storing the result of cur.execute(), which is the number of rows. You are never actually fetching any of the results.
Use .fetchall() to get all result rows or iterate over the cursor after executing:
for word in unique_multiples:
like_str = '%' + word + '%'
cur.execute("""SELECT * FROM vendor where name like %s""", like_str)
for row in cur:
print row