I am working with the following code template in Python (using Atom to build/write).
import pyodbc
import pandas as pd
import win32com.client
cnxn = pyodbc.connect('Trusted_Connection=yes', driver = '{SQL
Server}',server ='prodserver', database = 'XXXX')
cnxn.setdecoding(pyodbc.SQL_WCHAR, encoding='utf-8')
cnxn.setencoding(str, encoding='utf-8')
cnxn.setencoding(unicode, encoding='utf-8', ctype=pyodbc.SQL_CHAR)
cursor = cnxn.cursor()
script ="""SELECT AccountsCount.AccountClass, COUNT(*) as Count
FROM
(SELECT *
FROM XXXX.dbo.table
where SubNo='001'
AND (DATENAME(WEEKDAY, GETDATE()) = 'Sunday' AND
convert(date,AddDate) = DATEADD(DAY, -2, CAST(GETDATE() as DATE))
) OR
(DATENAME(WEEKDAY, GETDATE()) = 'Monday' AND
convert(date,AddDate) = DATEADD(DAY, -3, CAST(GETDATE() as DATE))
) OR
(DATENAME(WEEKDAY, GETDATE()) = 'Sunday' AND
convert(date,AddDate) = DATEADD(DAY, -2, CAST(GETDATE() as DATE))
) OR
(DATENAME(WEEKDAY, GETDATE()) NOT IN ('Sunday', 'Monday') AND
convert(date,AddDate) = DATEADD(DAY, -1, CAST(GETDATE() as DATE))
)) AS AccountsCount
Group by AccountsCount.AccountClass
"""
df = pd.read_sql_query(script,cnxn)
writer = pd.ExcelWriter ('ExcelFile.xlsx')
df.to_excel(writer, sheet_name = 'Data Export')
writer.save()
xlApp = win32com.client.DispatchEx('Excel.Application')
xlsPath = ('OtherExcelFile.xlsm')
wb = xlApp.Workbooks.Open(Filename=xlsPath)
xlApp.Run('CopyIntoOutlook')
wb.Save()
xlApp.Quit()
All I need to do is add a second and completely separate SQL command to this script which runs absolutely flawlessly and does what I need it to do as is above. My additional script is something like this
script= """ select AccountClass, COUNT(*) as Count
FROM XXXX.dbo.table
where SubNo='001'
AND AddDate >= '1/1/2017'
Group by AccountClass """
I have had no luck with anything I've tried as far as adding into the script, any help is greatly apprecaited! You'll notice the second script is using the same DB and table as the original, I just need YTD data as well as the top query which is looking at one day previous.
update:
I was able to figure this out, wrote in the following under write.save()
script2= """
select AccountClass, COUNT(*) as Count
FROM XXXX.dbo.table
where SubNo='001'
AND AddDate >= '1/1/2017'
Group by AccountClass
"""
df2 = pd.read_sql_query(script2,cnxn)
writer = pd.ExcelWriter ('ExcelFile.xlsx')
df.to_excel(writer, sheet_name = 'Data Export')
writer.save()
Related
Here is my code.
In the excel file, it should have 4 tables, including date = 1/3,1/7,1/14,1/21.
I have run the query, it showed 4 results.
However, when I wrote to the excel, the file only has one table, which was date = 1/3.
I'm wondering how can I correct this, thanks!
import pyodbc
import pandas as pd
from datetime import datetime,timedelta
cnxn = pyodbc.connect('DRIVER=xx; SERVER=xx; DATABASE=xx; UID=xx; PWD=xx')
cursor = cnxn.cursor()
query="""
declare #START_ORDATE DATETIME
declare #END_ORDATE DATETIME
set #START_ORDATE ='2022-01-03 00:00:00:000'
set #END_ORDATE ='2022-01-24 00:00:00:000'
WHILE #START_ORDATE<=#END_ORDATE
BEGIN
select xx,xx,xx...
set #START_ORDATE = #START_ORDATE + 7
END
"""
df = pd.read_sql_query(query, cnxn)
writer = pd.ExcelWriter('test1.xlsx')
df.to_excel(writer, sheet_name='test000')
writer.save()
I solved it! Put while outside the SQL query, as the following.
import pyodbc
import pandas as pd
from datetime import datetime,timedelta
cnxn = pyodbc.connect('DRIVER=xx; SERVER=xx; DATABASE=xx; UID=xx; PWD=xx;MARS_Connection=yes')
cursor = cnxn.cursor()
start = datetime.strptime('2022-01-03','%Y-%m-%d')
end = datetime.strptime('2022-10-24','%Y-%m-%d')
query="""
...
"""
while start<=end:
cursor.execute(query,(start,start,start+timedelta(days=6)))
df = pd.DataFrame.from_records(cursor.fetchall(),columns=[desc[0] for desc in cursor.description])
start = start + timedelta(days=7)
You made need to use a for loop on the actual object to loop through what was returned. Let me know if this helps.
writer = pd.ExcelWriter('test1.xlsx')
for x in df:
df.to_excel(writer, sheet_name='test000')
writer.save()
Currently i can use the following code to get a query from excel, run it and export the result to another excel. However, i cant setup a loop to take all the value from "Query" column and run them all, and save the result in the result without overwriting the result of the first query. so i need to use temp1 to take the first value from "Query"
dsn_tns = cx_Oracle.makedsn('xxxxxxxxxx', service_name='xxxxxx') # if needed, place an 'r' before any parameter in order to address special characters such as '\'.
conn = cx_Oracle.connect(user=r'xxxxxxx', password='xxxxxx', dsn=dsn_tns)
excel_data_df = pandas.read_excel('C:\\Python\Excel\sqlinput.xlsx', sheet_name='Sheet2')
sql1 = list(excel_data_df['Query'])
temp1=(sql1[1])
cur = conn.cursor()
cur.execute(temp1)
res1 = cur.fetchall()
res1=pd.DataFrame(res1)
writer=pd.ExcelWriter('C:\\Python\Excel\output.xlsx')
writer.save()
I would do something like this
dsn_tns = cx_Oracle.makedsn('xxxxxxxxxx', service_name='xxxxxx') # if needed, place an 'r' before any parameter in order to address special characters such as '\'.
conn = cx_Oracle.connect(user=r'xxxxxxx', password='xxxxxx', dsn=dsn_tns)
excel_data_df = pandas.read_excel('C:\\Python\Excel\sqlinput.xlsx', sheet_name='Sheet2')
sql1 = list(excel_data_df['Query'])
i=0
while i < len(sql1)
temp1=(sql1[i])
cur = conn.cursor()
cur.execute(temp1)
res1 = cur.fetchall()
res1=pd.DataFrame(res1)
writer=pd.ExcelWriter('C:\\Python\Excel\output.xlsx')
writer.save()
i += 1
Here one example in my case
import cx_Oracle
import pandas
import xlrd
connection = cx_Oracle.connect('myuser', 'mypassword', "mydns:myport/myservicename" , encoding= 'UTF-8' )
excel_data_df = pandas.read_excel('C:\\python\myqueries.xlsx', sheet_name='queries')
print(excel_data_df.columns.ravel())
print(excel_data_df['COLUM_QUERY'].tolist())
print(excel_data_df['ENABLE'].tolist())
sql = excel_data_df['COLUM_QUERY'].tolist()
i = 0
while i < len(sql):
cursor = connection.cursor()
stmn = (sql[i])
print(stmn)
cursor.execute(stmn)
while True:
row = cursor.fetchone()
if row is None:
break
print(row)
i += 1
Running my example where the queries are all the same
Does the following
C:\python>python mypython.py
['COLUM_QUERY' 'ENABLE']
['select 1,2 from dual', 'select 1,2 from dual', 'select 1,2 from dual']
[1, 1, 0]
select 1,2 from dual
(1, 2)
select 1,2 from dual
(1, 2)
select 1,2 from dual
(1, 2)
select 1,2 from dual
(1, 2)
select 1,2 from dual
(1, 2)
select 1,2 from dual
(1, 2)
C:\python>
I am running redshift query which is having 40 millions of record. But when I am saving into csv file it is showing only 7 thousands of record. Could you please help me how to solve this?
Example:
Code:
conn = gcso_conn1()
with conn.cursor() as cur:
query = "select * from (select a.src_nm Source_System ,b.day_id Date,b.qty Market_Volume,b.cntng_unt Volume_Units,b.sls_in_lcl_crncy Market_Value,b.crncy_cd Value_Currency,a.panel Sales_Channel,a.cmpny Competitor_Name,a.lcl_mnfcr Local_Manufacturer ,a.src_systm_id SKU_PackID_ProductNumber,upper(a.mol_list) Molecule_Name,a.brnd_nm BrandName_Intl,a.lcl_prod_nm BrandName_Local,d.atc3_desc Brand_Indication,a.prsd_strngth_1_nbr Strength,a.prsd_strngth_1_unt Strength_Units,a.pck_desc Pack_Size_Number,a.prod_nm Product_Description,c.iso3_cntry_cd Country_ISO_Code,c.cntry_nm Country_Name from gcso_prd_cpy.dim_prod a join gcso_prd_cpy.fct_sales b on (a.SRC_NM='IMS' and b.SRC_NM='IMS' and a.prod_id = b.prod_id) join gcso_prd_cpy.dim_cntry c on (a.cntry_id = c.cntry_id) left outer join gcso_prd_cpy.dim_thrc_area d on (a.prod_id = d.prod_id) WHERE a.SRC_NM='IMS' and c.iso3_cntry_cd in ('JPN','IND','CAN','USA') and upper(a.mol_list) in ('AMBRISENTAN', 'BERAPROST','BOSENTAN') ORDER BY b.day_id ) a"
#print(query)
cur.execute(query)
result = cur.fetchall()
conn.commit()
column = [i[0] for i in cur.description]
sqldf = pd.DataFrame(result, columns= column)
print(sqldf.count())
#print(df3)
sqldf.to_csv(Output_Path, index= False, sep= '\001', encoding = 'utf-8')
Everything should work correctly. I think the main problem is debugging using count(). You expect number of records but docs says:
Count non-NA cells for each column or row.
Better to use when debugging DataFrame:
print(len(df))
print(df.shape)
print(df.info())
Also you can do it easier using read_sql:
import pandas as pd
from sqlalchemy import create_engine
header = True
for chunk in pd.read_sql(
'your query here - SELECT * FROM... ',
con=create_engine('creds', echo=True), # set creds - postgres+psycopg2://user:password#host:5432/db_name
chunksize=1000, # read by chunks
):
file_path = '/tmp/path_to_your.csv'
chunk.to_csv(
file_path,
header=header,
mode='a',
index=False,
)
header = False
I'm using pandas to read SQLl output into a dataframe. I'm calling a stored procedure which returns a table output. Following code works fine.If my stored procedure return more than one table outputs[1], How can I read those from dataframe. I want to write different table outputs into different excel sheets
query='exec [aa].[dbo].[sp_cc]?,?'
df = pd.read_sql(query, cnxn, params=[start,end)
writer = pd.ExcelWriter('output.xlsx')
df.to_excel(writer, index=False, sheet_name='customers')
writer.save()
[1]
CREATE procedure [dbo].[usp_vvvv] (....)
BEGIN
SET NOCOUNT ON
.....
select *
FROM #_temp_client_details
select *
FROM #_temp_address_details
select *
FROM #_temp_invoice_details
drop table #_temp_client_details
drop table #_temp_address_details
drop table #_temp_invoice_details
....
END TRY
BEGIN CATCH
..
END CATCH
END
I hope this can help you :
import pandas as pd
import pyodbc
conn = pyodbc.connect('driver={SQL Server};server=xxx.xxx.x.xxx;uid=myuser;pwd=mypass;database=mybd;autocommit=True')
cursor = conn.cursor()
cursor.execute('exec usp_with_2_select')
writer = pd.ExcelWriter('pandas_simple.xlsx', engine='xlsxwriter')
column_names = [col[0] for col in cursor.description]
df1_data = []
for row in cursor.fetchall():
df1_data.append({name: row[i] for i, name in enumerate(column_names)})
df1 = pd.DataFrame(df1_data)
print(df1)
df1.to_excel(writer,'sheet1')
# this for pass the next result
cursor.nextset ()
df2_data = []
for row in cursor.fetchall():
df2_data.append({name: row[i] for i, name in enumerate(column_names)})
df2 = pd.DataFrame(df2_data)
print(df2)
df2.to_excel(writer,'sheet2')
writer.save()
Why do you need Pandas for this? You can go from SQL Server directly to Excel many different ways. Here is one concept that will work for you. There are many ways to skin this cat...
Sub ADOExcelSQLServer()
' Carl SQL Server Connection
'
' FOR THIS CODE TO WORK
' In VBE you need to go Tools References and check Microsoft Active X Data Objects 2.x library
'
Dim Cn As ADODB.Connection
Dim Server_Name As String
Dim Database_Name As String
Dim User_ID As String
Dim Password As String
Dim SQLStr As String
Dim rs As ADODB.Recordset
Set rs = New ADODB.Recordset
Server_Name = "your_server_name" ' Enter your server name here
Database_Name = "NORTHWND" ' Enter your database name here
User_ID = "" ' enter your user ID here
Password = "" ' Enter your password here
SQLStr = "SELECT * FROM [Customers]" ' Enter your SQL here
Set Cn = New ADODB.Connection
Cn.Open "Driver={SQL Server};Server=" & Server_Name & ";Database=" & Database_Name & _
";Uid=" & User_ID & ";Pwd=" & Password & ";"
rs.Open SQLStr, Cn, adOpenStatic
' Dump to spreadsheet
For iCols = 0 To rs.Fields.Count - 1
Worksheets("Sheet1").Cells(1, iCols + 1).Value = rs.Fields(iCols).Name
Next
With Worksheets("sheet1").Range("a2:z500") ' Enter your sheet name and range here
'.ClearContents
.CopyFromRecordset rs
End With
' Tidy up
rs.Close
Set rs = Nothing
Cn.Close
Set Cn = Nothing
End Sub
I am trying to retrieve data from sqlite3 with the help of variables. It is working fine with execute() statement but i would like to retrieve columns also and for that purpose i am using read_sql_query() but i am unable to pass variables in read_sql_query(), please follow below code:
def cal():
tab = ['LCOLOutput']
column_name = 'CUSTOMER_EMAIL_ID'
xyz = '**AVarma1#ra.rockwell.com'
for index, m in enumerate(tab):
table_name = m
sq = "SELECT * FROM ? where ?=?;" , (table_name, column_name, xyz,)
df = pandas.read_sql_query(sq,conn)
writer =
pandas.ExcelWriter('D:\pandas_simple.xlsx',engine='xlsxwriter')
df.to_excel(writer, sheet_name='Sheet1')
writer.save()
You need to change the syntax with the method read_sql_query() from pandas, check the doc.
For sqlite, it should work with :
sq = "SELECT * FROM ? where ?=?;"
param = (table_name, column_name, xyz,)
df = pandas.read_sql_query(sq,conn, params=param)
EDIT :
otherwise try with the following formatting for the table :
sq = "SELECT * FROM {} where ?=?;".format(table_name)
param = (column_name, xyz,)
df = pandas.read_sql_query(sq,conn, params=param)
Check this answer explaining why table cannot be passed as parameter directly.