Problem converting varchar to datetime with SQLAlchemy execute - python

I can successfully connect to SQL Server Management Studio from my jupyter notebook with this script :
from sqlalchemy import create_engine
import pyodbc
import csv
import time
import urllib
params = urllib.parse.quote_plus('''DRIVER={SQL Server Native Client 11.0};
SERVER=SV;
DATABASE=DB;
TRUSTED_CONNECTION=YES;''')
engine = create_engine("mssql+pyodbc:///?odbc_connect=%s" % params)
Just for an example, the following script perfectly works :
engine.execute("delete from table_name_X")
However, I failed to get the following script to work. For information, it works when I execute its adaptation in SQL Server Management Studio :
cde = 5
reportDate = df.loc[df.index[0],'Report Date'] # when you execute reportDate it returns 2019-11-15 00:00:00
req = "DELETE table_name_Y "
req+= "WHERE code = " + str(cde)
req+= " AND report_date = '" + str(reportDate.strftime('%Y-%m-%d')) + "'"
engine.execute(req)
According to the error message, there is a problem with the conversion of a varchar to a datetime, which created a value out of range. However, independently executed, the script str(reportDate.strftime('%Y-%m-%d')) works.
Could you please help me to understand why this previous script does not work ?

As #Ilja mentions in the comments to the question, you really should not be using dynamic SQL to construct your statement. It is error-prone and potentially unsafe. If you use a proper parameterized query many of your issues will simply go away.
For what it's worth, this works for me:
import pandas as pd
import sqlalchemy as sa
# ...
with engine.begin() as conn:
# set up test environment
conn.execute(sa.text("CREATE TABLE #table_name_Y (code int, report_date date)"))
conn.execute(sa.text("INSERT INTO #table_name_Y (code, report_date) VALUES (5, '2019-11-15')"))
# verify test environment
result = conn.execute(sa.text("SELECT * FROM #table_name_Y")).fetchall()
print(result) # [(5, datetime.date(2019, 11, 15))]
# test code
df = pd.DataFrame([(5, datetime.datetime(2019, 11, 15),), ], columns=['code', 'Report Date'])
cde = int(df.loc[df.index[0],'code'])
print(type(cde)) # <class 'int'>
reportDate = df.loc[df.index[0],'Report Date']
print(type(reportDate)) # <class 'pandas._libs.tslibs.timestamps.Timestamp'>
sql = sa.text("DELETE FROM #table_name_Y WHERE code = :p0 AND report_date = :p1")
params = {'p0': cde, 'p1': reportDate}
conn.execute(sql, params)
# verify outcome
result = conn.execute(sa.text("SELECT * FROM #table_name_Y")).fetchall()
print(result) # []

Related

Postgres: invalid input syntax for type date

I have created a database and I am trying to fetch data from it. I have a class Query and inside the class I have a function that calls a table called forecasts. The function is as follows:
def forecast(self, provider: str, zone: str='Mainland',):
self.date_start = date_start)
self.date_end = (date_end)
self.df_forecasts = pd.DataFrame()
fquery = """
SELECT dp.name AS provider_name, lf.datetime_from AS date, fr.name AS run_name, lf.value AS value
FROM load_forecasts lf
INNER JOIN bidding_zones bz ON lf.zone_id = bz.zone_id
INNER JOIN data_providers dp ON lf.provider_id = dp.provider_id
INNER JOIN forecast_runs fr ON lf.run_id = fr.run_id
WHERE bz.name = '{zone}'
AND dp.name = '{provider}'
AND date(lf.datetime_from) BETWEEN '{self.date_start}' AND '{self.date_end}'
"""
df_forecasts = pd.read_sql_query(fquery, self.connection)
return df_forecasts
In the scripts that I run I am calling the Query class giving it my inputs
query = Query(date_start, date_end)
And the function
forecast_df = query.forecast(provider='Meteologica')
I run my script in the command line in the classic way
python myscript.py '2022-11-10' '2022-11-18'
My script shows the error
sqlalchemy.exc.DataError: (psycopg2.errors.InvalidDatetimeFormat) invalid input syntax for type date: "{self.date_start}"
LINE 9: AND date(lf.datetime_from) BETWEEN '{self.date_start...
when I use this syntax, but when I manually input the string for date_start and date_end it works.
I cannot find a way to solve the problem with sqlalchemy, so I opened a cursor with psycopg2.
# Returns the datetime, value and provider name and issue date of the forecasts in the load_forecasts table
# The dates range is specified by the user when the class is called
def forecast(self, provider: str, zone: str='Mainland',):
# Opens a cursor to get the data
cursor = self.connection.cursor()
# Query to run
query = """
SELECT dp.name, lf.datetime_from, fr.name, lf.value, lf.issue_date
FROM load_forecasts lf
INNER JOIN bidding_zones bz ON lf.zone_id = bz.zone_id
INNER JOIN data_providers dp ON lf.provider_id = dp.provider_id
INNER JOIN forecast_runs fr ON lf.run_id = fr.run_id
WHERE bz.name = %s
AND dp.name = %s
AND date(lf.datetime_from) BETWEEN %s AND %s
"""
# Execute the query, bring the data and close the cursor
cursor.execute(query, (zone, provider, self.date_start, self.date_end))
self.df_forecasts = cursor.fetchall()
cursor.close()
return self.df_forecasts
If anyone finds the answer with sqlalchemy, I would love to see it!

Airflow: Return BashOperartor as string for odbc connection

I´m quite new with Airflow and Python. What I´m trying to do is get the result from a Bash command and compose the connection string with that return:
import pyodbc as odbc
import pandas as pd
import datetime as dt
from airflow.operators.bash import BashOperator
from airflow.decorators import dag, task
#dag(schedule_interval='0 15 10 * *', start_date=dt.datetime(2021, 10, 1), catchup=False)
def my_dag2():
wsl_ip = BashOperator(
task_id="wsl_ip",
bash_command="grep -m 1 nameserver /etc/resolv.conf | awk '{print $2}'",
do_xcom_push=True
)
#task()
def run():
def busca_informes_cvm(ano,mes):
url = 'http://dados.cvm.gov.br/dados/FI/DOC/INF_DIARIO/DADOS/inf_diario_fi_{:4d}{:02d}.csv'.format(ano,mes)
return pd.read_csv(url, sep=';')
today = dt.date.today()
ano = (today.year)
mes= (today.month)-1
file_name_Comp = '{:4d}-{:02d}'.format(ano,mes)
file_name = '{:4d}{:02d}.csv'.format(ano,mes)
path_name = r'C:\Airflow\{:4d}{:02d}.csv'.format(ano,mes)
conn = odbc.connect('Driver={ODBC Driver 17 for SQL Server};Server= '+ wsl_ip +';Database=CVM;uid=Airflow;pwd=ubuntu')
df = pd.read_sql_query('select max(DT_COMPTC) from Historico;', conn)
left = df[''].str[:7]
if file_name_Comp <= left[0]:
print('Sair')
else:
informes_diarios = busca_informes_cvm(ano,mes)
informes_diarios.to_csv(file_name, sep=';', index=False)
db_view_nm = '[dbo].[Bulk]'
qry = "BULK INSERT " + db_view_nm + " FROM '" + path_name + "' WITH (FIELDTERMINATOR = ';', ROWTERMINATOR = '0x0a', FIRSTROW = 2,ROWS_PER_BATCH = 100000 )"
cursor = conn.cursor()
success = cursor.execute(qry)
conn.commit()
cursor.close
print('Concluído')
execute = run()
etl_dag = my_dag2()
I need to find a way to convert wsl_ip ia a string. Any help would be appreciated.
To get the output from the "wsl_ip" task, you can use the .output property that's exposed for every operator in Airflow. This property is an abstraction over the classic xcom_pull() method known as an XComArg (see docs here).
You could try something like this:
wsl_ip = BashOperator(
task_id="wsl_ip",
bash_command="grep -m 1 nameserver /etc/resolv.conf | awk '{print $2}'",
do_xcom_push=True
)
#task()
def run(ip):
...
conn = odbc.connect('Driver={ODBC Driver 17 for SQL Server};Server= '+ ip +';Database=CVM;uid=Airflow;pwd=ubuntu')
...
execute = run(ip=wsl_ip.output)
The run TaskFlow function now takes an input as the XCom pushed from your BashOperator task which should be converted to a string. Using the .output in this way also automatically creates a task dependency between the "wsl_ip" and "run" tasks too.

SQL Server Python INTERNAL ERROR: should have tag

I am trying to integrate SQL Server 2017 or 2019 with Qunatlib using python. I can run Quantlib code which in this particular case is returning a Qunatlib schedule object which is an enumerated list of type Quantlib.Date. The code look like this
EXECUTE sp_execute_external_script
#language = N'Python',
#script = N'
import QuantLib as QL
import pandas as PD
effective_date = QL.Date(1, 1, 2015)
termination_date = QL.Date(1, 1, 2016)
tenor = QL.Period(QL.Monthly)
calendar = QL.UnitedStates()
business_convention = QL.Following
termination_business_convention = QL.Following
date_generation = QL.DateGeneration.Forward
end_of_month = False
schedule = QL.Schedule(effective_date,termination_date,tenor,calendar,business_convention,termination_business_convention,date_generation,end_of_month)
OutputDataSet = PD.DataFrame(list(enumerate(schedule)), columns=[''index'',''RollDate''])'
However I get the following error
INTERNAL ERROR: should have tag
error while running BxlServer: caught exception: Error communicating between BxlServer and client: 0x000000e9
If i remove the last line
OutputDataSet = PD.DataFrame(list(enumerate(schedule)), columns=[''index'',''RollDate''])'
the script runs without error. I can also run the script in other python environments without error. I suspect that the issue is something to do with data casting, but the error is not particularly helpful. I need to get the data into a data frame for use with in SQL Server.
Worked it out. Needed to convert the data type to datetime and add it to a pandas.
import QuantLib as QL
import pandas as PD
import datetime
effective_date = QL.Date(1, 1, 2015)
termination_date = QL.Date(1, 1, 2016)
tenor = QL.Period(QL.Monthly)
calendar = QL.UnitedStates()
business_convention = QL.Following
termination_business_convention = QL.Following
date_generation = QL.DateGeneration.Forward
end_of_month = False
schedule = QL.Schedule(effective_date,termination_date,tenor,calendar,business_convention,termination_business_convention,date_generation,end_of_month)
OutputDataSet=PD.DataFrame(columns=[''RollDate''])
for i, d in enumerate(schedule):
OutputDataSet.loc[i]=datetime.datetime(d.year(), d.month(), d.dayOfMonth())

How to add current date in filename in python script

I'm trying to unload data from snowflakes to GCS, for that I'm using snowflakepython connector and python script. In the below python script in the file name 'LH_TBL_FIRST20200908' if the script runs today then the name will be same, if the script runs tomorrow then the file name should be 'LH_TBL_FIRST20200909' similarly if it runs day after then 'LH_TBL_FIRST202009010'.
Also please tell me if the code has any mistakes in it. Code is below
import snowflake.connector
# Gets the version
ctx = snowflake.connector.connect(
user='*****',
password='*******',
account='********',
warehouse='*******',
database='********',
schema='********'
)
cs = ctx.cursor()
sql = "copy into #unload_gcs/LH_TBL_FIRST20200908.csv.gz
from ( select * from TEST_BASE.LH_TBL_FIRST )
file_format =
( type=csv compression='gzip'
FIELD_DELIMITER = ','
field_optionally_enclosed_by='"'
NULL_IF=()
EMPTY_FIELD_AS_NULL = FALSE
)
single = fals
e max_file_size=5300000000
header = false;"
cur.execute(sql)
cur.close()
conn.close()
You can use f-strings to fill in (part of) your filename. Python has the datetime module to handle dates and times.
from datetime import datetime
date = datetime.now().strftime('%Y%m%d')
myFileName = f'LH_TBL_FIRST{date}.csv.gz'
print(myFileName)
>>> LH_TBL_FIRST20200908.csv.gz
As for errors in your code:
you declare your cursor as ctx.cursor() and further along you just use cur.execute(...) and cur.close(...). These won't work. Run your code to find the errors and fix them.
Edit suggested by #Lysergic:
If your python version is too old, you could use str.format().
myFileName = 'LH_TBL_FIRST{0}.csv.gz'.format(date)
from datetime import datetime
class FileNameWithDateTime(object):
def __init__(self, fileNameAppender, fileExtension="txt"):
self.fileNameAppender = fileNameAppender
self.fileExtension = fileExtension
def appendCurrentDateTimeInFileName(self,filePath):
currentTime = self.fileNameAppender
print(currentTime.strftime("%Y%m%d"))
filePath+=currentTime.strftime("%Y%m%d")
filePath+="."+self.fileExtension
try:
with open(filePath, "a") as fwrite1:
fwrite1.write(filePath)
except OSError as oserr:
print("Error while writing ",oserr)
I take the following approach
#defining what time/date related values your variable will contain
date_id = (datetime.today()).strftime('%Y%m%d')
Write the output file.
#Creating the filename
with open(date_id + "_" + "LH_TBL.csv.gz" 'w') as gzip:
output: YYYY/MM/DD _ filename
20200908_filename

PyODBC SQL type error when reading query in Pandas

Looking for some help with a specific error when I write out from a pyodbc connection. How do I fix the error:
ODBC SQL type -360 is not yet supported. column-index=1 type=-360', 'HY106' error from PYODBC
Here is my code:
import pyodbc
import pandas as pd
import sqlparse
## Function created to read SQL Query
def create_query_string(sql_full_path):
with open(sql_full_path, 'r') as f_in:
lines = f_in.read()
# remove any common leading whitespace from every line
query_string = textwrap.dedent("""{}""".format(lines))
## remove comments from SQL Code
query_string = sqlparse.format(query_string, strip_comments=True)
return query_string
query_string = create_query_string("Bad Code from R.sql")
## initializes the connection string
curs = conn.cursor()
df=pd.read_sql(query_string,conn)
df.to_csv("TestSql.csv",index=None)
We are using the following SQL code in query string:
SELECT loss_yr_qtr_cd,
CASE
WHEN loss_qtr_cd <= 2 THEN loss_yr_num
ELSE loss_yr_num + 1
END AS LOSS_YR_ENDING,
snap_yr_qtr_cd,
CASE
WHEN snap_qtr_cd <= 2 THEN snap_yr_num
ELSE snap_yr_num + 1
END AS CAL_YR_ENDING,
cur_ctstrph_loss_ind,
clm_symb_grp_cd,
adbfdb_pol_form_nm,
risk_st_nm,
wrt_co_nm,
wrt_co_part_cd,
src_of_bus_cd,
rt_zip_dlv_ofc_cd,
cur_rst_rt_terr_cd,
Sum(xtra_cntrc_py_amt) AS XTRA_CNTRC_PY_AMT
FROM (SELECT DT.loss_yr_qtr_cd,
DT.loss_qtr_cd,
DT.loss_yr_num,
SNAP.snap_yr_qtr_cd,
SNAP.snap_qtr_cd,
SNAP.snap_yr_num,
CLM.cur_ctstrph_loss_ind,
CLM.clm_symb_grp_cd,
POL_SLCT.adbfdb_pol_form_nm,
POL_SLCT.adbfdb_pol_form_cd,
CVR.bsic_cvr_ind,
POL_SLCT.priv_pass_ind,
POL_SLCT.risk_st_nm,
POL_SLCT.wrt_co_nm,
POL_SLCT.wrt_co_part_cd,
POL_SLCT.src_of_bus_cd,
TERR.rt_zip_dlv_ofc_cd,
TERR.cur_rst_rt_terr_cd,
LOSS.xtra_cntrc_py_amt
FROM ahshdm1d.vmaloss_day_dt_dim DT,
ahshdm1d.vmasnap_yr_mo_dim SNAP,
ahshdm1d.tmaaclm_dim CLM,
ahshdm1d.tmaapol_slct_dim POL_SLCT,
ahshdm1d.tmaacvr_dim CVR,
ahshdm1d.tmaart_terr_dim TERR,
ahshdm1d.tmaaloss_fct LOSS,
ahshdm1d.tmaaprod_bus_dim BUS
WHERE SNAP.snap_yr_qtr_cd BETWEEN '20083' AND '20182'
AND TRIM(POL_SLCT.adbfdb_lob_cd) = 'A'
AND CVR.bsic_cvr_ind = 'Y'
AND POL_SLCT.priv_pass_ind = 'Y'
AND POL_SLCT.adbfdb_pol_form_cd = 'V'
AND POL_SLCT.src_of_bus_cd NOT IN ( 'ITC', 'INV' )
AND LOSS.xtra_cntrc_py_amt > 0
AND LOSS.loss_day_dt_id = DT.loss_day_dt_dim_id
AND LOSS.cvr_dim_id = CVR.cvr_dim_id
AND LOSS.pol_slct_dim_id = POL_SLCT.pol_slct_dim_id
AND LOSS.rt_terr_dim_id = TERR.rt_terr_dim_id
AND LOSS.prod_bus_dim_id = BUS.prod_bus_dim_id
AND LOSS.clm_dim_id = CLM.clm_dim_id
AND LOSS.snap_yr_mo_dt_id = SNAP.snap_yr_mo_dt_id) AS TABLE1
GROUP BY loss_yr_qtr_cd,
loss_qtr_cd,
loss_yr_num,
snap_yr_qtr_cd,
snap_qtr_cd,
snap_yr_num,
cur_ctstrph_loss_ind,
clm_symb_grp_cd,
adbfdb_pol_form_nm,
risk_st_nm,
wrt_co_nm,
wrt_co_part_cd,
src_of_bus_cd,
rt_zip_dlv_ofc_cd,
cur_rst_rt_terr_cd
FOR FETCH only
Just looking how to properly write out the database.
Thanks,
Justin

Categories

Resources