Use query string template from config in bigquery result - python

I am trying to read a dynamic query string from a config file and format it in the result section, but the query string prints as a static value. I expect the output to be populated dynamically from the BigQuery result.
Code:
def bq_exec_sql(sql):
    client = bigquery.Client(project='development')
    return client.query(sql)

def generate_select(sql, templete_select):
    job = bq_exec_sql(sql)
    result = ''
    print(templete_select)
    try:
        for row in job:
            result += templete_select
        print(result)
    except Exception as e:
        print(e)

if __name__ == '__main__':
    for source in dashboard_activity_list:
        sql = config.get(source).source_select  # from config file
        templete_select = config.get(source).select_template  # from config file
        generate_select(sql, templete_select)
Actual Output:
select '"+row['table_id']+"' as table_id, '"+row['frequency']+"' as frequency from `trend-dev.test.table1`
sselect '"+row['table_id']+"' as table_id, '"+row['info']+"' as info from `trend-dev.test.table2`
Expected Output:
select table_name1 as table_id, daily from `trend-dev.test.table1`
select table_name2 as table_id, chart_data from `trend-dev.test.table2`
Config file:
dashboard_activity_list: [source_partner_trend, source_partner_normal]
source_partner_trend:
  source_select: select * from `trend-dev.test.trend_partner`
  source_key: trend_source_partner
  distination_table: test.trend_partner_dashboard_feed
  select_template: select '"+row['table_id']+"' as table_id, '"+row['frequency']+"' as frequency from `trend-dev.test.table1`
source_partner_normal:
  source_select: select * from `trend-dev.test.normal_partner`
  source_key: normal_source_partner
  distination_table: test.normal_partner_dashboard_feed
  select_template: select '"+row['table_id']+"' as table_id, '"+row['info']+"' as info from `trend-dev.test.table2`

I have replicated your code, and it appears that the whole query is being passed in the templete_select variable as a plain text string. I modified it so that the row value is substituted into the text string at the select statement, which produced the expected output.
Working Snippet:
from google.cloud import bigquery

def bq_exec_sql(sql):
    client = bigquery.Client(project='development')
    return client.query(sql)

def generate_select(sql, templete_select):
    job = bq_exec_sql(sql)
    result = ''
    try:
        for row in job:
            result += templete_select.format(row['table_id'])
        print(result)
    except Exception as e:
        print('error:', e)

if __name__ == '__main__':
    sql = 'select * from `<table_name>` limit 2'
    templete_select = " select {} as table_id"
    generate_select(sql, templete_select)

I found that one of the best options to sort out the question is to use the eval() method, which applies the dynamic string formatting to each row of the result loop:
result += eval(f'f{templete_select!r}')
Ref : https://python-forum.io/thread-24481.html
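As a minimal sketch of how that eval() trick behaves (this assumes the config template is written with f-string placeholders such as {row['table_id']}, which is an assumption for illustration, not the exact template from the question):

row = {'table_id': 'table_name1', 'frequency': 'daily'}
templete_select = "select {row['table_id']} as table_id, {row['frequency']} as frequency from `trend-dev.test.table1`"
# f'f{templete_select!r}' turns the template into f-string source code,
# and eval() evaluates it with the current row in scope
result = eval(f'f{templete_select!r}')
print(result)
# select table_name1 as table_id, daily as frequency from `trend-dev.test.table1`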

Related

Postgres: invalid input syntax for type date

I have created a database and I am trying to fetch data from it. I have a class Query, and inside the class I have a function that queries a table called forecasts. The function is as follows:
def forecast(self, provider: str, zone: str = 'Mainland'):
    self.date_start = date_start
    self.date_end = date_end
    self.df_forecasts = pd.DataFrame()
    fquery = """
        SELECT dp.name AS provider_name, lf.datetime_from AS date, fr.name AS run_name, lf.value AS value
        FROM load_forecasts lf
        INNER JOIN bidding_zones bz ON lf.zone_id = bz.zone_id
        INNER JOIN data_providers dp ON lf.provider_id = dp.provider_id
        INNER JOIN forecast_runs fr ON lf.run_id = fr.run_id
        WHERE bz.name = '{zone}'
        AND dp.name = '{provider}'
        AND date(lf.datetime_from) BETWEEN '{self.date_start}' AND '{self.date_end}'
    """
    df_forecasts = pd.read_sql_query(fquery, self.connection)
    return df_forecasts
In the script that I run, I call the Query class, giving it my inputs:
query = Query(date_start, date_end)
And the function
forecast_df = query.forecast(provider='Meteologica')
I run my script in the command line in the classic way
python myscript.py '2022-11-10' '2022-11-18'
My script shows the error
sqlalchemy.exc.DataError: (psycopg2.errors.InvalidDatetimeFormat) invalid input syntax for type date: "{self.date_start}"
LINE 9: AND date(lf.datetime_from) BETWEEN '{self.date_start...
when I use this syntax, but when I manually input the strings for date_start and date_end, it works.
I cannot find a way to solve the problem with sqlalchemy, so I opened a cursor with psycopg2.
# Returns the datetime, value, provider name and issue date of the forecasts
# in the load_forecasts table. The date range is specified by the user when
# the class is called.
def forecast(self, provider: str, zone: str = 'Mainland'):
    # Opens a cursor to get the data
    cursor = self.connection.cursor()
    # Query to run
    query = """
        SELECT dp.name, lf.datetime_from, fr.name, lf.value, lf.issue_date
        FROM load_forecasts lf
        INNER JOIN bidding_zones bz ON lf.zone_id = bz.zone_id
        INNER JOIN data_providers dp ON lf.provider_id = dp.provider_id
        INNER JOIN forecast_runs fr ON lf.run_id = fr.run_id
        WHERE bz.name = %s
        AND dp.name = %s
        AND date(lf.datetime_from) BETWEEN %s AND %s
    """
    # Execute the query, fetch the data and close the cursor
    cursor.execute(query, (zone, provider, self.date_start, self.date_end))
    self.df_forecasts = cursor.fetchall()
    cursor.close()
    return self.df_forecasts
If anyone finds the answer with sqlalchemy, I would love to see it!
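For reference, a minimal sketch of the same query with SQLAlchemy bound parameters, using text() with named :params (the engine URL is a placeholder, not from the original post):

from sqlalchemy import create_engine, text
import pandas as pd

engine = create_engine("postgresql+psycopg2://user:password@localhost/dbname")  # placeholder URL

query = text("""
    SELECT dp.name AS provider_name, lf.datetime_from AS date, fr.name AS run_name, lf.value AS value
    FROM load_forecasts lf
    INNER JOIN bidding_zones bz ON lf.zone_id = bz.zone_id
    INNER JOIN data_providers dp ON lf.provider_id = dp.provider_id
    INNER JOIN forecast_runs fr ON lf.run_id = fr.run_id
    WHERE bz.name = :zone
      AND dp.name = :provider
      AND date(lf.datetime_from) BETWEEN :date_start AND :date_end
""")

# bound parameters are passed separately, so no string formatting is needed
df_forecasts = pd.read_sql_query(
    query, engine,
    params={"zone": "Mainland", "provider": "Meteologica",
            "date_start": "2022-11-10", "date_end": "2022-11-18"},
)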

Import data from Python (problem with WHERE condition)

I work in Python
I have code that imports a dataset and works fine. However, my dataset contains 3 different patients, and I would like to import only the patient that interests me (which should be possible by adding a WHERE clause to the SQL query).
So the following code works:
def importecdata():
    query2 = "SELECT TECDATA.[Vol_Recalage_US_VD], TECDATA.[Vol_Recalage_Us_VG], TECDATA.[SUBJID] FROM TECDATA INNER JOIN MEDDATA ON TECDATA.DateTime = MEDDATA.DateTime WHERE TECDATA.[SUBJID]='patient14';"
    dftec1 = pd.read_sql(query2, sql_conn, chunksize=100000)
    dftec = pd.concat(dftec1)
    return dftec
It returns the patient 14 data.
But now I want to pass the patient's name as a variable to my function, so I wrote the following code:
def importecdata(patient):
    query2 = "SELECT TECDATA.[Vol_Recalage_US_VD], TECDATA.[Vol_Recalage_Us_VG], TECDATA.[SUBJID] FROM TECDATA INNER JOIN MEDDATA ON TECDATA.DateTime = MEDDATA.DateTime WHERE TECDATA.[SUBJID]=patient;"
    dftec1 = pd.read_sql(query2, sql_conn, chunksize=100000)
    dftec = pd.concat(dftec1)
    return dftec
I checked, and the patient variable has the value patient14, but it doesn't work. I tried changing the value of the variable patient to 'patient14' and it doesn't work either; I get the same error:
invalid column name \xa0: 'patient'
So the code works; the problem comes from the WHERE condition with the patient variable.
(sorry for my English, I'm French)
You have to add your patient value into the query string; check the code below:
def importecdata(patient):
    query2 = "SELECT TECDATA.[Vol_Recalage_US_VD], TECDATA.[Vol_Recalage_Us_VG], TECDATA.[SUBJID] FROM TECDATA INNER JOIN MEDDATA ON TECDATA.DateTime = MEDDATA.DateTime WHERE TECDATA.[SUBJID]='{0}';"
    query2 = query2.format(patient)
    dftec1 = pd.read_sql(query2, sql_conn, chunksize=100000)
    dftec = pd.concat(dftec1)
    return dftec
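For what it's worth, a sketch of the same function with a parameterized query instead of string formatting, which also avoids SQL injection (the ? placeholder assumes a pyodbc-style connection, which is an assumption here; pd.read_sql accepts a params argument):

def importecdata(patient):
    query2 = ("SELECT TECDATA.[Vol_Recalage_US_VD], TECDATA.[Vol_Recalage_Us_VG], TECDATA.[SUBJID] "
              "FROM TECDATA INNER JOIN MEDDATA ON TECDATA.DateTime = MEDDATA.DateTime "
              "WHERE TECDATA.[SUBJID] = ?")
    # the driver substitutes the value, so no quoting is needed in the query
    dftec1 = pd.read_sql(query2, sql_conn, params=[patient], chunksize=100000)
    return pd.concat(dftec1)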

convert very big json to sql using Ijson

I have a 60GB json file and I want to convert it to sql with ijson. (I tried many software tools; they weren't useful and my system kept crashing.)
Note: this is not a duplicate! I have seen all the code about these methods and wrote this, but I still have very slow code.
Here is my code:
import json
import sqlite3
import ijson
import threading

def solve():
    with sqlite3.connect("filename.db") as conn:
        try:
            conn.execute('''
                CREATE TABLE IF NOT EXISTS mytable(
                    username VARCHAR(225)
                    ,phone INTEGER
                    ,id INTEGER PRIMARY KEY
                );''')
        except:
            pass
        keys = ["username", "phone", "id"]
        with open('VERYBIGJSON.json', 'r', encoding='utf-8') as json_file:
            data = ijson.parse(json_file, multiple_values=True)
            for prefix, event, value in data:
                if str(event) == 'start_map':
                    _u = None
                if prefix == "id":
                    _id = value
                if prefix == "username":
                    _u = value
                if prefix == "phone":
                    _p = value
                try:
                    if str(event) == 'end_map':
                        values = (_u, _p, _id)
                        cmd = """INSERT INTO mytable (username, phone, id) VALUES (?, ?, ?);"""
                        conn.execute(cmd, values)
                        conn.commit()
                except Exception as e:
                    # print(str(e))
                    continue

if __name__ == '__main__':
    t = []
    for i in range(1000):
        t.append(threading.Thread(target=solve))
    for i in t:
        i.start()
    for i in t:
        i.join()
I tested both the multithreading and multiprocessing methods, and my code still runs very, very slowly (only about 10KB of my sql database is generated per second).
I want to do this efficiently.
My json sample is:
{"message":"{\"_\":\"user\",\"delete\":{\"test\":true},\"flags\":2067,\"id\":11111110,\"phone\":\"xxxxxxxxxx\",\"photo\":{\"_\":\"userProfilePhoto\",\"photo_id\":\"xxxxxxxxxx\",\"photo_small\":{\"_\":\"file\",\"dcs\":4,\"volume_id\":\"8788701\",\"local_id\":385526},\"photo\":{\"_\":\"file\",\"dcs\":4,\"local_id\":385528}},\"status\":{\"_\":\"userStat\",\"online\":1566173427}}","phone":"xxxxxxxxxx","#version":"1","id":11111110}
{"index": {"_type": "_doc", "_id": "-Lcy4m8BAObvGO9GAsFa"}}
....
Please give me an idea to improve the code speed.
UPDATE:
According to the comments, I wrote code for converting my big json file to .csv. It is still slow, but faster than method 1!
Here is the code:
import ijson
import pandas as pd
from pandas.io.json import json_normalize

def solve():
    with open('VERYBIGJSON.json', 'r', encoding='utf-8') as json_file:
        data = ijson.parse(json_file, multiple_values=True)
        for prefix, event, value in data:
            if str(event) == 'start_map':
                _u = None
            if prefix == "id":
                _id = value
            if prefix == "username":
                _u = value
            if prefix == "phone":
                _p = value
            if str(event) == 'end_map':
                # converting to a dict so it can be passed to json_normalize
                values = [{'username': '{}'.format(_u), 'id': '{}'.format(_id), 'phone': '{}'.format(_p)}]
                df = json_normalize(values)
                df.to_csv('test.csv', index=False, mode='a', encoding='utf-8', header=False)

if __name__ == '__main__':
    solve()
Also, with the JsonSlicer library:
from jsonslicer import JsonSlicer

def solve():
    with open('VERYBIGJSON.json', 'r', encoding='utf-8') as json_file:
        for key in JsonSlicer(json_file, (None, None)):
            print(key)

if __name__ == '__main__':
    solve()
I get this error:
for key in JsonSlicer(json_file , (None , None) ) :
RuntimeError: YAJL error: parse error: trailing garbage
d_from":"telegram_contacts"} {"index": {"_type": "_doc", "_i
(right here) ------^
I think this library doesn't support my json file.
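One idea, sketched under the assumption that each top-level object carries the id/username/phone fields: batch the rows and commit once per batch with executemany instead of once per row, since the per-row commit is what typically dominates sqlite insert time. The batch size of 10,000 is arbitrary.

import sqlite3
import ijson

BATCH_SIZE = 10_000  # arbitrary; tune to taste

def solve():
    conn = sqlite3.connect("filename.db")
    conn.execute('''CREATE TABLE IF NOT EXISTS mytable(
        username VARCHAR(225), phone INTEGER, id INTEGER PRIMARY KEY);''')
    batch = []
    _u = _p = _id = None
    with open('VERYBIGJSON.json', 'r', encoding='utf-8') as json_file:
        for prefix, event, value in ijson.parse(json_file, multiple_values=True):
            if event == 'start_map' and prefix == '':
                _u = _p = _id = None          # reset at each top-level object
            elif prefix == 'id':
                _id = value
            elif prefix == 'username':
                _u = value
            elif prefix == 'phone':
                _p = value
            elif event == 'end_map' and prefix == '':
                batch.append((_u, _p, _id))   # one finished top-level object
                if len(batch) >= BATCH_SIZE:
                    conn.executemany(
                        "INSERT OR IGNORE INTO mytable (username, phone, id) VALUES (?, ?, ?)",
                        batch)
                    conn.commit()             # one commit per batch, not per row
                    batch.clear()
    if batch:                                 # flush the final partial batch
        conn.executemany(
            "INSERT OR IGNORE INTO mytable (username, phone, id) VALUES (?, ?, ?)",
            batch)
        conn.commit()
    conn.close()

if __name__ == '__main__':
    solve()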

PyODBC SQL type error when reading query in Pandas

Looking for some help with a specific error when I write out from a pyodbc connection. How do I fix this error from pyodbc:
ODBC SQL type -360 is not yet supported. column-index=1 type=-360', 'HY106'
Here is my code:
import pyodbc
import pandas as pd
import sqlparse
import textwrap

## Function created to read SQL query
def create_query_string(sql_full_path):
    with open(sql_full_path, 'r') as f_in:
        lines = f_in.read()
    # remove any common leading whitespace from every line
    query_string = textwrap.dedent("""{}""".format(lines))
    ## remove comments from SQL code
    query_string = sqlparse.format(query_string, strip_comments=True)
    return query_string

query_string = create_query_string("Bad Code from R.sql")

## initializes the connection string
curs = conn.cursor()

df = pd.read_sql(query_string, conn)
df.to_csv("TestSql.csv", index=None)
We are using the following SQL code in the query string:
SELECT loss_yr_qtr_cd,
CASE
WHEN loss_qtr_cd <= 2 THEN loss_yr_num
ELSE loss_yr_num + 1
END AS LOSS_YR_ENDING,
snap_yr_qtr_cd,
CASE
WHEN snap_qtr_cd <= 2 THEN snap_yr_num
ELSE snap_yr_num + 1
END AS CAL_YR_ENDING,
cur_ctstrph_loss_ind,
clm_symb_grp_cd,
adbfdb_pol_form_nm,
risk_st_nm,
wrt_co_nm,
wrt_co_part_cd,
src_of_bus_cd,
rt_zip_dlv_ofc_cd,
cur_rst_rt_terr_cd,
Sum(xtra_cntrc_py_amt) AS XTRA_CNTRC_PY_AMT
FROM (SELECT DT.loss_yr_qtr_cd,
DT.loss_qtr_cd,
DT.loss_yr_num,
SNAP.snap_yr_qtr_cd,
SNAP.snap_qtr_cd,
SNAP.snap_yr_num,
CLM.cur_ctstrph_loss_ind,
CLM.clm_symb_grp_cd,
POL_SLCT.adbfdb_pol_form_nm,
POL_SLCT.adbfdb_pol_form_cd,
CVR.bsic_cvr_ind,
POL_SLCT.priv_pass_ind,
POL_SLCT.risk_st_nm,
POL_SLCT.wrt_co_nm,
POL_SLCT.wrt_co_part_cd,
POL_SLCT.src_of_bus_cd,
TERR.rt_zip_dlv_ofc_cd,
TERR.cur_rst_rt_terr_cd,
LOSS.xtra_cntrc_py_amt
FROM ahshdm1d.vmaloss_day_dt_dim DT,
ahshdm1d.vmasnap_yr_mo_dim SNAP,
ahshdm1d.tmaaclm_dim CLM,
ahshdm1d.tmaapol_slct_dim POL_SLCT,
ahshdm1d.tmaacvr_dim CVR,
ahshdm1d.tmaart_terr_dim TERR,
ahshdm1d.tmaaloss_fct LOSS,
ahshdm1d.tmaaprod_bus_dim BUS
WHERE SNAP.snap_yr_qtr_cd BETWEEN '20083' AND '20182'
AND TRIM(POL_SLCT.adbfdb_lob_cd) = 'A'
AND CVR.bsic_cvr_ind = 'Y'
AND POL_SLCT.priv_pass_ind = 'Y'
AND POL_SLCT.adbfdb_pol_form_cd = 'V'
AND POL_SLCT.src_of_bus_cd NOT IN ( 'ITC', 'INV' )
AND LOSS.xtra_cntrc_py_amt > 0
AND LOSS.loss_day_dt_id = DT.loss_day_dt_dim_id
AND LOSS.cvr_dim_id = CVR.cvr_dim_id
AND LOSS.pol_slct_dim_id = POL_SLCT.pol_slct_dim_id
AND LOSS.rt_terr_dim_id = TERR.rt_terr_dim_id
AND LOSS.prod_bus_dim_id = BUS.prod_bus_dim_id
AND LOSS.clm_dim_id = CLM.clm_dim_id
AND LOSS.snap_yr_mo_dt_id = SNAP.snap_yr_mo_dt_id) AS TABLE1
GROUP BY loss_yr_qtr_cd,
loss_qtr_cd,
loss_yr_num,
snap_yr_qtr_cd,
snap_qtr_cd,
snap_yr_num,
cur_ctstrph_loss_ind,
clm_symb_grp_cd,
adbfdb_pol_form_nm,
risk_st_nm,
wrt_co_nm,
wrt_co_part_cd,
src_of_bus_cd,
rt_zip_dlv_ofc_cd,
cur_rst_rt_terr_cd
FOR FETCH only
Just looking for how to properly write out from the database.
Thanks,
Justin
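One workaround that is sometimes suggested for "ODBC SQL type ... is not yet supported" errors is to register an output converter for the offending type before running the query. This is only a sketch under the assumption that the driver can deliver the column as a decodable byte string; the converter below is illustrative, not a confirmed fix for this database:

def handle_unsupported_type(value):
    # value arrives from the driver as raw bytes; decode and convert as needed
    return float(value.decode('utf-8')) if value is not None else None

# -360 is the ODBC type code reported in the error message
conn.add_output_converter(-360, handle_unsupported_type)
df = pd.read_sql(query_string, conn)

Alternatively, casting the suspect column inside the SQL itself (for example, CAST(Sum(xtra_cntrc_py_amt) AS DOUBLE)) can sometimes keep the unsupported type from reaching the driver at all.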

Issues returning python parameter to main function

Problem: I'm trying to extract values out of a MySQL query to use later within the code.
I have set up a MySQL function (connector_mysql) that I use to connect and run the MySQL commands.
The problem I'm having is returning these values back out of the MySQL function to the rest of the code.
There are two scenarios that I've tried which I think should work, but don't when run.
Full code sample below...
1. In the MySQL function, result acts like a dictionary if I use just:
result = cursor.fetchone()
return result
AND in the main function:
result1 = connector_mysql(subSerialNum, ldev, today_date)
print(result1)
This appears to work, and when printing result1 I get a dictionary-looking output:
{'ldev_cap': '0938376656', 'ldev_usedcap': '90937763873'}
HOWEVER...
I can't then use dictionary methods to get or separate the values out, e.g.
used_capacity = result1['ldev_cap']
which I would have expected to make used_capacity represent or equal 0938376656.
Instead I get an error that the object is not subscriptable:
File "/Users/graham/PycharmProjects/VmExtrat/VmExtract.py", line 160, in openRead
    used_capacity = result1['ldev_cap']
TypeError: 'NoneType' object is not subscriptable
2. In the MySQL function, the result acts like a dictionary if I manipulate it and try to return multiple values with the tuple concept, using:
cursor.execute(sql_query, {'serial': subSerialNum, 'lun': ldev, 'todayD': today_date})
result = cursor.fetchone()
while result:
    ldev_cap = result['ldev_cap']
    ldev_usdcap = result['ldev_usdcap']
    return ldev_cap, ldev_usdcap
Here, result acts like a dictionary and I'm able to assign a parameter to the key, like:
ldev_cap = result['ldev_cap']
and if you print ldev_cap you get the correct figure.
If I return one figure, the main function line of:
result1 = connector_mysql(subSerialNum, ldev, today_date)
works...
HOWEVER...
When I then try to return multiple parameters from the MySQL function by doing:
return ldev_cap, ldev_usdcap
and in the main function:
capacity, usd_capacity = connector_mysql(subSerialNum, ldev, today_date)
I get errors again:
File "/Users/graham/PycharmProjects/VmExtrat/VmExtract.py", line 156, in openRead
    capacity, usd_capacity = connector_mysql(subSerialNum, ldev, today_date)
TypeError: 'NoneType' object is not iterable
I think I'm doing the right thing with the dictionary and the tuple, but I'm obviously missing something or not doing it correctly. I need to do this with 4-5 parameters per SQL query, and I didn't want to run multiple queries for the same thing to get the individual parameters out.
Any help or suggestions would be greatly welcomed.
Full code below.
Main code:
capacity, usd_capacity = connector_mysql(subSerialNum, ldev, today_date)
print(capacity)
print(usd_capacity)

def connector_mysql(subSerialNum, ldev, today_date):
    import pymysql.cursors
    db_server = 'localhost'
    db_name = 'CBDB'
    db_pass = 'secure_password'
    db_user = 'user1'
    sql_query = (
        "SELECT ldev_cap, ldev_usdcap FROM Ldevs WHERE sub_serial=%(serial)s "
        "and ldev_id=%(lun)s and data_date=%(todayD)s")
    connection = pymysql.connect(host=db_server,
                                 user=db_user,
                                 password=db_pass,
                                 db=db_name,
                                 cursorclass=pymysql.cursors.DictCursor)
    try:
        with connection.cursor() as cursor:
            cursor.execute(sql_query, {'serial': subSerialNum, 'lun': ldev, 'todayD': today_date})
            result = cursor.fetchone()
            # return result  # used for returning dict
            while result:
                ldev_cap = result['ldev_cap']
                ldev_usdcap = result['ldev_usdcap']
                print(result)
                return ldev_cap, ldev_usdcap
    finally:
        connection.close()
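A likely explanation, inferred from the tracebacks rather than confirmed: both TypeErrors say the return value is None, which is exactly what connector_mysql implicitly returns when cursor.fetchone() finds no matching row, because the while loop body never runs and the function falls off the end. A minimal sketch of a guard for that case:

try:
    with connection.cursor() as cursor:
        cursor.execute(sql_query, {'serial': subSerialNum, 'lun': ldev, 'todayD': today_date})
        result = cursor.fetchone()
        if result is None:
            # no row matched; fail loudly instead of silently returning None
            raise LookupError('no Ldevs row for this serial/ldev/date')
        return result['ldev_cap'], result['ldev_usdcap']
finally:
    connection.close()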
