I have to construct a dynamic update query for postgresql.
It's dynamic because I have to determine beforehand which columns to update.
Given a sample table:
create table foo (id int, a int, b int, c int)
Then I will construct programmatically the "set" clause
_set = {}
_set['a'] = 10
_set['c'] = NULL
After that I have to build the update query. And here I'm stuck.
I have to construct this SQL UPDATE command:
update foo set a = 10, c = NULL where id = 1
How can I do this with a psycopg2 parameterized command (i.e. loop through the dict, if it is not empty, and build the SET clause)?
UPDATE
While I was sleeping I found the solution myself. It is dynamic, exactly how I wanted it to be :-)
create table foo (id integer, a integer, b integer, c varchar)
# Columns to change, collected at runtime; None is sent to PostgreSQL as NULL.
updates = {}
updates['a'] = 10
updates['b'] = None
updates['c'] = 'blah blah blah'
row_id = 10

# Only the column names are interpolated into the statement text; every value
# (including the id in the WHERE clause) stays a %s placeholder so that
# psycopg2 does the quoting.
# NOTE(review): column names are not escaped here - use trusted names only.
set_clause = ', '.join("%s = %%s" % column for column in updates)
sql = "update foo set %s where id = %%s" % set_clause
# A real list (not a dict view) in the same order as the keys used above,
# with the id appended for the WHERE placeholder.
params = list(updates.values()) + [row_id]
print(cur.mogrify(sql, params))
cur.execute(sql, params)
And the result is exactly what I needed (especially for the nullable and quoted columns):
"update foo set a = 10, c = 'blah blah blah', b = NULL where id = 10"
There is actually a slightly cleaner way to make it, using the alternative column-list syntax:
# Column-list form: UPDATE foo SET (a, b, c) = (v1, v2, v3) WHERE id = ...
# The whole tuple of new values travels as one parameter and the id as a
# second one, so no value is ever formatted into the statement text.
# NOTE(review): column names are pasted in unescaped - use trusted names only.
sql_template = "UPDATE foo SET ({}) = %s WHERE id = %s"
sql = sql_template.format(', '.join(updates.keys()))
# Values come from the same dict as the column names so the two line up.
params = (tuple(updates.values()), 10)
print(cur.mogrify(sql, params))
cur.execute(sql, params)
Using psycopg2.sql – SQL string composition module
The module contains objects and functions useful to generate SQL dynamically, in a convenient and safe way.
from psycopg2 import connect, sql
conn = connect("dbname=test user=postgres")

upd = {'name': 'Peter', 'age': 35, 'city': 'London'}
ref_id = 12

# One `"column" = %(column)s` fragment per key of the update dict.
assignments = [
    sql.Composed([sql.Identifier(column), sql.SQL(" = "), sql.Placeholder(column)])
    for column in upd
]
sql_query = sql.SQL("UPDATE people SET {data} WHERE id = {id}").format(
    data=sql.SQL(', ').join(assignments),
    id=sql.Placeholder('id'),
)

# The row id is bound through the same mapping as the column values.
upd['id'] = ref_id

with conn:
    with conn.cursor() as cur:
        cur.execute(sql_query, upd)

conn.close()
Running print(sql_query.as_string(conn)) before closing connection will reveal this output:
UPDATE people SET "name" = %(name)s, "age" = %(age)s, "city" = %(city)s WHERE id = %(id)s
No need for dynamic SQL. Supposing a is not nullable and b is nullable.
If you want to update both a and b:
# Every column always appears in the statement below; the parameters decide
# whether each one actually changes, so the SQL text itself never varies.
_set = dict(
id = 1,
# a is not nullable, so passing None for it means "keep the current value"
a = 10,
# b_update picks which array element is written to b (see the statement):
# 0 keeps the current b, 1 stores the new value given in b
b = 20, b_update = 1
)
# coalesce(%(a)s, a) falls back to the existing a when the parameter is NULL.
# (array[b, %(b)s])[%(b_update)s + 1] indexes a two-element array holding
# [current b, new b]; the "+ 1" turns the 0/1 flag into index 1 or 2.
update = """
update foo
set
a = coalesce(%(a)s, a), -- a is not nullable
b = (array[b, %(b)s])[%(b_update)s + 1] -- b is nullable
where id = %(id)s
"""
# mogrify only renders the statement that would be sent; execute runs it.
print cur.mogrify(update, _set)
cur.execute(update, _set)
Output:
update foo
set
a = coalesce(10, a), -- a is not nullable
b = (array[b, 20])[1 + 1] -- b is nullable
where id = 1
If you want to update none:
_set = dict(
id = 1,
a = None,
b = 20, b_update = 0
)
Output:
update foo
set
a = coalesce(NULL, a), -- a is not nullable
b = (array[b, 20])[0 + 1] -- b is nullable
where id = 1
An option without python format using psycopg2's AsIs function for column names (although that doesn't prevent you from SQL injection over column names). Dict is named data:
# Plain (non-f) string: every %s below is filled in by psycopg2, not by Python.
update_statement = 'UPDATE foo SET (%s) = %s WHERE id_column=%s'
# Freeze the key order once so column names and values are guaranteed to pair up.
columns = list(data)
values = tuple(data[column] for column in columns)
# AsIs injects the comma-joined column names verbatim (no quoting, no escaping),
# so the names must come from a trusted source.
query = cur.mogrify(update_statement, (AsIs(','.join(columns)), values, id_value))
Here's my solution that I have within a generic DatabaseHandler class that provides a lot of flexibility when using pd.DataFrame as your source.
def update_data(
    self,
    table: str,
    df: pd.DataFrame,
    indexes: Optional[list] = None,
    column_map: Optional[dict] = None,
    commit: Optional[bool] = False,
) -> int:
    """Update data in the media database.

    One ``UPDATE`` statement is built per dataframe row and the statements
    are executed one after another on ``self._cur``.

    Args:
        table (str): the "tablename" or "namespace.tablename"
        df (pandas.DataFrame): dataframe containing the data to update
        indexes (list): the dataframe columns whose values form the WHERE
            clause of each update statement. If not provided, the names of
            the dataframe's index levels are used.
        column_map (dict): dictionary mapping the columns in df to the
            columns in the table. Columns in the column_map that are also
            in indexes will not be updated.
            Key = df column.
            Value = table column.
        commit (bool): if True, the transaction will be committed
            (default=False)

    Notes:
        If using a column_map, only the columns in the column_map will be
        updated or used as indexes. Order does not matter. If not using a
        column_map, all columns in df must exist in table.

        The table and column names are interpolated into the statement
        text as-is, so they must come from a trusted source.

    Returns:
        int: total number of rows updated, summed over every statement
        (0 when the dataframe has no rows).

    Raises:
        ValueError: if ``indexes`` is not given and a dataframe index level
            has no name.
        Exception: any other error is re-raised unchanged after
            ``self.rollback()`` has been called.
    """
    try:
        if not indexes:
            # Use the dataframe index instead
            indexes = []
            for name in df.index.names:
                if not name:
                    raise ValueError(
                        "Dataframe contains indexes without names. Unable to determine update where clause."
                    )
                indexes.append(name)

        tdf = df.reset_index()
        if column_map:
            target_columns = [c for c in column_map if c not in indexes]
        else:
            column_map = {c: c for c in tdf.columns}
            target_columns = [c for c in df.columns if c not in indexes]

        # The SET / WHERE templates are the same for every row, so build
        # them once instead of once per iteration.
        upd_params = ", ".join(f"{column_map[c]} = %s" for c in target_columns)
        idx_params = " AND ".join(f"{column_map[c]} = %s" for c in indexes)

        update_strings = []
        for _, r in tdf.iterrows():
            # NaN/NaT are replaced by None so the driver sends them as NULL.
            upd_list = [r[c] if pd.notna(r[c]) else None for c in target_columns]
            upd_str = self._cur.mogrify(upd_params, upd_list).decode("utf-8")
            idx_list = [r[c] if pd.notna(r[c]) else None for c in indexes]
            idx_str = self._cur.mogrify(idx_params, idx_list).decode("utf-8")
            update_strings.append(f"UPDATE {table} SET {upd_str} WHERE {idx_str};")

        rowcount = 0
        if update_strings:
            print("\n".join(update_strings))  # Debugging
            # Execute statement by statement: after a multi-statement
            # execute the cursor only reports the last statement's rowcount,
            # which would under-report the number of rows updated.
            for statement in update_strings:
                self._cur.execute(statement)
                rowcount += self._cur.rowcount
        if commit:
            self.commit()
        return rowcount
    except Exception:
        self.rollback()
        raise
Example usages:
>>> df = pd.DataFrame([
{'a':1,'b':'asdf','c':datetime.datetime.now()},
{'a':2,'b':'jklm','c':datetime.datetime.now()}
])
>>> cls.update_data('my_table', df, indexes = ['a'])
UPDATE my_table SET b = 'asdf', c = '2023-01-17T22:13:37.095245'::timestamp WHERE a = 1;
UPDATE my_table SET b = 'jklm', c = '2023-01-17T22:13:37.095250'::timestamp WHERE a = 2;
>>> cls.update_data('my_table', df, indexes = ['a','b'])
UPDATE my_table SET c = '2023-01-17T22:13:37.095245'::timestamp WHERE a = 1 AND b = 'asdf';
UPDATE my_table SET c = '2023-01-17T22:13:37.095250'::timestamp WHERE a = 2 AND b = 'jklm';
>>> cls.update_data('my_table', df.set_index('a'), column_map={'a':'db_a','b':'db_b','c':'db_c'} )
UPDATE my_table SET db_b = 'asdf', db_c = '2023-01-17T22:13:37.095245'::timestamp WHERE db_a = 1;
UPDATE my_table SET db_b = 'jklm', db_c = '2023-01-17T22:13:37.095250'::timestamp WHERE db_a = 2;
Note however that this is not safe from SQL injection due to the way it generates the where clause.
Related
My Query is,
engine = create_engine("postgres://")
conn = engine.connect()
conn.autocommit = True
In Flask Route i am using this query,
result = conn.execute("""UPDATE business_portal SET business_name ="""+str(business_name)+""", name_tag ="""+str(business_tag)+""",name_atr = """+str(business_attr)+""", address =""" +str(address)+""",address_tag =""" +str(address_tag)+""", address_atr = """+str(address_attr)+""", city = """+str(city)+""", city_tag ="""+str(city_tag)+""", city_atr =""" +str(city_attr)+""", state = """+str(state)+""", state_tag = """+str(state_tag)+""",state_atr = """+str(state_attr)+""",zip_code = """+str(zipcode)+""",zip_tag ="""+str(zip_tag)+""",zip_atr ="""+str(zip_attr)+""",contact_number ="""+str(contact_number)+""",num_tag = """+str(contact_tag)+""", num_atr ="""+str(contact_attr)+""",domain ="""+str(domain)+""", search_url = """+str(search_url)+""",category =""" +str(category)+""", logo_path =""" +str(logo_path)+""" WHERE id=%s """,(id))
The above query accepts the data without space (eg abcd).... But when data are with spaces (eg abcd efgh ijkl) it displays a syntax error.
Can any one help me?
The values in the SET clause need to be quoted in the same way as the values in the WHERE clause.
>>> cur = conn.cursor()
>>> stmt = "UPDATE tbl SET col = %s WHERE id = %s"
>>>
>>> # Observe that the SET value is three separate, unquoted tokens
>>> cur.mogrify(stmt % ('a b c', 37))
b'UPDATE tbl SET col = a b c WHERE id = 37'
>>>
>>> # Observe that the SET value is a single, quoted value
>>> cur.mogrify(stmt, ('a b c', 37))
b"UPDATE tbl SET col = 'a b c' WHERE id = 37"
NB cursor.mogrify is a psycopg2 method that prints the query that would be sent to the server by cursor.execute: it doesn't actually execute the query.
I have text files that contain SQL queries. After running one file, "tb_exec_ns_call_pln.txt", I get two dates, for example 2018-12-29 and 2019-03-29.
I just want to pass these dates into another text file (tb_exec_ns_call_actvty.txt) using Python. That text file contains the query below:
SELECT a.nm as cycle_nm,
a.start_dt as cycle_start_dt,
a.end_dt as cycle_end_dt,
a.terr as territory,sales_drctn,
x_rating1,
c.jnj_id as jnj_id,
c.prsn_first_nm,
c.prsn_last_nm,
plnnd_calls as rep_goal
FROM eureka.cycle_plan a, eureka.cycle_plan_trgt b, eureka.acct c
WHERE
a.id = b.cycle_plan
and b.acct = c.id
and b.del_flg = 'N'
***and start_dt >= '2018-12-29'***
***and end_dt <= '2019-03-29'***
and substring(a.terr,1,6) in ('106-KS','106-PI','106-VO')
and a.status = 'In_Progress_vod'
and a.del_flg = 'N'
and c.del_flg = 'N' and plnnd_calls > 0
i have written python script also.. Please guide me how to pass the value.
path = "D:/Users/SPate233/Downloads/NS dashboard/tb_exec_ns_call_pln.txt"
sql_query_file = open(path, 'r')
sql_query1 = sql_query_file.read()
cur.execute(sql_query1)
res = cur.fetchall()
print(res)
print(type(res))
for val in res:
print(val[1])
print(val[2])
One approach is to have a string variable hardcoded in to tb_exec_ns_call_actvty.txt and then use str.replace to fill in the required info.
Ex:
SELECT a.nm as cycle_nm,
a.start_dt as cycle_start_dt,
a.end_dt as cycle_end_dt,
a.terr as territory,sales_drctn,
x_rating1,
c.jnj_id as jnj_id,
c.prsn_first_nm,
c.prsn_last_nm,
plnnd_calls as rep_goal
FROM eureka.cycle_plan a, eureka.cycle_plan_trgt b, eureka.acct c
WHERE
a.id = b.cycle_plan
and b.acct = c.id
and b.del_flg = 'N'
and start_dt >= 'START_DT'
and end_dt <= 'END_DT'
and substring(a.terr,1,6) in ('106-KS','106-PI','106-VO')
and a.status = 'In_Progress_vod'
and a.del_flg = 'N'
and c.del_flg = 'N' and plnnd_calls > 0
InCode:
# The START_DT / END_DT markers live in the activity query file, so that is
# the file to load and fill in (not the plan file that produced the dates).
path = "D:/Users/SPate233/Downloads/NS dashboard/tb_exec_ns_call_actvty.txt"
with open(path) as sql_query_file:
    sql_query1 = sql_query_file.read()
# Plain text substitution into the quotes already present in the file.
# NOTE(review): nothing is escaped here - only splice in trusted values.
sql_query1 = sql_query1.replace("START_DT", '2018-12-29').replace("END_DT", '2019-03-29')
cur.execute(sql_query1)
res = cur.fetchall()
I am trying to parameterize some parts of a SQL Query using the below dictionary:
query_params = dict(
{'target':'status',
'date_from':'201712',
'date_to':'201805',
'drform_target':'NPA'
})
sql_data_sample = str("""select *
from table_name
where dt = %(date_to)s
and %(target)s in (%(drform_target)s)
----------------------------------------------------
union all
----------------------------------------------------
(select *,
from table_name
where dt = %(date_from)s
and %(target)s in ('ACT')
order by random() limit 50000);""")
df_data_sample = pd.read_sql(sql_data_sample,con = cnxn,params = query_params)
However this returns a dataframe with no records at all. I am not sure what the error is since no error is being thrown.
df_data_sample.shape
Out[7]: (0, 1211)
The final PostgreSql query would be:
select *
from table_name
where dt = '201805'
and status in ('NPA')
----------------------------------------------------
union all
----------------------------------------------------
(select *
from table_name
where dt = '201712'
and status in ('ACT')
order by random() limit 50000);-- This part of random() is only for running it on my local and not on server.
Below is a small sample of data for replication. The original data has more than a million records and 1211 columns
service_change_3m service_change_6m dt grp_m2 status
0 -2 201805 $50-$75 NPA
0 0 201805 < $25 NPA
0 -1 201805 $175-$200 ACT
0 0 201712 $150-$175 ACT
0 0 201712 $125-$150 ACT
-1 1 201805 $50-$75 NPA
Can someone please help me with this?
UPDATE:
Based on the suggestion by @shmee, I am finally using:
# A column name cannot be sent as a bound parameter (it would arrive as a
# quoted string literal), so it is spliced into the text with str.format.
# NOTE(review): target is not escaped - it must be a trusted identifier.
target = 'status'
# Values, by contrast, are bound by the driver through %(name)s placeholders.
query_params = {
    'date_from': '201712',
    'date_to': '201805',
    'drform_target': 'NPA',
}
# The second SELECT previously read "select *," - the trailing comma before
# FROM is a syntax error and has been removed.
sql_data_sample = """select *
from table_name
where dt = %(date_to)s
and {0} in (%(drform_target)s)
----------------------------------------------------
union all
----------------------------------------------------
(select *
from table_name
where dt = %(date_from)s
and {0} in ('ACT')
order by random() limit 50000);""".format(target)
df_data_sample = pd.read_sql(sql_data_sample, con=cnxn, params=query_params)
Yes, I am quite confident that your issue results from trying to set column names in your query via parameter binding (and %(target)s in ('ACT')) as mentioned in the comments.
This results in your query restricting the result set to records where 'status' in ('ACT') (i.e. Is the string 'status' an element of a list containing only the string 'ACT'?). This is, of course, false, hence no record gets selected and you get an empty result.
This should work as expected:
# Bind the name `sql` itself; a bare "import psycopg2.sql" only binds `psycopg2`.
from psycopg2 import sql

col_name = 'status'
table_name = 'public.churn_data'
# Values are bound by the driver through the %(name)s placeholders.
query_params = {
    'date_from': '201712',
    'date_to': '201805',
    'drform_target': 'NPA',
}
# {0} = table, {1} = column; both are filled in as quoted identifiers below.
sql_data_sample = """select *
from {0}
where dt = %(date_to)s
and {1} in (%(drform_target)s)
----------------------------------------------------
union all
----------------------------------------------------
(select *
from {0}
where dt = %(date_from)s
and {1} in ('ACT')
order by random() limit 50000);"""
# Identifier() quotes each argument as ONE name, so a schema-qualified table
# is passed as separate parts to become "public"."churn_data" rather than the
# single (non-existent) name "public.churn_data". Multi-part identifiers need
# psycopg2 >= 2.8.
sql_data_sample = sql.SQL(sql_data_sample).format(
    sql.Identifier(*table_name.split('.')),
    sql.Identifier(col_name),
)
# Render the composed query to a plain string before handing it to pandas.
# NOTE(review): as_string() assumes cnxn is a psycopg2 connection - confirm.
df_data_sample = pd.read_sql(sql_data_sample.as_string(cnxn), con=cnxn, params=query_params)
Here's the code I'm working on:
poljeID = int(cursor.execute("SELECT poljeID FROM stanje"))
xkoord = cursor.execute("SELECT xkoord FROM polje WHERE poljeID = %s;", poljeID)
ykoord = cursor.execute("SELECT ykoord FROM polje WHERE poljeID = %s;", poljeID)
print xkoord, ykoord
It's a snippet from it, basically what it needs to do is fetch the ID of the field (poljeID) where an agent is currently on (stanje) and use it to get the x and y coordinates of that field (xkoord, ykoord).
The initial values for the variables are:
poljeID = 1
xkoord = 0
ykoord = 0
The values that I get with that code are:
poljeID = 1
xkoord = 1
ykoord = 1
What am I doing wrong?
cursor.execute does not return the result of the query, it returns the number of rows affected. To get the result, you need to do cursor.fetchone() (or cursor.fetchall()) for each query.
(Note, really the second and third queries should be done at once: SELECT xkoord, ykoord FROM ...)
I have a database table as follows. The data is in the form of a tree.
CREATE TABLE IF NOT EXISTS DOMAIN_HIERARCHY (
COMPONENT_ID INT NOT NULL ,
LEVEL INT NOT NULL ,
COMPONENT_NAME VARCHAR(127) NOT NULL ,
PARENT INT NOT NULL ,
PRIMARY KEY ( COMPONENT_ID )
);
The following data is in the table
(1,1,'A',0)
(2,2,'AA',1)
(3,2,'AB',1)
(4,3,'AAA',2)
(5,3,'AAB',2)
(6,3,'ABA',3)
(7,3,'ABB',3)
I have to retrieve the data and store in a python dictionary
I wrote the below code
conx = sqlite3.connect( 'nameofdatabase.db' )
curs = conx.cursor()
curs.execute( 'SELECT COMPONENT_ID, LEVEL, COMPONENT_NAME, PARENT FROM DOMAIN_HIERARCHY' )
rows = curs.fetchall()
cmap = {}
for row in rows:
cmap[row[0]] = row[2]
hrcy={}
for level in range( 1, maxl + 1 ):
for row in rows:
if row[1] == level:
if hrcy == {}:
hrcy[row[2]] = []
continue
parent = cmap[row[3]]
hrcy[parent].append( { row[2]: [] } )
The problem I'm facing is that nodes deeper than the 2nd level are being added to the root instead of to their parent. Where should I change the code?
The problem is that you can't directly see the nodes for the second level after you insert them. Try this:
conx = sqlite3.connect('nameofdatabase.db')
curs = conx.cursor()
curs.execute('SELECT COMPONENT_ID, LEVEL, COMPONENT_NAME, PARENT '
             'FROM DOMAIN_HIERARCHY')
rows = curs.fetchall()

# cmap: component id -> (component name, dict of that component's children).
# Keeping every component's child dict reachable by id is what lets a child
# at any depth be attached to its real parent rather than to the root.
cmap = {}
# hrcy: the finished tree, keyed by the name of each level-1 (root) component.
hrcy = {}
for component_id, level, name, _parent_id in rows:
    children = {}
    cmap[component_id] = (name, children)
    if level == 1:
        # Collect every root instead of letting a later one replace an earlier one.
        hrcy[name] = children
if not hrcy:
    raise ValueError('DOMAIN_HIERARCHY contains no level 1 (root) component')

# Second pass, now that every component has an entry: hang each component's
# child dict under its parent. Rows whose PARENT id is unknown (the roots,
# whose parent is 0 in the sample data) are left where they are.
for component_id, _level, name, parent_id in rows:
    parent = cmap.get(parent_id)
    if parent is not None:
        parent[1][name] = cmap[component_id][1]
print(hrcy)
By keeping each component's map of subcomponents in cmap, I can always reach each parent's map to add the next component to it. I tried it with the following test data:
rows = [(1,1,'A',0),
(2,2,'AA',1),
(3,2,'AB',1),
(4,3,'AAA',2),
(5,3,'AAB',2),
(6,3,'ABA',3),
(7,3,'ABB',3)]
The output was this:
{'A': {'AA': {'AAA': {}, 'AAB': {}}, 'AB': {'ABA': {}, 'ABB': {}}}}