Related
I'm trying to insert some values into my vertica database using the vertica_python module:
data = {'SalesNo': ['12345', '678910'],
'ProductID': ['12345_2021-10-21_08:51:22', '678910_2021-10-21_10:27:03'],
'StoreID': ['6d522936e240cd64e1cf9176c5bfdff3bfe8146a345ff2', 'a7274d507d443c752be66b2851415138d75bd913d4949e'],
'PurchaseTime': ['2021-10-21 08:51:22.846000', '2021-10-21 10:44:06.218000'],
'Date': ['2021-10-21', '2021-10-21'],
'StoreNumber': ['0', '1'],
'PurchaseValue': ['348.0', '4893.23']
}
dataset = pd.DataFrame(data)
column = dataset.columns
n = len(column)
SQL_insert = f"INSERT INTO table_name ({','.join(list(column))}) VALUES ({' ?,'*(n-1)} ?);"
valueTuplelst = []
for i,row in dataset.iterrows():
valuelist = list(map(lambda x: str(x), row))
valuelist = [None if element == "None" else element for element in valuelist]
valuetup = tuple(valuelist)
valueTuplelst.append(valuetup)
connection.cursor().executemany(SQL_insert, valueTuplelst)
The equivalent SQL statement is
INSERT INTO table_name (SalesNo,ProductID,StoreID,PurchaseTime,Date,StoreNumber,PurchaseValue) VALUES ('12345', '12345_2021-10-21_08:51:22', '6d522936e240cd64e1cf9176c5bfdff3bfe8146a345ff2', '2021-10-21 08:51:22.846000', '2021-10-21', '0', '348.0'), ('678910', '678910_2021-10-21_10:27:03', 'a7274d507d443c752be66b2851415138d75bd913d4949e', '2021-10-21 10:44:06.218000', '2021-10-21', '1', '4893.23')
which works perfectly in SQL when I execute it.
However I get the error
File "C:\tools\Anaconda3\lib\site-packages\vertica_python\vertica\cursor.py", line 576, in format_operation_with_parameters
operation = operation % tuple(tlist)
TypeError: not all arguments converted during string formatting
I can't seem to figure out why I get this error as I convert all of my data to string format. Any idea where I'm going wrong?
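You should use %s rather than ? as the placeholder in your string. As the traceback shows, vertica_python builds the final statement client-side with operation % tuple(tlist), and Python's % operator only substitutes %s-style placeholders, so the ? marks are left untouched and the arguments are reported as "not converted" (? placeholders are only understood when server-side prepared statements are enabled):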
SQL_insert = f"INSERT INTO table_name ({','.join(list(column))}) VALUES ({' %s,'*(n-1)} %s);"
Then the output string will be 'INSERT INTO table_name (SalesNo,ProductID,StoreID,PurchaseTime,Date,StoreNumber,PurchaseValue) VALUES ( %s, %s, %s, %s, %s, %s, %s);', which is compatible with the operation = operation % tuple(tlist) replacement that vertica_python performs.
I am trying to push a data frame back to SQL Server, but I am having a hard time doing so.
With the following code, I receive this error :
pyodbc.DataError: ('22008', '[22008] [Microsoft][ODBC SQL Server
Driver]Exceeding the capacity of the field datetime (0)
(SQLExecDirectW)')
Here's my code until now:
import pandas as pd
import pyodbc
import numpy as np
df = pd.read_excel(r'path.xlsx')
new_names = {"Calendar Date": "CALENDAR_DATE",
"Origin ID": "ORIGIN_ID",
"Dest ID": "DEST_ID",
"Destination Name": "DESTINATION_NAME",
"Destination City": "DESTINATION_CITY",
"Destination State": "DESTINATION_STATE",
"Carrier Name": "CARRIER_NAME",
"Stop Number": "STOP_NUMBER",
"Planned Arrival Time Start": "PLANNED_ARRIVAL_TIME_START",
"Planned Arrival Time End": "PLANNED_ARRIVAL_TIME_END",
"Delivery App't Time Start": "DELIVERY_APPT_TIME_START",
"Delivery App't Time End": "DELIVERY_APPT_TIME_END",
"Actual Delivery Departure Time": "ACTUAL_DELIVERY_DEPARTURE_TIME",
"Reason Code and Description": "REASON_CODE_AND_DESCRIPTION",
"Days Late Vs Plan": "DAYS_LATE_VS_PLAN",
"Hrs Late Vs Plan": "HRS_LATE_VS_PLAN",
"Days Late Vs Appt": "DAYS_LATE_VS_APPT",
"Hrs Late Vs Appt": "HRS_LATE_VS_APPT"}
df.rename(columns=new_names, inplace=True)
conn = pyodbc.connect('Driver={SQL Server};'
'Server=xxx;'
'Database=Business_Planning;'
'UID="xxx";'
'PWD="xxx";'
'Trusted_Connection=yes;')
cursor = conn.cursor()
SQL_Query = pd.read_sql_query('SELECT * FROM Business_Planning.dbo.OTD_1_DELIVERY_TRACKING_F_IMPORT', conn)
df2 = pd.DataFrame(SQL_Query, columns=["CALENDAR_DATE", "ORIGIN_ID", "DEST_ID", "DESTINATION_NAME", "DESTINATION_CITY",
"DESTINATION_STATE", "SCAC", "CARRIER_NAME", "SID", "STOP_NUMBER",
"PLANNED_ARRIVAL_TIME_START", "PLANNED_ARRIVAL_TIME_END",
"DELIVERY_APPT_TIME_START", "DELIVERY_APPT_TIME_END",
"ACTUAL_DELIVERY_DEPARTURE_TIME", "REASON_CODE_AND_DESCRIPTION",
"DAYS_LATE_VS_PLAN", "HRS_LATE_VS_PLAN", "DAYS_LATE_VS_APPT",
"HRS_LATE_VS_APPT"])
df3 = pd.concat([df2, df]).drop_duplicates(["SID", "STOP_NUMBER", "PLANNED_ARRIVAL_TIME_START"],
keep='last').sort_values(
["SID", "STOP_NUMBER", "PLANNED_ARRIVAL_TIME_START"])
df3['SID'].replace('', np.nan, inplace=True)
df3.dropna(subset=['SID'], inplace=True)
conn.execute('TRUNCATE TABLE Business_Planning.dbo.OTD_1_DELIVERY_TRACKING_F_IMPORT')
for index, row in df3.iterrows():
conn.execute(
"INSERT INTO OTD_1_DELIVERY_TRACKING_F_IMPORT([CALENDAR_DATE], [ORIGIN_ID], [DEST_ID], [DESTINATION_NAME], "
"[DESTINATION_CITY], [DESTINATION_STATE], [SCAC], [CARRIER_NAME], [SID], [STOP_NUMBER], "
"[PLANNED_ARRIVAL_TIME_START], [PLANNED_ARRIVAL_TIME_END], [DELIVERY_APPT_TIME_START], "
"[DELIVERY_APPT_TIME_END], [ACTUAL_DELIVERY_DEPARTURE_TIME], [REASON_CODE_AND_DESCRIPTION], "
"[DAYS_LATE_VS_PLAN], [HRS_LATE_VS_PLAN], [DAYS_LATE_VS_APPT], [HRS_LATE_VS_APPT]) "
"values (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)",
row['CALENDAR_DATE'],
row['ORIGIN_ID'],
row['DEST_ID'],
row['DESTINATION_NAME'],
row['DESTINATION_CITY'],
row['DESTINATION_STATE'],
row['SCAC'],
row['CARRIER_NAME'],
row['SID'],
row['STOP_NUMBER'],
row['PLANNED_ARRIVAL_TIME_START'],
row['PLANNED_ARRIVAL_TIME_END'],
row['DELIVERY_APPT_TIME_START'],
row['DELIVERY_APPT_TIME_END'],
row['ACTUAL_DELIVERY_DEPARTURE_TIME'],
row['REASON_CODE_AND_DESCRIPTION'],
row['DAYS_LATE_VS_PLAN'],
row['HRS_LATE_VS_PLAN'],
row['DAYS_LATE_VS_APPT'],
row['HRS_LATE_VS_APPT'])
conn.commit()
conn.commit()
conn.close()
The error is coming from that part:
for index, row in df3.iterrows():
conn.execute(
"INSERT INTO OTD_1_DELIVERY_TRACKING_F_IMPORT([CALENDAR_DATE], [ORIGIN_ID], [DEST_ID], [DESTINATION_NAME], "
"[DESTINATION_CITY], [DESTINATION_STATE], [SCAC], [CARRIER_NAME], [SID], [STOP_NUMBER], "
"[PLANNED_ARRIVAL_TIME_START], [PLANNED_ARRIVAL_TIME_END], [DELIVERY_APPT_TIME_START], "
"[DELIVERY_APPT_TIME_END], [ACTUAL_DELIVERY_DEPARTURE_TIME], [REASON_CODE_AND_DESCRIPTION], "
"[DAYS_LATE_VS_PLAN], [HRS_LATE_VS_PLAN], [DAYS_LATE_VS_APPT], [HRS_LATE_VS_APPT]) "
"values (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)",
row['CALENDAR_DATE'],
row['ORIGIN_ID'],
row['DEST_ID'],
row['DESTINATION_NAME'],
row['DESTINATION_CITY'],
row['DESTINATION_STATE'],
row['SCAC'],
row['CARRIER_NAME'],
row['SID'],
row['STOP_NUMBER'],
row['PLANNED_ARRIVAL_TIME_START'],
row['PLANNED_ARRIVAL_TIME_END'],
row['DELIVERY_APPT_TIME_START'],
row['DELIVERY_APPT_TIME_END'],
row['ACTUAL_DELIVERY_DEPARTURE_TIME'],
row['REASON_CODE_AND_DESCRIPTION'],
row['DAYS_LATE_VS_PLAN'],
row['HRS_LATE_VS_PLAN'],
row['DAYS_LATE_VS_APPT'],
row['HRS_LATE_VS_APPT'])
conn.commit()
The fields listed represent every column in df3.
I can't seem to get it right; does anybody have a clue?
I strongly advise you to use the pandas.DataFrame.to_sql() method.
In addition, use the sqlalchemy library to connect to your database. Here is an example:
import pyodbc  # not referenced directly, but it is the DBAPI driver that "mssql+pyodbc" loads
import sqlalchemy
# SQLAlchemy URLs have the form dialect+driver://user:password@host:port/database,
# so the credentials are separated from the server by "@".
engine = sqlalchemy.create_engine('mssql+pyodbc://{0}:{1}@{2}:1433/{3}?driver=ODBC+Driver+{4}+for+SQL+Server'.format(username,password,server,bdName,driverVersion))
# to_sql() is a DataFrame method, so call it on the frame to be written (df3 in the question).
# index=False stops the DataFrame index from being sent as an extra column the table does not have.
df3.to_sql("TableName", con=engine, if_exists="append", index=False)
My code pulls JSON data from an API as shown below; however, I'm having trouble getting it to read from the JSON objects.
url = 'https://api.test.net/Vacancy'
payload = {
"APIKey": "0000",
"Action": "GetAllVacancies",
"Content-Type" : "json",
}
headers = {}
r = requests.post(url, data=json.dumps(payload), headers=headers)
print(r.content)
cursor = mydb.cursor()
json_obj = r.json()
for index in json_obj:
cursor.execute("INSERT INTO apidata (VacancyName, Department, Location) VALUES (%s, %s, %s)", (json_obj[index]["VacancyName"], (json_obj[index]["Department"], (json_obj[index]["Location"]))
cursor.close()
My JSON response looks like this
{
"isError":false,
"Status":0,
"Message":"",
"Result":[
{
"VacancyName":"Test Vacancy",
"VacancyDescription":"test data 123",
"Location":"location 1",
"Department":"Finance",
However I keep getting the error
TypeError 'bool' object is not subscriptable
I'm trying to pull this JSON data and send it to a database, thanks!
Look at this loop:
for index in json_obj:
cursor.execute("INSERT INTO apidata (VacancyName, Department, Location) VALUES (%s, %s, %s)", (json_obj[index]["VacancyName"], (json_obj[index]["Department"], (json_obj[index]["Location"]))
You are looping over the keys of json_obj, and the first key is isError. So json_obj[index] is False, which is not subscriptable, and you get the error when you try to access the key ["VacancyName"].
You need to loop over the elements in json_obj["Result"] instead:
for result in json_obj["Result"]:
cursor.execute("INSERT INTO apidata (VacancyName, Department, Location) VALUES (%s, %s, %s)", (result["VacancyName"], result["Department"], result["Location"]))
Your JSON object is a dict. Iterating over a dict iterates over the keys.
The first key in your object is "isError", so when you try to access json_obj[index]["Department"] that is equivalent to json_obj["isError"]["Department"], which is False["Department"], which gives the error you are seeing.
In future, please include the full error message, including the traceback. It makes it a lot easier to answer these questions if you do.
When using django shell: python manage.py shell
import datetime
from django.utils import timezone
DE = datetime.datetime(1970, 1, 1, tzinfo=timezone.utc)
cursor.execute("insert into monitor_regionserver (cluster_id, task_id, name, last_attempt_time, load, numberOfRegions, numberOfRequests, memStoreSizeMB, storefileSizeMB, readRequestsCount, writeRequestsCount, readRequestsCountPerSec, writeRequestsCountPerSec, replication_last_attempt_time, replicationMetrics) values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)", [30L, 484L, '', DE, 0, 0, 0, 0, 0, 0, 0, 0, 0, DE, ''])
The cursor command looks like this (text-wrapped):
cursor.execute("insert into monitor_regionserver (cluster_id, task_id, name, last_attempt_time, load, numberOfRegions, numberOfRequests, memStoreSizeMB, storefileSizeMB, readRequestsCount, writeRequestsCount, readRequestsCountPerSec, writeRequestsCountPerSec, replication_last_attempt_time, replicationMetrics) values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)", [30L, 484L, '', DE, 0, 0, 0, 0, 0, 0, 0, 0, 0, DE, ''])
I always get the error:
ProgrammingError: (1064, "You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'load, numberOfRegions, numberOfRequests, memStoreSizeMB, storefileSizeMB, readRe' at line 1")
I'm confused by this problem; does anyone have any ideas?
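load is a reserved keyword in MySQL (see the "Keywords and Reserved Words" section of the MySQL manual), which is why the parser fails right at 'load, numberOfRegions, ...'.
Try renaming the column, or quote it with backticks (`load`) in the statement.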
Here is a query I have that runs fine in MySQL workbench with the included sample values and works fine if I manually plug in the values in the code, but fails when I use the values as parameters. Any ideas?
Python Code:
print player
cur.execute("""
INSERT INTO scoredata
(gameid, playerid, starter, pos, min, fgm, fga, tpm, tpa, ftm, fta, oreb, reb, ast, stl, blk, tos, pf, pts)
VALUES
(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
"""), (player[0],
int(player[20]),
int(player[19]),
player[3],
int(player[4]),
int(player[5]),
int(player[6]),
int(player[7]),
int(player[8]),
int(player[9]),
int(player[10]),
int(player[11]),
int(player[12]),
int(player[13]),
int(player[14]),
int(player[15]),
int(player[16]),
int(player[17]),
int(player[18]) )
db.commit()
Error message:
['330060130', 103, 'Roy Devyn Marble', 'G-F', '28', '4', '9', '3', '6', '3', '3', '0', '2', '1', '0', '0', '0', '1', '14', 1, 1391]
Traceback (most recent call last):
File "C:\Users\jcaine\workspace\BasketballStats\src\BasketballStats\basketballstats.py", line 350, in <module>
insert_game_data('20130106', '20130106')
File "C:\Users\jcaine\workspace\BasketballStats\src\BasketballStats\basketballstats.py", line 284, in insert_game_data
"""), (player[0], int(player[20]), int(player[19]), player[3], int(player[4]), int(player[5]), int(player[6]), int(player[7]), int(player[8]), int(player[9]), int(player[10]), int(player[11]), int(player[12]), int(player[13]), int(player[14]), int(player[15]), int(player[16]), int(player[17]), int(player[18]) )
File "c:\users\jcaine\appdata\local\temp\easy_install-7_fysp\MySQL_python-1.2.3-py2.7-win32.egg.tmp\MySQLdb\cursors.py", line 174, in execute
File "c:\users\jcaine\appdata\local\temp\easy_install-7_fysp\MySQL_python-1.2.3-py2.7-win32.egg.tmp\MySQLdb\connections.py", line 36, in defaulterrorhandler
_mysql_exceptions.ProgrammingError: (1064, "You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)' at line 4")
MySQL scoredata Table Columns:
gameid varchar
playerid int
starter int
pos varchar
min int
fgm int
fga int
tpm int
tpa int
ftm int
fta int
oreb int
reb int
ast int
stl int
blk int
tos int
pf int
pts int
MySQL Code that runs fine in Workbench:
INSERT INTO scoredata (gameid, playerid, starter, pos, min, fgm, fga, tpm,
tpa, ftm, fta, oreb, reb, ast, stl, blk, tos, pf, pts)
VALUES ('3300601300', 1391, 1, 'G-F', 28, 4, 9, 3, 6, 3, 3, 0, 2, 1, 0, 0, 0, 1, 14)
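You're not passing the data to the execute call. Note the closing parenthesis right after the triple-quoted SQL string in your example: it ends the call to cur.execute() before the parameter tuple, so the query is sent to MySQL with its %s placeholders unsubstituted and the tuple that follows is simply discarded. Move that parenthesis to the very end of the statement so that the tuple becomes the second argument of execute():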
# The ")" that used to follow the closing triple quote ended the execute() call
# before the parameters, so MySQL received the raw "%s" placeholders. The tuple
# is now the second argument, and the call is closed after it.
cur.execute("""
INSERT INTO scoredata
(gameid, playerid, starter, pos, min, fgm, fga, tpm, tpa, ftm, fta, oreb, reb, ast, stl, blk, tos, pf, pts)
VALUES
(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
""", (player[0],
      int(player[20]),
      int(player[19]),
      player[3],
      int(player[4]),
      int(player[5]),
      int(player[6]),
      int(player[7]),
      int(player[8]),
      int(player[9]),
      int(player[10]),
      int(player[11]),
      int(player[12]),
      int(player[13]),
      int(player[14]),
      int(player[15]),
      int(player[16]),
      int(player[17]),
      int(player[18])))  # inner ")" closes the tuple, outer ")" closes execute()
db.commit()