I am trying to populate a MySQL database table from a CSV file using PyMySQL. The CSV file has approximately 948,000 rows. The script runs without errors, but only about 840,000 rows end up in the database, and I don't know where the rest go.
I am guessing this has something to do with the connection.commit() method, so I have tried committing at the end of the script as well as after every 10,000 INSERT queries, but nothing has worked so far. Any ideas what I might be doing wrong?
I have attached the relevant code snippet below:
with gzip.open(temp_file_path, "rt", encoding="utf-8") as f:
    reader = csv.reader(f)
    for row in reader:
        # if num % 10000 == 0:
        #     conn.commit()
        print("[+] Processing row: ", num)
        sql = "INSERT INTO `{0}`({1}) VALUES({2})".format(table_name, ", ".join(columns),
                                                          ", ".join(["%s"] * len(columns)))
        result = cursor.execute(sql, row)
        if result == 1:
            num += 1
        else:
            print("Not inserted!")
conn.commit()
cursor.close()
conn.close()
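For context on the batching the question describes, here is a minimal sketch of committing every 10,000 rows with executemany, so a failing batch surfaces as an exception instead of rows silently going missing. It assumes table_name, columns, temp_file_path, cursor and conn exist exactly as in the snippet above; it is not the original script.
sql = "INSERT INTO `{0}`({1}) VALUES({2})".format(
    table_name, ", ".join(columns), ", ".join(["%s"] * len(columns)))

num = 0
with gzip.open(temp_file_path, "rt", encoding="utf-8") as f:
    reader = csv.reader(f)
    batch = []
    for row in reader:
        batch.append(row)
        if len(batch) == 10000:
            cursor.executemany(sql, batch)   # raises on a bad row instead of skipping it silently
            num += cursor.rowcount
            conn.commit()
            batch = []
    if batch:                                # remaining rows after the last full batch
        cursor.executemany(sql, batch)
        num += cursor.rowcount
        conn.commit()
print("[+] Inserted rows:", num)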
I am trying to create a function which reads from my CSV file and inserts the rows into a SQL table.
This is my code:
def transaction_import(path):
    with open(path, 'r') as f:
        reader = csv.reader(f)
        columns = next(reader)
        query = 'insert into transactions({0}) values ({1})'
        query = query.format(','.join(columns), ','.join('?' * len(columns)))
        cursor = conn.cursor()
        for data in reader:
            cursor.execute(query, data)

transactions = transaction_import('../data/budapest.csv')

c.execute("select * from transactions")
transactions = c.fetchall()
for row in transactions:
    print(row)
What I want to do is read several sets of transactions from different CSVs. All of them have the same structure and column names, e.g. transactions = transaction_import('../Data/Source/ny.csv') and transactions = transaction_import('../Data/Source/london.csv').
When I run it I get this error:
File "/Users/.../main.py", line 82, in transaction_import
    cursor.execute(query,(data,))
sqlite3.ProgrammingError: Incorrect number of bindings supplied. The current statement uses 4, and there are 1 supplied.
Look closely at the comma: the traceback shows cursor.execute(query, (data,)), which wraps the whole row in a one-element tuple, so only 1 binding is supplied for the 4 placeholders. Pass the row itself, cursor.execute(query, data), as in the code you posted.
Also, Google is your friend; see: sqlite3.ProgrammingError: Incorrect number of bindings supplied. The current statement uses 1, and there are 74 supplied
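For reference, a minimal sketch of the loader with the row passed straight to execute, plus a commit so the inserts survive and a shared connection so the same function can be called for several CSVs. The database file name is an assumption, not from the original post.
import csv
import sqlite3

conn = sqlite3.connect('budget.db')  # assumed database file

def transaction_import(path):
    with open(path, 'r', newline='') as f:
        reader = csv.reader(f)
        columns = next(reader)  # header row supplies the column names
        query = 'insert into transactions({0}) values ({1})'.format(
            ','.join(columns), ','.join('?' * len(columns)))
        cursor = conn.cursor()
        for data in reader:
            cursor.execute(query, data)  # data is the row itself, one value per ? placeholder
        conn.commit()

# every city file has the same structure, so the same function works for each
transaction_import('../Data/Source/ny.csv')
transaction_import('../Data/Source/london.csv')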
I'm new to SQL and PostgreSQL and I can't understand what's going on with this code.
I'm trying to insert a CSV into Postgres with this code:
import csv
import psycopg2 as pg

filename = 'myfile.csv'
try:
    conn = pg.connect(user="myuser",
                      password="mypass",
                      host="myhost",
                      port="5432",
                      database="mydb")
    cursor = conn.cursor()
    with open(filename, 'r') as f:
        reader = csv.reader(f)
        next(reader)  # This skips the 1st row which is the header.
        for record in reader:
            print(record)
            cursor.execute("""INSERT INTO auth VALUES (%s, %s, %s, %s)""", record)
        conn.commit()
except (Exception, pg.Error) as e:
    print(e)
finally:
    if (conn):
        cursor.close()
        conn.close()
        print("Connection closed.")
but it raises the error: insert has more target columns than expressions
LINE 1: ...00000000-0000-0000-0000-000000000000', '1580463062', 'auto')
but here is what I want to insert:
['00000000-0000-0000-0000-000000000000', '00000000-0000-0000-0000-000000000000', '1580463062', 'auto']
and it looks like it definitely has exactly 4 columns.
I also tried changing the encoding of the CSV from ASCII to UTF-8 and UTF-8-SIG, but I still get this error.
I solved my problem with this code:
import psycopg2

conn = psycopg2.connect("host=host dbname=dbname user=user password=password")
cur = conn.cursor()
with open(filename, 'r') as f:
    next(f)
    cur.copy_from(f, 'auth', sep=',')
conn.commit()
cur.close()
conn.close()
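One caveat, not from the original answer: copy_from with sep=',' splits on every raw comma, so quoted fields or fields containing commas will break it. A minimal sketch using copy_expert with PostgreSQL's CSV format instead (same assumed table and file names):
import psycopg2

conn = psycopg2.connect("host=host dbname=dbname user=user password=password")
cur = conn.cursor()
with open('myfile.csv', 'r') as f:
    # HEADER tells the server to skip the first line, so no manual next(f) is needed
    cur.copy_expert("COPY auth FROM STDIN WITH (FORMAT csv, HEADER true)", f)
conn.commit()
cur.close()
conn.close()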
I am trying to insert data in chunks from CSV files in a folder, but I cannot get the SQLite insert query right. I was able to perform it without the lists, so I know that the data is correct.
However, when I use the lists I get the error: sqlite3.ProgrammingError: Incorrect number of bindings supplied. The current statement uses 10, and there are 65 supplied.
Any ideas?
import csv, sqlite3, time, os

def chunks(data, rows=10000):
    data = list(data)
    for i in range(0, len(data), rows):
        yield data[i:i+rows]

if __name__ == "__main__":
    datab = 'MYDB'
    con = sqlite3.connect(datab + '.db')
    con.text_factory = str
    cur = con.cursor()
    koko = 'C:\\MYFOLDER\\'
    print(koko)
    directory = koko
    print(directory)
    for file in os.listdir(directory):
        for searchfile, csvfile, csvcolumn, tablecolumn, table, valuemark, valcsvcolumn in zip(['USR02_FINAL.csv'],
                ['USR02_FINAL.csv'],
                [['SYS,MANDT, BNAME, GLTGV, GLTGB, USTYP, CLASS, UFLAG, ERDAT, TRDAT']],
                [['SYS,MANDT, BNAME, GLTGV2, GLTGB2, USTYP, CLASS, UFLAG, ERDAT2, TRDAT2']],
                ['USR_02_ALL_RAW2'],
                [['?,?,?,?,?,?,?,?,?,?']],
                [['SYS,MANDT, BNAME, GLTGV, GLTGB, USTYP, CLASS, UFLAG, ERDAT, TRDAT']]):
            if file.endswith(searchfile):
                fileinsert = directory + '\\' + csvfile
                csvData = csv.reader(open(fileinsert, "rt"))
                divData = chunks(csvData)  # divide into 10000 rows each
                for chunk in divData:
                    cur.execute('BEGIN TRANSACTION')
                    for csvcolumn in chunk:
                        print(searchfile, csvfile, csvcolumn, tablecolumn, table, valuemark, valcsvcolumn)
                        cur.execute("""INSERT OR IGNORE INTO """ + table + """ (""" + ', '.join(tablecolumn) + """) VALUES (""" + ', '.join(valuemark) + """)""", (', '.join(valcsvcolumn)))
                    cur.execute('COMMIT')
Look at the loops:
for chunk in divData:
    # ...
    for csvcolumn in chunk:
        # ...
        ...join(valcsvcolumn)
I see that you only use csvcolumn (the actual CSV row) in the print, but not in the insert statement; the insert binds valcsvcolumn, which is an unrelated, fixed string. That is probably the problem.
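A minimal sketch of how the inner loop could bind the CSV row itself instead; the table and column names are copied from the question, while executemany and the per-chunk commit are my own substitutions, not the original code:
insert_sql = ("INSERT OR IGNORE INTO USR_02_ALL_RAW2 "
              "(SYS, MANDT, BNAME, GLTGV2, GLTGB2, USTYP, CLASS, UFLAG, ERDAT2, TRDAT2) "
              "VALUES (?,?,?,?,?,?,?,?,?,?)")

for chunk in divData:
    # each row produced by csv.reader is a 10-item list, matching the 10 placeholders
    cur.executemany(insert_sql, chunk)
    con.commit()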
I've accidentally created an infinite loop importing a CSV file using Python. I don't see where the error is!
This is my code:
csv_reader = csv.reader(csv_file, delimiter=',')  # Read the file
next(csv_reader)  # Skip the header row
insert_sql = "INSERT INTO billing_info (InvoiceId, PayerAccountId) VALUES (%s, %s)"  # Insert SQL statement
print("Importing Data")
for row in csv_reader:
    print(row)
    cursor.execute(insert_sql, row)
mydb.commit()
The file is 4GB in size, so it's huge. However, in the print statement, I can tell that it starts over again at the top of the file.
Here is the entirety of the function:
mydb = mysql.connector.connect(user='xxxx', password='xxx',
                               host='xxxx',
                               database='aws_bill')
cursor = mydb.cursor()

def read_csv_to_sql(source):
    try:
        with open(source) as csv_file:  # Open the file
            csv_reader = csv.reader(csv_file, delimiter=',')  # Read the file
            next(csv_reader)  # Skip the header row
            insert_sql = "INSERT INTO billing_info (InvoiceId, PayerAccountId) VALUES (%s, %s)"  # Insert SQL statement
            print("Importing Data")
            for row in csv_reader:
                print(row)
                cursor.execute(insert_sql, row)
            mydb.commit()
            print("Done importing data.")
    except Exception as e:
        print("Exception:", e)
        mydb.rollback()
    finally:
        mydb.close()
Why are these statements creating an infinite loop?
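Not a diagnosis, just a way to verify the observation above: printing an explicit row index shows whether the reader really restarts (the counter would jump back) or whether the 4 GB file simply contains long runs of similar-looking rows (the counter keeps climbing). A sketch of that check, reusing csv_reader, insert_sql and cursor from the function:
for index, row in enumerate(csv_reader, start=1):
    if index % 100000 == 0:
        # a counter that resets would indicate a genuine restart of the reader
        print("row", index, row[:2])
    cursor.execute(insert_sql, row)
mydb.commit()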
After looking around, I wrote this insertion/retrieval code to read in a .mat file, send it off to a bytea Postgres column, and then try to retrieve it and recreate the file.
I use psycopg2 for database interaction.
Insertion:
full_file_path = os.path.join(folder_path, single_file)
f = open(full_file_path, 'rb')
file_data = psycopg2.Binary(f.read())
cur.execute("INSERT INTO file_data_table "
            "( id, file_name, file_data, insertion_date) "
            "VALUES ( DEFAULT, %s, %s, %s)",
            (single_file, file_data, timestamp))
f.close()
conn.commit()
print single_file + " inserted"
Trying to retrieve it and save it to a file (the file_name is "something.mat"):
cur = conn.cursor()
cur.execute("SELECT encode( file_data, 'hex' ), file_name FROM file_data_table")
result = cur.fetchall()
print result[0][0]
for row in result:
    print row[1]
    full_file_path = os.path.join(folder_path, row[1])
    f = open(full_file_path, 'w')
    f.write(row[0])
    f.close()
It retrieves data from the database and successfully saves it to a file, but the file doesn't open as a .mat file, and it is much larger (roughly twice as big) than the original file I tried to store in the database.
I assume some data transformation is happening that I am not handling properly.
Any assistance is greatly appreciated.
Using Josh Kupershmidt's tip, I figured out what was wrong.
By removing the encode (the hex encoding writes two characters per byte, which is why the file came out roughly twice as large) and converting the retrieved buffer to a string, it seems to work now:
cur = conn.cursor()
cur.execute("SELECT file_data, file_name FROM file_data_table")
result = cur.fetchall()
print result[0][0]
for row in result:
    print row[1]
    full_file_path = os.path.join(folder_path, row[1])
    f = open(full_file_path, 'w')
    f.write(str(row[0]))
    f.close()
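For anyone doing the same thing on Python 3 with a current psycopg2 (an assumption, not part of the original post): bytea columns come back as a memoryview, so write the raw bytes in binary mode instead of converting to str. A minimal sketch:
cur = conn.cursor()
cur.execute("SELECT file_data, file_name FROM file_data_table")
for file_data, file_name in cur.fetchall():
    full_file_path = os.path.join(folder_path, file_name)
    with open(full_file_path, 'wb') as f:   # binary mode preserves the .mat contents byte for byte
        f.write(bytes(file_data))           # file_data is a memoryview; bytes() gives the raw payload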