Importing a CSV loops to infinity in Python

I've accidentally created an infinite loop while importing a CSV file with Python, and I can't see where the error is!
This is my code:
csv_reader = csv.reader(csv_file, delimiter=',')  # Read the file
next(csv_reader)  # Skip the header row
insert_sql = "INSERT INTO billing_info (InvoiceId, PayerAccountId) VALUES (%s, %s)"  # Insert SQL statement
print("Importing Data")
for row in csv_reader:
    print(row)
    cursor.execute(insert_sql, row)
    mydb.commit()
The file is 4GB in size, so it's huge. However, in the print statement, I can tell that it starts over again at the top of the file.
Here is the entirety of the function:
mydb = mysql.connector.connect(user='xxxx', password='xxx',
                               host='xxxx',
                               database='aws_bill')
cursor = mydb.cursor()

def read_csv_to_sql(source):
    try:
        with open(source) as csv_file:  # Open the file
            csv_reader = csv.reader(csv_file, delimiter=',')  # Read the file
            next(csv_reader)  # Skip the header row
            insert_sql = "INSERT INTO billing_info (InvoiceId, PayerAccountId) VALUES (%s, %s)"  # Insert SQL statement
            print("Importing Data")
            for row in csv_reader:
                print(row)
                cursor.execute(insert_sql, row)
                mydb.commit()
            print("Done importing data.")
    except Exception as e:
        print("Exception:", e)
        mydb.rollback()
    finally:
        mydb.close()
Why are these statements creating an infinite loop?

Related

How to convert the dot to a comma in floating-point numbers with the Python cx_Oracle module?

Is it possible to export data to a CSV file with the cx_Oracle module so that floating-point numbers have a comma instead of a dot?
I need this in order to properly load the resulting CSV file into another table in the Oracle database. When I try to load such a CSV file with floating-point numbers, I get the error: cx_Oracle.DatabaseError: ORA-01722: invalid number
I have already solved the problem using the pandas library.
My question: is there a solution that does not use a pandas DataFrame?
def load_csv():
    conn = cx_Oracle.connect(user=db_user, password=db_userpwd, dsn=dsn, encoding="UTF-8")
    cursor = conn.cursor()
    cursor.execute(str("select * from tablename"))
    result_set = cursor.fetchall()
    with open(table_name['schemat']+"__"+table_name['tabela']+".csv", "w") as csv_file:
        csv_writer = csv.writer(csv_file, delimiter='|', lineterminator="\n", quoting=csv.QUOTE_NONNUMERIC)
        for row in result_set:
            csv_writer.writerow(row)
    #df = pandas.read_sql("select * from tablename", conn)
    #df.to_csv(table_name['schemat']+"__"+table_name['tabela']+".csv", index = False, encoding='utf-8', decimal=',', sep='|', header=False)
    cursor.close()
    conn.close()
def export_csv():
    # Open connection to Oracle DB
    conn = cx_Oracle.connect(user=db_user, password=db_userpwd, dsn=dsn, encoding="UTF-8")
    # Open cursor to Oracle DB
    cursor = conn.cursor()
    batch_size = 1
    with open(table_name['schemat']+"__"+table_name['tabela']+".csv", 'r') as csv_file:
        csv_reader = csv.reader(csv_file, delimiter='|')
        sql = sql_insert
        data = []
        for line in csv_reader:
            data.append([i for i in line])
            if len(data) % batch_size == 0:
                cursor.executemany(sql, data)
                data = []
        if data:
            cursor.executemany(sql, data)
    conn.commit()
    cursor.close()
    conn.close()
I tried to set it up by changing the session, but unfortunately it doesn't work for me.
# -*- coding: utf-8 -*-
import csv
import os
import sys
import time
import decimal
import pandas as pd
import cx_Oracle

dsn = "(DESCRIPTION=(ADDRESS=(PROTOCOL=TCP)(HOST=xxx)" \
      "(PORT=xxx))(CONNECT_DATA=(SERVICE_NAME = xxx)))"
db_user = "xxx"
db_userpwd = "xxx"

def init_session(conn, requested_tag):
    cursor = conn.cursor()
    cursor.execute("alter session set nls_numeric_characters = ', '")
    cursor.execute("select to_number(5/2) from dual")
    dual, = cursor.fetchone()
    print("dual=", repr(dual))

pool = cx_Oracle.SessionPool(user=db_user, password=db_userpwd,
                             dsn=dsn, session_callback=init_session, encoding="UTF-8")

with pool.acquire() as conn:
    # Open cursor to Oracle DB
    cursor = conn.cursor()
    cursor.execute("select value from nls_session_parameters where parameter = 'NLS_NUMERIC_CHARACTERS'")
    nls_session_parameters, = cursor.fetchone()
    print("nls_session_parameters=", repr(nls_session_parameters))
    #qryString = "select * from tablename"
    #df = pd.read_sql(qryString, conn)
    #df.to_csv(table_name['schemat']+"__"+table_name['tabela']+".csv", index = False, encoding='utf-8', decimal=',')
    cursor.execute(str("select * from tablename"))
    result_set = cursor.fetchall()
    #result, = cursor.fetchone()
    #print("result is", repr(result))
    with open(table_name['schemat']+"__"+table_name['tabela']+".csv", "w") as csv_file:
        csv_writer = csv.writer(csv_file, delimiter='|', lineterminator="\n")
        for row in result_set:
            csv_writer.writerow(row)
I would be grateful for any suggestions on how I can get the data into a CSV file without the pandas library.
example:
problematic result: 123.45
correct result: 123,45
Another, possibly simpler option:
Create an output type handler that tells Oracle to fetch the value as a string. Then replace the period with a comma:
import cx_Oracle as oracledb

def output_type_handler(cursor, name, default_type, size, precision, scale):
    if default_type == oracledb.DB_TYPE_NUMBER:
        return cursor.var(str, arraysize=cursor.arraysize,
                          outconverter=lambda s: s.replace(".", ","))

conn = oracledb.connect("user/password@host:port/service_name")
conn.outputtypehandler = output_type_handler
with conn.cursor() as cursor:
    cursor.execute("select * from TestNumbers")
    for row in cursor:
        print(row)
Put the output type handler on the cursor if you only want to do this for one query instead of all queries.
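For instance, a minimal sketch of the cursor-level variant, reusing the output_type_handler and the TestNumbers example table from the snippet above:
with conn.cursor() as cursor:
    # Attach the handler to this cursor only; other queries on the
    # connection keep returning native Oracle numbers.
    cursor.outputtypehandler = output_type_handler
    cursor.execute("select * from TestNumbers")
    for row in cursor:
        print(row)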
You can do it with a TO_CHAR(<numeric_value>, '999999999D99999999999', 'NLS_NUMERIC_CHARACTERS=''.,''') conversion, such as
cursor.execute("""
    SELECT TRIM(TO_CHAR(5/2, '999999999D99999999999',
                        'NLS_NUMERIC_CHARACTERS=''.,'''))
      FROM dual
    """)
result_set = cursor.fetchall()
with open(table_name['schemat']+"__"+table_name['tabela']+".csv", "w") as csv_file:
    csv_writer = csv.writer(csv_file, delimiter='|', lineterminator="\n")
    for row in result_set:
        csv_writer.writerow(row)
By the way, this example yields 2.50000000000; switching ''.,'' to '',.'' will yield 2,50000000000 instead, since the first character of NLS_NUMERIC_CHARACTERS is the decimal separator.
Since you're writing to a text file and presumably also want to avoid any precision issues converting from Oracle's decimal format to Python's binary floats, fetching as a string like Anthony showed has advantages. If you want to move the cost of the decimal-separator conversion to the database, you can combine his solution and yours by adding this to your original code:
def output_type_handler(cursor, name, default_type, size, precision, scale):
    if default_type == cx_Oracle.NUMBER:
        return cursor.var(str, arraysize=cursor.arraysize)
and then after you open the cursor (and before executing), add the handler:
cursor.outputtypehandler = output_type_handler
Since the DB does the conversion to string, the value of NLS_NUMERIC_CHARACTERS is respected and you get commas as the decimal separator.
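A minimal end-to-end sketch of that combination; the db_user, db_userpwd and dsn placeholders are taken from the question, and the output file name export.csv is just an assumed example:
import csv
import cx_Oracle

def output_type_handler(cursor, name, default_type, size, precision, scale):
    # Request NUMBER columns as strings so the database-side conversion is used
    if default_type == cx_Oracle.NUMBER:
        return cursor.var(str, arraysize=cursor.arraysize)

# db_user, db_userpwd and dsn as defined in the question
conn = cx_Oracle.connect(user=db_user, password=db_userpwd, dsn=dsn, encoding="UTF-8")
cursor = conn.cursor()
# Make ',' the decimal separator for this session, so the number-to-string
# conversion already produces commas.
cursor.execute("alter session set nls_numeric_characters = ',.'")
cursor.outputtypehandler = output_type_handler
cursor.execute("select * from tablename")
with open("export.csv", "w") as csv_file:
    csv_writer = csv.writer(csv_file, delimiter='|', lineterminator="\n")
    for row in cursor:
        csv_writer.writerow(row)
cursor.close()
conn.close()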

Connection.commit() not uploading all rows to database

I am trying to populate a MySQL database table from a CSV file using PyMySQL. The CSV file has approximately 948,000 rows. The script runs fine, but only about 840,000 rows appear in the database; I don't know where the rest go.
I am guessing this has something to do with the connection.commit() method, so I have tried committing at the end of the script as well as after every 10,000 INSERT queries, but nothing has worked so far. Any ideas what I might be doing wrong?
I have attached the relevant code snippet below:
with gzip.open(temp_file_path, "rt", encoding="utf-8") as f:
    reader = csv.reader(f)
    for row in reader:
        # if num % 10000 == 0:
        #     conn.commit()
        print("[+] Processing row: ", num)
        sql = "INSERT INTO `{0}`({1}) VALUES({2})".format(table_name, ", ".join(columns),
                                                          ", ".join(["%s"] * len(columns)))
        result = cursor.execute(sql, row)
        if result == 1:
            num += 1
        else:
            print("Not inserted!")
conn.commit()
cursor.close()
conn.close()
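As a hedged debugging sketch rather than a fix: counting the rows the reader yields versus the rows that insert successfully would show whether the missing rows are lost while reading the gzipped CSV or while inserting. Here temp_file_path, table_name, columns, conn and cursor are assumed to be set up exactly as in the question:
import csv
import gzip

read_rows = 0      # rows yielded by csv.reader
inserted_rows = 0  # rows for which cursor.execute reports one affected row
failed_rows = []   # rows that did not insert cleanly

sql = "INSERT INTO `{0}`({1}) VALUES({2})".format(
    table_name, ", ".join(columns), ", ".join(["%s"] * len(columns)))

with gzip.open(temp_file_path, "rt", encoding="utf-8") as f:
    reader = csv.reader(f)
    for row in reader:
        read_rows += 1
        if cursor.execute(sql, row) == 1:
            inserted_rows += 1
        else:
            failed_rows.append(row)

conn.commit()
print("read:", read_rows, "inserted:", inserted_rows, "failed:", len(failed_rows))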

Read an Oracle SQL file using Python and fetch the results into a CSV file

I am a beginner in Python and want to read a SQL file from Python and fetch the results into a CSV file. I have tried using cx_Oracle to connect to the Oracle database. It works when I give the SQL query directly in the code, but I am not sure how to read a SQL file from Python.
Here is the code which worked when I gave the SQL query directly in the code:
import csv
import cx_Oracle

con = cx_Oracle.connect('XCM/XCM@home.com:1500/HOME')
cursor = con.cursor()
csv_file = open("exp.csv", "w")
writer = csv.writer(csv_file, delimiter='|', lineterminator="\n", quoting=csv.QUOTE_NONE)
r = cursor.execute("select * from home_parties where CREATION_DATE >= trunc(sysdate)")
for row in cursor:
    writer.writerow(row)
cursor.close()
con.close()
csv_file.close()
I tried the code below to fetch from a SQL file (sample.sql, which contains the same SQL query as in the code above), but it didn't work:
import csv
import cx_Oracle

con = cx_Oracle.connect('XCM/XCM@home.com:1500/HOME')
cursor = con.cursor()
csv_file = open("exp.csv", "w")
writer = csv.writer(csv_file, delimiter='|', lineterminator="\n", quoting=csv.QUOTE_NONE)
f = open('C:/Users/home1/sample.sql')
full_sql = f.read()
r = cursor.execute(full_sql)
for row in cursor:
    writer.writerow(row)
cursor.close()
con.close()
csv_file.close()
Kindly help me out!!
The content of the SQL file might be read line by line within a for loop, such as
import csv
import cx_Oracle

con = cx_Oracle.connect('XCM/XCM@home.com:1500/HOME')
cursor = con.cursor()

sql = ""
with open("C:/Users/home1/sample.sql") as f_in:
    for line in f_in:
        sql += line

f_out = open("exp.csv", "w")
writer = csv.writer(f_out, delimiter='|', lineterminator="\n", quoting=csv.QUOTE_NONE)
cursor.execute(sql)
for row in cursor:
    writer.writerow(row)
cursor.close()
con.close()
f_out.close()
considering that the SQL statement may be spread across multiple lines.
Assuming you have a sample.sql file in the same directory with the query on its first line, this reads the SQL query from that file, executes it, and saves the result into a CSV file named "exp.csv".
import cx_Oracle
import pandas as pd

con = cx_Oracle.connect('XCM/XCM@home.com:1500/HOME')
cursor = con.cursor()

with open('sample.sql') as f:
    sql_query_string = f.readline()

sql_query = pd.read_sql_query(sql_query_string, con)
sql_query.to_csv("exp.csv", sep='|')

Python PostgreSQL error "insert has more target columns than expressions", but it doesn't

I'm new to SQL and PostgreSQL and I can't understand what's going on with this code.
I'm trying to insert a CSV into Postgres with this code:
import csv
import psycopg2 as pg

filename = 'myfile.csv'
try:
    conn = pg.connect(user="myuser",
                      password="mypass",
                      host="myhost",
                      port="5432",
                      database="mydb")
    cursor = conn.cursor()
    with open(filename, 'r') as f:
        reader = csv.reader(f)
        next(reader)  # This skips the 1st row which is the header.
        for record in reader:
            print(record)
            cursor.execute("""INSERT INTO auth VALUES (%s, %s, %s, %s)""", record)
            conn.commit()
except (Exception, pg.Error) as e:
    print(e)
finally:
    if (conn):
        cursor.close()
        conn.close()
        print("Connection closed.")
but it raises the error: insert has more target columns than expressions
LINE 1: ...00000000-0000-0000-0000-000000000000', '1580463062', 'auto')
but here is what I want to insert:
['00000000-0000-0000-0000-000000000000', '00000000-0000-0000-0000-000000000000', '1580463062', 'auto']
and it looks like it definitely has exactly 4 columns.
I also tried changing the encoding of the CSV from ASCII to UTF-8 and UTF-8-SIG, but I still get this error.
I solved my problem with this code:
import psycopg2

conn = psycopg2.connect("host=host dbname=dbname user=user password=password")
cur = conn.cursor()
with open(filename, 'r') as f:
    next(f)
    cur.copy_from(f, 'auth', sep=',')
conn.commit()
cur.close()
conn.close()
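One hedged design note on that fix: copy_from with sep=',' does not understand quoted CSV fields, so if the file can ever contain commas inside quotes, copy_expert with an explicit COPY statement in CSV format is a safer variant. A minimal sketch, reusing the filename variable and the auth table from the question:
import psycopg2

conn = psycopg2.connect("host=host dbname=dbname user=user password=password")
cur = conn.cursor()
with open(filename, 'r') as f:
    # HEADER makes the server skip the first line; FORMAT csv handles quoting.
    cur.copy_expert("COPY auth FROM STDIN WITH (FORMAT csv, HEADER true)", f)
conn.commit()
cur.close()
conn.close()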

PyDev to Postgres issue with code

I am trying to send data from Python (using PyDev) to PostgreSQL. I am running into more than one error and could really use some help. Many thanks! One of the errors says:
psycopg2.InterfaceError: cursor already closed
Any help would be appreciated!
Here is my code to create the table:
#!/usr/bin/python
# -*- coding: utf-8 -*-
import psycopg2
import sys
import csv
from itertools import count

path = r'C:\Users\sammy\Downloads\E0.csv'
with open(path, "r") as csvfile:
    readCSV = csv.reader(csvfile, delimiter=",")
    firstline = 1
    con = None
    con = psycopg2.connect("host='localhost' dbname='football' user='postgres' password='XXX'")
    cur = con.cursor()
    con.commit()
    cur.execute("DROP TABLE testtest1234")
    cur.execute("CREATE TABLE testtest1234 (HY INTEGER)")
    for row in readCSV:
        if firstline:
            firstline = 0
            continue
        new_data = row[19]
        try:
            cur.execute("INSERT INTO testtest1234 values ("+new_data+")")
            cur.execute("SELECT * FROM testtest1234;")
        except psycopg2.DatabaseError as e:
            if con:
                con.rollback()
            print("Error %s % e", e)
            sys.exit(1)
        finally:
            if con:
                con.close()
        print(new_data)
        print(" ".join(row))

out = open("new_data.csv", "w")
output = csv.writer(out)
for row in new_data:
    output.writerow(row)
out.close()
Here is my code to insert into the table:
#!/usr/bin/python
# -*- coding: utf-8 -*-
import psycopg2
import sys
import csv
from itertools import count

path = r'C:\Users\sammy\Downloads\E0.csv'
with open(path, "r") as csvfile:
    readCSV = csv.reader(csvfile, delimiter=",")
    con = None
    con = psycopg2.connect("host='localhost' dbname='football' user='postgres' password='XXX'")
    cur = con.cursor()
    con.commit()
    for row in readCSV:
        new_data = row[19]
        print(new_data)
        try:
            cur.execute("INSERT INTO testtest1234 values ("+new_data+")")
        except psycopg2.DatabaseError as e:
            if con:
                con.rollback()
            print("Error %s % e", e)
            sys.exit(1)
        finally:
            if con:
                con.close()
        print(" ".join(row))

out = open("new_data.csv", "w")
output = csv.writer(out)
for row in new_data:
    output.writerow(row)
out.close()
You are closing the connection inside the loop, so only the first row is inserted.
for row in readCSV:
    try:
        ...
    except psycopg2.DatabaseError as e:
        ...
    finally:
        if con:
            con.close()
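A minimal sketch of the corrected flow, keeping the question's names (the commit placement is an assumption, since the original never commits after the inserts): close the cursor and connection only once, after the whole loop has finished.
for row in readCSV:
    new_data = row[19]
    try:
        cur.execute("INSERT INTO testtest1234 values (" + new_data + ")")
    except psycopg2.DatabaseError as e:
        con.rollback()
        print("Error:", e)
        break

# Commit and clean up once, after the loop, so later rows still see a live connection.
con.commit()
cur.close()
con.close()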
