Is it possible to download data to a csv file by the cx_Oracle module, so that the floating point numbers have a comma instead of a dot?
I need this functionality to properly load the downloaded csv file into another table in the Oracle database. When I try to load such a csv file with floating point numbers, I get an error: cx_Oracle.DatabaseError: ORA-01722: invalid number
I have already solved the problem using the pandas library.
My question:
Is there a solution without the use of data frame pandas.
def load_csv():
conn = cx_Oracle.connect(user=db_user, password=db_userpwd, dsn=dsn, encoding="UTF-8")
cursor = conn.cursor()
cursor.execute(str("select * from tablename"))
result_set = cursor.fetchall()
with open(table_name['schemat']+"__"+table_name['tabela']+".csv", "w") as csv_file:
csv_writer = csv.writer(csv_file, delimiter='|', lineterminator="\n", quoting=csv.QUOTE_NONNUMERIC)
for row in result_set:
csv_writer.writerow(row)
#df = pandas.read_sql("select * from tablename", conn)
#df.to_csv(table_name['schemat']+"__"+table_name['tabela']+".csv", index = False, encoding='utf-8', decimal=',', sep='|', header=False)
cursor.close()
conn.close()
def export_csv():
# Open connection to Oracle DB
conn = cx_Oracle.connect(user=db_user, password=db_userpwd, dsn=dsn, encoding="UTF-8")
# Open cursor to Oracle DB
cursor = conn.cursor()
batch_size = 1
with open(table_name['schemat']+"__"+table_name['tabela']+".csv", 'r') as csv_file:
csv_reader = csv.reader(csv_file, delimiter='|' )
sql = sql_insert
data = []
for line in csv_reader:
data.append([i for i in line])
if len(data) % batch_size == 0:
cursor.executemany(sql, data)
data = []
if data:
cursor.executemany(sql, data)
conn.commit()
cursor.close()
conn.close()
I tried to set it up by changing the session, but unfortunately it doesn't work for me.
# -*- coding: utf-8 -*-
import csv
import os
import sys
import time
import decimal
import pandas as pd
import cx_Oracle
dsn = "(DESCRIPTION=(ADDRESS=(PROTOCOL=TCP)(HOST=xxx)" \
"(PORT=xxx))(CONNECT_DATA=(SERVICE_NAME = xxx)))"
db_user = "xxx"
db_userpwd = "xxx"
def init_session(conn, requested_tag):
cursor = conn.cursor()
cursor.execute("alter session set nls_numeric_characters = ', '")
cursor.execute("select to_number(5/2) from dual")
dual, = cursor.fetchone()
print("dual=", repr(dual))
pool = cx_Oracle.SessionPool(user=db_user, password=db_userpwd,
dsn=dsn, session_callback=init_session, encoding="UTF-8")
with pool.acquire() as conn:
# Open cursor to Oracle DB
cursor = conn.cursor()
cursor.execute("select value from nls_session_parameters where parameter = 'NLS_NUMERIC_CHARACTERS'")
nls_session_parameters, = cursor.fetchone()
print("nls_session_parameters=", repr(nls_session_parameters))
#qryString = "select * from tablename"
#df = pd.read_sql(qryString,conn)
#df.to_csv(table_name['schemat']+"__"+table_name['tabela']+".csv", index = False, encoding='utf-8', decimal=',')
cursor.execute(str("select * from tablename"))
result_set = cursor.fetchall()
#result, = cursor.fetchone()
#print("result is", repr(result))
with open(table_name['schemat']+"__"+table_name['tabela']+".csv", "w") as csv_file:
csv_writer = csv.writer(csv_file, delimiter='|', lineterminator="\n")
for row in result_set:
csv_writer.writerow(row)
I would be grateful for any suggestions on how I can get data to csv file without pandas library.
example:
problematic result: 123.45
correct result: 123,45
Another, possibly simpler option:
Create an output type handler that tells Oracle to fetch the value as a string. Then replace the period with a comma:
import cx_Oracle as oracledb
def output_type_handler(cursor, name, default_type, size, precision, scale):
if default_type == oracledb.DB_TYPE_NUMBER:
return cursor.var(str, arraysize=cursor.arraysize,
outconverter=lambda s: s.replace(".", ","))
conn = oracledb.connect("user/password#host:port/service_name")
conn.outputtypehandler = output_type_handler
with conn.cursor() as cursor:
cursor.execute("select * from TestNumbers")
for row in cursor:
print(row)
Put the output type handler on the cursor if you only want to do this for one query instead of all queries.
You can do by TO_CHAR(<numeric_value>,'999999999D99999999999','NLS_NUMERIC_CHARACTERS=''.,''') conversion such as
cursor.execute("""
SELECT TRIM(TO_CHAR(5/2,'999999999D99999999999',
'NLS_NUMERIC_CHARACTERS=''.,'''))
FROM dual
""")
result_set = cursor.fetchall()
with open(table_name['schemat']+"__"+table_name['tabela']+".csv", "w") as csv_file:
csv_writer = csv.writer(csv_file, delimiter='|', lineterminator="\n")
for row in result_set:
csv_writer.writerow(row)
btw, switching ''.,'' to '',.'' will yield 2,50000000000 again
Since you're writing to a text file and presumably also want to avoid any Oracle decimal format to Python binary format precision issues, fetching as a string like Anthony showed has advantages. If you want to move the decimal separator conversion cost to the DB you could combine his solution and yours by adding this to your original code:
def output_type_handler(cursor, name, default_type, size, precision, scale):
if default_type == cx_Oracle.NUMBER:
return cursor.var(str, arraysize=cursor.arraysize)
and then after you open the cursor (and before executing), add the handler:
cursor.outputtypehandler = output_type_handler
Since the DB does the conversion to string, the value of NLS_NUMERIC_CHARACTERS is respected and you get commas as the decimal separator.
I want to use python to turn mysql into json, but when I use the following code, the result is json line by line, not a whole set of json
import pymysql
import json
sql="**"
conn=pymysql.connect(host='localhost',user='root',passwd="abc",db="mydatabase",port=123)
cur=conn.cursor()
cur.execute(sql)
data=cur.fetchall()
fields=cur.description
cur.close ()
conn.close()
column_list = []
for i in fields:
column_list.append(i[0])
for row in data:
result = {}
result[column_list[0]] = row[0]
result[column_list[1]] = row[1]
result[column_list[2]] = str(row[2])
result[column_list[3]] = row[3]
j=json.dumps(result)
print(j)
You need to pull out the json.dumps() line from the for loop. This is resulting in conversion of into json with each iteration and printing in each iteration.
Your code should look like this :
import pymysql
import json
sql="**"
conn=pymysql.connect(host='localhost',user='root',passwd="abc",db="mydatabase",port=123)
cur=conn.cursor()
cur.execute(sql)
data=cur.fetchall()
fields=cur.description
cur.close ()
conn.close()
column_list = []
for i in fields:
column_list.append(i[0])
final_resultset = []
for row in data:
result = {}
result[column_list[0]] = row[0]
result[column_list[1]] = row[1]
result[column_list[2]] = str(row[2])
result[column_list[3]] = row[3]
final_resultset.append(result)
j=json.dumps(final_resultset)
print(j)
You can use below
cur.execute('''SELECT * FROM Users WHERE id=1''')
row_headers=[x[0] for x in cur.description] #this will extract row headers
rv = cur.fetchall()
json_data=[]
for result in rv:
json_data.append(dict(zip(row_headers,result)))
j=json.dumps(json_data)
print(j)
I have created the below code to import data in CSV file from PostgreSQL DB. However, I want to create multiple files based on date.
import psycopg2
import csv
conn_string = "host='' port='5432' user='' password='' dbname=''"
conn = psycopg2.connect(conn_string)
cur=conn.cursor()
query="select * from sample where date between '' and ''"
cur.execute(query)
title=[i[0] for i in cur.description]
result=cur.fetchall()
csvfile=open('filename.csv','w')
if result:
c = csv.writer(csvfile)
c.writerow(title)
c.writerows(result)
cur.close()
conn.close()
The files should be split similar to the below format:
01jan.csv
02jan.csv
etc.
You can loop over the query results and open a new file whenever the row date changes. The results must be ordered by date, otherwise you can lose some data.
import psycopg2
import psycopg2.extras
import csv
import datetime
# conn_string = ...
conn = psycopg2.connect(conn_string)
# we need results in dict
cur = conn.cursor(cursor_factory = psycopg2.extras.DictCursor)
# order by date - important!
query = "select * from sample where date between '2018-01-01' and '2018-01-10' order by date"
cur.execute(query)
title = [i[0] for i in cur.description]
date = None
writer = None
csvfile = None
for row in cur:
if date != row['date']:
# when date changes we should close current file (if opened)
# and open a new one with name based on date
if csvfile:
csvfile.close()
date = row['date']
filename = date.strftime("%d%b")+ '.csv'
csvfile = open(filename, 'w', newline='')
writer = csv.writer(csvfile)
writer.writerow(title)
writer.writerow(row)
cur.close()
conn.close()
The above solution is acceptable for rather small datasets. If the amount of data for one day is large, you should rather use copy_expert()
cur = conn.cursor()
# example loop for ten days of Jan 2018
for day in range(1, 10):
date = datetime.date(2018, 1, day)
filename = date.strftime("%d%b")+ '.csv'
command = 'copy (select * from sample where date = %s) to stdout with csv header'
sql = cur.mogrify(command, [date])
with open(filename, 'w') as file:
cur.copy_expert(sql, file)
I have a csv file from which I am trying to load data into pysqlite database. I am not able to find a way to extract the first row of the file and get it into the database automatically as column headers of a table. I have to enter the names "manually" in the code itself, which is ok for 1-2 columsn but becomes cumbersome with tens or hundreds of columns. Here is my code:
import sqlite3
import csv
f_n = 'test_data_1.csv'
f = open( f_n , 'r' )
csv_reader = csv.reader(f)
header = next( csv_reader )
sqlite_file = 'survey_test_db.sqlite'
table_name01 = 'test_survey_1'
field_01 = 'analyst_name'
field_type_01 = 'text'
field_02 = 'question_name'
field_type_02 = 'text'
conn = sqlite3.connect( sqlite_file )
c = conn.cursor()
c.execute('CREATE TABLE {tn}({nf_01} {ft_01},{nf_02} {ft_02})'\
.format(tn = table_name01 , nf_01 = field_01 , ft_01 = field_type_01, nf_02 = field_02 , ft_02 = field_type_02 ))
for row in csv_reader:
c.execute("INSERT INTO test_survey_1 VALUES (?,?)",row)
f.close()
for row in c.execute('SELECT * FROM test_survey_1'):
print(row)
conn.commit()
conn.close()
c.execute('CREATE TABLE {tn}({fieldlist})'.format(
tn=table_name01,
fieldlist=', '.join('{} TEXT'.format(name) for name in header),
))
Or use a ORM which is designed to make this sort of thing easy. SQLAlchemy example:
t = Table(table_name01, meta, *(Column(name, String()) for name in header))
t.create()
You can use pandas to read your csv file into DataFrame and then export
it to sqlite.
import sqlite3
import pandas as pd
sqlite_file = 'survey_test_db.sqlite'
table_name01 = 'test_survey_1'
conn = sqlite3.connect(sqlite_file)
pd.read_csv('test_data_1.csv').to_sql(table_name01, con=con)
I'm looking to format the first column of my Export01.csv into dd/mm/yyyy. sadly the current format when I export it is dd/mm/yy mm:mm.
Could someone help me tweak my code to make the changes to the date during the import procedure if it's possible?
import sys
import cx_Oracle
import csv
connection = cx_Oracle.connect('user','password','ORPM2')
cursor = connection.cursor()
SQL="SELECT * FROM EXPORT01"
cursor.execute(SQL)
filename="C:\Projects\SQL_Export\Export01.csv"
with open(filename,"wb") as fout:
writer = csv.writer(fout)
writer.writerow([ i[0] for i in cursor.description ]) # heading row
writer.writerows(cursor.fetchall())
fout.close()
cursor.close()
connection.close()
I've added a sample of the first two columns of data in Export01
WEEK_ENDING ORDER_HEADLINE
12/02/2016 00:00 Headline
15/01/2016 00:00 Headline
15/01/2016 00:00 Headline
If you handle the fetchall a row at a time, you could then convert the first column entry as follows:
from datetime import datetime
import sys
import cx_Oracle
import csv
connection = cx_Oracle.connect('user','password', 'ORPM2')
cursor = connection.cursor()
SQL="SELECT * FROM EXPORT01"
cursor.execute(SQL)
filename = r"C:\Projects\SQL_Export\Export01.csv"
with open(filename, "wb") as fout:
writer = csv.writer(fout)
writer.writerow([i[0] for i in cursor.description ]) # heading row
for row in cursor.fetchall():
cols = list(row)
cols[0] = cols[0].strftime("%d/%m/%Y")
writer.writerow(cols)
cursor.close()
connection.close()
Also note, the with statement you are using will automatically close the file when you leave its scope. I would also recommend you prefix your file path with r to avoid Python trying to escape any of the backslashes in the path.