CSV to sqlite with utf-8 - python

I would like to load a CSV into an SQLite database. My CSV contains UTF-8 characters like (é, à, ü, ♀...).
These characters are displayed as ’ or é in my SQLite database. I have used con.text_factory = str but it doesn't change anything.
I have also tried .decode('utf8') as this question suggests, but I get the error: 'str' object has no attribute 'decode'
import sqlite3
import csv
import os

db = "mydb.sqlite"
con = sqlite3.connect(db)
con.text_factory = str  # allows utf-8 data to be stored
cursor = con.cursor()

csvfile = 'mycsv.csv'
tablename = os.path.splitext(os.path.basename(csvfile))[0]

with open(csvfile, 'r') as f:
    reader = csv.reader(f)
    header_line_from_csv = next(reader)
    columns = [h.strip() for h in header_line_from_csv]  # strips white space in header
    headers = ', '.join([f'{column} text' for column in columns])

    sql = f'CREATE TABLE {tablename} ({headers})'
    print(sql)
    cursor.execute(sql)

    query = 'insert into {0}({1}) values ({2})'
    query = query.format(tablename, ','.join(columns), ','.join('?' * len(columns)))
    print(query)

    cursor = con.cursor()
    for row in reader:
        cursor.execute(query, row)
    con.commit()

    print(cursor.rowcount)
    cursor.execute(f"SELECT * FROM {tablename}")
    print("fetchall:\n", cursor.fetchall())
    print(cursor.description)

con.close()

You can add this line at the beginning:
# -*- coding: utf-8 -*-
to guarantee that your script uses this encoding. Also try opening the CSV file with UTF-8 encoding:
with open(csvfile, 'r', encoding='utf-8') as f:
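If the file was saved with a UTF-8 byte order mark (Excel often adds one), the BOM would otherwise end up glued to the first column name. A minimal sketch of the adjusted open call, assuming the rest of the script above stays the same:

# Sketch: open the CSV explicitly as UTF-8. 'utf-8-sig' also strips a
# leading BOM, and newline='' is what the csv module recommends.
with open(csvfile, 'r', encoding='utf-8-sig', newline='') as f:
    reader = csv.reader(f)
    header_line_from_csv = next(reader)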

Related

How to convert dot to comma in floating point numbers in python module cx_Oracle?

Is it possible to download data to a CSV file with the cx_Oracle module so that the floating point numbers have a comma instead of a dot?
I need this to properly load the downloaded CSV file into another table in the Oracle database. When I try to load such a CSV file with floating point numbers, I get an error: cx_Oracle.DatabaseError: ORA-01722: invalid number
I have already solved the problem using the pandas library.
My question:
Is there a solution without using a pandas DataFrame?
def load_csv():
    conn = cx_Oracle.connect(user=db_user, password=db_userpwd, dsn=dsn, encoding="UTF-8")
    cursor = conn.cursor()
    cursor.execute(str("select * from tablename"))
    result_set = cursor.fetchall()
    with open(table_name['schemat']+"__"+table_name['tabela']+".csv", "w") as csv_file:
        csv_writer = csv.writer(csv_file, delimiter='|', lineterminator="\n", quoting=csv.QUOTE_NONNUMERIC)
        for row in result_set:
            csv_writer.writerow(row)
    #df = pandas.read_sql("select * from tablename", conn)
    #df.to_csv(table_name['schemat']+"__"+table_name['tabela']+".csv", index = False, encoding='utf-8', decimal=',', sep='|', header=False)
    cursor.close()
    conn.close()
def export_csv():
    # Open connection to Oracle DB
    conn = cx_Oracle.connect(user=db_user, password=db_userpwd, dsn=dsn, encoding="UTF-8")
    # Open cursor to Oracle DB
    cursor = conn.cursor()
    batch_size = 1
    with open(table_name['schemat']+"__"+table_name['tabela']+".csv", 'r') as csv_file:
        csv_reader = csv.reader(csv_file, delimiter='|')
        sql = sql_insert
        data = []
        for line in csv_reader:
            data.append([i for i in line])
            if len(data) % batch_size == 0:
                cursor.executemany(sql, data)
                data = []
        if data:
            cursor.executemany(sql, data)
        conn.commit()
    cursor.close()
    conn.close()
I tried to set it up by changing the session, but unfortunately it doesn't work for me.
# -*- coding: utf-8 -*-
import csv
import os
import sys
import time
import decimal
import pandas as pd
import cx_Oracle

dsn = "(DESCRIPTION=(ADDRESS=(PROTOCOL=TCP)(HOST=xxx)" \
      "(PORT=xxx))(CONNECT_DATA=(SERVICE_NAME = xxx)))"
db_user = "xxx"
db_userpwd = "xxx"

def init_session(conn, requested_tag):
    cursor = conn.cursor()
    cursor.execute("alter session set nls_numeric_characters = ', '")
    cursor.execute("select to_number(5/2) from dual")
    dual, = cursor.fetchone()
    print("dual=", repr(dual))

pool = cx_Oracle.SessionPool(user=db_user, password=db_userpwd,
                             dsn=dsn, session_callback=init_session, encoding="UTF-8")

with pool.acquire() as conn:
    # Open cursor to Oracle DB
    cursor = conn.cursor()
    cursor.execute("select value from nls_session_parameters where parameter = 'NLS_NUMERIC_CHARACTERS'")
    nls_session_parameters, = cursor.fetchone()
    print("nls_session_parameters=", repr(nls_session_parameters))
    #qryString = "select * from tablename"
    #df = pd.read_sql(qryString, conn)
    #df.to_csv(table_name['schemat']+"__"+table_name['tabela']+".csv", index = False, encoding='utf-8', decimal=',')
    cursor.execute(str("select * from tablename"))
    result_set = cursor.fetchall()
    #result, = cursor.fetchone()
    #print("result is", repr(result))
    with open(table_name['schemat']+"__"+table_name['tabela']+".csv", "w") as csv_file:
        csv_writer = csv.writer(csv_file, delimiter='|', lineterminator="\n")
        for row in result_set:
            csv_writer.writerow(row)
I would be grateful for any suggestions on how I can get the data into a CSV file without the pandas library.
Example:
problematic result: 123.45
correct result: 123,45
Another, possibly simpler option:
Create an output type handler that tells Oracle to fetch the value as a string. Then replace the period with a comma:
import cx_Oracle as oracledb

def output_type_handler(cursor, name, default_type, size, precision, scale):
    if default_type == oracledb.DB_TYPE_NUMBER:
        return cursor.var(str, arraysize=cursor.arraysize,
                          outconverter=lambda s: s.replace(".", ","))

conn = oracledb.connect("user/password@host:port/service_name")
conn.outputtypehandler = output_type_handler
with conn.cursor() as cursor:
    cursor.execute("select * from TestNumbers")
    for row in cursor:
        print(row)
Put the output type handler on the cursor if you only want to do this for one query instead of all queries.
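For example, a small sketch reusing the same output_type_handler from above but attaching it to a single cursor only:

with conn.cursor() as cursor:
    cursor.outputtypehandler = output_type_handler  # affects only this cursor
    cursor.execute("select * from TestNumbers")
    for row in cursor:
        print(row)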
You can do it with a TO_CHAR(<numeric_value>,'999999999D99999999999','NLS_NUMERIC_CHARACTERS=''.,''') conversion, such as
cursor.execute("""
SELECT TRIM(TO_CHAR(5/2,'999999999D99999999999',
'NLS_NUMERIC_CHARACTERS=''.,'''))
FROM dual
""")
result_set = cursor.fetchall()
with open(table_name['schemat']+"__"+table_name['tabela']+".csv", "w") as csv_file:
csv_writer = csv.writer(csv_file, delimiter='|', lineterminator="\n")
for row in result_set:
csv_writer.writerow(row)
By the way, switching ''.,'' to '',.'' will yield 2,50000000000 again.
Since you're writing to a text file and presumably also want to avoid any Oracle decimal format to Python binary format precision issues, fetching as a string like Anthony showed has advantages. If you want to move the decimal separator conversion cost to the DB you could combine his solution and yours by adding this to your original code:
def output_type_handler(cursor, name, default_type, size, precision, scale):
    if default_type == cx_Oracle.NUMBER:
        return cursor.var(str, arraysize=cursor.arraysize)
and then after you open the cursor (and before executing), add the handler:
cursor.outputtypehandler = output_type_handler
Since the DB does the conversion to string, the value of NLS_NUMERIC_CHARACTERS is respected and you get commas as the decimal separator.
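A minimal sketch of that combination, using the connection variables from the question (the ALTER SESSION makes Oracle render the decimal separator as a comma, and the handler keeps the values as strings so Python never re-parses them):

def output_type_handler(cursor, name, default_type, size, precision, scale):
    if default_type == cx_Oracle.NUMBER:
        # fetch NUMBER columns as strings; the DB does the formatting
        return cursor.var(str, arraysize=cursor.arraysize)

conn = cx_Oracle.connect(user=db_user, password=db_userpwd, dsn=dsn, encoding="UTF-8")
cursor = conn.cursor()
cursor.outputtypehandler = output_type_handler
cursor.execute("alter session set nls_numeric_characters = ',.'")  # ',' decimal, '.' group
cursor.execute("select * from tablename")
for row in cursor.fetchall():
    print(row)  # numeric columns arrive as e.g. '123,45'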

read oracle sql file using python and fetch results in to CSV file

I am a beginner to Python and want to read a SQL file from Python and fetch the results into a CSV file. I have tried using cx_Oracle to connect to an Oracle database. It works when I give the SQL query directly in the code, but I am not sure how to read a SQL file from Python.
Here is the code which worked when I gave the SQL query directly in the code:
import csv
import cx_Oracle

con = cx_Oracle.connect('XCM/XCM@home.com:1500/HOME')
cursor = con.cursor()
csv_file = open("exp.csv", "w")
writer = csv.writer(csv_file, delimiter='|', lineterminator="\n", quoting=csv.QUOTE_NONE)
r = cursor.execute("select * from home_parties where CREATION_DATE >= trunc(sysdate)")
for row in cursor:
    writer.writerow(row)
cursor.close()
con.close()
csv_file.close()
I tried the code below to fetch from a SQL file (sample.sql, containing the same query as above), but it didn't work:
import csv
import cx_Oracle

con = cx_Oracle.connect('XCM/XCM@home.com:1500/HOME')
cursor = con.cursor()
csv_file = open("exp.csv", "w")
writer = csv.writer(csv_file, delimiter='|', lineterminator="\n", quoting=csv.QUOTE_NONE)
f = open('C:/Users/home1/sample.sql')
full_sql = f.read()
r = cursor.execute(full_sql)
for row in cursor:
    writer.writerow(row)
cursor.close()
con.close()
csv_file.close()
Kindly help me out!!
The content of the SQL file can be read line by line within a for loop, such as:
import csv
import cx_Oracle

con = cx_Oracle.connect('XCM/XCM@home.com:1500/HOME')
cursor = con.cursor()

sql = ""
with open("C:/Users/home1/sample.sql") as f_in:  # closed automatically by the with block
    for line in f_in:
        sql += line

f_out = open("exp.csv", "w")
writer = csv.writer(f_out, delimiter='|', lineterminator="\n", quoting=csv.QUOTE_NONE)
cursor.execute(sql)
for row in cursor:
    writer.writerow(row)

cursor.close()
con.close()
f_out.close()
This handles a SQL statement spread across multiple lines.
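One thing worth checking if the file-based version fails while the inline query works: cx_Oracle executes a single statement and rejects the trailing semicolon that .sql scripts usually end with (typically ORA-00911: invalid character). A small sketch of stripping it before execution:

sql = sql.strip()
if sql.endswith(';'):
    sql = sql[:-1]  # drop the trailing semicolon from the script file
cursor.execute(sql)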
Assuming you have a sample.sql file in the same directory with the query on its first line, this reads the SQL query from that file, executes it, and saves the result into a CSV file named "exp.csv".
import cx_Oracle
import pandas as pd

con = cx_Oracle.connect('XCM/XCM@home.com:1500/HOME')
cursor = con.cursor()

with open('sample.sql') as f:
    sql_query_string = f.readline()

sql_query = pd.read_sql_query(sql_query_string, con)
sql_query.to_csv("exp.csv", sep='|')
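Note that f.readline() only picks up the first line of sample.sql; if the query spans several lines, read the whole file instead, e.g.:

with open('sample.sql') as f:
    sql_query_string = f.read()  # whole file, not just the first line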

pysqlite get column names from csv file

I have a CSV file from which I am trying to load data into a pysqlite database. I am not able to find a way to extract the first row of the file and use it automatically as the column headers of a table. I have to enter the names "manually" in the code itself, which is OK for 1-2 columns but becomes cumbersome with tens or hundreds of columns. Here is my code:
import sqlite3
import csv

f_n = 'test_data_1.csv'
f = open(f_n, 'r')
csv_reader = csv.reader(f)
header = next(csv_reader)

sqlite_file = 'survey_test_db.sqlite'
table_name01 = 'test_survey_1'

field_01 = 'analyst_name'
field_type_01 = 'text'
field_02 = 'question_name'
field_type_02 = 'text'

conn = sqlite3.connect(sqlite_file)
c = conn.cursor()
c.execute('CREATE TABLE {tn}({nf_01} {ft_01},{nf_02} {ft_02})'\
    .format(tn=table_name01, nf_01=field_01, ft_01=field_type_01, nf_02=field_02, ft_02=field_type_02))

for row in csv_reader:
    c.execute("INSERT INTO test_survey_1 VALUES (?,?)", row)
f.close()

for row in c.execute('SELECT * FROM test_survey_1'):
    print(row)

conn.commit()
conn.close()
You already have the header row in header, so you can build the column list for CREATE TABLE from it:
c.execute('CREATE TABLE {tn}({fieldlist})'.format(
    tn=table_name01,
    fieldlist=', '.join('{} TEXT'.format(name) for name in header),
))
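The INSERT statement can be built from the same header so the number of ? placeholders always matches the column count; a sketch along those lines:

insert_sql = 'INSERT INTO {tn} VALUES ({ph})'.format(
    tn=table_name01,
    ph=', '.join('?' * len(header)),
)
for row in csv_reader:
    c.execute(insert_sql, row)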
Or use an ORM, which is designed to make this sort of thing easy. SQLAlchemy example:
t = Table(table_name01, meta, *(Column(name, String()) for name in header))
t.create()
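For that snippet to run, meta has to exist and the table needs an engine to create against; a minimal self-contained sketch, reusing the file and table names from the question:

from sqlalchemy import Column, MetaData, String, Table, create_engine

engine = create_engine('sqlite:///survey_test_db.sqlite')
meta = MetaData()
t = Table(table_name01, meta, *(Column(name, String()) for name in header))
t.create(engine)  # emits the CREATE TABLE statement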
You can use pandas to read your CSV file into a DataFrame and then export it to sqlite.
import sqlite3
import pandas as pd

sqlite_file = 'survey_test_db.sqlite'
table_name01 = 'test_survey_1'

conn = sqlite3.connect(sqlite_file)
pd.read_csv('test_data_1.csv').to_sql(table_name01, con=conn)
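If the table may already exist, to_sql's if_exists parameter controls whether to fail, replace, or append, and index=False keeps the DataFrame index out of the table, e.g.:

pd.read_csv('test_data_1.csv').to_sql(table_name01, con=conn, index=False, if_exists='replace')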

How to query unicode database with ascii characters

I am currently running a query on my PostgreSQL database that ignores German characters (umlauts). However, I do not want to lose these characters and would rather have the German characters, or at least their equivalent (e.g. ä = ae), in the output of the query. Running Python 2.7.12.
When I change the encode object to replace or xmlcharrefreplace I get the following error:
psycopg2.ProgrammingError: syntax error at or near "?"
LINE 1: ?SELECT
Code Snippet:
# -*- coding: utf-8 -*-
connection_str = r'postgresql://' + user + ':' + password + '@' + host + '/' + database

def query_db(conn, sql):
    with conn.cursor() as curs:
        curs.execute(sql)
        rows = curs.fetchall()
        print("fetched %s rows from db" % len(rows))
        return rows

with psycopg2.connect(connection_str) as conn:
    for filename in files:
        # Read SQL
        sql = u""
        f = codecs.open(os.path.join(SQL_LOC, filename), "r", "utf-8")
        for line in f:
            sql += line.encode('ascii', 'replace').replace('\r\n', ' ')
        rows = query_db(conn, sql)
How can I pass a query as a unicode object with German characters?
I also tried decoding the query as UTF-8, but then I get the following error:
UnicodeEncodeError: 'ascii' codec can't encode character u'\xa0' in position 20: ordinal not in range(128)
Here is a solution to obtain their encoded equivalent. You will be able to re-encode it later and the query will not create an error:
SELECT convert_from(BYTEA 'foo ᚠ bar'::bytea, 'latin-1');
+----------------+
| convert_from |
|----------------|
| foo á<U+009A>  bar |
+----------------+
SELECT 1
Time: 0.011s
You just need to call conn.set_client_encoding("utf-8") and then you can execute unicode strings directly; SQL and results will be encoded and decoded on the fly:
$ cat psycopg2-unicode.py
import sys
import os
import psycopg2
import csv

with psycopg2.connect("") as conn:
    conn.set_client_encoding("utf-8")
    for filename in sys.argv[1:]:
        file = open(filename, "r", encoding="utf-8")
        sql = file.read()
        with conn.cursor() as cursor:
            cursor.execute(sql)
            try:
                rows = cursor.fetchall()
            except psycopg2.ProgrammingError as err:
                # No results
                continue
            with open(filename+".out", "w", encoding="utf-8", newline="") as outfile:
                csv.writer(outfile, dialect="excel-tab").writerows(rows)
$ cat sql0.sql
create temporary table t(v) as
select 'The quick brown fox jumps over the lazy dog.'
union all
select 'Zwölf große Boxkämpfer jagen Viktor quer über den Sylter Deich.'
union all
select 'Любя, съешь щипцы, — вздохнёт мэр, — кайф жгуч.'
union all
select 'Mężny bądź, chroń pułk twój i sześć flag.'
;
$ cat sql1.sql
select * from t;
$ python3 psycopg2-unicode.py sql0.sql sql1.sql
$ cat sql1.sql.out
The quick brown fox jumps over the lazy dog.
Zwölf große Boxkämpfer jagen Viktor quer über den Sylter Deich.
Любя, съешь щипцы, — вздохнёт мэр, — кайф жгуч.
Mężny bądź, chroń pułk twój i sześć flag.
A Python 2 version of this program is a little more complicated, as we need to tell the driver that we'd like return values as unicode objects. Also, the csv module I used for output does not support unicode in Python 2, so it needs a workaround. Here it is:
$ cat psycopg2-unicode2.py
from __future__ import print_function
import sys
import os
import csv
import codecs
import psycopg2
import psycopg2.extensions

psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)
psycopg2.extensions.register_type(psycopg2.extensions.UNICODEARRAY)

with psycopg2.connect("") as conn:
    conn.set_client_encoding("utf-8")
    for filename in sys.argv[1:]:
        file = codecs.open(filename, "r", encoding="utf-8")
        sql = file.read()
        with conn.cursor() as cursor:
            cursor.execute(sql)
            try:
                rows = cursor.fetchall()
            except psycopg2.ProgrammingError as err:
                # No results from SQL
                continue
            with open(filename+".out", "wb") as outfile:
                for row in rows:
                    row_utf8 = [v.encode('utf-8') for v in row]
                    csv.writer(outfile, dialect="excel-tab").writerow(row_utf8)

Exporting CSV via Python with headers & date formatting

I'm looking to format the first column of my Export01.csv as dd/mm/yyyy; sadly the current format when I export it is dd/mm/yyyy hh:mm.
Could someone help me tweak my code to change the date format during the export, if possible?
import sys
import cx_Oracle
import csv

connection = cx_Oracle.connect('user', 'password', 'ORPM2')
cursor = connection.cursor()
SQL = "SELECT * FROM EXPORT01"
cursor.execute(SQL)

filename = "C:\Projects\SQL_Export\Export01.csv"
with open(filename, "wb") as fout:
    writer = csv.writer(fout)
    writer.writerow([i[0] for i in cursor.description])  # heading row
    writer.writerows(cursor.fetchall())
    fout.close()

cursor.close()
connection.close()
I've added a sample of the first two columns of data in Export01:

WEEK_ENDING       ORDER_HEADLINE
12/02/2016 00:00  Headline
15/01/2016 00:00  Headline
15/01/2016 00:00  Headline
If you handle the fetchall a row at a time, you could then convert the first column entry as follows:
from datetime import datetime
import sys
import cx_Oracle
import csv

connection = cx_Oracle.connect('user', 'password', 'ORPM2')
cursor = connection.cursor()
SQL = "SELECT * FROM EXPORT01"
cursor.execute(SQL)

filename = r"C:\Projects\SQL_Export\Export01.csv"
with open(filename, "wb") as fout:
    writer = csv.writer(fout)
    writer.writerow([i[0] for i in cursor.description])  # heading row
    for row in cursor.fetchall():
        cols = list(row)
        cols[0] = cols[0].strftime("%d/%m/%Y")
        writer.writerow(cols)

cursor.close()
connection.close()
Also note, the with statement you are using will automatically close the file when you leave its scope. I would also recommend you prefix your file path with r to avoid Python trying to escape any of the backslashes in the path.
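Alternatively, you could let Oracle do the formatting and select the date column through TO_CHAR; a sketch that assumes WEEK_ENDING is the first column shown in the sample (the remaining columns would need to be listed explicitly):

SQL = """SELECT TO_CHAR(WEEK_ENDING, 'DD/MM/YYYY') AS WEEK_ENDING, ORDER_HEADLINE
         FROM EXPORT01"""
cursor.execute(SQL)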
