pysqlite get column names from csv file - python

I have a csv file from which I am trying to load data into pysqlite database. I am not able to find a way to extract the first row of the file and get it into the database automatically as column headers of a table. I have to enter the names "manually" in the code itself, which is ok for 1-2 columsn but becomes cumbersome with tens or hundreds of columns. Here is my code:
import sqlite3
import csv
f_n = 'test_data_1.csv'
f = open( f_n , 'r' )
csv_reader = csv.reader(f)
header = next( csv_reader )
sqlite_file = 'survey_test_db.sqlite'
table_name01 = 'test_survey_1'
field_01 = 'analyst_name'
field_type_01 = 'text'
field_02 = 'question_name'
field_type_02 = 'text'
conn = sqlite3.connect( sqlite_file )
c = conn.cursor()
c.execute('CREATE TABLE {tn}({nf_01} {ft_01},{nf_02} {ft_02})'\
.format(tn = table_name01 , nf_01 = field_01 , ft_01 = field_type_01, nf_02 = field_02 , ft_02 = field_type_02 ))
for row in csv_reader:
c.execute("INSERT INTO test_survey_1 VALUES (?,?)",row)
f.close()
for row in c.execute('SELECT * FROM test_survey_1'):
print(row)
conn.commit()
conn.close()

c.execute('CREATE TABLE {tn}({fieldlist})'.format(
tn=table_name01,
fieldlist=', '.join('{} TEXT'.format(name) for name in header),
))
Or use a ORM which is designed to make this sort of thing easy. SQLAlchemy example:
t = Table(table_name01, meta, *(Column(name, String()) for name in header))
t.create()

You can use pandas to read your csv file into DataFrame and then export
it to sqlite.
import sqlite3
import pandas as pd
sqlite_file = 'survey_test_db.sqlite'
table_name01 = 'test_survey_1'
conn = sqlite3.connect(sqlite_file)
pd.read_csv('test_data_1.csv').to_sql(table_name01, con=con)

Related

SQLITE3: Find which one are the double records when importing from excel

I'm using python to import from Excel information to my sqlite3 database. I need to avoid the same record to be added twice, but I'd also need to know which are the records added succeffully and which are the one that were already presented in the database. This is how I'm importing with pandas:
def import(self):
file = filedialog.askopenfilename(filetypes=(("Excel File", "*.csv"), ("All Files","*.*")), title="Select Excel")
with open(file, "r", encoding='utf-8') as f:
conn = sqlite3.connect('database/save.db')
double = []
ok = []
c = conn.cursor()
c.execute("SELECT rowid, * FROM record")
r = c.fetchall()
imported = pd.read_csv(f, keep_default_na=False, sep=';')
imported.to_sql('record', conn, if_exists='replace', index = False)
for row in r:
if row in imported:
double.append(row)
else:
ok.append(row)
conn.commit()
conn.close()
Using the for loop I'm able to save the duplicate row to the list but not the one inserted correctly which is always empty
Thanks for the help!

How to convert dot to comma in floating point numbers in python module cx_Oracle?

Is it possible to download data to a csv file by the cx_Oracle module, so that the floating point numbers have a comma instead of a dot?
I need this functionality to properly load the downloaded csv file into another table in the Oracle database. When I try to load such a csv file with floating point numbers, I get an error: cx_Oracle.DatabaseError: ORA-01722: invalid number
I have already solved the problem using the pandas library.
My question:
Is there a solution without the use of data frame pandas.
def load_csv():
conn = cx_Oracle.connect(user=db_user, password=db_userpwd, dsn=dsn, encoding="UTF-8")
cursor = conn.cursor()
cursor.execute(str("select * from tablename"))
result_set = cursor.fetchall()
with open(table_name['schemat']+"__"+table_name['tabela']+".csv", "w") as csv_file:
csv_writer = csv.writer(csv_file, delimiter='|', lineterminator="\n", quoting=csv.QUOTE_NONNUMERIC)
for row in result_set:
csv_writer.writerow(row)
#df = pandas.read_sql("select * from tablename", conn)
#df.to_csv(table_name['schemat']+"__"+table_name['tabela']+".csv", index = False, encoding='utf-8', decimal=',', sep='|', header=False)
cursor.close()
conn.close()
def export_csv():
# Open connection to Oracle DB
conn = cx_Oracle.connect(user=db_user, password=db_userpwd, dsn=dsn, encoding="UTF-8")
# Open cursor to Oracle DB
cursor = conn.cursor()
batch_size = 1
with open(table_name['schemat']+"__"+table_name['tabela']+".csv", 'r') as csv_file:
csv_reader = csv.reader(csv_file, delimiter='|' )
sql = sql_insert
data = []
for line in csv_reader:
data.append([i for i in line])
if len(data) % batch_size == 0:
cursor.executemany(sql, data)
data = []
if data:
cursor.executemany(sql, data)
conn.commit()
cursor.close()
conn.close()
I tried to set it up by changing the session, but unfortunately it doesn't work for me.
# -*- coding: utf-8 -*-
import csv
import os
import sys
import time
import decimal
import pandas as pd
import cx_Oracle
dsn = "(DESCRIPTION=(ADDRESS=(PROTOCOL=TCP)(HOST=xxx)" \
"(PORT=xxx))(CONNECT_DATA=(SERVICE_NAME = xxx)))"
db_user = "xxx"
db_userpwd = "xxx"
def init_session(conn, requested_tag):
cursor = conn.cursor()
cursor.execute("alter session set nls_numeric_characters = ', '")
cursor.execute("select to_number(5/2) from dual")
dual, = cursor.fetchone()
print("dual=", repr(dual))
pool = cx_Oracle.SessionPool(user=db_user, password=db_userpwd,
dsn=dsn, session_callback=init_session, encoding="UTF-8")
with pool.acquire() as conn:
# Open cursor to Oracle DB
cursor = conn.cursor()
cursor.execute("select value from nls_session_parameters where parameter = 'NLS_NUMERIC_CHARACTERS'")
nls_session_parameters, = cursor.fetchone()
print("nls_session_parameters=", repr(nls_session_parameters))
#qryString = "select * from tablename"
#df = pd.read_sql(qryString,conn)
#df.to_csv(table_name['schemat']+"__"+table_name['tabela']+".csv", index = False, encoding='utf-8', decimal=',')
cursor.execute(str("select * from tablename"))
result_set = cursor.fetchall()
#result, = cursor.fetchone()
#print("result is", repr(result))
with open(table_name['schemat']+"__"+table_name['tabela']+".csv", "w") as csv_file:
csv_writer = csv.writer(csv_file, delimiter='|', lineterminator="\n")
for row in result_set:
csv_writer.writerow(row)
I would be grateful for any suggestions on how I can get data to csv file without pandas library.
example:
problematic result: 123.45
correct result: 123,45
Another, possibly simpler option:
Create an output type handler that tells Oracle to fetch the value as a string. Then replace the period with a comma:
import cx_Oracle as oracledb
def output_type_handler(cursor, name, default_type, size, precision, scale):
if default_type == oracledb.DB_TYPE_NUMBER:
return cursor.var(str, arraysize=cursor.arraysize,
outconverter=lambda s: s.replace(".", ","))
conn = oracledb.connect("user/password#host:port/service_name")
conn.outputtypehandler = output_type_handler
with conn.cursor() as cursor:
cursor.execute("select * from TestNumbers")
for row in cursor:
print(row)
Put the output type handler on the cursor if you only want to do this for one query instead of all queries.
You can do by TO_CHAR(<numeric_value>,'999999999D99999999999','NLS_NUMERIC_CHARACTERS=''.,''') conversion such as
cursor.execute("""
SELECT TRIM(TO_CHAR(5/2,'999999999D99999999999',
'NLS_NUMERIC_CHARACTERS=''.,'''))
FROM dual
""")
result_set = cursor.fetchall()
with open(table_name['schemat']+"__"+table_name['tabela']+".csv", "w") as csv_file:
csv_writer = csv.writer(csv_file, delimiter='|', lineterminator="\n")
for row in result_set:
csv_writer.writerow(row)
btw, switching ''.,'' to '',.'' will yield 2,50000000000 again
Since you're writing to a text file and presumably also want to avoid any Oracle decimal format to Python binary format precision issues, fetching as a string like Anthony showed has advantages. If you want to move the decimal separator conversion cost to the DB you could combine his solution and yours by adding this to your original code:
def output_type_handler(cursor, name, default_type, size, precision, scale):
if default_type == cx_Oracle.NUMBER:
return cursor.var(str, arraysize=cursor.arraysize)
and then after you open the cursor (and before executing), add the handler:
cursor.outputtypehandler = output_type_handler
Since the DB does the conversion to string, the value of NLS_NUMERIC_CHARACTERS is respected and you get commas as the decimal separator.

How to import data into multiple CSV files using Python based on date

I have created the below code to import data in CSV file from PostgreSQL DB. However, I want to create multiple files based on date.
import psycopg2
import csv
conn_string = "host='' port='5432' user='' password='' dbname=''"
conn = psycopg2.connect(conn_string)
cur=conn.cursor()
query="select * from sample where date between '' and ''"
cur.execute(query)
title=[i[0] for i in cur.description]
result=cur.fetchall()
csvfile=open('filename.csv','w')
if result:
c = csv.writer(csvfile)
c.writerow(title)
c.writerows(result)
cur.close()
conn.close()
The files should be split similar to the below format:
01jan.csv
02jan.csv
etc.
You can loop over the query results and open a new file whenever the row date changes. The results must be ordered by date, otherwise you can lose some data.
import psycopg2
import psycopg2.extras
import csv
import datetime
# conn_string = ...
conn = psycopg2.connect(conn_string)
# we need results in dict
cur = conn.cursor(cursor_factory = psycopg2.extras.DictCursor)
# order by date - important!
query = "select * from sample where date between '2018-01-01' and '2018-01-10' order by date"
cur.execute(query)
title = [i[0] for i in cur.description]
date = None
writer = None
csvfile = None
for row in cur:
if date != row['date']:
# when date changes we should close current file (if opened)
# and open a new one with name based on date
if csvfile:
csvfile.close()
date = row['date']
filename = date.strftime("%d%b")+ '.csv'
csvfile = open(filename, 'w', newline='')
writer = csv.writer(csvfile)
writer.writerow(title)
writer.writerow(row)
cur.close()
conn.close()
The above solution is acceptable for rather small datasets. If the amount of data for one day is large, you should rather use copy_expert()
cur = conn.cursor()
# example loop for ten days of Jan 2018
for day in range(1, 10):
date = datetime.date(2018, 1, day)
filename = date.strftime("%d%b")+ '.csv'
command = 'copy (select * from sample where date = %s) to stdout with csv header'
sql = cur.mogrify(command, [date])
with open(filename, 'w') as file:
cur.copy_expert(sql, file)

Python - write headers to csv

Currently i am writing query in python which export data from oracle dbo to .csv file. I am not sure how to write headers within file.
try:
connection = cx_Oracle.connect('user','pass','tns_name')
cursor = connection.cursor()
print "connected"
try:
query = """select * from """ .format(line_name)
tmp = cursor.execute(query)
results = tmp.fetchall()
except:
pass
except:
print IOError
filename='{0}.csv'.format(line_name)
csv_file = open(filename,'wb')
if results:
myFile = csv.writer(csv_file)
myFile.writerows(results)
else:
print "null"
csv_file.close()
you can ethier do this after executing your query:
columns = [i[0] for i in cursor.description]
so you get
query = """select * from """ .format(line_name)
tmp = cursor.execute(query)
columns = [i[0] for i in cursor.description]
results = tmp.fetchall()
and then do:
if results:
myFile = csv.writer(csv_file)
myFile.writerow(columns)
myFile.writerows(results)
or you can convert result to a dictionary and use DictWriter witch accepts fieldnames

Better ways to print out column names when using cx_Oracle

Found an example using cx_Oracle, this example shows all the information of Cursor.description.
import cx_Oracle
from pprint import pprint
connection = cx_Oracle.Connection("%s/%s#%s" % (dbuser, dbpasswd, oracle_sid))
cursor = cx_Oracle.Cursor(connection)
sql = "SELECT * FROM your_table"
cursor.execute(sql)
data = cursor.fetchall()
print "(name, type_code, display_size, internal_size, precision, scale, null_ok)"
pprint(cursor.description)
pprint(data)
cursor.close()
connection.close()
What I wanted to see was the list of Cursor.description[0](name), so I changed the code:
import cx_Oracle
import pprint
connection = cx_Oracle.Connection("%s/%s#%s" % (dbuser, dbpasswd, oracle_sid))
cursor = cx_Oracle.Cursor(connection)
sql = "SELECT * FROM your_table"
cursor.execute(sql)
data = cursor.fetchall()
col_names = []
for i in range(0, len(cursor.description)):
col_names.append(cursor.description[i][0])
pp = pprint.PrettyPrinter(width=1024)
pp.pprint(col_names)
pp.pprint(data)
cursor.close()
connection.close()
I think there will be better ways to print out the names of columns. Please get me alternatives to the Python beginner. :-)
You can use list comprehension as an alternative to get the column names:
col_names = [row[0] for row in cursor.description]
Since cursor.description returns a list of 7-element tuples you can get the 0th element which is a column name.
Here the code.
import csv
import sys
import cx_Oracle
db = cx_Oracle.connect('user/pass#host:1521/service_name')
SQL = "select * from dual"
print(SQL)
cursor = db.cursor()
f = open("C:\dual.csv", "w")
writer = csv.writer(f, lineterminator="\n", quoting=csv.QUOTE_NONNUMERIC)
r = cursor.execute(SQL)
#this takes the column names
col_names = [row[0] for row in cursor.description]
writer.writerow(col_names)
for row in cursor:
writer.writerow(row)
f.close()
The SQLAlchemy source code is a good starting point for robust methods of database introspection. Here is how SQLAlchemy reflects table names from Oracle:
SELECT table_name FROM all_tables
WHERE nvl(tablespace_name, 'no tablespace') NOT IN ('SYSTEM', 'SYSAUX')
AND OWNER = :owner
AND IOT_NAME IS NULL

Categories

Resources