Exporting CSV via Python with headers & date formatting - python

I'm looking to format the first column of my Export01.csv into dd/mm/yyyy. sadly the current format when I export it is dd/mm/yy mm:mm.
Could someone help me tweak my code to make the changes to the date during the import procedure if it's possible?
import sys
import cx_Oracle
import csv
connection = cx_Oracle.connect('user','password','ORPM2')
cursor = connection.cursor()
SQL="SELECT * FROM EXPORT01"
cursor.execute(SQL)
filename="C:\Projects\SQL_Export\Export01.csv"
with open(filename,"wb") as fout:
writer = csv.writer(fout)
writer.writerow([ i[0] for i in cursor.description ]) # heading row
writer.writerows(cursor.fetchall())
fout.close()
cursor.close()
connection.close()
I've added a sample of the first two columns of data in Export01
WEEK_ENDING ORDER_HEADLINE
12/02/2016 00:00 Headline
15/01/2016 00:00 Headline
15/01/2016 00:00 Headline

If you handle the fetchall a row at a time, you could then convert the first column entry as follows:
from datetime import datetime
import sys
import cx_Oracle
import csv
connection = cx_Oracle.connect('user','password', 'ORPM2')
cursor = connection.cursor()
SQL="SELECT * FROM EXPORT01"
cursor.execute(SQL)
filename = r"C:\Projects\SQL_Export\Export01.csv"
with open(filename, "wb") as fout:
writer = csv.writer(fout)
writer.writerow([i[0] for i in cursor.description ]) # heading row
for row in cursor.fetchall():
cols = list(row)
cols[0] = cols[0].strftime("%d/%m/%Y")
writer.writerow(cols)
cursor.close()
connection.close()
Also note, the with statement you are using will automatically close the file when you leave its scope. I would also recommend you prefix your file path with r to avoid Python trying to escape any of the backslashes in the path.

Related

How to convert dot to comma in floating point numbers in python module cx_Oracle?

Is it possible to download data to a csv file by the cx_Oracle module, so that the floating point numbers have a comma instead of a dot?
I need this functionality to properly load the downloaded csv file into another table in the Oracle database. When I try to load such a csv file with floating point numbers, I get an error: cx_Oracle.DatabaseError: ORA-01722: invalid number
I have already solved the problem using the pandas library.
My question:
Is there a solution without the use of data frame pandas.
def load_csv():
conn = cx_Oracle.connect(user=db_user, password=db_userpwd, dsn=dsn, encoding="UTF-8")
cursor = conn.cursor()
cursor.execute(str("select * from tablename"))
result_set = cursor.fetchall()
with open(table_name['schemat']+"__"+table_name['tabela']+".csv", "w") as csv_file:
csv_writer = csv.writer(csv_file, delimiter='|', lineterminator="\n", quoting=csv.QUOTE_NONNUMERIC)
for row in result_set:
csv_writer.writerow(row)
#df = pandas.read_sql("select * from tablename", conn)
#df.to_csv(table_name['schemat']+"__"+table_name['tabela']+".csv", index = False, encoding='utf-8', decimal=',', sep='|', header=False)
cursor.close()
conn.close()
def export_csv():
# Open connection to Oracle DB
conn = cx_Oracle.connect(user=db_user, password=db_userpwd, dsn=dsn, encoding="UTF-8")
# Open cursor to Oracle DB
cursor = conn.cursor()
batch_size = 1
with open(table_name['schemat']+"__"+table_name['tabela']+".csv", 'r') as csv_file:
csv_reader = csv.reader(csv_file, delimiter='|' )
sql = sql_insert
data = []
for line in csv_reader:
data.append([i for i in line])
if len(data) % batch_size == 0:
cursor.executemany(sql, data)
data = []
if data:
cursor.executemany(sql, data)
conn.commit()
cursor.close()
conn.close()
I tried to set it up by changing the session, but unfortunately it doesn't work for me.
# -*- coding: utf-8 -*-
import csv
import os
import sys
import time
import decimal
import pandas as pd
import cx_Oracle
dsn = "(DESCRIPTION=(ADDRESS=(PROTOCOL=TCP)(HOST=xxx)" \
"(PORT=xxx))(CONNECT_DATA=(SERVICE_NAME = xxx)))"
db_user = "xxx"
db_userpwd = "xxx"
def init_session(conn, requested_tag):
cursor = conn.cursor()
cursor.execute("alter session set nls_numeric_characters = ', '")
cursor.execute("select to_number(5/2) from dual")
dual, = cursor.fetchone()
print("dual=", repr(dual))
pool = cx_Oracle.SessionPool(user=db_user, password=db_userpwd,
dsn=dsn, session_callback=init_session, encoding="UTF-8")
with pool.acquire() as conn:
# Open cursor to Oracle DB
cursor = conn.cursor()
cursor.execute("select value from nls_session_parameters where parameter = 'NLS_NUMERIC_CHARACTERS'")
nls_session_parameters, = cursor.fetchone()
print("nls_session_parameters=", repr(nls_session_parameters))
#qryString = "select * from tablename"
#df = pd.read_sql(qryString,conn)
#df.to_csv(table_name['schemat']+"__"+table_name['tabela']+".csv", index = False, encoding='utf-8', decimal=',')
cursor.execute(str("select * from tablename"))
result_set = cursor.fetchall()
#result, = cursor.fetchone()
#print("result is", repr(result))
with open(table_name['schemat']+"__"+table_name['tabela']+".csv", "w") as csv_file:
csv_writer = csv.writer(csv_file, delimiter='|', lineterminator="\n")
for row in result_set:
csv_writer.writerow(row)
I would be grateful for any suggestions on how I can get data to csv file without pandas library.
example:
problematic result: 123.45
correct result: 123,45
Another, possibly simpler option:
Create an output type handler that tells Oracle to fetch the value as a string. Then replace the period with a comma:
import cx_Oracle as oracledb
def output_type_handler(cursor, name, default_type, size, precision, scale):
if default_type == oracledb.DB_TYPE_NUMBER:
return cursor.var(str, arraysize=cursor.arraysize,
outconverter=lambda s: s.replace(".", ","))
conn = oracledb.connect("user/password#host:port/service_name")
conn.outputtypehandler = output_type_handler
with conn.cursor() as cursor:
cursor.execute("select * from TestNumbers")
for row in cursor:
print(row)
Put the output type handler on the cursor if you only want to do this for one query instead of all queries.
You can do by TO_CHAR(<numeric_value>,'999999999D99999999999','NLS_NUMERIC_CHARACTERS=''.,''') conversion such as
cursor.execute("""
SELECT TRIM(TO_CHAR(5/2,'999999999D99999999999',
'NLS_NUMERIC_CHARACTERS=''.,'''))
FROM dual
""")
result_set = cursor.fetchall()
with open(table_name['schemat']+"__"+table_name['tabela']+".csv", "w") as csv_file:
csv_writer = csv.writer(csv_file, delimiter='|', lineterminator="\n")
for row in result_set:
csv_writer.writerow(row)
btw, switching ''.,'' to '',.'' will yield 2,50000000000 again
Since you're writing to a text file and presumably also want to avoid any Oracle decimal format to Python binary format precision issues, fetching as a string like Anthony showed has advantages. If you want to move the decimal separator conversion cost to the DB you could combine his solution and yours by adding this to your original code:
def output_type_handler(cursor, name, default_type, size, precision, scale):
if default_type == cx_Oracle.NUMBER:
return cursor.var(str, arraysize=cursor.arraysize)
and then after you open the cursor (and before executing), add the handler:
cursor.outputtypehandler = output_type_handler
Since the DB does the conversion to string, the value of NLS_NUMERIC_CHARACTERS is respected and you get commas as the decimal separator.

read oracle sql file using python and fetch results in to CSV file

I am beginner to python and want to read a sql file from python and fetch those results in to CSV file. I have tried usin CX_oracle to connect to oracle database. It works when i directly give the sql query in the code. But, I am not sure how to read a sql file from python.
Here is the code which worked when i gave sql query directly in the code:
import csv
import cx_Oracle
con = cx_Oracle.connect('XCM/XCM#home.com:1500/HOME')
cursor = con.cursor()
csv_file = open("exp.csv", "w")
writer = csv.writer(csv_file, delimiter='|', lineterminator="\n", quoting=csv.QUOTE_NONE)
r = cursor.execute("select * from home_parties where CREATION_DATE >= trunc(sysdate)")
for row in cursor:
writer.writerow(row)
cursor.close()
con.close()
csv_file.close()
I tried the below code to fetch from sql file which has the same sql query as mentioned in the above code - sample.sql but it dint work
import csv
import cx_Oracle
con = cx_Oracle.connect('XCM/XCM#home.com:1500/HOME')
cursor = con.cursor()
csv_file = open("exp.csv", "w")
writer = csv.writer(csv_file, delimiter='|', lineterminator="\n", quoting=csv.QUOTE_NONE)
f = open('C:/Users/home1/sample.sql')
full_sql = f.read()
r = cursor.execute(full_sql)
for row in cursor:
writer.writerow(row)
cursor.close()
con.close()
csv_file.close()
Kindly help me out!!
The content of the sql file might be read within a for loop line by line such as
import csv
import cx_Oracle
con = cx_Oracle.connect('XCM/XCM#home.com:1500/HOME')
cursor = con.cursor()
sql=""
with open("C:/Users/home1/sample.sql") as f_in:
for line in f_in:
sql += line
f_out = open("exp.csv", "w")
writer = csv.writer(f_out, delimiter='|', lineterminator="\n", quoting=csv.QUOTE_NONE)
cursor.execute(sql)
for row in cursor:
writer.writerow(row)
cursor.close()
con.close()
f_out.close()
f_in.close()
considering the SQL statement to be spreaded across multiple lines
Assuming you have a sample.sql file in the same directory with the query as the first line. This would read the SQL query from that file, then execute the query and save the result into a CSV file named "exp.csv".
import cx_Oracle
con = cx_Oracle.connect('XCM/XCM#home.com:1500/HOME')
cursor = con.cursor()
import pandas as pd
with open('sample.sql') as f:
sql_query_string = f.readline();
sql_query = pd.read_sql_query(sql_query_string, con)
sql_query.to_csv("exp.csv", sep='|')

How to import data into multiple CSV files using Python based on date

I have created the below code to import data in CSV file from PostgreSQL DB. However, I want to create multiple files based on date.
import psycopg2
import csv
conn_string = "host='' port='5432' user='' password='' dbname=''"
conn = psycopg2.connect(conn_string)
cur=conn.cursor()
query="select * from sample where date between '' and ''"
cur.execute(query)
title=[i[0] for i in cur.description]
result=cur.fetchall()
csvfile=open('filename.csv','w')
if result:
c = csv.writer(csvfile)
c.writerow(title)
c.writerows(result)
cur.close()
conn.close()
The files should be split similar to the below format:
01jan.csv
02jan.csv
etc.
You can loop over the query results and open a new file whenever the row date changes. The results must be ordered by date, otherwise you can lose some data.
import psycopg2
import psycopg2.extras
import csv
import datetime
# conn_string = ...
conn = psycopg2.connect(conn_string)
# we need results in dict
cur = conn.cursor(cursor_factory = psycopg2.extras.DictCursor)
# order by date - important!
query = "select * from sample where date between '2018-01-01' and '2018-01-10' order by date"
cur.execute(query)
title = [i[0] for i in cur.description]
date = None
writer = None
csvfile = None
for row in cur:
if date != row['date']:
# when date changes we should close current file (if opened)
# and open a new one with name based on date
if csvfile:
csvfile.close()
date = row['date']
filename = date.strftime("%d%b")+ '.csv'
csvfile = open(filename, 'w', newline='')
writer = csv.writer(csvfile)
writer.writerow(title)
writer.writerow(row)
cur.close()
conn.close()
The above solution is acceptable for rather small datasets. If the amount of data for one day is large, you should rather use copy_expert()
cur = conn.cursor()
# example loop for ten days of Jan 2018
for day in range(1, 10):
date = datetime.date(2018, 1, day)
filename = date.strftime("%d%b")+ '.csv'
command = 'copy (select * from sample where date = %s) to stdout with csv header'
sql = cur.mogrify(command, [date])
with open(filename, 'w') as file:
cur.copy_expert(sql, file)

pysqlite get column names from csv file

I have a csv file from which I am trying to load data into pysqlite database. I am not able to find a way to extract the first row of the file and get it into the database automatically as column headers of a table. I have to enter the names "manually" in the code itself, which is ok for 1-2 columsn but becomes cumbersome with tens or hundreds of columns. Here is my code:
import sqlite3
import csv
f_n = 'test_data_1.csv'
f = open( f_n , 'r' )
csv_reader = csv.reader(f)
header = next( csv_reader )
sqlite_file = 'survey_test_db.sqlite'
table_name01 = 'test_survey_1'
field_01 = 'analyst_name'
field_type_01 = 'text'
field_02 = 'question_name'
field_type_02 = 'text'
conn = sqlite3.connect( sqlite_file )
c = conn.cursor()
c.execute('CREATE TABLE {tn}({nf_01} {ft_01},{nf_02} {ft_02})'\
.format(tn = table_name01 , nf_01 = field_01 , ft_01 = field_type_01, nf_02 = field_02 , ft_02 = field_type_02 ))
for row in csv_reader:
c.execute("INSERT INTO test_survey_1 VALUES (?,?)",row)
f.close()
for row in c.execute('SELECT * FROM test_survey_1'):
print(row)
conn.commit()
conn.close()
c.execute('CREATE TABLE {tn}({fieldlist})'.format(
tn=table_name01,
fieldlist=', '.join('{} TEXT'.format(name) for name in header),
))
Or use a ORM which is designed to make this sort of thing easy. SQLAlchemy example:
t = Table(table_name01, meta, *(Column(name, String()) for name in header))
t.create()
You can use pandas to read your csv file into DataFrame and then export
it to sqlite.
import sqlite3
import pandas as pd
sqlite_file = 'survey_test_db.sqlite'
table_name01 = 'test_survey_1'
conn = sqlite3.connect(sqlite_file)
pd.read_csv('test_data_1.csv').to_sql(table_name01, con=con)

Better ways to print out column names when using cx_Oracle

Found an example using cx_Oracle, this example shows all the information of Cursor.description.
import cx_Oracle
from pprint import pprint
connection = cx_Oracle.Connection("%s/%s#%s" % (dbuser, dbpasswd, oracle_sid))
cursor = cx_Oracle.Cursor(connection)
sql = "SELECT * FROM your_table"
cursor.execute(sql)
data = cursor.fetchall()
print "(name, type_code, display_size, internal_size, precision, scale, null_ok)"
pprint(cursor.description)
pprint(data)
cursor.close()
connection.close()
What I wanted to see was the list of Cursor.description[0](name), so I changed the code:
import cx_Oracle
import pprint
connection = cx_Oracle.Connection("%s/%s#%s" % (dbuser, dbpasswd, oracle_sid))
cursor = cx_Oracle.Cursor(connection)
sql = "SELECT * FROM your_table"
cursor.execute(sql)
data = cursor.fetchall()
col_names = []
for i in range(0, len(cursor.description)):
col_names.append(cursor.description[i][0])
pp = pprint.PrettyPrinter(width=1024)
pp.pprint(col_names)
pp.pprint(data)
cursor.close()
connection.close()
I think there will be better ways to print out the names of columns. Please get me alternatives to the Python beginner. :-)
You can use list comprehension as an alternative to get the column names:
col_names = [row[0] for row in cursor.description]
Since cursor.description returns a list of 7-element tuples you can get the 0th element which is a column name.
Here the code.
import csv
import sys
import cx_Oracle
db = cx_Oracle.connect('user/pass#host:1521/service_name')
SQL = "select * from dual"
print(SQL)
cursor = db.cursor()
f = open("C:\dual.csv", "w")
writer = csv.writer(f, lineterminator="\n", quoting=csv.QUOTE_NONNUMERIC)
r = cursor.execute(SQL)
#this takes the column names
col_names = [row[0] for row in cursor.description]
writer.writerow(col_names)
for row in cursor:
writer.writerow(row)
f.close()
The SQLAlchemy source code is a good starting point for robust methods of database introspection. Here is how SQLAlchemy reflects table names from Oracle:
SELECT table_name FROM all_tables
WHERE nvl(tablespace_name, 'no tablespace') NOT IN ('SYSTEM', 'SYSAUX')
AND OWNER = :owner
AND IOT_NAME IS NULL

Categories

Resources