Currently I am writing a query in Python which exports data from an Oracle DB to a .csv file. I am not sure how to write the headers into the file.
# Export the contents of table <line_name> to "<line_name>.csv",
# writing the column headers as the first row.
try:
    connection = cx_Oracle.connect('user', 'pass', 'tns_name')
    cursor = connection.cursor()
    print("connected")
    # BUG FIX: the original used """select * from """.format(line_name)
    # with no {0} placeholder, so the table name was never inserted.
    # NOTE(review): line_name is assumed to be a trusted identifier —
    # never interpolate untrusted input into SQL.
    query = "select * from {0}".format(line_name)
    cursor.execute(query)
    header = [col[0] for col in cursor.description]  # column names
    results = cursor.fetchall()
except cx_Oracle.DatabaseError as exc:
    # Narrow, visible error handling instead of bare `except: pass`,
    # which silently hid every failure.
    print("database error:", exc)
    results = None

filename = '{0}.csv'.format(line_name)
# Text mode with newline='' is the csv-module convention; 'wb' breaks
# on Python 3. `with` guarantees the file is closed.
with open(filename, 'w', newline='') as csv_file:
    if results:
        myFile = csv.writer(csv_file)
        myFile.writerow(header)   # header row first
        myFile.writerows(results)
    else:
        print("null")
You can either do this after executing your query:
columns = [i[0] for i in cursor.description]
so you get
# Capture column names from cursor.description *before* fetching:
# description is populated right after execute().
# BUG FIX: the original format string had no {0} placeholder, so
# .format(line_name) was a no-op and the query had no table name.
query = "select * from {0}".format(line_name)
tmp = cursor.execute(query)
columns = [i[0] for i in cursor.description]  # header names
results = tmp.fetchall()
and then do:
# Emit the header line followed by the data, but only when the query
# actually returned rows.
if results:
    writer = csv.writer(csv_file)
    writer.writerow(columns)    # header line
    writer.writerows(results)   # data lines
Or you can convert the result to a dictionary and use DictWriter, which accepts fieldnames.
Related
Is it possible to download data to a csv file by the cx_Oracle module, so that the floating point numbers have a comma instead of a dot?
I need this functionality to properly load the downloaded csv file into another table in the Oracle database. When I try to load such a csv file with floating point numbers, I get an error: cx_Oracle.DatabaseError: ORA-01722: invalid number
I have already solved the problem using the pandas library.
My question:
Is there a solution without the use of the pandas DataFrame?
def load_csv():
    """Dump every row of ``tablename`` to ``<schemat>__<tabela>.csv``.

    Uses '|' as the delimiter and quotes all non-numeric fields so the
    file can be re-imported unambiguously.
    """
    conn = cx_Oracle.connect(user=db_user, password=db_userpwd, dsn=dsn,
                             encoding="UTF-8")
    try:
        cursor = conn.cursor()
        try:
            cursor.execute("select * from tablename")
            result_set = cursor.fetchall()
            out_name = table_name['schemat'] + "__" + table_name['tabela'] + ".csv"
            with open(out_name, "w") as csv_file:
                csv_writer = csv.writer(csv_file, delimiter='|',
                                        lineterminator="\n",
                                        quoting=csv.QUOTE_NONNUMERIC)
                csv_writer.writerows(result_set)  # one call instead of a loop
        finally:
            cursor.close()  # released even if the query or write fails
    finally:
        conn.close()  # released even on error (the original leaked on failure)
def export_csv():
    """Load rows from ``<schemat>__<tabela>.csv`` back into the database.

    Reads the '|'-delimited file produced by load_csv() and inserts the
    rows with executemany() in batches, committing once at the end.
    """
    # Open connection to Oracle DB
    conn = cx_Oracle.connect(user=db_user, password=db_userpwd, dsn=dsn,
                             encoding="UTF-8")
    try:
        # Open cursor to Oracle DB
        cursor = conn.cursor()
        try:
            # With batch_size == 1 the original called executemany() once
            # per row, defeating batching entirely; a larger batch
            # amortises the round-trip cost without changing the result.
            batch_size = 1000
            in_name = table_name['schemat'] + "__" + table_name['tabela'] + ".csv"
            with open(in_name, 'r') as csv_file:
                csv_reader = csv.reader(csv_file, delimiter='|')
                sql = sql_insert
                data = []
                for line in csv_reader:
                    data.append(list(line))
                    if len(data) % batch_size == 0:
                        cursor.executemany(sql, data)
                        data = []
                if data:  # flush the final partial batch
                    cursor.executemany(sql, data)
            conn.commit()
        finally:
            cursor.close()
    finally:
        conn.close()
I tried to set it up by changing the session, but unfortunately it doesn't work for me.
# -*- coding: utf-8 -*-
import csv
import os
import sys
import time
import decimal
import pandas as pd
import cx_Oracle
dsn = "(DESCRIPTION=(ADDRESS=(PROTOCOL=TCP)(HOST=xxx)" \
"(PORT=xxx))(CONNECT_DATA=(SERVICE_NAME = xxx)))"
db_user = "xxx"
db_userpwd = "xxx"
def init_session(conn, requested_tag):
    """Session-pool callback: set ',' as the session decimal separator."""
    cur = conn.cursor()
    # First char = decimal separator, second = group separator.
    cur.execute("alter session set nls_numeric_characters = ', '")
    # Sanity check: 5/2 should now render with a comma.
    cur.execute("select to_number(5/2) from dual")
    value, = cur.fetchone()
    print("dual=", repr(value))
# Pooled connections run init_session once per new session, which flips
# NLS_NUMERIC_CHARACTERS so numbers render with a comma.
pool = cx_Oracle.SessionPool(user=db_user, password=db_userpwd,
                             dsn=dsn, session_callback=init_session,
                             encoding="UTF-8")
with pool.acquire() as conn:
    # Open cursor to Oracle DB
    cursor = conn.cursor()
    try:
        # Confirm the session-level separator actually took effect.
        cursor.execute("select value from nls_session_parameters "
                       "where parameter = 'NLS_NUMERIC_CHARACTERS'")
        nls_session_parameters, = cursor.fetchone()
        print("nls_session_parameters=", repr(nls_session_parameters))

        cursor.execute("select * from tablename")
        result_set = cursor.fetchall()
        out_name = table_name['schemat'] + "__" + table_name['tabela'] + ".csv"
        with open(out_name, "w") as csv_file:
            csv_writer = csv.writer(csv_file, delimiter='|',
                                    lineterminator="\n")
            csv_writer.writerows(result_set)
    finally:
        cursor.close()  # the original never closed the cursor
I would be grateful for any suggestions on how I can get data to csv file without pandas library.
example:
problematic result: 123.45
correct result: 123,45
Another, possibly simpler option:
Create an output type handler that tells Oracle to fetch the value as a string. Then replace the period with a comma:
import cx_Oracle as oracledb
def output_type_handler(cursor, name, default_type, size, precision, scale):
    """Fetch NUMBER columns as strings, swapping '.' for ',' on the way."""
    if default_type != oracledb.DB_TYPE_NUMBER:
        return None  # every other type keeps the default handling
    return cursor.var(str, arraysize=cursor.arraysize,
                      outconverter=lambda s: s.replace(".", ","))
# BUG FIX: the EZConnect string is user/password@host:port/service_name;
# the original had '#' where '@' belongs, which cannot be parsed.
conn = oracledb.connect("user/password@host:port/service_name")
conn.outputtypehandler = output_type_handler  # applies to every cursor
with conn.cursor() as cursor:
    cursor.execute("select * from TestNumbers")
    for row in cursor:
        print(row)
Put the output type handler on the cursor if you only want to do this for one query instead of all queries.
You can do by TO_CHAR(<numeric_value>,'999999999D99999999999','NLS_NUMERIC_CHARACTERS=''.,''') conversion such as
# TO_CHAR with an explicit NLS_NUMERIC_CHARACTERS argument makes the
# separators independent of the session settings.
cursor.execute("""
SELECT TRIM(TO_CHAR(5/2,'999999999D99999999999',
'NLS_NUMERIC_CHARACTERS=''.,'''))
FROM dual
""")
result_set = cursor.fetchall()

# Same export as before, with writerows() in place of the manual loop.
out_name = table_name['schemat'] + "__" + table_name['tabela'] + ".csv"
with open(out_name, "w") as csv_file:
    csv_writer = csv.writer(csv_file, delimiter='|', lineterminator="\n")
    csv_writer.writerows(result_set)
btw, switching ''.,'' to '',.'' will yield 2,50000000000 again
Since you're writing to a text file and presumably also want to avoid any Oracle decimal format to Python binary format precision issues, fetching as a string like Anthony showed has advantages. If you want to move the decimal separator conversion cost to the DB you could combine his solution and yours by adding this to your original code:
def output_type_handler(cursor, name, default_type, size, precision, scale):
    """Have the database convert NUMBER columns to strings during fetch."""
    if default_type != cx_Oracle.NUMBER:
        return None  # non-numeric columns keep the default handling
    return cursor.var(str, arraysize=cursor.arraysize)
and then after you open the cursor (and before executing), add the handler:
cursor.outputtypehandler = output_type_handler
Since the DB does the conversion to string, the value of NLS_NUMERIC_CHARACTERS is respected and you get commas as the decimal separator.
I want to use Python to turn MySQL query results into JSON, but when I use the following code, the result is JSON printed line by line, not one whole JSON array.
import pymysql
import json

sql = "**"
conn = pymysql.connect(host='localhost', user='root', passwd="abc",
                       db="mydatabase", port=123)
cur = conn.cursor()
cur.execute(sql)
data = cur.fetchall()
fields = cur.description  # column metadata, one 7-tuple per column
cur.close()
conn.close()

# Element 0 of each description tuple is the column name.
column_list = [field[0] for field in fields]

# BUG FIX: json.dumps()/print were inside the loop, producing one JSON
# object per line. Collect all row-dicts first, then serialise ONCE.
final_resultset = []
for row in data:
    result = {}
    result[column_list[0]] = row[0]
    result[column_list[1]] = row[1]
    result[column_list[2]] = str(row[2])  # non-JSON-native value made a string
    result[column_list[3]] = row[3]
    final_resultset.append(result)
j = json.dumps(final_resultset)
print(j)
You need to pull the json.dumps() line out of the for loop. Otherwise each row is converted into JSON and printed separately on every iteration.
Your code should look like this :
import pymysql
import json

sql = "**"
conn = pymysql.connect(host='localhost', user='root', passwd="abc",
                       db="mydatabase", port=123)
cur = conn.cursor()
cur.execute(sql)
data = cur.fetchall()
fields = cur.description
cur.close()
conn.close()

# Column names come from the cursor metadata (element 0 of each tuple).
column_list = [field[0] for field in fields]

# Gather every row as a dict, then serialise the whole list in one go.
final_resultset = []
for row in data:
    record = {
        column_list[0]: row[0],
        column_list[1]: row[1],
        column_list[2]: str(row[2]),
        column_list[3]: row[3],
    }
    final_resultset.append(record)

j = json.dumps(final_resultset)
print(j)
You can use the below:
cur.execute('''SELECT * FROM Users WHERE id=1''')
# Column names are element 0 of each cursor.description tuple.
row_headers = [desc[0] for desc in cur.description]
rv = cur.fetchall()
# Pair each header with its value to build one dict per row.
json_data = [dict(zip(row_headers, row)) for row in rv]
j = json.dumps(json_data)
print(j)
I have created the below code to import data in CSV file from PostgreSQL DB. However, I want to create multiple files based on date.
import psycopg2
import csv

conn_string = "host='' port='5432' user='' password='' dbname=''"
conn = psycopg2.connect(conn_string)
cur = conn.cursor()

query = "select * from sample where date between '' and ''"
cur.execute(query)
title = [i[0] for i in cur.description]  # header row from cursor metadata
result = cur.fetchall()

# BUG FIX: the original never closed the file handle; `with` guarantees
# the close, and newline='' avoids blank lines between rows on Windows.
with open('filename.csv', 'w', newline='') as csvfile:
    if result:
        c = csv.writer(csvfile)
        c.writerow(title)
        c.writerows(result)

cur.close()
conn.close()
The files should be split similar to the below format:
01jan.csv
02jan.csv
etc.
You can loop over the query results and open a new file whenever the row date changes. The results must be ordered by date, otherwise you can lose some data.
import psycopg2
import psycopg2.extras
import csv
import datetime

# conn_string = ...
conn = psycopg2.connect(conn_string)
# DictCursor lets rows be addressed by column name (row['date']).
cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
# ORDER BY date is essential: rows for one day must be contiguous,
# otherwise a later row would reopen (and truncate) an earlier file.
query = "select * from sample where date between '2018-01-01' and '2018-01-10' order by date"
cur.execute(query)
title = [i[0] for i in cur.description]

date = None
writer = None
csvfile = None
for row in cur:
    if date != row['date']:
        # Date changed: close the current file (if opened) and open a
        # new one named after the day, e.g. 01Jan.csv.
        if csvfile:
            csvfile.close()
        date = row['date']
        filename = date.strftime("%d%b") + '.csv'
        csvfile = open(filename, 'w', newline='')
        writer = csv.writer(csvfile)
        writer.writerow(title)
    writer.writerow(row)

# BUG FIX: the original never closed the last file after the loop.
if csvfile:
    csvfile.close()

cur.close()
conn.close()
The above solution is acceptable for rather small datasets. If the amount of data for one day is large, you should rather use copy_expert()
cur = conn.cursor()
# Loop over the first ten days of Jan 2018.
# BUG FIX: range(1, 10) only covered nine days (Jan 1-9); range(1, 11)
# matches the stated intent of ten days.
for day in range(1, 11):
    date = datetime.date(2018, 1, day)
    filename = date.strftime("%d%b") + '.csv'
    # copy_expert streams CSV (with header) straight from the server;
    # mogrify safely interpolates the date parameter into the COPY text.
    command = 'copy (select * from sample where date = %s) to stdout with csv header'
    sql = cur.mogrify(command, [date])
    with open(filename, 'w') as file:
        cur.copy_expert(sql, file)
I have a csv file from which I am trying to load data into a pysqlite database. I am not able to find a way to extract the first row of the file and get it into the database automatically as column headers of a table. I have to enter the names "manually" in the code itself, which is ok for 1-2 columns but becomes cumbersome with tens or hundreds of columns. Here is my code:
import sqlite3
import csv

f_n = 'test_data_1.csv'
f = open(f_n, 'r')
csv_reader = csv.reader(f)
header = next(csv_reader)  # first row = column names

sqlite_file = 'survey_test_db.sqlite'
table_name01 = 'test_survey_1'

conn = sqlite3.connect(sqlite_file)
c = conn.cursor()

# Build the DDL from the header itself so ANY number of columns works
# (the original hard-coded exactly two fields). Every column is TEXT.
# NOTE(review): header values are interpolated unquoted into SQL —
# only safe for a trusted CSV.
columns_sql = ', '.join('{} text'.format(name) for name in header)
c.execute('CREATE TABLE {tn}({cols})'.format(tn=table_name01, cols=columns_sql))

# One '?' placeholder per column keeps the INSERT parameterized.
placeholders = ','.join('?' * len(header))
insert_sql = 'INSERT INTO {tn} VALUES ({ph})'.format(tn=table_name01,
                                                     ph=placeholders)
for row in csv_reader:
    c.execute(insert_sql, row)
f.close()

for row in c.execute('SELECT * FROM {tn}'.format(tn=table_name01)):
    print(row)

conn.commit()
conn.close()
# One "<name> TEXT" clause per CSV header column, so the table always
# matches the file's width.
field_defs = ['{} TEXT'.format(name) for name in header]
c.execute('CREATE TABLE {tn}({fieldlist})'.format(
    tn=table_name01,
    fieldlist=', '.join(field_defs),
))
Or use a ORM which is designed to make this sort of thing easy. SQLAlchemy example:
# One String column per CSV header name; then create the table in the DB.
columns = [Column(name, String()) for name in header]
t = Table(table_name01, meta, *columns)
t.create()
You can use pandas to read your csv file into DataFrame and then export
it to sqlite.
import sqlite3
import pandas as pd

sqlite_file = 'survey_test_db.sqlite'
table_name01 = 'test_survey_1'

conn = sqlite3.connect(sqlite_file)
# BUG FIX: the original passed con=con, but the connection variable is
# named conn — that raised NameError.
pd.read_csv('test_data_1.csv').to_sql(table_name01, con=conn)
Found an example using cx_Oracle, this example shows all the information of Cursor.description.
import cx_Oracle
from pprint import pprint

# BUG FIX: the EZConnect-style string is user/password@sid — the
# original used '#' instead of '@'. Also converted the Python 2 print
# statement to the print() function used elsewhere in this document.
connection = cx_Oracle.Connection("%s/%s@%s" % (dbuser, dbpasswd, oracle_sid))
cursor = cx_Oracle.Cursor(connection)
sql = "SELECT * FROM your_table"
cursor.execute(sql)
data = cursor.fetchall()
# cursor.description is a list of 7-tuples, one per selected column.
print("(name, type_code, display_size, internal_size, precision, scale, null_ok)")
pprint(cursor.description)
pprint(data)
cursor.close()
connection.close()
What I wanted to see was the list of Cursor.description[0](name), so I changed the code:
import cx_Oracle
import pprint

# BUG FIX: '@' (not '#') separates credentials from the SID.
connection = cx_Oracle.Connection("%s/%s@%s" % (dbuser, dbpasswd, oracle_sid))
cursor = cx_Oracle.Cursor(connection)
sql = "SELECT * FROM your_table"
cursor.execute(sql)
data = cursor.fetchall()

# Element 0 of each description tuple is the column name; a
# comprehension replaces the manual range(len(...)) append loop.
col_names = [desc[0] for desc in cursor.description]

pp = pprint.PrettyPrinter(width=1024)
pp.pprint(col_names)
pp.pprint(data)
cursor.close()
connection.close()
I think there will be better ways to print out the names of columns. Please suggest alternatives to a Python beginner. :-)
You can use list comprehension as an alternative to get the column names:
col_names = [row[0] for row in cursor.description]
Since cursor.description returns a list of 7-element tuples you can get the 0th element which is a column name.
Here is the code.
import csv
import sys
import cx_Oracle

# BUG FIX: '@' (not '#') separates credentials from host in EZConnect.
db = cx_Oracle.connect('user/pass@host:1521/service_name')
SQL = "select * from dual"
print(SQL)
cursor = db.cursor()
# Raw string: in "C:\dual.csv" the \d happened to stay literal, but a
# raw path is explicit and survives edits like \t or \n. `with`
# guarantees the file is closed; the unused `r = ...` binding is gone.
with open(r"C:\dual.csv", "w", newline='') as f:
    writer = csv.writer(f, lineterminator="\n", quoting=csv.QUOTE_NONNUMERIC)
    cursor.execute(SQL)
    # Header row first: column names from cursor.description.
    col_names = [desc[0] for desc in cursor.description]
    writer.writerow(col_names)
    writer.writerows(cursor)  # cursor iterates the remaining rows
The SQLAlchemy source code is a good starting point for robust methods of database introspection. Here is how SQLAlchemy reflects table names from Oracle:
-- Reflect table names visible to :owner, skipping Oracle-internal
-- tablespaces; IOT_NAME IS NULL presumably filters out
-- index-organized-table overflow segments — confirm against the
-- SQLAlchemy Oracle dialect source.
SELECT table_name FROM all_tables
WHERE nvl(tablespace_name, 'no tablespace') NOT IN ('SYSTEM', 'SYSAUX')
AND OWNER = :owner
AND IOT_NAME IS NULL