I have a CSV file that I read into pandas and am supposed to insert into Postgres. The file contains strings in some fields with the backslash "\" character. This causes a problem because the copy_from function reads it as an escape character. How do I make it ignore "\" and leave it as part of the string? I have tried many different encoding formats but I still get a "cannot decode character" error. The issue is that I cannot replace the character; it is important in the string.
def load_into_db(cur, con, file, table_name):
    f = open(file, mode="r", encoding='utf-8')
    try:
        # print("wrote to csv")
        sqlstr = "COPY {} FROM STDIN DELIMITER '|' CSV".format(table_name)
        cur.copy_from(f, table_name, null="nan", sep="|")
        con.commit()
        f.close()
    except Exception as e:
        print(e)
        print("something went wrong")
Example of the rows causing the issue:

name  | age | attribute
name1 | 23  | example/1/test
name2 | 26  | example/2/test

Error: invalid byte sequence for encoding "UTF8": 0xa2
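Worth noting (a sketch, not part of the original post): cur.copy_from uses PostgreSQL's text COPY format, in which a backslash starts an escape sequence, whereas the CSV COPY format treats backslashes as ordinary characters. The sqlstr built above is never actually used; it could be run through copy_expert instead, assuming a psycopg2 cursor and a pipe-delimited file:

def load_into_db(cur, con, file, table_name):
    # sketch: run COPY in CSV mode via copy_expert so "\" stays a literal character
    sqlstr = ("COPY {} FROM STDIN WITH (FORMAT csv, DELIMITER '|', "
              "NULL 'nan')".format(table_name))
    with open(file, mode="r", encoding="utf-8") as f:
        cur.copy_expert(sqlstr, f)
    con.commit()

The "invalid byte sequence" error is a separate problem: 0xa2 is not valid UTF-8, which suggests the file itself is not UTF-8 encoded, so the encoding passed to open() (for example latin-1 or cp1252) has to match the file's actual encoding.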
import io
import csv
def df2db(df_a, table_name, engine):
    output = io.StringIO()
    # ignore the index
    # df_a.to_csv(output, sep='\t', index=False, header=False, quoting=csv.QUOTE_NONE)
    df_a.to_csv(output, sep='\t', index=False, header=False,
                quoting=csv.QUOTE_NONE, escapechar='\\')
    output.getvalue()
    # jump to start of stream
    output.seek(0)
    # engine <--- from sqlalchemy import create_engine
    connection = engine.raw_connection()
    cursor = connection.cursor()
    # null values become ''
    cursor.copy_from(output, table_name, null='')
    connection.commit()
    cursor.close()
Use the function df2db to insert a DataFrame into an existing table; the columns of the table and the DataFrame's columns should be the same.
import pandas as pd
from sqlalchemy import create_engine
engine = create_engine('postgresql+psycopg2://user:psw@localhost:5432/dbname')
df = pd.read_csv(file)
df2db(df, table_name, engine)
Related
Is it possible to download data to a CSV file with the cx_Oracle module so that floating-point numbers have a comma instead of a dot?
I need this functionality to properly load the downloaded CSV file into another table in the Oracle database. When I try to load such a CSV file with floating-point numbers, I get an error: cx_Oracle.DatabaseError: ORA-01722: invalid number
I have already solved the problem using the pandas library.
My question:
Is there a solution without using a pandas DataFrame?
def load_csv():
    conn = cx_Oracle.connect(user=db_user, password=db_userpwd, dsn=dsn, encoding="UTF-8")
    cursor = conn.cursor()
    cursor.execute(str("select * from tablename"))
    result_set = cursor.fetchall()
    with open(table_name['schemat']+"__"+table_name['tabela']+".csv", "w") as csv_file:
        csv_writer = csv.writer(csv_file, delimiter='|', lineterminator="\n", quoting=csv.QUOTE_NONNUMERIC)
        for row in result_set:
            csv_writer.writerow(row)
    #df = pandas.read_sql("select * from tablename", conn)
    #df.to_csv(table_name['schemat']+"__"+table_name['tabela']+".csv", index = False, encoding='utf-8', decimal=',', sep='|', header=False)
    cursor.close()
    conn.close()
def export_csv():
    # Open connection to Oracle DB
    conn = cx_Oracle.connect(user=db_user, password=db_userpwd, dsn=dsn, encoding="UTF-8")
    # Open cursor to Oracle DB
    cursor = conn.cursor()
    batch_size = 1
    with open(table_name['schemat']+"__"+table_name['tabela']+".csv", 'r') as csv_file:
        csv_reader = csv.reader(csv_file, delimiter='|')
        sql = sql_insert
        data = []
        for line in csv_reader:
            data.append([i for i in line])
            if len(data) % batch_size == 0:
                cursor.executemany(sql, data)
                data = []
        if data:
            cursor.executemany(sql, data)
    conn.commit()
    cursor.close()
    conn.close()
I tried to set it up by changing the session, but unfortunately it doesn't work for me.
# -*- coding: utf-8 -*-
import csv
import os
import sys
import time
import decimal
import pandas as pd
import cx_Oracle

dsn = "(DESCRIPTION=(ADDRESS=(PROTOCOL=TCP)(HOST=xxx)" \
      "(PORT=xxx))(CONNECT_DATA=(SERVICE_NAME = xxx)))"
db_user = "xxx"
db_userpwd = "xxx"

def init_session(conn, requested_tag):
    cursor = conn.cursor()
    cursor.execute("alter session set nls_numeric_characters = ', '")
    cursor.execute("select to_number(5/2) from dual")
    dual, = cursor.fetchone()
    print("dual=", repr(dual))

pool = cx_Oracle.SessionPool(user=db_user, password=db_userpwd,
                             dsn=dsn, session_callback=init_session, encoding="UTF-8")

with pool.acquire() as conn:
    # Open cursor to Oracle DB
    cursor = conn.cursor()
    cursor.execute("select value from nls_session_parameters where parameter = 'NLS_NUMERIC_CHARACTERS'")
    nls_session_parameters, = cursor.fetchone()
    print("nls_session_parameters=", repr(nls_session_parameters))
    #qryString = "select * from tablename"
    #df = pd.read_sql(qryString,conn)
    #df.to_csv(table_name['schemat']+"__"+table_name['tabela']+".csv", index = False, encoding='utf-8', decimal=',')
    cursor.execute(str("select * from tablename"))
    result_set = cursor.fetchall()
    #result, = cursor.fetchone()
    #print("result is", repr(result))
    with open(table_name['schemat']+"__"+table_name['tabela']+".csv", "w") as csv_file:
        csv_writer = csv.writer(csv_file, delimiter='|', lineterminator="\n")
        for row in result_set:
            csv_writer.writerow(row)
I would be grateful for any suggestions on how I can get the data into a CSV file without the pandas library.
example:
problematic result: 123.45
correct result: 123,45
Another, possibly simpler option:
Create an output type handler that tells Oracle to fetch the value as a string. Then replace the period with a comma:
import cx_Oracle as oracledb

def output_type_handler(cursor, name, default_type, size, precision, scale):
    if default_type == oracledb.DB_TYPE_NUMBER:
        return cursor.var(str, arraysize=cursor.arraysize,
                          outconverter=lambda s: s.replace(".", ","))

conn = oracledb.connect("user/password@host:port/service_name")
conn.outputtypehandler = output_type_handler
with conn.cursor() as cursor:
    cursor.execute("select * from TestNumbers")
    for row in cursor:
        print(row)
Put the output type handler on the cursor if you only want to do this for one query instead of all queries.
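For a single query, you would skip the connection-level assignment and attach the handler to the cursor instead (a small sketch reusing the handler defined above):

with conn.cursor() as cursor:
    # the handler applies only to this cursor, not to the whole connection
    cursor.outputtypehandler = output_type_handler
    cursor.execute("select * from TestNumbers")
    for row in cursor:
        print(row)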
You can do it with a TO_CHAR(<numeric_value>, '999999999D99999999999', 'NLS_NUMERIC_CHARACTERS=''.,''') conversion, such as
cursor.execute("""
SELECT TRIM(TO_CHAR(5/2,'999999999D99999999999',
'NLS_NUMERIC_CHARACTERS=''.,'''))
FROM dual
""")
result_set = cursor.fetchall()
with open(table_name['schemat']+"__"+table_name['tabela']+".csv", "w") as csv_file:
csv_writer = csv.writer(csv_file, delimiter='|', lineterminator="\n")
for row in result_set:
csv_writer.writerow(row)
By the way, switching ''.,'' to '',.'' will yield 2,50000000000 (with the comma as the decimal separator) instead.
Since you're writing to a text file and presumably also want to avoid any Oracle decimal format to Python binary format precision issues, fetching as a string like Anthony showed has advantages. If you want to move the decimal separator conversion cost to the DB you could combine his solution and yours by adding this to your original code:
def output_type_handler(cursor, name, default_type, size, precision, scale):
    if default_type == cx_Oracle.NUMBER:
        return cursor.var(str, arraysize=cursor.arraysize)
and then after you open the cursor (and before executing), add the handler:
cursor.outputtypehandler = output_type_handler
Since the DB does the conversion to string, the value of NLS_NUMERIC_CHARACTERS is respected and you get commas as the decimal separator.
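Put together, the flow might look roughly like this (a sketch reusing the pool, table and file naming from the question):

def output_type_handler(cursor, name, default_type, size, precision, scale):
    # fetch NUMBER columns as strings; the database formats them using the
    # session's NLS_NUMERIC_CHARACTERS, so the comma decimal separator is kept
    if default_type == cx_Oracle.NUMBER:
        return cursor.var(str, arraysize=cursor.arraysize)

with pool.acquire() as conn:
    cursor = conn.cursor()
    cursor.outputtypehandler = output_type_handler
    cursor.execute("select * from tablename")
    with open(table_name['schemat']+"__"+table_name['tabela']+".csv", "w") as csv_file:
        csv_writer = csv.writer(csv_file, delimiter='|', lineterminator="\n")
        for row in cursor.fetchall():
            csv_writer.writerow(row)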
I'm currently learning Python. Here is my question: I converted a .txt file to .csv and now want to insert it into a table in a database file. I have a problem with the iteration; at the bottom I'm pasting the results. How can I iterate over it? I've been struggling with this for a few days and don't really know how to solve the problem.
txt file (a few rows):
id,id2,album,artysta
TRMMMYQ128F932D901,SOQMMHC12AB0180CB8,Faster Pussy cat,Silent Night
TRMMMKD128F425225D,SOVFVAK12A8C1350D9,Karkkiautomaatti,Tanssi vaan
TRMMMRX128F93187D9,SOGTUKN12AB017F4F1,Hudson Mohawke,No One Could Ever
TRMMMCH128F425532C,SOBNYVR12A8C13558C,Yerba Brava,Si Vos Querés
TRMMMWA128F426B589,SOHSBXH12A8C13B0DF,Der Mystic,Tangle Of Aspens
TRMMMXN128F42936A5,SOZVAPQ12A8C13B63C,David Montgomery,"Symphony No. 1 G minor ""Sinfonie Serieuse""/Allegro con energia"
TRMMMLR128F1494097,SOQVRHI12A6D4FB2D7,Sasha / Turbulence,We Have Got Love
TRMMMBB12903CB7D21,SOEYRFT12AB018936C,Kris Kross,2 Da Beat Ch'yall
Python:
from io import StringIO
import pandas as pd
import numpy as np
import os
import sqlite3, csv

save_path = r"C:\Users\Maticz\Desktop\python"

# txt -> csv converter
in_file = os.path.join(save_path, "tracks.txt")
out_file = os.path.join(save_path, "Output.csv")
#df = pd.read_csv(in_file, sep="<SEP>", engine='python')
#df.to_csv(out_file, index=False)
#print(df)

df = pd.read_csv(r'C:\Users\Maticz\PycharmProjects\zadanie\tracks.txt', delimiter='<SEP>',
                 engine='python', names=["id", "id2", "album", "artysta"])
print(df.head(5))
sv = df.to_csv(r'C:\Users\Maticz\PycharmProjects\zadanie\tracks.csv', index=None, header=True)

con = sqlite3.connect("artists.db")
cur = con.cursor()
cur.execute("CREATE TABLE IF NOT EXISTS tabela (id TEXT, id2 TEXT, album TEXT, artysta TEXT);")

with open(r'C:\Users\Maticz\PycharmProjects\zadanie\tracks.csv', 'a+') as fin:
    dr = pd.read_csv(fin, delimiter=',', names=["id", "id2", "album", "artysta"])  # comma is default delimiter
    to_db = [(i['id'], i['id2'], i['album'], i['artysta']) for i in dr]

cur.executemany("INSERT INTO tabela (id, id2, album, artysta) VALUES (?, ?, ?, ?);", to_db)
con.commit()
cur.execute("SELECT * FROM artists")
print(cur.fetchall())
con.close()
Output:
id id2 album artysta
0 TRMMMYQ128F932D901 SOQMMHC12AB0180CB8 Faster Pussy cat Silent Night
1 TRMMMKD128F425225D SOVFVAK12A8C1350D9 Karkkiautomaatti Tanssi vaan
2 TRMMMRX128F93187D9 SOGTUKN12AB017F4F1 Hudson Mohawke No One Could Ever
3 TRMMMCH128F425532C SOBNYVR12A8C13558C Yerba Brava Si Vos Querés
4 TRMMMWA128F426B589 SOHSBXH12A8C13B0DF Der Mystic Tangle Of Aspens
Traceback (most recent call last):
File "C:/Users/Maticz/PycharmProjects/zadanie/main.py", line 26, in <module>
to_db = [(i['id'], i['id2'], i['album'], i['artysta']) for i in dr]
File "C:/Users/Maticz/PycharmProjects/zadanie/main.py", line 26, in <listcomp>
to_db = [(i['id'], i['id2'], i['album'], i['artysta']) for i in dr]
TypeError: string indices must be integers
Process finished with exit code 1
Appreciate any help, thank you :)
You could simplify the operation using sqlalchemy
from sqlalchemy import create_engine
# sqlite://<nohostname>/<path>
# where <path> is relative:
engine = create_engine('sqlite:///artists.db')
df.to_sql('tabela', con = engine, if_exists = 'append', chunksize=1000)
This removes the need to write to another CSV, as you already have the data in a pandas DataFrame. After this is done, you can create a cursor to validate that the data has been written to the SQLite db file.
The statement
if_exists='append'
appends the new data to the table, or creates the table if it doesn't exist.
chunksize=1000
writes the records 1,000 at a time (or all at once if there are fewer than 1,000), then commits them, saving the data to the table.
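For example, a quick check that the rows landed in the SQLite file (a sketch using the db and table names from the question):

import sqlite3

con = sqlite3.connect("artists.db")
cur = con.cursor()
cur.execute("SELECT COUNT(*) FROM tabela")
print(cur.fetchone()[0])  # number of rows written by to_sql
con.close()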
I would like to load a CSV into SQLite. My CSV contains UTF-8 characters like é, à, ü, ♀, ...
These characters are displayed as ’ or é in my sqlite. I have used con.text_factory = str but it doesn't change anything.
I have also tried .decode('utf8') as this question suggests but I get the error: 'str' object has no attribute 'decode'
import sqlite3
import csv
import os

db = "mydb.sqlite"
con = sqlite3.connect(db)
con.text_factory = str  # allows utf-8 data to be stored
cursor = con.cursor()

csvfile = 'mycsv.csv'
tablename = os.path.splitext(os.path.basename(csvfile))[0]

with open(csvfile, 'r') as f:
    reader = csv.reader(f)
    header_line_from_csv = next(reader)
    columns = [h.strip() for h in header_line_from_csv]  # strips whitespace in header
    headers = ', '.join([f'{column} text' for column in columns])

    sql = f'CREATE TABLE {tablename} ({headers})'
    print(sql)
    cursor.execute(sql)

    query = 'insert into {0}({1}) values ({2})'
    query = query.format(tablename, ','.join(columns), ','.join('?' * len(columns)))
    print(query)

    cursor = con.cursor()
    for row in reader:
        cursor.execute(query, row)
    con.commit()
    print(cursor.rowcount)

cursor.execute(f"SELECT * FROM {tablename}")
print("fetchall:\n", cursor.fetchall())
print(cursor.description)
con.close()
You can add this line at the beginning:
# -*- coding: utf-8 -*-
to guarantee that your script uses this encoding. Also try opening the CSV file with UTF-8 encoding:
with open (csvfile, 'r', encoding='utf-8') as f:
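In the loader above, that change would sit at the top of the with block, roughly like this (a sketch; the table creation and insert loop stay exactly as posted):

with open(csvfile, 'r', encoding='utf-8') as f:
    reader = csv.reader(f)
    header_line_from_csv = next(reader)
    # ... build the CREATE TABLE / INSERT statements and loop over rows as before ...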
I am currently running a query on my PostgreSQL database that ignores German characters (umlauts). However, I do not want to lose these characters and would rather have the German characters, or at least their equivalents (e.g. ä = ae), in the output of the query. Running Python 2.7.12.
When I change the encode object to replace or xmlcharrefreplace I get the following error:
psycopg2.ProgrammingError: syntax error at or near "?"
LINE 1: ?SELECT
Code Snippet:
# -*- coding: utf-8 -*-
connection_str = r'postgresql://' + user + ':' + password + '@' + host + '/' + database

def query_db(conn, sql):
    with conn.cursor() as curs:
        curs.execute(sql)
        rows = curs.fetchall()
        print("fetched %s rows from db" % len(rows))
        return rows

with psycopg2.connect(connection_str) as conn:
    for filename in files:
        # Read SQL
        sql = u""
        f = codecs.open(os.path.join(SQL_LOC, filename), "r", "utf-8")
        for line in f:
            sql += line.encode('ascii', 'replace').replace('\r\n', ' ')
        rows = query_db(conn, f)
How can I pass a query as a unicode object with German characters?
I also tried decoding the query as UTF-8, but then I get the following error:
UnicodeEncodeError: 'ascii' codec can't encode character u'\xa0' in position 20: ordinal not in range(128)
Here is a solution to obtain an encoded equivalent. You will be able to re-encode it later, and the query will not raise an error:
SELECT convert_from(BYTEA 'foo ᚠ bar'::bytea, 'latin-1');
+----------------+
| convert_from |
|----------------|
| foo á<U+009A> bar |
+----------------+
SELECT 1
Time: 0.011s
You just need to call conn.set_client_encoding("utf-8") and then you can execute unicode strings directly; SQL and results will be encoded and decoded on the fly:
$ cat psycopg2-unicode.py
import sys
import os
import psycopg2
import csv

with psycopg2.connect("") as conn:
    conn.set_client_encoding("utf-8")
    for filename in sys.argv[1:]:
        file = open(filename, "r", encoding="utf-8")
        sql = file.read()
        with conn.cursor() as cursor:
            cursor.execute(sql)
            try:
                rows = cursor.fetchall()
            except psycopg2.ProgrammingError as err:
                # No results
                continue
            with open(filename+".out", "w", encoding="utf-8", newline="") as outfile:
                csv.writer(outfile, dialect="excel-tab").writerows(rows)
$ cat sql0.sql
create temporary table t(v) as
select 'The quick brown fox jumps over the lazy dog.'
union all
select 'Zwölf große Boxkämpfer jagen Viktor quer über den Sylter Deich.'
union all
select 'Любя, съешь щипцы, — вздохнёт мэр, — кайф жгуч.'
union all
select 'Mężny bądź, chroń pułk twój i sześć flag.'
;
$ cat sql1.sql
select * from t;
$ python3 psycopg2-unicode.py sql0.sql sql1.sql
$ cat sql1.sql.out
The quick brown fox jumps over the lazy dog.
Zwölf große Boxkämpfer jagen Viktor quer über den Sylter Deich.
Любя, съешь щипцы, — вздохнёт мэр, — кайф жгуч.
Mężny bądź, chroń pułk twój i sześć flag.
A Python 2 version of this program is a little bit more complicated, as we need to tell the driver that we'd like return values as unicode objects. Also, the csv module I used for output does not support unicode, so it needs a workaround. Here it is:
$ cat psycopg2-unicode2.py
from __future__ import print_function
import sys
import os
import csv
import codecs
import psycopg2
import psycopg2.extensions

psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)
psycopg2.extensions.register_type(psycopg2.extensions.UNICODEARRAY)

with psycopg2.connect("") as conn:
    conn.set_client_encoding("utf-8")
    for filename in sys.argv[1:]:
        file = codecs.open(filename, "r", encoding="utf-8")
        sql = file.read()
        with conn.cursor() as cursor:
            cursor.execute(sql)
            try:
                rows = cursor.fetchall()
            except psycopg2.ProgrammingError as err:
                # No results from SQL
                continue
            with open(filename+".out", "wb") as outfile:
                for row in rows:
                    row_utf8 = [v.encode('utf-8') for v in row]
                    csv.writer(outfile, dialect="excel-tab").writerow(row_utf8)
I'm looking to format the first column of my Export01.csv as dd/mm/yyyy. Sadly, the current format when I export it is dd/mm/yyyy hh:mm.
Could someone help me tweak my code to change the date format during the export, if that's possible?
import sys
import cx_Oracle
import csv

connection = cx_Oracle.connect('user', 'password', 'ORPM2')
cursor = connection.cursor()
SQL = "SELECT * FROM EXPORT01"
cursor.execute(SQL)

filename = "C:\Projects\SQL_Export\Export01.csv"
with open(filename, "wb") as fout:
    writer = csv.writer(fout)
    writer.writerow([i[0] for i in cursor.description])  # heading row
    writer.writerows(cursor.fetchall())
    fout.close()

cursor.close()
connection.close()
I've added a sample of the first two columns of data in Export01
WEEK_ENDING       ORDER_HEADLINE
12/02/2016 00:00  Headline
15/01/2016 00:00  Headline
15/01/2016 00:00  Headline
If you handle the fetchall a row at a time, you could then convert the first column entry as follows:
from datetime import datetime
import sys
import cx_Oracle
import csv

connection = cx_Oracle.connect('user', 'password', 'ORPM2')
cursor = connection.cursor()
SQL = "SELECT * FROM EXPORT01"
cursor.execute(SQL)

filename = r"C:\Projects\SQL_Export\Export01.csv"
with open(filename, "wb") as fout:
    writer = csv.writer(fout)
    writer.writerow([i[0] for i in cursor.description])  # heading row
    for row in cursor.fetchall():
        cols = list(row)
        cols[0] = cols[0].strftime("%d/%m/%Y")
        writer.writerow(cols)

cursor.close()
connection.close()
Also note that the with statement you are using will automatically close the file when you leave its scope, so the explicit fout.close() is not needed. I would also recommend prefixing your file path with r so that Python does not try to interpret the backslashes in the path as escape sequences.