Insert data from CSV into a PostgreSQL database via Python

I'm brand new to PostgreSQL and to SQL in general.
I'm trying to create a table in a database via Python and then load data from a .csv file into the table.
My code looks like this:
import csv
import psycopg2
#Establish connection to database
con = psycopg2.connect(
    host = "localhost",
    database = "kundeavgang",
    user = "postgres",
    password = "postgres",
)
#Cursor
cur = con.cursor()
#If a mistake is made, start from scratch
cur.execute("DROP TABLE IF EXISTS kundeavgang")
#Create table
cur.execute('''
CREATE TABLE "kundeavgang"(
"customerID" TEXT,
"gender" TEXT,
"SeniorCitizen" TEXT,
"Partner" TEXT,
"Dependents" TEXT,
"tenure" INT,
"PhoneService" TEXT,
"MultipleLines" TEXT,
"InternetService" TEXT,
"OnlineSecurity" TEXT,
"DeviceProtection" TEXT,
"TechSupport" TEXT,
"StreamingMovies" TEXT,
"Contract" TEXT,
"PaperlessBilling" TEXT,
"PaymentMethod" TEXT,
"MonthlyCharges" FLOAT,
"TotalCharges" FLOAT,
"Churn" TEXT
)
''')
#Access .csv file
with open('kundeavgang.csv') as csvFile:
    reader = csv.reader(csvFile)
    skipHeader = next(reader) #Account for header
    for row in reader:
        customerID = row[0]
        gender = row[1]
        SeniorCitizen = row[2]
        Partner = row[3]
        Dependents = row[4]
        tenure = row[5]
        PhoneService = row[6]
        MultipleLines = row[7]
        InternetService = row[8]
        OnlineSecurity = row[9]
        OnlineBackup = row[10]
        DeviceProtection = row[11]
        TechSupport = row[12]
        StreamingTV = [13]
        StreamingMovies = row[14]
        Contract = row[15]
        PaperlessBilling = row[16]
        PaymentMethod = row[17]
        MonthlyCharges = row[18]
        TotalCharges = row[19]
        Churn = row[20]
        cur.execute('''INSERT INTO kundeavgang(customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,TotalCharges,Churn)
            VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)''',(customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,TotalCharges,Churn))
#Commit the transaction
con.commit()
#End connection
con.close()
In pgAdmin, the table shows up as existing in the database; however, I cannot find the actual table itself. Further, I have no idea what this line of code does:
cur.execute('''INSERT INTO kundeavgang(customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,TotalCharges,Churn)
VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)''',(customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,TotalCharges,Churn))
What do all the %s stand for? I found this in an online example that was not very helpful, so I tried it without knowing what it means. I have seen some examples where question marks are used instead, but again without explanation.
Lastly, as the code stands now, I get the error message:
VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)''',(customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,TotalCharges,Churn))
IndexError: tuple index out of range
All help or explanations will be appreciated.

For bulk inserts from text files, consider copy_from or copy_expert of psycopg2. Also, be sure to commit your execution:
cur.execute("DROP TABLE IF EXISTS kundeavgang")
con.commit()
cur.execute('''CREATE TABLE "kundeavgang" ... ''')
con.commit()
with open('kundeavgang.csv') as csvFile:
next(csvFile) # SKIP HEADERS
cur.copy_from(csvFile, "kundeavgang", sep=",")
# POSTGRES COPY COMMAND FOR CSV MODE
# cur.copy_expert("""COPY "kundeavgang" FROM STDIN WITH CSV""", csvFile)
con.commit()

The %s are placeholders for the values that will be inserted and passed through the following tuple:
(customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,TotalCharges,Churn)
The problem is that your insert statement targets 20 columns and you provide 20 values in your tuple, but you have 22 placeholders (%s).
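For example, a consistent version of the statement would use 21 column names, 21 placeholders and 21 values. This is only a sketch: it assumes you also add the missing "OnlineBackup" and "StreamingTV" columns to the CREATE TABLE, and it quotes the identifiers because the table was created with quoted mixed-case names:
cur.execute('''INSERT INTO kundeavgang ("customerID","gender","SeniorCitizen","Partner","Dependents","tenure",
        "PhoneService","MultipleLines","InternetService","OnlineSecurity","OnlineBackup","DeviceProtection",
        "TechSupport","StreamingTV","StreamingMovies","Contract","PaperlessBilling","PaymentMethod",
        "MonthlyCharges","TotalCharges","Churn")
    VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)''',
    (customerID, gender, SeniorCitizen, Partner, Dependents, tenure,
     PhoneService, MultipleLines, InternetService, OnlineSecurity, OnlineBackup,
     DeviceProtection, TechSupport, StreamingTV, StreamingMovies, Contract,
     PaperlessBilling, PaymentMethod, MonthlyCharges, TotalCharges, Churn))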

The problem is a mismatch between the number of columns to be populated and the length of the list provided. This is an easy mistake to make when dealing with a lot of columns. One way to reduce the risk of error is to use the length of the columns or values list to build the statement.
cols = [name1, name2,...]
vals = [val1, val2, ...]
assert len(cols) == len(vals), 'mismatch between number of columns and number of values'
template = """INSERT INTO tbl ({}) VALUES ({})"""
stmt = template.format(', '.join(cols), ','.join(['%s'] * len(vals)))
cur.execute(stmt, vals)
Note that when building the column names dynamically it's good practice to quote them - psycopg2 provides tools for this.
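For instance, here is a small sketch using psycopg2's sql module; the table and column names and values are only illustrative:
from psycopg2 import sql

cols = ['customerID', 'gender', 'Churn']      # example column names
vals = ['7590-VHVEG', 'Female', 'No']         # example values, one per column

stmt = sql.SQL("INSERT INTO {} ({}) VALUES ({})").format(
    sql.Identifier('kundeavgang'),                      # safely quoted table name
    sql.SQL(', ').join(map(sql.Identifier, cols)),      # safely quoted column names
    sql.SQL(', ').join(sql.Placeholder() * len(vals)))  # one placeholder per value

cur.execute(stmt, vals)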

Change the line reader = csv.reader(csvFile) to:
reader = csv.reader(csvFile, delimiter=';')

Related

Converting .csv to db in Python with sqlite3

I get TypeError: function takes exactly 2 arguments (3 given),
and when I convert show_id and genre into a list of tuples, I get ValueError: parameters are of unsupported type.
import sqlite3
import csv

# create database
conn = sqlite3.connect("favorites8.db")
open("favorites8.db", "w")
db = conn.cursor()

# create table
db.execute("CREATE TABLE shows (id INTEGER AUTO INCREMENT, title TEXT NOT NULL, PRIMARY KEY (id))")
db.execute("CREATE TABLE genres (shows_id INTEGER, genre TEXT NOT NULL, FOREIGN KEY (shows_id) REFERENCES shows(id))")

# open csv file
with open("favorites.csv", "r") as file:
    # create dictreader
    reader = csv.DictReader(file)
    # iterate over csv file
    for row in reader:
        # canonicalize title
        title = row["title"].strip().upper()
        # insert title
        stmnt1 = "INSERT INTO shows (title) VALUES(?)"
        show_id = db.execute(stmnt1, (title,))
        # Insert genres
        for genre in row["genres"].split(", "):
            stmnt2 = "INSERT INTO genres (shows_id, genre) VALUES(?, ?)"
            db.executemany(stmnt2, show_id, genre)

# commit changes
conn.commit()
conn.close()
# end with
I've fixed some mistakes and cleaned this up a bit:
conn = sqlite3.connect("favorites8.db")
db = conn.cursor()

db.execute("CREATE TABLE shows (id INTEGER NOT NULL PRIMARY KEY, title TEXT NOT NULL)")
db.execute("CREATE TABLE genres (shows_id INTEGER, genre TEXT NOT NULL, FOREIGN KEY (shows_id) REFERENCES shows(id))")

with open("favorites.csv", "r") as file:
    reader = csv.DictReader(file)
    for row in reader:
        title = row["title"].strip().upper()
        stmnt1 = "INSERT INTO shows (title) VALUES(?)"
        db.execute(stmnt1, (title,))
        show_id = db.lastrowid
        # Insert genres
        data = []
        stmnt2 = "INSERT INTO genres (shows_id, genre) VALUES(?, ?)"
        for genre in row["genres"].split(", "):
            data.append((show_id, genre))
        db.executemany(stmnt2, data)

# commit changes
conn.commit()
conn.close()
There were a bunch of issues:
executemany expects an iterable of parameter tuples as its second argument.
The first execute statement does not return the inserted id but the cursor object; you need to retrieve the id manually (here via lastrowid).

Insert values from csv file with no header to a SQL table with headers

I have a csv file with no headers and created one SQL table with fields A, B, C, D and E. I need to import the data from the csv file into the table on python.
file = open(path)
data = csv.reader(file)
cursor = connection.cursor()
query = '''INSERT INTO table (id1, id2, name, birthday, score) VALUES (?,?, ?,?,?)'''
cursor.executemany(insert_movies, data)
cursor.close()
connection.commit()
I have also tried to loop through the rows
file = open(path)
data = csv.reader(file)
cursor = connection.cursor()
query = '''INSERT INTO table (id1, id2, name, birthday, score) VALUES (?,?, ?,?,?)'''
for row in data:
    cursor.executemany(query, row)
connection.commit()
I ran this in Jupyter instead of Visual Studio Code and it worked. Actually, the output of this table was feeding into another one, which was showing no elements, but after running the same code in Jupyter the table was generated. Not sure what the actual cause was.
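For reference, a minimal sketch of the executemany approach, which passes the reader itself so every CSV row becomes one parameter tuple. Note that the first snippet above passes insert_movies instead of query; this sketch mirrors the table and column names from the question and assumes the CSV column order matches the INSERT:
import csv

file = open(path)
data = csv.reader(file)
cursor = connection.cursor()
query = '''INSERT INTO table (id1, id2, name, birthday, score) VALUES (?,?,?,?,?)'''
cursor.executemany(query, data)  # one parameter tuple per CSV row
connection.commit()
cursor.close()
file.close()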

How to load a CSV into a sqlite DB using csv headers as table columns using python

I have a csv file for all the plane crash incidents since 1908. The csv header columns are,
Date,Time,Location,Operator,Flight #,Route,Type,Registration,cn/In,Aboard,Fatalities,Ground,Summary
My code is below, but this prints the entire file.
Is there a way, using the csv or shelve module, to make a persistent db?
The end goal here is to slice and dice the data,
e.g. sort by date, group by year, etc.
#!/usr/bin/env python3.8
import sqlite3
import csv
conn = sqlite3.connect(":memory:")
cur = conn.cursor()
cur.execute('DROP TABLE IF EXISTS air_disaster')
cur.execute('''
CREATE TABLE "air_disaster" (
"Date1" TEXT,
"Time1" TEXT,
"Location1" TEXT,
"Operator1" TEXT,
"Flight_No" TEXT,
"Route" TEXT,
"Type1" TEXT,
"Registration" TEXT,
"cn" TEXT,
"Aboard" REAL,
"Fatalities" REAL,
"ground" TEXT,
"Summary" TEXT
)
''')
with open("/Users/foobar/Downloads/Airplane_Crashes_and_Fatalities_Since_1908.csv") as csv_file:
    csv_reader = csv.DictReader(csv_file)
    for row in csv_reader:
        Date1 = row["Date"]
        Time1 = row["Time"]
        Location1 = row["Location"]
        Operator1 = row["Operator"]
        Flight_No = row["Flight #"]
        Route = row["Route"]
        Type1 = row["Type"]
        Registration = row["Registration"]
        cn = row["cn/In"]
        Aboard = row["Aboard"]
        Fatalities = row["Fatalities"]
        ground = row["Ground"]
        Summary = row["Summary"]
        cur.execute('''INSERT INTO air_disaster(Date1,Time1,Location1,Operator1,Flight_No,Route,Type1,Registration,cn,Aboard,Fatalities,ground,Summary)
            VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?)''', (Date1,Time1,Location1,Operator1,Flight_No,Route,Type1,Registration,cn,Aboard,Fatalities,ground,Summary))
conn.commit()
(cur.execute('SELECT * from air_disaster where Fatalities > 1000'))
If you are open to using pandas, you could create a dataframe with read_csv(), and then save to a DB with to_sql(). You would also have the added benefit of having a rich environment for transforming or preprocessing the data before uploading it.
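A minimal sketch of that approach, assuming an on-disk SQLite file so the data persists between runs (the air_disaster.db filename is just an example):
import sqlite3
import pandas as pd

conn = sqlite3.connect("air_disaster.db")  # a file, not :memory:, so the data persists

df = pd.read_csv("/Users/foobar/Downloads/Airplane_Crashes_and_Fatalities_Since_1908.csv")
df.to_sql("air_disaster", conn, if_exists="replace", index=False)

# slice and dice with SQL or with pandas itself
print(pd.read_sql_query("SELECT * FROM air_disaster WHERE Fatalities > 200", conn))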

Python generating column header attributes from unseen csv file into sql table

I understand how to populate an sql table with a csv file by using:
cursor.execute('CREATE TABLE test (name, age, number)')
csvfile = open('test.csv', 'rb')
creader = csv.reader(csvfile, delimiter=',')
for t in creader:
    cursor.execute('INSERT INTO sentence VALUES (?,?,?)', t)
However, I'm faced with an issue where I don't know what the csv file may hold, so I can't explicitly create a table with named columns. All I know is that the file will have column headers, and my question is: how do I make those headers the column names? For example:
Row 1 of the csv has the (unknown) headers, e.g. name, number, group. I'd like those to be the columns in table t.
I attempted this:
import csv, sqlite3
con = sqlite3.connect(":memory:")
cur = con.cursor()
cur.execute("CREATE TABLE t (col1, col2);")
with open('data.csv','rb') as fin:
    dr = csv.reader(fin)
    dicts = ({'col1': line[0], 'col2': line[1]} for line in dr)
    to_db = ((i['col1'], i['col2']) for i in dicts)
cur.executemany("INSERT INTO t (col1, col2) VALUES (?, ?);", to_db)
con.commit()
But the error I'm getting is ValueError: I/O operation on closed file.
I'll make an assumption based on your indentation being wrong in the question. You're probably still trying to work with the file after finishing your with statement.
import csv, sqlite3
con = sqlite3.connect(":memory:")
cur = con.cursor()
cur.execute("CREATE TABLE t (col1, col2);")
with open('data.csv','rb') as fin:
    dr = csv.reader(fin)
    dicts = ({'col1': line[0], 'col2': line[1]} for line in dr)
    to_db = ((i['col1'], i['col2']) for i in dicts)
    cur.executemany("INSERT INTO t (col1, col2) VALUES (?, ?);", to_db)
    con.commit()
When you're using with open() the file is automatically closed when you're finished with it.
The alternative, older method would be similar to:
fin = open('data.csv', 'rb')  # opens the csv file
try:
    dr = csv.reader(fin)
    dicts = ({'col1': line[0], 'col2': line[1]} for line in dr)
    to_db = ((i['col1'], i['col2']) for i in dicts)
finally:
    fin.close()  # closing
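As for using the unknown headers as the column names, here is a rough sketch that builds the CREATE TABLE and INSERT statements from the first row of the file; the identifier quoting is only illustrative, not production-grade escaping:
import csv, sqlite3

con = sqlite3.connect(":memory:")
cur = con.cursor()

with open('data.csv', newline='') as fin:
    dr = csv.reader(fin)
    headers = next(dr)                        # first row holds the column names
    cols = ', '.join('"{}"'.format(h) for h in headers)
    qmarks = ', '.join('?' * len(headers))    # one ? per column
    cur.execute('CREATE TABLE t ({});'.format(cols))
    cur.executemany('INSERT INTO t ({}) VALUES ({});'.format(cols, qmarks), dr)
    con.commit()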

Python & SQLite3 Selecting from two tables

I have written this code in Python, which basically opens up my SQLite3 database, looks at each row in the table 'contact', takes each 'id' number and then looks up the matching 'id' in the table 'Users'. My problem is that it only outputs the first one and does not loop through all the rows.
import sqlite3
conn = sqlite3.connect('sqlite3.db')
cursor = conn.cursor()
cursor2 = conn.cursor()
cursor3 = conn.cursor()
text_file = open("Output.txt", "w");
try:
cursor.execute("SELECT Id, address FROM contact;") # Get address details by ID
for row in cursor:
ID = row[0]
address= row[1]
cursor2.execute("SELECT name FROM Users WHERE id= " + str(ID) + ";") # Get users's name by ID
row2 = cursor2.fetchone()
sendername = row2[0]
text_file.write(firstname, lastname, address);
finally:
conn.close()
Any suggestions? I'm very new to Python.
You can ask the database to do a join instead:
cursor.execute("""\
SELECT u.name, c.address
FROM contact c
INNER JOIN Users u ON u.id = c.Id
""")
with open('Output.txt', 'w') as outfh:
for name, address in cursor:
outfh.write('{} {}\n'.format(name, address)
The INNER JOIN tells SQLite to only pick rows for which there is an actual match on the id columns. If the join column has the same name in both tables, you could use a NATURAL INNER JOIN as well and omit the ON clause.
If I understand you:
cursor.execute("SELECT Users.name, contact.address FROM Users, contact WHERE contact.Id = Users.id;")
for row in cursor:
    name = row[0]
    address = row[1]
    text_file.write(name + " " + address)
