Importing a csv file to sqllite3 using python functional programming - python

I know there are some other posts out there, but I was not able to find the specific question I had in mind.
I'm using US_baby_names csv file. and want to import this csv file line by line into sqlite3 as a table.
I'm able to create the table called storage.
I'm then trying to read lines in the csv file and put it into that table, but I must be doing something wrong.
import sqlite3 as sql
from sqlite3 import Error
import csv
def CreateConnection ( dbFileName ):
try:
conn = sql.connect(dbFileName)
return conn
except Error as e:
print(e)
return None
def CreateNew( dbConnection, new):
sql = """INSERT INTO storage (dat, Id, Name, Year, group, subgroup, Count)
VALUES (?,?,?,?,?,?,?)"""
try:
cursor = dbConnection.cursor()
cursor.execute(sql, new)
return cursor.lastrowid
except Error as e:
print(e)
def Main():
database = "storage.db"
dbConnection = CreateConnection(database)
with open('storage.csv', 'rb') as fin:
dr = csv.DictReader(fin)
to_db = [(i['dat'], i['Id'], i['Name'], i['Year'], i['group'], i['subgroup'], i['Count']) \
for i in dr]
cursor.executemany(CreateNew(sql, to_db))
dbConnection.close()
if __name__ == "__main__":
Main()
I believe my cursor.executemany is wrong, but I'm not able to figure out what else to do..
Thanks

You are almost right with much of your code, but:
in cursor.execute(sql, new) you are passing an iterable, new, to sqlite3.execute() (which requires a simple SQL statement), instead of sqlite3.executemany().
Moreover, the result of CreateNew() is an integer, lastrowid, and you pass that result to executemany().
You must use Connection.commit() to save the changes to the database, and Connection.rollback() to discard them.
You must open the file for the csv.DictReader class as a text file, in r or rt mode.
Finally, remember that sqlite3.Connection is a context manager, so you can use it in a with statement.
This should be your desired outcome:
import sqlite3 as sql
from sqlite3 import Error
import csv
def create_table(conn):
sql = "CREATE TABLE IF NOT EXISTS baby_names("\
"dat TEXT,"\
"Id INTEGER PRIMARY KEY,"\
"Name TEXT NOT NULL,"\
"Year INTEGER NOT NULL,"\
"Gender TEXT NOT NULL,"\
"State TEXT NOT NULL,"\
"Count INTEGER)"
conn.execute(sql)
conn.execute("DELETE FROM baby_names")
def select_all(conn):
for r in conn.execute("SELECT * FROM baby_names").fetchall():
print(r)
def execute_sql_statement(conn, data):
sql = "INSERT INTO baby_names "\
"(dat, Id, Name, Year, Gender, State, Count) "\
"VALUES (?,?,?,?,?,?,?)"
try:
cursor = conn.executemany(sql, data)
except Error as e:
print(e)
conn.rollback()
return None
else:
conn.commit()
return cursor.lastrowid
def main():
with sql.connect('baby_names.db') as conn, open('US_Baby_Names_right.csv', 'r') as fin:
create_table(conn)
dr = csv.DictReader(fin)
data = [(i['dat'], i['Id'], i['Name'], i['Year'], i['Gender'], i['State'], i['Count']) for i in dr ]
lastrowid = execute_sql_statement(conn, data)
select_all(conn)
main()
I added a create_table() function just to test my code. I also made up a sample test file as follows:
dat,Id,Name,Year,Gender,State,Count
1,1,John,1998,M,Washington,2
2,2,Luke,2000,M,Arkansas,10
3,3,Carrie,1999,F,Texas,3
The output of the select_all() function is:
('1',1,'John',1998,'M','Washington',2)
('2',2,'Luke',2000,'M','Arkansas',10)
('3',3,'Carrie',1999,'F','Texas',3)

Related

Getting error while inserting data in sqlite3

I am new to Python and started off with sqlite.
I have two csv transaction.csv and users.csv from where I am reading the data and writing to the sqlite database.Below is the snippet
import csv
import sqlite3 as db
def readCSV_users():
with open('users.csv',mode='r') as data:
dr = csv.DictReader(data, delimiter=',')
users_data = [(i['user_id'], i['is_active']) for i in dr if i['is_active']=='True']
#---------------------
return users_data
def readCSV_transactions():
with open('transactions.csv',mode='r') as d:
dr = csv.DictReader(d, delimiter=',')
trans_data = [(i['user_id'], i['is_blocked'],i['transaction_amount'],i['transaction_category_id']) for i in dr if i['is_blocked']=='False']
#---------------------
return trans_data
def SQLite_connection(database):
try:
# connect to the database
conn = db.connect(database)
print("Database connection is established successfully!")
conn = db.connect(':memory:')
print("Established database connection to a database\
that resides in the memory!")
cur = conn.cursor()
return cur,conn
except exception as Err:
print(Err)
def dbQuery(users_data,trans_data,cur,conn):
try:
cur.executescript(""" CREATE TABLE if not exists users(user_id text,is_active text);
CREATE TABLE if not exists transactions(user_id text,is_blocked text,transaction_amount text,transaction_category_id text);
INSERT INTO users VALUES (?,?),users_data;
INSERT INTO transactions VALUES (?,?,?,?),trans_data""")
conn.commit()
a=[]
rows = curr.execute("SELECT * FROM users").fetchall()
for r in rows:
a.append(r)
return a
except Err:
print(Err)
finally:
conn.close()
if __name__ == "__main__":
database='uit'
users_data=readCSV_users()
trans_data=readCSV_transactions()
curr,conn=SQLite_connection(database)
print(dbQuery(users_data,trans_data,curr,conn))
But I am facing below error.I believe the ? is throwing the error in executescript
cur.executescript(""" CREATE TABLE if not exists users(user_id text,is_active text);
sqlite3.OperationalError: near "users_data": syntax error
Any pointers to resolve this?
Putting users_data directly in query is wrong. It treats it as normal string.
But it seems executescript can't use arguments.
You would have to put values directly in place of ?.
Or you have to use execute()
cur.execute("INSERT INTO users VALUES (?,?);", users_data)
cur.execute("INSERT INTO transactions VALUES (?,?,?,?)", trans_data)

"where" in mysql table

I am using a code that is using mysql. I am very new in mysql so I would be thankful if you could help. My input is a huge dumpfile of wikipediapages in xml bz2 format. The input format is some text files extracted from that xml file with this format:
<doc id="12" url="https://en.wikipedia.org/wiki?curid=12" title="Anarchism"> text... </doc>
the only parts that connects the program to sql is as follows:
def read_in_STOP_CATS(f_n = "/media/sscepano/Data/Wiki2015/STOPCAT/STOP_CATS.txt"):
s = []
f = open(f_n, "r")
for line in f:
s.append(line.rstrip().lower())
return s
def connect_2_db():
try:
cnx = mysql.connector.connect(user='test', password='test',
host='127.0.0.1',
database='wiki_category_links')
except mysql.connector.Error as err:
if err.errno == errorcode.ER_ACCESS_DENIED_ERROR:
print("Something is wrong with your user name or password")
elif err.errno == errorcode.ER_BAD_DB_ERROR:
print("Database does not exist")
else:
print(err)
return cnx
def articles_selected(aid):
global cnx
global STOP_CATS
cursor = cnx.cursor(buffered=True)
cursor.execute("SELECT * FROM categorylinks where cl_from = " + str(aid))
row = cursor.fetchone()
while row is not None:
#print(row)
cat = row[1].lower()
#print cat
for el in STOP_CATS:
if el in cat:
return False
row = cursor.fetchone()
return True
cnx = connect_2_db()
STOP_CATS = read_in_STOP_CATS()
TITLE_WEIGHT = 4
my problem is that right now I do not know how should I connect to mysql to be able to run the code and the main prob;lem is that I do not know what is categorylinks in the code? That should be the name of my sql table? Does it mean that I need to make an sql table with this name and import all my text file in this one table?
what does 'where' means in this line also????
As RiggsFolly said, you need to get something like WHERE cl_from = 'some string'
You could do it this way:
cursor.execute("SELECT * FROM categorylinks where cl_from ='" + str(aid)+"'")
But it is better to use prepared statements like this one:
select_stmt = "SELECT * FROM categorylinks where cl_from = %(aid)s"
cursor.execute(select_stmt, { 'aid':str(aid) })
So in your code you have:
A database named wiki_category_links
In that database you have a table called categorylinks
And the select you have means that you are going to get, from table categorylinks, all rows that have the column cl_from equal to the value of aid variable.

Writing column names to file

I have an SQLite DB file and I am parsing the data from each column in a table of the db to a .txt file. At the moment it is writing the column contents to the file but it won't pull the column names and write those. How can I go about it as I have tried to use this guide Is there a way to get a list of column names in sqlite? but i cannot seem to get it to work. Here is my code with an attempt at pulling the column names from the table.
import sqlite3
from sqlite3 import Error
# create a database connection to the SQLite database specified by the db_file
def create_connection(db_file,detect_types=sqlite3.PARSE_DECLTYPES):
try:
conn = sqlite3.connect(db_file)
return conn
except Error as e:
print(e)
return None
# Query specific rows in the sms table
def select_data(conn):
cur = conn.cursor()
cur.execute("SELECT _id, address, strftime('%d-%m-%Y', date / 1000, 'unixepoch'),read, type, body, seen FROM sms")
print("Writing the contents of the sms table to an evidence file")
print("\t")
# Trying to pull out column names from db table
def get_col_names():
conn = sqlite3.connect("mmssms.db")
c = conn.cursor()
c.execute("SELECT _id, address, strftime('%d-%m-%Y', date / 1000, 'unixepoch'),read, type, body, seen FROM sms")
return [member[0] for member in c.description]
# Write the data to a smsEvidence.txt file
with open('EvidenceExtractionFiles/smsInfo.txt', 'a+') as f:
rows = cur.fetchall()
for row in rows:
#print(row)
f.write("%s\n" % str(row))
print("SMS Data is written to the evidence File")
# path to where the db files are stored
def main():
database = "H:\College Fourth Year\Development Project\Final Year Project 2018\mmssms.db"
# create a database connection
conn = create_connection(database)
with conn:
# print("Query specific columns")
select_data(conn)
# close db connection
if(conn):
conn.close()
print("Database closed")
if __name__ == '__main__':
main()
You may use cursor.description which holds info about the column names:
[ ... ]
cur = cursor.execute('SELECT * FROM test_table LIMIT 100')
col_names = [ name[0] for name in cur.description ]
print (col_names)
[ ... ]

TypeError: not enough arguments for format string while inserting into mysql database

I have a csv file like this:
nohaelprince#uwaterloo.ca, 01-05-2014
nohaelprince#uwaterloo.ca, 01-05-2014
nohaelprince#uwaterloo.ca, 01-05-2014
nohaelprince#gmail.com, 01-05-2014
I am reading the above csv file and extracting domain name and also the count of emails address by domain name and date as well. All these things I need to insert into MySQL table called domains.
Below is the code in which is giving me error as TypeError: not enough arguments for format string and it's happening when I try to insert into domains table.
#!/usr/bin/python
import fileinput
import csv
import os
import sys
import time
import MySQLdb
from collections import defaultdict, Counter
domain_counts = defaultdict(Counter)
# ======================== Defined Functions ======================
def get_file_path(filename):
currentdirpath = os.getcwd()
# get current working directory path
filepath = os.path.join(currentdirpath, filename)
return filepath
# ===========================================================
def read_CSV(filepath):
with open('emails.csv') as f:
reader = csv.reader(f)
for row in reader:
domain_counts[row[0].split('#')[1].strip()][row[1]] += 1
db = MySQLdb.connect(host="localhost", # your host, usually localhost
user="root", # your username
passwd="abcdef1234", # your password
db="test") # name of the data base
cur = db.cursor()
q = """INSERT INTO domains(domain_name, cnt, date_of_entry) VALUES(%s, %s, STR_TO_DATE(%s, '%d-%m-%Y'))"""
for domain, data in domain_counts.iteritems():
for email_date, email_count in data.iteritems():
cur.execute(q, (domain, email_count, email_date))
db.commit()
# ======================= main program =======================================
path = get_file_path('emails.csv')
read_CSV(path) # read the input file
What is wrong I am doing?
As of now my data type for date_of_entry column is date in MySQL.
You need the "%d-%m-%Y" in your sql statement in exact this way. But python (or the execute command) tries first to use it for string formatting and throws this error.
I think you have to escape it and you should try following:
q = """INSERT INTO domains(domain_name, cnt, date_of_entry) VALUES(%s, %s, STR_TO_DATE(%s, '%%d-%%m-%%Y'))"""
Try this:
q = """INSERT INTO domains(domain_name, cnt, date_of_entry) VALUES(%s, %s, STR_TO_DATE(%s, 'd-m-Y'))"""
So we have changed from STR_TO_DATE(%s, '%d-%m-%Y')) to STR_TO_DATE(%s, 'd-m-Y'))
It is detecting the %s as a format string and failing on that. You need to surround it with quotes I guess
INSERT INTO domains(domain_name, cnt, date_of_entry) VALUES(%s, %s, STR_TO_DATE('%s', '%d-%m-%Y'))

reading external sql script in python

I am working on a learning how to execute SQL in python (I know SQL, not Python).
I have an external sql file. It creates and inserts data into three tables 'Zookeeper', 'Handles', 'Animal'.
Then I have a series of queries to run off the tables. The below queries are in the zookeeper.sql file that I load in at the top of the python script. Example for the first two are:
--1.1
SELECT ANAME,zookeepid
FROM ANIMAL, HANDLES
WHERE AID=ANIMALID;
--1.2
SELECT ZNAME, SUM(TIMETOFEED)
FROM ZOOKEEPER, ANIMAL, HANDLES
WHERE AID=ANIMALID AND ZOOKEEPID=ZID
GROUP BY zookeeper.zname;
These all execute fine in SQL. Now I need to execute them from within Python. I have been given and completed code to read in the file. Then execute all the queries in the loop.
The 1.1 and 1.2 is where I am getting confused. I believe in the loop this is the line where I should put in something to run the first and then second query.
result = c.execute("SELECT * FROM %s;" % table);
but what? I think I am missing something very obvious. I think what is throwing me off is % table. In query 1.1 and 1.2, I am not creating a table, but rather looking for a query result.
My entire python code is below.
import sqlite3
from sqlite3 import OperationalError
conn = sqlite3.connect('csc455_HW3.db')
c = conn.cursor()
# Open and read the file as a single buffer
fd = open('ZooDatabase.sql', 'r')
sqlFile = fd.read()
fd.close()
# all SQL commands (split on ';')
sqlCommands = sqlFile.split(';')
# Execute every command from the input file
for command in sqlCommands:
# This will skip and report errors
# For example, if the tables do not yet exist, this will skip over
# the DROP TABLE commands
try:
c.execute(command)
except OperationalError, msg:
print "Command skipped: ", msg
# For each of the 3 tables, query the database and print the contents
for table in ['ZooKeeper', 'Animal', 'Handles']:
**# Plug in the name of the table into SELECT * query
result = c.execute("SELECT * FROM %s;" % table);**
# Get all rows.
rows = result.fetchall();
# \n represents an end-of-line
print "\n--- TABLE ", table, "\n"
# This will print the name of the columns, padding each name up
# to 22 characters. Note that comma at the end prevents new lines
for desc in result.description:
print desc[0].rjust(22, ' '),
# End the line with column names
print ""
for row in rows:
for value in row:
# Print each value, padding it up with ' ' to 22 characters on the right
print str(value).rjust(22, ' '),
# End the values from the row
print ""
c.close()
conn.close()
Your code already contains a beautiful way to execute all statements from a specified sql file
# Open and read the file as a single buffer
fd = open('ZooDatabase.sql', 'r')
sqlFile = fd.read()
fd.close()
# all SQL commands (split on ';')
sqlCommands = sqlFile.split(';')
# Execute every command from the input file
for command in sqlCommands:
# This will skip and report errors
# For example, if the tables do not yet exist, this will skip over
# the DROP TABLE commands
try:
c.execute(command)
except OperationalError, msg:
print("Command skipped: ", msg)
Wrap this in a function and you can reuse it.
def executeScriptsFromFile(filename):
# Open and read the file as a single buffer
fd = open(filename, 'r')
sqlFile = fd.read()
fd.close()
# all SQL commands (split on ';')
sqlCommands = sqlFile.split(';')
# Execute every command from the input file
for command in sqlCommands:
# This will skip and report errors
# For example, if the tables do not yet exist, this will skip over
# the DROP TABLE commands
try:
c.execute(command)
except OperationalError, msg:
print("Command skipped: ", msg)
To use it
executeScriptsFromFile('zookeeper.sql')
You said you were confused by
result = c.execute("SELECT * FROM %s;" % table);
In Python, you can add stuff to a string by using something called string formatting.
You have a string "Some string with %s" with %s, that's a placeholder for something else. To replace the placeholder, you add % ("what you want to replace it with") after your string
ex:
a = "Hi, my name is %s and I have a %s hat" % ("Azeirah", "cool")
print(a)
>>> Hi, my name is Azeirah and I have a Cool hat
Bit of a childish example, but it should be clear.
Now, what
result = c.execute("SELECT * FROM %s;" % table);
means, is it replaces %s with the value of the table variable.
(created in)
for table in ['ZooKeeper', 'Animal', 'Handles']:
# for loop example
for fruit in ["apple", "pear", "orange"]:
print(fruit)
>>> apple
>>> pear
>>> orange
If you have any additional questions, poke me.
A very simple way to read an external script into an sqlite database in python is using executescript():
import sqlite3
conn = sqlite3.connect('csc455_HW3.db')
with open('ZooDatabase.sql', 'r') as sql_file:
conn.executescript(sql_file.read())
conn.close()
First make sure that a table exists if not, create a table then follow the steps.
import sqlite3
from sqlite3 import OperationalError
conn = sqlite3.connect('Client_DB.db')
c = conn.cursor()
def execute_sqlfile(filename):
c.execute("CREATE TABLE clients_parameters (adress text, ie text)")
#
fd = open(filename, 'r')
sqlFile = fd.readlines()
fd.close()
lvalues = [tuple(v.split(';')) for v in sqlFile[1:] ]
try:
#print(command)
c.executemany("INSERT INTO clients_parameters VALUES (?, ?)", lvalues)
except OperationalError as msg:
print ("Command skipped: ", msg)
execute_sqlfile('clients.sql')
print(c.rowcount)
according me, it is not possible
solution:
import .sql file on mysql server
after
import mysql.connector
import pandas as pd
and then you use .sql file by convert to dataframe

Categories

Resources