I have an SQLite DB file and I am parsing the data from each column in a table of the db to a .txt file. At the moment it is writing the column contents to the file but it won't pull the column names and write those. How can I go about it as I have tried to use this guide Is there a way to get a list of column names in sqlite? but i cannot seem to get it to work. Here is my code with an attempt at pulling the column names from the table.
import sqlite3
from sqlite3 import Error
# create a database connection to the SQLite database specified by the db_file
def create_connection(db_file,detect_types=sqlite3.PARSE_DECLTYPES):
try:
conn = sqlite3.connect(db_file)
return conn
except Error as e:
print(e)
return None
# Query specific rows in the sms table
def select_data(conn):
cur = conn.cursor()
cur.execute("SELECT _id, address, strftime('%d-%m-%Y', date / 1000, 'unixepoch'),read, type, body, seen FROM sms")
print("Writing the contents of the sms table to an evidence file")
print("\t")
# Trying to pull out column names from db table
def get_col_names():
conn = sqlite3.connect("mmssms.db")
c = conn.cursor()
c.execute("SELECT _id, address, strftime('%d-%m-%Y', date / 1000, 'unixepoch'),read, type, body, seen FROM sms")
return [member[0] for member in c.description]
# Write the data to a smsEvidence.txt file
with open('EvidenceExtractionFiles/smsInfo.txt', 'a+') as f:
rows = cur.fetchall()
for row in rows:
#print(row)
f.write("%s\n" % str(row))
print("SMS Data is written to the evidence File")
# path to where the db files are stored
def main():
database = "H:\College Fourth Year\Development Project\Final Year Project 2018\mmssms.db"
# create a database connection
conn = create_connection(database)
with conn:
# print("Query specific columns")
select_data(conn)
# close db connection
if(conn):
conn.close()
print("Database closed")
if __name__ == '__main__':
main()
You may use cursor.description which holds info about the column names:
[ ... ]
cur = cursor.execute('SELECT * FROM test_table LIMIT 100')
col_names = [ name[0] for name in cur.description ]
print (col_names)
[ ... ]
Related
I am new to Python and started off with sqlite.
I have two csv transaction.csv and users.csv from where I am reading the data and writing to the sqlite database.Below is the snippet
import csv
import sqlite3 as db
def readCSV_users():
with open('users.csv',mode='r') as data:
dr = csv.DictReader(data, delimiter=',')
users_data = [(i['user_id'], i['is_active']) for i in dr if i['is_active']=='True']
#---------------------
return users_data
def readCSV_transactions():
with open('transactions.csv',mode='r') as d:
dr = csv.DictReader(d, delimiter=',')
trans_data = [(i['user_id'], i['is_blocked'],i['transaction_amount'],i['transaction_category_id']) for i in dr if i['is_blocked']=='False']
#---------------------
return trans_data
def SQLite_connection(database):
try:
# connect to the database
conn = db.connect(database)
print("Database connection is established successfully!")
conn = db.connect(':memory:')
print("Established database connection to a database\
that resides in the memory!")
cur = conn.cursor()
return cur,conn
except exception as Err:
print(Err)
def dbQuery(users_data,trans_data,cur,conn):
try:
cur.executescript(""" CREATE TABLE if not exists users(user_id text,is_active text);
CREATE TABLE if not exists transactions(user_id text,is_blocked text,transaction_amount text,transaction_category_id text);
INSERT INTO users VALUES (?,?),users_data;
INSERT INTO transactions VALUES (?,?,?,?),trans_data""")
conn.commit()
a=[]
rows = curr.execute("SELECT * FROM users").fetchall()
for r in rows:
a.append(r)
return a
except Err:
print(Err)
finally:
conn.close()
if __name__ == "__main__":
database='uit'
users_data=readCSV_users()
trans_data=readCSV_transactions()
curr,conn=SQLite_connection(database)
print(dbQuery(users_data,trans_data,curr,conn))
But I am facing below error.I believe the ? is throwing the error in executescript
cur.executescript(""" CREATE TABLE if not exists users(user_id text,is_active text);
sqlite3.OperationalError: near "users_data": syntax error
Any pointers to resolve this?
Putting users_data directly in query is wrong. It treats it as normal string.
But it seems executescript can't use arguments.
You would have to put values directly in place of ?.
Or you have to use execute()
cur.execute("INSERT INTO users VALUES (?,?);", users_data)
cur.execute("INSERT INTO transactions VALUES (?,?,?,?)", trans_data)
I have a csv file that has several columns:
upc date quantity customer
In my physical table, I have an auto generating id column for each row:
id upc date quantity customer
It seems as though the db is interpreting the upc as the actual id when I run my python script to copy into the db. I'm getting this error message:
Error: value "1111111" is out of range for type integer
CONTEXT: COPY physical, line 1, column id: "1111111"
I've never attempted this before, but I believe this is correct:
def insert_csv(f, table):
connection = get_postgres_connection()
cursor = connection.cursor()
try:
cursor.copy_from(f, table, sep=',')
connection.commit()
return True
except (psycopg2.Error) as e:
print(e)
return False
finally:
cursor.close()
connection.close()
Am I doing something wrong here, or do I have to create another script to get the last id from the table?
Updated working code:
def insert_csv(f, table, columns):
connection = get_postgres_connection()
cursor = connection.cursor()
try:
column_names = ','.join(columns)
query = f'''
COPY {table}({column_names})
FROM STDOUT (FORMAT CSV)
'''
cursor.copy_expert(query, f)
connection.commit()
return True
except (psycopg2.Error) as e:
print(e)
return False
finally:
cursor.close()
connection.close()
columns = (
"upc",
"date_thru",
"transaction_type",
"transaction_type_subtype",
"country_code",
"customer",
"quantity",
"income_gross",
"fm_serial",
"date_usage"
)
with open(dump_file, 'r', newline='', encoding="ISO-8859-1") as f:
inserted = insert_csv(f, 'physical', columns)
You need to specify columns to import. From the documentation:
columns – iterable with name of the columns to import. The length and types should match the content of the file to read. If not specified, it is assumed that the entire table matches the file structure.
Your code may look like this:
def insert_csv(f, table, columns):
connection = connect()
cursor = connection.cursor()
try:
cursor.copy_from(f, table, sep=',', columns=columns)
connection.commit()
return True
except (psycopg2.Error) as e:
print(e)
return False
finally:
cursor.close()
connection.close()
with open("path_to_my_csv") as file:
insert_csv(file, "my_table", ("upc", "date", "quantity", "customer"))
If you have to use copy_expert() modify your function in the way as follow:
def insert_csv(f, table, columns):
connection = connect()
cursor = connection.cursor()
try:
column_names = ','.join(columns)
copy_cmd = f"copy {table}({column_names}) from stdout (format csv)"
cursor.copy_expert(copy_cmd, f)
connection.commit()
return True
except (psycopg2.Error) as e:
print(e)
return False
finally:
cursor.close()
connection.close()
From here COPY:
If a column list is specified, COPY TO copies only the data in the specified columns to the file. For COPY FROM, each field in the file is inserted, in order, into the specified column. Table columns not specified in the COPY FROM column list will receive their default values.
So the values in the CSV file will be assigned left to right and the fields at the end of the table will get their DEFAULT values. If you don't want that to happen then from here copy_from:
columns – iterable with name of the columns to import. The length and types should match the content of the file to read. If not specified, it is assumed that the entire table matches the file structure.
Create a list of columns that match the file structure, leaving out the id column which will be filled with the sequence values.
I used Python and the package SQLite to create table and insert data into the table. However, there is nothing in the table after I fired the execution. Can anyone help me figure it out? Thanks.
def conSqlite():
conn = sqlite3.connect('C:\\Users\jet.cai\Documents\Logsitic.db')
json_path = r'C:\Users\jet.cai\PycharmProjects\VJSF\txtToJson.json'
try:
create_table = ('''
CREATE TABLE IF NOT EXISTS CODE2
(Delivery TEXT,
Customer_Name NCHAR(50),
Shipment_Priority TEXT
)''')
conn.execute(create_table)
except:
print("Table Failed")
return False
with open(json_path, 'r') as jsonf:
lines = json.load(jsonf)
for line in lines:
sql = "insert into CODE2(Delivery,Customer_Name,Shipment_Priority) values('%s','%s','%s')"%(line['Delivery'],line['Customer Name'],line['Shipment Priority'])
conn.execute(sql)
# No results can be selected out
df = pd.read_sql("select Delivery from CODE2", conn)
print(df)
I know there are some other posts out there, but I was not able to find the specific question I had in mind.
I'm using US_baby_names csv file. and want to import this csv file line by line into sqlite3 as a table.
I'm able to create the table called storage.
I'm then trying to read lines in the csv file and put it into that table, but I must be doing something wrong.
import sqlite3 as sql
from sqlite3 import Error
import csv
def CreateConnection ( dbFileName ):
try:
conn = sql.connect(dbFileName)
return conn
except Error as e:
print(e)
return None
def CreateNew( dbConnection, new):
sql = """INSERT INTO storage (dat, Id, Name, Year, group, subgroup, Count)
VALUES (?,?,?,?,?,?,?)"""
try:
cursor = dbConnection.cursor()
cursor.execute(sql, new)
return cursor.lastrowid
except Error as e:
print(e)
def Main():
database = "storage.db"
dbConnection = CreateConnection(database)
with open('storage.csv', 'rb') as fin:
dr = csv.DictReader(fin)
to_db = [(i['dat'], i['Id'], i['Name'], i['Year'], i['group'], i['subgroup'], i['Count']) \
for i in dr]
cursor.executemany(CreateNew(sql, to_db))
dbConnection.close()
if __name__ == "__main__":
Main()
I believe my cursor.executemany is wrong, but I'm not able to figure out what else to do..
Thanks
You are almost right with much of your code, but:
in cursor.execute(sql, new) you are passing an iterable, new, to sqlite3.execute() (which requires a simple SQL statement), instead of sqlite3.executemany().
Moreover, the result of CreateNew() is an integer, lastrowid, and you pass that result to executemany().
You must use Connection.commit() to save the changes to the database, and Connection.rollback() to discard them.
You must open the file for the csv.DictReader class as a text file, in r or rt mode.
Finally, remember that sqlite3.Connection is a context manager, so you can use it in a with statement.
This should be your desired outcome:
import sqlite3 as sql
from sqlite3 import Error
import csv
def create_table(conn):
sql = "CREATE TABLE IF NOT EXISTS baby_names("\
"dat TEXT,"\
"Id INTEGER PRIMARY KEY,"\
"Name TEXT NOT NULL,"\
"Year INTEGER NOT NULL,"\
"Gender TEXT NOT NULL,"\
"State TEXT NOT NULL,"\
"Count INTEGER)"
conn.execute(sql)
conn.execute("DELETE FROM baby_names")
def select_all(conn):
for r in conn.execute("SELECT * FROM baby_names").fetchall():
print(r)
def execute_sql_statement(conn, data):
sql = "INSERT INTO baby_names "\
"(dat, Id, Name, Year, Gender, State, Count) "\
"VALUES (?,?,?,?,?,?,?)"
try:
cursor = conn.executemany(sql, data)
except Error as e:
print(e)
conn.rollback()
return None
else:
conn.commit()
return cursor.lastrowid
def main():
with sql.connect('baby_names.db') as conn, open('US_Baby_Names_right.csv', 'r') as fin:
create_table(conn)
dr = csv.DictReader(fin)
data = [(i['dat'], i['Id'], i['Name'], i['Year'], i['Gender'], i['State'], i['Count']) for i in dr ]
lastrowid = execute_sql_statement(conn, data)
select_all(conn)
main()
I added a create_table() function just to test my code. I also made up a sample test file as follows:
dat,Id,Name,Year,Gender,State,Count
1,1,John,1998,M,Washington,2
2,2,Luke,2000,M,Arkansas,10
3,3,Carrie,1999,F,Texas,3
The output of the select_all() function is:
('1',1,'John',1998,'M','Washington',2)
('2',2,'Luke',2000,'M','Arkansas',10)
('3',3,'Carrie',1999,'F','Texas',3)
I'm migrating a script from another language to Python. I watered this down on the specifics of the database calls etc... but this is what the file looks like. I intentionally made some queries fail as I was testing the transaction and it did not rollback() the queries executed prior to the forced error. I am a little confused as how to the transactions work with Python, the example I followed was this one, it was a loop with several queries nested within transactions so I adapted the code according to what I understood from it.
#!/usr/bin/python
import MySQLdb
import thread
import os
# Open database connection
# added local_infile=1 to allow the import to work, otherwise you get an error
db = MySQLdb.connect(CONNECTION ARGS...)
# define our function that will be called from our thread
def import_queued_file(conn,distid):
# prepare a cursor object using cursor() method
cursor = conn.cursor()
# total lines imported for all files for a distributor
total_lines_imported = 0
# current lines imported for each file on each iteration
current_lines_imported = 0
# default this to 0, this will have the total lines for our imports on each iteration
previous_lines_imported = 0
# initialize the file exists flag to 0
file_exists = 0
# sql statement to retrieve the file(s) for a specific distributor
sql = """
SELECT
...
FROM ...
WHERE ...
"""
# execute the sql statement
cursor.execute(sql)
# if we have records, execute the code below
if (cursor.rowcount > 0):
# set the records to the files variable
files = cursor.fetchall()
# set a variable to count iterations
# we'll use this to determine if we need to drop the table
cnt = 0
# keep track of the total number of lines imported per distributor (may be multiple files)
lines_imported = 0
# loop the recordset
for col in files:
# increment the cnt variable
cnt += 1
# set file_exists to 0 at the beginning of the iteration
file_exists = 0
# set some variables to be used in our sql load data statement
var1 = col[1]
var2 = col[2]
....
# this is the path of our file that we will be using for MySQL LOAD DATA also
# TODO: REFACTOR SO THAT THE /home/webex/backup/ IS NOT HARD CODED
inventoryfile = "/path/to/file/%s" % (filepath)
# check to see if we have a file
if (os.path.exists(inventoryfile)):
try:
# set file exists to true
file_exists = 1
# if cnt > 1, it means we have more than 1 file for this distributor
# only drop the table if this is the first iteration
if (cnt == 1):
# drop table sql statement
sql = "DROP TABLE IF EXISTS %s" % (temptable)
# execute the sql command
cur = conn.cursor()
cur.execute(sql)
cur.close()
# assign the create table statement to the sql variable
sql = """
CREATE TABLE IF NOT EXISTS
.......
.......
) ENGINE=MyISAM DEFAULT CHARSET=utf8
""" % (temptable)
# execute the sql statement
cur = conn.cursor()
cur.execute(sql)
cur.close()
# query the temptable to see if we have any records
sql = "SELECT COUNT(0) AS total FROM %s" % (temptable)
cur = conn.cursor()
cur.execute(sql)
cur.close()
# get the count of how many records exist in the database
number_of_line_items = cur.fetchall()
previous_lines_imported = number_of_line_items[0][0]
# load data local infile sql statement
sql = """
LOAD DATA LOCAL INFILE ...
"""
# execute the load data infile sql statement
cur = conn.cursor()
cur.execute(sql)
cur.close()
# clean up the table by removing...
# rows that don't have a part_number,
# rows that have part_number's less than 3 characters
sql = """
DELETE FROM ...
""" % (temptable)
# execute the delete query
cur = conn.cursor()
cur.execute(sql)
cur.close()
# query the temptable to see if we have any records after the import
sql = "SELECT COUNT(0) AS total FROM %s" % (temptable)
# execute the count query
cur = conn.cursor()
cur.execute(sql)
cur.close()
# get the count of how many records exist in the database after the import
number_of_line_items = cur.fetchall()
# get the current lines imported
current_lines_imported = number_of_line_items[0][0] - previous_lines_imported
# add the current lines imported to the total lines imported
total_lines_imported += current_lines_imported
# update distributor_file_settings table last_updated_on field
sql = """
UPDATE ...
""" % (file_id,distributor__id)
print sql
# execute the update query
cur = conn.cursor()
cur.execute(sql)
cur.close()
# close cursor
conn.commit()
except:
conn.rollback()
# no records exists for this distributor
else:
print "dist doesn't exist"
cursor.close()
import_queued_file(db,42)
# prepare a cursor object using cursor() method
cursor = db.cursor()
# select distinct file settings
sql = """
SELECT ...
"""
# disconnect from server
db.close()
After reviewing the code again and again, the issue happened to be the table type. After changing it to INNODB it worked as expected.