How to store a jpg in an SQLite database with Python

I've been trying for many days to find a solution to this problem. I need to store a small jpg image for each record in an SQLite database.
I finally managed to insert the file, but judging from its size it was written to the database as raw pixel data instead of a (compressed) jpg.
The code I used is:
imgobj = Image.open('./fotocopies/checks/633.jpg')
con = sqlite3.connect("pybook.db")
cur = con.cursor()
cur.execute("UPDATE data_fotocopies SET fotocopy=? WHERE refid=633 and reftype=0", [ buffer(imgobj.tobytes()) ] )
If I instead try to open the file directly, it cannot be inserted into the database; the following code:
imgobj = open('./fotocopies/checks/632.jpg')
con = sqlite3.connect("pybook.db")
cur = con.cursor()
cur.execute("UPDATE data_fotocopies SET fotocopy=? WHERE refid=632 and reftype=0", [sqlite3.Binary(imgobj)] )
gives the following error:
cur.execute("UPDATE data_fotocopies SET fotocopy=? WHERE refid=632 and reftype=0", [sqlite3.Binary(imgobj)] )
TypeError: buffer object expected
Unfortunately, none of the previous answers on Stack Overflow cover my case, as I've tried them all. Furthermore, all the storing and retrieving has to be done via a GTK 3 interface, which I suspect will mean another series of problems, i.e. how to make an existing image widget get its data from the db response, etc.
Can anyone help?
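The immediate cause of the TypeError is that the file object itself, rather than its bytes, is passed to sqlite3.Binary(). A minimal sketch of the fix, assuming the same table and paths (read the file in binary mode and bind the raw, still jpg-compressed, bytes):
import sqlite3

# read the jpg as bytes; 'rb' keeps the compressed data intact
with open('./fotocopies/checks/632.jpg', 'rb') as f:
    img_bytes = f.read()

con = sqlite3.connect("pybook.db")
cur = con.cursor()
cur.execute("UPDATE data_fotocopies SET fotocopy=? WHERE refid=632 AND reftype=0",
            [sqlite3.Binary(img_bytes)])
con.commit()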

Storing and retrieving BLOBs
import sqlite3
import os.path
from os import listdir, getcwd
from IPython.core.display import Image

def get_picture_list(rel_path):
    abs_path = os.path.join(os.getcwd(), rel_path)
    print 'abs_path =', abs_path
    dir_files = os.listdir(abs_path)
    return dir_files

def create_or_open_db(db_file):
    db_is_new = not os.path.exists(db_file)
    conn = sqlite3.connect(db_file)
    if db_is_new:
        print 'Creating schema'
        sql = '''create table if not exists PICTURES(
                 ID INTEGER PRIMARY KEY AUTOINCREMENT,
                 PICTURE BLOB,
                 TYPE TEXT,
                 FILE_NAME TEXT);'''
        conn.execute(sql)  # shortcut for conn.cursor().execute(sql)
    else:
        print 'Schema exists\n'
    return conn

def insert_picture(conn, picture_file):
    with open(picture_file, 'rb') as input_file:
        ablob = input_file.read()
    base = os.path.basename(picture_file)
    afile, ext = os.path.splitext(base)
    sql = '''INSERT INTO PICTURES
             (PICTURE, TYPE, FILE_NAME)
             VALUES(?, ?, ?);'''
    conn.execute(sql, [sqlite3.Binary(ablob), ext, afile])
    conn.commit()

def extract_picture(cursor, picture_id):
    sql = "SELECT PICTURE, TYPE, FILE_NAME FROM PICTURES WHERE id = :id"
    param = {'id': picture_id}
    cursor.execute(sql, param)
    ablob, ext, afile = cursor.fetchone()
    filename = afile + ext
    with open(filename, 'wb') as output_file:
        output_file.write(ablob)
    return filename

conn = create_or_open_db('picture_db.sqlite')
picture_file = "./pictures/Chrysanthemum50.jpg"
insert_picture(conn, picture_file)
conn.close()

conn = create_or_open_db('picture_db.sqlite')
cur = conn.cursor()
filename = extract_picture(cur, 1)
cur.close()
conn.close()
Image(filename='./' + filename)
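If you'd rather not round-trip the BLOB through a file on disk, it can also be decoded straight from memory. A minimal sketch, assuming PIL is installed and an open cursor cur on picture_db.sqlite:
import io
from PIL import Image as PILImage

cur.execute("SELECT PICTURE FROM PICTURES WHERE id = :id", {'id': 1})
ablob = cur.fetchone()[0]
img = PILImage.open(io.BytesIO(bytes(ablob)))  # decode the jpg without touching disk
print img.size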

Finally I got it working, thanks to Andrej Kesely's comment. The working solution is:
import base64

imgobj = base64.b64encode(open('./fotocopies/checks/624.jpg', 'rb').read())
con = sqlite3.connect("pybook.db")
cur = con.cursor()
qf = "UPDATE data_fotocopies SET fotocopy='%s' WHERE refid=%d AND reftype=0" % (imgobj, 624)
cur.execute(qf)  # yes, it is dangerous for injection
con.commit()
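A parameterized variant of the same update avoids that injection risk; a short sketch, binding the base64 text as an ordinary parameter:
qf = "UPDATE data_fotocopies SET fotocopy=? WHERE refid=? AND reftype=0"
cur.execute(qf, (imgobj, 624))
con.commit()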
and retrieving the image from the database is done as:
qf="SELECT fotocopy FROM data_fotocopies WHERE refid=%d and reftype=0"%self.check_id
self.cur.execute(qf)
try:
fd=base64.b64decode(self.cur.fetchall()[0][0])
byting = GLib.Bytes(fd)
self.fotocopy = Gio.MemoryInputStream.new_from_bytes(byting)
...
self.fotocopy_ent=self.builder.get_object("fotocopy") # as it is made in glade
pixbuf = GdkPixbuf.Pixbuf.new_from_stream(self.fotocopy,None) #finally the pixbuf although
#it produces errors if I have
#no stream/image to "feed" it.
self.fotocopy_ent.set_from_pixbuf(pixbuf)
I still can't figure out why all the other solutions I found don't work. I use Python 2.7.6 and GTK 3, but this one I submit does.
Thank you all for your help.

Related

Python/MySQL - Rename CSV file

I created a small application to export data from my MySQL database to csv. It works, but if I try to create another report, the following error is raised:
pymysql.err.InternalError: (1086, "File '/TEMP/.CSV' already exists")
Yes, the file already exists. My question is: how do I generate two reports, even with the same name, e.g. hi.csv followed by hi.csv (1)?
The code is below:
import tkinter as tk
import pymysql

root = tk.Tk()
root.geometry("")
root.title("excel teste")

conn = pymysql.connect(host="localhost", port=3306, user="root", password="", database="omnia")
with conn:
    print("connect successfull!")
    cursor = conn.cursor()
    with cursor:
        cursor.execute("SELECT VERSION()")
        versao = cursor.fetchone()
        print("Versão do gerenciador Maria DB: %s" % versao)

def exp_rel_con_pag():
    conn = pymysql.connect(host="localhost", port=3306, user="root", password="", database="omnia")
    with conn:
        statm = "SELECT * FROM omniacademp INTO OUTFILE '/TEMP/"".CSV' FIELDS TERMINATED BY ',' ENCLOSED BY ''"
        cursor = conn.cursor()
        with cursor:
            cursor.execute(statm)
            results = cursor.fetchone()
            print(results)

tk.Button(root, width=15, text="run", command=exp_rel_con_pag).place(x=10, y=10)
root.mainloop()
You could import the error class:
from pymysql.err import InternalError
Add a counter:
fileIndex = 0
Then see if the file already exists:
try:
    statm = "SELECT * FROM omniacademp INTO OUTFILE '/TEMP/HI.CSV' FIELDS TERMINATED BY ',' ENCLOSED BY ''"
    cursor.execute(statm)
except InternalError:
    statm = "SELECT * FROM omniacademp INTO OUTFILE '/TEMP/HI ({}).CSV' FIELDS TERMINATED BY ',' ENCLOSED BY ''".format(fileIndex)
    cursor.execute(statm)
    fileIndex += 1
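Note that as written the counter only handles a single collision; if several reports may already exist, the same idea can be wrapped in a loop (a sketch, reusing the cursor and the statement pattern from above):
fileIndex = 0
outfile = '/TEMP/HI.CSV'
while True:
    try:
        statm = "SELECT * FROM omniacademp INTO OUTFILE '{}' FIELDS TERMINATED BY ',' ENCLOSED BY ''".format(outfile)
        cursor.execute(statm)
        break  # export succeeded
    except InternalError:
        fileIndex += 1
        outfile = '/TEMP/HI ({}).CSV'.format(fileIndex)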
You need to add some level of dynamic naming; personally, I use timestamps.
For example, I use openpyxl to write my Excel files and datetime for my timestamp.
By using a timestamp down to the second, there is very little chance you will ever have a problem with the filename.
Here is the code I use once I have data to write.
import os
import openpyxl
from datetime import datetime as dt

list_of_data = [['row1'], ['row2'], ['row3'], ['row4']]

wb = openpyxl.Workbook()    # create workbook
main_ws = wb.worksheets[0]  # designate what worksheet I am working on
for sub_list in list_of_data:
    main_ws.append(sub_list)  # writing data to each row

# creating timestamp while removing special characters
time_stamp = ''.join([{'-': '', ' ': '', ':': '', '.': ''}.get(c, c) for c in str(dt.now())])[0:12]
# build file name
file_name = '{} - {}.xlsx'.format('report', time_stamp)
# using os library to build path to my local documents folder
path = os.path.join(os.environ['USERPROFILE'], 'Documents', file_name)
# saving wb
wb.save(filename=path)
As you can see, I now have an Excel file in my Documents folder with a timestamp in its name.
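The same idea carries over to the CSV in the question: fetch the rows client-side and let Python pick a timestamped name, rather than relying on INTO OUTFILE (a sketch, assuming the pymysql cursor from the question):
import csv
from datetime import datetime as dt

time_stamp = dt.now().strftime('%Y%m%d%H%M%S')
file_name = 'hi - {}.csv'.format(time_stamp)

cursor.execute("SELECT * FROM omniacademp")
with open(file_name, 'w', newline='') as f:
    csv.writer(f).writerows(cursor.fetchall())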

Importing a csv file to sqlite3 using python functional programming

I know there are some other posts out there, but I was not able to find the specific question I had in mind.
I'm using the US_baby_names csv file and want to import it line by line into sqlite3 as a table.
I'm able to create the table, called storage.
I'm then trying to read the lines of the csv file and put them into that table, but I must be doing something wrong.
import sqlite3 as sql
from sqlite3 import Error
import csv

def CreateConnection(dbFileName):
    try:
        conn = sql.connect(dbFileName)
        return conn
    except Error as e:
        print(e)
    return None

def CreateNew(dbConnection, new):
    sql = """INSERT INTO storage (dat, Id, Name, Year, group, subgroup, Count)
             VALUES (?,?,?,?,?,?,?)"""
    try:
        cursor = dbConnection.cursor()
        cursor.execute(sql, new)
        return cursor.lastrowid
    except Error as e:
        print(e)

def Main():
    database = "storage.db"
    dbConnection = CreateConnection(database)
    with open('storage.csv', 'rb') as fin:
        dr = csv.DictReader(fin)
        to_db = [(i['dat'], i['Id'], i['Name'], i['Year'], i['group'], i['subgroup'], i['Count'])
                 for i in dr]
    cursor.executemany(CreateNew(sql, to_db))
    dbConnection.close()

if __name__ == "__main__":
    Main()
I believe my cursor.executemany is wrong, but I'm not able to figure out what else to do.
Thanks
You are almost right with much of your code, but:
In cursor.execute(sql, new) you are passing an iterable, new, to sqlite3's execute() (which expects the parameters for a single SQL statement) instead of using executemany().
Moreover, the result of CreateNew() is an integer, lastrowid, and you pass that result to executemany().
You must use Connection.commit() to save the changes to the database, and Connection.rollback() to discard them.
You must open the file for the csv.DictReader class as a text file, in r or rt mode.
Finally, remember that sqlite3.Connection is a context manager, so you can use it in a with statement.
This should be your desired outcome:
import sqlite3 as sql
from sqlite3 import Error
import csv

def create_table(conn):
    sql = "CREATE TABLE IF NOT EXISTS baby_names("\
          "dat TEXT,"\
          "Id INTEGER PRIMARY KEY,"\
          "Name TEXT NOT NULL,"\
          "Year INTEGER NOT NULL,"\
          "Gender TEXT NOT NULL,"\
          "State TEXT NOT NULL,"\
          "Count INTEGER)"
    conn.execute(sql)
    conn.execute("DELETE FROM baby_names")

def select_all(conn):
    for r in conn.execute("SELECT * FROM baby_names").fetchall():
        print(r)

def execute_sql_statement(conn, data):
    sql = "INSERT INTO baby_names "\
          "(dat, Id, Name, Year, Gender, State, Count) "\
          "VALUES (?,?,?,?,?,?,?)"
    try:
        cursor = conn.executemany(sql, data)
    except Error as e:
        print(e)
        conn.rollback()
        return None
    else:
        conn.commit()
        return cursor.lastrowid

def main():
    with sql.connect('baby_names.db') as conn, open('US_Baby_Names_right.csv', 'r') as fin:
        create_table(conn)
        dr = csv.DictReader(fin)
        data = [(i['dat'], i['Id'], i['Name'], i['Year'], i['Gender'], i['State'], i['Count']) for i in dr]
        lastrowid = execute_sql_statement(conn, data)
        select_all(conn)

main()
I added a create_table() function just to test my code. I also made up a sample test file as follows:
dat,Id,Name,Year,Gender,State,Count
1,1,John,1998,M,Washington,2
2,2,Luke,2000,M,Arkansas,10
3,3,Carrie,1999,F,Texas,3
The output of the select_all() function is:
('1',1,'John',1998,'M','Washington',2)
('2',2,'Luke',2000,'M','Arkansas',10)
('3',3,'Carrie',1999,'F','Texas',3)

Speeding up performance when writing from pandas to sqlite

Hoping for a few pointers on how I can optimise this code... Ideally I'd like to stick with pandas, but I assume there are some nifty sqlite tricks I can use to get a good speed-up. For additional "points", I'd love to know if Cython could help at all here.
In case it's not obvious from the code, for context: I have to read millions of very small sqlite files (the files in "unCompressedDir") and merge their contents into a much larger "master" sqlite DB ("6thJan.db").
Thanks in advance everyone!
%%cython -a
import os
import pandas as pd
import sqlite3
import time
import sys

def main():
    rootDir = "/Users/harryrobinson/Desktop/dataForMartin/"
    unCompressedDir = "/Users/harryrobinson/Desktop/dataForMartin/unCompressedSqlFiles/"

    with sqlite3.connect(rootDir + '6thJan.db') as conn:
        destCursor = conn.cursor()
        createTable = "CREATE TABLE IF NOT EXISTS userData(TimeStamp, Category, Action, Parameter1Name, Parameter1Value, Parameter2Name, Parameter2Value, formatVersion, appVersion, userID, operatingSystem)"
        destCursor.execute(createTable)

        for i in os.listdir(unCompressedDir):
            try:
                with sqlite3.connect(unCompressedDir + i) as connection:
                    cursor = connection.cursor()
                    cursor.execute('SELECT * FROM Events')
                    df_events = pd.DataFrame(cursor.fetchall())
                    cursor.execute('SELECT * FROM Global')
                    df_global = pd.DataFrame(cursor.fetchall())
                    cols = ['TimeStamp', 'Category', 'Action', 'Parameter1Name', 'Parameter1Value', 'Parameter2Name', 'Parameter2Value']
                    df_events = df_events.drop(0, axis=1)
                    df_events.columns = cols
                    df_events['formatVersion'] = df_global.iloc[0, 0]
                    df_events['appVersion'] = df_global.iloc[0, 1]
                    df_events['userID'] = df_global.iloc[0, 2]
                    df_events['operatingSystem'] = df_global.iloc[0, 3]
            except Exception as e:
                print(e, sys.exc_info()[-1].tb_lineno)

            try:
                df_events.to_sql("userData", conn, if_exists="append", index=False)
            except Exception as e:
                print("Sqlite error, {0} - line {1}".format(e, sys.exc_info()[-1].tb_lineno))
UPDATE: halved the time by adding a transaction instead of to_sql
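For reference, a sketch of what that transaction-based version might look like (an assumption based on the update note: one executemany per file and a single commit, replacing the to_sql call):
# inside the loop, after df_events has been assembled
rows = df_events.values.tolist()
insert = "INSERT INTO userData VALUES (?,?,?,?,?,?,?,?,?,?,?)"  # 11 columns
destCursor.executemany(insert, rows)  # sqlite3 opens a transaction implicitly
conn.commit()  # one commit per file instead of per-row overhead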
Reconsider using Pandas as a staging tool (leave the library for data analysis). Instead, write pure SQL queries, using SQLite's ATTACH to query the external databases.
with sqlite3.connect(os.path.join(rootDir, '6thJan.db')) as conn:
    destCursor = conn.cursor()

    createTable = """CREATE TABLE IF NOT EXISTS userData(
                        TimeStamp TEXT, Category TEXT, Action TEXT, Parameter1Name TEXT,
                        Parameter1Value TEXT, Parameter2Name TEXT, Parameter2Value TEXT,
                        formatVersion TEXT, appVersion TEXT, userID TEXT, operatingSystem TEXT
                     );"""
    destCursor.execute(createTable)
    conn.commit()

    for i in os.listdir(unCompressedDir):
        # parameters must be passed as a sequence, and the attached file needs its full path
        destCursor.execute("ATTACH ? AS curr_db;", [os.path.join(unCompressedDir, i)])

        sql = """INSERT INTO userData
                 SELECT e.*, g.formatVersion, g.appVersion, g.userID, g.operatingSystem
                 FROM curr_db.[events] e
                 CROSS JOIN (SELECT * FROM curr_db.[global] LIMIT 1) g;"""
        destCursor.execute(sql)
        conn.commit()
        destCursor.execute("DETACH curr_db;")

TypeError: not enough arguments for format string while inserting into mysql database

I have a csv file like this:
nohaelprince#uwaterloo.ca, 01-05-2014
nohaelprince#uwaterloo.ca, 01-05-2014
nohaelprince#uwaterloo.ca, 01-05-2014
nohaelprince#gmail.com, 01-05-2014
I am reading the above csv file and extracting the domain name, as well as the count of email addresses grouped by domain name and date. All of this I need to insert into a MySQL table called domains.
Below is the code which gives me the error TypeError: not enough arguments for format string; it happens when I try to insert into the domains table.
#!/usr/bin/python
import fileinput
import csv
import os
import sys
import time
import MySQLdb
from collections import defaultdict, Counter

domain_counts = defaultdict(Counter)

# ======================== Defined Functions ======================
def get_file_path(filename):
    currentdirpath = os.getcwd()  # get current working directory path
    filepath = os.path.join(currentdirpath, filename)
    return filepath

# =================================================================
def read_CSV(filepath):
    with open('emails.csv') as f:
        reader = csv.reader(f)
        for row in reader:
            domain_counts[row[0].split('#')[1].strip()][row[1]] += 1

    db = MySQLdb.connect(host="localhost",     # your host, usually localhost
                         user="root",          # your username
                         passwd="abcdef1234",  # your password
                         db="test")            # name of the database
    cur = db.cursor()

    q = """INSERT INTO domains(domain_name, cnt, date_of_entry) VALUES(%s, %s, STR_TO_DATE(%s, '%d-%m-%Y'))"""
    for domain, data in domain_counts.iteritems():
        for email_date, email_count in data.iteritems():
            cur.execute(q, (domain, email_count, email_date))
    db.commit()

# ======================= main program ============================
path = get_file_path('emails.csv')
read_CSV(path)  # read the input file
What am I doing wrong?
As of now, the data type of the date_of_entry column is DATE in MySQL.
You need the "%d-%m-%Y" in your SQL statement exactly this way, but Python (or the execute command) first tries to use it for string formatting and throws this error.
I think you have to escape it, and you should try the following:
q = """INSERT INTO domains(domain_name, cnt, date_of_entry) VALUES(%s, %s, STR_TO_DATE(%s, '%%d-%%m-%%Y'))"""
Try this:
q = """INSERT INTO domains(domain_name, cnt, date_of_entry) VALUES(%s, %s, STR_TO_DATE(%s, 'd-m-Y'))"""
So we have changed STR_TO_DATE(%s, '%d-%m-%Y') to STR_TO_DATE(%s, 'd-m-Y').
It is detecting the %s as a format string and failing on that. You need to surround it with quotes, I guess:
INSERT INTO domains(domain_name, cnt, date_of_entry) VALUES(%s, %s, STR_TO_DATE('%s', '%d-%m-%Y'))

Using Python to write to HTML file with MySQL data as source?

I am trying to generate some simple HTML pages that contain data stored in a MySQL database. I have searched and searched for an answer to this, and while I can successfully generate HTML pages from text files or user input, I can't get it working with SQL. I am not worried about formatting the data in HTML or anything; that I can work out. All I would like to do is something like the following, but I can't work out how to actually get the data printing to the file. Any help would be greatly appreciated.
import MySQLdb

def data():
    db_connection = MySQLdb.connect(host='localhost', user='root', passwd='')
    cursor = db_connection.cursor()
    cursor.execute('USE inb104')
    cursor.execute("SELECT Value FROM popularity WHERE Category = 'movies'")
    result = cursor.fetchall()
    return result

htmlFilename = 'test.html'
htmlFile = open(htmlFilename, 'w')
htmlFile.write = data
htmlFile.close()
def data():
    db_connection = MySQLdb.connect(host='localhost', user='root', passwd='')
    cursor = db_connection.cursor()
    cursor.execute('USE inb104')
    cursor.execute("SELECT Value FROM popularity WHERE Category = 'movies'")
    result = cursor.fetchall()
    return ' '.join(result)
If your data() returns a tuple, you may want to use join to make it into a string.
Change this:
htmlFile.write = data
to:
def formatDataAsHtml(data):
    return "<br>".join(data)

htmlFile.write(formatDataAsHtml(data()))
Make sure to replace
return ' '.join(result)
with
list_of_strings = ["(%s)" % c for c in result]
return ' '.join(list_of_strings)
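Putting the pieces together, a minimal end-to-end sketch (assuming the same inb104 database and popularity table from the question):
import MySQLdb

def data():
    db_connection = MySQLdb.connect(host='localhost', user='root', passwd='')
    cursor = db_connection.cursor()
    cursor.execute('USE inb104')
    cursor.execute("SELECT Value FROM popularity WHERE Category = 'movies'")
    return cursor.fetchall()  # a tuple of 1-tuples, e.g. (('A',), ('B',))

rows = data()
html = "<br>".join(str(row[0]) for row in rows)  # flatten each row to its single column
with open('test.html', 'w') as htmlFile:
    htmlFile.write(html)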
