mysql data export for specific value using python - python

I'm trying to get specific rows of data from mysql database table1. I'm looking wherever a certain id shows up in one of the columns.
I am having trouble with my code in the SELECT WHERE statement under 'query'. When i ask for a specific id (id_2_lookup) I get the entire table exported. where am I going wrong?
# Establish a MySQL connection
from mysql.connector import MySQLConnection, Error
from python_mysql_dbconfig import read_db_config
db_config = read_db_config()
conn = MySQLConnection(**db_config)
cursor = conn.cursor()
#to export to excel
import xlsxwriter
from xlsxwriter.workbook import Workbook
#to get the datetime functions
import datetime
from datetime import datetime
#creates the workbook
workbook = xlsxwriter.Workbook('imheretoo.xlsx')
worksheet = workbook.add_worksheet()
#formatting definitions
bold = workbook.add_format({'bold': True})
date_format = workbook.add_format({'num_format': 'yyyy-mm-dd hh:mm:ss'})
timeShape = '%Y-%m-%d %H:%M:%S'
query = ("SELECT sent_time, delivered_time, customer_name, id1_active, id2_active, id3_active, id1_inactive, id2_inactive, id3_inactive, location_active, location_inactive FROM table1 ")
"WHERE id1_active '%s'"
id_2_lookup = [input('Who are you looking for:'),0]
# Execute sql Query
cursor.execute(query)
result = cursor.fetchall()
#sets up the header row
worksheet.write('A1','sent_time',bold)
worksheet.write('B1', 'delivered_time',bold)
worksheet.write('C1', 'customer_name',bold)
worksheet.write('D1', 'id1_active',bold)
worksheet.write('E1', 'id2_active',bold)
worksheet.write('F1', 'id3_active',bold)
worksheet.write('G1', 'id1_inactive',bold)
worksheet.write('H1', 'id2_inactive',bold)
worksheet.write('I1', 'id3_inactive',bold)
worksheet.write('J1', 'location_active',bold)
worksheet.write('K1', 'location_inactive',bold)
worksheet.autofilter('A1:K1')
print("sent_time", "delivered_time", "customer_name", "id1_active", "id2_active", "id3_active", "id1_inactive", "id2_inactive", "id3_inactive", "location_active", "location_inactive")
for row in result:
print(row[0], row[1], row[2], row[3], row[4], row[5], row[6], row[7], row[8], row[9],row[10])
# Create a For loop to iterate through each row in the XLS file, starting at row 2 to skip the headers
for r, row in enumerate(result, start=1): #where you want to start printing results inside workbook
for c, col in enumerate(row):
worksheet.write_datetime(r,0,row[0], date_format)
worksheet.write_datetime(r,1, row[1], date_format)
worksheet.write(r,2, row[2])
worksheet.write(r,3, row[3])
worksheet.write(r,4, row[4])
worksheet.write(r,5, row[5])
worksheet.write(r,6, row[6])
worksheet.write(r,7, row[7])
worksheet.write(r,8, row[8])
worksheet.write(r,9, row[9])
worksheet.write(r,10, row[10])
#close out everything and save
cursor.close()
workbook.close()
conn.close()
#print number of rows and bye-bye message
print ("- - - - - - - - - - - - -")
rows = len(result)
print ("I just imported "+ str(rows) + " rows from MySQL!")
print ("")
print ("Good to Go!!!")
print ("")
I want to give you all the information I have with all the other files and what I'm shooting for to see if you can replicate and actually get it working just to see if it's not just my system or some configuration I have..
I have a database with one table.Lets call it ‘table1’ The table is broken down with columns like this:
sent_time | delivered_time |id1_active |id2_active |id3_active |id1_inactive |id2_inactive |id3_inactive |location_active |location_inactive …..`lots more
Lets say that these are two or more customers delivering goods to and from each other. Each customer has three id#s.
I created a ‘config.ini’ file to make my life a bit easier
[mysql]
host = localhost
database = db_name
user = root
password = blahblah
I created a ‘python_mysql_dbconfig.py’
from configparser import ConfigParser
def read_db_config(filename=’config.ini’, section=’mysql’):
“”” Read database configuration file and return a dictionary object
:param filename: name of the configuration file
:param section: section of database configuration
:return: a dictionary of database parameters
“””
# create parser and read ini configuration file
parser = ConfigParser()
parser.read(filename)
# get section, default to mysql
db = {}
if parser.has_section(section):
items = parser.items(section)
for item in items:
db[item[0]] = item[1]
else:
raise Exception(‘{0} not found in the {1} file’.format(section, filename))
return db
I now have the code we've been working on but now updated with what changes we had made but am still getting errors...
# Establish a MySQL connection
from mysql.connector import MySQLConnection, Error
from python_mysql_dbconfig import read_db_config
db_config = read_db_config()
conn = MySQLConnection(**db_config)
cursor = conn.cursor()
#to export to excel
import xlsxwriter
from xlsxwriter.workbook import Workbook
#to get the datetime functions
import datetime
from datetime import datetime
#creates the workbook
workbook = xlsxwriter.Workbook('imheretoo.xlsx')
worksheet = workbook.add_worksheet()
#formatting definitions
bold = workbook.add_format({'bold': True})
date_format = workbook.add_format({'num_format': 'yyyy-mm-dd hh:mm:ss'})
timeShape = '%Y-%m-%d %H:%M:%S'
#actual query
id_2_lookup= [input('Who are you looking for:'),0]
query = (
"""SELECT sent_time, delivered_time, customer_name, id1_active, id2_active, id3_active, id1_inactive, id2_inactive, id3_inactive, location_active, location_inactive FROM table1 "
"WHERE id1_active = %s""",(id_2_lookup)
)
# Execute sql Query
query = query % id_2_lookup
cursor.execute(query)
result = cursor.fetchall()
#sets up the header row
worksheet.write('A1','sent_time',bold)
worksheet.write('B1', 'delivered_time',bold)
worksheet.write('C1', 'customer_name',bold)
worksheet.write('D1', 'id1_active',bold)
worksheet.write('E1', 'id2_active',bold)
worksheet.write('F1', 'id3_active',bold)
worksheet.write('G1', 'id1_inactive',bold)
worksheet.write('H1', 'id2_inactive',bold)
worksheet.write('I1', 'id3_inactive',bold)
worksheet.write('J1', 'location_active',bold)
worksheet.write('K1', 'location_inactive',bold)
worksheet.autofilter('A1:K1')
print("sent_time", "delivered_time", "customer_name", "id1_active", "id2_active", "id3_active", "id1_inactive", "id2_inactive", "id3_inactive", "location_active", "location_inactive")
for row in result:
print(row[0], row[1], row[2], row[3], row[4], row[5], row[6], row[7], row[8], row[9],row[10])
# Create a For loop to iterate through each row in the XLS file, starting at row 2 to skip the headers
for r, row in enumerate(result, start=1): #where you want to start printing results inside workbook
for c, col in enumerate(row):
worksheet.write_datetime(r,0,row[0], date_format)
worksheet.write_datetime(r,1, row[1], date_format)
worksheet.write(r,2, row[2])
worksheet.write(r,3, row[3])
worksheet.write(r,4, row[4])
worksheet.write(r,5, row[5])
worksheet.write(r,6, row[6])
worksheet.write(r,7, row[7])
worksheet.write(r,8, row[8])
worksheet.write(r,9, row[9])
worksheet.write(r,10, row[10])
#close out everything and save
cursor.close()
workbook.close()
conn.close()
#print number of rows and bye-bye message
print ("- - - - - - - - - - - - -")
rows = len(result)
print ("I just imported "+ str(rows) + " rows from MySQL!")
print ("")
print ("Good to Go!!!")
print ("")

In the code you originally posted, the where clause isn't actually being included as part of the query. What I didn't notice at first was that you're including the results of your input call in a list that you're then trying to feed to the cursor.execute call. That won't work since you only have one marker to replace in your query string. If we assume that the user is entering the value for id1_active, then I think you want the following:
query = (
"SELECT sent_time, delivered_time, customer_name, id1_active, id2_active, id3_active, id1_inactive, id2_inactive, id3_inactive, location_active, location_inactive FROM table1 "
"WHERE id1_active = %s"
)
id_2_lookup = input('Who are you looking for:')
# Execute sql Query
cursor.execute(query, (id_2_lookup, ))
This will try to match the user entered data against id1_active. Obviously, if that's not what you intended you'll have to adjust your query as necessary.
You could, in principle, insert the input values yourself and not rely on cursor.execute to do it for you. In that case,
query = (
"SELECT sent_time, delivered_time, customer_name, id1_active, id2_active, id3_active, id1_inactive, id2_inactive, id3_inactive, location_active, location_inactive FROM table1 "
"WHERE id1_active = %s"
)
id_2_lookup = input('Who are you looking for:')
# Execute sql Query
query = query % id_2_lookup
cursor.execute(query)
This will insert the value directly into the query so that cursor.execute only sees the query string and doesn't need to do any replacement. Generally speaking, though, your way is better.

Related

Python/MySQL - Rename CSV file

I created a small application to export data from my mysql database in csv, it works, but if I want to create another report is presented the following error:
pymysql.err.InternalError: (1086, "File '/TEMP/.CSV' already exists")
Yes, the file already exists. My question is, how do I generate two reports, even with the same name. Ex. hi.csv, and following hi.csv (1)
Following is the code below:
import tkinter as tk
import pymysql
root = tk.Tk()
root.geometry("")
root.title("excel teste")
conn = pymysql.connect(host="localhost", port=3306, user="root", password="", database="omnia")
with conn:
print("connect successfull!")
cursor = conn.cursor()
with cursor:
cursor.execute("SELECT VERSION()")
versao = cursor.fetchone()
print("Versão do gerenciador Maria DB: %s" % versao)
def exp_rel_con_pag():
conn = pymysql.connect(host="localhost", port=3306, user="root", password="", database="omnia")
with conn:
statm = "SELECT * FROM omniacademp INTO OUTFILE '/TEMP/"".CSV' FIELDS TERMINATED BY ',' ENCLOSED BY ''"
cursor = conn.cursor()
with cursor:
cursor.execute(statm)
results = cursor.fetchone()
print(results)
tk.Button(root, width=15, text="run", command=exp_rel_con_pag).place(x=10, y=10)
root.mainloop()
You could import the error class:
from pymysql.err import InternalError
Add a counter:
fileIndex = 0
Then see if the file already exists:
try:
statm = "SELECT * FROM omniacademp INTO OUTFILE '/TEMP/HI.CSV' FIELDS TERMINATED BY ',' ENCLOSED BY ''"
cursor.execute(statm)
except InternalError:
statm = "SELECT * FROM omniacademp INTO OUTFILE '/TEMP/HI ({}).CSV' FIELDS TERMINATED BY ',' ENCLOSED BY ''".format(fileIndex)
cursor.execute(statm)
fileIndex += 1
You need to add some level of dynamic naming. Personally I use timestamps.
For example I use openpyxl to write my excel files and datetime for my timestamp.
By using a timestamp down to seconds There is very little chance you will ever have a problem with the filename.
Here is the code I use once I have data to write.
import os
import openpyxl
from datetime import datetime as dt
list_of_data = [['row1'], ['row2'], ['row3'], ['row4']]
wb = openpyxl.Workbook() # create workbook
main_ws = wb.worksheets[0] # designate what worksheet I am working on.
for sub_list in list_of_data:
main_ws.append(sub_list) # writing data to each row.
# creating timestamp while removing special characters.
time_stamp = ''.join([{'-': '', ' ': '', ':': '', '.': ''}.get(c, c) for c in str(dt.now())])[0:12]
# build file name.
file_name = '{} - {}.xlsx'.format('report', time_stamp)
# using os library to build path to my local documents folder.
path = os.path.join(os.environ['USERPROFILE'], 'Documents', file_name)
# saving wb.
wb.save(filename=path)
As you can see I now have an excel file in my docs folder with a timestamp.

Python code not creating tables on the database but able to query the results postgres

My usecase is to write create a temp table in the postgres database and fetch records from it and insert into a different table.
The code i used is:
import psycopg2
import sys
import pprint
from __future__ import print_function
from os.path import join,dirname,abspath
import xlrd
import os.path
newlist = []
itemidlist = []
def main():
conn_string = "host='prod-dump.cvv9i14mrv4k.us-east-1.rds.amazonaws.com' dbname='ebdb' user='ebroot' password='*********'"
# print the connection string we will use to connect
# print "Connecting to database" % (conn_string)
# get a connection, if a connect cannot be made an exception will be raised here
conn = psycopg2.connect(conn_string)
# conn.cursor will return a cursor object, you can use this cursor to perform queries
cursor = conn.cursor()
dealer_id = input("Please enter dealer_id: ")
group_id = input("Please enter group_id: ")
scriptpath = os.path.dirname('__file__')
filename = os.path.join(scriptpath, 'Winco - Gusti.xlsx')
xl_workbook = xlrd.open_workbook(filename, "rb")
xl_sheet = xl_workbook.sheet_by_index(0)
print('Sheet Name: %s' % xl_sheet.name)
row=xl_sheet.row(0)
from xlrd.sheet import ctype_text
print('(Column #) type:value')
for idx, cell_obj in enumerate(row):
cell_type_str = ctype_text.get(cell_obj.ctype, 'unknown type')
#print('(%s) %s %s' % (idx, cell_type_str, cell_obj.value))
num_cols = xl_sheet.ncols
for row_idx in range(0, xl_sheet.nrows): # Iterate through rows
num_cols = xl_sheet.ncols
id_obj = xl_sheet.cell(row_idx, 1) # Get cell object by row, col
itemid = id_obj.value
#if itemid not in itemidlist:
itemidlist.append(itemid)
# execute our Query
'''
cursor.execute("""
if not exists(SELECT 1 FROM model_enable AS c WHERE c.name = %s);
BEGIN;
INSERT INTO model_enable (name) VALUES (%s)
END;
""" %(itemid,itemid))
'''
cursor.execute("drop table temp_mbp1")
try:
cursor.execute("SELECT p.model_no, pc.id as PCid, g.id AS GROUPid into public.temp_mbp1 FROM products p, \
model_enable me, products_clients pc, groups g WHERE p.model_no = me.name \
and p.id = pc.product_id and pc.client_id = %s and pc.client_id = g.client_id and g.id = %s"\
% (dealer_id,group_id)
except (Exception, psycopg2.DatabaseError) as error:
print(error)
cursor.execute("select count(*) from public.temp_mbp1")
# retrieve the records from the database
records = cursor.fetchall()
# print out the records using pretty print
# note that the NAMES of the columns are not shown, instead just indexes.
# for most people this isn't very useful so we'll show you how to return
# columns as a dictionary (hash) in the next example.
pprint.pprint(records)
if __name__ == "__main__":
main()
The try except block in between the program is not throwing any error but the table is not getting created in the postgres database as i see in the data admin.
The output shown is:
Please enter dealer_id: 90
Please enter group_id: 13
Sheet Name: Winco Full 8_15_17
(Column #) type:value
[(3263,)]
Thanks,
Santosh
You didn't commit the changes, so they aren't saved in the database. Add to the bottom, just below the pprint statement:
conn.commit()

Postgresql: how to copy multiple columns from one table to another?

I am trying to copy some columns from a table called temporarytable to another one called scalingData using psycopg2 in python.
scalingData is a pandas dataframe. The dataframe contains data from cities such as: nameOfCities, population, etc.
scalingData = pd.read_csv('myFile.csv') ## 'myFile.csv' is the datasource
each column of the dataframe has a different kind of data, such as 'int64', 'float64' or 'O'.
Here a screen shot from Jupyter
import psycopg2 as ps
## Populate table scalingData
tmp = scalingData.dtypes
con = None
con = ps.connect(dbname = 'mydb', user='postgres', host='localhost', password='mypd')
con.autocommit = True
cur = con.cursor()
for i in range(0,5):
j = header[i]
stat = """ ALTER TABLE "scalingData" ADD COLUMN "%s" """%j
if tmp[i] == 'int64':
stat = stat+'bigint'
if tmp[i] == 'float64':
stat = stat+'double precision'
if tmp[i] == 'O':
stat = stat+'text'
### Add Column
cur.execute(stat)
stat1 = """INSERT INTO "scalingData" ("%s") SELECT "%s" FROM temporarytable"""%(j,j)
### Copy Column
cur.execute(stat1)
cur.close()
con.close()
My problem is that if I look at scalingData only the first column is copied while the others are empty.
Here a screenshot of the table from pgAdmin afer the query:
Also if I copy for instance the second column as first column it works, but then it fails with the others as well.
This happens because you add 1 field to your new table, than insert data only with that field set up, and you do it 5 times. So you should actually see 5 copies of your original table with only 1 field set up.
You need to first set up the structure for your scalingData table, then insert all the records with all fields.
Here is the code (not a Python developer):
import psycopg2 as ps
## Populate table scalingData
tmp = scalingData.dtypes
con = None
con = ps.connect(dbname = 'mydb', user='postgres', host='localhost', password='mypd')
con.autocommit = True
cur = con.cursor()
for i in range(0,5):
j = header[i]
stat = """ ALTER TABLE "scalingData" ADD COLUMN "%s" """%j
if tmp[i] == 'int64':
stat = stat+'bigint'
if tmp[i] == 'float64':
stat = stat+'double precision'
if tmp[i] == 'O':
stat = stat+'text'
### Add Column
cur.execute(stat)
fieldsStr = '"' + '", "'.join([header]) + '"' ### will return "header1", "header2", ... , "header5"
stat1 = """INSERT INTO "scalingData" (%s) SELECT %s FROM temporarytable"""%(fieldsStr,fieldsStr)
### Copy Table
cur.execute(stat1)
cur.close()
con.close()
I'm not familiar with Python, but just a guess as to where the issue might be coming from:
"""INSERT INTO "scalingData" ("%s") SELECT "%s" FROM temporarytable"""
... will transform the "%s" bit into "foo, bar, baz" rather than "foo", "bar", "baz".
Put another way you should remove the unneeded double quotes in your statement and escape the individual column names instead.
Double quotes are used in PG to quote identifiers. You can literally have an table or column called "foo, bar, baz" and PG will work just fine when you do - provided it's always in-between double quotes when you use it in a statement.

INSERT IGNORE don't work on my script

I have this python script who inserts xcell data into a mysql db, but I need to only inserts rows that aren't duplicates, so I used the INSERT IGNORE sql method also tryed with ON DUPLICATE KEY UPDATE, but it doesn't work, it just insert all the data that is in the table. This is the script:
import xlrd
import MySQLdb
#Seting the database connection
database = MySQLdb.connect (host="localhost", user = "root", passwd = "****", db = "python_insert")
cursor = database.cursor()
query = """INSERT IGNORE INTO test (masina,data_ora,conbustibil) VALUES (%s,%s,%s)"""
#Open and parse the xcell file
book = xlrd.open_workbook("asset/testing.xlsx")
sheet = book.sheet_by_name("Report")
for r in range(1, sheet.nrows):
masina = sheet.cell(r, 1).value
data_ora = sheet.cell(r, 0).value
conbustibil = sheet.cell(r, 8).value
values = (masina, data_ora, conbustibil)
cursor.execute(query, values)
#Closing cursor+database
cursor.close()
database.commit()
database.close()
print "Succes"
columns = str(sheet.ncols)
rows = str(sheet.nrows)
print ("Am importat " + columns + " coloane si " + rows + " randuri in MySQL")

Using Python to pull data from MySQL database that fall between two datetimes

I am trying to figure out how to pull data from a table that has a column that is called 'sent_time' and the datetime falls between two datetimes. The format, as you can see in the code, is 'YYYY-dd-MM HH:mm:ss' . I keep getting different errors depending on how I change the code. Could you please tell me where I'm going wrong?
I want to give you all the information I have with all the other files and what I'm shooting for to see if you can replicate and actually get it working just to see if it's not just my system or some configuration I have..
I have a database with one table.Lets call it ‘table1’ The table is broken down with columns like this:
sent_time | delivered_time |id1_active |id2_active |id3_active |id1_inactive |id2_inactive |id3_inactive |location_active |location_inactive …..`lots more
Lets say that these are two or more customers delivering goods to and from each other. Each customer has three id#s.
I created a ‘config.ini’ file to make my life a bit easier
[mysql]
host = localhost
database = db_name
user = root
password = blahblah
I created a ‘python_mysql_dbconfig.py’
from configparser import ConfigParser
def read_db_config(filename=’config.ini’, section=’mysql’):
“”” Read database configuration file and return a dictionary object
:param filename: name of the configuration file
:param section: section of database configuration
:return: a dictionary of database parameters
“””
# create parser and read ini configuration file
parser = ConfigParser()
parser.read(filename)
# get section, default to mysql
db = {}
if parser.has_section(section):
items = parser.items(section)
for item in items:
db[item[0]] = item[1]
else:
raise Exception(‘{0} not found in the {1} file’.format(section, filename))
return db
And now this is the code that I've been working on that is supposed to pull the rows that fall between two datetimes. But I keep getting this error:
**Traceback (most recent call last):
File "C:\Python34\timerange.py", line 33, in <module>
dt_start = datetime.strptime(userIn,timeShape)
TypeError: must be str, not tuple**
Can you take a look at this and tell me what I'm doing wrong?
timerange.py
# Establish a MySQL connection
from mysql.connector import MySQLConnection, Error
from python_mysql_dbconfig import read_db_config
db_config = read_db_config()
conn = MySQLConnection(**db_config)
import xlsxwriter
from xlsxwriter.workbook import Workbook
import datetime
from datetime import datetime
cursor = conn.cursor()
#creates the workbook
workbook = xlsxwriter.Workbook('imhere.xlsx')
worksheet = workbook.add_worksheet()
#formatting definitions
bold = workbook.add_format({'bold': True})
date_format = workbook.add_format({'num_format': 'yyyy-mm-dd hh:mm:ss'})
timeShape = '%Y-%m-%d %H:%M:%S'
#actual query
userIn = [input('start date:'),0]
userEnd = [input('end date:'),1]
query = (
"SELECT sent_time, delivered_time, customer_name, id1_active, id2_active, id3_active, id1_inactive, id2_inactive, id3_inactive, location_active, location_inactive FROM table1 "
"WHERE sent_time BETWEEN %s AND %s"
)
dt_start = datetime.strptime(userIn,timeShape)
dt_end = datetime.strptime(userEnd, timeShape)
# Execute sql Query
cursor.execute(query, (dt_start, dt_end))
result = cursor.fetchall()
#sets up the header row
worksheet.write('A1','sent_time',bold)
worksheet.write('B1', 'delivered_time',bold)
worksheet.write('C1', 'customer_name',bold)
worksheet.write('D1', 'id1_active',bold)
worksheet.write('E1', 'id2_active',bold)
worksheet.write('F1', 'id3_active',bold)
worksheet.write('G1', 'id1_inactive',bold)
worksheet.write('H1', 'id2_inactive',bold)
worksheet.write('I1', 'id3_inactive',bold)
worksheet.write('J1', 'location_active',bold)
worksheet.write('K1', 'location_inactive',bold)
worksheet.autofilter('A1:K1')
print("sent_time", "delivered_time", "customer_name", "id1_active", "id2_active", "id3_active", "id1_inactive", "id2_inactive", "id3_inactive", "location_active", "location_inactive")
for row in result:
print(row[0], row[1], row[2], row[3], row[4], row[5], row[6], row[7], row[8], row[9],row[10])
# Create a For loop to iterate through each row in the XLS file, starting at row 2 to skip the headers
for r, row in enumerate(result, start=1): #where you want to start printing results inside workbook
for c, col in enumerate(row):
worksheet.write_datetime(r,0,row[0], date_format)
worksheet.write_datetime(r,1, row[1], date_format)
worksheet.write(r,2, row[2])
worksheet.write(r,3, row[3])
worksheet.write(r,4, row[4])
worksheet.write(r,5, row[5])
worksheet.write(r,6, row[6])
worksheet.write(r,7, row[7])
worksheet.write(r,8, row[8])
worksheet.write(r,9, row[9])
worksheet.write(r,10, row[10])
#close out everything and save
cursor.close()
workbook.close()
conn.close()
#print number of rows and bye-bye message
print ("- - - - - - - - - - - - -")
rows = len(result)
print ("I just imported "+ str(rows) + " rows from MySQL!")
print ("")
print ("Good to Go!!!")
print ("")
I used dateutil.parser.parse in my inputs and was able to get some results...I have a new error but I will be posting it on a now question. Thank you for your help.

Categories

Resources