mysql data export for specific value using python - python
I'm trying to get specific rows of data from mysql database table1. I'm looking wherever a certain id shows up in one of the columns.
I am having trouble with my code in the SELECT WHERE statement under 'query'. When i ask for a specific id (id_2_lookup) I get the entire table exported. where am I going wrong?
# Establish a MySQL connection
from mysql.connector import MySQLConnection, Error
from python_mysql_dbconfig import read_db_config
db_config = read_db_config()
conn = MySQLConnection(**db_config)
cursor = conn.cursor()
#to export to excel
import xlsxwriter
from xlsxwriter.workbook import Workbook
#to get the datetime functions
import datetime
from datetime import datetime
#creates the workbook
workbook = xlsxwriter.Workbook('imheretoo.xlsx')
worksheet = workbook.add_worksheet()
#formatting definitions
bold = workbook.add_format({'bold': True})
date_format = workbook.add_format({'num_format': 'yyyy-mm-dd hh:mm:ss'})
timeShape = '%Y-%m-%d %H:%M:%S'
query = ("SELECT sent_time, delivered_time, customer_name, id1_active, id2_active, id3_active, id1_inactive, id2_inactive, id3_inactive, location_active, location_inactive FROM table1 ")
"WHERE id1_active '%s'"
id_2_lookup = [input('Who are you looking for:'),0]
# Execute sql Query
cursor.execute(query)
result = cursor.fetchall()
#sets up the header row
worksheet.write('A1','sent_time',bold)
worksheet.write('B1', 'delivered_time',bold)
worksheet.write('C1', 'customer_name',bold)
worksheet.write('D1', 'id1_active',bold)
worksheet.write('E1', 'id2_active',bold)
worksheet.write('F1', 'id3_active',bold)
worksheet.write('G1', 'id1_inactive',bold)
worksheet.write('H1', 'id2_inactive',bold)
worksheet.write('I1', 'id3_inactive',bold)
worksheet.write('J1', 'location_active',bold)
worksheet.write('K1', 'location_inactive',bold)
worksheet.autofilter('A1:K1')
print("sent_time", "delivered_time", "customer_name", "id1_active", "id2_active", "id3_active", "id1_inactive", "id2_inactive", "id3_inactive", "location_active", "location_inactive")
for row in result:
print(row[0], row[1], row[2], row[3], row[4], row[5], row[6], row[7], row[8], row[9],row[10])
# Create a For loop to iterate through each row in the XLS file, starting at row 2 to skip the headers
for r, row in enumerate(result, start=1): #where you want to start printing results inside workbook
for c, col in enumerate(row):
worksheet.write_datetime(r,0,row[0], date_format)
worksheet.write_datetime(r,1, row[1], date_format)
worksheet.write(r,2, row[2])
worksheet.write(r,3, row[3])
worksheet.write(r,4, row[4])
worksheet.write(r,5, row[5])
worksheet.write(r,6, row[6])
worksheet.write(r,7, row[7])
worksheet.write(r,8, row[8])
worksheet.write(r,9, row[9])
worksheet.write(r,10, row[10])
#close out everything and save
cursor.close()
workbook.close()
conn.close()
#print number of rows and bye-bye message
print ("- - - - - - - - - - - - -")
rows = len(result)
print ("I just imported "+ str(rows) + " rows from MySQL!")
print ("")
print ("Good to Go!!!")
print ("")
I want to give you all the information I have with all the other files and what I'm shooting for to see if you can replicate and actually get it working just to see if it's not just my system or some configuration I have..
I have a database with one table.Lets call it ‘table1’ The table is broken down with columns like this:
sent_time | delivered_time |id1_active |id2_active |id3_active |id1_inactive |id2_inactive |id3_inactive |location_active |location_inactive …..`lots more
Lets say that these are two or more customers delivering goods to and from each other. Each customer has three id#s.
I created a ‘config.ini’ file to make my life a bit easier
[mysql]
host = localhost
database = db_name
user = root
password = blahblah
I created a ‘python_mysql_dbconfig.py’
from configparser import ConfigParser
def read_db_config(filename=’config.ini’, section=’mysql’):
“”” Read database configuration file and return a dictionary object
:param filename: name of the configuration file
:param section: section of database configuration
:return: a dictionary of database parameters
“””
# create parser and read ini configuration file
parser = ConfigParser()
parser.read(filename)
# get section, default to mysql
db = {}
if parser.has_section(section):
items = parser.items(section)
for item in items:
db[item[0]] = item[1]
else:
raise Exception(‘{0} not found in the {1} file’.format(section, filename))
return db
I now have the code we've been working on but now updated with what changes we had made but am still getting errors...
# Establish a MySQL connection
from mysql.connector import MySQLConnection, Error
from python_mysql_dbconfig import read_db_config
db_config = read_db_config()
conn = MySQLConnection(**db_config)
cursor = conn.cursor()
#to export to excel
import xlsxwriter
from xlsxwriter.workbook import Workbook
#to get the datetime functions
import datetime
from datetime import datetime
#creates the workbook
workbook = xlsxwriter.Workbook('imheretoo.xlsx')
worksheet = workbook.add_worksheet()
#formatting definitions
bold = workbook.add_format({'bold': True})
date_format = workbook.add_format({'num_format': 'yyyy-mm-dd hh:mm:ss'})
timeShape = '%Y-%m-%d %H:%M:%S'
#actual query
id_2_lookup= [input('Who are you looking for:'),0]
query = (
"""SELECT sent_time, delivered_time, customer_name, id1_active, id2_active, id3_active, id1_inactive, id2_inactive, id3_inactive, location_active, location_inactive FROM table1 "
"WHERE id1_active = %s""",(id_2_lookup)
)
# Execute sql Query
query = query % id_2_lookup
cursor.execute(query)
result = cursor.fetchall()
#sets up the header row
worksheet.write('A1','sent_time',bold)
worksheet.write('B1', 'delivered_time',bold)
worksheet.write('C1', 'customer_name',bold)
worksheet.write('D1', 'id1_active',bold)
worksheet.write('E1', 'id2_active',bold)
worksheet.write('F1', 'id3_active',bold)
worksheet.write('G1', 'id1_inactive',bold)
worksheet.write('H1', 'id2_inactive',bold)
worksheet.write('I1', 'id3_inactive',bold)
worksheet.write('J1', 'location_active',bold)
worksheet.write('K1', 'location_inactive',bold)
worksheet.autofilter('A1:K1')
print("sent_time", "delivered_time", "customer_name", "id1_active", "id2_active", "id3_active", "id1_inactive", "id2_inactive", "id3_inactive", "location_active", "location_inactive")
for row in result:
print(row[0], row[1], row[2], row[3], row[4], row[5], row[6], row[7], row[8], row[9],row[10])
# Create a For loop to iterate through each row in the XLS file, starting at row 2 to skip the headers
for r, row in enumerate(result, start=1): #where you want to start printing results inside workbook
for c, col in enumerate(row):
worksheet.write_datetime(r,0,row[0], date_format)
worksheet.write_datetime(r,1, row[1], date_format)
worksheet.write(r,2, row[2])
worksheet.write(r,3, row[3])
worksheet.write(r,4, row[4])
worksheet.write(r,5, row[5])
worksheet.write(r,6, row[6])
worksheet.write(r,7, row[7])
worksheet.write(r,8, row[8])
worksheet.write(r,9, row[9])
worksheet.write(r,10, row[10])
#close out everything and save
cursor.close()
workbook.close()
conn.close()
#print number of rows and bye-bye message
print ("- - - - - - - - - - - - -")
rows = len(result)
print ("I just imported "+ str(rows) + " rows from MySQL!")
print ("")
print ("Good to Go!!!")
print ("")
In the code you originally posted, the where clause isn't actually being included as part of the query. What I didn't notice at first was that you're including the results of your input call in a list that you're then trying to feed to the cursor.execute call. That won't work since you only have one marker to replace in your query string. If we assume that the user is entering the value for id1_active, then I think you want the following:
query = (
"SELECT sent_time, delivered_time, customer_name, id1_active, id2_active, id3_active, id1_inactive, id2_inactive, id3_inactive, location_active, location_inactive FROM table1 "
"WHERE id1_active = %s"
)
id_2_lookup = input('Who are you looking for:')
# Execute sql Query
cursor.execute(query, (id_2_lookup, ))
This will try to match the user entered data against id1_active. Obviously, if that's not what you intended you'll have to adjust your query as necessary.
You could, in principle, insert the input values yourself and not rely on cursor.execute to do it for you. In that case,
query = (
"SELECT sent_time, delivered_time, customer_name, id1_active, id2_active, id3_active, id1_inactive, id2_inactive, id3_inactive, location_active, location_inactive FROM table1 "
"WHERE id1_active = %s"
)
id_2_lookup = input('Who are you looking for:')
# Execute sql Query
query = query % id_2_lookup
cursor.execute(query)
This will insert the value directly into the query so that cursor.execute only sees the query string and doesn't need to do any replacement. Generally speaking, though, your way is better.
Related
Python/MySQL - Rename CSV file
I created a small application to export data from my mysql database in csv, it works, but if I want to create another report is presented the following error: pymysql.err.InternalError: (1086, "File '/TEMP/.CSV' already exists") Yes, the file already exists. My question is, how do I generate two reports, even with the same name. Ex. hi.csv, and following hi.csv (1) Following is the code below: import tkinter as tk import pymysql root = tk.Tk() root.geometry("") root.title("excel teste") conn = pymysql.connect(host="localhost", port=3306, user="root", password="", database="omnia") with conn: print("connect successfull!") cursor = conn.cursor() with cursor: cursor.execute("SELECT VERSION()") versao = cursor.fetchone() print("Versão do gerenciador Maria DB: %s" % versao) def exp_rel_con_pag(): conn = pymysql.connect(host="localhost", port=3306, user="root", password="", database="omnia") with conn: statm = "SELECT * FROM omniacademp INTO OUTFILE '/TEMP/"".CSV' FIELDS TERMINATED BY ',' ENCLOSED BY ''" cursor = conn.cursor() with cursor: cursor.execute(statm) results = cursor.fetchone() print(results) tk.Button(root, width=15, text="run", command=exp_rel_con_pag).place(x=10, y=10) root.mainloop()
You could import the error class: from pymysql.err import InternalError Add a counter: fileIndex = 0 Then see if the file already exists: try: statm = "SELECT * FROM omniacademp INTO OUTFILE '/TEMP/HI.CSV' FIELDS TERMINATED BY ',' ENCLOSED BY ''" cursor.execute(statm) except InternalError: statm = "SELECT * FROM omniacademp INTO OUTFILE '/TEMP/HI ({}).CSV' FIELDS TERMINATED BY ',' ENCLOSED BY ''".format(fileIndex) cursor.execute(statm) fileIndex += 1
You need to add some level of dynamic naming. Personally I use timestamps. For example I use openpyxl to write my excel files and datetime for my timestamp. By using a timestamp down to seconds There is very little chance you will ever have a problem with the filename. Here is the code I use once I have data to write. import os import openpyxl from datetime import datetime as dt list_of_data = [['row1'], ['row2'], ['row3'], ['row4']] wb = openpyxl.Workbook() # create workbook main_ws = wb.worksheets[0] # designate what worksheet I am working on. for sub_list in list_of_data: main_ws.append(sub_list) # writing data to each row. # creating timestamp while removing special characters. time_stamp = ''.join([{'-': '', ' ': '', ':': '', '.': ''}.get(c, c) for c in str(dt.now())])[0:12] # build file name. file_name = '{} - {}.xlsx'.format('report', time_stamp) # using os library to build path to my local documents folder. path = os.path.join(os.environ['USERPROFILE'], 'Documents', file_name) # saving wb. wb.save(filename=path) As you can see I now have an excel file in my docs folder with a timestamp.
Python code not creating tables on the database but able to query the results postgres
My usecase is to write create a temp table in the postgres database and fetch records from it and insert into a different table. The code i used is: import psycopg2 import sys import pprint from __future__ import print_function from os.path import join,dirname,abspath import xlrd import os.path newlist = [] itemidlist = [] def main(): conn_string = "host='prod-dump.cvv9i14mrv4k.us-east-1.rds.amazonaws.com' dbname='ebdb' user='ebroot' password='*********'" # print the connection string we will use to connect # print "Connecting to database" % (conn_string) # get a connection, if a connect cannot be made an exception will be raised here conn = psycopg2.connect(conn_string) # conn.cursor will return a cursor object, you can use this cursor to perform queries cursor = conn.cursor() dealer_id = input("Please enter dealer_id: ") group_id = input("Please enter group_id: ") scriptpath = os.path.dirname('__file__') filename = os.path.join(scriptpath, 'Winco - Gusti.xlsx') xl_workbook = xlrd.open_workbook(filename, "rb") xl_sheet = xl_workbook.sheet_by_index(0) print('Sheet Name: %s' % xl_sheet.name) row=xl_sheet.row(0) from xlrd.sheet import ctype_text print('(Column #) type:value') for idx, cell_obj in enumerate(row): cell_type_str = ctype_text.get(cell_obj.ctype, 'unknown type') #print('(%s) %s %s' % (idx, cell_type_str, cell_obj.value)) num_cols = xl_sheet.ncols for row_idx in range(0, xl_sheet.nrows): # Iterate through rows num_cols = xl_sheet.ncols id_obj = xl_sheet.cell(row_idx, 1) # Get cell object by row, col itemid = id_obj.value #if itemid not in itemidlist: itemidlist.append(itemid) # execute our Query ''' cursor.execute(""" if not exists(SELECT 1 FROM model_enable AS c WHERE c.name = %s); BEGIN; INSERT INTO model_enable (name) VALUES (%s) END; """ %(itemid,itemid)) ''' cursor.execute("drop table temp_mbp1") try: cursor.execute("SELECT p.model_no, pc.id as PCid, g.id AS GROUPid into public.temp_mbp1 FROM products p, \ model_enable me, products_clients pc, groups g WHERE p.model_no = me.name \ and p.id = pc.product_id and pc.client_id = %s and pc.client_id = g.client_id and g.id = %s"\ % (dealer_id,group_id) except (Exception, psycopg2.DatabaseError) as error: print(error) cursor.execute("select count(*) from public.temp_mbp1") # retrieve the records from the database records = cursor.fetchall() # print out the records using pretty print # note that the NAMES of the columns are not shown, instead just indexes. # for most people this isn't very useful so we'll show you how to return # columns as a dictionary (hash) in the next example. pprint.pprint(records) if __name__ == "__main__": main() The try except block in between the program is not throwing any error but the table is not getting created in the postgres database as i see in the data admin. The output shown is: Please enter dealer_id: 90 Please enter group_id: 13 Sheet Name: Winco Full 8_15_17 (Column #) type:value [(3263,)] Thanks, Santosh
You didn't commit the changes, so they aren't saved in the database. Add to the bottom, just below the pprint statement: conn.commit()
Postgresql: how to copy multiple columns from one table to another?
I am trying to copy some columns from a table called temporarytable to another one called scalingData using psycopg2 in python. scalingData is a pandas dataframe. The dataframe contains data from cities such as: nameOfCities, population, etc. scalingData = pd.read_csv('myFile.csv') ## 'myFile.csv' is the datasource each column of the dataframe has a different kind of data, such as 'int64', 'float64' or 'O'. Here a screen shot from Jupyter import psycopg2 as ps ## Populate table scalingData tmp = scalingData.dtypes con = None con = ps.connect(dbname = 'mydb', user='postgres', host='localhost', password='mypd') con.autocommit = True cur = con.cursor() for i in range(0,5): j = header[i] stat = """ ALTER TABLE "scalingData" ADD COLUMN "%s" """%j if tmp[i] == 'int64': stat = stat+'bigint' if tmp[i] == 'float64': stat = stat+'double precision' if tmp[i] == 'O': stat = stat+'text' ### Add Column cur.execute(stat) stat1 = """INSERT INTO "scalingData" ("%s") SELECT "%s" FROM temporarytable"""%(j,j) ### Copy Column cur.execute(stat1) cur.close() con.close() My problem is that if I look at scalingData only the first column is copied while the others are empty. Here a screenshot of the table from pgAdmin afer the query: Also if I copy for instance the second column as first column it works, but then it fails with the others as well.
This happens because you add 1 field to your new table, than insert data only with that field set up, and you do it 5 times. So you should actually see 5 copies of your original table with only 1 field set up. You need to first set up the structure for your scalingData table, then insert all the records with all fields. Here is the code (not a Python developer): import psycopg2 as ps ## Populate table scalingData tmp = scalingData.dtypes con = None con = ps.connect(dbname = 'mydb', user='postgres', host='localhost', password='mypd') con.autocommit = True cur = con.cursor() for i in range(0,5): j = header[i] stat = """ ALTER TABLE "scalingData" ADD COLUMN "%s" """%j if tmp[i] == 'int64': stat = stat+'bigint' if tmp[i] == 'float64': stat = stat+'double precision' if tmp[i] == 'O': stat = stat+'text' ### Add Column cur.execute(stat) fieldsStr = '"' + '", "'.join([header]) + '"' ### will return "header1", "header2", ... , "header5" stat1 = """INSERT INTO "scalingData" (%s) SELECT %s FROM temporarytable"""%(fieldsStr,fieldsStr) ### Copy Table cur.execute(stat1) cur.close() con.close()
I'm not familiar with Python, but just a guess as to where the issue might be coming from: """INSERT INTO "scalingData" ("%s") SELECT "%s" FROM temporarytable""" ... will transform the "%s" bit into "foo, bar, baz" rather than "foo", "bar", "baz". Put another way you should remove the unneeded double quotes in your statement and escape the individual column names instead. Double quotes are used in PG to quote identifiers. You can literally have an table or column called "foo, bar, baz" and PG will work just fine when you do - provided it's always in-between double quotes when you use it in a statement.
INSERT IGNORE don't work on my script
I have this python script who inserts xcell data into a mysql db, but I need to only inserts rows that aren't duplicates, so I used the INSERT IGNORE sql method also tryed with ON DUPLICATE KEY UPDATE, but it doesn't work, it just insert all the data that is in the table. This is the script: import xlrd import MySQLdb #Seting the database connection database = MySQLdb.connect (host="localhost", user = "root", passwd = "****", db = "python_insert") cursor = database.cursor() query = """INSERT IGNORE INTO test (masina,data_ora,conbustibil) VALUES (%s,%s,%s)""" #Open and parse the xcell file book = xlrd.open_workbook("asset/testing.xlsx") sheet = book.sheet_by_name("Report") for r in range(1, sheet.nrows): masina = sheet.cell(r, 1).value data_ora = sheet.cell(r, 0).value conbustibil = sheet.cell(r, 8).value values = (masina, data_ora, conbustibil) cursor.execute(query, values) #Closing cursor+database cursor.close() database.commit() database.close() print "Succes" columns = str(sheet.ncols) rows = str(sheet.nrows) print ("Am importat " + columns + " coloane si " + rows + " randuri in MySQL")
Using Python to pull data from MySQL database that fall between two datetimes
I am trying to figure out how to pull data from a table that has a column that is called 'sent_time' and the datetime falls between two datetimes. The format, as you can see in the code, is 'YYYY-dd-MM HH:mm:ss' . I keep getting different errors depending on how I change the code. Could you please tell me where I'm going wrong? I want to give you all the information I have with all the other files and what I'm shooting for to see if you can replicate and actually get it working just to see if it's not just my system or some configuration I have.. I have a database with one table.Lets call it ‘table1’ The table is broken down with columns like this: sent_time | delivered_time |id1_active |id2_active |id3_active |id1_inactive |id2_inactive |id3_inactive |location_active |location_inactive …..`lots more Lets say that these are two or more customers delivering goods to and from each other. Each customer has three id#s. I created a ‘config.ini’ file to make my life a bit easier [mysql] host = localhost database = db_name user = root password = blahblah I created a ‘python_mysql_dbconfig.py’ from configparser import ConfigParser def read_db_config(filename=’config.ini’, section=’mysql’): “”” Read database configuration file and return a dictionary object :param filename: name of the configuration file :param section: section of database configuration :return: a dictionary of database parameters “”” # create parser and read ini configuration file parser = ConfigParser() parser.read(filename) # get section, default to mysql db = {} if parser.has_section(section): items = parser.items(section) for item in items: db[item[0]] = item[1] else: raise Exception(‘{0} not found in the {1} file’.format(section, filename)) return db And now this is the code that I've been working on that is supposed to pull the rows that fall between two datetimes. But I keep getting this error: **Traceback (most recent call last): File "C:\Python34\timerange.py", line 33, in <module> dt_start = datetime.strptime(userIn,timeShape) TypeError: must be str, not tuple** Can you take a look at this and tell me what I'm doing wrong? timerange.py # Establish a MySQL connection from mysql.connector import MySQLConnection, Error from python_mysql_dbconfig import read_db_config db_config = read_db_config() conn = MySQLConnection(**db_config) import xlsxwriter from xlsxwriter.workbook import Workbook import datetime from datetime import datetime cursor = conn.cursor() #creates the workbook workbook = xlsxwriter.Workbook('imhere.xlsx') worksheet = workbook.add_worksheet() #formatting definitions bold = workbook.add_format({'bold': True}) date_format = workbook.add_format({'num_format': 'yyyy-mm-dd hh:mm:ss'}) timeShape = '%Y-%m-%d %H:%M:%S' #actual query userIn = [input('start date:'),0] userEnd = [input('end date:'),1] query = ( "SELECT sent_time, delivered_time, customer_name, id1_active, id2_active, id3_active, id1_inactive, id2_inactive, id3_inactive, location_active, location_inactive FROM table1 " "WHERE sent_time BETWEEN %s AND %s" ) dt_start = datetime.strptime(userIn,timeShape) dt_end = datetime.strptime(userEnd, timeShape) # Execute sql Query cursor.execute(query, (dt_start, dt_end)) result = cursor.fetchall() #sets up the header row worksheet.write('A1','sent_time',bold) worksheet.write('B1', 'delivered_time',bold) worksheet.write('C1', 'customer_name',bold) worksheet.write('D1', 'id1_active',bold) worksheet.write('E1', 'id2_active',bold) worksheet.write('F1', 'id3_active',bold) worksheet.write('G1', 'id1_inactive',bold) worksheet.write('H1', 'id2_inactive',bold) worksheet.write('I1', 'id3_inactive',bold) worksheet.write('J1', 'location_active',bold) worksheet.write('K1', 'location_inactive',bold) worksheet.autofilter('A1:K1') print("sent_time", "delivered_time", "customer_name", "id1_active", "id2_active", "id3_active", "id1_inactive", "id2_inactive", "id3_inactive", "location_active", "location_inactive") for row in result: print(row[0], row[1], row[2], row[3], row[4], row[5], row[6], row[7], row[8], row[9],row[10]) # Create a For loop to iterate through each row in the XLS file, starting at row 2 to skip the headers for r, row in enumerate(result, start=1): #where you want to start printing results inside workbook for c, col in enumerate(row): worksheet.write_datetime(r,0,row[0], date_format) worksheet.write_datetime(r,1, row[1], date_format) worksheet.write(r,2, row[2]) worksheet.write(r,3, row[3]) worksheet.write(r,4, row[4]) worksheet.write(r,5, row[5]) worksheet.write(r,6, row[6]) worksheet.write(r,7, row[7]) worksheet.write(r,8, row[8]) worksheet.write(r,9, row[9]) worksheet.write(r,10, row[10]) #close out everything and save cursor.close() workbook.close() conn.close() #print number of rows and bye-bye message print ("- - - - - - - - - - - - -") rows = len(result) print ("I just imported "+ str(rows) + " rows from MySQL!") print ("") print ("Good to Go!!!") print ("")
I used dateutil.parser.parse in my inputs and was able to get some results...I have a new error but I will be posting it on a now question. Thank you for your help.