I have several ms access databases that each have a table named PlotStatus-name-3/13/12.
I need to import each of these tables into a .csv table. If I manually change the name of the tables to PlotStatus_name_3_13_12, this code works. Does anyone know how to change the table namees using python?
#connect to access database
for filename in os.listdir(prog_rep_local):
if filename[-6:] == ".accdb":
DBtable = os.path.join(prog_rep_local, filename)
conn = pyodbc.connect(r'DRIVER={Microsoft Access Driver (*.mdb, *.accdb)};DBQ=' + DBtable)
cursor = conn.cursor()
ct = cursor.tables
for row in ct():
rtn = row.table_name
if rtn[:10] == "PlotStatus":
#this does not work:
#Oldpath = os.path.join(prog_rep_local, filename, rtn)
#print Oldpath
#fpr = Oldpath.replace('-', '_')#.replace("/","_")
#print fpr
#newname = os.rename(Oldpath, fpr) this does not work
#print newname
#spqaccdb = "SELECT * FROM " + newname
#this workds if I manually change the table names in advance
sqlaccdb = "SELECT * FROM " + rtn
print sqlaccdb
cursor.execute(sqlaccdb)
rows = cursor.fetchall()
An easier solution would be to just add brackets around the table name so that the /s don't throw off the SQL command interpreter.
sqlaccdb = "SELECT * FROM [" + rtn + "]"
Related
I am trying to use Regex cleaning steps in Python to test to see if a pattern matches and if so, clean it to the specified carrier.
For instance, if re.match("\bA\.?X\.?A\.?\b", Carrier): Carrier = CarrierMatch
I've tried this by running a for loop on the number of raw carrier fields followed by another for loop on all of the match descriptions (just printing for now) and it takes FOREVER to run. Hoping someone out there has a better method.
Ideally I would like to see if it's possible to compile all match descriptions for Carrier I have in SQL (~2,000) and pull out the regex match pattern(s) to then use to append the carrier field.
For reference the SQL data fields are [raw_pattern], [Carrier]
import sys
import re
import pyodbc
import sys
import os
import pandas as pd
from datetime import datetime
import time
regexlist = list()
carrierlist = list()
rpt_id = 1234
#rpt_id = sys.argv[1]
plan_typs = list()
try:
conn = pyodbc.connect('Driver={SQL Server};'
'Server=xxxxxxxxx;'
'Database=xxxxxxxxx;'
'Trusted_Connection=xxxxx;')
except:
print('Connection Failed')
sys.exit()
cursor = conn.cursor()
sql = "delete from [dbo].[python_test1] where rpt_id = '""" + str(rpt_id) + """'"""
cursor.execute(sql)
conn.commit()
cursor = conn.cursor()
sql = "insert into [dbo].[python_test1](rpt_id, raw_carr_nm) select distinct rpt_id, raw_carr_nm from [dbo].[wrk_data] where rpt_id = '""" + str(rpt_id) + """'"""
cursor.execute(sql)
conn.commit()
sql = "SELECT [raw_pattern], [Carrier] FROM [dbo].[ref_regex_t]"
regex1 = pd.read_sql(sql, conn)
sql = "select * from [dbo].[python_test1] where rpt_id = '""" + str(rpt_id) + """'"""
carriers = pd.read_sql(sql, conn)
for index, row in regex1.iterrows():
regexlist.append(row['raw_pattern'])
for index, row in carriers.iterrows():
carrierlist.append(row['Carrier'])
for i in carrierlist:
print('"' + i + '"')
for i in regexlist:
print('"' + i + '"')
My second data frame is not loading values when i create it. Any help with why it is not working? When i make my cursor a list, it has a bunch of values in it, but for whatever reason when i try to do a normal data frame load with pandas a second time, it does not work.
My code:
conn = pyodbc.connect(constr, autocommit=True)
cursor = conn.cursor()
secondCheckList = []
checkCount = 0
maxValue = 0
strsql = "SELECT * FROM CRMCSVFILE"
cursor = cursor.execute(strsql)
cols = []
SQLupdateNewIdField = "UPDATE CRMCSVFILE SET NEW_ID = ? WHERE Email_Address_Txt = ? OR TELEPHONE_NUM = ? OR DRIVER_LICENSE_NUM = ?"
for row in cursor.description:
cols.append(row[0])
df = pd.DataFrame.from_records(cursor)
df.columns = cols
newIdInt = 1
for row in range(len(df['Email_Address_Txt'])):
#run initial search to figure out the max number of records. Look for email, phone, and drivers license, names have a chance not to be unique
SQLrecordCheck = "SELECT * FROM CRMCSVFILE WHERE Email_Address_Txt = '" + str(df['Email_Address_Txt'][row]) + "' OR TELEPHONE_NUM = '" + str(df['Telephone_Num'][row]) + "' OR DRIVER_LICENSE_NUM = '" + str(df['Driver_License_Num'][row]) + "'"
## print(SQLrecordCheck)
cursor = cursor.execute(SQLrecordCheck)
## maxValue is indeed a list filled with records
maxValue =(list(cursor))
## THIS IS WHERE PROBLEM OCCURS
tempdf = pd.DataFrame.from_records(cursor)
Why not just use pd.read_sql_query("your_query", conn) this will return the result of the query as a dataframe and requires less code. Also you set cursor to cursor.execute(strsql) at the top and then you are trying to call execute on cursor again in your for loop but you can no longer call execute on cursor you will have to set cursor = conn.cursor() again.
I have this python script who inserts xcell data into a mysql db, but I need to only inserts rows that aren't duplicates, so I used the INSERT IGNORE sql method also tryed with ON DUPLICATE KEY UPDATE, but it doesn't work, it just insert all the data that is in the table. This is the script:
import xlrd
import MySQLdb
#Seting the database connection
database = MySQLdb.connect (host="localhost", user = "root", passwd = "****", db = "python_insert")
cursor = database.cursor()
query = """INSERT IGNORE INTO test (masina,data_ora,conbustibil) VALUES (%s,%s,%s)"""
#Open and parse the xcell file
book = xlrd.open_workbook("asset/testing.xlsx")
sheet = book.sheet_by_name("Report")
for r in range(1, sheet.nrows):
masina = sheet.cell(r, 1).value
data_ora = sheet.cell(r, 0).value
conbustibil = sheet.cell(r, 8).value
values = (masina, data_ora, conbustibil)
cursor.execute(query, values)
#Closing cursor+database
cursor.close()
database.commit()
database.close()
print "Succes"
columns = str(sheet.ncols)
rows = str(sheet.nrows)
print ("Am importat " + columns + " coloane si " + rows + " randuri in MySQL")
dblist = ('database1', 'database2', 'database3', 'database4', 'database5', 'database6', 'database7')
for db in dblist:
cursor = conn.cursor()
cursor.execute("select SYSTEM from automation/awdclient where db = " + "'" + db + "'" + "")
for row in cursor:
activesystem.db = row[0]
cursor.close
conn.close
print activesystem.database1
print activesystem.database2
What I am doing is retrieving a system name from a db. I want to assign a variable equal to that system name, with the variable name system.whatever the db string was.
Use:
activesystem.setattr(db, row[0])
I am trying to search in access database for some occurrence, but I found that my code miss somes when it made a search.
I found that he miss the second occurence when it found the first one.
Example: if I have the following and I am looking for T300 and I have this structure:
T200
T300
T300
it will catch first T300 and pass the second T300
enter code here
import csv
import pyodbc
from xml.dom import minidom
# *************************************
def DBAccess (Term):
MDB = 'c:/test/mydb.mdb'
DRV = '{Microsoft Access Driver (*.mdb)}'
PWD = ''
conn = pyodbc.connect('DRIVER=%s;DBQ=%s;PWD=%s' % (DRV,MDB,PWD))
curs = conn.cursor()
curs.execute("select * from gdo_segment")
rows = curs.fetchall()
for row in rows:
T = 'T' + str(row.troncon) + '_' + row.noeud1 + '-' + row.noeud2
if (T == Term ):
print T
curs.close()
conn.close()
#*************************************
def findTerminal():
xmldoc = minidom.parse('c:\\test\mydoc.xml')
#printing the number of blocs in my xml file
itemlist = xmldoc.getElementsByTagName('ACLineSegment')
for item in itemlist:
found = False
for child in item.childNodes:
if child.nodeName == 'Terminal':
found = True
if not found:
Term = item.getAttribute('Name')
DBAccess (Term)
#***********************************
findTerminal()
I assume it is finding the last item, and this would be because of your code indenting. Correct indenting is essential in Python. the docs
Currently, your if statement only applies after all the looping has completed, so will only check the last value of T.
def DBAccess (Term):
MDB = 'c:/test/gdomt.mdb'
DRV = '{Microsoft Access Driver (*.mdb)}'
PWD = ''
conn = pyodbc.connect('DRIVER=%s;DBQ=%s;PWD=%s' % (DRV,MDB,PWD))
curs = conn.cursor()
curs.execute("select * from gdo_segment")
rows = curs.fetchall()
for row in rows:
T = 'T' + str(row.troncon) + '_' + row.noeud1 + '-' + row.noeud2
if (T == Term ):
print T
curs.close()
conn.close()