Python MySQLdb upload UnicodeEncodeError - python

I have a problem where I can upload CSV files to MySQL, but then something happens and I get an encoding error. Can someone please review my code and tell me what is wrong? I'm new to encoding.
The following snippet is how I write the CSV files that will be uploaded; the data is extracted from an MDB file using the MDB Tools (mdb-export):
# Export every table named in tableNames to its own CSV file by running the
# mdb-export command line tool and capturing its output.
# NOTE(review): this is an excerpt from a larger function -- it uses `return`
# and names (tID, mdb, dt, csvPath, emptyTables, _logger, _VERBOSITY) that are
# defined outside this view. Every failure path returns ['', False].
tableIndex = 1
for tName in tableNames:
# Target file: <csvPath>/<MDB base name>_<table name>.csv
fileName = os.path.join(csvPath, os.path.basename(mdb).split('.')[0] + '_' + tName + '.csv')
try:
# Capture both stdout (the CSV text) and stderr of mdb-export.
p = subprocess.Popen(["mdb-export", "-H", mdb, tName], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
tableContent, error = p.communicate()
if(p.returncode != 0):
# NOTE(review): logs the captured stdout (tableContent), not stderr (error).
_logger.error('[%3d] Export Subprocess %d %s' % (tID, p.returncode, tableContent))
SendMdbError(tID, mdb, _logger, 'ALERT: Export Subprocess')
return(['', False])
# Any stderr output is treated as a failure even when the exit code is 0.
if(error):
_logger.error('[%3d] Export Communicate %d %s' % (tID, p.returncode, error.strip()))
SendMdbError(tID, mdb, _logger, 'ALERT: Export Communicate')
return(['', False])
except Exception as ex:
_logger.exception('[%3d] Export Error' % tID)
SendMdbError(tID, mdb, _logger, 'ALERT: Export Exception')
return(['', False])
# Reached only for exceptions that do not derive from Exception
# (e.g. KeyboardInterrupt, SystemExit).
except:
_logger.exception('[%3d] Export Unexpected' % tID)
SendMdbError(tID, mdb, _logger, 'ALERT: Export Unexpected')
return(['', False])
# If no data, no need for corresponding SQL
if(len(tableContent) == 0):
emptyTables.append(tName)
# If data exists, dump data
else:
# Add the 'DriveTest' to the data to upload
# Every non-empty line gets `dt` plus a comma prepended; empty lines are dropped.
tableContent = tableContent.split('\n')
tableContent = [dt + ',' + line for line in tableContent if(line)]
tableContent = '\n'.join(tableContent)
try:
# NOTE(review): the file is opened in binary mode and no explicit encoding
# is applied to tableContent before writing -- confirm it is a byte string.
with open(fileName, 'wb') as f:
f.write(tableContent)
if(_VERBOSITY):
_logger.debug('[%3d] %3d - Write CSV SIZE[%8d] FILE: %s' %(tID, tableIndex, len(tableContent.split('\n')), fileName))
tableIndex += 1
except IOError as err:
_logger.exception('[%3d] Write IOError: %s' % (tID, str(err)))
SendMdbError(tID, mdb, _logger, 'ALERT: Write IOError')
return(['', False])
except Exception as ex:
_logger.exception('[%3d] Write Exception' % tID)
SendMdbError(tID, mdb, _logger, 'ALERT: Write Exception')
return(['', False])
# Reached only for exceptions that do not derive from Exception.
except:
# NOTE(review): the format string has two placeholders but only tID is
# supplied, so this line itself raises TypeError when it runs.
_logger.exception('[%3d] Write Unexpected: %s' % tID)
SendMdbError(tID, mdb, _logger, 'ALERT: Write Unexpected')
return(['', False])
The following is where I upload the CSV file, and here is where I get the error:
# Upload the data
# Execute the statement stored in tableULD for every table in tableDDL, commit
# it, log any warnings recorded while it ran, then delete the matching CSV file.
# NOTE(review): this is an excerpt from a method -- it uses `self`, `return`
# and names (tID, mdb, tableDDL, tableULD, csvFiles, _sqlLock, _logger) that are
# defined outside this view. Database errors roll back, disconnect and return
# False; the generic Exception handler only logs and moves on.
tableIndex = 0
for table in tableDDL:
try:
# record=True collects warnings in `war` instead of emitting them.
with warnings.catch_warnings(record=True) as war:
_logger.info('[%3d] %3d Going up... %s' %(tID, tableIndex+1, os.path.basename(mdb).split('.')[0] + '_' + table))
# NOTE(review): if execute() or commit() raises, none of the handlers below
# releases _sqlLock[tableIndex]; the release is only on the success path.
_sqlLock[tableIndex].acquire()
#self.cursor.execute(tableDDL[table])
self.cursor.execute(tableULD[table])
self.conn.commit()
_sqlLock[tableIndex].release()
if(war):
#if(_VERBOSITY): print('[%3d] %3d WARNINGS[%3d] %s' % (tID, tableIndex+1, len(war), os.path.basename(mdb).split('.')[0] + '_' + table))
_logger.warning('[%3d] %3d WARNINGS[%3d] %s' % (tID, tableIndex+1, len(war), os.path.basename(mdb).split('.')[0] + '_' + table))
for w in war:
# NOTE(review): presumably the reported UnicodeEncodeError originates here,
# when a warning message containing non-ASCII text is formatted with '%s'
# -- confirm; it would explain why the data is uploaded anyway.
_logger.warning('[%3d] %s' % (tID, w.message))
#if(_VERBOSITY): print('[%3d] %3d Uploaded %s' % (tID, tableIndex+1, os.path.basename(mdb).split('.')[0] + '_' + table))
_logger.info('[%3d] %3d Uploaded %s' % (tID, tableIndex+1, os.path.basename(mdb).split('.')[0] + '_' + table))
tableIndex += 1
# Remove the uploaded CSV file
try:
os.remove(csvFiles[table]+'.csv')
_logger.info('[%3d] Removed CVS file: %s' % (tID, csvFiles[table]+'.csv'))
except OSError:
# Best effort: a CSV file that cannot be removed is ignored.
pass
except (MySQLdb.InternalError, MySQLdb.NotSupportedError) as err:
_logger.error('[%3d] %3d Internal: %s %s' % (tID, tableIndex+1, err, sys.exc_info()[0]))
self.conn.rollback()
self.Disconnect(tID, _logger, _VERBOSITY, _DEBUG)
return(False)
except MySQLdb.OperationalError as err:
_logger.error('[%3d] %3d OperationalError: %s' % (tID, tableIndex+1, sys.exc_info()[0]))
_logger.error(err)
self.conn.rollback()
self.Disconnect(tID, _logger, _VERBOSITY, _DEBUG)
return(False)
except MySQLdb.ProgrammingError as err:
_logger.error('[%3d] %3d ProgrammingError: %s' % (tID, tableIndex+1, sys.exc_info()[0]))
_logger.error(err)
self.conn.rollback()
self.Disconnect(tID, _logger, _VERBOSITY, _DEBUG)
return(False)
# Catch-all for the remaining MySQLdb error classes.
except MySQLdb.Error as err:
_logger.error('[%3d] %3d QUERY: %s %s' % (tID, tableIndex+1, err, sys.exc_info()[0]))
self.conn.rollback()
self.Disconnect(tID, _logger, _VERBOSITY, _DEBUG)
return(False)
# Any other Exception is logged and swallowed: the loop continues with the
# next table, and tableIndex is not advanced for the failed one.
except Exception as err:
_logger.error('[%3d] %3d Exception: %s %s' % (tID, tableIndex+1, err, sys.exc_info()[0]))
#self.conn.rollback()
#self.Disconnect(tID, _logger, _VERBOSITY, _DEBUG)
#return(False)
pass
# Reached only for exceptions that do not derive from Exception.
except:
_logger.error('[%3d] %3d Other: %s' % (tID, tableIndex+1, sys.exc_info()[0]))
self.conn.rollback()
self.Disconnect(tID, _logger, _VERBOSITY, _DEBUG)
return(False)
The error I get is the following:
2015-06-13 19:42:21,743 __main__ - ERROR - [ 1] 1 Exception: 'ascii' codec can't encode character u'\xb4' in position 40: ordinal not in range(128) <type 'exceptions.UnicodeEncodeError'>
2015-06-13 19:42:30,962 __main__ - ERROR - [ 1] 1 Exception: 'ascii' codec can't encode character u'\xb4' in position 27: ordinal not in range(128) <type 'exceptions.UnicodeEncodeError'>
I noticed that the given data gets uploaded, but not sure if all rows are uploaded.
Thanks!

Before putting the CSV data into the DB, try s.decode('UTF-8'); after getting it out of the DB, use s.encode('UTF-8').
I did it for SQLite and it worked OK.

Getting this to work should not be too difficult, but you have to understand what you're doing. Don't just try all possible combinations of s.encode("UTF-8").decode("UTF-8") and stuff like that.
First, understand the difference between a string and bytes. See https://docs.python.org/3/howto/unicode.html. You can encode a string to bytes: bytes = text.encode("UTF-8"), and you can decode bytes to a string: text = bytes.decode("UTF-8")
Second since a CSV file is a text file, you should open the CSV file in text mode. open(fileName, 'w', encoding="utf-8"). There's no need to encode or decode text in your code when writing the file.
Third, it is perfectly OK to write Unicode text to a TEXT field. No need for BINARYs or BLOBs. But make sure your database has a collation setting that can deal with it; usually that would be one of the utf-8 collations. Then, to put Unicode in your database, use Python strings and don't encode them to bytes.

The error message implies that the column definition in MySQL is CHARACTER SET ascii; is that correct?
B4 sounds like the latin1 (not utf8) encoding for ´, which could be coming from a Microsoft Word document in a context such as it´s.
So, even changing the column to be CHARACTER SET utf8 won't fix the problem.
BINARY and BLOB are essentially the same type of field -- any byte is allowed. VARCHAR and TEXT validate the bytes during INSERT to make sure they match the CHARACTER SET.

Related

Cannot post a zip file in Python. Unicode decoding error

When trying to submit a zip file using urllib2 I am getting a UnicodeDecodeError with the following messages:
Exception during urlopen: 'ascii' codec can't decode byte 0xf1 in position 12: ordinal not in range(128)
Exception: 'ascii' codec can't decode byte 0xf1 in position 12: ordinal not in range(128)
Exception of type: <type 'exceptions.UnicodeDecodeError'>
Exception. Message: "". Doc: "Unicode decoding error.".
Exception during export:
e.__doc__=Unicode decoding error.
The exception is raised on the line response = urllib2.urlopen(request).
def depositZipFile(tempZipFileName, tempZipFilePath, depositUrl, tr):
    """POST a zip package to ``depositUrl`` and return the "alternate" link.

    The file at ``tempZipFilePath`` is read as binary data and sent with the
    SWORD packaging headers built below. The response body is parsed as an
    Atom document and the ``href`` of its ``<link rel="alternate">`` element
    is returned.

    ``tr`` is only used for the diagnostic print.

    Raises ``ValueError`` when the response has no such link; HTTP and other
    errors are printed and re-raised unchanged.

    NOTE(review): per the discussion around this snippet, under Python 2 a
    ``unicode`` ``depositUrl`` makes the request fail with UnicodeDecodeError
    once the binary body is appended; callers should pass a byte ``str``.
    """
    print('depositZipFile(). tempZipFileName=%s, tempZipFilePath=%s, depositUrl=%s, tr=%s' % (tempZipFileName, tempZipFilePath, depositUrl, str(tr)))
    with open(tempZipFilePath, 'rb') as f:
        zipData = f.read()
    print('depositZipFile(). type(zipData)=%s' % type(zipData))
    headers = {
        'In-Progress': 'true',
        'Content-Disposition': 'filename=' + tempZipFileName,
        'Content-Type': 'application/zip',
        'Content-Length': os.stat(tempZipFilePath).st_size,
        'Content-Transfer-Encoding': 'binary',
        'Packaging': 'http://purl.org/net/sword/package/METSDSpaceSIP',
    }
    try:
        request = urllib2.Request(depositUrl, data=zipData, headers=headers)
        try:
            response = urllib2.urlopen(request)
        except Exception as e:
            print('Exception during urlopen: ' + str(e))
            # Bare raise keeps the original traceback (``raise e`` resets it
            # on Python 2).
            raise
        print('Got response. response=%s' % str(response))
        xmlText = response.read()
        xmlRoot = ET.fromstring(xmlText)
        # Attribute predicates use ``@``; ``[#rel=...]`` would be read as a
        # child-element test and never match the link's ``rel`` attribute.
        linkElement = xmlRoot.find('xmlns:link[@rel="alternate"]', namespaces=dict(xmlns='http://www.w3.org/2005/Atom'))
        if linkElement is None:
            raise ValueError('No redirection URL is found in the response.')
        href = linkElement.attrib['href']
        return href
    except urllib2.HTTPError as e:
        print('HTTPError: ' + str(e))
        print('HTTPError: %s' % str(e.code))
        print('HTTPError message: %s' % e.read())
        raise
    except Exception as e:
        print('Exception: ' + str(e))
        print('Exception of type: %s' % type(e))
        print('Exception. Message: "%s". Doc: "%s".' % (e.message, e.__doc__))
        raise
Before the aforementioned method is called the user is authenticated using basic authentication. See the following method.
def authenticateUser(tr, url):
    """Install a process-wide urllib2 opener that does HTTP basic auth for ``url``.

    The user, password and realm are read through
    ``getConfigurationProperty(tr, ...)``.
    """
    userName = getConfigurationProperty(tr, 'user')
    secret = getConfigurationProperty(tr, 'password')
    authRealm = getConfigurationProperty(tr, 'realm')

    passwordManager = urllib2.HTTPPasswordMgr()
    passwordManager.add_password(authRealm, url, userName, secret)

    # install_opener makes every later urllib2.urlopen() call use this handler.
    basicAuth = urllib2.HTTPBasicAuthHandler(passwordManager)
    urllib2.install_opener(urllib2.build_opener(basicAuth))
I am very new to Python and maybe I am missing something obvious. Please advise.
I am using Python 2.7, Jython implementation.
Apparently the problem was that the type of depositUrl was unicode instead of str. Therefore, the urllib2.Request() method was expecting unicode types for all parameters. When I made the following conversion everything started working:
depositUrl = str(depositUrl)

AWS lambda python Socket exception: Operation not permitted (1)

I have a python 3.6 AWS lambda function "ftp_sender" which is triggered by ObjectCreated event from S3. Then it downloads the file and sends it to the SFTP server.
And in CloudWatch logs I'm constantly seeing the messages like that:
[ERROR] 2018-07-26T12:30:21.543Z a56f9678-90c9-11e8-bf10-ddb8557d0ff0 Socket exception: Operation not permitted (1)
These messages appear at random positions, even before the function has started working, for example:
def ftp_sender(event, context):
# The error can be triggered here
print(event)
This does not seem to be treated as a real error, but maybe someone knows what it is about?
[EDIT]
It looks like the issue caused by paramiko library. I have the following class inside the function:
class SftpSender:
    """Upload local files over an SFTP session opened with paramiko."""

    def __init__(self, hostname, username, the_password, sftp_port=22, working_directory=''):
        """Connect to ``hostname:sftp_port`` with password authentication.

        ``working_directory`` is the remote folder that ``send_file`` uploads
        into.
        """
        self.hostname = hostname
        self.username = username
        print('SFTP port: %s' % sftp_port)
        self.transport = paramiko.Transport((hostname, sftp_port))
        self.transport.connect(username=username, password=the_password)
        self.sftp = paramiko.SFTPClient.from_transport(self.transport)
        # self.transport.set_missing_host_key_policy(paramiko.WarningPolicy())
        self.folder = working_directory

    def send_file(self, filename, retries=4, timeout_between_retries=10, verb=True, the_lambda_mode=False):
        """Upload ``filename`` to ``<folder>/<filename>``, retrying on failure.

        Returns ``'OK'`` on success, otherwise a string starting with
        ``'Error:'`` (file missing, or every attempt failed). With
        ``the_lambda_mode`` the process first changes directory to ``/tmp/``.
        Sleeps ``timeout_between_retries`` seconds after each failed attempt.
        """
        # Kept outside the ``except`` block: Python 3 unbinds the ``as`` name
        # when the handler ends, and with retries=0 no handler runs at all.
        last_error = None
        sending_result = 'Not run'
        if the_lambda_mode:
            os.chdir("/tmp/")
        if not os.path.isfile(filename):
            return "Error: File %s is not found" % filename
        for retry_number in range(retries):
            if verb:
                print('Sending %s to %s@%s (to folder "%s") retry %s of %s\n' % (filename, self.username, self.hostname, self.folder, retry_number, retries))
            try:
                destination_name = self.folder + '/' + filename
                sending_result = self.sftp.put(filename, destination_name)
                return 'OK'
            except Exception as e:
                last_error = e
                print('Error: The following exception occured: "%s", result: "%s"' % (e, sending_result))
                sleep(timeout_between_retries)
        return 'Error: failed to send file %s (%s)' % (filename, last_error)

Executing stored procedure in python

While calling the stored procedure of mysql using python, I am getting a syntax error.
The code for the stored procedure is as follows,
while True:
try:
date = time.strftime("%d/%m/%Y")
temp,humidity,light = main.reading()
args= (192.168.1.145, b8:27:eb:06:e4:4b, Temp_PI, temp)
cursor.callproc('SPR_IU_Sensor_Data',args)
conn.commit()
time.sleep(interval2)
except:
MySQLdb.Error, e:
conn.rollback()
print "Transaction aborted: %d: %s" % (e.args[0], e.args[1])
The error is as follows;
File "procedure.py", line 53
args= (192.168.1.145, b8:27:eb:06:e4:4b, Temp_PI, temp)
^
SyntaxError: invalid syntax
You need to quote the ip addresses, pass them in as strings:
args = ('192.168.1.145', 'b8:27:eb:06:e4:4b', Temp_PI, temp)
Python has no notion of an IP address literal notation.

How to debug crashing openoffice with pyuno

I'd like to use openoffice to programmatically convert docx to pdf. I know unoconv can do this, and indeed unoconv will do this for me, even if I run a separate listener (using unoconv -l) and invoke unoconv -n (so that it will die if it can't connect to the listener). Accordingly, I assume that my openoffice/pyuno environment is sane.
However, when I run an unoconv listener (or manually invoke openoffice as an acceptor), and try to connect with my own python code (derived from unoconv, and cross-checked with another openoffice library), the listener dies, and the uno bridge dies.
The error I get from the listener is:
terminate called after throwing an instance of 'com::sun::star::uno::RuntimeException'
The error I get on the python end is:
unoconv: RuntimeException during import phase:
Office probably died. Binary URP bridge disposed during call
I really have no idea how to go about diagnosing the problem here. Any suggestions as to the underlying cause or how to diagnose it would be greatly appreciated.
Code below:
#dependency on openoffice-python
import openoffice.streams as oostreams
import openoffice.officehelper as oohelper
import uno, unohelper
from com.sun.star.beans import PropertyValue
from com.sun.star.connection import NoConnectException
from com.sun.star.document.UpdateDocMode import QUIET_UPDATE
from com.sun.star.lang import DisposedException, IllegalArgumentException
from com.sun.star.io import IOException, XOutputStream
from com.sun.star.script import CannotConvertException
from com.sun.star.uno import Exception as UnoException
from com.sun.star.uno import RuntimeException
import logging
logger = logging.getLogger(__name__)
#connectionstring = 'uno:socket,host=127.0.0.1,port=2002;urp;StarOffice.ComponentContext'
connectionstring = 'socket,host=127.0.0.1,port=2002'
## context = uno.getComponentContext()
## svcmgr = context.ServiceManager
## resolver = svcmgr.createInstanceWithContext("com.sun.star.bridge.UnoUrlResolver", context)
## unocontext = resolver.resolve("uno:%s" % connectionstring)
unocontext = oohelper.connect(connectionstring)
#unosvcmgr = unocontext.ServiceManager
desktop = unocontext.ServiceManager.createInstanceWithContext("com.sun.star.frame.Desktop", unocontext)
class OutputStream(unohelper.Base, XOutputStream):
    """UNO output stream that forwards written bytes to a Python stream.

    When no stream is given, output goes to standard output.
    """

    def __init__(self, stream=None):
        # Imported here because the surrounding module does not import sys.
        import sys

        self.closed = 0
        if stream is None:
            # On Python 3, bytes must go to the underlying binary buffer;
            # Python 2's sys.stdout has no ``buffer`` and accepts bytes itself.
            stream = getattr(sys.stdout, 'buffer', sys.stdout)
        self.stream = stream

    def closeOutput(self):
        """Mark the stream as closed; the wrapped stream is left open."""
        self.closed = 1

    def writeBytes(self, seq):
        """Write the payload of the UNO byte sequence ``seq`` to the stream."""
        self.stream.write(seq.value)

    def flush(self):
        """No-op; buffering is left to the wrapped stream."""
        pass
def UnoProps(**args):
    """Return a tuple of ``PropertyValue`` structs, one per keyword argument.

    Each keyword becomes the struct's ``Name`` and its value the ``Value``.
    """
    def _as_property(name, value):
        entry = PropertyValue()
        entry.Name = name
        entry.Value = value
        return entry

    return tuple(_as_property(name, value) for name, value in args.items())
FILTERS = {'pdf': 'writer_pdf_Export'}
def convert_stream(instream, outstream,
                   outdoctype=None, outformat=None):
    """Convert the document read from ``instream`` and write it to ``outstream``.

    ``outformat`` selects the export filter and must be a key of ``FILTERS``
    (for PDF use ``outformat="pdf"``); any other value, including the default
    ``None``, raises ``KeyError``. ``outdoctype`` is accepted but not used
    here.

    Returns ``outstream``.
    """
    # Resolve the filter first so an unknown format fails before any UNO work.
    export_filter = FILTERS[outformat]
    load_props = UnoProps(Hidden=True,
                          ReadOnly=True,
                          UpdateDocMode=QUIET_UPDATE,
                          InputStream=oostreams.InputStream(instream))
    # 'private:stream' tells the loader to read from the InputStream property.
    convert_worker('private:stream', load_props, export_filter, outstream=outstream)
    return outstream
def convert_worker(inputurl, inputprops, outputfilter, outstream=None, inputfn=None):
    """Load a document through the UNO desktop and export it to ``outstream``.

    The document at ``inputurl`` is loaded with ``inputprops``, its links and
    indexes are refreshed, and it is stored with the filter named
    ``outputfilter`` into an ``OutputStream`` wrapping ``outstream``.
    ``inputfn`` is accepted but not used.

    Nothing is returned and no exception from the handled families is
    propagated: failures are logged and recorded in the global ``exitcode``
    (1 SystemError, 7 disposed, 8 illegal argument, 6 other runtime error,
    3 I/O, 4 cannot convert, 5/2 or the error's ``ErrCode`` for other UNO
    exceptions).
    """
    global exitcode
    document = None
    try:
        ### Import phase
        phase = "import"
        document = desktop.loadComponentFromURL(inputurl, "_blank", 0, inputprops)
        if not document:
            raise UnoException("The document '%s' could not be opened." % inputurl, None)

        ### Import style template
        phase = "import-style"

        ### Update document links
        phase = "update-links"
        try:
            document.updateLinks()
        except AttributeError:
            # the document doesn't implement the XLinkUpdate interface
            pass

        ### Update document indexes
        phase = "update-indexes"
        for ii in range(2):
            # At first update Table-of-Contents.
            # ToC grows, so page numbers grows too.
            # On second turn update page numbers in ToC.
            try:
                document.refresh()
                indexes = document.getDocumentIndexes()
            except AttributeError:
                # the document doesn't implement the XRefreshable and/or
                # XDocumentIndexesSupplier interfaces
                break
            else:
                for i in range(0, indexes.getCount()):
                    indexes.getByIndex(i).update()

        ### Export phase
        phase = "export"
        outputprops = UnoProps(FilterName=outputfilter, OutputStream=OutputStream(stream=outstream), Overwrite=True)
        outputurl = "private:stream"
        try:
            document.storeToURL(outputurl, tuple(outputprops))
        except IOException as e:
            raise UnoException("Unable to store document to %s (ErrCode %d)\n\nProperties: %s" % (outputurl, e.ErrCode, outputprops), None)

        phase = "dispose"
        document.dispose()
        document.close(True)

    except SystemError as e:
        logger.error("unoconv: SystemError during %s phase:\n%s" % (phase, e))
        exitcode = 1
    # Subclass handlers must come before RuntimeException: DisposedException
    # derives from it, so listed afterwards its handler could never run.
    except DisposedException as e:
        logger.error("unoconv: DisposedException during %s phase:\nOffice probably died. %s" % (phase, e))
        exitcode = 7
    # NOTE(review): in newer LibreOffice releases IllegalArgumentException
    # presumably also derives from RuntimeException -- confirm; handling it
    # first is correct either way.
    except IllegalArgumentException as e:
        logger.error("UNO IllegalArgument during %s phase:\nSource file cannot be read. %s" % (phase, e))
        exitcode = 8
    except RuntimeException as e:
        logger.error("unoconv: RuntimeException during %s phase:\nOffice probably died. %s" % (phase, e))
        exitcode = 6
    except IOException as e:
        # for attr in dir(e): print '%s: %s', (attr, getattr(e, attr))
        logger.error("unoconv: IOException during %s phase:\n%s" % (phase, e.Message))
        exitcode = 3
    except CannotConvertException as e:
        # for attr in dir(e): print '%s: %s', (attr, getattr(e, attr))
        logger.error("unoconv: CannotConvertException during %s phase:\n%s" % (phase, e.Message))
        exitcode = 4
    except UnoException as e:
        if hasattr(e, 'ErrCode'):
            logger.error("unoconv: UnoException during %s phase in %s (ErrCode %d)" % (phase, repr(e.__class__), e.ErrCode))
            exitcode = e.ErrCode
        # This second test is independent of the first: an exception with a
        # Message ends with exitcode 5, one without ends with exitcode 2.
        if hasattr(e, 'Message'):
            logger.error("unoconv: UnoException during %s phase:\n%s" % (phase, e.Message))
            exitcode = 5
        else:
            logger.error("unoconv: UnoException during %s phase in %s" % (phase, repr(e.__class__)))
            exitcode = 2
I don't know if this could be your case, but I discovered that LogMeIn (running on my box) is also using the port 2002. When I try unoconv on that machine, I get the same error: Binary URP bridge disposed during call.
I killed LogMeIn and everything worked after that.
Hope this helps!

sys.exc_info()[1] type and format in Python 2.71

In python 2.71 on Windows XP I need to use FTP.
My code is :
try:
ftp = FTP(trec.address)
ftp.login(trec.login, trec.passw)
s = ftp.retrlines('LIST ' + trec.filetype)
ftp.quit()
except:
(type, value, tb) = sys.exc_info()
reponse = "%s" % value
But I have an error on the last line :
UnicodeDecodeError: 'ascii' codec can't decode byte 0xea in position 38: ordinal not in range(128)
As I am in a French Windows environment, sys.exc_info()[1] is: [Errno 10061] Aucune connexion n'a pu être établie car l'ordinateur cible l'a expressément refusée
What is the most efficient way to format sys.exc_info()[1] ?
value is an instance of the Error class. You want to format it as a string. This is impossible. It seems you want to get the message associated with the error. That message can be found in value.message. Try this:
try:
ftp = FTP(trec.address)
ftp.login(trec.login, trec.passw)
s = ftp.retrlines('LIST ' + trec.filetype)
ftp.quit()
except:
type, value, tb = sys.exc_info()
reponse = "%s" % value.message
OK, the best way I found is to use traceback like this :
import traceback
def trace_except(sysexecinfo, smessage = ''):
    """Log one exception through ``trace`` and return its formatted message.

    ``sysexecinfo`` is a ``(type, value, traceback)`` triple as returned by
    ``sys.exc_info()``. The logged line contains ``smessage``, the formatted
    exception, and the file name and line number of the first traceback entry.
    """
    err_type, err_value, err_tb = sysexecinfo
    # Only the first frame is extracted; its first two fields are file and line.
    first_frame = traceback.extract_tb(err_tb, 1)[0]
    source_file, line_number = first_frame[0:2]
    description = traceback.format_exception_only(err_type, err_value)[0]
    trace('E:' + 'Err : ' + smessage + description + source_file + ', ligne ' + str(line_number))
    return description
try:
ftp = FTP(trec.address)
ftp.login(trec.login, trec.passw)
s = ftp.retrlines('LIST ' + trec.filetype)
ftp.quit()
except:
reponse = trace_except(sys.exc_info())

Categories

Resources