Cannot post a zip file in Python. Unicode decoding error - python

When trying to submit a zip file using urllib2 I am getting a UnicodeDecodeError with the following messages:
Exception during urlopen: 'ascii' codec can't decode byte 0xf1 in position 12: ordinal not in range(128)
Exception: 'ascii' codec can't decode byte 0xf1 in position 12: ordinal not in range(128)
Exception of type: <type 'exceptions.UnicodeDecodeError'>
Exception. Message: "". Doc: "Unicode decoding error.".
Exception during export:
e.__doc__=Unicode decoding error.
The exception is raised on the line response = urllib2.urlopen(request).
def depositZipFile(tempZipFileName, tempZipFilePath, depositUrl, tr):
print('depositZipFile(). tempZipFileName=%s, tempZipFilePath=%s, depositUrl=%s, tr=%s' % (tempZipFileName, tempZipFilePath, depositUrl, str(tr)))
with open(tempZipFilePath, 'rb') as f:
zipData = f.read()
print('depositZipFile(). type(zipData)=%s' % type(zipData))
headers = {
'In-Progress': 'true',
'Content-Disposition': 'filename=' + tempZipFileName,
'Content-Type': 'application/zip',
'Content-Length': os.stat(tempZipFilePath).st_size,
'Content-Transfer-Encoding': 'binary',
'Packaging': 'http://purl.org/net/sword/package/METSDSpaceSIP',
}
try:
request = urllib2.Request(depositUrl, data=zipData, headers=headers)
try:
response = urllib2.urlopen(request)
except Exception as e:
print('Exception during urlopen: ' + str(e))
raise e
print('Got response. response=%s' % str(response))
xmlText = response.read()
xmlRoot = ET.fromstring(xmlText)
linkElement = xmlRoot.find('xmlns:link[#rel="alternate"]', namespaces=dict(xmlns='http://www.w3.org/2005/Atom'))
if linkElement is None:
raise ValueError('No redirection URL is found in the response.')
href = linkElement.attrib['href']
return href
except urllib2.HTTPError as e:
print('HTTPError: ' + str(e))
print('HTTPError: %s' % str(e.code))
print('HTTPError message: %s' % e.read())
raise e
except Exception as e:
print('Exception: ' + str(e))
print('Exception of type: %s' % type(e))
print('Exception. Message: "%s". Doc: "%s".' % (e.message, e.__doc__))
raise e
Before the aforementioned method is called the user is authenticated using basic authentication. See the following method.
def authenticateUser(tr, url):
user = getConfigurationProperty(tr, 'user')
password = getConfigurationProperty(tr, 'password')
realm = getConfigurationProperty(tr, 'realm')
pm = urllib2.HTTPPasswordMgr()
pm.add_password(realm, url, user, password)
authHandler = urllib2.HTTPBasicAuthHandler(pm)
opener = urllib2.build_opener(authHandler)
urllib2.install_opener(opener)
I am very new to Python and maybe I am missing something obvious. Please advise.
I am using Python 2.7, Jython implementation.

Aparently the problem was that the type of depositUrl was unicode instead of str. Therefore, the urllib2.Request() method was expecting unicode types for all parameters. When I made the following conversion everything srtarted working:
depositUrl = str(depositUrl)

Related

ERROR uploading: 'latin-1' codec can't encode character '\u2019' with JSON data upload

I am using python to upload some JSON data to the application UI but getting following error.
ERROR uploading: 'latin-1' codec can't encode character '\u2019' in position 5735: Body ('â') is not valid Latin-1. Use body.encode('utf-8') if you want to send it encoded in UTF-8.
The program takes the input from a sample.json file which includes a special character ( ' ) and that's giving the error.
Value: amex%?'
My code looks like:
def read_from_file(file_path, target_path=None):
try:
f = open(file_path, "r")
data = json.load(f)
f.close()
if target_path:
result_obj = []
for obj in data:
if target_path in obj['Key']:
result_obj.append(obj)
data = result_obj
except Exception as e:
print ("ERROR reading file:", e, file=sys.stderr)
exit(1)
return data
def upload(server, token, data):
params = {"token": token}
for obj in data:
try:
payload = obj['Value']
url = server + obj['Key']
response = requests.put(url, data=payload, params=params)
if response.status_code != 200:
raise Exception("HTTP code %s on PUT %s" % (response.status_code, url))
except Exception as e:
print ("ERROR uploading:", e, file=sys.stderr)
exit(1)
Can somebody please advise where I need to change my code to include special character ( ' ) while upload?

Python MySQLdb upload UnicodeEncodeError

I have a problem where I can upload CSV files to MySQL, but then something happens and I get an encoding error. Can some one please review my code and tell what is wrong? I'm new to enconding.
The following snippet is how I write the CSV files that will be uploaded, the data is extracted from an MDB file using the MDN tools (mdb-export):
tableIndex = 1
for tName in tableNames:
fileName = os.path.join(csvPath, os.path.basename(mdb).split('.')[0] + '_' + tName + '.csv')
try:
p = subprocess.Popen(["mdb-export", "-H", mdb, tName], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
tableContent, error = p.communicate()
if(p.returncode != 0):
_logger.error('[%3d] Export Subprocess %d %s' % (tID, p.returncode, tableContent))
SendMdbError(tID, mdb, _logger, 'ALERT: Export Subprocess')
return(['', False])
if(error):
_logger.error('[%3d] Export Communicate %d %s' % (tID, p.returncode, error.strip()))
SendMdbError(tID, mdb, _logger, 'ALERT: Export Communicate')
return(['', False])
except Exception as ex:
_logger.exception('[%3d] Export Error' % tID)
SendMdbError(tID, mdb, _logger, 'ALERT: Export Exception')
return(['', False])
except:
_logger.exception('[%3d] Export Unexpected' % tID)
SendMdbError(tID, mdb, _logger, 'ALERT: Export Unexpected')
return(['', False])
# If no data, no need for corresponding SQL
if(len(tableContent) == 0):
emptyTables.append(tName)
# If data exists, dump data
else:
# Add the 'DriveTest' to the data to upload
tableContent = tableContent.split('\n')
tableContent = [dt + ',' + line for line in tableContent if(line)]
tableContent = '\n'.join(tableContent)
try:
with open(fileName, 'wb') as f:
f.write(tableContent)
if(_VERBOSITY):
_logger.debug('[%3d] %3d - Write CSV SIZE[%8d] FILE: %s' %(tID, tableIndex, len(tableContent.split('\n')), fileName))
tableIndex += 1
except IOError as err:
_logger.exception('[%3d] Write IOError: %s' % (tID, str(err)))
SendMdbError(tID, mdb, _logger, 'ALERT: Write IOError')
return(['', False])
except Exception as ex:
_logger.exception('[%3d] Write Exception' % tID)
SendMdbError(tID, mdb, _logger, 'ALERT: Write Exception')
return(['', False])
except:
_logger.exception('[%3d] Write Unexpected: %s' % tID)
SendMdbError(tID, mdb, _logger, 'ALERT: Write Unexpected')
return(['', False])
The following is where I upload the CSV file, and here is where I get the error:
# Upload the data
tableIndex = 0
for table in tableDDL:
try:
with warnings.catch_warnings(record=True) as war:
_logger.info('[%3d] %3d Going up... %s' %(tID, tableIndex+1, os.path.basename(mdb).split('.')[0] + '_' + table))
_sqlLock[tableIndex].acquire()
#self.cursor.execute(tableDDL[table])
self.cursor.execute(tableULD[table])
self.conn.commit()
_sqlLock[tableIndex].release()
if(war):
#if(_VERBOSITY): print('[%3d] %3d WARNINGS[%3d] %s' % (tID, tableIndex+1, len(war), os.path.basename(mdb).split('.')[0] + '_' + table))
_logger.warning('[%3d] %3d WARNINGS[%3d] %s' % (tID, tableIndex+1, len(war), os.path.basename(mdb).split('.')[0] + '_' + table))
for w in war:
_logger.warning('[%3d] %s' % (tID, w.message))
#if(_VERBOSITY): print('[%3d] %3d Uploaded %s' % (tID, tableIndex+1, os.path.basename(mdb).split('.')[0] + '_' + table))
_logger.info('[%3d] %3d Uploaded %s' % (tID, tableIndex+1, os.path.basename(mdb).split('.')[0] + '_' + table))
tableIndex += 1
# Remove the uploaded CSV file
try:
os.remove(csvFiles[table]+'.csv')
_logger.info('[%3d] Removed CVS file: %s' % (tID, csvFiles[table]+'.csv'))
except OSError:
pass
except (MySQLdb.InternalError, MySQLdb.NotSupportedError) as err:
_logger.error('[%3d] %3d Internal: %s %s' % (tID, tableIndex+1, err, sys.exc_info()[0]))
self.conn.rollback()
self.Disconnect(tID, _logger, _VERBOSITY, _DEBUG)
return(False)
except MySQLdb.OperationalError as err:
_logger.error('[%3d] %3d OperationalError: %s' % (tID, tableIndex+1, sys.exc_info()[0]))
_logger.error(err)
self.conn.rollback()
self.Disconnect(tID, _logger, _VERBOSITY, _DEBUG)
return(False)
except MySQLdb.ProgrammingError as err:
_logger.error('[%3d] %3d ProgrammingError: %s' % (tID, tableIndex+1, sys.exc_info()[0]))
_logger.error(err)
self.conn.rollback()
self.Disconnect(tID, _logger, _VERBOSITY, _DEBUG)
return(False)
except MySQLdb.Error as err:
_logger.error('[%3d] %3d QUERY: %s %s' % (tID, tableIndex+1, err, sys.exc_info()[0]))
self.conn.rollback()
self.Disconnect(tID, _logger, _VERBOSITY, _DEBUG)
return(False)
except Exception as err:
_logger.error('[%3d] %3d Exception: %s %s' % (tID, tableIndex+1, err, sys.exc_info()[0]))
#self.conn.rollback()
#self.Disconnect(tID, _logger, _VERBOSITY, _DEBUG)
#return(False)
pass
except:
_logger.error('[%3d] %3d Other: %s' % (tID, tableIndex+1, sys.exc_info()[0]))
self.conn.rollback()
self.Disconnect(tID, _logger, _VERBOSITY, _DEBUG)
return(False)
The error I get is the following:
2015-06-13 19:42:21,743 __main__ - ERROR - [ 1] 1 Exception: 'ascii' codec can't encode character u'\xb4' in position 40: ordinal not in range(128) <type 'exceptions.UnicodeEncodeError'>
2015-06-13 19:42:30,962 __main__ - ERROR - [ 1] 1 Exception: 'ascii' codec can't encode character u'\xb4' in position 27: ordinal not in range(128) <type 'exceptions.UnicodeEncodeError'>
I noticed that the given data gets uploaded, but not sure if all rows are uploaded.
Thanks!
Try before putting csv into DB s.decode('UTF-8') and after getting it out of the DB s.encode('UTF-8')
I did it for SQLite and it worked OK.
Getting this to work should not be too difficult, but you have to understand what you're doing. Don't just try all possible combinations of s.encode("UTF-8").decode("UTF-8") and stuff like that.
First, understand the difference between a string and bytes. See https://docs.python.org/3/howto/unicode.html. You can encode a string to bytes: bytes = text.encode("UTF-8"), and you can decode bytes to a string: text = bytes.decode("UTF-8")
Second since a CSV file is a text file, you should open the CSV file in text mode. open(fileName, 'w', encoding="utf-8"). There's no need to encode or decode text in your code when writing the file.
Third, it is perfectly OK to write Unicode text to a TEXT field. No need for BINARYs or BLOBs. But make sure your database has a collation setting that can deal with it, usually that would be one of the utf-8 collations. Then to put Unicode in your database, use python strings and don't decode them to bytes.
The error message implies that the column definition in MySQL is CHARACTER SET ascii; is that correct?
B4 sounds like the latin1 (not utf8) encoding for ´, which could be coming from a Microsoft Word document in a context such as it´s.
So, even changing the column to be CHARACTER SET utf8 won't fix the problem.
BINARY and BLOB are essentially the same type of field -- any byte is allowed. VARCHAR and TEXT validate the bytes during INSERT to make sure they match the CHARACTER SET.

Python Utf-8 writing into CSV

I fail to save already encoded data into CSV. I could decode the CSV file afterwards, but I rather do all data cleaning before. I managed to save only text, but when I add timestamp it is impossible.
What I am doing wrong? I read that if srt() and .encode() is not working and should try .join instead, but still nothing
error:
UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 4: ordinal not in range(128)
code:
def on_data(self, data):
try:
#print data
tweet = data.split(',"text":"')[1].split('","source')[0]
x = tweet.encode('utf-8')
y = x.decode('unicode-escape')
print y
saveThis = y
#saveThis = str(time.time())+'::' + tweet.decode('ascii', 'ignore')
#saveThis = u' '.join((time.time()+'::'+tweet)).encode('utf-8')
saveFile = open('twitDB.csv', 'a')
saveFile.write(saveThis)
saveFile.write('\n')
saveFile.close()
return True
except BaseException, e:
print 'fail on data,', str(e)
time.sleep(5)
def on_error(self, status):
print status
First of all, make sure you handle your JSON data properly, using the json module.
Next, don't catch BaseException, you have no reason to catch memory errors or keyboard interrupts here. Catch more specific exceptions, instead.
Next, encode your data before writing:
def on_data(self, data):
try:
tweet = json.loads(data)['text']
except (ValueError, KeyError), e:
# Not JSON or no text key
print 'fail on data {}'.format(data)
return
with open('twitDB.csv', 'a') as save_file:
save_file.write(tweet.encode('utf8') + '\n')
return True

How do I simulate connection errors and request timeouts in python unit tests

Suppose my django/flask application pulls in information from API's, how can I test that connection exceptions are caught and handled properly?
So for example here is a function that calls an API:
import requests
def call_the_api():
url = 'http://httpbin.org/get'
try:
req = requests.get(url)
if req.json().get('errors'):
logger.warn("API error response")
return {'request_error': 'api_error_response'}
except requests.exceptions.ConnectionError:
logger.warn('ConnectionError')
return {'request_error': 'ConnectionTimeout'}
except requests.exception.Timeout:
logger.warn('API request timed out')
return {'request_error': 'Timeout'}
except Exception, ex:
logger.warn("API request Exception: %s", ex)
return {'request_error': ex}
else:
return req.json()
For testing responses from the API I found mock to be very useful.
def mock_get_request():
response = requests.get.return_value
json_file = 'sample_response.json'
json_file_path = os.path.join(os.path.dirname(__file__), json_file)
with open(json_file_path, 'r') as f:
response.content = response.text = f.read()
response.status_code = 200
response.encoding = 'utf-8'
response.json = lambda: json.loads(response.content.decode(response.encoding))
response.url = u'%s' % args[0]
return response
class TestSuitabilityFunctions(TestCase):
def test_call_the_api(self):
requests.get = MagicMock(side_effect=mock_get_request)
resp = call_the_api()
self.assertEqual(resp.get('url'), "http://httpbin.org/get")
So my question is how would I go about simulating a connection timeout or error?
Untested code but...
def connection_error():
raise requests.exceptions.ConnectionError
class TestSuitabilityFunctions(TestCase):
#patch.object(module_that_youre_testing, "requests")
def test_connection_error(self, mock_requests):
mock_requests.get = MagicMock(side_effect=connection_error)
with self.assertRaises(requests.exceptions.ConnectionError) as cm:
resp = call_the_api()
exception = cm.exception
self.assertEqual(resp, {'request_error': 'ConnectionTimeout'})
... or similar should do the trick. Off the top of my head I can't remember how assertRaises interacts with errors that are caught. Maybe you don't even need the assertRaises part.

sys.exc_info()[1] type and format in Python 2.71

In python 2.71 on Windows XP I need to use FTP.
My code is :
try:
ftp = FTP(trec.address)
ftp.login(trec.login, trec.passw)
s = ftp.retrlines('LIST ' + trec.filetype)
ftp.quit()
except:
(type, value, tb) = sys.exc_info()
reponse = "%s" % value
But I have an error on the last line :
UnicodeDecodeError: 'ascii' codec can't decode byte 0xea in position 38: ordinal not in range(128)
As I am in French Windows env. the sys.exc_info()[1] is : [Errno 10061] Aucune connexion n'a pu être établie car l'ordinateur cible l'a expressément refusée
What is the most efficient way to format sys.exc_info()[1] ?
value is an instance of the Error class. You want to format it as a string. This is impossible. It seems you want to get the message associated with the error. That message can be found in value.message. Try this:
try:
ftp = FTP(trec.address)
ftp.login(trec.login, trec.passw)
s = ftp.retrlines('LIST ' + trec.filetype)
ftp.quit()
except:
type, value, tb = sys.exc_info()
reponse = "%s" % value.message
OK, the best way I found is to use traceback like this :
import traceback
def trace_except(sysexecinfo, smessage = ''):
""" Trace exceptions """
exc_type, exc_value, exc_traceback = sysexecinfo
i, j = (traceback.extract_tb(exc_traceback, 1))[0][0:2]
k = (traceback.format_exception_only(exc_type, exc_value))[0]
trace('E:'+ 'Err : ' + smessage + k + i + ', ligne ' + str(j))
return k
try:
ftp = FTP(trec.address)
ftp.login(trec.login, trec.passw)
s = ftp.retrlines('LIST ' + trec.filetype)
ftp.quit()
except:
reponse = trace_except(sys.exc_info())

Categories

Resources