I'm trying to encrypt a PDF file with a Fernet key and then decrypt it to get its data back.
I'm able to encrypt it successfully, but when decrypting it I get a binary stream instead of the actual data. Please help. (Assume all the needed modules are imported and that the PDF contains the text "Hi, how are you" on 2 lines.)
Encryption:
def encrypt_file(file_path, file_name):
    """Encrypt a file in place using ``fernet_key``.

    The file at ``os.path.join(file_path, file_name)`` is read in binary
    mode, encrypted with Fernet, and then overwritten with the resulting
    token.

    Args:
        file_path: Directory that contains the file.
        file_name: Name of the file inside ``file_path``.

    Returns:
        True when the file was encrypted and rewritten; False when creating
        the Fernet object, reading, encrypting or writing failed (the reason
        and traceback are printed).
    """
    try:
        fernet = Fernet(fernet_key)
        print("Created fernet object")
        path = os.path.join(file_path, file_name)
        with open(path, 'rb') as f:
            data = f.read()
        try:
            data_enc = fernet.encrypt(data)
        except Exception as e:
            e_msg = "".join(traceback.format_exception(*sys.exc_info()))
            print("An occured during data encryption, reason: "+str(e)+"Error: "+e_msg)
            return False
        # Only overwrite the plaintext once encryption has succeeded.
        with open(path, 'wb') as f:
            f.write(data_enc)
        print("Encryption Successful")
    except Exception as e:
        # Build the traceback here: e_msg from the inner handler is not bound
        # when the failure happens outside it (bad key, missing file, write
        # error), which previously turned this handler into a NameError.
        e_msg = "".join(traceback.format_exception(*sys.exc_info()))
        print("An occured while encrypting the file, reason: "+str(e)+"Error: "+e_msg)
        return False
    return True
Decryption:
def decrypt_data(file_path,file_name):
    """Decrypt a Fernet-encrypted file and return its contents decoded as text.

    Reads ``os.path.join(file_path, file_name)`` in binary mode, decrypts it
    with a Fernet object built from ``fernet_key`` (not defined in this
    block), then calls ``.decode()`` on the decrypted bytes.

    Returns:
        * the decoded ``str`` when decryption and decoding both succeed;
        * the decrypted ``bytes`` when only ``.decode()`` fails (``data`` was
          already rebound to the decrypted bytes at that point);
        * ``''`` when ``fernet.decrypt`` itself fails;
        * ``False`` when anything outside the inner ``try`` fails
          (e.g. building the Fernet object or opening the file).
    """
    try:
        # Default result if decryption fails before data is rebound.
        data=''
        fernet=Fernet(fernet_key)
        file=os.path.join(file_path,file_name)
        with open(file,'rb') as f:
            data_enc=f.read()
        try:
            data=fernet.decrypt(data_enc)
            # bytes.decode() defaults to UTF-8; arbitrary binary content
            # (such as a PDF) is generally not valid UTF-8 text.
            data=data.decode()
        except Exception as e:
            # The error is only printed; execution falls through to the final return.
            e_msg="".join(traceback.format_exception(*sys.exc_info()))
            print("An occured during data decryption, reason: "+str(e)+"Error: "+e_msg)
    except Exception as e:
        e_msg="".join(traceback.format_exception(*sys.exc_info()))
        print("An occured while decrypting the file, reason: "+str(e)+"Error: "+e_msg)
        return False
    return data
OUTPUT (trimmed)
ZxM6cMB3Ou8xWZQ4FpZVUKelqo11TcJr_Js7LFo-0XpU05hsIX0pz88lqEfLmY_TSZQWHuYb1yulBT3FYBTd-QU0RqPlPsCSkH3z_LIHyIie5RO7Rztgxs2Y2zyAzkoNQ9M52hhqNgybTE8K_OzQGb9clOTKdkidCW4VTH77HGbSP1EK-x3lTTmVVf0m-
If you just want to encrypt and decrypt a pdf file, you don't need the data=data.decode(). Instead, you can write to an output pdf by appending the code below to your decrypt_data function.
# Write the decrypted bytes to a new PDF. The with-block closes the file,
# which guarantees the data is flushed to disk before output.pdf is opened.
with open(os.path.join(file_path, "output.pdf"), "wb") as f:
    f.write(data)
Now if you open output.pdf, it will be the decrypted pdf.
If you only want a string with the readable text in the pdf, then it may help to look into pdf reading libraries such as PyPDF2.
Related
I'm new to Python and pandas. I like pandas because, for my needs at the moment, it makes it easy to read from and write to Excel sheets.
However, I would like to put the reading inside a try block, like the following code that I use for 'normal' files.
# Read a file and, when it turns out to be empty, (re)create its content.
filename = "./_csv/Orders.xlsx"
_sheetname = "Orders"
try:
    with open(filename, 'r') as f:
        content = f.read()
    if not content:
        # The variable is `filename`; the former `fileName` spelling raised
        # NameError exactly when the empty-file branch was taken.
        print("no data in file " + filename)
        #do create a new datagram
        with open(filename, 'w') as outp:
            # add more content
            add_more_content()
            outp.write(content)
except IOError as e:
    print("I/O error({0}): {1}".format(e.errno, e.strerror))
except Exception:
    # Exception rather than a bare except, so KeyboardInterrupt and
    # SystemExit are not swallowed.
    print("Unexpected error:", sys.exc_info()[0])
But I would like to use something similar when opening a csv or xlsx file using pandas.
result = pandas.read_excel("./_csv/Orders.xlsx", sheet_name="Orders", header=None)
How should I continue with try and with?
filename = "./_csv/Orders.xlsx"
_sheetname = "Orders"
try
with ?? pandas.read_excel(filename, sheet_name=_sheetname, header=None) as ???
add_more_content()
#and save it.
except IOError as e:
print("I/O error({0}): {1}".format(e.errno, e.strerror))
except:
print("Unexpected error:", sys.exc_info()[0])
With everything I have tried, I get "Unexpected error: <class 'AttributeError'>".
For the moment I use something similar to the first sample code above to check whether the file exists and whether it has content, but that is of course expensive when I have to read 400 MB of data.
I see no sample or note about this in the pandas docs.
Any suggestions?
This is how you do it:
import pandas as pd
# Workbook and sheet to load.
filename = "./_csv/Orders.xlsx"
_sheetname = "Orders"

# read_excel returns a DataFrame directly, so no `with` block is needed;
# only the failures of interest are caught.
try:
    df = pd.read_excel(filename, sheet_name=_sheetname, header=None)
except FileNotFoundError as missing:
    print("FileNotFoundError({0}): {1}".format(missing.errno, missing.strerror))
except pd.errors.EmptyDataError as empty:
    print(empty)
I want to gather all PDF files from my computer and extract the text from each one. Both functions that I have currently do that, however, some PDF files are giving me this error:
raise PDFPasswordIncorrect
pdfminer.pdfdocument.PDFPasswordIncorrect
I raised the error in the function that opens and reads the PDF files, and that seemed to work in terms of ignoring the error, but now it's ignoring all the PDF files, including the good ones that were not an issue before.
How can I make it so it only ignores the PDF files that give me this error and not every single PDF?
def pdfparser(x):
    """Extract the text of the PDF at path ``x`` with pdfminer.

    NOTE(review): the first statement inside the ``try`` raises
    PDFPasswordIncorrect unconditionally, so none of the setup code after it
    runs. The ``except`` tuple below does not list PDFPasswordIncorrect;
    whether it is caught depends on that class's bases - confirm.
    """
    try:
        raise PDFPasswordIncorrect(pdfminer.pdfdocument.PDFPasswordIncorrect)
        fp = open(x, 'rb')
        rsrcmgr = PDFResourceManager()
        # Extracted text is accumulated in this in-memory buffer.
        retstr = io.StringIO()
        codec = 'utf-8'
        laparams = LAParams()
        device = TextConverter(rsrcmgr, retstr, codec=codec, laparams=laparams)
        # Create a PDF interpreter object.
        interpreter = PDFPageInterpreter(rsrcmgr, device)
        # Process each page contained in the document.
    except (RuntimeError, TypeError, NameError,ValueError,IOError,IndexError,PermissionError):
        # `name` is not defined in this function (the parameter is `x`);
        # presumably a global from the caller - verify.
        print("Error processing {}".format(name))
    # fp, interpreter and retstr are only bound if the try body completed.
    for page in PDFPage.get_pages(fp):
        interpreter.process_page(page)
        data = retstr.getvalue()
    return(data)
def pdfs(files):
    """Parse every PDF in ``files`` and record its text and metadata in ``file_dict``.

    ``file_dict`` and ``inp`` are not defined in this block; they are assumed
    to be module-level names (a dict of lists and a regex pattern) - confirm.
    Entries in ``files`` are used via ``.name`` and ``.stat()``.

    Returns:
        ``file_dict`` after all files have been processed.
    """
    for name in files:
        try:
            IP_list = (pdfparser(name))
            # All matches of the pattern `inp` in the extracted text.
            keyword = re.findall(inp,IP_list)
            file_dict['keyword'].append(keyword)
            file_dict['name'].append(name.name[0:])
            file_dict['created'].append(time.ctime(name.stat().st_ctime))
            file_dict['modified'].append(time.ctime(name.stat().st_mtime))
            file_dict['path'].append(name)
            file_dict["content"].append(IP_list)
        except (RuntimeError, TypeError, NameError,ValueError,IOError,IndexError,PermissionError):
            # Report the failing file and continue with the next one.
            print("Error processing {}".format(name))
    #print(file_dict)
    return(file_dict)
pdfs(files)
Why are you manually raising an error that would happen if you opened an Pdf that is password protected if you do not supply the correct password?
This error is raised by your code every time!
Instead you need to catch the error if it happens and skip that file. See corrected code:
def pdfparser(x):
    """Skeleton for parsing one PDF that skips password-protected files.

    Args:
        x: Path of the PDF to parse.

    Returns:
        The collected data (an empty list in this skeleton), or None when
        PDFPasswordIncorrect was raised while opening/parsing the file.
    """
    try:
        # try to open your pdf here - do not raise the error yourself!
        # if it happens, catch and handle it as well
        pass  # placeholder so the skeleton is valid Python until real code is added
    except PDFPasswordIncorrect as e:  # catch PDFPasswordIncorrect (add other errors to skip here)
        # Report the path this function received; `name` only exists in the
        # caller's loop and is undefined here.
        print("Error processing {}: {}".format(x, e))
        # no sense in doing anything if you got an error until here
        return None
    # do something with your pdf and collect data
    data = []
    return(data)
def pdfs(files):
    """Skeleton: run ``pdfparser`` on each entry of ``files``, skipping unreadable ones.

    Returns:
        ``file_dict``, which is not defined in this block (assumed to be a
        module-level collection filled in by the omitted processing code).
    """
    for name in files:
        try:
            IP_list = pdfparser(name)
            if IP_list is None: # unable to read for whatever reasons
                continue # process next file
            # do stuff with your data if you got some
        # most of these errors are already handled inside pdfparser
        except (RuntimeError, TypeError, NameError,ValueError,
                IOError,IndexError,PermissionError):
            print("Error processing {}".format(name))
    return(file_dict)
pdfs(files)
The second try/except, in def pdfs(files):, can be shrunk down: all the file-related errors happen inside def pdfparser(x): and are handled there. The rest of your code is incomplete and references stuff I do not know about:
file_dict
inp
name # used as filehandle for .stat() but is a string etc
I saved tweets in a json file
This is my code :
def on_data(self, data):
    """Append one raw tweet payload to ``python.json``.

    Args:
        data: The text received for a tweet; written to the file unchanged.

    Returns:
        Always True, whether or not the write succeeded (failures are
        printed, not raised).
    """
    try:
        with codecs.open('python.json', 'a', encoding='utf-8') as sink:
            sink.write(data)
        print("Tweet ajoute au JSON")
    except BaseException as err:
        print("Error on_data: %s" % str(err))
    return True
but I get this type of character: \u0e40\u0e21\u0e19\u0e0a
I tried everything to avoid this kind of character, but nothing works (utf-8, latin2...).
If you want the non-ASCII characters written directly into the JSON file, you need to encode the JSON with the ensure_ascii=False option, e.g. f.write(json.dumps(json.loads(data), ensure_ascii=False)).
I am trying to scan a text document that I have, find certain sections, and output them to a file in JSON format.
Unfortunately I am not too sure how to use json and would appreciate it if someone could tell me how to encode it as JSON properly.
Thank you everyone!
#save word and type to database
# Each record is appended as one JSON document per line.
word = [{'WORD':strWrd , 'TYPE':strWrdtyp}]
with open(input_lang+'.dic', 'a') as outfile:
    try:
        json.dump(word, outfile)
        outfile.write('\n')
        # No explicit close: the with-block closes the file. The former bare
        # `outfile.close` (no parentheses) never called the method anyway.
    except (TypeError, ValueError) as err:
        # Single-argument print() gives the same output on Python 2 and 3.
        print('Error: %s' % (err,))
I have a dilemma: I'm uploading files to both the Scribd store and the blobstore, using tipfy as the framework.
I have a web form whose action is not created by blobstore.create_upload_url (I'm just using url_for('myhandler')). I did this because if I use the blobstore handler, the POST response is parsed and I cannot use the normal python-scribd API to upload the file into the Scribd store.
Now I have a working Scribd saver:
class UploadScribdHandler(RequestHandler, BlobstoreUploadMixin):
    """Request handler that sends an uploaded file to Scribd and then stores it in the blobstore."""
    def post(self):
        """Handle the upload form POST.

        Reads the ``upload_file`` form field, tries to upload it to Scribd,
        then writes the same file into a new blobstore file and returns a
        ``Response('done')``.
        """
        uploaded_file = self.request.files.get('upload_file')
        fname = uploaded_file.filename.strip()
        try:
            self.post_to_scribd(uploaded_file, fname)
        except Exception, e:
            # ... get the exception message and do something with it
            msg = e.message
            # ...
        # reset the stream to zero (beginning) so the file can be read again
        uploaded_file.seek(0)
        #removed try-except to see debug info in browser window
        # Create the file
        file_name = files.blobstore.create(_blobinfo_uploaded_filename=fname)
        # Open the file and write to it
        with files.open(file_name, 'a') as f:
            # The whole upload is read and passed to a single write call.
            f.write(uploaded_file.read())
        # Finalize the file. Do this before attempting to read it.
        files.finalize(file_name)
        # Get the file's blob key
        blob_key = files.blobstore.get_blob_key(file_name)
        return Response('done')
    def post_to_scribd(self, uploaded_file, fname):
        """Upload the request's ``upload_file`` to Scribd and set its metadata.

        Both parameters are immediately re-read from ``self.request``, so the
        values passed in are not used.

        Raises:
            Exception: if the file extension is not in ALLOWED_EXTENSION, or
                if the Scribd upload/save fails (the original error is
                wrapped in a plain Exception).
        """
        errmsg =''
        uploaded_file = self.request.files.get('upload_file')
        fname = uploaded_file.filename.strip()
        # Text after the last '.', lower-cased.
        fext = fname[fname.rfind('.')+1:].lower()
        if (fext not in ALLOWED_EXTENSION):
            raise Exception('This file type does not allowed to be uploaded\n')
        if SCRIBD_ENABLED:
            doc_title = self.request.form.get('title')
            doc_description = self.request.form.get('description')
            doc_tags = self.request.form.get('tags')
            try:
                document = scribd.api_user.upload(uploaded_file, fname, access='private')
                #while document.get_conversion_status() != 'DONE':
                # time.sleep(2)
                # Fall back to the file name without its extension when no title was submitted.
                if not doc_title:
                    document.title = fname[:fname.rfind('.')]
                else:
                    document.title = doc_title
                # Fall back to a generated upload-timestamp description.
                if not doc_description:
                    document.description = 'This document was uploaded at ' + str(datetime.datetime.now()) +'\n'
                else:
                    document.description = doc_description
                document.tags = doc_tags
                document.save()
            except scribd.ResponseError, err:
                raise Exception('Scribd failed: error code:%d, error message: %s\n' % (err.errno, err.strerror))
            except scribd.NotReadyError, err:
                raise Exception('Scribd failed: error code:%d, error message: %s\n' % (err.errno, err.strerror))
            except:
                raise Exception('something wrong exception')
As you can see, it also saves the file into the blobstore. But if I'm uploading a big file (e.g. 5 MB), I'm receiving
RequestTooLargeError: The request to API call file.Append() was too large.
Request: docs.upload(access='private', doc_type='pdf', file=('PK\x03\x04\n\x00\x00\x00\x00\x00"\x01\x10=\x00\x00(...)', 'test.pdf'))
How can I fix it?
Thanks!
You need to make multiple, smaller calls to the file API, for instance like this:
with files.open(file_name, 'a') as f:
    # Copy the upload in 65536-byte pieces so each write call stays small.
    while True:
        chunk = uploaded_file.read(65536)
        if not chunk:
            break
        f.write(chunk)
Note that the payload size limit on regular requests to App Engine apps is 10MB; if you want to upload larger files, you'll need to use the regular blobstore upload mechanism.
Finally, I found a solution.
Nick Johnson's answer raised an AttributeError because uploaded_file is treated as a string.
A string doesn't have a read() method.
Because a string has no read() method, I split the file string into chunks and wrote them just like he did.
class UploadRankingHandler(webapp.RequestHandler):
    """Stores the posted ``file`` field in the blobstore, written piece by piece."""
    def post(self):
        """Split the uploaded string with ``splitCount`` and append each piece to a new blob file."""
        image_data = self.request.get('file')
        blob_name = files.blobstore.create(mime_type='image/png', _blobinfo_uploaded_filename="testfilename.png")
        # Split first, then write one piece per call.
        pieces = splitCount(image_data, 65520)
        with files.open(blob_name, 'a') as sink:
            for piece in pieces:
                sink.write(piece)
You can read about splitCount() here:
http://www.bdhwan.com/entry/gaewritebigfile