reportlab.pdfgen generating corrupted PDF - python

I have a Django (Python 3.6.5) view that's intended to build a report and export it as a PDF using reportlab.pdfgen. A .pdf file is generated when the view is called, but the file is corrupted. As far as I can tell I'm following reportlab's documentation, so is there an issue with how I'm creating and writing the PDF?
from datetime import datetime
from io import BytesIO
import logging
import traceback

from django.conf import settings
from django.core.files.storage import get_storage_class
from django.http import FileResponse, HttpResponse
from django.shortcuts import redirect

from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas

from app.models import Organization, Question, Report

log = logging.getLogger(__name__)  # logger used in the except block below

# add a footer to the PDF
def add_pdf_footer(pdf):
    date_str = datetime.now().strftime("%Y-%m-%d")
    pdf.saveState()
    width, height = letter
    pdf.setFont("Helvetica", 9)
    text = f"Exported on {date_str}"
    pdf.drawRightString(width - 50, 700, text)
    pdf.restoreState()
def export_report_pdf(request, report_id):
    org = Organization.objects.get(id=request.session["org"])
    try:
        report = Report.objects.get(id=report_id, org=org)
        # build file
        response = HttpResponse(content_type='application/pdf')
        filename = f"{org.name}_{report.report_type.standard}_report_{report.year}_export_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.pdf"
        response["Content-Disposition"] = f"attachment; filename={filename}"
        buffer = BytesIO()
        sections = report.get_sections()
        headers = ["standard", "topic", "code", "question", "value", "units", "note"]
        if len(sections) > 1:
            headers = ["standard", "topic", "section", "code", "question", "value", "units", "note"]
        answers = report.get_or_create_answers(request.user)
        answers_to_export = []
        for answer in answers:
            question = answer.question
            if question.dependency is not None:
                dependency = question.dependency
                dep_answer = list(filter(lambda x: x.question == dependency, answers))[0]
                if question.is_dependency_resolved(dep_answer):
                    answers_to_export.append(answer)
            else:
                answers_to_export.append(answer)
        pdf = canvas.Canvas(buffer, pagesize=letter)
        pdf.setFont("Helvetica", 15)
        pdf.drawString(100, 800, f"{org.name} - {report.year} {report.report_type.standard} - {report.report_type.name} Report")
        pdf.setFont("Helvetica", 12)
        y = 750
        for header in headers:
            pdf.drawString(100, y, header)
        y -= 25
        for answer in answers_to_export:
            qtype = answer.question.question_type
            if qtype == Question.QUESTION_TYPE_MULTISELECT:
                value = ", ".join([ans.value for ans in answer.values_selected.all()])
            elif qtype == Question.QUESTION_TYPE_SELECT:
                value = answer.value_selected.value if answer.value_selected else ""
            elif qtype == Question.QUESTION_TYPE_DATE:
                value = answer.value_date.strftime("%Y-%m-%d")
            else:
                value = answer.value
            if len(sections) > 1:
                pdf.drawString(180, y, answer.question.section)
                pdf.drawString(260, y, answer.question.code)
                if answer.question.text:
                    pdf.drawString(340, y, answer.question.text)
                if value:
                    pdf.drawString(420, y, value)
                if answer.question.units:
                    pdf.drawString(500, y, answer.question.units)
                if answer.note:
                    pdf.drawString(580, y, answer.note)
                y -= 30
            else:
                pdf.drawString(260, y, answer.question.code)
                if answer.question.text:
                    pdf.drawString(340, y, answer.question.text)
                if value:
                    pdf.drawString(420, y, value)
                if answer.question.units:
                    pdf.drawString(500, y, answer.question.units)
                if answer.note:
                    pdf.drawString(580, y, answer.note)
                y -= 30
        # set PDF export footer
        add_pdf_footer(pdf)
        pdf.save()
        # return file
        return response
    except Exception:
        log.error(traceback.format_exc())
        return redirect("admin-report")
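Note that nothing in the view ever copies the rendered PDF out of buffer into response: pdf.save() finalizes the document inside the BytesIO buffer, but the HttpResponse is returned with an empty body, which would explain a .pdf file that viewers reject as corrupted. A minimal sketch of the likely fix, reusing the variable names from the view:
pdf.save()
# copy the finished PDF bytes from the in-memory buffer into the response body
response.write(buffer.getvalue())
buffer.close()
return response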

Related

Trying to parse Word Documents and getting PdfReadError: EOF marker not found

I am testing some Python code to loop through resumes, open each, parse each, and create a comprehensive report based on the contents of each resume. Here is the code that I am running.
#importing all required libraries
import PyPDF2
import os
from os import listdir
from os.path import isfile, join
from io import StringIO
import pandas as pd
from collections import Counter
import en_core_web_sm
nlp = en_core_web_sm.load()
from spacy.matcher import PhraseMatcher

#Function to read resumes from the folder one by one
mypath = 'C:\\path_to_resumes\\'  #enter your path here where you saved the resumes
onlyfiles = [os.path.join(mypath, f) for f in os.listdir(mypath) if os.path.isfile(os.path.join(mypath, f))]

def pdfextract(file):
    fileReader = PyPDF2.PdfFileReader(open(file, 'rb'))
    countpage = fileReader.getNumPages()
    count = 0
    text = []
    while count < countpage:
        pageObj = fileReader.getPage(count)
        count += 1
        t = pageObj.extractText()
        print(t)
        text.append(t)
    return text
#function to read resume ends
#function to read resume ends
#function that does phrase matching and builds a candidate profile
def create_profile(file):
    text = pdfextract(file)
    text = str(text)
    text = text.replace("\\n", "")
    text = text.lower()
    #below is the csv where we have all the keywords, you can customize your own
    keyword_dict = pd.read_csv('D:/NLP_Resume/resume/template_new.csv')
    stats_words = [nlp(text) for text in keyword_dict['Statistics'].dropna(axis=0)]
    NLP_words = [nlp(text) for text in keyword_dict['NLP'].dropna(axis=0)]
    ML_words = [nlp(text) for text in keyword_dict['Machine Learning'].dropna(axis=0)]
    DL_words = [nlp(text) for text in keyword_dict['Deep Learning'].dropna(axis=0)]
    R_words = [nlp(text) for text in keyword_dict['R Language'].dropna(axis=0)]
    python_words = [nlp(text) for text in keyword_dict['Python Language'].dropna(axis=0)]
    Data_Engineering_words = [nlp(text) for text in keyword_dict['Data Engineering'].dropna(axis=0)]
    matcher = PhraseMatcher(nlp.vocab)
    matcher.add('Stats', None, *stats_words)
    matcher.add('NLP', None, *NLP_words)
    matcher.add('ML', None, *ML_words)
    matcher.add('DL', None, *DL_words)
    matcher.add('R', None, *R_words)
    matcher.add('Python', None, *python_words)
    matcher.add('DE', None, *Data_Engineering_words)
    doc = nlp(text)
    d = []
    matches = matcher(doc)
    for match_id, start, end in matches:
        rule_id = nlp.vocab.strings[match_id]  # get the unicode ID, i.e. 'COLOR'
        span = doc[start:end]  # get the matched slice of the doc
        d.append((rule_id, span.text))
    keywords = "\n".join(f'{i[0]} {i[1]} ({j})' for i, j in Counter(d).items())
    ## converting string of keywords to dataframe
    df = pd.read_csv(StringIO(keywords), names=['Keywords_List'])
    df1 = pd.DataFrame(df.Keywords_List.str.split(' ', 1).tolist(), columns=['Subject', 'Keyword'])
    df2 = pd.DataFrame(df1.Keyword.str.split('(', 1).tolist(), columns=['Keyword', 'Count'])
    df3 = pd.concat([df1['Subject'], df2['Keyword'], df2['Count']], axis=1)
    df3['Count'] = df3['Count'].apply(lambda x: x.rstrip(")"))
    base = os.path.basename(file)
    filename = os.path.splitext(base)[0]
    name = filename.split('_')
    name2 = name[0]
    name2 = name2.lower()
    ## converting str to dataframe
    name3 = pd.read_csv(StringIO(name2), names=['Candidate Name'])
    dataf = pd.concat([name3['Candidate Name'], df3['Subject'], df3['Keyword'], df3['Count']], axis=1)
    dataf['Candidate Name'].fillna(dataf['Candidate Name'].iloc[0], inplace=True)
    return dataf
#function ends
#code to execute/call the above functions
final_database = pd.DataFrame()
i = 0
while i < len(onlyfiles):
    file = onlyfiles[i]
    dat = create_profile(file)
    final_database = final_database.append(dat)
    i += 1
    print(final_database)

#code to count words under each category and visualize it through Matplotlib
final_database2 = final_database['Keyword'].groupby([final_database['Candidate Name'], final_database['Subject']]).count().unstack()
final_database2.reset_index(inplace=True)
final_database2.fillna(0, inplace=True)
new_data = final_database2.iloc[:, 1:]
new_data.index = final_database2['Candidate Name']
#execute the below line if you want to see the candidate profile in a csv format
#sample2=new_data.to_csv('sample.csv')
import matplotlib.pyplot as plt
plt.rcParams.update({'font.size': 10})
ax = new_data.plot.barh(title="Resume keywords by category", legend=False, figsize=(25, 7), stacked=True)
labels = []
for j in new_data.columns:
    for i in new_data.index:
        label = str(j) + ": " + str(new_data.loc[i][j])
        labels.append(label)
patches = ax.patches
for label, rect in zip(labels, patches):
    width = rect.get_width()
    if width > 0:
        x = rect.get_x()
        y = rect.get_y()
        height = rect.get_height()
        ax.text(x + width/2., y + height/2., label, ha='center', va='center')
plt.show()
In the folder, I have '.doc' and '.docx' files. Everything seems to work fine up until the point directly below: when I get here, the code throws an error. Here is the troublesome code. The weird thing is that it looks like some kind of PDF error, but I'm iterating only through '.doc' and '.docx' files.
final_database = pd.DataFrame()
i = 0
while i < len(onlyfiles):
    file = onlyfiles[i]
    dat = create_profile(file)
    final_database = final_database.append(dat)
    i += 1
    print(final_database)
Here is the StackTrace:
Traceback (most recent call last):
  File "<ipython-input-2-c63fca79d39f>", line 5, in <module>
    dat = create_profile(file)
  File "<ipython-input-1-cdc3bf75cd26>", line 34, in create_profile
    text = pdfextract(file)
  File "<ipython-input-1-cdc3bf75cd26>", line 17, in pdfextract
    fileReader = PyPDF2.PdfFileReader(open(file,'rb'))
  File "C:\Users\ryans\Anaconda3\lib\site-packages\PyPDF2\pdf.py", line 1084, in __init__
    self.read(stream)
  File "C:\Users\ryans\Anaconda3\lib\site-packages\PyPDF2\pdf.py", line 1696, in read
    raise utils.PdfReadError("EOF marker not found")
PdfReadError: EOF marker not found
The code comes from here.
https://towardsdatascience.com/do-the-keywords-in-your-resume-aptly-represent-what-type-of-data-scientist-you-are-59134105ba0d
You are using the package PyPDF2, which reads and manipulates PDF files. In the towardsdatascience article you mentioned, all of the resumes the author worked with were in PDF format.
Since your resumes are in .doc/.docx format, you should explore the python-docx library instead:
https://python-docx.readthedocs.io/en/latest/index.html
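For instance, here is a minimal sketch of a python-docx replacement for the pdfextract() step (the function name is illustrative, and note that python-docx reads .docx only; legacy .doc files would still need converting):
import docx  # installed as python-docx

def docxextract(file):
    # collect the text of every paragraph, mirroring the per-page texts pdfextract() returns
    document = docx.Document(file)
    return [paragraph.text for paragraph in document.paragraphs]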

Set Author, Title, and Subject for PDF using Reportlab

How can you correctly set the Author, Title and Subject attributes for a PDF File using Reportlab?
I have found the methods in the Reportlab User Guide on page 56, but I am not sure how to implement them correctly.
Below is my PDF cropping and scaling script. I have added the annotations method, but I don't know where to call it from, or whether a whole new Canvas object is needed. Please excuse the lengthy code: the doc is only created after line 113, and everything above is mostly auxiliary methods, including the annotations method on line 30.
# All the necessary parameters are accessible after line 92,
# but can of course be changed manually in the code

# imports for the crop, renamed to avoid conflict with the reportlab Image import
from PIL import Image as imgPIL
from PIL import ImageChops, ImageOps, ImageFilter
import os.path, sys

# imports for the PDF creation
import glob
from reportlab.lib.pagesizes import A4
from reportlab.lib.units import mm  # needed for the margins and page-number spacing below
from reportlab.lib import utils
from reportlab.platypus import Image, SimpleDocTemplate, Spacer
from reportlab.pdfgen import canvas

# get os path for cropping
path = (os.path.dirname(os.path.abspath("cropPDF.py")))
dirs = os.listdir(path)

def trim(im, border="white"):
    bg = imgPIL.new(im.mode, im.size, border)
    diff = ImageChops.difference(im, bg)
    bbox = diff.getbbox()
    if bbox:
        return im.crop(bbox)

def annotations(canvas):
    canvas.setAuthor("the ReportLab Team")
    canvas.setTitle("ReportLab PDF Generation User Guide")
    canvas.setSubject("How to Generate PDF files using the ReportLab modules")

def findMaxWidth():
    maxWidth = 0
    for item in dirs:
        try:
            fullpath = os.path.join(path, item)
            if os.path.isfile(fullpath):
                im = imgPIL.open(fullpath)
                maxWidth = max(maxWidth, im.size[0])
        except:
            pass
    return maxWidth

def padImages(docHeight):
    maxWidth = findMaxWidth()
    for item in dirs:
        try:
            fullpath = os.path.join(path, item)
            if os.path.isfile(fullpath):
                im = imgPIL.open(fullpath)
                f, e = os.path.splitext(fullpath)
                width, height = im.size  # get the image dimensions, the height is needed for the blank image
                if not docHeight <= height:  # to prevent oversized images from being padded, such that they remain centered
                    image = imgPIL.new('RGB', (maxWidth, height),
                                       (255, 255, 255))  # create a white image with the max width
                    image.paste(im, (0, 0))  # paste the original image overtop the blank one, flush on the left side
                    image.save(f + ".png", "PNG", quality=100)
        except:
            pass

def crop():
    for item in dirs:
        try:
            fullpath = os.path.join(path, item)
            if os.path.isfile(fullpath):
                im = imgPIL.open(fullpath)
                f, e = os.path.splitext(fullpath)
                imCrop = trim(im, "white")
                imCrop.save(f + ".png", "PNG", quality=100)
        except:
            pass

def add_page_number(canvas, doc):
    canvas.saveState()
    canvas.setFont('Times-Roman', numberFontSize)
    page_number_text = "%d" % (doc.page)
    canvas.drawCentredString(
        pageNumberSpacing * mm,
        pageNumberSpacing * mm,
        page_number_text
    )
    canvas.restoreState()

#############################
executeCrop = True
executePad = True

outputName = "output.pdf"  #The name of the file that will be created
fileAuthor = "Roman Stadler"  #these 3 attributes are visible in the file info menu
fileTitle = ""
fileSubject = ""

margin = 0.5
imageWidthDefault = 550
spacerHeight = 7
scalingIfImageTooTall = 0.95  # larger than 95 can result in an empty page after the image

includePagenumbers = True
numberFontSize = 10
pageNumberSpacing = 5
############################
doc = SimpleDocTemplate(
    outputName,
    topMargin=margin * mm,
    leftMargin=margin * mm,
    rightMargin=margin * mm,
    bottomMargin=margin * mm,
    pagesize=A4
)

if executeCrop:
    crop()
if executePad:
    padImages(doc.height)

filelist = glob.glob("*.png")  # Get a list of files in the current directory
filelist.sort()

story = []  # create the list of images for the PDF
for fn in filelist:
    img = utils.ImageReader(fn)
    img_width, img_height = img.getSize()  # necessary for the aspect ratio
    aspect = img_height / float(img_width)
    documentHeight = doc.height
    imageWidth = imageWidthDefault
    imageHeight = imageWidth * aspect
    if imageHeight > documentHeight:
        imageHeight = documentHeight * scalingIfImageTooTall
        imageWidth = imageHeight / aspect
    img = Image(
        fn,
        width=imageWidth,
        height=imageHeight
    )
    story.append(img)
    space = Spacer(width=0, height=spacerHeight)
    story.append(space)

if includePagenumbers and not len(filelist) == 0:  # if pagenumbers are desired, or not
    doc.build(
        story,
        onFirstPage=add_page_number,
        onLaterPages=add_page_number,
    )
elif not len(filelist) == 0:
    doc.build(story)
else:  # to prevent an empty PDF that can't be opened
    print("no files found")
In the meantime, I have found another way that does not use reportlab but instead relies on PyPDF2.
The following import is needed:
# PyPDF2 for the metadata modification
from PyPDF2 import PdfFileReader, PdfFileWriter
Then the metadata can be edited like this:
author = "Roman Stadler"
title = "CropPDF"
subject = "Stackoverflow"
#rest of the script
#attemp the metadate edit
try:
file = open('output.pdf', 'rb+')
reader = PdfFileReader(file)
writer = PdfFileWriter()
writer.appendPagesFromReader(reader)
metadata = reader.getDocumentInfo()
writer.addMetadata(metadata)
writer.addMetadata({
'/Author': author,
'/Title': title,
'/Subject' : subject,
'/Producer' : "CropPDF",
'/Creator' : "CropPDF",
})
writer.write(file)
file.close()
except:
print("Error while editing metadata")
You can define attributes like the author when defining the doc as a SimpleDocTemplate
doc = SimpleDocTemplate(
    outputName,
    topMargin=margin * mm,
    leftMargin=margin * mm,
    rightMargin=margin * mm,
    bottomMargin=margin * mm,
    pagesize=A4,
    title="This is the title of the document",  #exchange with your title
    author="John Smith",  #exchange with your author's name
    subject="Adding metadata to pdf via reportlab"  #exchange with your subject
)
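Alternatively, since doc.build() passes the canvas to its page callbacks, the annotations method from the question can be called there, with no new Canvas object needed. A sketch that combines it with the existing page numbering from the question's script:
def first_page(canvas, doc):
    annotations(canvas)  # sets Author, Title and Subject on the underlying canvas
    add_page_number(canvas, doc)  # keep the existing page numbering

doc.build(story, onFirstPage=first_page, onLaterPages=add_page_number)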

face recognition prediction error (cv::face::Fisherfaces::predict)

OpenCV Error: Bad argument (This Fisherfaces model is not computed yet. Did you call Fisherfaces::train?) in cv::face::Fisherfaces::predict,
cv2.error: C:\projects\opencv-python\opencv_contrib\modules\face\src\fisher_faces.cpp:137: error: (-5) This Fisherfaces model is not computed yet. Did you call Fisherfaces::train? in function cv::face::Fisherfaces::predict
import cv2
import numpy as np
import argparse
import time
import glob
import os
import sys
import subprocess
import pandas
import random
import Update_Model
import math

#Define variables and load classifier
camnumber = 0
video_capture = cv2.VideoCapture()
facecascade = cv2.CascadeClassifier("haarcascade_frontalface_default.xml")
fishface = cv2.face.FisherFaceRecognizer_create()
try:
    fishface.read("trained_emoclassifier.xml")
except:
    print("no trained xml file found, please run program with --update flag first")
parser = argparse.ArgumentParser(description="Options for the emotion-based music player")
parser.add_argument("--update", help="Call to grab new images and update the model accordingly", action="store_true")
args = parser.parse_args()
facedict = {}
actions = {}
emotions = ["angry", "happy", "sad", "neutral"]
df = pandas.read_excel("EmotionLinks.xlsx")  #open Excel file
actions["angry"] = [x for x in df.angry.dropna()]  #We need the dropna() when columns are uneven in length, which creates NaN values at missing places. The OS won't know what to do with these if we try to open them.
actions["happy"] = [x for x in df.happy.dropna()]
actions["sad"] = [x for x in df.sad.dropna()]
actions["neutral"] = [x for x in df.neutral.dropna()]

def open_stuff(filename):  #Open the file, credit to user4815162342, on the stackoverflow link in the text above
    if sys.platform == "win32":
        os.startfile(filename)
    else:
        opener = "open" if sys.platform == "darwin" else "xdg-open"
        subprocess.call([opener, filename])

def crop_face(clahe_image, face):
    for (x, y, w, h) in face:
        faceslice = clahe_image[y:y+h, x:x+w]
        faceslice = cv2.resize(faceslice, (350, 350))
        facedict["face%s" % (len(facedict)+1)] = faceslice
    return faceslice

def update_model(emotions):
    print("Model update mode active")
    check_folders(emotions)
    for i in range(0, len(emotions)):
        save_face(emotions[i])
    print("collected images, looking good! Now updating model...")
    Update_Model.update(emotions)
    print("Done!")

def check_folders(emotions):
    for x in emotions:
        if os.path.exists("dataset\\%s" % x):
            pass
        else:
            os.makedirs("dataset\\%s" % x)

def save_face(emotion):
    print("\n\nplease look " + emotion + ". Press enter when you're ready to have your pictures taken")
    input()  #Wait until enter is pressed
    video_capture.open(0)
    while len(facedict.keys()) < 16:
        detect_face()
    video_capture.release()
    for x in facedict.keys():
        cv2.imwrite("dataset\\%s\\%s.jpg" % (emotion, len(glob.glob("dataset\\%s\\*" % emotion))), facedict[x])
    facedict.clear()

def recognize_emotion():
    predictions = []
    confidence = []
    for x in facedict.keys():
        pred, conf = fishface.predict(facedict[x])
        cv2.imwrite("images\\%s.jpg" % x, facedict[x])
        predictions.append(pred)
        confidence.append(conf)
    recognized_emotion = emotions[max(set(predictions), key=predictions.count)]
    print("I think you're %s" % recognized_emotion)
    actionlist = [x for x in actions[recognized_emotion]]  #get list of actions/files for detected emotion
    random.shuffle(actionlist)  #Randomly shuffle the list
    open_stuff(actionlist[0])  #Open the first entry in the list

def grab_webcamframe():
    ret, frame = video_capture.read()
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    clahe_image = clahe.apply(gray)
    return clahe_image

def detect_face():
    clahe_image = grab_webcamframe()
    face = facecascade.detectMultiScale(clahe_image, scaleFactor=1.1, minNeighbors=15, minSize=(10, 10), flags=cv2.CASCADE_SCALE_IMAGE)
    if len(face) == 1:
        faceslice = crop_face(clahe_image, face)
        recognize_emotion()
        return faceslice
    else:
        print("no/multiple faces detected, passing over frame")

def run_detection():
    while len(facedict) != 10:
        detect_face()
    recognize_emotion = recognize_emotion()
    return recognize_emotion

if args.update:
    update_model(emotions)
else:
    video_capture.open(camnumber)
    run_detection()
How can I resolve this?
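The error message itself points at the cause: predict() is running on a model that was never trained. When fishface.read("trained_emoclassifier.xml") fails in the code above, the except branch only prints a warning, so the script carries on with an empty model and the first fishface.predict() call raises. A minimal sketch of a stricter guard (exiting is just one option; running the --update path to train the model first would also work):
try:
    fishface.read("trained_emoclassifier.xml")
except cv2.error:
    # without a trained or loaded model, every later predict() call will fail
    print("no trained xml file found, please run the program with --update first")
    sys.exit(1)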

Python3: Reportlab Image - ResourceWarning: unclosed file <_io.BufferedReader name=...>

When I run a unit test, I get a Python 3 ResourceWarning about an unclosed buffer for the "logo" image in the following code. How do I close the logo image buffer correctly? Please be aware that the Image class comes from reportlab.platypus.
I have tried logo.close() and with Image(logo_path) as logo:, but neither of them works.
>>python -m unittest tests.test_sample_pdf
>>/tests/test_sample_pdf.py:51: ResourceWarning: unclosed file <_io.BufferedReader name='/Users/my_prj/statics/my-logo.gif'>
get_pdf()
Source Code
import unittest
import os
from io import BytesIO
from os.path import abspath, dirname
from reportlab.lib.colors import HexColor
from reportlab.lib.enums import TA_RIGHT
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch, cm, mm
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, BaseDocTemplate, Paragraph, Image, Spacer

COL_SORT = [{"headerName": "name",
             "field": "name",
             "width": 1000}]

def get_pdf():
    # setup PDF template
    buffer = BytesIO()
    side_margin = 12
    col_widths = [row['width'] for row in COL_SORT]
    page_width = sum(col_widths) + side_margin * 3
    pdf = SimpleDocTemplate(buffer, pagesize=(page_width, 8.5 * inch), rightMargin=side_margin, leftMargin=side_margin,
                            topMargin=side_margin, bottomMargin=side_margin)
    elements = []
    # logo
    parent_dir = dirname(dirname(abspath(__file__)))
    logo_path = os.path.join(parent_dir, 'statics', 'my-logo.gif')
    logo = Image(logo_path)
    logo.hAlign = 'LEFT'
    heading_style = ParagraphStyle(name='heading', fontSize=16, leading=20, spaceAfter=0,
                                   textColor=HexColor('#ffffff'), backColor=HexColor('#465a81'))
    heading_right_style = ParagraphStyle(name='heading', fontSize=16, leading=20, spaceAfter=0,
                                         textColor=HexColor('#ffffff'), backColor=HexColor('#465a81'),
                                         alignment=TA_RIGHT)
    logo_tbl = Table([[logo]], colWidths=sum(col_widths))
    logo_tbl.hAlign = 'LEFT'
    logo_tbl.setStyle(TableStyle([('BACKGROUND', (0, 0), (-1, -1), HexColor('#B90002'))]))
    elements.append(logo_tbl)
    # build PDF
    pdf.build(elements)
    pdf_string = buffer.getvalue()
    buffer.close()

class TestPDF(unittest.TestCase):
    def test_pdf(self):
        get_pdf()
It seems that reportlab expects you to open and close the image file yourself. Use with open(logo_path, 'rb') as image_fd:.
This workaround resolves the warning. I've added the mentioned with statement and indented the lines that follow it.
def get_pdf():
    # setup PDF template
    buffer = BytesIO()
    side_margin = 12
    col_widths = [row['width'] for row in COL_SORT]
    page_width = sum(col_widths) + side_margin * 3
    pdf = SimpleDocTemplate(buffer, pagesize=(page_width, 8.5 * inch), rightMargin=side_margin, leftMargin=side_margin,
                            topMargin=side_margin, bottomMargin=side_margin)
    elements = []
    # logo
    parent_dir = dirname(dirname(abspath(__file__)))
    logo_path = os.path.join(parent_dir, 'statics', 'nci-logo.gif')
    with open(logo_path, 'rb') as image_fd:  # edited this line
        logo = Image(image_fd)  # ... and this line
        logo.hAlign = 'LEFT'
        heading_style = ParagraphStyle(name='heading', fontSize=16, leading=20, spaceAfter=0,
                                       textColor=HexColor('#ffffff'), backColor=HexColor('#465a81'))
        heading_right_style = ParagraphStyle(name='heading', fontSize=16, leading=20, spaceAfter=0,
                                             textColor=HexColor('#ffffff'), backColor=HexColor('#465a81'),
                                             alignment=TA_RIGHT)
        logo_tbl = Table([[logo]], colWidths=sum(col_widths))
        logo_tbl.hAlign = 'LEFT'
        logo_tbl.setStyle(TableStyle([('BACKGROUND', (0, 0), (-1, -1), HexColor('#B90002'))]))
        elements.append(logo_tbl)
        # build PDF
        pdf.build(elements)
    pdf_string = buffer.getvalue()
    buffer.close()
Output:
$ python -m unittest tests.test_sample_pdf
.
----------------------------------------------------------------------
Ran 1 test in 0.042s
OK
I've put the complete example on GitHub.

Setting NoteFilter in Evernote API

I have set up my Python script like so (extract):
import evernote.edam.userstore.constants as UserStoreConstants
import evernote.edam.type.ttypes as Types
from evernote.api.client import EvernoteClient
client = EvernoteClient(token=auth_token, sandbox=False)
note_store = client.get_note_store()
The problem comes with this code:
filter = note_store.NoteFilter
filter.setOrder(NoteSortOrder.UPDATED.getValue())
I would then go on to use note_store.findNotesMetadata. However, I get the error:
AttributeError: 'module' object has no attribute 'setOrder'
What am I doing wrong? I tried to adapt the example given here.
Here is a working example:
from evernote.api.client import EvernoteClient
from evernote.edam.notestore.ttypes import NoteFilter, NotesMetadataResultSpec
from evernote.edam.type.ttypes import NoteSortOrder
auth_token = 'your-token'
client = EvernoteClient(token=auth_token)
note_store = client.get_note_store()
updated_filter = NoteFilter(order=NoteSortOrder.UPDATED)
offset = 0
max_notes = 10
result_spec = NotesMetadataResultSpec(includeTitle=True)
result_list = note_store.findNotesMetadata(auth_token, updated_filter, offset, max_notes, result_spec)
# note is an instance of NoteMetadata
# result_list is an instance of NotesMetadataList
for note in result_list.notes:
    print(note.title)
Here is my full working code:
import os
import sys
import hashlib
import binascii
from datetime import datetime,timedelta
import logging
# sys.path.append("lib")
# sys.path.append("libs/evernote-sdk-python3")
sys.path.append("libs/evernote-sdk-python3/lib")
from evernote import *
from evernote.api import *
from evernote.api.client import *
from evernote.edam.limits import *
from evernote.edam.type import *
from evernote.edam.type.ttypes import *
from evernote.edam.notestore import *
from evernote.edam.notestore.ttypes import *
from evernote.edam.notestore.NoteStore import *
from evernote.edam.userstore import *
from evernote.edam.userstore.constants import *
ToProcessNotebook = "toProcess"
isSandbox=True
# isChina=False
isChina=True
AuthTokenDict = {
    "sandbox": {
        # China, https://sandbox.evernote.com/api/DeveloperToken.action
        "yinxiang": "change_to_your_token",
        # International
        "evernote": "",
    },
    "production": {
        "yinxiang": "",
        "evernote": "",
    },
}

ServiceHost = ""
AuthToken = ""
if isChina:
    if isSandbox:
        AuthToken = AuthTokenDict["sandbox"]["yinxiang"]
        ServiceHost = "sandbox.yinxiang.com"
    else:
        AuthToken = AuthTokenDict["production"]["yinxiang"]
        ServiceHost = "app.yinxiang.com"
else:
    if isSandbox:
        AuthToken = AuthTokenDict["sandbox"]["evernote"]
        ServiceHost = "sandbox.evernote.com"
    else:
        AuthToken = AuthTokenDict["production"]["evernote"]
        ServiceHost = "app.evernote.com"
gClient = None
gUserStore = None
gNoteStore = None
def init():
    global gClient, gUserStore, gNoteStore
    # getCurDatetimeStr and loggingInit are helpers from the author's own
    # library (see the crifanEvernote.py link below)
    logFilename = "EvernoteToWordpress_%s.log" % (getCurDatetimeStr())
    loggingInit(logFilename)
    gClient = EvernoteClient(
        token=AuthToken,
        # sandbox=sandbox,
        # china=china,
        service_host=ServiceHost
    )
    logging.info("gClient=%s", gClient)
    gUserStore = gClient.get_user_store()
    logging.info("gUserStore=%s", gUserStore)
    isVersionOk = gUserStore.checkVersion(
        "Evernote EDAMTest (Python)",
        EDAM_VERSION_MAJOR,  # UserStoreConstants.EDAM_VERSION_MAJOR,
        EDAM_VERSION_MINOR,  # UserStoreConstants.EDAM_VERSION_MINOR
    )
    logging.info("Is my Evernote API version up to date? %s", isVersionOk)
    gNoteStore = gClient.get_note_store()
    logging.info("gNoteStore=%s", gNoteStore)
def EvernoteToWordpress():
    """Process evernote note into wordpress"""
    global gClient, gUserStore, gNoteStore
    notebookList = gNoteStore.listNotebooks()
    notebookListLen = len(notebookList)
    logging.info("Found %s notebooks:", notebookListLen)
    for curNotebook in notebookList:
        logging.info("\tguid=%s,name=%s", curNotebook.guid, curNotebook.name)
        if curNotebook.name == ToProcessNotebook:
            processNotes(curNotebook)
            break

def processNotes(curNotebook):
    """Process each note"""
    logging.info("curNotebook=%s", curNotebook)
    # find all notes in notebook
    searchOffset = 0
    searchPageSize = 100
    searchFilter = NoteStore.NoteFilter()
    searchFilter.order = NoteSortOrder.UPDATED
    searchFilter.ascending = False
    searchFilter.notebookGuid = curNotebook.guid
    logging.info("searchFilter=%s", searchFilter)
    resultSpec = NotesMetadataResultSpec()
    resultSpec.includeTitle = True
    resultSpec.includeContentLength = True
    resultSpec.includeCreated = True
    resultSpec.includeUpdated = True
    resultSpec.includeDeleted = True
    resultSpec.includeNotebookGuid = True
    resultSpec.includeTagGuids = True
    resultSpec.includeAttributes = True
    resultSpec.includeLargestResourceMime = True
    resultSpec.includeLargestResourceSize = True
    logging.info("resultSpec=%s", resultSpec)
    # foundNoteResult = gNoteStore.findNotesMetadata(
    #     authenticationToken=AuthToken,
    #     filter=searchFilter,
    #     offset=searchOffset,
    #     maxNotes=pageSize,
    #     resultSpec=resultSpec
    # )
    foundNoteResult = gNoteStore.findNotesMetadata(AuthToken, searchFilter, searchOffset, searchPageSize, resultSpec)
    logging.info("foundNoteResult=%s", foundNoteResult)

if __name__ == "__main__":
    init()
    EvernoteToWordpress()
Note: the Evernote SDK was downloaded from Evernote SDK for Python 3.
Latest code: crifanEvernote.py
