I'm trying to split a PDF file that gets as a post request to this Flask API. I am using PyPDF2 to work with the pdf files and I'm getting this error when trying to post the freshly split pdf: "IOError: File not open for reading". I tried opening it again before the post but that causes another type of error.
from flask import render_template, request
from app import app
from PyPDF2 import PdfFileWriter, PdfFileReader
import requests
#app.route('/')
def start():
return render_template('home.html')
#upload-route
#app.route('/upload', methods = ['POST'])
def upload():
pdf_data = None
split_pages = []
if 'pdf' in request.files:
incoming_pdf = request.files['pdf']
pdf_data = PdfFileReader(incoming_pdf, 'rb')
for i in range(pdf_data.numPages):
output = PdfFileWriter()
output.addPage(pdf_data.getPage(i))
with open("document-page%s.pdf" % i, "wb").read() as outputStream:
##new_page = output.write(outputStream)
try:
split_pages.append(output)
print('Created: {}'.format("document-page%s.pdf" % i))
files = {'userfile': outputStream }
upload_url ='*URL IM POSTING TO*'
auth_upload={
*AUTH KEYS*
}
r=requests.post(url=upload_url, files=files,data=auth_upload)
finally:
outputStream.close()
else:
return "please upload a file to process"
I know this may be a super basic question but I have spent an outrageous amount of time with this and I no longer think straight.
I just had look at your code, and what about that?
from PyPDF2 import PdfFileWriter, PdfFileReader
with open('./reklamacja.pdf', 'rb') as my_pdf:
pdf_data = PdfFileReader(my_pdf)
for i in range(pdf_data.numPages):
output = PdfFileWriter()
# output.addPage(pdf_data.getPage(i))
output.insertPage(pdf_data.getPage(i))
filename = "document-page%s.pdf" % i
with open(filename, "wb") as outputStream:
output.write(outputStream)
with open(filename, 'rb') as file_to_send:
r = requests.post('http://someurl.bin', files={'myname': file_to_send})
{ Some other code...}
Build that into your flask code, it should work.
For me you just overcomplicated a little it in 'try' block :)
Hope this solves you problem.
Related
I am having an issue trying to download download in-memory ZIP-FILE object using Flask send_file. my zip exists in memory and is full of text documents but when I try with this code
the result I get is: it downloads like it is supposed to but it downloads an empty zip file! it's like it is copying nothing ... I have no idea how to solve this problem.
#app.route('/downloads/', methods=['GET'])
def download():
from flask import send_file
import io
import zipfile
import time
FILEPATH = r"C:\Users\JD\Downloads\trydownload.zip"
fileobj = io.BytesIO()
with zipfile.ZipFile(fileobj, 'w') as zip_file:
zip_info = zipfile.ZipInfo(FILEPATH)
zip_info.date_time = time.localtime(time.time())[:6]
zip_info.compress_type = zipfile.ZIP_DEFLATED
with open(FILEPATH, 'rb') as fd:
zip_file.writestr(zip_info, fd.read())
fileobj.seek(0)
return send_file(fileobj, mimetype='zip', as_attachment=True,
attachment_filename='%s.zip' % os.path.basename(FILEPATH))
I had the exact same issue with the Flask send_file method.
Details:
Flask version 2.0.1
OS: Windows 10
Solution
I figured out a workaround to this i.e. instead of the send_file method, this can be done by returning a Response object with the data. Replace the return statement in your code with the following and this should work.
#app.route('/downloads/', methods=['GET'])
def download():
from flask import Response # Changed line
import io
import zipfile
import time
FILEPATH = r"C:\Users\JD\Downloads\trydownload.zip"
fileobj = io.BytesIO()
with zipfile.ZipFile(fileobj, 'w') as zip_file:
zip_info = zipfile.ZipInfo(FILEPATH)
zip_info.date_time = time.localtime(time.time())[:6]
zip_info.compress_type = zipfile.ZIP_DEFLATED
with open(FILEPATH, 'rb') as fd:
zip_file.writestr(zip_info, fd.read())
fileobj.seek(0)
# Changed line below
return Response(fileobj.getvalue(),
mimetype='application/zip',
headers={'Content-Disposition': 'attachment;filename=your_filename.zip'})
I have been trying to speed up our date stamping process by adding a stamp as a watermark to PDFs through PyPDF2. I found the code below online as I'm pretty new to coding.
When I run this it seems to work, but the file is corrupted and won't open. Does anyone have any ideas where I am going wrong?
from PyPDF2 import PdfFileWriter, PdfFileReader
def create_watermark(input_pdf, output_pdf, watermark):
watermark_obj = PdfFileReader(watermark,False,)
watermark_page = watermark_obj.getPage(0)
pdf_reader = PdfFileReader(input_pdf)
pdf_writer = PdfFileWriter()
# Watermark all the pages
for page in range(pdf_reader.getNumPages()):
page = pdf_reader.getPage(page)
page.mergePage(watermark_page)
pdf_writer.addPage(page)
with open(input_pdf, 'wb') as out:
pdf_writer.write(out)
if __name__ == '__main__':
input_pdf = "C:\\Users\\A***\\OneDrive - ***\\Desktop\\Invoice hold\\Test\\1.pdf"
output_pdf = "C:\\Users\\A***\\OneDrive - ***\\Desktop\\Invoice hold\\Test\\1 WM.pdf"
watermark = "C:\\Users\\A***\\OneDrive - ***\\Desktop\\Invoice hold\\WM.pdf"
create_watermark(input_pdf,output_pdf,watermark)
If you want to save pdf file under the name of output_pdf,
try this :
result = open(output_pdf, 'wb')
pdf_writer.write(result)
your code :
with open(input_pdf, 'wb') as out:
pdf_writer.write(out)
Your code is to overwrite input_pdf.
And if there is a problem while working, the pdf file will be damaged.
I succeeded in inserting the watermark by applying your code and my proposed method.
I recommend checking if the pdf file is not damaged.
import PyPDF2
template = PyPDF2.PdfFileReader(open('super.pdf', 'rb'))
watermark = PyPDF2.PdfFileReader(open('wtr.pdf', 'rb'))
output = PyPDF2.PdfFileWriter()
for i in range(template.getNumPages()):
page = template.getPage(i)
page.mergePage(watermark.getPage(0))
output.addPage(page)
with open('watermarked_output.pdf', 'wb') as file:
output.write(file)
It is not showing an error but not showing a result as well
I'm new to python and really struggling with writing a program to download a CSV from this page
So far I have:
import csv
import requests
import os
output_dir = 'C:/Users/Moshe/Downloads'
output_file = 'Covid_19_uk_timeseries.csv'
CSV_URL = 'https://coronavirus.data.gov.uk/downloads/csv/coronavirus-deaths_latest.csv'
assert os.path.exists(output_dir)# test that we can write to output_dir
with requests.Session() as s:
download = s.get(CSV_URL)
decoded_content = download.content.decode('utf-8')
cr = csv.reader(decoded_content.splitlines(), delimiter=',')
my_list = list(cr)
for row in my_list:
print(row)
Its output show that it gets the CSV fine, and has access to the output directory. But for now I just want to save it as it is, and can't seem to work out how to do that.
def main():
import requests
url = "https://coronavirus.data.gov.uk/downloads/csv/coronavirus-deaths_latest.csv"
with requests.get(url, stream=True) as response:
response.raise_for_status()
with open("deaths_latest.csv", "wb") as file:
for chunk in response.iter_content(chunk_size=8192):
file.write(chunk)
file.flush()
print("Done")
return 0
if __name__ == "__main__":
import sys
sys.exit(main())
You can use
csv.writerows()
to write the csv file. Check this site for more. https://www.programiz.com/python-programming/writing-csv-files
I'm using python 2.7 and pycharm is my editor. What i'm trying to do is have python go to a site and download an image from that site and save it to my directory. Currently I have no errors but i don't think its downloading because the file is not showing in my directory.
import random
import urllib2
def download_web_image(url):
name = random.randrange(1,1000)
full_name = str(name) + ".jpg"
urllib2.Request(url, full_name)
download_web_image("www.example.com/page1/picture.jpg")
This will do the trick. The rest can stay the same, just edit your function to include the two lines I have added.
def download_web_image(url):
name = random.randrange(1,1000)
full_name = str(name) + ".jpg"
request = urllib2.Request(url)
img = urllib2.urlopen(request).read()
with open (full_name, 'w') as f: f.write(img)
Edit 1:
Exact code as requested in comments.
import urllib2
def download_web_image(url):
request = urllib2.Request(url)
img = urllib2.urlopen(request).read()
with open ('test.jpg', 'w') as f: f.write(img)
download_web_image("http://upload.wikimedia.org/wikipedia/commons/8/8c/JPEG_example_JPG_RIP_025.jpg")
You are simply creating a Request but you are not downloading the image. Try the following instead:
urllib.urlretrieve(url, os.path.join(os.getcwd(), full_name)) # download and save image
Or try the requests library:
import requests
image = requests.get("www.example.com/page1/picture.jpg")
with open('picture.jpg', 'wb') as f:
f.write(image.content)