Open PIL image from zip (Kaggle competition) - python

I am trying to read an image from a Kaggle competition (it is an old competition, but I would like to practice):
https://www.kaggle.com/competitions/dogs-vs-cats-redux-kernels-edition
I am trying to read images from the training file zip using this code:
def get_files_names(zip_file_path):
    """Return the names of all members stored in a zip archive.

    Args:
        zip_file_path: Location of the zip archive, passed straight to
            ``ZipFile``.

    Returns:
        The list produced by ``ZipFile.namelist()`` for that archive.
    """
    # The archive is only needed for the listing; the `with` block closes it
    # again as soon as the names have been read.
    with ZipFile(zip_file_path) as myzip:
        return myzip.namelist()
def get_image(zip_path, image_name):
    """Open one member of a zip archive with ``Image.open`` and return it.

    Args:
        zip_path: Location of the zip archive, passed to ``ZipFile``.
        image_name: Name of the archive member to open.

    Returns:
        Whatever ``Image.open`` returns for the opened member.
    """
    with ZipFile(zip_path) as myzip:
        # print(myzip.namelist()[:10])
        with myzip.open(image_name) as myfile:
            # img = Image.open(myfile)
            img = Image.open(myfile)
            # NOTE(review): both the member file and the archive are closed
            # when these `with` blocks exit, yet the traceback shows PIL
            # still reading from the file later (img.show() -> load() ->
            # fp.read()). That is presumably the cause of the
            # "'NoneType' object has no attribute 'seek'" error; calling
            # img.load() here, before returning, should avoid it.
            return img
# List the archive members, open the second one (index 1) and display it.
# `train_file_path` is not defined in this snippet.
names = get_files_names(train_file_path)
img = get_image(train_file_path, names[1])
# The traceback below starts at this call.
img.show()
I am getting this error:
Traceback (most recent call last):
File "/cats_vs_dogs/unrelated_file.py", line 46, in <module>
img.show()
File "/Users/user/.pyenv/versions/3.7.8-thesis/lib/python3.7/site-packages/PIL/Image.py", line 2205, in show
_show(self, title=title, command=command)
File "/Users/user/.pyenv/versions/3.7.8-thesis/lib/python3.7/site-packages/PIL/Image.py", line 3167, in _show
_showxv(image, **options)
File "/Users/user/.pyenv/versions/3.7.8-thesis/lib/python3.7/site-packages/PIL/Image.py", line 3181, in _showxv
ImageShow.show(image, title, **options)
File "/Users/user/.pyenv/versions/3.7.8-thesis/lib/python3.7/site-packages/PIL/ImageShow.py", line 56, in show
if viewer.show(image, title=title, **options):
File "/Users/user/.pyenv/versions/3.7.8-thesis/lib/python3.7/site-packages/PIL/ImageShow.py", line 81, in show
return self.show_image(image, **options)
File "/Users/user/.pyenv/versions/3.7.8-thesis/lib/python3.7/site-packages/PIL/ImageShow.py", line 107, in show_image
return self.show_file(self.save_image(image), **options)
File "/Users/user/.pyenv/versions/3.7.8-thesis/lib/python3.7/site-packages/PIL/ImageShow.py", line 103, in save_image
return image._dump(format=self.get_format(image), **self.options)
File "/Users/user/.pyenv/versions/3.7.8-thesis/lib/python3.7/site-packages/PIL/Image.py", line 636, in _dump
self.load()
File "/Users/user/.pyenv/versions/3.7.8-thesis/lib/python3.7/site-packages/PIL/ImageFile.py", line 247, in load
s = read(self.decodermaxblock)
File "/Users/user/.pyenv/versions/3.7.8-thesis/lib/python3.7/site-packages/PIL/JpegImagePlugin.py", line 400, in load_read
s = self.fp.read(read_bytes)
File "/Users/user/.pyenv/versions/3.7.8/lib/python3.7/zipfile.py", line 930, in read
data = self._read1(n)
File "/Users/user/.pyenv/versions/3.7.8/lib/python3.7/zipfile.py", line 998, in _read1
data += self._read2(n - len(data))
File "/Users/user/.pyenv/versions/3.7.8/lib/python3.7/zipfile.py", line 1030, in _read2
data = self._fileobj.read(n)
File "/Users/user/.pyenv/versions/3.7.8/lib/python3.7/zipfile.py", line 753, in read
self._file.seek(self._pos)
AttributeError: 'NoneType' object has no attribute 'seek'
If I extract the file into finder (using mac), then I see this image:
Also, if I try to convert the RGB image into a numpy array with np.array(img), I get this result:
What am I doing wrong?

Related

File contains data in an unknown format. (m4a load from librosa)

So I am currently working on a DNN that takes in m4a files. I have ffmpeg installed; it creates a few batches and then dies with this error:
Traceback (most recent call last):
File "/users/work/s163838/./main.py", line 126, in <module>
File "/users/work/s163838/./main.py", line 96, in main
print("e")
File "/apl/tryton/python/3.9.5/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 521, in __next__
data = self._next_data()
File "/apl/tryton/python/3.9.5/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1203, in _next_data
return self._process_data(data)
File "/apl/tryton/python/3.9.5/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1229, in _process_data
data.reraise()
File "/apl/tryton/python/3.9.5/lib/python3.9/site-packages/torch/_utils.py", line 425, in reraise
raise self.exc_type(msg)
EOFError: Caught EOFError in DataLoader worker process 0.
Original Traceback (most recent call last):
File "/users/kdm/s163838/.local/lib/python3.9/site-packages/librosa/core/audio.py", line 164, in load
y, sr_native = __soundfile_load(path, offset, duration, dtype)
File "/users/kdm/s163838/.local/lib/python3.9/site-packages/librosa/core/audio.py", line 195, in __soundfile_load
context = sf.SoundFile(path)
File "/users/kdm/s163838/.local/lib/python3.9/site-packages/soundfile.py", line 629, in __init__
self._file = self._open(file, mode_int, closefd)
File "/users/kdm/s163838/.local/lib/python3.9/site-packages/soundfile.py", line 1183, in _open
_error_check(_snd.sf_error(file_ptr),
File "/users/kdm/s163838/.local/lib/python3.9/site-packages/soundfile.py", line 1357, in _error_check
raise RuntimeError(prefix + _ffi.string(err_str).decode('utf-8', 'replace'))
RuntimeError: Error opening 'vox2/dev/aac/id08194/QnBYPze-x9A/00079.m4a': File contains data in an unknown format.
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/apl/tryton/python/3.9.5/lib/python3.9/site-packages/torch/utils/data/_utils/worker.py", line 287, in _worker_loop
data = fetcher.fetch(index)
File "/apl/tryton/python/3.9.5/lib/python3.9/site-packages/torch/utils/data/_utils/fetch.py", line 44, in fetch
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/apl/tryton/python/3.9.5/lib/python3.9/site-packages/torch/utils/data/_utils/fetch.py", line 44, in <listcomp>
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/users/work/s163838/vox_celeb_loader.py", line 53, in __getitem__
load(speaker2utt1, self.num_samples)
File "/users/work/s163838/vox_celeb_loader.py", line 13, in load
wav, sr = librosa.load(path, sr=16000)
File "/users/kdm/s163838/.local/lib/python3.9/site-packages/librosa/util/decorators.py", line 88, in inner_f
return f(*args, **kwargs)
File "/users/kdm/s163838/.local/lib/python3.9/site-packages/librosa/core/audio.py", line 170, in load
y, sr_native = __audioread_load(path, offset, duration, dtype)
File "/users/kdm/s163838/.local/lib/python3.9/site-packages/librosa/core/audio.py", line 226, in __audioread_load
reader = audioread.audio_open(path)
File "/users/kdm/s163838/.local/lib/python3.9/site-packages/audioread/__init__.py", line 111, in audio_open
return BackendClass(path)
File "/users/kdm/s163838/.local/lib/python3.9/site-packages/audioread/rawread.py", line 65, in __init__
self._file = aifc.open(self._fh)
File "/apl/tryton/python/3.9.5/lib/python3.9/aifc.py", line 917, in open
return Aifc_read(f)
File "/apl/tryton/python/3.9.5/lib/python3.9/aifc.py", line 358, in __init__
self.initfp(f)
File "/apl/tryton/python/3.9.5/lib/python3.9/aifc.py", line 314, in initfp
chunk = Chunk(file)
File "/apl/tryton/python/3.9.5/lib/python3.9/chunk.py", line 63, in __init__
raise EOFError
EOFError
I am using this command
wav, sr = librosa.load(path, sr=16000)
Is it just a broken file? If so, how do I skip such files? Or is it a problem with loading m4a files in general, even though ffmpeg is installed and a single m4a file gives the desired output when tested on its own?

Exception has occurred: OSError MoviePy error: failed to read the first frame of video file ****. That might mean that the file is corrupted

WHAT I'M DOING:
Looping a video for a certain amount of time.
However, I'm getting the following error (The file is not corrupt):
Exception has occurred: OSError
MoviePy error: failed to read the first frame of video file Pexels Videos 1292738.mp4. That might mean that the file is corrupted.
at the commented line in my code:
# Make the stock-footage directory the current working directory.
chdir(r'C:\Users\jack_l\Downloads\makeAVideo\stock')
# Take the third element (the file list, assuming `walk` is os.walk) of the
# first walk() result; the default tuple yields an empty list if nothing is
# produced.
myStock = next(walk(r'C:\Users\jack_l\Downloads\makeAVideo\stock'), (None, None, []))[2]
# The clip is opened from the first list entry only, with no directory
# prepended, so the path is relative to the current working directory.
stockFile = VideoFileClip(str(myStock[0]), target_resolution=(1080, 1920), audio=False)
stockFile = stockFile.loop(duration = 300)
stockFile = stockFile.set_fps(30)
# The working directory is changed again before the output is written.
chdir(r'C:\Users\jack_l\Downloads\makeAVideo')
stockFile.write_videofile('theVideo.mp4') # this line
Does anyone know what's going wrong? Any help is greatly appreciated, thank you.
THE FILE I'M USING:
https://drive.google.com/drive/folders/1n8ReLmPj8cIUi6og_GgmlRoumCMB7zIL?usp=sharing
FULL ERROR:
Traceback (most recent call last):
File "c:\Users\jack_l\Downloads\makeVideos.py", line 40, in <module>
stockFile.write_videofile('theVideo.mp4')
File "<decorator-gen-55>", line 2, in write_videofile
File "c:\users\jack_l\appdata\local\programs\python\python310\lib\site-packages\moviepy\decorators.py", line 54, in requires_duration
return f(clip, *a, **k)
File "<decorator-gen-54>", line 2, in write_videofile
File "c:\users\jack_l\appdata\local\programs\python\python310\lib\site-packages\moviepy\decorators.py", line 135, in use_clip_fps_by_default
return f(clip, *new_a, **new_kw)
File "<decorator-gen-53>", line 2, in write_videofile
File "c:\users\jack_l\appdata\local\programs\python\python310\lib\site-packages\moviepy\decorators.py", line 22, in convert_masks_to_RGB
return f(clip, *a, **k)
File "c:\users\jack_l\appdata\local\programs\python\python310\lib\site-packages\moviepy\video\VideoClip.py", line 300,
in write_videofile
ffmpeg_write_video(self, filename, fps, codec,
File "c:\users\jack_l\appdata\local\programs\python\python310\lib\site-packages\moviepy\video\io\ffmpeg_writer.py", line 220, in ffmpeg_write_video
for t,frame in clip.iter_frames(logger=logger, with_times=True,
File "c:\users\jack_l\appdata\local\programs\python\python310\lib\site-packages\moviepy\Clip.py", line 472, in iter_frames
frame = self.get_frame(t)
File "<decorator-gen-11>", line 2, in get_frame
File "c:\users\jack_l\appdata\local\programs\python\python310\lib\site-packages\moviepy\decorators.py", line 89, in wrapper
return f(*new_a, **new_kw)
File "c:\users\jack_l\appdata\local\programs\python\python310\lib\site-packages\moviepy\Clip.py", line 93, in get_frame return self.make_frame(t)
File "c:\users\jack_l\appdata\local\programs\python\python310\lib\site-packages\moviepy\Clip.py", line 136, in <lambda> newclip = self.set_make_frame(lambda t: fun(self.get_frame, t))
File "c:\users\jack_l\appdata\local\programs\python\python310\lib\site-packages\moviepy\Clip.py", line 187, in <lambda> return self.fl(lambda gf, t: gf(t_func(t)), apply_to,
File "<decorator-gen-11>", line 2, in get_frame
File "c:\users\jack_l\appdata\local\programs\python\python310\lib\site-packages\moviepy\decorators.py", line 89, in wrapper
return f(*new_a, **new_kw)
File "c:\users\jack_l\appdata\local\programs\python\python310\lib\site-packages\moviepy\Clip.py", line 93, in get_frame return self.make_frame(t)
File "c:\users\jack_l\appdata\local\programs\python\python310\lib\site-packages\moviepy\video\io\VideoFileClip.py", line 113, in <lambda>
self.make_frame = lambda t: self.reader.get_frame(t)
File "c:\users\jack_l\appdata\local\programs\python\python310\lib\site-packages\moviepy\video\io\ffmpeg_reader.py", line 184, in get_frame
result = self.read_frame()
File "c:\users\jack_l\appdata\local\programs\python\python310\lib\site-packages\moviepy\video\io\ffmpeg_reader.py", line 133, in read_frame
raise IOError(("MoviePy error: failed to read the first frame of "
OSError: MoviePy error: failed to read the first frame of video file Pexels Videos 1292738.mp4. That might mean that the file is corrupted. That may also mean that you are using a deprecated version of FFMPEG. On Ubuntu/Debian for instance
the version in the repos is deprecated. Please update to a recent version from the website.
You change the working directory before calling write_videofile, and this can cause the problem.
The code can be "lazy": it may NOT read the file when you create the VideoFileClip(), but only later, when you write the new file after changing directory, so it may then try to read the file from the new location.
You should use /full/path/to/Pexels Videos 1292738.mp4 instead of using chdir()
Full working code:
import os
from moviepy.editor import *
# --- info ---
import moviepy
print('moviepy:', moviepy.__version__)
print('ffmpeg :', moviepy.config.FFMPEG_BINARY)
# --- main ---
# Source-clip directory and output directory, both given as full paths.
input_dir = r'C:\Users\jack_l\Downloads\makeAVideo\stock'
output_dir = r'C:\Users\jack_l\Downloads\makeAVideo'
#print('chdir:', input_dir)
#os.chdir(input_dir)
# First os.walk() result for input_dir; the default tuple keeps `files`
# empty when walk() yields nothing.
root, dirs, files = next(os.walk(input_dir), (None, None, []))
#print(files)
if files:
    # Build full input/output paths so that nothing depends on the current
    # working directory (the chdir() calls stay commented out).
    #input_path = os.path.join(root, files[0])
    input_path = os.path.join(input_dir, files[0])
    output_path = os.path.join(output_dir, 'theVideo.mp4')
    print('input :', input_path)
    print('output:', output_path)
    stock_file = VideoFileClip(input_path, target_resolution=(1080, 1920), audio=False)
    stock_file = stock_file.loop(duration=300)
    stock_file = stock_file.set_fps(30)
    #print('chdir:', output_dir)
    #os.chdir(output_dir)
    stock_file.write_videofile(output_path)

How to read PDF from file storage object in pdf2image?

I am working with flask, where I am uploading a pdf file to convert it to an image and perform OCR using pytesseract.
However, pdf2image is not able to read the uploaded file.
I tried searching on the internet but I could not find anything.
I tried passing the file storage object directly, but am getting an error, my code looks like this:
# Uploaded file object taken from the "pdf" field of the request.
log_file = request.files.get('pdf')
# NOTE(review): the upload object itself is passed where convert_from_path
# wants a path; this is the call that raises the TypeError quoted below.
images = convert_from_path(log_file)
text = ""
# Run OCR on each returned image and append the recognised words to `text`.
for img in images:
    im = img
    ocr_dict = pytesseract.image_to_data(im, lang='eng', output_type=Output.DICT)
    text += " ".join(ocr_dict['text'])
cleaned_text = clean_text(txt=text)
which gives this error,
**TypeError: expected str, bytes or os.PathLike object, not FileStorage**
I also tried doing,
# Uploaded file object taken from the "pdf" field of the request.
log_file = request.files.get('pdf')
# NOTE(review): log_file.read() yields the file's content, but
# convert_from_path treats its argument as a path; the traceback below
# ("embedded null byte") comes from this call.
images = convert_from_path(log_file.read())
text = ""
# Run OCR on each returned image and append the recognised words to `text`.
for img in images:
    im = img
    ocr_dict = pytesseract.image_to_data(im, lang='eng', output_type=Output.DICT)
    text += " ".join(ocr_dict['text'])
cleaned_text = clean_text(txt=text)
which gives error:
Traceback (most recent call last):
File "/usr/local/lib/python3.8/dist-packages/pdf2image/pdf2image.py", line 458, in pdfinfo_from_path
proc = Popen(command, env=env, stdout=PIPE, stderr=PIPE)
File "/usr/lib/python3.8/subprocess.py", line 858, in __init__
self._execute_child(args, executable, preexec_fn, close_fds,
File "/usr/lib/python3.8/subprocess.py", line 1639, in _execute_child
self.pid = _posixsubprocess.fork_exec(
ValueError: embedded null byte
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/lib/python3.8/dist-packages/flask/app.py", line 1516, in full_dispatch_request
rv = self.dispatch_request()
File "/usr/local/lib/python3.8/dist-packages/flask/app.py", line 1502, in dispatch_request
return self.ensure_sync(self.view_functions[rule.endpoint])(**req.view_args)
File "/usr/local/lib/python3.8/dist-packages/flask_restful/__init__.py", line 467, in wrapper
resp = resource(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/flask/views.py", line 84, in view
return current_app.ensure_sync(self.dispatch_request)(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/flask_restful/__init__.py", line 582, in dispatch_request
resp = meth(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/flask_httpauth.py", line 172, in decorated
return self.ensure_sync(f)(*args, **kwargs)
File "/home/ubuntu/Credit_Scoring/API_Script/temp2.py", line 38, in post
json_text = coi_ocr.get_coi_ocr_text()
File "/home/ubuntu/Credit_Scoring/API_Script/ocr_script/certificate_of_incorporation/coi_ocr_script_pdf.py", line 51, in get_coi_ocr_text
text1 = self.extract_text_from_COI()
File "/home/ubuntu/Credit_Scoring/API_Script/ocr_script/certificate_of_incorporation/coi_ocr_script_pdf.py", line 16, in extract_text_from_COI
images = convert_from_path(self.fl)
File "/usr/local/lib/python3.8/dist-packages/pdf2image/pdf2image.py", line 98, in convert_from_path
page_count = pdfinfo_from_path(pdf_path, userpw, poppler_path=poppler_path)["Pages"]
File "/usr/local/lib/python3.8/dist-packages/pdf2image/pdf2image.py", line 489, in pdfinfo_from_path
"Unable to get page count.\n%s" % err.decode("utf8", "ignore")
UnboundLocalError: local variable 'err' referenced before assignment
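Okay, it turns out I need to use convert_from_bytes instead of convert_from_path, because log_file.read() returns the PDF's raw bytes rather than a path, e.g. images = convert_from_bytes(log_file.read()).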

Solving IOError while generating a pdf using reportlab and generated qr code image

I am trying to generate a qr code from text, and then insert into a reportlab pdf.
My code:
def qr_code_as_image(text):
    """Generate a QR code for *text* and wrap the result in an ``Image``.

    Args:
        text: The content to encode; handed to ``generate_qr_code``.

    Returns:
        The ``Image`` object built from the generator's output.
    """
    from io import BytesIO
    print("In show_qr")
    img = generate_qr_code(text)
    print(img, type(img))
    # NOTE(review): BytesIO is imported above but never used. The error
    # output shows raw PNG bytes being treated as a file name, so
    # Image(BytesIO(img)) is presumably what was intended here.
    i = Image(img)
    print(i, type(i))
    return i
def add_patient_header_with_qr(self):
    """Append a header row (patient details table plus QR code) to ``self.elements``."""
    # Three rows of label/value pairs for the details table.
    line1 = ("Name", self.linkedcustomer.name,
             "Age", self.linkedcustomer.age())
    line2 = ("MRD No.", self.linkedcustomer.cstid,
             "Date", self.prescription_time)
    line3 = ("No.", "#", "Doctor", self.doc.name)
    datatb = [line1, line2, line3]
    patientdetailstable = Table(datatb)
    patientdetailstable.setStyle(self.patientdetails_style)
    col1 = patientdetailstable
    # The QR code encodes the URL that reverse() resolves for this check-in.
    checkin_url = reverse('clinicemr', args=[self.checkin.checkinno])
    qr_image = qr_code_as_image(checkin_url)
    qr_image.hAlign = 'LEFT'
    col2 = Table([[qr_image]])
    # Outer one-row table: details table in the first cell, QR code in the second.
    tblrow1 = Table([[col1, col2]], colWidths=None)
    tblrow1.setStyle(self.table_left_top_align)
    self.elements.append(tblrow1)
def final_generate(self, footer_content, action=None):
    """Build the PDF from ``self.elements`` into a temporary file.

    Args:
        footer_content: Passed as ``content`` to the ``footer`` page callback.
        action: Not used in the portion of the method shown here.
    """
    with NamedTemporaryFile(mode='w+b') as temp:
        from django.http import FileResponse, Http404
        from functools import partial
        # use the temp file
        # Debug output: run `cat` on the temp file and print the value
        # os.system() returns.
        cmd = "cat " + str(temp.name)
        print(os.system(cmd))
        print(footer_content, type(footer_content))
        doc = SimpleDocTemplate(
            temp.name,
            pagesize=A4,
            rightMargin=20,
            leftMargin=20,
            topMargin=20,
            bottomMargin=80,
            allowSplitting=1,
            title="Prescription",
            author="System.com")
        # A single frame sized from the document's margins, width and height.
        frame = Frame(doc.leftMargin, doc.bottomMargin, doc.width, doc.height,
                      id='normal')
        template = PageTemplate(
            id='test',
            frames=frame,
            onPage=partial(footer, content=footer_content)
        )
        doc.addPageTemplates([template])
        # The same footer callback is used for the first and all later pages.
        doc.build(self.elements,
                  onFirstPage=partial(footer, content=footer_content),
                  onLaterPages=partial(footer, content=footer_content)
                  )
        print(f'Generated {temp.name}')
I get the following output:
2020-11-29 13: 06: 33, 915 django.request ERROR Internal Server Error: / clinic/presc/k-0NGpApcg
Traceback(most recent call last):
File "/home/joel/myappointments/venv/lib/python3.6/site-packages/reportlab/lib/utils.py", line 655, in open_for_read
return open_for_read_by_name(name, mode)
File "/home/joel/myappointments/venv/lib/python3.6/site-packages/reportlab/lib/utils.py", line 599, in open_for_read_by_name
return open(name, mode)
ValueError: embedded null byte
During handling of the above exception, another exception occurred:
Traceback(most recent call last):
File "/home/joel/myappointments/venv/lib/python3.6/site-packages/reportlab/lib/utils.py", line 658, in open_for_read
return getBytesIO(datareader(name) if name[:5].lower() == 'data:' else urlopen(name).read())
File "/usr/lib/python3.6/urllib/request.py", line 223, in urlopen
return opener.open(url, data, timeout)
File "/usr/lib/python3.6/urllib/request.py", line 517, in open
req.timeout = timeout
AttributeError: 'bytes' object has no attribute 'timeout'
During handling of the above exception, another exception occurred:
Traceback(most recent call last):
File "/home/joel/myappointments/venv/lib/python3.6/site-packages/django/core/handlers/exception.py", line 34, in inner
response = get_response(request)
File "/home/joel/myappointments/venv/lib/python3.6/site-packages/django/core/handlers/base.py", line 115, in _get_response
response = self.process_exception_by_middleware(e, request)
File "/home/joel/myappointments/venv/lib/python3.6/site-packages/django/core/handlers/base.py", line 113, in _get_response
response = wrapped_callback(request, *callback_args, **callback_kwargs)
File "/home/joel/myappointments/clinic/views.py", line 6879, in GoGetPrescription
clinicobj = clinicobj,
File "/home/joel/myappointments/clinic/views.py", line 16222, in PDFPrescriptions
return prescription.generate_pdf(action=action, rating=True)
File "/home/joel/myappointments/clinic/views.py", line 15415, in generate_pdf
return self.final_generate(footer_content, action=action)
File "/home/joel/myappointments/clinic/views.py", line 15447, in final_generate
onLaterPages = partial(footer, content=footer_content)
File "/home/joel/myappointments/venv/lib/python3.6/site-packages/reportlab/platypus/doctemplate.py", line 1291, in build
BaseDocTemplate.build(self, flowables, canvasmaker=canvasmaker)
File "/home/joel/myappointments/venv/lib/python3.6/site-packages/reportlab/platypus/doctemplate.py", line 1056, in build
self.handle_flowable(flowables)
File "/home/joel/myappointments/venv/lib/python3.6/site-packages/reportlab/platypus/doctemplate.py", line 912, in handle_flowable
if frame.add(f, canv, trySplit=self.allowSplitting):
File "/home/joel/myappointments/venv/lib/python3.6/site-packages/reportlab/platypus/frames.py", line 174, in _add
w, h = flowable.wrap(aW, h)
File "/home/joel/myappointments/venv/lib/python3.6/site-packages/reportlab/platypus/tables.py", line 1206, in wrap
self._calc(availWidth, availHeight)
File "/home/joel/myappointments/venv/lib/python3.6/site-packages/reportlab/platypus/tables.py", line 641, in _calc
W = self._calcPreliminaryWidths(availWidth) # widths
File "/home/joel/myappointments/venv/lib/python3.6/site-packages/reportlab/platypus/tables.py", line 754, in _calcPreliminaryWidths
new = elementWidth(value, style) or 0
File "/home/joel/myappointments/venv/lib/python3.6/site-packages/reportlab/platypus/tables.py", line 518, in _elementWidth
w = v.minWidth() # should be all flowables
File "/home/joel/myappointments/venv/lib/python3.6/site-packages/reportlab/platypus/tables.py", line 873, in minWidth
style.leftPadding+style.rightPadding)
File "/home/joel/myappointments/venv/lib/python3.6/site-packages/reportlab/platypus/tables.py", line 512, in _elementWidth
if hasattr(v, 'drawWidth') and isinstance(v.drawWidth, (int, float)): return v.drawWidth
File "/home/joel/myappointments/venv/lib/python3.6/site-packages/reportlab/platypus/flowables.py", line 494, in __getattr__
self._setup_inner()
File "/home/joel/myappointments/venv/lib/python3.6/site-packages/reportlab/platypus/flowables.py", line 455, in _setup_inner
img=self._img
File "/home/joel/myappointments/venv/lib/python3.6/site-packages/reportlab/platypus/flowables.py", line 488, in __getattr__
self._img=ImageReader(self._file)
File "/home/joel/myappointments/venv/lib/python3.6/site-packages/reportlab/lib/utils.py", line 813, in __init__
annotateException('\nfileName=%r identity=%s' %
(fileName, self.identity()))
File "/home/joel/myappointments/venv/lib/python3.6/site-packages/reportlab/lib/utils.py", line 1394, in annotateException
rl_reraise(t, v, b)
File "/home/joel/myappointments/venv/lib/python3.6/site-packages/reportlab/lib/utils.py", line 147, in rl_reraise
raise v
File "/home/joel/myappointments/venv/lib/python3.6/site-packages/reportlab/lib/utils.py", line 777, in __init__
self.fp=open_for_read(fileName, 'b')
File "/home/joel/myappointments/venv/lib/python3.6/site-packages/reportlab/lib/utils.py", line 660, in open_for_read
raise IOError('Cannot open resource "%s"' % name)
OSError: Cannot open resource "b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\xd2\x00\x00\x00\xd2\x01\x00\x00\x00\x00\x17\xe2\xa3\xef\x00\x00\x01$IDATx\x9c\xed\x98An\xc4 \x10\x04k\x16\xdf\xf1\x8f\xe0gyS~\x80\x9f\x92\x1f\xe0;\xab\xde\x03\xc6\xeb\x1c"\xe5d\xd0\xda\x1c\xd0 #\xcbRk4\x9e\xee\x01\xb6\xe5$\xa9 \xe5v\xc3\x83\xbf\xd7\xe7cA\x92\x94\xc1"N\x16k\x86\xa4\xd1x\x9e\x8bA\xc8#\xc8NJ\xbe e\'\xc0]</\x07\xccb\xdb\xfas\xe9\x8eM\xefP\xac\x13b\xed\xc6e0\xccKJ\x80\xd9\xecd\x11\x90T\xfap\x19\x06\xdb\x97\x13!;\xd5v\xd3\x87\xcbH\xd8\xa4=\x14\xeb\xd3\xc0\xb7\x9be$\x9e\x9d\xea\xa5V\x89/u\xab\xca\x94F\xe2yz^\x94\xbc\x04^\xda\x8ePe{,\x9e}\xeaE\xe9\xed\xe6\xe0\xae\x17\xa0\xa6#\xf9\xb2\x9b;\xe9\x1f\xdf}:\xc6A\x80v=\xbau\xba\xd5\xcb\xef_hk7#\xf1\xec\xee_\xf0\x92\x94\x9d.\xde_\xda<\xdd\xde\x19R\xb5\xbfW\xcf\xcb\x03V3\x8b\xb4\x911\xfc\x98\xd9\xd7\xe5\xfb\xcb\x11[f\'\x96\x19\x80\xa7\x8d\xcb\xf3\x0c\xec0O\x13\xbe\xa7b\xf8\x0c\x8b\xdd\xben\xef/\xc0\xf68\xb5E#\xf1\xec\xaaGU\xac\x9d\x08\xba\xba\xdf}\x01<\xf7\xbf\x8cN\xed-\x8a\x00\x00\x00\x00IEND\xaeB`\x82'"
fileName=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\xd2\x00\x00\x00\xd2\x01\x00\x00\x00\x00\x17\xe2\xa3\xef\x00\x00\x01$IDATx\x9c\xed\x98An\xc4 \x10\x04k\x16\xdf\xf1\x8f\xe0gyS~\x80\x9f\x92\x1f\xe0;\xab\xde\x03\xc6\xeb\x1c"\xe5d\xd0\xda\x1c\xd0#\xcbRk4\x9e\xee\x01\xb6\xe5$\xa9 \xe5v\xc3\x83\xbf\xd7\xe7cA\x92\x94\xc1"N\x16k\x86\xa4\xd1x\x9e\x8bA\xc8#\xc8NJ\xbe e\'\xc0]</\x07\xccb\xdb\xfas\xe9\x8eM\xefP\xac\x13b\xed\xc6e0\xccKJ\x80\xd9\xecd\x11\x90T\xfap\x19\x06\xdb\x97\x13!;\xd5v\xd3\x87\xcbH\xd8\xa4=\x14\xeb\xd3\xc0\xb7\x9be$\x9e\x9d\xea\xa5V\x89/u\xab\xca\x94F\xe2yz^\x94\xbc\x04^\xda\x8ePe{,\x9e}\xeaE\xe9\xed\xe6\xe0\xae\x17\xa0\xa6#\xf9\xb2\x9b;\xe9\x1f\xdf}:\xc6A\x80v=\xbau\xba\xd5\xcb\xef_hk7#\xf1\xec\xee_\xf0\x92\x94\x9d.\xde_\xda<\xdd\xde\x19R\xb5\xbfW\xcf\xcb\x03V3\x8b\xb4\x911\xfc\x98\xd9\xd7\xe5\xfb\xcb\x11[f\'\x96\x19\x80\xa7\x8d\xcb\xf3\x0c\xec0O\x13\xbe\xa7b\xf8\x0c\x8b\xdd\xben\xef/\xc0\xf68\xb5E#\xf1\xec\xaaGU\xac\x9d\x08\xba\xba\xdf}\x01<\xf7\xbf\x8cN\xed-\x8a\x00\x00\x00\x00IEND\xaeB`\x82' identity=[ImageReader#0x7f1e0987ecf8 filename=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\xd2\x00\x00\x00\xd2\x01\x00\x00\x00\x00\x17\xe2\xa3\xef\x00\x00\x01$IDATx\x9c\xed\x98An\xc4 \x10\x04k\x16\xdf\xf1\x8f\xe0gyS~\x80\x9f\x92\x1f\xe0;\xab\xde\x03\xc6\xeb\x1c"\xe5d\xd0\xda\x1c\xd0#\xcbRk4\x9e\xee\x01\xb6\xe5$\xa9 \xe5v\xc3\x83\xbf\xd7\xe7cA\x92\x94\xc1"N\x16k\x86\xa4\xd1x\x9e\x8bA\xc8#\xc8NJ\xbe 
e\'\xc0]</\x07\xccb\xdb\xfas\xe9\x8eM\xefP\xac\x13b\xed\xc6e0\xccKJ\x80\xd9\xecd\x11\x90T\xfap\x19\x06\xdb\x97\x13!;\xd5v\xd3\x87\xcbH\xd8\xa4=\x14\xeb\xd3\xc0\xb7\x9be$\x9e\x9d\xea\xa5V\x89/u\xab\xca\x94F\xe2yz^\x94\xbc\x04^\xda\x8ePe{,\x9e}\xeaE\xe9\xed\xe6\xe0\xae\x17\xa0\xa6#\xf9\xb2\x9b;\xe9\x1f\xdf}:\xc6A\x80v=\xbau\xba\xd5\xcb\xef_hk7#\xf1\xec\xee_\xf0\x92\x94\x9d.\xde_\xda<\xdd\xde\x19R\xb5\xbfW\xcf\xcb\x03V3\x8b\xb4\x911\xfc\x98\xd9\xd7\xe5\xfb\xcb\x11[f\'\x96\x19\x80\xa7\x8d\xcb\xf3\x0c\xec0O\x13\xbe\xa7b\xf8\x0c\x8b\xdd\xben\xef/\xc0\xf68\xb5E#\xf1\xec\xaaGU\xac\x9d\x08\xba\xba\xdf}\x01<\xf7\xbf\x8cN\xed-\x8a\x00\x00\x00\x00IEND\xaeB`\x82']
From the error, it appears that it is erroring out on getting the name of the image file. But there is no file. The image is being generated from BytesIO.
Your generate_qr_code function, which you did not show us, is NOT returning a BytesIO object. It's returning the raw bytes of the PNG image. When you print(img, type(img)), it told you it was of type "bytes", right? That's a string of bytes, not a BytesIO object. If you wrap those bytes into a BytesIO object, then the reportlab Image constructor will be able to handle it.

how to extract text from web gif file using python

I am trying to extract text from a GIF image using the code below. It works for PNG images but not for GIFs.
import pytesseract
import io
import requests
from PIL import Image
# Download the GIF; note that `url` holds the response object, not a URL string.
url = requests.get('http://article.sapub.org/email/10.5923.j.aac.20190902.01.gif')
# Open the downloaded bytes directly from memory via BytesIO.
img = Image.open(io.BytesIO(url.content))
# The opened GIF is handed to pytesseract as-is; the traceback below starts here.
text = pytesseract.image_to_string(img)
print(text)
getting this error
C:\python\lib\site-packages\PIL\Image.py:1048: UserWarning: Couldn't allocate palette entry for transparency
warnings.warn("Couldn't allocate palette entry for transparency")
Traceback (most recent call last):
File "D:/elifesciences/prox.py", line 8, in <module>
text = pytesseract.image_to_string(img)
File "C:\python\lib\site-packages\pytesseract\pytesseract.py", line 345, in image_to_string
}[output_type]()
File "C:\python\lib\site-packages\pytesseract\pytesseract.py", line 344, in <lambda>
Output.STRING: lambda: run_and_get_output(*args),
File "C:\python\lib\site-packages\pytesseract\pytesseract.py", line 242, in run_and_get_output
temp_name, input_filename = save_image(image)
File "C:\python\lib\site-packages\pytesseract\pytesseract.py", line 173, in save_image
image.save(input_file_name, format=extension, **image.info)
File "C:\python\lib\site-packages\PIL\Image.py", line 2088, in save
save_handler(self, fp, filename)
File "C:\python\lib\site-packages\PIL\GifImagePlugin.py", line 507, in _save
_write_single_frame(im, fp, palette)
File "C:\python\lib\site-packages\PIL\GifImagePlugin.py", line 414, in _write_single_frame
_write_local_header(fp, im, (0, 0), flags)
File "C:\python\lib\site-packages\PIL\GifImagePlugin.py", line 532, in _write_local_header
transparency = int(transparency)
TypeError: int() argument must be a string, a bytes-like object or a number, not 'tuple'
Process finished with exit code 1
The idea is to convert each of the frames to an RGBA image before performing the OCR on them, as shown below -
# Run OCR on every frame of the GIF, one frame at a time.
for frame in range(0,img.n_frames):
    # Select the frame, then convert it to RGBA so that pytesseract receives
    # the converted copy rather than the original GIF image.
    img.seek(frame)
    imgrgb = img.convert('RGBA')
    imgrgb.show()
    text = pytesseract.image_to_string(imgrgb)
    print(text)
Working sample - https://colab.research.google.com/drive/1ctjk3hH0HUaWv0st6UpTY-oo9C9YCQdw

Categories

Resources