How to analyze multiple images in a folder using a loop? - python

I am using google cloud vision api in python.
I am a Python beginner. So, I am struggling to implement the content I want to analyze in Python code.
It's a simple thing, but if you can help me, I'd appreciate it.
I want to do label detection in Google Cloud Vision.
I've done loading a single image and implementing the code, but I want to run it on an entire folder containing multiple images.
# Run label detection on a single image and show the result as a table.
# `io`, `pd` (pandas), `vision` and `client` are expected to be set up earlier.
file_name = r'img_3282615_1.jpg'
# Backslashes are doubled so the Windows-style path has no invalid escapes;
# the resulting string is unchanged.
image_path = f'.\\save_img\\{file_name}'

with io.open(image_path, 'rb') as image_file:
    content = image_file.read()

image = vision.Image(content=content)
response = client.label_detection(image=image, max_results=100)
labels = response.label_annotations

# Collect every row first and build the frame once: DataFrame.append was
# removed in pandas 2.0.
rows = [
    {
        'description': label.description,
        'score': label.score,
        'topicality': label.topicality,
    }
    for label in labels
]
df = pd.DataFrame(rows, columns=['description', 'score', 'topicality'])
print(df)
I've tried analyzing individual images using this code.
Here I would like to do the following steps.
Open the folder
Analyze label detection for all images in the folder(The image names are 'img_3282615_1.jpg', 'img_3282615_2.jpg', 'img_3282615_3.jpg', 'img_1115368_1.jpg', 'img_1115368_2.jpg' ...)
Saving the result as csv (image name, description, score)
I studied that it is possible to repeat using the for statement, but it is difficult to actually write in code. Because I'm just starting to deal with python and lack the basics.
Your answer can be of great help to me.
thank you:)

Can you try this:
from google.cloud import vision
import os
import csv

# Create a client for the Cloud Vision API
client = vision.ImageAnnotatorClient()

# Set the path to the folder containing the images
folder_path = './image_for_text/'
# One CSV row per (image, label) pair, so every row matches this header.
fields = ['image_name', 'description', 'score', 'topicality']
filename_CSV = "./z.csv"

# Results are appended across runs, so only write the header when the CSV
# does not exist yet or is still empty.
write_header = (
    not os.path.exists(filename_CSV) or os.path.getsize(filename_CSV) == 0
)

# Open the CSV once for the whole run; newline='' is what the csv module
# expects and avoids blank lines between rows on Windows.
with open(filename_CSV, 'a', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    if write_header:
        writer.writerow(fields)

    # Loop through all the files in the folder (sorted for a stable order)
    for filename in sorted(os.listdir(folder_path)):
        # Skip anything that is not an image
        if not filename.endswith(('.jpg', '.png')):
            continue

        # Build the full path to the image and read it into memory
        file_path = os.path.join(folder_path, filename)
        with open(file_path, 'rb') as image_file:
            content = image_file.read()

        # Create a vision image from the binary data
        image = vision.Image(content=content)
        # Perform label detection on the image
        response = client.label_detection(image=image)
        if response.error.message:
            # Report the API error and keep going with the remaining images
            print(f'Skipping {filename}: {response.error.message}')
            continue

        # Print and store the labels for the image
        print(f'Labels for {filename}:')
        for label in response.label_annotations:
            row = [
                filename,
                label.description,
                f'{label.score*100:.2f}%',
                label.topicality,
            ]
            print(row)
            writer.writerow(row)

Related

problem with changing the orientation of the text when merging PyPDF2

If someone has already asked about this, I apologize in advance, as I could not find a similar problem
I use PyPDF2 to merge two vertical PDF files, but in the result the text from the watermark file is rotated by 90 degrees. I tried rotating the watermark file, and also entering the text vertically from the start, but those solutions either did not fit my case or did not work. When I merge regular files everything is fine; the bug occurs only when working with the target drawings.
It turns out this:
a fragment was inserted:Color ral 456 etc
# Merge the first page of a watermark PDF onto the first page of a source PDF.
# NOTE(review): output_file is the same path as input_file, so the source is
# overwritten while it is still open for reading -- confirm this is intended.
input_file = "main/static/main/plans/Timeplan.pdf"
output_file = "main/static/main/plans/Timeplan.pdf"
watermark_file = "main/static/main/plans/WatermarkV.pdf"

with open(input_file, "rb") as filehandle_input:
    # read the contents of the source file
    pdf = PyPDF2.PdfFileReader(filehandle_input, strict=False)

    with open(watermark_file, "rb") as filehandle_watermark:
        # read the watermark content
        mark = PyPDF2.PdfFileReader(filehandle_watermark, strict=False)

        # get the first page of the original PDF
        first_page = pdf.getPage(0)
        # get the first page of the PDF watermark
        first_page_watermark = mark.getPage(0)
        # merge two pages
        first_page.mergePage(first_page_watermark)

        # create a PDF writer object for the output file
        pdf_writer = PyPDF2.PdfFileWriter()
        # add a page
        pdf_writer.addPage(first_page)

        # Both input handles stay open until the merged page is written.
        with open(output_file, "wb") as filehandle_output:
            # write a watermarked file to a new file
            pdf_writer.write(filehandle_output)
I do not know the cause of the problem.
Thank you in advance

Using Python, how to extract text and images from PDF + color strings and numbers from the output txt file

Using Python, I would like to
extract text from a PDF into a txt file (done)
color all numbers and specific strings of the txt file like this example (https://tex.stackexchange.com/questions/521383/how-to-highlight-numbers-only-outside-a-string-in-lstlisting) (not done)
Translate using Google translator all text to EN (not done)
extract images from the PDF file into PNGs/or a new PDF file containing all of the images (not done)
To perform 1. I used the following code which is working
pip install PyPDF2
from PyPDF2 import PdfFileReader, PdfFileWriter

# Dump the text of every page of the PDF into a plain-text file.
file_path = 'AR_Finland_2021.pdf'
pdf = PdfFileReader(file_path)

# An explicit encoding keeps non-ASCII characters from failing on platforms
# whose default encoding cannot represent them. The with-block closes the
# file, so no explicit close() is needed.
with open('AR_Finland_2021.txt', 'w', encoding='utf-8') as f:
    for page_num in range(pdf.numPages):
        pageObj = pdf.getPage(page_num)
        try:
            txt = pageObj.extractText()
        except Exception as exc:
            # Report the page that could not be extracted instead of
            # silently dropping it, then carry on with the next page.
            print(f'Page {page_num + 1}: text extraction failed ({exc})')
            continue

        print(''.center(100, '-'))
        f.write('Page {0}\n'.format(page_num+1))
        f.write(''.center(100, '-'))
        f.write(txt)
To perform 3 (extract images) I tried the following code but always get an error.
pip install PyMuPDF Pillow
pip install PyMuPDF
pip install python-gettext
import fitz
import io
from PIL import Image

# file path you want to extract images from
file = "AR_Finland_2021.pdf"
# open the file
pdf_file = fitz.open(file)

# iterate over PDF pages
for page_index in range(len(pdf_file)):
    # get the page itself
    page = pdf_file[page_index]
    # NOTE: this is the call that raises the AttributeError quoted below.
    # The list is fetched once and reused for the loop further down.
    image_list = page.getImageList()
    # printing number of images found in this page
    if image_list:
        print(f"[+] Found a total of {len(image_list)} images in page {page_index}")
    else:
        print("[!] No images found on page", page_index)

    for image_index, img in enumerate(image_list, start=1):
        # get the XREF of the image
        xref = img[0]
        # extract the image bytes
        base_image = pdf_file.extractImage(xref)
        image_bytes = base_image["image"]
        # get the image extension
        image_ext = base_image["ext"]
        # load it to PIL
        image = Image.open(io.BytesIO(image_bytes))
        # save it to local disk; passing the path lets PIL open and close
        # the output file itself instead of leaking an open handle
        image.save(f"image{page_index+1}_{image_index}.{image_ext}")
Error:
----> 5 image_list = page.getImageList()
AttributeError: 'Page' object has no attribute 'getImageList'
Would someone know how to perform 3 (extract images) and 2 (color numbers and certain strings from the txt file extracted from the PDF)?
You can do:
import fitz

# Walk every page of the PDF and pull out the raw bytes of each image.
doc = fitz.open("AR_Finland_2021.pdf")
for page in doc:
    for img_tuple in page.get_images():
        # The first item of each tuple is passed to extract_image()
        img_dict = doc.extract_image(img_tuple[0])
        img_bytes = img_dict['image']
        # Do whatever you want with it
See Page.get_images() and Document.extract_image()
To write these images into a new pdf:
# fitz.open() with no arguments creates a new, empty PDF; passing a path
# would try to open an existing file instead.
new_doc = fitz.open()
page = new_doc.new_page()
# Rectangle on the page that the image is placed into
img_location = fitz.Rect(100, 100, 200, 200)
page.insert_image(img_location, stream=img_bytes)
# Write the new document to disk
new_doc.save("/path/to/new/pdf")
See Rect for different ways to construct the rectangle, but you probably want to use img_tuple[1] from earlier. Again look at get_page_images to see the data available to you there.

recommended way to download images in python requests

I see that there are two ways to download images using python-requests.
Using PIL as stated in docs (https://requests.readthedocs.io/en/master/user/quickstart/#binary-response-content):
from PIL import Image
from io import BytesIO
# Wrap the response body in an in-memory buffer and open it as an image.
# NOTE(review): `r` is not defined in this snippet -- presumably the response
# of an earlier requests.get(...) call.
i = Image.open(BytesIO(r.content))
using streamed response content:
# Stream the response to disk instead of holding the whole body in memory.
r = requests.get(url, stream=True)
# Fail early on an HTTP error rather than saving an error page as the image.
r.raise_for_status()
with open(image_name, 'wb') as f:
    # iter_content() defaults to 1-byte chunks; use a sensible block size.
    for chunk in r.iter_content(chunk_size=8192):
        f.write(chunk)
Which is the recommended way to download images, however? Both have their merits, I suppose, and I was wondering what the optimal approach is.
I love the minimalist way. There is nothing called right way. It depends on the task you want to perform and the constraints you have.
import requests

# Download the whole response body and write it to disk in one go.
with open('file.png', 'wb') as f:
    f.write(requests.get(url).content)
# if you change png to jpg, there will be no error
I did use the below lines of code in a function to save images.
# import the required libraries from Python
import os
import pathlib
import urllib.parse
import urllib.request
import uuid

# URL of the image you want to download
image_url = "https://example.com/image.png"
# Using the uuid generate new and unique names for your images
filename = str(uuid.uuid4())
# Take the extension from the URL's path component only, so a query string
# or fragment (e.g. "image.png?size=large") does not end up in the file name
url_path = urllib.parse.urlparse(image_url).path
file_ext = pathlib.PurePosixPath(url_path).suffix
# Join the new image name to the extension
picture_filename = filename + file_ext
# Using pathlib, specify where the image is to be saved
downloads_path = str(pathlib.Path.home() / "Downloads")
# Form a full image path by joining the path to the
# images' new name
picture_path = os.path.join(downloads_path, picture_filename)
# Using "urlretrieve()" from urllib.request save the image
urllib.request.urlretrieve(image_url, picture_path)
# urlretrieve() takes in 2 arguments
# 1. The URL of the image to be downloaded
# 2. The path the image is saved to. A bare file name is resolved
# relative to your current working directory

Extract Text from an image using Google Cloud Vision API using cv2 in python

We are trying to extract the text from an image using google-cloud-vision API:
import io
import os
from google.oauth2 import service_account
from google.cloud import vision

# The name of the image file to annotate (Change the line below 'image_path.jpg' ******)
path = os.path.join(os.path.dirname(__file__), '3.jpg') # Your image path from current directory
client = vision.ImageAnnotatorClient()

# Read the raw image bytes from disk
with io.open(path, 'rb') as image_file:
    content = image_file.read()

# Send the bytes to the API for text detection
image = vision.types.Image(content=content)
response = client.text_detection(image=image)
texts = response.text_annotations

# Print the description of every annotation returned
print('Texts:')
for text in texts:
    print(format(text.description))
In this code, we need to make the API read the image through the 'cv2' function only, instead of using the 'io' function:
# Read image file
with io.open(img_path, 'rb') as image_file:
    content = image_file.read()
Any suggestion will be helpful
All you need is to convert numpy array that is created from cv2 to bytes which is used by Google Vision API. Here is how you do it:
import cv2

# Load the image with OpenCV only; no separate file read is needed.
image = cv2.imread(path)
if image is None:
    # cv2.imread returns None instead of raising when the file can't be read
    raise FileNotFoundError(f'Could not read image: {path}')

# Encode the numpy array as JPEG and turn it into the bytes the API expects
success, encoded_image = cv2.imencode('.jpg', image)
if not success:
    raise ValueError(f'Could not encode image as JPEG: {path}')
content2 = encoded_image.tobytes()

image_cv2 = vision.types.Image(content=content2)
response = client.text_detection(image=image_cv2)
texts = response.text_annotations

reduce time of save encodings face recognition

I have a dataset, and I am saving the encodings of its images for face recognition in a pickle object.
I would like to be able to add new images to, or delete images from, the database. When I do, the encodings of the images already in the database should stay stored in dataset_faces.dat, and encode_faces.py should only run for the new images.
I want to reduce the time to save the encoding in the encoding.pickle.
Otherwise, a lot of time should be spent even adding a new image.
encode_faces.py
import face_recognition
import numpy as np
import os
import pickle

# Parallel lists: known_person[i] is the name for known_face_encoding[i].
known_person = []
known_face_encoding = []

for file in os.listdir("Imagefolder"):
    file_path = os.path.join("Imagefolder", file)
    known_image = face_recognition.load_image_file(file_path)
    encodings = face_recognition.face_encodings(known_image)
    if not encodings:
        # No face detected: skip the image so both lists stay aligned
        # instead of failing with an IndexError on encodings[0].
        print(f"No face found in {file}, skipping")
        continue

    # Extracting person name from the image filename eg:Abhilash.jpg
    known_person.append(str(file).replace(".jpg", ""))
    # Only the first face found in each image is kept
    known_face_encoding.append(encodings[0])

# Persist the encodings and the matching names in separate pickle files
with open('dataset_faces.dat', 'wb') as f:
    pickle.dump(known_face_encoding, f, pickle.HIGHEST_PROTOCOL)
with open('dataset_fac.dat', 'wb') as d:
    pickle.dump(known_person, d)

print(known_face_encoding)
print(known_person)

Categories

Resources