Python "for" loop does not iterate as many as it should

Python "for" loop does not iterate as many as it should - python

My python code does not iterate as many as it should. There are six image files in the working directory and len(f) also gives six. But the actual iteration of for loop stops after performing two loops.
import os
import cv2
import numpy as np
from matplotlib import pyplot as plt
path = "D:\\_my_python\\Image_histogram_equalization\\source_imgs"
os.chdir(path)
print("Current Working Directory: " , os.getcwd())
files = []
for r, d, f in os.walk(path):
for file in f:
if '.jpg' in file:
files.append(os.path.join(r, file))
print("Processing %d files.." %len(f))
count = 0
for f in files:
g = f[:f.find(".jpg")] + "_CLAHE20.jpg"
print("Converting %s to %s..." % (f, g))
img = cv2.imread(f)
img_yuv = cv2.cvtColor(img, cv2.COLOR_BGR2YUV)
clahe = cv2.createCLAHE(clipLimit=2, tileGridSize=(8,8))
img_yuv[:,:,0] = clahe.apply(img_yuv[:,:,0])
img_output = cv2.cvtColor(img_yuv, cv2.COLOR_YUV2BGR)
cv2.imwrite(g, img_output)
count = count + 1
else:
print("Process completed for %d files out of %d files. " % (count, len(f)))
It should run six loops because there are six images in the folder and len(f) also gives six.

Related

Where is the bottleneck in my image manipulation code?

I wrote this script to do some image processing on a large number of PNG files (around 1500 in total). They are organized into subdirectories.
That's my code:
from PIL import Image
import os
path = "/Some/given/path"
file_list = []
counter = 1
for root, dirs, files in os.walk(path):
for file in files:
if file.endswith(".png"):
temp_file = {"path": os.path.join(root, file), "name": file}
file_list.append(temp_file)
for curr_file in file_list:
img = Image.open(curr_file["path"])
img = img.convert("RGBA")
val = list(img.getdata())
new_data = []
for item in val:
if item[3] == 0:
new_data.append(item)
else:
new_data.append((0, 0, 0, 255))
img.putdata(new_data)
file_name = "transform" + str(counter) + ".png"
replaced_text = curr_file["name"]
new_file_name = curr_file["path"].replace(replaced_text, file_name)
img.save(new_file_name)
counter += 1
The folder structure is as follows:
Source folder
-- folder__1
-- image_1.png
-- image_2.png
-- image_3.png
-- folder__2
-- image_3.png
-- image_5.png
-- folder__3
-- image_6.png
When testing on individual images, the image processing takes only a few seconds. However, when running the script, it takes around an hour to process 15 images. Any suggestions on where I'm messing up?

The main issue is located here:
new_data = []
for item in val:
if item[3] == 0:
new_data.append(item)
else:
new_data.append((0, 0, 0, 255))
img.putdata(new_data) # <--
You don't need to update the content of img for each pixel, if you're collecting the complete new_data anyway. So, just move that line outside the loop:
new_data = []
for item in val:
if item[3] == 0:
new_data.append(item)
else:
new_data.append((0, 0, 0, 255))
img.putdata(new_data) # <--
Now, get rid of iterating all pixels at all by using NumPy and its vectorization capabilities:
from PIL import Image
import os
import numpy as np # <--
path = "/Some/given/path"
file_list = []
counter = 1
for root, dirs, files in os.walk(path):
for file in files:
if file.endswith(".png"):
temp_file = {"path": os.path.join(root, file), "name": file}
file_list.append(temp_file)
for curr_file in file_list:
img = Image.open(curr_file["path"])
img = img.convert("RGBA")
img = np.array(img) # <--
img[img[..., 3] != 0] = (0, 0, 0, 255) # <--
img = Image.fromarray(img) # <--
file_name = "transform" + str(counter) + ".png"
replaced_text = curr_file["name"]
new_file_name = curr_file["path"].replace(replaced_text, file_name)
img.save(new_file_name)
counter += 1
Basically, you set all pixels with alpha channel not equal to 0 to (0, 0, 0, 255). That's the NumPy one-liner you see there. The line before and after are just for transformation from Pillow Image to NumPy array and vice versa.
EDIT: If you don't want to have NumPy in your code, you could also get rid of the loops by using Pillow's point function, cf. this tutorial:
from PIL import Image
import os
path = "/Some/given/path"
file_list = []
counter = 1
for root, dirs, files in os.walk(path):
for file in files:
if file.endswith(".png"):
temp_file = {"path": os.path.join(root, file), "name": file}
file_list.append(temp_file)
for curr_file in file_list:
img = Image.open(curr_file["path"])
img = img.convert("RGBA")
source = img.split() # <--
mask = source[3].point(lambda i: i > 0 and 255) # <--
img.paste(Image.new("RGBA", img.size, (0, 0, 0, 255)), None, mask) # <--
file_name = "transform" + str(counter) + ".png"
replaced_text = curr_file["name"]
new_file_name = curr_file["path"].replace(replaced_text, file_name)
img.save(new_file_name)
counter += 1
----------------------------------------
System information
----------------------------------------
Platform: Windows-10-10.0.16299-SP0
Python: 3.9.1
NumPy: 1.20.2
Pillow: 8.1.2
----------------------------------------

You can use snakeviz library to profile your code -
Snakeviz - https://jiffyclub.github.io/snakeviz/
python -m cProfile -o program.prof my_program.py
Once the profile is generated you can visualise and see which function/which line is taking more time.
snakeviz program.prof

How to run python on multiple folder to create pdf?

The following code is to combine multiple images into one pdf. I am trying to run this code on multiple folder where each folder has several images as result, each folder will has one pdf.
import os
from PIL import Image
from fpdf import FPDF
pdf = FPDF()
sdir = "imageFolder/"
w,h = 0,0
for i in range(1, 100):
fname = sdir + "IMG%.3d.png" % i
if os.path.exists(fname):
if i == 1:
cover = Image.open(fname)
w,h = cover.size
pdf = FPDF(unit = "pt", format = [w,h])
image = fname
pdf.add_page()
pdf.image(image,0,0,w,h)
else:
print("File not found:", fname)
print("processed %d" % i)
pdf.output("output.pdf", "F")
print("done")
I was thinking to create another loop to bring the folder path which will come before the first loop:
For j in range(1 to 70):
folderP=sdir+folder%1
And loop in each folder
Sorry I am still learning python. Any suggestion would be great!

You can use glob to get the paths of all pdfs and add them to a list, then you just iterate through the list and you wouldn't even need to check if they exist:
from glob import glob
sDir = 'imageFolder/'
pdfPaths = []
pdfPaths.extend(glob(f'{sDir}**/*.pdf', recursive=True))
for pdf in pdfPaths:
# do stuff

Updating .txt file with year from subfolder

I am trying to learn how to update a .txt filename when os.walk switches from files in one directory to files in another directory. I am not sure about how to do this. I tried iterating through dirs and then files, but this was unsuccessful as the .pdf files would not display. Here is a full look at the code I am working on.
The directory looks like this [research] -> [2014] -> Article1.pdf, article2.pdf article3.pdf
[2015] -> Article4.pdf, article5.pdf article6.pdf
[2016] -> Article7.pdf, article8.pdf article9.pdf
from PIL import Image
import pytesseract
from pdf2image import convert_from_path
import os
pytesseract.pytesseract.tesseract_cmd = r'/usr/local/Cellar/tesseract/4.1.1/bin/tesseract'
def image_ocr(image_path, output_txt_file_name, All_text):
image_text = pytesseract.image_to_string(
image_path, lang='eng+ces', config='--psm 1')
with open(output_txt_file_name, 'a', encoding='utf-8') as f:
f.write(image_text)
with open(All_text, 'a', encoding='utf-8') as f:
f.write(image_text)
num = 0
year = 1973
year_being_recorded = 'txt_files/' + str(year) + '_article.txt'
cumulative_text = 'txt_files/cumulative.txt'
for root, dirs, files in os.walk('articles'):
for file_ in files:
if file_.endswith('.pdf'):
article_path = str(root) + '/' + str(file_)
pages = convert_from_path(article_path, 500)
for page in pages:
name = 'jpegs/a_file_' + str(num) + '.jpeg'
page.save(name, 'JPEG')
image_ocr(name, year_being_recorded, cumulative_text)
num = num + 1

python sorting filename for opencv

I'm trying to sorting the jpgs (ascending numerically) in my directory to generate a video for opencv, but I'm having a a hard time finding a solution:
images = []
for f in os.listdir('.'):
if f.endswith('.jpg'):
images.append(f)
images[]:
['img_0.jpg', 'img_1.jpg', 'img_10.jpg', 'img_100.jpg', 'img_101.jpg', 'img_102.jpg', ... 'img_99.jpg']

import cv2
vidcap = cv2.VideoCapture('big_buck_bunny_720p_5mb.mp4')
success,image = vidcap.read()
count = 0
success = True
while success:
cv2.imwrite("frame%d.jpg" % count, image) # save frame as JPEG file
success,image = vidcap.read()
print('Read a new frame: ', success)
count += 1

You can use Os:
from os import listdir
from os.path import isfile, join
jpgfiles = [f for f in listdir('.') if isfile(join('.', f)) and f.endswith(".txt")]
jpgfiles.sort()

Loading all images using imread from a given folder

Loading and saving images in OpenCV is quite limited, so... what is the preferred ways to load all images from a given folder? Should I search for files in that folder with .png or .jpg extensions, store the names and use imread with every file? Or is there a better way?

Why not just try loading all the files in the folder? If OpenCV can't open it, oh well. Move on to the next. cv2.imread() returns None if the image can't be opened. Kind of weird that it doesn't raise an exception.
import cv2
import os
def load_images_from_folder(folder):
images = []
for filename in os.listdir(folder):
img = cv2.imread(os.path.join(folder,filename))
if img is not None:
images.append(img)
return images

I used skimage. You can create a collection and access elements the standard way, i.e. col[index]. This will give you the RGB values.
from skimage.io import imread_collection
#your path
col_dir = 'cats/*.jpg'
#creating a collection with the available images
col = imread_collection(col_dir)

import glob
cv_img = []
for img in glob.glob("Path/to/dir/*.jpg"):
n= cv2.imread(img)
cv_img.append(n)`

If all images are of the same format:
import cv2
import glob
images = [cv2.imread(file) for file in glob.glob('path/to/files/*.jpg')]
For reading images of different formats:
import cv2
import glob
imdir = 'path/to/files/'
ext = ['png', 'jpg', 'gif'] # Add image formats here
files = []
[files.extend(glob.glob(imdir + '*.' + e)) for e in ext]
images = [cv2.imread(file) for file in files]

you can use glob function to do this. see the example
import cv2
import glob
for img in glob.glob("path/to/folder/*.png"):
cv_img = cv2.imread(img)

You can also use matplotlib for this, try this out:
import matplotlib.image as mpimg
def load_images(folder):
images = []
for filename in os.listdir(folder):
img = mpimg.imread(os.path.join(folder, filename))
if img is not None:
images.append(img)
return images

import os
import cv2
rootdir = "directory path"
for subdir, dirs, files in os.walk(rootdir):
for file in files:
frame = cv2.imread(os.path.join(subdir, file))

To add onto the answer from Rishabh and make it able to handle files that are not images that are found in the folder.
import matplotlib.image as mpimg
images = []
folder = './your/folder/'
for filename in os.listdir(folder):
try:
img = mpimg.imread(os.path.join(folder, filename))
if img is not None:
images.append(img)
except:
print('Cant import ' + filename)
images = np.asarray(images)

Here is a simple script that feature opencv, scikit image, and glob
#!C:\Users\test\anaconda3\envs\data_aquisition\python.exe
import glob
import argparse
from timeit import default_timer as timer
import skimage
from skimage.io import imread_collection
import cv2
def get_args():
parser = argparse.ArgumentParser(
description='script that test the fastest image loading methods')
parser.add_argument('src_path', help = "diractorry that contains the ims")
parser.add_argument('extension', help = "extension of the images",choices=['jpg','png','webp'])
return parser.parse_args()
def load_imgs_scikit_image_collection(path:str):
#creating a collection with the available images
col = imread_collection(path)
print('loaded: ',len(col),' imgs')
return col
def load_imgs_scikit_image_glob(path):
imgs = []
for img in glob.glob(path):
imgs.append(skimage.io.imread(img))
return imgs
def load_image_opencv(path:str):
imgs = []
for f in glob.glob(path):
imgs.extend(cv2.imread(f))
return imgs
def load_image_opencv_glob(path:str):
filenames = glob.glob(path)
filenames.sort()
images = [cv2.imread(img) for img in filenames]
return images
def laod_images_opencv_extisions(path):
ext = [".jpg",".gif",".png",".tga",".webp"] # Add image formats here
files = []
images = []
[files.extend(glob.glob(path + '/*' + e)) for e in ext]
images.extend([cv2.imread(file) for file in files])
return images
def laod_images_ski_extisions(path):
ext = [".jpg",".gif",".png",".tga",".webp"] # Add image formats here
files = []
images = []
[files.extend(glob.glob(path + '/*' + e)) for e in ext]
images.extend([skimage.io.imread(file) for file in files])
return images
def show_image(img):
window_name = 'image'
cv2.imshow(window_name, img)
cv2.waitKey(0)
cv2.destroyAllWindows()
def main():
args = get_args()
dir = args.src_path+'/*.'+args.extension
start = timer()
imgs=load_imgs_scikit_image_collection(dir)
end = timer()
print('scikit_image image collection',end - start) #time 0.08381089999999991
show_image(imgs[2])
start = timer()
load_imgs_scikit_image_glob(dir)
end = timer()
print('scikit_image and glob',end - start) #time 16.627431599999998
# dir = args.src_path+'\\.*'+args.extension
start = timer()
imgs_opencv = load_image_opencv_glob(dir) #time 10.9856656
end = timer()
print('opencv glob',end - start)
show_image(imgs_opencv[2])
start = timer()
valid_imgs_opencv = laod_images_opencv_extisions(args.src_path) #time 11.318516700000004
end = timer()
print('opencv glob extensions',end - start)
show_image(valid_imgs_opencv[2])
start = timer()
valid_imgs_opencv = laod_images_ski_extisions(args.src_path) #time 15.939870800000001
end = timer()
print('scikit_image glob extensions',end - start)
show_image(valid_imgs_opencv[2])
main()
Command to run script: python best_image_loader.py D:\data\dataset\radar_dome\manual png
png is used to load only png files.
Output
loaded: 876 imgs
scikit_image image collection 0.08248239999999996
scikit_image and glob 14.939381200000001
opencv glob 10.9708085
opencv glob extensions 10.974014100000005
scikit_image glob extensions 14.877048600000002

your_path = 'your_path'
ext = ['*.jpg', '*.png', '*.gif'] # Add image formats here
images = []
not_copy = 0
for item in [your_path + '/' + e for e in ext]:
images += glob(item)

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Python "for" loop does not iterate as many as it should - python

Related

Where is the bottleneck in my image manipulation code?

How to run python on multiple folder to create pdf?

Updating .txt file with year from subfolder

python sorting filename for opencv

Loading all images using imread from a given folder

Categories

Resources