Delete Rasters that contain pixel values of Zero - python

I have working code that iterates through a folder and deletes a .tif if it contains only pixel values of zero, i.e. a black image. The problem is that I have 12,000 images in the folder and it takes quite a long time for the process to finish. I am wondering if there is a faster way I could do this?
from PIL import Image
import os
# Folder whose entries are inspected one by one.
directory = 'D:/images/'

for image in os.listdir(directory):
    indiv = Image.open(directory + image)
    # Pull every pixel of the image into a Python list.
    pixel_values = list(indiv.getdata())
    # Count the pixels whose four bands are all zero.
    black_pixels = sum(1 for RGBA in pixel_values if RGBA == (0, 0, 0, 0))
    # The file is deleted only when every single pixel was counted as black.
    if black_pixels == len(pixel_values):
        os.remove(directory + image)
Output of black .tif:
(0, 0, 0, 0)
(0, 0, 0, 0)
(0, 0, 0, 0)
(0, 0, 0, 0)
(0, 0, 0, 0)
(0, 0, 0, 0)
(0, 0, 0, 0)
(0, 0, 0, 0)
(0, 0, 0, 0)
(0, 0, 0, 0)
(0, 0, 0, 0)
(0, 0, 0, 0)
(0, 0, 0, 0)
....
Like 400,000 more rows of this

This should be substantially faster
# Folder whose entries are inspected one by one.
directory = 'D:/images/'

for image in os.listdir(directory):
    path = os.path.join(directory, image)
    # Close the image before deleting it: an open handle makes os.remove
    # fail on Windows and leaks file descriptors over thousands of files.
    with Image.open(path) as indiv:
        # getdata() is already iterable, so wrapping it in list(...) is not
        # needed; without the copy, all() stops at the first non-zero pixel.
        is_black = all(pixels == (0, 0, 0, 0) for pixels in indiv.getdata())
    if is_black:
        os.remove(path)
I'm not sure the list(...) is needed either, I'm not too familiar with PIL. If it works without, removing it should cause another speedup.

Related

How to delete objects (cells) touching the image boundaries?

I have an image of cells which I have thresholded and also detected the cells (using cv2).
I would like to create an array with values True or False to show whether each component touches the boundaries of the image (True) or not (False).
import cv2 as cv
# Build a binary mask of the cells by accumulating thresholded images, then
# collect per-component statistics (area, centroid, bounding-box size).
# Read the image you want connected components of, as single-channel grayscale
img = cv.imread('../images/37983_ERSyto/cellpaintingfollowup-reimage_a01_s1_w26ae36209-938b-45ef-b166-3aba3af125df.tif', cv.IMREAD_GRAYSCALE)
seed_pt = (100, 800) # point in the background, used as the flood-fill seed
fill_color = 0
# NOTE(review): np is used here but this listing only imports cv2 -- numpy must also be imported as np
mask = np.zeros_like(img)
kernel = cv.getStructuringElement(cv.MORPH_RECT, (6, 5))
# NOTE(review): indentation was lost in this listing; the loop body presumably runs through the bitwise_or line (and possibly the morphologyEx line) -- confirm
for th in range(7,70):
# keep a copy of the mask accumulated so far
prev_mask = mask.copy()
# threshold the image at the current level th
# NOTE(review): maxval 22331 is outside the 8-bit range -- confirm the intended value
mask = cv.threshold(img, th, 22331, cv.THRESH_BINARY)[1]
# floodFill: fill the connected component containing the seed point with fill_color
mask = cv.floodFill(mask, None, seed_pt, fill_color)[1]
# bitwise_or: per-element disjunction, i.e. superimpose this thresholded image on the previous mask
mask = cv.bitwise_or(mask, prev_mask)
# clean speckles with a morphological opening
mask = cv.morphologyEx(mask, cv.MORPH_OPEN, kernel)
# compute the connected-components labelling of the mask together with per-label statistics
connectivity = 8 # connectivity type: must be 4 or 8
# AREA of each cell in pixels
stats = cv.connectedComponentsWithStats(mask, connectivity, cv.CV_32S)[2]
label_area = stats[1:, cv.CC_STAT_AREA] # skip the first row because it represents the background
# CENTROID position of each cell (the same call is repeated to fetch element [3])
centroids = cv.connectedComponentsWithStats(mask, connectivity, cv.CV_32S)[3]
label_centroids_x = centroids[1:, 0] # skip the first row because it represents the background
label_centroids_y = centroids[1:,1]
# HEIGHT: the vertical size of the bounding box
label_hight = stats[1:, cv.CC_STAT_HEIGHT]
# WIDTH: the horizontal size of the bounding box
label_width = stats[1:, cv.CC_STAT_WIDTH]
# TOUCHING IMAGE BOUNDARIES: does the component touch the border of the image? --> True/False
label_bounary = # placeholder: boolean array still to be computed (this is what the question asks for)
I first thought about searching for the contour of every component and defining some restriction, but I have trouble understanding how the labels of every component are stored, and therefore I could not select the desired components.
Here is the image:
Thank you very much in advance.
Using your code (thanks for commenting), I got this mask. It's possible it's not the same since .jpg compression can mess with an image (it's not a lossless compression scheme)
@fmw42 is exactly right; he commented before I could finish my code.
import cv2 as cv
import numpy as np
# Rebuild the asker's binary mask from the posted .jpg and display it.
# Read the image you want connected components of, as single-channel grayscale
img = cv.imread('cells.jpg', cv.IMREAD_GRAYSCALE)
seed_pt = (100, 800) # point in the background, used as the flood-fill seed
fill_color = 0
mask = np.zeros_like(img)
kernel = cv.getStructuringElement(cv.MORPH_RECT, (6, 5))
# NOTE(review): indentation was lost in this listing; the loop body presumably runs through the bitwise_or line (and possibly the morphologyEx line) -- confirm
for th in range(7,70):
# keep a copy of the mask accumulated so far
prev_mask = mask.copy()
# threshold the image at the current level th
# NOTE(review): maxval 22331 is outside the 8-bit range -- confirm the intended value
mask = cv.threshold(img, th, 22331, cv.THRESH_BINARY)[1]
# floodFill: fill the connected component containing the seed point with fill_color
mask = cv.floodFill(mask, None, seed_pt, fill_color)[1]
# bitwise_or: per-element disjunction, i.e. superimpose this thresholded image on the previous mask
mask = cv.bitwise_or(mask, prev_mask)
# clean speckles with a morphological opening
mask = cv.morphologyEx(mask, cv.MORPH_OPEN, kernel)
# show the mask and wait for a key press
cv.imshow("Mask", mask);
cv.waitKey(0);
# Find the contours of the mask and record, for each one, whether its
# bounding box reaches the image border.
# findContours returns (image, contours, hierarchy) in OpenCV 3.x but
# (contours, hierarchy) in 2.x and 4.x; the contour list is always the
# second-to-last element, so index from the end to work on every version.
contours = cv.findContours(mask, cv.RETR_TREE, cv.CHAIN_APPROX_NONE)[-2]

# get bounds and check if they're touching edge
height, width = mask.shape[:2]
touching_edge = []  # boolean list, index matches the contours list
for con in contours:
    # boundingRect gives the top-left corner plus width/height, so the last
    # column covered is x + w - 1 and the last row is y + h - 1.
    x, y, w, h = cv.boundingRect(con)
    # A component touches the border only if it covers column 0 / row 0 or
    # the last column / row (x + w == width, y + h == height).
    on_edge = x <= 0 or y <= 0 or (x + w) >= width or (y + h) >= height
    touching_edge.append(on_edge)
# Paint every border-touching contour onto a colour copy of the mask.
colored = cv.cvtColor(mask, cv.COLOR_GRAY2BGR)
for index, on_edge in enumerate(touching_edge):
    if not on_edge:
        continue
    # drawContours(image, contour_list, index, color, thickness); -1 fills the contour
    cv.drawContours(colored, contours, index, (50, 50, 200), -1)

# display the result and wait for a key press
cv.imshow("Colored", colored)
cv.waitKey(0)
If you're open to using scikit-image, you can try clear_border:
>>> import numpy as np
>>> from skimage.segmentation import clear_border
>>> labels = np.array([[0, 0, 0, 0, 0, 0, 0, 1, 0],
... [1, 1, 0, 0, 1, 0, 0, 1, 0],
... [1, 1, 0, 1, 0, 1, 0, 0, 0],
... [0, 0, 0, 1, 1, 1, 1, 0, 0],
... [0, 1, 1, 1, 1, 1, 1, 1, 0],
... [0, 0, 0, 0, 0, 0, 0, 0, 0]])
>>> clear_border(labels)
array([[0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 1, 0, 1, 0, 0, 0],
[0, 0, 0, 1, 1, 1, 1, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0]])

How can I construct a PIL Image from an array of RGBA pixels?

My goal is to use PIL to extract some details from an image, effectively cropping it down.
For this, I use Image.getdata() to get a list of the pixels in the image, since checking and modifying this is easier for me.
After all the changes I made, I am left with an array of pixels represented in tuples. For simplicity, an array like that could look like this:
new_pixels = [
(255, 0, 0, 255),
(0, 255, 0, 255),
(0, 0, 255, 255),
(0, 0, 0, 255)
]
I've seen something interesting in the PIL documentation, namely the fromarray classmethod, however passing the array to this function gives an error message:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "C:\Python39\lib\site-packages\PIL\Image.py", line 2741, in fromarray
arr = obj.__array_interface__
AttributeError: 'list' object has no attribute '__array_interface__'
Trying the same with a two dimensional list gives the same result.
The question finally is, how would I go about turning this array into a PIL Image object that I can later save?
Not sure if that's what you are looking for:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Feb 4 17:10:02 2021
@author: Pietro
"""
from PIL import Image
import numpy as np
def _describe(label, image):
    """Print the size and mode of *image* under *label*, then display it."""
    print(f'{label} image size : ', image.size)
    print(f'{label} image mode : ', image.mode)
    image.show()


# Four RGBA tuples in a flat list: NumPy sees a plain 4x4 matrix of numbers.
new_pixels = np.array(
    [
        (255, 0, 0, 255),
        (0, 255, 0, 255),
        (0, 0, 255, 255),
        (0, 0, 0, 255),
    ],
    dtype=np.uint8,
)

# The same pixels wrapped in one extra list: one row of four pixels.
new_pixelsRGBA = np.array(
    [
        [
            [255, 0, 0, 255],
            [0, 255, 0, 255],
            [0, 0, 255, 255],
            [0, 0, 0, 255],
        ],
    ],
    dtype=np.uint8,
)

# The same pixels arranged as two rows of two pixels.
new_pixelsRGBA2 = np.array(
    [
        [[255, 0, 0, 255], [0, 255, 0, 255]],
        [[0, 0, 255, 255], [0, 0, 0, 255]],
    ],
    dtype=np.uint8,
)

pippo = Image.fromarray(new_pixels)
# The mode is passed explicitly here; the third image below omits it.
pippoRGBA = Image.fromarray(new_pixelsRGBA, mode='RGBA')

_describe('pippo', pippo)
_describe('pippoRGBA', pippoRGBA)

pippoRGBA2 = Image.fromarray(new_pixelsRGBA2)
_describe('pippoRGBA2', pippoRGBA2)
the image I got is:
pippo image size : (4, 4)
pippo image mode : L : (8-bit pixels, black and white)
Apparently your array is not a RGBA pixel array ?! Or not ?
Using my new_pixelsRGBA or new_pixelsRGBA2 array (see above) I got:
pippoRGBA image size : (4, 1)
pippoRGBA image mode : RGBA
or (pippoRGBA2 image):
pippoRGBA2 image size : (2, 2)
pippoRGBA2 image mode : RGBA
note that :
pippoRGBA = Image.fromarray(new_pixelsRGBA)
works as well; PIL knows we are talking about RGBA array
You can use matplotlib here:
import matplotlib.pyplot as plt
plt.imshow(img_pixels)
plt.show()

How to get pixel values of image in rgba format?

I'm trying to get the rgba values of the pixels of an image.
Google suggests I use code similar to this:
from PIL import Image
im = Image.open("C:/Stuff/image.png", "r")
px = list(im.getdata())
My problem is the data not always being in rgba format.
On some images it does return rgba
[(0, 0, 0, 255), (0, 0, 0, 255), (0, 0, 255, 255), [...]
while on others it returns rgb
[(0, 0, 0), (0, 0, 0), (0, 0, 255), [...]
and on some it returns whatever this is
[0, 0, 1, [...]
Is there a way to always get rgba returned?

RGB color codes in Semantic Segmentation

I am using the Semantic Segmentation network (SegNet). I am trying to reduce the number of classes and thus rearranging the network.
Therefore, I am also changing the color-coding of the predictions as well. My problem is I don't get the intended colors in the output image.
For e.g.
pascal_palette = np.array([(0, 0, 0),
(0, 0, 0), (0, 0, 0), (0, 0, 0), (0, 0, 0), (0, 0, 0),
(0, 0, 128), (0, 128, 0), (0, 0, 0), (0, 0, 0), (128, 0, 0),
(0, 0, 0), (0, 0, 0)
], dtype=np.uint8)
The above line gives perfect results for the three classes as the pixels are only in 1 channel.
The output is as below:
However, if I modify the line and add values to different channels it gives weird output. The output is attached below:
pascal_palette = np.array([(0, 0, 0),
(0, 0, 0), (0, 0, 0), (0, 0, 0), (0, 0, 0), (0, 0, 0),
(0, 0, 128), (124, 252, 0), (0, 0, 0), (0, 0, 0), (128, 0, 0),
(0, 0, 0), (0, 0, 0)
], dtype=np.uint8)
I changed the color code to (124, 252, 0), which should be lawn green. I also checked it on a website listing RGB codes.
What am I missing here? Any explanation will be helpful.
Prediction code:
# Run the network, colour the per-pixel class prediction and save it.
prob = model.predict(net_in)[0]
# Reshape to 2d here since the network outputs a flat array per channel.
# np.int was removed from NumPy; the builtin int is the drop-in replacement.
prob_edge = np.sqrt(prob.shape[0]).astype(int)
prob = prob.reshape((prob_edge, prob_edge, 13))
# Upsample
if args.zoom > 1:
    prob = interp_map(prob, args.zoom, image_size[1], image_size[0])
# Recover the most likely prediction (actual segment class)
prediction = np.argmax(prob, axis=2)
# Apply the color palette to the segmented image: one palette row per pixel,
# reshaped back to (height, width, 3)
color_image = np.array(pascal_palette)[prediction.ravel()].reshape(
    prediction.shape + (3,))
print('Saving results to: ', args.output_path)
with open(args.output_path, 'wb') as out_file:
    Image.fromarray(np.multiply(color_image,255)).save(out_file)
PS. I have used same model for predictions in both case
The problem is very probably in np.multiply(color_image,255).
As you already created a palette with values from 0 to 255 and you're simply gathering values from this palette, you don't need to multiply it by 255.
Use simply Image.fromarray(color_image).save(out_file).

PIL's Image.frombuffer creates wrong image

I'm trying to create an image from a 1-D numpy array of integers so that changes to this array are reflected in the image. It seems that Image.frombuffer perfectly fits my needs. Here is my attempt:
from PIL import Image
import numpy as np
data = np.full(100, 255, dtype = np.int32)
img = Image.frombuffer('RGB', (10, 10), data)
print(list(img.getdata()))
I expected to see a list of 100 tuples (0, 0, 255). But what I'm actually getting is (0, 0, 255), (0, 0, 0), (0, 0, 0), (0, 255, 0), (0, 0, 0), (0, 0, 0), (255, 0, 0), (0, 0, 0), (0, 0, 255), (0, 0, 0), (255, 0, 0), ...
What is the reason for that behavior?
'RGB' uses three bytes per pixel. The buffer that you provided is an array with data type numpy.int32, which uses four bytes per element. So you have a mismatch.
One way to handle it is to use mode 'RGBA':
img = Image.frombuffer('RGBA', (10, 10), data)
Whether or not that is a good solution depends on what you are going to do with the image.
Also note that whether you get (255, 0, 0, 0) or (0, 0, 0, 255) for the RGBA pixels depends on the endianness of the integers in data.
For an RGB image, here's an alternative:
data = np.zeros(300, dtype=np.uint8)
# Set the blue channel to 255.
data[2::3] = 255
img = Image.frombuffer('RGB', (10, 10), data)
Without more context for the problem, I don't know if that is useful for you.

Categories

Resources