I'm currently trying to create an image preprocessor that adds specific noise for AI training. In this case, I'm trying to add contour lines over the top of my input image that resemble sketch lines.
So far I have been able to get these lines drawn on my image, but they are very sharp and pixelated, which obviously doesn't resemble real artist-drawn lines. I need some way to apply a slight blur to soften these edges, but so far I have not been able to do this.
Here is a visual guide to what I am trying to achieve:
I need to find the contours in image (A) and draw them onto a new layer with alpha channel (B). I then need to blur these lines (C) and paste it back onto the original image (D).
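For reference, here is a minimal sketch of steps (B) and (C) using the same skimage modules as my code below; it simply connects the first and last point of each contour rather than picking random points, and the channel_axis argument assumes skimage >= 0.19:
import numpy as np
from skimage import color, draw, feature, filters, measure

def sketch_overlay(rgb_image):
    # (A) -> edges: detect edges on a grayscale copy of the input
    edges = feature.canny(color.rgb2gray(rgb_image), sigma=3)
    # (B): draw the contour lines onto a transparent float RGBA layer
    overlay = np.zeros((rgb_image.shape[0], rgb_image.shape[1], 4), dtype=float)
    for contour in measure.find_contours(edges, 0.8):
        (r0, c0), (r1, c1) = contour[0], contour[-1]
        rr, cc = draw.line(int(r0), int(c0), int(r1), int(c1))
        overlay[rr, cc] = (0.12, 0.12, 0.12, 1.0)  # dark, fully opaque line
    # (C): blur the layer so the line edges become soft
    return filters.gaussian(overlay, sigma=1, channel_axis=-1)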
Here is the section I'm having problems with:
# Convert the image to a NumPy array
augmented_image = np.array(augmented_image)
augmented_shape = augmented_image.shape
# Convert the image to grayscale
grey_image = color.rgb2gray(augmented_image)
# Detect the contours of the image using the Canny edge detector
edges = feature.canny(grey_image, sigma=3)
# Create a blank image with dimensions 256 x 256
blank_image = np.zeros((256, 256, 4))
# Add an alpha channel to the original image (convert it to RGBA)
alpha = np.ones((augmented_image.shape[0], augmented_image.shape[1], 1), dtype=augmented_image.dtype) * 255
augmented_image = np.concatenate([augmented_image, alpha], axis=2)
# Iterate over the contours
for contour in measure.find_contours(edges, 0.8):
# Set offset
offset = 10
# Select a random point along the contour
start_idx = np.random.randint(0, len(contour))
start_row, start_col = contour[start_idx]
start_row = start_row + offset
start_col = start_col + offset
start_row = np.clip(start_row, 0, augmented_shape[0] - 1)
start_col = np.clip(start_col, 0, augmented_shape[1] - 1)
# Select a random point along the contour that is not the same as the first point
point = np.random.randint(0, len(contour))
while point == start_idx:
point = np.random.randint(0, len(contour))
end_row, end_col = contour[point]
end_row = end_row + offset
end_col = end_col + offset
end_row = np.clip(end_row, 0, augmented_shape[0] - 1)
end_col = np.clip(end_col, 0, augmented_shape[1] - 1)
# Draw the line on the image using the draw.line function
rr, cc = draw.line(int(start_row), int(start_col), int(end_row), int(end_col))
blank_image[rr, cc] = 30
# Smooth the contour lines using the gaussian function
blank_image = filters.gaussian(blank_image, sigma=1)
# Make sure image is same data-type
blank_image = blank_image.astype(augmented_image.dtype)
# Create a mask for the contour lines
blank_alpha = blank_image[:, :, 3:]
mask = np.any(blank_alpha > 0, axis=2)
# Apply the smooth image to the masked region of the original image
augmented_image[mask] = blank_image[mask]
# Convert image back to 3 layers rgb
augmented_image = augmented_image[:, :, :3]
I know that the problem lies somewhere in the 'mask' variable definition. Something about it being a boolean type just pastes a line of pure black squares on my image rather than the expected blurred line. No amount of messing with layer order or adding extra layers to copy from has fixed this.
Doing this process without trying to blur the lines works great, minus the fact that it's very pixelated and doesn't fit the style of the training data. Blurring the image without trying to re-combine anything produces an adequate blurred line as well; however, the entire image is blurred.
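For what it's worth, the kind of combine step (D) I am aiming for is a per-pixel alpha blend rather than a boolean paste; roughly something like this sketch, assuming overlay is a float RGBA layer in [0, 1] (for example the blurred layer from step (C)) and base is the uint8 RGB crop:
import numpy as np

def composite(base, overlay):
    # Use the (blurred) alpha channel as a blend weight instead of a hard mask
    base_f = base.astype(float) / 255.0
    alpha = overlay[:, :, 3:4]          # shape (H, W, 1), values in [0, 1]
    blended = alpha * overlay[:, :, :3] + (1.0 - alpha) * base_f
    return (blended * 255).astype(np.uint8)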
Here's what I can produce without the blurring process, and a rough idea of what I would like the final product to look like (made in Photoshop)
It's only when I try to mask and combine that this becomes a problem. I will post the full code below for anyone to run on their own system:
import random
import numpy as np
import skimage
from skimage.transform import rotate, resize
from skimage import draw, feature, color, measure, filters, util
from skimage.util import random_noise
import PIL
from PIL import Image
import os
import argparse
import cv2
from matplotlib import pyplot as plt
import matplotlib.image as mpimg
from pathlib import Path
import imghdr
# Set parser args
parser = argparse.ArgumentParser()
parser.add_argument("--dirty_dir", help="path to folder containing dirty images")
parser.add_argument("--clean_dir", help="path to folder containing clean images")
parser.add_argument("--dirty_savedir", help="path to dirty output folder")
parser.add_argument("--clean_savedir", help="path to clean output folder")
a = parser.parse_args()
# Set folder paths
dirty_dir = a.dirty_dir + '/'
clean_dir = a.clean_dir + '/'
dirty_savedir = a.dirty_savedir + '/'
clean_savedir = a.clean_savedir + '/'
print(f"Source Folder: {dirty_dir}")
print(f"Source Folder: {clean_dir}")
print(f"Output Folder: {dirty_savedir}")
print(f"Output Folder: {clean_savedir}")
def augment_image(image, filename, clean_dir):
for i in range(8):
# Create list for clean imgs
clean_list = []
# Randomly select a 256x256 region
w, h = image.size[0], image.size[1]
print(w,h)
top = random.randint(0, h - 256)
left = random.randint(0, w - 256)
right = left + 256
bottom = top + 256
dims = [left, top, right, bottom]
print(f'{filename} dimensions:{dims}')
# Add dimensions to clean_list
clean_list.extend(dims)
augmented_image = image.crop((dims))
print(f'{filename} shape: {augmented_image.size}')
# Randomly rotate the image by 90, 180, or 270 degrees
angle = random.choice([0, 90, 180, 270])
augmented_image = augmented_image.rotate(angle)
# Add angle to list
clean_list.append(angle)
# Randomly flip the image horizontally
flip_lr = random.choice([True, False])
if flip_lr == True:
augmented_image = augmented_image.transpose(Image.FLIP_LEFT_RIGHT)
clean_list.append("flip_lr")
else:
clean_list.append("none")
# Randomly flip the image vertically
flip_tb = random.choice([True, False])
if flip_tb == True:
augmented_image = augmented_image.transpose(Image.FLIP_TOP_BOTTOM)
clean_list.append("flip_tb")
else:
clean_list.append("none")
# Convert the image to a NumPy array
augmented_image = np.array(augmented_image)
augmented_shape = augmented_image.shape
# Convert the image to grayscale
grey_image = color.rgb2gray(augmented_image)
# Detect the contours of the image using the Canny edge detector
edges = feature.canny(grey_image, sigma=3)
# Create a blank image with dimensions 256 x 256
blank_image = np.zeros((256, 256, 4))
# Add an alpha channel to the original image (convert it to RGBA)
alpha = np.ones((augmented_image.shape[0], augmented_image.shape[1], 1), dtype=augmented_image.dtype) * 255
augmented_image = np.concatenate([augmented_image, alpha], axis=2)
# Iterate over the contours
for contour in measure.find_contours(edges, 0.8):
# Set offset
offset = 10
# Select a random point along the contour
start_idx = np.random.randint(0, len(contour))
start_row, start_col = contour[start_idx]
start_row = start_row + offset
start_col = start_col + offset
start_row = np.clip(start_row, 0, augmented_shape[0] - 1)
start_col = np.clip(start_col, 0, augmented_shape[1] - 1)
# Select a random point along the contour that is not the same as the first point
point = np.random.randint(0, len(contour))
while point == start_idx:
point = np.random.randint(0, len(contour))
end_row, end_col = contour[point]
end_row = end_row + offset
end_col = end_col + offset
end_row = np.clip(end_row, 0, augmented_shape[0] - 1)
end_col = np.clip(end_col, 0, augmented_shape[1] - 1)
# Draw the line on the image using the draw.line function
rr, cc = draw.line(int(start_row), int(start_col), int(end_row), int(end_col))
blank_image[rr, cc] = 30
# Smooth the contour lines using the gaussian function
blank_image = filters.gaussian(blank_image, sigma=1)
# Make sure image is same data-type
blank_image = blank_image.astype(augmented_image.dtype)
# Create a mask for the contour lines
blank_alpha = blank_image[:, :, 3:]
mask = np.any(blank_alpha > 0, axis=2)
# Apply the smooth image to the masked region of the original image
augmented_image[mask] = blank_image[mask]
# Convert image back to 3 layers rgb
augmented_image = augmented_image[:, :, :3]
## Add more noise types (lines, wrinkles, color)/make noise random chance to occur ##
# Add random noise to the image
noise = random_noise(augmented_image, mode='pepper', amount=0.011)
# Convert the noisy image back to a PIL image
augmented_image = np.random.random_sample(augmented_image.shape) * 255
augmented_image = np.array(255 * noise, dtype=np.uint8)
augmented_image = Image.fromarray(augmented_image)
# Save file
augmented_image.save(dirty_savedir + '_' + str(i) + '_' + filename)
print(clean_list)
# Function to mirror edits onto clean images
def clean_aug(clean_dir, clean_list):
# Open clean directory
for filename in os.listdir(f"{clean_dir}"):
# Rule out any weird Mac files
if not filename.startswith("._"):
with Image.open(clean_dir + filename) as image:
# Define clean dimensions
clean_dims = clean_list[0:4]
# Crop image
clean_augmented = image.crop((clean_dims))
# Rotate clean image
clean_augmented = clean_augmented.rotate(clean_list[4])
# Flip clean image
if clean_list[5] == 'flip_lr':
clean_augmented = clean_augmented.transpose(Image.FLIP_LEFT_RIGHT)
if clean_list[6] == 'flip_tb':
clean_augmented = clean_augmented.transpose(Image.FLIP_TOP_BOTTOM)
# Save clean images
clean_augmented.save(clean_savedir + '_' + str(i) + '_' + filename)
print("Clean alterations copied successfully")
clean_aug(clean_dir, clean_list)
# Clean up unnecessary files
def file_scrub():
dirty_dir = dirty_savedir
image_extensions = [".png", ".jpg"] # add there all your images file extensions
img_type_accepted_by_tf = ["bmp", "gif", "jpeg", "png"]
for filepath in Path(dirty_dir).rglob("*"):
if filepath.suffix.lower() in image_extensions:
img_type = imghdr.what(filepath)
if img_type is None:
print(f"{filepath} is not an image")
elif img_type not in img_type_accepted_by_tf:
print(f"{filepath} is a {img_type}, not accepted by TensorFlow")
def image_aug(dirty_dir, clean_dir):
for filename in os.listdir(f"{dirty_dir}"):
# Check if the filename starts with "._"
if not filename.startswith("._"):
with Image.open(dirty_dir + filename) as image:
# Open the image
augment_image(image, filename, clean_dir)
image_aug(dirty_dir, clean_dir)
file_scrub()
Apologies for the cumbersome code; I wanted to post the full script rather than risk introducing discrepancies by trimming it down. If there is any clarification I can provide, please let me know!
I am trying to find the dominant color in a frame of a video. This works well; however, my frames somehow end up with different colors. Yellow/pink becomes blue/purple-ish, while black and white stay the same (so the colors are not simply inverted).
Does anyone know where it comes from and how I can change it so that the original colors are kept? This is my code:
import cv2
from sklearn.cluster import KMeans
from collections import Counter
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.patches as patches
video = cv2.VideoCapture('video.mp4')
def show_blurred_image(image, dominant_color):
frame_to_blur = Image.fromarray(image)
blurred_frame = cv2.blur(image, (200,200))
blurred_frame = Image.fromarray(blurred_frame)
plt.subplot(121),plt.imshow(frame_to_blur),plt.title('Original')
plt.xticks([]), plt.yticks([])
plt.subplot(122),plt.imshow(blurred_frame),plt.title('Blurred')
plt.xticks([]), plt.yticks([])
R = round(dominant_color[0])
G = round(dominant_color[1])
B = round(dominant_color[2])
custom_color = '#%02x%02x%02x' % (R, G, B)
print(custom_color)
rect = patches.Rectangle((1620,0),300,1080,linewidth=1,
fill = True,
edgecolor=custom_color,
facecolor=custom_color)
ax = plt.gca()
ax.add_patch(rect)
plt.show()
def get_dominant_color(image, k=4, image_processing_size = None):
"""
takes an image as input
returns the dominant color of the image as a list
dominant color is found by running k means on the
pixels & returning the centroid of the largest cluster
processing time is sped up by working with a smaller image;
this resizing can be done with the image_processing_size param
which takes a tuple of image dims as input
>>> get_dominant_color(my_image, k=4, image_processing_size = (25, 25))
[56.2423442, 34.0834233, 70.1234123]
"""
#resize image if new dims provided
if image_processing_size is not None:
image = cv2.resize(image, image_processing_size,
interpolation = cv2.INTER_AREA)
#reshape the image to be a list of pixels
image = image.reshape((image.shape[0] * image.shape[1], 3))
#cluster and assign labels to the pixels
clt = KMeans(n_clusters = k)
labels = clt.fit_predict(image)
#count labels to find most popular
label_counts = Counter(labels)
#subset out most popular centroid
dominant_color = clt.cluster_centers_[label_counts.most_common(1)[0][0]]
return list(dominant_color)
dominant_colors = []
show_frame = 10
frame_nb = 0
while(video.isOpened()):
ret, frame = video.read()
if ret == True:
if (frame_nb == show_frame):
dominant_color = get_dominant_color(frame)
show_blurred_image(frame, dominant_color)
frame_nb += 1
else:
break
video.release()
cv2.destroyAllWindows()
OpenCV loads images in BGR format, while PIL and matplotlib work with RGB. If you want to use the libraries together, you need to convert the images to the right color space.
In your case:
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
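For example, the conversion could be applied right after each frame is read, so that both the clustering and the matplotlib display see RGB data (a sketch of the loop from the question with that one line added):
while video.isOpened():
    ret, frame = video.read()
    if not ret:
        break
    # OpenCV returns BGR frames; convert once so the rest of the code works in RGB
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    if frame_nb == show_frame:
        dominant_color = get_dominant_color(frame)
        show_blurred_image(frame, dominant_color)
    frame_nb += 1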
I'm trying to use OpenCV with Python 3.6 to match a character icon (pattern, image 1) against a box of characters (image 2). However, the match score is quite low, especially for shaded characters like the one in image 1.
I tried to solve it by not only using matchTemplate but also comparing histograms; nevertheless, the result is still poor.
I did try grayscale, color, matching just the center of the picture (the cropped face), matching the whole picture, resizing the pattern to the exact dimensions it would have in the box... all combinations, and the results are still very random (see the attached image of correlation results).
Thank you in advance for help!
Here's the code:
import numpy as np
import cv2 as cv
from PIL import Image
import os
box = Image.open("/Users/user/Desktop/dbz/my_box.jpeg")
box.thumbnail((592,1053))
#conditions for each match step
character_threshold = 0.6 #checks in box
hist_threshold = 0.3
for root, dirs, files in os.walk("/Users/user/Desktop/dbz/img/Super/TEQ/"):
for file in files:
if not file.startswith("."):
print("now " + file)
char = os.path.join(root, file)
#Opens and generate character's icon
character = Image.open(char)
character.thumbnail((153,139))
#Crops face from the character's icon and converts to grayscale CV object
face = character.crop((22,22,94,94)) #size 72x72 with centered face (should be 22,22,94,94)
face_array = np.array(face).astype(np.uint8)
face_array_gray = cv.cvtColor(face_array, cv.COLOR_RGB2GRAY)
#Converts the character's icon to grayscale CV object
character_array = np.array(character).astype(np.uint8)
character_array_gray = cv.cvtColor(character_array, cv.COLOR_RGB2GRAY)
#Converts box screen to grayscale CV object
box_array = np.array(box).astype(np.uint8)
box_array_gray = cv.cvtColor(box_array, cv.COLOR_RGB2GRAY)
#Check whether the face is in the box
character_score = cv.matchTemplate(box_array[:,:,2],face_array[:,:,2],cv.TM_CCOEFF_NORMED)
if character_score.max() > character_threshold:
ij = np.unravel_index(np.argmax(character_score),character_score.shape)
x, y = ij[::-1] # np.unravel_index returns (row, col); PIL's crop expects (left, upper, right, lower)
w, h = face_array_gray.shape
face.show()
found = box.crop((x,y,x+w,y+h)) #expand border to 25 pixels in each size (Best is (x-20,y-5,x+w,y+h+20))
#found.show()
#found_character = np.array(found_character).astype(np.uint8)
#found_character = cv.cvtColor(found_character, cv.COLOR_RGB2GRAY)
found_array = np.array(found).astype(np.uint8)
found_array_gray = cv.cvtColor(found_array, cv.COLOR_RGB2GRAY)
found_hist = cv.calcHist([found_array],[0,1,2],None,[8,8,8],[0,256,0,256,0,256])
found_hist = cv.normalize(found_hist,found_hist).flatten()
found_hist_gray = cv.calcHist([found_array_gray],[0],None,[8],[0,256])
found_hist_gray = cv.normalize(found_hist_gray,found_hist_gray).flatten()
face_hist = cv.calcHist([face_array],[0,1,2],None,[8,8,8],[0,256,0,256,0,256])
face_hist = cv.normalize(face_hist,face_hist).flatten()
face_hist_gray = cv.calcHist([face_array_gray],[0],None,[8],[0,256])
face_hist_gray = cv.normalize(face_hist_gray,face_hist_gray).flatten()
character_hist = cv.calcHist([character_array],[0,1,2],None,[8,8,8],[0,256,0,256,0,256])
character_hist = cv.normalize(character_hist,character_hist).flatten()
character_hist_gray = cv.calcHist([character_array_gray],[0],None,[8],[0,256])
character_hist_gray = cv.normalize(character_hist_gray,character_hist_gray).flatten()
hist_compare_result_CORREL = cv.compareHist(found_hist_gray, character_hist_gray,cv.HISTCMP_CORREL)
#hist_compare_result_CHISQR = cv.compareHist(found_hist_gray, character_hist_gray,cv.HISTCMP_CHISQR)
#hist_compare_result_INTERSECT = cv.compareHist(found_hist_gray, character_hist_gray,cv.HISTCMP_INTERSECT)
#hist_compare_result_BHATTACHARYYA = cv.compareHist(found_hist_gray, character_hist_gray,cv.HISTCMP_BHATTACHARYYA)
if (hist_compare_result_CORREL+character_score.max()) > 1:
print(f"Found {file} with a score:\n match:{character_score.max()}\n hist_correl: {hist_compare_result_CORREL}\n SUM:{hist_compare_result_CORREL+character_score.max()}", file=open("/Users/user/Desktop/dbz/out.log","a+"))
(1)
(2)
Here is a simple example of masked template matching in Python/OpenCV.
Image:
Transparent Template:
Template with alpha removed:
Template alpha channel extracted as mask image:
import cv2
import numpy as np
# read image
img = cv2.imread('logo.png')
# read template with alpha
tmplt = cv2.imread('hat_alpha.png', cv2.IMREAD_UNCHANGED)
hh, ww = tmplt.shape[:2]
# extract template mask as grayscale from alpha channel and make 3 channels
tmplt_mask = tmplt[:,:,3]
tmplt_mask = cv2.merge([tmplt_mask,tmplt_mask,tmplt_mask])
# extract templt2 without alpha channel from tmplt
tmplt2 = tmplt[:,:,0:3]
# do template matching
corrimg = cv2.matchTemplate(img,tmplt2,cv2.TM_CCORR_NORMED, mask=tmplt_mask)
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(corrimg)
max_val_ncc = '{:.3f}'.format(max_val)
print("correlation match score: " + max_val_ncc)
xx = max_loc[0]
yy = max_loc[1]
print('xmatch =',xx,'ymatch =',yy)
# draw red bounding box to define match location
result = img.copy()
pt1 = (xx,yy)
pt2 = (xx+ww, yy+hh)
cv2.rectangle(result, pt1, pt2, (0,0,255), 1)
cv2.imshow('image', img)
cv2.imshow('template2', tmplt2)
cv2.imshow('template_mask', tmplt_mask)
cv2.imshow('result', result)
cv2.waitKey(0)
cv2.destroyAllWindows()
# save results
cv2.imwrite('logo_hat_match2.png', result)
Match location on input:
Match Information:
correlation match score: 1.000
xmatch = 417 ymatch = 44
Without the mask, the large green area in the template would mismatch in the input and lower the match score dramatically.
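If a template does not come with an alpha channel, a mask can often still be built from it, for example by thresholding the background colour. Here is a rough sketch (the file name 'hat.png' and the near-white threshold of 240 are assumptions that depend on the template):
import cv2

img = cv2.imread('logo.png')
tmplt = cv2.imread('hat.png')              # hypothetical template without alpha
hh, ww = tmplt.shape[:2]

# Treat near-white pixels as background (0) and everything else as foreground (255)
gray = cv2.cvtColor(tmplt, cv2.COLOR_BGR2GRAY)
_, mask = cv2.threshold(gray, 240, 255, cv2.THRESH_BINARY_INV)
tmplt_mask = cv2.merge([mask, mask, mask])

corrimg = cv2.matchTemplate(img, tmplt, cv2.TM_CCORR_NORMED, mask=tmplt_mask)
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(corrimg)
print('correlation match score:', '{:.3f}'.format(max_val))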
I am learning how to properly use a selective search algorithm to create bounding boxes around an image, extract the smaller images within the bounding box and then run further analysis on the smaller images.
I am able to obtain the bounding boxes through the following, but how do I save/extract/export the images within each bounding box?
import skimage.data
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import selectivesearch
import time
import io
import PIL
import scipy.misc
from skimage.io import imread
from PIL import Image
from skimage.transform import rescale, resize, downscale_local_mean
def main():
# loading astronaut image
# image = skimage.io.imread('/Users/vivek/Desktop/IMG_3350.JPG')
# img = resize(image, (500,500), mode = 'reflect')
img = skimage.io.imread('/Users/vivek/Downloads/IMG_3350_640x480.JPG')
print ('image loaded')
# perform selective search
print ('initializing selective search')
start = time.time()
img_lbl, regions = selectivesearch.selective_search(
img, scale=600, sigma=0.9, min_size=10)
candidates = set()
for r in regions:
# excluding same rectangle (with different segments)
if r['rect'] in candidates:
continue
# excluding regions smaller than 2000 pixels
if r['size'] < 2000:
continue
# distorted rects
x, y, w, h = r['rect']
if w / h > 1.2 or h / w > 1.2:
continue
candidates.add(r['rect'])
print ('selective search complete')
end = time.time()
totalTime = end - start
print ('time taken to run this is : ' + str(totalTime))
# draw rectangles on the original image
fig, ax = plt.subplots(ncols=1, nrows=1, figsize=(6, 6))
ax.imshow(img)
for x, y, w, h in candidates:
print(x, y, w, h)
rect = mpatches.Rectangle(
(x, y), w, h, fill=False, edgecolor='red', linewidth=1)
ax.add_patch(rect)
#plt.imsave("testerimage.jpg", None)
plt.show()
if __name__ == "__main__":
main()
Thanks in advance
You already get each rectangle with the line
for x, y, w, h in candidates:
To get the image inside this rectangle, just do:
imgRect = img[y:y+h,x:x+w]
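If you then want to save each extracted region to disk, something like this should work (a sketch; the output folder and file names are just placeholders):
import os
import skimage.io

out_dir = 'crops'                          # placeholder output folder
os.makedirs(out_dir, exist_ok=True)

for i, (x, y, w, h) in enumerate(candidates):
    imgRect = img[y:y+h, x:x+w]
    skimage.io.imsave(os.path.join(out_dir, 'region_{}.png'.format(i)), imgRect)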
I want to use OCR to capture the bowling scores from the monitor at the lanes. I had a look at this sudoku solver, as I think it's pretty similar (numbers and grids, right?). It has trouble finding the horizontal lines. Has anyone got any tips for pre-processing this image to make it easier to detect the lines (or numbers!)? Also, any tips for how to deal with the split (the orange ellipse around some of the 8s in the image)?
So far I have got the outline of the score area and cropped it.
import matplotlib
matplotlib.use('TkAgg')
from skimage import io
import numpy as np
import matplotlib.pyplot as plt
from skimage import measure
from skimage.color import rgb2gray
# import pytesseract
from matplotlib.path import Path
from qhd import *
def polygonArea(poly):
"""
Return area of an unclosed polygon.
:see: https://stackoverflow.com/a/451482
:param poly: (n,2)-array
"""
# we need a plain list for the following operations
if isinstance(poly, np.ndarray):
poly = poly.tolist()
segments = zip(poly, poly[1:] + [poly[0]])
return 0.5 * abs(sum(x0*y1 - x1*y0
for ((x0, y0), (x1, y1)) in segments))
filename = 'good.jpg'
image = io.imread(filename)
image = rgb2gray(image)
# Find contours at a constant value of 0.4
contours = measure.find_contours(image, 0.4)
# Display the image and plot all contours found
fig, ax = plt.subplots()
c = 0
biggest = None
biggest_size = 0
for n, contour in enumerate(contours):
curr_size = polygonArea(contour)
if curr_size > biggest_size:
biggest = contour
biggest_size = curr_size
biggest = qhull2D(biggest)
# Approximate that so we just get a rectangle.
biggest = measure.approximate_polygon(biggest, 500)
# vertices of the cropping polygon
yc = biggest[:,0]
xc = biggest[:,1]
xycrop = np.vstack((xc, yc)).T
# xy coordinates for each pixel in the image
nr, nc = image.shape
ygrid, xgrid = np.mgrid[:nr, :nc]
xypix = np.vstack((xgrid.ravel(), ygrid.ravel())).T
# construct a Path from the vertices
pth = Path(xycrop, closed=False)
# test which pixels fall within the path
mask = pth.contains_points(xypix)
# reshape to the same size as the image
mask = mask.reshape(image.shape)
# create a masked array
masked = np.ma.masked_array(image, ~mask)
# if you want to get rid of the blank space above and below the cropped
# region, use the min and max x, y values of the cropping polygon:
xmin, xmax = int(xc.min()), int(np.ceil(xc.max()))
ymin, ymax = int(yc.min()), int(np.ceil(yc.max()))
trimmed = masked[ymin:ymax, xmin:xmax]
plt.imshow(trimmed, cmap=plt.cm.gray), plt.title('trimmed')
plt.show()
https://imgur.com/LijB85I is an example of how the score is displayed.
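In case it helps to know what I have in mind for the line detection itself, here is a rough, untested sketch using a probabilistic Hough transform on the trimmed image from above; the Canny sigma, threshold, line length and gap values are all guesses that would need tuning:
from skimage.feature import canny
from skimage.transform import probabilistic_hough_line

# trimmed is the masked, cropped grayscale score area produced above
edge_map = canny(trimmed.filled(0), sigma=2)
lines = probabilistic_hough_line(edge_map, threshold=10, line_length=100, line_gap=5)

# keep only roughly horizontal segments, which should correspond to the grid rows
horizontal = [((x0, y0), (x1, y1)) for (x0, y0), (x1, y1) in lines if abs(y1 - y0) < 5]
print('found {} horizontal segments'.format(len(horizontal)))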