generating equal diagonal lines in PIL - python

What I'm trying to do is generate equal-width diagonal lines in PIL. My approach is to first draw a square image made of equal-width horizontal stripes and then rotate it by 45 degrees. But after the rotation the stripes no longer cover the whole image: there shouldn't be any black areas, and the stripes should still be of equal width. It should also work with more than two colors.
code:
# Question code: paint one horizontal band per colour on a 1000x1000 canvas,
# save it, and reopen it with the intention of rotating it.
import random
im = Image.new('RGB', (1000, 1000), (255, 255, 255))
draw = ImageDraw.Draw(im)
colors = [(255,0,255), (0,0,255)]
random.shuffle(colors)
length = len(colors)
# Height of one band; with true division this is a float.
amount = 1000 / length
# Endpoints of the current band's centre line, spanning the full width.
x1 = 0
y1 = 0
x2 = 1000
y2 = 0
for color in colors:
# Each band is a thick horizontal line whose y is offset by half a band.
shape = [(x1, y1 + amount // 2), (x2, y2 + amount // 2)]
draw.line(shape, fill=color, width=int(amount))
# Move down by one band for the next colour.
y1 += amount
y2 += amount
im.save("pre_diagonal.png")
colorimage = Image.open('pre_diagonal.png')
# NOTE(review): this binds the rotate method without calling it; the snippet
# appears to be cut off here.
out = colorimage.rotate

You can do it by first generating an image of vertical lines like I showed you in my answer to your other question, rotating that by 45°, and then cropping it. To avoid having areas of black, you need to generate an initial image that is large enough for the cropping.
In this case that's simply a square image with sides the length of the hypotenuse (diagonal) of the final target image's size.
i.e.
Graphically, here's what I mean:
At any rate, here's the code that does it:
from math import hypot
from PIL import Image, ImageDraw
import random
# Size of the final, cropped image.
IMG_WIDTH, IMG_HEIGHT = 1000, 1000
# The working canvas is a square whose side is the diagonal of the target,
# so the centre crop taken after rotating stays inside the painted area.
DIAG = round(hypot(IMG_WIDTH, IMG_HEIGHT))

img = Image.new('RGB', (DIAG, DIAG), (255, 255, 255))
draw = ImageDraw.Draw(img)

colors = [(255,0,255), (0,0,255)]
random.shuffle(colors)

length = len(colors)  # Number of stripes.
line_width = DIAG / length  # Width of each stripe.

# Paint the stripes as thick vertical lines; `stripe_x` is the centre of the
# stripe currently being drawn.
stripe_x = line_width / 2
for stripe_color in colors:
    draw.line(
        [(stripe_x, 0), (stripe_x, DIAG)],
        fill=stripe_color,
        width=round(line_width),
    )
    stripe_x += line_width

# Turn the vertical stripes into diagonal ones.
img = img.rotate(-45, resample=Image.Resampling.BICUBIC)

# Cut the target-sized window out of the middle of the canvas.
difx = (DIAG-IMG_WIDTH) // 2
dify = (DIAG-IMG_HEIGHT) // 2
img = img.crop((difx, dify, difx+IMG_WIDTH, dify+IMG_HEIGHT))
img.save('diagonal.png')
#img.show()
Here's the resulting image:

Related

How do I divide a given image into 8 unique pie segments?

I am pretty new to Python and want to do the following: I want to divide the following image into 8 pie segments:
I want it to look something like this (I made this in PowerPoint):
The background should be black and the edge of the figure should have an unique color as well as each pie segment.
EDIT: I have written a code that divides the whole image in 8 segments:
# Question code: draw four straight lines across an image (both diagonals,
# the vertical centre line and the horizontal centre line) and display it.
from PIL import Image, ImageDraw
im=Image.open('C:/Users/20191881/Documents/OGO Beeldanalyse/Python/asymmetrie/rotation.png')
fill = 255
draw = ImageDraw.Draw(im)
# Diagonal from the top-left corner to the bottom-right corner.
draw.line((0,0) + im.size, fill)
# Diagonal from the bottom-left corner to the top-right corner.
draw.line((0, im.size[1], im.size[0], 0), fill)
# Vertical line through the horizontal midpoint.
draw.line((0.5*im.size[0],0, 0.5*im.size[0], im.size[1]), fill)
# Horizontal line through the vertical midpoint.
draw.line((0, 0.5*im.size[1], im.size[0], 0.5*im.size[1]), fill)
del draw
im.show()
The output gives:
The only thing that is left to do is to find a way to make each black segment inside the border an unique color and also give all the white edge segments an unique color.
Your code divides the image in eight parts, that's correct, but with respect to the image center, you don't get eight "angular equally" pie segments like you show in your sketch.
Here would be my solution, only using Pillow and the math module:
import math
from PIL import Image, ImageDraw
def segment_color(i_color, n_colors):
    """Return the RGB fill colour for pie segment ``i_color`` of ``n_colors``.

    Red ramps linearly from 64 to 192 and green from 128 to 224 as
    ``i_color`` goes from 0 to ``n_colors - 1``; blue is fixed at 255.

    A one-colour palette (``n_colors == 1``) yields the start of the ramp
    instead of raising ZeroDivisionError.
    """
    steps = n_colors - 1
    if steps == 0:
        # Only one colour requested: there is no ramp to interpolate along.
        steps = 1
    r = int((192 - 64) / steps * i_color + 64)
    g = int((224 - 128) / steps * i_color + 128)
    b = 255
    return (r, g, b)
# Load image; generate ImageDraw
im = Image.open('path_to/vgdrD.png').convert('RGB')
draw = ImageDraw.Draw(im)

# Number of pie segments (must be an even number)
n = 8

# Replace (all-white) edge with defined edge color
edge_color = (255, 128, 0)
pixels = im.load()
for y in range(im.height):
    for x in range(im.width):
        if pixels[x, y] == (255, 255, 255):
            pixels[x, y] = edge_color

# Draw lines with defined line color
line_color = (0, 255, 0)
d = min(im.width, im.height) - 10
# Integer pixel centre on both axes (the y component used to be computed as
# int(im.height)/2, i.e. a float, because of a misplaced parenthesis).
center = (im.width // 2, im.height // 2)
# Each line runs through the centre, so n/2 lines give n segments.
for i in range(n // 2):
    angle = 360 / n * i
    x1 = math.cos(angle/180*math.pi) * d/2 + center[0]
    y1 = math.sin(angle/180*math.pi) * d/2 + center[1]
    x2 = math.cos((180+angle)/180*math.pi) * d/2 + center[0]
    y2 = math.sin((180+angle)/180*math.pi) * d/2 + center[1]
    draw.line([(x1, y1), (x2, y2)], line_color)

# Fill pie segments with defined segment colors
for i in range(n):
    # Half a segment of angular offset puts the seed in the middle of the
    # segment, 20 px away from the centre.
    angle = 360 / n * i + 360 / n / 2
    x = math.cos(angle/180*math.pi) * 20 + center[0]
    y = math.sin(angle/180*math.pi) * 20 + center[1]
    # Seed the flood fill with integer pixel coordinates.
    ImageDraw.floodfill(im, (int(x), int(y)), segment_color(i, n))

im.save(str(n) + '_pie.png')
For n = 8 pie segments, the following result is produced:
The first step is to replace all white pixels in the original image with the desired edge color. Of course, the assumption here is, that there are no other (white) pixels in the image. Also, this might be better done using NumPy and vectorized code, but I wanted to keep the solution Pillow-only.
Next step is to draw the (green) lines. Here, I calculate the proper coordinates of the lines' start and end using sin and cos.
The last step is to flood fill the pie segments' area, cf. ImageDraw.floodfill. Therefore, I calculate the seed points the same way as before, but add an angular shift to hit a point exactly within the pie segment.
As you can see, n is variable in my solution (n must be even):
Of course, there are limitations regarding the angular resolution, most due to the small image.
Hope that helps!
EDIT: Here's a modified version to also allow for individually colored edges.
import math
from PIL import Image, ImageDraw
def segment_color(i_color, n_colors):
    """Return the RGB fill colour for pie segment ``i_color`` of ``n_colors``.

    Red ramps linearly from 64 to 192 and green from 128 to 224 as
    ``i_color`` goes from 0 to ``n_colors - 1``; blue is fixed at 255.

    A one-colour palette (``n_colors == 1``) yields the start of the ramp
    instead of raising ZeroDivisionError.
    """
    steps = n_colors - 1
    if steps == 0:
        # Only one colour requested: there is no ramp to interpolate along.
        steps = 1
    r = int((192 - 64) / steps * i_color + 64)
    g = int((224 - 128) / steps * i_color + 128)
    b = 255
    return (r, g, b)
def edge_color(i_color, n_colors):
    """Return the RGB edge colour for pie segment ``i_color`` of ``n_colors``.

    Red is fixed at 255. Green falls linearly from 223 to 31 and blue from
    239 to 63 as ``i_color`` goes from 0 to ``n_colors - 1``.

    A one-colour palette (``n_colors == 1``) yields the start of the ramp
    instead of raising ZeroDivisionError.
    """
    steps = n_colors - 1
    if steps == 0:
        # Only one colour requested: there is no ramp to interpolate along.
        steps = 1
    r = 255
    g = 255 - int((224 - 32) / steps * i_color + 32)
    b = 255 - int((192 - 16) / steps * i_color + 16)
    return (r, g, b)
# Load image; generate ImageDraw
im = Image.open('images/vgdrD.png').convert('RGB')
draw = ImageDraw.Draw(im)
# Integer pixel centre on both axes (the y component used to be computed as
# int(im.height)/2, i.e. a float, because of a misplaced parenthesis).
center = (im.width // 2, im.height // 2)

# Number of pie segments (must be an even number)
n = 8

# Replace (all-white) edge with defined edge color, one angular sector at a
# time. max_len is longer than any distance inside the image, so each mask
# triangle reaches past the image border.
max_len = im.width + im.height
im_pix = im.load()
for i in range(n):
    mask = Image.new('L', im.size, 0)
    mask_draw = ImageDraw.Draw(mask)
    angle = 360 / n * i
    x1 = math.cos(angle/180*math.pi) * max_len + center[0]
    y1 = math.sin(angle/180*math.pi) * max_len + center[1]
    angle = 360 / n * (i+1)
    x2 = math.cos(angle/180*math.pi) * max_len + center[0]
    y2 = math.sin(angle/180*math.pi) * max_len + center[1]
    mask_draw.polygon([center, (x1, y1), (x2, y2)], 255)
    mask_pix = mask.load()
    # The colour only depends on the sector, so compute it once per sector
    # rather than once per pixel.
    sector_edge_color = edge_color(i, n)
    for y in range(im.height):
        for x in range(im.width):
            if im_pix[x, y] == (255, 255, 255) and mask_pix[x, y] == 255:
                im_pix[x, y] = sector_edge_color

# Draw lines with defined line color
line_color = (0, 255, 0)
d = min(im.width, im.height) - 10
# Each line runs through the centre, so n/2 lines give n segments.
for i in range(n // 2):
    angle = 360 / n * i
    x1 = math.cos(angle/180*math.pi) * d/2 + center[0]
    y1 = math.sin(angle/180*math.pi) * d/2 + center[1]
    x2 = math.cos((180+angle)/180*math.pi) * d/2 + center[0]
    y2 = math.sin((180+angle)/180*math.pi) * d/2 + center[1]
    draw.line([(x1, y1), (x2, y2)], line_color)

# Fill pie segments with defined segment colors
for i in range(n):
    # Half a segment of angular offset puts the seed in the middle of the
    # segment, 20 px away from the centre.
    angle = 360 / n * i + 360 / n / 2
    x = math.cos(angle/180*math.pi) * 20 + center[0]
    y = math.sin(angle/180*math.pi) * 20 + center[1]
    # Seed the flood fill with integer pixel coordinates.
    ImageDraw.floodfill(im, (int(x), int(y)), segment_color(i, n))

im.save(str(n) + '_pie.png')
Binary masks for each pie segment are created, and all white pixels only within that binary mask are replaced with a defined edge color.
Using NumPy still seems favorable, but I was curious to do that in Pillow only.

How to resize text for cv2.putText according to the image size in OpenCV, Python?

# Question code: write resultText near the top-left corner of openCVImage
# using a fixed font scale and thickness.
fontScale = 1
fontThickness = 1
# make sure font thickness is an integer, if not, the OpenCV functions that use this may crash
fontThickness = int(fontThickness)
# Top-left corner of the text area, placed at 5% of the image width/height.
upperLeftTextOriginX = int(imageWidth * 0.05)
upperLeftTextOriginY = int(imageHeight * 0.05)
textSize, baseline = cv2.getTextSize(resultText, fontFace, fontScale, fontThickness)
textSizeWidth, textSizeHeight = textSize
# calculate the lower left origin of the text area based on the text area center, width, and height
lowerLeftTextOriginX = upperLeftTextOriginX
lowerLeftTextOriginY = upperLeftTextOriginY + textSizeHeight
# write the text on the image
cv2.putText(openCVImage, resultText, (lowerLeftTextOriginX, lowerLeftTextOriginY), fontFace, fontScale, Color,
fontThickness)
It seems fontScale does not scale text according to the image width and height because the text is almost in the same size for different sized images. So how can I resize the text according to the image size so that all the text could fit in the image?
Here is a solution that will fit the text inside your rectangle. If your rectangles are of variable width, you can find the font scale by looping through the candidate scales from largest to smallest and measuring how much width (in pixels) your text would take at each one. As soon as the measured width drops to or below your rectangle width, you can take that scale and use it in the actual putText call:
def get_optimal_font_scale(text, width):
    """Return the largest font scale at which ``text`` is at most ``width`` px wide.

    Candidate scales run from 5.9 down to 0.1 in steps of 0.1 and are
    measured with cv.getTextSize for FONT_HERSHEY_DUPLEX at thickness 1.

    Scale 0 is deliberately not a candidate, since it would give text with
    no size. If the text does not fit even at 0.1, that smallest scale is
    returned.
    """
    min_tenths = 1
    for tenths in reversed(range(min_tenths, 60)):
        font_scale = tenths / 10
        text_size = cv.getTextSize(text, fontFace=cv.FONT_HERSHEY_DUPLEX, fontScale=font_scale, thickness=1)
        # getTextSize returns ((width, height), baseline).
        if text_size[0][0] <= width:
            return font_scale
    return min_tenths / 10
For me, this worked:
scale = 1 # this value can be from 0 to 1 (0,1] to change the size of the text relative to the image
fontScale = min(imageWidth,imageHeight)/(25/scale)
just keep in mind that the font type can affect the 25 constant
Approach
One way to approach this is to scale the font size proportionally to the size of the image. In my experience, more natural results are obtained when applying this not only to fontScale, but also to thickness. For example:
import math
import cv2
FONT_SCALE = 2e-3 # Adjust for larger font size in all images
THICKNESS_SCALE = 1e-3 # Adjust for larger thickness in all images
img = cv2.imread("...")
height, width, _ = img.shape
font_scale = min(width, height) * FONT_SCALE
thickness = math.ceil(min(width, height) * THICKNESS_SCALE)
Example
Let's take this free-to-use stock photo as an example. We create two versions of the base image by rescaling to a width of 2000px and 600px (keeping the aspect ratio constant). With the approach above, text looks appropriately sized to the image size in both cases (here shown in an illustrative use case where we label bounding boxes):
2000px
600px
Full code to reproduce (but note: input images have to be preprocessed):
import math
import cv2
FONT_SCALE = 2e-3 # Adjust for larger font size in all images
THICKNESS_SCALE = 1e-3 # Adjust for larger thickness in all images
TEXT_Y_OFFSET_SCALE = 1e-2 # Adjust for larger Y-offset of text and bounding box
img_width_to_bboxes = {
2000: [
{"xywh": [120, 400, 1200, 510], "label": "car"},
{"xywh": [1080, 420, 790, 340], "label": "car"},
],
600: [
{"xywh": [35, 120, 360, 155], "label": "car"},
{"xywh": [325, 130, 235, 95], "label": "car"},
],
}
def add_bbox_and_text() -> None:
    """Draw every configured bounding box and its label, then save a copy.

    For each width in ``img_width_to_bboxes`` the matching rescaled image is
    read, annotated with green rectangles and labels whose font scale,
    thickness and vertical offset are proportional to the image size, and
    written next to the input with a ``_with_text`` suffix.

    Raises:
        FileNotFoundError: if an input image cannot be read.
    """
    for img_width, bboxes in img_width_to_bboxes.items():
        # Base image from https://www.pexels.com/photo/black-suv-beside-grey-auv-crossing-the-pedestrian-line-during-daytime-125514/
        # Two rescaled versions of the base image created with width of 600px and 2000px
        path = f"pexels-kaique-rocha-125514_{img_width}.jpg"
        img = cv2.imread(path)
        if img is None:
            # cv2.imread reports an unreadable file by returning None rather
            # than raising, which would otherwise surface as an obscure
            # AttributeError on `img.shape`.
            raise FileNotFoundError(f"Could not read image: {path}")
        height, width, _ = img.shape

        # These depend only on the image, not on the individual box.
        font_scale = min(width, height) * FONT_SCALE
        thickness = math.ceil(min(width, height) * THICKNESS_SCALE)
        text_y_offset = int(height * TEXT_Y_OFFSET_SCALE)

        for bbox in bboxes:
            x, y, w, h = bbox["xywh"]
            cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
            cv2.putText(
                img,
                bbox["label"],
                # Place the label slightly above the box's top edge.
                (x, y - text_y_offset),
                fontFace=cv2.FONT_HERSHEY_TRIPLEX,
                fontScale=font_scale,
                thickness=thickness,
                color=(0, 255, 0),
            )
        cv2.imwrite(f"pexels-kaique-rocha-125514_{img_width}_with_text.jpg", img)
if __name__ == "__main__":
add_bbox_and_text()
If you take fontScale = 1 for images with size approximately 1000 x 1000, then this code should scale your font correctly.
fontScale = (imageWidth * imageHeight) / (1000 * 1000) # Would work best for almost square images
If you are still having any problem, do comment.
I implemented a function that finds the largest font scale that fits the image, together with the location that centers the text.
See whether this code helps you.
def findFontLocate(s_txt, font_face, font_thick, cv_bgd):
    """Find the largest tested font scale that fits ``s_txt`` inside ``cv_bgd``.

    Scales from 1.0 up to (but excluding) 6.0 are tried in steps of 0.2; the
    search stops at the first scale whose padded text box is as wide or as
    tall as the background.

    Returns a tuple ``((lt_x, lt_y, rb_x, rb_y), best_scale, baseline)``
    describing the centred text rectangle (its bottom edge raised by the
    baseline), the chosen scale and the baseline at that scale. If no tested
    scale fits, the rectangle has zero size and ``best_scale`` is 1.0.
    """
    bgd_h, bgd_w = cv_bgd.shape[0], cv_bgd.shape[1]
    padding = 2 * font_thick

    # (box width, box height, baseline, scale) of the last scale that fitted.
    fitted = (0, 0, 0, 1.0)
    for candidate in np.arange(1.0, 6.0, 0.2):
        (glyph_w, glyph_h), glyph_baseline = cv2.getTextSize(
            s_txt, font_face, candidate, font_thick)
        box_w = glyph_w + padding
        box_h = glyph_h + padding + glyph_baseline
        if not (box_w < bgd_w and box_h < bgd_h):
            break
        fitted = (box_w, box_h, glyph_baseline, candidate)

    txt_rect_w, txt_rect_h, baseline, best_scale = fitted
    left = round(bgd_w/2-txt_rect_w/2)
    top = round(bgd_h/2-txt_rect_h/2)
    right = round(bgd_w/2+txt_rect_w/2)
    bottom = round(bgd_h/2+txt_rect_h/2)-baseline
    return (left, top, right, bottom), best_scale, baseline
Note that the function accepts four arguments: s_txt (the string to render), font_face, font_thick and cv_bgd (the background image as an ndarray).
When you call putText(), write the code as follows:
cv2.putText(
cv_bgd, s_txt, (lt_x, rb_y), font_face,
best_scale, (0,0,0), font_thick, cv2.LINE_AA)
You can use the get_optimal_font_scale function as below to adjust the font size according to the image size:
def get_optimal_font_scale(text, width):
    """Return the largest font scale at which ``text`` is at most ``width`` px wide.

    Candidate scales run from 5.9 down to 0.1 in steps of 0.1 and are
    measured with cv2.getTextSize for FONT_HERSHEY_DUPLEX at thickness 1.

    Scale 0 is deliberately not a candidate, since it would give text with
    no size. If the text does not fit even at 0.1, that smallest scale is
    returned.
    """
    min_tenths = 1
    for tenths in reversed(range(min_tenths, 60)):
        font_scale = tenths / 10
        text_size = cv2.getTextSize(text, fontFace=cv2.FONT_HERSHEY_DUPLEX, fontScale=font_scale, thickness=1)
        # getTextSize returns ((width, height), baseline).
        if text_size[0][0] <= width:
            return font_scale
    return min_tenths / 10
fontScale = 3*(img.shape[1]//6)
font_size = get_optimal_font_scale(text, fontScale)
cv2.putText(img, text, org, font, font_size, color, thickness, cv2.LINE_AA)
You can change fontScale for your image.
It works for me.
// Finds a font scale at which `txt` (FONT_HERSHEY_DUPLEX, thickness 2) takes
// at most 50% of the box width and 50% of the box height.
//
// Scales are tried from 2.0 downwards in steps of 0.1, never going below 0.1.
// On return `textSize` and `baseline` hold the measurement taken at the
// returned scale, so callers can position the text with them directly.
//
// Returns 1.0 without touching `textSize`/`baseline` when `txt` is null or the
// box has a non-positive dimension.
double calc_scale_rectbox(const char *txt, int box_width, int box_height,
                          cv::Size &textSize, int &baseline)
{
    // A null string has nothing to measure; an empty box would divide by zero.
    if (!txt || box_width <= 0 || box_height <= 0) return 1.0;

    const double min_scale = 0.1;
    const double step = 0.1;
    double scale = 2.0;
    for (;;)
    {
        textSize = cv::getTextSize(txt, FONT_HERSHEY_DUPLEX, scale, 2,
                                   &baseline);
        // Text size as an integer percentage of the box.
        const int w_aprx = textSize.width * 100 / box_width;
        const int h_aprx = textSize.height * 100 / box_height;
        const bool fits = (w_aprx <= 50 && h_aprx <= 50);
        // Return the scale that was actually measured, and stop at the lower
        // bound so the search always terminates with a positive scale.
        if (fits || scale - step < min_scale)
            return scale;
        scale -= step;
    }
}
......
cv::Size textSize;
int baseline = 0;
double scale = calc_scale_rectbox(win_caption.c_str(), width,
height, textSize, baseline);
cv::putText(img, win_caption, Point(width / 2 - textSize.width / 2,
(height + textSize.height - baseline + 2) / 2),
FONT_HERSHEY_DUPLEX, scale, CV_RGB(255, 255, 255), 2);
A simple utility function:
def optimal_font_dims(img, font_scale = 2e-3, thickness_scale = 5e-3):
    """Return ``(font_scale, thickness)`` proportional to the image size.

    Both values are derived from the shorter image side: the font scale is
    that side times ``font_scale``, the thickness is that side times
    ``thickness_scale`` rounded up to a whole number.

    Only the first two entries of ``img.shape`` (height, width) are read, so
    2-D grayscale images work as well as multi-channel ones.
    """
    h, w = img.shape[:2]
    short_side = min(w, h)
    font_scale = short_side * font_scale
    thickness = math.ceil(short_side * thickness_scale)
    return font_scale, thickness
Usage:
font_scale, thickness = optimal_font_dims(image)
cv2.putText(image, "LABEL", (x, y), cv2.FONT_HERSHEY_SIMPLEX, font_scale, (255,0,0), thickness)

Image processing - eliminate arc-like smears

I am dealing with this kind of image
(upper is post-processed)
(lower is raw)
So, first I converted the grayscale image into pure black and white binary image. I am interested in detecting the white blobs, and want to get rid of the arc-like smears in the corners. How can I do that?
In general, I know that my targets are almost circular in shape and not too big, but I want to encode something that automatically gets rid of everything else, like the lighter arcs in the upper left and right corners.
How would I do this in python, ideally skimage?
You can just detect circle of the right size with skimage's methods hough_circle and hough_circle_peaks and cut it out.
Here I adapted my previous answer to your other question to do this:
# skimage version 0.14.0
import math
import numpy as np
import matplotlib.pyplot as plt
from skimage import color
from skimage.io import imread
from skimage.transform import hough_circle, hough_circle_peaks
from skimage.feature import canny
from skimage.draw import circle
from skimage.util import img_as_ubyte
INPUT_IMAGE = 'dish1.png' # input image name
BEST_COUNT = 1 # how many circles to detect (one dish)
MIN_RADIUS = 100 # min radius of the Petri dish
MAX_RADIUS = 122 # max radius of the Petri dish (in pixels)
LARGER_THRESH = 1.2 # circle is considered significantly larger than another one if its radius is at least so much bigger
OVERLAP_THRESH = 0.1 # circles are considered overlapping if this part of the smaller circle is overlapping
def circle_overlap_percent(centers_distance, radius1, radius2):
    '''
    Calculating the percentage area overlap between circles

    Returns the area of the intersection of the two circles as a fraction of
    the area of the smaller circle: 0.0 when they do not overlap, 1.0 when
    the smaller circle lies entirely inside the larger one.

    See Gist for comments:
    https://gist.github.com/amakukha/5019bfd4694304d85c617df0ca123854
    '''
    # R is always the larger radius, r the smaller one.
    R, r = max(radius1, radius2), min(radius1, radius2)
    if centers_distance >= R + r:
        return 0.0
    elif R >= centers_distance + r:
        return 1.0
    # From here on the circles properly intersect, so centers_distance > 0.
    R2, r2 = R**2, r**2
    # Distance from the larger circle's centre to the common chord.
    x1 = (centers_distance**2 + R2 - r2) / (2*centers_distance)
    # Distance from the smaller circle's centre to the common chord.
    x2 = abs(centers_distance - x1)
    # Half the chord length; clamped because rounding can push the radicand
    # marginally below zero.
    y = math.sqrt(max(R2 - x1**2, 0.0))
    # Circular segment cut from the larger circle (sector minus triangle).
    a1 = R2 * math.atan2(y, x1) - x1*y
    # Minor circular segment cut from the smaller circle.
    small_segment = r2 * math.atan2(y, x2) - x2*y
    if x1 <= centers_distance:
        # The chord lies between the two centres.
        a2 = small_segment
    else:
        # The chord lies beyond the smaller centre, so the overlap contains
        # the smaller circle's major segment.
        a2 = math.pi * r2 - small_segment
    overlap_area = a1 + a2
    return overlap_area / (math.pi * r2)
def circle_overlap(c1, c2):
    '''Overlap fraction of two circles given as (coord, coord, radius) triples.'''
    delta_a = c1[0]-c2[0]
    delta_b = c1[1]-c2[1]
    # Euclidean distance between the two centres.
    centers_distance = math.sqrt(delta_a**2 + delta_b**2)
    return circle_overlap_percent(centers_distance, c1[2], c2[2])
def inner_circle(cs, c, thresh):
    '''Is circle `c` "inside" one of the `cs` circles?

    True when some circle in `cs` is not significantly smaller than `c`
    (by the LARGER_THRESH factor) and overlaps it by more than `thresh`.
    '''
    return any(
        # A new circle significantly larger than the existing one cannot be
        # inside it; otherwise a significant overlap marks it as inner.
        c[2] <= dc[2]*LARGER_THRESH and circle_overlap(dc, c) > thresh
        for dc in cs
    )
# Load picture and detect edges
image = imread(INPUT_IMAGE, 1)
image = img_as_ubyte(image)
edges = canny(image, sigma=3, low_threshold=10, high_threshold=50)

# Detect circles of specific radii
hough_radii = np.arange(MIN_RADIUS, MAX_RADIUS, 2)
hough_res = hough_circle(edges, hough_radii)

# Select the most prominent circles (in order from best to worst)
accums, cx, cy, radii = hough_circle_peaks(hough_res, hough_radii)

# Determine BEST_COUNT circles to be drawn
drawn_circles = []
for crcl in zip(cy, cx, radii):
    # Do not draw circles if they are mostly inside better fitting ones
    if not inner_circle(drawn_circles, crcl, OVERLAP_THRESH):
        # A good circle found: exclude smaller circles it covers
        i = 0
        while i<len(drawn_circles):
            if circle_overlap(crcl, drawn_circles[i]) > OVERLAP_THRESH:
                # Popping shifts the next candidate into slot i, so the
                # index is only advanced when nothing was removed.
                drawn_circles.pop(i)
            else:
                i += 1
        # Remember the new circle
        drawn_circles.append(crcl)
    # Stop after have found more circles than needed
    if len(drawn_circles)>BEST_COUNT:
        break
drawn_circles = drawn_circles[:BEST_COUNT]

# Draw circle and cut it out
colors = [(250, 0, 0), (0, 250, 0), (0, 0, 250)]
fig, ax = plt.subplots(ncols=1, nrows=3, figsize=(10, 4))
color_image = color.gray2rgb(image)
black_image = np.zeros_like(image)
for center_y, center_x, radius in drawn_circles[:1]:
    circy, circx = circle(center_y, center_x, radius, image.shape)
    # Named circle_rgb so the imported skimage `color` module is not shadowed.
    circle_rgb = colors.pop(0)
    color_image[circy, circx] = circle_rgb
    black_image[circy, circx] = image[circy, circx]
    # Rotate the palette: the used colour goes to the back of the list.
    colors.append(circle_rgb)

# Output
ax[0].imshow(image, cmap=plt.cm.gray) # original image
ax[1].imshow(color_image) # detected circle
ax[2].imshow(black_image, cmap=plt.cm.gray) # cutout
plt.show()
Output:
Again, as in my previous answer, most of the code here is doing "hierarchy" computation to find the biggest best fitting circle.

Finding all the bounding rectangles of all non-transparent regions in PIL

I have a transparent-background image with some non-transparent text.
And I want to find all the bounding boxes of each individual word in the text.
Here is the code about creating a transparent image and draw some text ("Hello World", for example) , after that, do affine transform and thumbnail it.
from PIL import Image, ImageFont, ImageDraw, ImageOps
import numpy as np
fontcolor = (255,255,255)
fontsize = 180
# padding rate for setting the image size of font
fimg_padding = 1.1
# check code bbox padding rate
bbox_gap = fontsize * 0.05
# Rrotation +- N degree
# Choice a font type for output---
font = ImageFont.truetype('Fonts/Bebas.TTF', fontsize)
# the text is "Hello World"
code = "Hello world"
# Get the related info of font---
code_w, code_h = font.getsize(code)
# Setting the image size of font---
img_size = int((code_w) * fimg_padding)
# Create a RGBA image with transparent background
img = Image.new("RGBA", (img_size,img_size),(255,255,255,0))
d = ImageDraw.Draw(img)
# draw white text
code_x = (img_size-code_w)/2
code_y = (img_size-code_h)/2
d.text( ( code_x, code_y ), code, fontcolor, font=font)
# img.save('initial.png')
# Transform the image---
img = img_transform(img)
# crop image to the size equal to the bounding box of whole text
alpha = img.split()[-1]
img = img.crop(alpha.getbbox())
# resize the image
img.thumbnail((512,512), Image.ANTIALIAS)
# img.save('myimage.png')
# what I want is to find all the bounding box of each individual word
boxes=find_all_bbx(img)
Here is the code about affine transform (provided here for those who want to do some experiment)
def find_coeffs(pa, pb):
    """Solve for the 8 perspective coefficients relating four point pairs.

    For each pair ``(x, y)`` from ``pa`` and ``(X, Y)`` from ``pb`` the
    returned coefficients ``(a, b, c, d, e, f, g, h)`` satisfy::

        X = (a*x + b*y + c) / (g*x + h*y + 1)
        Y = (d*x + e*y + f) / (g*x + h*y + 1)

    Args:
        pa: four ``(x, y)`` points.
        pb: the four corresponding ``(X, Y)`` points.

    Returns:
        A float ndarray of shape ``(8,)``.

    Raises:
        numpy.linalg.LinAlgError: if the points do not determine a unique
            solution (singular system).
    """
    rows = []
    for p1, p2 in zip(pa, pb):
        rows.append([p1[0], p1[1], 1, 0, 0, 0, -p2[0]*p1[0], -p2[0]*p1[1]])
        rows.append([0, 0, 0, p1[0], p1[1], 1, -p2[1]*p1[0], -p2[1]*p1[1]])

    # The builtin float replaces the removed np.float alias.
    A = np.array(rows, dtype=float)
    B = np.array(pb, dtype=float).reshape(8)
    # Four point pairs give a square 8x8 system, so it can be solved directly
    # instead of inverting the normal equations.
    return np.linalg.solve(A, B)
def rand_degree(st,en,gap):
    """Return ``st`` plus a random offset truncated toward zero.

    The offset is a uniform sample from [0, 1) multiplied by ``en - st`` and
    then by ``gap``.
    """
    span = en - st
    offset = np.random.random() * span * gap
    return np.fix(offset) + st
def img_transform(img):
    """Apply a fixed affine shear followed by a random perspective warp.

    The image is first transformed with an affine matrix using ``m = -0.5``
    onto a canvas widened by ``abs(m) * width``. Four points are then drawn
    at random, one near each corner (within 20% of the width from the
    corner), and used with the sheared canvas corners to build a perspective
    transform back to the original ``(width, height)`` size.

    Returns the transformed image.
    """
    width, height = img.size
    # Parenthesised so this works as a function call on Python 3 as well.
    print(img.size)

    # Step 1: affine transform onto a wider canvas.
    m = -0.5
    xshift = abs(m) * width
    new_width = width + int(round(xshift))
    img = img.transform((new_width, height), Image.AFFINE,
                        (1, m, -xshift if m > 0 else 0, 0, 1, 0), Image.BICUBIC)

    # Step 2: pick one random point near each corner.
    range_n = width*0.2
    gap_n = 1
    x1 = rand_degree(0,range_n,gap_n)
    y1 = rand_degree(0,range_n,gap_n)
    x2 = rand_degree(width-range_n,width,gap_n)
    y2 = rand_degree(0,range_n,gap_n)
    x3 = rand_degree(width-range_n,width,gap_n)
    y3 = rand_degree(height-range_n,height,gap_n)
    x4 = rand_degree(0,range_n,gap_n)
    y4 = rand_degree(height-range_n,height,gap_n)

    # Step 3: perspective transform defined by the random quadrilateral and
    # the corners of the sheared canvas.
    coeffs = find_coeffs(
        [(x1, y1), (x2, y2), (x3, y3), (x4, y4)],
        [(0, 0), (width, 0), (new_width, height), (xshift, height)])
    img = img.transform((width, height), Image.PERSPECTIVE, coeffs, Image.BICUBIC)
    return img
How to implement find_all_bbx to find the bounding box of each individual word?
For example, one of the box can be found in 'H' ( you can download the image to see the partial result).
For what you want to do you need to label the individual words and then compute the bounding box of each object with the same label.
The most straightforward approach here is to take the minimum and maximum positions of the pixels that make up each word.
The labeling is a little bit more difficult. For example you could use a morphological operation to combine the letters of the words (morphological opening, see PIL documentation) and then use ImageDraw.floodfill. Or you could try to anticipate the positions of the words from the position where you first draw the text
code_x and code_y
and the chosen font, the size of the letters and the spacing (this will be trickier, I think).

What's the most efficient way to select a non-rectangular ROI of an Image in OpenCV?

I want to create a binary image mask, containing only ones and zeros in python. The Region of Interest(white) is non-rectangular, defined by 4 corner points and looks for example as follows:
In my approach, I first calculate the line equations of the upper and lower ROI borders, and then I check for each mask element whether it lies above or below those borders. The code works, but it is far too slow: a 2000x1000 mask takes up to 4 s to process on my machine.
from matplotlib import pyplot as plt
import cv2
import numpy as np
import time
def line_eq(line):
    """Return ``(slope, intersect)`` of the line through two points.

    input:
    line: a pair of points ``((x1, y1), (x2, y2))``
    returns:
    the slope as a float and the y-axis intercept truncated to an int
    """
    start, end = line
    start_x, start_y = start
    end_x, end_y = end
    run = float((end_x - start_x))
    slope = (end_y - start_y) / run
    # y = slope * x + intersect, evaluated at the first point and solved for
    # the intercept.
    intersect = int(slope * (-start_x) + start_y)
    return slope, intersect
def maskByROI(mask,ROI):
"""
input:
ROI: with 4 corner points e.g. ((x0,y0),(x1,y1),(x2,y2),(x3,y3))
mask:
output:
mask with roi set to 1, rest to 0
"""
# Border lines: line1 runs through corners 0 and 1, line2 through 2 and 3.
line1 = line_eq((ROI[0],ROI[1]))
line2 = line_eq((ROI[2],ROI[3]))
slope1 = line1[0]
intersect1 = line1[1]
# Every branch below visits each mask element in a Python-level loop.
# (x, y) are the first- and second-axis indices yielded by np.ndenumerate,
# and elements on the rejected side of the line are set to 0 in place.
#upper line
if slope1>0:
for (x,y), value in np.ndenumerate(mask):
if y > slope1*x +intersect1:
mask[x,y] = 0
else:
for (x,y), value in np.ndenumerate(mask):
if y < slope1*x +intersect1:
mask[x,y] = 0
#lower line
slope2 = line2[0]
intersect2 = line2[1]
# The side that is kept flips with the sign of the slope, mirroring the
# handling of the first line.
if slope2<0:
for (x,y), value in np.ndenumerate(mask):
if y > slope2*x +intersect2:
mask[x,y] = 0
else:
for (x,y), value in np.ndenumerate(mask):
if y < slope2*x +intersect2:
mask[x,y] = 0
# The input array was modified in place and is also returned.
return mask
mask = np.ones((2000,1000))
myROI = ((750,0),(900,1000),(1000,1000),(1500,0))
t1 = time.time()
mask = maskByROI(mask,myROI)
t2 = time.time()
print "execution time: ", t2-t1
plt.imshow(mask,cmap='Greys_r')
plt.show()
What is a more efficient way to create a mask like this?
Are there any similar solutions for non-rectangular shapes provided by
numpy, OpenCV or a similar Library?
Draw the mask with fillPoly:
mask = np.ones((1000, 2000)) # (height, width)
myROI = [(750, 0), (900, 1000), (1000, 1000), (1500, 0)] # (x, y)
# fillPoly needs the vertices as a 32-bit integer array; a plain np.array of
# Python ints is int64 on most platforms and is rejected.
# This fills the polygon with 0 on a background of ones.
cv2.fillPoly(mask, [np.array(myROI, dtype=np.int32)], 0)
This should take ~1ms.

Categories

Resources