I'm getting a "module not found" error when executing my program.
import numpy as np
import cv2
from preprocessors import x_cord_contour, makeSquare, resize_to_pixel
I am quite sure that you are following along with the Deep Learning course by Rajeev D., and I further guess that you haven't downloaded the VM image as suggested in the video.
The functions x_cord_contour, makeSquare, and resize_to_pixel are defined in a custom module on that VM. If you are following the course without the VM, just copy and paste the functions below into your code and remove the import statement.
import numpy as np
import cv2

def x_cord_contour(contour):
    # This function takes a contour from findContours
    # and returns the x coordinate of its centroid
    M = cv2.moments(contour)
    return int(M['m10'] / M['m00'])

def makeSquare(not_square):
    # This function takes an image and makes its dimensions square,
    # adding black pixels as padding where needed
    BLACK = [0, 0, 0]
    img_dim = not_square.shape
    height = img_dim[0]
    width = img_dim[1]
    # print("Height = ", height, "Width = ", width)
    if height == width:
        square = not_square
        return square
    else:
        doublesize = cv2.resize(not_square, (2 * width, 2 * height),
                                interpolation=cv2.INTER_CUBIC)
        height = height * 2
        width = width * 2
        # print("New Height = ", height, "New Width = ", width)
        if height > width:
            pad = int((height - width) / 2)
            # print("Padding = ", pad)
            doublesize_square = cv2.copyMakeBorder(doublesize, 0, 0, pad, pad,
                                                   cv2.BORDER_CONSTANT, value=BLACK)
        else:
            # int() is needed here too: copyMakeBorder rejects float pad sizes
            pad = int((width - height) / 2)
            # print("Padding = ", pad)
            doublesize_square = cv2.copyMakeBorder(doublesize, pad, pad, 0, 0,
                                                   cv2.BORDER_CONSTANT, value=BLACK)
        doublesize_square_dim = doublesize_square.shape
        # print("Sq Height = ", doublesize_square_dim[0], "Sq Width = ", doublesize_square_dim[1])
        return doublesize_square

def resize_to_pixel(dimensions, image):
    # This function resizes an image to the specified dimensions
    buffer_pix = 4
    dimensions = dimensions - buffer_pix
    squared = image
    r = float(dimensions) / squared.shape[1]
    dim = (dimensions, int(squared.shape[0] * r))
    resized = cv2.resize(image, dim, interpolation=cv2.INTER_AREA)
    img_dim2 = resized.shape
    height_r = img_dim2[0]
    width_r = img_dim2[1]
    BLACK = [0, 0, 0]
    if height_r > width_r:
        resized = cv2.copyMakeBorder(resized, 0, 0, 0, 1,
                                     cv2.BORDER_CONSTANT, value=BLACK)
    if height_r < width_r:
        resized = cv2.copyMakeBorder(resized, 1, 0, 0, 0,
                                     cv2.BORDER_CONSTANT, value=BLACK)
    p = 2
    ReSizedImg = cv2.copyMakeBorder(resized, p, p, p, p,
                                    cv2.BORDER_CONSTANT, value=BLACK)
    img_dim = ReSizedImg.shape
    height = img_dim[0]
    width = img_dim[1]
    # print("Padded Height = ", height, "Width = ", width)
    return ReSizedImg
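As a quick sanity check, here is a minimal sketch of how these three helpers are typically used together. It assumes OpenCV 4 (where findContours returns two values) and a hypothetical input file digits.png containing dark digits on a light background:

import numpy as np
import cv2

image = cv2.imread("digits.png", 0)
_, thresh = cv2.threshold(image, 127, 255, cv2.THRESH_BINARY_INV)

# OpenCV 4 returns (contours, hierarchy); OpenCV 3 returns three values
contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# sort the contours left to right by centroid x coordinate,
# then square each digit and shrink it to roughly 20x20 pixels
contours = sorted(contours, key=x_cord_contour)
for c in contours:
    x, y, w, h = cv2.boundingRect(c)
    roi = thresh[y:y + h, x:x + w]
    final = resize_to_pixel(20, makeSquare(roi))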
I have the following picture as an example, 529 x 550 px (100 %). As a target I would like the image zoomed to about 150 %, but it should still be 529 x 550 px.
I was able to write the code using PIL, but I want to have it with cv2. Can someone help me, please?
from PIL import Image
import cv2 as cv

def zoom_at(img, x, y, zoom):
    w, h = img.size
    zoom2 = zoom * 2
    img = img.crop((x - w / zoom2, y - h / zoom2,
                    x + w / zoom2, y + h / zoom2))
    return img.resize((w, h), Image.LANCZOS)

img = Image.open("image.png")
img = zoom_at(img, 264.5, 275, 1.5)
img.save('image_zoomed.png')
@Ofer Sadan:
import cv2 as cv

def zoom(img, zoom_factor=1.5):
    return cv.resize(img, None, fx=zoom_factor, fy=zoom_factor)

img = cv.imread('original.png')
# Original: 529 × 550
height, width = img.shape[:2]
zoomed = zoom(img, 1.5)
# Zoomed: 794 × 825
cropped = zoomed[0:550, 0:529]  # wrong area: this is the top-left corner, not the middle
# Now I want to crop the middle of the new image as a variable.
cv.imwrite('zoomed.png', zoomed)
cv.imwrite('cropped.png', cropped)
There you go:

import cv2 as cv

def zoom_at(img, zoom=1, angle=0, coord=None):
    # rotate/zoom about `coord` (x, y), or about the image center if coord is None
    cy, cx = [i / 2 for i in img.shape[:-1]] if coord is None else coord[::-1]
    rot_mat = cv.getRotationMatrix2D((cx, cy), angle, zoom)
    result = cv.warpAffine(img, rot_mat, img.shape[1::-1], flags=cv.INTER_LINEAR)
    return result
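For the numbers in the question, a call might look like this (a sketch, reusing the 150 % zoom and the target point from above):

img = cv.imread('image.png')
out = zoom_at(img, zoom=1.5, coord=(264.5, 275))
cv.imwrite('image_zoomed.png', out)

Because warpAffine renders into a canvas of the input's size (img.shape[1::-1]), the output stays 529 x 550 with no separate crop step.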
Layman's version:
import cv2 as cv

def zoom_at(img, zoom, coord=None):
    """
    Simple image zooming without boundary checking.
    Centered at "coord", if given, else the image center.

    img: numpy.ndarray of shape (h, w, :)
    zoom: float
    coord: (float, float)
    """
    # Translate to zoomed coordinates
    h, w, _ = [zoom * i for i in img.shape]
    if coord is None:
        cx, cy = w / 2, h / 2
    else:
        cx, cy = [zoom * c for c in coord]

    img = cv.resize(img, (0, 0), fx=zoom, fy=zoom)
    img = img[int(round(cy - h / zoom * .5)): int(round(cy + h / zoom * .5)),
              int(round(cx - w / zoom * .5)): int(round(cx + w / zoom * .5)),
              :]
    return img
img = cv.imread('x3Lkg.png')
cv.imwrite('x3Lkg_zoomed.png', zoom_at(img, 1.5, coord=(264.5, 275)))
I have a little snippet I used a while ago. I can't currently test it, so let me know whether it actually works.
import cv2 as cv

def zoom(img, zoom_factor=2):
    return cv.resize(img, None, fx=zoom_factor, fy=zoom_factor)
And you can crop before the zoom or after it as you wish:
img = cv.imread(img_path)
cropped = img[200:300, 150:250]
zoomed = zoom(img, 3)
zoomed_and_cropped = zoom(cropped, 3)
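If you want to keep the original size after zooming, one option is a centered crop of the enlarged image. A minimal sketch, assuming zoom_factor >= 1 so the zoomed image is at least as large as the original:

h, w = img.shape[:2]
zoomed = zoom(img, 1.5)
zh, zw = zoomed.shape[:2]
y0, x0 = (zh - h) // 2, (zw - w) // 2     # top-left corner of the centered window
cropped = zoomed[y0:y0 + h, x0:x0 + w]    # same size as the original image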
For anyone who does not want to do the math manually, this works for me.
import cv2

def zoom_center(img, zoom_factor=1.5):
    y_size = img.shape[0]
    x_size = img.shape[1]

    # define new boundaries
    x1 = int(0.5 * x_size * (1 - 1 / zoom_factor))
    x2 = int(x_size - 0.5 * x_size * (1 - 1 / zoom_factor))
    y1 = int(0.5 * y_size * (1 - 1 / zoom_factor))
    y2 = int(y_size - 0.5 * y_size * (1 - 1 / zoom_factor))

    # first crop the image, then scale it back up
    img_cropped = img[y1:y2, x1:x2]
    return cv2.resize(img_cropped, None, fx=zoom_factor, fy=zoom_factor)

# read the original
img = cv2.imread('original.png')

# call our function
img_zoomed_and_cropped = zoom_center(img)

# write the zoomed and cropped version
cv2.imwrite('zoomed_and_cropped.png', img_zoomed_and_cropped)
Notice that I crop first and rescale afterwards: it is more efficient, and you will notice the difference when dealing with a live video feed.
For putting a specific point in the input image at a specific point in the output image, with a precise scale factor, you would want to use cv.warpAffine.
This function requires you to build a transformation matrix. That is easy.
import numpy as np
import cv2 as cv

def translate(tx=0, ty=0):
    T = np.eye(3)
    T[0:2, 2] = [tx, ty]
    return T

def scale(s=1, sx=1, sy=1):
    T = np.diag([s * sx, s * sy, 1])
    return T

def rotate(degrees):
    T = np.eye(3)
    # just involves some sin() and cos()
    T[0:2] = cv.getRotationMatrix2D(center=(0, 0), angle=-degrees, scale=1.0)
    return T
im = cv.imread("x3Lkg.png")
(ih, iw) = im.shape[:2]  # input height, input width

# parameters
scale_factor = 10
angle_degrees = 15
(ow, oh) = (529, 550)    # output size
(icx, icy) = (459, 352)  # zoom onto that pixel in the input
(ocx, ocy) = ((ow-1)/2, (oh-1)/2)  # put it there in the output (the exact center)

# the transformation, read from right to left
H = translate(+ocx, +ocy) @ rotate(degrees=angle_degrees) @ scale(scale_factor) @ translate(-icx, -icy)

# assume that H is affine, not a full homography
assert np.allclose(H[2], [0, 0, 1])
M = H[0:2]

# produce the picture
# use INTER_LINEAR, INTER_CUBIC, INTER_LANCZOS4 for smooth interpolation
# use INTER_AREA for scale factors much below 1
out = cv.warpAffine(im, dsize=(ow, oh), M=M, flags=cv.INTER_NEAREST)
# imshow(out)
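For a pure zoom with no rotation (the case asked about in the question), the same machinery applies with the rotation step dropped; e.g., a sketch reusing the helpers above:

H = translate(+ocx, +ocy) @ scale(1.5) @ translate(-icx, -icy)
out = cv.warpAffine(im, M=H[0:2], dsize=(ow, oh), flags=cv.INTER_CUBIC)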
I am using the putalpha function for my project, but I have a problem.
When I don't use putalpha: [screenshot of the expected result]
When I use putalpha: [screenshot of the broken result]
How can I solve this problem?
Code:

def add_logo(pos, size=5, rotation=0, alpha=255):
    mainim = Image.open("resim.png").convert("RGB")
    logoim = Image.open("pawpink.png").convert("RGBA")
    logoim = logoim.rotate(rotation, expand=1)
    logoim.putalpha(alpha)

    # Calculate size
    width, height = mainim.size
    width = width / size
    oran = (logoim.size[0] / logoim.size[1])  # oran = aspect ratio
    height = (width * (oran ** -1))
    logoim = logoim.resize((int(width), int(height)))

    mainim.paste(logoim, box=pozisyon_getir_resim(pos), mask=logoim)
    return mainim
Images:
cat.png
logo.png
I found the excellent article Watermark with PIL (Python recipe) and was able to get your program to work.
Here is my version (complete, tested):
import PIL.Image
import PIL.ImageEnhance

def pozisyon_getir_resim(pos):
    return (pos, pos)

def reduce_opacity(im, opacity):
    """Returns an image with reduced opacity."""
    assert opacity >= 0 and opacity <= 1
    if im.mode != 'RGBA':
        im = im.convert('RGBA')
    else:
        im = im.copy()
    alpha = im.split()[3]
    alpha = PIL.ImageEnhance.Brightness(alpha).enhance(opacity)
    im.putalpha(alpha)
    return im

def add_logo(pos, size=5, rotation=0, alpha=255):
    mainim = PIL.Image.open("cat.png").convert("RGB")
    logoim = PIL.Image.open("logo.png").convert("RGBA")
    logoim = logoim.rotate(rotation, expand=1)
    logoim = reduce_opacity(logoim, alpha / 255.0)

    # Calculate size
    width, height = mainim.size
    width = width / size
    oran = (logoim.size[0] / logoim.size[1])
    height = (width * (oran ** -1))
    logoim = logoim.resize((int(width), int(height)))

    if mainim.mode != 'RGBA':
        mainim = mainim.convert('RGBA')  # assign the result; convert() does not work in place
    layer = PIL.Image.new('RGBA', mainim.size, (0, 0, 0, 0))
    layer.paste(logoim, pozisyon_getir_resim(pos))
    return PIL.Image.composite(layer, mainim, layer)

mainim = add_logo(32, 5, 0, 127)
mainim.save('cat_with_logo.png', 'PNG')
Result: [screenshot: cat.png with the semi-transparent logo pasted onto it]
I'm trying to perform occlusion analysis to understand which patches of my input image correlate maximally with the output of the model (the last layer is a softmax output). However, I keep getting the same error, which seems to say the operand types don't match. Can someone explain what I'm doing wrong and how to prevent the issue?
Traceback (most recent call last):
File "occlusion.py", line 70, in <module>
occlusion(attribute_extractor, jpegfile, mgn_output_for_original_img)
File "occlusion.py", line 29, in occlusion
output_height = int(np.ceil((height - int(occ_size)) / int(occ_stride)))
TypeError: unsupported operand type(s) for -: 'tuple' and 'int'
# model -> MGN - deep learning model
# image -> b_box cropped image of the person
# label -> MGN output label for the image
def occlusion(model, image, label, occ_size=50, occ_stride=50, occ_pixel=0.5):
    # get the width and height of the img
    width, height = image.size, image.size
    print(width)
    print(height)

    # set the output img width and height
    output_height = int(np.ceil((height - int(occ_size)) / int(occ_stride)))
    output_width = int(np.ceil((width - int(occ_size)) / int(occ_stride)))

    # create a white image with the sizes defined above
    heatmap = torch.zeros((output_height, output_width))

    # iterate all the pixels in each column
    for h in range(0, height):
        for w in range(0, width):
            h_start = h * occ_stride
            w_start = w * occ_stride
            h_end = min(height, h_start + occ_size)
            w_end = min(width, w_start + occ_size)

            if (w_end) >= width or (h_end) >= height:
                continue

            input_image = image.clone().detach()

            # replace all the pixel information in the specified location with occ_pixel (grey)
            input_image[:, :, w_start:w_end, h_start:h_end] = occ_pixel

            # run inference on the modified image
            output = model(input_image)
            output = nn.functional.softmax(output, dim=1)
            prob = output.tolist()[0][label]

            # set the heatmap location to the probability value
            heatmap[h, w] = prob

    return heatmap

attribute_extractor = MgnWrapper("./model.pt")
jpegfile = Image.open("tmpgal/ 1.jpg")
width, height = jpegfile.size
print(type(width))
print(type(height))
mgn_output_for_original_img = attribute_extractor(jpegfile)
occlusion(attribute_extractor, jpegfile, mgn_output_for_original_img)
I think you should change the assignment width, height = image.size, image.size to width, height = image.size. The original makes each of width and height a whole tuple (the value of image.size), whereas width, height = image.size unpacks the two elements of the image.size tuple and assigns one to each variable.
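A minimal demonstration of the difference, using a throwaway 100 x 50 image:

from PIL import Image

img = Image.new("RGB", (100, 50))
w, h = img.size, img.size    # w == (100, 50) and h == (100, 50): both are tuples
width, height = img.size     # width == 100, height == 50: the tuple is unpacked

With that one-line change, the function becomes: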
def occlusion(model, image, label, occ_size=50, occ_stride=50, occ_pixel=0.5):
    # get the width and height of the img
    width, height = image.size
    print(width)
    print(height)

    # set the output img width and height
    output_height = int(np.ceil((height - int(occ_size)) / int(occ_stride)))
    output_width = int(np.ceil((width - int(occ_size)) / int(occ_stride)))

    # create a white image with the sizes defined above
    heatmap = torch.zeros((output_height, output_width))

    # iterate all the pixels in each column
    for h in range(0, height):
        for w in range(0, width):
            h_start = h * occ_stride
            w_start = w * occ_stride
            h_end = min(height, h_start + occ_size)
            w_end = min(width, w_start + occ_size)

            if (w_end) >= width or (h_end) >= height:
                continue

            input_image = image.clone().detach()

            # replace all the pixel information in the specified location with occ_pixel (grey)
            input_image[:, :, w_start:w_end, h_start:h_end] = occ_pixel

            # run inference on the modified image
            output = model(input_image)
            output = nn.functional.softmax(output, dim=1)
            prob = output.tolist()[0][label]

            # set the heatmap location to the probability value
            heatmap[h, w] = prob

    return heatmap
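To inspect the result, the returned heatmap (a torch tensor) can be plotted, for example like this (a sketch, assuming matplotlib is installed and occlusion has been called successfully):

import matplotlib.pyplot as plt

# heatmap = occlusion(...)  # as above
plt.imshow(heatmap.numpy(), cmap="hot")
plt.colorbar()
plt.savefig("occlusion_heatmap.png")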
Quite simply, I'm learning how to edit photos with OpenCV/numpy.
I run two functions: one that colors columns in black and white, and a second that colors rows in black and white. The first function runs fine, but the second one uses the image created by the first, so I end up with both rows and columns in black and white.
My question is: why is the second function using the image created by the first?
import cv2
import numpy as np
from matplotlib import pyplot as plt

img_source = "brad.jpg"

def read_image(image_source):
    #global img, width, height
    img = cv2.imread(image_source, 1)
    height, width = img.shape[:2]
    print("Image size: x ", width, " y ", height)
    return img, width, height
def black_and_white_cols(image_source):
    width_adjustment = 100
    total_cols = round(width / width_adjustment, 0)
    edited_image = image_source
    bw_image = cv2.imread(img_source, 0)
    # The next line converts to the right interface
    # https://stackoverflow.com/questions/11067962/is-it-possible-to-have-black-and-white-and-color-image-on-same-window-by-using-o
    bw_image_b = cv2.cvtColor(bw_image, cv2.COLOR_GRAY2BGR)
    for x in range(1, int(total_cols), 2):
        top_row = 0
        bottom_row = height
        left_col = x * width_adjustment
        right_col = (x * width_adjustment) + width_adjustment
        bw_part = bw_image_b[top_row:bottom_row, left_col:right_col]
        edited_image[top_row:bottom_row, left_col:right_col] = bw_part
    show_image(edited_image)
    return edited_image
def black_and_white_rows(image_source):
    width_adjustment = 100
    edited_image = image_source
    total_rows = round(height / width_adjustment, 0)
    bw_image = cv2.imread(img_source, 0)
    bw_image_b = cv2.cvtColor(bw_image, cv2.COLOR_GRAY2BGR)
    for x in range(1, int(total_rows), 2):
        top_row = x * width_adjustment
        bottom_row = (x * width_adjustment) + width_adjustment
        left_col = 0
        right_col = width
        bw_part = bw_image_b[top_row:bottom_row, left_col:right_col]
        edited_image[top_row:bottom_row, left_col:right_col] = bw_part
    show_image(edited_image)

def show_image(image_source):
    cv2.imshow('This is your image', image_source)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

if __name__ == "__main__":
    img, width, height = read_image(img_source)
    new_image = black_and_white_cols(img)
    new_image_2 = black_and_white_rows(img)
This is the image after new_image = black_and_white_cols(img) runs.
and here's after new_image_2 = ... runs.
Why does the second image keep the black and white columns? I'm calling it using the very original img_source image, via read_image. Why is it using the column edited image?
As noted in the comments, when you do edited_image = image_source, you only copy a reference to the image array; you do not clone the array itself. Both names point at the same data, so every function that writes through edited_image also modifies img. You can do

edited_image = image_source.copy()

which copies image_source into a new, independent array.
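A quick numpy demonstration of the difference between aliasing and copying:

import numpy as np

a = np.zeros((2, 2))
b = a                 # b is just another name for the same array
b[0, 0] = 255
print(a[0, 0])        # 255.0 -- writing through b changed a

c = a.copy()          # c is an independent array
c[1, 1] = 255
print(a[1, 1])        # 0.0 -- a is untouched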
As the title says, I'm trying to define an array of matrices (which represent images) in Python. But when I try to read a matrix, I get this message: "ImageData instance has no attribute '__getitem__'".
I'm just starting to study Python, so I know this must be simple for a lot of people, but I don't know what's wrong. This is my code:
ImageData.py
import random
import math

class ImageData:
    def __init__(self, width, height):
        self.width = width
        self.height = height
        self.data = []
        for i in range(width):
            self.data.append([0] * height)

    def set_data(self, x, y, value):
        self.data[x][y] = value

    def generate_voronoi_diagram(self, seeds):
        nx = []
        ny = []
        nr = []
        ng = []
        nb = []
        for i in range(seeds):
            # Generate a cell position
            pos_x = random.randrange(self.width)
            pos_y = random.randrange(self.height)
            nx.append(pos_x)
            ny.append(pos_y)
            # Save the rgb data
            nr.append(random.randrange(256))
            ng.append(random.randrange(256))
            nb.append(random.randrange(256))
        for x in range(self.width):
            for y in range(self.height):
                # d_min starts as the image diagonal (the Euclidean norm)
                d_min = math.hypot(self.width - 1, self.height - 1)
                j = -1
                for i in range(seeds):
                    # The distance from a cell to the x, y point being considered
                    d = math.hypot(nx[i] - x, ny[i] - y)
                    if d < d_min:
                        d_min = d
                        j = i
                self.data[x][y] = [nr[j], ng[j], nb[j]]
UncertaintyVisualisaton.py
from PIL import Image
import numpy

import ImageData

def generate_uncertainty_visualisation(images, width, height):
    image = Image.new("RGB", (width, height))
    putpixel = image.putpixel

    r = g = b = []
    for i in range(width):
        r.append([0] * height)
        g.append([0] * height)
        b.append([0] * height)

    for i in range(len(images)):
        image = images[i]
        for x in range(width):
            for y in range(height):
                # Error here
                rgb = image[x][y]
                r[x][y] += rgb[0]
                g[x][y] += rgb[1]
                b[x][y] += rgb[2]

    for x in range(width):
        for y in range(height):
            r[x][y] /= len(images)
            g[x][y] /= len(images)
            b[x][y] /= len(images)
            putpixel((x, y), (r[x][y], g[x][y], b[x][y]))

    image.save("output.png", "PNG")

if __name__ == "__main__":
    width = 10
    height = 10

    entries = []
    seeds = numpy.random.poisson(20)
    images = 1
    for n in range(images):
        entry = ImageData.ImageData(width, height)
        entry.generate_voronoi_diagram(seeds)
        entries.append(entry)

    generate_uncertainty_visualisation(entries, width, height)
Any help would be much appreciated.
Thanks.
In UncertaintyVisualisaton.py you first set:

image = Image.new("RGB", (width, height))

and then, looping over images, you reassign:

image = images[i]

This is probably not what you want: it throws away the output image you just created.

Your error:

# Error here
rgb = image[x][y]

happens because an ImageData instance is not subscriptable; the pixel values live in its data attribute:

# no more error here
rgb = image.data[x][y]
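Putting both points together, one way the inner loop could look is this (a sketch; the loop variable is renamed so it no longer clobbers the output image):

for entry in images:
    for x in range(width):
        for y in range(height):
            rgb = entry.data[x][y]
            r[x][y] += rgb[0]
            g[x][y] += rgb[1]
            b[x][y] += rgb[2]

(Separately, note that r = g = b = [] binds all three names to the same list; r, g, b = [], [], [] keeps them independent.)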