I am trying to display my images with cv2.imshow so that I can pick points for a perspective transformation.
But my image is very large, so the window does not fit on the screen and I cannot see the whole picture.
How can I fix this problem?
Below is my code:
import cv2
import os
import numpy as np
# Path of the image to rectify. `img_path` is the name the rest of the script
# reads; `image_path` is kept as an alias of the same value.
image_path = "1.jpg"
img_path = image_path
# Base name and extension are reused when the warped result is written to disk.
filename, ext = os.path.splitext(os.path.basename(img_path))
ori_img = cv2.imread(img_path)
if ori_img is None:
    # cv2.imread reports an unreadable/missing file by returning None
    # instead of raising, so fail early with a clear message.
    raise FileNotFoundError("Could not read image: %s" % img_path)
# Corner points clicked so far, stored in click order as [x, y] pairs.
src = []
# mouse callback handler
def mouse_handler(event, x, y, flags, param):
    """Record a clicked corner and warp the image once four corners exist.

    Each left-button release appends ``[x, y]`` to the module-level ``src``
    list and redraws every recorded point on a copy of ``ori_img``. When
    exactly four points have been collected, they are mapped onto an
    axis-aligned rectangle in the order top-left, top-right, bottom-right,
    bottom-left, and the rectified image is shown and written to disk.

    Args:
        event: OpenCV mouse event code.
        x: Horizontal click position.
        y: Vertical click position.
        flags: Unused; required by the callback signature.
        param: Unused; required by the callback signature.
    """
    if event != cv2.EVENT_LBUTTONUP:
        return

    src.append([x, y])

    # Draw the markers on a copy so the untouched original is what gets warped.
    img = ori_img.copy()
    for xx, yy in src:
        cv2.circle(img, center=(xx, yy), radius=5, color=(0, 255, 0), thickness=-1, lineType=cv2.LINE_AA)
    cv2.imshow('img', img)

    # perspective transform
    if len(src) != 4:
        return

    src_np = np.array(src, dtype=np.float32)
    # Output size: the longer of each pair of opposite edges.
    width = max(np.linalg.norm(src_np[0] - src_np[1]), np.linalg.norm(src_np[2] - src_np[3]))
    height = max(np.linalg.norm(src_np[0] - src_np[3]), np.linalg.norm(src_np[1] - src_np[2]))
    # warpPerspective needs an integer (width, height); the norms are floats.
    # Clamp to 1 so four identical clicks do not request an empty image.
    width = max(1, int(round(float(width))))
    height = max(1, int(round(float(height))))

    dst_np = np.array([
        [0, 0],
        [width, 0],
        [width, height],
        [0, height],
    ], dtype=np.float32)

    M = cv2.getPerspectiveTransform(src=src_np, dst=dst_np)
    result = cv2.warpPerspective(ori_img, M=M, dsize=(width, height))
    cv2.imshow('result', result)

    # cv2.imwrite does not create missing directories, so make sure it exists.
    os.makedirs('projetive_image', exist_ok=True)
    cv2.imwrite(r'projetive_image/%s_result%s' % (filename, ext), result)
# main
# The window has to exist before a mouse callback can be attached to it.
cv2.namedWindow('img')
cv2.setMouseCallback('img', mouse_handler)
cv2.imshow('img', ori_img)
# imshow only paints (and mouse events are only delivered) while the HighGUI
# event loop runs, so block here until a key is pressed.
cv2.waitKey(0)
cv2.destroyAllWindows()
Before displaying the image, you could simply downsize the image using cv2.resize or if you wanted to maintain aspect ratio, you can use imutils.resize. Another method is to simply save the image using cv2.imwrite then open it in your system's native image viewer.
import cv2
import imutils
image = cv2.imread('1.jpg')
if image is None:
    # cv2.imread returns None when the file cannot be read.
    raise FileNotFoundError("Could not read image: 1.jpg")
# Downsize without aspect ratio
image1 = cv2.resize(image, (500, 500), interpolation=cv2.INTER_AREA)
# Downsize and maintain aspect ratio
image2 = imutils.resize(image, width=800)
cv2.imshow('image1', image1)
cv2.imshow('image2', image2)
# Without waitKey the windows are never painted and the script exits at once.
cv2.waitKey(0)
cv2.destroyAllWindows()
Instead of resizing your image before displaying it, you can also resize the window that displays the image by using cv2.resizeWindow. To do so, you need to set the cv2.WINDOW_NORMAL WindowFlag in your cv2.namedWindow call.
That'd be a minimal example:
import cv2
import numpy as np
# Random test image. The upper bound of randint is exclusive, so 256 is needed
# to cover the full uint8 range 0..255.
img = np.random.randint(0, 256, (4000, 3000, 3), np.uint8)
# WINDOW_NORMAL makes the window resizable; the image is scaled to fit it.
cv2.namedWindow('img', cv2.WINDOW_NORMAL)
cv2.resizeWindow('img', 800, 600)
cv2.imshow('img', img)
# Keep the window open until a key is pressed.
cv2.waitKey(0)
cv2.destroyAllWindows()
Hope that helps!
I'm trying to put an image on top of another using OpenCV / Pillow, while keeping the foreground image transparent. If you look at this image, you can see that everything is going smoothly, except I'm not sure how to keep the image transparent.
I've tried implementing this StackOverflow solution by @fireant in my program, but nothing seems to work.
How can I execute index.py as normal, but keep the foreground png transparent?
import os
import numpy
import cv2
from PIL import Image
from os.path import join, dirname, realpath
import json
def upload_files():
    """Overlay ``dot_transparent.png`` on every detected eye of ``new.png``.

    The overlay is read with its alpha channel and blended pixel by pixel
    into the eye region, so transparent overlay pixels leave the background
    untouched.

    Returns:
        The BGR image with the overlay blended onto each detected eye.

    Raises:
        FileNotFoundError: If either input image cannot be read.
    """
    # NOTE(review): both classifiers load the same eye cascade file, so
    # "faces" are really eye detections. Confirm whether a frontal-face
    # cascade was intended for face_cascade.
    face_cascade = cv2.CascadeClassifier('/Users/matt/Python/LazerEyes/haarcascade_eye.xml')
    eye_cascade = cv2.CascadeClassifier('/Users/matt/Python/LazerEyes/haarcascade_eye.xml')

    img = cv2.imread('new.png')
    # IMREAD_UNCHANGED keeps the PNG alpha channel; the default flag drops it.
    img_to_place = cv2.imread('dot_transparent.png', cv2.IMREAD_UNCHANGED)
    if img is None:
        raise FileNotFoundError("Could not read image: new.png")
    if img_to_place is None:
        raise FileNotFoundError("Could not read image: dot_transparent.png")
    if img_to_place.ndim == 2:
        # Single-channel overlay: expand so it can be pasted into a BGR region.
        img_to_place = cv2.cvtColor(img_to_place, cv2.COLOR_GRAY2BGR)

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    faces = face_cascade.detectMultiScale(gray, 1.3, 5)
    for (x, y, w, h) in faces:
        roi_gray = gray[y:y+h, x:x+w]
        # View into img: writing to it modifies img in place.
        roi_color = img[y:y+h, x:x+w]
        eyes = eye_cascade.detectMultiScale(roi_gray)
        for (ex, ey, ew, eh) in eyes:
            # cv2.resize takes dsize as (width, height).
            overlay = cv2.resize(img_to_place, (ew, eh), interpolation=cv2.INTER_AREA)
            target = roi_color[ey:ey+eh, ex:ex+ew]
            if overlay.shape[2] == 4:
                # Per-pixel alpha in [0, 1], kept 3-D so it broadcasts over BGR.
                alpha = overlay[..., 3:4].astype('float32') / 255.0
                target[:] = target * (1.0 - alpha) + overlay[..., :3] * alpha
            else:
                # No alpha channel available: fall back to a plain paste.
                target[:] = overlay
    return img
Since you have specific alpha values for each pixel, I'd follow another approach, and implement a pixel-wise alpha blending, disregarding any alpha channel in the final image.
I borrowed some lines from Quang Hoang's answer:
import cv2
import matplotlib.pyplot as plt
import numpy as np
# Read images
img = cv2.imread('image.jpg')
# IMREAD_UNCHANGED keeps the fourth (alpha) channel of the PNG.
dot = cv2.imread('dot_transparent.png', cv2.IMREAD_UNCHANGED)
# Manual manipulating one eye
ex, ey, ew, eh = 1430, 1490, 400, 400
# cv2.resize expects dsize as (width, height); the swapped order only worked
# because the sample region is square.
dot = cv2.resize(dot, (ew, eh))
# Prepare pixel-wise alpha blending
# Keep the alpha plane 3-D (h, w, 1) so it broadcasts across the BGR channels.
dot_alpha = dot[..., 3:] / 255.0
dot = dot[..., :3]
# Pixel-wise alpha blending
img[ey:ey+eh, ex:ex+ew, :] = img[ey:ey+eh, ex:ex+ew, :] * (1 - dot_alpha) + dot * dot_alpha
# Output
plt.figure(figsize=(9, 9))
# Reorder BGR -> RGB for matplotlib.
plt.imshow(img[..., [2, 1, 0]])
# Needed outside interactive sessions for the figure to appear.
plt.show()
That'd be the output:
Read dot_transparent.png with the cv2.IMREAD_UNCHANGED flag so that you keep the alpha channel (otherwise you only get 3 channels):
img = cv2.imread('faces.jpg')
# IMREAD_UNCHANGED keeps the alpha channel as the last channel.
to_replace = cv2.imread('dot_transparent.png', cv2.IMREAD_UNCHANGED)
# sample eye detection
ex, ey, ew, eh = 1600, 1600, 200, 200
# cv2.resize takes dsize as (width, height).
resized_replace = cv2.resize(to_replace, (ew, eh))
# Where the overlay is (almost) transparent (alpha < 50) keep the background,
# otherwise take the overlay's colour channels.
img[ey:ey+eh, ex:ex+ew] = np.where(resized_replace[..., -1:] < 50,
                                   img[ey:ey+eh, ex:ex+ew],
                                   resized_replace[..., :-1])
# show with imshow
cv2.imshow('img', img)
cv2.waitKey(0)
cv2.destroyAllWindows()
I want to use OCR (pytesseract) to recognize the text located in images like these:
I have thousands of these arrows. Until now the procedure is as follows: I first resize the image (for another process). Then I crop the image to get rid of the most part of the arrow. Next I draw a white rectangle as a frame to remove further noise but still have distance between text and image borders for better text recognition. I resize the image again to ensure a height of capital letters to ~30 px (https://groups.google.com/forum/#!msg/tesseract-ocr/Wdh_JJwnw94/24JHDYQbBQAJ). Finally I binarize the image with a threshold of 150.
Full code:
import cv2
image_file = '001.jpg'

# Read the picture as a single-channel image and note its size.
image = cv2.imread(image_file, cv2.IMREAD_GRAYSCALE)
h_1, w_1 = image.shape[:2]

# Scale to a fixed height of 320 px, keeping the aspect ratio, then re-measure.
image = cv2.resize(image, dsize=(int(w_1*320/h_1), 320))
h_1, w_1 = image.shape

# Trim 70 px from top and bottom and 20 px from left and right.
image_2 = image[70:h_1-70, 20:w_1-20]
h_2, w_2 = image_2.shape

# Paint a 40 px thick white frame along the border of the crop to blank out
# whatever remains near its edges.
cv2.rectangle(image_2, (0, 0), (w_2, h_2), (255, 255, 255), thickness=40)

# Scale again to a fixed height of 50 px (width computed from the crop size).
image_2 = cv2.resize(image_2, dsize=(int(w_2*50/h_2), 50))

# Binarize: pixels above 150 become 255, everything else 0.
ret, image_2 = cv2.threshold(image_2, 150, 255, cv2.THRESH_BINARY)

# Store the preprocessed image next to the input.
cv2.imwrite('processed_' + image_file, image_2)

# Optional OCR step; everything below can be commented out.
import pytesseract
config_7 = ("-c tessedit_char_whitelist=0123456789AB --oem 1 --psm 7")
text = pytesseract.image_to_string(image_2, config=config_7)
print("OCR TEXT: " + "{}\n".format(text))
The problem is that the text located in the arrow is never centered. Sometimes I remove part of the text with the method described above (e.g. in image 50A).
Is there a method in image processing to get rid of the arrow in a more elegant way? For instance using contour detection and deletion? I am more interested in the OpenCV part than the tesseract part to recognize the text.
Any help is appreciated.
If you look at the pictures you will see that there is a white arrow in the image which is also the biggest contour (especially if you draw a black border on the image). If you make a blank mask and draw the arrow (biggest contour on the image) then erode it a little bit you can perform a per element bitwise conjunction of the actual image and eroded mask. If it is not clear look at the bottom code and comments and you will see that it is actually pretty simple.
# imports
import cv2
import numpy as np
img = cv2.imread("number.png")  # read image
if img is None:
    # cv2.imread returns None when the file cannot be read.
    raise FileNotFoundError("Could not read image: number.png")
# you can resize the image here if you like - it should still work for both sizes
h, w = img.shape[:2]  # get the actual image's height and width
img = cv2.resize(img, (int(w*320/h), 320))
h, w = img.shape[:2]
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # transform to grayscale
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)[1]  # perform Otsu threshold
# Black border so the white arrow is closed off from the image edge.
cv2.rectangle(thresh, (0, 0), (w, h), (0, 0, 0), 2)
# findContours returns (contours, hierarchy) in OpenCV 2.x/4.x but
# (image, contours, hierarchy) in 3.x; index -2 is the contour list in both.
contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2]  # search for contours
if not contours:
    raise ValueError("No contours found in number.png")
max_cnt = max(contours, key=cv2.contourArea)  # select biggest one
mask = np.zeros((h, w), dtype=np.uint8)  # create a black mask
cv2.drawContours(mask, [max_cnt], -1, (255, 255, 255), -1)  # draw biggest contour (filled) on the mask
kernel = np.ones((15, 15), dtype=np.uint8)  # 15x15 kernel - works for both the resized and the original size
erosion = cv2.erode(mask, kernel, iterations=1)  # shrink the mask so the arrow outline falls outside it
# Invert so that the AND below blacks out everything outside the mask; the
# second inversion then turns that black area white again.
reverse = cv2.bitwise_not(img)  # bitwise_not returns a new array, no copy needed
img = cv2.bitwise_and(reverse, reverse, mask=erosion)  # keep inverted pixels only inside the eroded mask
img = cv2.bitwise_not(img)  # invert back
# save image to file and display
cv2.imwrite("res.png", img)
cv2.imshow("img", img)
# Keep the window open until a key is pressed.
cv2.waitKey(0)
cv2.destroyAllWindows()
You can try simple Python script:
import cv2
import numpy as np
img = cv2.imread('mmubS.png', cv2.IMREAD_GRAYSCALE)
if img is None:
    # cv2.imread returns None when the file cannot be read.
    raise FileNotFoundError("Could not read image: mmubS.png")
# Inverted threshold: pixels above 200 become 0, everything else 255.
thresh = cv2.threshold(img, 200, 255, cv2.THRESH_BINARY_INV)[1]
im_flood_fill = thresh.copy()
h, w = thresh.shape[:2]
# White 2 px border so the flood fill seeded at (0, 0) starts on a white pixel.
im_flood_fill = cv2.rectangle(im_flood_fill, (0, 0), (w-1, h-1), 255, 2)
# floodFill requires a mask two pixels larger than the image in each dimension.
mask = np.zeros((h + 2, w + 2), np.uint8)
# Paint everything connected to the corner black.
cv2.floodFill(im_flood_fill, mask, (0, 0), 0)
im_flood_fill = cv2.bitwise_not(im_flood_fill)
# Save first so the file is written even if the window is closed abruptly.
cv2.imwrite('text.png', im_flood_fill)
cv2.imshow('clear text', im_flood_fill)
cv2.waitKey(0)
cv2.destroyAllWindows()
Original Image
Expected Output.
I am using this code to translate a specific part of an image within the same image, but the output does not change:
import numpy as np
import cv2 as cv
img = cv.imread('eye0.jpg', 0)
if img is None:
    # cv.imread returns None when the file cannot be read.
    raise FileNotFoundError("Could not read image: eye0.jpg")
rows, cols = img.shape
# Region to move: rows 200..299, columns 360..449.
roi = img[200: 300, 360: 450]
M = np.float32([[1, 0, 100], [0, 1, 50]])
# The last column of the affine matrix is the translation (tx, ty).
# Warping the ROI on its own canvas shifts its content out of view, so the
# shift is applied by pasting the ROI at the translated position instead.
tx, ty = int(M[0, 2]), int(M[1, 2])
dst = img.copy()
h, w = roi.shape
y0, x0 = 200 + ty, 360 + tx
# Clip the destination rectangle to the image bounds.
y1, x1 = min(y0 + h, rows), min(x0 + w, cols)
if y1 > y0 and x1 > x0:
    dst[y0:y1, x0:x1] = roi[:y1 - y0, :x1 - x0]
# Separate window names; reusing one name replaces the first image.
cv.imshow('img', img)
cv.imshow('dst', dst)
cv.waitKey(0)
cv.destroyAllWindows()
I see no changes from the original image. How can I do this? Moreover, as an OpenCV newbie, I would like to know which function I should use or explore to achieve this.
The copy() function can help you here instead of warpAffine(). You can check here also:
Here is output and code:
import numpy as np
import cv2 as cv
img = cv.imread('eye.jpg', 1)
if img is None:
    # cv.imread returns None when the file cannot be read.
    raise FileNotFoundError("Could not read image: eye.jpg")
# 20x20 patch to move.
roi = img[80: 100, 140: 160]
# Work on a copy so the original stays available for comparison.
img2 = img.copy()
# Paste the patch 15 rows lower, same columns.
img2[95:115, 140:160] = roi
cv.imshow('img', img)
cv.imshow('imaag', img2)
# Keep both windows open until a key is pressed.
cv.waitKey(0)
cv.destroyAllWindows()
Here is the image after the warp affine transformation, although circling the part seems difficult:
import numpy as np
import cv2 as cv
img = cv.imread('eye.jpg')
if img is None:
    # cv.imread returns None when the file cannot be read.
    raise FileNotFoundError("Could not read image: eye.jpg")
# 22 rows x 30 columns patch.
roi = img[78: 100, 130: 160]
# Translate the patch content by 6 px right and 4 px down.
M = np.float32([[1, 0, 6], [0, 1, 4]])
# dsize is (width, height), i.e. (30, 22) for the 22x30 patch.
dst = cv.warpAffine(roi, M, (30, 22))
# Write the shifted patch back, offset by 2 px in both directions.
img[80:102, 132:162] = dst
cv.imshow('img', img)
# Keep the window open until a key is pressed.
cv.waitKey(0)
cv.destroyAllWindows()
I must write a program to detect the width of an object. I understand that without a reference object the width will be expressed in pixels, but that is enough for me. The background will always be white. I am stuck and do not know what I should do next.
I will be so grateful for your help!
enter image description here
import numpy as np
import imutils
import cv2
import math
# Function to show array of images (intermediate results)
def show_images(images):
    """Display each image in its own window and wait for a key press.

    Windows are named ``image_0``, ``image_1``, ... in list order.

    Args:
        images: Iterable of images accepted by ``cv2.imshow``.
    """
    for i, img in enumerate(images):
        cv2.imshow("image_" + str(i), img)
    # imshow only paints while the HighGUI event loop runs; block until a key
    # is pressed, then close the windows opened above.
    cv2.waitKey(0)
    cv2.destroyAllWindows()
# Load the picture, convert it to grayscale and smooth it with a 9x9 Gaussian.
image = cv2.imread('44.jpg')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (9, 9), 0)

# Edge map, followed by one dilation and one erosion pass (default kernel).
edged = cv2.Canny(blur, 50, 100)
for morph in (cv2.dilate, cv2.erode):
    edged = morph(edged, None, iterations=1)

# Show the intermediate results.
show_images([blur, edged])
# TODO: contours (cnts) are not computed yet, so they cannot be shown here.
Since you're using OpenCV, finding the image dimensions is as simple as the code below.
import numpy as np
import cv2
img = cv2.imread('image.png')
# The shape tuple is ordered (rows, columns, channels).
dimension = img.shape
height, width, channels = dimension[0], dimension[1], dimension[2]
Read more about this here:
I already have a facial landmark detector and can already save the image using opencv and dlib with the code below:
# import the necessary packages
from imutils import face_utils
import numpy as np
import argparse
import imutils
import dlib
import cv2
# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-p", "--shape-predictor", required=True, help="Path to facial landmark predictor")
ap.add_argument("-i", "--image", required=True, help="Path to input image")
args = vars(ap.parse_args())

# initialize dlib's face detector (HOG-based) and then create the facial landmark predictor
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(args["shape_predictor"])

# load the input image, resize it, and convert it to grayscale
image = cv2.imread(args["image"])
if image is None:
    # cv2.imread returns None when the file cannot be read.
    raise FileNotFoundError("Could not read image: %s" % args["image"])
image = imutils.resize(image, width=500)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# detect faces in the grayscale image
rects = detector(gray, 1)
for (face_idx, rect) in enumerate(rects):
    # determine the facial landmarks for the face region, then
    # convert the landmark (x, y)-coordinates to a NumPy array
    shape = predictor(gray, rect)
    shape = face_utils.shape_to_np(shape)

    # loop over the face parts individually; (start, end) is the landmark
    # index range of the part. Distinct names avoid shadowing the face index.
    for (name, (start, end)) in face_utils.FACIAL_LANDMARKS_IDXS.items():
        print(" i = ", start, " j = ", end)

        # clone the original image so we can draw on it, then
        # display the name of the face part on the image
        clone = image.copy()
        cv2.putText(clone, name, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)

        # loop over the subset of facial landmarks, drawing the
        # specific face part using red dots
        for (x, y) in shape[start:end]:
            # Plain Python ints keep the point tuple independent of NumPy scalar types.
            cv2.circle(clone, (int(x), int(y)), 1, (0, 0, 255), -1)

        # extract the ROI of the face region as a separate image
        (x, y, w, h) = cv2.boundingRect(np.array([shape[start:end]]))
        # A negative start index would wrap around in NumPy slicing, so clamp
        # the top-left corner while keeping the original bottom-right corner.
        roi = image[max(y, 0):y+h, max(x, 0):x+w]
        if roi.size == 0:
            # Nothing of this part lies inside the frame.
            continue
        roi = imutils.resize(roi, width=250, inter=cv2.INTER_CUBIC)

        # show the particular face part
        # NOTE: with several faces, a later face overwrites the earlier
        # face's "<name>.jpg" file.
        cv2.imshow("ROI", roi)
        cv2.imwrite(name + '.jpg', roi)
        cv2.imshow("Image", clone)
        # Wait for a key press so each part is actually displayed.
        cv2.waitKey(0)

    # visualize all facial landmarks with a transparent overlay
    output = face_utils.visualize_facial_landmarks(image, shape)
    cv2.imshow("Image", output)
    cv2.waitKey(0)

cv2.destroyAllWindows()
I have Arnold's face and I save part of his face using opencv imwrite.
What I'm trying to achieve is to get the image of the jaw only and I don't want to save the neck part. See the image below:
Does anyone have an idea how I can remove the other parts, except the jaw detected by dlib?
Something like this is the expected output:
+ =
It's not very clear how much of the original image you are trying to mask off. Assuming you are using shape_predictor_68_face_landmarks.dat, DLib's landmarks 0 to 16 define the jawline, so you could make a mask that extends these to cover the bottom half of the frame.
Pardon my crude python skillset but that code will mask off below the jawline and also cut the image to the region of interest to match the expected output in your question.
# import the necessary packages
from imutils import face_utils
import numpy as np
import imutils
import dlib
import cv2
import os
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor('shape_predictor_68_face_landmarks.dat')

# load image
img = cv2.imread('thegovernator.png')
if img is None:
    # cv2.imread returns None when the file cannot be read.
    raise FileNotFoundError("Could not read image: thegovernator.png")
h, w, ch = img.shape
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# add a fully opaque alpha channel to the image
img = cv2.cvtColor(img, cv2.COLOR_BGR2BGRA)

# detect face
rects = detector(gray, 1)
if len(rects) == 0:
    raise RuntimeError("No face detected in thegovernator.png")
roi = rects[0]  # region of interest: first detected face
shape = predictor(gray, roi)
shape = face_utils.shape_to_np(shape)

# extract jawline (landmarks 0..16) and its vertical extent
jawline = shape[0:17]
top = int(min(jawline[:, 1]))
bottom = int(max(jawline[:, 1]))

# extend contour for masking: continue from the last jaw point to the right
# edge, down to the bottom-right corner, across to the bottom-left corner and
# back up to the height of the first jaw point
extension = [
    [w - 1, jawline[-1][1]],
    [w - 1, h - 1],
    [0, h - 1],
    [0, jawline[0][1]],
]
# Contour points are integer pixel coordinates; make the 32-bit dtype explicit.
jawline = np.vstack([jawline, extension]).astype(np.int32)
contours = [jawline]

# generate mask: 255 everywhere, 0 inside the extended contour
mask = np.full((h, w), 255, np.uint8)  # 255 makes the mask 'showable'
cv2.drawContours(mask, contours, -1, 0, -1)  # remove below jawline

# apply to image (masked-out pixels become zero in all four channels)
result = cv2.bitwise_and(img, img, mask=mask)

# crop ROI; clamp to the frame so a detection that extends past the image
# border cannot produce negative (wrap-around) slice indices
left = max(roi.left(), 0)
right = min(roi.left() + roi.width(), w)
result = result[max(top, 0):min(bottom, h), left:right]

cv2.imwrite('result.png', result)
cv2.imshow('masked image', result)
# Keep the window open until a key is pressed.
cv2.waitKey(0)
cv2.destroyAllWindows()