Why does this automatic image cropping not work on handwritten numbers?

Why does this automatic image cropping not work on handwritten numbers? - python

I am trying to crop this image:
The following code crops all sides, but I want to remove even the unwanted handwritten number, which this code does not do. Any noise removal code, which I need to include in this would be of great help.
import numpy as np
import cv2
img = cv2.imread('test.JPG')
img = img[:-20,:-20]
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray = 255*(gray < 128).astype(np.uint8)
gray = cv2.morphologyEx(gray, cv2.MORPH_OPEN, np.ones((2, 2), dtype=np.uint8))
coords = cv2.findNonZero(gray)
x, y, w, h = cv2.boundingRect(coords)
rect = img[y:y+h, x:x+w]
cv2.imwrite("test_crop", rect)

Inverse binary threshold your image with a low threshold, e.g. 24, so you'll get rid of some of the lighter gray pixels.
In the thresholded image, count white pixels per column and row. The table cell borders will have peaks:
Find the first and last peak for both to get the first and last horizontal and vertical table border; crop that part.
Here's the full code:
import cv2
import numpy as np
from skimage import io # Only needed for web reading images
# Read image from web
image = cv2.cvtColor(io.imread('https://i.stack.imgur.com/kA16I.jpg'), cv2.COLOR_RGB2BGR)
# Convert to grayscale; inverse binary threshold; map to [0, 1]
image_thr = cv2.threshold(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY), 24, 255, cv2.THRESH_BINARY_INV)[1] / 255
# Count white pixels per column and row
col_sum = np.sum(image_thr, axis=0)
row_sum = np.sum(image_thr, axis=1)
# Get first and last vertical border of table
col_thr = 50
cols = np.where(col_sum > col_thr)[0][[0, -1]]
# Get first and last horizontal border of table
row_thr = 200
rows = np.where(row_sum > row_thr)[0][[0, -1]]
# Crop table
output = image[rows[0]:rows[1]+1, cols[0]:cols[1]+1]
# Outputs
cv2.imshow('image', image)
cv2.imshow('image_thr', image_thr)
cv2.imshow('output', output)
cv2.waitKey(0)
cv2.destroyAllWindows()
Output:
Hope that helps!
----------------------------------------
System information
----------------------------------------
Platform: Windows-10-10.0.16299-SP0
Python: 3.8.1
NumPy: 1.18.1
OpenCV: 4.2.0
----------------------------------------

Related

How to do Histogram Equalization on specific area

I have a image and I want to do HE or CLAHE on specific area of the image.
I already have a mask for the image.
Is there any possible way to do so?

Here is the code to achieve that :
import cv2 as cv
import numpy as np
# Load your color image
#src = cv.imread("___YourImagePath__.jpg",
#cv.IMREAD_COLOR)
#Create random color image
src = np.random.randint(255, size=(800,800,3),dtype=np.uint8)
cv.imshow('Random Color Image',src)
cv.waitKey(0)
# conver to gray
gray = cv.cvtColor(src, cv.COLOR_BGR2GRAY)
# process gray image
equalized = cv.equalizeHist(gray)
# create a mask (binary image with same size as source image )
height,width,depth = src.shape
mask = np.zeros((height,width))
cv.circle(mask,( int(width/2),int(height/2)),int(width/3),1,thickness=-1)
# display mask
cv.imshow('Mask',mask)
cv.waitKey(0)
# Copy processed region using the mask
ProcessedRegion = np.where(mask!=0,equalized,gray)
#display result
cv.imshow('Processed region result', ProcessedRegion)
cv.waitKey(0)
Output :

To do so you need to perform the operation on the pixel intensities of the image which fall within the mask. For that these intensities must be stored separately.
Procedure:
Get the pixel locations of those in white (255), within the mask.
Pick intensity values (0 - 255) from the gray image present in these locations.
Perform your operation (CLAHE or HE) on these intensities. The result is a different collection of intensities.
Place these new intensity values in the collected locations.
Sample:
Input image:
Mask image:
Code:
import cv2
import numpy as np
# read sample image, convert to grayscale
img = cv2.imread('flower.jpg')
gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# read mask image as binary image
mask = cv2.imread('flower_mask.jpg', 0)
# Step 1: store locations with value 255 (white)
loc = np.where(mask == 255)
# Step 2: Pick intensity values in these locations from the grayscale image:
values = gray[loc]
# Step 3: Histogram equalization on these values:
enhanced_values = cv2.equalizeHist(values)
# Step 4: Store these enhanced values in those locations:
gray2 = gray_img.copy()
for i, coord in enumerate(zip(loc[0], loc[1])):
gray2[coord[0], coord[1]] = enhanced_values[i][0]
cv2.imshow('Enhanced image', gray2)
Enhance image:
Grayscale image:

How to find only the bolded text lines from no. of images

from PIL import Image
import pytesseract
from pdf2image import convert_from_path
import os
import pandas as pd
import cv2
import numpy as np
files = os.chdir("C:/Users/abhishek_kumar1/Desktop/New folder")
#print(os.getcwd())
pages = convert_from_path("d.pdf",190,single_file=True,
poppler_path='C:/Users/abhishek_kumar1/Downloads/poppler-0.68.0_x86/poppler-0.68.0/bin')
image_counter=1
for page in pages:
filename = "page_"+str(image_counter)+".jpg"
page.save(filename,'JPEG')
img = cv2.imread(filename)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
cv2.imwrite('grey.png',gray)
binary,thresh1 = cv2.threshold(gray, 0, 255,cv2.THRESH_OTSU|cv2.THRESH_BINARY_INV)
cv2.imwrite('Thresh1.png',thresh1)
rect_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (15, 3))
dilation = cv2.dilate(thresh1, rect_kernel, iterations = 6)
contours, hierarchy = cv2.findContours(dilation, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
im2 = img.copy()
ROI_number = 0
for cnt in contours[::-1]:
[x,y,w,h] = cv2.boundingRect(cnt)
ROI=im2[y:y+h, x:x+w]
#print(str(w),str(h))
#cv2.putText(im2, str(h), (x,y - 10 ), cv2.FONT_HERSHEY_SIMPLEX, 0.1, (255, 0, 0), 1)
#cv2.putText(im2, str(w), (x,y + 10 ), cv2.FONT_HERSHEY_SIMPLEX, 0.1, (0, 0, 255), 1)
cv2.imwrite('ROI_{}.jpg'.format(ROI_number),ROI)
cv2.rectangle(im2,(x,y),(x+w,y+h),(36,255,12),1)
ROI_number += 1
cv2.imwrite('contours1.png',im2)
How to find only this image from above code section section, is there any options to understand font type from image like bold, italic,something else
get trouble to find only the bold line part from all of images.
Please any body have a suggestion regarding this please help me out.

Alex Alex's answer did not work for me. Here is my alternative described in words.
The general idea is that we compare how many black pixels there are in comparison to the minimum possible pixels to still form characters. This provides us with a difference from the skeleton to normal text and skeleton to bold text. In this way, we can quite clearly separate normal text from the bold text.
Use OCR software to extract bounding boxes of individual words. Optional: Combine individual words into lines of words, for example by word_num in Pytesseract.
Convert the image to grayscale and invert the image colors
Perform Zhang-Suen thinning on the selected area of text on the image (opencv contribution: cv2.ximgproc.thinning)
Sum where there are white pixels in the thinned image, i.e. where values are equal to 255 (white pixels are letters)
Sum where there are white pixels in the inverted image
Finally compute the thickness (sum_inverted_pixels - sum_skeleton_pixels) / sum_skeleton_pixels (sometimes there will be zero division error, check when the sum of the skeleton is 0 and return 0 instead)
Normalize the thickness by minimum and maximum values
Apply a threshold for deciding when a word/line of text is bold, e.g. 0.6 or 0.7

See python code and result:
import cv2
import numpy as np
img = cv2.imread('C.png')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 160, 255, cv2.THRESH_BINARY)[1]
kernel = np.ones((5,5),np.uint8)
kernel2 = np.ones((3,3),np.uint8)
marker = cv2.dilate(thresh,kernel,iterations = 1)
mask=cv2.erode(thresh,kernel,iterations = 1)
while True:
tmp=marker.copy()
marker=cv2.erode(marker, kernel2)
marker=cv2.max(mask, marker)
difference = cv2.subtract(tmp, marker)
if cv2.countNonZero(difference) == 0:
break
marker_color = cv2.cvtColor(marker, cv2.COLOR_GRAY2BGR)
out=cv2.bitwise_or(img, marker_color)
cv2.imwrite('out.png', out)
cv2.imshow('result', out )

How to get RGB values of two separate lines in an image in two variables using opencv python

I have detected two lines in an image using cv2. now I want to get the RGB values of both lines in separate variables like left_line_veriable = ['rgb values'], right_line_rgb_values = ['rgb values']
Here is my code:
import cv2
import numpy as np
image = cv2.imread('tape.png')
image = cv2.cvtCOLOR(image, cv2.COLOR_BGR2GRAY)
# Apply adaptive threshold
image_thr = cv2.adaptiveThreshold(image, 255, cv2.THRESH_BINARY_INV, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, 81, 2)
# Apply morphological opening with vertical line kernel
kernel = np.ones((image.shape[0], 1), dtype=np.uint8) * 255
image_mop = cv2.morphologyEx(image_thr, cv2.MORPH_OPEN, kernel)
color_detected_img = cv2.bitwise_and(image, image, mask=image_mop)
cv2.imshow('image', color_detected_img)
cv2.waitKey(0)
cv2.destroyAllWindows()
This is the image from which I want to get both line's RGB values in two variables as described above:

Maybe is not the most optimal way, but it is not hard to do. As I said in my comments, you can label the image to kind of segment the lines, then get the mean of the rgb values in it and the average position to get to know which one is left and right. Here is a small script to demonstrate what I am saying. The last part is just to show the results.
import cv2
import numpy as np
# load img and get the greyscale
img = cv2.imread("x.png")
grey = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# label the image
ret, thres = cv2.threshold(grey, 1, 255, cv2.THRESH_BINARY)
labelAmount, labels = cv2.connectedComponents(thres)
# get the mean of the color and position
values = []
# first label (0) is background
for i in range(1, labelAmount):
mask = np.zeros(labels.shape, dtype=np.uint8)
mask[labels == i] = 255
mean = cv2.mean(img, mask)[:-1]
meanPos = np.mean(cv2.findNonZero(mask), axis=0)[0]
values.append((mean, meanPos))
# sort them by x value (left to right)
values = sorted(values, key = lambda v : v[1][0])
left_line_color = values[0][0]
right_line_color = values[1][0]
# just to show the results
left_only = np.zeros(img.shape, dtype=np.uint8)
right_only = np.zeros(img.shape, dtype=np.uint8)
left_only = cv2.line (left_only, (int(values[0][1][0]), 0), (int(values[0][1][0]), img.shape[0]), left_line_color,5 )
right_only = cv2.line (right_only, (int(values[1][1][0]), 0), (int(values[1][1][0]), img.shape[0]), right_line_color,5 )
cv2.imshow("left_line", left_only)
cv2.imshow("right_line", right_only)
cv2.imshow("original", img)
cv2.waitKey(0)

How to remove whitespace from an image in OpenCV?

I have the following image which has text and a lot of white space underneath the text. I would like to crop the white space such that it looks like the second image.
Cropped Image
Here is what I've done
>>> img = cv2.imread("pg13_gau.jpg.png")
>>> gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
>>> edged = cv2.Canny(gray, 30,300)
>>> (img,cnts, _) = cv2.findContours(edged.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
>>> cnts = sorted(cnts, key = cv2.contourArea, reverse = True)[:10]

As many have alluded in the comments, the best way is to invert the image so the black text becomes white, find all the non-zero points in the image then determine what the minimum spanning bounding box would be. You can use this bounding box to finally crop your image. Finding the contours is very expensive and it isn't needed here - especially since your text is axis-aligned. You can use a combination of cv2.findNonZero and cv2.boundingRect to do what you need.
Therefore, something like this would work:
import numpy as np
import cv2
img = cv2.imread('ws.png') # Read in the image and convert to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray = 255*(gray < 128).astype(np.uint8) # To invert the text to white
coords = cv2.findNonZero(gray) # Find all non-zero points (text)
x, y, w, h = cv2.boundingRect(coords) # Find minimum spanning bounding box
rect = img[y:y+h, x:x+w] # Crop the image - note we do this on the original image
cv2.imshow("Cropped", rect) # Show it
cv2.waitKey(0)
cv2.destroyAllWindows()
cv2.imwrite("rect.png", rect) # Save the image
The code above exactly lays out what I talked about in the beginning. We read in the image, but we also convert to grayscale as your image is in colour for some reason. The tricky part is the third line of code where I threshold below the intensity of 128 so that the dark text becomes white. This however produces a binary image, so I convert to uint8, then scale by 255. This essentially inverts the text.
Next, given this image we find all of the non-zero coordinates with cv2.findNonZero and we finally put this into cv2.boundingRect which will give you the top-left corner of the bounding box as well as the width and height. We can finally use this to crop the image. Note we do this on the original image and not the inverted one. We use simply NumPy array indexing to do the cropping for us.
Finally, we show the image to show that it works and we save it to disk.
I now get this image:
For the second image, a good thing to do is to remove some of the right border and bottom border. We can do that by cropping the image down to that first. Next, this image contains some very small noisy pixels. I would recommend doing a morphological opening with a very small kernel, then redo the logic we talked about above.
Therefore:
import numpy as np
import cv2
img = cv2.imread('pg13_gau_preview.png') # Read in the image and convert to grayscale
img = img[:-20,:-20] # Perform pre-cropping
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray = 255*(gray < 128).astype(np.uint8) # To invert the text to white
gray = cv2.morphologyEx(gray, cv2.MORPH_OPEN, np.ones((2, 2), dtype=np.uint8)) # Perform noise filtering
coords = cv2.findNonZero(gray) # Find all non-zero points (text)
x, y, w, h = cv2.boundingRect(coords) # Find minimum spanning bounding box
rect = img[y:y+h, x:x+w] # Crop the image - note we do this on the original image
cv2.imshow("Cropped", rect) # Show it
cv2.waitKey(0)
cv2.destroyAllWindows()
cv2.imwrite("rect.png", rect) # Save the image
Note: Output image removed due to privacy

Opencv reads the image as a numpy array and it's much simpler to use numpy directly (scikit-image does the same). One possible way of doing it is to read the image as grayscale or convert to it and do the row-wise and column-wise operations as shown in the code snippet below. This will remove the columns and rows when all pixels are of pixel_value (white in this case).
def crop_image(filename, pixel_value=255):
gray = cv2.imread(filename, cv2.IMREAD_GRAYSCALE)
crop_rows = gray[~np.all(gray == pixel_value, axis=1), :]
cropped_image = crop_rows[:, ~np.all(crop_rows == pixel_value, axis=0)]
return cropped_image
and the output:

This would also work:
from PIL import Image, ImageChops
img = Image.open("pUq4x.png")
pixels = img.load()
print (f"original: {img.size[0]} x {img.size[1]}")
xlist = []
ylist = []
for y in range(0, img.size[1]):
for x in range(0, img.size[0]):
if pixels[x, y] != (255, 255, 255, 255):
xlist.append(x)
ylist.append(y)
left = min(xlist)
right = max(xlist)
top = min(ylist)
bottom = max(ylist)
img = img.crop((left-10, top-10, right+10, bottom+10))
img.show()

Opencv unable to get ROI of highest intensity part

I have tried this code.
import sys
import numpy as np
sys.path.append('/usr/local/lib/python2.7/site-packages')
import cv2
from cv2.cv import *
img=cv2.imread("test2.jpg",cv2.IMREAD_COLOR)
gimg = cv2.imread("test2.jpg",cv2.IMREAD_GRAYSCALE)
b,g,r = cv2.split(img)
ret,thresh1 = cv2.threshold(gimg,127,255,cv2.THRESH_BINARY);
numrows = len(thresh1)
numcols = len(thresh1[0])
thresh = 170
im_bw = cv2.threshold(gimg, thresh, 255, cv2.THRESH_BINARY)[1]
trig=0
trigmax=0;
xmax=0
ymax=0
for x in range(0,numrows):
for y in range(0,numcols):
if(im_bw[x][y]==1):
trig=trig+1;
if(trig>5):
xmax=x;
ymax=y;
break;
print x,y,numrows,numcols,trig
roi=gimg[xmax:xmax+200,ymax-500:ymax]
cv2.imshow("roi",roi)
WaitKey(0)
here test2.jpg is what I am tring to do is to concentrate on the high intensity part of the image(i.e the circle with high intensity in image).But my code does not seem to do so.
Can anyone help?

I found answer to my question from here
here is my code
# import the necessary packages
import numpy as np
import cv2
# load the image and convert it to grayscale
image = cv2.imread('test2.jpg')
orig = image.copy()
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# apply a Gaussian blur to the image then find the brightest
# region
gray = cv2.GaussianBlur(gray, (41, 41), 0)
(minVal, maxVal, minLoc, maxLoc) = cv2.minMaxLoc(gray)
image = orig.copy()
roi=image[maxLoc[1]-250:maxLoc[1]+250,maxLoc[0]-250:maxLoc[0]+250,2:2]
cv2.imshow("Robust", roi)
cv2.waitKey(0)
test2.jpg
ROI

Try checking whether a pixel is not zero - it turns out those pixels have a value of 255 after thresholding, as it is a grayscale image after all.
The threshold seems to be wrong also, but I don't really know what you want to see (display it with imshow - it isn't just the circle). And your code matches the number '3' in the bottom left corner, therefore the ROI matrix indices are invalid in your example.
EDIT:
After playing around with the image, I ended up using a different approach. I used the SimpleBlobDetector and did an erosion on the image before, so the region you're interested in remains connected. For the blob detector the program inverts the image first. (You may want to read a SimpleBlobDetector tutorial as I did, parts of the code are based on that page - many thanks to the author!)
The following code displays the procedure step by step:
import cv2
import numpy as np
# Read image
gimg = cv2.imread("test2.jpg", cv2.IMREAD_GRAYSCALE)
# Invert the image
im_inv = 255 - gimg
cv2.imshow("Step 1 - inverted image", im_inv)
cv2.waitKey(0)
# display at a threshold level of 50
thresh = 45
im_bw = cv2.threshold(im_inv, thresh, 255, cv2.THRESH_BINARY)[1]
cv2.imshow("Step 2 - bw threshold", im_bw)
cv2.waitKey(0)
# erosion
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(10,10))
im_bw = cv2.erode(im_bw, kernel, iterations = 1)
cv2.imshow('Step 3 - erosion connects disconnected parts', im_bw)
cv2.waitKey(0)
# Set up the detector with default parameters.
params = cv2.SimpleBlobDetector_Params()
params.filterByInertia = False
params.filterByConvexity = False
params.filterByCircularity = False
params.filterByColor = False
params.minThreshold = 0
params.maxThreshold = 50
params.filterByArea = True
params.minArea = 1000 # you may check with 10 --> finds number '3' also
params.maxArea = 100000 #im_bw.shape[0] * im_bw.shape[1] # max limit: image size
# Create a detector with the parameters
ver = (cv2.__version__).split('.')
if int(ver[0]) < 3 :
detector = cv2.SimpleBlobDetector(params)
else :
detector = cv2.SimpleBlobDetector_create(params)
# Detect blobs.
keypoints = detector.detect(im_bw)
print "Found", len(keypoints), "blobs:"
for kpt in keypoints:
print "(%.1f, %.1f) diameter: %.1f" % (kpt.pt[0], kpt.pt[1], kpt.size)
# Draw detected blobs as red circles.
# cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS ensures the size of the
# circle corresponds to the size of blob
im_with_keypoints = cv2.drawKeypoints(gimg, keypoints, np.array([]), (0,0,255),
cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
# Show keypoints
cv2.imshow("Keypoints", im_with_keypoints)
cv2.waitKey(0)
This algorithm finds the coordinate (454, 377) as the center of the blob, but if you reduce the minArea to e.g. 10 then it will find the number 3 in the bottom corner as well.

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Why does this automatic image cropping not work on handwritten numbers? - python

Related

How to do Histogram Equalization on specific area

How to find only the bolded text lines from no. of images

How to get RGB values of two separate lines in an image in two variables using opencv python

How to remove whitespace from an image in OpenCV?

Opencv unable to get ROI of highest intensity part

Categories

Resources