I am trying to do OCR in python but not getting the correct output. Here is the code. I tried with original image, grayscale also but not getting any result
from PIL import Image
import pytesseract
def convert_to_monochrome(image):
pixels = image.load()
for i in range(image.size[0]): # for every pixel:
for j in range(image.size[1]):
r, g, b = pixels[i, j]
if r > 200 and g > 200 and b > 200:
pixels[i, j] = (255, 255, 255)
pixels[i, j] = (0, 0, 0)
return image
def interpret_chips(image):
#image = image.resize((image.size[0] * 10, image.size[1] * 10), Image.ANTIALIAS)
#image = image.convert("LA")
_image = convert_to_monochrome(image)
config = "--psm 7 -c tessedit_char_whitelist=0123456789KMT"
rank_string = pytesseract.image_to_string(_image, config=config) # expensive
return _image, rank_string
for i in range(1, 6):
img = Image.open("temp/sample" + str(i) + ".jpg")
img, text = interpret_chips(img)
img.save("temp/monochrome" + str(i) + ".jpg")
Thanks for your help
I am attaching some original images for which it is giving wrong results. Pre processed images are obtained after applying monochrome function defined please have a look. Text can be of type 4, 400, 4000, 459K, 29M etc. I am getting very awkward results.
Raw Image 1
Raw Image 2
Raw Image 3
Pre processed 1
Pre processed 2
Pre processed 3
The problem is that tesseract expects an image with dark text on a light background. The preprocessed image in your case is just the opposite. So you can just invert the preprocessed image.
Below code worked for me :
from PIL import Image
import pytesseract
def convert_to_monochrome(image):
pixels = image.load()
for i in range(image.size[0]): # for every pixel:
for j in range(image.size[1]):
r, g, b = pixels[i, j]
if r > 200 and g > 200 and b > 200:
pixels[i, j] = (0, 0, 0)
pixels[i, j] = (255, 255, 255)
return image
def interpret_chips(image):
#image = image.resize((image.size[0] * 10, image.size[1] * 10), Image.ANTIALIAS)
#image = image.convert("LA")
_image = convert_to_monochrome(image)
config = "--psm 6 -c tessedit_char_whitelist=0123456789KMT"
rank_string = pytesseract.image_to_string(_image, config=config) # expensive
return _image, rank_string
img = Image.open("orig.jpg")
img, text = interpret_chips(img)
text is 23.000,
i used this code below to extract patches from a image.
extract code:
import os
import glob
from PIL import Image
Image.MAX_IMAGE_PIXELS = None # to avoid image size warning
imgdir = "/path/to/image/folder"
filelist = [f for f in glob.glob(imgdir + "**/*.png", recursive=True)]
savedir = "/path/to/image/folder/output"
start_pos = start_x, start_y = (0, 0)
cropped_image_size = w, h = (256, 256)
for file in filelist:
img = Image.open(file)
width, height = img.size
frame_num = 1
for col_i in range(0, width, w):
for row_i in range(0, height, h):
crop = img.crop((col_i, row_i, col_i + w, row_i + h))
name = os.path.basename(file)
name = os.path.splitext(name)[0]
save_to= os.path.join(savedir, name+"_{:03}.png")
frame_num += 1
Now i want to reconstruct this imagem from all those patches extracted before, i've tried 2 diffenret codes
so my DB is 120x256x256x3 extracted patches, theres 120 patches to fit in 3840x2048 shape..:
patches = []
for directory_path in glob.glob('D:\join_exemplo'):
for img_path in glob.glob(os.path.join(directory_path, "*.png")):
img = cv2.imread(img_path,1)
input_patches = np.array(patches)
first i've tried sklearn.feature_extraction.image importing reconstruct_from_patches_2d, but got a black image:
reconstruct = reconstruct_from_patches_2d(input_patches, input_image)
reconstruct = reconstruct.astype(np.uint8)
Image.fromarray(reconstruct, 'RGB').save(r'D:\join_exemplo\re\re3.png')
also tried, this below but got a grayscale tone pattern image
input_image = (3840,2048,3)
reconstructed_arr = np.zeros(shape=(3840,2048,3))
>>> step = 256
>>> for x in range(img.shape[0]):
for y in range(img.shape[1]):
x_pos, y_pos = x * step, y * step
reconstructed_arr[x_pos:x_pos + 512, y_pos:y_pos + 512] = img[x, y, 0, ...]
>>> (input_image == reconstructed_arr).all()
Can someone see whats wrong? sorry about my bad english
I have a little project with OpenCV (python) where one of my steps is to take an x-ray image from the human body and convert it to a binary image where white pixels represent where some bone is present and black means there is no bone there.
Since sometimes "bone parts" can be darker than "non-bone parts" from another region, simple thresholding won't work. I also tried adaptive threshold and I couldn't see much difference.
I came up with a simple algorithm that applies a simple threshold for each row.
Here is the code:
def threshhold(image, val):
image = image.copy()
for row_idx in range(image.shape[0]):
max_row = image[row_idx].max()
min_row = image[row_idx].min()
tresh = np.median(image[row_idx]) + (val * (max_row - min_row))
# Or use np.mean instead of np.median
_, tresh = cv2.threshold(image[row_idx], tresh, 255, cv2.THRESH_BINARY)
image[row_idx] = tresh.ravel()
return image
And here is the code that does the same work but column-by-column instead of row-by-row:
def threshhold2(image, val):
image = image.copy()
for row_idx in range(image.shape[1]):
max_row = image[:, row_idx].max()
min_row = image[:, row_idx].min()
tresh = np.median(image[:, row_idx]) + (val * (max_row - min_row))
# Or use np.mean instead of np.median
_, tresh = cv2.threshold(image[:, row_idx], tresh, 255, cv2.THRESH_BINARY)
image[:, row_idx] = tresh.ravel()
return image
This method works pretty well with images like this:
Not quite well for this one but it is not that bad:
Very terrible:
Only the left half looks good
As you can see; this algorithm works well only for some images.
I will be glad to see more experienced people's ideas.
Images are not for me by the way.
Entire source code:
import os
import cv2
import numpy as np
files_to_see = os.listdir("data_set")
current_file = 0
def slice(image, size):
out = []
x_count = image.shape[1] // size
y_count = image.shape[0] // size
for y_idx in range(y_count):
for x_idx in range(x_count):
(y_idx, x_idx),
image[y_idx * size: (y_idx + 1) * size,
x_idx * size: (x_idx + 1) * size]
return y_count, x_count, out
def normalize(image):
image = image.copy()
min_pix = image.min()
max_pix = image.max()
for y in range(image.shape[0]):
for x in range(image.shape[1]):
val = image[y, x]
val -= min_pix
val *= 255 / (max_pix - min_pix)
image[y, x] = round(val)
# image -= min_pix
# image *= round(255 / (max_pix - min_pix))
return image
def threshhold(image, val, method):
image = image.copy()
for row_idx in range(image.shape[0]):
max_row = image[row_idx].max()
min_row = image[row_idx].min()
# tresh = np.median(image[row_idx]) + (val * (max_row - min_row))
tresh = method(image[row_idx]) + (val * (max_row - min_row))
_, tresh = cv2.threshold(image[row_idx], tresh, 255, cv2.THRESH_BINARY)
image[row_idx] = tresh.ravel()
return image
def threshhold2(image, val, method):
image = image.copy()
for row_idx in range(image.shape[1]):
max_row = image[:, row_idx].max()
min_row = image[:, row_idx].min()
tresh = method(image[:, row_idx]) + (val * (max_row - min_row))
_, tresh = cv2.threshold(image[:, row_idx], tresh, 255, cv2.THRESH_BINARY)
image[:, row_idx] = tresh.ravel()
return image
def recalculate_threshhold(v):
global original_current_image, thresh_current_image, y_c, x_c, slices
method = np.mean
if cv2.getTrackbarPos("method", "xb labeler") == 0:
method = np.median
thresh_current_image = threshhold2(original_current_image, cv2.getTrackbarPos("threshhold_value", "xb labeler") / 1000, method)
y_c, x_c, slices = slice(thresh_current_image, 128)
def thresh_current_image_mouse_event(event, x, y, flags, param):
if event == 1:
print(x // 128, y // 128)
cv2.imshow("slice", slices[(x // 128) + (y // 128) * x_c][1])
cv2.namedWindow("xb labeler")
cv2.createTrackbar("threshhold_value", "xb labeler", 0, 1000, recalculate_threshhold)
cv2.createTrackbar("method", "xb labeler", 0, 1, recalculate_threshhold)
cv2.setMouseCallback("thresh_current_image", thresh_current_image_mouse_event)
def init():
global original_current_image, thresh_current_image, x_c, y_c, slices, files_to_see, current_file
original_current_image = cv2.imread("data_set/" + files_to_see[current_file], cv2.CV_8UC1)
original_current_image = cv2.resize(original_current_image, (512, 512))
original_current_image = normalize(original_current_image)
original_current_image = cv2.GaussianBlur(original_current_image, (5, 5), 10)
y_c, x_c, slices = slice(thresh_current_image, 128)
while True:
cv2.imshow("thresh_current_image", thresh_current_image)
cv2.imshow("xb labeler", original_current_image)
k = cv2.waitKey(1)
if k == ord('p'):
cv2.imwrite("ssq.png", thresh_current_image)
current_file += 1
EDIT: Added original images:
I have a folder of images and I want to crop them in a circular shape.
This is the original image:
The result that I want is this:
My code is:
import os
import glob
from PIL import Image, ImageDraw, ImageFilter
import numpy as np
def mask_circle_solid(pil_img, background_color, blur_radius, offset=0):
background = Image.new(pil_img.mode, pil_img.size, background_color)
offset = blur_radius * 2 + offset
mask = Image.new("L", pil_img.size, 0)
draw = ImageDraw.Draw(mask)
draw.ellipse((offset, offset, pil_img.size[0] - offset, pil_img.size[1] - offset), fill=255)
mask = mask.filter(ImageFilter.GaussianBlur(blur_radius))
return Image.composite(pil_img, background, mask)
def mask_circle_transparent(pil_img, blur_radius, offset=0):
offset = blur_radius * 2 + offset
mask = Image.new("L", pil_img.size, 0)
draw = ImageDraw.Draw(mask)
draw.ellipse((offset, offset, pil_img.size[0] - offset, pil_img.size[1] - offset), fill=255)
mask = mask.filter(ImageFilter.GaussianBlur(blur_radius))
result = pil_img.copy()
return result
def crop_max_square(pil_img):
return crop_center(pil_img, min(pil_img.size), min(pil_img.size))
def crop_center(pil_img, crop_width, crop_height):
img_width, img_height = pil_img.size
return pil_img.crop(((img_width - crop_width) // 2,
(img_height - crop_height) // 2,
(img_width + crop_width) // 2,
(img_height + crop_height) // 2))
im = []
for f in glob.iglob("./*.jpg"):
thumb_width = 150
im = np.array(im)
list_files = os.listdir(".")
for i in range(0,len(im)):
im_square[i] = crop_max_square(im[i]).resize((thumb_width, thumb_width), Image.LANCZOS)
im_thumb[i] = mask_circle_transparent(im_square[i], 4)
I have used the functions from this article:
Generate square or circular thumbnail images with Python, Pillow
But I get this error:
line 30, in crop_max_square
return crop_center(pil_img, min(pil_img.size), min(pil_img.size))
TypeError: 'int' object is not iterable
I'm not sure if you really need to use numpy to load the files, but you can do what the "Sample code for batch processing" of the page you shared does:
import os
import glob
from PIL import Image, ImageDraw, ImageFilter
def mask_circle_solid(pil_img, background_color, blur_radius, offset=0):
background = Image.new(pil_img.mode, pil_img.size, background_color)
offset = blur_radius * 2 + offset
mask = Image.new("L", pil_img.size, 0)
draw = ImageDraw.Draw(mask)
draw.ellipse((offset, offset, pil_img.size[0] - offset, pil_img.size[1] - offset), fill=255)
mask = mask.filter(ImageFilter.GaussianBlur(blur_radius))
return Image.composite(pil_img, background, mask)
def mask_circle_transparent(pil_img, blur_radius, offset=0):
offset = blur_radius * 2 + offset
mask = Image.new("L", pil_img.size, 0)
draw = ImageDraw.Draw(mask)
draw.ellipse((offset, offset, pil_img.size[0] - offset, pil_img.size[1] - offset), fill=255)
mask = mask.filter(ImageFilter.GaussianBlur(blur_radius))
result = pil_img.copy()
return result
def crop_max_square(pil_img):
return crop_center(pil_img, min(pil_img.size), min(pil_img.size))
def crop_center(pil_img, crop_width, crop_height):
img_width, img_height = pil_img.size
return pil_img.crop(((img_width - crop_width) // 2,
(img_height - crop_height) // 2,
(img_width + crop_width) // 2,
(img_height + crop_height) // 2))
im = []
jpgs_files_path = "./" # Replace the "./" path by the path where the .jpg images are.
files = glob.glob(os.path.join(jpgs_files_path, '*.jpg'))
thumb_width = 150
for f in files:
im = Image.open(f)
im_thumb = crop_max_square(im).resize((thumb_width, thumb_width), Image.LANCZOS)
im_thumb = mask_circle_transparent(im_thumb, 4)
ftitle, fext = os.path.splitext(os.path.basename(f))
im_thumb.save(os.path.join("./dstdir/", ftitle + '_thumbnail.png'), quality=95)
As JPG does not support transparency you need to discard the Alpha Channel or save as something that supports transparency like PNG.
I want to detect crop rows using aerial images(CRBD). I have done the necessary image processing like converting to grayscale, edge detection, skeletonization, Hough Transform(to identify and draw the lines), and I also set the accumulator angle to math.pi*4.0/180, which I varied time after time.
The algorithm works well at detection approximately 4 crop lines, I want to improve it so that it can detect variable number of crop rows, and it should be able to highlight this crop rows
Here is a link to the sample code I modified Here
import os
import os.path
import time
import cv2
import numpy as np
import math
### Setup ###
image_data_path = os.path.abspath('../8470p/CRBD/Images')
gt_data_path = os.path.abspath('../8470p/GT data')
image_out_path = os.path.abspath('../8470p/algorithm_1')
use_camera = False # whether or not to use the test images or camera
images_to_save = [2, 3, 4, 5] # which test images to save
timing = False # whether to time the test images
curr_image = 0 # global counter
HOUGH_RHO = 2 # Distance resolution of the accumulator in pixels
HOUGH_ANGLE = math.pi*4.0/18 # Angle resolution of the accumulator in radians
HOUGH_THRESH_MAX = 80 # Accumulator threshold parameter. Only those lines are
returned that get votes
NUMBER_OF_ROWS = 10 # how many crop rows to detect
THETA_SIM_THRESH = math.pi*(6.0/180) # How similar two rows can be
RHO_SIM_THRESH = 8 # How similar two rows can be
ANGLE_THRESH = math.pi*(30.0/180) # How steep angles the crop rows can be in
def grayscale_transform(image_in):
'''Converts RGB to Grayscale and enhances green values'''
b, g, r = cv2.split(image_in)
return 2*g - r - b
def save_image(image_name, image_data):
'''Saves image if user requests before runtime'''
if curr_image in images_to_save:
image_name_new = os.path.join(image_out_path, "
str(curr_image) ))
def skeletonize(image_in):
'''Inputs and grayscale image and outputs a binary skeleton image'''
size = np.size(image_in)
skel = np.zeros(image_in.shape, np.uint8)
ret, image_edit = cv2.threshold(image_in, 0, 255, cv2.THRESH_BINARY |
element = cv2.getStructuringElement(cv2.MORPH_CROSS, (3,3))
done = False
while not done:
eroded = cv2.erode(image_edit, element)
temp = cv2.dilate(eroded, element)
temp = cv2.subtract(image_edit, temp)
skel = cv2.bitwise_or(skel, temp)
image_edit = eroded.copy()
zeros = size - cv2.countNonZero(image_edit)
if zeros == size:
done = True
return skel
def tuple_list_round(tuple_list, ndigits_1=0, ndigits_2=0):
'''Rounds each value in a list of tuples to the number of digits
new_list = []
for (value_1, value_2) in tuple_list:
new_list.append( (round(value_1, ndigits_1), round(value_2,
ndigits_2)) )
return new_list
def crop_point_hough(crop_points):
'''Iterates though Hough thresholds until optimal value found for
the desired number of crop rows. Also does filtering.
height = len(crop_points)
width = len(crop_points[0])
hough_thresh = HOUGH_THRESH_MAX
rows_found = False
while hough_thresh > HOUGH_THRESH_MIN and not rows_found:
crop_line_data = cv2.HoughLines(crop_points, HOUGH_RHO, HOUGH_ANGLE,
crop_lines = np.zeros((height, width, 3), dtype=np.uint8)
crop_lines_hough = np.zeros((height, width, 3), dtype=np.uint8)
if crop_line_data is not None:
# get rid of duplicate lines. May become redundant if a similarity
threshold is done
crop_line_data_1 = tuple_list_round(crop_line_data[:,0,:],-1, 4)
crop_line_data_2 = []
x_offsets = []
crop_lines_hough = np.zeros((height, width, 3), dtype=np.uint8)
for (rho, theta) in crop_line_data_1:
a = math.cos(theta)
b = math.sin(theta)
x0 = a*rho
y0 = b*rho
point1 = (int(round(x0+1000*(-b))), int(round(y0+1000*(a))))
point2 = (int(round(x0-1000*(-b))), int(round(y0-1000*(a))))
cv2.line(crop_lines_hough, point1, point2, (0, 0, 255), 2)
for curr_index in range(len(crop_line_data_1)):
(rho, theta) = crop_line_data_1[curr_index]
is_faulty = False
if ((theta >= ANGLE_THRESH) and (theta <= math.pi-
ANGLE_THRESH)) or(theta <= 0.001):
is_faulty = True
for (other_rho, other_theta) in
if abs(theta - other_theta) < THETA_SIM_THRESH:
is_faulty = True
elif abs(rho - other_rho) < RHO_SIM_THRESH:
is_faulty = True
if not is_faulty:
crop_line_data_2.append( (rho, theta) )
for (rho, theta) in crop_line_data_2:
a = math.cos(theta)
b = math.sin(theta)
c = math.tan(theta)
x0 = a*rho
y0 = b*rho
point1 = (int(round(x0+1000*(-b))), int(round(y0+1000*(a))))
point2 = (int(round(x0-1000*(-b))), int(round(y0-1000*(a))))
cv2.line(crop_lines, point1, point2, (0, 0, 255), 2)
#cv2.circle(crop_lines, (np.clip(int(round(a*rho+c*
#(0.5*height))),0 ,239), 0), 4, (255,0,0), -1)
#cv2.circle(crop_lines, (np.clip(int(round(a*rho-c*
#(0.5*height))),0 ,239), height), 4, (255,0,0), -1)
cv2.circle(crop_lines, (np.clip(int(round(rho/a)),0 ,239), 0), 5,
(255,0,0), -1)
#cv2.circle(img,(447,63), 63, (0,0,255), -1)
x_offsets.append(np.clip(int(round(rho/a)),0 ,239))
cv2.line(crop_lines, point1, point2, (0, 0, 255), 2)
if len(crop_line_data_2) >= NUMBER_OF_ROWS:
rows_found = True
hough_thresh -= HOUGH_THRESH_INCR
if rows_found == False:
print(NUMBER_OF_ROWS, "rows_not_found")
x_offset = min (x_offsets)
width = max (x_offsets) - min (x_offsets)
return (crop_lines, crop_lines_hough, x_offset, width)
def crop_row_detect(image_in):
'''Inputs an image and outputs the lines'''
save_image('0_image_in', image_in)
### Grayscale Transform ###
image_edit = grayscale_transform(image_in)
save_image('1_image_gray', image_edit)
### Skeletonization ###
skeleton = skeletonize(image_edit)
save_image('2_image_skeleton', skeleton)
### Hough Transform ###
(crop_lines, crop_lines_hough, x_offset, width) =
save_image('3_image_hough',cv2.addWeighted(image_in, 1,
crop_lines_hough, 1, 0.0))
save_image('4_image_lines',cv2.addWeighted(image_in, 1,crop_lines,1,0.0))
return (crop_lines , x_offset, width)
def main():
if use_camera == False:
diff_times = []
for image_name in sorted(os.listdir(image_data_path)):
global curr_image
curr_image += 1
start_time = time.time()
image_path = os.path.join(image_data_path, image_name)
image_in = cv2.imread(image_path)
crop_lines = crop_row_detect(image_in)
if timing == False:
cv2.imshow(image_name, cv2.addWeighted(image_in, 1,
crop_lines, 1, 0.0))
print('Press any key to continue...')
### Timing ###
diff_times.append(time.time() - start_time)
mean = 0
for diff_time in diff_times:
mean += diff_time
### Display Timing ###
print('max time = {0}'.format(max(diff_times)))
print('ave time = {0}'.format(1.0 * mean / len(diff_times)))
else: # use camera. Hasn't been tested on a farm.
capture = cv2.VideoCapture(0)
while cv2.waitKey(1) < 0:
_, image_in = capture.read()
(crop_lines, x_offset, width) = crop_row_detect(image_in)
cv2.imshow("Webcam", cv2.addWeighted(image_in, 1, crop_lines, 1,
Input Image
[![Input Image][1]][1]
Output Image
Expected Output
[![Expected Output][5]][5]
I have tried thresholding with cv2.inRange() to find green lines, but am still not getting the desired out.
Also the algorithms seems to be only draw the crop_line_data_2 as shown in the Output Image, it doesn't draw the crop_line_data_1
def threshold_green(image_in):
hsv = cv2.cvtColor(image_in, cv2.COLOR_BGR2HSV)
## mask of green (36,25,25) ~ (86, 255,255)
# mask = cv2.inRange(hsv, (36, 25, 25), (86, 255,255))
mask = cv2.inRange(hsv, (36, 25, 25), (70, 255,255))
## slice the green
imask = mask>0
green = np.zeros_like(image_in, np.uint8)
green[imask] = image_in[imask]
return green
i'm fairly new to python and openCV and i have been experimenting with some code that i found online. so thank you in advance for helping
although im using the imshow() function defined in opencv im unable to display the image
from __future__ import division
from __future__ import print_function
import random
import numpy as np
import cv2
def main():
"put img into target img of size imgSize, transpose for TF and normalize gray-values"
img=cv2.imread("C:\\Users\\bnsid\\Desktop\\a01-003-00-02.png", cv2.IMREAD_GRAYSCALE)
dataAugmentation = True
if img is None:
img = np.zeros([imgSize[1], imgSize[0]])
# dataaugmentation
if dataAugmentation:
stretch = (random.random() - 0.5) # -0.5 .. +0.5
wStretched = max(int(img.shape[1] * (1 + stretch)), 1) # random width, but at least 1
img = cv2.resize(img, (wStretched, img.shape[0])) # stretch horizontally by factor 0.5 .. 1.5
# create target image and copy sample image into it
(wt, ht) = imgSize
(h, w) = img.shape
fx = w / wt
fy = h / ht
f = max(fx, fy)
newSize = (max(min(wt, int(w / f)), 1), max(min(ht, int(h / f)), 1)) # scale according to f (result at least 1 and at most wt or ht)
img = cv2.resize(img, newSize)
target = np.ones([ht, wt]) * 255
target[0:newSize[1], 0:newSize[0]] = img
# transpose for TF
img = cv2.transpose(target)
# normalize
(m, s) = cv2.meanStdDev(img)
m = m[0][0]
s = s[0][0]
img = img - m
img = img / s if s>0 else img
cv2.imshow('Greyscale_Stretched', img)
k= cv2.waitKey(0) & 0xFF
if k == 27: # wait for ESC key to exit
elif k == ord('s'): # wait for 's' key to save and exit
cv2.imwrite('grey.png', img)
Just tested your code. You need to call the main() function somewhere. Since you haven't done that, the function is not executed.
simply add main() at the end of the code, and everything works.
def main():
#your code here
The main() function you have declared and defined here does not act like the main() entry function in C++. If you would like similar behavior, use this:
def function_name():
if __name__ == '__main__':
function_name() #for eg: main()