Using this code
'''
import cv2
import numpy as np
import pytesseract
from PIL import Image

pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\New folder\tesseract.exe'

# Grayscale image
img = Image.open(r"C:\Users\gusta\Downloads\PyQuad-master\test.png").convert('L')

# Binarize: pixels brighter than 180 become white, everything else black.
# The image is 8-bit, so the maximum value is 255 (500 is out of range).
_, img = cv2.threshold(np.array(img), 180, 255, cv2.THRESH_BINARY)

# Older versions of pytesseract need a pillow image
# Convert back if needed
img = Image.fromarray(img.astype(np.uint8))

# Run OCR once and reuse the result for both the console and the file.
text = pytesseract.image_to_string(img)
print(text)

# Explicit UTF-8 so non-ASCII characters in the OCR output are written
# the same way regardless of the platform's default encoding.
with open('file.txt', mode='w', encoding='utf-8') as f:
    f.write(text)
'''
I get a text file:
Find the y-intercept of the parabola with the followin
y = —6x? — 10x — 2
Which was converted from:
Is there a way I could get it to read the ^2 exponent and also print it as ^2?
Related
I am trying to detect this letter but it doesn't seem to recognize it.
import cv2
import pytesseract as tess

img = cv2.imread("letter.jpg")
# cv2.imread returns None (it does not raise) when the file cannot be read,
# so fail here with a clear message instead of inside cvtColor.
if img is None:
    raise FileNotFoundError("letter.jpg")

# Tesseract is fed the single-channel grayscale version of the picture.
imggray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
print(tess.image_to_string(imggray))
this is the image in question:
Preprocessing of the image (e.g. inverting it) should help, and also you could take advantage of pytesseract image_to_string config options.
For instance, something along these lines:
import pytesseract
import cv2 as cv
import requests
import numpy as np
import io

# I read this directly from imgur
# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get('https://i.stack.imgur.com/LGFAu.jpg', timeout=30)
# Fail loudly on HTTP errors instead of trying to decode an error page.
response.raise_for_status()

nparr = np.frombuffer(response.content, np.uint8)
img = cv.imdecode(nparr, cv.IMREAD_GRAYSCALE)
# imdecode returns None when the bytes are not a decodable image.
if img is None:
    raise ValueError('downloaded data could not be decoded as an image')

# simple inversion as preprocessing
neg_img = cv.bitwise_not(img)

# invoke tesseract with options
text = pytesseract.image_to_string(neg_img, config='--psm 7')
print(text)
should parse the letter correctly.
Have a look at related questions for some additional info about preprocessing and tesseract options:
Why does pytesseract fail to recognise digits from image with darker background?
Why does pytesseract fail to recognize digits in this simple image?
Why does tesseract fail to read text off this simple image?
@Davide Fiocco's answer is definitely correct.
I just want to show another way of doing it with adaptive-thresholding
When you apply adaptive thresholding, the result will be:
Now when you read it:
# Excerpt of the full listing below: `thr` is the adaptive-threshold image
# built there. The OCR result is stored in `txt` and printed.
txt = pytesseract.image_to_string(thr, config="--psm 7")
print(txt)
Result:
B
Code:
import cv2
import pytesseract

img = cv2.imread("LGFAu.jpg")
# cv2.imread returns None (it does not raise) when the file cannot be read.
if img is None:
    raise FileNotFoundError("LGFAu.jpg")

gry = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Inverted adaptive threshold (mean of an 11x11 neighbourhood, minus 2).
# 255 is the full 8-bit white level, giving a true black/white image.
thr = cv2.adaptiveThreshold(gry, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                            cv2.THRESH_BINARY_INV, 11, 2)

txt = pytesseract.image_to_string(thr, config="--psm 7")
print(txt)
Here, I want to change the default sharpness of the images in a dataset. It works fine for a single image, but when I apply it to multiple images, I get an error: AttributeError: 'numpy.ndarray' object has no attribute 'filter'. What should I do to fix this? My code is given below:
from PIL import Image
from PIL import ImageEnhance
import cv2
import glob

dataset = glob.glob('input/*.png')
other_dir = 'output/'

# NOTE: this is the failing version from the question. cv2.imread yields a
# numpy array, and handing that array to ImageEnhance.Sharpness is what
# produces the AttributeError quoted above.
for img_id, img_path in enumerate(dataset):
    img = cv2.imread(img_path, 0)
    enhancer = ImageEnhance.Sharpness(img)
    enhanced_im = enhancer.enhance(8.0)
    cl2 = cv2.resize(
        enhanced_im,
        (1024, 1024),
        interpolation=cv2.INTER_CUBIC,
    )
    cv2.imwrite(f'{other_dir}/enhanced_{img_id}.png', cl2)
You're trying to use PIL to enhance a numpy array. cv2 converts images from image paths into numpy arrays. This doesn't work with PIL image operations.
You can load the image using PIL, do the PIL enhancements then convert it to a numpy array to pass into your cv2.resize() method.
Try:
from PIL import Image
from PIL import ImageEnhance
import cv2
import glob
import numpy as np
import os

dataset = glob.glob('input/*.png')
other_dir = 'output/'

# Make sure the destination exists before any image is written to it.
os.makedirs(other_dir, exist_ok=True)

for img_id, img_path in enumerate(dataset):
    # Load with PIL (ImageEnhance needs a PIL image, not a numpy array).
    # Converting to 'L' mirrors the grayscale read cv2.imread(path, 0) of the
    # question and avoids handing an RGB array to OpenCV, which expects BGR.
    with Image.open(img_path) as pil_im:
        img = pil_im.convert('L')

    enhancer = ImageEnhance.Sharpness(img)  # PIL wants its own image format here
    enhanced_im = enhancer.enhance(8.0)  # and here

    enhanced_cv_im = np.array(enhanced_im)  # cv2 wants a numpy array
    cl2 = cv2.resize(enhanced_cv_im, (1024, 1024), interpolation=cv2.INTER_CUBIC)

    # os.path.join avoids the doubled slash of 'output/' + '/enhanced_...'.
    cv2.imwrite(os.path.join(other_dir, f'enhanced_{img_id}.png'), cl2)
I'm trying to run Arabic OCR on the following ID, but I get a very noisy picture and can't extract information from it.
Here is my attempt
import tesserocr
from PIL import Image
import pytesseract
import matplotlib as plt
import cv2
import imutils
import numpy as np

image = cv2.imread(r'c:\ahmed\ahmed.jpg')
# cv2.imread returns None (it does not raise) when the file cannot be read.
if image is None:
    raise FileNotFoundError(r'c:\ahmed\ahmed.jpg')

# Pre-processing: grayscale -> edge-preserving smoothing -> Gaussian blur
# -> adaptive (Gaussian-weighted) threshold.
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray = cv2.bilateralFilter(gray, 11, 18, 18)
gray = cv2.GaussianBlur(gray, (5, 5), 0)
kernel = np.ones((2, 2), np.uint8)
gray = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                             cv2.THRESH_BINARY, 11, 2)
#img_dilation = cv2.erode(gray, kernel, iterations=1)
#cv2.imshow("dilation", img_dilation)
cv2.imshow("gray", gray)

# OCR with the Arabic language model and keep a UTF-8 copy of the text.
text = pytesseract.image_to_string(gray, lang='ara')
print(text)
with open(r"c:\ahmed\file.txt", "w", encoding="utf-8") as myfile:
    myfile.write(text)

# Keep the preview window open until a key is pressed.
cv2.waitKey(0)
result
sample
The text for your id is in black color which makes the extraction process easy. All you need to do is threshold the dark pixels and you should be able to get the text out.
Here is a snip of the code
import cv2
import numpy as np

# load image in grayscale
image = cv2.imread('AVXjv.jpg', 0)
# cv2.imread returns None (it does not raise) when the file cannot be read.
if image is None:
    raise FileNotFoundError('AVXjv.jpg')

# remove noise
dst = cv2.blur(image, (3, 3))

# extract dark regions, which correspond to text
# (inverted threshold: pixels at or below 80 become white)
val, dst = cv2.threshold(dst, 80, 255, cv2.THRESH_BINARY_INV)

# morphological close (dilate, then erode) to connect separated blobs
dst = cv2.dilate(dst, None)
dst = cv2.erode(dst, None)

cv2.imshow("dst", dst)
cv2.waitKey(0)
And here is the result:
This is my output using ImageMagick TextCleaner script:
Script: textcleaner -g -e stretch -f 50 -o 30 -s 1 C:/Users/PC/Desktop/id.jpg C:/Users/PC/Desktop/out.png
Take a look here if you want to install and use the TextCleaner script on Windows. It's a tutorial I made as simple as possible, based on the research I did when I was in the same situation.
Now it should be very easy to detect the text and (not sure how simple) recognize it.
Could someone help me, please? I want to convert all the RGB images in one folder to grayscale in one go. I've been looking for Python code to do this but haven't found any. I tried the following, but it didn't work.
Here is my code:
from skimage.color import rgb2gray
from skimage.io import imread, imsave
from skimage.filters import threshold_otsu
from skimage import img_as_uint

# NOTE: this is the question's attempt. Passing the wildcard pattern straight
# to imread() is what raises the OSError quoted below; imread() is given a
# pattern here, not the path of one file.
inp_image = imread("C:/RGB/*.JPG")
img_gray = rgb2gray(inp_image)
thresh = threshold_otsu(img_gray)

# Boolean mask: True where a pixel is brighter than the Otsu threshold.
binary_thresh_img = img_gray > thresh
imsave("C:/Grayscale", img_as_uint(binary_thresh_img))
And it gave me following error:
OSError: [Errno 22] Invalid argument: 'C:/RGB/*.JPG'
You can get the list with the filenames with glob().
import glob

# Expand the wildcard first, then read the matching files one at a time.
for filename in glob.glob("C:/RGB/*.JPG"):
    inp_image = imread(filename)
    [...]  # placeholder: the rest of the per-image processing goes here
To add to the list of solutions:
import os
from PIL import Image

ORIGIN_PATH = "./folder1/"
DESTIN_PATH = "./folder2/"

# Create the destination folder up front so save() has somewhere to write.
os.makedirs(DESTIN_PATH, exist_ok=True)

for filename in os.listdir(ORIGIN_PATH):
    source = os.path.join(ORIGIN_PATH, filename)
    if not os.path.isfile(source):
        continue  # os.listdir also returns sub-directories; skip them

    # "L" is plain 8-bit grayscale. "LA" adds an alpha band, which cannot be
    # written when the target file is a JPEG.
    with Image.open(source) as img:
        img.convert("L").save(os.path.join(DESTIN_PATH, filename))
I'm using the Python Imaging Library for some very simple image manipulation, however I'm having trouble converting a greyscale image to a monochrome (black and white) image. If I save after changing the image to greyscale (convert('L')) then the image renders as you would expect. However, if I convert the image to a monochrome, single-band image it just gives me noise as you can see in the images below. Is there a simple way to take a colour png image to a pure black and white image using PIL / python?
# Attempt from the question: convert to greyscale ('L'), then to 1-bit ('1'),
# and write the result with scipy's imsave.
from PIL import Image
import ImageEnhance
import ImageFilter
from scipy.misc import imsave
image_file = Image.open("convert_image.png") # open colour image
image_file= image_file.convert('L') # convert image to greyscale - this works
image_file= image_file.convert('1') # convert image to black and white
# The 1-bit image is handed to scipy's imsave here, not to PIL's own save().
imsave('result_col.png', image_file)
from PIL import Image

# Open the colour image, reduce it to 1-bit black and white in one step,
# and let PIL itself write the file.
image_file = Image.open("convert_image.png").convert('1')
image_file.save('result.png')
yields
A PIL only solution for creating a bi-level (black and white) image with a custom threshold:
from PIL import Image

thresh = 200


def fn(x):
    """Map one grey level to white (255) above `thresh`, otherwise black (0)."""
    if x > thresh:
        return 255
    return 0


img = Image.open('mB96s.png')
# Greyscale first, then apply the per-level mapping and store as 1-bit.
grey = img.convert('L')
r = grey.point(fn, mode='1')
r.save('foo.png')
With just
# Direct conversion to mode '1' with no custom threshold; per the
# surrounding text this yields a dithered image.
r = img.convert('1')
r.save('foo.png')
you get a dithered image.
From left to right the input image, the black and white conversion result and the dithered result:
You can click on the images to view the unscaled versions.
Another option (which is useful e.g. for scientific purposes when you need to work with segmentation masks) is simply apply a threshold:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Binarize (make it black and white) an image with Python."""
from PIL import Image
from scipy.misc import imsave
import numpy
def binarize_image(img_path, target_path, threshold):
    """Binarize an image file and write the result to ``target_path``.

    Args:
        img_path: Path of the image to read.
        target_path: Path the black-and-white result is written to.
        threshold: Grey level; pixels strictly above it become white (255),
            all others black (0).
    """
    # The context manager closes the file handle once the pixels have been
    # copied into the numpy array.
    with Image.open(img_path) as image_file:
        image = image_file.convert('L')  # convert image to monochrome
        image = numpy.array(image)
    image = binarize_array(image, threshold)
    imsave(target_path, image)
def binarize_array(numpy_array, threshold=200):
    """Binarize a numpy array in place and return it.

    Args:
        numpy_array: Array of pixel values (any shape, may be empty). It is
            modified in place.
        threshold: Values strictly greater than this become 255; all other
            values become 0.

    Returns:
        The same array object that was passed in.
    """
    # One vectorised comparison replaces the per-pixel Python loop; it also
    # works for empty arrays and for arrays that are not 2-D.
    above = numpy_array > threshold
    numpy_array[above] = 255
    numpy_array[~above] = 0
    return numpy_array
def get_parser():
    """Build the command-line parser for this script.

    Returns:
        An ``ArgumentParser`` with the required options ``-i/--input`` and
        ``-o/--output`` and the optional integer ``--threshold``
        (default 200).
    """
    from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter

    # The module docstring doubles as the program description; the formatter
    # appends default values to each option's help text.
    parser = ArgumentParser(description=__doc__,
                            formatter_class=ArgumentDefaultsHelpFormatter)
    parser.add_argument("-i", "--input",
                        dest="input",
                        help="read this file",
                        metavar="FILE",
                        required=True)
    parser.add_argument("-o", "--output",
                        dest="output",
                        help="write binarized file here",
                        metavar="FILE",
                        required=True)
    parser.add_argument("--threshold",
                        dest="threshold",
                        default=200,
                        type=int,
                        help="Threshold when to show white")
    return parser
if __name__ == "__main__":
    # Script entry point: parse the command line, then binarize the file.
    args = get_parser().parse_args()
    binarize_image(args.input, args.output, args.threshold)
It looks like this for ./binarize.py -i convert_image.png -o result_bin.png --threshold 200:
As Martin Thoma has said, you need to normally apply thresholding. But you can do this using simple vectorization which will run much faster than the for loop that is used in that answer.
The code below converts the pixels of an image into 0 (black) and 1 (white).
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt

#Pixels higher than this will be 1. Otherwise 0.
THRESHOLD_VALUE = 200

#Load image and convert to greyscale
img = Image.open("photo.png")
img = img.convert("L")

imgData = np.asarray(img)
# Vectorised threshold: the boolean mask times 1.0 gives a float array of
# 0.0 (at or below the threshold) and 1.0 (above it).
thresholdedData = (imgData > THRESHOLD_VALUE) * 1.0

# A grey colormap renders 0 as black and 1 as white; the default colormap
# would display the two levels in colour.
plt.imshow(thresholdedData, cmap="gray")
plt.show()
A simple way to do it using python :
Python
import numpy as np
import imageio

image = imageio.imread(r'[image-path]', as_gray=True)

# getting the threshold value
thresholdValue = np.mean(image)

# turn the image into a black and white image: pixels strictly above the
# mean become 255, all others 0. The boolean mask updates the array in
# place, replacing the per-pixel Python loops.
brighter = image > thresholdValue
image[brighter] = 255
image[~brighter] = 0
This is how I did it; it gave better results, like a gray filter:
from PIL import Image

# Convert the picture to 8-bit greyscale ("L"), save it, then preview it.
BaW = Image.open("profile.png").convert("L")
BaW.save("profileBaW.png")
BaW.show()