I have some B&W images, but they are stored as RGBA. I used skimage's rgb2gray(inp_image) to convert them to grayscale, yet the results are grayscale images that still have an alpha channel.
What do I do if I want to have those RGBa converted to grayscale without alpha channel?
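You can try this. By default cv2.imread loads the file as a 3-channel BGR image (any alpha channel is dropped), so the converted result is a single-channel grayscale image.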
import cv2
# Load the image. With the default flag cv2.imread returns a 3-channel BGR
# array, so an alpha channel stored in the file is not carried over.
image_path = 'path to your image'
image = cv2.imread(image_path)
if image is None:
    # cv2.imread reports a missing/unreadable file by returning None rather
    # than raising, which would otherwise surface as an opaque cvtColor error.
    raise FileNotFoundError("could not read image: {}".format(image_path))

# Collapse the colour channels into a single-channel grayscale image.
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Show the result and wait for a key press before closing the window.
cv2.imshow('Gray image', gray)
cv2.waitKey(0)
cv2.destroyAllWindows()
For multiple images:
import cv2
from os import listdir,makedirs
from os.path import isfile,join
# Folder to read images from and folder to write the grayscale copies to.
source = r'path to source folder'
destination = r'path where you want to save'

# Make sure the output folder exists before writing into it.
makedirs(destination, exist_ok=True)

# Only regular files directly inside the source folder are processed.
files = [f for f in listdir(source) if isfile(join(source, f))]
for image in files:
    # cv2.imread returns None (it does not raise) for files it cannot decode.
    img = cv2.imread(join(source, image))
    if img is None:
        print("{} is not converted".format(image))
        continue
    try:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        dstPath = join(destination, image)
        # Write to the per-file path, not to the destination folder itself.
        written = cv2.imwrite(dstPath, gray)
    except cv2.error:
        # Raised by OpenCV, e.g. when the file extension has no encoder.
        written = False
    if not written:
        print("{} is not converted".format(image))
Related
I am building a document reader that parses all the text in a document into a Google spreadsheet; the script is supposed to save time in my work. The problem is that the binary image has a lot of noise (really small points around the text) that confuses pytesseract. How can I remove this noise? The code I am using to binarize the image is:
import pytesseract
import cv2
import numpy as np
import os
import re
import argparse
#binarization of images
def binarize(img):
    """Return an inverted binary version of ``img``.

    The image is first reduced to grayscale (it is converted with
    ``COLOR_BGR2GRAY``, so a BGR colour image is expected) and then passed
    through a Gaussian adaptive threshold with block size 11 and constant 2.
    """
    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    return cv2.adaptiveThreshold(
        grayscale,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV,
        11,
        2,
    )
# Construct the argument parser.
parser = argparse.ArgumentParser(description='Binarize image and parse text in image to string')
parser.add_argument('-i', '--image', help='path to image', required=True)
parser.add_argument('-o', '--output', help='path to output file', required=True)
args = parser.parse_args()

# Load the image. cv2.imread returns None instead of raising when the file
# cannot be read, so fail early with a clear message.
img = cv2.imread(args.image)
if img is None:
    parser.error('could not read image: {}'.format(args.image))

# Binarize the image.
thresh = binarize(img)

# Show the result until a key is pressed.
cv2.imshow('image', thresh)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Save the result. --output is used as a directory here; the file name is fixed.
out_path = os.path.join(args.output, 'imagen3.jpg')
if not cv2.imwrite(out_path, thresh):
    # imwrite returns False (no exception) when the file could not be written.
    parser.error('could not write image: {}'.format(out_path))
and the result image i want to clean is :
and if i apply erosion this is the result:
which is worse than the other
EDIT: original image is :
You just need to increase your adaptive threshold arguments in Python/OpenCV.
Input:
import cv2
# read image
# Neighbourhood size and subtracted constant for the adaptive threshold.
BLOCK_SIZE = 21
OFFSET_C = 25

# Load the picture and reduce it to a single grey channel.
source = cv2.imread("petrol.png")
grey = cv2.cvtColor(source, cv2.COLOR_BGR2GRAY)

# Mean-based adaptive threshold on the grey image.
binary = cv2.adaptiveThreshold(
    grey,
    255,
    cv2.ADAPTIVE_THRESH_MEAN_C,
    cv2.THRESH_BINARY,
    BLOCK_SIZE,
    OFFSET_C,
)

# Persist the result, then show it until a key is pressed.
cv2.imwrite("petrol_threshold.png", binary)
cv2.imshow("THRESHOLD", binary)
cv2.waitKey(0)
Results:
I am using OpenCV - 3.4.9.31
I was trying to run a very basic code to read an image, but to my surprise, the output was in RGB colorspace instead of BGR.
import cv2
# The directory and file name are joined by plain concatenation,
# so the directory string has to end with a slash.
image_dir = 'C:/Users/xxx/PycharmProjects/Images/'
image_name = 'lena.png'

# Read the picture and display it until a key is pressed.
Img = cv2.imread(image_dir + image_name)
cv2.imshow("Image", Img)
cv2.waitKey(0)
Why do I see RGB image instead of BGR ?
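cv2.imread already returns the image in BGR channel order, and cv2.imshow expects BGR, which is why the colours look correct on screen. If you want to swap the channel order (the same operation converts RGB to BGR and BGR to RGB), you can do this: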
# Swap the first and third channels of Img.
# NOTE(review): cv2.imread already returns BGR data, so after this call the
# array is in RGB order despite the variable name — confirm that is intended.
imgBGR = cv2.cvtColor(Img, cv2.COLOR_RGB2BGR)
For example this image returns Sieteary ear
While this image returns the correct answer
The only difference between the 2 images is 2 pixels in the height.
I have tried applying some thresholding, but it didn't seem to help...
from PIL import Image
import pytesseract
# Tell pytesseract where the Tesseract executable is installed.
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Open the image in a context manager so its file handle is closed as soon as
# OCR has finished. `path` must already hold the location of the image file.
with Image.open(path) as image:
    print(pytesseract.image_to_string(image, lang='eng'))
You can perform some preprocessing using OpenCV. The idea is to enlarge the image with imutils, obtain a binary image using Otsu's threshold, then add a slight Gaussian blur. For optimal detection, the image should be in the form where desired text to be detected is in black with the background in white. Here's the preprocessing results for the two images:
Before -> After
The output from Pytesseract is the same for both images:
BigBootyHunter2
Code
import cv2
import pytesseract
import imutils
# Tell pytesseract where the Tesseract executable is installed.
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Read the picture, resize it to a width of 500 pixels and drop the colour.
resized = imutils.resize(cv2.imread('1.jpg'), width=500)
grayscale = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)

# Otsu picks the global threshold automatically; a light Gaussian blur
# is then applied to the binary result.
_, binary = cv2.threshold(grayscale, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
smoothed = cv2.GaussianBlur(binary, (3, 3), 0)

# Run OCR on the preprocessed image and show what was fed to Tesseract.
print(pytesseract.image_to_string(smoothed, lang='eng', config='--psm 6'))
cv2.imshow('thresh', smoothed)
cv2.waitKey()
I'm trying to read some stats from manually cropped sections of tables in PDF files.
Here is the image I'm trying to process
The current result I get has most of the numbers but not all of the text, as seen below:
Hmuwinu'fg. cm’: -009,d1-I (F -o.761.l= .om,
Tamar wuall ma: 2 1.41(F-o.167
Tao! hr aubgrwp dimes: Nol wvwe
I've tried using interpolations other than inter-cubic during the resizing step, and played around changing the kernel size but 1x1 seems to work the best.
Here is the current code:
# import the packages
from PIL import Image
import pytesseract
import numpy as np
import argparse
import cv2
import os
# construct the argument parse and parse the arguments
# Construct the argument parser and parse the arguments.
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True, help="path to input image to OCR'd")
ap.add_argument("-p", "--preprocess", type=str, default="thresh", help="type of preprocessing to be done")
args = vars(ap.parse_args())

# Load the example image. cv2.imread returns None instead of raising when the
# file cannot be read, so fail early with a clear message.
image = cv2.imread(args["image"])
if image is None:
    ap.error("could not read image: {}".format(args["image"]))

# Rescale the image by 1.5x in both directions.
image = cv2.resize(image, None, fx=1.5, fy=1.5, interpolation=cv2.INTER_CUBIC)

# Dilation followed by erosion is meant to remove some noise.
# NOTE: a 1x1 kernel covers only the pixel itself, so these two calls leave the
# image unchanged; a larger kernel is needed for any actual noise removal.
kernel = np.ones((1, 1), np.uint8)
image = cv2.dilate(image, kernel, iterations=1)
image = cv2.erode(image, kernel, iterations=1)

# Convert it to grayscale.
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Apply the preprocessing step selected on the command line; any other value
# leaves the grayscale image as it is.
if args["preprocess"] == "thresh":
    gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
elif args["preprocess"] == "blur":
    gray = cv2.medianBlur(gray, 3)

# Write the grayscale image to disk as a temp file (named after the process
# id) so it can be OCR'd.
filename = "{}.png".format(os.getpid())
cv2.imwrite(filename, gray)
try:
    # Load the temp file as a PIL/Pillow image and apply OCR. The context
    # manager closes the file handle before the file is deleted.
    with Image.open(filename) as temp_image:
        text = pytesseract.image_to_string(temp_image)
finally:
    # Always delete the temp file, even when OCR fails.
    os.remove(filename)
print(text)

# Show the output images.
cv2.imshow("Image", image)
cv2.imshow("Output", gray)
cv2.waitKey(0)
Any suggestions or methods are really appreciated.
I applied adaptive-threshold + bitwise-not operations and result is:
Now, when I read:
# Run OCR on `bnt`; "--psm 6" selects Tesseract's page segmentation mode 6
# (assume a single uniform block of text).
txt = pytesseract.image_to_string(bnt, config="--psm 6")
print(txt)
Result:
Hewrogenedty: Chit «0.09, die 1 (P = 0,78); If 0.0%
Teal for overall ettect: Z = 1.41 (P = 0.16)
Test tor subgroup ditlrenote: Not appliaalle
Not perfect, but at least the numbers are correct (if I'm not mistaken).
Code:
import cv2
import pytesseract
# Read the crop and upscale it 2.5x with bicubic interpolation.
scaled = cv2.resize(
    cv2.imread("Q8iIo.png"), None, fx=2.5, fy=2.5, interpolation=cv2.INTER_CUBIC
)
grayscale = cv2.cvtColor(scaled, cv2.COLOR_BGR2GRAY)

# Mean adaptive threshold (inverted output), then flip it back with
# bitwise-not before handing the image to Tesseract.
inverted = cv2.adaptiveThreshold(
    grayscale, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 25, 28
)
bnt = cv2.bitwise_not(inverted)

# OCR the result and print the recognised text.
txt = pytesseract.image_to_string(bnt, config="--psm 6")
print(txt)
from PIL import Image
import pytesseract
import argparse
import cv2
import os
# Load the scan with Pillow; Image.open returns a PIL Image object, not a
# NumPy array.
image = Image.open("C:/Users/NB/Desktop/Scan/Arti818.jpg")
#image = "C:/Users/NB/Desktop/Scan/Arti818.jpg"
# NOTE: cv2.cvtColor needs a NumPy array, so passing the PIL Image here is what
# raises "TypeError: src is not a numpy array, neither a scalar".
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# apply Otsu thresholding to preprocess the
# image
gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
# (no median blurring step is present in this version of the script)
# write the grayscale image to disk as a temporary file (named after the
# process id) so we can apply OCR to it
filename = "{}.png".format(os.getpid())
cv2.imwrite(filename, gray)
# load the image as a PIL/Pillow image, apply OCR, and then delete
# the temporary file
text = pytesseract.image_to_string(Image.open(filename))
os.remove(filename)
print(text)
# show the output images
# NOTE(review): `image` is still the PIL Image at this point, so this imshow
# call presumably fails the same way cvtColor does — confirm.
cv2.imshow("Image", image)
cv2.imshow("Output", gray)
cv2.waitKey(0)
This is my code, and I am getting the following error:
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
TypeError: src is not a numpy array, neither a scalar
Read the docs. It clearly says:
PIL.Image.open(fp, mode='r')
Opens and identifies the given image file.
Returns: An Image object.
The object returned is of Image type, not a numpy.ndarray. If you want an array, convert image to one:
import numpy as np

# Pillow decodes images in RGB channel order, so use the RGB->gray conversion
# code (BGR2GRAY would apply the red and blue weights the wrong way round).
# convert("RGB") guarantees a 3-channel array even when the file is stored as
# grayscale, palette or RGBA, which cvtColor would otherwise reject.
gray = cv2.cvtColor(np.asarray(image.convert("RGB")), cv2.COLOR_RGB2GRAY)