I try to write a Ocr for numbers in Python using OpenCV and the Knn-Algorithm. The code works pretty well but i want to expand my input data to move on to handwritten digits. Training / input of data works like that: You run the script, where the path of a image is, then the image opens with a rectangle around the number and you have to press the number on the keyboard. At the end it saves the classifications and flattened images in a textfile.
The problem is that it overrides the old textfiles so this data gets lost.
Is it possible / does it work to append the new and the old textfile?
This is the code:
import sys
import numpy as np
import cv2
import os
MIN_CONTOUR_AREA = 35
RESIZED_IMAGE_WIDTH = 20
RESIZED_IMAGE_HEIGHT = 30
def main():
imgTrainingNumbers = cv2.imread("training_chars.png")
if imgTrainingNumbers is None: # if image was not read successfully
print "error: image not read from file \n\n" # print error message to std out
os.system("pause") # pause so user can see error message
return # and exit function (which exits program)
# end if
imgGray = cv2.cvtColor(imgTrainingNumbers, cv2.COLOR_BGR2GRAY) # get grayscale image
imgBlurred = cv2.GaussianBlur(imgGray, (5,5), 0) # blur
# filter image from grayscale to black and white
imgThresh = cv2.adaptiveThreshold(imgBlurred, # input image
255, # make pixels that pass the threshold full white
cv2.ADAPTIVE_THRESH_GAUSSIAN_C, # use gaussian rather than mean, seems to give better results
cv2.THRESH_BINARY_INV, # invert so foreground will be white, background will be black
11, # size of a pixel neighborhood used to calculate threshold value
2) # constant subtracted from the mean or weighted mean
cv2.imshow("imgThresh", imgThresh) # show threshold image for reference
imgThreshCopy = imgThresh.copy() # make a copy of the thresh image, this in necessary b/c findContours modifies the image
imgContours, npaContours, npaHierarchy = cv2.findContours(imgThreshCopy, # input image, make sure to use a copy since the function will modify this image in the course of finding contours
cv2.RETR_EXTERNAL, # retrieve the outermost contours only
cv2.CHAIN_APPROX_SIMPLE) # compress horizontal, vertical, and diagonal segments and leave only their end points
# declare empty numpy array, we will use this to write to file later
# zero rows, enough cols to hold all image data
npaFlattenedImages = np.empty((0, RESIZED_IMAGE_WIDTH * RESIZED_IMAGE_HEIGHT))
intClassifications = [] # declare empty classifications list, this will be our list of how we are classifying our chars from user input, we will write to file at the end
# possible chars we are interested in are digits 0 through 9, put these in list intValidChars
intValidChars = [ord('0'), ord('1'), ord('2'), ord('3'), ord('4'), ord('5'), ord('6'), ord('7'), ord('8'), ord('9'),
ord('-'), ord('o'), ord('c')]
for npaContour in npaContours: # for each contour
if cv2.contourArea(npaContour) > MIN_CONTOUR_AREA: # if contour is big enough to consider
[intX, intY, intW, intH] = cv2.boundingRect(npaContour) # get and break out bounding rect
# draw rectangle around each contour as we ask user for input
cv2.rectangle(imgTrainingNumbers, # draw rectangle on original training image
(intX, intY), # upper left corner
(intX+intW,intY+intH), # lower right corner
(0, 0, 255), # red
2) # thickness
imgROI = imgThresh[intY:intY+intH, intX:intX+intW] # crop char out of threshold image
imgROIResized = cv2.resize(imgROI, (RESIZED_IMAGE_WIDTH, RESIZED_IMAGE_HEIGHT)) # resize image, this will be more consistent for recognition and storage
cv2.imshow("imgROI", imgROI) # show cropped out char for reference
cv2.imshow("imgROIResized", imgROIResized) # show resized image for reference
cv2.imshow("training_numbers.png", imgTrainingNumbers) # show training numbers image, this will now have red rectangles drawn on it
intChar = cv2.waitKey(0) # get key press
if intChar == 27: # if esc key was pressed
sys.exit() # exit program
elif intChar in intValidChars: # else if the char is in the list of chars we are looking for . . .
intClassifications.append(intChar) # append classification char to integer list of chars (we will convert to float later before writing to file)
npaFlattenedImage = imgROIResized.reshape((1, RESIZED_IMAGE_WIDTH * RESIZED_IMAGE_HEIGHT)) # flatten image to 1d numpy array so we can write to file later
npaFlattenedImages = np.append(npaFlattenedImages, npaFlattenedImage, 0) # add current flattened impage numpy array to list of flattened image numpy arrays
# end if
# end if
# end for
fltClassifications = np.array(intClassifications, np.float32) # convert classifications list of ints to numpy array of floats
npaClassifications = fltClassifications.reshape((fltClassifications.size, 1)) # flatten numpy array of floats to 1d so we can write to file later
print "\n\ntraining complete !!\n"
np.savetxt("classifications.txt", npaClassifications) # write flattened images to file
np.savetxt("flattened_images.txt", npaFlattenedImages) #
cv2.destroyAllWindows()
return
Thank you
This should fix it.
print "\n\ntraining complete !!\n"
if os.path.exists("classifications.txt") == False:
np.savetxt("classifications.txt", npaClassifications)
np.savetxt("flattened_images.txt", npaFlattenedImages)
elif os.stat("classifications.txt").st_size == 0:
np.savetxt("classifications.txt", npaClassifications)
np.savetxt("flattened_images.txt", npaFlattenedImages) #
else:
f = open("classifications.txt", 'ab')
np.savetxt(f, npaClassifications)
f.close()
e = open("flattened_images.txt", 'ab')
np.savetxt(e, npaFlattenedImages)
Related
I'm pretty new to image processing and python so bear with me
I'm trying to take a big image (5632x2048) which is basically a map of the world with provinces (ripped from Hearts of Iron 4), and each province is colored a different RGB value, and color it with a set of colors, each corresponding to a certain country. I'm currently using this code
import numpy as np
import cv2
import sqlite3
dbPath = 'PATH TO DB'
dirPath = 'PATH TO IMAGE'
con = sqlite3.connect(dbPath)
cur = con.cursor()
im = cv2.imread(dirPath)
cur.execute('SELECT * FROM Provinces ORDER BY id')
provinceTable = cur.fetchall()
for line in provinceTable:
input_rgb = [line[1], line[2], line[3]]
if line[7] == None:
output_rgb = [255,255,255]
else:
output_rgb = line[7].replace('[', '').replace(']','').split(',')
im[np.all(im == (int(input_rgb[0]), int(input_rgb[1]), int(input_rgb[2])), axis=-1)] = (int(output_rgb[0]), int(output_rgb[1]), int(output_rgb[2]))
cv2.imwrite('result.png',im)
The problem I'm running into is that it's painfully slow (50 minutes in and it hasn't finished), due to the fact I'm definitely using numpy wrong by looping through it instead of vectorizing (a concept I'm still new to and have no idea how to do). Google hasn't been very helpful either.
What's the best way to do this?
Edit: forgot to mention that the amount of values I'm replacing is pretty big (~15000)
As I mentioned in the comments, I think you'll want to use np.take(yourImage, LUT) where LUT is a Lookup Table.
So, if you make a dummy image the same shape as yours:
import numpy as np
# Make a dummy image of 5632x2048 RGB values
im = np.random.randint(0,256,(5632,2048,3), np.uint8)
that will be 34MB. Now reshape it to a tall vector of RGB values:
# Make image into a tall vector, as tall as necessary and 3 RGB values wide
v = im.reshape((-1,3))
which will be of shape (11534336, 3) and then flatten that to 24-bit values rather than three 8-bit values with np.dot()
# Make into tall vector of shape 11534336x1 rather than 11534336x3
v24 = np.dot(v.astype(np.uint32),[1,256,65536])
You will now have a 1-D vector of 24-bit pixel values with shape (11534336,)
Now create your RGB lookup table (I am making all 2^24 RGB entries here, you may need less):
RGBLUT = np.zeros((2**24,3),np.uint8)
And set up the LUT. So, supposing you want to map all colours in the original image to mid-grey (128) in the output image:
RGBLUT[:] = 128
Now do the np.dot() thing just the same as we did with the image so we get a LUT with shape (224,1) rather than shape (224,3):
LUT24 = np.dot(RGBLUT.astype(np.uint32), [1,256,65536])
Then do the actual lookup in the table:
result = np.take(LUT24, v24)
On my Mac, that take 334ms for your 5632x2048 image.
Then reshape and convert back to three 8-bit values by shifting and ANDing to undo effect of np.dot().
I am not currently in a position to test the re-assembly, but it will look pretty much like this:
BlueChannel = result & 0xff # Blue channel is bottom 8 bits
GreenChannel = (result>>8) &0 xff # Green channel is middle 8 bits
RedChannel = (result>>16) &0 xff # Red channel is top 8 bits
Now combine those three single channels into a 3-channel image:
RGB = np.dstack(RedChannel, GreenChannel, BlueChannel))
And reshape back from tall vector to dimensions of original image:
RGB = RGB.reshape(im.shape)
As regards setting up the LUT, to something more interesting than mid-grey, if you want to map say orange, i.e. rgb(255,128,0) to magenta, i.e. rgb(255,0,255) you would do something along the lines of:
LUT[np.dot([255,128,0],[1,256,65536])] = [255,0,255] # map orange to magenta
LUT[np.dot([255,255,255],[1,256,65536])] = [0,0,0] # map white to black
LUT[np.dot([0,0,0],[1,256,65536])] = [255,255,255] # map black to white
Keywords: Python, image processing, LUT, RGB LUT 24-bit LUT, lookup table.
Here is one way to do that using Numpy and Python/OpenCV. Here I change red to green.
Input:
import cv2
import numpy as np
# load image
img = cv2.imread('test_red.png')
# change color
result = img.copy()
result[np.where((result==[0,0,255]).all(axis=2))] = [0,255,0]
# save output
cv2.imwrite('test_green.png', result)
# Display various images to see the steps
cv2.imshow('result',result)
cv2.waitKey(0)
cv2.destroyAllWindows()
Result:
You can create a mask of the image first and use that to replace the colors. There's likely a pure numpy way of doing this that is faster, but I don't know it.
This code takes ~0.5 seconds to run. You should expect it to take about half a second for each color replacement.
import cv2
import numpy as np
import time
# make image
res = (5632, 2048, 3);
img = np.zeros(res, np.uint8);
# change black to white
black = (0,0,0);
white = (255,255,255);
# make a mask
start_time = time.time();
mask = cv2.inRange(img, black, black);
print("Mask Time: " + str(time.time() - start_time));
# replace color
start_time = time.time();
img[mask == 255] = white;
print("Replace Time: " + str(time.time() - start_time));
In terms of your code it'll look like this
for line in provinceTable:
input_rgb = [line[1], line[2], line[3]]
input_rgb = (int(input_rgb[0]), int(input_rgb[1]), int(input_rgb[2]))
if line[7] == None:
output_rgb = (255,255,255)
else:
output_rgb = line[7].replace('[', '').replace(']','').split(',')
output_rgb = (int(output_rgb[0]), int(output_rgb[1]), int(output_rgb[2]))
mask = cv2.inRange(im, input_rgb, input_rgb)
im[mask == 255] = output_rgb
I have a code which gives me binary images using Otsu thresholding. I am making a dataset for a U-Net, and I want to try different algorithms (global as well as local) for the same, so that I can save the "best" image. Below is the code for my image binarization.
import cv2
import numpy as np
import skimage.filters as filters
img = cv2.imread('math.png') # read the image
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY) # convert to gray
smooth = cv2.GaussianBlur(gray, (95,95), 0) # blur
division = cv2.divide(gray, smooth, scale=255) # divide gray by morphology image
# Add Morphology here. Dilation, Erosion, Opening, Closing or so.
sharp = filters.unsharp_mask(division, radius=1.5, amount=1.5, multichannel=False, preserve_range=False) # sharpen using unsharp masking
sharp = (255*sharp).clip(0,255).astype(np.uint8)
thresh = cv2.threshold(sharp, 0, 255, cv2.THRESH_OTSU )[1] # threshold
I am getting pretty decent results with a broader area, but I want to use cv2.namedWindow, cv2.createTrackbar, cv2.getTrackbarPos, so that I can set the values of radius, amount, kernel, dilation, erosion, etc. by using below functions.
cv2.namedWindow('Tracking Window')
cv2.createTrackbar('param1','Tracking Window',0,255,dummy) # dummy is just a dummy function which returns None
param1 = cv2.getTrackbarPos('param1','Tracking Window')
How can I get all those. Also, how can I save the image when I press s, open next image by pressing n?
Original Question was posted by me 6 months ago
The code of my solution got longer than expected, but it offers some fancy manipulation possibilities. First of all, let's the see the actual window:
There are sliders for
the morphological operation (dilate, erode, close, open),
the structuring element (rectangle, ellipse, cross), and
the kernel size (here: limited to the range 1 ... 21).
The window name reflects the current settings for the first two sliders:
When pressing s, the image is saved incorporating the current settings:
Saved image as Erosion_Ellipsoid_SLEM_11.png.
When pressing n, the next image from a list is selected.
At any time, when pressing q, the application is exited. It ends automatically after processing all images from the list.
Before and after the interactive part, you can add any operation you want, cf. the code.
And, here's the full code:
import cv2
# Collect morphological operations
morphs = [cv2.MORPH_DILATE, cv2.MORPH_ERODE, cv2.MORPH_CLOSE, cv2.MORPH_OPEN]
# Collect some texts for later
morph_texts = {
cv2.MORPH_DILATE: 'Dilation',
cv2.MORPH_ERODE: 'Erosion',
cv2.MORPH_CLOSE: 'Closing',
cv2.MORPH_OPEN: 'Opening'
}
# Collect structuring elements
slems = [cv2.MORPH_RECT, cv2.MORPH_ELLIPSE, cv2.MORPH_CROSS]
# Collect some texts for later
slem_texts = {
cv2.MORPH_RECT: 'Rectangular SLEM',
cv2.MORPH_ELLIPSE: 'Ellipsoid SLEM',
cv2.MORPH_CROSS: 'Cross SLEM'
}
# Collect images
images = [...]
# Set up maximum values for each slider
max_morph = len(morphs) - 1
max_slem = len(slems) - 1
max_ks = 21
# Set up initial values for each slider
morph = 0
slem = 0
ks = 1
# Set up initial working image
temp = None
# Set up initial window name
title_window = 'Interactive {} with {}'.format(morph_texts[morphs[morph]],
slem_texts[slems[slem]])
# Triggered when any slider is manipulated
def on_trackbar(unused):
global image, ks, morph, slem, temp, title_window
# Get current slider values
morph = cv2.getTrackbarPos('Operation', title_window)
slem = cv2.getTrackbarPos('SLEM', title_window)
ks = cv2.getTrackbarPos('Kernel size', title_window)
# Reset window name
cv2.setWindowTitle(title_window, 'Interactive {} with {}'.
format(morph_texts[morphs[morph]],
slem_texts[slems[slem]]))
# Get current morphological operation and structuring element
op = morphs[morph]
sl = cv2.getStructuringElement(slems[slem], (ks, ks))
# Actual morphological operation
temp = cv2.morphologyEx(image.copy(), op, sl)
# Show manipulated image with current settings
cv2.imshow(title_window, temp)
# Iterate images
for image in images:
# Here go your steps before the interactive part
# ...
image = cv2.threshold(cv2.cvtColor(image.copy(), cv2.COLOR_BGR2GRAY),
0, 255, cv2.THRESH_OTSU)[1]
# Here starts the interactive part
cv2.namedWindow(title_window)
cv2.createTrackbar('Operation', title_window, morph, max_morph, on_trackbar)
cv2.createTrackbar('SLEM', title_window, slem, max_slem, on_trackbar)
cv2.createTrackbar('Kernel size', title_window, ks, max_ks, on_trackbar)
cv2.setTrackbarMin('Kernel size', title_window, 1)
on_trackbar(0)
k = cv2.waitKey(0)
# Exit everytime on pressing q
while k != ord('q'):
# Save image on pressing s
if k == ord('s'):
# Here go your steps after the interactive part, but before saving
# ...
filename = '{} {} {}.png'.\
format(morph_texts[morphs[morph]],
slem_texts[slems[slem]],
ks).replace(' ', '_')
cv2.imwrite(filename, temp)
print('Saved image as {}.'.format(filename))
# Go to next image on pressing n
elif k == ord('n'):
print('Next image')
break
# Continue if any other key was pressed
k = cv2.waitKey(0)
# Actual exiting
if k == ord('q'):
break
Hopefully, the code is self-explanatory. If not, don't hesitate to ask questions. You should be able to easily add every slider you additionally need by yourself, e.g. for the filters.unsharp_mask stuff.
----------------------------------------
System information
----------------------------------------
Platform: Windows-10-10.0.16299-SP0
Python: 3.9.1
OpenCV: 4.5.1
----------------------------------------
Hi I need to write a program that remove demarcation from gray scale image(image with text in it)
i read about thresholding and blurring but still i dont see how can i do it.
my image is an image of a hebrew text like that:
and i need to remove the demarcation(assuming that the demarcation is the smallest element in the image) the output need to be something like that
I want to write the code in python using opencv, what topics do i need to learn to be able to do that, and how?
thank you.
Edit:
I can use only cv2 functions
The symbols you want to remove are significantly smaller than all other shapes, you can use that to determine witch ones to remove.
First use threshold to convert the image to binary. Next, you can use findContours to detect the shapes and then contourArea to determine if the shape is larger than a threshold.
Finally you can can create a mask to remove the unwanted shapes, draw the larger symbols on a new image or draw the smaller symbols in white over the original symbols in the original image - making them disappear. I used that last technique in the code below.
Result:
Code:
import cv2
# load image as grayscale
img = cv2.imread('1MioS.png',0)
# convert to binary. Inverted, so you get white symbols on black background
_ , thres = cv2.threshold(img, 200, 255, cv2.THRESH_BINARY_INV)
# find contours in the thresholded image (this gives all symbols)
contours, hierarchy = cv2.findContours(thres, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# loop through the contours, if the size of the contour is below a threshold,
# draw a white shape over it in the input image
for cnt in contours:
if cv2.contourArea(cnt) < 250:
cv2.drawContours(img,[cnt],0,(255),-1)
# display result
cv2.imshow('res', img)
cv2.waitKey(0)
cv2.destroyAllWindows()
Update
To find the largest contour, you can loop through them and keep track of the largest value:
maxArea = 0
for cnt in contours:
currArea = cv2.contourArea(cnt)
if currArea > maxArea:
maxArea = currArea
print(maxArea)
I also whipped up a little more complex version, that creates a sorted list of the indexes and sizes of the contours. Then it looks for the largest relative difference in size of all contours, so you know which contours are 'small' and 'large'. I do not know if this works for all letters / fonts.
# create a list of the indexes of the contours and their sizes
contour_sizes = []
for index,cnt in enumerate(contours):
contour_sizes.append([index,cv2.contourArea(cnt)])
# sort the list based on the contour size.
# this changes the order of the elements in the list
contour_sizes.sort(key=lambda x:x[1])
# loop through the list and determine the largest relative distance
indexOfMaxDifference = 0
currentMaxDifference = 0
for i in range(1,len(contour_sizes)):
sizeDifference = contour_sizes[i][1] / contour_sizes[i-1][1]
if sizeDifference > currentMaxDifference:
currentMaxDifference = sizeDifference
indexOfMaxDifference = i
# loop through the list again, ending (or starting) at the indexOfMaxDifference, to draw the contour
for i in range(0, indexOfMaxDifference):
cv2.drawContours(img,contours,contour_sizes[i][0] ,(255),-1)
To get the background color you can do use minMaxLoc. This returns the lowest color value and it's position of an image (also the max value, but you don't need that). If you apply it to the thresholded image - where the background is black -, it will return the location of a background pixel (big odds it will be (0,0) ). You can then look up this pixel in the original color image.
# get the location of a pixel with background color
min_val, _, min_loc, _ = cv2.minMaxLoc(thres)
# load color image
img_color = cv2.imread('1MioS.png')
# get bgr values of background
b,g,r = img_color[min_loc]
# convert from numpy object
background_color = (int(b),int(g),int(r))
and then to draw the contours
cv2.drawContours(img_color,contours,contour_sizes[i][0],background_color,-1)
and of course
cv2.imshow('res', img_color)
This looks like a problem for template matching since you have what looks like a known font and can easily understand what the characters and/or demarcations are. Check out https://opencv-python-tutroals.readthedocs.io/en/latest/py_tutorials/py_imgproc/py_template_matching/py_template_matching.html
Admittedly, the tutorial talks about finding the match; modification is up to you. In that case, you know the exact shape of the template itself, so using that information along with the location of the match, just overwrite the image data with the appropriate background color (based on the examples above, 255).
You can solve it by removing all the small clusters.
I found a Python solution (using OpenCV) here.
For supporting smaller fonts, I added the following heuristic:
"The largest size of the demarcation cluster is 1/500 of the largest letter cluster".
The heuristic can be refined, by statistical analysts (or improved by other heuristics, such as demarcation locations relative to the letters).
import numpy as np
import cv2
I = cv2.imread('Goodluck.png', cv2.IMREAD_GRAYSCALE)
J = 255 - I # Invert I
img = cv2.threshold(J, 127, 255, cv2.THRESH_BINARY)[1] # Convert to binary
# https://answers.opencv.org/question/194566/removing-noise-using-connected-components/
nlabel,labels,stats,centroids = cv2.connectedComponentsWithStats(img, connectivity=8)
labels_small = []
areas_small = []
# Find largest cluster:
max_size = np.max(stats[:, cv2.CC_STAT_AREA])
thresh_size = max_size / 500 # Set the threshold to maximum cluster size divided by 500.
for i in range(1, nlabel):
if stats[i, cv2.CC_STAT_AREA] < thresh_size:
labels_small.append(i)
areas_small.append(stats[i, cv2.CC_STAT_AREA])
mask = np.ones_like(labels, dtype=np.uint8)
for i in labels_small:
I[labels == i] = 255
cv2.imshow('I', I)
cv2.waitKey(0)
Here is a MATLAB code sample (kept threshold = 200):
clear
I = imbinarize(rgb2gray(imread('בהצלחה.png')));
figure;imshow(I);
J = ~I;
%Clustering
CC = bwconncomp(J);
%Cover all small clusters with zewros.
for i = 1:CC.NumObjects
C = CC.PixelIdxList{i}; %Cluster coordinates.
%Fill small clusters with zeros.
if numel(C) < 200
J(C) = 0;
end
end
J = ~J;
figure;imshow(J);
Result:
I wish to create trained data for vehicle number detection. It is assumed that The vehicle number consists of digits 0 to 9 with special character. Please refer the example imagevehicle plate image. I am using the this image for creating trained data training image. The python code for generating trained data is given below.
import sys
import numpy as np
import cv2
import os
# module level variables ##########################################################################
MIN_CONTOUR_AREA = 100
RESIZED_IMAGE_WIDTH = 20
RESIZED_IMAGE_HEIGHT = 30
###################################################################################################
def main():
imgTrainingNumbers = cv2.imread("training1.png") # read in training numbers image
if imgTrainingNumbers is None: # if image was not read successfully
print "error: image not read from file \n\n" # print error message to std out
os.system("pause") # pause so user can see error message
return # and exit function (which exits program)
# end if
imgGray = cv2.cvtColor(imgTrainingNumbers, cv2.COLOR_BGR2GRAY) # get grayscale image
imgBlurred = cv2.GaussianBlur(imgGray, (5,5), 0) # blur
# filter image from grayscale to black and white
imgThresh = cv2.adaptiveThreshold(imgBlurred, # input image
255, # make pixels that pass the threshold full white
cv2.ADAPTIVE_THRESH_GAUSSIAN_C, # use gaussian rather than mean, seems to give better results
cv2.THRESH_BINARY_INV, # invert so foreground will be white, background will be black
11, # size of a pixel neighborhood used to calculate threshold value
2) # constant subtracted from the mean or weighted mean
cv2.imshow("imgThresh", imgThresh) # show threshold image for reference
imgThreshCopy = imgThresh.copy() # make a copy of the thresh image, this in necessary b/c findContours modifies the image
_,npaContours, npaHierarchy = cv2.findContours(imgThreshCopy, # input image, make sure to use a copy since the function will modify this image in the course of finding contours
cv2.RETR_EXTERNAL, # retrieve the outermost contours only
cv2.CHAIN_APPROX_SIMPLE) # compress horizontal, vertical, and diagonal segments and leave only their end points
# declare empty numpy array, we will use this to write to file later
# zero rows, enough cols to hold all image data
npaFlattenedImages = np.empty((0, RESIZED_IMAGE_WIDTH * RESIZED_IMAGE_HEIGHT))
intClassifications = [] # declare empty classifications list, this will be our list of how we are classifying our chars from user input, we will write to file at the end
# possible chars we are interested in are digits 0 through 9, put these in list intValidChars
intValidChars = [ord('0'), ord('1'), ord('2'), ord('3'), ord('4'), ord('5'), ord('6'), ord('7'), ord('8'), ord('9'),ord('♿')]
for npaContour in npaContours: # for each contour
if cv2.contourArea(npaContour) > MIN_CONTOUR_AREA: # if contour is big enough to consider
[intX, intY, intW, intH] = cv2.boundingRect(npaContour) # get and break out bounding rect
# draw rectangle around each contour as we ask user for input
cv2.rectangle(imgTrainingNumbers, # draw rectangle on original training image
(intX, intY), # upper left corner
(intX+intW,intY+intH), # lower right corner
(0, 0, 255), # red
2) # thickness
imgROI = imgThresh[intY:intY+intH, intX:intX+intW] # crop char out of threshold image
imgROIResized = cv2.resize(imgROI, (RESIZED_IMAGE_WIDTH, RESIZED_IMAGE_HEIGHT)) # resize image, this will be more consistent for recognition and storage
cv2.imshow("imgROI", imgROI) # show cropped out char for reference
cv2.imshow("imgROIResized", imgROIResized) # show resized image for reference
cv2.imshow("training_numbers.png", imgTrainingNumbers) # show training numbers image, this will now have red rectangles drawn on it
intChar = cv2.waitKey(0) # get key press
if intChar == 27: # if esc key was pressed
sys.exit() # exit program
elif intChar in intValidChars: # else if the char is in the list of chars we are looking for . . .
intClassifications.append(intChar) # append classification char to integer list of chars (we will convert to float later before writing to file)
npaFlattenedImage = imgROIResized.reshape((1, RESIZED_IMAGE_WIDTH * RESIZED_IMAGE_HEIGHT)) # flatten image to 1d numpy array so we can write to file later
npaFlattenedImages = np.append(npaFlattenedImages, npaFlattenedImage, 0)
cv2.imshow("imgROI", npaFlattenedImages)
# add current flattened impage numpy array to list of flattened image numpy arrays
# end if
# end if
# end for
fltClassifications = np.array(intClassifications, np.float32) # convert classifications list of ints to numpy array of floats
npaClassifications = fltClassifications.reshape((fltClassifications.size, 1))
print npaClassifications
# flatten numpy array of floats to 1d so we can write to file later
print "\n\ntraining complete !!\n"
# np.savetxt(str(raw_input('Insert the name of the file (ex: "a.txt"): ')), npaClassifications,delimiter="\t") # write flattened images to file
# np.savetxt((raw_input('a.txt')), npaFlattenedImages)
np.savetxt('testimage_1.txt',npaFlattenedImages) #
np.savetxt('test1_1.txt',npaClassifications)
cv2.destroyAllWindows()
#save(data)
#'a.txt'
#return data # remove windows from memory
#def save(data):
# np.savetxt(str(input('Insert name of the list(ex: a.txt): ')), npaFlattenedImages)
# return
if __name__ == "__main__":
main()
I got the error as
intValidChars = [ord('0'), ord('1'), ord('2'), ord('3'), ord('4'), ord('5'), ord('6'), ord('7'), ord('8'), ord('9'),ord('♿')]
TypeError: ord() expected a character, but string of length 3 found
How can i overcome this error?
In python 2 you can try:
u'♿'
For example:
>>> ord(u'♿')
9855
Since you are using the ord() result as a mapping between key presses and labels, you can't use ♿ as a key press. I would try to use another ascii char to identify that case (like ord('a')).
You could replace that a later when reading from the keyboard:
elif intChar in intValidChars:
if intChar == ord('a'):
intChar = ord('♿')
intClassifications.append(intChar)
But probably, the training function doesn't even need that, as long as you remember to translate the prediction result.
i am trying to recreate a picture. I take a picture edging it and save it. after i made it grayscale and save it. Found the common pixels of the two images and I am trying to recreate again the picture and i get this error. It is a picture of a road and i am trying to keep only the white lanes. so after i compare the edged picture with the first picture most common pixels are the white ones that represent the lanes of the road.
The error is thrown in line marked <———-- near the end of the code listing
TypeError: too many data entries
newpic is the list in that form `[1,1,1,1,...,1]
here is my code and explaining every part. if you have any other suggestion how to achieve the result i want please say it
#LIBRARIES
import cv2
import numpy as np
import matplotlib as mpl
from matplotlib import pyplot as plt
#read and display the image
img = cv2.imread("road.jpg")
#original picture show
cv2.imshow("Window Name",img)
# edging the image
edges = cv2.Canny(img,255,255)
#show the canny picture
cv2.imshow("Window Name",edges)
#save the canny picture First argument is the file name, second
argument is the image you want to save.
cv2.imwrite('canny.png',edges)
#making the image an array
from PIL import Image
#read the pciture
img = Image.open('road.jpg').convert('LA')
#save it
img.save('greyscale.png')
#open the edited
im=Image.open("greyscale.png")
#make it an array
pix_val = list(im.getdata())
pix_val_flat = [x for sets in pix_val for x in sets]
# pix_val_flat has the pixels for out first image without edging
#print the array
#print (pix_val_flat[125]);
#get the lenght of the array
lenght=len(pix_val_flat)
#print the array
#print(lenght);
#take the canny picture and make it grayscale
edge = Image.open('canny.png').convert('LA')
#make it array
pix_val1 = list(edge.getdata())
pix_val_flat1 = [x for sets in pix_val for x in sets]
#get the lenght of the array
#lenght1=len(pix_val_flat1)
#prnt the array
#print(lenght);
#print the array
#print (pix_val_flat1[125]);
print(lenght)
newpic = [0]*lenght
lenght2=len(newpic)
print (newpic)
for c1 in range(0,lenght,3):
if pix_val_flat[c1]==pix_val_flat1[c1] and
pix_val_flat[c1+1]==pix_val_flat1[c1+1] and
pix_val_flat[c1+2]==pix_val_flat1[c1+2]:
newpic[c1]= pix_val_flat1[c1]
newpic[c1+1]= pix_val_flat1[c1+1]
newpic[c1+2]= pix_val_flat1[c1+2]
array = np.array(newpic, dtype=np.uint8)
print (array)
im2 = Image.new(im.mode, im.size)
im2.putdata (newpic) ---------------------> here i get the error
new_image = Image.fromarray(array)
new_image.save('hello.png')
cv2.waitKey(0)
cv2.destroyAllWindows()
In this case it means that your putting more data than the size you set before.
You can check the length of data you put in with len(the_list_of_data), so you'll see length gets double every time you put data (even if you overwrite). You can set the_list_of_data length to 0 and then fill it with data. This error occurs in loops too.