I'm trying to find the pixels closest to an RGB value of (0,0,255). I'm trying to calculate the distance of the pixel in RGB values to that value using a 3D Pythagoras calculation, add them to a list, and then return the X and Y coordinates of the values that have the lowest distance. Here's what I have:
# import the necessary packages
import numpy as np
import scipy.spatial as sp
import matplotlib.pyplot as plt
import cv2
import math
from PIL import Image, ImageDraw, ImageFont
background = Image.open("test.tif").convert('RGBA')
png = background.save("test.png")
retina = cv2.imread("test.png")
#convert BGR to RGB image
retina = cv2.cvtColor(retina, cv2.COLOR_BGR2RGB)
h,w,bpp = np.shape(retina)
min1_d = float('inf')
min1_coords = (None, None)
min2_d = float('inf')
min2_coords = (None, None)
for py in range(0,h):
for px in range (0,w):
r = retina[py][px][0]
g = retina[py][px][1]
b = retina[py][px][2]
d = math.sqrt(((r-0)**2) + ((g-0)**2) + ((255-b)**2))
print(str(r) + "," + str(g) + "," + str(b) + ",," + str(px) + "," + str(py) + ",," + str(d))
if d < min1_d:
min2_d = min1_d
min2_coords = min1_coords
min1_d = d
min1_coords = (px, py)
elif d < min2_d: # if it's not the smallest, check if it's the second smallest
min2_d = d
min2_coords = (px, py)
print(min1_coords, min2_coords)
width, height = background.size
x_max = int(width)
y_max = int(height)
img = Image.new('RGBA', (x_max, y_max), (255,255,255,0))
draw = ImageDraw.Draw(img)
draw.point(min1_coords, (0,0,255))
draw.point(min2_coords, (0,0,255))
foreground = img
background.paste(foreground, (0, 0), foreground)
foreground.save("test_bluer.png")
background.save("test_bluer_composite.png")
How can I speed up my for loops? I believe this answer is on the right track, but I'm not sure how to implement the px and py variables while slicing as this answer shows.
You can speed up your code by vectorizing the for loop:
r = retina[:,:,0]
g = retina[:,:,1]
b = retina[:,:,2]
d = np.sqrt(r**2 + g**2 + (255-b)**2)
You can find the coordinates of the minimum with:
min_coords = np.unravel_index(np.argmin(d), np.shape(d))
If you want to find the second smallest distance just change the previous minimum to be a larger distance:
d[min_coords[0],min_coords[1]] = np.inf
min_coords = np.unravel_index(np.argmin(d), np.shape(d))
# min_coords now has the second smallest distance
Here is one way in Python/OpenCV.
Read the input
Define your color (pure blue)
Create an image of the color desired
Compute an image representing the rmse difference
Threshold the rmse image
Get the coordinates of all white pixels
Input:
import cv2
import numpy as np
# read image
img = cv2.imread('red_blue2.png')
# reference color (blue)
color = (255,0,0)
# create image the size of the input, but with blue color
ref = np.full_like(img, color)
# compute rmse difference image
diff = cv2.absdiff(img, ref)
diff2 = diff*diff
b,g,r = cv2.split(diff)
rmse = np.sqrt( ( b+g+r )/3 )
# threshold for pixels within 1 graylevel different
thresh = cv2.threshold(rmse, 1, 255, cv2.THRESH_BINARY_INV)[1]
# get coordinates
coords = np.argwhere(thresh == 255)
for coord in coords:
print(coord[1],coord[0])
# write results to disk
cv2.imwrite("red_blue2_rmse.png", (20*rmse).clip(0,255).astype(np.uint8))
cv2.imwrite("red_blue2_thresh.png", thresh)
# display it
cv2.imshow("rmse", rmse)
cv2.imshow("thresh", thresh)
cv2.waitKey(0)
RMSE Image (scaled in brightness by 20x for viewing):
Thresholded rmse image:
Coordinates:
127 0
128 0
127 1
128 1
127 2
128 2
127 3
128 3
127 4
128 4
127 5
128 5
127 6
128 6
127 7
128 7
127 8
128 8
127 9
128 9
127 10
128 10
127 11
128 11
127 12
128 12
127 13
128 13
127 14
128 14
127 15
128 15
127 16
128 16
127 17
128 17
127 18
128 18
127 19
128 19
127 20
128 20
127 21
128 21
127 22
128 22
127 23
128 23
127 24
128 24
127 25
128 25
127 26
128 26
127 27
128 27
127 28
128 28
127 29
128 29
127 30
128 30
127 31
128 31
127 32
128 32
127 33
128 33
127 34
128 34
127 35
128 35
127 36
128 36
127 37
128 37
127 38
128 38
127 39
128 39
127 40
128 40
127 41
128 41
127 42
128 42
127 43
128 43
127 44
128 44
127 45
128 45
127 46
128 46
127 47
128 47
127 48
128 48
127 49
128 49
As commented, subtract rgb value from array, square, average(or sum) pixel rgb values, get minimum.
Here is my variant:
import numpy
rgb_value = numpy.array([17,211,51])
img = numpy.random.randint(255, size=(1000,1000,3),dtype=numpy.uint8)
img_goal = numpy.average(numpy.square(numpy.subtract(img, rgb_value)), axis=2)
result = numpy.where(img_goal == numpy.amin(img_goal))
result_list = [result[0].tolist(),result[1].tolist()]
for i in range(len(result_list[0])):
print("RGB needed:", rgb_value)
print("Pixel:", result_list[0][i], result_list[1][i])
print("RGB gotten:", img[result_list[0][i]][result_list[1][i]])
print("Distance to value:", img_goal[result_list[0][i]][result_list[1][i]])
There can be multiple results with the same values.
Related
I'm using WinHex looking at a gif:
bocchi,gif
but when I load the image using PIL.Image.open(), I find that the size in the image's info is not equal to what I computed.
Here is what WinHex shown:
Offset 0 1 2 3 4 5 6 7 8 9 A B C D E F
00000000 47 49 46 38 39 61 68 01 40 01 F5 00 00 12 1D 25 GIF89ah # ? %
00000010 22 1E 21 17 23 2C " ! #,
Here is how do I compute it's width: 0x68 + 0x01 * 256 = 353 px
And here is PIL's output:
PIL output
>>> from PIL import Image
>>> img = Image.open(fp)
>>> img.size
(360, 320)
I want to figure out why this happend?
The 0x68 is hex, so you need:
68hex + 256 = 104 + 256 = 360
I have changed the column names and have added new columns too.
I am having a numpy array that I have to fill in the respective dataframe columns.
I am getting a delayed response in filling the dataframe using the following code:
import pandas as pd
import numpy as np
df = pd.read_csv("sample.csv")
df = df.tail(1000)
DISPLAY_IN_TRAINING = []
Slice_Middle_Piece_X = slice(None,-1, None)
Slice_Middle_Piece_Y = slice(-1, None)
input_slicer = slice(None, None)
output_slice = slice(None, None)
seq_len = 15 # choose sequence length
n_steps = seq_len - 1
Disp_Data = df
def Generate_DataSet(stock,
df_clone,
seq_len
):
global DISPLAY_IN_TRAINING
data_raw = stock.values # convert to numpy array
data = []
len_data_raw = data_raw.shape[0]
for index in range(0, len_data_raw - seq_len + 1):
data.append(data_raw[index: index + seq_len])
data = np.array(data);
test_set_size = int(np.round(30 / 100 * data.shape[0]));
train_set_size = data.shape[0] - test_set_size;
x_train, y_train = Get_Data_Chopped(data[:train_set_size])
print("Training Sliced Successful....!")
df_train_candle = df_clone[n_steps : train_set_size + n_steps]
if len(DISPLAY_IN_TRAINING) == 0:
DISPLAY_IN_TRAINING = list(df_clone)
df_train_candle.columns = DISPLAY_IN_TRAINING
return [x_train, y_train, df_train_candle]
def Get_Data_Chopped(data_related_to):
x_values = []
y_values = []
for index,iter_values in enumerate(data_related_to):
x_values.append(iter_values[Slice_Middle_Piece_X,input_slicer])
y_values.append([item for sublist in iter_values[Slice_Middle_Piece_Y,output_slice] for item in sublist])
x_values = np.asarray(x_values)
y_values = np.asarray(y_values)
return [x_values,y_values]
x_train, y_train, df_train_candle = Generate_DataSet(df,
Disp_Data,
seq_len
)
df_train_candle.reset_index(drop = True, inplace = True)
df_columns = list(df_train_candle)
df_outputs_name = []
OUTPUT_COLUMN = df.columns
for output_column_name in OUTPUT_COLUMN:
df_outputs_name.append(output_column_name + "_pred")
for i in range(len(df_columns)):
if df_columns[i] == output_column_name:
df_columns[i] = output_column_name + "_orig"
break
df_train_candle.columns = df_columns
df_pred_names = pd.DataFrame(columns = df_outputs_name)
df_train_candle = df_train_candle.join(df_pred_names, how="outer")
for row_index, row_value in enumerate(y_train):
for valueindex, output_label in enumerate(OUTPUT_COLUMN):
df_train_candle.loc[row_index, output_label + "_orig"] = row_value[valueindex]
df_train_candle.loc[row_index, output_label + "_pred"] = row_value[valueindex]
print(df_train_candle.head())
The shape of my y_train is (195, 24) and the dataframe shape is (195, 48). Now I am trying to optimize and make the process work faster. The y_train may change shape to say (195, 1) or (195, 5).
So please can someone tell me what other way (optimized way) for doing the above process? I want a general solution that could fit anything without loosing the data integrity and is faster too.
If teh data size increases from 1000 to 2000 the process become slow. Please advise how to make it faster.
Sample Data df looks like this with shape (1000, 8)
A B C D E F G H
64272 195 215 239 272 22 11 33 55
64273 196 216 240 273 22 11 33 55
64274 197 217 241 274 22 11 33 55
64275 198 218 242 275 22 11 33 55
64276 199 219 243 276 22 11 33 55
The output looks like this:
A_orig B_orig C_orig D_orig E_orig F_orig G_orig H_orig A_pred B_pred C_pred D_pred E_pred F_pred G_pred H_pred
0 10 30 54 87 22 11 33 55 10 30 54 87 22 11 33 55
1 11 31 55 88 22 11 33 55 11 31 55 88 22 11 33 55
2 12 32 56 89 22 11 33 55 12 32 56 89 22 11 33 55
3 13 33 57 90 22 11 33 55 13 33 57 90 22 11 33 55
4 14 34 58 91 22 11 33 55 14 34 58 91 22 11 33 55
Please generate csv columns with 1000 or more lines and see that the program becomes slower. I want to make it faster. I hope this is good to go for understanding.
I'm trying to read text from an image, using OpenCV and Pytesseract, but with poor results.
The image I'm interested in reading the text is: https://www.lubecreostorepratolapeligna.it/gb/img/logo.png
This is the code I am using:
pytesseract.pytesseract.tesseract_cmd = r'D:\Program Files\pytesseract\tesseract.exe'
image = cv2.imread(path_to_image)
# converting image into gray scale image
gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
cv2.imshow('grey image', gray_image)
cv2.waitKey(0)
# converting it to binary image by Thresholding
# this step is require if you have colored image because if you skip this part
# then tesseract won't able to detect text correctly and this will give incorrect result
threshold_img = cv2.threshold(gray_image, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
# display image
cv2.imshow('threshold image', threshold_img)
# Maintain output window until user presses a key
cv2.waitKey(0)
# Destroying present windows on screen
cv2.destroyAllWindows()
# now feeding image to tesseract
text = pytesseract.image_to_string(threshold_img)
print(text)
The result of the execution is : ["cu"," ","LUBE"," ","STORE","PRATOLA PELIGNA"]
But the result should be these 7 words: ["cucine", "LUBE", "CREO", "kitchens", "STORE", "PRATOLA", "PELIGNA"]
Is there anyone who could help me to solve this problem ?
Edit, 17.12.2020: Using preprocessing now it recognizes all, but the "O" in CREO. See the stages in ocr8.py. Then ocr9.py demonstrates (but not automated yet) finding the lines of text by the coordinates returned from pytesseract.image_to_boxes(), approcimate size of the letters and inter-symbol distance, then extrapolating one step ahead and searching for a single character (--psm 8).
It happened that Tesseract had actually recognized the "O" in CREO, but it read it as ♀, probably confused by the little "k" below etc.
Since it is a rare and "strange"/unexpected symbol, it could be corrected - replaced automatically (see the function Correct()).
There is a technical detail: Tesseract returns the ANSI/ASCII symbol 12, (0x0C) while the code in my editor was in Unicode/UTF-8 - 9792. So I coded it inside as chr(12).
The latest version: ocr9.py
You mentioned that PRATOLA and PELIGNA have to be given sepearately - just split by " ":
splitted = text.split(" ")
RECOGNIZED
CUCINE
LUBE
STORE
PRATOLA PELIGNA
CRE [+O with correction and extrapolation of the line]
KITCHENS
...
C 39 211 47 221 0
U 62 211 69 221 0
C 84 211 92 221 0
I 107 211 108 221 0
N 123 211 131 221 0
E 146 211 153 221 0
L 39 108 59 166 0
U 63 107 93 166 0
B 98 108 128 166 0
E 133 108 152 166 0
S 440 134 468 173 0
T 470 135 499 173 0
O 500 134 539 174 0
R 544 135 575 173 0
E 580 135 608 173 0
P 287 76 315 114 0
R 319 76 350 114 0
A 352 76 390 114 0
T 387 76 417 114 0
O 417 75 456 115 0
L 461 76 487 114 0
A 489 76 526 114 0
P 543 76 572 114 0
E 576 76 604 114 0
L 609 76 634 114 0
I 639 76 643 114 0
G 649 75 683 115 0
N 690 76 722 114 0
A 726 76 764 114 0
C 21 30 55 65 0
R 62 31 93 64 0
E 99 31 127 64 0
K 47 19 52 25 0
I 61 19 62 25 0
T 71 19 76 25 0
C 84 19 89 25 0
H 96 19 109 25 0
E 113 19 117 25 0
N 127 19 132 25 0
S 141 19 145 22 0
These are from getting "boxes".
Initial message:
I guess that for the area where "cucine" is, an adaptive threshold may segment it better or maybe applying some edge detection first.
Kitchens seems very small, what about trying to enlarge that area/distance.
For the CREO, I guess it's confused with the big and small size of adjacent captions.
For the "O" in creo, you may apply dilate in order to close the gap of the "O".
Edit: I played a bit, but without Tesseract and it needs more work. My goal was to make the letters more contrasting, may need some of these processings to be applied selectively only on the Cucine, maybe applying the recognition in two passes. When getting those partial words "Cu", apply adaptive threshold etc. (below) and OCR on a top rectangle around "CU..."
Binary Threshold:
Adaptive Threshold, Median blur (to clean noise) and invert:
Dilate connects small gaps, but it also destroys detail.
import cv2
import numpy as np
#pytesseract.pytesseract.tesseract_cmd = r'D:\Program Files\pytesseract\tesseract.exe'
path_to_image = "logo.png"
#path_to_image = "logo1.png"
image = cv2.imread(path_to_image)
h, w, _ = image.shape
w*=3; h*=3
w = (int)(w); h = (int) (h)
image = cv2.resize(image, (w,h), interpolation = cv2.INTER_AREA) #Resize 3 times
# converting image into gray scale image
gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
cv2.imshow('grey image', gray_image)
cv2.waitKey(0)
# converting it to binary image by Thresholding
# this step is require if you have colored image because if you skip this part
# then tesseract won't able to detect text correctly and this will give incorrect result
#threshold_img = cv2.threshold(gray_image, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
# display image
threshold_img = cv2.adaptiveThreshold(gray_image, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
cv2.THRESH_BINARY,13,3) #cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11,2)[1]
cv2.imshow('threshold image', threshold_img)
cv2.waitKey(0)
#threshold_img = cv2.GaussianBlur(threshold_img,(3,3),0)
#threshold_img = cv2.GaussianBlur(threshold_img,(3,3),0)
threshold_img = cv2.medianBlur(threshold_img,5)
cv2.imshow('medianBlur', threshold_img)
cv2.waitKey(0)
threshold_img = cv2.bitwise_not(threshold_img)
cv2.imshow('Invert', threshold_img)
cv2.waitKey(0)
#kernel = np.ones((1, 1), np.uint8)
#threshold_img = cv2.dilate(threshold_img, kernel)
#cv2.imshow('Dilate', threshold_img)
#cv2.waitKey(0)
cv2.imshow('threshold image', thrfeshold_img)
# Maintain output window until user presses a key
cv2.waitKey(0)
# Destroying present windows on screen
cv2.destroyAllWindows()
# now feeding image to tesseract
text = pytesseract.image_to_string(threshold_img)
print(text)
I am writing a script to encrypt and decrypt an image in python3 using PIL. Here I am converting the image into a numpy array and then multiplying every element of the array with 10. Now I noticed that the default function in PIL fromarray() is converting every element of the array to the mod of 256 if its larger than the 255, so when I am trying to retrieve the original value of the matrix I'm not getting the original one. For example, if the original value is 40 then its 10 times is 400 so the fromarray() is making it as 400 mod 256, which will give 144. Now if I add 256 to 144 I will have 400 and then divided by 10 will give me 40. But if the value is 54 then 10times is 540 and 540 mod 256 is 28. Now to get back the original value I need to add 256 two times which will give me 540. 540 isn't the only number which will give me 28 when I will mod it with 256. So I will never know when to add 256 one time and when two times or more. Is there any way I can make it stop of replacing every element of the matrix with its mod of 256?
from PIL import Image
from numpy import *
from pylab import *
#encryption
img1 = (Image.open('image.jpeg').convert('L'))
img1.show() #displaying the image
img = array(Image.open('image.jpeg').convert('L'))
a,b = img.shape
print(img)
print((a,b))
tup = a,b
for i in range (0, tup[0]):
for j in range (0, tup[1]):
img[i][j]= img[i][j]*10 #converting every element of the original array to its 10times
print(img)
imgOut = Image.fromarray(img)
imgOut.show()
imgOut.save('img.jpeg')
#decryption
img2 = (Image.open('img.jpeg'))
img2.show()
img3 = array(Image.open('img.jpeg'))
print(img3)
a1,b1 = img3.shape
print((a1,b1))
tup1 = a1,b1
for i1 in range (0, tup1[0]):
for j1 in range (0, tup1[1]):
img3[i1][j1]= ((img3[i1][j1])/10) #reverse of encryption
print(img3)
imgOut1 = Image.fromarray(img3)
imgOut1.show()
part of the original matrix before multiplying with 10 :
[41 42 45 ... 47 41 33]
[41 43 45 ... 44 38 30]
[41 42 46 ... 41 36 30]
[43 43 44 ... 56 56 55]
[45 44 45 ... 55 55 54]
[46 46 46 ... 53 54 54]
part of the matrix after multiplying with 10 :
[[154 164 194 ... 214 154 74]
[154 174 194 ... 184 124 44]
[154 164 204 ... 154 104 44]
[174 174 184 ... 48 48 38]
[194 184 194 ... 38 38 28]
[204 204 204 ... 18 28 28]
part of the expected matrix after dividing by 10 :
[41 42 45 ... 47 41 33]
[41 43 45 ... 44 38 30]
[41 42 46 ... 41 36 30]
[43 43 44 ... 56 56 55]
[45 44 45 ... 55 55 54]
[46 46 46 ... 53 54 54]
part of th output the script is providing: [[41 41 45 ... 48 40 33]
[41 43 44 ... 44 37 31]
[41 41 48 ... 41 35 30]
[44 42 43 ... 30 30 29]
[44 42 45 ... 29 29 29]
[45 47 44 ... 28 28 28]]
There are several problems with what you're trying to do here.
PIL images are either 8 bit per channel or 16 bit per channel (to the best of my knowledge). When you load a JPEG, it's loaded as 8 bits per channel, so the underlying data type is an unsigned 8-bit integer, i.e. range 0..255. Operations that would overflow or underflow this range wrap, which looks like the modulus behavior you're seeing.
You could convert the 8-bit PIL image to a floating point numpy array with np.array(img).astype('float32') and then normalize this to 0..1 by dividing with 255.
At this point you have non-quantized floating point numbers you can freely mangle however you wish.
However, then you still need to save the resulting image, at which point you again have a format problem. I believe TIFFs and some HDR image formats support floating point data, but if you want something that is widely readable, you'd likely go for PNG or JPEG.
For an encryption use case, JPEGs are not a good choice, as they're always inherently lossy, and you will, more likely than not, not get the same data back.
PNGs can be 8 or 16 bits per channel, but still, you'd have the problem of having to compress a basically infinite "dynamic range" of pixels (let's say you'd multiplied everything by a thousand!) into 0..255 or 0..65535.
An obvious way to do this is to find the maximum value in the image (np.max(...)), divide everything by it (so now you're back to 0..1), then multiply with the maximum value of the image data format... so with a simple multiplication "cipher" as you'd described, you'd essentially get the same image back.
Another way would be to clip the infinite range at the allowed values, i.e. everything below zero is zero, everything above it is, say, 65535. That'd be a lossy operation though, and you'd have no way of getting the unclipped values back.
First of all, PIL only supports 8-bit per channel images - although Pillow (the PIL fork) supports many more formats including higher bit-depths. The JPEG format is defined as only 8-bit per channel.
Calling Image.open() on a JPEG in PIL will therefore return an 8-bit array, so any operations on individual pixels will be performed as equivalent to uint8_t arithmetic in the backing representation. Since the maximum value in a uint8_t value is 256, all your arithmetic is necessarily modulo 256.
If you want to avoid this, you'll need to convert the representation to a higher bit-depth, such as 16bpp or 32bpp. You can do this with the NumPy code, such as:
img16 = np.array(img, dtype=np.uint16)
# or
img32 = np.array(img, dtype=np.uint32)
That will give you the extended precision that you desire.
However - your code example shows that you are trying to encryption and decrypt the image data. In that case, you do want to use modulo arithmetic! You just need to do some more research on actual encryption algorithms.
As none of the answers have helped me that much and I have solved the problem I would like to give an answer hoping one day it will help someone. Here the keys are (3, 25777) and (16971,25777).
The working code is as follows:
from PIL import Image
import numpy as np
#encryption
img1 = (Image.open('image.jpeg').convert('L'))
img1.show()
img = array((Image.open('image.jpeg').convert('L')))
img16 = np.array(img, dtype=np.uint32)
a,b = img.shape
print('\n\nOriginal image: ')
print(img16)
print((a,b))
tup = a,b
for i in range (0, tup[0]):
for j in range (0, tup[1]):
x = img16[i][j]
x = (pow(x,3)%25777)
img16[i][j] = x
print('\n\nEncrypted image: ')
print(img16)
imgOut = Image.fromarray(img16)
imgOut.show()
#decryption
img3_16 = img16
img3_16 = np.array(img, dtype=np.uint32)
print('\n\nEncrypted image: ')
print(img3_16)
a1,b1 = img3_16.shape
print((a1,b1))
tup1 = a1,b1
for i1 in range (0, tup1[0]):
for j1 in range (0, tup1[1]):
x1 = img3_16[i1][j1]
x1 = (pow(x,16971)%25777)
img3_16[i][j] = x1
print('\n\nDecrypted image: ')
print(img3_16)
imgOut1 = Image.fromarray(img3_16)y
imgOut1.show()
Feel free to point out the faults. Thank you.
I used cv2.connectedComponentsWithStatsin Python 2.7 to find out centroids of three blobs in a binary image. (Image is given below In code it is img3).
In this image, three white bolbs are present so number of centroids should be three only but python shows 8 centroids values of which are
[[ 307.08579803 95.31441513]
[ 511.51325364 141.06288981]
[ 259. 112.5 ]
[ 296.07169811 129.18490566]
[ 276. 140.5 ]
[ 225.19643047 237.16190375]
[ 189.21212121 271.6969697 ]
[ 187.58333333 285.83333333]]
Area of each bolbs are given in last column
[[ 0 0 640 480 286024]
[ 260 65 100 68 3753]
[ 454 84 105 118 7696]
[ 259 112 1 2 2]
[ 277 117 34 23 265]
[ 276 140 1 2 2]
[ 168 173 114 128 9413]
[ 186 268 8 8 33]
[ 186 284 5 4 12]]
My code is below:
import numpy as np
import cv2
from PIL import Image
import matplotlib.pyplot as plt
import timeit
from skimage import morphology, measure
img = cv2.imread('left180.png')
lower_white = np.array([150,150,150], dtype=np.uint8)
upper_white = np.array([255,255,255], dtype=np.uint8)
binimg = cv2.inRange(img, lower_white, upper_white)
cv2.imshow('res',binimg)
binimg[binimg!=0] = 255
# flood fill background to find inner holes
holes = binimg.copy()
retval, image, mask, rect = cv2.floodFill(holes, None, (0, 0), 255)
# invert holes mask, bitwise or with img fill in holes
holes = cv2.bitwise_not(holes)
#cv2.imshow('holes',holes)
filled_holes = cv2.bitwise_or(binimg, holes)
cv2.imshow('filled holes', filled_holes)
imglab = morphology.label(filled_holes)
cv2.imshow('label',imglab)
cleaned = morphology.remove_small_objects(imglab, min_size=1264, connectivity=4)
#np.savetxt('cleaned.csv',cleaned,fmt='%.18g',delimiter=',')
cv2.imshow('clea',cleaned)
img3 = np.zeros((imglab.shape))
img3[cleaned > 0] = 255
img3= np.uint8(img3) #### here conversion of array into uint8 data conversion is important
### else cv2.connectedComponentsWithStats will show error.
#np.savetxt('img3.csv',img3,fmt='%.18g',delimiter=',')
nb_components, output, stats, centroids = cv2.connectedComponentsWithStats(img3, connectivity=4)
sizes = stats[1:, -1];
#nb_components = nb_components - 1
cv2.imshow("centroid", img3)
centroids1 = centroids[1:nb_components]
print(centroids1)
print(stats)
cv2.waitKey(0)
Why is this problem happening? I want to get the centroids of blobs which area is greater than say 1000 pixels. How to do that?