My Automated Python script loses so much performance over time - python

This is my code, i know its long and messy code, but basically. At start its pretty fast and stuff, but after while you can notice really huge performance jumps, after 6 hours, i came back. and one click of keyboard button took like 10 seconds, can anybody help me find why is it slowing so much.
If you dont understand the code because i know its very messy and no comments. Its basically while loop that runs arround pressings buttons, and when it encounters pokemon, it reads its name and if its the one i want, it notifies me. Or if it errors, it notifies me too.
If i dont want it, it kills it
while active:
gc.collect()
gc.collect()
gc.collect()
time.sleep(0.1)
def waitclick():
while True:
if keyboard.read_key() == "f6":
global active
active = False
thread = threading.Thread(target=waitclick).start()
im = numpy.asarray(pyautogui.screenshot(region=hpcheck))
# Define the blue colour we want to find - remember OpenCV uses BGR ordering
color = [175, 65, 63]
X, Y = numpy.where(numpy.all(im == color, axis=2))
x = list(X)
if lenght:
cnt = 0
if len(x) <= lenght * (3 / 4):
while len(x) <= lenght* (5 / 6):
if cnt > 20:
pyautogui.click(2119, 793)
pyautogui.keyDown('4')
time.sleep(0.01)
pyautogui.keyUp('4')
pyautogui.click(2050, 1120)
pyautogui.click(1720, 1030)
break
cnt += 1
time.sleep(1)
pyautogui.moveTo(potion[0],potion[1], random.uniform(0.5,1))
pyautogui.click(potion[0],potion[1])
time.sleep(0.01)
pyautogui.click(potion[0],potion[1])
time.sleep(0.1)
pyautogui.moveTo(mainpoke[0],mainpoke[1],random.uniform(0.5,1))
pyautogui.click(mainpoke[0],mainpoke[1])
pyautogui.click(mainpoke[0], mainpoke[1])
im = Image.fromarray(im)
im.save("health.png")
im.close()
im = numpy.asarray(pyautogui.screenshot(region=(hpcheck)))
# Define the blue colour we want to find - remember OpenCV uses BGR ordering
color = [175, 65, 63]
X, Y = numpy.where(numpy.all(im == color, axis=2))
x = list(X)
lenght = len(x)
else:
lenght = len(x)
key = keys[0] if steps % 2 == 0 else keys[1]
pyautogui.keyDown(key)
pixels = []
battle = False
timeup = False
def wait():
global stepsleft
global steps
global battle
counter = 0
for i in range(stepsleft):
if battle:
break
counter += 1
time.sleep(onetile-(random.uniform(0.001,0.002)))
stepsleft = stepsleft - counter
if stepsleft == 0:
steps += 1
stepsleft = tiles
global timeup
timeup = True
thread = threading.Thread(target=wait).start()
while True:
if timeup:
break
im = numpy.asarray(pyautogui.screenshot(region=(check)))
if list(im[0, 0]) == [230, 230, 230]:
battle = True
break
pyautogui.keyUp(key)
stepcount += 1
im = numpy.asarray(pyautogui.screenshot(region=(check)))
# im = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
if stepcount > 15:
if fails > 100:
call = client.calls.create(twiml='<Response><Say>Ahoy, World!</Say></Response>',
to='+421949847318', from_='+18596961895')
payload = {
"content": f"Step Fail, continuing."
}
r = requests.post(f"https://discord.com/api/v9/channels/{channel_id}/messages",
data=payload,
headers=header)
gc.collect()
fails = 0
stepfails += 1
if stepfails > 10:
break
pyautogui.click(2119, 793)
pyautogui.keyDown('4')
time.sleep(0.01)
pyautogui.keyUp('4')
pyautogui.click(2050,1120)
pyautogui.click(1720, 1030)
time.sleep(1)
fails += 1
if battle == True:
breaks += 1
stepcount = 0
c = 0
e = False
while True:
if c > 600:
e = True
break
else:
im = numpy.asarray(pyautogui.screenshot(region=(namecheck)))
if list(im[0, 0]) == [254, 254, 254]:
break
c += 1
if e:
pyautogui.keyDown('4')
time.sleep(0.01)
pyautogui.keyUp('4')
pyautogui.click(2050, 1120)
time.sleep(1)
else:
encounters += 1
im = numpy.asarray(pyautogui.screenshot(region=(mon)))
im = cv2.resize(im, None, fx=1.2, fy=1.2, interpolation=cv2.INTER_CUBIC)
im = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
kernel = numpy.ones((1, 1), numpy.uint8)
im = cv2.threshold(cv2.bilateralFilter(im, 5, 75, 75), 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
im = cv2.dilate(im, kernel, iterations=1)
im = cv2.erode(im, kernel, iterations=1)
im = ~im
text = pytesseract.image_to_string(im)
text = text.strip().replace("\n","")
ime = Image.fromarray(im)
ime.save(f"Wild {text}.png")
ime.close()
if text == "":
ime = Image.fromarray(im)
ime.save("fail.png")
ime.close()
im = numpy.asarray(pyautogui.screenshot(region=(mon)))
im = cv2.resize(im, None, fx=1.6, fy=1.6, interpolation=cv2.INTER_CUBIC)
im = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
kernel = numpy.ones((1, 1), numpy.uint8)
im = cv2.threshold(cv2.bilateralFilter(im, 5, 75, 75), 0, 255,
cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
im = cv2.dilate(im, kernel, iterations=1)
im = cv2.erode(im, kernel, iterations=1)
im = ~im
text = pytesseract.image_to_string(im).strip()
print(">" + text + "<")
if text == "":
ime = Image.fromarray(im)
ime.save("fail2.png")
ime.close()
im = numpy.asarray(pyautogui.screenshot(region=mon))
print(">" + text + "<")
im = cv2.resize(im, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
im = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
kernel = numpy.ones((1, 1), numpy.uint8)
im = cv2.threshold(cv2.bilateralFilter(im, 5, 75, 75), 0, 255,
cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
im = cv2.dilate(im, kernel, iterations=1)
im = cv2.erode(im, kernel, iterations=1)
im = ~im
text = pytesseract.image_to_string(im).strip()
if text == "":
im = numpy.asarray(pyautogui.screenshot(region=check))
if list(im[0, 0]) != [230, 230, 230]:
pass
else:
Image.fromarray(im)
ime.save("fail3.png")
ime.close()
print(">" + text + "<")
payload = {
"content": f"Go look at this bro, i failed again."
}
r = requests.post(f"https://discord.com/api/v9/channels/{channel_id}/messages", data=payload,
headers=header)
active = False
call = client.calls.create(twiml='<Response><Say>Ahoy, World!</Say></Response>',
to='+421949847318', from_='+18596961895')
gc.collect()
break
if text.replace("[S]","").replace("[E]","").replace("\n","") not in pokemonlist:
ime = Image.fromarray(im)
ime.save("fail.png")
ime.close()
im = numpy.asarray(pyautogui.screenshot(region=mon))
im = cv2.resize(im, None, fx=1.6, fy=1.6, interpolation=cv2.INTER_CUBIC)
im = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
kernel = numpy.ones((1, 1), numpy.uint8)
im = cv2.threshold(cv2.bilateralFilter(im, 5, 75, 75), 0, 255,
cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
im = cv2.dilate(im, kernel, iterations=1)
im = cv2.erode(im, kernel, iterations=1)
im = ~im
text = pytesseract.image_to_string(im).strip().replace("\n","")
print(">" + text + "<")
if text.replace("[S]","").replace("\n","").replace("[E]","") not in pokemonlist:
ime = Image.fromarray(im)
ime.save("fail2.png")
ime.close()
im = numpy.asarray(pyautogui.screenshot(region=mon))
im = cv2.resize(im, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
im = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
kernel = numpy.ones((1, 1), numpy.uint8)
im = cv2.threshold(cv2.bilateralFilter(im, 5, 75, 75), 0, 255,
cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
im = cv2.dilate(im, kernel, iterations=1)
im = cv2.erode(im, kernel, iterations=1)
im = ~im
text = pytesseract.image_to_string(im).strip()
print(">" + text + "<")
if text.replace("[S]","").replace("\n","").replace("[E]","") not in pokemonlist:
ime = Image.fromarray(im)
ime.save("fail3.png")
ime.close()
print(">" + text + "<")
payload = {
"content": f"Go look at this bro, i failed again."
}
r = requests.post(f"https://discord.com/api/v9/channels/{channel_id}/messages", data=payload,
headers=header)
active = False
call = client.calls.create(twiml='<Response><Say>Ahoy, World!</Say></Response>',
to='+421949847318', from_='+18596961895')
gc.collect()
break
encounter_time = datetime.datetime.utcnow()
times.append(encounter_time)
if not ram or encounters < 5:
ram = process.memory_info().rss
if encounters < 15:
recentspeed = "None yet"
else:
recentspeed = str(round(
(15 / (encounter_time - times[encounters - 15]).total_seconds()) * 60, 2)) + "/min"
print(
f"Pokemon: {text} #{encounters}\nSpeed: {round((encounters / (encounter_time - start).total_seconds()) * 60, 2)}/min \nRecent Speed: {recentspeed}\nRam usage is {process.memory_info().rss} Starting usage was {process.memory_info().rss-ram} Garbage is {gc.get_count()}")
payload = {
"content": f"{'-'*20}\nPokemon: {text} #{encounters}\nSpeed: {round((encounters / (encounter_time - start).total_seconds()) * 60, 2)}/min \nRecent Speed: {recentspeed}\n Ram Change: `{(process.memory_info().rss-ram)//1000000}MB`\nRam usage: `{process.memory_info().rss//1000000}MB`\nStarting usage: `{ram//1000000}MB`\nGarbage: {gc.get_count()}\nCPU: {psutil.cpu_percent()}"
}
r = requests.post(f"https://discord.com/api/v9/channels/{logs_id}/messages", data=payload,
headers=header)
gc.collect()
#json
with open("encounters.json", "r") as a_file:
json_object = json.load(a_file)
a_file.close()
try:
json_object[text] += 1
except:
json_object[text] = 1
with open("encounters.json", "w") as a_file:
json.dump(json_object, a_file)
a_file.close()
c = 0
e = False
while True:
if c > 500:
pyautogui.click(2119, 793)
pyautogui.keyDown('4')
time.sleep(0.01)
pyautogui.keyUp('4')
pyautogui.click(2050, 1120)
pyautogui.click(1720, 1030)
time.sleep(1)
e = True
break
im = numpy.asarray(pyautogui.screenshot(region=battlecheck))
if list(im[0, 0]) == [240, 211, 92]:
break
c += 1
if e == True:
continue
if (text.startswith("[S]") if shinies else text.replace("[E]","") in shinywanted) or text.replace("[S]","").replace("[E]","") in wanted:
time.sleep(2)
end = datetime.datetime.utcnow()
done = end - start
timed = humanize.naturaldelta(done)
print("Done! in " + timed)
im = numpy.asarray(pyautogui.screenshot(region=win))
ime = Image.fromarray(im)
ime.save(f"{text}.png")
ime.close()
# File
files = {
"file": (f"{text}.png", open(f"{text}.png", 'rb'))
# The picture that we want to send in binary
}
# Optional message to send with the picture
payload = {
"content": f"Found {text} in {timed} of searching. It took {encounters} encounters"
}
active = False
r = requests.post(f"https://discord.com/api/v9/channels/{channel_id}/messages", data=payload,
headers=header, files=files)
gc.collect()
call = client.calls.create(twiml='<Response><Say>Ahoy, World!</Say></Response>',to='+421949847318',from_='+18596961895')
time.sleep(25)
if remote:
pyautogui.click(3804, 15)
break
playsound('ringtone.mp3')
elif text.startswith("[E]") or text.replace("[S]","").replace("[E]","") in avoid:
run()
c = 0
while True:
if c > 300:
pyautogui.click(2119, 793)
pyautogui.click(1720, 1030)
pyautogui.keyDown('4')
time.sleep(0.01)
pyautogui.keyUp('4')
pyautogui.click(2050, 1120)
time.sleep(1)
break
else:
im = numpy.asarray(pyautogui.screenshot(region=check))
if list(im[0, 0]) != [230, 230, 230]:
break
c += 1
else:
#time.sleep(random.randint(50, 70) / 100)
kill(text.replace("[S]","").replace("[E]",""))
c = 0
time.sleep(1)
while True:
if c > 750:
pyautogui.click(2119, 793)
pyautogui.click(1720, 1030)
pyautogui.keyDown('4')
time.sleep(0.01)
pyautogui.keyUp('4')
pyautogui.click(2050, 1120)
time.sleep(1)
break
else:
im = numpy.asarray(pyautogui.screenshot(region=check))
if list(im[0, 0]) != [230, 230, 230]:
break
c += 1

The code is barely legible, split it in functions or classes, and avoid global variables. Chances are you are overflowing the RAM of your computer or something like that, if you are appending information to a variable, remember deleting it after if it grows to much or storing it in a database-like system.
Something similar (or worst) applies to threads if you create too many without deleting them at some point.

Related

How to put RTSP video input in OpenCV

I'm setting up a PPE Detection module using OpenVINO in my Ubuntu 18.04. Although the video input worked well with my webcam dev/video/0 but I wish it can be change to RTSP input. Whenever I put my RTSP Url inside the config.json it doesnt work and show me Either wrong input path or empty line is found. Please check the conf.json file.
Here is the main.py
#!/usr/bin/env python3
from __future__ import print_function
import sys
import os
import cv2
import numpy as np
from argparse import ArgumentParser
import datetime
import json
from inference import Network
# Global vars
cpu_extension = ''
conf_modelLayers = ''
conf_modelWeights = ''
conf_safety_modelLayers = ''
conf_safety_modelWeights = ''
targetDevice = "CPU"
conf_batchSize = 1
conf_modelPersonLabel = 1
conf_inferConfidenceThreshold = 0.7
conf_inFrameViolationsThreshold = 19
conf_inFramePeopleThreshold = 5
use_safety_model = False
padding = 30
viol_wk = 0
acceptedDevices = ['CPU', 'GPU', 'MYRIAD', 'HETERO:FPGA,CPU', 'HDDL']
videos = []
name_of_videos = []
CONFIG_FILE = '../resources/config.json'
is_async_mode = True
class Video:
def __init__(self, idx, path):
if path.isnumeric():
self.video = cv2.VideoCapture(int(path))
self.name = "Cam " + str(idx)
else:
if os.path.exists(path):
self.video = cv2.VideoCapture("rtsp://edwin:Passw0rd#192.168.0.144:554/cam/realmonitor?channel=1&subtype=1")
self.name = "Video " + str(idx)
else:
print("Either wrong input path or empty line is found. Please check the conf.json file")
exit(21)
if not self.video.isOpened():
print("Couldn't open video: " + path)
sys.exit(20)
self.height = int(self.video.get(cv2.CAP_PROP_FRAME_HEIGHT))
self.width = int(self.video.get(cv2.CAP_PROP_FRAME_WIDTH))
self.currentViolationCount = 0
self.currentViolationCountConfidence = 0
self.prevViolationCount = 0
self.totalViolations = 0
self.totalPeopleCount = 0
self.currentPeopleCount = 0
self.currentPeopleCountConfidence = 0
self.prevPeopleCount = 0
self.currentTotalPeopleCount = 0
cv2.namedWindow(self.name, cv2.WINDOW_NORMAL)
self.frame_start_time = datetime.datetime.now()
def get_args():
"""
Parses the argument.
:return: None
"""
global is_async_mode
parser = ArgumentParser()
parser.add_argument("-d", "--device",
help="Specify the target device to infer on; CPU, GPU,"
"FPGA, MYRIAD or HDDL is acceptable. Application will"
"look for a suitable plugin for device specified"
" (CPU by default)",
type=str, required=False)
parser.add_argument("-m", "--model",
help="Path to an .xml file with a trained model's"
" weights.",
required=True, type=str)
parser.add_argument("-sm", "--safety_model",
help="Path to an .xml file with a trained model's"
" weights.",
required=False, type=str, default=None)
parser.add_argument("-e", "--cpu_extension",
help="MKLDNN (CPU)-targeted custom layers. Absolute "
"path to a shared library with the kernels impl",
type=str, default=None)
parser.add_argument("-f", "--flag", help="sync or async", default="async", type=str)
args = parser.parse_args()
global conf_modelLayers, conf_modelWeights, conf_safety_modelLayers, conf_safety_modelWeights, \
targetDevice, cpu_extension, videos, use_safety_model
if args.model:
conf_modelLayers = args.model
conf_modelWeights = os.path.splitext(conf_modelLayers)[0] + ".bin"
if args.safety_model:
conf_safety_modelLayers = args.safety_model
conf_safety_modelWeights = os.path.splitext(conf_safety_modelLayers)[0] + ".bin"
use_safety_model = True
if args.device:
targetDevice = args.device
if "MULTI:" not in targetDevice:
if targetDevice not in acceptedDevices:
print("Selected device, %s not supported." % (targetDevice))
sys.exit(12)
if args.cpu_extension:
cpu_extension = args.cpu_extension
if args.flag == "async":
is_async_mode = True
print('Application running in Async mode')
else:
is_async_mode = False
print('Application running in Sync mode')
assert os.path.isfile(CONFIG_FILE), "{} file doesn't exist".format(CONFIG_FILE)
config = json.loads(open(CONFIG_FILE).read())
for idx, item in enumerate(config['inputs']):
vid = Video(idx, item['video'])
name_of_videos.append([idx, item['video']])
videos.append([idx, vid])
def detect_safety_hat(img):
"""
Detection of the hat of the person.
:param img: Current frame
:return: Boolean value of the detected hat
"""
lowH = 15
lowS = 65
lowV = 75
highH = 30
highS = 255
highV = 255
crop = 0
height = 15
perc = 8
hsv = np.zeros(1)
try:
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
except cv2.error as e:
print("%d %d %d" % (img.shape))
print("%d %d %d" % (img.shape))
print(e)
threshold_img = cv2.inRange(hsv, (lowH, lowS, lowV), (highH, highS, highV))
x = 0
y = int(threshold_img.shape[0] * crop / 100)
w = int(threshold_img.shape[1])
h = int(threshold_img.shape[0] * height / 100)
img_cropped = threshold_img[y: y + h, x: x + w]
if cv2.countNonZero(threshold_img) < img_cropped.size * perc / 100:
return False
return True
def detect_safety_jacket(img):
"""
Detection of the safety jacket of the person.
:param img: Current frame
:return: Boolean value of the detected jacket
"""
lowH = 0
lowS = 150
lowV = 42
highH = 11
highS = 255
highV = 255
crop = 15
height = 40
perc = 23
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
threshold_img = cv2.inRange(hsv, (lowH, lowS, lowV), (highH, highS, highV))
x = 0
y = int(threshold_img.shape[0] * crop / 100)
w = int(threshold_img.shape[1])
h = int(threshold_img.shape[0] * height / 100)
img_cropped = threshold_img[y: y + h, x: x + w]
if cv2.countNonZero(threshold_img) < img_cropped.size * perc / 100:
return False
return True
def detect_workers(workers, frame):
"""
Detection of the person with the safety guards.
:param workers: Total number of the person in the current frame
:param frame: Current frame
:return: Total violation count of the person
"""
violations = 0
global viol_wk
for worker in workers:
xmin, ymin, xmax, ymax = worker
crop = frame[ymin:ymax, xmin:xmax]
if 0 not in crop.shape:
if detect_safety_hat(crop):
if detect_safety_jacket(crop):
cv2.rectangle(frame, (xmin, ymin), (xmax, ymax),
(0, 255, 0), 2)
else:
cv2.rectangle(frame, (xmin, ymin), (xmax, ymax),
(0, 0, 255), 2)
violations += 1
viol_wk += 1
else:
cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 0, 255), 2)
violations += 1
viol_wk += 1
return violations
def main():
"""
Load the network and parse the output.
:return: None
"""
get_args()
global is_async_mode
nextReq = 1
currReq = 0
nextReq_s = 1
currReq_s = 0
prevVideo = None
vid_finished = [False] * len(videos)
min_FPS = min([videos[i][1].video.get(cv2.CAP_PROP_FPS) for i in range(len(videos))])
# Initialise the class
infer_network = Network()
infer_network_safety = Network()
# Load the network to IE plugin to get shape of input layer
plugin, (batch_size, channels, model_height, model_width) = \
infer_network.load_model(conf_modelLayers, targetDevice, 1, 1, 2, cpu_extension)
if use_safety_model:
batch_size_sm, channels_sm, model_height_sm, model_width_sm = \
infer_network_safety.load_model(conf_safety_modelLayers, targetDevice, 1, 1, 2, cpu_extension, plugin)[1]
while True:
for index, currVideo in videos:
# Read image from video/cam
vfps = int(round(currVideo.video.get(cv2.CAP_PROP_FPS)))
for i in range(0, int(round(vfps / min_FPS))):
ret, current_img = currVideo.video.read()
if not ret:
vid_finished[index] = True
break
if vid_finished[index]:
stream_end_frame = np.zeros((int(currVideo.height), int(currVideo.width), 1),
dtype='uint8')
cv2.putText(stream_end_frame, "Input file {} has ended".format
(name_of_videos[index][1].split('/')[-1]),
(10, int(currVideo.height / 2)),
cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2)
cv2.imshow(currVideo.name, stream_end_frame)
continue
# Transform image to person detection model input
rsImg = cv2.resize(current_img, (model_width, model_height))
rsImg = rsImg.transpose((2, 0, 1))
rsImg = rsImg.reshape((batch_size, channels, model_height, model_width))
infer_start_time = datetime.datetime.now()
# Infer current image
if is_async_mode:
infer_network.exec_net(nextReq, rsImg)
else:
infer_network.exec_net(currReq, rsImg)
prevVideo = currVideo
previous_img = current_img
# Wait for previous request to end
if infer_network.wait(currReq) == 0:
infer_end_time = (datetime.datetime.now() - infer_start_time) * 1000
in_frame_workers = []
people = 0
violations = 0
hard_hat_detection = False
vest_detection = False
result = infer_network.get_output(currReq)
# Filter output
for obj in result[0][0]:
if obj[2] > conf_inferConfidenceThreshold:
xmin = int(obj[3] * prevVideo.width)
ymin = int(obj[4] * prevVideo.height)
xmax = int(obj[5] * prevVideo.width)
ymax = int(obj[6] * prevVideo.height)
xmin = int(xmin - padding) if (xmin - padding) > 0 else 0
ymin = int(ymin - padding) if (ymin - padding) > 0 else 0
xmax = int(xmax + padding) if (xmax + padding) < prevVideo.width else prevVideo.width
ymax = int(ymax + padding) if (ymax + padding) < prevVideo.height else prevVideo.height
cv2.rectangle(previous_img, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
people += 1
in_frame_workers.append((xmin, ymin, xmax, ymax))
new_frame = previous_img[ymin:ymax, xmin:xmax]
if use_safety_model:
# Transform image to safety model input
in_frame_sm = cv2.resize(new_frame, (model_width_sm, model_height_sm))
in_frame_sm = in_frame_sm.transpose((2, 0, 1))
in_frame_sm = in_frame_sm.reshape(
(batch_size_sm, channels_sm, model_height_sm, model_width_sm))
infer_start_time_sm = datetime.datetime.now()
if is_async_mode:
infer_network_safety.exec_net(nextReq_s, in_frame_sm)
else:
infer_network_safety.exec_net(currReq_s, in_frame_sm)
# Wait for the result
infer_network_safety.wait(currReq_s)
infer_end_time_sm = (datetime.datetime.now() - infer_start_time_sm) * 1000
result_sm = infer_network_safety.get_output(currReq_s)
# Filter output
hard_hat_detection = False
vest_detection = False
detection_list = []
for obj_sm in result_sm[0][0]:
if (obj_sm[2] > 0.4):
# Detect safety vest
if (int(obj_sm[1])) == 2:
xmin_sm = int(obj_sm[3] * (xmax - xmin))
ymin_sm = int(obj_sm[4] * (ymax - ymin))
xmax_sm = int(obj_sm[5] * (xmax - xmin))
ymax_sm = int(obj_sm[6] * (ymax - ymin))
if vest_detection == False:
detection_list.append(
[xmin_sm + xmin, ymin_sm + ymin, xmax_sm + xmin, ymax_sm + ymin])
vest_detection = True
# Detect hard-hat
if int(obj_sm[1]) == 4:
xmin_sm_v = int(obj_sm[3] * (xmax - xmin))
ymin_sm_v = int(obj_sm[4] * (ymax - ymin))
xmax_sm_v = int(obj_sm[5] * (xmax - xmin))
ymax_sm_v = int(obj_sm[6] * (ymax - ymin))
if hard_hat_detection == False:
detection_list.append([xmin_sm_v + xmin, ymin_sm_v + ymin, xmax_sm_v + xmin,
ymax_sm_v + ymin])
hard_hat_detection = True
if hard_hat_detection is False or vest_detection is False:
violations += 1
for _rect in detection_list:
cv2.rectangle(current_img, (_rect[0], _rect[1]), (_rect[2], _rect[3]), (0, 255, 0), 2)
if is_async_mode:
currReq_s, nextReq_s = nextReq_s, currReq_s
# Use OpenCV if worker-safety-model is not provided
else:
violations = detect_workers(in_frame_workers, previous_img)
# Check if detected violations equals previous frames
if violations == prevVideo.currentViolationCount:
prevVideo.currentViolationCountConfidence += 1
# If frame threshold is reached, change validated count
if prevVideo.currentViolationCountConfidence == conf_inFrameViolationsThreshold:
# If another violation occurred, save image
if prevVideo.currentViolationCount > prevVideo.prevViolationCount:
prevVideo.totalViolations += (
prevVideo.currentViolationCount - prevVideo.prevViolationCount)
prevVideo.prevViolationCount = prevVideo.currentViolationCount
else:
prevVideo.currentViolationCountConfidence = 0
prevVideo.currentViolationCount = violations
# Check if detected people count equals previous frames
if people == prevVideo.currentPeopleCount:
prevVideo.currentPeopleCountConfidence += 1
# If frame threshold is reached, change validated count
if prevVideo.currentPeopleCountConfidence == conf_inFrameViolationsThreshold:
prevVideo.currentTotalPeopleCount += (
prevVideo.currentPeopleCount - prevVideo.prevPeopleCount)
if prevVideo.currentTotalPeopleCount > prevVideo.prevPeopleCount:
prevVideo.totalPeopleCount += prevVideo.currentTotalPeopleCount - prevVideo.prevPeopleCount
prevVideo.prevPeopleCount = prevVideo.currentPeopleCount
else:
prevVideo.currentPeopleCountConfidence = 0
prevVideo.currentPeopleCount = people
frame_end_time = datetime.datetime.now()
cv2.putText(previous_img, 'Total people count: ' + str(
prevVideo.totalPeopleCount), (10, prevVideo.height - 10),
cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
cv2.putText(previous_img, 'Current people count: ' + str(
prevVideo.currentTotalPeopleCount),
(10, prevVideo.height - 40),
cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
cv2.putText(previous_img, 'Total violation count: ' + str(
prevVideo.totalViolations), (10, prevVideo.height - 70),
cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
cv2.putText(previous_img, 'FPS: %0.2fs' % (1 / (
frame_end_time - prevVideo.frame_start_time).total_seconds()),
(10, prevVideo.height - 100),
cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
cv2.putText(previous_img, "Inference time: N\A for async mode" if is_async_mode else \
"Inference time: {:.3f} ms".format((infer_end_time).total_seconds()),
(10, prevVideo.height - 130),
cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
cv2.imshow(prevVideo.name, previous_img)
prevVideo.frame_start_time = datetime.datetime.now()
# Swap
if is_async_mode:
currReq, nextReq = nextReq, currReq
previous_img = current_img
prevVideo = currVideo
if cv2.waitKey(1) == 27:
print("Attempting to stop input files")
infer_network.clean()
infer_network_safety.clean()
cv2.destroyAllWindows()
return
if False not in vid_finished:
infer_network.clean()
infer_network_safety.clean()
cv2.destroyAllWindows()
break
if __name__ == '__main__':
main()
Here is the config file
{
"inputs": [
{
"video": "rtsp://xxx:xxx#192.168.0.144:554/cam/realmonitor?channel=1&subtype=1"
}
]
}
This is because of the line if os.path.exists(path):. This if condition checks if path points towards an existing file. Your RTSP stream not being a file, it leads to your error.
For example, you can modify this condition to:
if os.path.exists(path) or path.startswith("rtsp"):
By the way, your hard-coded the rtsp stream address within the code, so it will not use your configured path. You may want to replace the hard-coded path with path.

How to get the video file length in Yolo v3

I wanted to find out how the video frame length was calculated in the below code.
[UPD] Before I was thinking it was done by Yolo, but later I realized it was OpenCV that dealt with number of frames in a video file.
"""
Class definition of YOLO_v3 style detection model on image and video
"""
import colorsys
import os
from timeit import default_timer as timer
import numpy as np
from keras import backend as K
from keras.models import load_model
from keras.layers import Input
from PIL import Image, ImageFont, ImageDraw
from yolo3.model import yolo_eval, yolo_body, tiny_yolo_body
from yolo3.utils import letterbox_image
import os
from keras.utils import multi_gpu_model
class YOLO(object):
_defaults = {
"model_path": 'model_data/yolo.h5',
"anchors_path": 'model_data/yolo_anchors.txt',
"classes_path": 'model_data/coco_classes.txt',
"score" : 0.3,
"iou" : 0.45,
"model_image_size" : (416, 416),
"gpu_num" : 1,
}
#classmethod
def get_defaults(cls, n):
if n in cls._defaults:
return cls._defaults[n]
else:
return "Unrecognized attribute name '" + n + "'"
def __init__(self, **kwargs):
self.__dict__.update(self._defaults) # set up default values
self.__dict__.update(kwargs) # and update with user overrides
self.class_names = self._get_class()
self.anchors = self._get_anchors()
self.sess = K.get_session()
self.boxes, self.scores, self.classes = self.generate()
def _get_class(self):
classes_path = os.path.expanduser(self.classes_path)
with open(classes_path) as f:
class_names = f.readlines()
class_names = [c.strip() for c in class_names]
return class_names
def _get_anchors(self):
anchors_path = os.path.expanduser(self.anchors_path)
with open(anchors_path) as f:
anchors = f.readline()
anchors = [float(x) for x in anchors.split(',')]
return np.array(anchors).reshape(-1, 2)
def generate(self):
model_path = os.path.expanduser(self.model_path)
assert model_path.endswith('.h5'), 'weights must be a .h5 file.'
# Load model, or construct model and load weights.
num_anchors = len(self.anchors)
num_classes = len(self.class_names)
is_tiny_version = num_anchors==6 # default setting
try:
self.yolo_model = load_model(model_path, compile=False)
except:
self.yolo_model = tiny_yolo_body(Input(shape=(None,None,3)), num_anchors//2, num_classes) \
if is_tiny_version else yolo_body(Input(shape=(None,None,3)), num_anchors//3, num_classes)
self.yolo_model.load_weights(self.model_path) # make sure model, anchors and classes match
else:
assert self.yolo_model.layers[-1].output_shape[-1] == \
num_anchors/len(self.yolo_model.output) * (num_classes + 5), \
'Mismatch between model and given anchor and class sizes'
print('{} model, anchors, and classes loaded.'.format(model_path))
# Generate colors for drawing bounding boxes.
hsv_tuples = [(x / len(self.class_names), 1., 1.)
for x in range(len(self.class_names))]
self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
self.colors = list(
map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
self.colors))
np.random.seed(10101) # Fixed seed for consistent colors across runs.
np.random.shuffle(self.colors) # Shuffle colors to decorrelate adjacent classes.
np.random.seed(None) # Reset seed to default.
# Generate output tensor targets for filtered bounding boxes.
self.input_image_shape = K.placeholder(shape=(2, ))
if self.gpu_num>=2:
self.yolo_model = multi_gpu_model(self.yolo_model, gpus=self.gpu_num)
boxes, scores, classes = yolo_eval(self.yolo_model.output, self.anchors,
len(self.class_names), self.input_image_shape,
score_threshold=self.score, iou_threshold=self.iou)
return boxes, scores, classes
def detect_image(self, image):
start = timer()
if self.model_image_size != (None, None):
assert self.model_image_size[0]%32 == 0, 'Multiples of 32 required'
assert self.model_image_size[1]%32 == 0, 'Multiples of 32 required'
boxed_image = letterbox_image(image, tuple(reversed(self.model_image_size)))
else:
new_image_size = (image.width - (image.width % 32),
image.height - (image.height % 32))
boxed_image = letterbox_image(image, new_image_size)
image_data = np.array(boxed_image, dtype='float32')
print(image_data.shape)
image_data /= 255.
image_data = np.expand_dims(image_data, 0) # Add batch dimension.
out_boxes, out_scores, out_classes = self.sess.run(
[self.boxes, self.scores, self.classes],
feed_dict={
self.yolo_model.input: image_data,
self.input_image_shape: [image.size[1], image.size[0]],
K.learning_phase(): 0
})
print('Found {} boxes for {}'.format(len(out_boxes), 'img'))
font = ImageFont.truetype(font='font/FiraMono-Medium.otf',
size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
thickness = (image.size[0] + image.size[1]) // 300
for i, c in reversed(list(enumerate(out_classes))):
predicted_class = self.class_names[c]
box = out_boxes[i]
score = out_scores[i]
label = '{} {:.2f}'.format(predicted_class, score)
draw = ImageDraw.Draw(image)
label_size = draw.textsize(label, font)
top, left, bottom, right = box
top = max(0, np.floor(top + 0.5).astype('int32'))
left = max(0, np.floor(left + 0.5).astype('int32'))
bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
right = min(image.size[0], np.floor(right + 0.5).astype('int32'))
print(label, (left, top), (right, bottom))
if top - label_size[1] >= 0:
text_origin = np.array([left, top - label_size[1]])
else:
text_origin = np.array([left, top + 1])
# My kingdom for a good redistributable image drawing library.
for i in range(thickness):
draw.rectangle(
[left + i, top + i, right - i, bottom - i],
outline=self.colors[c])
draw.rectangle(
[tuple(text_origin), tuple(text_origin + label_size)],
fill=self.colors[c])
draw.text(text_origin, label, fill=(0, 0, 0), font=font)
del draw
end = timer()
print(end - start)
return image
def close_session(self):
self.sess.close()
def detect_video(yolo, video_path, output_path=""):
import cv2
video_path = './input.mp4'
vid = cv2.VideoCapture(video_path)
if not vid.isOpened():
raise IOError("Couldn't open webcam or video")
video_FourCC = int(vid.get(cv2.CAP_PROP_FOURCC))
video_fps = vid.get(cv2.CAP_PROP_FPS)
video_size = (int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)),
int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)))
isOutput = True if output_path != "" else False
if isOutput:
print("!!! TYPE:", type(output_path), type(video_FourCC), type(video_fps), type(video_size))
out = cv2.VideoWriter(output_path, video_FourCC, video_fps, video_size)
accum_time = 0
curr_fps = 0
fps = "FPS: ??"
prev_time = timer()
while True:
return_value, frame = vid.read()
image = Image.fromarray(frame)
image = yolo.detect_image(image)
result = np.asarray(image)
curr_time = timer()
exec_time = curr_time - prev_time
prev_time = curr_time
accum_time = accum_time + exec_time
curr_fps = curr_fps + 1
if accum_time == 10 : mouseBrush(image)
if accum_time > 1:
accum_time = accum_time - 1
fps = "FPS: " + str(curr_fps)
curr_fps = 0
cv2.putText(result, text=fps, org=(3, 15), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
fontScale=0.50, color=(255, 0, 0), thickness=2)
cv2.namedWindow("result", cv2.WINDOW_NORMAL)
cv2.imshow("result", result)
if isOutput:
out.write(result)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
yolo.close_session()
Actually, this code is just one part of the all Yolo3 model, but I think the part that deals with the number of video frames is included here.
If you mean the current FPS. This is the part showing the current FPS in string.
while True:
return_value, frame = vid.read()
image = Image.fromarray(frame)
image = yolo.detect_image(image)
result = np.asarray(image)
curr_time = timer()
exec_time = curr_time - prev_time
prev_time = curr_time
accum_time = accum_time + exec_time
curr_fps = curr_fps + 1
if accum_time > 1:
accum_time = accum_time - 1
fps = "FPS: " + str(curr_fps)
curr_fps = 0
cv2.putText(result, text=fps, org=(3, 15), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
fontScale=0.50, color=(255, 0, 0), thickness=2)
cv2.namedWindow("result", cv2.WINDOW_NORMAL)
cv2.imshow("result", result)
if curr_fps == 10: # Stops at 10th frame.
time.sleep(60) # Delay for 1 minute (60 seconds).
if isOutput:
out.write(result)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
I needed the frame number to control every 10th frame in the video file, and thanks to above comments, I figured out that the line I was looking for is:
curr_fps = curr_fps + 1
UPD: The following line calculated the number of frames in a video file.
NumberOfFrame = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))

Captcha Break or Text reader from Image: OCR-Python

I have a typical captcha image which contain only digits.
Ex.
i want to extract 78614 from this image.
I tried few library & code using OCR-Python. But its returning 0.
Sample Code-1
from captcha_solver import CaptchaSolver
solver = CaptchaSolver('browser')
with open('captcha.png', 'rb') as inp:
raw_data = inp.read()
print(solver.solve_captcha(raw_data))
Sample Code-2
from PIL import Image
def p(img, letter):
A = img.load()
B = letter.load()
mx = 1000000
max_x = 0
x = 0
for x in range(img.size[0] - letter.size[0]):
_sum = 0
for i in range(letter.size[0]):
for j in range(letter.size[1]):
_sum = _sum + abs(A[x+i, j][0] - B[i, j][0])
if _sum < mx :
mx = _sum
max_x = x
return mx, max_x
def ocr(im, threshold=200, alphabet="0123456789abcdef"):
img = Image.open(im)
img = img.convert("RGB")
box = (8, 8, 58, 18)
img = img.crop(box)
pixdata = img.load()
letters = Image.open(im)
ledata = letters.load()
# Clean the background noise, if color != white, then set to black.
for y in range(img.size[1]):
for x in range(img.size[0]):
if (pixdata[x, y][0] > threshold) \
and (pixdata[x, y][1] > threshold) \
and (pixdata[x, y][2] > threshold):
pixdata[x, y] = (255, 255, 255, 255)
else:
pixdata[x, y] = (0, 0, 0, 255)
counter = 0;
old_x = -1;
letterlist = []
for x in range(letters.size[0]):
black = True
for y in range(letters.size[1]):
if ledata[x, y][0] <> 0 :
black = False
break
if black :
if True :
box = (old_x + 1, 0, x, 10)
letter = letters.crop(box)
t = p(img, letter);
print counter, x, t
letterlist.append((t[0], alphabet[counter], t[1]))
old_x = x
counter += 1
box = (old_x + 1, 0, 140, 10)
letter = letters.crop(box)
t = p(img, letter)
letterlist.append((t[0], alphabet[counter], t[1]))
t = sorted(letterlist)
t = t[0:5] # 5-letter captcha
final = sorted(t, key=lambda e: e[2])
answer = ""
for l in final:
answer = answer + l[1]
return answer
print(ocr('captcha.png'))
Has anyone had the opportunity to get/extract text from such typical captcha?
You can use machine learning (neural networks) models to solve captchas and it will almost always outperform free OCR or any other method.
Here is a good starting point: https://medium.com/#ageitgey/how-to-break-a-captcha-system-in-15-minutes-with-machine-learning-dbebb035a710

Opencv(Python) memory usage issue

I've a problem with my software in Python. It's a big while cicle where I took a intel realsense (USB camera) stream. Using opencv I make a couple of findContours and I send the results of contours to another software.
The problem is that there is a memory consuption. In fact the RAM usage increase every 2-3 seconds by 0.1%.
II don't know what to do...
This is the code (sorry if it's not beautifull but I'm testing a lot of things)
import numpy as np
import random
import socket
import cv2
import time
import math
import pickle
import httplib, urllib
from xml.etree import ElementTree as ET
import logging
logging.basicConfig(level=logging.INFO)
try:
import pyrealsense as pyrs
except:
print("No pyralsense Module installed!")
#funzione per registrare gli eventi del mouse
def drawArea(event,x,y, flag, param):
global fx,fy,ix,iy
if event == cv2.EVENT_LBUTTONDOWN:
ix,iy = x,y
elif event == cv2.EVENT_LBUTTONUP:
fx,fy = x,y
def RepresentsInt(s):
try:
int(s)
return True
except ValueError:
return False
quit = False
read = False
while read == False:
file = open('default.xml', 'r')
tree = ET.parse(file)
root = tree.getroot()
for child in root:
if child.tag == "intel":
intel = int(child[0].text)
elif child.tag == "output":
portOut = int(child[2].text)
elif child.tag =="source":
video_source = child.text
file.close()
root.clear()
ix,iy = -1,-1
fx,fy = -1,-1
timeNP = 10
last = time.time()
smoothing = 0.9
fps_smooth = 30
#video_source = video_source.split(",")
read = True
if RepresentsInt(video_source):
video_source = int(video_source)
if intel == 1:
pyrs.start()
dev = pyrs.Device(video_source)
master = 1
address = ('', 3333)
broadSockListe = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
broadSockListe.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
broadSockListe.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)
broadSockListe.bind(('',3333))
while True:
if master == 0:
datas, address = broadSockListe.recvfrom(1024)
if str(datas) == "8000":
separator = ":"
seq = (address[0],"8081")
masterAddr = separator.join(seq)
IP = str([l for l in (
[ip for ip in socket.gethostbyname_ex(socket.gethostname())[2] if not ip.startswith("127.")][:1], [
[(s.connect(('8.8.8.8', 53)), s.getsockname()[0], s.close()) for s in
[socket.socket(socket.AF_INET, socket.SOCK_DGRAM)]][0][1]]) if l][0][0])
params = separator.join(("addUnit",IP,str(portOut),"camera","generalList.xml"))
params = urllib.urlencode({"Python":params})
headers = {}
conn = httplib.HTTPConnection(masterAddr)
conn.request("POST",masterAddr ,params, headers)
params = separator.join(("masterIP",address[0],str(portOut)+"/","default.xml"))
params = urllib.urlencode({"Python":params})
headers = {}
myip = IP + ":8081"
conn = httplib.HTTPConnection(myip)
#eseguo una post al mio server
conn.request("POST", myip, params, headers)
broadSockListe.close()
#imposto master a 1 per dire che l'ho registrato e posso partire col programma
master = 1
read = False
while read == False:
'''# leggo le varie impostazioni dal file default
file = open('default.xml','r+')
tree = ET.parse(file)
root = tree.getroot()
for child in root:
if child.tag == "modifica" and child.text == "1":
child.text = "0"
tree.write('default.xml')
root.clear()
file.close()'''
read = True
prev,prevprev,dirX,dirY = 0,0,0,0
spostamento = 15
UDP_IP = ["", ""]
UDP_PORT = ["", ""]
UDP_IP[0] = "127.0.0.1"
UDP_PORT[0] = 3030
IP_left = "127.0.0.1"
IP_right = "127.0.0.1"
sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
sock.bind(("",portOut))
message = ""
sep = "-"
font = cv2.FONT_HERSHEY_SIMPLEX
kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
#rettangoli = [x,y,width,height,angle,box, area, contours]
rettangoli = []
cnt = 0
letto = 0
while True:
now = time.time()
if letto < now - 2 or letto == 0 or now < letto:
letto = now
print(now)
read = False
while read == False:
file = open('default.xml', 'r')
tree = ET.parse(file)
root = tree.getroot()
for child in root:
if child.tag == "output":
UDP_IP[1] = child[0].text
UDP_PORT[1] = int(child[1].text)
if child.tag == "effects":
erode = int(child[0].text)
erodePos = int(child[1].text)
erode2 = int(child[2].text)
erodePos2 = int(child[3].text)
dilate1 = int(child[4].text)
dilatePos1= int(child[5].text)
dilate2 = int(child[6].text)
dilatePos2 = int(child[7].text)
blur = int(child[8].text)
blurPos = int(child[9].text)
if child.tag == "intel":
val1Min = int(child[1].text)
val1Max = int(child[2].text)
val2Min = int(child[3].text)
val2Max = int(child[4].text)
val3Min = int(child[5].text)
val3Max = int(child[6].text)
if child.tag == "modifica":
if child.text == "1":
break
#definisco dimensioni per collisioni
if child.tag == "size":
blobSize= int(child[0].text)
dimBordoBlob= int(child[1].text)
if child.tag == "visualizza":
visualizza= child.text
if child.tag == "feedback":
SFB = int(child.text)
root.clear()
file.close()
read = True
dev.wait_for_frame()
c = dev.colour
c = cv2.cvtColor(c, cv2.COLOR_RGB2BGR)
d = dev.depth * dev.depth_scale * -60
d = d[5:485, 25:635]
d = cv2.applyColorMap(d.astype(np.uint8), cv2.COLORMAP_HSV)
c = cv2.resize(c, (320 ,240), interpolation=cv2.INTER_AREA)
d = cv2.resize(d, (320,240), interpolation=cv2.INTER_AREA)
#trasformo i colori in HSV per filtrarli
frame = cv2.cvtColor(d, cv2.COLOR_BGR2HSV)
lower_red = np.array([val1Min, val2Min, val3Min])
upper_red = np.array([val1Max, val2Max, val3Max])
frame = cv2.inRange(frame, lower_red, upper_red)
dimensions = frame.shape
widthStream = dimensions[1]
heightStream = dimensions[0]
roomFrame = np.zeros(( heightStream,widthStream, 3), np.uint8)
roomFrame[:] = (0, 0, 0)
fgmask = frame
halfheight = int(heightStream / 2)
halfwidth = int(widthStream / 2)
for i in range(0, 15):
if erode >= 1 and erodePos == i:
fgmask = cv2.erode(fgmask, kernel, iterations=erode)
if dilate1 >= 1 and dilatePos1 == i:
fgmask = cv2.dilate(fgmask, kernel, iterations=dilate1)
if erode2 >= 1 and erodePos2 == i:
fgmask = cv2.erode(fgmask, kernel, iterations=erode2)
if dilate2 >= 1 and dilatePos2 == i:
fgmask = cv2.dilate(fgmask, kernel, iterations=dilate2)
if blur == 1 and blurPos == 1:
fgmask = cv2.GaussianBlur(fgmask, (5, 5), 0)
if ix > fx:
temp = fx
fx = ix
ix = temp
if iy > fy:
temp = fy
fy = iy
iy = temp
if cnt == 0:
ix,iy = 1,1
fx,fy = widthStream-1,heightStream-1
fgmask, contours, hierarchy = cv2.findContours(fgmask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
rettangoli = []
for cont in contours:
rect = cv2.minAreaRect(cont)
box = cv2.boxPoints(rect)
box = np.int0(box)
width = rect[1][0]
height = rect[1][1]
angle = rect[2]
if width > height:
angle = 180 + angle
else:
angle = 270 + angle
x, y, w, h = cv2.boundingRect(cont)
centerX = int(w / 2 + x)
centerY = int(h / 2 + y)
M = cv2.moments(cont)
area = int(M['m00'])
if area > blobSize:
if ix < centerX < fx and iy < centerY < fy:
cv2.drawContours(fgmask, [cont], 0, (100, 100, 100), dimBordoBlob)
cv2.drawContours(fgmask, [cont], 0, (255, 255, 255), -1)
rettangoli.append([centerX, centerY, w, h, angle, box, area, cont])
indice = 0
fgmask, contours, hierarchy = cv2.findContours(fgmask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_TC89_KCOS)
if intel == 1:
fgmask = cv2.cvtColor(fgmask, cv2.COLOR_GRAY2RGB)
rettangoli = []
for cont in contours:
rect = cv2.minAreaRect(cont)
box = cv2.boxPoints(rect)
box = np.int0(box)
width = rect[1][0]
height = rect[1][1]
angle = rect[2]
if width > height:
angle = 180 + angle
else:
angle = 270 + angle
x, y, w, h = cv2.boundingRect(cont)
centerX = int(w / 2 + x)
centerY = int(h / 2 + y)
M = cv2.moments(cont)
indice += 1
if M['m00'] > blobSize:
if ix < centerX < fx and iy < centerY < fy:
rettangoli.append([centerX, centerY, w, h, angle, box, int(M['m00']), cont])
cv2.drawContours(roomFrame, [cont], 0, (255, 255, 255), -1)
for rett in rettangoli:
seq = (message,np.array_str(rett[7]))
message = sep.join(seq)
temp = 0
while temp < len(UDP_IP):
sock.sendto(bytes(message), (UDP_IP[temp], UDP_PORT[temp]))
temp += 1
message = ""
if SFB == 1:
cv2.imshow("Camera Intel", roomFrame)
if cv2.waitKey(1) & 0xFF == ord('r'):
break
if cv2.waitKey(1) & 0xFF == ord('q'):
quit = True
break
name = "color.jpeg"
cv2.imwrite(name, c)
name = "bn.jpeg"
cv2.imwrite(name, roomFrame)
if intel == 0:
cap.release()
cv2.destroyAllWindows()
You are creating new objects in your while loop. Take now for example, you create a variable and then you assign a new object to it that only lives in that loop. If you declare the variables before your loop the same object will be overwritten instead of re-created.
By just declaring the variables ahead of time with name = None you will be able to make sure you reuse these variables.
I hope this works for you.

Error while testing the handwritten recognition project

This is the code for testing the handwritten character recognition in OpenCV python using KNN.
import cv2
import numpy as np
import operator
import os
MIN_CONTOUR_AREA = 100
RESIZED_IMAGE_WIDTH = 20
RESIZED_IMAGE_HEIGHT = 30
class ContourWithData():
npaContour = None
boundingRect = None
intRectX = 0
intRectY = 0
intRectWidth = 0
intRectHeight = 0
fltArea = 0.0
def calculateRectTopLeftPointAndWidthAndHeight(self):
[intX, intY, intWidth, intHeight] = self.boundingRect
self.intRectX = intX
self.intRectY = intY
self.intRectWidth = intWidth
self.intRectHeight = intHeight
def checkIfContourIsValid(self):
if self.fltArea < MIN_CONTOUR_AREA: return False
return True
def main():
allContoursWithData = []
validContoursWithData = []
try:
npaClassifications = np.loadtxt("classifications1.txt", np.float32)
except:
print "error, unable to open classifications.txt, exiting program\n"
os.system("pause")
return
# end try
try:
npaFlattenedImages = np.loadtxt("flattened_images1.txt", np.float32)
except:
print "error, unable to open flattened_images.txt, exiting program\n"
os.system("pause")
return
# end try
npaClassifications = npaClassifications.reshape((npaClassifications.size, 1))
kNearest = cv2.ml.KNearest_create()
kNearest.train(npaFlattenedImages, cv2.ml.ROW_SAMPLE, npaClassifications)
imgTestingNumbers = cv2.imread("count.png")
if imgTestingNumbers is None:
print "error: image not read from file \n\n"
os.system("pause")
return
# end if
imgGray = cv2.cvtColor(imgTestingNumbers, cv2.COLOR_BGR2GRAY)
imgBlurred = cv2.GaussianBlur(imgGray, (5,5), 0)
imgThresh = cv2.adaptiveThreshold(imgBlurred,
255,
cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
cv2.THRESH_BINARY_INV,
11,
2)
imgThreshCopy = imgThresh.copy()
imgContours, npaContours, npaHierarchy = cv2.findContours(imgThreshCopy,
cv2.RETR_EXTERNAL,
cv2.CHAIN_APPROX_SIMPLE)
for npaContour in npaContours:
contourWithData = ContourWithData()
contourWithData.npaContour = npaContour
contourWithData.boundingRect = cv2.boundingRect(contourWithData.npaContour)
contourWithData.calculateRectTopLeftPointAndWidthAndHeight()
contourWithData.fltArea = cv2.contourArea(contourWithData.npaContour)
allContoursWithData.append(contourWithData)
# end for
for contourWithData in allContoursWithData:
if contourWithData.checkIfContourIsValid():
validContoursWithData.append(contourWithData)
# end if
# end for
validContoursWithData.sort(key = operator.attrgetter("intRectX"))
strFinalString = ""
for contourWithData in validContoursWithData:
cv2.rectangle(imgTestingNumbers,
(contourWithData.intRectX, contourWithData.intRectY),
(contourWithData.intRectX + contourWithData.intRectWidth, contourWithData.intRectY + contourWithData.intRectHeight),
(0, 255, 0),
2)
imgROI = imgThresh[contourWithData.intRectY : contourWithData.intRectY + contourWithData.intRectHeight,
contourWithData.intRectX : contourWithData.intRectX + contourWithData.intRectWidth]
imgROIResized = cv2.resize(imgROI, (RESIZED_IMAGE_WIDTH, RESIZED_IMAGE_HEIGHT))
npaROIResized = imgROIResized.reshape((1, RESIZED_IMAGE_WIDTH * RESIZED_IMAGE_HEIGHT))
npaROIResized = np.float32(npaROIResized)
retval, npaResults, neigh_resp, dists = kNearest.findNearest(npaROIResized, k = 1)
strCurrentChar = str(chr(int(npaResults[0][0])))
strFinalString = strFinalString + strCurrentChar
# end for
print "\n" + strFinalString + "\n"
cv2.imshow("imgTestingNumbers", imgTestingNumbers)
cv2.waitKey(0)
cv2.destroyAllWindows()
return
if __name__ == "__main__":
main()
# end if
And, I'm getting the following error :
Traceback (most recent call last):
File "C:\Users\malmad\Desktop\cip\Testing1.py", line 141, in
main()
File "C:\Users\malmad\Desktop\cip\Testing1.py", line 60, in main
kNearest.train(npaFlattenedImages, cv2.ml.ROW_SAMPLE, npaClassifications)
error: C:\builds\master_PackSlaveAddon-win32-vc12-static\opencv\modules\ml\src\data.cpp:290: error: (-215) (layout == ROW_SAMPLE && responses.rows == nsamples) || (layout == COL_SAMPLE && responses.cols == nsamples) in function cv::ml::TrainDataImpl::setData
How should i proceed? Sorry for the bad indentation!! I'm new to stack overflow

Categories

Resources