OpenCV (Python) memory usage issue - python

I have a problem with my Python software. It's a big while loop in which I take an Intel RealSense (USB camera) stream. Using OpenCV I run a couple of findContours calls and send the resulting contours to another program.
The problem is that there is a memory leak: the RAM usage increases by 0.1% every 2-3 seconds.
I don't know what to do...
This is the code (sorry if it's not beautiful, but I'm testing a lot of things):
import numpy as np
import random
import socket
import cv2
import time
import math
import pickle
import httplib, urllib
from xml.etree import ElementTree as ET
import logging
logging.basicConfig(level=logging.INFO)
try:
import pyrealsense as pyrs
except:
print("No pyralsense Module installed!")
# function to register mouse events
def drawArea(event,x,y, flag, param):
global fx,fy,ix,iy
if event == cv2.EVENT_LBUTTONDOWN:
ix,iy = x,y
elif event == cv2.EVENT_LBUTTONUP:
fx,fy = x,y
def RepresentsInt(s):
try:
int(s)
return True
except ValueError:
return False
quit = False
read = False
while read == False:
file = open('default.xml', 'r')
tree = ET.parse(file)
root = tree.getroot()
for child in root:
if child.tag == "intel":
intel = int(child[0].text)
elif child.tag == "output":
portOut = int(child[2].text)
elif child.tag =="source":
video_source = child.text
file.close()
root.clear()
ix,iy = -1,-1
fx,fy = -1,-1
timeNP = 10
last = time.time()
smoothing = 0.9
fps_smooth = 30
#video_source = video_source.split(",")
read = True
if RepresentsInt(video_source):
video_source = int(video_source)
if intel == 1:
pyrs.start()
dev = pyrs.Device(video_source)
master = 1
address = ('', 3333)
broadSockListe = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
broadSockListe.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
broadSockListe.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)
broadSockListe.bind(('',3333))
while True:
if master == 0:
datas, address = broadSockListe.recvfrom(1024)
if str(datas) == "8000":
separator = ":"
seq = (address[0],"8081")
masterAddr = separator.join(seq)
IP = str([l for l in (
[ip for ip in socket.gethostbyname_ex(socket.gethostname())[2] if not ip.startswith("127.")][:1], [
[(s.connect(('8.8.8.8', 53)), s.getsockname()[0], s.close()) for s in
[socket.socket(socket.AF_INET, socket.SOCK_DGRAM)]][0][1]]) if l][0][0])
params = separator.join(("addUnit",IP,str(portOut),"camera","generalList.xml"))
params = urllib.urlencode({"Python":params})
headers = {}
conn = httplib.HTTPConnection(masterAddr)
conn.request("POST",masterAddr ,params, headers)
params = separator.join(("masterIP",address[0],str(portOut)+"/","default.xml"))
params = urllib.urlencode({"Python":params})
headers = {}
myip = IP + ":8081"
conn = httplib.HTTPConnection(myip)
# do a POST to my server
conn.request("POST", myip, params, headers)
broadSockListe.close()
# set master to 1 to say it has been registered and the program can start
master = 1
read = False
while read == False:
'''# read the various settings from the default file
file = open('default.xml','r+')
tree = ET.parse(file)
root = tree.getroot()
for child in root:
if child.tag == "modifica" and child.text == "1":
child.text = "0"
tree.write('default.xml')
root.clear()
file.close()'''
read = True
prev,prevprev,dirX,dirY = 0,0,0,0
spostamento = 15
UDP_IP = ["", ""]
UDP_PORT = ["", ""]
UDP_IP[0] = "127.0.0.1"
UDP_PORT[0] = 3030
IP_left = "127.0.0.1"
IP_right = "127.0.0.1"
sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
sock.bind(("",portOut))
message = ""
sep = "-"
font = cv2.FONT_HERSHEY_SIMPLEX
kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
#rettangoli = [x,y,width,height,angle,box, area, contours]
rettangoli = []
cnt = 0
letto = 0
while True:
now = time.time()
if letto < now - 2 or letto == 0 or now < letto:
letto = now
print(now)
read = False
while read == False:
file = open('default.xml', 'r')
tree = ET.parse(file)
root = tree.getroot()
for child in root:
if child.tag == "output":
UDP_IP[1] = child[0].text
UDP_PORT[1] = int(child[1].text)
if child.tag == "effects":
erode = int(child[0].text)
erodePos = int(child[1].text)
erode2 = int(child[2].text)
erodePos2 = int(child[3].text)
dilate1 = int(child[4].text)
dilatePos1= int(child[5].text)
dilate2 = int(child[6].text)
dilatePos2 = int(child[7].text)
blur = int(child[8].text)
blurPos = int(child[9].text)
if child.tag == "intel":
val1Min = int(child[1].text)
val1Max = int(child[2].text)
val2Min = int(child[3].text)
val2Max = int(child[4].text)
val3Min = int(child[5].text)
val3Max = int(child[6].text)
if child.tag == "modifica":
if child.text == "1":
break
# define sizes for collisions
if child.tag == "size":
blobSize= int(child[0].text)
dimBordoBlob= int(child[1].text)
if child.tag == "visualizza":
visualizza= child.text
if child.tag == "feedback":
SFB = int(child.text)
root.clear()
file.close()
read = True
dev.wait_for_frame()
c = dev.colour
c = cv2.cvtColor(c, cv2.COLOR_RGB2BGR)
d = dev.depth * dev.depth_scale * -60
d = d[5:485, 25:635]
d = cv2.applyColorMap(d.astype(np.uint8), cv2.COLORMAP_HSV)
c = cv2.resize(c, (320 ,240), interpolation=cv2.INTER_AREA)
d = cv2.resize(d, (320,240), interpolation=cv2.INTER_AREA)
# convert the colors to HSV to filter them
frame = cv2.cvtColor(d, cv2.COLOR_BGR2HSV)
lower_red = np.array([val1Min, val2Min, val3Min])
upper_red = np.array([val1Max, val2Max, val3Max])
frame = cv2.inRange(frame, lower_red, upper_red)
dimensions = frame.shape
widthStream = dimensions[1]
heightStream = dimensions[0]
roomFrame = np.zeros(( heightStream,widthStream, 3), np.uint8)
roomFrame[:] = (0, 0, 0)
fgmask = frame
halfheight = int(heightStream / 2)
halfwidth = int(widthStream / 2)
for i in range(0, 15):
if erode >= 1 and erodePos == i:
fgmask = cv2.erode(fgmask, kernel, iterations=erode)
if dilate1 >= 1 and dilatePos1 == i:
fgmask = cv2.dilate(fgmask, kernel, iterations=dilate1)
if erode2 >= 1 and erodePos2 == i:
fgmask = cv2.erode(fgmask, kernel, iterations=erode2)
if dilate2 >= 1 and dilatePos2 == i:
fgmask = cv2.dilate(fgmask, kernel, iterations=dilate2)
if blur == 1 and blurPos == 1:
fgmask = cv2.GaussianBlur(fgmask, (5, 5), 0)
if ix > fx:
temp = fx
fx = ix
ix = temp
if iy > fy:
temp = fy
fy = iy
iy = temp
if cnt == 0:
ix,iy = 1,1
fx,fy = widthStream-1,heightStream-1
fgmask, contours, hierarchy = cv2.findContours(fgmask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
rettangoli = []
for cont in contours:
rect = cv2.minAreaRect(cont)
box = cv2.boxPoints(rect)
box = np.int0(box)
width = rect[1][0]
height = rect[1][1]
angle = rect[2]
if width > height:
angle = 180 + angle
else:
angle = 270 + angle
x, y, w, h = cv2.boundingRect(cont)
centerX = int(w / 2 + x)
centerY = int(h / 2 + y)
M = cv2.moments(cont)
area = int(M['m00'])
if area > blobSize:
if ix < centerX < fx and iy < centerY < fy:
cv2.drawContours(fgmask, [cont], 0, (100, 100, 100), dimBordoBlob)
cv2.drawContours(fgmask, [cont], 0, (255, 255, 255), -1)
rettangoli.append([centerX, centerY, w, h, angle, box, area, cont])
indice = 0
fgmask, contours, hierarchy = cv2.findContours(fgmask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_TC89_KCOS)
if intel == 1:
fgmask = cv2.cvtColor(fgmask, cv2.COLOR_GRAY2RGB)
rettangoli = []
for cont in contours:
rect = cv2.minAreaRect(cont)
box = cv2.boxPoints(rect)
box = np.int0(box)
width = rect[1][0]
height = rect[1][1]
angle = rect[2]
if width > height:
angle = 180 + angle
else:
angle = 270 + angle
x, y, w, h = cv2.boundingRect(cont)
centerX = int(w / 2 + x)
centerY = int(h / 2 + y)
M = cv2.moments(cont)
indice += 1
if M['m00'] > blobSize:
if ix < centerX < fx and iy < centerY < fy:
rettangoli.append([centerX, centerY, w, h, angle, box, int(M['m00']), cont])
cv2.drawContours(roomFrame, [cont], 0, (255, 255, 255), -1)
for rett in rettangoli:
seq = (message,np.array_str(rett[7]))
message = sep.join(seq)
temp = 0
while temp < len(UDP_IP):
sock.sendto(bytes(message), (UDP_IP[temp], UDP_PORT[temp]))
temp += 1
message = ""
if SFB == 1:
cv2.imshow("Camera Intel", roomFrame)
if cv2.waitKey(1) & 0xFF == ord('r'):
break
if cv2.waitKey(1) & 0xFF == ord('q'):
quit = True
break
name = "color.jpeg"
cv2.imwrite(name, c)
name = "bn.jpeg"
cv2.imwrite(name, roomFrame)
if intel == 0:
cap.release()
cv2.destroyAllWindows()

You are creating new objects in your while loop. Take now, for example: you create a variable and then assign a new object to it that only lives in that iteration. If you declare the variables before your loop, the same variable will be overwritten instead of re-created each time.
By just declaring the variables ahead of time with name = None you can make sure you reuse these variables.
I hope this works for you.
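As a minimal sketch of that idea applied to one buffer from the question (assuming the fixed 320x240 size used after the resize; the loop body is only a placeholder), roomFrame can be allocated once and cleared in place instead of being re-created every frame:

import numpy as np

widthStream, heightStream = 320, 240                             # assumed fixed frame size
roomFrame = np.zeros((heightStream, widthStream, 3), np.uint8)   # allocate once, before the loop
rettangoli = []

while True:
    roomFrame[:] = 0      # clear the existing buffer instead of calling np.zeros() again
    rettangoli.clear()    # reuse the same list instead of building a new one each frame
    # ... per-frame processing would go here ...
    break                 # placeholder so the sketch terminates

Whether this removes the whole leak depends on what else the loop allocates, but it does avoid one per-frame allocation.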

Related

My Automated Python script loses so much performance over time

This is my code. I know it's long and messy, but basically: at the start it's pretty fast, but after a while you can notice really huge performance drops. After 6 hours I came back and one press of a keyboard button took about 10 seconds. Can anybody help me find why it is slowing down so much?
In case you don't understand the code (I know it's very messy and has no comments): it's basically a while loop that runs around pressing buttons, and when it encounters a Pokemon it reads its name; if it's the one I want, it notifies me, and if it errors, it notifies me too.
If I don't want the Pokemon, it kills it.
while active:
gc.collect()
gc.collect()
gc.collect()
time.sleep(0.1)
def waitclick():
while True:
if keyboard.read_key() == "f6":
global active
active = False
thread = threading.Thread(target=waitclick).start()
im = numpy.asarray(pyautogui.screenshot(region=hpcheck))
# Define the blue colour we want to find - remember OpenCV uses BGR ordering
color = [175, 65, 63]
X, Y = numpy.where(numpy.all(im == color, axis=2))
x = list(X)
if lenght:
cnt = 0
if len(x) <= lenght * (3 / 4):
while len(x) <= lenght* (5 / 6):
if cnt > 20:
pyautogui.click(2119, 793)
pyautogui.keyDown('4')
time.sleep(0.01)
pyautogui.keyUp('4')
pyautogui.click(2050, 1120)
pyautogui.click(1720, 1030)
break
cnt += 1
time.sleep(1)
pyautogui.moveTo(potion[0],potion[1], random.uniform(0.5,1))
pyautogui.click(potion[0],potion[1])
time.sleep(0.01)
pyautogui.click(potion[0],potion[1])
time.sleep(0.1)
pyautogui.moveTo(mainpoke[0],mainpoke[1],random.uniform(0.5,1))
pyautogui.click(mainpoke[0],mainpoke[1])
pyautogui.click(mainpoke[0], mainpoke[1])
im = Image.fromarray(im)
im.save("health.png")
im.close()
im = numpy.asarray(pyautogui.screenshot(region=(hpcheck)))
# Define the blue colour we want to find - remember OpenCV uses BGR ordering
color = [175, 65, 63]
X, Y = numpy.where(numpy.all(im == color, axis=2))
x = list(X)
lenght = len(x)
else:
lenght = len(x)
key = keys[0] if steps % 2 == 0 else keys[1]
pyautogui.keyDown(key)
pixels = []
battle = False
timeup = False
def wait():
global stepsleft
global steps
global battle
counter = 0
for i in range(stepsleft):
if battle:
break
counter += 1
time.sleep(onetile-(random.uniform(0.001,0.002)))
stepsleft = stepsleft - counter
if stepsleft == 0:
steps += 1
stepsleft = tiles
global timeup
timeup = True
thread = threading.Thread(target=wait).start()
while True:
if timeup:
break
im = numpy.asarray(pyautogui.screenshot(region=(check)))
if list(im[0, 0]) == [230, 230, 230]:
battle = True
break
pyautogui.keyUp(key)
stepcount += 1
im = numpy.asarray(pyautogui.screenshot(region=(check)))
# im = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
if stepcount > 15:
if fails > 100:
call = client.calls.create(twiml='<Response><Say>Ahoy, World!</Say></Response>',
to='+421949847318', from_='+18596961895')
payload = {
"content": f"Step Fail, continuing."
}
r = requests.post(f"https://discord.com/api/v9/channels/{channel_id}/messages",
data=payload,
headers=header)
gc.collect()
fails = 0
stepfails += 1
if stepfails > 10:
break
pyautogui.click(2119, 793)
pyautogui.keyDown('4')
time.sleep(0.01)
pyautogui.keyUp('4')
pyautogui.click(2050,1120)
pyautogui.click(1720, 1030)
time.sleep(1)
fails += 1
if battle == True:
breaks += 1
stepcount = 0
c = 0
e = False
while True:
if c > 600:
e = True
break
else:
im = numpy.asarray(pyautogui.screenshot(region=(namecheck)))
if list(im[0, 0]) == [254, 254, 254]:
break
c += 1
if e:
pyautogui.keyDown('4')
time.sleep(0.01)
pyautogui.keyUp('4')
pyautogui.click(2050, 1120)
time.sleep(1)
else:
encounters += 1
im = numpy.asarray(pyautogui.screenshot(region=(mon)))
im = cv2.resize(im, None, fx=1.2, fy=1.2, interpolation=cv2.INTER_CUBIC)
im = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
kernel = numpy.ones((1, 1), numpy.uint8)
im = cv2.threshold(cv2.bilateralFilter(im, 5, 75, 75), 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
im = cv2.dilate(im, kernel, iterations=1)
im = cv2.erode(im, kernel, iterations=1)
im = ~im
text = pytesseract.image_to_string(im)
text = text.strip().replace("\n","")
ime = Image.fromarray(im)
ime.save(f"Wild {text}.png")
ime.close()
if text == "":
ime = Image.fromarray(im)
ime.save("fail.png")
ime.close()
im = numpy.asarray(pyautogui.screenshot(region=(mon)))
im = cv2.resize(im, None, fx=1.6, fy=1.6, interpolation=cv2.INTER_CUBIC)
im = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
kernel = numpy.ones((1, 1), numpy.uint8)
im = cv2.threshold(cv2.bilateralFilter(im, 5, 75, 75), 0, 255,
cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
im = cv2.dilate(im, kernel, iterations=1)
im = cv2.erode(im, kernel, iterations=1)
im = ~im
text = pytesseract.image_to_string(im).strip()
print(">" + text + "<")
if text == "":
ime = Image.fromarray(im)
ime.save("fail2.png")
ime.close()
im = numpy.asarray(pyautogui.screenshot(region=mon))
print(">" + text + "<")
im = cv2.resize(im, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
im = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
kernel = numpy.ones((1, 1), numpy.uint8)
im = cv2.threshold(cv2.bilateralFilter(im, 5, 75, 75), 0, 255,
cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
im = cv2.dilate(im, kernel, iterations=1)
im = cv2.erode(im, kernel, iterations=1)
im = ~im
text = pytesseract.image_to_string(im).strip()
if text == "":
im = numpy.asarray(pyautogui.screenshot(region=check))
if list(im[0, 0]) != [230, 230, 230]:
pass
else:
Image.fromarray(im)
ime.save("fail3.png")
ime.close()
print(">" + text + "<")
payload = {
"content": f"Go look at this bro, i failed again."
}
r = requests.post(f"https://discord.com/api/v9/channels/{channel_id}/messages", data=payload,
headers=header)
active = False
call = client.calls.create(twiml='<Response><Say>Ahoy, World!</Say></Response>',
to='+421949847318', from_='+18596961895')
gc.collect()
break
if text.replace("[S]","").replace("[E]","").replace("\n","") not in pokemonlist:
ime = Image.fromarray(im)
ime.save("fail.png")
ime.close()
im = numpy.asarray(pyautogui.screenshot(region=mon))
im = cv2.resize(im, None, fx=1.6, fy=1.6, interpolation=cv2.INTER_CUBIC)
im = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
kernel = numpy.ones((1, 1), numpy.uint8)
im = cv2.threshold(cv2.bilateralFilter(im, 5, 75, 75), 0, 255,
cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
im = cv2.dilate(im, kernel, iterations=1)
im = cv2.erode(im, kernel, iterations=1)
im = ~im
text = pytesseract.image_to_string(im).strip().replace("\n","")
print(">" + text + "<")
if text.replace("[S]","").replace("\n","").replace("[E]","") not in pokemonlist:
ime = Image.fromarray(im)
ime.save("fail2.png")
ime.close()
im = numpy.asarray(pyautogui.screenshot(region=mon))
im = cv2.resize(im, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
im = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
kernel = numpy.ones((1, 1), numpy.uint8)
im = cv2.threshold(cv2.bilateralFilter(im, 5, 75, 75), 0, 255,
cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
im = cv2.dilate(im, kernel, iterations=1)
im = cv2.erode(im, kernel, iterations=1)
im = ~im
text = pytesseract.image_to_string(im).strip()
print(">" + text + "<")
if text.replace("[S]","").replace("\n","").replace("[E]","") not in pokemonlist:
ime = Image.fromarray(im)
ime.save("fail3.png")
ime.close()
print(">" + text + "<")
payload = {
"content": f"Go look at this bro, i failed again."
}
r = requests.post(f"https://discord.com/api/v9/channels/{channel_id}/messages", data=payload,
headers=header)
active = False
call = client.calls.create(twiml='<Response><Say>Ahoy, World!</Say></Response>',
to='+421949847318', from_='+18596961895')
gc.collect()
break
encounter_time = datetime.datetime.utcnow()
times.append(encounter_time)
if not ram or encounters < 5:
ram = process.memory_info().rss
if encounters < 15:
recentspeed = "None yet"
else:
recentspeed = str(round(
(15 / (encounter_time - times[encounters - 15]).total_seconds()) * 60, 2)) + "/min"
print(
f"Pokemon: {text} #{encounters}\nSpeed: {round((encounters / (encounter_time - start).total_seconds()) * 60, 2)}/min \nRecent Speed: {recentspeed}\nRam usage is {process.memory_info().rss} Starting usage was {process.memory_info().rss-ram} Garbage is {gc.get_count()}")
payload = {
"content": f"{'-'*20}\nPokemon: {text} #{encounters}\nSpeed: {round((encounters / (encounter_time - start).total_seconds()) * 60, 2)}/min \nRecent Speed: {recentspeed}\n Ram Change: `{(process.memory_info().rss-ram)//1000000}MB`\nRam usage: `{process.memory_info().rss//1000000}MB`\nStarting usage: `{ram//1000000}MB`\nGarbage: {gc.get_count()}\nCPU: {psutil.cpu_percent()}"
}
r = requests.post(f"https://discord.com/api/v9/channels/{logs_id}/messages", data=payload,
headers=header)
gc.collect()
#json
with open("encounters.json", "r") as a_file:
json_object = json.load(a_file)
a_file.close()
try:
json_object[text] += 1
except:
json_object[text] = 1
with open("encounters.json", "w") as a_file:
json.dump(json_object, a_file)
a_file.close()
c = 0
e = False
while True:
if c > 500:
pyautogui.click(2119, 793)
pyautogui.keyDown('4')
time.sleep(0.01)
pyautogui.keyUp('4')
pyautogui.click(2050, 1120)
pyautogui.click(1720, 1030)
time.sleep(1)
e = True
break
im = numpy.asarray(pyautogui.screenshot(region=battlecheck))
if list(im[0, 0]) == [240, 211, 92]:
break
c += 1
if e == True:
continue
if (text.startswith("[S]") if shinies else text.replace("[E]","") in shinywanted) or text.replace("[S]","").replace("[E]","") in wanted:
time.sleep(2)
end = datetime.datetime.utcnow()
done = end - start
timed = humanize.naturaldelta(done)
print("Done! in " + timed)
im = numpy.asarray(pyautogui.screenshot(region=win))
ime = Image.fromarray(im)
ime.save(f"{text}.png")
ime.close()
# File
files = {
"file": (f"{text}.png", open(f"{text}.png", 'rb'))
# The picture that we want to send in binary
}
# Optional message to send with the picture
payload = {
"content": f"Found {text} in {timed} of searching. It took {encounters} encounters"
}
active = False
r = requests.post(f"https://discord.com/api/v9/channels/{channel_id}/messages", data=payload,
headers=header, files=files)
gc.collect()
call = client.calls.create(twiml='<Response><Say>Ahoy, World!</Say></Response>',to='+421949847318',from_='+18596961895')
time.sleep(25)
if remote:
pyautogui.click(3804, 15)
break
playsound('ringtone.mp3')
elif text.startswith("[E]") or text.replace("[S]","").replace("[E]","") in avoid:
run()
c = 0
while True:
if c > 300:
pyautogui.click(2119, 793)
pyautogui.click(1720, 1030)
pyautogui.keyDown('4')
time.sleep(0.01)
pyautogui.keyUp('4')
pyautogui.click(2050, 1120)
time.sleep(1)
break
else:
im = numpy.asarray(pyautogui.screenshot(region=check))
if list(im[0, 0]) != [230, 230, 230]:
break
c += 1
else:
#time.sleep(random.randint(50, 70) / 100)
kill(text.replace("[S]","").replace("[E]",""))
c = 0
time.sleep(1)
while True:
if c > 750:
pyautogui.click(2119, 793)
pyautogui.click(1720, 1030)
pyautogui.keyDown('4')
time.sleep(0.01)
pyautogui.keyUp('4')
pyautogui.click(2050, 1120)
time.sleep(1)
break
else:
im = numpy.asarray(pyautogui.screenshot(region=check))
if list(im[0, 0]) != [230, 230, 230]:
break
c += 1
The code is barely legible; split it into functions or classes, and avoid global variables. Chances are you are overflowing your computer's RAM or something like that: if you are appending information to a variable, remember to delete it once it grows too much, or store it in a database-like system.
Something similar (or worse) applies to threads if you create too many without deleting them at some point.
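For example, the times list in the script grows by one entry per encounter and is never trimmed, and a new thread is started on each pass without being joined. A small sketch of both fixes (the names and the maxlen bound are only illustrative, not a drop-in patch):

import collections
import datetime
import threading
import time

times = collections.deque(maxlen=100)    # keep only the most recent timestamps

def step_timer():
    time.sleep(0.1)                      # stand-in for the real per-step wait

for _ in range(3):
    times.append(datetime.datetime.utcnow())
    worker = threading.Thread(target=step_timer)
    worker.start()
    worker.join()                        # wait for the thread so it can be reclaimed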

How to resolve this error generated when matching two images in image stitching code

While running image stitching code in Google Colab I got an error. Can anyone help me resolve it?
ERROR: File "<ipython-input-12-7c271414318b>", line 58
def filter_matches(self, matches, ratio = 0.75):
^
IndentationError: unindent does not match any outer indentation level
CODE:
import os
import sys
import cv2
import math
import numpy as np
import utils
from numpy import linalg
class AlignImagesRansac(object):
def __init__(self, image_dir, key_frame, output_dir, img_filter=None):
'''
image_dir: 'C:\Users\Hamza Ahmed\Desktop\auto' containing all images
key_frame: 'C:\Users\Hamza Ahmed\Desktop\auto\c.jpg' of the base image
output_dir: 'C:\Users\Hamza Ahmed\Desktop\auto' where to save output images
optional:
img_filter = 'JPG'; None->Take all images
'''
self.key_frame_file = os.path.split(key_frame)[-1]
self.output_dir = output_dir
# Open the directory given in the arguments
self.dir_list = []
try:
self.dir_list = os.listdir(image_dir)
if img_filter:
# remove all files that don't end with .[image_filter]
self.dir_list = filter(lambda x: x.find(img_filter) > -1, self.dir_list)
try: # remove Thumbs.db, if existent (Windows only)
self.dir_list.remove('Thumbs.db')
except ValueError:
pass
except:
print >> sys.stderr, ("Unable to open directory: %s" % image_dir)
sys.exit(-1)
self.dir_list = map(lambda x: os.path.join(image_dir, x), self.dir_list)
self.dir_list = filter(lambda x: x != key_frame, self.dir_list)
base_img_rgb = cv2.imread(key_frame)
if base_img_rgb == None:
raise IOError("%s doesn't exist" %key_frame)
# utils.showImage(base_img_rgb, scale=(0.2, 0.2), timeout=0)
# cv2.destroyAllWindows()
final_img = self.stitchImages(base_img_rgb, 0)
def filter_matches(self, matches, ratio = 0.75):
filtered_matches = []
for m in matches:
if len(m) == 2 and m[0].distance < m[1].distance * ratio:
filtered_matches.append(m[0])
return filtered_matches
def imageDistance(self, matches):
sumDistance = 0.0
for match in matches:
sumDistance += match.distance
return sumDistance
def findDimensions(self, image, homography):
base_p1 = np.ones(3, np.float32)
base_p2 = np.ones(3, np.float32)
base_p3 = np.ones(3, np.float32)
base_p4 = np.ones(3, np.float32)
(y, x) = image.shape[:2]
base_p1[:2] = [0,0]
base_p2[:2] = [x,0]
base_p3[:2] = [0,y]
base_p4[:2] = [x,y]
max_x = None
max_y = None
min_x = None
min_y = None
for pt in [base_p1, base_p2, base_p3, base_p4]:
hp = np.matrix(homography, np.float32) * np.matrix(pt, np.float32).T
hp_arr = np.array(hp, np.float32)
normal_pt = np.array([hp_arr[0]/hp_arr[2], hp_arr[1]/hp_arr[2]], np.float32)
if ( max_x == None or normal_pt[0,0] > max_x ):
max_x = normal_pt[0,0]
if ( max_y == None or normal_pt[1,0] > max_y ):
max_y = normal_pt[1,0]
if ( min_x == None or normal_pt[0,0] < min_x ):
min_x = normal_pt[0,0]
if ( min_y == None or normal_pt[1,0] < min_y ):
min_y = normal_pt[1,0]
min_x = min(0, min_x)
min_y = min(0, min_y)
return (min_x, min_y, max_x, max_y)
def stitchImages(self, base_img_rgb, round=0):
if ( len(self.dir_list) < 1 ):
return base_img_rgb
# print base_img_rgb.channels()
# if(image.channels()==1)
# { /* Grayscale */ }
# else if (image.channels==4)
# { /* ARGB or RGBA image */
base_img = cv2.GaussianBlur(cv2.cvtColor(base_img_rgb,cv2.COLOR_BGR2GRAY), (5,5), 0)
# Use the SIFT feature detector
detector = cv2.SIFT()
# Find key points in base image for motion estimation
base_features, base_descs = detector.detectAndCompute(base_img, None)
# Create new key point list
# key_points = []
# for kp in base_features:
# key_points.append((int(kp.pt[0]),int(kp.pt[1])))
# utils.showImage(base_img, key_points, scale=(0.2, 0.2), timeout=0)
# cv2.destroyAllWindows()
# Parameters for nearest-neighbor matching
FLANN_INDEX_KDTREE = 1 # bug: flann enums are missing
flann_params = dict(algorithm = FLANN_INDEX_KDTREE,
trees = 5)
matcher = cv2.FlannBasedMatcher(flann_params, {})
print ("Iterating through next images...")
closestImage = None
# TODO: Thread this loop since each iteration is independent
# Find the best next image from the remaining images
for next_img_path in self.dir_list:
print ("Reading %s..." % next_img_path)
if ( self.key_frame_file in next_img_path ):
print ("\t Skipping %s..." % self.key_frame_file)
continue
# Read in the next image...
next_img_rgb = cv2.imread(next_img_path)
next_img = cv2.GaussianBlur(cv2.cvtColor(next_img_rgb,cv2.COLOR_BGR2GRAY), (5,5), 0)
# if ( next_img.shape != base_img.shape ):
# print "\t Skipping %s, bad shape: %s" % (next_img_path, next_img.shape)
# continue
print ("\t Finding points...")
# Find points in the next frame
next_features, next_descs = detector.detectAndCompute(next_img, None)
matches = matcher.knnMatch(next_descs, trainDescriptors=base_descs, k=2)
print ("\t Match Count: ", len(matches))
matches_subset = self.filter_matches(matches)
print ("\t Filtered Match Count: ", len(matches_subset))
distance = self.imageDistance(matches_subset)
print ("\t Distance from Key Image: ", distance)
averagePointDistance = distance/float(len(matches_subset))
print ("\t Average Distance: ", averagePointDistance)
kp1 = []
kp2 = []
for match in matches_subset:
kp1.append(base_features[match.trainIdx])
kp2.append(next_features[match.queryIdx])
p1 = np.array([k.pt for k in kp1])
p2 = np.array([k.pt for k in kp2])
H, status = cv2.findHomography(p1, p2, cv2.RANSAC, 5.0)
print ('%d / %d inliers/matched' % (np.sum(status), len(status)))
inlierRatio = float(np.sum(status)) / float(len(status))
# if ( closestImage == None or averagePointDistance < closestImage['dist'] ):
if ( closestImage == None or inlierRatio > closestImage['inliers'] ):
closestImage = {}
closestImage['h'] = H
closestImage['inliers'] = inlierRatio
closestImage['dist'] = averagePointDistance
closestImage['path'] = next_img_path
closestImage['rgb'] = next_img_rgb
closestImage['img'] = next_img
closestImage['feat'] = next_features
closestImage['desc'] = next_descs
closestImage['match'] = matches_subset
print ("Closest Image: ", closestImage['path'])
print ("Closest Image Ratio: ", closestImage['inliers'])
self.dir_list = filter(lambda x: x != closestImage['path'], self.dir_list)
# utils.showImage(closestImage['img'], scale=(0.2, 0.2), timeout=0)
# cv2.destroyAllWindows()
H = closestImage['h']
H = H / H[2,2]
H_inv = linalg.inv(H)
if ( closestImage['inliers'] > 0.1 ): # and
(min_x, min_y, max_x, max_y) = self.findDimensions(closestImage['img'], H_inv)
# Adjust max_x and max_y by base img size
max_x = max(max_x, base_img.shape[1])
max_y = max(max_y, base_img.shape[0])
move_h = np.matrix(np.identity(3), np.float32)
if ( min_x < 0 ):
move_h[0,2] += -min_x
max_x += -min_x
if ( min_y < 0 ):
move_h[1,2] += -min_y
max_y += -min_y
print ("Homography: \n", H)
print ("Inverse Homography: \n", H_inv)
print ("Min Points: ", (min_x, min_y))
mod_inv_h = move_h * H_inv
img_w = int(math.ceil(max_x))
img_h = int(math.ceil(max_y))
print ("New Dimensions: ", (img_w, img_h))
# Warp the new image given the homography from the old image
base_img_warp = cv2.warpPerspective(base_img_rgb, move_h, (img_w, img_h))
print ("Warped base image")
# utils.showImage(base_img_warp, scale=(0.2, 0.2), timeout=5000)
# cv2.destroyAllWindows()
next_img_warp = cv2.warpPerspective(closestImage['rgb'], mod_inv_h, (img_w, img_h))
print ("Warped next image")
# utils.showImage(next_img_warp, scale=(0.2, 0.2), timeout=5000)
# cv2.destroyAllWindows()
# Put the base image on an enlarged palette
enlarged_base_img = np.zeros((img_h, img_w, 3), np.uint8)
print ("Enlarged Image Shape: ", enlarged_base_img.shape)
print ("Base Image Shape: ", base_img_rgb.shape)
print ("Base Image Warp Shape: ", base_img_warp.shape)
# enlarged_base_img[y:y+base_img_rgb.shape[0],x:x+base_img_rgb.shape[1]] = base_img_rgb
# enlarged_base_img[:base_img_warp.shape[0],:base_img_warp.shape[1]] = base_img_warp
# Create a mask from the warped image for constructing masked composite
(ret,data_map) = cv2.threshold(cv2.cvtColor(next_img_warp, cv2.COLOR_BGR2GRAY),
0, 255, cv2.THRESH_BINARY)
enlarged_base_img = cv2.add(enlarged_base_img, base_img_warp,
mask=np.bitwise_not(data_map),
dtype=cv2.CV_8U)
# Now add the warped image
final_img = cv2.add(enlarged_base_img, next_img_warp,
dtype=cv2.CV_8U)
# utils.showImage(final_img, scale=(0.2, 0.2), timeout=0)
# cv2.destroyAllWindows()
# Crop off the black edges
final_gray = cv2.cvtColor(final_img, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(final_gray, 1, 255, cv2.THRESH_BINARY)
contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
print ("Found %d contours..." % (len(contours)))
max_area = 0
best_rect = (0,0,0,0)
for cnt in contours:
x,y,w,h = cv2.boundingRect(cnt)
# print "Bounding Rectangle: ", (x,y,w,h)
deltaHeight = h-y
deltaWidth = w-x
area = deltaHeight * deltaWidth
if ( area > max_area and deltaHeight > 0 and deltaWidth > 0):
max_area = area
best_rect = (x,y,w,h)
if ( max_area > 0 ):
print ("Maximum Contour: ", max_area)
print ("Best Rectangle: ", best_rect)
final_img_crop = final_img[best_rect[1]:best_rect[1]+best_rect[3],
best_rect[0]:best_rect[0]+best_rect[2]]
# utils.showImage(final_img_crop, scale=(0.2, 0.2), timeout=0)
# cv2.destroyAllWindows()
final_img = final_img_crop
# Write out the current round
final_filename = "%s/%d.JPG" % (self.output_dir, round)
cv2.imwrite(final_filename, final_img)
return self.stitchImages(final_img, round+1)
else:
return self.stitchImages(base_img_rgb, round+1)
# ----------------------------------------------------------------------------
#if __name__ == '__main__':
# if ( len(args) < 4 ):
# print >> sys.stderr, ("Usage: %s <image_dir> <key_frame> <output>" % args[0])
# sys.exit(-1)
#AlignImagesRansac(sys.args[1:])

How to put RTSP video input in OpenCV

I'm setting up a PPE detection module using OpenVINO on Ubuntu 18.04. The video input worked well with my webcam /dev/video0, but I wish to change it to an RTSP input. Whenever I put my RTSP URL inside config.json it doesn't work and shows me "Either wrong input path or empty line is found. Please check the conf.json file".
Here is the main.py
#!/usr/bin/env python3
from __future__ import print_function
import sys
import os
import cv2
import numpy as np
from argparse import ArgumentParser
import datetime
import json
from inference import Network
# Global vars
cpu_extension = ''
conf_modelLayers = ''
conf_modelWeights = ''
conf_safety_modelLayers = ''
conf_safety_modelWeights = ''
targetDevice = "CPU"
conf_batchSize = 1
conf_modelPersonLabel = 1
conf_inferConfidenceThreshold = 0.7
conf_inFrameViolationsThreshold = 19
conf_inFramePeopleThreshold = 5
use_safety_model = False
padding = 30
viol_wk = 0
acceptedDevices = ['CPU', 'GPU', 'MYRIAD', 'HETERO:FPGA,CPU', 'HDDL']
videos = []
name_of_videos = []
CONFIG_FILE = '../resources/config.json'
is_async_mode = True
class Video:
def __init__(self, idx, path):
if path.isnumeric():
self.video = cv2.VideoCapture(int(path))
self.name = "Cam " + str(idx)
else:
if os.path.exists(path):
self.video = cv2.VideoCapture("rtsp://edwin:Passw0rd#192.168.0.144:554/cam/realmonitor?channel=1&subtype=1")
self.name = "Video " + str(idx)
else:
print("Either wrong input path or empty line is found. Please check the conf.json file")
exit(21)
if not self.video.isOpened():
print("Couldn't open video: " + path)
sys.exit(20)
self.height = int(self.video.get(cv2.CAP_PROP_FRAME_HEIGHT))
self.width = int(self.video.get(cv2.CAP_PROP_FRAME_WIDTH))
self.currentViolationCount = 0
self.currentViolationCountConfidence = 0
self.prevViolationCount = 0
self.totalViolations = 0
self.totalPeopleCount = 0
self.currentPeopleCount = 0
self.currentPeopleCountConfidence = 0
self.prevPeopleCount = 0
self.currentTotalPeopleCount = 0
cv2.namedWindow(self.name, cv2.WINDOW_NORMAL)
self.frame_start_time = datetime.datetime.now()
def get_args():
"""
Parses the argument.
:return: None
"""
global is_async_mode
parser = ArgumentParser()
parser.add_argument("-d", "--device",
help="Specify the target device to infer on; CPU, GPU,"
"FPGA, MYRIAD or HDDL is acceptable. Application will"
"look for a suitable plugin for device specified"
" (CPU by default)",
type=str, required=False)
parser.add_argument("-m", "--model",
help="Path to an .xml file with a trained model's"
" weights.",
required=True, type=str)
parser.add_argument("-sm", "--safety_model",
help="Path to an .xml file with a trained model's"
" weights.",
required=False, type=str, default=None)
parser.add_argument("-e", "--cpu_extension",
help="MKLDNN (CPU)-targeted custom layers. Absolute "
"path to a shared library with the kernels impl",
type=str, default=None)
parser.add_argument("-f", "--flag", help="sync or async", default="async", type=str)
args = parser.parse_args()
global conf_modelLayers, conf_modelWeights, conf_safety_modelLayers, conf_safety_modelWeights, \
targetDevice, cpu_extension, videos, use_safety_model
if args.model:
conf_modelLayers = args.model
conf_modelWeights = os.path.splitext(conf_modelLayers)[0] + ".bin"
if args.safety_model:
conf_safety_modelLayers = args.safety_model
conf_safety_modelWeights = os.path.splitext(conf_safety_modelLayers)[0] + ".bin"
use_safety_model = True
if args.device:
targetDevice = args.device
if "MULTI:" not in targetDevice:
if targetDevice not in acceptedDevices:
print("Selected device, %s not supported." % (targetDevice))
sys.exit(12)
if args.cpu_extension:
cpu_extension = args.cpu_extension
if args.flag == "async":
is_async_mode = True
print('Application running in Async mode')
else:
is_async_mode = False
print('Application running in Sync mode')
assert os.path.isfile(CONFIG_FILE), "{} file doesn't exist".format(CONFIG_FILE)
config = json.loads(open(CONFIG_FILE).read())
for idx, item in enumerate(config['inputs']):
vid = Video(idx, item['video'])
name_of_videos.append([idx, item['video']])
videos.append([idx, vid])
def detect_safety_hat(img):
"""
Detection of the hat of the person.
:param img: Current frame
:return: Boolean value of the detected hat
"""
lowH = 15
lowS = 65
lowV = 75
highH = 30
highS = 255
highV = 255
crop = 0
height = 15
perc = 8
hsv = np.zeros(1)
try:
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
except cv2.error as e:
print("%d %d %d" % (img.shape))
print("%d %d %d" % (img.shape))
print(e)
threshold_img = cv2.inRange(hsv, (lowH, lowS, lowV), (highH, highS, highV))
x = 0
y = int(threshold_img.shape[0] * crop / 100)
w = int(threshold_img.shape[1])
h = int(threshold_img.shape[0] * height / 100)
img_cropped = threshold_img[y: y + h, x: x + w]
if cv2.countNonZero(threshold_img) < img_cropped.size * perc / 100:
return False
return True
def detect_safety_jacket(img):
"""
Detection of the safety jacket of the person.
:param img: Current frame
:return: Boolean value of the detected jacket
"""
lowH = 0
lowS = 150
lowV = 42
highH = 11
highS = 255
highV = 255
crop = 15
height = 40
perc = 23
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
threshold_img = cv2.inRange(hsv, (lowH, lowS, lowV), (highH, highS, highV))
x = 0
y = int(threshold_img.shape[0] * crop / 100)
w = int(threshold_img.shape[1])
h = int(threshold_img.shape[0] * height / 100)
img_cropped = threshold_img[y: y + h, x: x + w]
if cv2.countNonZero(threshold_img) < img_cropped.size * perc / 100:
return False
return True
def detect_workers(workers, frame):
"""
Detection of the person with the safety guards.
:param workers: Total number of the person in the current frame
:param frame: Current frame
:return: Total violation count of the person
"""
violations = 0
global viol_wk
for worker in workers:
xmin, ymin, xmax, ymax = worker
crop = frame[ymin:ymax, xmin:xmax]
if 0 not in crop.shape:
if detect_safety_hat(crop):
if detect_safety_jacket(crop):
cv2.rectangle(frame, (xmin, ymin), (xmax, ymax),
(0, 255, 0), 2)
else:
cv2.rectangle(frame, (xmin, ymin), (xmax, ymax),
(0, 0, 255), 2)
violations += 1
viol_wk += 1
else:
cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 0, 255), 2)
violations += 1
viol_wk += 1
return violations
def main():
"""
Load the network and parse the output.
:return: None
"""
get_args()
global is_async_mode
nextReq = 1
currReq = 0
nextReq_s = 1
currReq_s = 0
prevVideo = None
vid_finished = [False] * len(videos)
min_FPS = min([videos[i][1].video.get(cv2.CAP_PROP_FPS) for i in range(len(videos))])
# Initialise the class
infer_network = Network()
infer_network_safety = Network()
# Load the network to IE plugin to get shape of input layer
plugin, (batch_size, channels, model_height, model_width) = \
infer_network.load_model(conf_modelLayers, targetDevice, 1, 1, 2, cpu_extension)
if use_safety_model:
batch_size_sm, channels_sm, model_height_sm, model_width_sm = \
infer_network_safety.load_model(conf_safety_modelLayers, targetDevice, 1, 1, 2, cpu_extension, plugin)[1]
while True:
for index, currVideo in videos:
# Read image from video/cam
vfps = int(round(currVideo.video.get(cv2.CAP_PROP_FPS)))
for i in range(0, int(round(vfps / min_FPS))):
ret, current_img = currVideo.video.read()
if not ret:
vid_finished[index] = True
break
if vid_finished[index]:
stream_end_frame = np.zeros((int(currVideo.height), int(currVideo.width), 1),
dtype='uint8')
cv2.putText(stream_end_frame, "Input file {} has ended".format
(name_of_videos[index][1].split('/')[-1]),
(10, int(currVideo.height / 2)),
cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2)
cv2.imshow(currVideo.name, stream_end_frame)
continue
# Transform image to person detection model input
rsImg = cv2.resize(current_img, (model_width, model_height))
rsImg = rsImg.transpose((2, 0, 1))
rsImg = rsImg.reshape((batch_size, channels, model_height, model_width))
infer_start_time = datetime.datetime.now()
# Infer current image
if is_async_mode:
infer_network.exec_net(nextReq, rsImg)
else:
infer_network.exec_net(currReq, rsImg)
prevVideo = currVideo
previous_img = current_img
# Wait for previous request to end
if infer_network.wait(currReq) == 0:
infer_end_time = (datetime.datetime.now() - infer_start_time) * 1000
in_frame_workers = []
people = 0
violations = 0
hard_hat_detection = False
vest_detection = False
result = infer_network.get_output(currReq)
# Filter output
for obj in result[0][0]:
if obj[2] > conf_inferConfidenceThreshold:
xmin = int(obj[3] * prevVideo.width)
ymin = int(obj[4] * prevVideo.height)
xmax = int(obj[5] * prevVideo.width)
ymax = int(obj[6] * prevVideo.height)
xmin = int(xmin - padding) if (xmin - padding) > 0 else 0
ymin = int(ymin - padding) if (ymin - padding) > 0 else 0
xmax = int(xmax + padding) if (xmax + padding) < prevVideo.width else prevVideo.width
ymax = int(ymax + padding) if (ymax + padding) < prevVideo.height else prevVideo.height
cv2.rectangle(previous_img, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
people += 1
in_frame_workers.append((xmin, ymin, xmax, ymax))
new_frame = previous_img[ymin:ymax, xmin:xmax]
if use_safety_model:
# Transform image to safety model input
in_frame_sm = cv2.resize(new_frame, (model_width_sm, model_height_sm))
in_frame_sm = in_frame_sm.transpose((2, 0, 1))
in_frame_sm = in_frame_sm.reshape(
(batch_size_sm, channels_sm, model_height_sm, model_width_sm))
infer_start_time_sm = datetime.datetime.now()
if is_async_mode:
infer_network_safety.exec_net(nextReq_s, in_frame_sm)
else:
infer_network_safety.exec_net(currReq_s, in_frame_sm)
# Wait for the result
infer_network_safety.wait(currReq_s)
infer_end_time_sm = (datetime.datetime.now() - infer_start_time_sm) * 1000
result_sm = infer_network_safety.get_output(currReq_s)
# Filter output
hard_hat_detection = False
vest_detection = False
detection_list = []
for obj_sm in result_sm[0][0]:
if (obj_sm[2] > 0.4):
# Detect safety vest
if (int(obj_sm[1])) == 2:
xmin_sm = int(obj_sm[3] * (xmax - xmin))
ymin_sm = int(obj_sm[4] * (ymax - ymin))
xmax_sm = int(obj_sm[5] * (xmax - xmin))
ymax_sm = int(obj_sm[6] * (ymax - ymin))
if vest_detection == False:
detection_list.append(
[xmin_sm + xmin, ymin_sm + ymin, xmax_sm + xmin, ymax_sm + ymin])
vest_detection = True
# Detect hard-hat
if int(obj_sm[1]) == 4:
xmin_sm_v = int(obj_sm[3] * (xmax - xmin))
ymin_sm_v = int(obj_sm[4] * (ymax - ymin))
xmax_sm_v = int(obj_sm[5] * (xmax - xmin))
ymax_sm_v = int(obj_sm[6] * (ymax - ymin))
if hard_hat_detection == False:
detection_list.append([xmin_sm_v + xmin, ymin_sm_v + ymin, xmax_sm_v + xmin,
ymax_sm_v + ymin])
hard_hat_detection = True
if hard_hat_detection is False or vest_detection is False:
violations += 1
for _rect in detection_list:
cv2.rectangle(current_img, (_rect[0], _rect[1]), (_rect[2], _rect[3]), (0, 255, 0), 2)
if is_async_mode:
currReq_s, nextReq_s = nextReq_s, currReq_s
# Use OpenCV if worker-safety-model is not provided
else:
violations = detect_workers(in_frame_workers, previous_img)
# Check if detected violations equals previous frames
if violations == prevVideo.currentViolationCount:
prevVideo.currentViolationCountConfidence += 1
# If frame threshold is reached, change validated count
if prevVideo.currentViolationCountConfidence == conf_inFrameViolationsThreshold:
# If another violation occurred, save image
if prevVideo.currentViolationCount > prevVideo.prevViolationCount:
prevVideo.totalViolations += (
prevVideo.currentViolationCount - prevVideo.prevViolationCount)
prevVideo.prevViolationCount = prevVideo.currentViolationCount
else:
prevVideo.currentViolationCountConfidence = 0
prevVideo.currentViolationCount = violations
# Check if detected people count equals previous frames
if people == prevVideo.currentPeopleCount:
prevVideo.currentPeopleCountConfidence += 1
# If frame threshold is reached, change validated count
if prevVideo.currentPeopleCountConfidence == conf_inFrameViolationsThreshold:
prevVideo.currentTotalPeopleCount += (
prevVideo.currentPeopleCount - prevVideo.prevPeopleCount)
if prevVideo.currentTotalPeopleCount > prevVideo.prevPeopleCount:
prevVideo.totalPeopleCount += prevVideo.currentTotalPeopleCount - prevVideo.prevPeopleCount
prevVideo.prevPeopleCount = prevVideo.currentPeopleCount
else:
prevVideo.currentPeopleCountConfidence = 0
prevVideo.currentPeopleCount = people
frame_end_time = datetime.datetime.now()
cv2.putText(previous_img, 'Total people count: ' + str(
prevVideo.totalPeopleCount), (10, prevVideo.height - 10),
cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
cv2.putText(previous_img, 'Current people count: ' + str(
prevVideo.currentTotalPeopleCount),
(10, prevVideo.height - 40),
cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
cv2.putText(previous_img, 'Total violation count: ' + str(
prevVideo.totalViolations), (10, prevVideo.height - 70),
cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
cv2.putText(previous_img, 'FPS: %0.2fs' % (1 / (
frame_end_time - prevVideo.frame_start_time).total_seconds()),
(10, prevVideo.height - 100),
cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
cv2.putText(previous_img, "Inference time: N\A for async mode" if is_async_mode else \
"Inference time: {:.3f} ms".format((infer_end_time).total_seconds()),
(10, prevVideo.height - 130),
cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
cv2.imshow(prevVideo.name, previous_img)
prevVideo.frame_start_time = datetime.datetime.now()
# Swap
if is_async_mode:
currReq, nextReq = nextReq, currReq
previous_img = current_img
prevVideo = currVideo
if cv2.waitKey(1) == 27:
print("Attempting to stop input files")
infer_network.clean()
infer_network_safety.clean()
cv2.destroyAllWindows()
return
if False not in vid_finished:
infer_network.clean()
infer_network_safety.clean()
cv2.destroyAllWindows()
break
if __name__ == '__main__':
main()
Here is the config file
{
    "inputs": [
        {
            "video": "rtsp://xxx:xxx#192.168.0.144:554/cam/realmonitor?channel=1&subtype=1"
        }
    ]
}
This is because of the line if os.path.exists(path):. This condition checks whether path points to an existing file. Since your RTSP stream is not a file, it leads to your error.
For example, you can modify this condition to:
if os.path.exists(path) or path.startswith("rtsp"):
By the way, you hard-coded the RTSP stream address within the code, so it will not use your configured path. You may want to replace the hard-coded URL with path.
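Putting both suggestions together, the start of Video.__init__ might look roughly like this (a sketch of the changed branch only, not the full class):

import os
import sys
import cv2

class Video:
    def __init__(self, idx, path):
        if path.isnumeric():
            self.video = cv2.VideoCapture(int(path))
            self.name = "Cam " + str(idx)
        elif os.path.exists(path) or path.startswith("rtsp"):
            self.video = cv2.VideoCapture(path)   # use the configured path, no hard-coded URL
            self.name = "Video " + str(idx)
        else:
            print("Either wrong input path or empty line is found. Please check the conf.json file")
            sys.exit(21)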

How to remove the small blocks around the longest block?

In the above image, how can I get a clean line block, removing the blocks above and below the long strip in the middle? I have tried projection, but it failed.
def hProject(binary):
h, w = binary.shape
hprojection = np.zeros(binary.shape, dtype=np.uint8)
h_h = [0]*h
for j in range(h):
for i in range(w):
if binary[j,i] == 255:
h_h[j] += 1
return h_h
def creat_T_rectangle(h, w, mode='x_up'):
assert mode in ['x_up', 'x_down', 'y_left', 'y_right']
if mode == 'x_up':
up_t = np.ones((h*2, w*3), np.uint8)
up_t[:h, :w] = 0
up_t[:h, 2*w:] = 0
return up_t, (0, h, w, 2*w) # y1, y2, x1, x2
elif mode == 'y_left':
left_t = np.ones((h*3, w*2), np.uint8)
left_t[:h, :w] = 0
left_t[2*h:, :w] = 0
return left_t, (h, 2*h, 0, w)
elif mode == 'x_down':
down_t = np.ones((h*2, w*3), np.uint8)
down_t[h:2*h, :w] = 0
down_t[h:2*h, 2*w:] = 0
return down_t, (h, 2*h, w, 2*w)
elif mode == 'y_right':
right_t = np.ones((h*3, w*2), np.uint8)
right_t[:h, w:2*w] = 0
right_t[2*h:, w:] = 0
return right_t, (h, 2*h, w, 2*w)
else:
raise NotImplementedError
def remove_around_rectangle(markers, bh, bw):
'''
markers:binary image, bh, bw = 5, 5 ...
'''
up_t, up_rect = creat_T_rectangle(bh, bw, mode='x_up')
down_t, down_rect = creat_T_rectangle(bh, bw, mode='x_down')
one_nums = up_t.sum()
i = bh
j = bw
while i < markers.shape[0]-bh:
while j < markers.shape[1]-2*bw:
block = markers[i-bh:i+bh, j-bw:j+2*bw]
inner_up = block * up_t
inner_down = block * down_t
if inner_down.sum()//255 == inner_up.sum()//255 == one_nums:
markers[down_rect[0]+i-bh:down_rect[1]+i-bh, down_rect[2]+j-bw:down_rect[3]+j-bw] = 0
else:
if inner_up.sum()//255 == one_nums:
markers[up_rect[0]+i-bh:up_rect[1]+i-bh, up_rect[2]+j-bw:up_rect[3]+j-bw] = 0
if inner_down.sum()//255 == one_nums:
markers[down_rect[0]+i-bh:down_rect[1]+i-bh, down_rect[2]+j-bw:down_rect[3]+j-bw] = 0
j += bw
i += 1
j = bw
left_t, left_rect = creat_T_rectangle(bh, bw, mode='y_left')
one_nums = left_t.sum()
right_t, right_rect = creat_T_rectangle(bh, bw, mode='y_right')
i = bh
j = bw
while i < markers.shape[0] - 2*bh:
while j < markers.shape[1] - bw:
block = markers[i-bh:i+2*bh, j-bw:j+bw]
inner_left = block * left_t
inner_right = block * right_t
if inner_left.sum()//255 == one_nums == inner_right.sum()//255 :
markers[left_rect[0]+i-bh:left_rect[1]+i-bh, left_rect[2]+j-bw:left_rect[3]+j-bw] = 0
else:
if inner_right.sum()//255 == one_nums:
markers[right_rect[0]+i-bh:right_rect[1]+i-bh, right_rect[2]+j-bw:right_rect[3]+j-bw] = 0
if inner_left.sum()//255 == one_nums :
markers[left_rect[0]+i-bh:left_rect[1]+i-bh, left_rect[2]+j-bw:left_rect[3]+j-bw] = 0
j += bw
i += 1
j = bw
return markers
The above is my code, but it is very slow.
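As a side note on the speed, the per-row count done by hProject can be expressed as one vectorized NumPy call instead of two nested Python loops (a minimal sketch that returns the same list of counts):

import numpy as np

def hProject_fast(binary):
    # count the 255-valued pixels in each row, equivalent to the loops above
    return np.count_nonzero(binary == 255, axis=1).tolist()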

How to get the video file length in Yolo v3

I wanted to find out how the number of video frames was calculated in the code below.
[UPD] I first thought it was done by YOLO, but later I realized it was OpenCV that dealt with the number of frames in a video file.
"""
Class definition of YOLO_v3 style detection model on image and video
"""
import colorsys
import os
from timeit import default_timer as timer
import numpy as np
from keras import backend as K
from keras.models import load_model
from keras.layers import Input
from PIL import Image, ImageFont, ImageDraw
from yolo3.model import yolo_eval, yolo_body, tiny_yolo_body
from yolo3.utils import letterbox_image
import os
from keras.utils import multi_gpu_model
class YOLO(object):
_defaults = {
"model_path": 'model_data/yolo.h5',
"anchors_path": 'model_data/yolo_anchors.txt',
"classes_path": 'model_data/coco_classes.txt',
"score" : 0.3,
"iou" : 0.45,
"model_image_size" : (416, 416),
"gpu_num" : 1,
}
#classmethod
def get_defaults(cls, n):
if n in cls._defaults:
return cls._defaults[n]
else:
return "Unrecognized attribute name '" + n + "'"
def __init__(self, **kwargs):
self.__dict__.update(self._defaults) # set up default values
self.__dict__.update(kwargs) # and update with user overrides
self.class_names = self._get_class()
self.anchors = self._get_anchors()
self.sess = K.get_session()
self.boxes, self.scores, self.classes = self.generate()
def _get_class(self):
classes_path = os.path.expanduser(self.classes_path)
with open(classes_path) as f:
class_names = f.readlines()
class_names = [c.strip() for c in class_names]
return class_names
def _get_anchors(self):
anchors_path = os.path.expanduser(self.anchors_path)
with open(anchors_path) as f:
anchors = f.readline()
anchors = [float(x) for x in anchors.split(',')]
return np.array(anchors).reshape(-1, 2)
def generate(self):
model_path = os.path.expanduser(self.model_path)
assert model_path.endswith('.h5'), 'weights must be a .h5 file.'
# Load model, or construct model and load weights.
num_anchors = len(self.anchors)
num_classes = len(self.class_names)
is_tiny_version = num_anchors==6 # default setting
try:
self.yolo_model = load_model(model_path, compile=False)
except:
self.yolo_model = tiny_yolo_body(Input(shape=(None,None,3)), num_anchors//2, num_classes) \
if is_tiny_version else yolo_body(Input(shape=(None,None,3)), num_anchors//3, num_classes)
self.yolo_model.load_weights(self.model_path) # make sure model, anchors and classes match
else:
assert self.yolo_model.layers[-1].output_shape[-1] == \
num_anchors/len(self.yolo_model.output) * (num_classes + 5), \
'Mismatch between model and given anchor and class sizes'
print('{} model, anchors, and classes loaded.'.format(model_path))
# Generate colors for drawing bounding boxes.
hsv_tuples = [(x / len(self.class_names), 1., 1.)
for x in range(len(self.class_names))]
self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
self.colors = list(
map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
self.colors))
np.random.seed(10101) # Fixed seed for consistent colors across runs.
np.random.shuffle(self.colors) # Shuffle colors to decorrelate adjacent classes.
np.random.seed(None) # Reset seed to default.
# Generate output tensor targets for filtered bounding boxes.
self.input_image_shape = K.placeholder(shape=(2, ))
if self.gpu_num>=2:
self.yolo_model = multi_gpu_model(self.yolo_model, gpus=self.gpu_num)
boxes, scores, classes = yolo_eval(self.yolo_model.output, self.anchors,
len(self.class_names), self.input_image_shape,
score_threshold=self.score, iou_threshold=self.iou)
return boxes, scores, classes
def detect_image(self, image):
start = timer()
if self.model_image_size != (None, None):
assert self.model_image_size[0]%32 == 0, 'Multiples of 32 required'
assert self.model_image_size[1]%32 == 0, 'Multiples of 32 required'
boxed_image = letterbox_image(image, tuple(reversed(self.model_image_size)))
else:
new_image_size = (image.width - (image.width % 32),
image.height - (image.height % 32))
boxed_image = letterbox_image(image, new_image_size)
image_data = np.array(boxed_image, dtype='float32')
print(image_data.shape)
image_data /= 255.
image_data = np.expand_dims(image_data, 0) # Add batch dimension.
out_boxes, out_scores, out_classes = self.sess.run(
[self.boxes, self.scores, self.classes],
feed_dict={
self.yolo_model.input: image_data,
self.input_image_shape: [image.size[1], image.size[0]],
K.learning_phase(): 0
})
print('Found {} boxes for {}'.format(len(out_boxes), 'img'))
font = ImageFont.truetype(font='font/FiraMono-Medium.otf',
size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
thickness = (image.size[0] + image.size[1]) // 300
for i, c in reversed(list(enumerate(out_classes))):
predicted_class = self.class_names[c]
box = out_boxes[i]
score = out_scores[i]
label = '{} {:.2f}'.format(predicted_class, score)
draw = ImageDraw.Draw(image)
label_size = draw.textsize(label, font)
top, left, bottom, right = box
top = max(0, np.floor(top + 0.5).astype('int32'))
left = max(0, np.floor(left + 0.5).astype('int32'))
bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
right = min(image.size[0], np.floor(right + 0.5).astype('int32'))
print(label, (left, top), (right, bottom))
if top - label_size[1] >= 0:
text_origin = np.array([left, top - label_size[1]])
else:
text_origin = np.array([left, top + 1])
# My kingdom for a good redistributable image drawing library.
for i in range(thickness):
draw.rectangle(
[left + i, top + i, right - i, bottom - i],
outline=self.colors[c])
draw.rectangle(
[tuple(text_origin), tuple(text_origin + label_size)],
fill=self.colors[c])
draw.text(text_origin, label, fill=(0, 0, 0), font=font)
del draw
end = timer()
print(end - start)
return image
def close_session(self):
self.sess.close()
def detect_video(yolo, video_path, output_path=""):
import cv2
video_path = './input.mp4'
vid = cv2.VideoCapture(video_path)
if not vid.isOpened():
raise IOError("Couldn't open webcam or video")
video_FourCC = int(vid.get(cv2.CAP_PROP_FOURCC))
video_fps = vid.get(cv2.CAP_PROP_FPS)
video_size = (int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)),
int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)))
isOutput = True if output_path != "" else False
if isOutput:
print("!!! TYPE:", type(output_path), type(video_FourCC), type(video_fps), type(video_size))
out = cv2.VideoWriter(output_path, video_FourCC, video_fps, video_size)
accum_time = 0
curr_fps = 0
fps = "FPS: ??"
prev_time = timer()
while True:
return_value, frame = vid.read()
image = Image.fromarray(frame)
image = yolo.detect_image(image)
result = np.asarray(image)
curr_time = timer()
exec_time = curr_time - prev_time
prev_time = curr_time
accum_time = accum_time + exec_time
curr_fps = curr_fps + 1
if accum_time == 10 : mouseBrush(image)
if accum_time > 1:
accum_time = accum_time - 1
fps = "FPS: " + str(curr_fps)
curr_fps = 0
cv2.putText(result, text=fps, org=(3, 15), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
fontScale=0.50, color=(255, 0, 0), thickness=2)
cv2.namedWindow("result", cv2.WINDOW_NORMAL)
cv2.imshow("result", result)
if isOutput:
out.write(result)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
yolo.close_session()
Actually, this code is just one part of the whole YOLOv3 model, but I think the part that deals with the number of video frames is included here.
If you mean the current FPS, this is the part showing the current FPS as a string:
while True:
return_value, frame = vid.read()
image = Image.fromarray(frame)
image = yolo.detect_image(image)
result = np.asarray(image)
curr_time = timer()
exec_time = curr_time - prev_time
prev_time = curr_time
accum_time = accum_time + exec_time
curr_fps = curr_fps + 1
if accum_time > 1:
accum_time = accum_time - 1
fps = "FPS: " + str(curr_fps)
curr_fps = 0
cv2.putText(result, text=fps, org=(3, 15), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
fontScale=0.50, color=(255, 0, 0), thickness=2)
cv2.namedWindow("result", cv2.WINDOW_NORMAL)
cv2.imshow("result", result)
if curr_fps == 10: # Stops at 10th frame.
time.sleep(60) # Delay for 1 minute (60 seconds).
if isOutput:
out.write(result)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
I needed the frame number to control every 10th frame in the video file, and thanks to the comments above, I figured out that the line I was looking for is:
curr_fps = curr_fps + 1
UPD: The following line calculates the number of frames in a video file:
NumberOfFrame = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
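Combining that frame count with the FPS property already read in detect_video gives the length of the file in seconds (a small sketch; './input.mp4' is the same hard-coded path used above):

import cv2

vid = cv2.VideoCapture('./input.mp4')
frame_count = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
fps = vid.get(cv2.CAP_PROP_FPS)
length_seconds = frame_count / fps if fps else 0
print(frame_count, fps, length_seconds)
vid.release()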
