Non-maximum suppression from Google Cloud Vision response in Node.js / Python

I'd like to see if it's possible to implement non-maximum suppression in Node.js on the responses from Google Cloud's Vision API. For example, a response looks like this:
[
  {
    "mid": "/m/09728",
    "languageCode": "",
    "name": "Bread",
    "score": 0.8558391332626343,
    "boundingPoly": {
      "vertices": [],
      "normalizedVertices": [
        { "x": 0.010737711563706398, "y": 0.26679491996765137 },
        { "x": 0.9930269718170166, "y": 0.26679491996765137 },
        { "x": 0.9930269718170166, "y": 0.7275580167770386 },
        { "x": 0.010737711563706398, "y": 0.7275580167770386 }
      ]
    }
  },
  {
    "mid": "/m/052lwg6",
    "languageCode": "",
    "name": "Baked goods",
    "score": 0.6180902123451233,
    "boundingPoly": {
      "vertices": [],
      "normalizedVertices": [
        { "x": 0.010737711563706398, "y": 0.26679491996765137 },
        { "x": 0.9930269718170166, "y": 0.26679491996765137 },
        { "x": 0.9930269718170166, "y": 0.7275580167770386 },
        { "x": 0.010737711563706398, "y": 0.7275580167770386 }
      ]
    }
  },
  {
    "mid": "/m/02wbm",
    "languageCode": "",
    "name": "Food",
    "score": 0.5861617922782898,
    "boundingPoly": {
      "vertices": [],
      "normalizedVertices": [
        { "x": 0.321802020072937, "y": 0.2874892055988312 },
        { "x": 0.999139130115509, "y": 0.2874892055988312 },
        { "x": 0.999139130115509, "y": 0.6866284608840942 },
        { "x": 0.321802020072937, "y": 0.6866284608840942 }
      ]
    }
  }
]
So actually the bounding box that should be kept as the outer box is "Food".
I've found examples in Python that do this, but that means I need to spawn a subprocess from Node to execute the Python script and then pull back the response, which feels a bit dirty.
Obviously those box values from Google need multiplying by the image width and height, so if we assume the image is 288 × 512 (width × height) for example:
const left = Math.round(vertices[0].x * 288);
const top = Math.round(vertices[0].y * 512);
const width = Math.round((vertices[2].x * 288)) - left;
const height = Math.round((vertices[2].y * 512)) - top;
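One thing to watch: the snippet above produces left/top/width/height values, but the script below slices its boxes as corner coordinates [x1, y1, x2, y2]. A minimal sketch of building corner-format boxes in Python, assuming the same 288 × 512 image and that response holds the parsed JSON array shown above:
IMAGE_W, IMAGE_H = 288, 512  # assumed image size, matching the example above

def to_pixel_box(verts):
    # vertex 0 is the top-left corner, vertex 2 the bottom-right corner
    return [round(verts[0]["x"] * IMAGE_W), round(verts[0]["y"] * IMAGE_H),
            round(verts[2]["x"] * IMAGE_W), round(verts[2]["y"] * IMAGE_H)]

boxes = [to_pixel_box(o["boundingPoly"]["normalizedVertices"]) for o in response]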
My adapted script looks like this (it just hard-codes the threshold and takes the array of boxes from the command line):
# import the necessary packages
import numpy as np
import sys
import json

# Malisiewicz et al.
def non_max_suppression_fast():
    overlapThresh = 0.3
    # the boxes arrive as a JSON array on the command line; convert to a
    # numpy array so the column slicing below works
    boxes = np.array(json.loads(sys.argv[1]))
    # if there are no boxes, return an empty list
    if len(boxes) == 0:
        return []
    # if the bounding boxes are integers, convert them to floats --
    # this is important since we'll be doing a bunch of divisions
    if boxes.dtype.kind == "i":
        boxes = boxes.astype("float")
    # initialize the list of picked indexes
    pick = []
    # grab the coordinates of the bounding boxes
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    # compute the area of the bounding boxes and sort the bounding
    # boxes by the bottom-right y-coordinate of the bounding box
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    idxs = np.argsort(y2)
    # keep looping while some indexes still remain in the indexes list
    while len(idxs) > 0:
        # grab the last index in the indexes list and add the
        # index value to the list of picked indexes
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)
        # find the largest (x, y) coordinates for the start of
        # the bounding box and the smallest (x, y) coordinates
        # for the end of the bounding box
        xx1 = np.maximum(x1[i], x1[idxs[:last]])
        yy1 = np.maximum(y1[i], y1[idxs[:last]])
        xx2 = np.minimum(x2[i], x2[idxs[:last]])
        yy2 = np.minimum(y2[i], y2[idxs[:last]])
        # compute the width and height of the overlap region
        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)
        # compute the ratio of overlap
        overlap = (w * h) / area[idxs[:last]]
        # delete all indexes from the index list that overlap too much
        idxs = np.delete(idxs, np.concatenate(([last],
            np.where(overlap > overlapThresh)[0])))
    # return only the bounding boxes that were picked using the
    # integer data type
    return boxes[pick].astype("int")

if __name__ == "__main__":
    # print the result as JSON so the calling process can read it back
    picked = non_max_suppression_fast()
    print(json.dumps(np.asarray(picked).tolist()))
Is anyone able to give me any pointers here, please? I'm pretty sure it's just about calculating the overlap between the areas of each pair of boxes, but I can't quite get my brain around it.
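For reference, the overlap test at the heart of the script above can be written on its own in a few lines of plain Python; a minimal sketch, using the same [x1, y1, x2, y2] box format:
def overlap_ratio(box_a, box_b):
    # intersection rectangle: largest top-left corner, smallest bottom-right
    xx1 = max(box_a[0], box_b[0])
    yy1 = max(box_a[1], box_b[1])
    xx2 = min(box_a[2], box_b[2])
    yy2 = min(box_a[3], box_b[3])
    # clamp at zero so disjoint boxes contribute no area
    w = max(0, xx2 - xx1 + 1)
    h = max(0, yy2 - yy1 + 1)
    # divide by the area of box_b, as the script does with area[idxs[:last]],
    # so a box fully contained in another scores 1.0 and gets suppressed
    area_b = (box_b[2] - box_b[0] + 1) * (box_b[3] - box_b[1] + 1)
    return (w * h) / area_b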

Ok, so actually this is pretty simple if you use TensorFlow.js. Use the following function to take the response from Google Vision.
Note: 288 and 512 are my image width and height; you'll need to set your own.
function nonMaxSuppression(objects) {
    return new Promise((resolve) => {
        // We need an array of boxes AND an array of scores.
        const boxes = [];
        const scores = [];
        // Loop through the objects and convert the vertices into the format
        // tf.image.nonMaxSuppression expects: [y1, x1, y2, x2] per box.
        for (let index = 0; index < objects.length; index++) {
            const verts = objects[index].boundingPoly.normalizedVertices;
            // As above, note 288 and 512 are image width and image height for me.
            const left = Math.round(verts[0].x * 288);
            const top = Math.round(verts[0].y * 512);
            const right = Math.round(verts[2].x * 288);
            const bottom = Math.round(verts[2].y * 512);
            boxes.push([top, left, bottom, right]);
            scores.push(objects[index].score);
        }
        // Params are boxes, scores, max number of boxes to select.
        const selected = tf.image.nonMaxSuppression(boxes, scores, 2);
        // The result is a tensor holding the indices of the surviving boxes
        // (zero-based into the boxes array); read it back with dataSync().
        resolve(selected.dataSync());
    });
}
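Note that tf.image.nonMaxSuppression also accepts optional iouThreshold and scoreThreshold arguments after the maximum number of boxes; the iouThreshold plays the same role as the overlapThresh in the Python script above.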
Ta-dah banana!

Related

RecursionError when generating pictures: maximum recursion depth exceeded in comparison

I tried to generate 1000 pictures and got this error:
line 106, in create_new_image return create_new_image()
RecursionError: maximum recursion depth exceeded in comparison
When I generate 10 or 100, it generates perfectly....
I have already done this:
import sys
sys.setrecursionlimit(20000)
but I still have the same problem.
from IPython.display import display
import random
import json
import sys
sys.setrecursionlimit(20000)
print(sys.getrecursionlimit())
from requests import head
# Each image is made up a series of traits
# The weightings for each trait drive the rarity and add up to 100%
Body = ["Body"]
Body_weights = [100]
Face = ["Face1"]
Face_weights = [8]
Hand1 = ["Hand1"]
Hand1_weights = [100]
Hand2 = ["Hand2"]
Hand2_weights = [100]
Eyes = ["Eyes1"]
Eyes_weights = [25]
Hair = ["Hair1"]
Hair_weights = [25]
Lips = ["Lips1"]
Lips_weights = [25]
# Dictionary variable for each trait.
# Each trait corresponds to its file name
# Add more shapes and colours as you wish
Body_files = {
"Body": "body_1",
}
Face_files = {
"Face1": "face1",
}
Hand1_files = {
"Hand1": "hand1",
}
Hand2_files = {
"Hand2": "hand2",
}
Eyes_files = {
"Eyes1": "eyes_1",
}
Hair_files = {
"Hair1": "hair_1",
}
Lips_files = {
"Lips1": "lips_1",
}
TOTAL_IMAGES = 768 # Number of random unique images we want to generate ( 2 x 2 x 2 = 8)
all_images = []
def create_new_image():
    new_image = {}
    # For each trait category, select a random trait based on the weightings
    new_image["Body"] = random.choices(Body, Body_weights)[0]
    new_image["Face"] = random.choices(Face, Face_weights)[0]
    new_image["Hand1"] = random.choices(Hand1, Hand1_weights)[0]
    new_image["Hand2"] = random.choices(Hand2, Hand2_weights)[0]
    new_image["Eyes"] = random.choices(Eyes, Eyes_weights)[0]
    new_image["Hair"] = random.choices(Hair, Hair_weights)[0]
    new_image["Lips"] = random.choices(Lips, Lips_weights)[0]
    if new_image in all_images:
        return create_new_image()
    else:
        return new_image

# Generate the unique combinations based on trait weightings
for i in range(TOTAL_IMAGES):
    new_trait_image = create_new_image()
    all_images.append(new_trait_image)

def all_images_unique(all_images):
    seen = list()
    return not any(i in seen or seen.append(i) for i in all_images)
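Raising the recursion limit cannot fix this: each trait list above has exactly one option, so only one unique combination exists, and once it is in all_images every later call to create_new_image() recurses forever hunting for a second one. Replacing the recursion with a loop at least turns the hang into a clear error; a minimal sketch (the MAX_ATTEMPTS cap is an assumption, not part of the original code):
MAX_ATTEMPTS = 10000  # assumed safety cap; size it to your trait pool

def create_new_image():
    for _ in range(MAX_ATTEMPTS):
        new_image = {
            "Body": random.choices(Body, Body_weights)[0],
            "Face": random.choices(Face, Face_weights)[0],
            "Hand1": random.choices(Hand1, Hand1_weights)[0],
            "Hand2": random.choices(Hand2, Hand2_weights)[0],
            "Eyes": random.choices(Eyes, Eyes_weights)[0],
            "Hair": random.choices(Hair, Hair_weights)[0],
            "Lips": random.choices(Lips, Lips_weights)[0],
        }
        # retry instead of recursing when we draw a duplicate
        if new_image not in all_images:
            return new_image
    raise RuntimeError("ran out of unique trait combinations")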

How to add a fill layer in Photoshop programmatically

My goal is to define scale bars programmatically that have a fixed length but may be changed afterwards by the graphic designer.
I have come so far as to define a closed path within the document:
def addLine(doc):
    def point(x, y):
        result = Dispatch("Photoshop.PathPointInfo")
        result.Kind = 2  # for PsPointKind --> 2 (psCornerPoint)
        result.LeftDirection = result.rightDirection = result.Anchor = (x, y)
        return result
    points = [
        point(100, 100),
        point(200, 100),
        point(200, 110),
        point(100, 110)
    ]
    lineSubPathArray = Dispatch("Photoshop.SubPathInfo")
    lineSubPathArray.Operation = 1  # for PsShapeOperation --> 1 (psShapeAdd)
    lineSubPathArray.Closed = True
    lineSubPathArray.EntireSubPath = points
    myPathItem = doc.PathItems.Add("bar-path", [lineSubPathArray])
From here, I can load the saved document back into Photoshop (CS6) and then create a shape layer manually: Layer | New Fill Layer | Solid Color ...
This results in a shape layer, similar to what I get by using the line tool, in which the line effectively is a rectangle whose height may be changed later.
First question: how do I create the fill layer using the API?
Second: I defined a rectangle 100 pixels wide, but I get one 418 pixels wide. The doc has its doc.application.preferences.rulerUnits set to psPixels (1). Why is this?
Last: isn't it possible to define a line as a true line defined by two end points and set its stroke width instead of its height?
This may be of use: you can define a new colour with new SolidColor():
// define fillColor
var fillColor = new SolidColor();
var myColour = [57, 181,74];
fillColor.rgb.red = myColour[0];
fillColor.rgb.green = myColour[1];
fillColor.rgb.blue = myColour[2];
and then fill your path by adding myPathItem.fillPath(fillColor,ColorBlendMode.NORMAL,100,false,0,true,true);
// Switch off any dialog boxes
displayDialogs = DialogModes.NO; // OFF
var originalUnits = app.preferences.rulerUnits;
app.preferences.rulerUnits = Units.PIXELS;

// call the source document
var srcDoc = app.activeDocument;
create_path("bar-path");

function create_path(linename)
{
    var points = [
        [100, 100],
        [200, 100],
        [200, 110],
        [100, 110]
    ];
    // create the array of PathPointInfo objects
    var lineArray = new Array();
    for (var i = 0; i < points.length; i++)
    {
        lineArray[i] = new PathPointInfo;
        lineArray[i].kind = PointKind.CORNERPOINT;
        lineArray[i].anchor = points[i];
        lineArray[i].leftDirection = lineArray[i].anchor;
        lineArray[i].rightDirection = lineArray[i].anchor;
    }
    // create a SubPathInfo object, which holds the line array in its entireSubPath property
    var lineSubPathArray = new Array();
    lineSubPathArray.push(new SubPathInfo());
    lineSubPathArray[0].operation = ShapeOperation.SHAPEXOR;
    lineSubPathArray[0].closed = true;
    lineSubPathArray[0].entireSubPath = lineArray;
    // create the path item, passing the subpath to the add method
    var myPathItem = srcDoc.pathItems.add(linename, lineSubPathArray);
    // define fillColor
    var fillColor = new SolidColor();
    var myColour = [57, 181, 74];
    fillColor.rgb.red = myColour[0];
    fillColor.rgb.green = myColour[1];
    fillColor.rgb.blue = myColour[2];
    // fill the path so we can see something also
    myPathItem.fillPath(fillColor, ColorBlendMode.NORMAL, 100, false, 0, true, true);
    // deselect path
    deselect_path();
}

// switch back to normal
app.preferences.rulerUnits = originalUnits;
// Set Display Dialogs back to normal
displayDialogs = DialogModes.ALL; // NORMAL

function deselect_path()
{
    // =======================================================
    var idslct = charIDToTypeID("slct");
    var desc76 = new ActionDescriptor();
    var idnull = charIDToTypeID("null");
    var ref63 = new ActionReference();
    var idPath = charIDToTypeID("Path");
    var idOrdn = charIDToTypeID("Ordn");
    var idTrgt = charIDToTypeID("Trgt");
    ref63.putEnumerated(idPath, idOrdn, idTrgt);
    desc76.putReference(idnull, ref63);
    var idselectionModifier = stringIDToTypeID("selectionModifier");
    var idselectionModifierType = stringIDToTypeID("selectionModifierType");
    var idremoveFromSelection = stringIDToTypeID("removeFromSelection");
    desc76.putEnumerated(idselectionModifier, idselectionModifierType, idremoveFromSelection);
    executeAction(idslct, desc76, DialogModes.NO);
}
As for the rectangle being too large: what resolution and units do you have the PSD file set to? The script above switches the ruler units to pixels; you will also want the resolution of your document set to 72 dpi.
As for doing a stroke to replace the line, well... you've got options.
You can do a brush stroke with a two-point line:
var idStrk = charIDToTypeID( "Strk" );
var desc105 = new ActionDescriptor();
var idnull = charIDToTypeID( "null" );
var ref35 = new ActionReference();
var idPath = charIDToTypeID( "Path" );
var idOrdn = charIDToTypeID( "Ordn" );
var idTrgt = charIDToTypeID( "Trgt" );
ref35.putEnumerated( idPath, idOrdn, idTrgt );
desc105.putReference( idnull, ref35 );
var idUsng = charIDToTypeID( "Usng" );
var idPbTl = charIDToTypeID( "PbTl" );
desc105.putClass( idUsng, idPbTl );
executeAction( idStrk, desc105, DialogModes.NO );
However, I don't think that's what you're after. You can turn your path into a shape and then have a stroke and fill colour assigned. However... you need a minimum of three points for that to work.
As a suggestion, instead of a path, create a shape from the start - which oddly will work with a minimum of two points. But I have no idea how to do that in code!
There are two lessons I have learned from translating the solution by Ghoul Fool into Python:
Using COM in Python, one can make a lot of mistakes that go unnoticed, with nothing but cryptic error messages far beside the point. These include syntactic errors such as missing/redundant parentheses and wrong capitalisation.
Solutions that work with COM in Python do not work the same with the Python-Photoshop-API and vice versa, even though, judging by the source code of the library, apparently exactly the same thing is happening. I have not always been able to divine solutions that work both ways.
Here is, for those who come to this question later, what I ended up with to get a shape layer with a bar that can be changed in height afterwards. The action that is called is nothing more than a recording of New Fill Layer from the Layer menu (CS6):
def makeBar(doc):
    app = doc.parent
    app.preferences.rulerUnits = Units.Pixels

    def point(x, y):
        result = Dispatch("Photoshop.PathPointInfo")
        result.Kind = PointKind.CornerPoint
        result.LeftDirection = result.rightDirection = result.Anchor = (x, y)
        return result

    points = [
        point(100, 100),
        point(200, 100),
        point(200, 110),
        point(100, 110)
    ]
    lineSubPathArray = Dispatch("Photoshop.SubPathInfo")
    lineSubPathArray.Operation = ShapeOperation.ShapeAdd
    lineSubPathArray.Closed = True
    lineSubPathArray.EntireSubPath = points
    doc.PathItems.Add("bar-path", [lineSubPathArray])

app = Dispatch("Photoshop.Application")
doc = app.Open(self.fileName)
As for my second question: the unit of the path's corner points is invariably points, no matter what the rulers are set to.
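That also explains the size difference: a PostScript point is 1/72 inch, so the pixel width of a path depends on the document resolution. A quick sanity check (the 300 ppi figure is an assumption; substitute your document's actual resolution):
def points_to_pixels(pts, resolution_ppi):
    # points are 1/72 inch, so scale by resolution / 72
    return pts * resolution_ppi / 72.0

print(points_to_pixels(100, 300))  # 416.67 -- close to the observed 418 px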

Tkinter update canvas colors for a number of objects

Could anyone help me with this: I have a list of dictionaries where I store the information for future Tkinter rectangles. Once the rectangles are created, I'm trying to make them change color depending on a changing "x" value, but it only works for the last created object and not for the rest of them. Here is the code:
from tkinter import *
a = {"y": 2, "len": 4}
b = {"y": 5, "len": 7}
c = {"y": 6, "len": 8}
d = {"y": 2, "len": 4}
e = {"y": 4, "len": 12}
f = {"y": 3, "len": 10}
g = {"y": 7, "len": 10}
groupe = [a, d, b, c, g, e, f]
xt1 = 800
xt2 = 800
yt1 = 720
yt2 = 0
master = Tk()
root = Canvas(master, width=800, height=720)
root.pack()

# Moving RedLine
def deplacer():
    global xt1, yt1, xt2, yt2
    xt1 -= 5
    xt2 -= 5
    root.coords(laser_print, xt1, yt1, xt2, yt2)
    root.after(50, deplacer)
    return

# drawing blocks
graf_dist = 0
dist_betw = 0
untouched = "blue"
being_touched = "red"
touched = "gray"
block_color = untouched
for ensemble in groupe:
    graf_dist += 60
    dist_betw += 10
    xo = graf_dist + dist_betw
    yo = int(ensemble['y'] * 50)
    xl = xo + 60
    yl = int(ensemble['len'] * 50)
    if xo <= xt1 and xl > xt1:
        block_color = being_touched
    elif xo < xt1 and xl <= xt1:
        block_color = untouched
    drawing_block = root.create_rectangle(xo, yo, xl, yl, fill=block_color, tag="blocks")

# draw horizontal redline
laser_print = root.create_line(xt1, yt1, xt2, yt2, fill="red")

# updating the color of the rectangles
def update_color():
    x_block = xt1 + 1
    untouched = "blue"
    being_touched = "red"
    touched = "gray"
    block_color = untouched
    if xo <= x_block and xl > x_block:
        block_color = being_touched
    elif xo < x_block and xl <= x_block:
        block_color = untouched
    root.itemconfig(drawing_block, fill=block_color)
    root.after(50, update_color)

update_color()
deplacer()
mainloop()
Ok, first of all: you draw seven blocks, but they are all assigned to drawing_block. When you refer to drawing_block in update_color(), this is of course the last block.
The first thing you need to do is keep a separate handle for each block, for example by storing them in a list:
drawing_blocks = []
for ensemble in groupe:
    ...
    drawing_blocks.append(root.create_rectangle(xo, yo, xl, yl, fill=block_color, tag="blocks"))
Then you need to change the update_color() function to test whether the laser passes each of the separate blocks and then change the color of that specific block.
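A minimal sketch of that, assuming you store each block's left and right edges alongside its canvas id in the drawing loop (the tuple layout here is an assumption, not from the original code):
blocks = []
# inside the drawing loop: blocks.append((drawing_block, xo, xl))

def update_color():
    x_block = xt1 + 1
    for canvas_id, xo, xl in blocks:
        if xo <= x_block < xl:
            root.itemconfig(canvas_id, fill="red")   # being touched
        elif xl <= x_block:
            root.itemconfig(canvas_id, fill="blue")  # not reached yet
        else:
            root.itemconfig(canvas_id, fill="gray")  # already passed
    root.after(50, update_color)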

Converting the annotations to COCO format from Mask-RCNN dataset format

I want to train a model that detects vehicles and roads in an image. I will use Mask R-CNN and YOLACT++ for that purpose. I labelled some of my images for Mask R-CNN with the VGG Image Annotator, and the segmentation points look like in the image below.
As you can see, there is no area parameter or bbox parameter. I can find the bbox of my instances with min x, min y, max x, max y, but I couldn't find how to compute the area of the segmented region. You can see the YOLACT annotation format in the image below.
It takes tons of time to label all instances. I spent a minimum of 10 minutes labelling all the cars in one image, and I already have 500 labelled images. Do you have any advice or ideas that could save me time converting the first annotation format to the second one (Mask R-CNN to COCO (YOLACT))?
Something like this, but it depends on how you annotate in VGG:
# json, math and chain are used below; helper refers to the answerer's own
# utility module (the working solution further down inlines equivalents)
import json
import math
from itertools import chain

def vgg_to_coco(vgg_path: str, outfile: str = None, class_keyword: str = "Class"):
    with open(vgg_path) as f:
        vgg = json.load(f)
    images_ids_dict = {v["filename"]: i for i, v in enumerate(vgg.values())}
    # TODO fix: hard-coded image size
    images_info = [{"file_name": k, "id": v, "width": 1024, "height": 1024} for k, v in images_ids_dict.items()]
    classes = {class_keyword} | {r["region_attributes"][class_keyword] for v in vgg.values() for r in v["regions"]
                                 if class_keyword in r["region_attributes"]}
    category_ids_dict = {c: i for i, c in enumerate(classes, 1)}
    categories = [{"supercategory": class_keyword, "id": v, "name": k} for k, v in category_ids_dict.items()]
    annotations = []
    suffix_zeros = math.ceil(math.log10(len(vgg)))
    for i, v in enumerate(vgg.values()):
        for j, r in enumerate(v["regions"]):
            if class_keyword in r["region_attributes"]:
                x, y = r["shape_attributes"]["all_points_x"], r["shape_attributes"]["all_points_y"]
                annotations.append({
                    "segmentation": [list(chain.from_iterable(zip(x, y)))],
                    "area": helper.polygon_area(x, y),
                    "bbox": helper.bbox(x, y, out_format="width_height"),
                    "image_id": images_ids_dict[v["filename"]],
                    "category_id": category_ids_dict[r["region_attributes"][class_keyword]],
                    "id": int(f"{i:0>{suffix_zeros}}{j:0>{suffix_zeros}}"),
                    "iscrowd": 0
                })
    coco = {
        "images": images_info,
        "categories": categories,
        "annotations": annotations
    }
    if outfile is None:
        outfile = vgg_path.replace(".json", "_coco.json")
    with open(outfile, "w") as f:
        json.dump(coco, f)
You will have to change the 1024s to your image sizes, or if you have variable image sizes you will have to create a map for that.
You must create your own script to transform it; I had to do it from XML annotations to JSON for Mask R-CNN.
You can check the example:
https://github.com/adions025/XMLtoJson_Mask_RCNN
Working solution, extended from @Zac Tod's answer.
The image size can be computed on the fly.
import skimage.io
import math
import json
import os
from itertools import chain
import numpy as np

def vgg_to_coco(dataset_dir, vgg_path: str, outfile: str = None, class_keyword: str = "label"):
    with open(vgg_path) as f:
        vgg = json.load(f)
    images_ids_dict = {}
    images_info = []
    for i, v in enumerate(vgg.values()):
        images_ids_dict[v["filename"]] = i
        # read each image to get its real width and height
        image_path = os.path.join(dataset_dir, v['filename'])
        image = skimage.io.imread(image_path)
        height, width = image.shape[:2]
        images_info.append({"file_name": v["filename"], "id": i, "width": width, "height": height})
    classes = {class_keyword} | {r["region_attributes"][class_keyword] for v in vgg.values() for r in v["regions"].values()
                                 if class_keyword in r["region_attributes"]}
    category_ids_dict = {c: i for i, c in enumerate(classes, 1)}
    categories = [{"supercategory": class_keyword, "id": v, "name": k} for k, v in category_ids_dict.items()]
    annotations = []
    suffix_zeros = math.ceil(math.log10(len(vgg)))
    for i, v in enumerate(vgg.values()):
        for j, r in enumerate(v["regions"].values()):
            if class_keyword in r["region_attributes"]:
                x, y = r["shape_attributes"]["all_points_x"], r["shape_attributes"]["all_points_y"]
                annotations.append({
                    "segmentation": [list(chain.from_iterable(zip(x, y)))],
                    "area": PolyArea(x, y),
                    "bbox": [min(x), min(y), max(x) - min(x), max(y) - min(y)],
                    "image_id": images_ids_dict[v["filename"]],
                    "category_id": category_ids_dict[r["region_attributes"][class_keyword]],
                    "id": int(f"{i:0>{suffix_zeros}}{j:0>{suffix_zeros}}"),
                    "iscrowd": 0
                })
    coco = {
        "images": images_info,
        "categories": categories,
        "annotations": annotations
    }
    if outfile is None:
        outfile = vgg_path.replace(".json", "_coco.json")
    with open(outfile, "w") as f:
        json.dump(coco, f)
My data was labeled using makesense.ai, and region_attributes looks like this, so class_keyword="label" in the function call:
"region_attributes": {
    "label": "box"
}
To compute the polygon area, the code is copied from this answer:
def PolyArea(x, y):
    return 0.5 * np.abs(np.dot(x, np.roll(y, 1)) - np.dot(y, np.roll(x, 1)))
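A hypothetical call, assuming the makesense.ai export sits next to the images (the paths here are placeholders, not from the original answer):
vgg_to_coco(
    dataset_dir="datasets/train",
    vgg_path="datasets/train/via_region_data.json",
    outfile="datasets/train/annotations_coco.json",
    class_keyword="label",  # matches the region_attributes shown above
)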

How to print data when a Selectable GraphicsItem is clicked?

I have a JSON file which contains a coordinate list and some other information.
My JSON structure looks like this:
"annotations": [
{
"type": "Box",
"color": "red",
"box_top": 406.0,
"box_left": 656.0,
"box_height": 73.0,
"box_width": 40.0
}
],
"annotations": [
{
"type": "Box",
"color": "green",
"box_top": 450.0,
"box_left": 700.0,
"box_height": 95.0,
"box_width": 47.0
}
]
By taking the box values (box_top, box_left, box_height, box_width) I have drawn rectangles using QGraphicsView and QGraphicsScene. The code is given below:
def load_image(self, image_item):
    self.scene = QGraphicsScene(self.centralWidget)  # Created a QGraphicsScene
    self.pic = QPixmap(str(image_item.text()))  # Loaded Image
    self.brush = QBrush()
    self.pen = QPen(Qt.red)
    self.pen.setWidth(2)
    self.pixItem = QGraphicsPixmapItem(self.pic)
    self.load_view = self.scene.addItem(self.pixItem)  # Image Added to Scene
    # Opening JSON and fetching data
    # …
    for rect in json_file['annotations']:
        # Take type and color and store them in variables
        self.box_type = rect['type']
        self.box_color = rect['color']
        # Take the box_left, box_top, box_width, box_height values
        self.rect_item = self.scene.addRect(rect['box_left'], rect['box_top'],
                                            rect['box_width'], rect['box_height'],
                                            self.pen, self.brush)  # x, y, w, h
        self.rect_item.setFlag(QGraphicsItem.ItemIsSelectable)  # Item is Selectable
        # self.rect_item.setFlag(QGraphicsItem.ItemIsMovable)  # Item is Movable
    self.fit_view = self.gView.setScene(self.scene)
    self.gView.fitInView(self.pixItem, Qt.KeepAspectRatio)
    self.gView.setRenderHint(QPainter.Antialiasing)
    self.gView.show()
Now what I want is: when I click one box (say the one which has color red) in the GraphicsScene, I want to print its corresponding type and color. In a simple way, I want to print all data related to that box. A sample image is also attached for reference. Note: the image is the output of this program.
Thank you.
Use the code provided in the other answer I gave you (the second and last methods are probably better) and get the data you need from the item. The "type" is the object class; the color is the pen() color.
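If it helps, here is one way to sketch the idea (an illustration, not the linked answer verbatim): stash each annotation dict on its rectangle with setData() when you create it, then read it back whenever the selection changes:
# when creating each rectangle (inside the loop in load_image):
self.rect_item.setData(0, rect)  # key 0 is an arbitrary choice

# connect once, after building the scene:
self.scene.selectionChanged.connect(self.on_selection_changed)

def on_selection_changed(self):
    # print the stored annotation for every selected box
    for item in self.scene.selectedItems():
        print(item.data(0))  # the full dict, including type and color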
