Based on a disparity matrix from a passive stereo-camera system i need to calculate a v-disparity representation for obstacle detection with OpenCV.
A working implementation is not the problem. The problem is to do it fast...
(One) Reference for v-Disparity: Labayrade, R. and Aubert, D. and Tarel, J.P.
Real time obstacle detection in stereovision on non flat road geometry through v-disparity representation
The basic idea, in short, for getting the v-disparity (figure 1) is to analyze the rows of the disparity matrix (figure 2) and represent the result as a histogram over the disparity values for each row. The u-disparity (figure 3) is the same, computed on the columns of the disparity matrix. (All figures are false-colored.)
I have implemented the "same" in Python and C++. The speed in Python is acceptable, but in C++ the u- and v-disparity together take roughly half a second (0.5 s).
(1. edit: due to the separate time measurement, only the calculation of the u-histogram takes a big amount of time...)
This leads me to following questions:
Is it possible to avoid the loops for the line-wise calculation of the histogram? Is there a "trick" to do it with one call of calcHist-Function from OpenCV? Perhaps with the dimensions?
Is it in C++ just bad-coded and the runtime-issue are not related to the loops used for calculation?
Thanks, all
Working implementation in Python:
#!/usr/bin/env python2
#-*- coding: utf-8 -*-
#
# THIS SOURCE-CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED. IN NO EVENT WILL THE AUTHOR BE HELD LIABLE FOR ANY DAMAGES ARISING FROM
# THE USE OF THIS SOURCE-CODE. USE AT YOUR OWN RISK.
import cv2
import numpy as np
import time
def draw_object(image, x, y, width=50, height=100):
    """Paint a filled box into ``image`` (in place) to fake an obstacle.

    The box is ``width`` columns wide, centred on column ``x``, and spans the
    ``height`` rows directly above row ``y`` (row ``y`` itself is excluded).
    It is filled with the value currently stored at ``image[y, x]``.

    The top and left edges are clamped to 0: a negative slice start would
    otherwise be interpreted by NumPy as "count from the end" and the box
    would silently come out empty or in the wrong place.
    """
    color = image[y, x]
    top = max(y - height, 0)
    left = max(x - width // 2, 0)
    image[top:y, left:x + width // 2] = color
IMAGE_HEIGHT = 600
IMAGE_WIDTH = 800

# Demo loop: build a synthetic disparity image, derive the v- and u-disparity
# histograms from it, show and save all three images, and stop on 'q'.
while True:
    max_disp = 200

    # create fake disparity: the value grows linearly with the row index
    image = np.zeros((IMAGE_HEIGHT, IMAGE_WIDTH), np.uint8)
    for c in range(IMAGE_HEIGHT)[::-1]:
        image[c, ...] = int(float(c) / IMAGE_HEIGHT * max_disp)
    draw_object(image, 275, 175)
    draw_object(image, 300, 200)
    draw_object(image, 100, 350)

    # calculate v-disparity: one histogram (over disparity) per image row.
    # np.float64 replaces the np.float alias, which newer NumPy no longer has.
    vhist_vis = np.zeros((IMAGE_HEIGHT, max_disp), np.float64)
    for i in range(IMAGE_HEIGHT):
        # A row holds IMAGE_WIDTH pixels, so that is the normaliser which
        # keeps the result in [0, 1] and the uint8 cast below in range.
        vhist_vis[i, ...] = cv2.calcHist(images=[image[i, ...]], channels=[0], mask=None, histSize=[max_disp],
                                         ranges=[0, max_disp]).flatten() / float(IMAGE_WIDTH)
    vhist_vis = np.array(vhist_vis * 255, np.uint8)
    vblack_mask = vhist_vis < 5
    vhist_vis = cv2.applyColorMap(vhist_vis, cv2.COLORMAP_JET)
    vhist_vis[vblack_mask] = 0  # keep near-empty bins black instead of JET's dark blue

    # calculate u-disparity: one histogram (over disparity) per image column.
    uhist_vis = np.zeros((max_disp, IMAGE_WIDTH), np.float64)
    for i in range(IMAGE_WIDTH):
        # A column holds IMAGE_HEIGHT pixels, hence this normaliser.
        uhist_vis[..., i] = cv2.calcHist(images=[image[..., i]], channels=[0], mask=None, histSize=[max_disp],
                                         ranges=[0, max_disp]).flatten() / float(IMAGE_HEIGHT)
    uhist_vis = np.array(uhist_vis * 255, np.uint8)
    ublack_mask = uhist_vis < 5
    uhist_vis = cv2.applyColorMap(uhist_vis, cv2.COLORMAP_JET)
    uhist_vis[ublack_mask] = 0

    image = cv2.applyColorMap(image, cv2.COLORMAP_JET)
    cv2.imshow('image', image)
    cv2.imshow('vhist_vis', vhist_vis)
    cv2.imshow('uhist_vis', uhist_vis)
    cv2.imwrite('disparity_image.png', image)
    cv2.imwrite('v-disparity.png', vhist_vis)
    cv2.imwrite('u-disparity.png', uhist_vis)
    if chr(cv2.waitKey(0)&255) == 'q':
        break
Working implementation in C++:
#include <iostream>
#include <stdlib.h>
#include <ctime>
#include <opencv2/opencv.hpp>
using namespace std;
/// Paints a filled box into `image` to fake an obstacle in the disparity map.
///
/// The box is `width` columns wide, centred on column `x`, and covers the
/// `height` rows directly above row `y` (row `y` itself is excluded). It is
/// filled with the value currently stored at (y, x). `image` is passed by
/// value, but cv::Mat copies share pixel data, so the caller's image changes.
///
/// The box is clamped to the image: the unsigned subtractions would otherwise
/// wrap around when the box crosses the top or left border.
void draw_object(cv::Mat image, unsigned int x, unsigned int y, unsigned int width=50, unsigned int height=100)
{
    const unsigned char color = image.at<unsigned char>(y, x);
    const unsigned int half = width / 2;
    const int top = (y > height) ? static_cast<int>(y - height) : 0;
    const int left = (x > half) ? static_cast<int>(x - half) : 0;
    const int wantedRight = static_cast<int>(x + half);
    const int right = (wantedRight < image.cols) ? wantedRight : image.cols;
    image(cv::Range(top, static_cast<int>(y)), cv::Range(left, right)).setTo(cv::Scalar(color));
}
int main()
{
unsigned int IMAGE_HEIGHT = 600;
unsigned int IMAGE_WIDTH = 800;
unsigned int MAX_DISP = 250;
unsigned int CYCLE = 0;
//setenv("QT_GRAPHICSSYSTEM", "native", 1);
// === PREPERATIONS ==
cv::Mat image = cv::Mat::zeros(IMAGE_HEIGHT, IMAGE_WIDTH, CV_8U);
cv::Mat uhist = cv::Mat::zeros(IMAGE_HEIGHT, MAX_DISP, CV_32F);
cv::Mat vhist = cv::Mat::zeros(MAX_DISP, IMAGE_WIDTH, CV_32F);
cv::Mat tmpImageMat, tmpHistMat;
float value_ranges[] = {(float)0, (float)MAX_DISP};
const float* hist_ranges[] = {value_ranges};
int channels[] = {0};
int histSize[] = {MAX_DISP};
struct timespec start, finish;
double elapsed;
while(1)
{
CYCLE++;
// === CLEANUP ==
image = cv::Mat::zeros(IMAGE_HEIGHT, IMAGE_WIDTH, CV_8U);
uhist = cv::Mat::zeros(IMAGE_HEIGHT, MAX_DISP, CV_32F);
vhist = cv::Mat::zeros(MAX_DISP, IMAGE_WIDTH, CV_32F);
// === CREATE FAKE DISPARITY WITH OBJECTS ===
for(int i = 0; i < IMAGE_HEIGHT; i++)
image.row(i) = ((float)i / IMAGE_HEIGHT * MAX_DISP);
draw_object(image, 200, 500);
draw_object(image, 525 + CYCLE%100, 275);
draw_object(image, 500, 300 + CYCLE%100);
clock_gettime(CLOCK_MONOTONIC, &start);
// === CALCULATE V-HIST ===
for(int i = 0; i < IMAGE_HEIGHT; i++)
{
tmpImageMat = image.row(i);
vhist.row(i).copyTo(tmpHistMat);
cv::calcHist(&tmpImageMat, 1, channels, cv::Mat(), tmpHistMat, 1, histSize, hist_ranges, true, false);
vhist.row(i) = tmpHistMat.t() / (float) IMAGE_HEIGHT;
}
clock_gettime(CLOCK_MONOTONIC, &finish);
elapsed = (finish.tv_sec - start.tv_sec);
elapsed += (finish.tv_nsec - start.tv_nsec) * 1e-9;
cout << "V-HIST-TIME: " << elapsed << endl;
clock_gettime(CLOCK_MONOTONIC, &start);
// === CALCULATE U-HIST ===
for(int i = 0; i < IMAGE_WIDTH; i++)
{
tmpImageMat = image.col(i);
uhist.col(i).copyTo(tmpHistMat);
cv::calcHist(&tmpImageMat, 1, channels, cv::Mat(), tmpHistMat, 1, histSize, hist_ranges, true, false);
uhist.col(i) = tmpHistMat / (float) IMAGE_WIDTH;
}
clock_gettime(CLOCK_MONOTONIC, &finish);
elapsed = (finish.tv_sec - start.tv_sec);
elapsed += (finish.tv_nsec - start.tv_nsec) * 1e-9;
cout << "U-HIST-TIME: " << elapsed << endl;
// === PREPARE AND SHOW RESULTS ===
uhist.convertTo(uhist, CV_8U, 255);
cv::applyColorMap(uhist, uhist, cv::COLORMAP_JET);
vhist.convertTo(vhist, CV_8U, 255);
cv::applyColorMap(vhist, vhist, cv::COLORMAP_JET);
cv::imshow("image", image);
cv::imshow("uhist", uhist);
cv::imshow("vhist", vhist);
if ((cv::waitKey(1)&255) == 'q')
break;
}
return 0;
}
Figure 1: v-disparity
Figure 2: Fake disparity matrix
Figure 3: u-disparity
edit:
correct name for u- and v-disparity and separate time measurement in c++ example
small typo
Today i had the possibility to reinvestigate the problem. Remembering the OpenCV basics (1) for the Mat-structure and the fact that only one calculation takes a huge amount of time, i had the solution.
In OpenCV, each row of an image could be reached by a row-pointer. For iterating columns (done in u-disparity calculation) i suspect, that OpenCV needs to resolve every row-pointer + column-offset for building the histogram.
Changing the Code in a way, that OpenCV is able to use row-pointer, solves the problem for me.
| old code [s] | changed [s]
------------+--------------+-------------
V-HIST-TIME | 0.00351909 | 0.00334152
U-HIST-TIME | 0.600039 | 0.00449285
So for the u-hist-loop i transpose the image and reverse the operation after the loop. The line wise access for calculation could now be done via the row-pointer.
Changed Codelines:
// === CALCULATE U-HIST ===
// Transpose first: every column of the original image becomes a row of `image`.
image = image.t();
for(int i = 0; i < IMAGE_WIDTH; i++)
{
// Row i of the transposed image holds the pixels of original column i.
tmpImageMat = image.row(i);
uhist.col(i).copyTo(tmpHistMat);
cv::calcHist(&tmpImageMat, 1, channels, cv::Mat(), tmpHistMat, 1, histSize, hist_ranges, true, false);
// Store the scaled histogram as column i of the u-disparity matrix.
uhist.col(i) = tmpHistMat / (float) IMAGE_WIDTH;
}
// Transpose again to restore the original orientation of `image`.
image = image.t();
Finally my second question takes effect, the runtime-issue belongs not to the loop. A time less than 5 ms is (for now) fast enough.
Very nice code and very illustrative. It helped me understand u-disparity. However, your C/C++ code is broken. I fixed it with this code:
// u-disparity: one histogram column per image column -> MAX_DISP x IMAGE_WIDTH.
cv::Mat uhist = cv::Mat::zeros(MAX_DISP, IMAGE_WIDTH, CV_32F);
// v-disparity: one histogram row per image row -> IMAGE_HEIGHT x MAX_DISP.
cv::Mat vhist = cv::Mat::zeros(IMAGE_HEIGHT, MAX_DISP, CV_32F);
Related
I'm working on some optimazation and want to convert some parts from python to c++
Is it possible to convert this code to c++ with opencv?
The python code uses numpy
import numpy as np
from PIL import Image
# Load the image and view it as a flat list of 3-component pixels.
pil_img = Image.open(input_filename)
img = np.array(pil_img)
pixels = img.reshape((-1, 3))
num_pixels = pixels.shape[0]
# NOTE(review): this asks for 5x the pixel count; the slice below can never
# return more than num_pixels rows. Possibly 5 percent was intended - confirm.
num_samples = int(num_pixels*5)
# Shuffle the pixel indices and take the first num_samples of them.
idx = np.arange(num_pixels)
np.random.shuffle(idx)
samples = pixels[idx[:num_samples]]
update
/// Returns a random sample of the pixels of `m`.
///
/// @param m               Continuous matrix; each element (all channels of
///                        one pixel) is treated as an opaque run of bytes.
/// @param sample_percent  Share of the pixels to draw, in percent; values
///                        outside 0..100 are clamped.
/// @return The sampled pixels back to back: (number of samples) *
///         m.elemSize() bytes. The selection is deterministic because the
///         random engine is seeded with 0.
std::vector<uchar> sample_pixels(const cv::Mat& m, int sample_percent=5){
    assert(m.isContinuous());
    const uchar* input = m.ptr<uchar>();

    const int bytes_per_pixel = static_cast<int>(m.elemSize());
    const int num_pixels = static_cast<int>(m.total());
    const int percent = sample_percent < 0 ? 0 : (sample_percent > 100 ? 100 : sample_percent);
    // sample_percent is a percentage, so divide by 100. Multiplying by it
    // directly asked for more samples than there are pixels and would index
    // past the end of idx.
    const int num_samples = static_cast<int>(static_cast<long long>(num_pixels) * percent / 100);

    std::cout
        << "num pixels: " << num_pixels << '\n'
        << "num samples: " << num_samples << '\n';

    std::vector<uchar> samples(static_cast<std::size_t>(num_samples) * bytes_per_pixel);

    // Fills idx with sequentially increasing values
    std::vector<int> idx(num_pixels);
    std::iota(idx.begin(), idx.end(), 0);

    // Shuffle idx
    std::mt19937 engine(0);
    std::shuffle(idx.begin(), idx.end(), engine);

    // Copy the bytes of every selected pixel into the output.
    for(int i = 0; i < num_samples; i++){
        const uchar* src = input + static_cast<std::size_t>(idx[i]) * bytes_per_pixel;
        uchar* dst = samples.data() + static_cast<std::size_t>(i) * bytes_per_pixel;
        for(int b = 0; b < bytes_per_pixel; b++){
            dst[b] = src[b];
        }
    }

    //auto output_mat = cv::Mat(samples, false);
    //cv::imwrite("enhance-samples.png", output_mat);
    return samples;
}
This is the equivalent code in C++11. This should be several times faster than your python code.
#include <algorithm>
#include <cassert>
#include <numeric>
#include <random>
#include <string>
#include <vector>
#include <opencv2/opencv.hpp>
void shuffling(const std::string &input_filename, const std::string &output_filename) {
// ========== UPDATE ==========
const cv::Mat plain_input_mat = cv::imread(input_filename, -1);
// Equivalent to img.reshape((-1, 3))
const cv::Mat input_mat = plain_input_mat.reshape(3);
// ============================
// By doing this, you can access the pixels without any extra checks.
assert(input_mat.isContinuous());
const auto *input = input_mat.ptr<cv::Vec3b>();
const auto num_samples = input_mat.total();
std::vector<cv::Vec3b> output(num_samples);
std::vector<int> idx(input_mat.total());
std::iota(idx.begin(), idx.end(), 0); // Equivalent to arange.
// Note: numpy uses PCG64 which does not exist in the std library.
std::mt19937 engine(0);
std::shuffle(idx.begin(), idx.end(), engine);
for (int i = 0; i < num_samples; i++) {
output[i] = input[idx[i]];
}
// Save as an image if necessary.
auto output_mat = cv::Mat(output, false);
cv::imwrite(output_filename, output_mat);
}
There are a couple of additional notes.
Note1: Due to the difference in the shuffle algorithm between python and std, the results are not exactly the same.
Note2: With your code, num_samples cannot be larger than the number of pixels in the input image, which seems to be a bug. Please check the length of the samples.
Note3: In both implementations, the most expensive part is shuffle. 60% for python and more than 80% for C++ is spent here. If you want to optimize further, this is definitely where you should exploit.
I implemented a simple distance-based LOD for future terrain rendering using tessellation control/evaluation shaders in OpenGL. Surprisingly, I still get cracks (T-junctions) in my output...
The shader code is
uniform mat4 pvmM;
uniform vec4 u_camerapos;
#ifdef TESSELATION_CONTROL_SHADER
layout(vertices = 4) out;
// Returns the tessellation level for the edge p1-p2: the closer the midpoint
// of the edge is to p3 (the camera position at the call sites), the higher
// the level. Declared as uint because GLSL has no `unsigned int` type.
uint dist(vec3 p1,vec3 p2,vec3 p3)
{
    float d = distance((p1 + p2) / 2.0, p3); // precise float d=(...)
    if (d < 1.5) return 64u;
    if (d < 2.5) return 32u;
    if (d < 3.0) return 16u;
    if (d < 4.0) return 8u;
    if (d < 5.0) return 4u;
    if (d < 6.0) return 2u;
    return 1u;
}
// Tessellation control shader entry point: sets the per-patch tessellation
// levels once (invocation 0) and passes every control point through unchanged.
void main() {
if (gl_InvocationID == 0){
// One outer level per patch edge, derived from the distance between the
// edge midpoint and the camera.
// NOTE(review): the vertex pairs (3,0), (0,1), (1,2), (2,3) are the edges of
// the quad only if the four patch indices run around its perimeter - verify
// against the index buffer layout.
gl_TessLevelOuter[0] = dist( gl_in[3].gl_Position.xyz,gl_in[0].gl_Position.xyz,u_camerapos.xyz);
gl_TessLevelOuter[1] = dist( gl_in[0].gl_Position.xyz,gl_in[1].gl_Position.xyz,u_camerapos.xyz);
gl_TessLevelOuter[2] = dist( gl_in[1].gl_Position.xyz,gl_in[2].gl_Position.xyz,u_camerapos.xyz);
gl_TessLevelOuter[3] = dist( gl_in[2].gl_Position.xyz,gl_in[3].gl_Position.xyz,u_camerapos.xyz);
// Both inner levels use the floored mean of the four outer levels.
float mean=floor((gl_TessLevelOuter[0]+gl_TessLevelOuter[1]+gl_TessLevelOuter[2]+gl_TessLevelOuter[3])/4.0);
gl_TessLevelInner[0] = int(mean);
gl_TessLevelInner[1] = int(mean);
}
gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;
}
#endif
#ifdef TESSELATION_EVALUATION_SHADER
// Tessellation evaluation shader entry point: bilinearly interpolates the
// four control points at gl_TessCoord and applies the pvmM matrix.
void main() {
// Interpolate along x between control points 0-1 and between 2-3 ...
vec4 p1 = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);
vec4 p2 = mix(gl_in[2].gl_Position, gl_in[3].gl_Position, gl_TessCoord.x);
// ... then along y between the two intermediate points.
gl_Position = pvmM*mix(p1, p2, gl_TessCoord.y);
}
#endif
Using python:
vertices and indices are generated using
# Number of vertices along each side of the square grid.
blocksz=8
# Grid vertices (x, 0, z) in row-major order: x varies fastest, then z.
vertices=np.array([(x,0,z) for z in range(blocksz) for x in range(blocksz)],dtype='f4')
# Shift the grid so that it is centred on the origin in x and z.
vertices=vertices-np.array([blocksz/2,0,blocksz/2])
# One 4-index patch per grid cell, listed as
# (x, z), (x+1, z), (x, z+1), (x+1, z+1) - i.e. row by row, not around the
# cell's perimeter.
faces= np.array([(x+z*blocksz,x+1+z*blocksz,x+(z+1)*blocksz,x+1+(z+1)*blocksz ) for z in range(blocksz-1) for x in range(blocksz-1) ],dtype='i4')
and drawn using
# Draw the index buffer as tessellation patches.
# NOTE(review): the indices are declared as 16-bit (GL_UNSIGNED_SHORT) here
# while the faces array above is built with dtype 'i4' - confirm that the
# uploaded index buffer really is 16-bit.
glDrawElements(GL_PATCHES,count,GL_UNSIGNED_SHORT,None)
On the image below,
you can see the results. Some T-junctions are present along the x axis (left-right, for example in the first and last columns) but never along the z axis (top-bottom). The virtual camera is represented by the red dot, and its position is passed as a uniform.
All the rest is working as expected.
Any idea of what I'm mis-understanding?
OK, found the bug.
When generating the index array (faces), I used a wrong winding (same winding as for GL_QUAD_STRIP). The correct faces array should be:
# Patch indices listed around the perimeter of each cell:
# (x, z), (x+1, z), (x+1, z+1), (x, z+1).
faces= np.array([(x+z*blocksz,x+1+z*blocksz,x+1+(z+1)*blocksz,x+(z+1)*blocksz ) for z in range(blocksz-1) for x in range(blocksz-1) ],dtype='i4')
Then, in the tessellation evaluation shader (notice the flipping of points 2 and 3):
// First intermediate point: between control points 0 and 1.
vec4 p1 = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);
// Second intermediate point: between control points 3 and 2 (in that order).
vec4 p2 = mix(gl_in[3].gl_Position, gl_in[2].gl_Position, gl_TessCoord.x);
I have a model in caffe that produce a multi-dimensional array. Size of this array is [1x10x8x8] so in python I haven't problem with this size because python automatically manage this array and I know order of elements in that. but when I switch to opencv with c++ the whole array is a vector and I haven't any idea how to regenerate something like python array, I use cv::NAryMatIterator to access multi-dimensional array such below
// Null-terminated list of the matrices to iterate over (only `prob` here).
const cv::Mat* arrays[]={&prob,0}; //my multi-dimensional array is prob
// Receives the current plane of each listed matrix.
cv::Mat my_planes[1];
cv::NAryMatIterator it(arrays,my_planes);
cv::Mat Multi_Array ; //temporary Mat
// Visit every plane the iterator reports; Multi_Array ends up holding the
// last plane visited.
for (int p = 0; p < it.nplanes; ++p,++it) {
Multi_Array = it.planes[0];
}
after doing that I see Multi_Array size is [640x1] which seems that it is equal to 8x8x10 that python produced. is there anyway to access 8x8 planes one by one?
EDIT: my multi-dimensional array size is [1x10x8x8]
To access the 3D array as if it were a 2D array with shape [640][1], you could write 3 loops to iterate on the elements using a [x,y,z] format like:
// Flat storage for 8 * 8 * 10 = 640 elements.
int data[640][1] = { 0 };
int width = 8, height = 8, depth = 10;
for (int x = 0; x < width; x++)
for (int y = 0; y < height; y++)
for (int z = 0; z < depth; z++)
{
// Linear index of (x, y, z): z varies fastest, then y, then x.
int idx = x * height * depth + y * depth + z;
data[idx][0] = idx;
}
This fills the array with numbers ranging from 0 to 639.
If you are looking to access a 2D array as a 1D, check this answer.
If your model data is ordered in row-major form, you can have OpenCV interpret the data as a Mat of the required size. Then, planes of the Mat can be accessed using multidim_mat.row( row_number ).
In order to create a Mat from the data:
// Backing buffer: 8 * 8 * 10 = 640 ints.
int data[640] = { 0 };
// Extent of each of the three dimensions.
const int size[] = { 8, 8, 10 };
// Wrap `data` in a 3-dimensional Mat header of 32-bit signed ints.
cv::Mat multidim_mat(3, size, CV_32S, data);
std::cout << multidim_mat.dims << std::endl;
// Print the extent of every dimension.
for (int i = 0; i < multidim_mat.dims; i++) {
std::cout << "Dimension " << i << " is of size " << multidim_mat.size[i] << std::endl;
}
The CV_32S is to inform OpenCV to interpret the data as signed 32-bit integers.
References: https://docs.opencv.org/3.4.0/d3/d63/classcv_1_1Mat.html#a5fafc033e089143062fd31015b5d0f40, https://docs.opencv.org/3.4.0/d3/d63/classcv_1_1Mat.html#details,
In first step we need to get a pointer to OpenCV Mat object, you can do this by the below command.(I assume that data that represent your data is primarily float and consider probability Mat is prob which we get this Mat from caffe)
// View the Mat's raw buffer as floats.
// NOTE(review): only valid if prob really stores 32-bit floats - confirm.
float* p = (float*)(prob.data);
This pointer will points to the where data is reside in memory. So for example if we want to get access to the element in (1,3,7,7) location we can do this operation like this:
int S= sizeof(float);
float val = p[(
7*p.step[3]/S + //forth dimension
7*p.step[2]/S + //third dimension
3*p.step[1]/S //second dimension
)]
//first dimension is not needed, because it is decoded in address of p
//and if you have any higher number than 1 in first dimension you need to add it to the above command
So for traversing in the probability matrix you can do that like the below:
// Walk the second, third and fourth dimension of prob (the index of the
// first dimension stays 0). The loop bounds come from the Mat itself, so no
// index can run past the data.
const size_t S = sizeof(float);
for (int d2 = 0; d2 < prob.size[1]; ++d2) {          // second dimension
    for (int d3 = 0; d3 < prob.size[2]; ++d3) {      // third dimension
        for (int d4 = 0; d4 < prob.size[3]; ++d4) {  // fourth dimension
            float val = p[(d2 * prob.step[1] / S + d3 * prob.step[2] / S + d4 * prob.step[3] / S)];
            // ... use val here ...
        }
    }
}
Ive retrained an InceptionV3 model via the Tensorflow for Poets tutorials and can successfully run label_image.py on my trained data and on new data and get correct labels with good accuracy. Awesome!
If I run my model through my Mac Obj-C++ app my resulting labels are wildly different.
For example - my training is to classify which 'shot type' a frame of video is, (extreme close up, close up, medium, long, extreme long) for classifying video editing content.
label_image.py classifies a frame from a video as 85% likely close up.
My C++ / Obj-C App run with the same frame classifies it as Extreme Long with 60%
Both are running the same version of Tensorflow (1.1) on Mac OS X CPU compiled with AVX/SIMD/FMA optimizations.
My Apps pipeline :
I have a BGR ordered OpenCV Mat image which I can use successfully elsewhere and get sane results from. I create this CV Mat from an OS X CVPixelBufferRef mapped to a BGRA CV MAT like so:
// Drop the alpha channel: 4-channel BGRA in, 3-channel BGR out.
cv::cvtColor(BGRAImage, frameMat, cv::COLOR_BGRA2BGR);
I feed that BGR CV Mat (named frameMat) into a Tensor via code borrowed from the iOS contrib example, like so :
// Copy frameMat into a 1 x 299 x 299 x 3 float tensor using nearest-neighbour
// resampling and (value - input_mean) / input_std normalisation.
void* baseAddress = (void*)frameMat.data;
size_t width = (size_t) frameMat.cols;
size_t height = (size_t) frameMat.rows;
// Use the Mat's real row stride; rows are not guaranteed to be tightly packed.
size_t bytesPerRow = (size_t) frameMat.step;
const int wanted_input_width = 299;
const int wanted_input_height = 299;
const int wanted_input_channels = 3;
const float input_mean = 128.0f;
const float input_std = 128.0f;
resized_tensor = tensorflow::Tensor( tensorflow::DT_FLOAT, tensorflow::TensorShape({1, wanted_input_height, wanted_input_width, wanted_input_channels}));
auto image_tensor_mapped = resized_tensor.tensor<float, 4>();
tensorflow::uint8 *in = static_cast<tensorflow::uint8*>(baseAddress);
float *out = image_tensor_mapped.data();
for (int y = 0; y < wanted_input_height; ++y)
{
    // Output row y samples source row in_y, and output column x samples
    // source column in_x. Deriving in_x from y and in_y from x transposes the
    // image and reads outside the buffer for non-square frames.
    const int in_y = (y * (int)height) / wanted_input_height;
    tensorflow::uint8 *in_row = in + (in_y * bytesPerRow);
    float *out_row = out + (y * wanted_input_width * wanted_input_channels);
    for (int x = 0; x < wanted_input_width; ++x)
    {
        const int in_x = (x * (int)width) / wanted_input_width;
        tensorflow::uint8 *in_pixel = in_row + (in_x * wanted_input_channels);
        float *out_pixel = out_row + (x * wanted_input_channels);
        // Interestingly the iOS example uses BGRA and DOES NOT re-order tensor channels to RGB <-> BGR
        // Matching that.
        // NOTE(review): frameMat is BGR; confirm which channel order the model was trained on.
        out_pixel[0] = ((float)in_pixel[0] - (float)input_mean) / (float)input_std;
        out_pixel[1] = ((float)in_pixel[1] - (float)input_mean) / (float)input_std;
        out_pixel[2] = ((float)in_pixel[2] - (float)input_mean) / (float)input_std;
    }
}
My session creation code:
// Load the serialized graph definition from the file at inception2015GraphPath.
tensorflow::Status load_graph_status = ReadBinaryProto(tensorflow::Env::Default(), [inception2015GraphPath cStringUsingEncoding:NSUTF8StringEncoding], &inceptionGraphDef);
if (load_graph_status.ok())
{
// Create a session with default options and attach the loaded graph to it.
tensorflow::SessionOptions options;
inceptionSession = std::unique_ptr<tensorflow::Session>(tensorflow::NewSession(options));
// NOTE(review): session_create_status is never inspected in this snippet.
tensorflow::Status session_create_status = inceptionSession->Create(inceptionGraphDef);
}
Running the graph:
// Feed resized_tensor into input_layer and fetch feature_layer and
// final_layer, in that order, into `outputs`.
tensorflow::Status run_status = inceptionSession->Run({ {input_layer, resized_tensor} }, {feature_layer, final_layer}, {}, &outputs);
And pulling out the labels / feature vector (penultimate layer)
// Collect the per-label scores and the feature vector from `outputs`.
// The boxed-number, dictionary and string literals below use Objective-C's
// `@` syntax; `#` in their place does not compile.
NSMutableArray* outputLabels = [NSMutableArray arrayWithCapacity:self.labelsArray.count];
NSMutableArray* outputScores = [NSMutableArray arrayWithCapacity:self.labelsArray.count];

// 1 = labels and scores
auto predictions = outputs[1].flat<float>();
for (int index = 0; index < predictions.size(); index += 1)
{
    const float predictionValue = predictions(index);
    // NOTE(review): index is always < predictions.size(), so this modulo has
    // no effect; labelsArray.count may have been intended - confirm.
    NSString* labelKey = self.labelsArray[index % predictions.size()];

    // Accumulate the running score for this label.
    NSNumber* currentLabelScore = self.averageLabelScores[labelKey];
    NSNumber* incrementedScore = @([currentLabelScore floatValue] + predictionValue );
    self.averageLabelScores[labelKey] = incrementedScore;

    [outputLabels addObject:labelKey];
    [outputScores addObject:@(predictionValue)];
}

// 0 is feature vector
tensorflow::Tensor feature = outputs[0];
int64_t numElements = feature.NumElements();
tensorflow::TTypes<float>::Flat featureVec = feature.flat<float>();
NSMutableArray* featureElements = [NSMutableArray arrayWithCapacity:numElements];
for(int i = 0; i < numElements; i++)
{
    [featureElements addObject:@( featureVec(i) ) ];
}

if(self.averageFeatureVec == nil)
{
    self.averageFeatureVec = featureElements;
}
else
{
    // average each vector element with the prior
    // (as written this keeps the larger of the two values)
    for(int i = 0; i < featureElements.count; i++)
    {
        float a = [featureElements[i] floatValue];
        float b = [self.averageFeatureVec[i] floatValue];
        self.averageFeatureVec[i] = @( MAX(a,b)) ;
    }
}

return @{ kSynopsisStandardMetadataFeatureVectorDictKey : featureElements ,
          @"Labels" : outputLabels,
          @"Scores" : outputScores,
          };
I've attempted to look into the tensor ordering (NHWC), and have checked the tensor creation code but I might be missing something obvious to others. Ive also tried changing channel order, to no avail.
Any insight would be greatly helpful. Thank you!
My usual method for debugging issues like this is:
First save out a raw C array of values from an example input that I know works. For example, make sure that label_image works with your newly-trained model, and then write out the float* array you get from input_layer->flat<float>().data(), using pseudo-code like this:
// Dump the input tensor as a C array literal that can be pasted into other code.
float* input_data = input_layer->flat<float>().data();
int input_data_count = input_layer->flat<float>().size();
printf("float g_test_input[]={\n");
// One value per line.
for (int i = 0; i < input_data_count; ++i) {
printf(" %f,\n", input_data[i]);
}
printf("};\n");
You should end up with a big array that you can copy into your new code. Overwrite whatever input you have in the code you want to test. Now run it, and you should see the same output that you saw from label_image. If you don't, you know there's something different about the model you're loading. If the output is identical, then you know that the input preprocessing is different.
Assuming that it's the preprocessing that's wrong, my next step is to try loading an image from disk. The iOS example code does that in the simple example. Save out some of your expected input into an image file, and then make sure that both label_image and your code produce the same result.
So this one is tricky.
I failed to mention I was running the graph_transform tool on my retrained graph - and was running quantize weights to lower my graphs size. In the past, I've not had an issue with this messing up classification scores at all, but apparently that caused an issue.
Running the above code with a graph transform call without quantize weights fixed the issue.
I am writing a thin wrapper around ArUco augmented reality library (which is based on OpenCV). An interface I am trying to build is very simple:
Python passes image to C++ code;
C++ code detects markers and returns their locations and other info to Python as tuple of dicts.
However, I couldn't figure out how to represent an image in Python to pass it to C++. For GUI and camera management I am going to use PyQt, so initially it is going to be QImage, but I can't simply pass it to OpenCV (or I can?). At first, I tried to use nested tuples to represent row, column and color of each pixel, so I ended up with this sample code:
using namespace cv;
namespace py = boost::python;
void display(py::tuple pix)
{
    /*
        Receive image from Python and display it.

        pix is a tuple of rows, each row a sequence of pixels, each pixel a
        sequence of three integers. The image size comes from the number of
        rows and the length of the first row; cells that a shorter row does
        not cover keep the initial fill colour.
    */
    // Query the sizes once instead of on every loop iteration: each
    // py::len() call goes through the Python C API.
    const int rows = static_cast<int>(py::len(pix));
    const int cols = static_cast<int>(py::len(pix[0]));
    Mat img(rows, cols, CV_8UC3, Scalar(0, 0, 255));
    for (int y = 0; y < rows; y++)
    {
        const py::object row = pix[y];
        const int row_len = static_cast<int>(py::len(row));
        // Never write past the image width, even if this row is longer
        // than the first one.
        for (int x = 0; x < row_len && x < cols; x++)
        {
            const py::object pixel = row[x];
            Vec3b rgb;
            for (int i = 0; i < 3; i++)
            {
                const int value = py::extract<int>(pixel[i]);
                // Clamp to 0..255 instead of silently wrapping around.
                rgb[i] = saturate_cast<uchar>(value);
            }
            img.at<Vec3b>(y, x) = rgb;
        }
    }
    imshow("Image", img);
    waitKey(0);
}
// Defines the Python extension module "aruco" and exposes display() in it.
BOOST_PYTHON_MODULE(aruco)
{
py::def("display", display);
}
It turned out to be painfully slow (a few seconds for a single frame), so I went googling and found solution that should be much faster: use NumPy arrays, so the code would look something like that:
// Sketch of the intended NumPy-based variant: receives the array as a generic
// Python object and shows it. The conversion to cv::Mat is not implemented
// here, so `img` is still empty when it reaches imshow.
void display(py::object array)
{
Mat img;
// ... some magic here to convert NumPy array to Mat ...
imshow("Image", img);
waitKey(0);
}
However, I have no idea how to convert NumPy Array (which in C++ level is just a Python Object) to OpenCV Mat. I would appreciate any help here.
Alternatively, maybe NumPy is not really needed, so I could just pass QImage Python object directly to C++ layer? Or maybe there is a different approach to this problem? Any advice is appreciated!
The best solution in your situation is to use a custom boost::python converter for the cv::Mat object. OpenCV has a Python wrapper, and when you use this wrapper you operate on NumPy arrays - you don't even need to know that those arrays are converted to cv::Mat objects while "crossing the C++ <-> Python border". Writing such a converter for a simple type is quite easy; creating a converter for cv::Mat, however, isn't simple. Fortunately, someone else has already done this - here is a version for OpenCV 2.x and here one for 3.x. If you are not familiar with boost::python converters, this article should help you.
Hope it helps, if you wil have any problems, let us know.
I wrote this example for those who didn't know that there is a Boost NumPy module. It shows how to convert a Mat to an ndarray and vice versa, which should give you an idea of how to convert an ndarray.
#define BOOST_PYTHON_STATIC_LIB
#define BOOST_LIB_NAME "boost_numpy35"
//#include <boost/config/auto_link.hpp>
#include <boost/python.hpp>
#include <boost/python/numpy.hpp>
#include <iostream>
#include <opencv2/opencv.hpp>
namespace py = boost::python;
namespace np = boost::python::numpy;
/// Starts the embedded Python interpreter and the Boost.Python NumPy layer.
void Init() {
    // set your python location.
    // Static storage: Python keeps the pointer passed to Py_SetPythonHome, so
    // the buffer has to outlive this function. A plain local array would be
    // gone as soon as Init() returns.
    static wchar_t str[] = L"D:\\Anaconda3\\envs\\tensorflow_vision";
    Py_SetPythonHome(str);
    Py_Initialize();
    np::initialize();
}
/// Wraps an 8-bit cv::Mat in a (rows, cols, channels) ndarray WITHOUT copying.
///
/// The returned array points at mat's pixel buffer and has no owner object,
/// so the Mat (or a clone of it) must stay alive for as long as the array is
/// used.
/// @throws cv::Exception if the Mat is not 8 bits per channel.
np::ndarray ConvertMatToNDArray(const cv::Mat& mat) {
    CV_Assert(mat.depth() == CV_8U);
    py::tuple shape = py::make_tuple(mat.rows, mat.cols, mat.channels());
    // Take the byte strides from the Mat itself: step[0] is the real length of
    // a row (which includes padding for ROIs / non-continuous matrices),
    // step[1] the size of one pixel.
    py::tuple stride = py::make_tuple(mat.step[0], mat.step[1], mat.elemSize1());
    np::dtype dt = np::dtype::get_builtin<uchar>();
    np::ndarray ndImg = np::from_data(mat.data, dt, shape, stride, py::object());
    return ndImg;
}
/// Copies a 3-dimensional, C-contiguous uint8 ndarray (rows, cols, channels)
/// into a newly allocated cv::Mat.
///
/// @return The converted Mat, or an empty Mat (after printing a message) if
///         the array has a different dtype, dimension count or memory layout.
cv::Mat ConvertNDArrayToMat(const np::ndarray& ndarr) {
    // shape[2] is read below, so the array must really have three dimensions.
    if (ndarr.get_nd() != 3) {
        std::cout << "wrong number of dimensions error" << std::endl;
        return cv::Mat();
    }

    // Copy the dtype name into a std::string: a char* taken from the temporary
    // py::str would dangle as soon as the expression ends.
    const std::string dtype_str = py::extract<std::string>(py::str(ndarr.get_dtype()));

    // you should find proper type for c++. in this case we use 'CV_8UC3' image, so we need to create 'uchar' type Mat.
    int depth;
    if (dtype_str == "uint8") {
        depth = CV_8U;
    }
    else {
        std::cout << "wrong dtype error" << std::endl;
        return cv::Mat();
    }

    // The flat memcpy below is only correct for C-contiguous data.
    if (!(ndarr.get_flags() & np::ndarray::C_CONTIGUOUS)) {
        std::cout << "non-contiguous array error" << std::endl;
        return cv::Mat();
    }

    // get_shape() returns Py_intptr_t* which gives the size of the n-th dimension of the ndarray.
    const Py_intptr_t* shape = ndarr.get_shape();
    // variables for creating Mat object
    const int rows = static_cast<int>(shape[0]);
    const int cols = static_cast<int>(shape[1]);
    const int channel = static_cast<int>(shape[2]);

    int type = CV_MAKETYPE(depth, channel); // CV_8UC3
    cv::Mat mat = cv::Mat(rows, cols, type);
    memcpy(mat.data, ndarr.get_data(), sizeof(uchar) * rows * cols * channel);
    return mat;
}
/// Demo: loads an image, converts it Mat -> ndarray -> Mat, brightens the
/// converted copy and shows it next to the original.
int main()
{
    using namespace std;
    int exit_code = 0;
    try
    {
        // initialize boost python and numpy
        Init();

        // import module
        py::object main_module = py::import("__main__");
        py::object print = main_module.attr("__builtins__").attr("print"); // this is for printing python object

        // get image
        cv::Mat img;
        img = cv::imread("Lenna.jpg", cv::IMREAD_COLOR);
        if (img.empty())
        {
            std::cout << "can't getting image" << std::endl;
            return -1;
        }

        // convert Mat to NDArray
        cv::Mat cloneImg = img.clone(); // converting functions will access to same data between Mat and NDArray. so we should clone Mat object. This may important in your case.
        np::ndarray ndImg = ConvertMatToNDArray(cloneImg);

        // You can check if it's properly converted.
        //print(ndImg);

        // convert NDArray to Mat
        cv::Mat matImg = ConvertNDArrayToMat(ndImg); // also you can convert ndarray to mat.
        if (matImg.empty())
        {
            // The converter has already printed the reason.
            return -1;
        }

        // add 10 brightness to converted image
        // Matrix arithmetic saturates at 255; a per-element `uchar += 10`
        // wraps bright pixels around to dark values.
        matImg += cv::Scalar::all(10);

        // show image
        cv::imshow("original image", img);
        cv::imshow("converted image", matImg);
        cv::waitKey(0);
        cv::destroyAllWindows();
    }
    catch (py::error_already_set&)
    {
        PyErr_Print();
        exit_code = 1;
    }
    // Single pause for both the normal and the error path.
    system("pause");
    return exit_code;
}
Optionally, if you don't like to use wrappers, and want to use native python extension module, you can do it like this.
python3:
my_image = cv.imread("my_image.jpg", 1) # reads colorfull image in python
dims = my_image.shape # get image shape (h, w, c)
my_image = my_image.ravel() # flattens 3d array into 1d
# Hand the shape tuple and the flattened pixel data to the C++ extension.
cppextenionmodule.np_to_mat(dims, my_image)
c++:
// Python-callable: np_to_mat(shape, flat_pixels)
//   shape       - tuple (rows, cols, channels)
//   flat_pixels - 1-D uint8 NumPy array with rows * cols * channels elements
// Copies the pixels into a cv::Mat of the matching 8-bit type.
// Returns None, or NULL with a Python exception set if the arguments do not
// fit together.
static PyObject *np_to_mat(PyObject *self, PyObject *args){
    PyObject *size;
    PyArrayObject *image;

    if (!PyArg_ParseTuple(args, "O!O!", &PyTuple_Type, &size, &PyArray_Type, &image)) {
        return NULL;
    }
    if (PyTuple_Size(size) != 3) {
        PyErr_SetString(PyExc_ValueError, "shape must be a (rows, cols, channels) tuple");
        return NULL;
    }

    const long rows = PyLong_AsLong(PyTuple_GetItem(size ,0));
    const long cols = PyLong_AsLong(PyTuple_GetItem(size ,1));
    const long nchannels = PyLong_AsLong(PyTuple_GetItem(size ,2));
    if (PyErr_Occurred()) {
        return NULL;
    }
    if (rows <= 0 || cols <= 0 || nchannels < 1 || nchannels > 4) {
        PyErr_SetString(PyExc_ValueError, "unsupported image shape");
        return NULL;
    }
    if (PyArray_NDIM(image) != 1 || PyArray_TYPE(image) != NPY_UINT8
            || PyArray_SIZE(image) != (npy_intp)(rows * cols * nchannels)) {
        PyErr_SetString(PyExc_ValueError, "expected a flat uint8 array matching the shape");
        return NULL;
    }

    // Let the Mat own its pixel buffer: an image-sized array on the stack can
    // overflow the stack, and the Mat type has to follow nchannels.
    cv::Mat my_img((int)rows, (int)cols, CV_MAKETYPE(CV_8U, (int)nchannels));
    unsigned char *dst = my_img.data;
    const npy_intp count = PyArray_SIZE(image);
    // PyArray_GETPTR1 applies the array's stride, so non-contiguous 1-D views
    // are copied correctly as well.
    for(npy_intp k = 0; k < count; k++){
        dst[k] = (*(unsigned char *)PyArray_GETPTR1(image, k));
    }

    // ... whatever with the image

    Py_RETURN_NONE;
}
Here is a pybind11 version of afewthings/DomQ's answer. I found pybind11 was better for my project than boost::python (both libraries are quite nice)
// convert a cv::Mat to an np.array
//
// Copies an 8-bit cv::Mat into a new (height, width, channels) uint8 array.
// The array owns the copy: a capsule attached as its base frees the buffer
// when the array is garbage-collected.
// Throws py::value_error for matrices that are not 8 bits per channel.
py::array to_array(const cv::Mat& im) {
    if (im.depth() != CV_8U) {
        throw py::value_error("to_array: only 8-bit matrices are supported");
    }
    // The flat copy below needs tightly packed rows.
    const cv::Mat src = im.isContinuous() ? im : im.clone();

    const ssize_t channels = src.channels();
    const ssize_t height = src.rows;
    const ssize_t width = src.cols;
    const ssize_t dim = sizeof(uchar) * height * width * channels;
    auto data = new uchar[dim];
    // From here on the capsule owns the buffer.
    py::capsule owner(data, [](void* f){
        // handle releasing data
        delete[] reinterpret_cast<uchar*>(f);
    });
    std::copy(src.data, src.data + dim, data);

    const ssize_t item = sizeof(uchar);
    return py::array_t<uchar>(
        py::buffer_info(
            data,
            item, //itemsize
            py::format_descriptor<uchar>::format(),
            3, // ndim: always (height, width, channels), whatever the channel count
            std::vector<ssize_t> { height, width, channels }, // shape
            std::vector<ssize_t> { width * channels * item, channels * item, item } // strides in bytes
        ),
        owner
    );
}
// convert an np.array to a cv::Mat
//
// Copies a uint8 array of shape (rows, cols) or (rows, cols, channels) into a
// new cv::Mat. Returns an empty Mat (after printing a message) for any other
// dtype or number of dimensions.
cv::Mat from_array(const py::array& ar) {
    if (!ar.dtype().is(py::dtype::of<uchar>())) {
        std::cout << "ERROR unsupported dtype!" << std::endl;
        return cv::Mat();
    }
    // shape[2] may only be read when the array really has a third dimension.
    const auto ndim = ar.ndim();
    if (ndim != 2 && ndim != 3) {
        std::cout << "ERROR unsupported number of dimensions!" << std::endl;
        return cv::Mat();
    }

    // memcpy needs C-contiguous memory; ensure() returns the array itself when
    // it already is, and a contiguous copy otherwise (e.g. for slices).
    const auto packed = py::array_t<uchar, py::array::c_style | py::array::forcecast>::ensure(ar);
    if (!packed) {
        std::cout << "ERROR could not get contiguous data!" << std::endl;
        return cv::Mat();
    }

    const int rows = static_cast<int>(packed.shape(0));
    const int cols = static_cast<int>(packed.shape(1));
    const int channels = (ndim == 3) ? static_cast<int>(packed.shape(2)) : 1;
    if (channels < 1 || channels > CV_CN_MAX) {
        std::cout << "ERROR unsupported number of channels!" << std::endl;
        return cv::Mat();
    }

    int type = CV_MAKETYPE(CV_8U, channels); // CV_8UC3
    cv::Mat mat = cv::Mat(rows, cols, type);
    const size_t bytes = sizeof(uchar) * rows * cols * channels;
    if (bytes > 0) {
        memcpy(mat.data, packed.data(), bytes);
    }

    return mat;
}