How to debug crashing C++ library loaded in python project - python

I am attempting to figure out why calling a function in a dynamically loaded lib crashes python. I'm doing the following, I have a C++ function in a dynamic library file, which is loaded in python using ctypes. I then call the function from python:
lib = cdll.LoadLibrary(libPath)
# Note: using c_char_p instead of POINTER(c_char) does not yield any difference in result
# Export const char* GetSection(const char* TilesetID, int32_t X0, int32_t Y0, int32_t X1, int32_t Y1, uint8_t*& OutData, uint64_t& OutDataSize)
lib.GetSection.argtypes = [POINTER(c_char), c_int32, c_int32, c_int32, c_int32, POINTER(c_void_p), POINTER(c_uint64)]
lib.GetSection.restype = POINTER(c_char)
output_data = c_void_p()
output_size = c_uint64()
str_data = lib.GetSection(id.encode('ascii'), x0, y0, x1, y1, byref(output_data), byref(output_size))
On MacOS, this works exactly as expected. Unfortunately on Windows 11, it does not. I'm running from a Jupyter notebook and the kernel crashes and restarts immediately after the lib.GetSection call.
I have attached the Visual Studio debugger to the process, and can see that on the C++ side of things, the function is being correctly called, all parameters are correct, and it returns without error. It is at this point that the python kernel crashes, deep in a python call stack that I don't have symbols for.
How do I even approach debugging this? Does anything look wrong with the way I am calling the function?

Having a toy C++ function to demonstrate your problem would help. Below is a best guess C++ function with the same signature and the Python code to call it:
test.cpp
#include <cstdint>

// Export macro. __declspec(dllexport) only exists on Windows toolchains, so
// fall back to the GCC/Clang visibility attribute elsewhere; this lets the
// same file build as a .dll, .so or .dylib.
#if defined(_WIN32)
#define API __declspec(dllexport)
#else
#define API __attribute__((visibility("default")))
#endif

extern "C" {

// Toy stand-in with the same signature as the function from the question.
// Allocates a 5-byte buffer, returns it through OutData/OutDataSize and
// returns a static string. The caller must release the buffer with Delete().
API const char* GetSection(const char* TilesetID, int32_t X0, int32_t Y0, int32_t X1,
                           int32_t Y1, uint8_t*& OutData, uint64_t& OutDataSize) {
    OutData = new uint8_t[5] { 1, 2, 3, 4, 5 };
    OutDataSize = 5;
    return "hello";
}

// Releases a buffer previously handed out by GetSection(). It must be freed
// on the C++ side because it was allocated with new[].
API void Delete(uint8_t* OutData) {
    delete [] OutData;
}

}
test.py
import ctypes as ct
dll = ct.CDLL('./test')

# Note change to 2nd to last argument: the C++ parameter is `uint8_t*&`, so the
# Python side passes the address of a `POINTER(c_uint8)` for the callee to fill.
dll.GetSection.argtypes = (ct.c_char_p, ct.c_int32, ct.c_int32, ct.c_int32, ct.c_int32,
                           ct.POINTER(ct.POINTER(ct.c_uint8)), ct.POINTER(ct.c_uint64))
dll.GetSection.restype = ct.c_char_p

# Declare Delete's prototype as well, so ctypes type-checks the argument and
# does not try to interpret the (void) return value as an int.
dll.Delete.argtypes = (ct.POINTER(ct.c_uint8),)
dll.Delete.restype = None


def GetSection(tileid, x0, y0, x1, y1):
    """Call the native GetSection and copy its output buffer into Python.

    Args:
        tileid: Tileset id as ``bytes`` (passed as ``const char*``).
        x0, y0, x1, y1: 32-bit integer arguments forwarded unchanged.

    Returns:
        A ``(str_data, out_data)`` tuple: the returned C string as ``bytes``
        and the output buffer as a Python list of ints.
    """
    output_data = ct.POINTER(ct.c_uint8)()
    output_size = ct.c_uint64()
    str_data = dll.GetSection(tileid, x0, y0, x1, y1,
                              ct.byref(output_data), ct.byref(output_size))
    out_data = output_data[:output_size.value]  # create a Python list of the data
    dll.Delete(output_data)  # the data was copied above, so it can be freed now
    return str_data, out_data


print(GetSection(b'id', 1, 2, 3, 4))
Output:
(b'hello', [1, 2, 3, 4, 5])

Related

What's the difference between ctypes.POINTER(ctypes.c_int) and ctypes.c_int * 5?

I'm trying to call a python function (a callback) from C++ with ctypes. The python function accept an array of ints as parameters. I need to access this array in python.
c code
typedef void (*FooFunc)(int*, int*);
void runfoo(FooFunc foo_func) {
int dims[5] = {3, 5, 1, 1, 1};
foo_func(dims, dims);
}
python code
def foo(dims1, dims2):
x = dims1[0]
y = dims1[1]
z = dims1[2]
print(x, y, z)
x = dims2[0]
y = dims2[1]
z = dims2[2]
print(x, y, z)
libc = ctypes.CDLL('./libpath.so')
protofoo = ctypes.CFUNCTYPE(None, ctypes.c_int * 5, ctypes.POINTER(ctypes.c_int))
libc.runfoo(protofoo(foo))
It gives
(-333106720, 32766, -333106720)
(3, 5, 1).
What's the difference between ctypes.c_int * 5 and ctypes.POINTER(ctypes.c_int) in prototype?
ctypes.POINTER(ctypes.c_int) is equivalent to int*.
ctypes.c_int * 5 is a type equivalent to the type of int arr[5]. Its implementation doesn't decay to a pointer as a parameter like C, so it looks like when using it as a parameter it expects an array to be passed by value on the stack. I tested that below by printing the address of dims in hex in C and you can see its 64-bit pointer value is the first two 32-bit elements returned to python when displayed as hexadecimal:
test.c
#include <stdio.h>
typedef void (*FooFunc)(int*, int*);
__declspec(dllexport) void runfoo(FooFunc foo_func) {
int dims[5] = {3, 5, 1, 1, 1};
printf("%p\n",dims);
foo_func(dims, dims);
}
test.py
import ctypes
def foo(dims1, dims2):
x = dims1[0]
y = dims1[1]
z = dims1[2]
print(f'{x:08X} {y:08X} {z:08X}')
x = dims2[0]
y = dims2[1]
z = dims2[2]
print(f'{x:08X} {y:08X} {z:08X}')
libc = ctypes.CDLL('test')
protofoo = ctypes.CFUNCTYPE(None, ctypes.c_int * 5, ctypes.POINTER(ctypes.c_int))
libc.runfoo(protofoo(foo))
Output
000000FD6E1EED00
6E1EED00 000000FD 6E1EED00
00000003 00000005 00000001

Wrong ctypes assignation

I made a CPP DLL and I'm trying to call function inside it from python.
I've achieved this multiple times for other functions, but this one, I just can't find my mistake.
dll_name = "..\\src\\x64\\Debug\\2019-3A-IBD-MLDLL.dll"
dllabspath = os.path.dirname(os.path.abspath(__file__)) + os.path.sep + dll_name
myDll = CDLL(dllabspath)
#fit_reg_RBF_naive
myDll.fit_reg_RBF_naive.argtypes = [ct.c_void_p, ct.c_double, ct.c_void_p, ct.c_int, ct.c_int]
myDll.fit_reg_RBF_naive.restypes = ct.c_void_p
#predict_reg_RBF_naive
myDll.predict_reg_RBF_naive.argtypes = [ct.c_void_p, ct.c_void_p, ct.c_void_p, ct.c_int, ct.c_double, ct.c_int]
myDll.predict_reg_RBF_naive.restypes = ct.c_double
def fit_reg_RBF_naive(pyXTrain, pyGamma, pyYTrain, pySampleCount, pyInputCountPerSample):
XTrain = (ct.c_double * len(pyXTrain))(*pyXTrain)
YTrain = (ct.c_double * len(pyYTrain))(*pyYTrain)
inputCountPerSample = ct.c_int(pyInputCountPerSample)
sampleCount = ct.c_int(pySampleCount)
gamma = ct.c_double(pyGamma)
return myDll.fit_reg_RBF_naive(XTrain, gamma, YTrain, sampleCount, inputCountPerSample)
def predict_reg_RBF_naive(pyW, pyXTrain, pyXpredict ,pyInputCountPerSample, pyGamma, pySampleCount):
XTrain = (ct.c_double * len(pyXTrain))(*pyXTrain)
inputCountPerSample = ct.c_int(pyInputCountPerSample)
sampleCount = ct.c_int(pySampleCount)
gamma = ct.c_double(pyGamma)
Xpredict = (ct.c_double * len(pyXpredict))(*pyXpredict)
return myDll.predict_reg_RBF_naive(W, XTrain, Xpredict, inputCountPerSample, gamma, sampleCount)
Basically, I load my DLL and set the ctypes argument and result types for both of my functions. Then I write a Python wrapper so that the user does not have to retype every cast from Python to C++.
My types on the cpp side seems good too:
extern "C" {
SUPEREXPORT double predict_reg_RBF_naive(double* W, double* X, double* Xpredict, int inputCountPerSample, double gamma, int N);
SUPEREXPORT double* fit_reg_RBF_naive(double* XTrain, double gamma, double* YTrain, int sampleCount, int inputCountPerSample);
}
I have no warning from the compiler for the cpp part, I've printed the memory adresse before the return inside fit_reg_RBF_naive from cpp and the W in python and they are the same.
000002B358384980 // cpp address of W before return
0x58384980 # Python address of W after function call
For me it seems the same address. Maybe I'm wrong.
So when I try to call my second cpp function it said
myDll.predict_reg_RBF_naive(W, XTrain, Xpredict,inputCountPerSample, gamma, sampleCount)
OSError: exception: access violation reading 0x000000007C7380A0
It crashed in the cpp when it tries to read W. They are no free or 'delete' in the cpp and the variable is properly allocated : double* W = new double[2];
Also, when I print W type in python I get <class 'int'>.
How comes my W seems to have the same address regarding the language, but has not the good type? Changing the result type of fit_reg_RBF_naive to POINTER(ct.c_double * 2) makes no change.
EDIT:
Here is how I call my functions:
from dll_load import predict_reg_RBF_naive, fit_reg_RBF_naive
gamma = 50
sampleCount = 2
inputCountPerSample = 3
XTrain = [1.0, 1.0, 1.0, 3.0, 3.0, 3.0]
YTrain = [-1.0, 1.0]
Xpredict = [1.0, 1.0, 1.0]
W = fit_reg_RBF_naive(XTrain, gamma, YTrain, sampleCount, inputCountPerSample)
print(predict_reg_RBF_naive(W, XTrain, Xpredict, inputCountPerSample, gamma, sampleCount))
[Python 3.Docs]: ctypes - A foreign function library for Python.
You misspelled restypes (it should be restype). By doing so, restype is not initialized and defaults to int, so on 64bit the returned pointer is truncated to its lower 32 bits (compare 000002B358384980 with 0x58384980 above; this wouldn't be a problem on 32bit), and you ran into:
[SO]: Python ctypes cdll.LoadLibrary, instantiate an object, execute its method, private variable address truncated (@CristiFati's answer)
[SO]: python ctypes issue on different OSes (@CristiFati's answer)
Besides that, there are several problems in the code:
If the C function specifies a pointer (double* in this case), don't use ctypes.c_void_p (in argtypes or restype) to map it, as it might be too wide, use (for this case) ctypes.POINTER(ctypes.c_double) instead
For me this doesn't even compile (I wonder how were you able to run that code). I'm going to exemplify on XTrain only, but applies to YTrain and Xpredict as well. ctypes doesn't know to convert a Python list to a ctypes.POINTER(ctypes.c_double) (or ctypes.c_void_p), and the conversion must be made manually (to a ctypes.c_double array):
XTrain = [1.0, 1.0, 1.0, 3.0, 3.0, 3.0]
xtrain_ctypes = (ctypes.c_double * len(XTrain))(*XTrain)
and pass xtrain_ctypes to the functions.

Convert C++ pointer to Python numpy array

I am working on a C/C++ DLL which used OpenCV, and in this one I perform some operations. In this example, I change the contrast of an image I read on Python, transfer to the DLL to perform the operation, and get back the result on Python to display it. I am doing this using pointers on the first pixel of each image, but in Python I don't find the way to recreate correctly the image using this pointer.
I already verified the Mat object in C++ is continuous, and I check the result saved from the DLL which is correct. The problem is in Python for me, but I don't see where I do something wrong.
The C++ class and function :
#pragma once
#include <vector>
#include <string>
#include <fstream>
#include <opencv2/core/core.hpp>
#include <opencv2\highgui\highgui.hpp>
#include <thread>
using namespace cv;
using namespace std;
class EpsImageProcessing
{
// -------------- Methods --------------
public:
EpsImageProcessing();
~EpsImageProcessing();
unsigned short * imAdjustContrast(void * ptrImg, int width, int height, int contrastValue);
// -------------- Atributes --------------
Mat imgResult;
unsigned short *imgAdress;
};
unsigned short * EpsImageProcessing::imAdjustContrast(void * ptrImg, int width, int height, int contrastValue)
{
// Get image and reshape it as Mat object
Mat imgTemp = Mat(height, width, CV_8UC1, (uchar*)ptrImg);
// Convert to double to perform calculations
imgTemp.convertTo(imgTemp, CV_32FC1);
// Calculate the contrast coefficient
float coeff = (259*((float)contrastValue+255)) / (255*(259 - (float)contrastValue));
// Change contrast
imgTemp = coeff * (imgTemp - 128) + 128;
// Convert image to original type
imgTemp.convertTo(imgTemp, CV_8UC1);
// Return result
imgResult= imgTemp.clone(); // imgTmp is an attribute of the class of my DLL
imwrite("imgAfter.jpg", imgResult);
bool test = imgResult.isContinuous(); // return true
imgAdress = imgResult.ptr<ushort>();
return imgAdress; //imgResult.ptr<ushort>(); // (unsigned short *)imgResult.data;
}
Then the C wrapper that links the C++ class to other languages like Python:
__declspec(dllexport) unsigned short* __stdcall imAdjustContrast(void* handle, void* imgPtr, int width, int height, int contrastValue)
{
if (handle)
{
EpsImageProcessing* data = (EpsImageProcessing*)handle;
return data->imAdjustContrast(imgPtr, width, height, contrastValue);
}
return false;
}
And the Python code :
from ctypes import *
import numpy, os, cv2
import matplotlib.pyplot as plt
dirpath = os.environ['PATH']
os.environ['PATH'] = dirpath + ";C:/x64/Debug/" # include of opencv_world.dll
mydll = cdll.LoadLibrary("MyDll.dll")
class mydllClass(object):
def __init__(self, width, height, nFrame, path, filename):
mydll.AllocateHandleImg.argtypes = []
mydll.AllocateHandleImg.restype = c_void_p
mydll.imAdjustContrast.argtypes = [c_void_p, c_void_p, c_int, c_int, c_int]
mydll.imAdjustContrast.restype = POINTER(c_ushort)
self.obj = mydll.AllocateHandleImg()
def imAdjustContrast(self, ptrImg, width, height, contrast):
return mydll.imAdjustContrast(self.obj, ptrImg, width, height, contrast)
img0 = cv2.imread("C:\\Users\\mg\\Downloads\\imgInit.jpg", 0)
imgC = myclass.imAdjustContrast(img0.__array_interface__['data'][0], img0.shape[1], img0.shape[0], -127)
imgAfter = cv2.imread("C:\\Users\\mg\\Downloads\\imgAfter.jpg", 0)
image = numpy.zeros((img0.shape[0],img0.shape[1]), dtype=numpy.dtype(numpy.uint8))
for i in range(img0.shape[0]):
for j in range(img0.shape[1]):
indice = i*img0.shape[1]+j
image[i,j] = numpy.uint8(imgC[indice])
newImg = numpy.ctypeslib.as_array(cast(imgC, POINTER(c_uint8)), shape=(img0.shape))
plt.figure()
plt.subplot(221)
plt.imshow(imgAfter)
plt.gray()
plt.colorbar()
plt.title('image saved from C++ DLL')
plt.subplot(222)
plt.imshow(image)
plt.gray()
plt.colorbar()
plt.title('image recreated in Python (for loop)')
plt.subplot(223)
plt.imshow(newImg)
plt.gray()
plt.colorbar()
plt.title('image recreated in Python (cast)')
plt.show()
And the final result on Python is :
I found that the small difference between the two "good images" (image saved in C++ and recreate in Python with cast method) are from the compression of the image (.jpg) which is different between Python and C++. Dealing with a png and the image created in Python with the C++ pointer is okay with the cast method.
So the problem now is about the two for loops which don't create the image from the pointer in a good way. Any idea?

C++ conversion from NumPy array to Mat (OpenCV)

I am writing a thin wrapper around ArUco augmented reality library (which is based on OpenCV). An interface I am trying to build is very simple:
Python passes image to C++ code;
C++ code detects markers and returns their locations and other info to Python as tuple of dicts.
However, I couldn't figure out how to represent an image in Python to pass it to C++. For GUI and camera management I am going to use PyQt, so initially it is going to be QImage, but I can't simply pass it to OpenCV (or I can?). At first, I tried to use nested tuples to represent row, column and color of each pixel, so I ended up with this sample code:
using namespace cv;
namespace py = boost::python;
void display(py::tuple pix)
{
/*
Receive image from Python and display it.
*/
Mat img(py::len(pix), py::len(pix[0]), CV_8UC3, Scalar(0, 0, 255));
for (int y = 0; y < py::len(pix); y++)
for (int x = 0; x < py::len(pix[y]); x++)
{
Vec3b rgb;
for (int i = 0; i < 3; i++)
rgb[i] = py::extract<int>(pix[y][x][i]);
img.at<Vec3b>(Point(x, y)) = rgb;
}
imshow("Image", img);
waitKey(0);
}
BOOST_PYTHON_MODULE(aruco)
{
py::def("display", display);
}
It turned out to be painfully slow (a few seconds for a single frame), so I went googling and found solution that should be much faster: use NumPy arrays, so the code would look something like that:
void display(py::object array)
{
Mat img;
// ... some magic here to convert NumPy array to Mat ...
imshow("Image", img);
waitKey(0);
}
However, I have no idea how to convert NumPy Array (which in C++ level is just a Python Object) to OpenCV Mat. I would appreciate any help here.
Alternatively, maybe NumPy is not really needed, so I could just pass QImage Python object directly to C++ layer? Or maybe there is a different approach to this problem? Any advice is appreciated!
The best solution in your situation is using a custom boost::python converter for the cv::Mat object. OpenCV has a Python wrapper, and when you are using this wrapper you are operating on NumPy arrays - you don't even need to know that those arrays are converted to cv::Mat objects while "crossing the c++ <-> python border". Writing such a converter for a simple type is quite easy; however, creating a converter for cv::Mat isn't simple. Fortunately someone else already did this - here is a version for OpenCV 2.x and here for 3.x. If you are not familiar with boost::python converters, this article should help you.
Hope it helps; if you have any problems, let us know.
I wrote this example for those who didn't know there is a Boost NumPy module. You can see how to convert a Mat to an NDArray and vice versa. It should give you an idea of how to convert an ndarray.
#define BOOST_PYTHON_STATIC_LIB
#define BOOST_LIB_NAME "boost_numpy35"
//#include <boost/config/auto_link.hpp>
#include <boost/python.hpp>
#include <boost/python/numpy.hpp>
#include <iostream>
#include <opencv2/opencv.hpp>
namespace py = boost::python;
namespace np = boost::python::numpy;
void Init() {
// set your python location.
wchar_t str[] = L"D:\\Anaconda3\\envs\\tensorflow_vision";
Py_SetPythonHome(str);
Py_Initialize();
np::initialize();
}
// Wraps the pixel buffer of an 8-bit cv::Mat in a (rows, cols, channels)
// ndarray WITHOUT copying. The returned array has no owner (py::object()),
// so the Mat must outlive it.
np::ndarray ConvertMatToNDArray(const cv::Mat& mat) {
    py::tuple shape = py::make_tuple(mat.rows, mat.cols, mat.channels());
    // Take the row and pixel strides from the Mat itself instead of deriving
    // them from cols * channels: for a continuous Mat the values are the
    // same, but an ROI or padded Mat has a larger row step.
    py::tuple stride = py::make_tuple(mat.step[0], mat.step[1], sizeof(uchar));
    np::dtype dt = np::dtype::get_builtin<uchar>();
    np::ndarray ndImg = np::from_data(mat.data, dt, shape, stride, py::object());
    return ndImg;
}
// Copies a 3-D uint8 ndarray (rows, cols, channels) into a newly allocated
// cv::Mat. Returns an empty Mat (after printing a message) when the array
// is not 3-D or its dtype is not uint8.
// NOTE(review): the memcpy below assumes the ndarray is C-contiguous — confirm
// at the call site or pass a contiguous copy.
cv::Mat ConvertNDArrayToMat(const np::ndarray& ndarr) {
    // shape[2] is read below, so anything other than a 3-D array would index
    // past the end of the shape.
    if (ndarr.get_nd() != 3) {
        std::cout << "wrong dimension error" << std::endl;
        return cv::Mat();
    }
    const Py_intptr_t* shape = ndarr.get_shape(); // get_shape() returns Py_intptr_t* which we can get the size of n-th dimension of the ndarray.

    // Keep the py::str in a named local: the extracted pointer refers to the
    // string object's storage, so extracting from a temporary would leave
    // dtype_str dangling by the time strcmp runs.
    py::str dtype_name(ndarr.get_dtype());
    const char* dtype_str = py::extract<const char*>(dtype_name);

    // variables for creating Mat object
    int rows = shape[0];
    int cols = shape[1];
    int channel = shape[2];
    int depth;

    // you should find proper type for c++. in this case we use 'CV_8UC3' image, so we need to create 'uchar' type Mat.
    if (!strcmp(dtype_str, "uint8")) {
        depth = CV_8U;
    }
    else {
        std::cout << "wrong dtype error" << std::endl;
        return cv::Mat();
    }

    int type = CV_MAKETYPE(depth, channel); // CV_8UC3
    cv::Mat mat = cv::Mat(rows, cols, type);
    memcpy(mat.data, ndarr.get_data(), sizeof(uchar) * rows * cols * channel);
    return mat;
}
int main()
{
using namespace std;
try
{
// initialize boost python and numpy
Init();
// import module
py::object main_module = py::import("__main__");
py::object print = main_module.attr("__builtins__").attr("print"); // this is for printing python object
// get image
cv::Mat img;
img = cv::imread("Lenna.jpg", cv::IMREAD_COLOR);
if (img.empty())
{
std::cout << "can't getting image" << std::endl;
return -1;
}
// convert Mat to NDArray
cv::Mat cloneImg = img.clone(); // converting functions will access to same data between Mat and NDArray. so we should clone Mat object. This may important in your case.
np::ndarray ndImg = ConvertMatToNDArray(cloneImg);
// You can check if it's properly converted.
//print(ndImg);
// convert NDArray to Mat
cv::Mat matImg = ConvertNDArrayToMat(ndImg); // also you can convert ndarray to mat.
// add 10 brightness to converted image
for (int i = 0; i < matImg.rows; i++) {
for (int j = 0; j < matImg.cols; j++) {
for (int c = 0; c < matImg.channels(); c++) {
matImg.at<cv::Vec3b>(i, j)[c] += 10;
}
}
}
// show image
cv::imshow("original image", img);
cv::imshow("converted image", matImg);
cv::waitKey(0);
cv::destroyAllWindows();
}
catch (py::error_already_set&)
{
PyErr_Print();
system("pause");
}
system("pause");
return 0;
}
Optionally, if you don't like to use wrappers, and want to use native python extension module, you can do it like this.
python3:
my_image = cv.imread("my_image.jpg", 1) # reads colorfull image in python
dims = my_image.shape # get image shape (h, w, c)
my_image = my_image.ravel() # flattens 3d array into 1d
cppextenionmodule.np_to_mat(dims, my_image)
c++:
static PyObject *np_to_mat(PyObject *self, PyObject *args){
PyObject *size;
PyArrayObject *image;
if (!PyArg_ParseTuple(args, "O!O!", &PyTuple_Type, &size, &PyArray_Type, &image)) {
return NULL;
}
int rows = PyLong_AsLong(PyTuple_GetItem(size ,0));
int cols = PyLong_AsLong(PyTuple_GetItem(size ,1));
int nchannels = PyLong_AsLong(PyTuple_GetItem(size ,2));
char my_arr[rows * nchannels * cols];
for(size_t length = 0; length<(rows * nchannels * cols); length++){
my_arr[length] = (*(char *)PyArray_GETPTR1(image, length));
}
cv::Mat my_img = cv::Mat(cv::Size(cols, rows), CV_8UC3, &my_arr);
... whatever with the image
}
Here is a pybind11 version of afewthings/DomQ's answer. I found pybind11 was better for my project than boost::python (both libraries are quite nice)
// convert a cv::Mat to an np.array
// Copies the pixels of an 8-bit Mat (element size is hard-coded as
// sizeof(uchar)) into a new buffer owned by the returned array, which always
// has shape (height, width, channels).
py::array to_array(const cv::Mat& im) {
    // The flat copy below needs rows stored back to back; clone() yields a
    // continuous Mat when the input is an ROI or has padded rows.
    const cv::Mat src = im.isContinuous() ? im : im.clone();

    const ssize_t channels = src.channels();
    const ssize_t height = src.rows;
    const ssize_t width = src.cols;
    const ssize_t dim = sizeof(uchar) * height * width * channels;

    auto data = new uchar[dim];
    // Hand ownership to the capsule straight away so the buffer is released
    // even if constructing the array below throws.
    py::capsule owner(data, [](void* f){
        // handle releasing data
        delete[] reinterpret_cast<uchar*>(f);
    });
    std::copy(src.data, src.data + dim, data);

    return py::array_t<uchar>(
        py::buffer_info(
            data,
            sizeof(uchar), //itemsize
            py::format_descriptor<uchar>::format(),
            3, // ndim: number of entries in shape/strides, not the channel count
            std::vector<ssize_t> { height, width, channels }, // shape
            std::vector<ssize_t> { width * channels, channels, sizeof(uchar) } // strides (bytes)
        ),
        owner
    );
}
// convert an np.array to a cv::Mat
// Copies a uint8 array of shape (rows, cols) or (rows, cols, channels) into a
// newly allocated Mat. Returns an empty Mat (after printing a message) for
// any other dtype or dimensionality.
cv::Mat from_array(const py::array& ar) {
    if (!ar.dtype().is(py::dtype::of<uchar>())) {
        std::cout << "ERROR unsupported dtype!" << std::endl;
        return cv::Mat();
    }

    // shape[2] only exists for 3-D input; treat 2-D input as single-channel
    // instead of reading past the end of the shape.
    const auto ndim = ar.ndim();
    if (ndim != 2 && ndim != 3) {
        std::cout << "ERROR unsupported number of dimensions!" << std::endl;
        return cv::Mat();
    }

    // memcpy needs a C-contiguous buffer; request that layout so that
    // non-contiguous input (e.g. a sliced or transposed view) is not copied
    // with the wrong strides.
    const py::array src = py::array::ensure(ar, py::array::c_style);
    if (!src) {
        std::cout << "ERROR could not get a contiguous array!" << std::endl;
        return cv::Mat();
    }

    int rows = static_cast<int>(src.shape(0));
    int cols = static_cast<int>(src.shape(1));
    int channels = (ndim == 3) ? static_cast<int>(src.shape(2)) : 1;
    int type = CV_MAKETYPE(CV_8U, channels); // e.g. CV_8UC3 for 3 channels
    cv::Mat mat = cv::Mat(rows, cols, type);
    memcpy(mat.data, src.data(), sizeof(uchar) * rows * cols * channels);
    return mat;
}

Speed up cython code

I wrote a python code that manages a lot of data and thus it takes a lot of time. So, I found out Cython and I began to change my code.
Basically, all I did is to change functions' declarations (cdef type name(arguments with variable type) ), to declare cdef variables with its type, and to declare cdef classes.
I'm writing all the .pyx with eclipse, and I'm compiling with the command python setup.py build_ext --inplace and running it with eclipse.
My issue is that comparing python with cython speed, there isn't any difference.
I run the command cython -a <file> to generate a html file and there are a lot of yellow lines.
I don't know if I'm doing something wrong, I should include something else, and I don't know how to delete these yellow lines.
I just paste some code lines, that's the part that I'd like to speed up and because the code is very long.
main.pyx
'''there are a lot of ndarray objects stored in a file and in this step I get each of them until there are no more items '''
cdef ReadWavePoints (WavePointManagement wavePointManagement, ColumnManagement columnManagement):
cdef int runReadWavePoints
wavePointManagement.OpenWavePointFileLoad(wavePointsFile)
runReadWavePoints = 1
while runReadWavePoints == 1:
try:
wavePointManagement.LoadWavePointFile()
wavePointManagement.RoundCoordinates()
wavePointManagement.SortWavePointList()
GroupColumnsVoxels(wavePointManagement.GetWavePointList(), columnManagement)
except:
wavePointManagement.CloseWavePointFile()
columnManagement.CloseWriteColumnFile()
break
'''I check which points are in the same XYZ (voxel) and in the same XY (column)'''
cdef GroupColumnsVoxels (object wavePointList, ColumnManagement columnManagement):
cdef int indexWavePointRef, indexWavePoint
cdef int saved
cdef double voxelValue
cdef int sizeWavePointList
sizeWavePointList = len(wavePointList)
indexWavePointRef = 0
while indexWavePointRef < sizeWavePointList - 1:
saved = 0
voxelValue = (wavePointList[indexWavePointRef]).GetValue()
for indexWavePoint in xrange(indexWavePointRef + 1, len(wavePointList)):
if (wavePointList[indexWavePointRef]).GetX() == (wavePointList[indexWavePoint]).GetX() and (wavePointList[indexWavePointRef]).GetY() == (wavePointList[indexWavePoint]).GetY():
if (wavePointList[indexWavePointRef]).GetZ() == (wavePointList[indexWavePoint]).GetZ():
if voxelValue < (wavePointList[indexWavePoint]).GetValue():
voxelValue = (wavePointList[indexWavePoint]).GetValue()
else:
saved = 1
CheckVoxel((wavePointList[indexWavePointRef]).GetX(), (wavePointList[indexWavePointRef]).GetY(), (wavePointList[indexWavePointRef]).GetZ(), voxelValue)
indexWavePointRef = indexWavePoint
if indexWavePointRef == sizeWavePointList - 1:
CheckVoxel((wavePointList[indexWavePointRef]).GetX(), (wavePointList[indexWavePointRef]).GetY(), (wavePointList[indexWavePointRef]).GetZ(), (wavePointList[indexWavePointRef]).GetValue())
break
else:
saved = 1
CheckVoxel((wavePointList[indexWavePointRef]).GetX(), (wavePointList[indexWavePointRef]).GetY(), (wavePointList[indexWavePointRef]).GetZ(), voxelValue)
columnObject = columnInstance.Column((wavePointList[indexWavePointRef]).GetX(), (wavePointList[indexWavePointRef]).GetY())
columnManagement.AddColumn(columnObject)
MaximumHeightColumn((wavePointList[indexWavePointRef]).GetX(), (wavePointList[indexWavePointRef]).GetY(), (wavePointList[indexWavePointRef]).GetZ())
indexWavePointRef = indexWavePoint
break
if saved == 0:
CheckVoxel((wavePointList[indexWavePointRef]).GetX(), (wavePointList[indexWavePointRef]).GetY(), (wavePointList[indexWavePointRef]).GetZ(), voxelValue)
indexWavePointRef = indexWavePoint
columnObject = columnInstance.Column((wavePointList[indexWavePointRef]).GetX(), (wavePointList[indexWavePointRef]).GetY())
columnManagement.AddColumn(columnObject)
MaximumHeightColumn((wavePointList[indexWavePointRef]).GetX(), (wavePointList[indexWavePointRef]).GetY(), (wavePointList[indexWavePointRef]).GetZ())
'''I check if the data stored in a voxel is lower than the new one; if its the case, I store it'''
cdef CheckVoxel (double X, double Y, double Z, double newValue):
cdef object bandVoxel, structvalCheckVoxel, out_str
cdef tuple valueCheckVoxel
bandVoxel = datasetVoxels.GetRasterBand(int(math.floor(Z/0.3))+1)
structvalCheckVoxel = bandVoxel.ReadRaster(int(math.floor((X-Xmin)/0.25)), int(math.floor((Ymax-Y)/0.25)), 1, 1, buf_type=gdal.GDT_Float32)
valueCheckVoxel = struct.unpack('f', structvalCheckVoxel)
if newValue > valueCheckVoxel[0]:
out_str = struct.pack('f', newValue)
bandVoxel.WriteRaster(int(math.floor((X-Xmin)/0.25)), int(math.floor((Ymax-Y)/0.25)), 1, 1, out_str)
'''I check if this point has the highest Z and I store this information'''
cdef MaximumHeightColumn(double X, double Y, double newZ):
cdef object bandMetricMaximumHeightColumn, structvalMaximumHeightColumn, out_strMaximumHeightColumn
cdef tuple valueMaximumHeightColumn
bandMetricMaximumHeightColumn = datasetMetrics.GetRasterBand(10)
structvalMaximumHeightColumn = bandMetricMaximumHeightColumn.ReadRaster(int(math.floor((X-Xmin)/0.25)), int(math.floor((Ymax-Y)/0.25)), 1, 1, buf_type=gdal.GDT_Float32)
valueMaximumHeightColumn = struct.unpack('f', structvalMaximumHeightColumn)
if newZ > round(valueMaximumHeightColumn[0], 1):
out_strMaximumHeightColumn = struct.pack('f', newZ)
bandMetricMaximumHeightColumn.WriteRaster(int(math.floor((X-Xmin)/0.25)), int(math.floor((Ymax-Y)/0.25)), 1, 1, out_strMaximumHeightColumn)
WavePointManagement.pyx
'''this class serializes, rounds and sorts the points of each ndarray'''
import cPickle as pickle
import numpy as np
cimport numpy as np
import math
cdef class WavePointManagement(object):
'''
This class manages all the points extracted from the waveform
'''
cdef object fileObject, wavePointList
__slots__ = ('wavePointList', 'fileObject')
def __cinit__(self):
'''
Constructor
'''
self.fileObject = None
self.wavePointList = np.array([])
cdef object GetWavePointList(self):
return self.wavePointList
cdef void OpenWavePointFileLoad (self, object fileName):
self.fileObject = file(fileName, 'rb')
cdef void LoadWavePointFile (self):
self.wavePointList = None
self.wavePointList = pickle.load(self.fileObject)
cdef void SortWavePointList (self):
self.wavePointList = sorted(self.wavePointList, key=lambda k: (k.x, k.y, k.z))
cdef void RoundCoordinates (self):
cdef int indexPointObject, sizeWavePointList
for pointObject in self.GetWavePointList():
pointObject.SetX(round(math.floor(pointObject.GetX()/0.25)*0.25, 2))
pointObject.SetY(round(math.ceil(pointObject.GetY()/0.25)*0.25, 2))
pointObject.SetZ(round(math.floor(pointObject.GetZ()/0.3)*0.3, 1))
cdef void CloseWavePointFile(self):
self.fileObject.close()
setup.py
from distutils.core import setup
from distutils.extension import Extension
from Cython.Distutils import build_ext
import numpy
ext = Extension("main", ["main.pyx"], include_dirs = [numpy.get_include()])
setup (ext_modules=[ext],
cmdclass = {'build_ext' : build_ext}
)
test_cython.py
'''this is the file I run with eclipse after compiling'''
from main import main
main()
How could I speed up this code?
Your code jumps back and forth between using numpy arrays and lists. As such there is virtually no difference between the code that cython will produce.
The following code produces a python list, and the key function is a pure python function as well.
self.wavePointList = sorted(self.wavePointList, key=lambda k: (k.x, k.y, k.z))
You will want to use ndarray.sort (or numpy.sort if you don't want to sort inplace). To do this you will also need to change how your objects are stored in the array. That is, you will need to use a structured array. See numpy.sort for examples on how to sort structured arrays -- particularly the last two examples on the page.
Once you have your data stored in a numpy array then you need to tell cython about how the data is stored in the array. This includes providing type information and the dimensions of the array. This page provides more information how to work efficiently with numpy arrays.
An example of how to create and sort structured arrays:
import numpy as np
cimport numpy as np
DTYPE = [('name', 'S10'), ('height', np.float64), ('age', np.int32)]
cdef packed struct Person:
char name[10]
np.float64_t height
np.int32_t age
ctypedef Person DTYPE_t
def create_array():
values = [('Arthur', 1.8, 41), ('Lancelot', 1.9, 38),
('Galahad', 1.7, 38)]
return np.array(values, dtype=DTYPE)
cpdef sort_by_age_then_height(np.ndarray[DTYPE_t, ndim=1] arr):
arr.sort(order=['age', 'height'])
Finally, you will need to convert your code from using Python methods to using the standard C library functions for a further speed-up. Below is an example using RoundCoordinates. `cpdef` means the function is also exposed to Python by a wrapper function.
cimport cython
cimport numpy as np
from libc.math cimport floor, ceil, round
import numpy as np
DTYPE = [('x', np.float64), ('y', np.float64), ('z', np.float64)]
cdef packed struct Point3D:
np.float64_t x, y, z
ctypedef Point3D DTYPE_t
# Caution should be used when turning the bounds check off as it can lead to undefined
# behaviour if you use an invalid index.
@cython.boundscheck(False)
cpdef RoundCoordinates_cy(np.ndarray[DTYPE_t] pointlist):
    """Snap every point of ``pointlist`` onto the voxel grid, in place.

    x is floored and y is ceiled to a multiple of 0.25 (two decimals);
    z is floored to a multiple of 0.3 (one decimal).
    """
    cdef int i
    cdef DTYPE_t point
    for i in range(len(pointlist)):  # this line is optimised into a c loop
        point = pointlist[i]  # creates a copy of the point
        # Multiples of 0.25 need two decimals (0.25, 0.75, ...), so scale to
        # hundredths; scaling to tenths would turn 0.25 into 0.3.
        point.x = round(floor(point.x / 0.25) * 25) / 100
        point.y = round(ceil(point.y / 0.25) * 25) / 100
        # Multiples of 0.3 only need one decimal.
        point.z = round(floor(point.z / 0.3) * 3) / 10
        pointlist[i] = point  # overwrites the old point data with the new data
Finally, before rewriting your entire code base, you should profile your code to see which functions the program spends most of its time and optimise those functions before bothering about optimising other functions.

Categories

Resources