There is a thinning pip package that currently compiles only under Python 2.
When I install it with sudo pip install thinning and then attempt to import thinning, I get an error:
ImportError: /usr/lib/python3.5/site-packages/thinning.cpython-35m-x86_64-linux-gnu.so: undefined symbol: Py_InitModule3
I assume this is because Py_InitModule3 is no longer used in Python 3.
Here is the complete C source file:
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#include "Python.h"
#include "arrayobject.h"
#include <stdlib.h>
#include <assert.h>
#include <stdbool.h>
#include <limits.h>
static PyObject *guo_hall_thinning(PyObject *self, PyObject *args);
int _guo_hall_thinning(unsigned char* binary_image, int width, int height);
void initthinning(void);

/* ==== Set up the methods table ====================== */
static PyMethodDef thinningMethods[] = {
    {"guo_hall_thinning", guo_hall_thinning, METH_VARARGS,
     "Takes a 2D numpy UBYTE array in C-order and thins it in place using the algorithm by Guo and Hall."
     "Images that come out of cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) have the right format."
     "\n\n"
     "We assume that the dimensions of the image fit into an int on your platform. If your computer for some"
     "reason has a 2 byte int and lots of memory so that the image can become too large, bad things can happen."
     "\n\n"
     "interface:\n"
     "\tguo_hall_thinning(segmented_image)"
     "\tsegmented_image is a NumPy matrix,"
     "\treturns the same NumPy matrix (thinned)"},
    {NULL, NULL, 0, NULL} /* Sentinel - marks the end of this structure */
};

/* ==== Initialize the C_test functions ====================== */
void initthinning() {
    PyObject* module = Py_InitModule3("thinning", thinningMethods, "Thinning of segmented images. See https://bitbucket.org/adrian_n/thinning.");
    PyModule_AddStringConstant(module, "__author__", "Adrian Neumann <adrian_neumann#gmx.de>");
    PyModule_AddStringConstant(module, "__version__", "1.2.3");
    import_array(); // Must be present for NumPy. Called first after above line.
}
/* ==== Guo Hall Thinning =========
Takes a 2D numpy UBYTE array in C-order and thins it in place using the algorithm by Guo and Hall.
Images that come out of cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) have the right format.
We assume that the dimensions of the image fit into an int on your platform. If your computer for some
reason has a 2 byte int and lots of memory so that the image can become too large, bad things can happen.
interface: guo_hall_thinning(segmented_image)
segmented_image is a NumPy matrix,
returns the same NumPy matrix (thinned)
*/
static PyObject *guo_hall_thinning(PyObject *self, PyObject *args)
{
    PyArrayObject *segmented_image;

    /* Parse tuples separately since args will differ between C fcns */
    if (!PyArg_ParseTuple(args, "O!", &PyArray_Type, &segmented_image)) {
        return NULL;
    }
    if (NULL == segmented_image) {
        PyErr_SetString(PyExc_TypeError, "Parameter is not a valid image");
        return NULL;
    }
    if (PyArray_TYPE(segmented_image) != NPY_UBYTE || !PyArray_CHKFLAGS(segmented_image, NPY_ARRAY_CARRAY)) {
        PyErr_SetString(PyExc_TypeError, "Parameter is not a grayscale image");
        return NULL;
    }

    npy_intp* shape = PyArray_DIMS(segmented_image);
    int height = (int)shape[0];
    int width = (int)shape[1];
    unsigned char *in_data = PyArray_DATA(segmented_image);

    if (height >= 3 && width >= 3) {
        int ok = _guo_hall_thinning(in_data, width, height);
        if (ok < 0) {
            return PyErr_NoMemory();
        }
    }
    Py_INCREF(segmented_image);
    return (PyObject*)segmented_image;
}
int nonzero_clever(const unsigned char* arr, unsigned int start, unsigned int len) {
    /* find the first nonzero element from arr[start] to arr[start+len-1] (inclusive)
       look at a long long at a time to be faster on 64 bit cpus */
    const unsigned int step = sizeof(unsigned long long)/sizeof(unsigned char);
    unsigned int i = start;
    //unsigned types should throw exceptions on under/overflow...
    while (len > step && i < len-step) {
        if (*((unsigned long long*)(arr + i)) == 0) {
            i += step;
        } else {
            int j = 0;
            while (arr[i+j] == 0) j++;
            return i+j;
        }
    }
    while (i < len) {
        if (arr[i] != 0) { return i; }
        i++;
    }
    return len;
}
int guo_hall_iteration(const unsigned char* binary_image, unsigned char* mask, const unsigned int width, const unsigned int height, const int iteration) {
    /* one iteration of the algorithm by guo and hall. see their paper for an explanation.
       We only consider nonzero elements of the image. We never reinitialize the mask; once a pixel is
       black, it will never become white again anyway. */
    unsigned int changed = 0;
    for (unsigned int j = 1; j < height-1; j++) {
        const unsigned char* line = binary_image + j*width;
        unsigned int start = 0;
        const int len = width-1;

        while (start+1 < len) {
            start = nonzero_clever(line, start+1, len);
            if (start == len) break;

            const unsigned int i = start;
            assert(line[i] != 0);
            assert(binary_image[i + j*width] != 0);

            const bool p2 = binary_image[i-1 + width*j];
            const bool p6 = binary_image[i+1 + width*j];
            const bool p9 = binary_image[i-1 + width*(j-1)];
            const bool p8 = binary_image[i   + width*(j-1)];
            const bool p7 = binary_image[i+1 + width*(j-1)];
            const bool p3 = binary_image[i-1 + width*(j+1)];
            const bool p4 = binary_image[i   + width*(j+1)];
            const bool p5 = binary_image[i+1 + width*(j+1)];

            const unsigned int C = ((!p2 && (p3 || p4)) +
                                    (!p4 && (p5 || p6)) +
                                    (!p6 && (p7 || p8)) +
                                    (!p8 && (p9 || p2)));
            // printf("%d %d %d %d %d %d %d %d\n",p2,p3,p4,p5,p6,p7,p8,p9);
            if (C == 1) {
                const unsigned int N1 = (p9 || p2) + (p3 || p4) + (p5 || p6) + (p7 || p8);
                const unsigned int N2 = (p2 || p3) + (p4 || p5) + (p6 || p7) + (p8 || p9);
                const unsigned int N = N1 < N2 ? N1 : N2;
                unsigned int m;
                if (iteration == 0)
                    { m = (p8 && (p6 || p7 || !p9)); }
                else
                    { m = (p4 && (p2 || p3 || !p5)); }
                if (2 <= N && N <= 3 && m == 0) {
                    mask[i + width*j] = 0;
                    changed += 1;
                }
            }
        }
    }
    return changed;
}
void andImage(unsigned char* image, const unsigned char* mask, const int size) {
    /* calculate image &= mask.
       to be faster on 64 bit cpus, we do this one long long at a time */
    const int step = sizeof(unsigned long long)/sizeof(unsigned char);
    unsigned long long* image_l = (unsigned long long*)image;
    const unsigned long long* mask_l = (unsigned long long*) mask;
    unsigned int i = 0;
    for (; size/step > 2 && i < size/step-2; i += 2) {
        image_l[i] = image_l[i] & mask_l[i];
        image_l[i+1] = image_l[i+1] & mask_l[i+1];
    }
    for (i = i*step; i < size; ++i) {
        image[i] = image[i] & mask[i];
    }
}
int _guo_hall_thinning(unsigned char* binary_image, int width, int height) {
    /* return -1 if we can't allocate the memory for the mask, else 0 */
    int changed;
    unsigned char* mask = (unsigned char*) malloc(width*height*sizeof(unsigned char));
    if (mask == NULL) {
        return -1;
    }
    memset(mask, UCHAR_MAX, width*height);

    do {
        changed = guo_hall_iteration(binary_image, mask, width, height, 0);
        andImage(binary_image, mask, width*height);
        changed += guo_hall_iteration(binary_image, mask, width, height, 1);
        andImage(binary_image, mask, width*height);
    } while (changed != 0);

    free(mask);
    return 0;
}
I've started reading Porting Extension Modules to Python 3, but I must admit I understand little of it.
I tried to change Py_InitModule to its Python 3 analogue, PyModule_Create, with some other code adjustments, but it didn't work. Unfortunately, this thinning module is a hard dependency for our application, so I am pretty stuck right now, without the time or the knowledge to port this module to Python 3.
What has changed:
Note: I can't really get into the details of what the function guo_hall_thinning does per se. What I know is that it uses a small subset of the NumPy C-API for getting and returning the data as an ndarray; I couldn't find any documentation on those calls being altered, so they should be good to go.
Now, what has definitely changed is the way modules are initialized; with this I can help you and get it imported in a Python 3 distribution. I'm using 3.5 for this too, even though I believe differences between older versions of the 3.x family shouldn't exist, or are backwards compatible.
As you noted, general information is provided in the Porting to Python 3 document with specifics about the initialization phase in Module Initialization and state. The new change is described in PEP 3121 which, by itself, is a nice but challenging read.
Now, the gist of it can be listed in two points:
A) Modules are now defined in a dedicated PyModuleDef struct:
struct PyModuleDef{
    PyModuleDef_Base m_base;  /* To be filled out by the interpreter */
    const char* m_name;       /* Name of the module */
    const char* m_doc;        /* Docstring for the module */
    Py_ssize_t m_size;        /* Size of per-module data */
    PyMethodDef *m_methods;
    inquiry m_reload;
    traverseproc m_traverse;
    inquiry m_clear;
    freefunc m_free;
};
This new struct contains the members holding the name and documentation for the module, which Py_InitModule3 used to take as arguments. The members m_reload, m_traverse, m_clear and m_free provide additional control during initialization/finalization, but we can opt to leave them all as NULL. These, along with an m_size set to -1, are for simplicity; setting them to other values is generally done to support multiple interpreters/multiple initializations and is more tricky.
So, in short, the fancy new module struct for the thinning module could look like this:
static struct PyModuleDef moduledef = {
    PyModuleDef_HEAD_INIT,
    "thinning",
    "Thinning of segmented images. See https://bitbucket.org/adrian_n/thinning",
    -1,
    thinningMethods,
    NULL,
    NULL,
    NULL,
    NULL
};
aaand that's it for the first issue!
B) A new initialization function, i.e. you'll need to give initthinning a major face-lift.
The new module initialization function returns a PyObject * and is now named PyInit_<module_name>. In it (heh, get it?) new modules are created with PyModule_Create(&moduledef), which takes the struct we defined and returns the initialized module. It's prettier now and looks like this:
/* ==== Initialize the C_test functions ====================== */
PyObject *
PyInit_thinning(void){
    // create module
    PyObject *module = PyModule_Create(&moduledef);
    // handle probable error
    if (module == NULL)
        return NULL;
    PyModule_AddStringConstant(module, "__author__", "Adrian Neumann <adrian_neumann#gmx.de>");
    PyModule_AddStringConstant(module, "__version__", "1.2.3");
    import_array(); // Must be present for NumPy. Called first after above line.
    // return newly created module
    return module;
}
Installing the module:
All this is for the initialization of the module. You can download the module (as you have done, I believe), find the thinning_folder/src/c_thinning.c file, and replace everything prior to:
/* ==== Guo Hall Thinning =========
with the following:
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#include "Python.h"
#include "arrayobject.h"
#include <stdlib.h>
#include <assert.h>
#include <stdbool.h>
#include <limits.h>

static PyObject *guo_hall_thinning(PyObject *self, PyObject *args);
int _guo_hall_thinning(unsigned char* binary_image, int width, int height);

/* ==== Set up the methods table ====================== */
static PyMethodDef thinningMethods[] = {
    {"guo_hall_thinning", guo_hall_thinning, METH_VARARGS,
     "Takes a 2D numpy UBYTE array in C-order and thins it in place using the algorithm by Guo and Hall."
     "Images that come out of cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) have the right format."
     "\n\n"
     "We assume that the dimensions of the image fit into an int on your platform. If your computer for some"
     "reason has a 2 byte int and lots of memory so that the image can become too large, bad things can happen."
     "\n\n"
     "interface:\n"
     "\tguo_hall_thinning(segmented_image)"
     "\tsegmented_image is a NumPy matrix,"
     "\treturns the same NumPy matrix (thinned)"},
    {NULL, NULL, 0, NULL} /* Sentinel - marks the end of this structure */
};

static struct PyModuleDef moduledef = {
    PyModuleDef_HEAD_INIT,
    "thinning",
    "Thinning of segmented images. See https://bitbucket.org/adrian_n/thinning.",
    -1,
    thinningMethods,
    NULL,
    NULL,
    NULL,
    NULL
};

/* ==== Initialize the C_test functions ====================== */
PyObject *
PyInit_thinning(void){
    PyObject *module = PyModule_Create(&moduledef);
    if (module == NULL)
        return NULL;
    PyModule_AddStringConstant(module, "__author__", "Adrian Neumann <adrian_neumann#gmx.de>");
    PyModule_AddStringConstant(module, "__version__", "1.2.3");
    import_array(); // Must be present for NumPy. Called first after above line.
    return module;
}
/* ==== Guo Hall Thinning =========
// Leave the rest as it was
After that, navigate to the top-level directory containing setup.py and run:
python setup.py install
as usual. Some compilation warnings will probably pop up, but those are safe to ignore. If all goes well, you'll get a successful install and the following will not result in a nasty seg-fault:
>>> from thinning import guo_hall_thinning
>>> print(guo_hall_thinning.__doc__)
Takes a 2D numpy UBYTE array in C-order and thins it in place using the algorithm by Guo and Hall.Images that come out of cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) have the right format.
We assume that the dimensions of the image fit into an int on your platform. If your computer for somereason has a 2 byte int and lots of memory so that the image can become too large, bad things can happen.
interface:
guo_hall_thinning(segmented_image) segmented_image is a NumPy matrix, returns the same NumPy matrix (thinned)
It seems to run :)
I further edited the source in c_thinning.c to print out the number of elements changed during every iteration. It seems to be changing things but I don't understand what underlying criteria it uses because I haven't read the corresponding paper.
In short, guo_hall_thinning(ndarr) apparently does the 'thinning' in place. This means that after it is executed, the original array that was supplied as a parameter is going to be altered. So, a check of the form:
gray_img == guo_hall_thinning(gray_img)
is always going to be True (Hint: check for equality between numpy arrays with (arr1 == arr2).all()).
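For instance, to actually compare before and after, keep a copy first. A minimal sketch (my addition, assuming a C-order uint8 image, which is what the extension requires):
import numpy
import thinning

gray_img = numpy.zeros((10, 10), dtype=numpy.uint8)
gray_img[3:7, 2:9] = 255                    # a thick horizontal bar
original = gray_img.copy()                  # snapshot before the in-place call
thinned = thinning.guo_hall_thinning(gray_img)
print((thinned == gray_img).all())          # True: same buffer, altered in place
print((thinned == original).all())          # False once anything was actually thinned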
Here's a test I ran in which you can visually see the altering taking place, I believe this test can be reproduced on your machine too:
# dtype = 'B' is UBYTE
>>> n = numpy.ndarray(shape=(100, 200), dtype='B')
>>> n
array([[ 40, 159, 95, ..., 114, 114, 97],
[121, 95, 108, ..., 114, 101, 32],
[ 48, 161, 90, ..., 127, 0, 0],
...,
[110, 32, 97, ..., 124, 1, 0],
[124, 5, 0, ..., 0, 0, 131],
[ 1, 0, 25, ..., 0, 125, 17]], dtype=uint8)
>>> thinning.guo_hall_thinning(n)
-- Array height 100 Array width: 200
Value of `changed` during 0 iteration is: 1695
Value of `changed` during 1 iteration is: 1216
Value of `changed` during 2 iteration is: 808
Value of `changed` during 3 iteration is: 493
Value of `changed` during 4 iteration is: 323
Value of `changed` during 5 iteration is: 229
Value of `changed` during 6 iteration is: 151
Value of `changed` during 7 iteration is: 90
Value of `changed` during 8 iteration is: 46
Value of `changed` during 9 iteration is: 27
Value of `changed` during 10 iteration is: 11
Value of `changed` during 11 iteration is: 8
Value of `changed` during 12 iteration is: 7
Value of `changed` during 13 iteration is: 4
Value of `changed` during 14 iteration is: 0
Value of `ok` is: 0
# array returned
array([[ 40, 159, 95, ..., 114, 114, 97],
[121, 0, 0, ..., 114, 0, 32],
[ 48, 0, 0, ..., 127, 0, 0],
...,
[110, 0, 97, ..., 124, 1, 0],
[124, 5, 0, ..., 0, 0, 131],
[ 1, 0, 25, ..., 0, 125, 17]], dtype=uint8)
So I'm guessing it does work :-).
Related
I'm trying to coerce the values in my NumPy array to float. However, in my array, there might be some values which don't coerce successfully, and I want to replace those with a default value. I do want the speed of NumPy, though, so I do not want to use a Python loop. What's the best route to achieve this behavior?
For instance:
import numpy as np
my_array = np.array(["1", "2", "3", "NA"])
new_array = magic_coerce(my_array, float, -1.0) # I want to implement this
print(new_array) # should print [1., 2., 3., -1.]
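For what it's worth, if pandas is an acceptable dependency, a vectorized route that avoids both the Python loop and a custom ufunc could look like the sketch below (my addition; magic_coerce above is the hypothetical target being imitated):
import numpy as np
import pandas as pd  # assumption: pandas is available

my_array = np.array(["1", "2", "3", "NA"])
# errors="coerce" turns unparseable entries into NaN at C speed,
# then nan_to_num swaps the NaNs for the chosen default:
new_array = np.nan_to_num(pd.to_numeric(my_array, errors="coerce"), nan=-1.0)
print(new_array)  # [ 1.  2.  3. -1.]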
I'm trying to write my own ufunc in c, and I have the following:
#include <regex.h>   /* for regcomp/regexec (needed for this snippet to compile) */
#include <stdlib.h>  /* for atof */

int is_float(const char* c)
{
    regex_t regex;
    regcomp(&regex, "^[+-]?([0-9]*[.])?[0-9]+$", REG_EXTENDED);
    int matched = regexec(&regex, c, 0, NULL, 0) == 0;
    regfree(&regex);  /* free the compiled pattern to avoid leaking it */
    return matched;
}

float to_float(const char *c, float default_value)
{
    float result = default_value;
    if (is_float(c))
    {
        result = atof(c);
    }
    return result;
}
static PyMethodDef LogitMethods[] = {
    {NULL, NULL, 0, NULL}
};

/* The loop definition must precede the PyMODINIT_FUNC. */
static void double_logitprod(char **args, npy_intp *dimensions,
                             npy_intp* steps, void* data)
{
    npy_intp i;
    npy_intp n = dimensions[0];
    char *in1 = args[0], *in2 = args[1];
    char *out = args[2];
    npy_intp in1_step = steps[0];
    npy_intp out_step = steps[2];
    double tmp;
    for (i = 0; i < n; i++) {
        /* BEGIN main ufunc computation */
        char *tmp1 = (char *) in1;
        tmp = *((double *)in2);
        *((double *) out) = to_float(tmp1, tmp);
        /* END main ufunc computation */
        in1 += in1_step;
        out += out_step;
    }
}

/* This is a pointer to the above function */
PyUFuncGenericFunction funcs[1] = {&double_logitprod};

/* These are the input and return dtypes of logit. */
static char types[3] = {NPY_OBJECT, NPY_DOUBLE,
                        NPY_DOUBLE};
But it looks like it's not working correctly. What's the type for UNICODE in numpy? NPY_UNICODE gives an error, so I coerced it to NPY_OBJECT, but this does not seem to play well with it.
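For reference (my note, not part of the original question): a NumPy string array is fixed-width unicode, not an array of Python objects, which is easy to check and may explain why NPY_OBJECT does not match the data actually passed in:
import numpy as np

a = np.array(["1", "2", "3", "NA"])
print(a.dtype)                  # <U2 -- fixed-width unicode, not object
print(a.astype(object).dtype)   # object -- an explicit cast is needed for that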
I need to use some code from LabVIEW in Python. The LabVIEW code is used as a DLL, so I'm using the ctypes library in Python. In one case, I need to pass clusters from Python to LabVIEW, and I'm facing issues in passing them.
The problem is with the particular ordering of elements inside the LabVIEW cluster (I am aware that clusters strictly follow ordering). My cluster has a double value and an integer array. When the double is the first element in the cluster, things work well. But when the array is set as the first element and the double as the second, things don't work at all. I'm using LabVIEW 2015 (32-bit) and Python 3 (32-bit).
I suspect ctypes to be the cause of the issue, because the same DLL works as expected in C. I have also tried using the DLL in LabVIEW itself, which works fine.
I have narrowed this issue down to the following example. The DLL in this example has 2 functions, each of which uses a cluster. Func1 uses Cluster1, which is an array followed by a double, while Func2 uses Cluster, which is a double followed by an array.
Both these functions receive a cluster as input and just return the double value (as is, from the input cluster) as output.
The details are in the header file.
typedef struct {
    int32_t dimSize;
    uint64_t elt[1];
} Uint64ArrayBase;

typedef Uint64ArrayBase **Uint64Array;

typedef struct {
    double Double;
    Uint64Array _1DArray;
} Cluster;

typedef struct {
    Uint64Array _1DArray;
    double Double;
} Cluster1;

/*!
 * Func2
 */
double __cdecl Func2(Cluster *Double1DArrayCluster);

/*!
 * Func1
 */
double __cdecl Func1(Cluster1 *_1DArrayDoubleCluster);

MgErr __cdecl LVDLLStatus(char *errStr, int errStrLen, void *module);

/*
 * Memory Allocation/Resize/Deallocation APIs for type 'Uint64Array'
 */
Uint64Array __cdecl AllocateUint64Array (int32 elmtCount);
MgErr __cdecl ResizeUint64Array (Uint64Array *hdlPtr, int32 elmtCount);
MgErr __cdecl DeAllocateUint64Array (Uint64Array *hdlPtr);
This is my Python code.
from ctypes import *

lib = cdll.LoadLibrary("DLL.dll")
dbl = 45.54
elt_count = 5

class Uint64ArrayBase(Structure):
    _fields_ = [
        ("dimSize", c_int32),
        ("elt", c_uint64 * elt_count)
    ]

class Cluster(Structure):
    _fields_ = [
        ("Double", c_double),
        ("_1DArray", POINTER(POINTER(Uint64ArrayBase)))
    ]

class Cluster1(Structure):
    _fields_ = [
        ("_1DArray", POINTER(POINTER(Uint64ArrayBase))),
        ("Double", c_double)
    ]

Uint64Array = POINTER(POINTER(Uint64ArrayBase))

# Use the given API to allocate array
lib.AllocateUint64Array.restype = Uint64Array
lib.AllocateUint64Array.argtypes = [c_int32]
uintarr_h = lib.AllocateUint64Array(c_int32(elt_count))

# Populate the elts of the array
for i in range(elt_count):
    uintarr_h.contents.contents.elt[i] = c_uint64(i)

# Create cluster instances
dbl_arr_cluster = Cluster(c_double(dbl), uintarr_h)
arr_dbl_cluster = Cluster1(uintarr_h, c_double(dbl))

# Print the array for Debugging
print("Array from arr_dbl_cluster:", end=" ")
for i in range(arr_dbl_cluster._1DArray.contents.contents.dimSize):
    print(arr_dbl_cluster._1DArray.contents.contents.elt[i], end=", ")
print()
print("Array from dbl_arr_cluster:", end=" ")
for i in range(dbl_arr_cluster._1DArray.contents.contents.dimSize):
    print(dbl_arr_cluster._1DArray.contents.contents.elt[i], end=", ")
print()
print("Double from arr_dbl_cluster:", arr_dbl_cluster.Double)
print("Double from dbl_arr_cluster:", dbl_arr_cluster.Double)

# Test the funcs
lib.Func1.restype = c_double
lib.Func1.argtypes = [POINTER(Cluster1)]
lib.Func2.restype = c_double
lib.Func2.argtypes = [POINTER(Cluster)]
print()
result = lib.Func1(byref(arr_dbl_cluster))
print("Double output from DLL function cluster{Array, DBL}:", result)
result_1 = lib.Func2(byref(dbl_arr_cluster))
print("Double output from DLL function cluster{DBL, Array}:", result_1)
Also, the C code.
#include <stdio.h>
#include "DLL.h"

int main() {
    double dbl = 45.54, result, result1;
    int32 eltCount = 5, i;

    // Use the API to allocate array
    Uint64Array uia = AllocateUint64Array(eltCount);

    // Populate the elts of the array
    for (i = 0; i < (*uia)->dimSize; i++) {
        (*uia)->elt[i] = (int)i;
    }

    // Create cluster instances
    Cluster cluster = { dbl, uia };
    Cluster1 cluster1 = { uia, dbl };

    // Print for Debugging
    printf("\nArray from cluster1{Array, DBL}: ");
    for (i = 0; i < (*cluster1._1DArray)->dimSize; i++) {
        printf("%llu, ", (*cluster1._1DArray)->elt[i]);
    }
    printf("\nArray from cluster{DBL, Array}: ");
    for (i = 0; i < (*cluster._1DArray)->dimSize; i++) {
        printf("%llu, ", (*cluster._1DArray)->elt[i]);
    }
    printf("\nDouble from cluster1{Array, DBL}: %lf", cluster1.Double);
    printf("\nDouble from cluster{DBL, Array}: %lf", cluster.Double);

    // Test the funcs
    result = Func1(&cluster1);
    result1 = Func2(&cluster);
    printf("\n\nDouble output from DLL function cluster1{Array, DBL}: %lf", result);
    printf("\nDouble output from DLL function cluster{DBL, Array}: %lf", result1);
    return 0;
}
The following is the output from Python, which is not what was expected; 45.54 should have been returned by both functions.
c:/Users/samkm/Desktop/DLL Demo/test.py
Array from arr_dbl_cluster: 0, 1, 2, 3, 4,
Array from dbl_arr_cluster: 0, 1, 2, 3, 4,
Double from arr_dbl_cluster: 45.54
Double from dbl_arr_cluster: 45.54
Double output from DLL function cluster{Array, DBL}: -2.106485661434095e-37 # int(...) -> 0
Double output from DLL function cluster{DBL, Array}: 45.54
And the C code returns the expected output.
Array from cluster1{Array, DBL}: 0, 1, 2, 3, 4,
Array from cluster{DBL, Array}: 0, 1, 2, 3, 4,
Double from cluster1{Array, DBL}: 45.540000
Double from cluster{DBL, Array}: 45.540000
Double output from DLL function cluster1{Array, DBL}: 45.540000
Double output from DLL function cluster{DBL, Array}: 45.540000
LabVIEW code for func1
LabVIEW code for func2
Has anyone faced this issue? What should be done in this case?
I have written a good bit of code in Python and it works great. But now I'm scaling up the size of the problems I'm analyzing, and Python is dreadfully slow. The slow part of the Python code is
for i in range(0, H, 1):
    x1 = i - length
    x2 = i + length
    for j in range(0, W, 1):
        #print i, ',', j # check the limits
        y1 = j - length
        y2 = j + length
        IntRed[i, j] = np.mean(RawRed[x1:x2, y1:y2])
With H and W equal to 1024, the function takes around 5 minutes to execute. I've written a simple C++ program/function that performs the same computation, and it executes in less than a second with the same data size.
double summ = 0;
double total_num = 0;
double tmp_num = 0;
int avesize = 2;
for (i = 0 + avesize; i < X - avesize; i++)
    for (j = 0 + avesize; j < Y - avesize; j++)
    {
        // loop through sub region of the matrix
        // if the value is not zero add it to the sum
        // and increment the counter.
        for (int ii = -2; ii < 2; ii++)
        {
            int iii = i + ii;
            for (int jj = -2; jj < 2; jj++)
            {
                int jjj = j + jj;
                tmp_num = gsl_matrix_get(m, iii, jjj);
                if (tmp_num != 0)
                {
                    summ = summ + tmp_num;
                    total_num++;
                }
            }
        }
        gsl_matrix_set(Matrix_mean, i, j, summ / total_num);
        summ = 0;
        total_num = 0;
    }
I have some other methods to perform on the 2D array; the one listed is a simple example.
What I want to do is pass a Python 2D array to my C++ function and return a 2D array back to Python.
I've read a bit about SWIG and have searched previous questions; it seems like a possible solution, but I can't seem to figure out what I actually need to do.
Can I get any help? Thanks
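As an aside (my addition, not part of the original question): the nested loop is essentially a uniform (box) filter, so before reaching for C++ it may be worth trying scipy.ndimage, assuming SciPy is available; boundary handling and window centering differ slightly from the slicing version:
import numpy as np
from scipy.ndimage import uniform_filter

H, W, length = 1024, 1024, 2
RawRed = np.random.rand(H, W)
# Mean over a (2*length) x (2*length) neighborhood of each pixel, which is
# what the i/j loop computes via np.mean(RawRed[x1:x2, y1:y2]):
IntRed = uniform_filter(RawRed, size=2 * length)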
You can use arrays as described here: Doc - 5.4.5 Arrays, with carray.i or std_vector.i from the SWIG library.
I find it easier to work with std::vector from the SWIG library (std_vector.i) to send a Python list to a C++ SWIG extension, though in your case, where optimization matters, it may not be optimal.
In your case you can define:
test.i
%module test
%{
#include "test.h"
%}

%include "std_vector.i"
namespace std {
    %template(Line) vector < int >;
    %template(Array) vector < vector < int> >;
}

void print_array(std::vector< std::vector < int > > myarray);
test.h
#ifndef TEST_H__
#define TEST_H__

#include <stdio.h>
#include <vector>

void print_array(std::vector< std::vector < int > > myarray);

#endif /* TEST_H__ */
test.cpp
#include "test.h"
void print_array(std::vector< std::vector < int > > myarray)
{
for (int i=0; i<2; i++)
for (int j=0; j<2; j++)
printf("[%d][%d] = [%d]\n", i, j, myarray[i][j]);
}
If you run the following Python code (I used Python 2.6.5), you can see that the C++ function can access the Python list:
>>> import test
>>> a = test.Array()
>>> a = [[0, 1], [2, 3]]
>>> test.print_array(a)
[0][0] = [0]
[0][1] = [1]
[1][0] = [2]
[1][1] = [3]
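Since the data in the original question lives in NumPy arrays, one plausible bridge to these std::vector typemaps (my addition, assuming the test module built above) is tolist():
import numpy
import test  # the SWIG-generated module from above

arr = numpy.arange(4, dtype=numpy.int32).reshape(2, 2)
# The std_vector.i typemaps accept nested Python lists, so convert first:
test.print_array(arr.tolist())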
If I write a function accepting a single unsigned integer (0 - 0xFFFFFFFF), I can use:
uint32_t myInt;
if (!PyArg_ParseTuple(args, "I", &myInt))
    return NULL;
And then from Python, I can pass an int or long.
But what if I get passed a list of integers?
uint32_t* myInts;
PyObject* pyMyInts;

PyArg_ParseTuple(args, "O", &pyMyInts);
if (PyList_Check(pyMyInts)) {
    size_t n = PyList_Size(pyMyInts);
    myInts = calloc(n, sizeof(*myInts));
    for (size_t i = 0; i < n; i++) {
        PyObject* item = PyList_GetItem(pyMyInts, i);
        // What function do I want here?
        if (!GetAUInt(item, &myInts[i]))
            return NULL;
    }
}
// cleanup calloc'd array on exit, etc
Specifically, my issue is with dealing with:
Lists containing a mixture of ints and longs
detecting overflow when assigning to the uint32_t
You could create a tuple and use the same method you used for a single argument. On the C side, the tuple objects are not really immutable, so it wouldn't be too much trouble.
Also, PyLong_AsUnsignedLong could work for you. It accepts int and long objects and raises an error otherwise. But if sizeof(long) is bigger than 4, you might need to check for an upper-bound overflow yourself:
static int
GetAUInt(PyObject *pylong, uint32_t *myint) {
    static const unsigned long MAX = 0xffffffff;
    unsigned long l = PyLong_AsUnsignedLong(pylong);
    if ((l == (unsigned long)-1 && PyErr_Occurred()) || l > MAX) {
        PyErr_SetString(PyExc_OverflowError, "can't convert to uint32_t");
        return false;
    }
    *myint = (uint32_t) l;
    return true;
}
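From the Python side, the intended behavior of such a check would look roughly like this (a sketch; mymod and take_uint32_list are hypothetical names for an extension module exposing the list-parsing loop from the question):
import mymod  # hypothetical extension module built with GetAUInt above

mymod.take_uint32_list([1, 2, 3])   # fine: every value fits in a uint32_t
mymod.take_uint32_list([2**32])     # raises OverflowError from GetAUInt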
I'm using Python to call a .so compiled from C. The C code adds two vectors as follows:
#include <stdio.h>
#include <stdbool.h>

bool add_vectors(const double * const a, const double * const b, double * const c)
{
    if (sizeof(a) != sizeof(b) || sizeof(b) != sizeof(c))
    {
        fprintf(stderr, "Vectors of different lengths cannot be added");
        fprintf(stderr, "Number of elements of a: %d", sizeof(a)/sizeof(double));
        fprintf(stderr, "Number of elements of b: %d", sizeof(b)/sizeof(double));
        fprintf(stderr, "Number of elements of c: %d", sizeof(c)/sizeof(double));
        return false;
    }

    /* Added for diagnostics only; should print 5, does print 1 */
    printf("Size of a: %d\n", sizeof(a)/sizeof(a[0]));
    printf("Size of b: %d\n", sizeof(b)/sizeof(b[0]));
    printf("Size of c: %d\n", sizeof(c)/sizeof(c[0]));

    for (int ii = 0; ii < sizeof(a)/sizeof(double); ++ii)
    {
        c[ii] = a[ii] + b[ii];
    }
    return true;
}
This is compiled in the standard way via
gcc -std=c11 -o add_vectors.so -shared -fPIC add_vectors.c
Now I attempt to call this from the following python code:
#!/usr/bin/env python
import ctypes
import numpy
add_vectors_lib = ctypes.cdll.LoadLibrary("add_vectors.so")
add_vectors = add_vectors_lib.add_vectors
add_vectors.retype = ctypes.c_bool
array_1d_double = numpy.ctypeslib.ndpointer(dtype = numpy.double, ndim=1, flags="C_CONTIGUOUS")
add_vectors.argtypes = [array_1d_double, array_1d_double, array_1d_double]
#Random vectors to add:
a = numpy.double([1,2,3,4,5])
b = numpy.double([3,4,5,6,7])
#Zero out the return value:
c = numpy.double([0,0,0,0,0])
add_vectors(a, b,c)
print(a)
print(b)
print(c)
But the output is:
Size of a: 1
Size of b: 1
Size of c: 1
[ 1. 2. 3. 4. 5.]
[ 3. 4. 5. 6. 7.]
[ 4. 0. 0. 0. 0.]
How do I make the C code recognize the proper size of these arrays and/or make the Python pass "knowledge" of the array size to the C code?
sizeof() is a compile-time operator. Applied to a pointer, it returns the size of the pointer itself: usually 4 bytes on a 32-bit architecture and 8 bytes on a 64-bit one.
Had you passed the actual variable of a statically allocated array instead, it would return the total size of the array in bytes.
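The effect is easy to reproduce from the Python side with ctypes (a small illustration I've added, not from the original answer):
import ctypes

# A bare pointer has a fixed size, no matter how long the array it points to:
ptr_size = ctypes.sizeof(ctypes.POINTER(ctypes.c_double))
print(ptr_size)                                     # 8 on a 64-bit build
print(ptr_size // ctypes.sizeof(ctypes.c_double))   # 1 -- the "Size of a: 1" above

# A real C array type, by contrast, does carry its length:
arr5 = (ctypes.c_double * 5)()
print(ctypes.sizeof(arr5))                          # 40 == 5 doubles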
The quite newbie problem with sizeof() has already been pointed out in a comment.
Well, in order to answer your question (how do I make the C code recognize the proper size of these arrays and/or make Python pass "knowledge" of the array size to the C code?), I learned how to write a module in C for Python by following this tutorial (I'm interested in learning Python).
Notice: it's a quite long answer; skip over the code parts as you wish.
Your way of writing the module is complex and bug-prone. You need a wrapper for add_vectors that takes PyObject *args as its argument, so you can correctly check the type of your parameters (with PyArg_ParseTuple) and the number of elements in each array (with PyArray_DIM).
This is part of my code:
add_vectors.c
#include <stdio.h>
#include <stdbool.h>

void add_vectors(const double * const a, const double * const b,
                 double * const c, int len)
{
    int ii;
    for (ii = 0; ii < len; ++ii)
    {
        c[ii] = a[ii] + b[ii];
    }
}
_add_vectors.c
#include <Python.h>
#include <numpy/arrayobject.h>

void add_vectors(const double * const a, const double * const b,
                 double * const c, int len);
static PyObject *add_vectors_wrapper(PyObject *self, PyObject *args);

static PyMethodDef module_methods[] = {
    {"add_vectors", add_vectors_wrapper, METH_VARARGS, NULL},
    {NULL, NULL, 0, NULL}
};

PyMODINIT_FUNC init_add_vectors(void)
{
    PyObject *m = Py_InitModule3("_add_vectors", module_methods,
                                 NULL);
    if (m == NULL)
        return;
    import_array();
}

static PyObject *add_vectors_wrapper(PyObject *self, PyObject *args)
{
    PyObject *x_obj, *y_obj, *z_obj;
    if (!PyArg_ParseTuple(args, "OOO", &x_obj, &y_obj,
                          &z_obj))
        return NULL;

    /* Interpret the input objects as numpy arrays. */
    PyObject *x_array = PyArray_FROM_OTF(x_obj, NPY_DOUBLE, NPY_IN_ARRAY);
    PyObject *y_array = PyArray_FROM_OTF(y_obj, NPY_DOUBLE, NPY_IN_ARRAY);
    PyObject *z_array = PyArray_FROM_OTF(z_obj, NPY_DOUBLE, NPY_IN_ARRAY);

    /* If that didn't work, throw an exception. */
    if (x_array == NULL || y_array == NULL || z_array == NULL) {
        Py_XDECREF(x_array);
        Py_XDECREF(y_array);
        Py_XDECREF(z_array);
        return NULL;
    }

    /* How many data points are there? */
    int xN = (int)PyArray_DIM(x_array, 0);
    int yN = (int)PyArray_DIM(y_array, 0);
    int zN = (int)PyArray_DIM(z_array, 0);

    /* size check */
    if (xN != yN || yN != zN) {
        fprintf(stderr, "Vectors of different lengths cannot be added\n");
        fprintf(stderr, "Number of elements of a: %d\n", xN);
        fprintf(stderr, "Number of elements of b: %d\n", yN);
        fprintf(stderr, "Number of elements of c: %d\n", zN);
        PyObject *ret = Py_BuildValue("s", "Failed");
        return ret;
    }

    double *x = (double*)PyArray_DATA(x_array);
    double *y = (double*)PyArray_DATA(y_array);
    double *z = (double*)PyArray_DATA(z_array);

    add_vectors(x, y, z, xN);

    /* Clean up. */
    Py_DECREF(x_array);
    Py_DECREF(y_array);
    Py_DECREF(z_array);

    /* Build the output tuple */
    PyObject *ret = Py_BuildValue("s", "Success");
    return ret;
}
setup.py (run with ./setup.py build_ext --inplace)
#!/usr/bin/env python
from distutils.core import setup, Extension
import numpy.distutils.misc_util

setup(
    ext_modules=[Extension("_add_vectors",
                           ["_add_vectors.c", "add_vectors.c"])],
    include_dirs=numpy.distutils.misc_util.get_numpy_include_dirs(),
)
addnum.py (a simple test case)
#!/usr/bin/env python
import ctypes
import numpy
from _add_vectors import add_vectors

# Random vectors to add:
a = numpy.double([1, 2, 3, 4, 5])
b = numpy.double([3, 4, 5, 6, 7])
# Zero out the return value:
c = numpy.double([0, 0, 0, 0, 0])
add_vectors(a, b, c)
print(a)
print(b)
print(c)
result
ubuntu-user:python-module$ ./addnum.py
[ 1. 2. 3. 4. 5.]
[ 3. 4. 5. 6. 7.]
[ 4. 6. 8. 10. 12.]