C function called via Python determines wrong array size - python

I'm using Python to call a .so compiled from C. The C code adds two vectors as follows:
#include <stdio.h>
#include <stdbool.h>
bool add_vectors(const double * const a, const double * const b, double * const c)
{
    if(sizeof(a) != sizeof(b) || sizeof(b) != sizeof(c))
    {
        fprintf(stderr, "Vectors of different lengths cannot be added");
        fprintf(stderr, "Number of elements of a: %d", sizeof(a)/sizeof(double));
        fprintf(stderr, "Number of elements of b: %d", sizeof(b)/sizeof(double));
        fprintf(stderr, "Number of elements of c: %d", sizeof(c)/sizeof(double));
        return false;
    }
    /* Added for diagnostics only; should print 5, does print 1 */
    printf("Size of a: %d\n", sizeof(a)/sizeof(a[0]));
    printf("Size of b: %d\n", sizeof(b)/sizeof(b[0]));
    printf("Size of c: %d\n", sizeof(c)/sizeof(c[0]));
    for(int ii = 0; ii < sizeof(a)/sizeof(double); ++ii)
    {
        c[ii] = a[ii] + b[ii];
    }
    return true;
}
This is compiled in the standard way via
gcc -std=c11 -o add_vectors.so -shared -fPIC add_vectors.c
Now I attempt to call this from the following python code:
#!/usr/bin/env python
import ctypes
import numpy
add_vectors_lib = ctypes.cdll.LoadLibrary("add_vectors.so")
add_vectors = add_vectors_lib.add_vectors
add_vectors.retype = ctypes.c_bool
array_1d_double = numpy.ctypeslib.ndpointer(dtype = numpy.double, ndim=1, flags="C_CONTIGUOUS")
add_vectors.argtypes = [array_1d_double, array_1d_double, array_1d_double]
#Random vectors to add:
a = numpy.double([1,2,3,4,5])
b = numpy.double([3,4,5,6,7])
#Zero out the return value:
c = numpy.double([0,0,0,0,0])
add_vectors(a, b,c)
print(a)
print(b)
print(c)
But the output is:
Size of a: 1
Size of b: 1
Size of c: 1
[ 1. 2. 3. 4. 5.]
[ 3. 4. 5. 6. 7.]
[ 4. 0. 0. 0. 0.]
How do I make the C code recognize the proper size of these arrays and/or make the Python pass "knowledge" of the array size to the C code?

sizeof() is a compile-time operator. Applied to a pointer, it returns the size of the pointer itself, usually 4 bytes on a 32-bit architecture and 8 bytes on a 64-bit one.
Only if you apply it to the actual variable of a statically allocated array does it return the total size of the array in bytes.
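Since a pointer carries no length information, the practical fix is to pass the element count explicitly. Below is a minimal sketch of the Python side, assuming the C function is changed to take the length, e.g. bool add_vectors(const double *a, const double *b, double *c, size_t n), and recompiled as before:

# Minimal sketch: pass the element count along with the arrays.
# Assumes add_vectors.c now has the signature
#   bool add_vectors(const double *a, const double *b, double *c, size_t n)
import ctypes
import numpy

lib = ctypes.cdll.LoadLibrary("add_vectors.so")
add_vectors = lib.add_vectors
add_vectors.restype = ctypes.c_bool

array_1d_double = numpy.ctypeslib.ndpointer(dtype=numpy.double, ndim=1,
                                            flags="C_CONTIGUOUS")
add_vectors.argtypes = [array_1d_double, array_1d_double, array_1d_double,
                        ctypes.c_size_t]

a = numpy.double([1, 2, 3, 4, 5])
b = numpy.double([3, 4, 5, 6, 7])
c = numpy.zeros_like(a)

add_vectors(a, b, c, len(a))   # the length travels with the call
print(c)                       # [ 4.  6.  8. 10. 12.]

The next answer walks through the alternative of writing a CPython extension wrapper that queries the array dimensions itself.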

The sizeof() pitfall has already been pointed out in the comments.
To answer your actual question, How do I make the C code recognize the proper size of these arrays and/or make the Python pass "knowledge" of the array size to the C code, I tried writing a Python module in C by following this tutorial (I'm interested in learning Python).
Notice: it's a rather long answer; skip the code parts as you wish.
Your current approach is complex and bug-prone. You need a wrapper around add_vectors that takes PyObject *args as its argument, so that you can correctly check the type of the parameters (with PyArg_ParseTuple) and the number of elements in each array (with PyArray_DIM).
This is part of my code:
add_vectors.c
#include <stdio.h>
#include <stdbool.h>

void add_vectors(const double * const a, const double * const b,
                 double * const c, int len)
{
    int ii;
    for(ii = 0; ii < len; ++ii)
    {
        c[ii] = a[ii] + b[ii];
    }
}
_add_vectors.c
#include <Python.h>
#include <numpy/arrayobject.h>

void add_vectors(const double * const a, const double * const b,
                 double * const c, int len);
static PyObject *add_vectors_wrapper(PyObject *self, PyObject *args);

static PyMethodDef module_methods[] = {
    {"add_vectors", add_vectors_wrapper, METH_VARARGS, NULL},
    {NULL, NULL, 0, NULL}
};

PyMODINIT_FUNC init_add_vectors(void)
{
    PyObject *m = Py_InitModule3("_add_vectors", module_methods, NULL);
    if (m == NULL)
        return;
    import_array();
}

static PyObject *add_vectors_wrapper(PyObject *self, PyObject *args)
{
    PyObject *x_obj, *y_obj, *z_obj;
    if (!PyArg_ParseTuple(args, "OOO", &x_obj, &y_obj, &z_obj))
        return NULL;

    /* Interpret the input objects as numpy arrays. */
    PyObject *x_array = PyArray_FROM_OTF(x_obj, NPY_DOUBLE, NPY_IN_ARRAY);
    PyObject *y_array = PyArray_FROM_OTF(y_obj, NPY_DOUBLE, NPY_IN_ARRAY);
    PyObject *z_array = PyArray_FROM_OTF(z_obj, NPY_DOUBLE, NPY_IN_ARRAY);

    /* If that didn't work, throw an exception. */
    if (x_array == NULL || y_array == NULL || z_array == NULL) {
        Py_XDECREF(x_array);
        Py_XDECREF(y_array);
        Py_XDECREF(z_array);
        return NULL;
    }

    /* How many data points are there? */
    int xN = (int)PyArray_DIM(x_array, 0);
    int yN = (int)PyArray_DIM(y_array, 0);
    int zN = (int)PyArray_DIM(z_array, 0);

    /* size check */
    if (xN != yN || yN != zN) {
        fprintf(stderr, "Vectors of different lengths cannot be added\n");
        fprintf(stderr, "Number of elements of a: %d\n", xN);
        fprintf(stderr, "Number of elements of b: %d\n", yN);
        fprintf(stderr, "Number of elements of c: %d\n", zN);
        PyObject *ret = Py_BuildValue("s", "Failed");
        return ret;
    }

    double *x = (double*)PyArray_DATA(x_array);
    double *y = (double*)PyArray_DATA(y_array);
    double *z = (double*)PyArray_DATA(z_array);

    add_vectors(x, y, z, xN);

    /* Clean up. */
    Py_DECREF(x_array);
    Py_DECREF(y_array);
    Py_DECREF(z_array);

    /* Build the output tuple */
    PyObject *ret = Py_BuildValue("s", "Success");
    return ret;
}
setup.py (run with ./setup.py build_ext --inplace)
#!/usr/bin/env python
from distutils.core import setup, Extension
import numpy.distutils.misc_util
setup(
    ext_modules=[Extension("_add_vectors",
                           ["_add_vectors.c", "add_vectors.c"])],
    include_dirs=numpy.distutils.misc_util.get_numpy_include_dirs(),
)
addnum.py (a simple test case)
#!/usr/bin/env python
import ctypes
import numpy
from _add_vectors import add_vectors
#Random vectors to add:
a = numpy.double([1,2,3,4,5])
b = numpy.double([3,4,5,6,7])
#Zero out the return value:
c = numpy.double([0,0,0,0,0])
add_vectors(a, b, c)
print(a)
print(b)
print(c)
result
ubuntu-user:python-module$ ./addnum.py
[ 1. 2. 3. 4. 5.]
[ 3. 4. 5. 6. 7.]
[ 4. 6. 8. 10. 12.]

Related

Replacing error with default value while coercing in numpy array

I'm trying to coerce the values in my NumPy array to float. However, some values in my array might not coerce successfully, and I want to replace those values with a default value. I do want the speed of NumPy though, so I do not want to use a Python loop. What's the best route to achieve this behavior?
For instance:
import numpy as np
my_array = np.array(["1", "2", "3", "NA"])
new_array = magic_coerce(my_array, float, -1.0) # I want to implement this
print (new_array) # should print [1., 2., 3., -1.]
I'm trying to write my own ufunc in C, and I have the following:
int is_float(const char* c)
{
    regex_t regex;
    regcomp(&regex, "^[+-]?([0-9]*[.])?[0-9]+$", REG_EXTENDED);
    return regexec(&regex, c, 0, NULL, 0) == 0;
}

float to_float(const char *c, float default_value)
{
    float result = default_value;
    if (is_float(c))
    {
        result = atof(c);
    }
    return result;
}

static PyMethodDef LogitMethods[] = {
    {NULL, NULL, 0, NULL}
};

/* The loop definition must precede the PyMODINIT_FUNC. */
static void double_logitprod(char **args, npy_intp *dimensions,
                             npy_intp* steps, void* data)
{
    npy_intp i;
    npy_intp n = dimensions[0];
    char *in1 = args[0], *in2 = args[1];
    char *out = args[2];
    npy_intp in1_step = steps[0];
    npy_intp out_step = steps[2];
    double tmp;
    for (i = 0; i < n; i++) {
        /*BEGIN main ufunc computation*/
        char *tmp1 = (char *) in1;
        tmp = *((double *)in2);
        *((double *) out) = to_float(tmp1, tmp);
        /*END main ufunc computation*/
        in1 += in1_step;
        out += out_step;
    }
}

/*This a pointer to the above function*/
PyUFuncGenericFunction funcs[1] = {&double_logitprod};

/* These are the input and return dtypes of logit.*/
static char types[3] = {NPY_OBJECT, NPY_DOUBLE, NPY_DOUBLE};
But it looks like it's not working correctly. What's the type for Unicode strings in NumPy? NPY_UNICODE gives an error, so I coerced it to NPY_OBJECT, but that does not seem to play well with it.

Python C binding - get array from python to C++

As the title says: I would like to write a Python binding in C++ that does some algebraic operations on an array. For this, I have to parse the Python "array object" into C++ as a vector of double or integer or whatever the case may be.
I tried to do this but I ran into some issues. I've created a new Python type and a class with the name Typer, which has this method that tries to get the elements of a Python array and then compute their sum (as a first step).
static PyObject *Typer_vectorsum(Typer *self, PyObject *args)
{
    PyObject *retval;
    PyObject *list;
    if (!PyArg_ParseTuple(args, "O", &list))
        return NULL;

    double *arr;
    arr = (double *)malloc(sizeof(double) * PyTuple_Size(list));
    int length;
    length = PyTuple_Size(list);

    PyObject *item = NULL;
    for (int i = 0; i < length; ++i)
    {
        item = PyTuple_GetItem(list, i);
        if (!PyFloat_Check(item))
        {
            exit(1);
        }
        arr[i] = PyFloat_AsDouble(item);
    }

    double result = 0.0;
    for (int i = 0; i < length; ++i)
    {
        result += arr[i];
    }
    retval = PyFloat_FromDouble(result);
    free(arr);
    return retval;
}
In this method I parse the python array object into a C array (allocating the memory of the array with malloc). Then I add every element from the object to my C array and just compute the sum in the last for-loop.
If I build the project and then run a Python test file, nothing happens (everything compiles without any issues, but nothing is printed).
y = example.Typer()  # Typer is the init
tuple = (1, 2, 3)
print(y.vectorsum(tuple))
Am I missing something? And also, is there a nice and easy way of getting a Python array object into C++, but as a std::vector instead of a classic C array?
Thank you in advance!
The tuple contains ints, not floats, so your PyFloat_Check fails. And no, there is no direct way from Python tuple to C array or C++ std::vector. The reason being that the tuple is an array of Python objects, not an array of C values such as doubles.
Here's your example with improved error checking, after which it should work:
PyObject *retval;
PyObject *list;
if (!PyArg_ParseTuple(args, "O!", &PyTuple_Type, &list))
    return NULL;

double *arr = (double *)malloc(sizeof(double) * PyTuple_GET_SIZE(list));
int length = PyTuple_GET_SIZE(list);

PyObject *item = NULL;
for (int i = 0; i < length; ++i)
{
    item = PyTuple_GET_ITEM(list, i);
    arr[i] = PyFloat_AsDouble(item);
    if (arr[i] == -1. && PyErr_Occurred())
    {
        /* Conversion failed: propagate the Python exception instead of exiting. */
        free(arr);
        return NULL;
    }
}

double result = 0.0;
for (int i = 0; i < length; ++i)
{
    result += arr[i];
}
retval = PyFloat_FromDouble(result);
free(arr);
return retval;
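For completeness, a rough usage sketch from the Python side, reusing the example module and Typer class names from the question; since PyFloat_AsDouble also converts ints, the original tuple of ints now works:

import example

y = example.Typer()
print(y.vectorsum((1, 2, 3)))    # ints are converted by PyFloat_AsDouble -> 6.0
print(y.vectorsum((1.5, 2.5)))   # floats work as before -> 4.0
# Passing anything other than a tuple is rejected by the "O!" format with a TypeError.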

How to write a fast code in C++ compared to numpy.logspace() function?

This is Python code that generates log-spaced values very quickly:
import numpy
print(numpy.logspace(0,1,num=10000000))
My attempt to reproduce its output in C++ is the following:
#include <iostream>
#include <cmath>
#include <vector>
std::vector<double> logspace (const double &a, const double &b, const int &k)
{
    std::vector<double> logspace;
    for (int i = 0; i < k; i++)
    {
        logspace.push_back(pow(10, i * (b - a) / (k - 1)));
    }
    return logspace;
}

void logspace_print (std::vector<double> logspace)
{
    for (auto ls : logspace)
    {
        std::cout << ls << "\n";
    }
    std::cout << "\n";
}

int main ()
{
    std::vector<double> my_ls = logspace(0, 1, 10000000);
    logspace_print(my_ls);
}
Setting floating-point subtleties aside, the use of pow(., .) and a plain for-loop (and probably lots of other reasons) make my code naive, and its run time is far worse than the Python version's. I also looked at the recommendations in Is there something like numpy.logspace in C++?, but saw no significant improvement. So, how can I modify my code, or write a new one, so that it is comparable with Python's version?
Interesting question! My answer has the different versions of the function at the top; below them is only the benchmarking code, which uses google-benchmark as the library.
My intermediate results can also be found on Quick-Bench.com, which is generally a great site.
You don't say whether you want to measure printing to stdout as part of your use case or not. Printing is generally expensive. You avoid std::endl's flush, which is good! Furthermore, printf might be faster than std::cout. Also take a look at fmtlib; it is fast and easy to use.
Generally, the approach that NumPy uses is fastest (named logspace_v3 in my version). It consists of first running linspace and then raising 10 to those values in place.
Still, I strongly feel that I am missing quite a bit here. With the appropriate flags (-march=native -mtune=native, and -ffast-math) vectorization should kick in, but I don't believe it does. Here is some Godbolt output with vectorization (line 590).
What was fastest was getting rid of the pow call entirely (logspace_v6). Note that this accumulates floating-point error and leads to inaccurate results.
Minor: there is no benefit to passing doubles or ints by const reference.
#include <algorithm>
#include <benchmark/benchmark.h>
#include <cmath>
#include <iostream>
#include <numeric>
#include <vector>
#include <gtest/gtest.h>
std::vector<double> logspace(double a, double b, int k) {
std::vector<double> logspace;
for (int i = 0; i < k; i++) {
logspace.push_back(pow(10, i * (b - a) / (k - 1)));
}
return logspace;
}
// Pre-allocate the correct size using .reserve()
std::vector<double> logspace_v1(double a, double b, int k) {
std::vector<double> logspace;
logspace.reserve(k);
for (int i = 0; i < k; i++) {
logspace.push_back(pow(10, i * (b - a) / (k - 1)));
}
return logspace;
}
/// Manually extract the constant factor.
std::vector<double> logspace_v2(double a, double b, int k) {
std::vector<double> logspace;
logspace.reserve(k);
const auto exp_scale = (b - a) / (k - 1);
for (int i = 0; i < k; i++) {
logspace.push_back(pow(10, i * exp_scale));
}
return logspace;
}
/// Copy the impl behavior of numpy.linspace: First linspace then power.
std::vector<double> logspace_v3(double a, double b, int k) {
/*
y = linspace(start, stop, num=num, endpoint=endpoint, axis=axis)
if dtype is None:
return _nx.power(base, y)
return _nx.power(base, y).astype(dtype, copy=False)
*/
const auto exp_scale = (b - a) / (k - 1);
std::vector<double> logspace;
logspace.reserve(k);
for (int i = 0; i < k; i++) {
logspace.push_back(i * exp_scale);
}
std::for_each(logspace.begin(), logspace.end(),
[](double &x) { x = pow(10, x); });
return logspace;
}
/// Improve on v3 by applying pow directly
std::vector<double> logspace_v4(double a, double b, int k) {
const auto exp_scale = (b - a) / (k - 1);
std::vector<double> logspace(k, 0.);
std::generate(logspace.begin(), logspace.end(),
[n = -1, exp_scale]() mutable {
n++;
return pow(10, n * exp_scale);
});
return logspace;
}
/// Use generate_n : First linspace then power.
std::vector<double> logspace_v5(double a, double b, int k) {
const auto exp_scale = (b - a) / (k - 1);
std::vector<double> logspace(k, 0.);
std::iota(logspace.begin(), logspace.end(), 0);
std::for_each(logspace.begin(), logspace.end(),
[exp_scale](double &x) { x *= exp_scale; });
std::for_each(logspace.begin(), logspace.end(),
[](double &x) { x = pow(10, x); });
return logspace;
}
std::vector<double> logspace_v6(double a, double b, int k) {
const auto exp_scale = (b - a) / (k - 1);
const auto factor = pow(10, exp_scale);
std::vector<double> logspace;
logspace.reserve(k);
// val = pow(b, i * exp_scale);
// = pow(pow(b, exp_scale), i);
// = pow(f, i); with f := pow(b, exp_scale);
// next = cur * f;
// first = pow(b, a);
double val = pow(10, a);
for (int i = 0; i < k; i++) {
logspace.push_back(val);
val *= factor;
}
return logspace;
}
template <std::vector<double> (*F)(double, double, int)>
static void LogspaceBench(benchmark::State &state) {
for (auto _ : state) {
benchmark::DoNotOptimize(F(0, 1, state.range(0)));
}
}
BENCHMARK_TEMPLATE(LogspaceBench, logspace)->Arg(1000);
BENCHMARK_TEMPLATE(LogspaceBench, logspace_v1)->Arg(1000);
BENCHMARK_TEMPLATE(LogspaceBench, logspace_v2)->Arg(1000);
BENCHMARK_TEMPLATE(LogspaceBench, logspace_v3)->Arg(1000)->Arg(10000000);
BENCHMARK_TEMPLATE(LogspaceBench, logspace_v4)->Arg(1000);
BENCHMARK_TEMPLATE(LogspaceBench, logspace_v5)->Arg(1000);
BENCHMARK_TEMPLATE(LogspaceBench, logspace_v6)->Arg(1000)->Arg(10000000);
class LogspaceTest
: public testing::TestWithParam<
std::function<std::vector<double>(double, double, int)>> {};
TEST_P(LogspaceTest, IsSame) {
auto func = GetParam();
const auto actual = func(0, 1., 1000);
const auto expected = logspace(0., 1., 1000);
// TODO: Buggy with (3, 70, 1000) and (0, 1, 1000)
ASSERT_EQ(expected.size(), actual.size());
for (int i = 0; i < expected.size(); i++) {
ASSERT_DOUBLE_EQ(actual[i], expected[i]) << i;
}
}
INSTANTIATE_TEST_SUITE_P(InstantiationName, LogspaceTest,
testing::Values(logspace, logspace_v1, logspace_v2,
logspace_v3, logspace_v4, logspace_v5,
logspace_v6));
int main(int argc, char **argv) {
::benchmark::Initialize(&argc, argv);
::benchmark::RunSpecifiedBenchmarks();
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}
There are at least three obvious optimizations that can be easily made to the shown code.
1) Compile in C++17 mode to get guaranteed copy elision when returning from logspace.
2)
std::vector<double> logspace;
for (int i = 0; i < k; i++)
Use logspace.reserve() to preallocate the vector, to avoid useless repeated reallocations, while this vector gets populated.
3)
void logspace_print (std::vector<double> logspace)
Passing by value here creates an entire duplicate copy of the vector, for no useful purpose whatsoever. Change this function so that it takes the logspace parameter by reference.
There's one possible micro-optimization that may or may not make any difference:
logspace.push_back(pow(10, i * (b - a) / (k - 1)));
The "(b-a)/(k-1)" part of this formula is constant and can be unrolled out of the loop. I would, though, expect the compiler to do it on its own, it's a fairly basic optimization.

Python : Call by Reference

I'm new to Python. Can anyone help me understand call by reference in Python?
#include <stdio.h>
#include <conio.h>
#include <malloc.h>

void rd(float *a, int *n)
{
    int i;
    for (i = 1; i <= *n; i++) {
        printf("Enter element %d: ", i);
        scanf("%f", &a[i]);
    }
}

float sum(float *a, int *n)
{
    int i;
    float s = 0;
    for (i = 1; i <= *n; i++)
        s = s + a[i];
    return s;
}

int main(void)
{
    int size;
    float *x, g;
    printf("Give size of array: ");
    scanf("%d", &size);
    x = (float *)malloc(size*sizeof(float)); // dynamic memory allocation
    printf("\n");
    rd(x, &size);       // passing the addresses
    g = sum(x, &size);  // passing the addresses
    printf("\nSum of elements = %f\n", g);
    printf("\nDONE ! Hit any key ...");
    getch();
    return 0;
}
This is the C example I am trying to reproduce in Python. Any help would be appreciated.
In python there is no way to pass "the address" of a "place" (a variable, an array element, a dictionary value or an instance member).
The only way to provide other code the ability to change a place is to provide a "path" to reach it (e.g. the variable name, the array and the index and so on). As a very strange alternative (not used often in Python) you can pass a "writer" function that will change the place... for example:
def func(a, b, placeWriter):
    placeWriter(a + b)

def caller():
    mylist = [1, 2, 3, 4]
    def writer(x):
        mylist[3] = x
    func(10, 20, writer)
Much more common instead is writing functions that simply return the needed values; note that in Python returning multiple values is trivial while in C this is not supported and passing addresses is used instead:
def func():             # void f(int *a, int *b, int *c) {
    return 1, 2, 3      #     *a=1; *b=2; *c=3;
                        # }

def caller():           # void caller() { int a, b, c;
    a, b, c = func()    #     func(&a, &b, &c);
    ...
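For the concrete C program in the question, a rough Python sketch (function and variable names chosen freely) shows that neither addresses nor a writer function are needed: the list object itself is the shared "place", so rd() can fill it in place and the sum is simply returned:

def rd(a, n):
    for i in range(n):
        a.append(float(input("Enter element %d: " % (i + 1))))

def total(a):
    return sum(a)

def main():
    size = int(input("Give size of array: "))
    x = []           # no malloc needed; the list grows as rd() appends to it
    rd(x, size)      # x is mutated in place, the "call by reference" effect
    print("Sum of elements = %f" % total(x))

if __name__ == "__main__":
    main()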

How to port a specific C module to Python 3?

There is a thinning pip package that currently compiles only with Python 2.
When I install it with sudo pip install thinning and then attempt to import thinning, I get an error:
ImportError: /usr/lib/python3.5/site-packages/thinning.cpython-35m-x86_64-linux-gnu.so: undefined symbol: Py_InitModule3
I assume this is because Py_InitModule3 is not used by Python 3 anymore.
Here is complete c source file:
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#include "Python.h"
#include "arrayobject.h"
#include <stdlib.h>
#include <assert.h>
#include <stdbool.h>
#include <limits.h>
static PyObject *guo_hall_thinning(PyObject *self, PyObject *args);
int _guo_hall_thinning(unsigned char* binary_image, int width, int height);
void initthinning(void);
/* ==== Set up the methods table ====================== */
static PyMethodDef thinningMethods[] = {
{"guo_hall_thinning",guo_hall_thinning, METH_VARARGS,
"Takes a 2D numpy UBYTE array in C-order and thins it in place using the algorithm by Guo and Hall."
"Images that come out of cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) have the right format."
"\n\n"
"We assume that the dimensions of the image fit into an int on your platform. If your computer for some"
"reason has a 2 byte int and lots of memory so that the image can become too large, bad things can happen."
"\n\n"
"interface:\n"
"\tguo_hall_thinning(segmented_image)"
"\tsegmented_image is a NumPy matrix,"
"\treturns the same NumPy matrix (thinned)"},
{NULL, NULL, 0, NULL} /* Sentinel - marks the end of this structure */
};
/* ==== Initialize the C_test functions ====================== */
void initthinning() {
PyObject* module = Py_InitModule3("thinning",thinningMethods, "Thinning of segmented images. See https://bitbucket.org/adrian_n/thinning.");
PyModule_AddStringConstant(module, "__author__", "Adrian Neumann <adrian_neumann#gmx.de>");
PyModule_AddStringConstant(module, "__version__", "1.2.3");
import_array(); // Must be present for NumPy. Called first after above line.
}
/* ==== Guo Hall Thinning =========
Takes a 2D numpy UBYTE array in C-order and thins it in place using the algorithm by Guo and Hall.
Images that come out of cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) have the right format.
We assume that the dimensions of the image fit into an int on your platform. If your computer for some
reason has a 2 byte int and lots of memory so that the image can become too large, bad things can happen.
interface: guo_hall_thinning(segmented_image)
segmented_image is a NumPy matrix,
returns the same NumPy matrix (thinned)
*/
static PyObject *guo_hall_thinning(PyObject *self, PyObject *args)
{
PyArrayObject *segmented_image;
/* Parse tuples separately since args will differ between C fcns */
if (!PyArg_ParseTuple(args, "O!", &PyArray_Type, &segmented_image)) {
return NULL;
}
if (NULL == segmented_image) {
PyErr_SetString(PyExc_TypeError, "Parameter is not a valid image");
return NULL;
}
if (PyArray_TYPE(segmented_image) != NPY_UBYTE || !PyArray_CHKFLAGS(segmented_image, NPY_ARRAY_CARRAY)) {
PyErr_SetString(PyExc_TypeError, "Parameter is not a grayscale image");
return NULL;
}
npy_intp* shape = PyArray_DIMS(segmented_image);
int height = (int)shape[0];
int width = (int)shape[1];
unsigned char *in_data = PyArray_DATA(segmented_image);
if (height>=3 && width>=3) {
int ok = _guo_hall_thinning(in_data, width, height);
if (ok<0) {
return PyErr_NoMemory();
}
}
Py_INCREF(segmented_image);
return (PyObject*)segmented_image;
}
int nonzero_clever(const unsigned char* arr, unsigned int start, unsigned int len) {
/* find the first nonzero element from arr[start] to arr[start+len-1] (inclusive)
look at a long long at a time to be faster on 64 bit cpus */
const unsigned int step=sizeof(unsigned long long)/sizeof(unsigned char);
unsigned int i=start;
//unsigned types should throw exceptions on under/overflow...
while(len>step && i<len-step) {
if (*((unsigned long long*)(arr +i))==0) {
i+=step;
} else {
int j=0;
while(arr[i+j]==0) j++;
return i+j;
}
}
while(i<len) {
if (arr[i]!=0) { return i;}
i++;
}
return len;
}
int guo_hall_iteration(const unsigned char* binary_image, unsigned char* mask, const unsigned int width, const unsigned int height, const int iteration) {
/* one iteration of the algorithm by guo and hall. see their paper for an explanation.
We only consider nonzero elemets of the image. We never reinitialize the mask, once a pixel is
black, it will never become white again anyway. */
unsigned int changed = 0;
for (unsigned int j = 1; j < height-1; j++) {
const unsigned char* line = binary_image+j*width;
unsigned int start=0;
const int len = width-1;
while(start+1<len) {
start = nonzero_clever(line, start+1, len);
if (start==len) break;
const unsigned int i = start;
assert(line[i]!=0);
assert(binary_image[i + j*width]!=0);
const bool p2 = binary_image[i-1 + width*j];
const bool p6 = binary_image[i+1 + width*j];
const bool p9 = binary_image[i-1 + width*(j-1)];
const bool p8 = binary_image[i + width*(j-1)];
const bool p7 = binary_image[i+1 + width*(j-1)];
const bool p3 = binary_image[i-1 + width*(j+1)];
const bool p4 = binary_image[i + width*(j+1)];
const bool p5 = binary_image[i+1 + width*(j+1)];
const unsigned int C = ((!p2 && (p3 || p4)) +
(!p4 && (p5 || p6)) +
(!p6 && (p7 || p8)) +
(!p8 && (p9 || p2)));
// printf("%d %d %d %d %d %d %d %d\n",p2,p3,p4,p5,p6,p7,p8,p9);
if (C==1) {
const unsigned int N1 = (p9 || p2) + (p3 || p4) + (p5 || p6) + (p7 || p8);
const unsigned int N2 = (p2 || p3) + (p4 || p5) + (p6 || p7) + (p8 || p9);
const unsigned int N = N1 < N2 ? N1 : N2;
unsigned int m;
if (iteration == 0)
{m = (p8 && (p6 || p7 || !p9));}
else
{m = (p4 && (p2 || p3 || !p5));}
if (2 <= N && N <= 3 && m == 0) {
mask[i + width*j] = 0;
changed += 1;
}
}
}
}
return changed;
}
void andImage(unsigned char* image, const unsigned char* mask, const int size) {
/* calculate image &=mask.
to be faster on 64 bit cpus, we do this one long long at a time */
const int step = sizeof(unsigned long long)/sizeof(unsigned char);
unsigned long long* image_l = (unsigned long long*)image;
const unsigned long long* mask_l = (unsigned long long*) mask;
unsigned int i=0;
for(; size/step>2 && i<size/step-2; i+=2) {
image_l[i] = image_l[i] & mask_l[i];
image_l[i+1] = image_l[i+1] & mask_l[i+1];
}
for(i=i*step; i<size; ++i) {
image[i] = image[i] & mask[i];
}
}
int _guo_hall_thinning(unsigned char* binary_image, int width, int height) {
/* return -1 if we can't allocate the memory for the mask, else 0 */
int changed;
unsigned char* mask = (unsigned char*) malloc(width*height*sizeof(unsigned char));
if (mask==NULL) {
return -1;
}
memset(mask, UCHAR_MAX, width*height);
do {
changed = guo_hall_iteration(binary_image, mask, width, height, 0);
andImage(binary_image, mask, width*height);
changed += guo_hall_iteration(binary_image, mask, width, height, 1);
andImage(binary_image, mask, width*height);
} while (changed != 0);
free(mask);
return 0;
}
I've started reading Porting Extension Modules to Python 3 but I must admit there is little I can understand.
I tried to change Py_InitModule to its Python 3 analogue PyModule_Create, with some other code adjustments, but it didn't work. Unfortunately, this thinning module is a hard dependency for our application, so I am pretty stuck right now, without the time and knowledge to port this module to Python 3.
What has changed:
Note: I can't really get into the details of what the function guo_hall_thinning does per se. What I know is that it uses a small subset of the numpy C-API for getting and returning the data as an ndarray; I couldn't find any documentation on them being altered so it should be good to go.
Now, what has definitely changed is the way modules are initialized; with this I can help you and get it imported in a Python 3 distribution. I'm using 3.5 for this too, although I believe the differences between versions of the 3.x family are minor or backwards compatible.
As you noted, general information is provided in the Porting to Python 3 document with specifics about the initialization phase in Module Initialization and state. The new change is described in PEP 3121 which, by itself, is a nice but challenging read.
Now, the gist of it can be listed in two points:
A) Modules are now defined in a dedicated PyModuleDef struct:
struct PyModuleDef {
    PyModuleDef_Base m_base;   /* To be filled out by the interpreter */
    const char *m_name;        /* Module name */
    const char *m_doc;         /* Module docstring, may be NULL */
    Py_ssize_t m_size;         /* Size of per-module data */
    PyMethodDef *m_methods;
    inquiry m_reload;
    traverseproc m_traverse;
    inquiry m_clear;
    freefunc m_free;
};
This new struct contains additional members holding the name and documentation for the module (m_name and m_doc). The members m_reload, m_traverse, m_clear and m_free provide additional control during initialization/finalization, but we can opt to leave them as NULL. Leaving them NULL, along with setting m_size to -1, keeps things simple; setting these values is generally done to support multiple interpreters/multiple initializations and is trickier.
So, in short, the fancy new module struct for the thinning module could look like this:
static struct PyModuleDef moduledef = {
    PyModuleDef_HEAD_INIT,
    "thinning",
    "Thinning of segmented images. See https://bitbucket.org/adrian_n/thinning",
    -1,
    thinningMethods,
    NULL,
    NULL,
    NULL,
    NULL
};
aaand that's it for the first issue!
B) The new initialization function, i.e. you'll need to give initthinning a major face-lift.
The new module initialization function returns a PyObject * and is now named PyInit_<module_name>. In it (heh, get it?) new modules are created with PyModule_Create(&moduledef) which takes the struct we defined and returns the initialized module. It's prettier now and looks like this:
/* ==== Initialize the C_test functions ====================== */
PyObject *
PyInit_thinning(void){
    // create module
    PyObject *module = PyModule_Create(&moduledef);
    // handle probable error
    if (module == NULL)
        return NULL;
    PyModule_AddStringConstant(module, "__author__", "Adrian Neumann <adrian_neumann#gmx.de>");
    PyModule_AddStringConstant(module, "__version__", "1.2.3");
    import_array(); // Must be present for NumPy. Called first after above line.
    // return newly created module
    return module;
}
Installing the module:
All this is for the initialization of the module. You can download the module (as you have done, I believe) find the thinning_folder/src/c_thinning.c file and replace everything prior to:
/* ==== Guo Hall Thinning =========
with the following:
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#include "Python.h"
#include "arrayobject.h"
#include <stdlib.h>
#include <assert.h>
#include <stdbool.h>
#include <limits.h>

static PyObject *guo_hall_thinning(PyObject *self, PyObject *args);
int _guo_hall_thinning(unsigned char* binary_image, int width, int height);

/* ==== Set up the methods table ====================== */
static PyMethodDef thinningMethods[] = {
    {"guo_hall_thinning", guo_hall_thinning, METH_VARARGS,
     "Takes a 2D numpy UBYTE array in C-order and thins it in place using the algorithm by Guo and Hall."
     "Images that come out of cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) have the right format."
     "\n\n"
     "We assume that the dimensions of the image fit into an int on your platform. If your computer for some"
     "reason has a 2 byte int and lots of memory so that the image can become too large, bad things can happen."
     "\n\n"
     "interface:\n"
     "\tguo_hall_thinning(segmented_image)"
     "\tsegmented_image is a NumPy matrix,"
     "\treturns the same NumPy matrix (thinned)"},
    {NULL, NULL, 0, NULL} /* Sentinel - marks the end of this structure */
};

static struct PyModuleDef moduledef = {
    PyModuleDef_HEAD_INIT,
    "thinning",
    "Thinning of segmented images. See https://bitbucket.org/adrian_n/thinning.",
    -1,
    thinningMethods,
    NULL,
    NULL,
    NULL,
    NULL
};

/* ==== Initialize the C_test functions ====================== */
PyObject *
PyInit_thinning(void){
    PyObject *module = PyModule_Create(&moduledef);
    if (module == NULL)
        return NULL;
    PyModule_AddStringConstant(module, "__author__", "Adrian Neumann <adrian_neumann#gmx.de>");
    PyModule_AddStringConstant(module, "__version__", "1.2.3");
    import_array(); // Must be present for NumPy. Called first after above line.
    return module;
}
/* ==== Guo Hall Thinning =========
// Leave the rest as it was
After that, navigate to the top level directory containing setup.py and run:
python setup.py install
as usual. Some compilation warnings will probably pop up, but those are safe to ignore. If all goes well you'll get a successful install, and the following will not result in a nasty seg-fault:
>>> from thinning import guo_hall_thinning
>>> print(guo_hall_thinning.__doc__)
Takes a 2D numpy UBYTE array in C-order and thins it in place using the algorithm by Guo and Hall.Images that come out of cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) have the right format.
We assume that the dimensions of the image fit into an int on your platform. If your computer for somereason has a 2 byte int and lots of memory so that the image can become too large, bad things can happen.
interface:
guo_hall_thinning(segmented_image) segmented_image is a NumPy matrix, returns the same NumPy matrix (thinned)
It seems to run :) :
I further edited the source in c_thinning.c to print out the number of elements changed during every iteration. It seems to be changing things but I don't understand what underlying criteria it uses because I haven't read the corresponding paper.
In short, guo_hall_thinning(ndarr) apparently does the 'thinning' in place. This means that after it is executed, the original array that was supplied as a parameter is going to be altered. So, a check of the form:
gray_img == guo_hall_thinning(gray_img)
is always going to be True (Hint: check for equality between numpy arrays with (arr1 == arr2).all()).
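A short sketch of that pitfall (gray_img here stands for any uint8 image you have loaded):

before = gray_img.copy()                        # keep an untouched copy
thinned = thinning.guo_hall_thinning(gray_img)  # thins gray_img in place, returns the same object
print((thinned == gray_img).all())              # True: they are literally the same array
print((thinned == before).all())                # generally False once pixels have been set to 0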
Here's a test I ran in which you can visually see the altering taking place, I believe this test can be reproduced on your machine too:
# dtype = 'B' is UBYTE
>>> n = numpy.ndarray(shape=(100, 200), dtype='B')
>>> n
array([[ 40, 159, 95, ..., 114, 114, 97],
[121, 95, 108, ..., 114, 101, 32],
[ 48, 161, 90, ..., 127, 0, 0],
...,
[110, 32, 97, ..., 124, 1, 0],
[124, 5, 0, ..., 0, 0, 131],
[ 1, 0, 25, ..., 0, 125, 17]], dtype=uint8)
>>> thinning.guo_hall_thinning(n)
-- Array height 100 Array width: 200
Value of `changed` during 0 iteration is: 1695
Value of `changed` during 1 iteration is: 1216
Value of `changed` during 2 iteration is: 808
Value of `changed` during 3 iteration is: 493
Value of `changed` during 4 iteration is: 323
Value of `changed` during 5 iteration is: 229
Value of `changed` during 6 iteration is: 151
Value of `changed` during 7 iteration is: 90
Value of `changed` during 8 iteration is: 46
Value of `changed` during 9 iteration is: 27
Value of `changed` during 10 iteration is: 11
Value of `changed` during 11 iteration is: 8
Value of `changed` during 12 iteration is: 7
Value of `changed` during 13 iteration is: 4
Value of `changed` during 14 iteration is: 0
Value of `ok` is: 0
# array returned
array([[ 40, 159, 95, ..., 114, 114, 97],
[121, 0, 0, ..., 114, 0, 32],
[ 48, 0, 0, ..., 127, 0, 0],
...,
[110, 0, 97, ..., 124, 1, 0],
[124, 5, 0, ..., 0, 0, 131],
[ 1, 0, 25, ..., 0, 125, 17]], dtype=uint8)
So I'm guessing it does work :-).
