How to expose a numpy array from c array in cython? - python

cpdef myf():
# Goal of this snippet: fill a fixed-size C array and hand its contents back
# to Python as a numpy array.
# pd has to be a C array because it will then be consumed by some C function.
cdef double pd[8000]
# Do something with pd
...
# Wrap the C array in a typed memoryview.
cdef double[:] pd_view = pd
# Coerce to a numpy array. Not working as written: np.asarray is handed the
# raw C array `pd` instead of the memoryview `pd_view` declared just above.
# NOTE(review): pd is a local C array; presumably an array that only wraps
# pd_view must be copied before it is returned -- confirm.
ret = np.asarray(pd)
return ret
I would like it to return a numpy array. How can I do it?
For the moment I have to do
# Current workaround: allocate a numpy array of the same length and copy the
# C array into it one element at a time.
pd_np = np.zeros(8000, dtype=np.double)
cdef int i
for i in range(8000):
pd_np[i] = pd[i]

If you are just declaring your array in your function why not make it a numpy array to begin with, then when you need the c array you can just grab the data pointer.
cimport numpy as np
import numpy as np
def myf():
    """Return an 8000-element float64 array that was filled through a C pointer.

    The storage is owned by the numpy array; ``pd`` is only a raw pointer to
    its first element, so it can be handed to C code that expects ``double*``
    while the array itself is returned to Python without any copy.
    """
    # Typed index so the fill loop compiles to a plain C loop.
    cdef Py_ssize_t i
    # dtype is spelled out so the buffer always matches the declared `double`.
    cdef np.ndarray[double, ndim=1, mode="c"] pd_numpy = np.empty(8000, dtype=np.float64)
    cdef double *pd = &pd_numpy[0]
    # Do something to fill pd with values
    for i in range(8000):
        pd[i] = i
    return pd_numpy

I made a typo,
ret = np.asarray(pd_view) works

In the memview example here http://docs.cython.org/src/userguide/memoryviews.html
# Memoryview on a C array: carr is a 3x3x3 block of C ints and carr_view is a
# typed memoryview declared on top of it.
cdef int carr[3][3][3]
cdef int [:, :, :] carr_view = carr
# Fill through the view from another view; narr_view is defined outside this
# excerpt (the trailing comment shows the array it is taken from).
carr_view[...] = narr_view # np.arange(27, dtype=np.dtype("i")).reshape((3, 3, 3))
# Overwrite a single element through the view.
carr_view[0, 0, 0] = 100
I can create a numpy array from carr_view, the memory view on carr, a C array.
# Passing the raw C array to numpy is rejected by Cython:
# print np.array(carr) # cython error
# Passing the memoryview works.
print 'numpy array on carr_view'
print np.array(carr_view)
print np.array(carr_view).sum() # match sum3d(carr)
# or np.asarray(carr_view)
# Alternative: allocate a numpy array first and assign the whole view into it.
print 'numpy copy from carr_view'
carr_copy = np.empty((3,3,3))
carr_copy[...] = carr_view[...] # don't need indexed copy
print carr_copy
print carr_copy.sum() # match sum3d(carr)

Related

No real gains using cython

I am trying to use a modified version of the RANSAC regressor algorithm. Rather than changing sklearn's function (to avoid the problems of using sklearn 0.24.1 and to avoid some warnings from the function), I found an implementation on GitHub and tried to cythonize it to improve speed before making my modifications. To my surprise the speed gain was very poor. Is that because the code is full of numpy calls, or did I do something wrong? The following are the Python version and the Cython version (with the modifications needed to avoid errors):
##Python version
import numpy as np
def ransac_polyfit(x, y, order, t, n=0.8, k=100, f=0.9):
    """Fit a polynomial to (x, y) with a simple RANSAC loop.

    Parameters:
        x, y: 1-D numpy arrays of equal length.
        order: degree of the polynomial passed to ``np.polyfit``.
        t: absolute residual below which a point counts as an inlier.
        n: fraction of the points drawn (with replacement) for each trial fit.
        k: number of trials.
        f: fraction of the points that must be inliers for a trial to count.

    Returns:
        The coefficient array of the accepted model with the smallest summed
        absolute residual over its inliers, or ``np.array([None])`` when no
        trial was accepted.
    """
    # Loop invariants, computed once instead of on every trial.
    n_points = len(x)
    sample_size = int(n * n_points)
    min_inliers = n_points * f

    besterr = np.inf
    bestfit = np.array([None])
    for _ in range(k):
        maybeinliers = np.random.randint(n_points, size=sample_size)
        maybemodel = np.polyfit(x[maybeinliers], y[maybeinliers], order)
        alsoinliers = np.abs(np.polyval(maybemodel, x) - y) < t
        # count_nonzero counts in C; the builtin sum() walks the boolean
        # array one Python object at a time.
        if np.count_nonzero(alsoinliers) > min_inliers:
            x_in = x[alsoinliers]
            y_in = y[alsoinliers]
            bettermodel = np.polyfit(x_in, y_in, order)
            thiserr = np.sum(np.abs(np.polyval(bettermodel, x_in) - y_in))
            if thiserr < besterr:
                bestfit = bettermodel
                besterr = thiserr
    return bestfit
##Cython version
cimport cython
cimport numpy as np
import numpy as np
np.import_array()
# The two directives below appeared as `#cython...` comments in this copy;
# they are restored as decorators.
@cython.boundscheck(False)
@cython.wraparound(False)
cdef ransac_polyfit(np.ndarray[np.npy_int64, ndim=1] x,
                    np.ndarray[np.npy_int64, ndim=1] y,
                    np.npy_intp order,
                    np.npy_float32 t,
                    np.npy_float32 n=0.8,
                    np.npy_intp k=100,
                    np.npy_float32 f=0.9):
    """RANSAC polynomial fit; returns ``(ransac_control, bestfit)``.

    ``ransac_control`` is 1 when at least one trial model was accepted and 0
    otherwise. In the latter case ``bestfit`` is the placeholder
    ``np.zeros(1)``, because an empty array cannot be returned through the
    typed buffer.

    Every step of each trial (sampling, polyfit, polyval, masking) is a call
    into numpy through the Python API.
    """
    cdef np.npy_intp kk, ransac_control
    cdef Py_ssize_t n_points = len(x)
    cdef double min_inliers = n_points * f
    # -1.0 marks "no model accepted yet"; a real error is a sum of absolute
    # values and therefore never negative.
    cdef np.npy_float64 thiserr, besterr = -1.0
    cdef np.ndarray[np.npy_int64, ndim=1] maybeinliers
    # cast=True lets a numpy bool array be bound to the npy_bool buffer.
    cdef np.ndarray[np.npy_bool, ndim=1, cast=True] alsoinliers
    cdef np.ndarray[np.npy_float64, ndim=1] bestfit, maybemodel, bettermodel

    sample_size = int(n * n_points)
    bestfit = np.zeros(1, dtype=np.float64)
    for kk in range(k):
        maybeinliers = np.random.randint(n_points, size=sample_size)
        maybemodel = np.polyfit(x[maybeinliers], y[maybeinliers], order)
        # polyval is qualified with np. so it does not depend on a separate
        # `from numpy import polyval` that is not visible in this snippet.
        alsoinliers = np.abs(np.polyval(maybemodel, x) - y) < t
        if np.count_nonzero(alsoinliers) > min_inliers:
            bettermodel = np.polyfit(x[alsoinliers], y[alsoinliers], order)
            thiserr = np.sum(np.abs(np.polyval(bettermodel, x[alsoinliers]) - y[alsoinliers]))
            if (thiserr < besterr) or (besterr == -1.0):
                bestfit = bettermodel
                besterr = thiserr
    # ransac_control tells the caller whether bestfit is a real model.
    ransac_control = 0 if besterr == -1.0 else 1
    return ransac_control, bestfit
**PS: Couldn't send the image of the HTML page of the cython code because it's my first question

passing a two dimensional dynamic pointer array as an argument to a function in cython

In my code, I am trying to define a dynamic array whose number of rows and columns changes depending on new conditions inside the function, meaning I might add more rows or columns. I tried to build a two-dimensional array of pointers, and I want to be able to pass this 2-D pointer array as an argument to a function.
This is the small part of my code:
Update: test.pyx
from libc.string cimport memset
import numpy as np
cimport numpy as np
cimport cython
from cython.view cimport array as cvarray
from libc.stdlib cimport malloc, free
from libc.math cimport log, exp
from cython_gsl cimport *
import ctypes
cdef gsl_rng *r = gsl_rng_alloc(gsl_rng_mt19937)
cdef int** zeros2(dim) except NULL:
    """Allocate a zero-filled ``dim[0]`` x ``dim[1]`` matrix of C ints.

    The result is a table of ``dim[0]`` row pointers, each pointing at its
    own block of ``dim[1]`` ints. The caller owns the memory and must
    ``free`` every row and then the table itself.

    Raises ValueError if ``dim`` does not have exactly two entries and
    MemoryError if an allocation fails. ``except NULL`` is what lets these
    exceptions reach the caller from a pointer-returning cdef function.
    """
    cdef Py_ssize_t i, j
    cdef Py_ssize_t rows, cols
    cdef int **matrix
    if len(dim) != 2:
        raise ValueError("dim must have exactly two entries")
    rows = dim[0]
    cols = dim[1]
    # Request at least one element so a zero-sized dimension cannot make
    # malloc return NULL and be mistaken for an allocation failure.
    matrix = <int**> malloc(sizeof(int*) * (rows if rows > 0 else 1))
    if matrix == NULL:
        raise MemoryError()
    for i in range(rows):
        matrix[i] = <int*> malloc(sizeof(int) * (cols if cols > 0 else 1))
        if matrix[i] == NULL:
            # Release everything allocated so far before reporting failure.
            for j in range(i):
                free(matrix[j])
            free(matrix)
            raise MemoryError()
        memset(matrix[i], 0, sizeof(int) * cols)
    return matrix
#cython.cdivision(True)
#cython.wraparound(False)
#cython.boundscheck(False)
cdef void generator(double* alpha,int* D, double* m):
    """Fill m[0] .. m[D[0]-1] with draws from gsl_ran_beta(r, alpha[0], 1).

    ``r`` is the module-level GSL random number generator.
    """
    cdef Py_ssize_t slot
    for slot in range(D[0]):
        m[slot] = gsl_ran_beta(r, alpha[0], 1)
#cython.cdivision(True)
#cython.boundscheck(False)
#cython.wraparound(False)
cdef void initializer(double* alpha, int* D, int* N, double* m, int** Z ):
    """Draw m via generator(), then fill Z with Bernoulli samples.

    For every column index below D[0] and every row index below N[0],
    Z[row][col] receives gsl_ran_bernoulli(r, m[col]) and the drawn value is
    printed.
    """
    cdef int col, row
    # `m` already is the address of its first element.
    generator(alpha, D, m)
    for col in range(D[0]):
        for row in range(N[0]):
            Z[row][col] = gsl_ran_bernoulli(r, m[col])
            print Z[row][col]
def run(int n, int d, double alpha):
# Entry point of the question: allocates mu (length d) and an int matrix via
# zeros2((n, d)), then passes pointers to both into initializer().
cdef np.ndarray[double, ndim=1, mode='c'] mu=np.empty((d,), dtype=ctypes.c_double)
cdef int **Z = zeros2((n, d))
# NOTE(review): &Z[0][0] is the address of an int, yet it is cast to int**.
# The answer that follows this code names this cast as the cause of the
# segmentation fault and says to pass Z itself.
# NOTE(review): nothing in this function frees what zeros2 allocated.
initializer(&alpha, &d, &n, &mu[0], <int **>(&Z[0][0]) )
setup.py
from distutils.core import setup, Extension
from Cython.Build import cythonize
from numpy import get_include
import numpy
import cython_gsl
from Cython.Distutils import build_ext
# One extension module, "test", built from test.pyx. The GSL library names,
# library directory and include directory all come from cython_gsl; numpy's
# headers are added for the `cimport numpy` in the .pyx file.
ext_modules = [
Extension(
"test",
["test.pyx"],
libraries=cython_gsl.get_libraries(),
library_dirs=[cython_gsl.get_library_dir()],
include_dirs=[numpy.get_include(), cython_gsl.get_include()])
]
# cythonize() turns the Extension list into the list handed to setup().
ext_modules = cythonize(ext_modules)
setup(
name='test',
ext_modules=ext_modules,
cmdclass={'build_ext': build_ext})
Update:
The code gets compiled but when I import the run function in python I get this error:
>>> import test
>>> test.run( 10, 4,0.9)
Segmentation fault (core dumped)
I am not sure that the two-dimensional array I defined is the best approach to my problem of defining a dynamic array. What is the reason for this error?
Any suggestions would be most welcome.
Your immediate problem is that:
<int **>(&Z[0][0])
takes the address of the first element of the first row and casts it to an int**. It's actually an int* (because it's the address of an int). Therefore the memory that initializer writes to is nonsense and you get a segmentation fault. Casts are often an indication that you're doing something wrong.
You just need to pass Z which is already an int**.
The problem is that n, d and alpha are Python variables, so &n is not something you can do. You can change run to cdef function, or maybe create a temporary version:
cdef int _n = n;
and then pass &_n
However, based on your code, what's the point of passing a pointer to these three variables, anyway? You don't modify them. You can simply pass them without a pointer.

Speed up cython code

I wrote a python code that manages a lot of data and thus it takes a lot of time. So, I found out Cython and I began to change my code.
Basically, all I did is to change functions' declarations (cdef type name(arguments with variable type) ), to declare cdef variables with its type, and to declare cdef classes.
I'm writing all the .pyx with eclipse, and I'm compiling with the command python setup.py build_ext --inplace and running it with eclipse.
My issue is that comparing python with cython speed, there isn't any difference.
I run the command cython -a <file> to generate a html file and there are a lot of yellow lines.
I don't know if I'm doing something wrong, I should include something else, and I don't know how to delete these yellow lines.
I just paste some code lines, that's the part that I'd like to speed up and because the code is very long.
main.pyx
'''there are a lot of ndarray objects stored in a file and in this step I get each of them until there are no more items '''
cdef ReadWavePoints (WavePointManagement wavePointManagement, ColumnManagement columnManagement):
# Opens the wave-point file, then repeatedly loads, rounds, sorts and groups
# the loaded points. The loop only ends when one of the calls inside the try
# raises; the handler then closes both files and breaks.
# NOTE(review): wavePointsFile is not defined in this block -- presumably a
# module-level name; confirm.
cdef int runReadWavePoints
wavePointManagement.OpenWavePointFileLoad(wavePointsFile)
runReadWavePoints = 1
while runReadWavePoints == 1:
try:
wavePointManagement.LoadWavePointFile()
wavePointManagement.RoundCoordinates()
wavePointManagement.SortWavePointList()
GroupColumnsVoxels(wavePointManagement.GetWavePointList(), columnManagement)
# NOTE(review): the bare except treats every exception as "no more items", so
# a genuine failure ends the loop silently. The WavePointManagement methods
# called above are declared `cdef void`, so which call actually delivers the
# terminating exception depends on the Cython version's propagation rules --
# confirm before narrowing this handler.
except:
wavePointManagement.CloseWavePointFile()
columnManagement.CloseWriteColumnFile()
break
'''I check which points are in the same XYZ (voxel) and in the same XY (column)'''
cdef GroupColumnsVoxels (object wavePointList, ColumnManagement columnManagement):
# Walks wavePointList with a reference index (indexWavePointRef) and a scan
# index (indexWavePoint). Points are compared through GetX/GetY (same column)
# and GetZ (same voxel); voxelValue keeps the largest GetValue seen for the
# current voxel. Results are handed to CheckVoxel, columnManagement.AddColumn
# and MaximumHeightColumn.
# NOTE(review): the indentation of this copy is lost, so the nesting of the
# if/else branches and the two `break`s cannot be read off the text; consult
# the original file before changing the control flow.
# NOTE(review): columnInstance is not defined in this block -- presumably a
# module imported elsewhere.
cdef int indexWavePointRef, indexWavePoint
cdef int saved
cdef double voxelValue
cdef int sizeWavePointList
sizeWavePointList = len(wavePointList)
indexWavePointRef = 0
while indexWavePointRef < sizeWavePointList - 1:
# saved records whether CheckVoxel was already called for the current
# reference point during the scan below.
saved = 0
voxelValue = (wavePointList[indexWavePointRef]).GetValue()
# len(wavePointList) is evaluated again here although sizeWavePointList
# already holds it.
for indexWavePoint in xrange(indexWavePointRef + 1, len(wavePointList)):
if (wavePointList[indexWavePointRef]).GetX() == (wavePointList[indexWavePoint]).GetX() and (wavePointList[indexWavePointRef]).GetY() == (wavePointList[indexWavePoint]).GetY():
if (wavePointList[indexWavePointRef]).GetZ() == (wavePointList[indexWavePoint]).GetZ():
if voxelValue < (wavePointList[indexWavePoint]).GetValue():
voxelValue = (wavePointList[indexWavePoint]).GetValue()
else:
saved = 1
CheckVoxel((wavePointList[indexWavePointRef]).GetX(), (wavePointList[indexWavePointRef]).GetY(), (wavePointList[indexWavePointRef]).GetZ(), voxelValue)
indexWavePointRef = indexWavePoint
if indexWavePointRef == sizeWavePointList - 1:
CheckVoxel((wavePointList[indexWavePointRef]).GetX(), (wavePointList[indexWavePointRef]).GetY(), (wavePointList[indexWavePointRef]).GetZ(), (wavePointList[indexWavePointRef]).GetValue())
break
else:
saved = 1
CheckVoxel((wavePointList[indexWavePointRef]).GetX(), (wavePointList[indexWavePointRef]).GetY(), (wavePointList[indexWavePointRef]).GetZ(), voxelValue)
columnObject = columnInstance.Column((wavePointList[indexWavePointRef]).GetX(), (wavePointList[indexWavePointRef]).GetY())
columnManagement.AddColumn(columnObject)
MaximumHeightColumn((wavePointList[indexWavePointRef]).GetX(), (wavePointList[indexWavePointRef]).GetY(), (wavePointList[indexWavePointRef]).GetZ())
indexWavePointRef = indexWavePoint
break
if saved == 0:
CheckVoxel((wavePointList[indexWavePointRef]).GetX(), (wavePointList[indexWavePointRef]).GetY(), (wavePointList[indexWavePointRef]).GetZ(), voxelValue)
indexWavePointRef = indexWavePoint
columnObject = columnInstance.Column((wavePointList[indexWavePointRef]).GetX(), (wavePointList[indexWavePointRef]).GetY())
columnManagement.AddColumn(columnObject)
MaximumHeightColumn((wavePointList[indexWavePointRef]).GetX(), (wavePointList[indexWavePointRef]).GetY(), (wavePointList[indexWavePointRef]).GetZ())
'''I check if the data stored in a voxel is lower than the new one; if its the case, I store it'''
cdef CheckVoxel (double X, double Y, double Z, double newValue):
    """Keep the larger of the stored and the new value for one voxel.

    The raster band is chosen from Z (0.3 per band, 1-based) and the cell
    from X and Y (0.25 per cell, measured from Xmin and Ymax). The single
    stored float32 is read, and overwritten only when newValue is greater.
    """
    cdef object band, raw_cell, packed_value
    band = datasetVoxels.GetRasterBand(int(math.floor(Z/0.3))+1)
    # Cell offsets are shared by the read and the conditional write.
    x_off = int(math.floor((X-Xmin)/0.25))
    y_off = int(math.floor((Ymax-Y)/0.25))
    raw_cell = band.ReadRaster(x_off, y_off, 1, 1, buf_type=gdal.GDT_Float32)
    stored = struct.unpack('f', raw_cell)[0]
    if newValue > stored:
        packed_value = struct.pack('f', newValue)
        band.WriteRaster(x_off, y_off, 1, 1, packed_value)
'''I check if this point has the highest Z and I store this information'''
cdef MaximumHeightColumn(double X, double Y, double newZ):
    """Record newZ as the column's maximum height when it exceeds the stored one.

    Reads one float32 cell from band 10 of datasetMetrics at the cell derived
    from X and Y (0.25 per cell, measured from Xmin and Ymax). The stored
    value is rounded to one decimal for the comparison, and newZ is written
    back only when it is greater.
    """
    cdef object band, raw_cell, packed_height
    band = datasetMetrics.GetRasterBand(10)
    # Cell offsets are shared by the read and the conditional write.
    x_off = int(math.floor((X-Xmin)/0.25))
    y_off = int(math.floor((Ymax-Y)/0.25))
    raw_cell = band.ReadRaster(x_off, y_off, 1, 1, buf_type=gdal.GDT_Float32)
    stored_height = struct.unpack('f', raw_cell)[0]
    if newZ > round(stored_height, 1):
        packed_height = struct.pack('f', newZ)
        band.WriteRaster(x_off, y_off, 1, 1, packed_height)
WavePointManagement.pyx
'''this class serializes, rounds and sorts the points of each ndarray'''
import cPickle as pickle
import numpy as np
cimport numpy as np
import math
cdef class WavePointManagement(object):
'''
This class manages all the points extracted from the waveform
'''
# fileObject: the open pickle file; wavePointList: the points of the batch
# that was loaded last.
cdef object fileObject, wavePointList
# NOTE(review): the attributes are already declared with cdef above;
# presumably __slots__ has no effect on a cdef class -- confirm.
__slots__ = ('wavePointList', 'fileObject')
def __cinit__(self):
'''
Constructor
'''
self.fileObject = None
self.wavePointList = np.array([])
# Returns the current batch of points.
cdef object GetWavePointList(self):
return self.wavePointList
# Opens fileName for binary reading and keeps the handle.
cdef void OpenWavePointFileLoad (self, object fileName):
self.fileObject = file(fileName, 'rb')
# Replaces the current batch with the next pickled object from the file.
# The list is reset to None first, so it stays None if pickle.load raises.
# NOTE(review): pickle.load must only be used on trusted files.
# NOTE(review): this method is `cdef void`; whether an exception raised here
# reaches the caller depends on the Cython version -- confirm.
cdef void LoadWavePointFile (self):
self.wavePointList = None
self.wavePointList = pickle.load(self.fileObject)
# Sorts the batch by (x, y, z). sorted() returns a Python list and the key is
# a Python lambda reading the .x/.y/.z attributes.
cdef void SortWavePointList (self):
self.wavePointList = sorted(self.wavePointList, key=lambda k: (k.x, k.y, k.z))
# Snaps every point: X down and Y up to a multiple of 0.25 (2 decimals),
# Z down to a multiple of 0.3 (1 decimal).
cdef void RoundCoordinates (self):
# These two declared ints are not used below.
cdef int indexPointObject, sizeWavePointList
for pointObject in self.GetWavePointList():
pointObject.SetX(round(math.floor(pointObject.GetX()/0.25)*0.25, 2))
pointObject.SetY(round(math.ceil(pointObject.GetY()/0.25)*0.25, 2))
pointObject.SetZ(round(math.floor(pointObject.GetZ()/0.3)*0.3, 1))
# Closes the file opened by OpenWavePointFileLoad.
cdef void CloseWavePointFile(self):
self.fileObject.close()
setup.py
from distutils.core import setup
from distutils.extension import Extension
from Cython.Distutils import build_ext
import numpy
# Single extension "main" built from main.pyx; numpy's header directory is
# added for the `cimport numpy` used in the sources.
ext = Extension("main", ["main.pyx"], include_dirs = [numpy.get_include()])
# Cython's build_ext replaces the default command so the .pyx is compiled.
setup (ext_modules=[ext],
cmdclass = {'build_ext' : build_ext}
)
test_cython.py
'''this is the file I run with eclipse after compiling'''
from main import main
main()
How could I speed up this code?
Your code jumps back and forth between using numpy arrays and lists. As such there is virtually no difference between the code that cython will produce.
The following code produces a python list, and the key function is a pure python function as well.
self.wavePointList = sorted(self.wavePointList, key=lambda k: (k.x, k.y, k.z))
You will want to use ndarray.sort (or numpy.sort if you don't want to sort inplace). To do this you will also need to change how your objects are stored in the array. That is, you will need to use a structured array. See numpy.sort for examples on how to sort structured arrays -- particularly the last two examples on the page.
Once you have your data stored in a numpy array then you need to tell cython about how the data is stored in the array. This includes providing type information and the dimensions of the array. This page provides more information how to work efficiently with numpy arrays.
An example of how to create and sort structured arrays:
import numpy as np
cimport numpy as np
DTYPE = [('name', 'S10'), ('height', np.float64), ('age', np.int32)]
# C-side mirror of the DTYPE record: a 10-byte name, a float64 height and an
# int32 age, declared `packed` and in the same field order as DTYPE.
cdef packed struct Person:
char name[10]
np.float64_t height
np.int32_t age
# Alias used as the element type of typed ndarray arguments.
ctypedef Person DTYPE_t
def create_array():
    """Build the three-record example array using the module-level DTYPE."""
    rows = [
        ('Arthur', 1.8, 41),
        ('Lancelot', 1.9, 38),
        ('Galahad', 1.7, 38),
    ]
    return np.array(rows, dtype=DTYPE)
cpdef sort_by_age_then_height(np.ndarray[DTYPE_t, ndim=1] arr):
    """Sort ``arr`` in place by the 'age' field, then by 'height'."""
    sort_fields = ['age', 'height']
    arr.sort(order=sort_fields)
Finally, you will need to convert your code from using Python methods to using the standard C library functions for a further speed-up. Below is an example using RoundCoordinates. `cpdef` means the function is also exposed to Python through a wrapper function.
cimport cython
cimport numpy as np
from libc.math cimport floor, ceil, round
import numpy as np
# numpy record layout of one point: three float64 fields x, y, z.
DTYPE = [('x', np.float64), ('y', np.float64), ('z', np.float64)]
# C-side mirror of DTYPE, declared `packed` and with the same field order.
cdef packed struct Point3D:
np.float64_t x, y, z
# Alias used as the element type of typed ndarray arguments.
ctypedef Point3D DTYPE_t
# Caution should be used when turning the bounds check off as it can lead to undefined
# behaviour if you use an invalid index.
# The directive below appeared as a `#cython...` comment in this copy; it is
# restored as a decorator.
@cython.boundscheck(False)
cpdef RoundCoordinates_cy(np.ndarray[DTYPE_t] pointlist):
    """Snap every point of ``pointlist`` onto the voxel grid, in place.

    x is moved down and y up to the nearest multiple of 0.25; z is moved
    down to the nearest multiple of 0.3, kept to one decimal. This matches
    the pure-Python RoundCoordinates shown in the question.
    """
    cdef Py_ssize_t i
    cdef Py_ssize_t count = pointlist.shape[0]
    cdef DTYPE_t point
    for i in range(count):  # compiled to a plain C loop
        point = pointlist[i]  # creates a copy of the point
        # A multiple of 0.25 is exact in binary floating point, so no extra
        # rounding is needed. The earlier `round(k*2.5) / 10` rounded the
        # result to one decimal and moved it off the 0.25 grid.
        point.x = floor(point.x / 0.25) * 0.25
        point.y = ceil(point.y / 0.25) * 0.25
        # 0.3 is not exact in binary, so scale to tenths, round, scale back.
        point.z = round(floor(point.z / 0.3) * 3) / 10.0
        pointlist[i] = point  # overwrites the old point data with the new data
Finally, before rewriting your entire code base, you should profile your code to see which functions the program spends most of its time and optimise those functions before bothering about optimising other functions.

Cython boolean indexing optimization

What is the best way to convert the following code to cython
Given the following example:
# Set up example data: Z and A are 10x10 arrays of normal random values.
Z = np.random.randn(10,10)
A = np.random.randn(10,10)
# Put NaN into four cells of A.
A[0,1] = np.nan
A[1,3] = np.nan
A[5,3] = np.nan
A[3,5] = np.nan
# B is the transposed NaN mask of A.
B = np.isnan(A).transpose()
# The product of B and its transpose is used as the index into Z.
C = Z[B * B.transpose()]
I want to optimize the type definition of np.ndarray B in the above example and optimize the creation of ndarray C.
I tried using setting B to uint8 and python and c++ bools.
cdef np.ndarray[np.uint8_t, ndim=2, cast=True] however this yields little or no speedup
and
cdef np.ndarray[bool, ndim=2, cast=True]
where bool is either from cpython cimport bool or from libcpp cimport bool in both cases the above code will throw an error.
The right way to create a buffer that can hold np.nan values is to use np.float_t or np.double_t. If you try to use an integer buffer, the following error is raised:
ValueError: cannot convert float Nan to integer
Then, you could use something like:
# A and Z are declared as 2-D double buffers, so A can hold NaN.
cdef np.ndarray[np.double_t, ndim=2] A, Z
Z = np.random.randn(10,10)
A = np.random.randn(10,10)
A[0,1] = np.nan
A[1,3] = np.nan
A[5,3] = np.nan
A[3,5] = np.nan
# B and C are left untyped here.
B = np.isnan(A).transpose()
C = Z[B * B.transpose()]

In-place shuffling of multidimensional arrays

I am trying to implement a NaN-safe shuffling procedure in Cython that can shuffle along several axis of a multidimensional matrix of arbitrary dimension.
In the simple case of a 1D matrix, one can simply shuffle over all indices with non-NaN values using the Fisher–Yates algorithm:
def shuffle1D(np.ndarray[double, ndim=1] x):
    """Shuffle the non-NaN entries of ``x`` in place (Fisher-Yates).

    Only positions holding a non-NaN value take part, so every NaN stays
    where it is.
    """
    cdef np.ndarray[long, ndim=1] idx = np.where(~np.isnan(x))[0]
    cdef unsigned int i, j, n, m
    cdef double held
    draw = np.random.randint
    for i in xrange(len(idx)-1, 0, -1):
        j = draw(i+1)
        n = idx[i]
        m = idx[j]
        # Swap through a temporary instead of tuple assignment.
        held = x[n]
        x[n] = x[m]
        x[m] = held
I would like to extend this algorithm to handle large multidimensional arrays without reshape (which triggers a copy for more complicated cases not considered here). To this end, I would need to get rid of the fixed input dimension, which seems neither possible with numpy arrays nor memoryviews in Cython. Is there a workaround?
Many thanks in advance!
Thanks to the comments of @Veedrac, this answer uses more of Cython's capabilities.
A pointer array stores the memory address of the values along axis
Your algorithm is used with a modification that checks for NaN values, preventing them from being shuffled.
It won't create a copy for C ordered arrays. In case of Fortran ordered arrays the ravel() command will return a copy. This could be improved by creating another array of double pointers to carry the values of x, probably with some cache penalty...
This code is at least one order of magnitude faster than the other based on slices.
from libc.stdlib cimport malloc, free
cimport numpy as np
import numpy as np
from numpy.random import randint
cdef extern from "numpy/npy_math.h":
bint npy_isnan(double x)
def shuffleND(x, int axis=-1):
    """Shuffle a float64 ndarray along ``axis`` in place, leaving NaNs fixed.

    For every combination of indices on the other axes, the values along
    ``axis`` are permuted with a Fisher-Yates pass. A swap is skipped when
    either of the two values is NaN, so NaNs never move.

    Parameters:
        x: numpy array of dtype float64, any number of dimensions >= 1.
        axis: axis to shuffle along; negative values count from the end.

    Returns:
        ``x`` itself. A C-contiguous ``x`` is shuffled through a view with no
        copy. Any other layout is shuffled in a C-ordered copy which is then
        written back into ``x``.

    Raises:
        MemoryError if the pointer table cannot be allocated.
        IndexError if ``axis`` is out of range.
    """
    cdef np.ndarray[double, ndim=1] v  # flat C-ordered view (or copy) of x
    cdef Py_ssize_t i, j, pos, stride, num_axis
    cdef double tmp
    cdef double **v_axis

    # Element strides of the C-ordered flat array, derived from the shape.
    # Using x.strides here is only valid when x itself is C-contiguous.
    shape = list(x.shape)
    elem_strides = [1] * len(shape)
    for i in range(len(shape) - 2, -1, -1):
        elem_strides[i] = elem_strides[i + 1] * shape[i + 1]
    # list.pop accepts negative positions, so negative axes work directly.
    num_axis = shape.pop(axis)
    stride = elem_strides.pop(axis)

    # ravel() only returns a view for C-contiguous input; otherwise the
    # shuffled copy has to be copied back at the end.
    writeback = not x.flags.c_contiguous
    v = x.ravel()

    # One pointer per element along the axis. The pointers are aimed at
    # existing elements of v, so no per-element allocation is needed.
    v_axis = <double **>malloc((num_axis if num_axis > 0 else 1) * sizeof(double *))
    if v_axis == NULL:
        raise MemoryError()
    try:
        for indices in np.ndindex(*shape):
            pos = 0
            for index_k, stride_k in zip(indices, elem_strides):
                pos += index_k * stride_k
            for i in range(num_axis):
                v_axis[i] = &v[pos + i*stride]
            for i in range(num_axis-1, 0, -1):
                j = randint(i+1)
                if npy_isnan(v_axis[i][0]) or npy_isnan(v_axis[j][0]):
                    continue
                tmp = v_axis[i][0]
                v_axis[i][0] = v_axis[j][0]
                v_axis[j][0] = tmp
    finally:
        free(v_axis)
    if writeback:
        x[...] = v.reshape(x.shape)
    return x
The following algorithm is based on slices, where no copy is made and it should work for any np.ndarray. The main steps are:
np.ndindex() is used to run through the different multidimensional indices, excluding the one belonging to the axis you want to shuffle
the shuffle already developed by you for the 1-D case is applied.
Code:
def shuffleND(np.ndarray x, axis=-1):
    """Shuffle ``x`` along ``axis`` in place using plain indexing; return ``x``.

    For every combination of indices on the other axes, the non-NaN
    positions along ``axis`` are located and permuted with a Fisher-Yates
    pass, so NaNs keep their place.
    """
    cdef np.ndarray[long long, ndim=1] idx
    cdef unsigned int i, j, n, m
    if axis==-1:
        axis = x.ndim-1
    other_dims = list(np.shape(x))
    other_dims.pop(axis)
    for fixed in np.ndindex(*other_dims):
        # Index parts before and after the shuffled axis; joining them around
        # one extra entry is the same as list.insert(axis, entry).
        head = fixed[:axis]
        tail = fixed[axis:]
        lane = x[head + (slice(None),) + tail]
        idx = np.where(~np.isnan(lane))[0]
        for i in range(idx.shape[0]-1, 0, -1):
            j = randint(i+1)
            n, m = idx[i], idx[j]
            first = head + (n,) + tail
            second = head + (m,) + tail
            x[first], x[second] = x[second], x[first]
    return x

Categories

Resources