I wrote a Python program that processes a lot of data, so it takes a long time to run. I then discovered Cython and began converting my code.
Basically, all I did is to change functions' declarations (cdef type name(arguments with variable type) ), to declare cdef variables with its type, and to declare cdef classes.
I'm writing all the .pyx with eclipse, and I'm compiling with the command python setup.py build_ext --inplace and running it with eclipse.
My issue is that, comparing the Python and Cython versions, there isn't any difference in speed.
I run the command cython -a <file> to generate a html file and there are a lot of yellow lines.
I don't know whether I'm doing something wrong or whether I should include something else, and I don't know how to get rid of these yellow lines.
Because the code is very long, I'm pasting only some of it: the part that I'd like to speed up.
main.pyx
'''there are a lot of ndarray objects stored in a file and in this step I get each of them until there are no more items '''
cdef ReadWavePoints (WavePointManagement wavePointManagement, ColumnManagement columnManagement):
    '''
    Process every batch of wave points stored in ``wavePointsFile``.

    Each iteration loads the next batch, rounds its coordinates, sorts it and
    passes it to GroupColumnsVoxels.  The loop has no explicit end condition:
    it stops at the first exception raised by any of those steps (presumably
    the one triggered once the file is exhausted -- TODO confirm).

    wavePointManagement -- loads, rounds and sorts the batches.
    columnManagement    -- receives the columns built from each batch.
    '''
    wavePointManagement.OpenWavePointFileLoad(wavePointsFile)
    try:
        while True:
            wavePointManagement.LoadWavePointFile()
            wavePointManagement.RoundCoordinates()
            wavePointManagement.SortWavePointList()
            GroupColumnsVoxels(wavePointManagement.GetWavePointList(), columnManagement)
    except Exception:
        # Any ordinary error ends the run, exactly as before.  Unlike a bare
        # "except:", this lets KeyboardInterrupt and SystemExit propagate
        # (after the files have been closed below).
        pass
    finally:
        wavePointManagement.CloseWavePointFile()
        columnManagement.CloseWriteColumnFile()
'''I check which points are in the same XYZ (voxel) and in the same XY (column)'''
# GroupColumnsVoxels walks wavePointList with a reference index
# (indexWavePointRef) and a scan index (indexWavePoint) and compares the
# GetX()/GetY()/GetZ() values of the two entries.  Depending on the
# comparison it calls CheckVoxel, columnManagement.AddColumn and
# MaximumHeightColumn.
# NOTE(review): the indentation of this listing was lost, so the nesting of
# the if/else/break statements below cannot be recovered with certainty.
# Restore it from the original file before relying on the comments here.
cdef GroupColumnsVoxels (object wavePointList, ColumnManagement columnManagement):
cdef int indexWavePointRef, indexWavePoint
# saved is set to 1 immediately before each CheckVoxel call made inside the
# for loop and is tested after the loop.
cdef int saved
cdef double voxelValue
cdef int sizeWavePointList
sizeWavePointList = len(wavePointList)
indexWavePointRef = 0
while indexWavePointRef < sizeWavePointList - 1:
saved = 0
# Start from the value of the reference entry.
voxelValue = (wavePointList[indexWavePointRef]).GetValue()
for indexWavePoint in xrange(indexWavePointRef + 1, len(wavePointList)):
# Same X and same Y as the reference entry.
if (wavePointList[indexWavePointRef]).GetX() == (wavePointList[indexWavePoint]).GetX() and (wavePointList[indexWavePointRef]).GetY() == (wavePointList[indexWavePoint]).GetY():
# Same Z as well: keep the larger of the two values in voxelValue.
if (wavePointList[indexWavePointRef]).GetZ() == (wavePointList[indexWavePoint]).GetZ():
if voxelValue < (wavePointList[indexWavePoint]).GetValue():
voxelValue = (wavePointList[indexWavePoint]).GetValue()
else:
saved = 1
CheckVoxel((wavePointList[indexWavePointRef]).GetX(), (wavePointList[indexWavePointRef]).GetY(), (wavePointList[indexWavePointRef]).GetZ(), voxelValue)
# The scanned entry becomes the new reference.
indexWavePointRef = indexWavePoint
# The new reference is the last entry: pass its own value to CheckVoxel.
if indexWavePointRef == sizeWavePointList - 1:
CheckVoxel((wavePointList[indexWavePointRef]).GetX(), (wavePointList[indexWavePointRef]).GetY(), (wavePointList[indexWavePointRef]).GetZ(), (wavePointList[indexWavePointRef]).GetValue())
break
else:
saved = 1
CheckVoxel((wavePointList[indexWavePointRef]).GetX(), (wavePointList[indexWavePointRef]).GetY(), (wavePointList[indexWavePointRef]).GetZ(), voxelValue)
# Register a Column built from the reference entry's X and Y.
columnObject = columnInstance.Column((wavePointList[indexWavePointRef]).GetX(), (wavePointList[indexWavePointRef]).GetY())
columnManagement.AddColumn(columnObject)
MaximumHeightColumn((wavePointList[indexWavePointRef]).GetX(), (wavePointList[indexWavePointRef]).GetY(), (wavePointList[indexWavePointRef]).GetZ())
indexWavePointRef = indexWavePoint
break
# No CheckVoxel call was flagged inside the for loop.
if saved == 0:
CheckVoxel((wavePointList[indexWavePointRef]).GetX(), (wavePointList[indexWavePointRef]).GetY(), (wavePointList[indexWavePointRef]).GetZ(), voxelValue)
indexWavePointRef = indexWavePoint
columnObject = columnInstance.Column((wavePointList[indexWavePointRef]).GetX(), (wavePointList[indexWavePointRef]).GetY())
columnManagement.AddColumn(columnObject)
MaximumHeightColumn((wavePointList[indexWavePointRef]).GetX(), (wavePointList[indexWavePointRef]).GetY(), (wavePointList[indexWavePointRef]).GetZ())
'''I check if the data stored in a voxel is lower than the new one; if its the case, I store it'''
cdef CheckVoxel (double X, double Y, double Z, double newValue):
    '''
    Store newValue in the voxel raster when it exceeds the stored value.

    The band is chosen from Z (step 0.3) and the pixel from X and Y (step
    0.25, measured from the module-level Xmin and Ymax).  The pixel is read
    and written as a single packed float ('f').
    '''
    cdef object band, rawPixel, pixelColumn, pixelRow
    cdef tuple stored
    band = datasetVoxels.GetRasterBand(int(math.floor(Z/0.3))+1)
    pixelColumn = int(math.floor((X-Xmin)/0.25))
    pixelRow = int(math.floor((Ymax-Y)/0.25))
    rawPixel = band.ReadRaster(pixelColumn, pixelRow, 1, 1, buf_type=gdal.GDT_Float32)
    stored = struct.unpack('f', rawPixel)
    if stored[0] < newValue:
        band.WriteRaster(pixelColumn, pixelRow, 1, 1, struct.pack('f', newValue))
'''I check if this point has the highest Z and I store this information'''
cdef MaximumHeightColumn(double X, double Y, double newZ):
    '''
    Store newZ in band 10 of the metrics raster when it exceeds the stored
    value (the stored value is rounded to one decimal before comparing).

    The pixel is derived from X and Y with a 0.25 step, measured from the
    module-level Xmin and Ymax, and is read and written as one packed
    float ('f').
    '''
    cdef object heightBand, rawPixel, pixelColumn, pixelRow
    cdef tuple stored
    heightBand = datasetMetrics.GetRasterBand(10)
    pixelColumn = int(math.floor((X-Xmin)/0.25))
    pixelRow = int(math.floor((Ymax-Y)/0.25))
    rawPixel = heightBand.ReadRaster(pixelColumn, pixelRow, 1, 1, buf_type=gdal.GDT_Float32)
    stored = struct.unpack('f', rawPixel)
    if round(stored[0], 1) < newZ:
        heightBand.WriteRaster(pixelColumn, pixelRow, 1, 1, struct.pack('f', newZ))
WavePointManagement.pyx
'''this class serializes, rounds and sorts the points of each ndarray'''
import cPickle as pickle
import numpy as np
cimport numpy as np
import math
cdef class WavePointManagement(object):
    '''
    Loads the batches of wave points pickled in a file and rounds and sorts
    the points of the batch that is currently loaded.

    Every ``cdef void`` method is declared ``except *`` so that Python
    exceptions raised inside it (for instance by pickle.load when the file is
    exhausted) reach the caller; without the clause, Cython versions before
    3.0 only print such exceptions and carry on.
    '''
    # fileObject: the open input file, or None until OpenWavePointFileLoad runs.
    # wavePointList: the batch loaded last (an empty array initially).
    # Both are C-level attributes, so no __slots__ declaration is needed.
    cdef object fileObject, wavePointList

    def __cinit__(self):
        '''
        Constructor
        '''
        self.fileObject = None
        self.wavePointList = np.array([])

    cdef object GetWavePointList(self):
        '''Return the batch that is currently loaded.'''
        return self.wavePointList

    cdef void OpenWavePointFileLoad (self, object fileName) except *:
        '''Open fileName for binary reading.'''
        # open() exists on Python 2 and 3; file() is Python 2 only.
        self.fileObject = open(fileName, 'rb')

    cdef void LoadWavePointFile (self) except *:
        '''Replace the current batch with the next object pickled in the file.'''
        # Drop the previous batch before the next one is allocated.
        self.wavePointList = None
        # NOTE: pickle can execute arbitrary code; only load trusted files.
        self.wavePointList = pickle.load(self.fileObject)

    cdef void SortWavePointList (self) except *:
        '''Sort the current batch by (x, y, z); the result is a Python list.'''
        self.wavePointList = sorted(self.wavePointList, key=lambda k: (k.x, k.y, k.z))

    cdef void RoundCoordinates (self) except *:
        '''
        Snap every point of the current batch to the grid: X is floored and
        Y is ceiled to a multiple of 0.25, Z is floored to a multiple of 0.3.
        '''
        for pointObject in self.wavePointList:
            pointObject.SetX(round(math.floor(pointObject.GetX()/0.25)*0.25, 2))
            pointObject.SetY(round(math.ceil(pointObject.GetY()/0.25)*0.25, 2))
            pointObject.SetZ(round(math.floor(pointObject.GetZ()/0.3)*0.3, 1))

    cdef void CloseWavePointFile(self) except *:
        '''Close the input file.'''
        self.fileObject.close()
setup.py
from distutils.core import setup
from distutils.extension import Extension
from Cython.Distutils import build_ext
import numpy
# Build main.pyx into the "main" extension module, with the NumPy headers on
# the include path.
main_extension = Extension(
    "main",
    sources=["main.pyx"],
    include_dirs=[numpy.get_include()],
)

setup(
    ext_modules=[main_extension],
    cmdclass={'build_ext': build_ext},
)
test_cython.py
'''this is the file I run with eclipse after compiling'''
from main import main
main()
How could I speed up this code?
Your code jumps back and forth between using numpy arrays and lists. As such there is virtually no difference between the code that cython will produce.
The following code produces a python list, and the key function is a pure python function as well.
self.wavePointList = sorted(self.wavePointList, key=lambda k: (k.x, k.y, k.z))
You will want to use ndarray.sort (or numpy.sort if you don't want to sort inplace). To do this you will also need to change how your objects are stored in the array. That is, you will need to use a structured array. See numpy.sort for examples on how to sort structured arrays -- particularly the last two examples on the page.
Once you have your data stored in a numpy array then you need to tell cython about how the data is stored in the array. This includes providing type information and the dimensions of the array. This page provides more information how to work efficiently with numpy arrays.
An example of how to create and sort structured arrays:
import numpy as np
cimport numpy as np
# NumPy description of one record: a 10-byte name, a float64 height and an
# int32 age.
DTYPE = [('name', 'S10'), ('height', np.float64), ('age', np.int32)]

# C-level counterpart of DTYPE, with the same fields in the same order.
cdef packed struct Person:
    char name[10]
    np.float64_t height
    np.int32_t age

ctypedef Person DTYPE_t


def create_array():
    """Return a three-record structured array of (name, height, age)."""
    knights = [
        ('Arthur', 1.8, 41),
        ('Lancelot', 1.9, 38),
        ('Galahad', 1.7, 38),
    ]
    return np.array(knights, dtype=DTYPE)


cpdef sort_by_age_then_height(np.ndarray[DTYPE_t, ndim=1] arr):
    """Sort ``arr`` in place by age, using height to order equal ages."""
    sort_keys = ['age', 'height']
    arr.sort(order=sort_keys)
Finally, you will need to convert your code from using Python methods to using the standard C library functions for a further speed-up. Below is an example using RoundCoordinates. `cpdef` means the function is also exposed to Python through a wrapper function.
cimport cython
cimport numpy as np
from libc.math cimport floor, ceil, round
import numpy as np
DTYPE = [('x', np.float64), ('y', np.float64), ('z', np.float64)]
cdef packed struct Point3D:
np.float64_t x, y, z
ctypedef Point3D DTYPE_t
# Caution should be used when turning the bounds check off as it can lead to undefined
# behaviour if you use an invalid index.
@cython.boundscheck(False)
cpdef RoundCoordinates_cy(np.ndarray[DTYPE_t] pointlist):
    """Snap every point of ``pointlist`` to the grid, in place.

    x is floored and y is ceiled to a multiple of 0.25; z is floored to a
    multiple of 0.3.  This mirrors the Python expressions
    ``round(floor(v/0.25)*0.25, 2)`` and ``round(floor(z/0.3)*0.3, 1)``.

    pointlist -- 1-D structured array whose records match DTYPE_t.
    """
    cdef Py_ssize_t i
    cdef DTYPE_t point
    for i in range(pointlist.shape[0]):  # compiled to a plain C loop
        point = pointlist[i]  # creates a copy of the point
        # A whole number times 0.25 is exactly representable, so no further
        # rounding is needed (rounding to one decimal would turn 0.25 into 0.3).
        point.x = floor(point.x / 0.25) * 0.25
        point.y = ceil(point.y / 0.25) * 0.25
        # 0.3 is not exactly representable: scale to tenths, then divide.
        point.z = round(floor(point.z / 0.3) * 3) / 10
        pointlist[i] = point  # overwrites the old point data with the new data
Finally, before rewriting your entire code base, you should profile your code to see in which functions the program spends most of its time, and optimise those functions before bothering to optimise the others.
Related
I am trying to use a modified version of the RANSAC regressor algorithm. Instead of changing sklearn's function (to avoid problems with sklearn 0.24.1 and some warnings it emits), I found an implementation on GitHub and tried to cythonize it to improve its speed before making my modifications. To my surprise the speed gain was very poor. Is that because the code is full of NumPy calls, or did I do something wrong? Below are the Python version and the Cython version (with the modifications needed to avoid errors):
##Python version
import numpy as np
def ransac_polyfit(x, y, order, t, n=0.8, k=100, f=0.9):
    """Fit a polynomial to (x, y) with RANSAC and return its coefficients.

    Parameters
    ----------
    x, y : 1-D numpy arrays of equal length
        Sample coordinates.
    order : int
        Degree of the fitted polynomial.
    t : float
        Largest absolute residual for a point to count as an inlier.
    n : float
        Fraction of the points drawn (with replacement) for each trial fit.
    k : int
        Number of trials.
    f : float
        Fraction of the points that must be inliers for a trial to be kept.

    Returns
    -------
    numpy.ndarray
        Coefficients (highest power first) of the accepted model with the
        smallest summed absolute inlier residual, or ``np.array([None])``
        when no trial was accepted.
    """
    n_points = len(x)
    sample_size = int(n * n_points)
    min_inliers = n_points * f
    besterr = np.inf
    bestfit = np.array([None])
    for _ in range(k):
        maybeinliers = np.random.randint(n_points, size=sample_size)
        maybemodel = np.polyfit(x[maybeinliers], y[maybeinliers], order)
        alsoinliers = np.abs(np.polyval(maybemodel, x) - y) < t
        # count_nonzero counts in C; the builtin sum() would iterate over the
        # boolean array one Python object at a time.
        if np.count_nonzero(alsoinliers) > min_inliers:
            x_in = x[alsoinliers]
            y_in = y[alsoinliers]
            bettermodel = np.polyfit(x_in, y_in, order)
            thiserr = np.sum(np.abs(np.polyval(bettermodel, x_in) - y_in))
            if thiserr < besterr:
                bestfit = bettermodel
                besterr = thiserr
    return bestfit
##Cython version
cimport cython
cimport numpy as np
import numpy as np
np.import_array()
#cython.boundscheck(False)
#cython.wraparound(False)
cdef ransac_polyfit(np.ndarray[np.npy_int64, ndim=1] x,
                    np.ndarray[np.npy_int64, ndim=1] y,
                    np.npy_intp order,
                    np.npy_float32 t,
                    np.npy_float32 n=0.8,
                    np.npy_intp k=100,
                    np.npy_float32 f=0.9):
    """Fit a polynomial to (x, y) with RANSAC.

    x, y  -- sample coordinates (1-D int64 arrays of equal length).
    order -- degree of the fitted polynomial.
    t     -- largest absolute residual for a point to count as an inlier.
    n     -- fraction of the points drawn (with replacement) per trial fit.
    k     -- number of trials.
    f     -- fraction of the points that must be inliers to keep a trial.

    Returns ``(ransac_control, bestfit)``.  ransac_control is 1 when a model
    was accepted and 0 otherwise; in the latter case bestfit is a one-element
    zero array.
    """
    cdef np.npy_intp kk, ransac_control
    cdef np.npy_intp n_points = x.shape[0]
    # -1.0 marks "no model accepted yet"; real errors are sums of absolute
    # values and therefore never negative.
    cdef np.npy_float64 thiserr, besterr = -1.0
    cdef double min_inliers = n_points * f
    # The temporaries are only handed back to NumPy, so they are declared as
    # plain ndarrays: buffer typing would add a buffer acquisition on every
    # assignment without speeding anything up.
    cdef np.ndarray maybeinliers, alsoinliers, maybemodel, bettermodel
    cdef np.ndarray x_in, y_in
    cdef np.ndarray bestfit = np.zeros(1, dtype=np.float64)
    sample_size = int(n * n_points)
    for kk in range(k):
        maybeinliers = np.random.randint(n_points, size=sample_size)
        maybemodel = np.polyfit(x[maybeinliers], y[maybeinliers], order)
        # np.polyval: an unqualified polyval is not defined in this listing.
        alsoinliers = np.abs(np.polyval(maybemodel, x) - y) < t
        # count_nonzero counts in C; the builtin sum() iterates in Python.
        if np.count_nonzero(alsoinliers) > min_inliers:
            x_in = x[alsoinliers]
            y_in = y[alsoinliers]
            bettermodel = np.polyfit(x_in, y_in, order)
            thiserr = np.sum(np.abs(np.polyval(bettermodel, x_in) - y_in))
            if (thiserr < besterr) or (besterr == -1.0):
                bestfit = bettermodel
                besterr = thiserr
    # An empty array cannot be returned, so ransac_control tells the caller
    # whether bestfit holds a real model.
    ransac_control = 0 if besterr == -1.0 else 1
    return ransac_control, bestfit
**PS: Couldn't send the image of the HTML page of the cython code because it's my first question
I'm trying to implement implicit recommender model and have issues with code run time calculating top 5 suggestions to ~11kk users over ~100k items.
I was able to partly solve the problem with numpy and a sprinkling of Cython (in a Jupyter notebook). The lines that do the numpy sorting still use a single core:
%%cython -f
# cython: language_level=3
# cython: boundscheck=False
# cython: wraparound=False
# cython: linetrace=True
# cython: binding=True
# distutils: define_macros=CYTHON_TRACE_NOGIL=1
from cython.parallel import parallel, prange
import numpy as np
from tqdm import tqdm
def test(users_items=np.random.rand(11402139//1000, 134751//100)
         , int N=5, show_progress=True, int num_threads=1):
    """Return an (n_users, N) int array of item indices, one row per user.

    For every row of ``users_items`` a random score vector is generated and
    the indices of its N largest scores are stored in ascending score order.
    ``show_progress`` toggles the tqdm bar; ``num_threads`` is not used here.
    """
    # User count and loop indexes
    cdef int users_c = users_items.shape[0], u, i
    # Zero-initialised, C-ordered 2-D output buffer
    cdef int[:,::1] users_recs = np.zeros((users_c, N), dtype=np.intc)
    user_iter = tqdm(range(users_c), total=users_c, disable=not show_progress)
    for u in user_iter:
        # numpy .dot multiplication (may use several cores)
        scores = np.random.rand(134751//1000, 10).dot(np.random.rand(10))
        # Partial sort: the N best candidates, in arbitrary order ...
        candidates = np.argpartition(scores, -N)[-N:]
        # ... then ordered among themselves by score
        ranked = candidates[np.argsort(scores[candidates])]
        # Copy into the output row
        for i in range(N):
            users_recs[u, i] = ranked[i]
    return np.asarray(users_recs)
# Working example
tmp = test()
I profiled it: np.argpartition consumes 60% of the function's time and uses one core. I'm trying to make it parallel, because I have a server with 80 cores. So I perform the .dot operation on a subset of users (which uses multiple cores) and plan to fill an empty predefined array with the numpy sorting results (which use a single core) in parallel, but I'm stuck with the error from the question title:
%%cython -f
# cython: language_level=3
# cython: boundscheck=False
# cython: wraparound=False
# cython: linetrace=True
# cython: binding=True
# distutils: define_macros=CYTHON_TRACE_NOGIL=1
from cython.parallel import parallel, prange
import numpy as np
from tqdm import tqdm
from math import ceil
# Batched variant of test(): scores for num_threads users are computed per
# batch and each row is meant to be partially sorted inside a prange loop.
# NOTE(review): this is the listing that fails to compile.  The prange loop
# runs with nogil=True, but its body assigns the results of np.argpartition
# and np.argsort, which are Python objects; Cython reports "Assignment of
# Python object not allowed without gil" for the first of those lines.
def test(int N=10, show_progress=True, int num_threads=1):
# Define User and Item count and loops indexes
cdef int users_c = 11402139//1000, items_c = 134751//100, u, i, u_b
# Predefine zero 2-D C-ordered array for recommendations
cdef int[:,::1] users_recs = np.zeros((users_c, N), dtype=np.intc)
# Define memoryview var
cdef float[:,::1] users_items_scores_mv
progress = tqdm(total=users_c, disable=not show_progress)
# For a batch of Users
for u_b in range(5):
# Use .dot operation which use multiple cores
users_items_scores = np.random.rand(num_threads, 10).dot(np.random.rand(134751//100, 10).T)
# Create memory view to 2-D array, which I'm trying to sort row wise
# NOTE(review): np.random.rand returns float64 data while the memoryview is
# declared float -- check the dtype once the loop below compiles.
users_items_scores_mv = users_items_scores
# Here it starts, try to use numpy sorting in parallel
for u in prange(num_threads, nogil=True, num_threads=num_threads):
ids_partial = np.argpartition(users_items_scores_mv[u], items_c-N)[items_c-N:]
ids_top = ids_partial[np.argsort(users_items_scores_mv[u][ids_partial])]
# Fill predefined 2-D array
for i in range(N):
users_recs[u_b + u, i] = ids_top[i]
progress.update(num_threads)
progress.close()
return np.asarray(users_recs)
and got this (full error):
Error compiling Cython file:
------------------------------------------------------------
...
# Create memory view to 2-D array,
# which I'm trying to sort row wise
users_items_scores_mv = users_items_scores
# Here it starts, try to use numpy sorting in parallel
for u in prange(num_threads, nogil=True, num_threads=num_threads):
ids_partial = np.argpartition(users_items_scores_mv[u], items_c-N)[items_c-N:]
^
------------------------------------------------------------
/datascc/enn/.cache/ipython/cython/_cython_magic_201b296cd5a34240b4c0c6ed3e58de7c.pyx:31:12: Assignment of Python object not allowed without gil
I read about memory views and malloc-ating but haven't found example applicable to my situation.
I ended up with a custom C++ function that fills a numpy array in parallel, with nogil, via OpenMP. It required reimplementing numpy's argpartition partial sorting for use from Cython. The algorithm is as follows (steps 3-4 can be looped):
define empty array A[i,j] and memory view B_mv[i,k]; where "i" is batch size, "j" some columns and "k" number of desired items to be returned after sorting
create pointers on A&B's memory
run some calculations and fill A with data
iterate in parallel over i-s and fill B
transform result into readable form
Solution consists of:
topnc.h - header of custom function implementation:
/* "Copyright [2019] <Tych0n>" [legal/copyright] */
#ifndef IMPLICIT_TOPNC_H_
#define IMPLICIT_TOPNC_H_
extern void fargsort_c(float A[], int n_row, int m_row, int m_cols, int ktop, int B[]);
#endif // IMPLICIT_TOPNC_H_
topnc.cpp - body of the function:
#include <vector>
#include <limits>
#include <algorithm>
#include <iostream>
#include "topnc.h"
// One matrix cell: its column index and its value.
struct target {int index; float value;};

// Orders targets by descending value.
bool targets_compare(target t_i, target t_j) { return (t_i.value > t_j.value); }

// Writes the column indices of the ktop largest values of row n_row of A
// (a row-major matrix with m_cols columns) into row m_row of B (a row-major
// matrix with ktop columns), largest value first.
// If ktop exceeds m_cols, only the first m_cols entries of the output row
// are written and the rest are left untouched.
void fargsort_c ( float A[], int n_row, int m_row, int m_cols, int ktop, int B[] ) {
    if ( m_cols <= 0 || ktop <= 0 ) {
        return;
    }
    // Never sort past the end of the row.
    const int k = std::min( ktop, m_cols );
    // 64-bit offsets: n_row * m_cols can exceed the range of int.
    const float* row = A + static_cast<long long>( n_row ) * m_cols;
    int* out = B + static_cast<long long>( m_row ) * ktop;

    std::vector<target> targets( m_cols );
    for ( int j = 0; j < m_cols; j++ ) {
        targets[j].index = j;
        targets[j].value = row[j];
    }
    // partial_sort leaves the first k elements in sorted order, so no second
    // sort of that prefix is needed.
    std::partial_sort( targets.begin(), targets.begin() + k, targets.end(), targets_compare );
    for ( int j = 0; j < k; j++ ) {
        out[j] = targets[j].index;
    }
}
ctools.pyx - example usage
# distutils: language = c++
# cython: language_level=3
# cython: boundscheck=False
# cython: wraparound=False
# cython: nonecheck=False
from cython.parallel import parallel, prange
import numpy as np
cimport numpy as np
cdef extern from "topnc.h":
cdef void fargsort_c ( float A[], int n_row, int m_row, int m_cols, int ktop, int B[] ) nogil
# Input: 1000 x 100 random float32 scores, C-contiguous.
A = np.random.rand(1000, 100).astype(np.float32)

cdef:
    # Typed views and raw pointers to the first element of each buffer.
    float[:,::1] scores_view = A
    float* scores_ptr = &scores_view[0,0]
    int[:,::1] top_view = np.zeros((1000, 5), dtype=np.intc)
    int* top_ptr = &top_view[0,0]
    int row

# One fargsort_c call per row, spread over 10 threads without the GIL; input
# row `row` is written to output row `row`.
for row in prange(1000, nogil=True, num_threads=10, schedule='dynamic'):
    fargsort_c(scores_ptr, row, row, 100, 5, top_ptr)

# Result: for every row of A, the 5 indices produced by fargsort_c.
B = np.asarray(top_view)
compile.py - compile file; run it by command "python compile.py build_ext --inplace -f" in terminal (this will result in file ctools.cpython-*.so, which you then use for import):
from os import path
import numpy
from setuptools import setup, Extension
from Cython.Distutils import build_ext
from Cython.Build import cythonize
# Extension "ctools": the Cython wrapper plus the C++ implementation,
# compiled and linked with OpenMP.
ext_utils = Extension(
    'ctools',
    sources=['ctools.pyx', 'topnc.cpp'],
    include_dirs=[numpy.get_include()],
    extra_compile_args=['-std=c++0x', '-Os', '-fopenmp'],
    extra_link_args=['-fopenmp'],
    language='c++',
)

setup(
    name='ctools',
    setup_requires=['setuptools>=18.0', 'cython', 'numpy'],
    cmdclass={'build_ext': build_ext},
    ext_modules=cythonize([ext_utils]),
)
It was used for adding "recommend all" functionality into implicit ALS model.
In my code, I am trying to define a dynamic array whose number of rows and columns changes depending on conditions inside the function, meaning I might add more rows or columns. I tried to build a two-dimensional array of pointers, and I want to be able to pass this 2-D pointer array as an argument to a function.
This is the small part of my code:
Update: test.pyx
from libc.string cimport memset
import numpy as np
cimport numpy as np
cimport cython
from cython.view cimport array as cvarray
from libc.stdlib cimport malloc, free
from libc.math cimport log, exp
from cython_gsl cimport *
import ctypes
cdef gsl_rng *r = gsl_rng_alloc(gsl_rng_mt19937)
cdef int** zeros2(dim) except NULL:
    """Allocate a zero-filled dim[0] x dim[1] matrix of C ints.

    dim -- sequence of two non-negative integers (rows, columns).

    Returns a table of dim[0] row pointers, each row holding dim[1] zeroed
    ints.  The caller owns the memory: free() every row, then the table.

    Raises ValueError for a malformed dim and MemoryError when an allocation
    fails; rows allocated before the failure are released first.
    """
    cdef Py_ssize_t rows, cols, i, j
    cdef size_t row_bytes
    cdef int **matrix
    if len(dim) != 2:
        raise ValueError("dim must contain exactly two entries")
    rows = dim[0]
    cols = dim[1]
    if rows < 0 or cols < 0:
        raise ValueError("dim entries must be non-negative")
    # Always request at least one element so that a NULL result can only
    # mean that the allocation failed.
    matrix = <int**> malloc(sizeof(int*) * (rows if rows > 0 else 1))
    if matrix == NULL:
        raise MemoryError()
    row_bytes = sizeof(int) * (cols if cols > 0 else 1)
    for i in range(rows):
        matrix[i] = <int*> malloc(row_bytes)
        if matrix[i] == NULL:
            for j in range(i):
                free(matrix[j])
            free(matrix)
            raise MemoryError()
        memset(matrix[i], 0, row_bytes)
    return matrix
#cython.cdivision(True)
#cython.wraparound(False)
#cython.boundscheck(False)
cdef void generator(double* alpha,int* D, double* m):
    """Fill m[0] .. m[D[0]-1] with draws of gsl_ran_beta(r, alpha[0], 1).

    r is the module-level GSL random number generator.
    """
    cdef Py_ssize_t idx
    for idx in range(D[0]):
        m[idx] = gsl_ran_beta(r, alpha[0], 1)
#cython.cdivision(True)
#cython.boundscheck(False)
#cython.wraparound(False)
cdef void initializer(double* alpha, int* D, int* N, double* m, int** Z ):
    """Draw m with generator(), then fill Z with Bernoulli samples.

    For every column index below D[0] and every row index below N[0],
    Z[row][col] receives gsl_ran_bernoulli(r, m[col]) and is printed.
    """
    cdef int col, row
    generator(alpha, D, m)
    for col in range(D[0]):
        for row in range(N[0]):
            Z[row][col] = gsl_ran_bernoulli(r, m[col])
            print Z[row][col]
# run: allocates the n x d int matrix Z with zeros2 and the length-d double
# buffer mu, then calls initializer on them.
# NOTE(review): Z is not freed anywhere in this function.
def run(int n, int d, double alpha):
cdef np.ndarray[double, ndim=1, mode='c'] mu=np.empty((d,), dtype=ctypes.c_double)
cdef int **Z = zeros2((n, d))
# NOTE(review): &Z[0][0] is the address of an int (an int*), yet it is cast
# to int** here; Z itself already has the int** type that initializer takes.
initializer(&alpha, &d, &n, &mu[0], <int **>(&Z[0][0]) )
setup.py
from distutils.core import setup, Extension
from Cython.Build import cythonize
from numpy import get_include
import numpy
import cython_gsl
from Cython.Distutils import build_ext
# Extension "test": test.pyx compiled against NumPy and linked against the
# GSL libraries reported by cython_gsl.
gsl_extension = Extension(
    "test",
    ["test.pyx"],
    libraries=cython_gsl.get_libraries(),
    library_dirs=[cython_gsl.get_library_dir()],
    include_dirs=[numpy.get_include(), cython_gsl.get_include()],
)
ext_modules = cythonize([gsl_extension])

setup(
    name='test',
    ext_modules=ext_modules,
    cmdclass={'build_ext': build_ext},
)
Update:
The code gets compiled but when I import the run function in python I get this error:
>>> import test
>>> test.run( 10, 4,0.9)
Segmentation fault (core dumped)
I am not sure the 2-dimensional array that I defined is the best approach to solve my problem of defining a dynamical array and what is the reason I got this error?
Any suggestions would be most welcome.
Your immediate problem is that:
<int **>(&Z[0][0])
takes the address of the first element of the first row and casts it to an int**. It's actually an int* (because it's the address of an int). Therefore the memory that initializer writes to is nonsense and you get a segmentation fault. Casts are often an indication that you're doing something wrong.
You just need to pass Z which is already an int**.
The problem is that n, d and alpha are Python variables, so &n is not something you can do. You can change run to cdef function, or maybe create a temporary version:
cdef int _n = n;
and then pass &_n
However, based on your code, what's the point of passing a pointer to these three variables, anyway? You don't modify them. You can simply pass them without a pointer.
I'm trying to write two Cython functions to wrap external functions. The functions are the inverse of each another; one accepts a string, and returns a struct with two fields: a void pointer to a 2D array (the second dimension is always two elements: [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], … ]), and the array's length. The other accepts the same struct, and returns a string. So far, I've got the following. It compiles, but the cast to and from the nested list is definitely incorrect.
My .pxd:
cdef extern from "header.h":
struct _FFIArray:
void* data
size_t len
cdef _FFIArray decode_polyline_ffi(char* polyline, int precision);
cdef char* encode_coordinates_ffi(_FFIArray, int precision);
cdef void drop_float_array(_FFIArray coords);
cdef void drop_cstring(char* polyline)
My .pyx:
import numpy as np
from pypolyline_p cimport (
_FFIArray,
decode_polyline_ffi,
encode_coordinates_ffi,
drop_float_array,
drop_cstring
)
# encode_coordinates: converts coords to a float64 array, wraps it in an
# _FFIArray, passes it to encode_coordinates_ffi and returns the resulting
# C string as a bytes object.
def encode_coordinates(coords, int precision):
""" coords looks like [[1.0, 2.0], [3.0, 4.0], …] """
# NOTE(review): the memoryview is declared 1-D, while the docstring above
# describes coords as a list of pairs (2-D).
cdef double[::1] ncoords = np.array(coords, dtype=np.float64)
cdef _FFIArray coords_ffi
# Wrong
coords_ffi.data = <void*>&ncoords[0]
# Wrong
coords_ffi.len = ncoords.shape[0]
cdef char* result = encode_coordinates_ffi(coords_ffi, precision)
# The C string is converted to bytes before it is handed to drop_cstring.
cdef bytes polyline = result
drop_cstring(result)
return polyline
# decode_polyline: passes polyline to decode_polyline_ffi, views the returned
# buffer as doubles, copies it into a NumPy array and then hands the
# _FFIArray to drop_float_array.
def decode_polyline(bytes polyline, int precision):
cdef char* to_send = polyline
cdef _FFIArray result = decode_polyline_ffi(to_send, precision)
# Wrong
cdef double* incoming_ptr = <double*>(result.data)
# Wrong
# NOTE(review): the view is 1-D with result.len elements -- confirm whether
# len counts coordinate pairs or individual doubles.
cdef double[::1] view = <double[:result.len:1]>incoming_ptr
# np.copy runs before drop_float_array, so coords does not refer to result.data.
coords = np.copy(view)
drop_float_array(result)
return coords
I think the issue is that you're trying to use 2D arrays and 1D memoryviews
In the encoding function
# the coords are a 2D, C contiguous array
cdef double[:,::1] ncoords = np.array(coords, dtype=np.float64)
# ...
coords_ffi.data = <void*>&ncoords[0,0] # take the 0,0 element
# the rest stays the same
In the decoding function
# specify it as a 2D, len by 2, C contiguous array
cdef double[:,::1] view = <double[:result.len,:2:1]>incoming_ptr
# the rest stays the same
(It's possible that your FFI functions expect Fortran contiguous arrays. In which case the ::1 goes on the first dimension of the memoryview, and you also change incoming_ptr)
# myf (question version): fills a C array of 8000 doubles and tries to return
# its contents as a NumPy array.
cpdef myf():
# pd has to be a c array.
# Because it will then be consumed by some c function.
cdef double pd[8000]
# Do something with pd
...
# Get a memoryview.
cdef double[:] pd_view = pd
# Coercion the memoryview to numpy array. Not working.
# NOTE(review): np.asarray is given pd (the C array) here, not pd_view.
ret = np.asarray(pd)
return ret
I would like it to return a numpy array. How can I do it?
For the moment I have to do
pd_np = np.zeros(8000, dtype=np.double)
cdef int i
for i in range(8000):
pd_np[i] = pd[i]
If you are just declaring your array in your function why not make it a numpy array to begin with, then when you need the c array you can just grab the data pointer.
cimport numpy as np
import numpy as np
def myf():
    """Return a new array of 8000 doubles filled through a raw C pointer.

    The NumPy array owns the memory; ``pd`` only points at its first element
    and can be passed to C code that expects a ``double*``.
    """
    # Typed index and size: with an untyped index the loop below would run
    # on Python objects instead of being compiled to a C loop.
    cdef Py_ssize_t i, n = 8000
    cdef np.ndarray[double, ndim=1, mode="c"] pd_numpy = np.empty(n)
    cdef double *pd = &pd_numpy[0]
    # Do something to fill pd with values
    for i in range(n):
        pd[i] = i
    return pd_numpy
I made a typo,
ret = np.asarray(pd_view) works
In the memview example here http://docs.cython.org/src/userguide/memoryviews.html
# Memoryview on a C array
cdef int carr[3][3][3]
cdef int [:, :, :] carr_view = carr
carr_view[...] = narr_view # np.arange(27, dtype=np.dtype("i")).reshape((3, 3, 3))
carr_view[0, 0, 0] = 100
I can create a numpy array from carr_view, the memory view on carr, a C array.
# print np.array(carr) # cython error
print 'numpy array on carr_view'
print np.array(carr_view)
print np.array(carr_view).sum() # match sum3d(carr)
# or np.asarray(carr_view)
print 'numpy copy from carr_view'
carr_copy = np.empty((3,3,3))
carr_copy[...] = carr_view[...] # don't need indexed copy
print carr_copy
print carr_copy.sum() # match sum3d(carr)