The goal is to create a NumPy array in C++ and access it in Python.
The block of code below runs fine when built and run as a standalone program.
However, if I build it as a shared library, load it with ctypes, and call the function, it segfaults on the _import_array call. Could someone tell me why this happens?
#include "Python.h"
#include "numpy/arrayobject.h"
using namespace std;
extern "C"
PyObject* test_create_numpy() {
Py_Initialize();
_import_array();
double* a = new double[1];
a[0] = 1.0;
npy_intp dims = {1};
PyObject *Arr = PyArray_SimpleNewFromData( 1, &dims, PyArray_FLOAT64, a);
return Arr;
}
int main() {
test_create_numpy();
return 0;
}
The Python code used:
from ctypes import cdll, py_object
utils = cdll.LoadLibrary("test.dylib")
test_create_numpy = utils.test_create_numpy
test_create_numpy.restype = py_object
ret_vec = test_create_numpy()
print(ret_vec)
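For reference, a hedged sketch of a more defensive version of the function, assuming the crash comes from the NumPy C-API table not being set up when the library is loaded into an already-running interpreter; checking the return value of _import_array() at least turns the segfault into a readable Python error (illustrative, not a confirmed fix):
#include <Python.h>
#include <numpy/arrayobject.h>

extern "C" PyObject* test_create_numpy() {
    if (!Py_IsInitialized())
        Py_Initialize();               // no-op when called from a running interpreter
    if (PyArray_API == NULL && _import_array() < 0) {
        PyErr_Print();                 // report why numpy could not be imported
        return NULL;
    }
    npy_intp dims[1] = {1};
    double* a = new double[1];         // note: ownership is leaked here, as in the original
    a[0] = 1.0;
    return PyArray_SimpleNewFromData(1, dims, NPY_FLOAT64, a);
}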
I am trying to pass a vector/array by reference from Python through pybind11 to a C++ library. The C++ library may fill in data, and after the call I expect the Python side to see that data.
Here is the simplified C++ code:
#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>
#include <pybind11/stl.h>
class Setup
{
public:
Setup(int version) : _version(version) {}
int _version;
};
class Calculator
{
public:
Calculator() {}
static void calc(const Setup& setup, std::vector<double>& results) { ... }
};
namespace py = pybind11;
PYBIND11_MODULE(one_calculator, m) {
// optional module docstring
m.doc() = "pybind11 one_calculator plugin";
py::class_<Setup>(m, "Setup")
.def(py::init<int>());
py::class_<Calculator>(m, "Calculator")
.def(py::init<>())
.def("calc", &Calculator::calc);
}
On the Python side, I intend to:
import os
import sys
import numpy as np
import pandas as pd
sys.path.append(os.path.realpath('...'))
from one_calculator import Setup, Calculator
a_setup = Setup(1)
a_calculator = Calculator()
results = []
a_calculator.calc(a_setup, results)
results
Apparently the results are not passed back. Is there a neat way to do it?
Figured out a way:
#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>
#include <pybind11/stl.h>
#include "Calculator.h" // where run_calculator is
namespace py = pybind11;
// wrap c++ function with Numpy array IO
int wrapper(const std::string& input_file, py::array_t<double>& in_results) {
if (in_results.ndim() != 2)
throw std::runtime_error("Results should be a 2-D Numpy array");
auto buf = in_results.request();
double* ptr = (double*)buf.ptr;
size_t N = in_results.shape()[0];
size_t M = in_results.shape()[1];
std::vector<std::vector<double> > results;
run_calculator(input_file, results);
size_t pos = 0;
for (size_t i = 0; i < results.size(); i++) {
const std::vector<double>& line_data = results[i];
for (size_t j = 0; j < line_data.size(); j++) {
ptr[pos] = line_data[j];
pos++;
}
}
return 0;
}
PYBIND11_MODULE(calculator, m) {
// optional module docstring
m.doc() = "pybind11 calculator plugin";
m.def("run_calculator", &wrapper, "Run the calculator");
}
Python side:
results = np.zeros((N, M))
run_calculator(input_file, results)
This way I also do not expose the Setup and Calculator classes to the Python side.
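An alternative sketch, in case pre-sizing the array on the Python side is undesirable: allocate the NumPy array inside the wrapper and return it by value. The wrapper name run_calculator_py is hypothetical; run_calculator is assumed to be the same function from Calculator.h as above:
#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>
#include <string>
#include <vector>
#include "Calculator.h"
namespace py = pybind11;

py::array_t<double> run_calculator_py(const std::string& input_file) {
    std::vector<std::vector<double>> results;
    run_calculator(input_file, results);             // C++ fills the nested vector
    const py::ssize_t n = (py::ssize_t)results.size();
    const py::ssize_t m = n ? (py::ssize_t)results[0].size() : 0;
    py::array_t<double> out({n, m});                 // freshly allocated 2-D array
    auto r = out.mutable_unchecked<2>();
    for (py::ssize_t i = 0; i < n; ++i)
        for (py::ssize_t j = 0; j < m; ++j)
            r(i, j) = results[i][j];
    return out;                                      // ownership passes to Python
}

PYBIND11_MODULE(calculator, m) {
    m.def("run_calculator", &run_calculator_py, "Run the calculator and return a 2-D array");
}
On the Python side the call would then be simply results = run_calculator(input_file), with no pre-allocation and no need to know N and M in advance.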
I am trying to call a C function from my Python code. The C function is the following:
#include <stdio.h>
#include <stddef.h>
#include <string.h>
#include "picohttpparser.c"
#include "picohttpparser.h"
const char *path;
char buf[4096];
int pret, minor_version, i;
struct phr_header headers[100];
size_t buflen = 0, prevbuflen = 0, method_len, path_len, num_headers;
ssize_t rret;
void parse_http_request(char *request, const char *method) {
/* parse the request */
strncpy(buf, request, 4096);
num_headers = sizeof(headers) / sizeof(headers[0]);
phr_parse_request(buf, sizeof(buf) -1, &method, &method_len, &path, &path_len, &minor_version, headers, &num_headers, 0);
}
In Python I am using ctypes:
import ctypes
_cparser = ctypes.CDLL('/tmp/serv.so')
_cparser.parse_http_request.argtypes = (ctypes.c_char_p, ctypes.POINTER(ctypes.c_char_p))
def parse(request):
method = ctypes.c_char_p()
_cparser.parse_http_request(ctypes.c_char_p(request.encode()), ctypes.byref(method))
print(method)
method is always None, but if I compile and run the C code on its own, it prints the method variable correctly. What am I missing here?
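A hedged sketch of one likely fix, assuming the problem is that the C function receives method by value (a const char*), so assigning to it inside the function is invisible to the caller; taking a pointer-to-pointer matches the POINTER(c_char_p) argtype and byref(method) used on the Python side. It reuses the question's globals (buf, headers, num_headers, path, method_len, ...):
#include <string.h>
#include "picohttpparser.h"

void parse_http_request(char *request, const char **method) {
    /* copy the request into the global buffer, always NUL-terminated */
    strncpy(buf, request, sizeof(buf) - 1);
    buf[sizeof(buf) - 1] = '\0';
    num_headers = sizeof(headers) / sizeof(headers[0]);
    /* phr_parse_request stores the method pointer through *method */
    phr_parse_request(buf, strlen(buf), method, &method_len,
                      &path, &path_len, &minor_version,
                      headers, &num_headers, 0);
    /* note: *method points into buf and is not NUL-terminated;
       method_len holds its length */
}
On the Python side, method.value would then contain the bytes starting at the method name; since the string is not NUL-terminated, slicing by method_len (exposed the same way) would be more precise.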
I have a large array in a file that I cannot modify but need to access in C++. I need to iterate through the array and return a value to a Python script.
Problem: I am able to find the element in the array by its name, but when I try to return its value and section members to the Python wrapper, they come back as 0 or -1 no matter what their actual values are. Here is an example of the first element in the array from external_lists.h:
info_struct A1[] = {
{ "LIMITING", {{0x00, 0x02, 0xFF}}
.........
}
This array has thousands of similar elements. When I run the following code I get 0x or -0x1. Valuedll.cpp:
#include <iostream>
#include <string>
#include <array>
#include "external_lists.h"
extern info_struct A1[];
extern info_struct A2[];
extern int A1size;
extern int A2size;
#define DLLEXPORT extern "C" __declspec(dllexport)
DLLEXPORT int get_creation_data(const char* needed_name){
int A1_size = ( A1size/ sizeof(A1[0])) ;
int A2_size = ( A2size / sizeof(A2[0])) ;
for (int i = 0; i < A1_size; i++) {
if (A1[i].name == needed_name) {
return A1[i].style->value;
}
}
return -1;
}
The info_struct type is defined by the following struct:
struct info_struct{
const char* name;
style_length style[MAX_SIZE];
options_length options[MAX_SIZE];
};
I need to get the values that are inside the style array. The style_length struct is the following:
struct style_length{
uint16_t value, section;
option_bits obits;
};
My Python wrapper is the following:
import os, sys, re
from ctypes import *
import ctypes as ct
def get_creation_values(value_name):
valuell = CDLL('C:\\Documents\\creation.dll')
valuell.get_section_data.argtypes = [c_char_p]
valuell.get_section_data.restype = ct.c_int16
return hex(valuell.get_creation_data(value_name))
if __name__ == "__main__":
val = get_creation_values('LIMITING')
print(val)
Output:
(-0x1)
Thanks in advance for any help. If this is too much, thank you for reading. I will try to clarify.
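A hedged sketch of the lookup with the most likely problems addressed, under two stated assumptions: A1size holds a byte count (otherwise drop the division), and the name comparison must use strcmp, because operator== on two const char* compares pointer addresses, which essentially never match here. On the Python side, argtypes/restype should be set on get_creation_data (the wrapper above sets them on get_section_data) and the name must be passed as bytes, e.g. b'LIMITING':
#include <cstring>
#include "external_lists.h"

extern info_struct A1[];
extern int A1size;

#define DLLEXPORT extern "C" __declspec(dllexport)

DLLEXPORT int get_creation_data(const char* needed_name) {
    int count = A1size / (int)sizeof(A1[0]);      // assumption: A1size is a size in bytes
    for (int i = 0; i < count; i++) {
        if (std::strcmp(A1[i].name, needed_name) == 0) {
            return A1[i].style[0].value;          // value field of the first style entry
        }
    }
    return -1;                                    // not found
}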
I'm trying to link Fortran, C/C++, and Python using VS2010 on Windows 64-bit. The main code is written in Fortran; from it I call a C++ function, which in turn calls a Python function to do some further processing. Here is a simplified version of my code:
!Fmain.f90
PROGRAM fmain
IMPLICIT NONE
INTERFACE
SUBROUTINE Call_NN(input_array, output_array) BIND(C, name='Call_NN')
USE, INTRINSIC :: iso_c_binding
IMPLICIT NONE
REAL(C_DOUBLE), INTENT(IN), DIMENSION(*) :: input_array
REAL(C_DOUBLE), INTENT(INOUT), DIMENSION(*) :: output_array
END SUBROUTINE
END INTERFACE
REAL*8, DIMENSION(0:2) :: input_array, output_array
REAL :: b
INTEGER :: i
do i = 1, 3
input_array = 0.01d0
output_array = 0.d0
call Call_NN(input_array, output_array)
enddo
END
The C++ static library is as follows:
//Csub.cpp
#include <Python.h>
#include <cassert>
#include <stdio.h>
#include <conio.h>
#include <iostream>
#include <fstream>
#include <array>
#include <string.h>
#include <errno.h>
#include <limits.h>
#include <assert.h>
#include <stdlib.h>
using namespace std;
extern "C" void Call_NN(array<double,3> input_array, array<double,3> output_array)
{
// Initialize the Python interpreter.
Py_Initialize();
// Variables Declaration
float result = 0;
float result1 = 0;
float result2 = 0;
float result3 = 0;
float value1 = 0;
float value2 = 0;
float value3 = 0;
// Create some Python objects that will later be assigned values.
PyObject *pName, *pModule, *pDict, *pFunc, *pArgs, *pValue1, *pValue2, *pValue3;
PyObject *pT1, *pT2, *pT3;
// Convert the file name to a Python string.
const char* filename = "module";
pName = PyString_FromString(filename);
if (pName == nullptr)
{
PyErr_Print();
std::exit(1);
}
// Import the file as a Python module.
pModule = PyImport_Import(pName);
if (pModule == nullptr)
{
PyErr_Print();
std::exit(1);
}
// Create a dictionary for the contents of the module.
pDict = PyModule_GetDict(pModule);
if (pDict == nullptr)
{
PyErr_Print();
std::exit(1);
}
// Get the NN function from the dictionary.
pFunc = PyDict_GetItemString(pDict, "NN");
if (pFunc == nullptr)
{
PyErr_Print();
std::exit(1);
}
// Create a Python tuple to hold the arguments to the method.
pArgs = PyTuple_New(3);
if (pArgs == nullptr)
{
PyErr_Print();
std::exit(1);
}
// Convert the three inputs to Python floats.
value1 = input_array[0];
value2 = input_array[1];
value3 = input_array[2];
pValue1 = PyFloat_FromDouble(value1);
pValue2 = PyFloat_FromDouble(value2);
pValue3 = PyFloat_FromDouble(value3);
// Set the Python floats as the arguments to the method.
PyTuple_SetItem(pArgs, 0, pValue1);
PyTuple_SetItem(pArgs, 1, pValue2);
PyTuple_SetItem(pArgs, 2, pValue3);
// Call the function with the arguments.
PyObject* pResult = PyObject_CallObject(pFunc, pArgs);
// Print a message if calling the method failed.
if (pResult == NULL)
printf("Calling the add method failed.\n");
// Convert the result to a long from a Python object.
//result = PyFloat_AsDouble(pResult);
pT1 = PyTuple_GetItem(pResult, 0);
pT2 = PyTuple_GetItem(pResult, 1);
pT3 = PyTuple_GetItem(pResult, 2);
// Convert output to float
result1 = PyFloat_AsDouble(pT1);
result2 = PyFloat_AsDouble(pT2);
result3 = PyFloat_AsDouble(pT3);
output_array[0] = result1;
output_array[1] = result2;
output_array[2] = result3;
// Destroy the Python interpreter.
Py_Finalize();
}
And the Python module is defined as:
# module.py
import sys
import numpy as np
import pandas as pd
import pickle
from sklearn.externals import joblib
def NN(a,b,c,):
X_array = np.array([a, b, c])
X = pd.DataFrame(X_array).transpose()
clf = joblib.load('SavedNeuralNetwork.pkl')
y = clf.predict(X)
y = pd.DataFrame(y).transpose()
y_array = pd.DataFrame.as_matrix(y)
i = y_array.item(0)
j = y_array.item(1)
k = y_array.item(2)
output = (i, j, k)
return i, j, k
At the first iteration the program works fine, but at the second iteration I get an unhandled exception at line:
pModule = PyImport_Import(pName);
And in particular:
Unhandled exception at 0x00000001800db3ec in Fmain.exe: 0xC0000005: Access
violation writing location 0x0000000000000002.
Why is this happening? I tried to reload the Python module after the first iteration, but the same thing happened. Any suggestions or other comments are much appreciated.
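A hedged sketch of one way to restructure Call_NN, assuming the crash comes from re-initializing the interpreter on every call: NumPy (imported indirectly via module.py) generally does not survive repeated Py_Initialize/Py_Finalize cycles, so the interpreter should be initialized once and kept alive. It also takes plain C pointers, which matches the Fortran bind(C) interface more closely than a std::array passed by value:
#include <Python.h>

extern "C" void Call_NN(const double* input_array, double* output_array)
{
    if (!Py_IsInitialized())
        Py_Initialize();                               // once per process

    PyObject* pModule = PyImport_ImportModule("module");
    if (!pModule) { PyErr_Print(); return; }

    PyObject* pFunc = PyObject_GetAttrString(pModule, "NN");
    if (!pFunc) { PyErr_Print(); Py_DECREF(pModule); return; }

    // build the argument tuple and call NN(a, b, c)
    PyObject* pResult = PyObject_CallFunction(pFunc, "ddd",
        input_array[0], input_array[1], input_array[2]);
    if (pResult) {
        for (int i = 0; i < 3; ++i)
            output_array[i] = PyFloat_AsDouble(PyTuple_GetItem(pResult, i));
        Py_DECREF(pResult);
    } else {
        PyErr_Print();
    }
    Py_DECREF(pFunc);
    Py_DECREF(pModule);
    // intentionally no Py_Finalize(): keep the interpreter alive for later calls
}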
I have implemented a Python extension in C and found that executing the C function from Python is 2x faster than executing the same C code from a C main.
But why is this faster? I would expect the plain C to have exactly the same performance when called from Python as when called from C.
Here is my experiment:
Plain C compute code (a simple triple-for-loop matrix-matrix multiplication)
Plain C main function that calls the mmult() function
Python extension wrapper to call the mmult() function
All timing is happening entirely within the C code
Here are my results:
Pure C - 85us
Python Extension - 36us
Here's my code:
--mmult.cpp----------
#include "mmult.h"
void mmult(int32_t a[1024],int32_t b[1024],int32_t c[1024]) {
struct timeval t1, t2;
gettimeofday(&t1, NULL);
for(int i=0; i<32; i=i+1) {
for(int j=0; j<32; j=j+1) {
int32_t result=0;
for(int k=0; k<32; k=k+1) {
result+=a[i*32+k]*b[k*32+j];
}
c[i*32+j] = result;
}
}
gettimeofday(&t2, NULL);
double elapsedTime = (t2.tv_usec - t1.tv_usec) + (t2.tv_sec - t1.tv_sec)*1000000;
printf("elapsed time: %fus\n",elapsedTime);
}
--mmult.h-------
#include <stdint.h>
void mmult(int32_t a[1024],int32_t b[1024],int32_t c[1024]);
--main.cpp------
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include "mmult.h"
int main() {
int* a = (int*)malloc(sizeof(int)*1024);
int* b = (int*)malloc(sizeof(int)*1024);
int* c = (int*)malloc(sizeof(int)*1024);
for(int i=0; i<1024; i++) {
a[i]=i+1;
b[i]=i+1;
c[i]=0;
}
struct timeval t1, t2;
gettimeofday(&t1, NULL);
mmult(a,b,c);
gettimeofday(&t2, NULL);
double elapsedTime = (t2.tv_usec - t1.tv_usec) + (t2.tv_sec - t1.tv_sec)*1000000;
printf("elapsed time: %fus\n",elapsedTime);
free(a);
free(b);
free(c);
return 0;
}
Here's how I compile main:
gcc -o main main.cpp mmult.cpp -O3
--wrapper.cpp-----
#include <Python.h>
#include <numpy/arrayobject.h>
#include "mmult.h"
static PyObject* mmult_wrapper(PyObject* self, PyObject* args) {
int32_t* a;
PyArrayObject* a_obj = NULL;
int32_t* b;
PyArrayObject* b_obj = NULL;
int32_t* c;
PyArrayObject* c_obj = NULL;
int res = PyArg_ParseTuple(args, "OOO", &a_obj, &b_obj, &c_obj);
if (!res)
return NULL;
a = (int32_t*) PyArray_DATA(a_obj);
b = (int32_t*) PyArray_DATA(b_obj);
c = (int32_t*) PyArray_DATA(c_obj);
/* call function */
mmult(a,b,c);
Py_RETURN_NONE;
}
/* define functions in module */
static PyMethodDef TheMethods[] = {
{"mmult_wrapper", mmult_wrapper, METH_VARARGS, "your c function"},
{NULL, NULL, 0, NULL}
};
static struct PyModuleDef cModPyDem = {
PyModuleDef_HEAD_INIT,
"mmult", "Some documentation",
-1,
TheMethods
};
PyMODINIT_FUNC
PyInit_c_module(void) {
PyObject* retval = PyModule_Create(&cModPyDem);
import_array();
return retval;
}
--setup.py-----
import os
import numpy
from distutils.core import setup, Extension
cur = os.path.dirname(os.path.realpath(__file__))
c_module = Extension("c_module", sources=["wrapper.cpp","mmult.cpp"],include_dirs=[cur,numpy.get_include()])
setup(ext_modules=[c_module])
--code.py-----
import c_module
import time
import numpy as np
if __name__ == "__main__":
a = np.ndarray((32,32),dtype='int32',buffer=np.linspace(1,1024,1024,dtype='int32').reshape(32,32))
b = np.ndarray((32,32),dtype='int32',buffer=np.linspace(1,1024,1024,dtype='int32').reshape(32,32))
c = np.ndarray((32,32),dtype='int32',buffer=np.zeros((32,32),dtype='int32'))
c_module.mmult_wrapper(a,b,c)
Here's how I compile the Python extension:
python3.6 setup_sw.py build_ext --inplace
UPDATE
I've updated the mmult.cpp code to run the triple loop for 1,000,000 iterations internally. This resulted in very similar times:
Pure C - 27us
Python Extension - 27us
85 microseconds is too small a delay to be measured reliably and repeatedly. For example, CPU cache effects (or context switches, or paging) may dominate the computation time (and alter it to make that timing meaningless).
(I guess you are on Linux/x86-64)
As a rule of thumb, try to have a run lasting about half a second at least, and repeat the benchmarking a few times. You could also use time(1) for measurements.
See also time(7). There are several notions of time (elapsed "real" time, monotonic time, process cpu time, thread cpu time, etc...). You could consider using clock(3) or clock_gettime(2) to measure time.
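A minimal sketch of such a measurement, assuming a POSIX system; CLOCK_PROCESS_CPUTIME_ID counts CPU time actually consumed by the process, and repeating the workload keeps the run well above the microsecond noise floor:
#include <stdio.h>
#include <time.h>

int main(void) {
    struct timespec t1, t2;
    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &t1);
    // ... run the workload here many times, e.g. the 32x32 mmult in a loop ...
    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &t2);
    double us = (t2.tv_sec - t1.tv_sec) * 1e6 + (t2.tv_nsec - t1.tv_nsec) / 1e3;
    printf("elapsed CPU time: %f us\n", us);
    return 0;
}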
BTW, you might compile with a more recent version of GCC (as of November 2017, GCC 7, with GCC 8 due soon), and you want to compile with gcc -march=native -O3 for benchmarking purposes. Try other optimization options and tuning as well. You could also try another compiler, e.g. Clang/LLVM.
Look also at this answer (regarding parallelization) to a related question. The numpy package probably uses similar techniques internally (outside of the Python GIL), so it could be faster than your naive sequential matrix-multiplication code in C.