I'm new to Python. Can anyone help me understand call by reference in Python?
#include <stdio.h>
#include <conio.h>
#include <malloc.h>

void rd(float *a, int *n)
{
    int i;
    for (i = 0; i < *n; i++) {
        printf("Enter element %d: ", i + 1);
        scanf("%f", &a[i]);
    }
}

float sum(float *a, int *n)
{
    int i;
    float s = 0;
    for (i = 0; i < *n; i++)
        s = s + a[i];
    return s;
}

int main(void)
{
    int size;
    float *x, g;

    printf("Give size of array: ");
    scanf("%d", &size);
    x = (float *)malloc(size * sizeof(float)); /* dynamic memory allocation */
    printf("\n");

    rd(x, &size);      /* passing the addresses */
    g = sum(x, &size); /* passing the addresses */
    printf("\nSum of elements = %f\n", g);

    free(x);
    printf("\nDONE ! Hit any key ...");
    getch();
    return 0;
}
This is a C example I'm trying to rewrite in Python. Any help would be appreciated.
In Python there is no way to pass "the address" of a "place" (a variable, an array element, a dictionary value or an instance member).
The only way to give other code the ability to change a place is to provide a "path" to reach it (e.g. the variable name, or the array and the index, and so on). As a rather unusual alternative (not used often in Python), you can pass a "writer" function that will change the place, for example:
def func(a, b, placeWriter):
    placeWriter(a + b)

def caller():
    mylist = [1, 2, 3, 4]
    def writer(x):
        mylist[3] = x
    func(10, 20, writer)
Much more common is to write functions that simply return the needed values; note that returning multiple values is trivial in Python, while C does not support it and passing addresses is used instead:
def func():          # void func(int *a, int *b, int *c) {
    return 1, 2, 3   #     *a = 1; *b = 2; *c = 3;
                     # }

def caller():        # void caller() { int a, b, c;
    a, b, c = func() #     func(&a, &b, &c);
    ...
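Applied to the C program above, the idiomatic Python version just builds and returns the values it needs instead of passing addresses; a minimal sketch (assuming interactive input, as in the C code):

def rd(n):
    a = []
    for i in range(n):
        a.append(float(input("Enter element %d: " % (i + 1))))
    return a

def main():
    size = int(input("Give size of array: "))
    x = rd(size)   # rd returns a new list; no addresses are passed
    g = sum(x)     # the built-in sum adds the elements
    print("Sum of elements = %f" % g)

main()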
Related
I'm trying to coerce the values in my NumPy array to float. However, some values in my array might not coerce successfully, and I want to replace those with a default value. I do want the speed of NumPy, though; I don't want to fall back to a Python loop. What's the best route to achieve this behavior?
For instance:
import numpy as np
my_array = np.array(["1", "2", "3", "NA"])
new_array = magic_coerce(my_array, float, -1.0) # I want to implement this
print(new_array) # should print [1., 2., 3., -1.]
I'm trying to write my own ufunc in C, and I have the following:
#include <regex.h>   /* regcomp, regexec, regfree */
#include <stdlib.h>  /* atof */

int is_float(const char* c)
{
    regex_t regex;
    regcomp(&regex, "^[+-]?([0-9]*[.])?[0-9]+$", REG_EXTENDED);
    int matched = regexec(&regex, c, 0, NULL, 0) == 0;
    regfree(&regex);  /* release the compiled pattern */
    return matched;
}

float to_float(const char *c, float default_value)
{
    float result = default_value;
    if (is_float(c))
    {
        result = atof(c);
    }
    return result;
}
static PyMethodDef LogitMethods[] = {
{NULL, NULL, 0, NULL}
};
/* The loop definition must precede the PyMODINIT_FUNC. */
static void double_logitprod(char **args, npy_intp *dimensions,
npy_intp* steps, void* data)
{
npy_intp i;
npy_intp n = dimensions[0];
char *in1 = args[0], *in2 = args[1];
char *out = args[2];
npy_intp in1_step = steps[0];
npy_intp out_step = steps[2];
double tmp;
for (i = 0; i < n; i++) {
/*BEGIN main ufunc computation*/
char *tmp1 = (char *) in1;
tmp = *((double *)in2);
*((double *) out) = to_float(tmp1, tmp);
/*END main ufunc computation*/
in1 += in1_step;
out += out_step;
}
}
/*This a pointer to the above function*/
PyUFuncGenericFunction funcs[1] = {&double_logitprod};
/* These are the input and return dtypes of logit.*/
static char types[3] = {NPY_OBJECT, NPY_DOUBLE,
NPY_DOUBLE};
But it looks like it's not working correctly. What's the type for UNICODE strings in NumPy? NPY_UNICODE gives an error, so I coerced it to NPY_OBJECT, but that does not seem to play well with it.
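For what it's worth, if a dependency outside NumPy is acceptable, pandas already provides this kind of coercion without an explicit Python loop; a possible workaround sketch (it does not answer the NPY_UNICODE question):

import numpy as np
import pandas as pd

my_array = np.array(["1", "2", "3", "NA"])
# errors="coerce" turns strings that cannot be parsed into NaN,
# which is then replaced by the default value.
new_array = pd.to_numeric(pd.Series(my_array), errors="coerce").fillna(-1.0).to_numpy()
print(new_array)  # [ 1.  2.  3. -1.]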
This is the code in Python that generates log-spaced values very quickly:
import numpy
print(numpy.logspace(0,1,num=10000000))
My attempt to reproduce its output in C++ is the following:
#include <iostream>
#include <cmath>
#include <vector>
std::vector<double> logspace (const double &a, const double &b, const int &k)
{
std::vector<double> logspace;
for (int i = 0; i < k; i++)
{
logspace.push_back(pow(10, i * (b - a) / (k - 1)));
}
return logspace;
}
void logspace_print (std::vector<double> logspace)
{
for (auto ls : logspace)
{
std::cout << ls << "\n";
}
std::cout << "\n";
}
int main ()
{
std::vector<double> my_ls = logspace(0, 1, 10000000);
logspace_print(my_ls);
}
The use of pow(., .) and a for-loop (and maybe lots of other reasons, floating-point arithmetic aside) makes my code naive, so its run time is far worse than Python's. I also saw the recommendations at Is there something like numpy.logspace in C++?, but they made no significant difference. So, how can I modify my code, or write new code, that is comparable with Python's version?
Interesting question! My answer has the different versions of the function at the top; below that is only the benchmarking code, using google-benchmark as the library.
My intermediate results can also be found on Quick-Bench.com, which is generally a great site.
You don't say whether you want to measure printing to stdout as part of your use case or not. Printing is generally expensive. You avoid std::endl's flush, which is good! Furthermore, printf might be faster than std::cout. Also take a look at fmtlib; it is fast and easy to use.
Generally, the approach that NumPy uses is fastest (named logspace_v3 in my version). It consists of first running linspace and then raising 10 to each element in-place.
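That equivalence is easy to check from Python; a small illustrative check (not part of the benchmark below):

import numpy as np

a, b, num = 0.0, 1.0, 10
# numpy.logspace is essentially linspace followed by an element-wise power of the base
assert np.allclose(np.logspace(a, b, num=num),
                   np.power(10.0, np.linspace(a, b, num=num)))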
Still, I strongly feel that I am missing quite a bit here. With the appropriate flags (-march=native -mtune=native and -ffast-math), vectorization should kick in, but I don't believe it does. Here is some Godbolt output with vectorization (line 590).
What was fastest was getting rid of the pow call entirely (logspace_v6). Note that this accumulates floating-point error and leads to inaccurate results.
Minor: there is no benefit to passing doubles or ints by const reference.
#include <algorithm>
#include <benchmark/benchmark.h>
#include <cmath>
#include <iostream>
#include <numeric>
#include <vector>
#include <gtest/gtest.h>
std::vector<double> logspace(double a, double b, int k) {
std::vector<double> logspace;
for (int i = 0; i < k; i++) {
logspace.push_back(pow(10, i * (b - a) / (k - 1)));
}
return logspace;
}
// Pre-allocate the correct size using .reserve()
std::vector<double> logspace_v1(double a, double b, int k) {
std::vector<double> logspace;
logspace.reserve(k);
for (int i = 0; i < k; i++) {
logspace.push_back(pow(10, i * (b - a) / (k - 1)));
}
return logspace;
}
/// Manually extract the constant factor.
std::vector<double> logspace_v2(double a, double b, int k) {
std::vector<double> logspace;
logspace.reserve(k);
const auto exp_scale = (b - a) / (k - 1);
for (int i = 0; i < k; i++) {
logspace.push_back(pow(10, i * exp_scale));
}
return logspace;
}
/// Copy the impl behavior of numpy.linspace: First linspace then power.
std::vector<double> logspace_v3(double a, double b, int k) {
/*
y = linspace(start, stop, num=num, endpoint=endpoint, axis=axis)
if dtype is None:
return _nx.power(base, y)
return _nx.power(base, y).astype(dtype, copy=False)
*/
const auto exp_scale = (b - a) / (k - 1);
std::vector<double> logspace;
logspace.reserve(k);
for (int i = 0; i < k; i++) {
logspace.push_back(i * exp_scale);
}
std::for_each(logspace.begin(), logspace.end(),
[](double &x) { x = pow(10, x); });
return logspace;
}
/// Improve on v3 by applying pow directly
std::vector<double> logspace_v4(double a, double b, int k) {
const auto exp_scale = (b - a) / (k - 1);
std::vector<double> logspace(k, 0.);
std::generate(logspace.begin(), logspace.end(),
[n = -1, exp_scale]() mutable {
n++;
return pow(10, n * exp_scale);
});
return logspace;
}
/// Use iota and for_each: first linspace, then power.
std::vector<double> logspace_v5(double a, double b, int k) {
const auto exp_scale = (b - a) / (k - 1);
std::vector<double> logspace(k, 0.);
std::iota(logspace.begin(), logspace.end(), 0);
std::for_each(logspace.begin(), logspace.end(),
[exp_scale](double &x) { x *= exp_scale; });
std::for_each(logspace.begin(), logspace.end(),
[](double &x) { x = pow(10, x); });
return logspace;
}
std::vector<double> logspace_v6(double a, double b, int k) {
const auto exp_scale = (b - a) / (k - 1);
const auto factor = pow(10, exp_scale);
std::vector<double> logspace;
logspace.reserve(k);
// val = pow(b, i * exp_scale);
// = pow(pow(b, exp_scale), i);
// = pow(f, i); with f := pow(b, exp_scale);
// next = cur * f;
// first = pow(b, a);
double val = pow(10, a);
for (int i = 0; i < k; i++) {
logspace.push_back(val);
val *= factor;
}
return logspace;
}
template <std::vector<double> (*F)(double, double, int)>
static void LogspaceBench(benchmark::State &state) {
for (auto _ : state) {
benchmark::DoNotOptimize(F(0, 1, state.range(0)));
}
}
BENCHMARK_TEMPLATE(LogspaceBench, logspace)->Arg(1000);
BENCHMARK_TEMPLATE(LogspaceBench, logspace_v1)->Arg(1000);
BENCHMARK_TEMPLATE(LogspaceBench, logspace_v2)->Arg(1000);
BENCHMARK_TEMPLATE(LogspaceBench, logspace_v3)->Arg(1000)->Arg(10000000);
BENCHMARK_TEMPLATE(LogspaceBench, logspace_v4)->Arg(1000);
BENCHMARK_TEMPLATE(LogspaceBench, logspace_v5)->Arg(1000);
BENCHMARK_TEMPLATE(LogspaceBench, logspace_v6)->Arg(1000)->Arg(10000000);
class LogspaceTest
: public testing::TestWithParam<
std::function<std::vector<double>(double, double, int)>> {};
TEST_P(LogspaceTest, IsSame) {
auto func = GetParam();
const auto actual = func(0, 1., 1000);
const auto expected = logspace(0., 1., 1000);
// TODO: Buggy with (3, 70, 1000) and (0, 1, 1000)
ASSERT_EQ(expected.size(), actual.size());
for (int i = 0; i < expected.size(); i++) {
ASSERT_DOUBLE_EQ(actual[i], expected[i]) << i;
}
}
INSTANTIATE_TEST_SUITE_P(InstantiationName, LogspaceTest,
testing::Values(logspace, logspace_v1, logspace_v2,
logspace_v3, logspace_v4, logspace_v5,
logspace_v6));
int main(int argc, char **argv) {
::benchmark::Initialize(&argc, argv);
::benchmark::RunSpecifiedBenchmarks();
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}
There are at least three obvious optimizations that can be easily made to the shown code.
1) Compile in C++17 mode to get guaranteed copy elision when returning from logspace.
2)
std::vector<double> logspace;
for (int i = 0; i < k; i++)
Use logspace.reserve() to preallocate the vector and avoid useless repeated reallocations while the vector gets populated.
3)
void logspace_print (std::vector<double> logspace)
Passing by value here creates an entire duplicate copy of the vector, for no useful purpose whatsoever. Change this function so that it takes the logspace parameter by reference.
There's one possible micro-optimization that may or may not make any difference:
logspace.push_back(pow(10, i * (b - a) / (k - 1)));
The "(b-a)/(k-1)" part of this formula is constant and can be unrolled out of the loop. I would, though, expect the compiler to do it on its own, it's a fairly basic optimization.
Let's say I have the following function written in C:
/* C code for calculating the average of a given dataset */
#define NOE 16

int calcAvg(float *data, float *avg)
{
    float sum = 0;
    int i;
    for (i = 0; i < NOE; i++)
    {
        data[i] = i;
        sum += data[i];
    }
    *avg = sum / NOE;   /* write the result through the pointer */
    return 0;
}
Now I want to pass the data from an np.array to that C function "calcAvg". Also, I want the result to be stored in "result", which I defined in Python.
# Python Code
result = float(0)
a = np.array([1, 2, 3, 4])
myCfuncion.calc(a, result)
I have already created a C module and imported it into Python. The problem is that I do not know how to pass pointers in the way shown above.
Does anybody have an idea?
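One way this kind of pointer passing is commonly done, using ctypes instead of a hand-written extension module, is sketched below; the library name calcavg.so and the float32 dtype are assumptions:

import ctypes
import numpy as np

# Assumes the C code above is compiled to calcavg.so and keeps the
# signature int calcAvg(float *data, float *avg).
lib = ctypes.CDLL("./calcavg.so")
lib.calcAvg.restype = ctypes.c_int
lib.calcAvg.argtypes = [
    np.ctypeslib.ndpointer(dtype=np.float32, ndim=1, flags="C_CONTIGUOUS"),
    ctypes.POINTER(ctypes.c_float),
]

a = np.zeros(16, dtype=np.float32)  # NOE elements, matching the hard-coded loop bound
avg = ctypes.c_float(0.0)           # a real "place" that C can write through
lib.calcAvg(a, ctypes.byref(avg))
print(a)          # calcAvg overwrote the buffer in place
print(avg.value)  # 7.5, the average written through the pointer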
If I write a function accepting a single unsigned integer (0 - 0xFFFFFFFF), I can use:
uint32_t myInt;
if(!PyArg_ParseTuple(args, "I", &myInt))
return NULL;
And then from python, I can pass an int or long.
But what if I get passed a list of integers?
uint32_t* myInts;
PyObject* pyMyInts;

PyArg_ParseTuple(args, "O", &pyMyInts);
if (PyList_Check(pyMyInts)) {
    size_t n = PyList_Size(pyMyInts);
    myInts = calloc(n, sizeof(*myInts));
    for(size_t i = 0; i < n; i++) {
        PyObject* item = PyList_GetItem(pyMyInts, i);
        // What function do I want here?
        if(!GetAUInt(item, &myInts[i]))
            return NULL;
    }
}
// cleanup calloc'd array on exit, etc
Specifically, my issue is with:
Lists containing a mixture of ints and longs
Detecting overflow when assigning to the uint32
You could create a tuple and use the same method you used for a single argument. On the C side, tuple objects are not really immutable, so it wouldn't be too much trouble.
Also PyLong_AsUnsignedLong could work for you. It accepts int and long objects and raises an error otherwise. But if sizeof(long) is bigger than 4, you might need to check for an upper-bound overflow yourself.
static int
GetAUInt(PyObject *pylong, uint32_t *myint) {
    static const unsigned long MAX = 0xffffffff;
    unsigned long l = PyLong_AsUnsignedLong(pylong);
    if ((l == (unsigned long)-1 && PyErr_Occurred()) || l > MAX) {
        PyErr_SetString(PyExc_OverflowError, "can't convert to uint32_t");
        return false;
    }
    *myint = (uint32_t) l;
    return true;
}
I'm using Python to call a .so compiled from C. The C code adds two vectors as follows:
#include <stdio.h>
#include <stdbool.h>
bool add_vectors(const double * const a, const double * const b, double * const c)
{
if(sizeof(a) != sizeof(b) || sizeof(b) != sizeof(c))
{
fprintf(stderr, "Vectors of different lengths cannot be added");
fprintf(stderr, "Number of elements of a: %d", sizeof(a)/sizeof(double));
fprintf(stderr, "Number of elements of b: %d", sizeof(b)/sizeof(double));
fprintf(stderr, "Number of elements of c: %d", sizeof(c)/sizeof(double));
return false;
}
/* Added for diagnostics only; should print 5, does print 1 */
printf("Size of a: %d\n", sizeof(a)/sizeof(a[0]));
printf("Size of b: %d\n", sizeof(b)/sizeof(b[0]));
printf("Size of c: %d\n", sizeof(c)/sizeof(c[0]));
for(int ii = 0; ii < sizeof(a)/sizeof(double); ++ii)
{
c[ii] = a[ii] + b[ii];
}
return true;
}
This is compiled in the standard way via
gcc -std=c11 -o add_vectors.so -shared -fPIC add_vectors.c
Now I attempt to call this from the following python code:
#!/usr/bin/env python
import ctypes
import numpy
add_vectors_lib = ctypes.cdll.LoadLibrary("add_vectors.so")
add_vectors = add_vectors_lib.add_vectors
add_vectors.restype = ctypes.c_bool
array_1d_double = numpy.ctypeslib.ndpointer(dtype = numpy.double, ndim=1, flags="C_CONTIGUOUS")
add_vectors.argtypes = [array_1d_double, array_1d_double, array_1d_double]
#Random vectors to add:
a = numpy.double([1,2,3,4,5])
b = numpy.double([3,4,5,6,7])
#Zero out the return value:
c = numpy.double([0,0,0,0,0])
add_vectors(a, b,c)
print(a)
print(b)
print(c)
But the output is:
Size of a: 1
Size of b: 1
Size of c: 1
[ 1. 2. 3. 4. 5.]
[ 3. 4. 5. 6. 7.]
[ 4. 0. 0. 0. 0.]
How do I make the C code recognize the proper size of these arrays and/or make the Python pass "knowledge" of the array size to the C code?
sizeof() is a compile-time operator. In the case of a pointer, it returns the size of the pointer itself: usually 4 bytes on a 32-bit architecture and 8 bytes on a 64-bit one.
If you passed the actual variable of a statically allocated array, it would return the total size of the array in bytes.
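You can see the same thing from the Python side with ctypes; a quick illustrative check:

import ctypes

# A pointer has a fixed size no matter how many elements it points to,
# which is why sizeof(a)/sizeof(a[0]) printed 1 in the C code above
# (8-byte pointer / 8-byte double on a typical 64-bit build).
print(ctypes.sizeof(ctypes.POINTER(ctypes.c_double)))  # typically 8
print(ctypes.sizeof(ctypes.c_double))                  # 8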
The rather newbie problem with sizeof() has already been pointed out in the comments.
Well, in order to answer your question (how do I make the C code recognize the proper size of these arrays and/or make Python pass "knowledge" of the array size to the C code?), I tried to learn how to write a Python module in C by following this tutorial (I'm interested in learning Python).
Note: it's quite a long answer; skip the code parts as you wish.
Your way of writing the module is complex and bug-prone. You need a wrapper around add_vectors that takes PyObject *args as its argument, so you can correctly check the type of your parameters (with PyArg_ParseTuple) and the number of elements in each array (with PyArray_DIM).
This is part of my code:
add_vectors.c
#include <stdio.h>
#include <stdbool.h>
void add_vectors(const double * const a, const double * const b,
double * const c, int len)
{
int ii;
for(ii = 0; ii < len; ++ii)
{
c[ii] = a[ii] + b[ii];
}
}
_add_vectors.c
#include <Python.h>
#include <numpy/arrayobject.h>
void add_vectors(const double * const a, const double * const b,
double * const c, int len);
static PyObject *add_vectors_wrapper(PyObject *self, PyObject *args);
static PyMethodDef module_methods[] = {
{"add_vectors", add_vectors_wrapper, METH_VARARGS, NULL},
{NULL, NULL, 0, NULL}
};
PyMODINIT_FUNC init_add_vectors(void)
{
PyObject *m = Py_InitModule3("_add_vectors", module_methods,
NULL);
if (m == NULL)
return;
import_array();
}
static PyObject *add_vectors_wrapper(PyObject *self, PyObject *args)
{
PyObject *x_obj, *y_obj, *z_obj;
if (!PyArg_ParseTuple(args, "OOO", &x_obj, &y_obj,
&z_obj))
return NULL;
/* Interpret the input objects as numpy arrays. */
PyObject *x_array = PyArray_FROM_OTF(x_obj, NPY_DOUBLE, NPY_IN_ARRAY);
PyObject *y_array = PyArray_FROM_OTF(y_obj, NPY_DOUBLE, NPY_IN_ARRAY);
PyObject *z_array = PyArray_FROM_OTF(z_obj, NPY_DOUBLE, NPY_IN_ARRAY);
/* If that didn't work, throw an exception. */
if (x_array == NULL || y_array == NULL || z_array == NULL) {
Py_XDECREF(x_array);
Py_XDECREF(y_array);
Py_XDECREF(z_array);
return NULL;
}
/* How many data points are there? */
int xN = (int)PyArray_DIM(x_array, 0);
int yN = (int)PyArray_DIM(y_array, 0);
int zN = (int)PyArray_DIM(z_array, 0);
/* size check */
if (xN != yN || yN != zN) {
fprintf(stderr, "Vectors of different lengths cannot be added\n");
fprintf(stderr, "Number of elements of a: %d\n", xN);
fprintf(stderr, "Number of elements of b: %d\n", yN);
fprintf(stderr, "Number of elements of c: %d\n", zN);
PyObject *ret = Py_BuildValue("s", "Failed");
return ret;
}
double *x = (double*)PyArray_DATA(x_array);
double *y = (double*)PyArray_DATA(y_array);
double *z = (double*)PyArray_DATA(z_array);
add_vectors(x, y, z, xN);
/* Clean up. */
Py_DECREF(x_array);
Py_DECREF(y_array);
Py_DECREF(z_array);
/* Build the output tuple */
PyObject *ret = Py_BuildValue("s", "Success");
return ret;
}
setup.py (run with ./setup.py build_ext --inplace)
#!/usr/bin/env python
from distutils.core import setup, Extension
import numpy.distutils.misc_util
setup(
ext_modules=[Extension("_add_vectors",
["_add_vectors.c", "add_vectors.c"])],
include_dirs=numpy.distutils.misc_util.get_numpy_include_dirs(),
)
addnum.py (a simple test case)
#!/usr/bin/env python
import ctypes
import numpy
from _add_vectors import add_vectors
#Random vectors to add:
a = numpy.double([1,2,3,4,5])
b = numpy.double([3,4,5,6,7])
#Zero out the return value:
c = numpy.double([0,0,0,0,0])
add_vectors(a, b, c)
print(a)
print(b)
print(c)
result
ubuntu-user:python-module$ ./addnum.py
[ 1. 2. 3. 4. 5.]
[ 3. 4. 5. 6. 7.]
[ 4. 6. 8. 10. 12.]