I need to write a C++ function that calculates the gradient of an array, like the np.gradient function in numpy:
>>> f = np.array([1, 2, 4, 7, 11, 16], dtype=float)
>>> np.gradient(f)
array([1. , 1.5, 2.5, 3.5, 4.5, 5. ])
Does anyone know how to implement it?
According to the documentation in https://docs.scipy.org/doc/numpy/reference/generated/numpy.gradient.html
f'[x] = (f[x+1] - f[x-1]) / (2.0*h) + O(h^2)
so you can go over the elements from 1 to n-1 and calculate (f[i+1] - f[i-1]) / 2.0
for the edges I believe you have to do:
f'[0] = f[1] - f[0]
f'[n] = f[n] - f[n-1]
implemented a very simple function myself since this question is too easy...
// Numerical gradient of a uniformly sampled 1-D sequence, matching
// numpy.gradient with unit spacing: second-order central differences in
// the interior, one-sided differences at the two edges.
//
// BUG FIX: the original divided by 2.0 at the edges as well, giving
// (f[1]-f[0])/2 instead of numpy's f[1]-f[0] (see the expected output
// [1, 1.5, 2.5, 3.5, 4.5, 5] for [1, 2, 4, 7, 11, 16]).
//
// Parameters:
//   input - sampled function values (spacing h = 1).
// Returns:
//   a vector of the same length with the df/dx estimates; for fewer than
//   two samples the input is returned unchanged (no gradient is defined).
vector<double> gradient(const vector<double>& input) {
    if (input.size() <= 1) return input;

    const std::size_t n = input.size();
    vector<double> res;
    res.reserve(n); // avoid reallocations during push_back

    for (std::size_t j = 0; j < n; ++j) {
        // Stencil end points: clamp at the boundaries so the edges use a
        // one-sided (forward/backward) difference.
        const std::size_t j_left  = (j == 0) ? 0 : j - 1;
        const std::size_t j_right = (j == n - 1) ? n - 1 : j + 1;
        // Divide by the actual stencil width: 2 in the interior, 1 at
        // either edge — exactly numpy.gradient's edge handling.
        res.push_back((input[j_right] - input[j_left]) /
                      static_cast<double>(j_right - j_left));
    }
    return res;
}
Related
the main idea of the question is How to pass array from python to the C function, get result (array to), and print this.
I created a C function with an algorithm for calculating a convolution. (It doesn't work correctly yet, and it's my first program in C, so it's not pretty good — don't judge harshly):
#include <stdio.h>
#include <stdlib.h>
/*
 * Full discrete (linear) convolution of two equal-length int arrays.
 *
 * Returns a heap-allocated array of length 2*inputArraySize - 1 (or NULL
 * on allocation failure) that the CALLER MUST free().
 *
 * BUG FIX: the original returned a pointer to a local variable-length
 * array; that storage is released when the function returns, so using the
 * pointer is undefined behavior.  The result now lives on the heap.
 *
 * NOTE: as in the original, both input arrays are reversed in place as a
 * side effect — the second half of the result reuses the same loop on the
 * reversed inputs.
 */
int * convolution(int a[], int b[], int inputArraySize)
{
    int resArrayLength = inputArraySize * 2 - 1;
    int *resArr = malloc(sizeof(int) * resArrayLength);
    if (resArr == NULL)
        return NULL;

    /* First half: resArr[i] = sum_{j<=i} a[j] * b[i-j]. */
    for (int i = 0; i < inputArraySize; i++)
    {
        int res = 0;
        for (int j = 0; j <= i; j++)
            res += b[i - j] * a[j];
        resArr[i] = res;
    }

    /* Reverse both inputs so the same prefix loop below produces the
     * suffix of the convolution. */
    for (int i = 0; i < inputArraySize / 2; i++)
    {
        int temp = a[i];
        a[i] = a[inputArraySize - i - 1];
        a[inputArraySize - i - 1] = temp;
    }
    for (int i = 0; i < inputArraySize / 2; i++)
    {
        int temp = b[i];
        b[i] = b[inputArraySize - i - 1];
        b[inputArraySize - i - 1] = temp;
    }

    /* Second half, filled from the end of the result array. */
    for (int i = 0; i < inputArraySize - 1; i++)
    {
        int res = 0;
        for (int j = 0; j <= i; j++)
            res += b[i - j] * a[j];
        resArr[resArrayLength - 1 - i] = res;
    }
    return resArr;
}
I created .so file using this command cc -fPIC -shared -o convolution.so convolution.c.
Also I created python script (yeh, it's my first script on python) according to the guide from this article.
from ctypes import *

# Shared library built with:  cc -fPIC -shared -o convolution.so convolution.c
so_file = "./convolution.so"
# CDLL loads the library; its exported C functions become attributes.
convolution = CDLL(so_file)
print(type(convolution))

a = [1, 3, 5, 6]
b = [3, 5, 6, 7]
# NOTE(review): this is the failing call the question is about — ctypes
# cannot marshal plain Python lists, so it raises ctypes.ArgumentError.
# The lists must first be converted to ctypes arrays, e.g. (c_int * len(a))(*a).
print(convolution.convolution(a, b, 4))
Here I try to create 2 arrays and pass to the C function.
But it doesn't work as I expect.
Output of the program:
<class 'ctypes.CDLL'>
Traceback (most recent call last):
File "t.py", line 10, in <module>
print(convolution.convolution(a, b, 4))
ctypes.ArgumentError: argument 1: <class 'TypeError'>: Don't know how to convert parameter 1
How can I pass this two arrays to the C function?
Also C function returns pointer to the first element of the result array to the Python script.
How to print all array elements using returned pointer from c function in the python?
Those are Python lists; you have to convert them to C arrays before using them as parameters for a C function:
a = [1, 3, 5, 6]
b = [3, 5, 6, 7]
# (ctypes.c_int * len(a)) creates a C array *type* of len(a) ints; calling
# it with *a fills a new instance with the list's values.
# NOTE(review): this spelling needs `import ctypes` (the question used
# `from ctypes import *`, where it would be `(c_int * len(a))(*a)`).
a_arr = (ctypes.c_int * len(a))(*a)
b_arr = (ctypes.c_int * len(b))(*b)
To get and print the resulting array, you can set the returning ctype of the function using .restype, call it, and then get a slice from the resulting array, to set the dimension ([0:4]):
# Declare the C return type so ctypes gives back a usable int* pointer.
convolution.convolution.restype = ctypes.POINTER(ctypes.c_int)
# BUG FIX: pass the converted ctypes arrays (a_arr/b_arr), not the original
# Python lists — passing the lists raises the very ctypes.ArgumentError the
# question is about.
tmp = convolution.convolution(a_arr, b_arr, 4)
# A ctypes pointer has no length; slicing copies the first elements into a
# Python list.  NOTE(review): the full convolution has 2*4-1 = 7 values, so
# [0:7] would show the whole result.
res = tmp[0:4]
print(res)
I am running a small code in which there are periodic boundary conditions i.e.,for point 0 the left point is the last point and for the last point zeroth point is the right point. When I run the same code in Python and C++, the answer I am getting is very different.
Python Code
import numpy as np
# Initial composition values of the six periodic grid points.
c = [0.467894, 0.5134679, 0.5123, 0.476489, 0.499764, 0.564578]
n = len(c)
# NOTE(review): this looks like a 1-D Cahn–Hilliard-type phase-field model
# (mu = delf(c) - K*lap(c), dc/dt = M*lap(mu)) — confirm against the model.
Δx = 1.0   # grid spacing
A = 1.0    # free-energy coefficient
M = 1.0    # mobility
K = 1.0    # gradient-energy coefficient
Δt = 0.05  # explicit-Euler time step
def delf(comp):
    """Bulk driving force at composition comp: d/dc of the double-well
    free energy A*c^2*(1-c)^2, i.e. 2*A*c*(1-c)*(1-2c)."""
    force = 2*A*comp*(1-comp)*(1-2*comp)
    return force
def lap(e, v, w):
    """Second-order central-difference Laplacian at v, with east/west
    neighbour values e and w, on a grid of spacing Δx."""
    return (w - 2*v + e) / (Δx**2)
# Explicit (forward-Euler) time integration for 10^6 steps.
for t in range(1000000):
    # Chemical potential mu at every grid point.
    μ = np.zeros(n)
    for i in range(n):
        ans1 = delf(c[i])
        east = i+1
        west = i-1
        # Periodic boundary: wrap the out-of-range neighbour index.
        if i == 0:
            west = i-1+n
        if i == (n-1):
            east = i+1-n
        ans2 = lap(c[east], c[i], c[west])
        μ[i] = ans1 - K*ans2
    # Rate of change of composition from the Laplacian of mu.
    dc_dt = np.zeros(n)
    for j in range(n):
        east = j+1
        west = j-1
        if j == 0:
            west = j-1+n
        if j == (n-1):
            east = j+1-n
        ans = lap(μ[east], μ[j], μ[west])
        dc_dt[j] = M*ans
    # Euler update of every grid point.
    for p in range(n):
        c[p] = c[p] + Δt * dc_dt[p]
print(c)
The output in Python 3.7.6 version is
[0.5057488166795907, 0.5057488166581386, 0.5057488166452102,
0.5057488166537337, 0.5057488166751858, 0.5057488166881142]
C++ Code
#include <iostream>
using namespace std ;
// Simulation parameters.
// BUG FIX (precision): float (~7 significant digits) accumulates visible
// round-off over ~10^6 explicit-Euler steps; a Python "float" is a C
// double, so double is needed to reproduce the Python result.
const double dx = 1.0;   // grid spacing
const double dt = 0.05;  // time step
const double A = 1.0;    // free-energy coefficient
const double M = 1.0;    // mobility
const double K = 1.0;    // gradient-energy coefficient
const int n = 6;         // number of grid points
// Bulk driving force: d/dc of the double-well free energy A*c^2*(1-c)^2.
// BUG FIX (precision): computed in double instead of float — float
// round-off dominates the result after millions of time steps.
double delf(double comp){
    return 2 * A * comp * (1 - comp) * (1 - 2 * comp);
}
// Second-order central-difference Laplacian (e - 2v + w) / dx^2.
// BUG FIX (precision): double instead of float, matching the Python code.
double lap(double e, double v, double w){
    return (e - 2 * v + w) / (dx * dx);
}
int main(){
    // BUG FIX (precision): the state is kept in double throughout; with
    // float the trajectory visibly diverges from the Python (double)
    // version over many steps.
    double c[n] = {0.467894, 0.5134679, 0.5123, 0.476489, 0.499764, 0.564578};
    for (int t = 0; t < 50; ++t){  // raise to 1000000 to match the Python run
        // Chemical potential at every grid point.
        double mu[n];
        for (int k = 0; k < n; ++k){
            // Periodic boundary: wrap the out-of-range neighbour index.
            int west = (k == 0) ? k - 1 + n : k - 1;
            int east = (k == n - 1) ? k + 1 - n : k + 1;
            mu[k] = delf(c[k]) - K * lap(c[east], c[k], c[west]);
        }
        // Rate of change of composition from the Laplacian of mu.
        double dc_dt[n];
        for (int p = 0; p < n; ++p){
            int west = (p == 0) ? p - 1 + n : p - 1;
            int east = (p == n - 1) ? p + 1 - n : p + 1;
            dc_dt[p] = M * lap(mu[east], mu[p], mu[west]);
        }
        // Explicit-Euler update.
        for (int q = 0; q < n; ++q){
            c[q] = c[q] + dt * dc_dt[q];
        }
    }
    for (int i = 0; i < n; ++i){
        cout << c[i] << " ";
    }
    return 0;
}
Output in C++ is
0.506677 0.504968 0.50404 0.50482 0.506528 0.507457
When I am iterating for small steps say t<1000 there is no significant difference in outputs but I am supposed to do this calculation for large number of iterations (in order of 10^7) and here the difference in output is very large.
I took your code, added the missing closing bracket of the large "for" loop and also changed the length from "50" to "1000000" as in the python version.
Then I replaced all "float" with "double" and the resulting output is:
0.505749 0.505749 0.505749 0.505749 0.505749 0.505749
Thus, of course, implementing the same code in python and in c++ gives the same result. However, the types are obviously important. For example, integers are implemented in a very very different way in python3 than in c++ or almost any other language. But here it is much simpler. Python3 "float" is a "double" in c++ by definition. See https://docs.python.org/3/library/stdtypes.html
Fun fact
the simplest python program that you will have major trouble to reproduce in C++ or most other languages is something like
# Python ints have arbitrary precision, so this ~50-digit literal and the
# arithmetic below are computed exactly (limited only by available memory).
myInt=10000000000000000000000000000000345365753523466666
myInt = myInt*13 + 1
print (myInt)
since python can work with arbitrary large integers (until your entire computer memory is filled). The corresponding
#include <iostream>
int main(){
    // Deliberately broken demonstration: the literal does not fit in any
    // built-in integer type, so the compiler reports "integer constant is
    // too large for its type" and the stored value overflows, printing a
    // wrong result (unlike Python's arbitrary-precision int).
    long int myInt = 10000000000000000000000000000000345365753523466666;
    myInt = myInt*13 + 1;
    std::cout << myInt << std::endl;
    return 0;
}
will simply report warning: integer constant is too large for its type and will overflow and print a wrong result.
I need to convert a list of ints to a string containing all the ranges in the list.
So for example, the output should be as follows:
getIntRangesFromList([1,3,7,2,11,8,9,11,12,15]) -> "1-3,7-9,11-12,15"
So the input is not sorted and there can be duplicate values. The lists range in size from one element to 4k elements. The minimum and maximum values are 1 and 4094.
This is part of a performance critical piece of code. I have been trying to optimize this, but I can't find a way to get this faster. This is my current code:
def _getIntRangesFromList(list):
if (list==[]):
return ''
list.sort()
ranges = [[list[0],list[0]]] # ranges contains the start and end values of each range found
for val in list:
r = ranges[-1]
if val==r[1]+1:
r[1] = val
elif val>r[1]+1:
ranges.append([val,val])
return ",".join(["-".join([str(y) for y in x]) if x[0]!=x[1] else str(x[0]) for x in ranges])
Any idea on how to get this faster?
This could be a task for the itertools module.
import itertools

list_num = [1, 2, 3, 7, 8, 9, 11, 12, 15]

# A run of consecutive numbers keeps a constant offset from a counter that
# advances in lockstep, so grouping by (value - counter) splits the sorted
# list into its consecutive runs.
counter = itertools.count()
runs = (list(run) for _, run in
        itertools.groupby(list_num, lambda v: v - next(counter)))

pieces = []
for run in runs:
    # A single number stands alone; a longer run becomes "first-last".
    pieces.append(str(run[0]) if len(run) == 1 else '{}-{}'.format(run[0], run[-1]))
print(', '.join(pieces))
This will give you 1-3, 7-9, 11-12, 15.
To understand what's going on you might want to check the content of groups.
import itertools

list_num = [1, 2, 3, 7, 8, 9, 11, 12, 15]
# Key trick: numbers in a consecutive run keep a constant difference from
# the parallel counter c, so groupby splits the list at every gap.
groups = (list(x) for _, x in
          itertools.groupby(list_num, lambda x, c=itertools.count(): x - next(c)))
# Consume the generator to show each consecutive run groupby produced.
for element in groups:
    print('element={}'.format(element))
This will give you the following output.
element=[1, 2, 3]
element=[7, 8, 9]
element=[11, 12]
element=[15]
The basic idea is to have a counter running parallel to the numbers. groupby will create individual groups for numbers with the same numerical distance to the current value of the counter.
I don't know whether this is faster on your version of Python. You'll have to check this yourself. In my setting it's slower with this data set, but faster with a bigger number of elements.
The fastest one I could come up, which tests about 10% faster than your solution on my machine (according to timeit):
def _ranges(l):
    """Render a list of UNIQUE ints as 'a-b,c,d-e' (sorts l in place).

    How it works: element i is emitted unless it sits strictly inside a
    run (l[i-1] + 2 == l[i+1] means l[i-1], l[i], l[i+1] are consecutive,
    so interior elements are skipped).  An emitted element is followed by
    '-' when the next element is consecutive, ',' otherwise; the last
    element is always appended separately.

    Subtle hack: at i == 0, l[i - 1] is l[-1] — the (sorted) maximum — and
    max + 2 != l[1] always holds, so the first element is always emitted.
    This wraparound is also why duplicate values break the function.
    """
    if l:
        l.sort()
        return ''.join([(str(l[i]) + ('-' if l[i] + 1 == l[i + 1] else ','))
                        for i in range(0, len(l) - 1) if l[i - 1] + 2 != l[i + 1]] +
                       [str(l[-1])])
    else: return ''
The above code assumes that the values in the list are unique. If they aren't, it's easy to fix but there's a subtle hack which will no longer work and the end result will be slightly slower.
I actually timed _ranges(u[:]) because of the sort; u is 600 randomly selected integers from range(1000) comprising 235 subsequences; 83 are singletons and 152 contain at least two numbers. If the list is sorted, quite a lot of time is saved.
def _to_range(l, start, stop, idx, result):
    """Collapse the sorted list l[idx:] into inclusive (start, stop) tuples.

    start/stop carry the run accumulated so far; result is extended in
    place and also returned.

    BUG FIX: rewritten iteratively.  The original recursed once per list
    element, so any list longer than the default recursion limit (~1000
    frames) raised RecursionError — the question targets up to 4k elements.
    """
    for val in l[idx:]:
        if val - stop > 1:
            # Gap found: close the current run and start a new one.
            result.append((start, stop))
            start = stop = val
        else:
            # val extends (or repeats the end of) the current run.
            stop = val
    result.append((start, stop))
    return result

def get_range(l):
    """Return the consecutive runs of the sorted list l as (start, stop) tuples.

    >>> get_range([1, 2, 3, 7, 8, 9, 11, 12, 15])
    [(1, 3), (7, 9), (11, 12), (15, 15)]
    """
    if not l:
        return []
    return _to_range(l, start=l[0], stop=l[0], idx=0, result=[])
l = [1, 2, 3, 7, 8, 9, 11, 12, 15]
result = get_range(l)
print(result)
>>> [(1, 3), (7, 9), (11, 12), (15, 15)]
# I think it's better to fetch the data as it is and if needed, change it
# with
print(','.join('-'.join([str(start), str(stop)]) for start, stop in result))
>>> 1-3,7-9,11-12,15-15
If you don't care about keeping the raw tuples, you can just append str(start) + '-' + str(stop) in the _to_range function, so later there is no need for the extra '-'.join call.
I'll concentrate on the performance that is your main issue. I'll give 2 solutions:
1) If the boundaries of the integers stored is between A and B, and you can create an array of booleans(even you can choose an array of bits for expanding the range you can storage) with (B - A + 2) elements, e.g. A = 0 and B = 1 000 000, we can do this (i'll write it in C#, sorry XD). This run in O(A - B) and is a good solution if A - B is less than the number of numbers:
/// <summary>
/// Builds the "a-b,c,d-e" range string by marking each value in a boolean
/// presence array and then scanning that array once: O(B - A + N).
/// Duplicates are harmless (the flag is merely set twice).
/// NOTE(review): values outside [A, B] would index out of bounds — confirm
/// the inputs fit the configured bounds.
/// </summary>
public string getIntRangesFromList(int[] numbers)
{
    // You can change these 2 constants (the supported value bounds).
    const int A = 0;
    const int B = 1000000;
    // All slots default to false.  The array holds one slot more than the
    // value range on purpose: the scan below reads one slot past maxNumber
    // so a range that ends at the maximum is still closed.
    bool[] apparitions = new bool[B - A + 2];
    int minNumber = B + 1;
    int maxNumber = A - 1;
    int pos;
    // Pass 1: mark the present values and track the occupied span.
    for (int i = 0; i < numbers.Length; i++)
    {
        pos = numbers[i] - A;
        apparitions[pos] = true;
        if (minNumber > pos)
        {
            minNumber = pos;
        }
        if (maxNumber < pos)
        {
            maxNumber = pos;
        }
    }
    // Pass 2: scan the span, emitting range starts and ends.
    // The concatenation is kept simple; it could be made faster (e.g. with
    // a StringBuilder) to improve performance.
    string result = "";
    bool isInRange = false;      // currently inside a run of present values?
    bool isFirstRange = true;    // suppress the leading comma
    int firstPosOfRange = 0;     // irrelevant what its initial value is
    for (int i = minNumber; i <= maxNumber + 1; i++)
    {
        if (!isInRange)
        {
            if (apparitions[i])
            {
                // A new range starts here: emit separator and start value.
                if (!isFirstRange)
                {
                    result += ",";
                }
                else
                {
                    isFirstRange = false;
                }
                result += (i + A);
                isInRange = true;
                firstPosOfRange = i;
            }
        }
        else
        {
            if (!apparitions[i])
            {
                // The range ended at i-1; a one-element range gets no "-".
                if (i > firstPosOfRange + 1)
                {
                    result += "-" + (i + A - 1);
                }
                isInRange = false;
            }
        }
    }
    return result;
}
2) O(N * log N)
/// <summary>
/// Builds the "a-b,c,d-e" range string by sorting a copy of the input and
/// scanning it once: O(N log N).
/// BUG FIXES vs. the original:
///  * numbers.OrderBy(x => x) returns a NEW sequence and its result was
///    discarded, so the array was never actually sorted;
///  * the final run was never closed ({1,2,3} produced "1" not "1-3");
///  * duplicate values started spurious extra entries.
/// </summary>
public string getIntRangesFromList2(int[] numbers)
{
    string result = "";
    if (numbers.Length > 0)
    {
        // Sort a copy so the caller's array is left untouched.
        int[] sorted = (int[])numbers.Clone();
        System.Array.Sort(sorted);
        result += sorted[0];
        int countNumbersInRange = 1;
        for (int i = 1; i < sorted.Length; i++)
        {
            if (sorted[i] == sorted[i - 1])
            {
                continue; // duplicate: already part of the current run
            }
            if (sorted[i] != sorted[i - 1] + 1)
            {
                // Gap: close the previous run (if longer than one) and
                // start a new one.
                if (countNumbersInRange > 1)
                {
                    result += "-" + sorted[i - 1];
                }
                result += "," + sorted[i];
                countNumbersInRange = 1;
            }
            else
            {
                countNumbersInRange++;
            }
        }
        // Close the final run.
        if (countNumbersInRange > 1)
        {
            result += "-" + sorted[sorted.Length - 1];
        }
    }
    return result;
}
Sorting a list of ints in python 3 seems to be faster than sorting an array of ints in C++. Below is the code for 1 python program and 2 C++ programs that I used for the test. Any reason why the C++ programs are slower? It doesn't make sense to me.
----- Program 1 - python 3.4 -----
from time import time

# Benchmark: build a descending 10000-element list and sort it, 1000 times.
x = 10000  # list length
y = 1000   # repetitions
start = time()
for _ in range(y):
    a = list(range(x))
    a.reverse()  # make the input strictly descending before sorting
    # NOTE(review): a fully descending list is a single run for timsort,
    # which detects and reverses it — this benchmarks a favourable case;
    # confirm that is the intent.
    a.sort()
print(round(time() - start, 2), 'seconds')
----- Program 2 - c++ using sort from algorithm ------
using namespace std;
#include <iostream>
#include <algorithm>
// Benchmark: refill a 10000-element array in descending order and
// std::sort it, 1000 times (C++ counterpart of the Python timing above).
int main(){
    int x = 10000;  // number of elements to sort
    int y = 1000;   // number of repetitions
    int b[10000];   // NOTE(review): size must stay in sync with x
    cout << "start" << endl;
    for (int j = 0; j < y; j++){
        for (int i = 0; i < x; i++){
            b[i] = x - i;  // descending refill each round
        } // still slower than python with this clause taken out
        sort(b, b + x); // regular sort (std::sort introsort)
    }
    cout << "done";
    system("pause"); // Windows-only pause; a no-op failure elsewhere
}
----- Program 3 - c++ using hand written merge sort ------
using namespace std;
#include <iostream>
// Merge two adjacent sorted runs of arr in place, using temp as scratch.
// The first run is [first_start, second_start), the second run is
// [second_start, second_finish); temp must hold at least
// second_finish - first_start ints.
//
// IMPROVEMENT: the original wrapped the two drain loops in a redundant
// if/else — each while already guards itself, and at most one of them has
// anything left to copy after the main loop.
void merge(int * arr, int *temp, int first_start, int second_start, int second_finish){
    int a1 = first_start, b1 = second_start, r = 0;
    // Take the smaller head element while both runs are non-empty.
    // (On ties the second run's element is taken first, as in the
    // original — irrelevant for plain ints.)
    while (a1 < second_start && b1 < second_finish){
        if (arr[a1] < arr[b1]){
            temp[r++] = arr[a1++];
        } else {
            temp[r++] = arr[b1++];
        }
    }
    // Drain whichever run still has elements (at most one loop runs).
    while (a1 < second_start){
        temp[r++] = arr[a1++];
    }
    while (b1 < second_finish){
        temp[r++] = arr[b1++];
    }
    // Copy the merged result back into arr.
    for (int i = first_start; i < second_finish; i++){
        arr[i] = temp[i - first_start];
    }
}
// Iterative (bottom-up) merge sort: merges runs of length c = 1, 2, 4, ...
// until a single sorted run covers the array.  temp must hold a_len ints.
//
// FIX: the tail condition is now '<' instead of '<='.  With '<=', when
// start + c == a_len the "second run" [a_len, a_len) was empty and the
// merge degenerated into a pointless copy of the first run onto itself.
void merge_sort(int *a, int a_len, int *temp){
    int c = 1, start = 0;  // c = current sorted-run length
    while (c < a_len){
        // Merge consecutive pairs of full-length runs.
        while (start + c * 2 < a_len){
            merge(a, temp, start, start + c, start + c * 2);
            start += c * 2;
        }
        // Tail: merge the last full run with the shorter leftover, if any.
        if (start + c < a_len){
            merge(a, temp, start, start + c, a_len);
        }
        c *= 2; start = 0;
    }
}
// Benchmark driver for the hand-written merge sort above; mirrors the
// std::sort benchmark and the Python timing script.
int main(){
    int x = 10000; // size of array to be sorted
    int y = 1000; // number of times to sort it
    int b[10000], temp[10000]; // NOTE(review): sizes must stay >= x
    cout << "start" << endl;
    for (int j = 0; j < y; j++){
        for (int i = 0; i < x; i++){
            b[i] = x - i; // reverse sorted array (even with this assignment taken out still runs slower than python)
        }
        merge_sort(b, x, temp);
    }
    cout << "done";
    system("pause"); // Windows-only pause; blocks until a key is pressed
}
The core reason is no doubt timsort -- http://en.wikipedia.org/wiki/Timsort -- first conceived by Tim Peters for Python though now also in some Java VMs (for non-primitives only).
It's a truly amazing algorithm and you can find a C++ implementation at https://github.com/swenson/sort for example.
Lesson to retain: the proper architecture and algorithms can let you run circles around supposedly-faster languages if the latter are using less-perfect A & As!-) So, if you have really big problems to solve, deal with determining perfect architecture and algorithms first -- the language and optimizations within it are inevitably lower-priority issues.
I am studying queuing theory in which I am frequently presented with the following situation.
Let x, y both be n-tuples of nonnegative integers (depicting lengths of the n queues). In addition, x and y each have distinguished queue called their "prime queue". For example,
x = [3, 6, 1, 9, 5, 2] with x' = 1
y = [6, 1, 5, 9, 5, 5] with y' = 5
(In accordance with Python terminology I am counting the queues 0-5.)
How can I implement/construct the following permutation f on {0,1,...,5} efficiently?
first set f(x') = y'. So here f(1) = 5.
then set f(i) = i for any i such that x[i] == y[i]. Clearly there is no need to consider the indices x' and y'. So here f(3) = 3 (both length 9) and f(4) = 4 (both length 5).
there are now equally sized sets of queues unpaired in x and in y. So here in x this is {0,2,5} and in y this is {0,1,2}.
rank these from from 1 to s, where s is the common size of the sets, by length with 1 == lowest rank == shortest queue and s == highest rank == longest queue. So here, s = 3, and in x rank(0) = 1, rank(2) = 3 and rank(5) = 2, and in y rank(0) = 1, rank(1) = 3, rank(2) = 2. If there is a tie, give the queue with the larger index the higher rank.
pair these s queues off by rank. So here f(0) = 0, f(2) = 1, f(5) = 2.
This should give the permutation [0, 5, 1, 3, 4, 2].
My solution consists of tracking the indices and loops over x and y multiple times, and is terribly inefficient. (Roughly looking at n >= 1,000,000 in my application.)
Any help would be most appreciated.
Since you must do the ranking, you can't get linear and will need to sort. So it looks pretty straightforward. You do 1. in O(1) and 2. in O(n) by just going over the n-tuples. At the same time, you can construct the copy of x and y with only those that are left for 3. but do not include only the value, but instead use tuple of value and its index in the original.
In your example, x-with-tuples-left would be [[3,0],[1,2],[2,5]] and y-with-tuples-left would be [[6,0],[1,1],[5,2]].
Then just sort both x-with-tuples-left and y-with-tuples-left (it will be O(n.log n)), and read the permutation from the second element of the corresponding tuples.
In your example, sorted x-with-... would be [[1,2],[2,5],[3,0]] and sorted y-with-... would be [[1,1],[5,2],[6,0]]. Now, you nicely see 5. from the second elements: f(2)=1, f(5)=2, f(0)=0.
EDIT: Including O(n+L) in Javascript:
function qperm (x, y, xprime, yprime) {
var i;
var n = x.length;
var qperm = new Array(n);
var countsx = [], countsy = []; // same as new Array()
qperm[xprime] = yprime; // doing 1.
for (i = 0; i < n; ++i) {
if (x[i] == y[i] && i != xprime && i != yprime) { // doing 2.
qperm[i] = i; }
else { // preparing for 4. below
if (i != xprime) {
if (countsx[x[i]]) countsx[x[i]]++; else countsx[x[i]] = 1; }
if (i != yprime) {
if (countsy[y[i]]) countsy[y[i]]++; else countsy[y[i]] = 1; } }
// finishing countsx and countsy
var count, sum;
for (i = 0, count = 0; i < countsx.length; ++i) {
if (countsx[i]) {
sum = count + countsx[i];
countsx[i] = count;
count = sum; }
for (i = 0, count = 0; i < countsy.length; ++i) {
if (countsy[i]) {
sum = count + countsy[i];
countsy[i] = count;
count = sum; }
var yranked = new Array(count);
for (i = 0; i < n; ++i) {
if (i != yprime && (x[i] != y[i] || i == xprime)) { // doing 4. for y
yranked[countsy[y[i]]] = y[i];
countsy[y[i]]++; } }
for (i = 0; i < n; ++i) {
if (i != xprime && (x[i] != y[i] || i == yprime)) { // doing 4. for x and 5. at the same time
// this was here but was not right: qperm[x[i]] = yranked[countsx[x[i]]];
qperm[i] = yranked[countsx[x[i]]];
// this was here but was not right: countsy[y[i]]++; } } }
countsx[x[i]]++; } }
return qperm; }
Hopefully it's correct ;-)