OpenCL: Access neighbors error

OpenCL: Access neighbors error - python

I'm working on a solver for a differential equation for a particle smulation using Pyopencl.
To solve this equation each particle must access it's neighbors information.
The arrays I'm using are numpy complex64 arrays each with 7 elements.
When accessing the neighbors, the program returns the error:
clWaitForEvents failed: out of resources
My OpenCl code is the following. I guess most of it isn't related to this error but i'll post it anyway because it might help somehow:
#define complex_ctr(x, y) (float2)(x, y)
#define complex_add(a, b) complex_ctr((a).x + (b).x, (a).y + (b).y)
#define complex_mul(a, b) complex_ctr(mad(-(a).y, (b).y, (a).x * (b).x), mad((a).y, (b).x, (a).x * (b).y))
#define complex_mul_scalar(a, b) complex_ctr((a).x * (b), (a).y * (b))
#define complex_div_scalar(a, b) complex_ctr((a).x / (b), (a).y / (b))
#define conj(a) complex_ctr((a).x, -(a).y)
#define conj_transp(a) complex_ctr(-(a).y, (a).x)
#define conj_transp_and_mul(a, b) complex_ctr(-(a).y * (b), (a).x * (b))
#define complex_real(a) a.x
#define complex_imag(a) a.y
#define complex_unit (float2)(0, 1)
constant int M=10;
constant float L=1e-09;
constant float p0=1.0;
constant float delta=1.0;
constant float gama=1.0;
constant float omc=1.0;
constant float k_p=1.0;
constant float om_p=1.0;
constant float v = 0.001;
constant float b = 1.0;
constant float dt=0.01;
void f(__global float2 *X,
__global float2 *K,
int id,
uint W,
float t){
float exp_arg;
float2 p11, p22, p33, p21, p31, p32, op, aux, ar, al, p;
p11 = X[id*W];
p22 = X[id*W+1];
p33 = X[id*W+2];
p21 = X[id*W+3];
p31 = X[id*W+4];
p32 = X[id*W+5];
al = X[(id-1)*W+6];
ar = X[(id+1)*W+6];
op = p0 * complex_mul(X[id*W+6], complex_unit);
aux = p22 * gama/2 + complex_mul(op, p22) + conj(p22) * gama/2 + complex_mul(op, conj(p22));
K[id*W] = aux;
aux = (-p22*gama - complex_mul(op, p21) + complex_mul(p32, complex_unit)*omc
- conj(p22)*gama - complex_mul(op, conj(p21)) + complex_mul(conj(p32), complex_unit)*omc);
K[id*W+1] = aux;
aux = p22*gama/2 - complex_mul(p32, complex_unit)*omc + conj(p22)*gama/2 - complex_mul(conj(p32), complex_unit)*omc;
K[id*W+2] = aux;
aux = complex_mul(op, p11) - complex_mul(op, p22) - p21*gama + complex_mul(p21, complex_unit)*delta + complex_mul(p31, complex_unit)*omc;
K[id*W+3] = aux;
aux = complex_mul(p21, complex_unit)*omc + complex_mul(p31, complex_unit)*delta - complex_mul(op, p32);
K[id*W+4] = aux;
aux = (complex_mul(p22, complex_unit)*omc - complex_mul(p33, complex_unit)*omc - complex_mul(op, p31) - p32*gama);
K[id*W+5] = aux;
exp_arg = k_p * L * id - om_p * t;
p = complex_mul(b*p0*p21*complex_ctr(cos(exp_arg), sin(exp_arg)), complex_unit);
aux = (X[(id-1)*W+6] + X[(id+1)*W+6]);
aux = aux + p;
K[id*W+6] = aux;
}
__kernel void RK4Step(__global float2 *X,
__global float2 *K,
__global float2 *Xs,
__global float2 *Xm,
uint W,
float t){
const int gid_x = get_global_id(0);
int idx = 0;
//computation of k1
f(X, K, gid_x, W, t);
for(int i=0; i<W; i++)
{
idx = gid_x*W+i;
Xs[idx] = X[idx] + dt*K[idx]/6;
Xm[idx] = X[idx] + dt*K[idx]/2;
}
//computation of k2
f(Xm, K, gid_x, W, t);
for(int i=0; i<W; i++)
{
idx = gid_x*W+i;
Xs[idx] = Xs[idx] + dt*K[idx]/3;
Xm[idx] = X[idx] + dt*K[idx]/2;
}
//computation of k3
f(Xm, K, gid_x, W, t);
for(int i=0; i<W; i++)
{
idx = gid_x*W+i;
Xs[idx] = Xs[idx] + dt*K[idx]/3;
Xm[idx] = X[idx] + dt*K[idx];
}
//computation of k4
f(Xm, K, gid_x, W, t);
for(int i=0; i<W; i++)
{
idx = gid_x*W+i;
Xs[idx] = Xs[idx] + dt*K[idx]/6;
}
//update photon
for(int i=0; i<W; i++)
{
idx = gid_x*W+i;
X[idx] = Xs[idx];
}
}
If i comment this line:
aux = (X[(id-1)*W+6] + X[(id+1)*W+6]);
The code runs through with no errors, but if I uncomment it, i get the error i described.
The python code that calls this kernel is the following:
import pyopencl as cl
import numpy as np
from pylab import *
import matplotlib.pyplot as plt
import time
"""
Solve the problem
Xi' = M1*Xi + M2*Xi~
M1 and M2 are 6*6 Matrixes which elements are complex numbers
Xi in the form [P11i, P22i, P33i, P21i, P31i, P32i, Ai] where Pxyi is a complex number
"""
########################################################
# #
#'_h' buffers are host buffers. '_d' are device buffers#
# #
########################################################
#Initialization of the device and workspace
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)
MF = cl.mem_flags
# Constants
M = 2000 # Number of atoms
L = np.float32(0.000000001) # Atom Spacing
N = 1000 # Number of time intervals
dt = np.float32(0.1) # Time interval
Timeline = np.arange(0.0, N, dt).astype(np.float32)
p0 = np.float32(1.0) # constant P0 [OMP = P0*Ai]
delta = np.float32(1.0) # constant DELTA
gama = np.float32(1.0) # constant GAMA
omc = np.float32(1.0) # constant OMC
# Writing the source code with the constants declared by the user
text = ""
##text = "__constant int M=" + str(M) + "; \n"
##text += "__constant float L=" + str(L) + "; \n"
##text += "__constant float dt=" + str(dt) + "; \n"
##text += "__constant float p0=" + str(p0) + "; \n"
##text += "__constant float delta=" + str(delta) + "; \n"
##text += "__constant float gama=" + str(gama) + "; \n"
##text += "__constant float omc=" + str(omc) + "; \n"
f1 = open("precode.cl", "r")
f2 = open("kernel.cl", "r")
f3 = open("source.cl",'w+')
precode = f1.read()
kernel = f2.read()
f3.write(precode + text + kernel)
f1.close()
f2.close()
f3.close()
#Initial Conditions
A_h = (np.arange(M) + 1j*np.zeros(M)).astype(np.complex64)
A_h = np.exp(-((A_h-M/2.0)/(0.05 * M))**2)*np.exp(1j * 200.0 * A_h /M)
P11_h = (np.random.randn(M) + 1j*np.random.randn(M)).astype(np.complex64)
P22_h = (np.random.randn(M) + 1j*np.random.randn(M)).astype(np.complex64)
P33_h = (np.random.randn(M) + 1j*np.random.randn(M)).astype(np.complex64)
P21_h = (np.random.randn(M) + 1j*np.random.randn(M)).astype(np.complex64)
P31_h = (np.random.randn(M) + 1j*np.random.randn(M)).astype(np.complex64)
P32_h = (np.random.randn(M) + 1j*np.random.randn(M)).astype(np.complex64)
W = np.uint32(7) # The row width to compute the index inside the kernel
X_h = []
for i in range(M):
X_h.append( np.array([P11_h[i], P22_h[i], P33_h[i], P21_h[i], P31_h[i], P32_h[i], A_h[i]]).astype(np.complex64) )
X_h = np.array(X_h).astype(np.complex64)
K_h = np.empty_like(X_h)
Xs_h = np.empty_like(X_h)
Xm_h = np.empty_like(X_h)
A_h = X_h[:,6]
figure(1)
plt.plot(np.real(A_h))
plt.plot(np.abs(A_h))
# Allocation of required buffers on the device
X_d = cl.Buffer(ctx, MF.READ_WRITE | MF.COPY_HOST_PTR, hostbuf=X_h)
K_d = cl.Buffer(ctx, MF.READ_WRITE | MF.COPY_HOST_PTR, hostbuf=K_h)
Xs_d = cl.Buffer(ctx, MF.READ_WRITE | MF.COPY_HOST_PTR, hostbuf=Xs_h)
Xm_d = cl.Buffer(ctx, MF.READ_WRITE | MF.COPY_HOST_PTR, hostbuf=Xm_h)
f = open("source.cl", "r")
source = f.read()
f.close()
prg = cl.Program(ctx, source).build()
print "Begin Calculation"
start_time = time.time()
for t in Timeline:
completeevent = prg.RK4Step(queue, (M,), None, X_d, K_d, Xs_d, Xm_d, W, t)
completeevent.wait()
cl.enqueue_copy(queue, X_h, X_d)
end_time = time.time()
print "All done"
print "Calculation took " + str(end_time - start_time) + " seconds"
A_h = X_h[:,6]
figure(2)
plt.plot(np.real(A_h))
plt.plot(np.abs(A_h))
##plt.show()
Some code is commented because I'm still working on it but it's minor stuff just to get things a bit cleaner.
I can't understand why this happens. I've tried something similiar but with a much simpler code and the neighbors access goes just fine as I intended.
For examples, when I run this module:
import pyopencl as cl
import numpy as np
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)
MF = cl.mem_flags
M = 3
zero = np.complex64(0.0)
X1_h = np.array([1 + 1j*2, 2 + 1j*3, 3 + 1j*4]).astype(np.complex64)
X2_h = np.array([1 + 1j*2, 2 + 1j*3, 3 + 1j*4]).astype(np.complex64)
X3_h = np.array([1 + 1j*2, 2 + 1j*3, 3 + 1j*4]).astype(np.complex64)
Y1_h = np.array([4 + 1j*5, 5 + 1j*6, 6 + 1j*7]).astype(np.complex64)
Y2_h = np.array([4 + 1j*5, 5 + 1j*6, 6 + 1j*7]).astype(np.complex64)
Y3_h = np.array([4 + 1j*5, 5 + 1j*6, 6 + 1j*7]).astype(np.complex64)
aux_h = np.complex64(1 + 1j*1)
RES_h = np.empty_like(X1_h)
dados_h = []
for i in range(3):
dados_h.append(np.array([X1_h[i], X2_h[i], X3_h[i], Y1_h[i], Y2_h[i], Y3_h[i]]).astype(np.complex64))
dados_h = np.array(dados_h).astype(np.complex64)
print dados_h
aux_d = cl.Buffer(ctx, MF.READ_WRITE | MF.COPY_HOST_PTR, hostbuf=aux_h)
dados_d = cl.Buffer(ctx, MF.READ_WRITE | MF.COPY_HOST_PTR, hostbuf=dados_h)
RES_d = cl.Buffer(ctx, MF.READ_WRITE | MF.COPY_HOST_PTR, hostbuf = RES_h)
Source = """
__kernel void soma( __global float2 *dados, __global float2 *res, int rowWidth){
const int gid_x = get_global_id(0);
res[gid_x] = dados[(gid_x-1)*rowWidth] + dados[(gid_x+1)*rowWidth];
}
"""
prg = cl.Program(ctx, Source).build()
completeEvent = prg.soma(queue, (M,), None, dados_d, RES_d, np.int32(6))
completeEvent.wait()
cl.enqueue_copy(queue, RES_h, RES_d)
print "GPU RES"
print RES_h
the result i get is:
[[ 1.+2.j 1.+2.j 1.+2.j 4.+5.j 4.+5.j 4.+5.j]
[ 2.+3.j 2.+3.j 2.+3.j 5.+6.j 5.+6.j 5.+6.j]
[ 3.+4.j 3.+4.j 3.+4.j 6.+7.j 6.+7.j 6.+7.j]]
GPU RES
[ 2.+3.j 4.+6.j 2.+3.j]
which is exactly what i expected.
Can anyone give me some help on what is happening here? It's probably something simple but I can't find what's wrong.
One additional info: This happens only when i run the code on my GTX970. I have a laptop with an old ATI card that handles the code above just fine with no errors, which got me even more confused on this whole thing.
PS: Sorry for the long post

If id=0 and W=7 then you use negative index value to access element of 'X' array:
aux = (X[(id-1)*W+6] + X[(id+1)*W+6]);
which for id=0 and W=7 is:
aux = (X[-1] + X[13]);
From my experience a code with error like this one may produce still good results on some GPUs therefore I always test my opencl code using different GPU vendors. Especially opencl validation on CPU seems to be very sensitive to such errors.

Related

Random number generators C++ and Python

I have programmed a model in both C++ and Python. This model has a noisy-input component, which I can replace with this C++:
double doubleRand() {
thread_local std::mt19937 generator(std::random_device{}());
std::normal_distribution<double> distribution(0.0, 1.0);
return distribution(generator);
}
Or this Python:
Inoise = (np.random.normal(0, 1) * knoise * np.sqrt(gNa * A))
IIon = ((iNa + iK + iL) * A) + Inoise #
# Compute change of voltage
v[i + 1] = (vT + ((-IIon + IStim) / C) * dt)[0]
The following is very strange:
If I omit the noisy component (Inoise=0), then both models (C++ as well as Python) give exactly the same result. If I only introduce the noisy component (Istim=0), then both models give results (i.e. natural fluctuations that hardly differ from each other at 1000 runs). However, if I choose Istim=0.000001 and add noise, then the results differ by 30%. How is that possible?
Here is the full code. C++:
#include<math.h>
#include<iostream>
#include<random>
#include<vector>
#include<algorithm>
#include<fstream>
#include<omp.h>
#include <iomanip>
#include <assert.h>
// parameters
constexpr double v_Rest = -65.0;
constexpr double gNa = 1200.0;
constexpr double gK = 360.0;
constexpr double gL = 3.0;
constexpr double vNa = 115.0;
constexpr double vK = -12.0;
constexpr double vL = 10.6;
constexpr double c = 1.0;
constexpr double knoise = 0.0005;
bool print = false;
bool bisection = false;
bool test = true;
// stepsize PFs
constexpr int steps = 5;
double store[steps];
int prob[steps];
double step[steps];
// time constants
constexpr double t_end = 1.0;
constexpr double delay = 0.1;
constexpr double duration = 0.1;
constexpr double dt = 0.0025;
constexpr int t_steps = t_end/dt;
constexpr int runs = 1000;
double voltage[t_steps];
double doubleRand() {
thread_local std::mt19937 engine(std::random_device{}());
std::normal_distribution<double> distribution(0.0, 1.0);
return distribution(engine);
}
double alphaM(const double v){ return 12.0 * ((2.5 - 0.1 * (v)) / (exp(2.5 - 0.1 * (v)) - 1.0)); }
double betaM(const double v){ return 12.0 * (4.0 * exp(-(v) / 18.0)); }
double betaH(const double v){ return 12.0 * (1.0 / (exp(3.0 - 0.1 * (v)) + 1.0)); }
double alphaH(const double v){ return 12.0 * (0.07 * exp(-(v) / 20.0)); }
double alphaN(const double v){ return 12.0 * ((1.0 - 0.1 * (v)) / (10.0 * (exp(1.0 - 0.1 * (v)) - 1.0))); }
double betaN(const double v){ return 12.0 * (0.125 * exp(-(v) / 80.0)); }
double HH_model(const double I, const double area_factor){
const double A = 1.0e-8 * area_factor;
const double C = c*A;
const double v0 = 0.0;
const double m0 = alphaM(v0)/(alphaM(v0)+betaM(v0));
const double h0 = alphaH(v0)/(alphaH(v0)+betaH(v0));
const double n0 = alphaN(v0)/(alphaN(v0)+betaN(v0));
int count = 0;
for(int j=0; j<runs; j++){
double vT = v0;
double mT = m0;
double hT = h0;
double nT = n0;
for(int i=0; i<t_steps; i++){
double IStim = 0.0;
if ((delay / dt <= (double)i) && ((double)i <= (delay + duration) / dt))
IStim = I;
mT = (mT + dt * alphaM(vT)) / (1.0 + dt * (alphaM(vT) + betaM(vT)));
hT = (hT + dt * alphaH(vT)) / (1.0 + dt * (alphaH(vT) + betaH(vT)));
nT = (nT + dt * alphaN(vT)) / (1.0 + dt * (alphaN(vT) + betaN(vT)));
const double iNa = gNa * pow(mT, 3.0) * hT * (vT - vNa);
const double iK = gK * pow(nT, 4.0) * (vT - vK);
const double iL = gL * (vT-vL);
const double Inoise = (doubleRand() * knoise * sqrt(gNa * A));
const double IIon = ((iNa + iK + iL) * A) + Inoise;
vT += ((-IIon + IStim) / C) * dt;
voltage[i] = vT;
if(vT > 60.0) {
count++;
break;
}
}
}
return count;
}
int main(){
std::cout << HH_model(1.0e-6,1) << std::endl;
}
}
Python:
import matplotlib.pyplot as py
import numpy as np
import scipy.optimize as optimize
from tqdm import tqdm
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
# HH parameters
v_Rest = -65 # in mV
gNa = 1200 # in mS/cm^2
gK = 360 # in mS/cm^2
gL = 0.3*10 # in mS/cm^2
vNa = 115 # in mV
vK = -12 # in mV
vL = 10.6 # in mV
#Number of runs
runs = 1000
c = 1 # in uF/cm^2
def alphaM(v): return 12 * ((2.5 - 0.1 * (v)) / (np.exp(2.5 - 0.1 * (v)) - 1))
def betaM(v): return 12 * (4 * np.exp(-(v) / 18))
def betaH(v): return 12 * (1 / (np.exp(3 - 0.1 * (v)) + 1))
def alphaH(v): return 12 * (0.07 * np.exp(-(v) / 20))
def alphaN(v): return 12 * ((1 - 0.1 * (v)) / (10 * (np.exp(1 - 0.1 * (v)) - 1)))
def betaN(v): return 12 * (0.125 * np.exp(-(v) / 80))
def HH_model(I,area_factor):
count = 0
t_end = 1 # in ms
delay = 0.1 # in ms
duration = 0.1 # in ms
dt = 0.0025 # in ms
area_factor = area_factor
I = I
C = c*A # uF
for j in tqdm(range(0, runs), total=runs):
# Introduction of equations and channels
# compute the timesteps
t_steps= t_end/dt+1
# Compute the initial values
v0 = 0
m0 = alphaM(v0)/(alphaM(v0)+betaM(v0))
h0 = alphaH(v0)/(alphaH(v0)+betaH(v0))
n0 = alphaN(v0)/(alphaN(v0)+betaN(v0))
# Allocate memory for v, m, h, n
v = np.zeros((int(t_steps), 1))
m = np.zeros((int(t_steps), 1))
h = np.zeros((int(t_steps), 1))
n = np.zeros((int(t_steps), 1))
# Set Initial values
v[:, 0] = v0
m[:, 0] = m0
h[:, 0] = h0
n[:, 0] = n0
### Noise component
knoise= 0.0005 #uA/(mS)^1/2
### --------- Step3: SOLVE
for i in range(0, int(t_steps)-1, 1):
# Get current states
vT = v[i]
mT = m[i]
hT = h[i]
nT = n[i]
# Stimulus current
IStim = 0
if delay / dt <= i <= (delay + duration) / dt:
IStim = I # in uA
else:
IStim = 0
# Compute change of m, h and n
m[i + 1] = (mT + dt * alphaM(vT)) / (1 + dt * (alphaM(vT) + betaM(vT)))
h[i + 1] = (hT + dt * alphaH(vT)) / (1 + dt * (alphaH(vT) + betaH(vT)))
n[i + 1] = (nT + dt * alphaN(vT)) / (1 + dt * (alphaN(vT) + betaN(vT)))
# Ionic currents
iNa = gNa * m[i + 1] ** 3. * h[i + 1] * (vT - vNa)
iK = gK * n[i + 1] ** 4. * (vT - vK)
iL = gL * (vT-vL)
Inoise = (np.random.normal(0, 1) * knoise * np.sqrt(gNa * A))
IIon = ((iNa + iK + iL) * A) + Inoise #
# Compute change of voltage
v[i + 1] = (vT + ((-IIon + IStim) / C) * dt)[0] # in ((uA / cm ^ 2) / (uF / cm ^ 2)) * ms == mV
# adjust the voltage to the resting potential
v = v + v_Rest
# test if there was a spike
if max(v[:]-v_Rest) > 60:
count += 1
return count

You've messed up indents in the Python code. These lines
m[i + 1] = (mT + dt * alphaM(vT)) / (1 + dt * (alphaM(vT) + betaM(vT)))
h[i + 1] = (hT + dt * alphaH(vT)) / (1 + dt * (alphaH(vT) + betaH(vT)))
n[i + 1] = (nT + dt * alphaN(vT)) / (1 + dt * (alphaN(vT) + betaN(vT)))
do not execute when condition delay / dt <= i <= (delay + duration) / dt is True
After indentation is fixed the Python code produces 866, which nearly matches 876 - result of C++ code.

Implement Suffix Tree

Good afternoon. I am trying to rewrite the code from C ++ to python, but I get a key error: 0 in the last lines:
for c in range (256):
link [0] [c] = 1;
I've looked at a sample for adding a value to a SortedDict: sd ['c'] = 3, however I can't figure out what I'm doing wrong.
What needs to be fixed?
#include <string>
#include <map>
const int MAXLEN = 600000;
std::string s;
int pos[MAXLEN], len[MAXLEN], par[MAXLEN];
std::map<char,int> to[MAXLEN], link[MAXLEN];
int sz = 2;
int path[MAXLEN];
void attach(int child, int parent, char c, int child_len)
{
to[parent][c] = child;
len[child] = child_len;
par[child] = parent;
}
void extend(int i)
{
int v, vlen = s.size() - i, old = sz - 1, pstk = 0;
for (v = old; !link[v].count(s[i]); v = par[v]) {
vlen -= len[v];
path[pstk++] = v;
}
int w = link[v][s[i]];
if (to[w].count(s[i + vlen])) {
int u = to[w][s[i + vlen]];
for (pos[sz] = pos[u] - len[u]; s[pos[sz]] == s[i + vlen]; pos[sz] += len[v]) {
v = path[--pstk];
vlen += len[v];
}
attach(sz, w, s[pos[u] - len[u]], len[u] - (pos[u] - pos[sz]));
attach(u, sz, s[pos[sz]], pos[u] - pos[sz]);
w = link[v][s[i]] = sz++;
}
link[old][s[i]] = sz;
attach(sz, w, s[i + vlen], s.size() - (i + vlen));
pos[sz++] = s.size();
}
int main()
{
len[1] = 1; pos[1] = 0; par[1] = 0;
for (int c = 0; c < 256; c++)
link[0][c] = 1;
s = "abababasdsdfasdf";
for (int i = s.size() - 1; i >= 0; i--)
extend(i);
}
and python:
#pip install sortedcontainers
from sortedcontainers import SortedDict
MAXLEN = 600000
s = ""
#pos = []
#leng = []
#par = []
pos = [None]*MAXLEN
leng = [None]*MAXLEN
par = [None]*MAXLEN
to = SortedDict()
link = SortedDict()
sz = 2
path = []
def attach(child, parent, c, child_len):
to[parent][c] = child
leng[child] = child_len
par[child] = parent
def extend(i):
vlen = len(s) - i
old = sz - 1
pstk = 0;
v = old
while 1:
if link[v].count(s[i]):
break
vlen -= leng[v]
path[pstk + 1] = v
v = par[v]
w = link[v][s[i]]
if to[w].count(s[i + vlen]):
u = to[w][s[i + vlen]]
pos[sz] = pos[u] - leng[u]
while 1:
if s[pos[sz]] != s[i + vlen]:
break
v = path[pstk - 1]
vlen += leng[v]
pos[sz] += leng[v]
attach(sz, w, s[pos[u] - leng[u]], leng[u] - (pos[u] - pos[sz]))
attach(u, sz, s[pos[sz]], pos[u] - pos[sz])
w = link[v][s[i]] = sz + 1
link[old][s[i]] = sz
attach(sz, w, s[i + vlen], len(s) - (i + vlen))
pos[sz + 1] = len(s)
leng[1] = 1;
pos[1] = 0;
par[1] = 0;
for c in range(256):
link[0][c] = 1;
s = "abababasdsdfasdf";
i = len(s) - 1;
while 1:
if i <= 0:
break
extend(i)
i -= 1

How do I manually blur an image in python?

In the code below I am trying to blur an image in python by replacing each pixel with the average of its surrounding pixels as long as the original pixel is not at the border of the image. However, whenever I run my code I am simply receiving a black image. Can anyone tell me what I'm doing wrong?
import numpy as np
import matplotlib.pyplot as plt
from scipy.misc import imread, imsave, imresize
imgOld = imread('house.png') # read image into a numpy array
imgNew = imgOld
(imgH, imgW, imgC) = imgOld.shape # imgC = 2 or 4 (RGB or RGBA)
plt.imshow(imgOld, origin='lower')
# blur
for y in range(imgH):
for x in range(imgW):
xLast = 0
yLast = 0
if x != 0 and y != 0:
xLast = (x-1) % imgW
yLast = (y-1) % imgH
else:
xLast = 0
yLast = 0
xNext = (x+1) % imgW
yNext = (y+1) % imgH
rgb = imgNew[y,x]
r = (imgOld[yLast,xLast,2]
+ imgOld[yLast,x,2]
+ imgOld[yLast,xNext,2]
+ imgOld[y,xLast,2]
+ imgOld[y,x,2]
+ imgOld[y,xNext,2]
+ imgOld[yNext,xLast,2]
+ imgOld[yNext,x,2]
+ imgOld[yNext,xNext,2]) / 9
g = (imgOld[yLast,xLast,1]
+ imgOld[yLast,x,1]
+ imgOld[yLast,xNext,1]
+ imgOld[y,xLast,1]
+ imgOld[y,x,1]
+ imgOld[y,xNext,1]
+ imgOld[yNext,xLast,1]
+ imgOld[yNext,x,1]
+ imgOld[yNext,xNext,1]) / 9
b = (imgOld[yLast,xLast,0]
+ imgOld[yLast,x,0]
+ imgOld[yLast,xNext,0]
+ imgOld[y,xLast,0]
+ imgOld[y,x,0]
+ imgOld[y,xNext,0]
+ imgOld[yNext,xLast,0]
+ imgOld[yNext,x,0]
+ imgOld[yNext,xNext,0]) / 9
imgNew[y,x] = [b,g,r]
plt.imshow(imgNew, origin='lower')
Edit: I have updated my code to average based on the individual color value of each pixel however I am still receiving a black image as a result.
I am adding my perfectly working java version of this code. I do not understand where I am going wrong
Java version:
protected void proc_17() {
info = "Blur";
for (int y = 0; y < imgH; y++) {
for (int x = 0; x < imgW; x++) {
int xLast = 0;
int yLast = 0;
if (x != 0 && y != 0) {
xLast = (x-1) % imgW;
yLast = (y-1) % imgH;
} else {
xLast = 0;
yLast = 0;
}
int xNext = (x+1) % imgW;
int yNext = (y+1) % imgH;
float r = (imgOld.getR(xLast, yLast)
+ imgOld.getR(x, yLast)
+ imgOld.getR(xNext, yLast)
+ imgOld.getR(xLast, y)
+ imgOld.getR(x, y)
+ imgOld.getR(xNext, y)
+ imgOld.getR(xLast, yNext)
+ imgOld.getR(x, yNext)
+ imgOld.getR(xNext, yNext)) / 9;
float g = (imgOld.getG(xLast, yLast)
+ imgOld.getG(x, yLast)
+ imgOld.getG(xNext, yLast)
+ imgOld.getG(xLast, y)
+ imgOld.getG(x, y)
+ imgOld.getG(xNext, y)
+ imgOld.getG(xLast, yNext)
+ imgOld.getG(x, yNext)
+ imgOld.getG(xNext, yNext)) / 9;
float b = (imgOld.getB(xLast, yLast)
+ imgOld.getB(x, yLast)
+ imgOld.getB(xNext, yLast)
+ imgOld.getB(xLast, y)
+ imgOld.getB(x, y)
+ imgOld.getB(xNext, y)
+ imgOld.getB(xLast, yNext)
+ imgOld.getB(x, yNext)
+ imgOld.getB(xNext, yNext)) / 9;
imgNew.setR(x, y, r);
imgNew.setG(x, y, g);
imgNew.setB(x, y, b);
}
}
}

The problem with this code is that it is not adding up the values  in the variables r, g, and b.
This may be for reasons of indentation, so I put these values as a list, made the sum, and divided it by 9.
Follow the script below:
imgOld = imread('house.png') # read image into a numpy array imgNew =
imgOld (imgH, imgW, imgC) = imgOld.shape # imgC = 2 or 4 (RGB or RGBA)
plt.imshow(imgOld, vmin=0, vmax=255) plt.show()
# blur for y in range(imgH):
for x in range(imgW):
xLast = 0
yLast = 0
if x != 0 and y != 0:
xLast = (x-1) % imgW
yLast = (y-1) % imgH
else:
xLast = 0
yLast = 0
xNext = (x+1) % imgW
yNext = (y+1) % imgH
rgb = imgNew[y,x]
aux_r = (imgOld[yLast,xLast,2],
imgOld[yLast,x,2],
imgOld[yLast,xNext,2],
imgOld[y,xLast,2],
imgOld[y,x,2],
imgOld[y,xNext,2],
imgOld[yNext,xLast,2],
imgOld[yNext,x,2],
imgOld[yNext,xNext,2])
r = sum(aux_r)
r = r/9
aux_g = (imgOld[yLast,xLast,1],
imgOld[yLast,x,1],
imgOld[yLast,xNext,1],
imgOld[y,xLast,1],
imgOld[y,x,1],
imgOld[y,xNext,1],
imgOld[yNext,xLast,1],
imgOld[yNext,x,1],
imgOld[yNext,xNext,1])
g = sum(aux_g)
g = g/9
aux_b = (imgOld[yLast,xLast,0],
imgOld[yLast,x,0],
imgOld[yLast,xNext,0],
imgOld[y,xLast,0],
imgOld[y,x,0],
imgOld[y,xNext,0],
imgOld[yNext,xLast,0],
imgOld[yNext,x,0],
imgOld[yNext,xNext,0])
b = sum(aux_b)
b = b/9
imgNew[y,x] = [b,g,r] plt.imshow(imgNew, vmin=0, vmx=255) plt.show()

OpenCV how to smooth contour, reducing noise

I extracted the contours of an image, that you can see here:
However, it has some noise.
How can I smooth the noise? I did a close up to make clearer what I want to meant
Original image that I've used:
Code:
rMaskgray = cv2.imread('redmask.jpg', cv2.CV_LOAD_IMAGE_GRAYSCALE)
(thresh, binRed) = cv2.threshold(rMaskgray, 50, 255, cv2.THRESH_BINARY)
Rcontours, hier_r = cv2.findContours(binRed,cv2.RETR_CCOMP,cv2.CHAIN_APPROX_SIMPLE)
r_areas = [cv2.contourArea(c) for c in Rcontours]
max_rarea = np.max(r_areas)
CntExternalMask = np.ones(binRed.shape[:2], dtype="uint8") * 255
for c in Rcontours:
if(( cv2.contourArea(c) > max_rarea * 0.70) and (cv2.contourArea(c)< max_rarea)):
cv2.drawContours(CntExternalMask,[c],-1,0,1)
cv2.imwrite('contour1.jpg', CntExternalMask)

Try an upgrade to OpenCV 3.1.0. After some code adaptations for the new version as shown below, I tried it out with OpenCV version 3.1.0 and did not see any of the effects you are describing.
import cv2
import numpy as np
print cv2.__version__
rMaskgray = cv2.imread('5evOn.jpg', 0)
(thresh, binRed) = cv2.threshold(rMaskgray, 50, 255, cv2.THRESH_BINARY)
_, Rcontours, hier_r = cv2.findContours(binRed,cv2.RETR_CCOMP,cv2.CHAIN_APPROX_SIMPLE)
r_areas = [cv2.contourArea(c) for c in Rcontours]
max_rarea = np.max(r_areas)
CntExternalMask = np.ones(binRed.shape[:2], dtype="uint8") * 255
for c in Rcontours:
if(( cv2.contourArea(c) > max_rarea * 0.70) and (cv2.contourArea(c)< max_rarea)):
cv2.drawContours(CntExternalMask,[c],-1,0,1)
cv2.imwrite('contour1.jpg', CntExternalMask)

I don't know if is it ok to provide Java code - but I implemented Gaussian smoothing for openCV contour. Logic and theory is taken from here https://www.morethantechnical.com/2012/12/07/resampling-smoothing-and-interest-points-of-curves-via-css-in-opencv-w-code/
package CurveTools;
import org.apache.log4j.Logger;
import org.opencv.core.Mat;
import org.opencv.core.MatOfPoint;
import org.opencv.core.Point;
import java.util.ArrayList;
import java.util.List;
import static org.opencv.core.CvType.CV_64F;
import static org.opencv.imgproc.Imgproc.getGaussianKernel;
class CurveSmoother {
private double[] g, dg, d2g, gx, dx, d2x;
private double gx1, dgx1, d2gx1;
public double[] kappa, smoothX, smoothY;
public double[] contourX, contourY;
/* 1st and 2nd derivative of 1D gaussian */
void getGaussianDerivs(double sigma, int M) {
int L = (M - 1) / 2;
double sigma_sq = sigma * sigma;
double sigma_quad = sigma_sq * sigma_sq;
dg = new double[M];
d2g = new double[M];
g = new double[M];
Mat tmpG = getGaussianKernel(M, sigma, CV_64F);
for (double i = -L; i < L + 1.0; i += 1.0) {
int idx = (int) (i + L);
g[idx] = tmpG.get(idx, 0)[0];
// from http://www.cedar.buffalo.edu/~srihari/CSE555/Normal2.pdf
dg[idx] = -i * g[idx] / sigma_sq;
d2g[idx] = (-sigma_sq + i * i) * g[idx] / sigma_quad;
}
}
/* 1st and 2nd derivative of smoothed curve point */
void getdX(double[] x, int n, double sigma, boolean isOpen) {
int L = (g.length - 1) / 2;
gx1 = dgx1 = d2gx1 = 0.0;
for (int k = -L; k < L + 1; k++) {
double x_n_k;
if (n - k < 0) {
if (isOpen) {
//open curve - mirror values on border
x_n_k = x[-(n - k)];
} else {
//closed curve - take values from end of curve
x_n_k = x[x.length + (n - k)];
}
} else if (n - k > x.length - 1) {
if (isOpen) {
//mirror value on border
x_n_k = x[n + k];
} else {
x_n_k = x[(n - k) - x.length];
}
} else {
x_n_k = x[n - k];
}
gx1 += x_n_k * g[k + L]; //gaussians go [0 -> M-1]
dgx1 += x_n_k * dg[k + L];
d2gx1 += x_n_k * d2g[k + L];
}
}
/* 0th, 1st and 2nd derivatives of whole smoothed curve */
void getdXcurve(double[] x, double sigma, boolean isOpen) {
gx = new double[x.length];
dx = new double[x.length];
d2x = new double[x.length];
for (int i = 0; i < x.length; i++) {
getdX(x, i, sigma, isOpen);
gx[i] = gx1;
dx[i] = dgx1;
d2x[i] = d2gx1;
}
}
/*
compute curvature of curve after gaussian smoothing
from "Shape similarity retrieval under affine transforms", Mokhtarian & Abbasi 2002
curvex - x position of points
curvey - y position of points
kappa - curvature coeff for each point
sigma - gaussian sigma
*/
void computeCurveCSS(double[] curvex, double[] curvey, double sigma, boolean isOpen) {
int M = (int) Math.round((10.0 * sigma + 1.0) / 2.0) * 2 - 1;
assert (M % 2 == 1); //M is an odd number
getGaussianDerivs(sigma, M);//, g, dg, d2g
double[] X, XX, Y, YY;
getdXcurve(curvex, sigma, isOpen);
smoothX = gx.clone();
X = dx.clone();
XX = d2x.clone();
getdXcurve(curvey, sigma, isOpen);
smoothY = gx.clone();
Y = dx.clone();
YY = d2x.clone();
kappa = new double[curvex.length];
for (int i = 0; i < curvex.length; i++) {
// Mokhtarian 02' eqn (4)
kappa[i] = (X[i] * YY[i] - XX[i] * Y[i]) / Math.pow(X[i] * X[i] + Y[i] * Y[i], 1.5);
}
}
/* find zero crossings on curvature */
ArrayList<Integer> findCSSInterestPoints() {
assert (kappa != null);
ArrayList<Integer> crossings = new ArrayList<>();
for (int i = 0; i < kappa.length - 1; i++) {
if ((kappa[i] < 0.0 && kappa[i + 1] > 0.0) || kappa[i] > 0.0 && kappa[i + 1] < 0.0) {
crossings.add(i);
}
}
return crossings;
}
public void polyLineSplit(MatOfPoint pl) {
contourX = new double[pl.height()];
contourY = new double[pl.height()];
for (int j = 0; j < contourX.length; j++) {
contourX[j] = pl.get(j, 0)[0];
contourY[j] = pl.get(j, 0)[1];
}
}
public MatOfPoint polyLineMerge(double[] xContour, double[] yContour) {
assert (xContour.length == yContour.length);
MatOfPoint pl = new MatOfPoint();
List<Point> list = new ArrayList<>();
for (int j = 0; j < xContour.length; j++)
list.add(new Point(xContour[j], yContour[j]));
pl.fromList(list);
return pl;
}
MatOfPoint smoothCurve(MatOfPoint curve, double sigma) {
int M = (int) Math.round((10.0 * sigma + 1.0) / 2.0) * 2 - 1;
assert (M % 2 == 1); //M is an odd number
//create kernels
getGaussianDerivs(sigma, M);
polyLineSplit(curve);
getdXcurve(contourX, sigma, false);
smoothX = gx.clone();
getdXcurve(contourY, sigma, false);
smoothY = gx;
Logger.getRootLogger().info("Smooth curve len: " + smoothX.length);
return polyLineMerge(smoothX, smoothY);
}
}

How, when and what to vectorize in python?

Right, so this is basically a follow up of an earlier question of mine. I have some binary data that are in floating point binary format. Using C, the process is fast, but I lose some precision with atof(). I tried looking through the forum, and also elsewhere, but my problem was not solved. As such, I moved to python. Ah joy! the program worked perfectly well, but is so very slow compared to C. I looked up optimizations on python, which pointed me to Cython and Weave, but I have some doubts. If you will follow my code, I am confused where to apply the optimizing C code, since I am reading from the numpy object. My question, is it possible to read data using numpy functions within the Cython, and if so, please provide a small example.
The C Code uses PolSARpro's header files, and libbmp for creating the .bmp file
As a note, I am posting both my codes. God knows I had to go through a lot just to get the formulas working. This way, others in need can give their thoughts and input too :)
C Code (Working, but atof() loses precision, thus output lat long are slightly off)
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <polSARpro/bmpfile.c>
#include <polSARpro/graphics.c>
#include <polSARpro/matrix.c>
#include <polSARpro/processing.c>
#include <polSARpro/util.c>
#define METAL_THRESHOLD 5.000000
#define POLARIZATION_FRACTION_THRESHOLD 0.900000
#define PI 3.14159265
#define FOURTHPI PI/4
#define deg2rad PI/180
#define rad2deg 180./PI
/*double PI = 3.14159265;
double FOURTHPI = PI / 4;
double deg2rad = PI / 180;
double rad2deg = 180.0 / PI;*/
FILE *L1,*PF,*SPF;
FILE *txt;
FILE *finalLocations;
long i=0,loop_end;
int lig,col;
float l1,pf,spf;
long pos;
int Nlig,Ncol;
float *bufferout;
float *bufferin_L1,*bufferin_L2;
float valueL1,valuePF,xx;
float sizeGridX, sizeGridY, startX, startY;
float posX,posY;
int ZONE;
char Heading[10];
char setZone[15];
int p[4][2];
int degree, minute, second;
void UTM2LL(int ReferenceEllipsoid, double UTMNorthing, double UTMEasting, char* UTMZone, double *Lat, double *Long)
{
//converts UTM coords to lat/long. Equations from USGS Bulletin 1532
//East Longitudes are positive, West longitudes are negative.
//North latitudes are positive, South latitudes are negative
//Lat and Long are in decimal degrees.
//Written by Chuck Gantz- chuck.gantz#globalstar.com
double k0 = 0.9996;
double a = 6378137;
double eccSquared = 0.00669438;
double eccPrimeSquared;
double e1 = (1-sqrt(1-eccSquared))/(1+sqrt(1-eccSquared));
double N1, T1, C1, R1, D, M;
double LongOrigin;
double mu, phi1, phi1Rad;
double x, y;
int ZoneNumber;
char* ZoneLetter;
int NorthernHemisphere; //1 for northern hemispher, 0 for southern
x = UTMEasting - 500000.0; //remove 500,000 meter offset for longitude
y = UTMNorthing;
ZoneNumber = strtoul(UTMZone, &ZoneLetter, 10);
if((*ZoneLetter - 'N') >= 0)
NorthernHemisphere = 1;//point is in northern hemisphere
else
{
NorthernHemisphere = 0;//point is in southern hemisphere
y -= 10000000.0;//remove 10,000,000 meter offset used for southern hemisphere
}
LongOrigin = (ZoneNumber - 1)*6 - 180 + 3; //+3 puts origin in middle of zone
eccPrimeSquared = (eccSquared)/(1-eccSquared);
M = y / k0;
mu = M/(a*(1-eccSquared/4-3*eccSquared*eccSquared/64-5*eccSquared*eccSquared*eccSquared/256));
phi1Rad = mu + (3*e1/2-27*e1*e1*e1/32)*sin(2*mu)
+ (21*e1*e1/16-55*e1*e1*e1*e1/32)*sin(4*mu)
+(151*e1*e1*e1/96)*sin(6*mu);
phi1 = phi1Rad*rad2deg;
N1 = a/sqrt(1-eccSquared*sin(phi1Rad)*sin(phi1Rad));
T1 = tan(phi1Rad)*tan(phi1Rad);
C1 = eccPrimeSquared*cos(phi1Rad)*cos(phi1Rad);
R1 = a*(1-eccSquared)/pow(1-eccSquared*sin(phi1Rad)*sin(phi1Rad), 1.5);
D = x/(N1*k0);
*Lat = phi1Rad - (N1*tan(phi1Rad)/R1)*(D*D/2-(5+3*T1+10*C1-4*C1*C1-9*eccPrimeSquared)*D*D*D*D/24
+(61+90*T1+298*C1+45*T1*T1-252*eccPrimeSquared-3*C1*C1)*D*D*D*D*D*D/720);
*Lat = *Lat * rad2deg;
*Long = (D-(1+2*T1+C1)*D*D*D/6+(5-2*C1+28*T1-3*C1*C1+8*eccPrimeSquared+24*T1*T1)
*D*D*D*D*D/120)/cos(phi1Rad);
*Long = LongOrigin + *Long * rad2deg;
}
void convertToDegree(float decimal)
{
int negative = decimal < 0;
decimal = abs(decimal);
minute = (decimal * 3600/ 60);
second = fmodf((decimal * 3600),60);
degree = minute / 60;
minute = minute % 60;
if (negative)
{
if (degree > 0)
degree = -degree;
else if (minute > 0)
minute = -minute;
else
second = -second;
}
}
void readConfig(int *Row, int *Col)
{
char tmp[70];
int i=0;
FILE *fp = fopen("config.txt","r");
if(fp == NULL)
{
perror("Config.txt");
exit(1);
}
while(!feof(fp))
{
fgets(tmp,70,fp);
if (i==1)
*Row = atoi(tmp);
if(i==4)
*Col = atoi(tmp);
i++;
}
fclose(fp);
}
void readHDR(float *gridX,float *gridY,float *startXPos,float *startYPos)
{
FILE *fp = fopen("PF.bin.hdr","r");
int i=0;
char tmp[255];
char junk[255];
memset(tmp,0X00,sizeof(tmp));
memset(junk,0X00,sizeof(junk));
if(fp==NULL)
{
perror("Please locate or create PF.bin.hdr");
exit(0);
}
while(!feof(fp))
{
if(i==13)
break;
fgets(tmp,255,fp);
i++;
}
fclose(fp);
strcpy(junk,strtok(tmp,","));
strtok(NULL,",");
strtok(NULL,",");
strcpy(tmp,strtok(NULL,","));
//puts(tmp);
*startXPos = atof(tmp);
strcpy(tmp,strtok(NULL,","));
//puts(tmp);
*startYPos = atof(tmp);
strcpy(tmp,strtok(NULL,","));
//puts(tmp);
*gridX = atof(tmp);
strcpy(tmp,strtok(NULL,","));
//puts(tmp);
*gridY = atof(tmp);
strcpy(tmp,strtok(NULL,","));
ZONE = atoi(tmp);
strcpy(tmp,strtok(NULL,","));
strcpy(Heading,tmp);
}
int main()
{
bmpfile_t *bmp;
double Lat;
double Long;
int i;
rgb_pixel_t pixelMetal = {128, 64, 0, 0};
rgb_pixel_t pixelOthers = {128, 64, 0, 0};
readConfig(&Nlig,&Ncol);
readHDR(&sizeGridX,&sizeGridY,&startX,&startY);
//startX = startX - (double) 0.012000;
//startY = startY + (double)0.111000;
printf("Enter the rectangle's top-left and bottom-right region of interest points as: x y\n");
for(i=0;i<2;i++)
{
printf("Enter point %d::\t",i+1);
scanf("%d %d",&p[i][0], &p[i][1]);
}
printf("Grid Size(X,Y)::( %f,%f ), Start Positions(X,Y)::( %f, %f ), ZONE::%d, Heading:: %s\n\n",sizeGridX,sizeGridY,startX,startY,ZONE,Heading);
pixelMetal.red = 255;
pixelMetal.blue = 010;
pixelMetal.green = 010;
pixelOthers.red = 8;
pixelOthers.blue = 8;
pixelOthers.green = 8;
L1 = fopen("l1.bin","rb");
PF =fopen("PF.bin","rb");
SPF = fopen("SPF_L1.bin","wb");
//txt = fopen("locations(UTM).txt","w");
finalLocations = fopen("locationsROI.txt","w");
if(L1==NULL || PF==NULL || SPF==NULL || finalLocations == NULL)
{
perror("Error in opening files!");
return -1;
}
fseek(L1,0,SEEK_END);
pos = ftell(L1);
loop_end = pos;
printf("L1.bin contains::\t%ld elements\n",pos);
fseek(PF,0,SEEK_END);
pos = ftell(PF);
printf("PF.bin contains::\t%ld elements\n",pos);
fseek(L1,0,SEEK_SET);
fseek(PF,0,SEEK_SET);
bmp = bmp_create(Ncol,Nlig,8); //width * height
bufferin_L1 = vector_float(Ncol);
bufferin_L2 = vector_float(Ncol);
bufferout = vector_float(Ncol);
printf("Resources Allocated. Beginning...\n");
for (lig = 0; lig < Nlig; lig++) /* rows */
{
if (lig%(int)(Nlig/20) == 0)
{
printf("%f\r", 100. * lig / (Nlig - 1));
fflush(stdout);
}
fread(&bufferin_L1[0], sizeof(float), Ncol, L1);
fread(&bufferin_L2[0], sizeof(float), Ncol, PF);
for (col = 0; col < Ncol; col++) /* columns */
{
valueL1 = bufferin_L1[col];
valuePF = bufferin_L2[col];
if(valueL1 >= METAL_THRESHOLD && valuePF >= POLARIZATION_FRACTION_THRESHOLD)
{
if(col >= p[0][0] && col <= p[1][0] && lig >= p[0][1] && lig <= p[1][1])
{
xx = fabs(valueL1 + valuePF);
bmp_set_pixel(bmp,col,lig,pixelMetal);
posX = startX + (sizeGridX * col);
posY = startY - (sizeGridY * lig);
//fprintf(txt,"%f %f %d %s\n",posX,posY,ZONE,Heading);
sprintf(setZone,"%d",ZONE);
if(strstr(Heading,"Nor")!=NULL)
strcat(setZone,"N");
else
strcat(setZone,"S");
UTM2LL(23, posY, posX, setZone, &Lat, &Long); // 23 for WGS-84
convertToDegree(Lat);
//fprintf(finalLocations,"UTM:: %.2fE %.2fN , Decimal: %f %f , Degree: %d %d %d, ",posX,posY,Lat,Long,degree,minute,second);
//fprintf(finalLocations,"%.2fE,%.2fN,%f,%f ,%d,%d,%d,",posX,posY,Lat,Long,degree,minute,second);
fprintf(finalLocations,"%.2f,%.2f,%f,%f ,%d,%d,%d,",posX,posY,Lat,Long,degree,minute,second);
convertToDegree(Long);
fprintf(finalLocations,"%d,%d,%d\n",degree,minute,second);
}
else
{
xx = fabs(valueL1) ;
bmp_set_pixel(bmp,col,lig,pixelOthers);
}
}
else
{
xx = fabs(valueL1) ;
bmp_set_pixel(bmp,col,lig,pixelOthers);
}
bufferout[col] = xx;
}
fwrite(&bufferout[0], sizeof(float), Ncol, SPF);
}
free_vector_float(bufferout);
fclose(L1);
fclose(PF);
fclose(SPF);
//fclose(txt);
fclose(finalLocations);
printf("\n----------Writing BMP File!----------\n");
bmp_save(bmp,"SPF_L1(ROI).bmp");
bmp_destroy(bmp);
printf("\nDone!\n");
}
As well as the Python code::
# -*- coding: utf-8 -*-
"""
Created on Wed Apr 10 10:29:18 2013
#author: Binayaka
"""
import numpy as Num;
import math;
import array;
class readConfiguration(object):
def __init__(self,x):
self.readConfig(x);
def readConfig(self,x):
try:
crs = open(x,'r');
srs = open('config.txt','r');
except IOError:
print "Files missing!";
else:
rows = crs.readlines();
values = rows[12].split(',');
rows = srs.readlines();
self.startX = float(values[3]);
self.startY = float(values[4]);
self.gridSizeX = float(values[5]);
self.gridSizeY = float(values[6]);
self.Zone = int(values[7]);
self.Hemisphere = values[8];
self.NRows = int(rows[1].strip());
self.NCols = int(rows[4].strip());
self.MetalThreshold = 5.000000;
self.PFThreshold = 0.900000;
self.rad2deg = 180/math.pi;
self.deg2rad = math.pi/180;
self.FOURTHPI = math.pi/4;
crs.close();
srs.close();
def decdeg2dms(dd):
negative = dd < 0;
dd = abs(dd);
minutes,seconds = divmod(dd*3600,60);
degrees,minutes = divmod(minutes,60);
if negative:
if degrees > 0:
degrees = -degrees;
elif minutes > 0:
minutes = -minutes;
else:
seconds = -seconds;
return (degrees,minutes,seconds);
def UTM2LL(self,UTMEasting, UTMNorthing):
k0 = 0.9996;
a = 6378137;
eccSquared = 0.00669438;
e1 = (1-math.sqrt(1-eccSquared))/(1+math.sqrt(1-eccSquared));
x = UTMEasting - 500000.0;#remove 500,000 meter offset for longitude
y = UTMNorthing;
if self.Hemisphere == "North":
self.Hemi = 1;
else:
self.Hemi = -1;
y -= 10000000.0;
LongOrigin = (self.Zone - 1)*6 - 180 + 3;
eccPrimeSquared = (eccSquared)/(1-eccSquared);
M = y / k0;
mu = M/(a*(1-eccSquared/4-3*eccSquared*eccSquared/64-5*eccSquared*eccSquared*eccSquared/256));
phi1Rad = mu + (3*e1/2-27*e1*e1*e1/32)*math.sin(2*mu) + (21*e1*e1/16-55*e1*e1*e1*e1/32)*math.sin(4*mu) +(151*e1*e1*e1/96)*math.sin(6*mu);
#phi1 = phi1Rad*self.rad2deg;
N1 = a/math.sqrt(1-eccSquared*math.sin(phi1Rad)*math.sin(phi1Rad));
T1 = math.tan(phi1Rad)*math.tan(phi1Rad);
C1 = eccPrimeSquared*math.cos(phi1Rad)*math.cos(phi1Rad);
R1 = a*(1-eccSquared)/pow(1-eccSquared*math.sin(phi1Rad)*math.sin(phi1Rad), 1.5);
D = x/(N1*k0);
self.Lat = phi1Rad - (N1*math.tan(phi1Rad)/R1)*(D*D/2-(5+3*T1+10*C1-4*C1*C1-9*eccPrimeSquared)*D*D*D*D/24 +(61+90*T1+298*C1+45*T1*T1-252*eccPrimeSquared-3*C1*C1)*D*D*D*D*D*D/720);
self.Lat = self.Lat * self.rad2deg;
self.Long = (D-(1+2*T1+C1)*D*D*D/6+(5-2*C1+28*T1-3*C1*C1+8*eccPrimeSquared+24*T1*T1)*D*D*D*D*D/120)/math.cos(phi1Rad);
self.Long = LongOrigin + self.Long * self.rad2deg;
def printConfiguration(self):
""" Just to check whether our reading was correct """
print "Metal Threshold:\t" + str(self.MetalThreshold);
print "PF Threshold:\t" + str(self.PFThreshold);
print "Start X:\t" + str(self.startX);
print "Start Y:\t" + str(self.startY);
print "Grid size(X) :\t" + str(self.gridSizeX);
print "Grid size(Y) :\t" + str(self.gridSizeY);
def createROIfile(self,ROIFilename):
firstPoint = raw_input('Enter topLeft point coord\t').split();
secondPoint = raw_input('Enter bottomRight point coord\t').split();
try:
L1 = open('l1.bin','rb');
PF = open('PF.bin','rb');
SPF = open('pySPF_L1.bin','wb');
targetFilename = open(ROIFilename,'w');
except IOError:
print "Files Missing!";
else:
L1.seek(0,2);
elementsL1 = L1.tell();
L1.seek(0,0);
PF.seek(0,2);
elementsPF = PF.tell();
PF.seek(0,0);
print "L1.bin contains\t" + str(elementsL1) + " elements";
print "PF.bin contains\t" + str(elementsPF) + " elements";
binvaluesL1 = array.array('f');
binvaluesPF = array.array('f');
binvaluesSPF = array.array('f');
for row in range(0,self.NRows):
binvaluesL1.read(L1,self.NCols);
binvaluesPF.read(PF,self.NCols);
dataL1 = Num.array(binvaluesL1, dtype=Num.float);
dataPF = Num.array(binvaluesPF, dtype=Num.float);
dataSPF = dataL1 + dataPF;
binvaluesSPF.fromlist(Num.array(dataSPF).tolist());
for col in range(0,self.NCols):
if(dataL1[col] >= self.MetalThreshold and dataPF[col] >= self.PFThreshold):
if(col >= int(firstPoint[0]) and col <= int(secondPoint[0]) and row >= int(firstPoint[1]) and row <= int(secondPoint[1])):
posX = self.startX + (self.gridSizeX * col);
posY = self.startY - (self.gridSizeY * row);
self.UTM2LL(posY,posX);
tmp1 = self.decdeg2dms(posY);
tmp2 = self.decdeg2dms(posX);
strTarget = "Decimal Degree:: " + str(posX) + "E " + str(posY) + "N \t Lat long:: " + str(tmp1) + " " + str(tmp2) + "\n";
targetFilename.write(strTarget);
binvaluesSPF.tofile(SPF);
L1.close();
PF.close();
SPF.close();
targetFilename.close();
print "Done!";
dimensions = readConfiguration('PF.bin.hdr');
dimensions.printConfiguration();
dimensions.createROIfile('testPythonROI.txt');
Its the Python code that needs Optimization, as the values of NRows and NCols can and do reach the order of thousands.

A few general comments:
With python, it's really best to stick to PEP8 for a multitude of reasons. Python programmers are particularly picky about readability and essentially universally adhere to the community coding guidelines (PEP8). Avoid camelCase, keep lines below 80 columns, leave the semicolons out, and feel free to occasionally ignore these guidelines where they'd make things less readable.
There's no need for the builtin array type here if you're using numpy. I'm confused why you're constantly converting back and forth...
Use a projection library. Specify what datum and ellipsoid you're using, otherwise the coordinates (easting/northing or lat/long) have absolutely no meaning.
Don't use one big class as a hold-all for unrelated things. There's nothing wrong with just having a few functions. You don't need to make it into a class unless it makes sense to do so.
Use vectorized operations with numpy arrays.
Here's what would appear to be your performance bottleneck:
for row in range(0,self.NRows):
binvaluesL1.read(L1,self.NCols);
binvaluesPF.read(PF,self.NCols);
dataL1 = Num.array(binvaluesL1, dtype=Num.float);
dataPF = Num.array(binvaluesPF, dtype=Num.float);
dataSPF = dataL1 + dataPF;
binvaluesSPF.fromlist(Num.array(dataSPF).tolist());
for col in range(0,self.NCols):
if(dataL1[col] >= self.MetalThreshold and dataPF[col] >= self.PFThreshold):
if(col >= int(firstPoint[0]) and col <= int(secondPoint[0]) and row >= int(firstPoint[1]) and row <= int(secondPoint[1])):
posX = self.startX + (self.gridSizeX * col);
posY = self.startY - (self.gridSizeY * row);
self.UTM2LL(posY,posX);
tmp1 = self.decdeg2dms(posY);
tmp2 = self.decdeg2dms(posX);
strTarget = "Decimal Degree:: " + str(posX) + "E " + str(posY) + "N \t Lat long:: " + str(tmp1) + " " + str(tmp2) + "\n";
targetFilename.write(strTarget);
binvaluesSPF.tofile(SPF);
One of your biggest problems is the way you're reading in your data. You're constantly reading things in as one thing, then converting that to a list, then converting that to a numpy array. There's absolutely no need to jump through all those hoops. Numpy will unpack your binary floats for you just like array will.
Just do grid = np.fromfile(yourfile, dtype=np.float32).reshape(ncols, nrows). (Outside the loop.)
After that, your nested loops can be easily vectorized and expressed with just a few lines of code.
Here's how I would write your code. This probably won't run as-is, as I can't test it with your data. However, it should give you some general ideas.
import numpy as np
import pyproj
def main():
config = Config('PF.bin.hdr')
grid1, grid2 = load_data('l1.bin', 'PF.bin', config.nrows, config.ncols)
spf = grid1 + grid2
spf.tofile('pySPF_L1.bin')
easting_aoi, northing_aoi = subset_data(grid1, grid2, config)
save_selected_region(easting_aoi, northing_aoi, config.zone,
'testPythonROI.txt')
def load_data(filename1, filename2, nrows, ncols):
"""It would really be good to use more descriptive variable names than "L1"
and "PF". I have no idea what L1 and PF are, so I'm just calling them
grid1 and grid2."""
grid1 = np.fromfile(filename1, dtype=np.float32).reshape(nrows, ncols)
grid2 = np.fromfile(filename2, dtype=np.float32).reshape(nrows, ncols)
return grid1, grid2
def subset_data(grid1, grid2, config):
"""Select points that satisfy some threshold criteria (explain??) and are
within a user-specified rectangular AOI."""
northing, easting = np.mgrid[:config.nrows, :config.ncols]
easting = config.xstart + config.xgridsize * easting
northing = config.ystart + config.ygridsize * northing
grids = grid1, grid2, easting, northing
grid1, grid2, easting, northing = [item[config.user_aoi] for item in grids]
mask = (grid1 >= config.metal_threshold) & (grid2 >= config.pf_threshold)
return easting[mask], northing[mask]
def save_selected_region(easting, northing, zone, filename):
"""Convert the given eastings and northings (in UTM zone "zone") to
lat/long and save to a tab-delimited-text file."""
lat, lon = utm2geographic(easting, northing, zone)
data = np.vstack([easting, northing, lat, lon]).T
with open(filename, 'w') as outfile:
outfile.write('Easting\tNorthing\tLatitude\tLongitude\n')
np.savetxt(outfile, data, delimiter='\t')
def utm2geographic(easting, northing, zone):
"""We need to know which datum/ellipsoid the UTM coords are in as well!!!!
I'm assuming it's a Clark 1866 ellipsoid, based on the numbers in your
code..."""
utm = pyproj.Proj(proj='utm', zone=zone, ellip='clrk66')
geographic = pyproj.Proj(proj='latlong', ellip='clrk66')
return pyproj.transform(utm, geographic, easting, northing)
class Config(object):
"""Read and store configuration values for (something?)."""
config_file = 'config.txt'
def __init__(self, filename):
"""You should add docstrings to clarify what you're expecting
"filename" to contain."""
with open(filename, 'r') as infile:
crs_values = list(infile)[12].split(',')
crs_values = [float(item) for item in crs_values]
self.xstart, self.ystart = crs_values[3:5]
self.xgridsize, self.ygridsize = crs_values[5:7]
self.zone = int(crs_values[7])
with open(self.config_file, 'r') as infile:
srs_values = list(infile)
self.nrows, self.ncols = srs_values[1], srs_values[4]
# It would be good to explain a bit about these (say, units, etc)
self.metal_threshold = 5.0
self.pf_threshold = 0.9
self.user_aoi = self.read_user_aoi()
def read_user_aoi(self):
"""Get an area of interest of the grids in pixel coordinates."""
top_left = raw_input('Enter top left index\t')
bottom_right = raw_input('Enter bottom right index\t')
min_i, min_j = [int(item) for item in top_left.split()]
max_i, max_j = [int(item) for item in bottom_right.split()]
return slice(min_i, max_i), slice(min_j, max_j)
if __name__ == '__main__':
main()

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

OpenCL: Access neighbors error - python

Related

Random number generators C++ and Python

Implement Suffix Tree

How do I manually blur an image in python?

OpenCV how to smooth contour, reducing noise

How, when and what to vectorize in python?

Categories

Resources