Does matplotlib only capture the center frequencies of each bin - python

I want to know whether matplotlib's spectrogram function only takes the centre frequencies of a signal into account.
For example, plotting a spectrogram (not in decibels), here is the result:
Plotting a spectrogram normally:
Where are the points from (0-50) and between (80-140) going? Are they being removed? If so, why is this exactly?
EDIT: Source code:
Here is matplotlib's _spectral_helper:
def _spectral_helper2(x, y=None, NFFT=None, Fs=None, detrend_func=None,
                      window=None, noverlap=None, pad_to=None,
                      sides=None, scale_by_freq=None, mode=None):
    '''
    This is a helper function that implements the commonality between the
    psd, csd, spectrogram and complex, magnitude, angle, and phase spectrums.
    It is *NOT* meant to be used outside of mlab and may change at any time.
    '''
    if y is None:
        # if y is None use x for y
        same_data = True
    else:
        # The checks for if y is x are so that we can use the same function to
        # implement the core of psd(), csd(), and spectrogram() without doing
        # extra calculations. We return the unaveraged Pxy, freqs, and t.
        same_data = y is x

    if Fs is None:
        Fs = 2
    if noverlap is None:
        noverlap = 0
    if detrend_func is None:
        detrend_func = detrend_none
    if window is None:
        window = window_hanning

    # if NFFT is set to None use the whole signal
    if NFFT is None:
        NFFT = 256

    if mode is None or mode == 'default':
        mode = 'psd'
    elif mode not in ['psd', 'complex', 'magnitude', 'angle', 'phase']:
        raise ValueError("Unknown value for mode %s, must be one of: "
                         "'default', 'psd', 'complex', "
                         "'magnitude', 'angle', 'phase'" % mode)

    if not same_data and mode != 'psd':
        raise ValueError("x and y must be equal if mode is not 'psd'")

    # Make sure we're dealing with a numpy array. If y and x were the same
    # object to start with, keep them that way
    x = np.asarray(x)
    if not same_data:
        y = np.asarray(y)

    if sides is None or sides == 'default':
        if np.iscomplexobj(x):
            sides = 'twosided'
        else:
            sides = 'onesided'
    elif sides not in ['onesided', 'twosided']:
        raise ValueError("Unknown value for sides %s, must be one of: "
                         "'default', 'onesided', or 'twosided'" % sides)

    # zero pad x and y up to NFFT if they are shorter than NFFT
    if len(x) < NFFT:
        n = len(x)
        x = np.resize(x, (NFFT,))
        x[n:] = 0

    if not same_data and len(y) < NFFT:
        n = len(y)
        y = np.resize(y, (NFFT,))
        y[n:] = 0

    if pad_to is None:
        pad_to = NFFT

    if mode != 'psd':
        scale_by_freq = False
    elif scale_by_freq is None:
        scale_by_freq = True

    # For real x, ignore the negative frequencies unless told otherwise
    if sides == 'twosided':
        numFreqs = pad_to
        if pad_to % 2:
            freqcenter = (pad_to - 1)//2 + 1
        else:
            freqcenter = pad_to//2
        scaling_factor = 1.
    elif sides == 'onesided':
        if pad_to % 2:
            numFreqs = (pad_to + 1)//2
        else:
            numFreqs = pad_to//2 + 1
        scaling_factor = 2.

    result = stride_windows(x, NFFT, noverlap, axis=0)
    result = detrend(result, detrend_func, axis=0)
    result, windowVals = apply_window(result, window, axis=0,
                                      return_window=True)
    result = np.fft.fft(result, n=pad_to, axis=0)[:numFreqs, :]
    freqs = np.fft.fftfreq(pad_to, 1/Fs)[:numFreqs]

    if not same_data:
        # if same_data is False, mode must be 'psd'
        resultY = stride_windows(y, NFFT, noverlap)
        resultY = apply_window(resultY, window, axis=0)
        resultY = detrend(resultY, detrend_func, axis=0)
        resultY = np.fft.fft(resultY, n=pad_to, axis=0)[:numFreqs, :]
        result = np.conjugate(result) * resultY
    elif mode == 'psd':
        result = np.conjugate(result) * result
    elif mode == 'magnitude':
        result = np.absolute(result)
    elif mode == 'angle' or mode == 'phase':
        # we unwrap the phase later to handle the onesided vs. twosided case
        result = np.angle(result)
    elif mode == 'complex':
        pass

    if mode == 'psd':
        # Scale the spectrum by the norm of the window to compensate for
        # windowing loss; see Bendat & Piersol Sec 11.5.2.
        result /= (np.abs(windowVals)**2).sum()

        # Also include scaling factors for one-sided densities and dividing by
        # the sampling frequency, if desired. Scale everything, except the DC
        # component and the NFFT/2 component:
        result[1:-1] *= scaling_factor

        # MATLAB divides by the sampling frequency so that density function
        # has units of dB/Hz and can be integrated by the plotted frequency
        # values. Perform the same scaling here.
        if scale_by_freq:
            result /= Fs

    t = np.arange(NFFT/2, len(x) - NFFT/2 + 1, NFFT - noverlap)/Fs

    if sides == 'twosided':
        # center the frequency range at zero
        freqs = np.concatenate((freqs[freqcenter:], freqs[:freqcenter]))
        result = np.concatenate((result[freqcenter:, :],
                                 result[:freqcenter, :]), 0)
    elif not pad_to % 2:
        # get the last value correctly, it is negative otherwise
        freqs[-1] *= -1

    # we unwrap the phase here to handle the onesided vs. twosided case
    if mode == 'phase':
        result = np.unwrap(result, axis=0)

    return result, freqs, t
Here is my attempt in C++
std::vector<std::vector<Complex::complex> > ComputeSTFT(std::vector<double> &vals,
                                                        std::size_t NFFT, std::size_t overlap)
{
    std::vector<double> hanning = getHanningWindow(NFFT);
    double S1 = 0.0;    // sum of window coefficients
    double S2 = 0.0;    // sum of squared window coefficients
    double NENBW = 0.0;
    double ENBW = 0.0;
    double fRes = 0.0;

    // Remove the DC component before framing.
    double avg = 0.0;
    for (unsigned i = 0; i < vals.size(); i++)
        avg += vals[i];
    avg = avg / vals.size();
    for (unsigned i = 0; i < vals.size(); i++)
        vals[i] = vals[i] - avg;

    std::vector<std::vector<double> > temp_vars = frame(vals, NFFT, overlap);
    std::vector<std::vector<Complex::complex> > STFT(temp_vars.size());

    // The window sums depend only on the window itself, so compute them once
    // rather than accumulating them across every frame.
    for (unsigned j = 0; j < NFFT; j++)
    {
        double value = 0.5 * (1 - cos(2 * PI * j / (NFFT - 1)));
        S1 += value;
        S2 += value * value;
    }

    // Apply the Hann window to every frame (the original loop ran to
    // temp_vars.size() - 1 and left the last frame unwindowed).
    for (unsigned i = 0; i < temp_vars.size(); i++)
    {
        for (unsigned j = 0; j < temp_vars[i].size(); j++)
        {
            double value = 0.5 * (1 - cos(2 * PI * j / (NFFT - 1)));
            temp_vars[i][j] = temp_vars[i][j] * value;
        }
    }

    NENBW = NFFT * S2 / (S1 * S1);  // note: S2 / (S1*S1), not (S2/S1)*S1
    // This assumes that the sampling frequency is KNOWN.
    fRes = 12000.0 / NFFT;
    ENBW = NENBW * fRes;

    std::vector<std::vector<Complex::complex> > fft_vars(temp_vars.size());
    for (unsigned i = 0; i < temp_vars.size(); i++)
    {
        FFT f(temp_vars[i].begin(), temp_vars[i].end(), temp_vars[i].size());
        fft_vars[i] = f.transformed();
    }

    // Keep only the one-sided spectrum: bins 0 .. NFFT/2 inclusive.
    for (unsigned i = 0; i < temp_vars.size(); i++)
    {
        STFT[i].resize(temp_vars[i].size() / 2 + 1);
        for (unsigned j = 0; j < temp_vars[i].size() / 2 + 1; j++)
        {
            STFT[i][j].re = fft_vars[i][j].re;
            STFT[i][j].im = fft_vars[i][j].im;
        }
    }
    return STFT;
}
Where am I going wrong to produce such different results?
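For reference, the bin frequencies matplotlib actually returns can be inspected directly; a minimal sketch (assuming a 12 kHz sample rate to match the C++ snippet, with a random signal standing in for real data):
import numpy as np
import matplotlib.mlab as mlab

Fs, NFFT = 12000, 256                  # assumed sample rate / FFT size
x = np.random.randn(Fs)                # stand-in test signal
spec, freqs, t = mlab.specgram(x, NFFT=NFFT, Fs=Fs, noverlap=0)
# freqs holds the FFT bin (centre) frequencies, matching np.fft.fftfreq:
print(np.allclose(freqs[:-1], np.fft.fftfreq(NFFT, 1.0/Fs)[:NFFT//2]))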

Related

ZigZag indicator in Python

I would like to create a zigzag indicator for stocks. I'm working with Python, and my English is bad, so my apologies for that.
I took part of my code from: Pandas: Zigzag segmentation of data based on local minima-maxima
The problem is that the zigzag I would like is this (the Metastock zigzag indicator):
And my zigzag code looks like this (note that you can change the percentage with a filter):
from pandas_datareader import data
import pandas as pd
from datetime import date
from pandas_datareader.nasdaq_trader import get_nasdaq_symbols
from scipy import signal
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

np.random.seed(0)

def filter(values, percentage):
    previous = values[0]
    mask = [True]
    for value in values[1:]:
        relative_difference = np.abs(value - previous)/previous
        if relative_difference > percentage:
            previous = value
            mask.append(True)
        else:
            mask.append(False)
    return mask

def main(stock=None, start_date=None, end_date=None):
    df = data.DataReader(
        stock,
        start=start_date, end=end_date,
        data_source='yahoo'
    )
    return df

if __name__ == '__main__':
    today = '{}'.format(date.today())
    stocks = ['BLL']
    cont = 0
    for stock in stocks:
        cont += 1
        try:
            serie = main(stock=stock, start_date='2018-1-1', end_date=today)
            serie.insert(loc=0, column='Date', value=serie.index)
            serie = serie.reset_index(drop=True)
            # Create zigzag trendline.
            ########################################
            # Find peaks (max).
            data_x = serie.index.values
            data_y = serie['Close'].values
            peak_indexes = signal.argrelextrema(data_y, np.greater)
            peak_indexes = peak_indexes[0]
            # Find valleys (min).
            valley_indexes = signal.argrelextrema(data_y, np.less)
            valley_indexes = valley_indexes[0]
            # Merge peaks and valleys data points using pandas.
            df_peaks = pd.DataFrame({'date': data_x[peak_indexes], 'zigzag_y': data_y[peak_indexes]})
            df_valleys = pd.DataFrame({'date': data_x[valley_indexes], 'zigzag_y': data_y[valley_indexes]})
            df_peaks_valleys = pd.concat([df_peaks, df_valleys], axis=0, ignore_index=True, sort=True)
            # Sort peak and valley datapoints by date.
            df_peaks_valleys = df_peaks_valleys.sort_values(by=['date'])
            p = 0.1  # 10%
            filter_mask = filter(df_peaks_valleys.zigzag_y, p)
            filtered = df_peaks_valleys[filter_mask]
            # Instantiate axes.
            (fig, ax) = plt.subplots(figsize=(10, 10))
            # Plot the extrema.
            ax.plot(df_peaks_valleys['date'].values, df_peaks_valleys['zigzag_y'].values,
                    color='red', label="Extrema")
            # Plot zigzag trendline.
            ax.plot(filtered['date'].values, filtered['zigzag_y'].values,
                    color='blue', label="ZigZag")
            # Plot original line.
            ax.plot(data_x, data_y, linestyle='dashed', color='black', label="Org. line", linewidth=1)
            plt.show()
            print('{} - {}| success'.format(cont, stock))
        except Exception:
            print('{} - {}| ERROR'.format(cont, stock))
Here is an example on GitHub: zigzag
cimport cython
import numpy as np
from numpy cimport ndarray, int_t

DEF PEAK = 1
DEF VALLEY = -1

@cython.boundscheck(False)
@cython.wraparound(False)
cpdef int_t identify_initial_pivot(double [:] X,
                                   double up_thresh,
                                   double down_thresh):
    cdef:
        double x_0 = X[0]
        double x_t = x_0
        double max_x = x_0
        double min_x = x_0
        int_t max_t = 0
        int_t min_t = 0

    up_thresh += 1
    down_thresh += 1

    for t in range(1, len(X)):
        x_t = X[t]

        if x_t / min_x >= up_thresh:
            return VALLEY if min_t == 0 else PEAK

        if x_t / max_x <= down_thresh:
            return PEAK if max_t == 0 else VALLEY

        if x_t > max_x:
            max_x = x_t
            max_t = t

        if x_t < min_x:
            min_x = x_t
            min_t = t

    t_n = len(X)-1
    return VALLEY if x_0 < X[t_n] else PEAK

@cython.boundscheck(False)
@cython.wraparound(False)
cpdef peak_valley_pivots(double [:] X,
                         double up_thresh,
                         double down_thresh):
    """
    Find the peaks and valleys of a series.

    :param X: the series to analyze
    :param up_thresh: minimum relative change necessary to define a peak
    :param down_thresh: minimum relative change necessary to define a valley
    :return: an array with 0 indicating no pivot and -1 and 1 indicating
        valley and peak

    The First and Last Elements
    ---------------------------
    The first and last elements are guaranteed to be annotated as peak or
    valley even if the segments formed do not have the necessary relative
    changes. This is a tradeoff between technical correctness and the
    propensity to make mistakes in data analysis. The possible mistake is
    ignoring data outside the fully realized segments, which may bias
    analysis.
    """
    if down_thresh > 0:
        raise ValueError('The down_thresh must be negative.')

    cdef:
        int_t initial_pivot = identify_initial_pivot(X,
                                                     up_thresh,
                                                     down_thresh)
        int_t t_n = len(X)
        ndarray[int_t, ndim=1] pivots = np.zeros(t_n, dtype=np.int_)
        int_t trend = -initial_pivot
        int_t last_pivot_t = 0
        double last_pivot_x = X[0]
        double x, r

    pivots[0] = initial_pivot

    # Adding one to the relative change thresholds saves operations. Instead
    # of computing relative change at each point as x_j / x_i - 1, it is
    # computed as x_j / x_i. Then, this value is compared to the threshold + 1.
    # This saves (t_n - 1) subtractions.
    up_thresh += 1
    down_thresh += 1

    for t in range(1, t_n):
        x = X[t]
        r = x / last_pivot_x

        if trend == -1:
            if r >= up_thresh:
                pivots[last_pivot_t] = trend
                trend = PEAK
                last_pivot_x = x
                last_pivot_t = t
            elif x < last_pivot_x:
                last_pivot_x = x
                last_pivot_t = t
        else:
            if r <= down_thresh:
                pivots[last_pivot_t] = trend
                trend = VALLEY
                last_pivot_x = x
                last_pivot_t = t
            elif x > last_pivot_x:
                last_pivot_x = x
                last_pivot_t = t

    if last_pivot_t == t_n-1:
        pivots[last_pivot_t] = trend
    elif pivots[t_n-1] == 0:
        pivots[t_n-1] = -trend

    return pivots

@cython.boundscheck(False)
@cython.wraparound(False)
cpdef double max_drawdown(ndarray[double, ndim=1] X):
    """
    Compute the maximum drawdown of some sequence.

    :return: 0 if the sequence is strictly increasing.
        otherwise the abs value of the maximum drawdown
        of sequence X
    """
    cdef:
        double mdd = 0
        double peak = X[0]
        double x, dd

    for x in X:
        if x > peak:
            peak = x

        dd = (peak - x) / peak

        if dd > mdd:
            mdd = dd

    return mdd if mdd != 0.0 else 0.0

@cython.boundscheck(False)
@cython.wraparound(False)
def pivots_to_modes(int_t [:] pivots):
    """
    Translate pivots into trend modes.

    :param pivots: the result of calling ``peak_valley_pivots``
    :return: numpy array of trend modes. That is, between (VALLEY, PEAK] it
        is 1 and between (PEAK, VALLEY] it is -1.
    """
    cdef:
        int_t x, t
        ndarray[int_t, ndim=1] modes = np.zeros(len(pivots),
                                                dtype=np.int_)
        int_t mode = -pivots[0]

    modes[0] = pivots[0]

    for t in range(1, len(pivots)):
        x = pivots[t]
        if x != 0:
            modes[t] = mode
            mode = -x
        else:
            modes[t] = mode

    return modes

def compute_segment_returns(X, pivots):
    """
    :return: numpy array of the pivot-to-pivot returns for each segment."""
    pivot_points = X[pivots != 0]
    return pivot_points[1:] / pivot_points[:-1] - 1.0
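For completeness, a minimal usage sketch of the functions above (assuming the module has been compiled and is importable as zigzag, and using a synthetic random-walk series as stand-in data):
import numpy as np
from zigzag import peak_valley_pivots, compute_segment_returns

np.random.seed(0)
prices = np.cumprod(1 + np.random.normal(0, 0.01, 500))  # synthetic series
pivots = peak_valley_pivots(prices, 0.03, -0.03)          # +/-3% thresholds
returns = compute_segment_returns(prices, pivots)
# The zigzag line is just the series sampled at the pivot points:
idx = np.nonzero(pivots)[0]
zigzag_line = list(zip(idx, prices[idx]))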
TradingView has an excellent zigzag. You can see the code in Pine (the TradingView language), but you can translate it into Python.
//@version=5
indicator("Zig Zag", overlay=true, max_lines_count=500, max_labels_count=500)

dev_threshold = input.float(title="Deviation (%)", defval=5.0, minval=0.00001, maxval=100.0)
depth = input.int(title="Depth", defval=10, minval=1)
line_color = input(title="Line Color", defval=#2962FF)
extend_to_last_bar = input(title="Extend to Last Bar", defval=true)
display_reversal_price = input(title="Display Reversal Price", defval=true)
display_cumulative_volume = input(title="Display Cumulative Volume", defval=true)
display_reversal_price_change = input(title="Display Reversal Price Change", defval=true, inline="price rev")
difference_price = input.string("Absolute", "", options=["Absolute", "Percent"], inline="price rev")

pivots(src, length, isHigh) =>
    p = nz(src[length])
    if length == 0
        [time, p]
    else
        isFound = true
        for i = 0 to math.abs(length - 1)
            if isHigh and src[i] > p
                isFound := false
            if not isHigh and src[i] < p
                isFound := false
        for i = length + 1 to 2 * length
            if isHigh and src[i] >= p
                isFound := false
            if not isHigh and src[i] <= p
                isFound := false
        if isFound and length * 2 <= bar_index
            [time[length], p]
        else
            [int(na), float(na)]

[iH, pH] = pivots(high, math.floor(depth / 2), true)
[iL, pL] = pivots(low, math.floor(depth / 2), false)

calc_dev(base_price, price) =>
    100 * (price - base_price) / base_price

price_rotation_aggregate(price_rotation, pLast, cum_volume) =>
    str = ""
    if display_reversal_price
        str += str.tostring(pLast, format.mintick) + " "
    if display_reversal_price_change
        str += price_rotation + " "
    if display_cumulative_volume
        str += "\n" + cum_volume
    str

caption(isHigh, iLast, pLast, price_rotation, cum_volume) =>
    price_rotation_str = price_rotation_aggregate(price_rotation, pLast, cum_volume)
    if display_reversal_price or display_reversal_price_change or display_cumulative_volume
        if not isHigh
            label.new(iLast, pLast, text=price_rotation_str, style=label.style_none, xloc=xloc.bar_time, yloc=yloc.belowbar, textcolor=color.red)
        else
            label.new(iLast, pLast, text=price_rotation_str, style=label.style_none, xloc=xloc.bar_time, yloc=yloc.abovebar, textcolor=color.green)

price_rotation_diff(pLast, price) =>
    if display_reversal_price_change
        tmp_calc = price - pLast
        str = difference_price == "Absolute" ? (math.sign(tmp_calc) > 0 ? "+" : "") + str.tostring(tmp_calc, format.mintick) : (math.sign(tmp_calc) > 0 ? "+" : "-") + str.tostring((math.abs(tmp_calc) * 100) / pLast, format.percent)
        str := "(" + str + ")"
        str
    else
        ""

var line lineLast = na
var label labelLast = na
var int iLast = 0
var float pLast = 0
var bool isHighLast = true // otherwise the last pivot is a low pivot
var int linesCount = 0
var float sumVol = 0
var float sumVolLast = 0

pivotFound(dev, isHigh, index, price) =>
    if isHighLast == isHigh and not na(lineLast)
        // same direction
        if isHighLast ? price > pLast : price < pLast
            if linesCount <= 1
                line.set_xy1(lineLast, index, price)
            line.set_xy2(lineLast, index, price)
            label.set_xy(labelLast, index, price)
            label.set_text(labelLast, price_rotation_aggregate(price_rotation_diff(line.get_y1(lineLast), price), price, str.tostring(sumVol + sumVolLast, format.volume)))
            [lineLast, labelLast, isHighLast, false, sumVol + sumVolLast]
        else
            [line(na), label(na), bool(na), false, float(na)]
    else // reverse the direction (or create the very first line)
        if na(lineLast)
            id = line.new(index, price, index, price, xloc=xloc.bar_time, color=line_color, width=2)
            lb = caption(isHigh, index, price, price_rotation_diff(pLast, price), str.tostring(sumVol, format.volume))
            [id, lb, isHigh, true, sumVol]
        else
            // price move is significant
            if math.abs(dev) >= dev_threshold
                id = line.new(iLast, pLast, index, price, xloc=xloc.bar_time, color=line_color, width=2)
                lb = caption(isHigh, index, price, price_rotation_diff(pLast, price), str.tostring(sumVol, format.volume))
                [id, lb, isHigh, true, sumVol]
            else
                [line(na), label(na), bool(na), false, float(na)]

sumVol += nz(volume[math.floor(depth / 2)])

if not na(iH) and not na(iL) and iH == iL
    dev1 = calc_dev(pLast, pH)
    [id2, lb2, isHigh2, isNew2, sum2] = pivotFound(dev1, true, iH, pH)
    if isNew2
        linesCount := linesCount + 1
    if not na(id2)
        lineLast := id2
        labelLast := lb2
        isHighLast := isHigh2
        iLast := iH
        pLast := pH
        sumVolLast := sum2
        sumVol := 0

    dev2 = calc_dev(pLast, pL)
    [id1, lb1, isHigh1, isNew1, sum1] = pivotFound(dev2, false, iL, pL)
    if isNew1
        linesCount := linesCount + 1
    if not na(id1)
        lineLast := id1
        labelLast := lb1
        isHighLast := isHigh1
        iLast := iL
        pLast := pL
        sumVolLast := sum1
        sumVol := 0
else
    if not na(iH)
        dev1 = calc_dev(pLast, pH)
        [id, lb, isHigh, isNew, sum] = pivotFound(dev1, true, iH, pH)
        if isNew
            linesCount := linesCount + 1
        if not na(id)
            lineLast := id
            labelLast := lb
            isHighLast := isHigh
            iLast := iH
            pLast := pH
            sumVolLast := sum
            sumVol := 0
    else
        if not na(iL)
            dev2 = calc_dev(pLast, pL)
            [id, lb, isHigh, isNew, sum] = pivotFound(dev2, false, iL, pL)
            if isNew
                linesCount := linesCount + 1
            if not na(id)
                lineLast := id
                labelLast := lb
                isHighLast := isHigh
                iLast := iL
                pLast := pL
                sumVolLast := sum
                sumVol := 0

var line extend_line = na
var label extend_label = na

if extend_to_last_bar == true and barstate.islast == true
    isHighLastPoint = not isHighLast
    curSeries = isHighLastPoint ? high : low
    if na(extend_line) and na(extend_label)
        extend_line := line.new(line.get_x2(lineLast), line.get_y2(lineLast), time, curSeries, xloc=xloc.bar_time, color=line_color, width=2)
        extend_label := caption(not isHighLast, time, curSeries, price_rotation_diff(line.get_y2(lineLast), curSeries), str.tostring(sumVol, format.volume))

    line.set_xy1(extend_line, line.get_x2(lineLast), line.get_y2(lineLast))
    line.set_xy2(extend_line, time, curSeries)

    price_rotation = price_rotation_diff(line.get_y1(extend_line), curSeries)
    remaingRealTimeVol = 0.
    for i = math.abs(math.floor(depth / 2) - 1) to 0
        remaingRealTimeVol += volume[i]
    label.set_xy(extend_label, time, curSeries)
    label.set_text(extend_label, price_rotation_aggregate(price_rotation, curSeries, str.tostring(sumVol + remaingRealTimeVol, format.volume)))
    label.set_textcolor(extend_label, isHighLastPoint ? color.green : color.red)
    label.set_yloc(extend_label, yloc=isHighLastPoint ? yloc.abovebar : yloc.belowbar)
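If you only need the pivot-detection part in Python, here is a rough translation of just the pivots() helper above (a sketch, not the full indicator; it assumes src is a 1-D numpy array ordered oldest-to-newest and checks whether the bar `length` bars back is a confirmed pivot, mirroring the two Pine loops):
import numpy as np

def pivots(src, length, is_high):
    # Candidate bar sits `length` bars back from the newest bar; the Pine
    # length == 0 shortcut (return the current bar) is omitted here.
    n = len(src)
    if length == 0 or n < 2 * length + 1:
        return None
    center = n - 1 - length
    p = src[center]
    older = src[center - length:center]   # Pine: i = length+1 .. 2*length
    newer = src[center + 1:]              # Pine: i = 0 .. length-1
    if is_high and older.max() < p and newer.max() <= p:
        return center, p
    if not is_high and older.min() > p and newer.min() >= p:
        return center, p
    return None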

Convert Eigen to numpy matrix

I have the following code in C++ that uses the Eigen library, and I need help translating it to Python (numpy).
Initialization
double b = 20.0;
Eigen::Vector3d C(1.0/10.2, 1.0/10.2, 1.0/30.0);
Eigen::MatrixXd U(5200, 3);
int i = 0;
for (double x = 10.2/2.0; x < 200; x += 10) {
    for (double y = 10.2/2.0; y < 200; y += 10) {
        for (double psi = 0; psi <= 360; psi += 30) {
            U(i, 0) = x;
            U(i, 1) = y;
            U(i, 2) = psi;
            i += 1;
        }
    }
}
Function:
Eigen::VectorXd operator()(const Eigen::VectorXd& s) {
    Eigen::VectorXd p(length());
    p(0) = s[0];
    p(1) = s[1];
    p(2) = s[2];
    p(3) = s[3];
    for (int i = 0; i < U.rows(); i++) {
        p(i + 4) = b*exp(-0.5*(s.tail(U.cols()) - U.row(i).transpose()).dot(C*(s.tail(U.cols())
                   - U.row(i).transpose())));
        if (p(i + 4) < 0.1) {
            p(i + 4) = 0;
        }
    }
    return p;
}
Python version
Initialization:
my_x = 10.2/2.0
my_y = 10.2/2.0
my_psi = 0
xx = []
while my_x < 200:
    xx.append(my_x)
    my_x += 10
yy = []
while my_y < 200:
    yy.append(my_y)
    my_y += 10
pps = []
while my_psi <= 360:
    pps.append(my_psi)
    my_psi += 30
U = []
for x in xx:
    for y in yy:
        for p in pps:
            U.append([x, y, p])
U = numpy.matrix(U)
C = numpy.array([1.0/10.2, 1.0/10.2, 1.0/30.0])
b = 20.0
The Function
Instead of operator() I will call the function doSomething()
def doSomething(s):  # Where s is a numpy array (1-d vector)
    p[0:4] = s[0:4]
    for i in range(U.shape[0]):
        s_dash = -0.5*(s - U[i].T)
        s_ddash = C*s
        s_dddash = s_dash.dot(s_ddash) - U[i].T
        p[i+4] = b * numpy.exp(s_dddash)
        if p[i+4] < 0.1: p[i+4] = 0
What I am confused about:
In the C++ implementation, p[i+4] is supposed to be a single value.
In my Python version, I get p[i+4] as a square matrix.
Each p[i+4] is a zero matrix.
I am unable to decipher my mistake. Please help!
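For comparison, here is a corrected sketch of what the C++ loop computes (my assumptions: U is a plain 2-D ndarray rather than numpy.matrix, and C is applied elementwise as a diagonal metric, which is what makes the quadratic form collapse to a scalar):
import numpy as np

def do_something(s, U, C, b=20.0):
    # s is a 1-D vector; its last U.shape[1] entries play the role of
    # s.tail(U.cols()) in the Eigen code.
    U = np.asarray(U)                  # avoid numpy.matrix 2-D row semantics
    p = np.zeros(4 + U.shape[0])
    p[0:4] = s[0:4]
    tail = s[-U.shape[1]:]
    for i in range(U.shape[0]):
        d = tail - U[i]                        # row difference, shape (3,)
        val = b * np.exp(-0.5 * d.dot(C * d))  # C*d elementwise -> scalar dot
        p[i + 4] = val if val >= 0.1 else 0.0
    return p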

optical flow .flo files

I have a few questions about doing optical flow projects. I use Python 2 (planning to use Lasagne to learn optical flow with deep learning), and I don't know how to convert the C++ visualization functions to Python.
I downloaded some image pairs (from http://vision.middlebury.edu/flow/data/comp/zip/other-gt-flow.zip) for which I have to estimate the optical flow, together with their ground-truth flow (.flo files). The problem is that when I read a .flo file into the program, I get a flat vector. How do I view the flows the way they are shown on the webpage (http://vision.middlebury.edu/flow/data/)? I read from various sources and tried the following, but it doesn't work.
When evaluating EPE (end-point error), in what form should my prediction be so that it can be compared with the .flo file?
The code:
################################ Reading flow file ################################
f = open('flow10.flo', 'rb')
x = np.fromfile(f, np.int32, count=1) # not sure what this gives
w = np.fromfile(f, np.int32, count=1) # width
h = np.fromfile(f, np.int32, count=1) # height
print 'x %d, w %d, h %d flo file' % (x, w, h)
data = np.fromfile(f, np.float32) # vector
data_2D = np.reshape(data, newshape=(388,584,2)); # convert to x,y - flow
x = data_2D[...,0]; y = data_2D[...,1];
################################ visualising flow file ################################
mag, ang = cv2.cartToPolar(x,y)
hsv = np.zeros_like(x)
hsv = np.array([ hsv,hsv,hsv ])
hsv = np.reshape(hsv, (388,584,3)); # having rgb channel
hsv[...,1] = 255; # full green channel
hsv[...,0] = ang*180/np.pi/2 # angle in pi
hsv[...,2] = cv2.normalize(mag,None,0,255,cv2.NORM_MINMAX) # magnitude [0,255]
bgr = cv2.cvtColor(hsv,cv2.COLOR_HSV2BGR)
bgr = draw_hsv(data_2D)
cv2.imwrite('opticalhsv.png',bgr)
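For reference, a minimal reader sketch following the published .flo layout (a float32 sanity-check value of 202021.25, then int32 width and height, then interleaved (u, v) float32 pairs in row-major order); read_flo and epe are hypothetical helper names:
import numpy as np

def read_flo(path):
    # .flo layout: float32 magic 202021.25, int32 width, int32 height,
    # then height*width*2 float32 values (u, v interleaved).
    with open(path, 'rb') as f:
        magic = np.fromfile(f, np.float32, count=1)[0]
        assert magic == 202021.25, 'invalid .flo magic'
        w = int(np.fromfile(f, np.int32, count=1)[0])
        h = int(np.fromfile(f, np.int32, count=1)[0])
        data = np.fromfile(f, np.float32, count=2 * w * h)
    return data.reshape(h, w, 2)   # note the (height, width, 2) order

def epe(pred, gt):
    # End-point error: mean Euclidean distance between predicted and
    # ground-truth flow vectors, so pred should also be (h, w, 2).
    return np.sqrt(((pred - gt) ** 2).sum(axis=2)).mean()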
On Middlebury's page there is a zip file called flow-code (http://vision.middlebury.edu/flow/code/flow-code.zip), which provides a tool called color_flow to convert those .flo files to color images.
On the other hand, if you want to implement your own code to do the transformation, I have this piece of code (I cannot credit the original author; it has been some time) that helps you first compute the color:
static Vec3b computeColor(float fx, float fy)
{
    static bool first = true;

    // relative lengths of color transitions:
    // these are chosen based on perceptual similarity
    // (e.g. one can distinguish more shades between red and yellow
    //  than between yellow and green)
    const int RY = 15;
    const int YG = 6;
    const int GC = 4;
    const int CB = 11;
    const int BM = 13;
    const int MR = 6;
    const int NCOLS = RY + YG + GC + CB + BM + MR;
    static Vec3i colorWheel[NCOLS];

    if (first)
    {
        int k = 0;

        for (int i = 0; i < RY; ++i, ++k)
            colorWheel[k] = Vec3i(255, 255 * i / RY, 0);

        for (int i = 0; i < YG; ++i, ++k)
            colorWheel[k] = Vec3i(255 - 255 * i / YG, 255, 0);

        for (int i = 0; i < GC; ++i, ++k)
            colorWheel[k] = Vec3i(0, 255, 255 * i / GC);

        for (int i = 0; i < CB; ++i, ++k)
            colorWheel[k] = Vec3i(0, 255 - 255 * i / CB, 255);

        for (int i = 0; i < BM; ++i, ++k)
            colorWheel[k] = Vec3i(255 * i / BM, 0, 255);

        for (int i = 0; i < MR; ++i, ++k)
            colorWheel[k] = Vec3i(255, 0, 255 - 255 * i / MR);

        first = false;
    }

    const float rad = sqrt(fx * fx + fy * fy);
    const float a = atan2(-fy, -fx) / (float)CV_PI;

    const float fk = (a + 1.0f) / 2.0f * (NCOLS - 1);
    const int k0 = static_cast<int>(fk);
    const int k1 = (k0 + 1) % NCOLS;
    const float f = fk - k0;

    Vec3b pix;

    for (int b = 0; b < 3; b++)
    {
        const float col0 = colorWheel[k0][b] / 255.f;
        const float col1 = colorWheel[k1][b] / 255.f;

        float col = (1 - f) * col0 + f * col1;

        if (rad <= 1)
            col = 1 - rad * (1 - col); // increase saturation with radius
        else
            col *= .75; // out of range

        pix[2 - b] = static_cast<uchar>(255.f * col);
    }

    return pix;
}
Then it calls the above function for all the pixels:
static void drawOpticalFlow(const Mat_<Point2f>& flow, Mat& dst, float maxmotion = -1)
{
    dst.create(flow.size(), CV_8UC3);
    dst.setTo(Scalar::all(0));

    // determine motion range:
    float maxrad = maxmotion;

    if (maxmotion <= 0)
    {
        maxrad = 1;
        for (int y = 0; y < flow.rows; ++y)
        {
            for (int x = 0; x < flow.cols; ++x)
            {
                Point2f u = flow(y, x);

                if (!isFlowCorrect(u))
                    continue;

                maxrad = max(maxrad, sqrt(u.x * u.x + u.y * u.y));
            }
        }
    }

    for (int y = 0; y < flow.rows; ++y)
    {
        for (int x = 0; x < flow.cols; ++x)
        {
            Point2f u = flow(y, x);

            if (isFlowCorrect(u))
                dst.at<Vec3b>(y, x) = computeColor(u.x / maxrad, u.y / maxrad);
        }
    }
}
This is for my use with OpenCV, but the code should help anyone who wants to achieve something similar.

OpenCV how to smooth contour, reducing noise

I extracted the contours of an image, which you can see here:
However, it has some noise.
How can I smooth the noise? I did a close-up to make clearer what I mean:
Original image that I've used:
Code:
rMaskgray = cv2.imread('redmask.jpg', cv2.CV_LOAD_IMAGE_GRAYSCALE)
(thresh, binRed) = cv2.threshold(rMaskgray, 50, 255, cv2.THRESH_BINARY)

Rcontours, hier_r = cv2.findContours(binRed, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)
r_areas = [cv2.contourArea(c) for c in Rcontours]
max_rarea = np.max(r_areas)
CntExternalMask = np.ones(binRed.shape[:2], dtype="uint8") * 255

for c in Rcontours:
    if (cv2.contourArea(c) > max_rarea * 0.70) and (cv2.contourArea(c) < max_rarea):
        cv2.drawContours(CntExternalMask, [c], -1, 0, 1)

cv2.imwrite('contour1.jpg', CntExternalMask)
Try an upgrade to OpenCV 3.1.0. After some code adaptations for the new version as shown below, I tried it out with OpenCV version 3.1.0 and did not see any of the effects you are describing.
import cv2
import numpy as np

print cv2.__version__

rMaskgray = cv2.imread('5evOn.jpg', 0)
(thresh, binRed) = cv2.threshold(rMaskgray, 50, 255, cv2.THRESH_BINARY)

_, Rcontours, hier_r = cv2.findContours(binRed, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)
r_areas = [cv2.contourArea(c) for c in Rcontours]
max_rarea = np.max(r_areas)
CntExternalMask = np.ones(binRed.shape[:2], dtype="uint8") * 255

for c in Rcontours:
    if (cv2.contourArea(c) > max_rarea * 0.70) and (cv2.contourArea(c) < max_rarea):
        cv2.drawContours(CntExternalMask, [c], -1, 0, 1)

cv2.imwrite('contour1.jpg', CntExternalMask)
I don't know if it is OK to provide Java code, but I implemented Gaussian smoothing for an OpenCV contour. The logic and theory are taken from here: https://www.morethantechnical.com/2012/12/07/resampling-smoothing-and-interest-points-of-curves-via-css-in-opencv-w-code/
package CurveTools;

import org.apache.log4j.Logger;
import org.opencv.core.Mat;
import org.opencv.core.MatOfPoint;
import org.opencv.core.Point;

import java.util.ArrayList;
import java.util.List;

import static org.opencv.core.CvType.CV_64F;
import static org.opencv.imgproc.Imgproc.getGaussianKernel;

class CurveSmoother {

    private double[] g, dg, d2g, gx, dx, d2x;
    private double gx1, dgx1, d2gx1;

    public double[] kappa, smoothX, smoothY;
    public double[] contourX, contourY;

    /* 1st and 2nd derivative of 1D gaussian */
    void getGaussianDerivs(double sigma, int M) {
        int L = (M - 1) / 2;
        double sigma_sq = sigma * sigma;
        double sigma_quad = sigma_sq * sigma_sq;

        dg = new double[M];
        d2g = new double[M];
        g = new double[M];

        Mat tmpG = getGaussianKernel(M, sigma, CV_64F);

        for (double i = -L; i < L + 1.0; i += 1.0) {
            int idx = (int) (i + L);
            g[idx] = tmpG.get(idx, 0)[0];
            // from http://www.cedar.buffalo.edu/~srihari/CSE555/Normal2.pdf
            dg[idx] = -i * g[idx] / sigma_sq;
            d2g[idx] = (-sigma_sq + i * i) * g[idx] / sigma_quad;
        }
    }

    /* 1st and 2nd derivative of smoothed curve point */
    void getdX(double[] x, int n, double sigma, boolean isOpen) {
        int L = (g.length - 1) / 2;

        gx1 = dgx1 = d2gx1 = 0.0;
        for (int k = -L; k < L + 1; k++) {
            double x_n_k;
            if (n - k < 0) {
                if (isOpen) {
                    //open curve - mirror values on border
                    x_n_k = x[-(n - k)];
                } else {
                    //closed curve - take values from end of curve
                    x_n_k = x[x.length + (n - k)];
                }
            } else if (n - k > x.length - 1) {
                if (isOpen) {
                    //mirror value on border
                    x_n_k = x[n + k];
                } else {
                    x_n_k = x[(n - k) - x.length];
                }
            } else {
                x_n_k = x[n - k];
            }

            gx1 += x_n_k * g[k + L]; //gaussians go [0 -> M-1]
            dgx1 += x_n_k * dg[k + L];
            d2gx1 += x_n_k * d2g[k + L];
        }
    }

    /* 0th, 1st and 2nd derivatives of whole smoothed curve */
    void getdXcurve(double[] x, double sigma, boolean isOpen) {
        gx = new double[x.length];
        dx = new double[x.length];
        d2x = new double[x.length];

        for (int i = 0; i < x.length; i++) {
            getdX(x, i, sigma, isOpen);
            gx[i] = gx1;
            dx[i] = dgx1;
            d2x[i] = d2gx1;
        }
    }

    /*
        compute curvature of curve after gaussian smoothing
        from "Shape similarity retrieval under affine transforms", Mokhtarian & Abbasi 2002
        curvex - x position of points
        curvey - y position of points
        kappa - curvature coeff for each point
        sigma - gaussian sigma
    */
    void computeCurveCSS(double[] curvex, double[] curvey, double sigma, boolean isOpen) {
        int M = (int) Math.round((10.0 * sigma + 1.0) / 2.0) * 2 - 1;
        assert (M % 2 == 1); //M is an odd number

        getGaussianDerivs(sigma, M); //, g, dg, d2g

        double[] X, XX, Y, YY;

        getdXcurve(curvex, sigma, isOpen);
        smoothX = gx.clone();
        X = dx.clone();
        XX = d2x.clone();

        getdXcurve(curvey, sigma, isOpen);
        smoothY = gx.clone();
        Y = dx.clone();
        YY = d2x.clone();

        kappa = new double[curvex.length];
        for (int i = 0; i < curvex.length; i++) {
            // Mokhtarian 02' eqn (4)
            kappa[i] = (X[i] * YY[i] - XX[i] * Y[i]) / Math.pow(X[i] * X[i] + Y[i] * Y[i], 1.5);
        }
    }

    /* find zero crossings on curvature */
    ArrayList<Integer> findCSSInterestPoints() {
        assert (kappa != null);

        ArrayList<Integer> crossings = new ArrayList<>();

        for (int i = 0; i < kappa.length - 1; i++) {
            if ((kappa[i] < 0.0 && kappa[i + 1] > 0.0) || kappa[i] > 0.0 && kappa[i + 1] < 0.0) {
                crossings.add(i);
            }
        }
        return crossings;
    }

    public void polyLineSplit(MatOfPoint pl) {
        contourX = new double[pl.height()];
        contourY = new double[pl.height()];

        for (int j = 0; j < contourX.length; j++) {
            contourX[j] = pl.get(j, 0)[0];
            contourY[j] = pl.get(j, 0)[1];
        }
    }

    public MatOfPoint polyLineMerge(double[] xContour, double[] yContour) {
        assert (xContour.length == yContour.length);

        MatOfPoint pl = new MatOfPoint();

        List<Point> list = new ArrayList<>();
        for (int j = 0; j < xContour.length; j++)
            list.add(new Point(xContour[j], yContour[j]));

        pl.fromList(list);

        return pl;
    }

    MatOfPoint smoothCurve(MatOfPoint curve, double sigma) {
        int M = (int) Math.round((10.0 * sigma + 1.0) / 2.0) * 2 - 1;
        assert (M % 2 == 1); //M is an odd number

        //create kernels
        getGaussianDerivs(sigma, M);
        polyLineSplit(curve);

        getdXcurve(contourX, sigma, false);
        smoothX = gx.clone();

        getdXcurve(contourY, sigma, false);
        smoothY = gx;

        Logger.getRootLogger().info("Smooth curve len: " + smoothX.length);

        return polyLineMerge(smoothX, smoothY);
    }
}
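If you would rather stay in Python than port the Java class, the same idea (low-pass filtering the x and y coordinate sequences of a closed contour) can be sketched with scipy; gaussian_filter1d with mode='wrap' plays the role of the periodic Gaussian convolution above:
import numpy as np
from scipy.ndimage import gaussian_filter1d

def smooth_contour(contour, sigma=3.0):
    # contour: (N, 1, 2) array as returned by cv2.findContours.
    # Smooth x and y independently; 'wrap' treats the contour as closed.
    pts = contour.reshape(-1, 2).astype(float)
    x = gaussian_filter1d(pts[:, 0], sigma, mode='wrap')
    y = gaussian_filter1d(pts[:, 1], sigma, mode='wrap')
    return np.stack([x, y], axis=1).round().astype(np.int32).reshape(-1, 1, 2)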

How, when and what to vectorize in python?

Right, so this is basically a follow-up to an earlier question of mine. I have some binary data in floating-point binary format. Using C, the process is fast, but I lose some precision with atof(). I tried looking through the forum, and also elsewhere, but my problem was not solved. As such, I moved to Python. Ah, joy! The program worked perfectly well, but it is very slow compared to C. I looked up optimizations for Python, which pointed me to Cython and Weave, but I have some doubts. If you follow my code, I am confused about where to apply the optimizing C code, since I am reading from a numpy object. My question: is it possible to read data using numpy functions within Cython, and if so, could you provide a small example?
The C code uses PolSARpro's header files, and libbmp for creating the .bmp file.
As a note, I am posting both of my codes. God knows I had to go through a lot just to get the formulas working. This way, others in need can give their thoughts and input too :)
C code (working, but atof() loses precision, so the output lat/long values are slightly off):
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <polSARpro/bmpfile.c>
#include <polSARpro/graphics.c>
#include <polSARpro/matrix.c>
#include <polSARpro/processing.c>
#include <polSARpro/util.c>

#define METAL_THRESHOLD 5.000000
#define POLARIZATION_FRACTION_THRESHOLD 0.900000
#define PI 3.14159265
#define FOURTHPI PI/4
#define deg2rad PI/180
#define rad2deg 180./PI

/*double PI = 3.14159265;
double FOURTHPI = PI / 4;
double deg2rad = PI / 180;
double rad2deg = 180.0 / PI;*/

FILE *L1,*PF,*SPF;
FILE *txt;
FILE *finalLocations;
long i=0,loop_end;
int lig,col;
float l1,pf,spf;
long pos;
int Nlig,Ncol;
float *bufferout;
float *bufferin_L1,*bufferin_L2;
float valueL1,valuePF,xx;
float sizeGridX, sizeGridY, startX, startY;
float posX,posY;
int ZONE;
char Heading[10];
char setZone[15];
int p[4][2];
int degree, minute, second;

void UTM2LL(int ReferenceEllipsoid, double UTMNorthing, double UTMEasting, char* UTMZone, double *Lat, double *Long)
{
    //converts UTM coords to lat/long. Equations from USGS Bulletin 1532
    //East Longitudes are positive, West longitudes are negative.
    //North latitudes are positive, South latitudes are negative
    //Lat and Long are in decimal degrees.
    //Written by Chuck Gantz- chuck.gantz#globalstar.com
    double k0 = 0.9996;
    double a = 6378137;
    double eccSquared = 0.00669438;
    double eccPrimeSquared;
    double e1 = (1-sqrt(1-eccSquared))/(1+sqrt(1-eccSquared));
    double N1, T1, C1, R1, D, M;
    double LongOrigin;
    double mu, phi1, phi1Rad;
    double x, y;
    int ZoneNumber;
    char* ZoneLetter;
    int NorthernHemisphere; //1 for northern hemisphere, 0 for southern

    x = UTMEasting - 500000.0; //remove 500,000 meter offset for longitude
    y = UTMNorthing;

    ZoneNumber = strtoul(UTMZone, &ZoneLetter, 10);
    if((*ZoneLetter - 'N') >= 0)
        NorthernHemisphere = 1; //point is in northern hemisphere
    else
    {
        NorthernHemisphere = 0; //point is in southern hemisphere
        y -= 10000000.0; //remove 10,000,000 meter offset used for southern hemisphere
    }

    LongOrigin = (ZoneNumber - 1)*6 - 180 + 3; //+3 puts origin in middle of zone

    eccPrimeSquared = (eccSquared)/(1-eccSquared);

    M = y / k0;
    mu = M/(a*(1-eccSquared/4-3*eccSquared*eccSquared/64-5*eccSquared*eccSquared*eccSquared/256));

    phi1Rad = mu + (3*e1/2-27*e1*e1*e1/32)*sin(2*mu)
                 + (21*e1*e1/16-55*e1*e1*e1*e1/32)*sin(4*mu)
                 + (151*e1*e1*e1/96)*sin(6*mu);
    phi1 = phi1Rad*rad2deg;

    N1 = a/sqrt(1-eccSquared*sin(phi1Rad)*sin(phi1Rad));
    T1 = tan(phi1Rad)*tan(phi1Rad);
    C1 = eccPrimeSquared*cos(phi1Rad)*cos(phi1Rad);
    R1 = a*(1-eccSquared)/pow(1-eccSquared*sin(phi1Rad)*sin(phi1Rad), 1.5);
    D = x/(N1*k0);

    *Lat = phi1Rad - (N1*tan(phi1Rad)/R1)*(D*D/2-(5+3*T1+10*C1-4*C1*C1-9*eccPrimeSquared)*D*D*D*D/24
           +(61+90*T1+298*C1+45*T1*T1-252*eccPrimeSquared-3*C1*C1)*D*D*D*D*D*D/720);
    *Lat = *Lat * rad2deg;

    *Long = (D-(1+2*T1+C1)*D*D*D/6+(5-2*C1+28*T1-3*C1*C1+8*eccPrimeSquared+24*T1*T1)
            *D*D*D*D*D/120)/cos(phi1Rad);
    *Long = LongOrigin + *Long * rad2deg;
}

void convertToDegree(float decimal)
{
    int negative = decimal < 0;
    decimal = fabsf(decimal); /* abs() truncates floats to int; use fabsf() */
    minute = (decimal * 3600 / 60);
    second = fmodf((decimal * 3600), 60);
    degree = minute / 60;
    minute = minute % 60;
    if (negative)
    {
        if (degree > 0)
            degree = -degree;
        else if (minute > 0)
            minute = -minute;
        else
            second = -second;
    }
}

void readConfig(int *Row, int *Col)
{
    char tmp[70];
    int i=0;
    FILE *fp = fopen("config.txt","r");
    if(fp == NULL)
    {
        perror("Config.txt");
        exit(1);
    }
    while(!feof(fp))
    {
        fgets(tmp,70,fp);
        if (i==1)
            *Row = atoi(tmp);
        if(i==4)
            *Col = atoi(tmp);
        i++;
    }
    fclose(fp);
}

void readHDR(float *gridX,float *gridY,float *startXPos,float *startYPos)
{
    FILE *fp = fopen("PF.bin.hdr","r");
    int i=0;
    char tmp[255];
    char junk[255];
    memset(tmp,0X00,sizeof(tmp));
    memset(junk,0X00,sizeof(junk));
    if(fp==NULL)
    {
        perror("Please locate or create PF.bin.hdr");
        exit(0);
    }
    while(!feof(fp))
    {
        if(i==13)
            break;
        fgets(tmp,255,fp);
        i++;
    }
    fclose(fp);
    strcpy(junk,strtok(tmp,","));
    strtok(NULL,",");
    strtok(NULL,",");
    strcpy(tmp,strtok(NULL,","));
    //puts(tmp);
    *startXPos = atof(tmp);
    strcpy(tmp,strtok(NULL,","));
    //puts(tmp);
    *startYPos = atof(tmp);
    strcpy(tmp,strtok(NULL,","));
    //puts(tmp);
    *gridX = atof(tmp);
    strcpy(tmp,strtok(NULL,","));
    //puts(tmp);
    *gridY = atof(tmp);
    strcpy(tmp,strtok(NULL,","));
    ZONE = atoi(tmp);
    strcpy(tmp,strtok(NULL,","));
    strcpy(Heading,tmp);
}

int main()
{
    bmpfile_t *bmp;
    double Lat;
    double Long;
    int i;
    rgb_pixel_t pixelMetal = {128, 64, 0, 0};
    rgb_pixel_t pixelOthers = {128, 64, 0, 0};

    readConfig(&Nlig,&Ncol);
    readHDR(&sizeGridX,&sizeGridY,&startX,&startY);
    //startX = startX - (double) 0.012000;
    //startY = startY + (double)0.111000;

    printf("Enter the rectangle's top-left and bottom-right region of interest points as: x y\n");
    for(i=0;i<2;i++)
    {
        printf("Enter point %d::\t",i+1);
        scanf("%d %d",&p[i][0], &p[i][1]);
    }
    printf("Grid Size(X,Y)::( %f,%f ), Start Positions(X,Y)::( %f, %f ), ZONE::%d, Heading:: %s\n\n",sizeGridX,sizeGridY,startX,startY,ZONE,Heading);

    pixelMetal.red = 255;
    pixelMetal.blue = 010;
    pixelMetal.green = 010;
    pixelOthers.red = 8;
    pixelOthers.blue = 8;
    pixelOthers.green = 8;

    L1 = fopen("l1.bin","rb");
    PF = fopen("PF.bin","rb");
    SPF = fopen("SPF_L1.bin","wb");
    //txt = fopen("locations(UTM).txt","w");
    finalLocations = fopen("locationsROI.txt","w");
    if(L1==NULL || PF==NULL || SPF==NULL || finalLocations == NULL)
    {
        perror("Error in opening files!");
        return -1;
    }

    fseek(L1,0,SEEK_END);
    pos = ftell(L1);
    loop_end = pos;
    printf("L1.bin contains::\t%ld elements\n",pos);
    fseek(PF,0,SEEK_END);
    pos = ftell(PF);
    printf("PF.bin contains::\t%ld elements\n",pos);
    fseek(L1,0,SEEK_SET);
    fseek(PF,0,SEEK_SET);

    bmp = bmp_create(Ncol,Nlig,8); //width * height
    bufferin_L1 = vector_float(Ncol);
    bufferin_L2 = vector_float(Ncol);
    bufferout = vector_float(Ncol);
    printf("Resources Allocated. Beginning...\n");

    for (lig = 0; lig < Nlig; lig++) /* rows */
    {
        if (lig%(int)(Nlig/20) == 0)
        {
            printf("%f\r", 100. * lig / (Nlig - 1));
            fflush(stdout);
        }
        fread(&bufferin_L1[0], sizeof(float), Ncol, L1);
        fread(&bufferin_L2[0], sizeof(float), Ncol, PF);
        for (col = 0; col < Ncol; col++) /* columns */
        {
            valueL1 = bufferin_L1[col];
            valuePF = bufferin_L2[col];
            if(valueL1 >= METAL_THRESHOLD && valuePF >= POLARIZATION_FRACTION_THRESHOLD)
            {
                if(col >= p[0][0] && col <= p[1][0] && lig >= p[0][1] && lig <= p[1][1])
                {
                    xx = fabs(valueL1 + valuePF);
                    bmp_set_pixel(bmp,col,lig,pixelMetal);
                    posX = startX + (sizeGridX * col);
                    posY = startY - (sizeGridY * lig);
                    //fprintf(txt,"%f %f %d %s\n",posX,posY,ZONE,Heading);
                    sprintf(setZone,"%d",ZONE);
                    if(strstr(Heading,"Nor")!=NULL)
                        strcat(setZone,"N");
                    else
                        strcat(setZone,"S");
                    UTM2LL(23, posY, posX, setZone, &Lat, &Long); // 23 for WGS-84
                    convertToDegree(Lat);
                    //fprintf(finalLocations,"UTM:: %.2fE %.2fN , Decimal: %f %f , Degree: %d %d %d, ",posX,posY,Lat,Long,degree,minute,second);
                    //fprintf(finalLocations,"%.2fE,%.2fN,%f,%f ,%d,%d,%d,",posX,posY,Lat,Long,degree,minute,second);
                    fprintf(finalLocations,"%.2f,%.2f,%f,%f ,%d,%d,%d,",posX,posY,Lat,Long,degree,minute,second);
                    convertToDegree(Long);
                    fprintf(finalLocations,"%d,%d,%d\n",degree,minute,second);
                }
                else
                {
                    xx = fabs(valueL1);
                    bmp_set_pixel(bmp,col,lig,pixelOthers);
                }
            }
            else
            {
                xx = fabs(valueL1);
                bmp_set_pixel(bmp,col,lig,pixelOthers);
            }
            bufferout[col] = xx;
        }
        fwrite(&bufferout[0], sizeof(float), Ncol, SPF);
    }

    free_vector_float(bufferout);
    fclose(L1);
    fclose(PF);
    fclose(SPF);
    //fclose(txt);
    fclose(finalLocations);
    printf("\n----------Writing BMP File!----------\n");
    bmp_save(bmp,"SPF_L1(ROI).bmp");
    bmp_destroy(bmp);
    printf("\nDone!\n");
    return 0;
}
And here is the Python code:
# -*- coding: utf-8 -*-
"""
Created on Wed Apr 10 10:29:18 2013

#author: Binayaka
"""
import numpy as Num;
import math;
import array;

class readConfiguration(object):

    def __init__(self,x):
        self.readConfig(x);

    def readConfig(self,x):
        try:
            crs = open(x,'r');
            srs = open('config.txt','r');
        except IOError:
            print "Files missing!";
        else:
            rows = crs.readlines();
            values = rows[12].split(',');
            rows = srs.readlines();
            self.startX = float(values[3]);
            self.startY = float(values[4]);
            self.gridSizeX = float(values[5]);
            self.gridSizeY = float(values[6]);
            self.Zone = int(values[7]);
            self.Hemisphere = values[8];
            self.NRows = int(rows[1].strip());
            self.NCols = int(rows[4].strip());
            self.MetalThreshold = 5.000000;
            self.PFThreshold = 0.900000;
            self.rad2deg = 180/math.pi;
            self.deg2rad = math.pi/180;
            self.FOURTHPI = math.pi/4;
            crs.close();
            srs.close();

    def decdeg2dms(self,dd):
        negative = dd < 0;
        dd = abs(dd);
        minutes,seconds = divmod(dd*3600,60);
        degrees,minutes = divmod(minutes,60);
        if negative:
            if degrees > 0:
                degrees = -degrees;
            elif minutes > 0:
                minutes = -minutes;
            else:
                seconds = -seconds;
        return (degrees,minutes,seconds);

    def UTM2LL(self,UTMEasting, UTMNorthing):
        k0 = 0.9996;
        a = 6378137;
        eccSquared = 0.00669438;
        e1 = (1-math.sqrt(1-eccSquared))/(1+math.sqrt(1-eccSquared));
        x = UTMEasting - 500000.0; #remove 500,000 meter offset for longitude
        y = UTMNorthing;
        if self.Hemisphere == "North":
            self.Hemi = 1;
        else:
            self.Hemi = -1;
            y -= 10000000.0;
        LongOrigin = (self.Zone - 1)*6 - 180 + 3;
        eccPrimeSquared = (eccSquared)/(1-eccSquared);
        M = y / k0;
        mu = M/(a*(1-eccSquared/4-3*eccSquared*eccSquared/64-5*eccSquared*eccSquared*eccSquared/256));
        phi1Rad = mu + (3*e1/2-27*e1*e1*e1/32)*math.sin(2*mu) + (21*e1*e1/16-55*e1*e1*e1*e1/32)*math.sin(4*mu) + (151*e1*e1*e1/96)*math.sin(6*mu);
        #phi1 = phi1Rad*self.rad2deg;
        N1 = a/math.sqrt(1-eccSquared*math.sin(phi1Rad)*math.sin(phi1Rad));
        T1 = math.tan(phi1Rad)*math.tan(phi1Rad);
        C1 = eccPrimeSquared*math.cos(phi1Rad)*math.cos(phi1Rad);
        R1 = a*(1-eccSquared)/pow(1-eccSquared*math.sin(phi1Rad)*math.sin(phi1Rad), 1.5);
        D = x/(N1*k0);
        self.Lat = phi1Rad - (N1*math.tan(phi1Rad)/R1)*(D*D/2-(5+3*T1+10*C1-4*C1*C1-9*eccPrimeSquared)*D*D*D*D/24 + (61+90*T1+298*C1+45*T1*T1-252*eccPrimeSquared-3*C1*C1)*D*D*D*D*D*D/720);
        self.Lat = self.Lat * self.rad2deg;
        self.Long = (D-(1+2*T1+C1)*D*D*D/6+(5-2*C1+28*T1-3*C1*C1+8*eccPrimeSquared+24*T1*T1)*D*D*D*D*D/120)/math.cos(phi1Rad);
        self.Long = LongOrigin + self.Long * self.rad2deg;

    def printConfiguration(self):
        """ Just to check whether our reading was correct """
        print "Metal Threshold:\t" + str(self.MetalThreshold);
        print "PF Threshold:\t" + str(self.PFThreshold);
        print "Start X:\t" + str(self.startX);
        print "Start Y:\t" + str(self.startY);
        print "Grid size(X) :\t" + str(self.gridSizeX);
        print "Grid size(Y) :\t" + str(self.gridSizeY);

    def createROIfile(self,ROIFilename):
        firstPoint = raw_input('Enter topLeft point coord\t').split();
        secondPoint = raw_input('Enter bottomRight point coord\t').split();
        try:
            L1 = open('l1.bin','rb');
            PF = open('PF.bin','rb');
            SPF = open('pySPF_L1.bin','wb');
            targetFilename = open(ROIFilename,'w');
        except IOError:
            print "Files Missing!";
        else:
            L1.seek(0,2);
            elementsL1 = L1.tell();
            L1.seek(0,0);
            PF.seek(0,2);
            elementsPF = PF.tell();
            PF.seek(0,0);
            print "L1.bin contains\t" + str(elementsL1) + " elements";
            print "PF.bin contains\t" + str(elementsPF) + " elements";
            binvaluesL1 = array.array('f');
            binvaluesPF = array.array('f');
            binvaluesSPF = array.array('f');
            for row in range(0,self.NRows):
                binvaluesL1.read(L1,self.NCols);
                binvaluesPF.read(PF,self.NCols);
                dataL1 = Num.array(binvaluesL1, dtype=Num.float);
                dataPF = Num.array(binvaluesPF, dtype=Num.float);
                dataSPF = dataL1 + dataPF;
                binvaluesSPF.fromlist(Num.array(dataSPF).tolist());
                for col in range(0,self.NCols):
                    if(dataL1[col] >= self.MetalThreshold and dataPF[col] >= self.PFThreshold):
                        if(col >= int(firstPoint[0]) and col <= int(secondPoint[0]) and row >= int(firstPoint[1]) and row <= int(secondPoint[1])):
                            posX = self.startX + (self.gridSizeX * col);
                            posY = self.startY - (self.gridSizeY * row);
                            self.UTM2LL(posY,posX);
                            tmp1 = self.decdeg2dms(posY);
                            tmp2 = self.decdeg2dms(posX);
                            strTarget = "Decimal Degree:: " + str(posX) + "E " + str(posY) + "N \t Lat long:: " + str(tmp1) + " " + str(tmp2) + "\n";
                            targetFilename.write(strTarget);
                binvaluesSPF.tofile(SPF);
            L1.close();
            PF.close();
            SPF.close();
            targetFilename.close();
            print "Done!";

dimensions = readConfiguration('PF.bin.hdr');
dimensions.printConfiguration();
dimensions.createROIfile('testPythonROI.txt');
It's the Python code that needs optimization, as the values of NRows and NCols can and do reach the order of thousands.
A few general comments:
With python, it's really best to stick to PEP8 for a multitude of reasons. Python programmers are particularly picky about readability and essentially universally adhere to the community coding guidelines (PEP8). Avoid camelCase, keep lines below 80 columns, leave the semicolons out, and feel free to occasionally ignore these guidelines where they'd make things less readable.
There's no need for the builtin array type here if you're using numpy. I'm confused why you're constantly converting back and forth...
Use a projection library. Specify what datum and ellipsoid you're using, otherwise the coordinates (easting/northing or lat/long) have absolutely no meaning.
Don't use one big class as a hold-all for unrelated things. There's nothing wrong with just having a few functions. You don't need to make it into a class unless it makes sense to do so.
Use vectorized operations with numpy arrays.
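To make that last point concrete, here is a tiny before/after sketch with toy data (not your files):
import numpy as np

grid = np.random.rand(1000, 1000)

# Loop version: collect indices where a threshold is exceeded.
hits_loop = [(r, c) for r in range(grid.shape[0])
             for c in range(grid.shape[1]) if grid[r, c] > 0.999]

# Vectorized version: a boolean mask does the same work in one shot.
rows, cols = np.nonzero(grid > 0.999)
hits_vec = list(zip(rows, cols))
assert hits_loop == hits_vec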
Here's what would appear to be your performance bottleneck:
for row in range(0,self.NRows):
    binvaluesL1.read(L1,self.NCols);
    binvaluesPF.read(PF,self.NCols);
    dataL1 = Num.array(binvaluesL1, dtype=Num.float);
    dataPF = Num.array(binvaluesPF, dtype=Num.float);
    dataSPF = dataL1 + dataPF;
    binvaluesSPF.fromlist(Num.array(dataSPF).tolist());
    for col in range(0,self.NCols):
        if(dataL1[col] >= self.MetalThreshold and dataPF[col] >= self.PFThreshold):
            if(col >= int(firstPoint[0]) and col <= int(secondPoint[0]) and row >= int(firstPoint[1]) and row <= int(secondPoint[1])):
                posX = self.startX + (self.gridSizeX * col);
                posY = self.startY - (self.gridSizeY * row);
                self.UTM2LL(posY,posX);
                tmp1 = self.decdeg2dms(posY);
                tmp2 = self.decdeg2dms(posX);
                strTarget = "Decimal Degree:: " + str(posX) + "E " + str(posY) + "N \t Lat long:: " + str(tmp1) + " " + str(tmp2) + "\n";
                targetFilename.write(strTarget);
    binvaluesSPF.tofile(SPF);
One of your biggest problems is the way you're reading in your data. You're constantly reading things in as one thing, then converting that to a list, then converting that to a numpy array. There's absolutely no need to jump through all those hoops. Numpy will unpack your binary floats for you just like array will.
Just do grid = np.fromfile(yourfile, dtype=np.float32).reshape(nrows, ncols). (Outside the loop.)
After that, your nested loops can be easily vectorized and expressed with just a few lines of code.
Here's how I would write your code. This probably won't run as-is, as I can't test it with your data. However, it should give you some general ideas.
import numpy as np
import pyproj

def main():
    config = Config('PF.bin.hdr')
    grid1, grid2 = load_data('l1.bin', 'PF.bin', config.nrows, config.ncols)

    spf = grid1 + grid2
    spf.tofile('pySPF_L1.bin')

    easting_aoi, northing_aoi = subset_data(grid1, grid2, config)
    save_selected_region(easting_aoi, northing_aoi, config.zone,
                         'testPythonROI.txt')

def load_data(filename1, filename2, nrows, ncols):
    """It would really be good to use more descriptive variable names than "L1"
    and "PF". I have no idea what L1 and PF are, so I'm just calling them
    grid1 and grid2."""
    grid1 = np.fromfile(filename1, dtype=np.float32).reshape(nrows, ncols)
    grid2 = np.fromfile(filename2, dtype=np.float32).reshape(nrows, ncols)
    return grid1, grid2

def subset_data(grid1, grid2, config):
    """Select points that satisfy some threshold criteria (explain??) and are
    within a user-specified rectangular AOI."""
    northing, easting = np.mgrid[:config.nrows, :config.ncols]
    easting = config.xstart + config.xgridsize * easting
    northing = config.ystart + config.ygridsize * northing

    grids = grid1, grid2, easting, northing
    grid1, grid2, easting, northing = [item[config.user_aoi] for item in grids]

    mask = (grid1 >= config.metal_threshold) & (grid2 >= config.pf_threshold)
    return easting[mask], northing[mask]

def save_selected_region(easting, northing, zone, filename):
    """Convert the given eastings and northings (in UTM zone "zone") to
    lat/long and save to a tab-delimited-text file."""
    lat, lon = utm2geographic(easting, northing, zone)
    data = np.vstack([easting, northing, lat, lon]).T
    with open(filename, 'w') as outfile:
        outfile.write('Easting\tNorthing\tLatitude\tLongitude\n')
        np.savetxt(outfile, data, delimiter='\t')

def utm2geographic(easting, northing, zone):
    """We need to know which datum/ellipsoid the UTM coords are in as well!!!!
    I'm assuming it's a Clark 1866 ellipsoid, based on the numbers in your
    code..."""
    utm = pyproj.Proj(proj='utm', zone=zone, ellip='clrk66')
    geographic = pyproj.Proj(proj='latlong', ellip='clrk66')
    return pyproj.transform(utm, geographic, easting, northing)

class Config(object):
    """Read and store configuration values for (something?)."""
    config_file = 'config.txt'

    def __init__(self, filename):
        """You should add docstrings to clarify what you're expecting
        "filename" to contain."""
        with open(filename, 'r') as infile:
            crs_values = list(infile)[12].split(',')
        crs_values = [float(item) for item in crs_values]
        self.xstart, self.ystart = crs_values[3:5]
        self.xgridsize, self.ygridsize = crs_values[5:7]
        self.zone = int(crs_values[7])

        with open(self.config_file, 'r') as infile:
            srs_values = list(infile)
        self.nrows, self.ncols = int(srs_values[1]), int(srs_values[4])

        # It would be good to explain a bit about these (say, units, etc)
        self.metal_threshold = 5.0
        self.pf_threshold = 0.9

        self.user_aoi = self.read_user_aoi()

    def read_user_aoi(self):
        """Get an area of interest of the grids in pixel coordinates."""
        top_left = raw_input('Enter top left index\t')
        bottom_right = raw_input('Enter bottom right index\t')
        min_i, min_j = [int(item) for item in top_left.split()]
        max_i, max_j = [int(item) for item in bottom_right.split()]
        return slice(min_i, max_i), slice(min_j, max_j)

if __name__ == '__main__':
    main()
