I'm trying to learn more about the Laplace transform, so I've tried to implement the forward and inverse (Mellin's inverse formula) transforms in code (approximated using the trapezium rule). I would expect to get roughly the same information back out when doing the forward and inverse one after the other. However, the output values appear to have nothing to do with the input data.
CODE:
# Dependencies:
from math import ceil
from cmath import *
import numpy as np
# Constants
j = complex(0, 1)
e = exp(1).real
# Default Values
sigma_default = 0 # Real component. When 0, the result is the Fourier transform
# Forward Transform - Time Domain to Laplace Domain
def Laplace(data, is_inverse, sigma=sigma_default, frequency_stamps=None, time_stamps=None):
    """Approximate the forward or inverse Laplace transform with the trapezium rule.

    data             -- samples to transform (time samples for the forward
                        transform, Laplace-domain samples for the inverse).
    is_inverse       -- False for the forward transform, True for the inverse.
    sigma            -- real part added to every frequency stamp (s = sigma + j*omega).
    frequency_stamps -- optional angular frequencies; generated when None.
    time_stamps      -- optional sample times; generated when None, and replaced
                        by 0..data.size-1 when their length does not match data.

    Returns the transformed samples (complex for the forward transform, real
    for the inverse). Inputs with at most one element are returned unchanged.
    """
    # Resolve empty data scenario
    data = np.asarray(data)
    if data.size <= 1:
        return data

    # Add time data if missing
    if time_stamps is None:
        if is_inverse is False:
            time_stamps = np.arange(0, data.size)
        else:
            time_stamps = np.arange(0, data.size * 2)
    else:
        time_stamps = np.asarray(time_stamps).real
        # Compare sizes by value. The original used `is not`, an identity test
        # that only happens to work for small cached ints and would discard
        # valid user-supplied time stamps for larger arrays.
        if time_stamps.size != data.size:
            time_stamps = np.arange(0, data.size)

    # Add frequency stamps if missing
    if frequency_stamps is None:
        if is_inverse is False:
            frequency_stamps = np.asarray(np.arange(0, ceil(data.size / 2))).real * 2 * pi
        else:
            frequency_stamps = np.asarray(np.arange(0, ceil(data.size))).real * 2 * pi
    else:
        frequency_stamps = np.asarray(frequency_stamps).real
    frequency_stamps = sigma + frequency_stamps * j

    # Create the matrix of exponents and the integration step widths
    if is_inverse is False:
        power = -Get_Powers(time_stamps, frequency_stamps)
        delta = np.diff(time_stamps)
    else:
        power = Get_Powers(frequency_stamps, time_stamps)
        delta = np.diff(frequency_stamps)
    # np.diff yields one element fewer; prepend the mean step as a start value
    delta = np.concatenate([[np.average(delta)], delta])

    # Integrand: one row per output sample, one column per input sample
    laplace = data * np.power(e, power) * delta

    # Trapezium rule => halve the first and last integrand samples
    laplace[:, [0, -1]] *= 0.5
    laplace = np.sum(laplace, 1)  # Integrate

    # If inverse function, then normalise and ensure the result is real
    if is_inverse is True:
        laplace *= 1 / (2 * pi * j)  # Scale
        laplace = laplace.real  # Ensure time series is real only

    return laplace
# Used to derive the vector of powers exp(1) is to be raised to
def Get_Powers(values1, values2):
    """Return the matrix of pairwise products values2[i] * values1[j].

    For the forward Laplace transform values1 = time and values2 = frequency;
    for the inverse transform the roles are swapped. The result has one row
    per element of values2 and one column per element of values1.
    """
    # Promote to floating point first, then turn values2 into a column so
    # broadcasting against the values1 row yields the full product table.
    column = (1.0 * values2).reshape(-1, 1)
    return column * values1
if __name__ == "__main__":
# a = [0, 1, 2, 3, 4, 5]
a = np.arange(0, 10)
b = Laplace(a, False)
c = Laplace(b, True)
print(np.asarray(a))
print(c)
EXPECTED RESULT:
[0 1 2 3 4 5 6 7 8 9]
[0 1 2 3 4 5 6 7 8 9]
ACTUAL RESULT:
[0 1 2 3 4 5 6 7 8 9]
[162. 162. 162. 162. 162. 162. 162. 162. 162. 162.]
Any ideas where I've gone awry?
EDIT 1: Added Laplace functions:
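Forwards transform: $F(s) = \int_0^{\infty} f(t)\, e^{-st}\, dt$
Inverse transform: $f(t) = \frac{1}{2\pi j} \int_{\sigma - j\infty}^{\sigma + j\infty} F(s)\, e^{st}\, ds$
Definition of s: $s = \sigma + j\omega$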
Where omega is represented as frequency_stamps in my code. When sigma = 0 the system becomes the Fourier transform.
EDIT 2: Fixed two bugs. Problem still persists
Besides the two bug fixes made in the original question, there were a further 3 bugs left that I identified via Cris Luengo's suggestion to look into the conversion from the Fourier Transform into the Discrete Fourier Transform. A summary of all bug fixes is below:
Fixed a bug in how I implemented the trapezium rule.
Scaled the frequency_stamps by 2*pi to reflect the underlying circular nature of the Laplace data.
Rescaled the frequency_stamps again such that they only travel around a circle once (aka. the data is in the range 0 -> 2*pi).
Fixed a mistake where I'd assumed that there only needed to be half as many frequency points as time points. That's wrong. There should be an equal number of both.
Allowed the passing of initial and final time series points for the inverse transform as the data otherwise gets corrupted.
Updated Code:
# Dependencies:
from cmath import *
import numpy as np
# Constants
j = complex(0, 1)
e = exp(1).real
# Default Values
sigma_default = 0.0 # Real component. When 0, the result is the Fourier transform
ends_default = np.asarray([0, 0])
# Forward Transform - Time Domain to Laplace Domain
def Laplace(data, is_inverse, sigma=sigma_default, frequency_stamps=None, time_stamps=None, ends=ends_default):
    """Approximate the forward or inverse Laplace transform with the trapezium rule.

    data             -- samples to transform (time samples for the forward
                        transform, Laplace-domain samples for the inverse).
    is_inverse       -- False for the forward transform, True for the inverse.
    sigma            -- real part added to every frequency stamp (s = sigma + j*omega).
    frequency_stamps -- optional angular frequencies; when None, data.size
                        equally spaced values covering 0 .. 2*pi are used.
    time_stamps      -- optional sample times; when None, or when their length
                        does not match data, 0 .. data.size-1 is used.
    ends             -- first and last time-series values written into the
                        result of the inverse transform.

    Returns the transformed samples (complex for the forward transform, real
    for the inverse). Inputs with at most one element are returned unchanged.
    """
    # Resolve empty data scenario
    data = np.asarray(data)
    if data.size <= 1:
        return data

    # Add time data if missing (size doesn't change between forward and inverse)
    if time_stamps is None:
        time_stamps = np.arange(0, data.size)
    else:
        time_stamps = np.asarray(time_stamps).real
        # Compare sizes by value. The original used `is not`, an identity test
        # that only happens to work for small cached ints and would discard
        # valid user-supplied time stamps for larger arrays.
        if time_stamps.size != data.size:
            time_stamps = np.arange(0, data.size)

    # Add frequency stamps if missing
    if frequency_stamps is None:
        frequency_stamps = np.asarray(np.arange(0.0, data.size)).real
        # Restrict the integral range to 0 -> 2pi
        frequency_stamps *= 2 * pi / np.max(frequency_stamps)
    else:
        frequency_stamps = np.asarray(frequency_stamps).real
    frequency_stamps = sigma + frequency_stamps * j

    # Create the matrix of exponents and the integration step widths
    if is_inverse is False:
        power = -Get_Powers(time_stamps, frequency_stamps)
        delta = np.diff(time_stamps)
    else:
        power = Get_Powers(frequency_stamps, time_stamps)
        delta = np.diff(frequency_stamps)
    # np.diff yields one element fewer; prepend the mean step as a start value
    delta = np.concatenate([[np.average(delta)], delta])

    # Integrand: one row per output sample, one column per input sample
    laplace = data * np.power(e, power) * delta

    # Trapezium rule => halve the first and last integrand samples
    laplace[:, [0, -1]] *= 0.5
    laplace = np.sum(laplace, 1)  # Integrate

    # If inverse function, then normalise and ensure the result is real
    if is_inverse is True:
        laplace *= 1 / (2 * pi * j)  # Scale
        laplace = laplace.real  # Ensure time series is real only
        # Correct for edge cases: the end points are supplied by the caller
        laplace[0] = ends[0]
        laplace[-1] = ends[-1]

    return laplace
# Used to derive the vector of powers exp(1) is to be raised to
def Get_Powers(values1, values2):
    """Build the table of exponents used by the transform.

    Entry [i, j] is values2[i] * values1[j]. For the forward Laplace transform
    values1 holds times and values2 frequencies; for the inverse it is the
    other way round.
    """
    # Multiplying by a float vector of ones promotes integer input to float.
    as_float = np.ones(values2.size) * values2
    return values1 * as_float[:, np.newaxis]
if __name__ == "__main__":
a = np.arange(3, 13)
b = Laplace(a, False, sigma=0.5)
c = Laplace(b, True, sigma=0.5, ends=np.asarray([3, 12]))
print(np.asarray(a))
print(c)
Output
[ 3 4 5 6 7 8 9 10 11 12]
[ 3. 4. 5. 6. 7. 8. 9. 10. 11. 12.]
Thanks for the assist!
Related
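I'm trying to calculate the gradient of a given function by the following (central difference) definition, where $e_i$ is the i-th unit vector: $\frac{\partial f}{\partial x_i}(x) \approx \frac{f(x + \epsilon e_i) - f(x - \epsilon e_i)}{2\epsilon}$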
I'm trying to do that with the following code:
def numerical_diff_gradient(func, vector: np.matrix, epsilon):
    """Estimate the gradient of func at vector by central differences.

    func    -- callable evaluated at perturbed copies of vector.
    vector  -- column vector (shape (n, 1)) at which to differentiate.
    epsilon -- half-width of the finite-difference step.

    Returns an (n, 1) float64 np.matrix whose i-th entry is
    (func(vector + epsilon*e_i) - func(vector - epsilon*e_i)) / (2*epsilon).
    """
    vec_len = vector.shape[0]
    assert vector.shape[1] == 1
    assert vec_len > 0

    # Columns of the identity matrix are the unit direction vectors e_i.
    directions = np.matrix(np.eye(vec_len), dtype=np.float64)
    gradient = np.matrix(np.zeros((vec_len, 1)), dtype=np.float64)
    for axis in range(vec_len):
        step = epsilon * directions[:, axis]
        forward = func(vector + step)
        backward = func(vector - step)
        gradient[axis, 0] = (forward - backward) / (2 * epsilon)
    return gradient
The problem is that I cannot handle very small values: epsilon is between 0 and 2e-60,
so func_plus and func_minus evaluate to the same value and the gradient comes out as a zero vector. How can I represent such small values without losing accuracy?
I actually have two solutions to this problem; both are applied below to a test case. Neither of them is perfect: the first one only takes the two end points into account, and the other one can't be made "arbitrarily smooth": there is a limit to the amount of smoothness one can achieve (the one I am showing).
I am sure there is a better solution, that kind-of go from the first solution to the other and all the way to no smoothing at all. It may already be implemented somewhere. Maybe solving a minimization problem with an arbitrary number of splines equidistributed?
Thank you very much for your help
Ps: the seed used is a challenging one
import matplotlib.pyplot as plt
from scipy import interpolate
from scipy.signal import savgol_filter
import numpy as np
import random
def scipy_bspline(cv, n=100, degree=3):
    """ Calculate n samples on an open (clamped) bspline
    cv : Array of control vertices, one vertex per row
    n : Number of samples to return
    degree: Curve degree (clamped to the range 1 .. len(cv) - 1)
    """
    cv = np.asarray(cv)
    count = cv.shape[0]
    # A spline of a given degree needs at least degree + 1 control vertices
    degree = np.clip(degree, 1, count - 1)
    # Clamped knot vector: the end knots are repeated so the curve starts at
    # the first control vertex and ends at the last one
    kv = np.clip(np.arange(count + degree + 1) - degree, 0, count - degree)
    # Only the open-curve case is implemented here, so the parameter runs over
    # [0, count - degree]. The original expression referenced an undefined
    # name `periodic` and raised NameError.
    max_param = count - degree
    spl = interpolate.BSpline(kv, cv, degree)
    # Return samples
    return spl(np.linspace(0, max_param, n))
def round_up_to_odd(f):
    """Return the odd integer 2 * ceil(f / 2) + 1.

    The result is always odd and strictly greater than f (e.g. 4 -> 5, 3 -> 5).
    The builtin int replaces np.int, which was only an alias for it and has
    been removed from recent NumPy releases.
    """
    return int(np.ceil(f / 2.) * 2 + 1)
def generateRandomSignal(n=1000, seed=None):
    """
    Build a smoothed random-walk signal.

    Parameters
    ----------
    n : integer, optional
        Number of points in the signal. The default is 1000.
    seed : optional
        Value handed to np.random.seed before the steps are drawn.

    Returns
    -------
    sig : numpy array
        Random walk (steps of -1, 0 or +1, starting at 0) after a
        Savitzky-Golay filter of polynomial order 6.
    """
    np.random.seed(seed)
    print("Seed was:", seed)
    increments = np.random.choice(a=[-1, 0, 1], size=(n-1))
    # Prepend the starting value 0, then accumulate the steps into a walk
    walk = np.cumsum(np.concatenate([np.array([0]), increments]), 0)
    window = round_up_to_odd(n/10)
    return savgol_filter(walk, window, 6)
# Generate a random signal to illustrate my point
n = 1000
t = np.linspace(0, 10, n)
seed = 45136  # Challenging seed (kept as an integer so np.random.seed accepts it)
sig = generateRandomSignal(n=1000, seed=seed)
sigInit = np.copy(sig)
# Add noise to the signal
mean = 0
std = sig.max()/3.0
# Integer division: these values are used as array indices and sizes, which
# must be ints (plain `/` yields floats in Python 3)
num_samples = n // 5
idxMin = n // 2 - 100
idxMax = idxMin + num_samples
tCut = t[idxMin+1:idxMax]
noise = np.random.normal(mean, std, size=num_samples-1) + 2*std*np.sin(2.0*np.pi*tCut/0.4)
sig[idxMin+1:idxMax] += noise
# Define filtering range enclosing the noisy area of the signal
idxMin -= 20
idxMax += 20
# Extreme filtering solution
# Spline between first and last points, the points in between have no influence
sigTrim = np.delete(sig, np.arange(idxMin,idxMax))
tTrim = np.delete(t, np.arange(idxMin,idxMax))
f = interpolate.interp1d(tTrim, sigTrim, kind='quadratic')
sigSmooth1 = f(t)
# My attempt. Not bad but not perfect because there is a limit in the maximum
# amount of smoothing we can add (degree=len(tSlice) is the maximum)
# If I could do degree=10*len(tSlice) and converging to the first solution
# I would be done!
sigSlice = sig[idxMin:idxMax]
tSlice = t[idxMin:idxMax]
cv = np.stack((tSlice, sigSlice)).T
p = scipy_bspline(cv, n=len(tSlice), degree=len(tSlice))
tSlice = p.T[0]
sigSliceSmooth = p.T[1]
sigSmooth2 = np.copy(sig)
sigSmooth2[idxMin:idxMax] = sigSliceSmooth
# Plot
plt.figure()
plt.plot(t, sig, label="Signal")
plt.plot(t, sigSmooth1, label="Solution 1")
plt.plot(t, sigSmooth2, label="Solution 2")
plt.plot(t[idxMin:idxMax], sigInit[idxMin:idxMax], label="What I'd want (kind of, smoother will be even better actually)")
plt.plot([t[idxMin],t[idxMax]], [sig[idxMin],sig[idxMax]],"o")
plt.legend()
plt.show()
# Equivalent to sys.exit(), without relying on `sys`, which this script never imports
raise SystemExit
Yes, a minimization is a good way to approach this smoothing problem.
Least squares problem
Here is a suggestion for a least squares formulation: let s[0], ..., s[N] denote the N+1 samples of the given signal to smooth, and let L and R be the desired slopes to preserve at the left and right endpoints. Find the smoothed signal u[0], ..., u[N] as the minimizer of
min_u (1/2) sum_n (u[n] - s[n])² + (λ/2) sum_n (u[n+1] - 2 u[n] + u[n-1])²
subject to
s[0] = u[0], s[N] = u[N] (value constraints),
L = u[1] - u[0], R = u[N] - u[N-1] (slope constraints),
where in the minimization objective, the sums are over n = 1, ..., N-1 and λ is a positive parameter controlling the smoothing strength. The first term tries to keep the solution close to the original signal, and the second term penalizes u for bending to encourage a smooth solution.
The slope constraints require that
u[1] = L + u[0] = L + s[0] and u[N-1] = u[N] - R = s[N] - R. So we can consider the minimization as over only the interior samples u[2], ..., u[N-2].
Finding the minimizer
The minimizer satisfies the Euler–Lagrange equations
(u[n] - s[n]) / λ + (u[n+2] - 4 u[n+1] + 6 u[n] - 4 u[n-1] + u[n-2]) = 0
for n = 2, ..., N-2.
An easy way to find an approximate solution is by gradient descent: initialize u = np.copy(s), set u[1] = L + s[0] and u[N-1] = s[N] - R, and do 100 iterations or so of
# The 0.05 step size must scale the whole gradient (data term plus bending term); applied to the data term alone, the iteration diverges.
u[2:-2] -= 0.05 * ((u - s)[2:-2] / λ + np.convolve(u, [1, -4, 6, -4, 1])[4:-4])
But with some more work, it is possible to do better than this by solving the E–L equations directly. For each n, move the known quantities to the right-hand side: s[n] and also the endpoints u[0] = s[0], u[1] = L + s[0], u[N-1] = s[N] - R, u[N] = s[N]. Then you will have a linear system "A u = b", and matrix A has rows like
0, ..., 0, 1, -4, (6 + 1/λ), -4, 1, 0, ..., 0.
Finally, solve the linear system to find the smoothed signal u. You could use numpy.linalg.solve to do this if N is not too large, or if N is large, try an iterative method like conjugate gradients.
you can apply a simple smoothing method and plot the smooth curves with different smoothness values to see which one works best.
def smoothing(data, smoothness=0.5):
    """Exponentially smooth a sequence of numbers.

    data       -- sequence of values to smooth.
    smoothness -- weight in [0, 1] given to the previous smoothed value;
                  0 returns the data unchanged, 1 holds the first value.

    Returns a list of the same length as data. An empty input gives [].
    """
    if len(data) == 0:
        return []
    last = data[0]
    new_data = [last]
    for datum in data[1:]:
        # Carry the *smoothed* value forward. Feeding the raw datum back in,
        # as the original did, only blends adjacent samples, so raising
        # `smoothness` shifts the curve instead of smoothing it.
        last = smoothness * last + (1 - smoothness) * datum
        new_data.append(last)
    return new_data
You can plot this curve for multiple values of smoothness and pick the curve which suits your needs. You can also apply this method only on a range of values in the actual curve by defining start and end
I tried the mixture of Gaussians model and am facing several problems. I have pasted my whole code at ideone. The code is at: https://ideone.com/dNYtZ2
When I try to run fitMixGauss(data, k)), I get a singular matrix error from the function below.
def fitMixGauss(data, k):
    """
    Estimate a k MoG model that would fit the data. Incrementally plots the outcome.

    Keyword arguments:
    data -- d by n matrix containing data points.
    k -- scalar representing the number of gaussians to use in the MoG model.

    Returns:
    mixGaussEst -- dict containing the estimated MoG parameters
                   ('d', 'k', 'weight', 'mean', 'cov').
    """
    # MAIN E-M ROUTINE
    # In the E-Step we calculate the posterior distribution over the (nData)
    # hidden variables; in the M-Step we update the parameters of the
    # Gaussians (mean, cov, weight).
    nDims, nData = data.shape
    postHidden = np.zeros(shape=(k, nData))

    # Initialise the parameters to random values
    mixGaussEst = dict()
    mixGaussEst['d'] = nDims
    mixGaussEst['k'] = k
    mixGaussEst['weight'] = (1 / k) * np.ones(shape=(k))
    mixGaussEst['mean'] = 2 * np.random.randn(nDims, k)
    mixGaussEst['cov'] = np.zeros(shape=(nDims, nDims, k))
    for cGauss in range(k):
        # Scale the identity matrix. The original `2.5 + 1.5 * u * eye` added
        # 2.5 to every entry, giving a nearly singular starting covariance.
        mixGaussEst['cov'][:, :, cGauss] = (2.5 + 1.5 * np.random.uniform()) * np.eye(nDims)

    # Calculate current likelihood
    logLike = getMixGaussLogLike(data, mixGaussEst)
    print('Log Likelihood Iter 0 : {:4.3f}\n'.format(logLike))

    nIter = 30
    fig, ax = plt.subplots(1, 1)

    for cIter in range(nIter):
        # ===================== =====================
        # Expectation step
        # ===================== =====================
        for cGauss in range(k):
            mu = mixGaussEst['mean'][:, cGauss].reshape(nDims, 1)
            sigma = mixGaussEst['cov'][:, :, cGauss]
            diff = data - mu
            # Squared Mahalanobis distance of every point: diff^T sigma^-1 diff
            mahal = np.sum(diff * np.linalg.solve(sigma, diff), axis=0)
            normConst = 1.0 / np.sqrt((2 * np.pi) ** nDims * np.linalg.det(sigma))
            postHidden[cGauss, :] = mixGaussEst['weight'][cGauss] * normConst * np.exp(-0.5 * mahal)
        # Normalise so the responsibilities of each data point sum to one.
        # The floor avoids 0/0 (nan) when every likelihood underflows.
        evidence = np.sum(postHidden, axis=0, keepdims=True)
        postHidden = postHidden / np.maximum(evidence, np.finfo(float).tiny)

        # ===================== =====================
        # Maximization Step
        # ===================== =====================
        for cGauss in range(k):
            resp = postHidden[cGauss, :]
            sum_Kth_Gauss_Resp = np.sum(resp)
            # Weight: share of the total posterior probability
            mixGaussEst['weight'][cGauss] = sum_Kth_Gauss_Resp / np.sum(postHidden)
            # Mean: responsibility-weighted average over *all* dimensions
            newMean = data.dot(resp) / sum_Kth_Gauss_Resp
            mixGaussEst['mean'][:, cGauss] = newMean
            # Covariance: responsibility-weighted outer products of the
            # deviations from the updated mean
            centred = data - newMean.reshape(nDims, 1)
            mixGaussEst['cov'][:, :, cGauss] = (resp * centred).dot(centred.T) / sum_Kth_Gauss_Resp

        # Draw the new solution
        drawEMData2d(data, mixGaussEst)
        time.sleep(0.7)
        fig.canvas.draw()

        # Calculate the log likelihood
        logLike = getMixGaussLogLike(data, mixGaussEst)
        print('Log Likelihood After Iter {} : {:4.3f}\n'.format(cIter, logLike))

    return mixGaussEst
Why does the log likelihood come out as nan, and why does the whole code (on ideone) give a singular matrix error at the end?
The numpy.linalg.lstsq(a,b) function accepts an array a with size nx2 and a 1-dimensional array b which is the dependent variable.
How would I go about doing a least squares regression where the data points are presented as a 2d array generated from an image file? The array looks something like this:
[[0, 0, 0, 0, e]
[0, 0, c, d, 0]
[b, a, f, 0, 0]]
where a, b, c, d, e, f are positive integer values.
I want to fit a line to these points. Can I use np.linalg.lstsq (and if so, how) or is there something which may make more sense (and if so, how)?
Thanks very much.
once a while I saw a similar python program from
# Prac 2 for Monte Carlo methods in a nutshell
# Richard Chopping, ANU RSES and Geoscience Australia, October 2012
# Useage
# python prac_q2.py [number of bootstrap runs]
# e.g. python prac_q2.py 10000
# would execute this and perform 10 000 bootstrap runs.
# Default is 100 runs.
# sys cause I need to access the arguments the script was called with
import sys
# math cause it's handy for scalar maths
import math
# time cause I want to benchmark how long things take
import time
# numpy cause it gives us awesome array / matrix manipulation stuff
import numpy
# scipy just in case
import scipy
# scipy.stats to make life simpler statistcally speaking
import scipy.stats as stats
def main():
    """Fit the model, bootstrap its residuals and print ensemble statistics.

    The number of bootstrap runs defaults to 100 and may be overridden by the
    first command-line argument. All output is printed; nothing is returned.
    The print calls use single pre-formatted strings so that the function
    runs under both Python 2 and Python 3.
    """
    print("Prac 2 solution: no graphs")
    true_model = numpy.array([17.0, 10.0, 1.96])

    # Unlike the data table in the prac handouts, time is the first column
    # and height the second. The helper functions rely on this order.
    data = numpy.array([[1.0, 26.94],
                        [2.0, 33.45],
                        [3.0, 40.72],
                        [4.0, 42.32],
                        [5.0, 44.30],
                        [6.0, 47.19],
                        [7.0, 43.33],
                        [8.0, 40.13]])

    # Perform the least squares regression to find the best fit solution
    best_fit = regression(data)
    m1, m2, m3 = best_fit
    print("Best fit solution:")
    print("m1 is %s and m2 is %s and m3 is %s" % (m1, m2, m3))

    # Calculate residuals from the best fit solution
    best_fit_resid = residuals(data, best_fit)
    print("The residuals from the best fit solution are:")
    print(best_fit_resid)
    print("")

    # Bootstrap part
    # --------------
    # Number of bootstraps to run. 100 is a minimum and our default number.
    num_booties = 100
    if len(sys.argv) > 1:
        num_booties = int(sys.argv[1])

    # Preallocate an array to store the results: one row of (m1, m2, m3) per run
    ensemble = numpy.zeros((num_booties, 3))

    print("Starting up the bootstrap routine")
    # time.clock no longer exists on recent Python 3; prefer perf_counter
    timer = getattr(time, "perf_counter", None) or time.clock
    start_time = timer()
    for index in range(num_booties):
        # Print every 10 % so we know where we're up to in long runs
        if print_progress(index, num_booties):
            percent = (float(index) / float(num_booties)) * 100.0
            print("Have completed %s percent" % (percent,))
        # Shuffle the residuals, generate new data, and fit it again
        resamp_resid = resamp_with_replace(best_fit_resid)
        new_data = calc_new_data(data, best_fit, resamp_resid)
        bootstrap_model = regression(new_data)
        ensemble[index] = (bootstrap_model[0], bootstrap_model[1], bootstrap_model[2])
    loop_time = timer() - start_time

    print("")
    print("Ensemble calculated based on %s bootstrap runs." % (num_booties,))
    print("Bootstrap runs took %s seconds." % (loop_time,))
    print("")

    # Stats on the ensemble
    # ---------------------
    B = num_booties
    # 1.0/B forces floating point; gives the mean of each model parameter
    mean = 1.0/B * numpy.sum(ensemble, axis=0)
    print("Mean is ([m1 m2 m3]): %s" % (mean,))
    var2 = 1.0/B * numpy.sum(((ensemble - mean)**2), axis=0)
    print("Variance squared is ([m1 m2 m3]): %s" % (var2,))
    bias = mean - best_fit
    print("Bias is ([m1 m2 m3]): %s" % (bias,))
    bias_corr = best_fit - bias
    print("Bias corrected solution is ([m1 m2 m3]): %s" % (bias_corr,))
    print("The original solution was ([m1 m2 m3]): %s" % (best_fit,))
    print("And the true solution is ([m1 m2 m3]): %s" % (true_model,))
    print("")

    # Confidence intervals
    # ---------------------
    # Take the percentiles of each parameter's own column. The original
    # sorted whole rows and then read column 1 for m3, so the m3 intervals
    # were really those of m2.
    for column, label in enumerate(("m1", "m2", "m3")):
        samples = ensemble[:, column]
        perc0p5, perc2p5, perc16, perc84, perc97p5, perc99p5 = [
            stats.scoreatpercentile(samples, per)
            for per in (0.5, 2.5, 16, 84, 97.5, 99.5)]
        print("%s 68%% confidence interval is from %s to %s" % (label, perc16, perc84))
        print("%s 95%% confidence interval is from %s to %s" % (label, perc2p5, perc97p5))
        print("%s 99%% confidence interval is from %s to %s" % (label, perc0p5, perc99p5))
        print("")
# End of the main function
#
#
# Helper functions go down here
#
#
# regression
# This takes a 2D numpy array and performs a least-squares regression
# using the formula on the practical sheet, page 3
# Stored in the top are the real values
# Returns an array of m1, m2 and m3.
def regression(data):
    """Least-squares fit of y = m1 + m2*t - 0.5*m3*t**2.

    data -- 2D numpy array with time in column 0 and the observation in
            column 1.

    Returns a numpy array [m1, m2, m3] obtained from the normal equations
    G m = d by multiplying d with the inverse of G.
    """
    n_samples = len(data[:, 0])

    # Column sums of the raw data: [sum(t), sum(y)]
    column_totals = numpy.sum(data, axis=0)
    sum_t, sum_y = column_totals[0], column_totals[1]

    # Sums of t**2, t**3 and t**4 (column 0 of the element-wise powers)
    sum_t2, sum_t3, sum_t4 = [numpy.sum(numpy.power(data, exponent), axis=0)[0]
                              for exponent in (2, 3, 4)]

    # Normal-equation matrix; the -0.5 factors come from the -0.5*m3*t**2 term
    G = numpy.array([[n_samples, sum_t, -0.5 * sum_t2],
                     [sum_t, sum_t2, -0.5 * sum_t3],
                     [-0.5 * sum_t2, -0.5 * sum_t3, 0.25 * sum_t4]])

    # Right-hand side: sums of y, t*y and -0.5*t**2*y
    t_times_y = numpy.prod(data, axis=1)
    sum_ty = numpy.sum(t_times_y)
    sum_t2y = numpy.sum(t_times_y * data[:, 0])
    d = numpy.array([sum_y, sum_ty, -0.5 * sum_t2y])

    return numpy.linalg.inv(G).dot(d)
# residuals:
# Takes in a data array, and an array of best fit paramers
# calculates the difference between the observed and predicted data
# and returns an array
def residuals(data, best_fit):
    """Return observed minus predicted values for the quadratic model.

    data     -- 2D array with time in column 0 and observations in column 1.
    best_fit -- sequence (m1, m2, m3) of model parameters.

    The prediction is m1 + m2*t - 0.5*m3*t**2, so the residual is
    y - m1 - m2*t + 0.5*m3*t**2.
    """
    times = data[:, 0]
    observed = data[:, 1]
    times_squared = numpy.power(times, 2)
    offset, slope, curvature = best_fit[0], best_fit[1], best_fit[2]
    return observed - offset - (slope * times) + (0.5 * curvature * times_squared)
# resamp_with_replace:
# Perform a dataset resampling with replacement on parameter set.
# Uses numpy.random to generate the random numbers to pick the indices to look up.
# So for item 0, ... N, we look up a random index from the set and put that in
# our resampled data.
def resamp_with_replace(set):
    """Resample a sequence with replacement.

    set -- sequence of numbers (the parameter name shadows the builtin but is
           kept for backward compatibility with existing callers).

    Returns a float numpy array of the same length whose entries are drawn
    uniformly at random, with replacement, from set. An empty input gives an
    empty array.
    """
    N = len(set)
    if N == 0:
        return numpy.zeros(0)
    # randint's upper bound is exclusive, so use N (not N - 1) to make every
    # index 0 .. N-1 eligible; the original could never pick the last element
    # and raised for a single-element input.
    indices = numpy.random.randint(0, N, N)
    return numpy.asarray(set, dtype=float)[indices]
# calc_new_data:
# Given a set of resampled residuals, use the model parameters to derive
# new data. This is used for bootstrapping the residuals.
# true_data is a numpy array of rows of ti, yi. We only need the ti column though.
# model is an array of three parameters, corresponding to m1, m2, m3.
# residuals are an array of our resudials
def calc_new_data(true_data, model, residuals):
    """Synthesise a data set from the model plus resampled residuals.

    true_data -- 2D array of rows (ti, yi); only the ti column is used.
    model     -- sequence (m1, m2, m3) of model parameters.
    residuals -- array of residuals, one per row of true_data.

    Returns a 2D array with columns (ti, y_new), where
    y_new = residuals + m1 + m2*ti - 0.5*m3*ti**2.
    """
    times = true_data[:, 0]
    linear_part = model[1] * times
    quadratic_part = 0.5 * model[2] * times**2
    synthetic = residuals + model[0] + linear_part - quadratic_part
    # Pair every time with its new observation, column by column
    return numpy.column_stack((times, synthetic))
# print_progress:
# Just a quick thing that returns true if we want to print for this index
# and false otherwise
def print_progress(index, total):
    """Tell whether a progress message should be printed for this index.

    Returns True at the start (0 %), when index/total lies within 0.00001
    percentage points of 10 %, 20 %, ..., 90 %, and when the next index
    reaches the end; False otherwise.
    """
    index = float(index)
    total = float(total)
    percent = (index / total) * 100

    # Start of the run
    if percent <= 0.00001:
        return True

    # Tolerance windows around every multiple of 10 %; floating point maths
    # rarely lands on the exact value
    windows = ((9.99999, 10.00001), (19.99999, 20.00001),
               (29.99999, 30.00001), (39.99999, 40.00001),
               (49.99999, 50.00001), (59.99999, 60.00001),
               (69.99999, 70.00001), (79.99999, 80.00001),
               (89.99999, 90.00001))
    for lower, upper in windows:
        if lower <= percent <= upper:
            return True

    # Final iteration: the next index would complete the run
    return (((index + 1) / total) * 100) > 99.99999
#
#
# End of helper functions
#
#
# So we can easily execute our script
if __name__ == "__main__":
main()
I guess you can take a look, here is link to complete information
Use sklearn instead of numpy (sklearn is derived from numpy but much better for this kind of calculation) :
from sklearn import linear_model
clf = linear_model.LinearRegression()
clf.fit ([[0, 0], [1, 1], [2, 2]], [0, 1, 2])
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1,
normalize=False)
clf.coef_
array([ 0.5, 0.5])
SciPy/Numpy seems to support many filters, but not the root-raised cosine filter. Is there a trick to easily create one rather than calculating the transfer function? An approximation would be fine as well.
The commpy package has several filters included with it. The order of return variables was switched in an earlier version (as of this edit, current version is 0.7.0). To install, follow the instructions here or here.
Here's a use example for 1024 symbols of QAM16:
import numpy as np
from commpy.modulation import QAMModem
from commpy.filters import rrcosfilter
N = 1024  # Number of symbols
# Oversampling factor (samples per symbol).
# NOTE: this name would shadow the stdlib `os` module if that were imported here.
os = 8
# Create modulation. QAM16 makes 4 bits/symbol
mod1 = QAMModem(16)
# Generate the bit stream for N symbols
sB = np.random.randint(0, 2, N * mod1.num_bits_symbol)
# Generate N complex-valued symbols
sQ = mod1.modulate(sB)
# Zero-stuff the symbol stream: one symbol every `os` samples, zeros in between
sQ_upsampled = np.zeros(os * (len(sQ) - 1) + 1, dtype=np.complex64)
sQ_upsampled[::os] = sQ
# Create a filter with limited bandwidth. Parameters:
#   N:   Filter length in samples
#   0.8: Roll off factor alpha
#   1:   Symbol period in time-units
#   os:  Sample rate in 1/time-units (here 8 samples per symbol period)
# Element [1] of the returned pair is the impulse response; element 0 holds
# the sample time indexes.
sPSF = rrcosfilter(N, alpha=0.8, Ts=1, Fs=os)[1]
# Analog signal has N/2 leading and trailing near-zero samples
qW = np.convolve(sPSF, sQ_upsampled)
Here's some explanation of the parameters. N is the number of baud samples. You need 4 times as many bits (in the case of QAM) as samples. I made the sPSF array return with N elements so we can see the signal with leading and trailing samples. See the Wikipedia Root-raised-cosine filter page for explanation of parameter alpha. Ts is the symbol period in seconds and Fs is the number of filter samples per Ts. I like to pretend Ts=1 to keep things simple (unit symbol rate). Then Fs is the number of complex waveform samples per baud point.
If you use return element 0 from rrcosfilter to get the sample time indexes, you need to insert the correct symbol period and filter sample rate in Ts and Fs for the index values to be correctly scaled.
It would be nice to have the root-raised cosine filter standardized in a common package. Here is my implementation in the meantime, based on commpy. It is vectorized with numpy, and normalized without consideration of the symbol rate.
def raised_root_cosine(upsample, num_positive_lobes, alpha):
    """
    Root raised cosine (RRC) filter (FIR) impulse response.

    upsample: number of samples per symbol
    num_positive_lobes: number of positive overlapping symbols;
        the filter spans 2 * num_positive_lobes + 1 symbols, i.e.
        upsample * (2 * num_positive_lobes + 1) samples
    alpha: roll-off factor (alpha == 0 is accepted and yields a sinc pulse)

    Returns a 1-D float array of filter taps. Time is measured in symbol
    periods, and the tap at t == 0 has the value 1 - alpha + 4 * alpha / pi.
    """
    N = upsample * (num_positive_lobes * 2 + 1)
    # Sample instants in units of the symbol period.
    t = (np.arange(N) - N / 2) / upsample

    # Builtin float/bool are used as dtypes: the np.float / np.bool aliases
    # were removed from NumPy.
    h_rrc = np.zeros(t.size, dtype=float)

    # Special case 1: t == 0, where the general formula is 0 / 0.
    at_zero = t == 0
    h_rrc[at_zero] = 1.0 - alpha + (4 * alpha / np.pi)

    # Special case 2: |t| == 1 / (4 * alpha), where the general denominator
    # vanishes. Testing |4 * alpha * t| against 1 avoids dividing by alpha
    # (so alpha == 0 is safe) and tolerates floating-point rounding.
    at_pole = np.isclose(np.abs(4 * alpha * t), 1.0)
    if at_pole.any():
        h_rrc[at_pole] = (alpha / np.sqrt(2)) \
            * (((1 + 2 / np.pi) * (np.sin(np.pi / (4 * alpha))))
               + ((1 - 2 / np.pi) * (np.cos(np.pi / (4 * alpha)))))

    # Base case: every remaining sample uses the closed-form expression.
    regular = ~(at_zero | at_pole)
    ti = t[regular]
    numerator = np.sin(np.pi * ti * (1 - alpha)) \
        + 4 * alpha * ti * np.cos(np.pi * ti * (1 + alpha))
    h_rrc[regular] = numerator / (np.pi * ti * (1 - (4 * alpha * ti) ** 2))
    return h_rrc
commpy doesn't seem to be released yet. But here is my nugget of knowledge.
# Raised-cosine FIR taps, computed sample by sample:
#   h(t) = sinc(t/T) * cos(pi*beta*t/T) / (1 - (2*beta*t/T)**2)
# where sinc is the normalized sinc, sin(pi*x)/(pi*x), as implemented by np.sinc.
beta = 0.20  # roll off factor
Tsample = 1.0  # sampling period
oversampling_rate = 8  # oversampling of the bit stream, this gives samples per symbol
                       # must be at least 2X the bit rate
Tsymbol = oversampling_rate * Tsample  # pulse duration should be at least 2 * Ts
span = 50  # number of symbols to span, must be even
n = span * oversampling_rate  # length of the filter = samples per symbol * symbol span
# t_step must be from -span/2 to +span/2 symbols.
t_step = Tsample * np.linspace(-n / 2, n / 2, n + 1)  # n+1 to include 0 time
BW = (1 + beta) / Tsymbol
a = np.zeros_like(t_step)
for i, t in enumerate(t_step):
    # Dimensionless position of the singularity: the general denominator
    # 1 - x**2 vanishes at |x| == 1, i.e. t == +/- Tsymbol / (2 * beta).
    x = 2.0 * beta * t / Tsymbol
    if np.isclose(abs(x), 1.0):
        # Limit value at the singular point (tolerant float comparison).
        a[i] = np.pi / 4 * np.sinc(t / Tsymbol)
        print('i = %d' % i)
    elif t == 0:
        a[i] = np.cos(beta * np.pi * t / Tsymbol) / (1 - x ** 2)
        print('t = 0 captured')
        print('i = %d' % i)
    else:
        # np.sinc already includes the factor of pi, so its argument is
        # t/Tsymbol (not pi*t/Tsymbol); this puts the zero crossings at
        # integer multiples of the symbol period.
        numerator = np.sinc(t / Tsymbol) * np.cos(np.pi * beta * t / Tsymbol)
        denominator = 1.0 - x ** 2
        a[i] = numerator / denominator
# a = a/sum(a)  # optional: normalize total power
# Optional diagnostic plot of the filter taps `a`: frequency response
# (magnitude in dB and unwrapped phase on twin y-axes) on top, taps below.
# Requires `signal` (scipy.signal) and `plt` (matplotlib.pyplot) to be in
# scope, plus the tap array `a` computed earlier in the script.
plot_filter = 0  # set to 1 to show the plots
if plot_filter == 1:
    w, h = signal.freqz(a)
    fig = plt.figure()

    # Top panel: create the axes once and draw magnitude on the left y-axis.
    ax1 = fig.add_subplot(211)
    ax1.set_title('Digital filter (raised cosine) frequency response')
    ax1.plot(w / np.pi, 20 * np.log10(abs(h)), 'b')
    # (plot abs(h) instead for a linear-magnitude view)
    ax1.set_ylabel('Amplitude (dB)', color='b')
    ax1.set_xlabel(r'Normalized Frequency ($\pi$ rad/sample)')

    # Phase shares the x-axis and gets its own y-axis on the right.
    ax2 = ax1.twinx()
    angles = np.unwrap(np.angle(h))
    ax2.plot(w / np.pi, angles, 'g')
    ax2.set_ylabel('Angle (radians)', color='g')
    ax2.grid()
    ax2.axis('tight')

    # Bottom panel: the filter taps. Drawn before the single show() call so
    # both panels appear in the same figure.
    ax3 = fig.add_subplot(212)
    ax3.stem(a)
    plt.show()
I think the correct response is to generate the desired impulse response. For a raised cosine filter the function is
h(n) = (sinc(n/T)*cos(pi * alpha* n /T)) / (1-4*(alpha*n/T)**2)
Select the number of points for your filter and generate the weights.
output = scipy.signal.convolve(signal_in, h)
This is basically the same function as in CommPy but much smaller in code:
def rcosfilter(N, beta, Ts, Fs):
    """Return N taps of a raised-cosine filter impulse response.

    N: number of taps
    beta: roll-off factor (beta == 0 is accepted and yields a sinc pulse)
    Ts: symbol period
    Fs: sample rate; tap k is evaluated at t = (k - N / 2) / Fs

    Returns a 1-D array with
        sinc(t/Ts) * cos(pi*beta*t/Ts) / (1 - (2*beta*t/Ts)**2)
    and the limit value pi/4 * sinc(t/Ts) where the denominator vanishes.
    """
    t = (np.arange(N) - N / 2) / Fs
    # Dimensionless form of the singularity test |2t| == Ts / beta: avoids
    # dividing by beta and tolerates floating-point rounding.
    x = 2 * beta * t / Ts
    singular = np.isclose(np.abs(x), 1.0)
    # np.where evaluates both branches everywhere, so substitute a harmless
    # denominator at the singular samples to avoid dividing by zero.
    denom = np.where(singular, 1.0, 1 - x ** 2)
    sinc = np.sinc(t / Ts)
    return np.where(singular,
                    np.pi / 4 * sinc,
                    sinc * np.cos(np.pi * beta * t / Ts) / denom)
SciPy will support any filter. Just calculate the impulse response and use any of the appropriate scipy.signal filter/convolve functions.