I've written a wave superposition program that overlaps wave equations of multiple wave sources and then gives a single wave which contains all the constructive and destructive interferences and plots the intensities of superpositions. This code works but is very inefficient.
If I give the points here as a 1000x1000 grid, the whole program takes a while to run.
Is there any way I can make this code more efficient and clean using one or all of the following (Functions, lambda functions, mappable, defining 2D numpy arrays directly or similar?).
If so, is there a way to measure the time it takes to run the operation. This isn't homework, am trying to build something on my own for my optics research. Thanks so much for your help in advance, I really appreciate it.
import numpy as np
from matplotlib import pyplot as plt
from sklearn import mixture
import matplotlib as mpl
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
import scipy
import scipy.ndimage
import scipy.misc
xmin, xmax = 0,25
ymin, ymax = -12.500,12.500
xpoints, ypoints = 500,500
points,amp,distance,wave=[],[],[],[]
numsource=11
x=np.linspace(xmin, xmax, xpoints)
y=np.linspace(ymin, ymax, ypoints)
xx,yy=np.meshgrid(x,y, sparse=False)
pointer = np.concatenate([xx.reshape(-1,1),yy.reshape(-1, 1)], axis=-1)
counter=len(pointer)
A=[0]*counter #for storing amplitudes later
# Arrays of point source locations
source=tuple([0,(((numsource-1)/2)-i)*2.5] for i in range(0,numsource-1))
# Arrays of Subtraction of Coordinates of Sources from Point sources (For Distance from source)
points=[(pointer-source[p]) for p in range(0,numsource-1)]
# distance of each point in map from its source (Sqrt of Coordinate difference sum)
distance=[(points[i][:,0]**2 + points[i][:,1]**2)**0.5 for i in range(0,numsource-1)]
# Amplitudes of each wave defined arbitrarily
amp= np.array([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4])
k=20
# wave equation for each wave based on defined amplitude and distance from source
wave=([amp[i] * np.sin (k*distance[i]) for i in range(0,numsource-1)])
#superposition
for i in range(0,numsource-1):
A=A+wave[i]
A=np.asarray(A)
print(A)
intensity = A**2
#constructive, destructive superposition plot
plt.figure(figsize=(10,10))
plt.xlim(xmin,xmax)
plt.ylim(ymin,ymax)
plt.scatter(pointer[:,0], pointer[:,1],c=intensity, cmap='viridis')
plt.colorbar()
I managed to reduce the computation time from 166 to 146 ms on my machine.
You want to get rid of the part where A is initialized as a 250,000x1 list then converted to an array. You can initialize it as an array directly. The parts I disabled are left as comments in your code.
#import matplotlib as mpl
from matplotlib import pyplot as plt
#from mpl_toolkits.mplot3d import Axes3D
#from matplotlib import cm
import numpy as np
#import scipy
#import scipy.ndimage
#import scipy.misc
#from sklearn import mixture
import time
class time_this_scope():
"A handy context manager to measure timings."""
def __init__(self):
self.t0 = None
self.dt = None
def __enter__(self):
self.t0 = time.perf_counter()
return self
def __exit__(self, *args):
self.dt = time.perf_counter() - self.t0
print(f"This scope took {int(self.dt*1000)} ms.")
def main():
xmin, xmax = 0, 25
ymin, ymax = -12.500, 12.500
xpoints, ypoints = 500, 500
points, amp, distance, wave = [], [], [], []
numsource = 11
x = np.linspace(xmin, xmax, xpoints)
y = np.linspace(ymin, ymax, ypoints)
xx, yy = np.meshgrid(x, y, sparse=False)
pointer = np.concatenate([xx.reshape(-1, 1), yy.reshape(-1, 1)], axis=-1)
# counter = len(pointer)
# A = [0]*counter #for storing amplitudes later
A = np.zeros(len(pointer))
# Arrays of point source locations
source = tuple((0, (((numsource-1)/2)-i)*2.5) for i in range(0, numsource-1))
# Arrays of Subtraction of Coordinates of Sources from Point sources (For Distance from source)
points = [(pointer-source[i]) for i in range(0, numsource-1)]
# distance of each point in map from its source (Sqrt of Coordinate difference sum)
distance = [(points[i][:,0]**2 + points[i][:,1]**2)**0.5 for i in range(0, numsource-1)]
# Amplitudes of each wave defined arbitrarily
# amp = np.array([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4])
amp = np.array([4]*numsource)
k = 20
# wave equation for each wave based on defined amplitude and distance from source
wave = [amp[i] * np.sin(k*distance[i]) for i in range(0, numsource-1)]
#superposition
for i in range(0, numsource-1):
A = A + wave[i]
# A = np.asarray(A)
print(A)
intensity = A**2
#constructive, destructive superposition plot
# plt.figure(figsize=(10, 10))
# plt.xlim(xmin, xmax)
# plt.ylim(ymin, ymax)
# plt.scatter(pointer[:,0], pointer[:,1], c=intensity, cmap='viridis')
# plt.colorbar()
# Timing.
total_time = 0
N = 20
for _ in range(N):
with time_this_scope() as timer:
main()
total_time += timer.dt
print(total_time/N)
Now the plotting itself still takes 500 ms, so I thinkimshow would be a better option than scatter. I doubt matplotlib is very happy to have 250 000 markers to render.
Related
Using NASA's SRTM data, I've generated a global elevation heatmap.
The problem is, however, the continents tend to blend in with the ocean because of the range of elevation values. Is it possible to change the colorbar's scale so that the edges of the continents are more distinct from the ocean? I've tried different cmaps, but they all seem to suffer from the problem.
Here is my code. I'm initializing a giant array (with 0s) to hold global elevation data, and then populating it file by file from the SRTM dataset. Each file is 1 degree latitude by 1 degree longitude.
Another question I had was regarding the map itself. For some reason, the Appalachian Mountains seem to have disappeared entirely.
import os
import numpy as np
from .srtm_map import MapGenerator
from ..utils.hgt_parser import HGTParser
from tqdm import tqdm
import cv2
import matplotlib.pyplot as plt
import richdem as rd
class GlobalMapGenerator():
def __init__(self):
self.gen = MapGenerator()
self.base_dir = "data/elevation/"
self.hgt_files = os.listdir(self.base_dir)
self.global_elevation_data = None
def shrink(data, rows, cols):
return data.reshape(rows, data.shape[0]/rows, cols, data.shape[1]/cols).sum(axis=1).sum(axis=2)
def GenerateGlobalElevationMap(self, stride):
res = 1201//stride
max_N = 59
max_W = 180
max_S = 56
max_E = 179
# N59 --> N00
# S01 --> S56
# E000 --> E179
# W180 --> W001
# Initialize array global elevation
self.global_elevation_data = np.zeros(( res*(max_S+max_N+1), res*(max_E+max_W+1) ))
print("Output Image Shape:", self.global_elevation_data.shape)
for hgt_file in tqdm(self.hgt_files):
lat_letter = hgt_file[0]
lon_letter = hgt_file[3]
lat = int(hgt_file[1:3])
lon = int(hgt_file[4:7])
if lat_letter == "S":
# Shift south down by max_N, but south starts at S01 so we translate up by 1 too
lat_trans = max_N + lat - 1
else:
# Bigger N lat means further up. E.g. N59 is at index 0 and is higher than N00
lat_trans = max_N - lat
if lon_letter == "E":
# Shift east right by max_W
lon_trans = max_W + lon
else:
# Bigger W lon means further left. E.g. W180 is at index 0 and is more left than W001
lon_trans = max_W - lon
# load in data from file as resized
data = cv2.resize(HGTParser(os.path.join(self.base_dir, hgt_file)), (res, res))
# generate bounds (x/y --> lon.lat for data from this file for the giant array)
lat_bounds = [res*lat_trans, res*(lat_trans+1)]
lon_bounds = [res*lon_trans, res*(lon_trans+1)]
try:
self.global_elevation_data[ lat_bounds[0]:lat_bounds[1], lon_bounds[0]:lon_bounds[1] ] = data
except:
print("REFERENCE ERROR: " + hgt_file)
print("lat: ", lat_bounds)
print("lon: ", lon_bounds)
# generate figure
plt.figure(figsize=(20,20))
plt.imshow(self.global_elevation_data, cmap="rainbow")
plt.title("Global Elevation Heatmap")
plt.colorbar()
plt.show()
np.save("figures/GlobalElevationMap.npy", self.global_elevation_data)
plt.savefig("figures/GlobalElevationMap.png")
def GenerateGlobalSlopeMap(self, stride):
pass
Use a TwoSlopeNorm (docs) for your norm, like the example here.
From the example:
Sometimes we want to have a different colormap on either side of a conceptual center point, and we want those two colormaps to have different linear scales. An example is a topographic map where the land and ocean have a center at zero, but land typically has a greater elevation range than the water has depth range, and they are often represented by a different colormap.
If you set the midpoint at sea level (0), then you can have two very different scalings based on ocean elevation vs land elevation.
Example code (taken from the example linked above):
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import matplotlib.cbook as cbook
from matplotlib import cm
dem = cbook.get_sample_data('topobathy.npz', np_load=True)
topo = dem['topo']
longitude = dem['longitude']
latitude = dem['latitude']
fig, ax = plt.subplots()
# make a colormap that has land and ocean clearly delineated and of the
# same length (256 + 256)
colors_undersea = plt.cm.terrain(np.linspace(0, 0.17, 256))
colors_land = plt.cm.terrain(np.linspace(0.25, 1, 256))
all_colors = np.vstack((colors_undersea, colors_land))
terrain_map = colors.LinearSegmentedColormap.from_list(
'terrain_map', all_colors)
# make the norm: Note the center is offset so that the land has more
# dynamic range:
divnorm = colors.TwoSlopeNorm(vmin=-500., vcenter=0, vmax=4000)
pcm = ax.pcolormesh(longitude, latitude, topo, rasterized=True, norm=divnorm,
cmap=terrain_map, shading='auto')
# Simple geographic plot, set aspect ratio beecause distance between lines of
# longitude depends on latitude.
ax.set_aspect(1 / np.cos(np.deg2rad(49)))
ax.set_title('TwoSlopeNorm(x)')
cb = fig.colorbar(pcm, shrink=0.6)
cb.set_ticks([-500, 0, 1000, 2000, 3000, 4000])
plt.show()
See how it scales numbers with this simple usage (from docs):
>>> import matplotlib. Colors as mcolors
>>> offset = mcolors.TwoSlopeNorm(vmin=-4000., vcenter=0., vmax=10000)
>>> data = [-4000., -2000., 0., 2500., 5000., 7500., 10000.]
>>> offset(data)
array([0., 0.25, 0.5, 0.625, 0.75, 0.875, 1.0])
I am trying to fit a smoothing B-spline to some data and I found this very helpful post on here. However, I not only need the spline, but also its derivatives, so I tried to add the following code to the example:
tck_der = interpolate.splder(tck, n=1)
x_der, y_der, z_der = interpolate.splev(u_fine, tck_der)
For some reason this does not seem to work due to some data type issues. I get the following traceback:
Traceback (most recent call last):
File "interpolate_point_trace.py", line 31, in spline_example
tck_der = interpolate.splder(tck, n=1)
File "/home/user/anaconda3/lib/python3.7/site-packages/scipy/interpolate/fitpack.py", line 657, in splder
return _impl.splder(tck, n)
File "/home/user/anaconda3/lib/python3.7/site-packages/scipy/interpolate/_fitpack_impl.py", line 1206, in splder
sh = (slice(None),) + ((None,)*len(c.shape[1:]))
AttributeError: 'list' object has no attribute 'shape'
The reason for this seems to be that the second argument of the tck tuple contains a list of numpy arrays. I thought turning the input data to be a numpy array as well would help, but it does not change the data types of tck.
Does this behavior reflect an error in scipy, or is the input malformed?
I tried manually turning the list into an array:
tck[1] = np.array(tck[1])
but this (which didn't surprise me) also gave an error:
ValueError: operands could not be broadcast together with shapes (0,8) (7,1)
Any ideas of what the problem could be? I have used scipy before and on 1D splines the splder function works just fine, so I assume it has something to do with the spline being a line in 3D.
------- edit --------
Here is a minimum working example:
import numpy as np
import matplotlib.pyplot as plt
from scipy import interpolate
from mpl_toolkits.mplot3d import Axes3D
total_rad = 10
z_factor = 3
noise = 0.1
num_true_pts = 200
s_true = np.linspace(0, total_rad, num_true_pts)
x_true = np.cos(s_true)
y_true = np.sin(s_true)
z_true = s_true / z_factor
num_sample_pts = 80
s_sample = np.linspace(0, total_rad, num_sample_pts)
x_sample = np.cos(s_sample) + noise * np.random.randn(num_sample_pts)
y_sample = np.sin(s_sample) + noise * np.random.randn(num_sample_pts)
z_sample = s_sample / z_factor + noise * np.random.randn(num_sample_pts)
tck, u = interpolate.splprep([x_sample, y_sample, z_sample], s=2)
x_knots, y_knots, z_knots = interpolate.splev(tck[0], tck)
u_fine = np.linspace(0, 1, num_true_pts)
x_fine, y_fine, z_fine = interpolate.splev(u_fine, tck)
# this is the part of the code I inserted: the line under this causes the crash
tck_der = interpolate.splder(tck, n=1)
x_der, y_der, z_der = interpolate.splev(u_fine, tck_der)
# end of the inserted code
fig2 = plt.figure(2)
ax3d = fig2.add_subplot(111, projection='3d')
ax3d.plot(x_true, y_true, z_true, 'b')
ax3d.plot(x_sample, y_sample, z_sample, 'r*')
ax3d.plot(x_knots, y_knots, z_knots, 'go')
ax3d.plot(x_fine, y_fine, z_fine, 'g')
fig2.show()
plt.show()
Stumbled into the same problem...
I circumvented the error by using interpolate.splder(tck, n=1) and instead used interpolate.splev(spline_ev, tck, der=1) which returns the derivatives at the points spline_ev (see Scipy Doku).
If you need the spline I think you can then use interpolate.splprep() again.
In total something like:
import numpy as np
from scipy import interpolate
import matplotlib.pyplot as plt
points = np.random.rand(10,2) * 10
(tck, u), fp, ier, msg = interpolate.splprep(points.T, s=0, k=3, full_output=True)
spline_ev = np.linspace(0.0, 1.0, 100, endpoint=True)
spline_points = interpolate.splev(spline_ev, tck)
# Calculate derivative
spline_der_points = interpolate.splev(spline_ev, tck, der=1)
spline_der = interpolate.splprep(spline_der_points.T, s=0, k=3, full_output=True)
# Plot the data and derivative
fig = plt.figure()
plt.plot(points[:,0], points[:,1], '.-', label="points")
plt.plot(spline_points[0], spline_points[1], '.-', label="tck")
plt.plot(spline_der_points[0], spline_der_points[1], '.-', label="tck_der")
# Show tangent
plt.arrow(spline_points[0][23]-spline_der_points[0][23], spline_points[1][23]-spline_der_points[1][23], 2.0*spline_der_points[0][23], 2.0*spline_der_points[1][23])
plt.legend()
plt.show()
EDIT:
I also opened an Issue on Github and according to ev-br the usage of interpolate.splprep is depreciated and one should use make_interp_spline / BSpline instead.
As noted in other answers, splprep output is incompatible with splder, but is compatible with splev. And the latter can evaluate the derivatives.
However, for interpolation, there is an alternative approach, which avoids splprep altogether. I'm basically copying a reply on the SciPy issue tracker (https://github.com/scipy/scipy/issues/10389):
Here's an example of replicating the splprep outputs. First let's make sense out of the splprep output:
# start with the OP example
import numpy as np
from scipy import interpolate
points = np.random.rand(10,2) * 10
(tck, u), fp, ier, msg = interpolate.splprep(points.T, s=0, k=3, full_output=True)
# check the meaning of the `u` array: evaluation of the spline at `u`
# gives back the original points (up to a list/transpose)
xy = interpolate.splev(u, tck)
xy = np.asarray(xy)
np.allclose(xy.T, points)
Next, let's replicate it without splprep. First, build the u array: the curve is represented parametrically, and u is essentially an approximation for the arc length. Other parametrizations are possible, but here let's stick to what splprep does. Translating the pseudocode from the doc page, https://docs.scipy.org/doc/scipy/reference/generated/scipy.interpolate.splprep.html
vv = np.sum((points[1:, :] - points[:-1, :])**2, axis=1)
vv = np.sqrt(vv).cumsum()
vv/= vv[-1]
vv = np.r_[0, vv]
# check:
np.allclose(u, vv)
Now, interpolate along the parametric curve: points vs vv:
spl = interpolate.make_interp_spline(vv, points)
# check spl.t vs knots from splPrep
spl.t - tck[0]
The result, spl, is a BSpline object which you can evaluate, differentiate etc in a usual way:
np.allclose(points, spl(vv))
# differentiate
spl_derivative = spl.derivative(vv)
I have clustered my data (12000, 3) using sklearn Gaussian mixture model algorithm (GMM). I have 3 clusters. Each point of my data represents a molecular structure. I would like to know how could I sampled each cluster. I have tried with the function:
gmm = GMM(n_components=3).fit(Data)
gmm.sample(n_samples=20)
but it does preform a sampling of the whole distribution, but I need a sample of each one of the components.
Well this is not that easy since you need to calculate the eigenvectors of all covariance matrices. Here is some example code for a problem I studied
import numpy as np
from scipy.stats import multivariate_normal
import random
from operator import truediv
import itertools
from scipy import linalg
import matplotlib.pyplot as plt
import matplotlib as mpl
from sklearn import mixture
#import some data which can be used for gmm
mix = np.loadtxt("mixture.txt", usecols=(0,1), unpack=True)
#print(mix.shape)
color_iter = itertools.cycle(['navy', 'c', 'cornflowerblue', 'gold',
'darkorange'])
def plot_results(X, Y_, means, covariances, index, title):
#function for plotting the gaussians
splot = plt.subplot(2, 1, 1 + index)
for i, (mean, covar, color) in enumerate(zip(
means, covariances, color_iter)):
v, w = linalg.eigh(covar)
v = 2. * np.sqrt(2.) * np.sqrt(v)
u = w[0] / linalg.norm(w[0])
# as the DP will not use every component it has access to
# unless it needs it, we shouldn't plot the redundant
# components.
if not np.any(Y_ == i):
continue
plt.scatter(X[Y_ == i, 0], X[Y_ == i, 1], .8, color=color)
# Plot an ellipse to show the Gaussian component
angle = np.arctan(u[1] / u[0])
angle = 180. * angle / np.pi # convert to degrees
ell = mpl.patches.Ellipse(mean, v[0], v[1], 180. + angle, color=color)
ell.set_clip_box(splot.bbox)
ell.set_alpha(0.5)
splot.add_artist(ell)
plt.xlim(-4., 3.)
plt.ylim(-4., 2.)
gmm = mixture.GaussianMixture(n_components=3, covariance_type='full').fit(mix.T)
print(gmm.predict(mix.T))
plot_results(mix.T, gmm.predict(mix.T), gmm.means_, gmm.covariances_, 0,
'Gaussian Mixture')
So for my problem the resulting plot looked like this:
Edit: here the answer to your comment. I would use pandas to do this. Assume X is your feature matrix and y are your labels, then
import pandas as pd
y_pred = gmm.predict(X)
df_all_info = pd.concat([X,y,y_pred], axis=1)
In the resulting dataframe you can check all the information you want, you can even just exclude the samples the algorithm misclassified with:
df_wrong = df_all_info[df_all_info['name of y-column'] != df_all_info['name of y_pred column']]
I'm using matplotlib. I have a list of 600 values. I also have an polynomial function that I'm graphing with values between 0 and 600. I'm trying to multiply every point by the corresponding value in the list.
I could evaluate the polynomial in a loop, and do the multiplication there, but I would end up with a graph of points instead of a line.
I think I might need to use the Transformations framework, but not sure how to apply it to the graph.
Edit:
a = [5, 2, 3 ... 0, 2, 8] # 600 values
poly_a = polyfit(a)
deriv_a = polyder(poly_a)
b = [232, 342 ... 346, 183] # 600 values
I need to multiply deriv_a by b.
I think you're misunderstanding things a bit. This is what numpy is for (if you're using matplotlib it's already converting things to a numpy array when you plot, regardless.)
Just convert your "list of 600 values" to a numpy array and then evaluate the polynomial.
As an example:
import numpy as np
import matplotlib.pyplot as plt
# Your "list of 600 values"...
x = np.linspace(0, 10, 600)
# Evaluate a polynomial at each location in `x`
y = -1.3 * x**3 + 10 * x**2 - 3 * x + 10
plt.plot(x, y)
plt.show()
Edit:
Based on your edit, it sounds like you're asking how to use numpy.polyder?
Basically, you just want to use numpy.polyval to evaluate the polynomial returned by polyder at your point locations.
To build on the example above:
import numpy as np
import matplotlib.pyplot as plt
# Your "list of 600 values"...
x = np.linspace(0, 10, 600)
coeffs = [-1.3, 10, 3, 10]
# Evaluate a polynomial at each location in `x`
y = np.polyval(coeffs, x)
# Calculate the derivative
der_coeffs = np.polyder(coeffs)
# Evaluate the derivative on the same points...
y_prime = np.polyval(der_coeffs, x)
# Plot the two...
fig, (ax1, ax2) = plt.subplots(nrows=2)
ax1.plot(x, y)
ax1.set_title('Original Function')
ax2.plot(x, y_prime)
ax2.set_title('Deriviative')
plt.show()
I have 1000 large numbers, randomly distributed in range 37231 to 56661.
I am trying to use the stats.gaussian_kde but something does not work.
(maybe because of my poor knowledge of statistics?).
Here is the code:
from scipy import stats.gaussian_kde
import matplotlib.pyplot as plt
# 'data' is a 1D array that contains the initial numbers 37231 to 56661
xmin = min(data)
xmax = max(data)
# get evenly distributed numbers for X axis.
x = linspace(xmin, xmax, 1000) # get 1000 points on x axis
nPoints = len(x)
# get actual kernel density.
density = gaussian_kde(data)
y = density(x)
# print the output data
for i in range(nPoints):
print "%s %s" % (x[i], y[i])
plt.plot(x, density(x))
plt.show()
In the printout, I get x values in the column 1, and zeros in the column 2.
The plot shows a flat line.
I simply can not find the solution.
I tried a very wide range of X-es, the same result.
What is the problem? What am I doing wrong?
Could the large numbers be the cause?
I think what's happening is that your data array is made up of integers, which leads to problems:
>>> import numpy, scipy.stats
>>>
>>> data = numpy.random.randint(37231, 56661,size=10)
>>> xmin, xmax = min(data), max(data)
>>> x = numpy.linspace(xmin, xmax, 10)
>>>
>>> density = scipy.stats.gaussian_kde(data)
>>> density.dataset
array([[52605, 45451, 46029, 40379, 48885, 41262, 39248, 38247, 55987,
44019]])
>>> density(x)
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
but if we use floats:
>>> density = scipy.stats.gaussian_kde(data*1.0)
>>> density.dataset
array([[ 52605., 45451., 46029., 40379., 48885., 41262., 39248.,
38247., 55987., 44019.]])
>>> density(x)
array([ 4.42201513e-05, 5.51130237e-05, 5.94470211e-05,
5.78485526e-05, 5.21379448e-05, 4.43176188e-05,
3.66725694e-05, 3.06297511e-05, 2.56191024e-05,
2.01305127e-05])
I have made a function to do this. You can vary the bandwidth as a parameter of the function. That is, smaller number = more pointy, larger number = smoother. The default is 0.3.
It works in IPython notebook --pylab=inline
The number of bins is optimized and coded so will vary on the number of variables in your data.
import scipy.stats as stats
import matplotlib.pyplot as plt
import numpy as np
def hist_with_kde(data, bandwidth = 0.3):
#set number of bins using Freedman and Diaconis
q1 = np.percentile(data,25)
q3 = np.percentile(data,75)
n = len(data)**(.1/.3)
rng = max(data) - min(data)
iqr = 2*(q3-q1)
bins = int((n*rng)/iqr)
x = np.linspace(min(data),max(data),200)
kde = stats.gaussian_kde(data)
kde.covariance_factor = lambda : bandwidth
kde._compute_covariance()
plt.plot(x,kde(x),'r') # distribution function
plt.hist(data,bins=bins,normed=True) # histogram
data = np.random.randn(500)
hist_with_kde(data,0.25)