I have a code that generates random matrices of 0's and 1's, and I'd like to convert these matrices into scatter plots, where the coordinate corresponds to the matrix row/column, and the color of the scatter point corresponds to the value (red if 0, blue if 1 for example).
I've been able to do this with matplotlib, but my use case involves generating thousands of these images and matplotlib is quite slow for this purpose. For this reason I've been trying to use pyqtgraph, but am running into some trouble.
Matplotlib code:
import itertools
import random
import numpy as np
import matplotlib.pyplot as plt

d = 25  # d + 1 ones and d - 1 zeros are drawn per matrix
w = 10  # first matrix dimension (plotted along x)
l = 5   # second matrix dimension (plotted along y)

# NOTE(review): `num` and `path` are not defined in this snippet; they must be
# set before it can run.
for n in range(num):
    # Shuffled 0/1 list with d + 1 ones and d - 1 zeros, reshaped to w x l.
    lst = list(itertools.repeat(1, d + 1)) + list(itertools.repeat(0, d - 1))
    random.shuffle(lst)
    a = np.array(lst).reshape((w, l))
    # One scatter call per cell (w * l calls per image).
    for i in range(w):
        for j in range(l):
            if a[i, j] == 1:
                plt.scatter(i + 1, j + 1, c="red")
            else:
                plt.scatter(i + 1, j + 1, c="blue")
    # Write the finished image, then clear the figure for the next matrix.
    plt.savefig(path)
    plt.clf()
Pyqtgraph code attempt:
import pyqtgraph as pg
import pyqtgraph.exporters
import numpy as np
import itertools
import random

w = 10  # first matrix dimension (plotted along x)
l = 5   # second matrix dimension (plotted along y)
d = 25  # d + 1 ones and d - 1 zeros are drawn per matrix

# NOTE(review): `num` is not defined in this snippet.
for n in range(num):
    # A new plot window is opened for every image.
    plt = pg.plot()
    lst = list(itertools.repeat(1, d + 1)) + list(itertools.repeat(0, d - 1))
    random.shuffle(lst)
    a = np.array(lst).reshape((w, l))
    # One ScatterPlotItem per cell (w * l items per image).
    for i in range(w):
        for j in range(l):
            # NOTE(review): both branches pass brush=None, so ones and zeros
            # are drawn identically.
            if a[i, j] == 1:
                p = pg.ScatterPlotItem([i + 1], [j + 1], brush=None)
                plt.addItem(p)
            else:
                p = pg.ScatterPlotItem([i + 1], [j + 1], brush=None)
                plt.addItem(p)
    # NOTE(review): the original indentation was lost; the export is assumed
    # to run once per image. Every image is written to the same file name.
    exporter = pg.exporters.ImageExporter(plt.plotItem)
    exporter.parameters()['width'] = 100
    exporter.export('fileName.png')
The pyqtgraph code runs, but extremely slowly, so I must be doing something wrong due to my unfamiliarity with the package. Thank you for any help!
EDIT: Just to clarify, the desired end product is a grid of solid dots, with whitespace separating them. The number of red dots needs to be 26, and the number of blue dots 24, in a randomly shuffled order.
I think using a nested loop and running plt.scatter inside the loop is where your program is wasting a lot of time. It's best to run plt.scatter only once and instead pass a meshgrid of the (x, y) coordinates with the colors randomly shuffled.
For example, I can generate the same plot without any loops or conditionals, and I only need to call plt.scatter once instead of 5x10 = 50 times (once for every single point):
# 1-based coordinates of every grid cell.
x = np.arange(1, w + 1)
y = np.arange(1, l + 1)
xx, yy = np.meshgrid(x, y)
# d + 1 red and d - 1 blue markers (26 and 24 for d = 25), in random order.
colors = ['r'] * (d + 1) + ['b'] * (d - 1)
random.shuffle(colors)
# A single call draws all w * l points.
plt.scatter(xx, yy, color=colors)
I added some benchmarking to demonstrate the improvement in performance we're looking at:
import itertools
import random
import numpy as np
import matplotlib.pyplot as plt
d = 25  # d + 1 ones and d - 1 zeros are generated per matrix
w = 10  # first dimension of the reshaped matrix
l = 5  # second dimension of the reshaped matrix
## original program using matplotlib and nested loops
def make_matplotlib_grid():
    """Draw one random w x l grid with a separate plt.scatter call per cell.

    Kept deliberately unoptimised: it is the baseline of the benchmark.
    """
    # Shuffled 0/1 list with d + 1 ones and d - 1 zeros, reshaped to w x l.
    lst = list(itertools.repeat(1, d + 1)) + list(itertools.repeat(0, d - 1))
    random.shuffle(lst)
    a = np.array(lst).reshape((w, l))
    for i in range(w):
        for j in range(l):
            if a[i, j] == 1:
                plt.scatter(i + 1, j + 1, c="red")
            else:
                plt.scatter(i + 1, j + 1, c="blue")
## using numpy mesh grid
def make_matplotlib_meshgrid():
    """Draw one random w x l grid with a single plt.scatter call."""
    x = np.arange(1, w + 1)
    y = np.arange(1, l + 1)
    xx, yy = np.meshgrid(x, y)
    # d + 1 red and d - 1 blue markers (26 and 24 for d = 25), in random order.
    colors = ['r'] * (d + 1) + ['b'] * (d - 1)
    random.shuffle(colors)
    plt.scatter(xx, yy, color=colors)
## benchmarking to compare speed between the two methods
if __name__ == "__main__":
    import timeit

    n_plots = 10  # number of timed calls per method

    # Time the per-cell scatter baseline.
    setup = "from __main__ import make_matplotlib_grid"
    make_matplotlib_grid_time = timeit.timeit("make_matplotlib_grid()", setup=setup, number=n_plots)
    print(f"original program creates {n_plots} plots with an average time of {make_matplotlib_grid_time / n_plots} seconds")

    # Time the single-call meshgrid version.
    setup = "from __main__ import make_matplotlib_meshgrid"
    make_matplotlib_meshgrid_time = timeit.timeit("make_matplotlib_meshgrid()", setup=setup, number=n_plots)
    print(f"numpy meshgrid method creates {n_plots} plots with average time of {make_matplotlib_meshgrid_time / n_plots} seconds")

    # Ratio of the two total times.
    print(f"on average, the numpy meshgrid method is roughly {make_matplotlib_grid_time / make_matplotlib_meshgrid_time}x faster")
Output:
original program creates 10 plots with an average time of 0.1041847709 seconds
numpy meshgrid method creates 10 plots with average time of 0.003275972299999985 seconds
on average, the numpy meshgrid method is roughly 31.80270202528894x faster
Related
I am working through example 8.1 titled Euler's Method from Mark Newman's book Computational Physics. I rewrote the example as a method with Numpy arrays but when I plot it I get two plots on the same figure not sure how to correct it. Also is there better way to convert my 2 1D arrays into 1 2D array to use for plotting in Matplotlib, thanks.
Newman's example :
from math import sin
from numpy import arange
from pylab import plot,xlabel,ylabel,show
def f(x,t):
    """Return -x**3 + sin(t), the slope used by the Euler update below."""
    return -x**3 + sin(t)
a = 0.0 # Start of the interval
b = 10.0 # End of the interval
N = 1000 # Number of steps
h = (b-a)/N # Size of a single step
x = 0.0 # Initial condition

tpoints = arange(a,b,h)
xpoints = []
for t in tpoints:
    # Record the current value, then advance it by one Euler step.
    xpoints.append(x)
    x += h*f(x,t)

plot(tpoints,xpoints)
xlabel("t")
ylabel("x(t)")
show()
My modifications:
from pylab import plot,show,xlabel,ylabel
from numpy import linspace,exp,sin,zeros,vstack,column_stack
def f(x,t):
    """Return -x**3 + sin(t), the slope used by Euler()."""
    return (-x**(3) + sin(t))
def Euler(f,x0,a,b):
    """Integrate with N = 1000 Euler steps and return an (N, 2) array.

    Column 0 holds the time grid and column 1 the solution values, starting
    from x0 at the first grid point.
    """
    N=1000
    h = (b-a)/N
    # NOTE(review): linspace(a, b, N) includes b, so the grid spacing is
    # (b-a)/(N-1) rather than the step h used in the update.
    t = linspace(a,b,N)
    x = zeros(N,float)
    y = x0
    for i in range(N):
        # Store the current value, then advance it by one Euler step.
        x[i] = y
        y += h*f(x[i],t[i])
    return column_stack((t,x)) #vstack((t,x)).T
# Integrate from t = 0 to t = 10 with x(0) = 0 and hand the stacked
# two-column result to plot() as a single argument.
trajectory = Euler(f, 0.0, 0.0, 10.0)
plot(trajectory)
xlabel("t")
ylabel("x(t)")
show()
The reason you get two lines is that t as well as x are plotted against their index, instead of x plotted against t
I don't see why you'd want to stack the two arrays. Just keep them separate, which will also solve the problem of the two plots.
The following works fine.
import numpy as np
import matplotlib.pyplot as plt
def f(x, t):
    """Return -x**3 + sin(t), the slope passed to Euler()."""
    return -x**3 + np.sin(t)
def Euler(f, x0, a, b, N=1000):
    """Integrate dx/dt = f(x, t) from a towards b with the explicit Euler method.

    Parameters:
        f: callable f(x, t) returning the slope.
        x0: value of x at t = a.
        a, b: start and end of the interval.
        N: number of steps (default 1000).

    Returns:
        (t, x): two length-N arrays; t[i] = a + i*h with h = (b - a)/N, and
        x[i] is the Euler approximation at t[i].
    """
    h = (b - a) / N
    # Grid a, a+h, ..., b-h. linspace(a, b, N) would give spacing
    # (b-a)/(N-1), which does not match the step h used in the update.
    t = a + h * np.arange(N)
    x = np.zeros(N, float)
    y = x0
    for i in range(N):
        # Store the current value, then advance it by one Euler step.
        x[i] = y
        y = y + h * f(x[i], t[i])
    return t, x
# Integrate from t = 0 to t = 10 with x(0) = 0, keeping the time grid and
# the solution as two separate arrays.
solution = Euler(f, 0.0, 0.0, 10.0)
t, x = solution
# Plot x against t.
plt.plot(t, x)
plt.xlabel("t")
plt.ylabel("x(t)")
plt.show()
I have a program where I have to find x.
But I have to use the special function Ei - the exponential integral, and x is inside the argument of Ei.
So Python isn't recognizing it.
$\operatorname{Ei}(m x) = t\,e^{r} + \operatorname{Ei}(c)$
Here the right-hand side is a constant, and so is m.
I want to find the value of x, and then append it to a list. But Python isn't able to do this.
from scipy import special
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
# Tabulate expi on [0, 10) in steps of 0.1 for the first curve.
Y = []
X = np.arange(0,10,.1)
for i in X:
    # NOTE(review): only `special` is imported above, so the bare name
    # `scipy` is unbound here.
    y = scipy.special.expi(i)
    Y.append(y)

N_0 = 2
t_f = 100
r = 2
K = 100
N_t = [N_0,]
t = np.arange(0,100,1)
for i in t:
    # NOTE(review): `e` is not defined anywhere in this snippet.
    l = i*e**r + scipy.special.expi(r*N_t[i]/K)
    N_t.append(l)

plt.plot(X,Y)
# NOTE(review): N_t ends up one element longer than t (initial value plus one
# append per element of t).
plt.plot(t,N_t)
# NOTE(review): missing parentheses, so show is referenced but never called.
plt.show
I've corrected some mistakes in your code to give the following. You should compare this with your code line by line.
from scipy.special import expi
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
# Tabulate expi on [0, 10) in steps of 0.1 for the first curve.
Y = []
X = np.arange(0,10,.1)
for i in X:
    y = expi(i)
    Y.append(y)

N_0 = 2
t_f = 100
r = 2
K = 100
N_t = [N_0,]
t = np.arange(0,100,1)
for i in t:
    # Each new value is computed from the previous entry of N_t.
    l = i*np.exp(r) + expi(r*N_t[i]/K)
    N_t.append(l)

plt.plot(X,Y)
# NOTE(review): N_t holds len(t) + 1 values (initial value plus one append
# per element of t), so this call receives arrays of different lengths.
plt.plot(t,N_t)
plt.show()
However, there is still one possible flaw that I notice and can't resolve. You plot X and t together in the same graph at the end yet X ranges over 0 to 10 and t ranges over 0 to 100. Is this what you intended?
Also matplotlib complains that the lengths of the vectors supplied to it in the second call to plot are not the same.
I am running a simulation and I need to update a plot of a matrix every iteration (or every n iterations for that matter). I am doing the plotting using matplotlib, in particular matshow. I tried replicating the code I saw in other StackOverflow questions but I haven't been successful. Currently the code just produces different windows with the new plots instead of updating the first one. Here's the code so far:
import numpy as np
import random
import math
import matplotlib.pyplot as plt
import matplotlib.animation as anim
# System variables initialization
N = 50
n_iter = 5
betaJ = 0.40
lattice = np.ones([N, N])
energy = -2*betaJ*N**2
choices = list(range(N))

plt.ion()
fig = plt.figure()

# Main cycle
for i in range(0, n_iter):
    # Pick random spin and calculate energy variation caused by flipping it
    x, y = random.choice(choices), random.choice(choices)
    # Neighbour indices wrap around the lattice edges via np.mod.
    neighbour_spin_sum = lattice[np.mod(x-1, N), y] + lattice[np.mod(x+1, N), y] + lattice[x, np.mod(y+1, N)] + lattice[x, np.mod(y-1, N)]
    delta_energy = 2*betaJ*(neighbour_spin_sum*lattice[x, y])
    # If energetically favorable, flip spin
    if delta_energy < 0:
        lattice[x, y] = -lattice[x, y]
    # Else flip with some probability
    elif random.uniform(0, 1) <= math.exp(-delta_energy):
        lattice[x, y] = -lattice[x, y]
    # plt.matshow is called on every iteration; this is the call the
    # question reports as opening a new window each time.
    plt.matshow(lattice)
    plt.draw()
    plt.pause(0.0001)
Thanks!
The issue is that every time the plt.matshow() is called matplotlib creates a new plotting axis. To get around this, define the axis and keep reusing it as shown below:
import numpy as np
import random
import math
import matplotlib.pyplot as plt
import matplotlib.animation as anim
# System variables initialization
N = 50
n_iter = 10000
betaJ = 0.40
lattice = np.ones([N, N])
energy = -2 * betaJ * N ** 2
choices = list(range(N))

plt.ion()
fig = plt.figure()
# Create the axes and the image exactly once. Adding a subplot and calling
# matshow inside the loop would stack a new artist on every iteration.
ax = fig.add_subplot(111)
# Spins are +1/-1, so pin the colour limits; the initial all-ones lattice
# would otherwise fix a degenerate colour range that set_data never updates.
image = ax.matshow(lattice, vmin=-1, vmax=1)

# Main cycle
for i in range(0, n_iter):
    # Pick random spin and calculate energy variation caused by flipping it
    x = random.choice(choices)
    y = random.choice(choices)
    # Neighbour indices wrap around the lattice edges via np.mod.
    neighbour_spin_sum = lattice[np.mod(x-1, N), y] + lattice[np.mod(x+1, N), y] + lattice[x, np.mod(y+1, N)] + lattice[x, np.mod(y-1, N)]
    delta_energy = 2*betaJ*(neighbour_spin_sum*lattice[x, y])
    # If energetically favorable, flip spin
    if delta_energy < 0:
        lattice[x, y] = -lattice[x, y]
    # Else flip with some probability
    elif random.uniform(0, 1) <= math.exp(-delta_energy):
        lattice[x, y] = -lattice[x, y]
    # Reuse the existing image: swap in the new data and redraw.
    image.set_data(lattice)
    fig.canvas.draw_idle()
    plt.pause(0.0001)
I am trying to convert this code from Matlab to Python:
% Iterate x(n+1) = z*x(n)*(1 - x(n)) for each z from 2.8 to 3.9 and plot
% rows 100..200 of the stored iterates against z.
% x(1) is set once and never overwritten (the inner loop only writes
% x(2)..x(201)), so the iteration restarts from 0.1 for every z.
x(1) = 0.1;
j = 0;
for z = 2.8:0.0011:3.9
j = j+1 %Gives progress of calculation
% Remember the z value belonging to column j.
zz(j) = z;
for n = 1:200
x(n+1) = z*x(n)*(1 - x(n));
% Store the n-th iterate for this z in row n, column j.
xn(n,j) = x(n);
end
end
% Plot rows 100..200 of xn against z as red dots.
h = plot(zz,xn(100:200,:),'r.');
set(h,'Markersize',3);
and so far I have got this:
import numpy as np
import matplotlib.pyplot as plt
x = []
x.append(0.1)
xn = []
j = 0
z_range = np.arange(2.8, 3.9, 0.0011)
n_range = range(0,200,1)
plt.figure()
for zz in z_range:
    j = j+1
    print(j) # Gives progress of calculation
    for n in n_range:
        # NOTE(review): x[n] always reads indices 0..199 while new values are
        # appended at the end of x, so from the second z onwards the inputs
        # are the values computed for the first z.
        w = zz * x[n] * (1.0-x[n])
        x.append(zz * x[n] * (1.0-x[n]))
        xn.append(w)
x = np.array(x)
xn = np.array(xn)
# One row per z value, then transposed to one column per z value.
xn_matrix = xn.reshape((z_range.size, len(n_range)))
xn_mat = xn_matrix.T
plt.figure()
#for i in z_range:
# plt.plot(z_range, xn_mat[0:i], 'r.')
plt.show()
I'm not sure if this is the best way to convert the for loops from Matlab into Python, and I seem to have problems with plotting the result. The x(n+1) = z*x(n)*(1 - x(n)); and xn(n,j) = x(n); lines in Matlab are bugging me, so could someone please explain if there is a more efficient way of writing this in Python?
import numpy as np
import matplotlib.pyplot as plt
# linspace is better for a non-integer step
zz = np.linspace(2.8, 3.9, 1001)
n_iter = 200
# preallocate xn: one row per z value, one column per iteration
xn = np.zeros([zz.size, n_iter])
# use enumerate instead of counting iterations
for j, z in enumerate(zz):
    print(j)
    # Restart from 0.1 for every z: in the MATLAB code x(1) is never
    # overwritten, so each z begins from the same initial value.
    x = 0.1
    for n in range(n_iter):
        # use tuple unpacking so old values of x are unneeded
        xn[j, n], x = x, z*x*(1 - x)
# MATLAB's 1-based rows 100:200 are 0-based columns 99..199 here.
plt.plot(zz, xn[:, 99:], 'r.', markersize=3)
plt.show()
I'm new to python, and I have this code for calculating the potential inside a 1x1 box using fourier series, but a part of it is going way too slow (marked in the code below).
If someone could help me with this, I suspect I could've done something with the numpy library, but I'm not that familiar with it.
import matplotlib.pyplot as plt
import pylab
import sys
from matplotlib import rc
# Turn off LaTeX rendering of figure text.
rc('text', usetex=False)
# Use a serif font family for figure text.
rc('font', family = 'serif')
#One of the boundary conditions for the potential.
def func1(x,n):
    """Return V_c * sin(n*pi*x)**2 with V_c = 1.

    Used as the integrand passed to quad() when computing the series
    coefficients in v().
    """
    V_c = 1
    V_0 = V_c * np.sin(n*np.pi*x)
    return V_0*np.sin(n*np.pi*x)
#To calculate the potential inside a box:
def v(x, y):
    """Return the truncated series value at the point (x, y).

    Sums the terms n = 1 .. 19. Every call re-integrates func1 for each n.
    """
    nmax = 20
    total = 0
    for n in range(1, nmax):
        # Coefficient for order n: integral of func1 over [0, 1].
        C_n, err = quad(func1, 0, 1, args=(n,))
        total = total + 2*(C_n/np.sinh(np.pi*n)*np.sin(n*np.pi*x)*np.sinh(n*np.pi*y))
    return total
def main(argv):
    """Plot the boundary functions, the boundary values of v and a contour of v.

    argv is accepted from the command-line entry point but is not used.
    """
    x_axis = np.linspace(0,1,100)
    y_axis = np.linspace(0,1,100)
    V_0 = np.zeros(100)
    V_1 = np.zeros(100)
    n = 4

    #Plotter for V0 = v_c * sin () x
    # NOTE(review): V_0_1 and V_0_2 are not defined in the code shown here.
    for i in range(100):
        V_0[i] = V_0_1(i/100, n)
    plt.plot(x_axis, V_0)
    plt.xlabel('x/L')
    plt.ylabel('V_0')
    plt.title('V_0(x) = sin(m*pi*x/L), n = 4')
    plt.show()

    #Plot for V_0 = V_c(1-(x-1/2)^4)
    for i in range(100):
        V_1[i] = V_0_2(i/100)
    plt.figure()
    plt.plot(x_axis, V_1)
    plt.xlabel('x/L')
    plt.ylabel('V_0')
    plt.title('V_0(x) = 1- (x/L - 1/2)^4)')
    #plt.legend()
    plt.show()

    #Plot V(x/L,y/L) on the boundary:
    V_0_Y = np.zeros(100)
    V_1_Y = np.zeros(100)
    V_X_0 = np.zeros(100)
    V_X_1 = np.zeros(100)
    for i in range(100):
        V_0_Y[i] = v(0, i/100)
        V_1_Y[i] = v(1, i/100)
        V_X_0[i] = v(i/100, 0)
        V_X_1[i] = v(i/100, 1)

    # V(x/L = 0, y/L):
    plt.figure()
    plt.plot(x_axis, V_0_Y)
    plt.title('V(x/L = 0, y/L)')
    plt.show()

    # V(x/L = 1, y/L):
    plt.figure()
    plt.plot(x_axis, V_1_Y)
    plt.title('V(x/L = 1, y/L)')
    plt.show()

    # V(x/L, y/L = 0):
    plt.figure()
    plt.plot(x_axis, V_X_0)
    plt.title('V(x/L, y/L = 0)')
    plt.show()

    # V(x/L, y/L = 1):
    plt.figure()
    plt.plot(x_axis, V_X_1)
    plt.title('V(x/L, y/L = 1)')
    plt.show()

    #Plot V(x,y)
    #######
    # This is where the code is way too slow, it takes like 10 minutes when n in v(x,y) is 20.
    #######
    V = np.zeros(10000).reshape((100,100))
    for i in range(100):
        for j in range(100):
            # Row index i is the y coordinate, column index j the x coordinate.
            V[i,j] = v(j/100, i/100)
    plt.figure()
    plt.contour(x_axis, y_axis, V, 50)
    plt.savefig('V_1')
    plt.show()


if __name__ == "__main__":
    main(sys.argv[1:])
You can find how to use FFT/DFT in this document :
Discretized continuous Fourier transform with numpy
Also, regarding your V matrix, there are many ways to improve the execution speed. One is to make sure you use Python 3, or xrange() instead of range() if you are still on Python 2. I usually put these lines in my Python code so that it runs the same way whether I use Python 3.* or 2.*:
# Where range() builds a real list (Python 2), alias it to the lazy xrange so
# no huge list is created in memory. On Python 3 the test is false and the
# built-in range is used unchanged.
if isinstance(range(2), list):
    range = xrange
Then, instead of recomputing j/100 and i/100 on every iteration, you can precompute these values once and store them in an array, since a division is much more costly than a multiplication. Something like:
# Precompute the 100 sample coordinates 0.00, 0.01, ..., 0.99 once.
# Dividing by a float keeps this correct under Python 2 integer division.
ratios = np.arange(100) / 100.0
V = np.zeros((100, 100))
for j in range(100):
    for i in range(100):
        # Column index j selects the x coordinate, row index i the y coordinate.
        V[i, j] = v(ratios[j], ratios[i])
Well, anyway, this is rather cosmetic and will not save your life; and you still need to call the function v()...
Then, you can use weave :
http://docs.scipy.org/doc/scipy-0.14.0/reference/tutorial/weave.html
Or write all your pure computation/loop code in C, compile it and generate a module which you can call from Python.
You should look into numpy's broadcasting tricks and vectorization (several references, one of the first good links that pops up is from Matlab but it is just as applicable to numpy - can anyone recommend me a good numpy link in the comments that I might point other users to in the future?).
What I saw in your code (once you remove all the unnecessary bits like plots and unused functions), is that you are essentially doing this:
from __future__ import division
from scipy.integrate import quad
import numpy as np
import matplotlib.pyplot as plt
def func1(x,n):
    """Return sin(n*pi*x)**2, the integrand passed to quad() in v()."""
    return 1*np.sin(n*np.pi*x)**2
def v(x, y):
    """Return the truncated series value at the point (x, y).

    Sums the terms n = 1 .. 19. Every call re-integrates func1 for each n,
    which is the repeated work this example is meant to expose.
    """
    nmax = 20
    total = 0
    for n in range(1, nmax):
        # Coefficient for order n: integral of func1 over [0, 1].
        C_n, err = quad(func1, 0, 1, args=(n,))
        total = total + 2*(C_n/np.sinh(np.pi*n)*np.sin(n*np.pi*x)*np.sinh(n*np.pi*y))
    return total
def main():
    """Evaluate v on a 100 x 100 grid point by point and draw its contour plot."""
    x_axis = np.linspace(0,1,100)
    y_axis = np.linspace(0,1,100)
    #######
    # This is where the code is way too slow, it takes like 10 minutes when n in v(x,y) is 20.
    #######
    V = np.zeros(10000).reshape((100,100))
    for i in range(100):
        for j in range(100):
            # Row index i is the y coordinate, column index j the x coordinate.
            V[i,j] = v(j/100, i/100)
    plt.figure()
    plt.contour(x_axis, y_axis, V, 50)
    plt.show()


if __name__ == "__main__":
    main()
If you look carefully (you could use a profiler too), you'll see that you're integrating your function func1 (which I'll rename into the integrand) about 20 times for each element in the 100x100 array V. However, the integrand doesn't change! So you can already bring it out of your loop. If you do that, and use broadcasting tricks, you could end up with something like this:
import numpy as np
from scipy.integrate import quad
import matplotlib.pyplot as plt
def integrand(x, n):
    """Return sin(n*pi*x)**2, the integrand of the n-th series coefficient."""
    return 1*np.sin(n*np.pi*x)**2


# Orders 1..19 along a leading axis so they broadcast against the (y, x) grid.
sine_order = np.arange(1, 20).reshape(-1, 1, 1)  # Make an array along the third dimension
# `np.float` was removed in NumPy 1.24; the built-in float gives the same dtype.
integration_results = np.empty_like(sine_order, dtype=float)
# Each coefficient is integrated exactly once, outside the grid evaluation.
for enu, order in enumerate(sine_order.ravel()):
    # Pass a plain int so the integrand returns a scalar to quad.
    integration_results[enu] = quad(integrand, 0, 1, args=(int(order),))[0]

# Open grids: y has shape (100, 1) and x has shape (1, 100).
y, x = np.ogrid[0:1:.01, 0:1:.01]
term = integration_results / np.sinh(np.pi * sine_order) * np.sin(sine_order * np.pi * x) * np.sinh(sine_order * np.pi * y)
# This is the key: you have a 3D matrix here and with this summation,
# you're basically squashing the entire 3D structure into a flat, 2D
# representation. This 'squashing' is done by means of a sum.
V = 2*np.sum(term, axis=0)

x_axis = np.linspace(0,1,100)
y_axis = np.linspace(0,1,100)

# Plot only when run as a script, so the computation above can be imported.
if __name__ == "__main__":
    plt.figure()
    plt.contour(x_axis, y_axis, V, 50)
    plt.show()
which runs in less than a second on my system.
Broadcasting becomes much more understandable if you take pen&paper and draw out the vectors that you are "broadcasting" as if you were constructing a building, from basic Tetris-blocks.
These two versions are functionally the same, but one is completely vectorized, while the other uses python for-loops. As a new user to python and numpy, I definitely recommend reading through the broadcasting basics. Good luck!