Improve performance of matplotlib for subset of data

Improve performance of matplotlib for subset of data - python

I have a little PyQT4 application which shows plots for a big data set (100k points x 14 channels). I just want to show a period of 128 points and click to show the next period.
My naive approach was to create the figures and plot only a subset of my data on each step in the loop. This leads to a loading time for quite a second and I thought this may be to much for this task.
Is there any way to improve the performance? Did I miss some matplotlib built-in functions to plot only a subset of data? I wouldn't mind a longer loading time at the beginning of the application, so maybe I could plot it all and zoom in?
EDIT: Provided a simple running example
Took 7.39s to plot 8 samples on my machine
import time
import matplotlib.pyplot as plt
import numpy as np
plt.ion()
num_channels = 14
num_samples = 1024
data = np.random.rand(num_channels, num_samples)
figure = plt.figure()
start = 0
period = 128
axes = []
for i in range(num_channels):
axes.append(figure.add_subplot(num_channels, 1, i+1))
end = start+period
x_values = [x for x in range(start, end)]
begin = time.time()
num_plot = 0
for i in range(0, num_samples, period):
num_plot += 1
end = start+period
for i, ax in enumerate(axes):
ax.hold(False)
ax.plot(x_values, data[i][start:end], '-')
ax.set_ylabel(i)
start += period
figure.canvas.draw()
print("Took %.2fs to plot %d samples" % (time.time()-begin, num_plot))

Using the #joe-kington answer from here: How to update a plot in matplotlib improved performance to a decent value.
I now only change the y-values of the line object using set_ydata().
The line object is returned when calling ax.plot() which is only called once.
EDIT: Added a running example:
Took 3.11s to plot 8 samples on my machine
import time
import matplotlib.pyplot as plt
import numpy as np
plt.ion()
num_channels = 14
num_samples = 1024
data = np.random.rand(num_channels, num_samples)
figure = plt.figure()
start = 0
period = 128
axes = []
for i in range(num_channels):
axes.append(figure.add_subplot(num_channels, 1, i+1))
end = start+period
x_values = [x for x in range(start, end)]
lines = []
begin = time.time()
num_plot = 1 # first plot
for i, ax in enumerate(axes):
ax.hold(False)
# save the line object
line, = ax.plot(x_values, data[i][start:end], '-')
lines.append(line)
ax.set_xlim([start,end])
ax.set_ylabel(i)
start += period
for _ in range(period, num_samples, period):
num_plot += 1
end = start + period
for i, line in enumerate(lines):
line.set_ydata(data[i][start:end])
start += period
figure.canvas.draw()
print("Took %.2fs to plot %d samples" % (time.time()-begin, num_plot))

Related

Real time data plotting from a high throughput source

I want to plot Real time in a way that updates fast.
The data I have:
arrives via serial port at 62.5 Hz
data corresponds to 32 sensors (so plot 32 lines vs time).
32points *62.5Hz = 2000 points/sec
The problem with my current plotting loop is that it runs slower than 62.5[Hz], meaning I miss some data coming in from serial port.
I am looking for any solution to this problem that allows for:
All data from serial port to be saved.
Plots the data (even skipping a few points/using averages/eliminating old points and only keeping the most recent)
Here is my code, I am using random data to simulate the serial port data.
import numpy as np
import time
import matplotlib.pyplot as plt
#extra plot debugging
hz_ = [] #list of speed
time_=[] #list for time vs Hz plot
#store all data generated
store_data = np.zeros((1, 33))
#only data to plot
to_plot = np.zeros((1, 33))
#color each line
colours = [f"C{i}" for i in range (1,33)]
fig,ax = plt.subplots(1,1, figsize=(10,8))
ax.set_xlabel('time(s)')
ax.set_ylabel('y')
ax.set_ylim([0, 300])
ax.set_xlim([0, 200])
start_time = time.time()
for i in range (100):
loop_time = time.time()
#make data with col0=time and col[1:11] = y values
data = np.random.randint(1,255,(1,32)).astype(float) #simulated data, usually comes in at 62.5 [Hz]
data = np.insert(data, 0, time.time()-start_time).reshape(1,33) #adding time for first column
store_data = np.append(store_data, data , axis=0)
to_plot = store_data[-100:,]
for i in range(1, to_plot.shape[1]):
ax.plot(to_plot[:,0], to_plot[:,i],c = colours[i-1], marker=(5, 2), linewidth=0, label=i)
#ax.lines = ax.lines[-33:] #This soluition speeds it up, to clear old code.
fig.canvas.draw()
fig.canvas.flush_events()
Hz = 1/(time.time()-loop_time)
#for time vs Hz plot
hz_.append(Hz)
time_.append( time.time()-start_time)
print(1/(time.time()-loop_time), "Hz - frequncy program loops at")
#extra fig showing how speed drops off vs time
fig,ax = plt.subplots(1,1, figsize=(10,8))
fig.suptitle('Decreasingn Speed vs Time', fontsize=20)
ax.set_xlabel('time(s)')
ax.set_ylabel('Hz')
ax.plot(time_, hz_)
fig.show()
I also tried while using
ax.lines = ax.lines[-33:]
to remove older points, and this speed up the plotting, but still slower than the speed i aquire data.
Any library/solution to make sure I collect all data and plot the general trendlines (so even not all points) is ok. Maybe something that runs acquiring data and plotting in parallel?

You could try to have two separate processes:
one for acquiring and storing the data
one for plotting the data
Below there are two basic scripts to get the idea.
You first run gen.py which starts to generate numbers and save them in a file.
Then, in the same directory, you can run plot.py which will read the last part of the file and will update the a Matplotlib plot.
Here is the gen.py script to generate data:
#!/usr/bin/env python3
import time
import random
LIMIT_TIME = 100 # s
DATA_FILENAME = "data.txt"
def gen_data(filename, limit_time):
start_time = time.time()
elapsed_time = time.time() - start_time
with open(filename, "w") as f:
while elapsed_time < limit_time:
f.write(f"{time.time():30.12f} {random.random():30.12f}\n") # produces 64 bytes
f.flush()
elapsed = time.time() - start_time
gen_data(DATA_FILENAME, LIMIT_TIME)
and here is the plot.py script to plot the data (reworked from this one):
#!/usr/bin/env python3
import io
import time
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.animation
BUFFER_LEN = 64
DATA_FILENAME = "data.txt"
PLOT_LIMIT = 20
ANIM_FILENAME = "video.gif"
fig, ax = plt.subplots(1, 1, figsize=(10,8))
ax.set_title("Plot of random numbers from `gen.py`")
ax.set_xlabel("time / s")
ax.set_ylabel("random number / #")
ax.set_ylim([0, 1])
def get_data(filename, buffer_len, delay=0.0):
with open(filename, "r") as f:
f.seek(0, io.SEEK_END)
data = f.read(buffer_len)
if delay:
time.sleep(delay)
return data
def animate(i, xs, ys, limit=PLOT_LIMIT, verbose=False):
# grab the data
try:
data = get_data(DATA_FILENAME, BUFFER_LEN)
if verbose:
print(data)
x, y = map(float, data.split())
if x > xs[-1]:
# Add x and y to lists
xs.append(x)
ys.append(y)
# Limit x and y lists to 10 items
xs = xs[-limit:]
ys = ys[-limit:]
else:
print(f"W: {time.time()} :: STALE!")
except ValueError:
print(f"W: {time.time()} :: EXCEPTION!")
else:
# Draw x and y lists
ax.clear()
ax.set_ylim([0, 1])
ax.plot(xs, ys)
# save video (only to attach here)
#anim = mpl.animation.FuncAnimation(fig, animate, fargs=([time.time()], [None]), interval=1, frames=3 * PLOT_LIMIT, repeat=False)
#anim.save(ANIM_FILENAME, writer='imagemagick', fps=10)
#print(f"I: Saved to `{ANIM_FILENAME}`")
# show interactively
anim = mpl.animation.FuncAnimation(fig, animate, fargs=([time.time()], [None]), interval=1)
plt.show()
plt.close()
Note that I have also included and commented out the portion of code that I used to generate the animated GIF above.
I believe this should be enough to get you going.

Limit the Number of Open Matplotlib Figures in Python 3

I have a small python 3 script:
import matplotlib.pyplot as plt;
i = 0;
while(i < 40):
x = [1,2,3,4,5];
y = [1,2,3,4,5];
fig = plt.figure();
grid = plt.GridSpec(1, 1)
axis = fig.add_subplot(grid[0,0]);
axis.bar(x,y);
fig.canvas.flush_events()
while(len(plt.get_fignums()) > 10):
pass;
plt.show(block=False);
i += 1;
My goal is to plot 40 plots. I want the first 10 to plot immediately, and then the next figures will only plot if one of the open 10 plots are closed, one by one. This script seems to almost achieve what I want to do, but it crashes when I try to close one of the first 10 plots. Why does this happen? Thanks

while loop is not proper idea for this situation, your code should listens for close event of figures, then act as desired. these few lines of code may help you:
import matplotlib.pyplot as plt;
desiredNumberOfPlots_initialPopulation=1
desiredNumberOfPlots_total=3
def figOnce():
x = [1,2,3,4,5];
y = [1,2,3,4,5];
fig = plt.figure();
grid = plt.GridSpec(1, 1)
axis = fig.add_subplot(grid[0,0]);
axis.bar(x,y);
fig.canvas.flush_events()
fig.canvas.mpl_connect('close_event', handle_close)
plt.show(block=False);
global desiredNumberOfPlots_total
desiredNumberOfPlots_total-=1
def handle_close(evt):
global desiredNumberOfPlots_total
if desiredNumberOfPlots_total>0:
figOnce()
i=1
while(i<=desiredNumberOfPlots_initialPopulation):
print(i)
i+=1
figOnce()

Updating matplotlib figures in real time for data acquisition

I want to plot data in matplotlib in real time. I want to open a figure once at the start of the programme, then update the figure when new data is acquired. Despite there being a few similar questions out there, none quite answer my specific question.
I want each set of data points new_data1 and new_data2 to be plotted on the same figure at the end of each while loop i.e. one line after the first while loop, two lines on the same figure after the second while loop etc. Currently they are all plotted together, but only right at the end of the programme, which is no use for real time data acquisition.
import matplotlib.pyplot as plt
import numpy
hl, = plt.plot([], [])
def update_line(hl, new_datax, new_datay):
hl.set_xdata(numpy.append(hl.get_xdata(), new_datax))
hl.set_ydata(numpy.append(hl.get_ydata(), new_datay))
plt.xlim(0, 50)
plt.ylim(0,200)
plt.draw()
x = 1
while x < 5:
new_data1 = []
new_data2 = []
for i in range(500):
new_data1.append(i * x)
new_data2.append(i ** 2 * x)
update_line(hl, new_data1, new_data2)
x += 1
else:
print("DONE")
This programme plots all 5 lines, but at the end of the programme. I want each line to be plotted after one another, after the while loop is completed. I have tried putting in plt.pause(0.001) in the function, but it has not worked.
This programme is different from the one that has been put forward - that programme only plots one graph and does not update with time.

If I correctly understood your specifications, you can modify just a bit your MWE as follows:
import matplotlib.pyplot as plt
import numpy
fig = plt.figure(figsize=(11.69,8.27))
ax = fig.gca()
ax.set_xlim(0, 50)
ax.set_ylim(0,200)
hl, = plt.plot([], [])
def update_line(hl, new_datax, new_datay):
# re initialize line object each time if your real xdata is not contiguous else comment next line
hl, = plt.plot([], [])
hl.set_xdata(numpy.append(hl.get_xdata(), new_datax))
hl.set_ydata(numpy.append(hl.get_ydata(), new_datay))
fig.canvas.draw_idle()
fig.canvas.flush_events()
x = 1
while x < 10:
new_data1 = []
new_data2 = []
for i in range(500):
new_data1.append(i * x)
new_data2.append(i ** 2 * x)
update_line(hl, new_data1, new_data2)
# adjust pause duration here
plt.pause(0.5)
x += 1
else:
print("DONE")
which displays :

Not sure, if I am reading the requirements right but below is a blueprint. Please change it to suit your requirements. You may want to change the function Redraw_Function and edit the frames (keyword parameter, which is np.arange(1,5,1) ) in the FuncAnimation call. Also interval=1000 means 1000 milliseconds of delay.
If you are using Jupyter then comment out the second last line (where it says plt.show()) and uncomment the last line. This will defeat your purpose of real time update but I am sorry I had trouble making it work real time in Jupyter. However if you are using python console or official IDLE please run the code as it is. It should work nicely.
import numpy as np
from matplotlib import pyplot as plt
from matplotlib.animation import FuncAnimation
fig, ax = plt.subplots()
plot, = plt.plot([],[])
def init_function():
ax.set_xlim(0,50)
ax.set_ylim(0,250)
return plot,
def Redraw_Function(UpdatedVal):
new_x = np.arange(500)*UpdatedVal
new_y = np.arange(500)**2*UpdatedVal
plot.set_data(new_x,new_y)
return plot,
Animated_Figure = FuncAnimation(fig,Redraw_Function,init_func=init_function,frames=np.arange(1,5,1),interval=1000)
plt.show()
# Animated_Figure.save('MyAnimated.gif',writer='imagemagick')
When you run the code, you obtain the below result. I tried to keep very little code but I am sorry, if your requirement was totally different.
Best Wishes,

Matplotlib scatterplot error bars two data sets

I have two data sets, which I'd like to scatter plot next to each other with error bars. Below is my code to plot one data set with error bars. And also the code to generate the second data set. I'd like the points and errors for each data for each value to be adjacent.
I'd also like to remove the line connecting the dots.
import random
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as ss
data = []
n = 100
m = 10
for i in xrange(m):
d = []
for j in xrange(n):
d.append(random.random())
data.append(d)
mean_data = []
std_data = []
for i in xrange(m):
mean = np.mean(data[i])
mean_data.append(mean)
std = np.std(data[i])
std_data.append(std)
df_data = [n] * m
plt.errorbar(range(m), mean_data, yerr=ss.t.ppf(0.95, df_data)*std_data)
plt.scatter(range(m), mean_data)
plt.show()
new_data = []
for i in xrange(m):
d = []
for j in xrange(n):
d.append(random.random())
new_data.append(d)
mean_new_data = []
std_new_data = []
for i in xrange(m):
mean = np.mean(new_data[i])
mean_new_data.append(mean)
std = np.std(new_data[i])
std_new_data.append(std)
df_new_data = [n] * m

To remove the line in the scatter plot use the fmt argument in plt.errorbar(). The plt.scatter() call is then no longer needed. To plot a second set of data, simply call plt.errorbar() a second time, with the new data.
If you don't want the datasets to overlap, you can add some small random scatter in x to the new dataset. You can do this in two ways, add a single scatter float with
random.uniform(-x_scatter, x_scatter)
which will move all the points as one:
or generate a random scatter float for each point with
x_scatter = np.random.uniform(-.5, .5, m)
which generates something like
To plot both datasets (using the second method), you can use:
plt.errorbar(
range(m), mean_data, yerr=ss.t.ppf(0.95, df_data)*std_data, fmt='o',
label="Data")
# Add some some random scatter in x
x_scatter = np.random.uniform(-.5, .5, m)
plt.errorbar(
np.arange(m) + x_scatter, mean_new_data,
yerr=ss.t.ppf(0.95, df_new_data)*std_new_data, fmt='o', label="New data")
plt.legend()
plt.show()

How to plot 2 animated graphics with x axis fixed in matplotlib with subplots?

The purpose of the program: I need to plot a signal graphic on the top and a spectrum graphic of this signal on the bottom, only the y data in both cases varies.
I generate a sine wave with a random noise on the input and plot it on the top, that's working perfecly.
The problem is when I try to plot the spectrum graph. It's not updating for some reason and I didn't understand very well the functioning of matplotlib.animation.FuncAnimation.
The code:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
dt = 0.01
Fs = 44000.0 # sample rate
timestep = 1.0/Fs # sample spacing (1/sample rate)
t = np.arange(0, 10, dt) # t range
n = 256 # size of the array data
w = 10000 # frequency of the input
data = np.sin(2*np.pi*w*t)
def update(data):
# update the curves with the incoming data
line.set_ydata(data)
#line2.set_ydata(magnitude)
return line,
def generateData():
# simulate new data coming in
while True:
nse = np.random.randn(len(t))
r = np.exp(-t/0.05)
cnse = np.convolve(nse, r)*dt
cnse = cnse[:len(t)]
data = np.sin(2*np.pi*w*(t)) + cnse
magnitude = np.fft.fft(data)/n
magnitude = np.abs(magnitude[range(n//2)])
yield data
fig = plt.figure()
# plot time graph axis
timeGraph = plt.subplot(2, 1, 1)
timeGraph.set_ylim(-0.2, 0.2)
timeGraph.set_xlabel('Time')
timeGraph.set_ylabel('Amplitude')
# plot frequency graph axis
freqGraph = plt.subplot(2, 1, 2)
freqGraph.set_xlabel('Freq (Hz)')
freqGraph.set_ylabel('|Y(freq)|')
# get frequency range
n = len(data) # length of the signal
print(len(data))
k = np.arange(n)
T = n/Fs
freq = k/T # two sides frequency range
freq = freq[range(n//2)] # one side frequency range
# fft computing and normalization
magnitude = np.fft.fft(data)/n
magnitude = np.abs(magnitude[range(n//2)])
line, = timeGraph.plot(np.linspace(0, 1, len(t)), 'b')
line2, = freqGraph.plot(freq, magnitude, 'g')
# animate the curves
ani = animation.FuncAnimation(fig, update, generateData,
interval=10, blit=True)
plt.show() # open window
Bonus: how do I initialize data and magnitude correctly?

In order for both the time and frequency graph to update, you need to set the data from both to the respective plots in the update function. Of course you also need to provide this data in the generating function. So the generating function should yield (data, magnitude) and the updating function should accept this tuple as input.
It is also a good idea to set some limits for the frequency graph, freqGraph.set_ylim([0, 0.006]) such that it will not stay empty.
I do not know what you mean by how do i initialize data and magnitude correctly?. I think they are initialized correctly in the sense that they are calculated for every frame including the very first one.
Here is a working code.
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
dt = 0.01
Fs = 44000.0 # sample rate
timestep = 1.0/Fs # sample spacing (1/sample rate)
t = np.arange(0, 10, dt) # t range
n = 256 # size of the array data
w = 10000 # frequency of the input
data = np.sin(2*np.pi*w*t)
def update(data):
# update the curves with the incoming data
line.set_ydata(data[0])
line2.set_ydata(data[1])
return line, line2,
def generateData():
# simulate new data coming in
while True:
nse = np.random.randn(len(t))
r = np.exp(-t/0.05)
cnse = np.convolve(nse, r)*dt
cnse = cnse[:len(t)]
data = np.sin(2*np.pi*w*(t)) + cnse
magnitude = np.fft.fft(data)/n
magnitude = np.abs(magnitude[range(n//2)])
yield (data, magnitude)
fig = plt.figure()
# plot time graph axis
timeGraph = plt.subplot(2, 1, 1)
timeGraph.set_ylim(-0.2, 0.2)
timeGraph.set_xlabel('Time')
timeGraph.set_ylabel('Amplitude')
# plot frequency graph axis
freqGraph = plt.subplot(2, 1, 2)
freqGraph.set_ylim([0, 0.006])
freqGraph.set_xlabel('Freq (Hz)')
freqGraph.set_ylabel('|Y(freq)|')
# get frequency range
n = len(data) # length of the signal
print(len(data))
k = np.arange(n)
T = n/Fs
freq = k/T # two sides frequency range
freq = freq[range(n//2)] # one side frequency range
# fft computing and normalization
magnitude = np.fft.fft(data)/n
magnitude = np.abs(magnitude[range(n//2)])
line, = timeGraph.plot(np.linspace(0, 1, len(t)),'b')
line2, = freqGraph.plot(freq, magnitude,'g')
# animate the curves
ani = animation.FuncAnimation(fig, update, generateData,
interval = 10, blit=True)
plt.show() # open window

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Improve performance of matplotlib for subset of data - python

Related

Real time data plotting from a high throughput source

Limit the Number of Open Matplotlib Figures in Python 3

Updating matplotlib figures in real time for data acquisition

Matplotlib scatterplot error bars two data sets

How to plot 2 animated graphics with x axis fixed in matplotlib with subplots?

Categories

Resources