Animate Quiver plot from values in Pandas dataframe - python

Hi im trying to make an animation of a quiver plot from data in my data frame
I have data stored like this in a pandas DataFrame, somewhat like this
QuivXLoc QuivYLoc QuivXVal QuivYVal QuivColorVal QuivPlotNum
0 -70.22 -127.241 1.624 -0.879 1.846623 1
1 -61.74 -127.241 -0.973 -0.027 0.973375 1
2 -65.98 -121.835 0.046 2.416 2.416438 1
3 -74.46 -121.835 -0.151 2.673 2.677262 1
4 -78.70 -116.429 1.073 -0.954 1.435773 2
I am currently plotting it like this, and it generates seperate plots for each sequence number perfectly.
for seq in quidf['QuivPlotNum'].unique():
temp=quidf[quidf['QuivPlotNum']==seq] ## make subset to plot
plt.quiver(temp['QuivXLoc'], temp['QuivYLoc'], temp['QuivXVal'], temp['QuivYVal'], # data
temp['QuivColorVal'], # colour the arrows based on this array
cmap=cm.jet, # colour map
headlength=3) # length of the arrows
Theres some additional code to format the plot that I left out.
What I'd like to do is animate the sequence based on iterating through the Sequence number in my data frame. All the examples I saw for Quiver Animation involved scaling previous function by some scalar that is incremented.
example of similar quiver animation I'd like to generate, I have tried but cannot figure out how to change update_quiver to work for my application:
Plotting animated quivers in Python

Using the matplotlib.animation module and its FuncAnimation class:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.animation import FuncAnimation
import pandas as pd
# read in the date and group it by the frame number
data = pd.read_csv('data2.csv', index_col=0)
grouped = data.groupby('QuivPlotNum')
# set up the figure
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.set_xlim(-200, 200)
ax.set_ylim(-200, 200)
# create empty plot for the update function to manipulate
plot = ax.quiver([], [], [], [], [], cmap='jet', headlength=3)
# create an iterator over the group, next() will return a tuple
# of QuivPlotNum, DataFrame
iterator = iter(grouped)
def update(i):
# get next thing in the iterator
key, data = next(iterator)
# set new x, y coordinates for the plot
plot.set_offsets(np.column_stack([data.QuivXLoc, data.QuivYLoc]))
# update vector and color values
plot.set_UVC(data.QuivXVal, data.QuivYVal, data.QuivColorVal)
# create the animation, update every 1000 ms
ani = FuncAnimation(fig, update, interval=1000)
# show it
plt.show()

Related

How to plot multiple animations in Matplolib for 2 different processes

In a measurement chain, each instrument embedded in various measurement loops will record a CSV and I want to monitor the live plots in separate figures i.e figure 1 for instrument1 , figure 2 for instrument2...etc. I try to implement animations but nothing out. csv is continuously generating data.
I first generate data in a CSV then i try to plot 2 animations in parallel:I get the figure 2 animated but the first is frozen. any help appreciated.
import pandas as pd
from matplotlib import pyplot as plt
from matplotlib import animation
# making figures
def makeFigure():
df = pd.read_csv('data.csv')
data = pd.DataFrame(df)
x = data['current']
y1 = data['resistance']
y2 = data['voltage']
fig=plt.figure()
ax=fig.add_subplot(1,1,1)
# # Plot 1 set of data
dataset =ax.plot(x,y1)
return fig,ax,dataset
# Frame rendering function
def renderFrame(i, dataset):
df = pd.read_csv('data.csv')
data = pd.DataFrame(df)
x = data['current']
y1 = data['resistance']
y2 = data['voltage']
# Plot data
plt.cla()
dataset, =ax.plot(x,y2)
return dataset
# Make the figures
figcomps1=makeFigure()
figcomps2=makeFigure()
# List of Animation objects for tracking
anim = []
# Animate the figures
for figcomps in [figcomps1,figcomps2]:
fig,ax,dataset = figcomps
anim.append(animation.FuncAnimation(fig,renderFrame,fargs=[dataset]))
# plt.gcf()
plt.show()
```

How to create multiple subplots from a wide dataframe with a function

I have a dataframe df with 4 unique UID - 1001,1002,1003,1004.
I want to write a user-defined function in python that does the following:
growth curve -plots Turbidity against Time for each unique UID. Turbidity values are the ones in the Time_1, Time_2, Time_3,Time_4 & Time_5 columns. For example, UID = 1003 will have 4 plots on each graph
Add a legend to each graph such as M+L, F+L, M+R, and F+R (from columns Gen and Type)
Add a title to each graph. For example- UID:1003 + Site:FRX
Export the graphs as a pdf or jpeg or tiff file - 4 graphs per page
# The dataset
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import numpy as np
df= {
'Gen':['M','M','M','M','F','F','F','F','M','M','M','M','F','F','F','F'],
'Site':['FRX','FRX','FRX','FRX','FRX','FRX','FRX','FRX','FRX','FRX','FRX','FRX','FRX','FRX','FRX','FRX'],
'Type':['L','L','L','L','L','L','L','L','R','R','R','R','R','R','R','R'],
'UID':[1001,1002,1003,1004,1001,1002,1003,1004,1001,1002,1003,1004,1001,1002,1003,1004],
'Time1':[100.78,112.34,108.52,139.19,149.02,177.77,79.18,89.10,106.78,102.34,128.52,119.19,129.02,147.77,169.18,170.11],
'Time2':[150.78,162.34,188.53,197.69,208.07,217.76,229.48,139.51,146.87,182.54,189.57,199.97,229.28,244.73,269.91,249.19],
'Time3':[250.78,262.34,288.53,297.69,308.07,317.7,329.81,339.15,346.87,382.54,369.59,399.97,329.28,347.73,369.91,349.12],
'Time4':[240.18,232.14,258.53,276.69,338.07,307.74,359.16,339.25,365.87,392.48,399.97,410.75,429.08,448.39,465.15,469.33],
'Time5':[270.84,282.14,298.53,306.69,318.73,327.47,369.63,389.59,398.75,432.18,449.78,473.55,494.85,509.39,515.52,539.23]
}
df = pd.DataFrame(df,columns = ['Gen','Site','Type','UID','Time1','Time2','Time3','Time4','Time5'])
df
My attempt
# See below for my thoughts/attempt- I am open to other python libraries and approaches
def graph2pdf(inputdata):
#1. convert from wide to long
inputdata = pd.melt(df,id_vars = ['Gen','Type','UID'],var_name = 'Time',value_name = 'Turbidity')
#
cmaps = ['Reds', 'Blues', 'Greens', 'Greys','Yellows']
label_patches = []
for i, cmap in enumerate(cmaps):
# I want a growth curve not a distribution curve
sns.kdeplot(x = Time, y = Turbidity,data = data, cmap=cmaps[i]+'_d')
label_patch = mpatches.Patch(color=sns.color_palette(cmaps[i])[2],label=label)
label_patches.append(label_patch)
#2. add legend
plt.legend(handles=label_patches, loc='upper left')
#3. add title- 'UID number+ SiteName: FRX' to each of the graphs
plt.title('UID:1003+FRX')
plt.show()
#4. export as pdf file i.e 4 graphs per page
with PdfPages('turbidityvstime_pdf.pdf') as pdf:
plt.figure(figsize=(2,2)) # 4 graphs per page, I am anticipating more pages in the future
pdf.savefig() # saves the current figure into a pdf page
plt.close()
# testing the user-defined function
graph2pdf(df)
I want the graph to look something like the figure below (turbidity instead of density on the y-axis and time on the x-axis). if possible, a white or clear background is preferred
Thanks
I line plot is usually not appropriate for discrete data, because the slope of the lines can imply trends that do not exist.
This is discrete because measurements are taken at discrete moments in time, not a continuous time series.
Discrete data is best visualized with a bar plot.
Use seaborn figure-level methods like sns.catplot or sns.replot to create the figure with four subplots.
Tested in python 3.8.11, pandas 1.3.2, matplotlib 3.4.3, seaborn 0.11.2
import pandas as pd
import seaborn as sns
def graph2pdf(df):
# melt the dataframe; any column not a var or value, should be in id_vars
data = df.melt(id_vars=df.columns[:4], var_name='Time', value_name='Turbidity')
# combine Gen and Type to create label, which can be used for hue
data['label'] = data.Gen + '-' + data.Type
# plot a catplot for bars
p1 = sns.catplot(data=data, kind='bar', x='Time', y='Turbidity', hue='label', col='UID', col_wrap=2, height=3.25)
p1.fig.subplots_adjust(top=0.9) # adjust the figure
p1.fig.suptitle('UID:1003+FRX')
p1.savefig("barplots.png")
# plot a relplot for lines
p2 = sns.relplot(data=data, kind='line', x='Time', y='Turbidity', hue='label', col='UID', col_wrap=2, height=3.25, marker='o')
p2.fig.subplots_adjust(top=0.9)
p2.fig.suptitle('UID:1003+FRX')
p2.savefig("lineplots.png")
graph2pdf(df)

Proper Matplotlib axes construction / reuse

I currently am building a set of scatter plot charts using pandas plot.scatter. In this construction off of two base axes.
My current construction looks akin to
ax1 = pandas.scatter.plot()
ax2 = pandas.scatter.plot(ax=ax1)
for dataframe in list:
output_ax = pandas.scatter.plot(ax2)
output_ax.get_figure().save("outputfile.png")
total_output_ax = total_list.scatter.plot(ax2)
total_output_ax.get_figure().save("total_output.png")
This seems inefficient. For 1...N permutations I want to reuse a base axes that has 50% of the data already plotted. What I am trying to do is:
Add base data to scatter plot
For item x in y: (save data to base scatter and save image)
Add all data to scatter plot and save image
here's one way to do it with plt.scatter.
I plot column 0 on x-axis, and all other columns on y axis, one at a time.
Notice that there is only 1 ax object, and I don't replot all points, I just add points using the same axes with a for loop.
Each time I get a corresponding png image.
import numpy as np
import pandas as pd
np.random.seed(2)
testdf = pd.DataFrame(np.random.rand(20,4))
testdf.head(5) looks like this
0 1 2 3
0 0.435995 0.025926 0.549662 0.435322
1 0.420368 0.330335 0.204649 0.619271
2 0.299655 0.266827 0.621134 0.529142
3 0.134580 0.513578 0.184440 0.785335
4 0.853975 0.494237 0.846561 0.079645
#I put the first axis out of a loop, that can be in the loop as well
import matplotlib.pyplot as plt
fig = plt.figure()
ax = fig.add_subplot(1,1,1)
ax.scatter(testdf[0],testdf[1], color='red')
fig.legend()
fig.savefig('fig_1.png')
colors = ['pink', 'green', 'black', 'blue']
for i in range(2,4):
ax.scatter(testdf[0], testdf[i], color=colors[i])
fig.legend()
fig.savefig('full_' + str(i) + '.png')
Then you get these 3 images (fig_1, fig_2, fig_3)
Axes objects cannot be simply copied or transferred. However, it is possible to set artists to visible/invisible in a plot. Given your ambiguous question, it is not fully clear how your data are stored but it seems to be a list of dataframes. In any case, the concept can easily be adapted to different input data.
import matplotlib.pyplot as plt
#test data generation
import pandas as pd
import numpy as np
rng = np.random.default_rng(123456)
df_list = [pd.DataFrame(rng.integers(0, 100, (7, 2))) for _ in range(3)]
#plot all dataframes into an axis object to ensure
#that all plots have the same scaling
fig, ax = plt.subplots()
patch_collections = []
for i, df in enumerate(df_list):
pc = ax.scatter(x=df[0], y=df[1], label=str(i))
pc.set_visible(False)
patch_collections.append(pc)
#store individual plots
for i, pc in enumerate(patch_collections):
pc.set_visible(True)
ax.set_title(f"Dataframe {i}")
fig.savefig(f"outputfile{i}.png")
pc.set_visible(False)
#store summary plot
[pc.set_visible(True) for pc in patch_collections]
ax.set_title("All dataframes")
ax.legend()
fig.savefig(f"outputfile_0_{i}.png")
plt.show()

Matplotlib animation iterating over list of pandas dataframes

I have a list of pandas DataFrames with 2 columns each. So far I have a function that, when given an index i, it takes the frame corresponding to index i and plots a graph of data from the first column against the data of the second column.
list = [f0,f1,f2,f3,f4,f5,f6,f7,f8,f9]
def getGraph(i):
frame = list[i]
frame.plot(x = "firstColumn",y = "secondColumn")
return 0
My question now is, how do I make this iterate over the list of frames and animate the graphs displaying each one for 0.3 seconds in succession.
Preferably, I would like to use the FuncAnimation class in the animation library which does the heavy lifting and optimizations for you.
Set animate function and init to axes, figure and line:
from matplotlib import pyplot as plt
from matplotlib import animation
import pandas as pd
f0 = pd.DataFrame({'firstColumn': [1,2,3,4,5], 'secondColumn': [1,2,3,4,5]})
f1 = pd.DataFrame({'firstColumn': [5,4,3,2,1], 'secondColumn': [1,2,3,4,5]})
f2 = pd.DataFrame({'firstColumn': [5,4,3.5,2,1], 'secondColumn': [5,4,3,2,1]})
# make a global variable to store dataframes
global mylist
mylist=[f0,f1,f2]
# First set up the figure, the axis, and the plot element we want to animate
fig = plt.figure()
ax = plt.axes(xlim=(0, 5), ylim=(0, 5))
line, = ax.plot([], [], lw=2)
# initialization function: plot the background of each frame
def init():
line.set_data([], [])
return line,
# animation function of dataframes' list
def animate(i):
line.set_data(mylist[i]['firstColumn'], mylist[i]['secondColumn'])
return line,
# call the animator, animate every 300 ms
# set number of frames to the length of your list of dataframes
anim = animation.FuncAnimation(fig, animate, frames=len(mylist), init_func=init, interval=300, blit=True)
plt.show()
For more info look for the tutorial: https://jakevdp.github.io/blog/2012/08/18/matplotlib-animation-tutorial/

Overlapping boxplots in python

I have the foll. dataframe:
Av_Temp Tot_Precip
278.001 0
274 0.0751864
270.294 0.631634
271.526 0.229285
272.246 0.0652201
273 0.0840059
270.463 0.0602944
269.983 0.103563
268.774 0.0694555
269.529 0.010908
270.062 0.043915
271.982 0.0295718
and want to plot a boxplot where the x-axis is 'Av_Temp' divided into equi-sized bins (say 2 in this case), and the Y-axis shows the corresponding range of values for Tot_Precip. I have the foll. code (thanks to Find pandas quartiles based on another column), however, when I plot the boxplots, they are getting plotted one on top of another. Any suggestions?
expl_var = 'Av_Temp'
cname = 'Tot_Precip'
df[expl_var+'_Deciles'] = pandas.qcut(df[expl_var], 2)
grp_df = df.groupby(expl_var+'_Deciles').apply(lambda x: numpy.array(x[cname]))
fig, ax = plt.subplots()
for i in range(len(grp_df)):
box_arr = grp_df[i]
box_arr = box_arr[~numpy.isnan(box_arr)]
stats = cbook.boxplot_stats(box_arr, labels = str(i))
ax.bxp(stats)
ax.set_yscale('log')
plt.show()
Since you're using pandas already, why not use the boxplot method on dataframes?
expl_var = 'Av_Temp'
cname = 'Tot_Precip'
df[expl_var+'_Deciles'] = pandas.qcut(df[expl_var], 2)
ax = df.boxplot(by='Av_Temp_Deciles', column='Tot_Precip')
ax.set_yscale('log')
That produces this: http://i.stack.imgur.com/20KPx.png
If you don't like the labels, throw in a
plt.xlabel('');plt.suptitle('');plt.title('')
If you want a standard boxplot, the above should be fine. My understanding of the separation of boxplot into boxplot_stats and bxp is to allow you to modify or replace the stats generated and fed to the plotting routine. See https://github.com/matplotlib/matplotlib/pull/2643 for some details.
If you need to draw a boxplot with non-standard stats, you can use boxplot_stats on 2D numpy arrays, so you only need to call it once. No loops required.
expl_var = 'Av_Temp'
cname = 'Tot_Precip'
df[expl_var+'_Deciles'] = pandas.qcut(df[expl_var], 2)
# I moved your nan check into the df apply function
grp_df = df.groupby('Av_Temp_Deciles').apply(lambda x: numpy.array(x[cname][~numpy.isnan(x[cname])]))
# boxplot_stats can take a 2D numpy array of data, and a 1D array of labels
# stats is now a list of dictionaries of stats, one dictionary per quantile
stats = cbook.boxplot_stats(grp_df.values, labels=grp_df.index)
# now it's a one-shot plot, no loops
fig, ax = plt.subplots()
ax.bxp(stats)
ax.set_yscale('log')

Categories

Resources