Averaging multiple graph lines into one, python & matplotlib - python

I am brand new to coding, please be gentle!
I have a main script that runs a simulation of particles colliding with each other and walls in micro-gravity conditions. This part of the script outputs individual data files containing: timestep, vtotal. There are 15 particles so I get out 15 txt files.
# Run the simulation and sample every particle's velocity once per 1000 steps.
N_max = sim.getNumTimeSteps()
particleData = [[] for _ in range(len(sim.getParticleList()))]
for n in range(N_max):
    sim.runTimeStep()
    if n % 1000 != 0:
        continue
    particles = sim.getParticleList()
    for i in range(len(sim.getParticleList())):
        print(i)
        vx, vy, vz = particles[i].getVelocity()
        particleData[i].append((n, vx, vy, vz))
print(len(sim.getParticleList()))

# Write one "<index>.dat" file per particle; each line is "timestep,speed".
for i in range(len(sim.getParticleList())):
    with open("{0:d}.dat".format(i), "w") as f:
        for step, vx, vy, vz in particleData[i]:
            speed = math.sqrt(float(vx)**2 + float(vy)**2 + float(vz)**2)
            f.write("%f,%f \n" % (step, speed))
sim.exit()
The end result I need to work toward is a graph of the mean of those 15 particles over time. For example, this simulation ran for 22000 timesteps and was sampled in increments of 1000. Correct me if I am wrong, but the mean at each increment should be (vtotal1 + vtotal2 + vtotal3 + ... + vtotal15) / 15. When that is plotted over time, a single line should represent the mean velocity of the 15 particles from the simulation. Here is a version of what I was doing, adapted from another averaging attempt.
import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt
import csv
import math
import numpy as np
# Plot each particle's speed trace and accumulate a per-timestep total.
x = []
y = []
# One slot per sampled timestep (22000 steps, sampled every 1000).
# NOTE(review): 22000/1000 is only an integer under Python 2 division.
# NOTE(review): the slots start at 1, not 0, and the array is built from
# Python ints, so the float speeds added below look like they are truncated
# on assignment -- confirm against the plotted values.
y_mean = np.array([1 for _ in range(22000/1000)])
fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(10, 7))
for i in range(15):
    # Reset the per-particle series before reading file "<i>.dat".
    x = []
    y = []
    with open("{}.dat".format(i),'r') as csvfile:
        plots = csv.reader(csvfile, delimiter=',')
        for row in plots:
            x.append(float(row[0]))
            y.append(float(row[1]))
            # Add this sample to the slot for its timestep (timestep / 1000).
            y_mean[int(float(row[0]) / 1000)] += y[-1]
    axes.plot(x,y, color='skyblue', label="Total v {}".format(i+1))
    # NOTE(review): the source's indentation was lost, so this call may belong
    # after the loop instead. At this point y_mean holds a running sum; no
    # line above divides it by the number of particles.
    axes.plot(x,y_mean, color='olive', label="Average v {}".format(i+1))
plt.title('Particles Over Time')
plt.xlabel('Timestep Number')
plt.grid(alpha=.5,linestyle='--')
plt.ylabel('Velocity')
# NOTE(review): the x-limit is 2000 although timesteps run up to 22000.
plt.xlim(0, 2000)
plt.show()
# NOTE(review): `y` here is the list of speeds, not the string 'y'.
plt.autoscale(enable=True, axis=y, tight=True)
plt.legend()
plt.savefig("round2avgs.png")
# Computed after the figure is saved, from the last particle's `y` only;
# no later line shown here uses the result.
y_mean = np.asarray(y) / 15
I just don't know what's going wrong. Any assistance is appreciated.

Normally, you should split your data processing and visualization into two different steps.
Say you have 5 CSVs, all containing the same data:
0,1
1000,2
2000,3
3000,4
4000,5
Let's name these 1.dat, 2.dat, ..., 5.dat.
Import the libraries and load the data
import csv
import matplotlib.pyplot as plt
import numpy as np
# x holds the timesteps (taken from the first file only);
# ys holds one list of values per file.
x = []
ys = []
for file_number in range(1, 6):
    with open(f'{file_number}.dat') as data_file:
        rows = list(csv.reader(data_file, delimiter=','))
    if file_number == 1:
        x.extend(float(row[0]) for row in rows)
    ys.append([float(row[1]) for row in rows])
Calculate the means per timestep using numpy
means_per_timestep = np.array(ys).mean(axis=0)
Plot it
plt.plot(x, means_per_timestep)
Is this what you were expecting?

Related

Updating a multi-plot matplotlib chart

I have a problem with updating a matplotlib chart. The problem is that I have many curves on it, and after an update the number of them may change. In the example code I have 2 sets of data, the 1st with 90 curves and the 2nd with 80, and I would like to plot the 1st set and then the 2nd in the same matplotlib window.
import matplotlib.pyplot as plt
from matplotlib.transforms import Bbox
import numpy as np
from numpy.lib.polynomial import RankWarning
import pandas as pd
import sys
fig, ax = plt.subplots()
fig.subplots_adjust(right=0.78)
# First data set: 90 sine curves sampled at 100 points, each shifted up by its index.
_x = [np.linspace(0, 10*np.pi, 100) for _ in range(90)]
_y = [np.sin(xs) + offset for offset, xs in enumerate(_x)]
# Second data set: 80 tangent curves sampled at 150 points, each phase-shifted by its index.
_x1 = [np.linspace(0, 10*np.pi, 150) for _ in range(80)]
_y1 = [np.tan(xs + shift) for shift, xs in enumerate(_x1)]
def narysuj(__x, __y):
    """Plot every curve in ``__x``/``__y`` on the module-level axes and show it.

    ``__x[k]`` supplies the abscissas and ``__y[k]`` the ordinates of curve
    ``k``. Each curve is copied element by element into plain lists before
    being handed to ``ax.plot``.
    """
    curve_count = len(__x)
    # One colour of the gist_rainbow map per curve.
    ax.set_prop_cycle(color=plt.cm.gist_rainbow(np.linspace(0, 1, curve_count)))
    for idx in range(curve_count):
        p = []  # p - measurement
        f = []  # f - frequency
        for sample in range(len(__x[idx])):
            p.append(__y[idx][sample])
            f.append(__x[idx][sample])
        ax.plot(f, p, label=f"Label {idx}")
    plt.show()
narysuj(_x, _y)
narysuj(_x1, _y1)
PS I know the way I'm drawing those charts is highly ineffective.
I found what was the problem. I had to add plt.ion() at the start of program and ax.clear() before drawing.

Plot 4D data heatmap in Python

Hey, how can I plot a 2D heatmap in 3D? So far I have written a Python script that makes a 2D heatmap plot with data from CSV files (CSV format: x,y,z,v).
For example:
First csv
0,000;-110,000;110,000;0,101
Second csv
0,000;-66,000;110,000;0,104
Third csv
0,000;-22,000;110,000;0,119
....
In this example, it is a heatmap in xz-plane and I create e.g. five more plots, so that I can insert six xz-plane Plots in a 3D room.
In 4D heatmap plot with matplotlib there is a very nice example for doing it. But I don't know how to use it in my case.
import numpy as np
import os
import matplotlib.pyplot as plt
from scipy.interpolate import griddata
# Create a list for every parameter
x, y, z, v = [], [], [], []
file_path = "path/."

# Insert data from csv into lists
for root, dirs, files in os.walk(file_path, topdown=False):
    for name in files:
        if not name.endswith('.csv'):
            continue
        with open(os.path.join(root, name)) as csv_file:
            # The files use a decimal comma; swap it for a dot before parsing.
            data = np.genfromtxt(
                (line.replace(',', '.') for line in csv_file), delimiter=";")
        # Keep only the samples whose second component equals 22.
        if data[1] == 22:
            x.append(data[0])
            y.append(data[1])
            z.append(data[2])
            v.append(data[3])

# Create axis data
xi = np.linspace(min(x), max(x), 1000)
zi = np.linspace(min(z), max(z), 1000)
vi = griddata((x, z), v, (xi[None,:], zi[:,None]), method='cubic')

# Create the contour plot
CS = plt.contourf(xi, zi, vi, 20, cmap=plt.cm.rainbow)
plt.title("Heatmap xz-plane", y=1.05,
          fontweight="bold")
plt.xlabel("length x in cm")
plt.xticks(np.arange(0, 201, step=40))
plt.ylabel("height z in cm")
plt.yticks(np.arange(110, 251, step=20))
cbar = plt.colorbar()
cbar.set_label("velocity v in m/s", labelpad=10)
plt.savefig('testplot.png', dpi=400)
plt.show()
Satisfying the request of @keepAlive, who wished to see the result of his untested answer:
it actually works great :-)
Disclaimer: I am the author of the cited example, so I think that copying/pasting myself is not really a problem.
Note that your dataset does not look (at least) 3-dimensional. But I will assume there is an unwilling selection bias.
You first need to aggregate your "points" per level of altitude, which I assume is the third component of your vectors. They will be constitutive of your planes once gathered.
# libraries
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import scipy.interpolate as si
from matplotlib import cm
import collections as co # <------------------
import pandas as pd
import numpy as np
planes = co.defaultdict(list)
for root, dirs, files in os.walk(file_path, topdown=False):
# [...]
# [...]
# [...]
# [...]
# [...]
level = data[2] # <------ third component.
planes[level].append(data)
Now, at that stage, we have a list of arrays per level. Let's define our grids_maker function
def grids_maker(arrays_list, colnames=None, reso_x=50, reso_y=50,
                interp='cubic'):
    """Interpolate the scattered 4-component samples of one plane onto a grid.

    Parameters
    ----------
    arrays_list : sequence of length-4 array-likes
        One sample per entry; its components are labelled by ``colnames``.
    colnames : sequence of str, optional
        Labels of the four components, in the order they appear in each
        sample. Must contain 'x', 'y', 'z' and 'g'; defaults to
        ``list('xyzg')``. Reordering the names (not renaming them) lets you
        organise the planes by a component other than the third.
    reso_x, reso_y : int, optional
        Number of grid nodes along x and y (default 50 each).
    interp : {'cubic', 'nearest', 'linear'}, optional
        Method handed to ``scipy.interpolate.griddata``.

    Returns
    -------
    dict
        Keys 'x', 'y', 'z', 'g', each a ``(reso_x, reso_y)`` array. With the
        'linear' and 'cubic' methods, nodes outside the convex hull of the
        samples are NaN in 'z' and 'g'.
    """
    # A None sentinel avoids sharing one mutable default list across calls.
    if colnames is None:
        colnames = list('xyzg')

    # A dataframe keeps the changes to the function copied from
    # https://stackoverflow.com/a/54075350/4194079 minimal, and pandas also
    # takes care of stacking the arrays.
    df = pd.DataFrame(arrays_list, columns=colnames)

    # Make things more legible
    xy = df.loc[:, ['x', 'y']]
    x = xy.x
    y = xy.y

    # Convert the 4d-space's dimensions into grids; a complex step in
    # np.mgrid means "this many points, end point included".
    grid_x, grid_y = np.mgrid[
        x.min():x.max():1j*reso_x,
        y.min():y.max():1j*reso_y
    ]
    grid_z = si.griddata(xy, df.z.values, (grid_x, grid_y), method=interp)
    grid_g = si.griddata(xy, df.g.values, (grid_x, grid_y), method=interp)

    return {
        'x' : grid_x,
        'y' : grid_y,
        'z' : grid_z,
        'g' : grid_g,
    }
Let's use grids_maker over our list of arrays and get the extrema of each z-level's 4th dimension.
# Build one interpolated grid per level and record the range of its 4th
# dimension, so that all planes can later share a single colour scale.
g_mins = []
g_maxs = []
lgrids = {}
for level, arrays_list in planes.items():
    lgrids[level] = grids = grids_maker(arrays_list)
    # griddata leaves NaN at grid nodes outside the convex hull of the samples
    # (linear/cubic methods). A plain .min()/.max() would then return NaN and
    # break the shared colour scale, so use the NaN-aware variants.
    g_mins.append(np.nanmin(grids['g']))
    g_maxs.append(np.nanmax(grids['g']))
Let's create our (all-file unifying) color-scale and show the plot.
# Create the 4th color-rendered dimension: one colour scale spanning the
# extrema collected over all levels.
scam = plt.cm.ScalarMappable(
    norm=cm.colors.Normalize(min(g_mins), max(g_maxs)),
    cmap='jet' # see https://matplotlib.org/examples/color/colormaps_reference.html
)
# Set once: the call does not depend on the plane being drawn.
scam.set_array([])

fig = plt.figure()
# fig.gca(projection='3d') is rejected by current Matplotlib releases;
# add_subplot creates the 3D axes on old and new versions alike.
ax = fig.add_subplot(111, projection='3d')
for grids in lgrids.values():
    ax.plot_surface(
        grids['x'], grids['y'], grids['z'],
        facecolors = scam.to_rgba(grids['g']),
        antialiased = True,
        rstride=1, cstride=1, alpha=None
    )
plt.show()
I would be glad to see the result.

Matplotlib scatterplot error bars two data sets

I have two data sets, which I'd like to scatter plot next to each other with error bars. Below is my code to plot one data set with error bars. And also the code to generate the second data set. I'd like the points and errors for each data for each value to be adjacent.
I'd also like to remove the line connecting the dots.
import random
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as ss
n = 100
m = 10

# First data set: m samples of n uniform random numbers each.
data = [[random.random() for _ in xrange(n)] for _ in xrange(m)]
mean_data = [np.mean(sample) for sample in data]
std_data = [np.std(sample) for sample in data]
df_data = [n] * m

# Error bars scaled by the 95% quantile of Student's t distribution.
plt.errorbar(range(m), mean_data, yerr=ss.t.ppf(0.95, df_data)*std_data)
plt.scatter(range(m), mean_data)
plt.show()

# Second data set, generated the same way (not plotted here).
new_data = [[random.random() for _ in xrange(n)] for _ in xrange(m)]
mean_new_data = [np.mean(sample) for sample in new_data]
std_new_data = [np.std(sample) for sample in new_data]
df_new_data = [n] * m
To remove the line in the scatter plot use the fmt argument in plt.errorbar(). The plt.scatter() call is then no longer needed. To plot a second set of data, simply call plt.errorbar() a second time, with the new data.
If you don't want the datasets to overlap, you can add some small random scatter in x to the new dataset. You can do this in two ways, add a single scatter float with
random.uniform(-x_scatter, x_scatter)
which will move all the points as one:
or generate a random scatter float for each point with
x_scatter = np.random.uniform(-.5, .5, m)
which generates something like
To plot both datasets (using the second method), you can use:
# First data set at the integer positions; fmt='o' draws markers only,
# without a connecting line.
data_yerr = ss.t.ppf(0.95, df_data)*std_data
plt.errorbar(range(m), mean_data, yerr=data_yerr, fmt='o', label="Data")

# Add some random scatter in x so the second set does not sit on the first
x_scatter = np.random.uniform(-.5, .5, m)
new_data_x = np.arange(m) + x_scatter
new_data_yerr = ss.t.ppf(0.95, df_new_data)*std_new_data
plt.errorbar(new_data_x, mean_new_data, yerr=new_data_yerr, fmt='o',
             label="New data")

plt.legend()
plt.show()

Optimize file reader and plotting script using Axes3D

I've written the code below to import a number of files from a folder, read and convert them, and then plot them in a 3D plot. The number of files is usually larger than 30 and lower than 200, but exceptions might occur. Each file has around 5000 lines with 3 plottable values. It works and produces a nice 3D plot, but it is very slow. I suspect I have made an array or list grow inside itself, and I particularly suspect the third for loop. I've tried running it with 121 files and it takes about half an hour to plot.
Each data file is a diffractogram and what I want to do is essentially something like this:
http://www.carbonhagen.com/_/rsrc/1404718703333/abstracts/insitux-raydiffractionsynthesisofgrapheneoxideandreducedgrapheneoxide/M%C3%B8ller%20Storm%20res%20pic.png?height=371&width=522
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.collections import PolyCollection
import glob
import os
# Raw string: in a normal literal the "\U" in "C:\Users" starts a Unicode
# escape, which is a syntax error on Python 3. The path value is unchanged.
file_list = glob.glob(os.path.join(os.getcwd(), r'C:\Users\mkch\Python_Scripts\znox_1', '*.ras'))
sourcefiles = []
for file_path in file_list:  # read every file into memory as a list of lines
    with open(file_path) as f_input:
        sourcefiles.append(f_input.readlines())
Now all the files have been imported into the list sourcefiles.
data = []
alldata = []
length = 118#len(sourcefiles)
# For each of the first `length` files keep lines 320 .. len-3, i.e. drop the
# leading 320 lines and the final two.
cutdata = [
    sourcefiles[i][320:len(sourcefiles[i]) - 2]
    for i in range(0, length)
]
This for loop removes headliners and the last two lines in each file.
# Build one polygon outline per file and add them to a 3D axes.
fig = plt.figure()
ax = fig.gca(projection='3d')
verts = []
# One depth position per file.
zs = list(range(length))
print zs
for j in range(length):
    lines = cutdata[j][:]  # shallow copy of this file's data lines
    x = []
    y = []
    z = []  # never filled in the lines shown
    for line in lines:
        # First three whitespace-separated fields; `c` is not used below.
        a, b, c = line.split()[0:3]
        # NOTE(review): a and b are still strings here -- the visible code
        # never converts them to numbers.
        x.append(a)
        y.append(b)
    # Force the first and last ordinate to 0.
    y[0], y[-1] = 0, 0
    verts.append(list(zip(x, y)))
# NOTE(review): the source's indentation was lost. If these two calls were
# inside the loop above, a new collection over the growing `verts` list would
# be created and added once per file -- confirm against the original script.
poly = PolyCollection(verts, facecolors=['r', 'g', 'b','y'])
ax.add_collection3d(poly, zs=zs, zdir='y')
This bit of code splits each line into the three values that need plotting. Then it adds the data to a plot. I suspect the above code is what takes so long.
# Standard axes set-up: transparency, limits and labels.
poly.set_alpha(0.7)
ax.set_xlim3d(0, 100)
ax.set_ylabel('Y')
ax.set_ylim3d(-1, 120)
ax.set_zlabel('Z')
ax.set_zlim3d(0, 120000)
# Raw string: in a normal literal "\t" is a TAB character, which would turn
# the mathtext command "\theta" into "<TAB>heta".
plt.xlabel(r'2$ \theta$')
plt.show()
Standard plotting things.

Matplotlib contour plot with intersecting contour lines

I am trying to make a contour plot of the following data using matplotlib in python. The data is of this form -
# x y height
77.23 22.34 56
77.53 22.87 63
77.37 22.54 72
77.29 22.44 88
The data actually consists of nearly 10,000 points, which I am reading from an input file. However the set of distinct possible values of z is small (within 50-90, integers), and I wish to have a contour lines for every such distinct z.
Here is my code -
import matplotlib
import numpy as np
import matplotlib.cm as cm
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
import csv
import sys
# read data from file
data = csv.reader(open(sys.argv[1], 'rb'), delimiter='|', quotechar='"')
x, y, z = [], [], []
for row in data:
    try:
        x.append(float(row[0]))
        y.append(float(row[1]))
        z.append(float(row[2]))
    except Exception as e:
        # rows that fail to parse are ignored silently
        continue
        #print e

X, Y = np.meshgrid(x, y) # (I don't understand why is this required)

# creating a 2D array of z whose leading diagonal elements
# are the z values from the data set and the off-diagonal
# elements are 0, as I don't care about them.
default = 0
z_2d = []
for i, no in enumerate(z):
    z_temp = [default] * len(x)
    z_temp[i] = no
    z_2d.append(z_temp)
Z = z_2d

# one contour level per distinct z value
CS = plt.contour(X, Y, Z, list(set(z)))
plt.figure()
CB = plt.colorbar(CS, shrink=0.8, extend='both')
plt.show()
Here is the plot of a small sample of data -
Here is a close look to one of the regions of the above plot (note the overlapping/intersecting lines) -
I don't understand why it doesn't look like a contour plot. The lines are intersecting, which shouldn't happen. What can be possibly wrong? Please help.
Try to use the following code. This might help you -- it's the same thing which was in the Cookbook:
import numpy as np
import matplotlib.pyplot as plt
# matplotlib.mlab.griddata has been removed from Matplotlib;
# scipy.interpolate.griddata is the maintained replacement.
from scipy.interpolate import griddata

# with this way you can load your csv-file really easy -- maybe you should change
# the last 'dtype' to 'int', because you said you have int for the last column
data = np.genfromtxt('output.csv', dtype=[('x',float),('y',float),('z',float)],
                     comments='"', delimiter='|')

# just an assigning for better look in the plot routines
x = data['x']
y = data['y']
z = data['z']

# just an arbitrary number for grid point
ngrid = 500

# create an array with same difference between the entries
# you could use x.min()/x.max() for creating xi and y.min()/y.max() for yi
xi = np.linspace(-1,1,ngrid)
yi = np.linspace(-1,1,ngrid)

# create the grid data for the contour plot; rows follow yi and columns xi,
# and nodes outside the convex hull of the samples come back as NaN
zi = griddata((x, y), z, (xi[None,:], yi[:,None]), method='linear')

# plot the contour and a scatter plot for checking if everything went right
plt.contour(xi,yi,zi,20,linewidths=1)
plt.scatter(x,y,c=z,s=20)
plt.xlim(-1,1)
plt.ylim(-1,1)
plt.show()
I created a sample output file with an Gaussian distribution in 2D. My result with using the code from above:
NOTE:
Maybe you noticed that the edges are somewhat cropped. This is because the griddata function creates masked arrays: the border of the plot is defined by the outermost points, and nothing outside that border is drawn. If all your points lay on a single line, there would be no contour to plot at all, which is only logical. I mention it because of the four data points you posted, which suggest you may have this case. Maybe you don't =)
UPDATE
I edited the code a bit. Your problem was probably that you didn't resolve the dependencies of your input-file correctly. With the following code the plot should work correctly.
import numpy as np
import matplotlib.pyplot as plt
# matplotlib.mlab.griddata has been removed from Matplotlib;
# scipy.interpolate.griddata is the maintained replacement.
from scipy.interpolate import griddata
import csv

data = np.genfromtxt('example.csv', dtype=[('x',float),('y',float),('z',float)],
                     comments='"', delimiter=',')

sample_pts = 500   # grid nodes per axis
con_levels = 20    # number of contour levels

# take the plot range from the data itself
x = data['x']
xmin = x.min()
xmax = x.max()
y = data['y']
ymin = y.min()
ymax = y.max()
z = data['z']

xi = np.linspace(xmin,xmax,sample_pts)
yi = np.linspace(ymin,ymax,sample_pts)

# rows follow yi and columns xi; nodes outside the convex hull of the
# samples come back as NaN
zi = griddata((x, y), z, (xi[None,:], yi[:,None]), method='linear')

plt.contour(xi,yi,zi,con_levels,linewidths=1)
plt.scatter(x,y,c=z,s=20)
plt.xlim(xmin,xmax)
plt.ylim(ymin,ymax)
plt.show()
With this code and your small sample I get the following plot:
Try to use my snippet and just change it a bit. For example, for the given sample csv-file I had to change the delimiter from | to ,. The code I wrote for you is not really nice, but it is written in a straightforward way.
Sorry for the late response.

Categories

Resources