I am learning matplotlib. I am trying to display a specific point on a surface, and I get the following result:
Both screenshots come from the same figure. In the first one we can hardly see the point I want to display; in the second one it is clearly visible. I would like it to be visible in both. Here is my code:
import numpy as np
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
# Read the three whitespace-separated columns of data.txt.  With unpack=True
# np.loadtxt returns one float array per column, in file order.
hours, pauses, results = np.loadtxt('./data.txt', dtype=float, unpack=True)
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

# Work on float arrays so the cost can be evaluated with broadcasting.
hours_arr = np.asarray(hours, dtype=float)
pauses_arr = np.asarray(pauses, dtype=float)
results_arr = np.asarray(results, dtype=float)
m = len(hours_arr)


def _cost(wh, wp):
    """Return sum((wh*hours + wp*pauses - results)**2) / (2*m).

    ``wh`` and ``wp`` may be scalars or equally shaped arrays; the result has
    the same shape because the sum over the ``m`` samples runs along an extra
    trailing axis.
    """
    wh = np.asarray(wh, dtype=float)[..., np.newaxis]
    wp = np.asarray(wp, dtype=float)[..., np.newaxis]
    residuals = wh * hours_arr + wp * pauses_arr - results_arr
    return (residuals ** 2).sum(axis=-1) / (2.0 * m)


# Random starting weights and the cost at that point (drawn as a red marker).
WP_init = 5 * np.random.rand()
WH_init = 5 * np.random.rand()
Z_init = float(_cost(WH_init, WP_init))
ax.scatter(WH_init, WP_init, Z_init, s=40, c='r', marker='o')

# Cost surface over a regular grid of weights.
WH = np.arange(-5, 5, 0.25)
WP = np.arange(-5, 5, 0.25)
WH, WP = np.meshgrid(WH, WP)
Z = _cost(WH, WP)
ax.plot_surface(WH, WP, Z, rstride=4, cstride=4, cmap=cm.coolwarm)
Do you know how I could fix this?
edit : here is the content of data.txt :
12 0 13
12 8 18
10 0 14
10 4 16
8 0 14
8 2 15
8 8 8
6 0 13
6 3 11
4 0 8
4 4 4
2 2 2
0 0 0
You could set the alpha of the surface to something less than 1, so that the surface is partly transparent and cannot completely hide the red dot:
ax.plot_surface(WH, WP, Z, rstride=4, cstride=4, cmap=cm.coolwarm, alpha=0.5)
I have a pandas dataframe like this -
(Creating a random dataframe)
import random
from random import randint

import pandas as pd

# `random` has to stay bound to the module: both random.random() and
# random.choice() are called below.
x = [randint(1, 20) for _ in range(20)]
y1 = [random.random() for _ in range(20)]
y2 = [random.random() for _ in range(20)]
y3 = [random.random() for _ in range(20)]
y4 = [random.random() for _ in range(20)]
g = ['a', 'b', 'c']
group = [random.choice(g) for _ in range(20)]
data = {'Group': group, 'x': x, 'y1': y1, 'y2': y2, 'y3': y3, 'y4': y4}
df = pd.DataFrame(data)
The dataframe is like this -
>>> df.sort_values('Group')
Group x y1 y2 y3 y4
17 a 9 0.400730 0.242629 0.858307 0.799613
16 a 14 0.644299 0.952255 0.257262 0.376845
5 a 3 0.784374 0.800639 0.753612 0.441645
18 a 3 0.988016 0.739003 0.741000 0.299011
11 a 18 0.672816 0.232951 0.763451 0.762478
0 b 7 0.670889 0.785928 0.604563 0.620951
15 b 3 0.838479 0.286988 0.374546 0.013822
4 b 4 0.495855 0.159839 0.984262 0.882428
13 b 3 0.756058 0.979226 0.423426 0.297381
8 b 13 0.835705 0.374927 0.492676 0.939113
12 b 17 0.643511 0.156267 0.248037 0.316526
14 c 13 0.303215 0.177303 0.980071 0.705428
9 c 16 0.829414 0.173755 0.992532 0.398509
7 c 9 0.774353 0.082118 0.089582 0.587679
6 c 14 0.551595 0.737882 0.127206 0.985017
3 c 4 0.072765 0.497016 0.634819 0.149798
2 c 1 0.971598 0.254215 0.325086 0.588159
1 c 14 0.467277 0.631844 0.927199 0.051251
10 c 13 0.346592 0.384929 0.185384 0.330408
19 c 16 0.790785 0.449498 0.176042 0.036896
Using this dataframe, I intend to plot multiple graphs group-wise (in this case 3 graphs, as there are only 3 groups). Each graph is a multi-line graph with x on the x-axis and [y1, y2, y3, y4] on the y-axis.
How can I achieve this? I can plot a single multi-line graph, but I am unable to plot multiple plots group-wise.
You can use groupby:
# One subplot per group.  squeeze=False keeps `axes` a 2-D array even when
# there is only a single group, so `.flat` always works.
groups = df.groupby('Group')
fig, axes = plt.subplots(1, groups.ngroups, figsize=(10, 3), squeeze=False)
for (grp, grp_df), ax in zip(groups, axes.flat):
    # Sort by x so each line runs left to right instead of zigzagging, then
    # draw the remaining numeric columns against x on this group's axes.
    grp_df.sort_values('x').plot(x='x', ax=ax, title=str(grp))
Note: You don't really need to sort by group.
I'm trying to make an animation and am looking at the code of another stack overflow question. The code is the following
import matplotlib.pyplot as plt
from matplotlib import animation as animation
import numpy as np
import pandas as pd
import io
# Whitespace-delimited sample table: a Time column followed by M1..M5.
u = u"""Time M1 M2 M3 M4 M5
1 1 2 3 1 2
2 1 3 3 1 2
3 1 3 2 1 3
4 2 2 3 1 2
5 3 3 3 1 3
6 2 3 4 1 4
7 2 3 4 3 3
8 3 4 4 3 4
9 4 4 5 3 3
10 4 4 5 5 4"""
# sep=r"\s+" splits on runs of whitespace; it replaces the deprecated
# delim_whitespace=True keyword.
df_Bubble = pd.read_csv(io.StringIO(u), sep=r"\s+")
time_count = len(df_Bubble)
colors = np.arange(1, 6)
x = np.arange(1, 6)
max_radius = 25
fig, ax = plt.subplots()
# Initial scatter: y values come from the first data row without its Time
# column; every point gets its own colour value.
pic = ax.scatter(x, df_Bubble.iloc[0, 1:], s=100, c=colors)
def init():
    """Return the scatter artist unchanged; passed to FuncAnimation as init_func."""
    return pic,
def updateData(i):
    """Move and resize the scatter points for frame ``i`` and return the artist."""
    # Row i of the table without its first (Time) column.
    y = df_Bubble.iloc[i, 1:]
    # Marker sizes derived from the row values and max_radius.
    area = np.pi * (max_radius * y / 10.0) ** 2
    # NOTE(review): this passes a two-row list [x, y]; the ValueError quoted
    # with this code says an Nx2 array is expected.
    pic.set_offsets([x, y.values])
    pic._sizes = area
    return pic,
# Drive updateData for 10 frames at a 50 ms interval with blitting enabled.
ani = animation.FuncAnimation(fig, updateData,
frames=10, interval = 50, blit=True, init_func=init)
When I run this code unchanged I get the error
ValueError: Points must be Nx2 array, got 2x5
I have looked at similar threads on this question and have come to the conclusion that the problem has to do with the line with [[np.nan]*len(colors)]*2. Based on the examples I found, I thought that changing a part of this line to an array might help, but none of my attempts have worked, and now I'm stuck. I would be grateful for any help.
set_offsets expects an Nx2 ndarray, but you provide 2 arrays with 5 elements each in updateData(i) and 2 lists with 5 elements each in init():
def init():
    """Return the scatter artist, untouched, as a one-element tuple."""
    return (pic,)
def updateData(i):
    """Update the scatter artist for animation frame ``i``.

    Parameters
    ----------
    i : int
        Frame number, used as the row index into ``df_Bubble``.

    Returns
    -------
    tuple
        One-element tuple holding the updated scatter artist.
    """
    # Row i of the table without its first column.
    y = df_Bubble.iloc[i, 1:]
    area = np.pi * (max_radius * y / 10.0) ** 2
    # set_offsets wants one (x, y) row per point, i.e. an N x 2 array.
    pic.set_offsets(np.transpose((x, y.values)))
    # Use the public setter rather than writing the private _sizes attribute.
    pic.set_sizes(np.asarray(area, dtype=float))
    return pic,
Is there a way to group boxplots in matplotlib WITHOUT the use of seaborn or some other library?
e.g. in the following, I want to have blocks along the x axis, and plot values grouped by condition (so there will be 16 boxes). Like what seaborn's hue argument accomplishes.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
blocks = 4
conditions = 4
ndatapoints = blocks * conditions
# Every block number is repeated once per condition, while the condition
# numbers cycle inside each block.  The columns are built once and reused in
# the frame so the helper arrays and the DataFrame cannot disagree.
blockcol = np.repeat(np.arange(1, blocks + 1), conditions)
concol = np.tile(np.arange(1, conditions + 1), blocks)
trialcol = np.arange(1, ndatapoints + 1)
valcol = np.random.normal(0, 1, ndatapoints)
raw_data = {'blocks': blockcol,
            'condition': concol,
            'trial': trialcol,
            'value': valcol}
df = pd.DataFrame(raw_data)
blocks condition trial value
0 1 1 1 1.306146
1 1 2 2 -0.024201
2 1 3 3 -0.374561
3 1 4 4 -0.093366
4 2 1 5 -0.548427
5 2 2 6 -1.205077
6 2 3 7 0.617165
7 2 4 8 -0.239830
8 3 1 9 -0.876789
9 3 2 10 0.656436
10 3 3 11 -0.471325
11 3 4 12 -1.465787
12 4 1 13 -0.495308
13 4 2 14 -0.266914
14 4 3 15 -0.305884
15 4 4 16 0.546730
I can't seem to find any examples.
I think you just want a factor plot:
import numpy
import pandas
import seaborn
blocks = 3
conditions = 4
trials = 12
ndatapoints = blocks * conditions * trials
blockcol = list(range(1, blocks + 1)) * (conditions * trials)
concol = list(range(1, conditions + 1)) * (blocks * trials)
trialcol = list(range(1, trials + 1)) * (blocks * conditions)
valcol = numpy.random.normal(0, 1, ndatapoints)
# Build the long-form frame and hand it to seaborn as the `data` keyword via
# DataFrame.pipe.  catplot is the current name of factorplot (renamed in
# seaborn 0.9); kind='box' draws one box per block/condition pair.
fg = pandas.DataFrame({
    'blocks': blockcol,
    'condition': concol,
    'trial': trialcol,
    'value': valcol,
}).pipe(
    (seaborn.catplot, 'data'),
    x='blocks', y='value', hue='condition',
    kind='box',
)
I have some questions for which I couldn't find any answers, although I searched for them.
My code so far is the following:
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
from math import *
from scipy.special import *
import matplotlib.pyplot as plt
import numpy as np
## Parameter definitions for the pressure equation after Rudnicki (1986) ##
q = 6.0/1000
lameu = 11.2*10**9
lame = 8.4*10**9
alpha = 0.65
G = 8.4*10**9
k = 1.0e-15
eta = 0.001
# NOTE(review): 24600 may be intended as seconds per day (86400) — confirm.
t = 1000*365*24600
kappa = k/eta
print "kappa ist:",kappa
c = ((kappa*(lameu-lame)*(lame+2*G))/((alpha**2)*(lameu+2*G)))
print "c ist:",c
# Bounds of the x/y ranges looped over below.
xmin = -10
xmax = 10
ymin = -10
ymax = 10
# NOTE(review): P is not defined in the code shown here.  x, y and z are
# rebound to single scalar values on every iteration.
for x in range (xmin,xmax):
    for y in range (ymin,ymax):
        z = P/1e6
        print x, y, z
# At this point x and y hold only the last scalar values of the loops.
x, y = np.meshgrid(x, y)
## Plotting in 3D ##
fig = plt.figure()
ax = fig.gca(projection='3d')
# Colour limits are taken from z while ignoring NaN entries.
surf = ax.plot_surface(x, y, z, rstride=1, cstride=1, cmap=cm.jet, linewidth=0,
antialiased=False, vmin=np.nanmin(z), vmax=np.nanmax(z))
fig.colorbar(surf, shrink=0.5, aspect=5)
## Axis scales ##
ax.set_xlim(xmin,xmax) # set the x-axis range
ax.set_ylim(ymin,ymax) # set the y-axis range
## Axis labels ##
ax.set_xlabel('Distanz zu Well [m]')
ax.set_ylabel('Distanz zu Well [m]')
ax.set_zlabel('Druck in [MPa]')
If I try to run the program, my values for x, y and z show up as intended, but I don't get any 3D plot. I had this issue once before, so I tried to define my infinite values for z to be treated as not-a-number:
z[z==np.inf] = np.nan
After adding this to my code, i get the following error:
TypeError: 'numpy.float64' object does not support item assignment
What exactly does this mean? I don't understand it in this context. I think I need it for my plot?
What exactly is the difference in my for loop between, e.g.:
for x in range [-10,10]
for x in range (-10,10)
I know there are kinds of functions that use square brackets []
instead of only parentheses ().
When do I have to use the square brackets?
I hope someone can enlighten me. Thanks!
To answer your various questions:
z[z==np.inf] = np.nan
After adding this to my code, i get the following error: TypeError: 'numpy.float64' object does not
support item assignment
This is because z is just a number, not an array.
The () and [] confusion is simple: you access elements of a list (or of any other container class implementing __getitem__) using the [] brackets. You call objects using ().
Essentially, these two bits of syntax are short forms of the less convenient versions:
myObject[key] results in myObject.__getitem__(key), and myObject(variable) results in myObject.__call__(variable). It's just syntax.
Typically, these are used to create functions and container classes (you could misuse them, but it would make for some very confusing code).
As for making your plotting work, you're going to want to make your z an array of data points with the correct shape.
The issue you were having is that you did not provide the data to plot_surface in the form it requires: it needs 2D arrays of data. XX and YY are just what numpy.meshgrid creates; IIRC, the x and y arguments can also be plain lists, but I haven't tried it.
At any rate, you normally have elements looking like this (for a square grid):
1 2 3 4 5 6 7 8 9
1 2 3 4 5 6 7 8 9
1 2 3 4 5 6 7 8 9
1 2 3 4 5 6 7 8 9
1 2 3 4 5 6 7 8 9
1 2 3 4 5 6 7 8 9
1 2 3 4 5 6 7 8 9
1 2 3 4 5 6 7 8 9
1 1 1 1 1 1 1 1 1
2 2 2 2 2 2 2 2 2
3 3 3 3 3 3 3 3 3
4 4 4 4 4 4 4 4 4
5 5 5 5 5 5 5 5 5
6 6 6 6 6 6 6 6 6
7 7 7 7 7 7 7 7 7
8 8 8 8 8 8 8 8 8
9 9 9 9 9 9 9 9 9
and then ZZ is just the z values of the function at the corresponding points, i.e. if you're plotting some function f(x, y), then you could do something like:
# Evaluate f at every grid node and store the result at the same position.
for i, x_row in enumerate(XX):
    for j, x_val in enumerate(x_row):
        ZZ[i][j] = f(x_val, YY[i][j])
Although there is likely a much faster numpy way to do these array operations.
I normally do something like this:
import numpy
# other boilerplate variable definitions you have
xs = numpy.linspace(xStart, xStop, num=50)
ys = numpy.linspace(yStart, yStop, num=50)
# indexing='ij' gives XX[i, j] == xs[i] and YY[i, j] == ys[j], so the loop
# indices below address exactly the grid node being evaluated.
XX, YY = numpy.meshgrid(xs, ys, indexing='ij')
ZZ = numpy.zeros_like(XX)
for i, x in enumerate(xs):
    for j, y in enumerate(ys):
        # Placeholder: P has to be evaluated for the current (x, y) here.
        ZZ[i, j] = P/1e6
fig = plt.figure()
# add_subplot(..., projection='3d') replaces fig.gca(projection='3d'), which
# newer matplotlib releases no longer accept.
ax = fig.add_subplot(111, projection='3d')
surf = ax.plot_surface(XX, YY, ZZ, rstride=1, cstride=1, cmap=cm.jet, linewidth=0,
                       antialiased=False, vmin=numpy.nanmin(ZZ), vmax=numpy.nanmax(ZZ))
fig.colorbar(surf, shrink=0.5, aspect=5)
I applied PCA to a data set using matplotlib in Python. However, matplotlib does not provide T-squared scores like Matlab does. Is there a way to compute Hotelling's T^2 scores as Matlab does?
matplotlib's PCA class doesn't include the Hotelling T2 calculation, but it can be done with just a couple lines of code. The following code includes a function to compute the T2 values for each point. The __main__ script applies PCA to the same example as used in Matlab's pca documentation, so you can verify that the function generates the same values as Matlab.
from __future__ import print_function, division
import numpy as np
from matplotlib.mlab import PCA
def hotelling_tsquared(pc):
    """Return one Hotelling T-squared value per row of ``pc.a``.

    `pc` should be the object returned by matplotlib.mlab.PCA(); only its
    ``a``, ``Wt`` and ``s`` attributes are read.
    """
    obs_t = np.transpose(pc.a)
    n_obs = obs_t.shape[1]
    # Scaling the columns of Wt.T by s equals multiplying by diag(s).
    scatter = np.dot(pc.Wt.T * pc.s, pc.Wt)
    covariance = scatter / (n_obs - 1)
    solved = np.linalg.solve(covariance, obs_t)
    return np.sum(obs_t * solved, axis=0)
if __name__ == "__main__":
    # First column is Y; the remaining four columns X1..X4 are the inputs
    # handed to PCA below.
    hald_text = """Y X1 X2 X3 X4
78.5 7 26 6 60
74.3 1 29 15 52
104.3 11 56 8 20
87.6 11 31 8 47
95.9 7 52 6 33
109.2 11 55 9 22
102.7 3 71 17 6
72.5 1 31 22 44
93.1 2 54 18 22
115.9 21 47 4 26
83.8 1 40 23 34
113.3 11 66 9 12
109.4 10 68 8 12"""
    hald = np.loadtxt(hald_text.splitlines(), skiprows=1)
    ingredients = hald[:, 1:]
    # NOTE(review): matplotlib.mlab.PCA is not shipped with recent matplotlib
    # releases — confirm the installed version still provides it.
    pc = PCA(ingredients, standardize=False)
    coeff = pc.Wt
    # Print arrays with four decimals, like the reference output.
    np.set_printoptions(precision=4, suppress=True)
    # For coeff and latent, compare to
    # http://www.mathworks.com/help/stats/pca.html#btjpztu-1
    print(coeff)
    latent = pc.s / (ingredients.shape[0] - 1)
    print("latent:" + (" %9.4f"*len(latent)) % tuple(latent))
    # For tsquared, compare to
    # http://www.mathworks.com/help/stats/pca.html#bti6r0c-1
    tsquared = hotelling_tsquared(pc)
    print(tsquared)
[[ 0.0678 0.6785 -0.029 -0.7309]
[ 0.646 0.02 -0.7553 0.1085]
[-0.5673 0.544 -0.4036 0.4684]
[ 0.5062 0.4933 0.5156 0.4844]]
latent: 517.7969 67.4964 12.4054 0.2372
[ 5.6803 3.0758 6.0002 2.6198 3.3681 0.5668 3.4818 3.9794 2.6086
7.4818 4.183 2.2327 2.7216]
Even though this is an old question, I am posting this code as it may help someone.
Here is the code; as a bonus, it performs multiple Hotelling tests at once.
import numpy as np
from scipy.stats import f as f_distrib
def hotelling_t2(X, Y):
    """Two-sample Hotelling T-squared test, evaluated for many tests at once.

    Parameters
    ----------
    X, Y : ndarray, 3-D
        Samples of the two groups.
        dim 0: number of features
        dim 1: number of subjects (may differ between X and Y)
        dim 2: number of mesh nodes or voxels (number of tests)

    Returns
    -------
    pval : ndarray
        Upper-tail F-distribution p-value per test, squeezed (0-d for a
        single test).
    t2 : ndarray, shape (1, n_tests)
        Hotelling T-squared statistic per test.
    """
    nx = X.shape[1]
    ny = Y.shape[1]
    p = X.shape[0]
    pooled_dof = nx + ny - 2
    f_dof = nx + ny - 1 - p

    # Per-feature, per-test group means; keepdims keeps the subject axis so
    # the means broadcast against the samples.
    Xbar = X.mean(1, keepdims=True)
    Ybar = Y.mean(1, keepdims=True)
    X_Xbar = X - Xbar
    Y_Ybar = Y - Ybar

    # Scatter matrices summed over subjects, one (p, p) matrix per test.
    Wx = np.einsum('ijk,ljk->ilk', X_Xbar, X_Xbar)
    Wy = np.einsum('ijk,ljk->ilk', Y_Ybar, Y_Ybar)
    W = (Wx + Wy) / float(pooled_dof)

    Xbar_minus_Ybar = Xbar - Ybar
    # np.linalg.solve works on stacks of matrices along the leading axes, so
    # the test axis is moved to the front and back again afterwards.
    x = np.linalg.solve(W.transpose(2, 0, 1),
                        Xbar_minus_Ybar.transpose(2, 0, 1))
    x = x.transpose(1, 2, 0)

    t2 = np.sum(Xbar_minus_Ybar * x, 0)
    t2 = t2 * float(nx * ny) / float(nx + ny)
    stat = t2 * float(f_dof) / (float(pooled_dof) * p)
    # sf() evaluates the upper tail directly; 1 - cdf() rounds to exactly 0
    # once the p-value drops below double-precision resolution.
    pval = np.squeeze(f_distrib.sf(stat, p, f_dof))
    return pval, t2