I am quite new to python so please bear with me.
My code so far is below:
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
# Load the CSV; skiprows=[1] skips the second line of the file (the line
# directly below the header row).
df = pd.read_csv(r"/Users/aaronhuang/Desktop/ffp/exfileCLEAN2.csv", skiprows=[1])

# NOTE: both column names carry a trailing space in this file's header.
magnitudes = df['Magnitude '].values
times = df['Time '].values

# Flag samples whose magnitude is more than 3 sample standard deviations
# (ddof=1) away from the mean.
zscores = np.abs(stats.zscore(magnitudes, ddof=1))
outlier_indicies = np.flatnonzero(zscores > 3)

# print() returns None, so store the outlier times first and print them as a
# separate step; `numbers` now actually holds the values.
numbers = times[outlier_indicies]
print(numbers)

window = 2  # number of samples shown on each side of the outlier
num = 1     # position in outlier_indicies of the outlier to plot

# Clamp the start at 0: a negative start index would count from the end of
# the array and give an empty or wrong slice for outliers near the beginning.
centre = outlier_indicies[num]
start = max(centre - window, 0)
stop = centre + window + 1
x = times[start:stop]
y = magnitudes[start:stop]

plt.plot(x, y)
plt.xlabel('Time (units)')
plt.ylabel('Magnitude (units)')
plt.show()
# The original created a new, empty figure after show(); it was never drawn
# on or displayed, so it has been dropped.
Currently, the code only shows one graph, determined by num. I would like it to show all the graphs at once using plt.subplots, which I think is the easiest way.
It would be great if someone could help me integrate plt.subplots, as I don't really know where to start.
Thanks
PS: Here is the data if it would be useful.
Graph created.
The cause of the error is an extra space at the end of the column name in the provided CSV file. The code fixes that. If you fixed the column names in the original data, you should also fix the code.
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
# Original location of the file:
# df = pd.read_csv(r"/Users/aaronhuang/Desktop/ffp/exfileCLEAN2.csv", skiprows=[1])
df = pd.read_csv(r"./exfileCLEAN2.csv", skiprows=[1])

# NOTE: the column names in the provided CSV end with a space.
magnitudes = df['Magnitude '].values
times = df['Time '].values

# Samples more than 3 sample standard deviations (ddof=1) from the mean.
zscores = np.abs(stats.zscore(magnitudes, ddof=1))
outlier_indicies = np.flatnonzero(zscores > 3)

# print() returns None, so keep the values and print them separately.
numbers = times[outlier_indicies]
print(numbers)

# --- Grid of consecutive chunks of the table --------------------------------
n_rows, n_cols = 6, 10
chunk = 10  # table rows drawn in each subplot
fig, axes = plt.subplots(n_rows, n_cols, figsize=(30, 30))
for i in range(n_rows):
    for j in range(n_cols):
        # Walk through the table row-major. Indexing by j alone would draw
        # the same ten chunks again on every row of the grid.
        first = (i * n_cols + j) * chunk
        part = df.iloc[first:first + chunk, :]
        axes[i][j].plot(part['Time '], part['Magnitude '])
        # Rotate the automatically placed tick labels. Overwriting the label
        # text without fixing the tick positions would mislabel the axis.
        axes[i][j].tick_params(axis='x', labelrotation=45)

# --- Single outlier window ---------------------------------------------------
window = 2  # samples shown on each side of the outlier
num = 1     # position in outlier_indicies of the outlier to plot
centre = outlier_indicies[num]
start = max(centre - window, 0)  # a negative start would wrap around
x = times[start:centre + window + 1]
y = magnitudes[start:centre + window + 1]

# Open a separate figure: plt.plot targets the current axes, which would
# otherwise be the last cell of the grid created above.
plt.figure()
plt.plot(x, y)
plt.xlabel('Time (units)')
plt.ylabel('Magnitude (units)')
plt.show()
Related
This question already has answers here:
How to plot in multiple subplots
(12 answers)
Closed 1 year ago.
I want to arrange 5 histograms in a grid. Here is my code and the result:
I was able to create the graphs, but the difficulty comes in arranging them in a grid. I used GridSpec to define the grid, but I still need to link each graph to its respective place in it.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
# Convenience handles for the five trait columns of `df`.
Openness = df['O']
Conscientiousness = df['C']
Extraversion = df['E']
Areeableness = df['A']
Neurocitism = df['N']

# A 2x3 layout specification; nothing below places a plot into it yet.
grid = plt.GridSpec(2, 3, wspace=0.4, hspace=0.3)

# Plots 1-5: one 100-bin histogram per trait. Every call goes through the
# pyplot state machine, so all five are drawn onto the same current axes and
# each title/label call replaces the previous one.
for column, title in (
    ('O', "Openness to experience"),
    ('C', "Conscientiousness"),
    ('E', "Extraversion"),
    ('A', "Areeableness"),
    ('N', "Neurocitism"),
):
    plt.hist(df[column], bins=100)
    plt.title(title)
    plt.xlabel("Value")
    plt.ylabel("Frequency")
Results merge everything into one chart
But it should look like this
Could you guys please help me out?
You can use plt.subplots:
# Create one figure together with a 2x2 array of Axes objects.
# NOTE(review): a 2x2 grid has only four cells; placing all five histograms
# needs a larger grid (e.g. nrows=2, ncols=3) -- confirm the intended layout.
fig, axes = plt.subplots(nrows=2, ncols=2)
This creates a 2x2 grid. You can access individual positions by indexing the axes object:
top left:
# Pick the top-left cell (row 0, column 0) of the axes array.
ax = axes[0,0]
# Draw on that Axes object directly rather than through plt.*, so the
# histogram lands in this cell only.
ax.hist(df['C'], bins = 100)
# Title and axis labels are set through the Axes set_* methods.
ax.set_title("Conscientiousness")
ax.set_xlabel("Value")
ax.set_ylabel("Frequency")
and so on.
You can also continue to use GridSpec; see https://matplotlib.org/stable/tutorials/intermediate/gridspec.html
For example:
# `gridspec` is referenced below but was never imported in the snippet.
from matplotlib import gridspec

# One figure with a 3-row x 2-column layout; constrained_layout keeps titles
# and labels of neighbouring cells from overlapping.
fig2 = plt.figure(constrained_layout=True)
spec2 = gridspec.GridSpec(ncols=2, nrows=3, figure=fig2)

# Five of the six cells are used; cell (2, 0) is left empty.
f2_ax1 = fig2.add_subplot(spec2[0, 0])
f2_ax2 = fig2.add_subplot(spec2[0, 1])
f2_ax3 = fig2.add_subplot(spec2[1, 0])
f2_ax4 = fig2.add_subplot(spec2[1, 1])
f2_ax5 = fig2.add_subplot(spec2[2, 1])

# Plot 1
f2_ax1.hist(df['O'])
f2_ax1.set_title("Openness to experience")
f2_ax1.set_xlabel("Value")
f2_ax1.set_ylabel("Frequency")

# The remaining histograms follow the same pattern on f2_ax2 ... f2_ax5.
plt.show()
Below is my code:
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
# Replace this path with wherever the file is.
df = pd.read_csv(r"/Users/aaronhuang/Desktop/ffp/exfileCLEAN2.csv", skiprows=[1])

# NOTE: both column names carry a trailing space in this file's header.
magnitudes = df['Magnitude '].values
times = df['Time '].values

# Samples more than 3 sample standard deviations (ddof=1) from the mean.
zscores = np.abs(stats.zscore(magnitudes, ddof=1))
outlier_indicies = np.argwhere(zscores > 3).flatten()
print(times[outlier_indicies])

# The number of graphs is n_rows * n_cols (60 here); change these two values
# to show more or fewer.
n_rows, n_cols = 6, 10
chunk = 10  # table rows drawn in each subplot
fig, axes = plt.subplots(n_rows, n_cols, figsize=(30, 30))
for i in range(n_rows):
    for j in range(n_cols):
        # Advance through the table row-major; using j alone would repeat the
        # same ten chunks on every row of the grid.
        first = (i * n_cols + j) * chunk
        part = df.iloc[first:first + chunk, :]
        axes[i][j].plot(part['Time '], part['Magnitude '])
        # Rotate the automatically placed tick labels instead of replacing
        # their text, which would mislabel the ticks.
        axes[i][j].tick_params(axis='x', labelrotation=45)
plt.show()
It shows all the graphs at once (60). How can I change it to show fewer, for example 30 graphs?
you have to change these 3 lines.
fig, axes = plt.subplots(6, 10, figsize=(30,30))
for i in range(6):
for j in range(10):
They are running 6 * 10 so in total 60. If you want for example 50 you change that into
fig, axes = plt.subplots(5, 10, figsize=(30,30))
for i in range(5):
for j in range(10):
So, you have to change the parameters in the range() and subplots() calls.
A better way is to declare two variables to control the size, like this:
width = 5
height = 10
fig, axes = plt.subplots(width, height, figsize=(30,30))
for i in range(width):
for j in range(height):
Now you can simply change the scale by changing the values of the two variables.
When illustrating gradient descent, we usually see a bowl-shaped graph like the one below. It is also said that with log loss instead of squared error the minimum of the loss is easier to find, because using squared error as the loss function may result in multiple local minima.
Therefore, I want to plot the bowl shape graph like below.
However, I only managed to plot the following
Here is my code, could anyone help me fix it? thanks
from mpl_toolkits.mplot3d.axes3d import Axes3D
import matplotlib.pyplot as plt
from matplotlib import cm
import numpy as np
import math
fig, ax1 = plt.subplots(1, 1, figsize=(8, 5), subplot_kw={'projection': '3d'})

# A single training example: inputs (x1, x2) and target y.
x1 = 1
x2 = 1
y = 0.8

# Weight grid. meshgrid yields w1[i, j] == w[j] and w2[i, j] == wl[i], the
# same layout the original built by filling rows and columns in a loop.
w = np.linspace(-10, 10, 100)
wl = np.linspace(-10, 10, 100)
w1, w2 = np.meshgrid(w, wl)

# Pre-activation for every weight pair. Because x1 == x2, this depends on
# w1 + w2 only, so the loss is constant along each line w1 + w2 = const and
# the surface is a valley (trough) rather than a bowl.
a = w1 * x1 + w2 * x2

# Log loss  -(y*log(f) + (1-y)*log(1-f))  with f = sigmoid(a), written with
#   log(f)     = -log(1 + exp(-a)) = -logaddexp(0, -a)
#   log(1 - f) = -log(1 + exp(a))  = -logaddexp(0,  a)
# which avoids exp overflow and log(0) for large |a|.
loss = y * np.logaddexp(0, -a) + (1 - y) * np.logaddexp(0, a)
# Squared-error alternative: 0.5 * (1 / (1 + np.exp(-a)) - y) ** 2

ax1.plot_wireframe(w1, w2, loss)
ax1.set_title("plot backpropogation")
plt.tight_layout()
plt.show()
The following ignores the Formula from the question and is probably completely unrelated to any actual problem. It just shows how to plot a bowl.
A way to plot a bowl is to use a function that is rotationally symmetric about the z axis.
For example:
from mpl_toolkits.mplot3d.axes3d import Axes3D
import matplotlib.pyplot as plt
import numpy as np
# Single 3D axes on an 8x5 inch figure.
fig, ax1 = plt.subplots(figsize=(8, 5), subplot_kw={'projection': '3d'})

# Square grid covering [-alpha, alpha] in both directions.
alpha = 0.8
r = np.linspace(-alpha, alpha, 100)
X, Y = np.meshgrid(r, r)

# The height depends on X and Y only through X**2 + Y**2, which makes the
# surface rotationally symmetric about the z axis.
squared_radius = X**2 + Y**2
l = 1. / (1 + np.exp(-squared_radius))

ax1.plot_wireframe(X, Y, l)
ax1.set_title("plot")
plt.show()
I want to visualize my CSV data as clusters.
This is my csv data.(https://github.com/soma11soma11/EnergyDataSimulationChallenge/blob/challenge2/soma11soma/challenge2/analysis/Soma/total_watt.csv)
For your information:
I was able to visualize the CSV data as a 3D graph.
This is my code:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import axes3d
MY_FILE = 'total_watt.csv'

# Two unnamed columns: a timestamp (parsed as datetime) and a consumption value.
df = pd.read_csv(MY_FILE, parse_dates=[0], header=None, names=['datetime', 'consumption'])

# Split each timestamp into its calendar date and its time of day.
df['date'] = df['datetime'].dt.date
df['time'] = df['datetime'].dt.time

# Rows: time of day, columns: date, cells: consumption.
pv = df.pivot(index='time', columns='date', values='consumption')
# to avoid holes in the surface
pv = pv.fillna(0.0)

# Integer grid positions matching the shape of the pivot table.
xx, yy = np.mgrid[0:len(pv), 0:len(pv.columns)]

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
surf = ax.plot_surface(xx, yy, pv.values, cmap='jet', cstride=1, rstride=1)
fig.colorbar(surf, shrink=0.5, aspect=10)

# Human-readable tick labels for the two index axes.
dates = [x.strftime('%m-%d') for x in pv.columns]
times = [x.strftime('%H:%M') for x in pv.index]

ax.set_title('Energy consumptions Clusters', color='lightseagreen')
ax.set_xlabel('time', color='darkturquoise')
ax.set_ylabel('date(year 2011)', color='darkturquoise')
ax.set_zlabel('energy consumption', color='darkturquoise')

# Label every 10th grid position so the ticks stay legible.
ax.set_xticks(xx[::10, 0])
ax.set_xticklabels(times[::10], color='lightseagreen')
ax.set_yticks(yy[0, ::10])
ax.set_yticklabels(dates[::10], color='lightseagreen')

# set_axis_bgcolor was removed from matplotlib; set_facecolor replaces it.
ax.set_facecolor('black')
plt.show()
#Thanks for reading! Looking forward to the Skype Interview.
And this is the graph, I got from this code.
I think I should change some parts of this code in order to cluster the data into three groups: high, medium and low energy consumption.
The image I want to get from clustering the data looks like this (2D, 3 colours).
Should I use k-means?
Here is the result using KMeans.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import axes3d
from sklearn.cluster import KMeans
MY_FILE = '/home/Jian/Downloads/total_watt.csv'

# Two unnamed columns: a timestamp (parsed as datetime) and a consumption value.
df = pd.read_csv(MY_FILE, parse_dates=[0], header=None, names=['datetime', 'consumption'])
df['date'] = [x.date() for x in df['datetime']]
df['time'] = [x.time() for x in df['datetime']]

# time x date table with gaps filled by 0, flattened to one value per
# (time, date) pair under a two-level index.
stacked = df.pivot(index='time', columns='date', values='consumption').fillna(0).stack()

# do unsupervised clustering
# =============================================
estimator = KMeans(n_clusters=3, random_state=0)
# KMeans expects a 2-D (n_samples, n_features) array; there is one feature.
X = stacked.values.reshape(-1, 1)
cluster = estimator.fit_predict(X)

# Cluster means recorded in the original interactive session:
#   cluster 0: 324.73175293698534
#   cluster 1: 6320.8504071851467
#   cluster 2: 1831.1473140192766

# plotting
# =============================================
fig, ax = plt.subplots(figsize=(10, 8))

# Integer positions of each sample along the time and date levels.
# MultiIndex.labels was removed from pandas; .codes is its replacement.
x = stacked.index.codes[0]
y = stacked.index.codes[1]

# One scatter series per cluster: 0 -> green, 1 -> red, 2 -> blue.
for label, colour in enumerate('grb'):
    members = cluster == label
    ax.scatter(x[members], y[members],
               label='mean: {}'.format(X[members].mean()), c=colour, alpha=0.8)
ax.legend(loc='best')

# Needed when run as a script; without it no window is opened.
plt.show()
Sorry in advance if this is a little long-winded, but if I cut it down too much the problem is lost. I am trying to make a module on top of pandas and matplotlib which will give me the ability to make profile plots and profile matrices analogous to scatter_matrix. I am pretty sure my problem comes down to what object I need to return from Profile() so that I can handle Axes manipulation in Profile_Matrix(). Then the question is what to return from Profile_Matrix() so I can edit subplots.
My module (ProfileModule.py) borrows a lot from https://github.com/pydata/pandas/blob/master/pandas/tools/plotting.py and looks like:
import pandas as pd
from pandas import Series, DataFrame
import numpy as np
import matplotlib.pyplot as plt
def Profile(x, y, nbins, xmin, xmax, ax=None):
    """Draw a profile plot: the mean of ``y`` in equal-width bins of ``x``.

    Parameters
    ----------
    x, y : array-like or Series
        Paired samples. They are combined into one DataFrame, so Series
        inputs are aligned on their index.
    nbins : int
        Number of equal-width bins between ``xmin`` and ``xmax``.
    xmin, xmax : number
        Bounds of the binned range. Samples with ``x`` outside
        ``[xmin, xmax)`` fall into no bin and are ignored.
    ax : matplotlib Axes, optional
        Axes to draw on. When omitted, a new figure with a single Axes is
        created, as in the original single-plot usage.

    Returns
    -------
    The Axes that was drawn on. Returning the Axes rather than the Figure
    lets callers such as ``Profile_Matrix`` keep adjusting labels and ticks.
    """
    # float() guards against integer division when all bounds are ints.
    bin_width = (xmax - xmin) / float(nbins)
    binedges = xmin + bin_width * np.arange(nbins + 1)
    bincenters = xmin + bin_width * np.arange(nbins) + bin_width / 2.0

    df = DataFrame({'x': x, 'y': y})
    # np.digitize numbers the bins 1..nbins; 0 and nbins + 1 mean out of range.
    df['bin'] = np.digitize(df['x'].values, binedges)

    # Per-bin statistics in one pass. reindex() keeps empty bins as NaN rows
    # so every column has exactly nbins entries.
    bins = np.arange(1, nbins + 1)
    grouped = df.groupby('bin')['y']
    ProfileFrame = DataFrame({
        'bincenters': bincenters,
        'N': grouped.size().reindex(bins).values,
        'ymean': grouped.mean().reindex(bins).values,
        'yStandDev': grouped.std().reindex(bins).values,
    }, index=bins)
    # Standard error of the mean in each bin.
    ProfileFrame['yMeanError'] = ProfileFrame['yStandDev'] / np.sqrt(ProfileFrame['N'])

    if ax is None:
        fig = plt.figure()
        ax = fig.add_subplot(1, 1, 1)

    # fmt='none' draws the error bars only, with no markers or joining line.
    ax.errorbar(ProfileFrame['bincenters'], ProfileFrame['ymean'],
                yerr=ProfileFrame['yMeanError'], xerr=bin_width / 2.0,
                fmt='none')
    return ax


def Profile_Matrix(frame, nbins=100):
    """Draw an n-by-n matrix of profile plots for the numeric columns of ``frame``.

    The cell in row ``i`` and column ``j`` profiles column ``i`` (as y)
    against column ``j`` (as x), analogous to pandas' scatter_matrix.

    Parameters
    ----------
    frame : DataFrame
        Input data; non-numeric columns are ignored.
    nbins : int, optional
        Number of bins used in every cell (default 100).

    Returns
    -------
    2-D array of Axes with shape (n, n), so callers can edit each subplot.
    """
    range_padding = 0.05
    df = frame.select_dtypes(include=[np.number])
    n = df.columns.size
    fig, axes = plt.subplots(nrows=n, ncols=n, squeeze=False)

    # no gaps between subplots
    fig.subplots_adjust(wspace=0, hspace=0)

    mask = df.notnull()

    # Padded (min, max) range of every column, used for binning and limits.
    boundaries_list = []
    for a in df.columns:
        values = df[a].values[mask[a].values]
        rmin_, rmax_ = np.min(values), np.max(values)
        rdelta_ext = (rmax_ - rmin_) * range_padding / 2.
        boundaries_list.append((rmin_ - rdelta_ext, rmax_ + rdelta_ext))

    for i, a in enumerate(df.columns):
        for j, b in enumerate(df.columns):
            ax = axes[i, j]
            # Use only rows where both variables are present.
            common = (mask[a] & mask[b]).values

            # x is column b (index j), so the binned range must come from j.
            xmin, xmax = boundaries_list[j]
            # Draw into this cell instead of letting Profile open a new figure.
            Profile(df[b][common], df[a][common], nbins, xmin, xmax, ax=ax)

            # Consistent limits per column/row, so the tick labels on the
            # outer cells are valid for the inner cells whose axes are hidden.
            ax.set_xlim(boundaries_list[j])
            ax.set_ylim(boundaries_list[i])

            ax.set_xlabel(b)
            ax.set_ylabel(a)

            # Only the left column and the bottom row keep visible axes.
            if j != 0:
                ax.yaxis.set_visible(False)
            if i != n - 1:
                ax.xaxis.set_visible(False)

    for ax in axes.flat:
        ax.tick_params(labelsize=8)

    return axes
This will run with something like:
import pandas as pd
from pandas import Series, DataFrame
import numpy as np
import matplotlib.pyplot as plt
import ProfileModule as pm
# Binning configuration for the single profile plot.
xmin, xmax = 0, 100
nbins = 25

# Synthetic sample: x is uniform on [0, 100); y and z are built from x with
# multiplicative Gaussian noise. The random draws happen in the order
# x, noise for y, noise for z.
x = np.random.uniform(0, 100, size=1000)
y_noise = np.random.randn(1000)
y = x * x + 50 * x * y_noise
z_noise = np.random.randn(1000)
z = x * y + 50 * y * z_noise

# Single profile of y against x, titled through pyplot.
ProfilePlot = pm.Profile(x, y, nbins, xmin, xmax)
plt.title("Look this works!")

#This does not work as expected
frame = DataFrame(dict(z=z, x=x, y=y))
ProfileMatrix = pm.Profile_Matrix(frame)

plt.show()
This would hopefully produce a simple profile plot and a 3x3 profile matrix but it does not. I have tried various different methods to get this to work but I imagine it is not worth explaining them all.
I should mention I am using Enthought Canopy Express on Windows 7. Sorry for the long post and thanks again for any help with the code. This is my first week using Python.
You should pass around Axes objects and break your functions up so that each one operates on a single Axes at a time. You are close; just restructure along these lines:
import numpy as np
import matplotlib.pyplot as plt
def _profile(ax, x, y):
    """Plot ``y`` against ``x`` on the given Axes and return the created line."""
    (line,) = ax.plot(x, y)
    # return the Artist created
    return line


def profile_matrix(n, m):
    """Create an n-by-m grid of subplots and draw one profile in each cell.

    All cells share their x and y axes. Each cell receives 50 uniform random
    values as placeholder data.

    Returns
    -------
    (fig, ax_array) : the Figure and an (n, m) array of Axes, so the caller
        can keep editing individual subplots afterwards.
    """
    # squeeze=False keeps ax_array two-dimensional even when n or m is 1.
    fig, ax_array = plt.subplots(n, m, sharex=True, sharey=True, squeeze=False)
    for ax in ax_array.flat:
        _profile(ax, np.arange(50), np.random.rand(50))
    return fig, ax_array


profile_matrix(3, 3)