I want to plot a series of seaborn heatmaps in a grid. I know the number of subplots (which can be odd or even).
The heatmaps will show the mean "occupation ratio" by "day of week" (y axis) and "hour of day" (x axis), e.g. they all share the same x / y domains.
Here's my current code:
df2 = df[['name','openLots','occupationRatio','DoW','Hour']]
fig, axs = plt.subplots(figsize=(24,24), nrows=7, ncols=6)
axs = axs.flatten()
locations = df2['name'].sort_values().unique()
def occupation_heatmap (name, ax):
dfn = df2[df2['name'] == name]
dfn = dfn.groupby(['DoW', 'Hour']).mean()['occupationRatio'].unstack()
dfn = dfn.reindex(['Mon', 'Tue', 'Wed','Thu','Fri','Sat','Sun'])
sns.heatmap(data=dfn, cmap="coolwarm", vmin=0, vmax=1.0, ax= ax)
ax.set_title(name)
i = 0
for n in locations:
occupation_heatmap (n, axs[i])
i = i+1
plt.tight_layout()
It looks almost like what I want (last few rows):
However want I want:
Have the y axis labels (DoW) only once per row (leftmost plot)
Have the colormap legend only on the rightmost plot in each row (or leave it out completely, the colors are pretty self-explainatory)
remove the "empty plots" in the last row because of an odd total number
Many thanks for any hints
Have the y axis labels (DoW) only once per row (leftmost plot)
This can be done using sharey = True as argument to plt.subplots.
Have the colormap legend only on the rightmost plot in each row (or leave it out completely, the colors are pretty self-explainatory)
Use the cbar = False argument to seaborn.heatmap in order not to show a colorbar. This can be given as an input to the plotting function in dependence of the actual number of subplots.
remove the "empty plots" in the last row because of an odd total number
After the loop for creating the plots you may add another loop removing the unused axes.
for j in range(len(locations), ncols*nrows):
axs[j].axis("off")
Here is a complete example (where I borrowed the cod to generate a dataframe from #Robbie):
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
days = ['Mon','Tue','Wed','Thurs','Fri','Sat','Sun']
names = ["Parkhaus {:02}".format(i+1) for i in range(22)]
nItems = 1000
df = pd.DataFrame()
df['name'] = [names[i] for i in np.random.randint(0,len(names),nItems)]
df['openLots'] = np.random.randint(0,100,nItems)
df['occupationRatio'] = np.random.rand(nItems)
df['DoW'] = [days[i] for i in np.random.randint(0,7,nItems)]
df['Hour'] = np.random.randint(0,12,nItems)
df2 = df[['name','openLots','occupationRatio','DoW','Hour']]
nrows = 4; ncols=6
fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(15,9), sharey=True)
axs = axs.flatten()
locations = df2['name'].sort_values().unique()
def occupation_heatmap (name, ax, cbar=False, ylabel=False):
dfn = df2[df2['name'] == name]
dfn = dfn.groupby(['DoW', 'Hour']).mean()['occupationRatio'].unstack()
dfn = dfn.reindex(['Mon', 'Tue', 'Wed','Thu','Fri','Sat','Sun'])
sns.heatmap(data=dfn, cmap="coolwarm", vmin=0, vmax=1.0, ax=ax, cbar=cbar)
ax.set_title(name)
plt.setp(ax.get_yticklabels(), rotation=0)
if not ylabel: ax.set_ylabel("")
for i, n in enumerate(locations):
occupation_heatmap (n, axs[i], cbar=i%ncols==ncols-1, ylabel=i%ncols==0)
for j in range(len(locations), ncols*nrows):
axs[j].axis("off")
plt.tight_layout()
plt.show()
You can be more flexible and just create an axis for each name present, something like this:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
import string
days = ['Mon','Tue','Wed','Thurs','Fri','Sat','Sun']
names = [string.lowercase[i] for i in range(22)]
nItems = 1000
df = pd.DataFrame()
df['name'] = [names[i] for i in np.random.randint(0,len(names),nItems)]
df['openLots'] = np.random.randint(0,100,nItems)
df['occupationRatio'] = np.random.randint(0,100,nItems)
df['DoW'] = [days[i] for i in np.random.randint(0,7,nItems)]
df['Hour'] = np.random.randint(0,12,nItems)
fig = plt.figure(figsize=(12,12))
for index, name in enumerate(names):
ax = fig.add_subplot(4,6,index+1)
dfn = df.loc[df.name==name]
dfn = dfn.groupby(['DoW','Hour']).mean()['occupationRatio'].unstack()
dfn = dfn.reindex(days)
# Now we can operate on each plot axis individually
if index%6!=5: #i.e.
# Don't draw a colorbar
sns.heatmap(data = dfn, cmap='coolwarm', ax=ax, cbar=False)
else:
sns.heatmap(data = dfn, cmap='coolwarm', ax=ax)
if index%6!=0:
# Remove the y-axis label
ax.set_ylabel('')
ax.set_yticks(())
ax.set_title(name)
fig.tight_layout()
fig.show()
Results in:
You could also play around with the x-axes (for example remove labels and ticks except for the bottom row).
Related
I'm currently trying to change the secondary y-axis values in a matplot graph to ymin = -1 and ymax = 2. I can't find anything on how to change the values though. I am using the secondary_y = True argument in .plot(), so I am not sure if changing the secondary y-axis values is possible for this. I've included my current code for creating the plot.
df.plot()
df.plot(secondary_y = "Market")
From your example code, it seems you're using Pandas built in ploting capabilities. One option to add a second layer is by using matplotlib directly like in the example "two_scales.py".
It uses
import matplotlib.pyplot as plt
fig, ax1 = plt.subplots()
ax1.plot(df["..."])
# ...
ax2 = ax1.twinx()
ax2.plot(df["Market"])
ax2.set_ylim([0, 5])
where you can change the y-limits.
Setting ylim on plot does not appear to work in the case of secondary_y, but I was able to workaround with this:
import pandas as pd
df = pd.DataFrame({'one': range(10), 'two': range(10, 20)})
ax = df['one'].plot()
ax2 = df['two'].plot(secondary_y=True)
ax2.set_ylim(-20, 50)
fig = ax.get_figure()
fig.savefig('test.png')
This is a solution for showing as much y-axes as data columns the dataframe has
colors = ['tab:blue',
'tab:orange',
'tab:green',
'tab:red',
'tab:purple',
'tab:brown',
'tab:pink',
'tab:gray',
'tab:olive',
'tab:cyan']
#X axe and first Y axe
fig, ax1 = plt.subplots()
x_label = str( dataFrame.columns[0] )
index = dataFrame[x_label]
ax1.set_xlabel(x_label)
ax1.set_xticklabels(dataFrame[x_label], rotation=45, ha="right")
firstYLabel = str( dataFrame.columns[1] )
ax1.set_ylabel(firstYLabel, color = colors[0])
ax1.plot(index, dataFrame[firstYLabel], color = colors[0])
ax1.tick_params(axis='y', labelcolor = colors[0])
#Creates subplots with independet y-Axes
axS =[]
def newTwix(label, ax1, index, dataFrame):
print(label)
actualPos = len(axS)
axS.append(ax1.twinx())
axS[actualPos].set_ylabel(label, color = colors[actualPos%10 + 1])
axS[actualPos].plot(index, dataFrame[label], color=colors[actualPos%10 + 1])
axS[actualPos].tick_params(axis='y', labelcolor=colors[actualPos%10 + 1])
identation = 0.075 #would improve with a dynamic solution
p = 1 + identation
for i in range(2,len(dataFrame.columns)):
newTwix(str(dataFrame.columns[i]), ax1, index, dataFrame)
if (len(axS) == 1):
axS[len(axS)-1].spines.right.set_position(("axes", p))
else:
p = int((p + identation)*1000)/1000
axS[len(axS)-1].spines.right.set_position(("axes", p))
fig.tight_layout() # otherwise the right y-label is slightly clipped
plt.subplots_adjust(left=0.04, right=0.674, bottom=0.1)
mng = plt.get_current_fig_manager()
mng.full_screen_toggle()
plt.show()
multiple y-axes with independent scales
This question is related to group multiple plot in one figure python, "individual 28 plots".
This is my code:
for column in df.columns[1:]:
sns.set()
fig, ax = plt.subplots(nrows=3, ncols=3) # tried 9 plots in one figure
sns.set(style="whitegrid")
sns.swarmplot(x='GF', y=column, data=df,order=["WT", 'Eulomin']) # Choose column
sns.despine(offset=10, trim=True) #?
plt.savefig('{}.png'.format(column), bbox_inches='tight') # filename
plt.show()
I have more than 100 columns and it saves every file individually and just prints empty plots beside the normal one . How do I save 9 plots in one figure, till it reachs the moment he'll have 5 left (which will have to be in one figure either)?
Instead of iterating through columns, iterate through multiples of 9 with range to index the data frame by column number while placing each swarmplot into the ax array you define:
from itertools import product
...
sns.set(style="whitegrid")
for i in range(1, 100, 9): # ITERATE WITH STEPS
col = i
fig, ax = plt.subplots(nrows=3, ncols=3, figsize = (12,6))
# TRAVERSE 3 X 3 MATRIX
for r, c in product(range(3), range(3)):
if col in range(len(df.columns)): # CHECK IF COLUMN EXISTS
# USE ax ARGUMENT WITH MATRIX INDEX
sns.swarmplot(x='GF', y=df[df.columns[col]], data=df, ax=ax[r,c],
order=["WT", 'Eulomin'])
sns.despine(offset=10, trim=True)
col += 1
plt.tight_layout()
plt.savefig('SwarmPlots_{0}-{1}.png'.format(i,i+8), bbox_inches='tight')
To demonstrate with random, seeded data of 100 columns by 500 rows for reproducibility:
Data
import numpy as np
import pandas as pd
np.random.seed(362020)
cols = ['Col'+str(i) for i in range(1,100)]
df = (pd.DataFrame([np.random.randn(99) for n in range(500)])
.assign(GF = np.random.choice(['r', 'python', 'julia'], 500))
.set_axis(cols + ['GF'], axis='columns', inplace = False)
.reindex(['GF'] + cols, axis='columns')
)
df.shape
# (500, 100)
Plot
import matplotlib.pyplot as plt
import seaborn as sns
from itertools import product
sns.set(style="whitegrid")
for i in range(1, 100, 9):
col = i
fig, ax = plt.subplots(nrows=3, ncols=3, figsize = (12,6))
for r, c in product(range(3), range(3)):
if col in range(len(df.columns)):
sns.swarmplot(x='GF', y=df[df.columns[col]], data=df, ax=ax[r,c])
col += 1
plt.tight_layout()
plt.savefig('SwarmPlots_{0}-{1}.png'.format(i,i+8), bbox_inches='tight')
plt.show()
plt.clf()
plt.close()
Output (first plot)
I have a code from a dataframe
Y = df['label']
for col in categorical_cols:
tab = pd.crosstab(df[col],Y)
annot = x.div(x.sum(axis=1).astype('float64'),axis=0)
annot.plot(kind='bar',stacked=True)
plt.title('Distribution of %s'%col)
plt.xlabel('%s'%col,size='x-large')
plt.xticks(rotation=45)
plt.legend()
How can I plot these using different subplots in a single figure because this loops prints the last column's figure. So all figures are same.
Also: How can I produce the same using matplotlib/seaborn using matplotlib which shows me the % or absolute values.
You need to create the different subplots and then pass one axes object to each call of annot.plot via the ax keyword, something like this:
import math
import matplotlib.pyplot as plt
n = len(categorical_cols)
nrows = math.ceil(float(n) / 3.0)
fig, ax = plt.subplots(ncols=3, nrows=nrows, figsize=(9, nrows*3))
ax = ax.flatten()
Y = df['label']
for idx, col in enumerate(categorical_cols):
tab = pd.crosstab(df[col],Y)
annot = x.div(x.sum(axis=1).astype('float64'),axis=0)
annot.plot(kind='bar',stacked=True, ax=ax[idx])
ax[idx].title('Distribution of %s'%col)
ax[idx].set_xlabel('%s'%col,size='x-large')
ax.tick_params('x', labelrotation=45)
plt.legend()
I am trying to reproduce this graph - a line plot with a boxplot at every point:
Imgur
However, the line plot is always starting at the origin instead of at the first x tick:
Imgur
I have collected my datastructure in a pandas file, with each column header the k_e (of the x axis), with the column being all of the datapoints.
I am plotting the mean of each column and the boxplot like so:
df = df.astype(float)
_, ax = plt.subplots()
df.mean().plot(ax = ax)
df.boxplot(showfliers=False, ax=ax)
plt.xlabel(r'$k_{e}$')
plt.ylabel('Test error rate')
plt.title(r'Accuracies with different $k_{e}$')
plt.show()
I have referred to the link below, and so am passing the 'ax' position but this does not help.
plot line over boxplot using pandas DateFrame
EDIT: Here is a minimal example:
test_errors_dict = dict()
np.random.seed(40)
test_errors_dict[2] = np.random.rand(20)
test_errors_dict[3] = np.random.rand(20)
test_errors_dict[5] = np.random.rand(20)
df = pd.DataFrame(data=test_errors_dict)
df = df.astype(float)
_, ax = plt.subplots()
df.mean().plot(ax=ax)
df.boxplot(showfliers=False, ax=ax)
plt.show()
Result:
Imgur
As shown in the above, the line plots do not align with the boxplot
The boxes are at positions 1,2,3, while the plot is at positions 2,3,5. You may reindex the mean Series to also use the positions 1,2,3.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
test_errors_dict = dict()
np.random.seed(40)
test_errors_dict[2] = np.random.rand(20)
test_errors_dict[3] = np.random.rand(20)
test_errors_dict[5] = np.random.rand(20)
df = pd.DataFrame(data=test_errors_dict)
df = df.astype(float)
mean = df.mean()
mean.index = np.arange(1,len(mean)+1)
_, ax = plt.subplots()
mean.plot(ax=ax)
df.boxplot(showfliers=False, ax=ax)
plt.show()
I'm currently trying to change the secondary y-axis values in a matplot graph to ymin = -1 and ymax = 2. I can't find anything on how to change the values though. I am using the secondary_y = True argument in .plot(), so I am not sure if changing the secondary y-axis values is possible for this. I've included my current code for creating the plot.
df.plot()
df.plot(secondary_y = "Market")
From your example code, it seems you're using Pandas built in ploting capabilities. One option to add a second layer is by using matplotlib directly like in the example "two_scales.py".
It uses
import matplotlib.pyplot as plt
fig, ax1 = plt.subplots()
ax1.plot(df["..."])
# ...
ax2 = ax1.twinx()
ax2.plot(df["Market"])
ax2.set_ylim([0, 5])
where you can change the y-limits.
Setting ylim on plot does not appear to work in the case of secondary_y, but I was able to workaround with this:
import pandas as pd
df = pd.DataFrame({'one': range(10), 'two': range(10, 20)})
ax = df['one'].plot()
ax2 = df['two'].plot(secondary_y=True)
ax2.set_ylim(-20, 50)
fig = ax.get_figure()
fig.savefig('test.png')
This is a solution for showing as much y-axes as data columns the dataframe has
colors = ['tab:blue',
'tab:orange',
'tab:green',
'tab:red',
'tab:purple',
'tab:brown',
'tab:pink',
'tab:gray',
'tab:olive',
'tab:cyan']
#X axe and first Y axe
fig, ax1 = plt.subplots()
x_label = str( dataFrame.columns[0] )
index = dataFrame[x_label]
ax1.set_xlabel(x_label)
ax1.set_xticklabels(dataFrame[x_label], rotation=45, ha="right")
firstYLabel = str( dataFrame.columns[1] )
ax1.set_ylabel(firstYLabel, color = colors[0])
ax1.plot(index, dataFrame[firstYLabel], color = colors[0])
ax1.tick_params(axis='y', labelcolor = colors[0])
#Creates subplots with independet y-Axes
axS =[]
def newTwix(label, ax1, index, dataFrame):
print(label)
actualPos = len(axS)
axS.append(ax1.twinx())
axS[actualPos].set_ylabel(label, color = colors[actualPos%10 + 1])
axS[actualPos].plot(index, dataFrame[label], color=colors[actualPos%10 + 1])
axS[actualPos].tick_params(axis='y', labelcolor=colors[actualPos%10 + 1])
identation = 0.075 #would improve with a dynamic solution
p = 1 + identation
for i in range(2,len(dataFrame.columns)):
newTwix(str(dataFrame.columns[i]), ax1, index, dataFrame)
if (len(axS) == 1):
axS[len(axS)-1].spines.right.set_position(("axes", p))
else:
p = int((p + identation)*1000)/1000
axS[len(axS)-1].spines.right.set_position(("axes", p))
fig.tight_layout() # otherwise the right y-label is slightly clipped
plt.subplots_adjust(left=0.04, right=0.674, bottom=0.1)
mng = plt.get_current_fig_manager()
mng.full_screen_toggle()
plt.show()
multiple y-axes with independent scales