I would like to use a code that shows all histograms in a dataframe. That will be df.hist(bins=10). However, I would like to add another histograms which shows CDF df_hist=df.hist(cumulative=True,bins=100,density=1,histtype="step")
I tried separating their matplotlib axes by using fig=plt.figure() and
plt.subplot(211). But this df.hist is actually part of pandas function, not matplotlib function. I also tried setting axes and adding ax=ax1 and ax2 options to each histogram but it didn't work.
How can I combine these histograms together?
Any help?
Histograms that I want to combine are like these. I want to show them side by side or put the second one on tip of the first one.
Sorry that I didn't care to make them look good.
It is possible to draw them together:
# toy data frame
df = pd.DataFrame(np.random.normal(0,1,(100,20)))
# draw hist
fig, axes = plt.subplots(5,4, figsize=(16,10))
df.plot(kind='hist', subplots=True, ax=axes, alpha=0.5)
# clone axes so they have different scales
ax_new = [ax.twinx() for ax in axes.flatten()]
df.plot(kind='kde', ax=ax_new, subplots=True)
plt.show()
Output:
It's also possible to draw them side-by-side. For example
fig, axes = plt.subplots(10,4, figsize=(16,10))
hist_axes = axes.flatten()[:20]
df.plot(kind='hist', subplots=True, ax=hist_axes, alpha=0.5)
kde_axes = axes.flatten()[20:]
df.plot(kind='kde', subplots=True, ax=kde_axes, alpha=0.5)
will plot hist on top of kde.
You can find more info here: Multiple histograms in Pandas (possible duplicate btw) but apparently Pandas cannot handle multiple histogram on same graphs.
It's ok because np.histogram and matplotlib.pyplot can, check the above link for a more complete answer.
Solution for overlapping histograms with df.hist with any number of subplots
You can combine two dataframe histogram figures by creating twin axes using the grid of axes returned by df.hist. Here is an example of normal histograms combined with cumulative step histograms where the size of the figure and the layout of the grid of subplots are taken care of automatically:
import numpy as np # v 1.19.2
import pandas as pd # v 1.1.3
import matplotlib.pyplot as plt # v 3.3.2
# Create sample dataset stored in a pandas dataframe
rng = np.random.default_rng(seed=1) # random number generator
letters = [chr(i) for i in range(ord('A'), ord('G')+1)]
df = pd.DataFrame(rng.exponential(1, size=(100, len(letters))), columns=letters)
# Set parameters for figure dimensions and grid layout
nplots = df.columns.size
ncols = 3
nrows = int(np.ceil(nplots/ncols))
subp_w = 10/ncols # 10 is the total figure width in inches
subp_h = 0.75*subp_w
bins = 10
# Plot grid of histograms with pandas function (with a shared y-axis)
grid = df.hist(grid=False, sharey=True, figsize=(ncols*subp_w, nrows*subp_h),
layout=(nrows, ncols), bins=bins, edgecolor='white', linewidth=0.5)
# Create list of twin axes containing second y-axis: note that due to the
# layout, the grid object may contain extra unused axes that are not shown
# (here in the H and I positions). The ax parameter of df.hist only accepts
# a number of axes that corresponds to the number of numerical variables
# in df, which is why the flattened array of grid axes is sliced here.
grid_twinx = [ax.twinx() for ax in grid.flat[:nplots]]
# Plot cumulative step histograms over normal histograms: note that the grid layout is
# preserved in grid_twinx so no need to set the layout parameter a second time here.
df.hist(ax=grid_twinx, histtype='step', bins=bins, cumulative=True, density=True,
color='tab:orange', linewidth=2, grid=False)
# Adjust space between subplots after generating twin axes
plt.gcf().subplots_adjust(wspace=0.4, hspace=0.4)
plt.show()
Solution for displaying histograms of different types side-by-side with matplotlib
To my knowledge, it is not possible to show the different types of plots side-by-side with df.hist. You need to create the figure from scratch, like in this example using the same dataset as before:
# Set parameters for figure dimensions and grid layout
nvars = df.columns.size
plot_types = 2 # normal histogram and cumulative step histogram
ncols_vars = 2
nrows = int(np.ceil(nvars/ncols_vars))
subp_w = 10/(plot_types*ncols_vars) # 10 is the total figure width in inches
subp_h = 0.75*subp_w
bins = 10
# Create figure with appropriate size
fig = plt.figure(figsize=(plot_types*ncols_vars*subp_w, nrows*subp_h))
fig.subplots_adjust(wspace=0.4, hspace=0.7)
# Create subplots by adding a new axes per type of plot for each variable
# and create lists of axes of normal histograms and their y-axis limits
axs_hist = []
axs_hist_ylims = []
for idx, var in enumerate(df.columns):
axh = fig.add_subplot(nrows, plot_types*ncols_vars, idx*plot_types+1)
axh.hist(df[var], bins=bins, edgecolor='white', linewidth=0.5)
axh.set_title(f'{var} - Histogram', size=11)
axs_hist.append(axh)
axs_hist_ylims.append(axh.get_ylim())
axc = fig.add_subplot(nrows, plot_types*ncols_vars, idx*plot_types+2)
axc.hist(df[var], bins=bins, density=True, cumulative=True,
histtype='step', color='tab:orange', linewidth=2)
axc.set_title(f'{var} - Cumulative step hist.', size=11)
# Set shared y-axis for histograms
for ax in axs_hist:
ax.set_ylim(max(axs_hist_ylims))
plt.show()
Related
I am trying to adjust the width of my second subplot (column sum with the binary cmap) to the first one.
So far I only managed to do so by randomly selecting different figsize, but every time I trying to re-use the code on a dataset of different size I alwayse come up with something like the picture below (second heatmap always wider than the first one).
Am I missing something to adjust the second one automatically ?
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
test = pd.DataFrame({'user': ['Bob', 'Bob', 'Bob','Janice','Janice','Fernand','Kevin','Sidhant'],
'tag' : ['enfant','enfant','enfant','femme','femme','jeune','jeune','jeune'],
'income': [3, 5, 1,14,8,10,13,17]})
# specify font sizes for later:
titlesize= 30
ticklabel = 23
legendlabel = 23
# Generate custom diverging colormaps:
cmap = sns.color_palette("ch:18,-.1,dark=.3", 6)
cmap2 = sns.color_palette("binary", 6)
# Preparing data for the heatmap:
heatmap1_data = pd.pivot_table(test, values='income',
index=['user'],
columns='tag')
heatmap1_data = heatmap1_data.reindex(heatmap1_data.sum().sort_values(ascending=False).index, axis=1)
# Creating figure:
fig, (ax1, ax2) = plt.subplots(2,1,figsize=(10,15))
# First subplot:
sns.heatmap(heatmap1_data, ax= ax1, cmap=cmap,square=True, linewidths=.5, annot=True, cbar = False,annot_kws={"size": legendlabel} )
# Cosmetic first subplot:
ax1.xaxis.tick_top()
ax1.tick_params(labelsize= ticklabel, top = False)
ax1.set_xlabel('')
ax1.set_ylabel('')
ax1.set_xticklabels(heatmap1_data.columns,rotation=90)
ax1.set_yticklabels(heatmap1_data.index,rotation=0)
ax1.set_title("Activités par agence et population vulnérable", size= titlesize, pad=20)
# Second subplot (column sum at the bottom):
sns.heatmap((pd.DataFrame(heatmap1_data.sum(axis=0))).transpose().round(1), ax=ax2, square=True, fmt='g', linewidths=.5, annot=True, cmap=cmap2 , cbar=False, xticklabels=False, yticklabels=False, annot_kws={"size": legendlabel})
ax2.set_xlabel("Nombre d'activités", size = ticklabel, labelpad = 5)
# More cosmetic:
ax1.set_title("Title", size= titlesize, pad=35)
ax1.set_xlabel('')
ax1.set_ylabel('')
plt.tick_params(labelsize= ticklabel,left=False, bottom=False)
plt.xticks(rotation=60)
ax1.spines['bottom'].set_color('#dfe1ec')
ax1.spines['left'].set_color('#dfe1ec')
ax1.spines['top'].set_color('#dfe1ec')
ax1.spines['right'].set_color('#dfe1ec')
plt.tight_layout()
plt.show()
The issue is using square=True in sns.heatmap. Since the aspect ratios of the two subplots are wide vs tall, the way that the "squaring" is done is different for each. For the first, it's made thinner, and the second, it's made shorter. It's done this way to fit into the constraints of the your subplot Axes' sizes, which are defined to be equal by default when you call plt.subplots.
One way to get around this is to define the aspect ratios of your two Axes to be different and fit the shape of your data. This won't work 100 % of the time but will in most cases. You can use the keyword gridspec_kw and define a dictionary with 'height_ratios' in your call of plt.subplots.
fig, (ax1, ax2) = plt.subplots(2,1, figsize=(10,15), gridspec_kw={'height_ratios':[5, 1]})
I have a Pandas DataFrame with a column called "AXLES", which can take an integer value between 3-12. I am trying to use Seaborn's countplot() option to achieve the following plot:
left y axis shows the frequencies of these values occurring in the data. The axis extends are [0%-100%], tick marks at every 10%.
right y axis shows the actual counts, values correspond to tick marks determined by the left y axis (marked at every 10%.)
x axis shows the categories for the bar plots [3, 4, 5, 6, 7, 8, 9, 10, 11, 12].
Annotation on top of the bars show the actual percentage of that category.
The following code gives me the plot below, with actual counts, but I could not find a way to convert them into frequencies. I can get the frequencies using df.AXLES.value_counts()/len(df.index) but I am not sure about how to plug this information into Seaborn's countplot().
I also found a workaround for the annotations, but I am not sure if that is the best implementation.
Any help would be appreciated!
Thanks
plt.figure(figsize=(12,8))
ax = sns.countplot(x="AXLES", data=dfWIM, order=[3,4,5,6,7,8,9,10,11,12])
plt.title('Distribution of Truck Configurations')
plt.xlabel('Number of Axles')
plt.ylabel('Frequency [%]')
for p in ax.patches:
ax.annotate('%{:.1f}'.format(p.get_height()), (p.get_x()+0.1, p.get_height()+50))
EDIT:
I got closer to what I need with the following code, using Pandas' bar plot, ditching Seaborn. Feels like I'm using so many workarounds, and there has to be an easier way to do it. The issues with this approach:
There is no order keyword in Pandas' bar plot function as Seaborn's countplot() has, so I cannot plot all categories from 3-12 as I did in the countplot(). I need to have them shown even if there is no data in that category.
The secondary y-axis messes up the bars and the annotation for some reason (see the white gridlines drawn over the text and bars).
plt.figure(figsize=(12,8))
plt.title('Distribution of Truck Configurations')
plt.xlabel('Number of Axles')
plt.ylabel('Frequency [%]')
ax = (dfWIM.AXLES.value_counts()/len(df)*100).sort_index().plot(kind="bar", rot=0)
ax.set_yticks(np.arange(0, 110, 10))
ax2 = ax.twinx()
ax2.set_yticks(np.arange(0, 110, 10)*len(df)/100)
for p in ax.patches:
ax.annotate('{:.2f}%'.format(p.get_height()), (p.get_x()+0.15, p.get_height()+1))
You can do this by making a twinx axes for the frequencies. You can switch the two y axes around so the frequencies stay on the left and the counts on the right, but without having to recalculate the counts axis (here we use tick_left() and tick_right() to move the ticks and set_label_position to move the axis labels
You can then set the ticks using the matplotlib.ticker module, specifically ticker.MultipleLocator and ticker.LinearLocator.
As for your annotations, you can get the x and y locations for all 4 corners of the bar with patch.get_bbox().get_points(). This, along with setting the horizontal and vertical alignment correctly, means you don't need to add any arbitrary offsets to the annotation location.
Finally, you need to turn the grid off for the twinned axis, to prevent grid lines showing up on top of the bars (ax2.grid(None))
Here is a working script:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import matplotlib.ticker as ticker
# Some random data
dfWIM = pd.DataFrame({'AXLES': np.random.normal(8, 2, 5000).astype(int)})
ncount = len(dfWIM)
plt.figure(figsize=(12,8))
ax = sns.countplot(x="AXLES", data=dfWIM, order=[3,4,5,6,7,8,9,10,11,12])
plt.title('Distribution of Truck Configurations')
plt.xlabel('Number of Axles')
# Make twin axis
ax2=ax.twinx()
# Switch so count axis is on right, frequency on left
ax2.yaxis.tick_left()
ax.yaxis.tick_right()
# Also switch the labels over
ax.yaxis.set_label_position('right')
ax2.yaxis.set_label_position('left')
ax2.set_ylabel('Frequency [%]')
for p in ax.patches:
x=p.get_bbox().get_points()[:,0]
y=p.get_bbox().get_points()[1,1]
ax.annotate('{:.1f}%'.format(100.*y/ncount), (x.mean(), y),
ha='center', va='bottom') # set the alignment of the text
# Use a LinearLocator to ensure the correct number of ticks
ax.yaxis.set_major_locator(ticker.LinearLocator(11))
# Fix the frequency range to 0-100
ax2.set_ylim(0,100)
ax.set_ylim(0,ncount)
# And use a MultipleLocator to ensure a tick spacing of 10
ax2.yaxis.set_major_locator(ticker.MultipleLocator(10))
# Need to turn the grid on ax2 off, otherwise the gridlines end up on top of the bars
ax2.grid(None)
plt.savefig('snscounter.pdf')
I got it to work using core matplotlib's bar plot. I didn't have your data obviously, but adapting it to yours should be straight forward.
Approach
I used matplotlib's twin axis and plotted the data as bars on the second Axes object. The rest ist just some fiddeling around to get the ticks right and make annotations.
Hope this helps.
Code
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns
tot = np.random.rand( 1 ) * 100
data = np.random.rand( 1, 12 )
data = data / sum(data,1) * tot
df = pd.DataFrame( data )
palette = sns.husl_palette(9, s=0.7 )
### Left Axis
# Plot nothing here, autmatically scales to second axis.
fig, ax1 = plt.subplots()
ax1.set_ylim( [0,100] )
# Remove grid lines.
ax1.grid( False )
# Set ticks and add percentage sign.
ax1.yaxis.set_ticks( np.arange(0,101,10) )
fmt = '%.0f%%'
yticks = matplotlib.ticker.FormatStrFormatter( fmt )
ax1.yaxis.set_major_formatter( yticks )
### Right Axis
# Plot data as bars.
x = np.arange(0,9,1)
ax2 = ax1.twinx()
rects = ax2.bar( x-0.4, np.asarray(df.loc[0,3:]), width=0.8 )
# Set ticks on x-axis and remove grid lines.
ax2.set_xlim( [-0.5,8.5] )
ax2.xaxis.set_ticks( x )
ax2.xaxis.grid( False )
# Set ticks on y-axis in 10% steps.
ax2.set_ylim( [0,tot] )
ax2.yaxis.set_ticks( np.linspace( 0, tot, 11 ) )
# Add labels and change colors.
for i,r in enumerate(rects):
h = r.get_height()
r.set_color( palette[ i % len(palette) ] )
ax2.text( r.get_x() + r.get_width()/2.0, \
h + 0.01*tot, \
r'%d%%'%int(100*h/tot), ha = 'center' )
I think you can first set the y major ticks manually and then modify each label
dfWIM = pd.DataFrame({'AXLES': np.random.randint(3, 10, 1000)})
total = len(dfWIM)*1.
plt.figure(figsize=(12,8))
ax = sns.countplot(x="AXLES", data=dfWIM, order=[3,4,5,6,7,8,9,10,11,12])
plt.title('Distribution of Truck Configurations')
plt.xlabel('Number of Axles')
plt.ylabel('Frequency [%]')
for p in ax.patches:
ax.annotate('{:.1f}%'.format(100*p.get_height()/total), (p.get_x()+0.1, p.get_height()+5))
#put 11 ticks (therefore 10 steps), from 0 to the total number of rows in the dataframe
ax.yaxis.set_ticks(np.linspace(0, total, 11))
#adjust the ticklabel to the desired format, without changing the position of the ticks.
_ = ax.set_yticklabels(map('{:.1f}%'.format, 100*ax.yaxis.get_majorticklocs()/total))
I want to plot categorical plots with the Seaborn pointplot, but data points that are not adjacent are not connected with a line in the plot. I would like to interpolate between non adjacent points, and connect them in the same way as adjacent points are connected, how can I do this?
An example: In the left and middle images, the blue and green points should be connected with a curve, respectively, but now they are separated into small parts. How can I plot the left and middle images just like the right one?
fig, axs = plt.subplots(ncols=3, figsize=(10,5))
exp_methods = ['fMRI left', 'fMRI right', 'MEG']
for i in range(3):
experiment = exp_methods[i]
dataf = df[df['data']==experiment]
sns.pointplot(x='number_of_subjects', y='accuracy', hue='training_size', data=dataf,
capsize=0.2, size=6, aspect=0.75, ci=95, legend=False, ax=axs[i])
I don't think there is an option to interpolate where there are missing data points, and hence the line stops instead. This question on the same topic from 2016 remains unanswered.
Instead, you could use plt.errorbar as suggested in the comments, or add the lines afterwards using plt.plot while still using seaborn to plot the means and error bars:
import seaborn as sns
tips = sns.load_dataset('tips')
# Create a gap in the data and plot it
tips.loc[(tips['size'] == 4) & (tips['sex'] == 'Male'), 'size'] = 5
sns.pointplot('size', 'total_bill', 'sex', tips, dodge=True)
# Fill gap with manual line plot
ax = sns.pointplot('size', 'total_bill', 'sex', tips, dodge=True, join=False)
# Loop over the collections of point in the axes and the grouped data frame
for points, (gender_name, gender_slice) in zip(ax.collections, tips.groupby('sex')):
# Retrieve the x axis positions for the points
x_coords = [coord[0] for coord in points.get_offsets()]
# Manually calculate the mean y-values to use with the line
means = gender_slice.groupby(['size']).mean()['total_bill']
ax.plot(x_coords, means, lw=2)
I have a Pandas DataFrame with a column called "AXLES", which can take an integer value between 3-12. I am trying to use Seaborn's countplot() option to achieve the following plot:
left y axis shows the frequencies of these values occurring in the data. The axis extends are [0%-100%], tick marks at every 10%.
right y axis shows the actual counts, values correspond to tick marks determined by the left y axis (marked at every 10%.)
x axis shows the categories for the bar plots [3, 4, 5, 6, 7, 8, 9, 10, 11, 12].
Annotation on top of the bars show the actual percentage of that category.
The following code gives me the plot below, with actual counts, but I could not find a way to convert them into frequencies. I can get the frequencies using df.AXLES.value_counts()/len(df.index) but I am not sure about how to plug this information into Seaborn's countplot().
I also found a workaround for the annotations, but I am not sure if that is the best implementation.
Any help would be appreciated!
Thanks
plt.figure(figsize=(12,8))
ax = sns.countplot(x="AXLES", data=dfWIM, order=[3,4,5,6,7,8,9,10,11,12])
plt.title('Distribution of Truck Configurations')
plt.xlabel('Number of Axles')
plt.ylabel('Frequency [%]')
for p in ax.patches:
ax.annotate('%{:.1f}'.format(p.get_height()), (p.get_x()+0.1, p.get_height()+50))
EDIT:
I got closer to what I need with the following code, using Pandas' bar plot, ditching Seaborn. Feels like I'm using so many workarounds, and there has to be an easier way to do it. The issues with this approach:
There is no order keyword in Pandas' bar plot function as Seaborn's countplot() has, so I cannot plot all categories from 3-12 as I did in the countplot(). I need to have them shown even if there is no data in that category.
The secondary y-axis messes up the bars and the annotation for some reason (see the white gridlines drawn over the text and bars).
plt.figure(figsize=(12,8))
plt.title('Distribution of Truck Configurations')
plt.xlabel('Number of Axles')
plt.ylabel('Frequency [%]')
ax = (dfWIM.AXLES.value_counts()/len(df)*100).sort_index().plot(kind="bar", rot=0)
ax.set_yticks(np.arange(0, 110, 10))
ax2 = ax.twinx()
ax2.set_yticks(np.arange(0, 110, 10)*len(df)/100)
for p in ax.patches:
ax.annotate('{:.2f}%'.format(p.get_height()), (p.get_x()+0.15, p.get_height()+1))
You can do this by making a twinx axes for the frequencies. You can switch the two y axes around so the frequencies stay on the left and the counts on the right, but without having to recalculate the counts axis (here we use tick_left() and tick_right() to move the ticks and set_label_position to move the axis labels
You can then set the ticks using the matplotlib.ticker module, specifically ticker.MultipleLocator and ticker.LinearLocator.
As for your annotations, you can get the x and y locations for all 4 corners of the bar with patch.get_bbox().get_points(). This, along with setting the horizontal and vertical alignment correctly, means you don't need to add any arbitrary offsets to the annotation location.
Finally, you need to turn the grid off for the twinned axis, to prevent grid lines showing up on top of the bars (ax2.grid(None))
Here is a working script:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import matplotlib.ticker as ticker
# Some random data
dfWIM = pd.DataFrame({'AXLES': np.random.normal(8, 2, 5000).astype(int)})
ncount = len(dfWIM)
plt.figure(figsize=(12,8))
ax = sns.countplot(x="AXLES", data=dfWIM, order=[3,4,5,6,7,8,9,10,11,12])
plt.title('Distribution of Truck Configurations')
plt.xlabel('Number of Axles')
# Make twin axis
ax2=ax.twinx()
# Switch so count axis is on right, frequency on left
ax2.yaxis.tick_left()
ax.yaxis.tick_right()
# Also switch the labels over
ax.yaxis.set_label_position('right')
ax2.yaxis.set_label_position('left')
ax2.set_ylabel('Frequency [%]')
for p in ax.patches:
x=p.get_bbox().get_points()[:,0]
y=p.get_bbox().get_points()[1,1]
ax.annotate('{:.1f}%'.format(100.*y/ncount), (x.mean(), y),
ha='center', va='bottom') # set the alignment of the text
# Use a LinearLocator to ensure the correct number of ticks
ax.yaxis.set_major_locator(ticker.LinearLocator(11))
# Fix the frequency range to 0-100
ax2.set_ylim(0,100)
ax.set_ylim(0,ncount)
# And use a MultipleLocator to ensure a tick spacing of 10
ax2.yaxis.set_major_locator(ticker.MultipleLocator(10))
# Need to turn the grid on ax2 off, otherwise the gridlines end up on top of the bars
ax2.grid(None)
plt.savefig('snscounter.pdf')
I got it to work using core matplotlib's bar plot. I didn't have your data obviously, but adapting it to yours should be straight forward.
Approach
I used matplotlib's twin axis and plotted the data as bars on the second Axes object. The rest ist just some fiddeling around to get the ticks right and make annotations.
Hope this helps.
Code
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns
tot = np.random.rand( 1 ) * 100
data = np.random.rand( 1, 12 )
data = data / sum(data,1) * tot
df = pd.DataFrame( data )
palette = sns.husl_palette(9, s=0.7 )
### Left Axis
# Plot nothing here, autmatically scales to second axis.
fig, ax1 = plt.subplots()
ax1.set_ylim( [0,100] )
# Remove grid lines.
ax1.grid( False )
# Set ticks and add percentage sign.
ax1.yaxis.set_ticks( np.arange(0,101,10) )
fmt = '%.0f%%'
yticks = matplotlib.ticker.FormatStrFormatter( fmt )
ax1.yaxis.set_major_formatter( yticks )
### Right Axis
# Plot data as bars.
x = np.arange(0,9,1)
ax2 = ax1.twinx()
rects = ax2.bar( x-0.4, np.asarray(df.loc[0,3:]), width=0.8 )
# Set ticks on x-axis and remove grid lines.
ax2.set_xlim( [-0.5,8.5] )
ax2.xaxis.set_ticks( x )
ax2.xaxis.grid( False )
# Set ticks on y-axis in 10% steps.
ax2.set_ylim( [0,tot] )
ax2.yaxis.set_ticks( np.linspace( 0, tot, 11 ) )
# Add labels and change colors.
for i,r in enumerate(rects):
h = r.get_height()
r.set_color( palette[ i % len(palette) ] )
ax2.text( r.get_x() + r.get_width()/2.0, \
h + 0.01*tot, \
r'%d%%'%int(100*h/tot), ha = 'center' )
I think you can first set the y major ticks manually and then modify each label
dfWIM = pd.DataFrame({'AXLES': np.random.randint(3, 10, 1000)})
total = len(dfWIM)*1.
plt.figure(figsize=(12,8))
ax = sns.countplot(x="AXLES", data=dfWIM, order=[3,4,5,6,7,8,9,10,11,12])
plt.title('Distribution of Truck Configurations')
plt.xlabel('Number of Axles')
plt.ylabel('Frequency [%]')
for p in ax.patches:
ax.annotate('{:.1f}%'.format(100*p.get_height()/total), (p.get_x()+0.1, p.get_height()+5))
#put 11 ticks (therefore 10 steps), from 0 to the total number of rows in the dataframe
ax.yaxis.set_ticks(np.linspace(0, total, 11))
#adjust the ticklabel to the desired format, without changing the position of the ticks.
_ = ax.set_yticklabels(map('{:.1f}%'.format, 100*ax.yaxis.get_majorticklocs()/total))
Original Post
I need to make several subplots with different sizes.
I have simulation areas of the size (x y) 35x6µm to 39x2µm and I want to plot them in one figure. All subplots have the same x-ticklabels (there is a grid line every 5µm on the x-axis).
When I plot the subplots into one figure, then the graphs with the small x-area are streched, so that the x-figuresize is completely used. Therefore, the x-gridlines do not match together anymore.
How can I achieve that the subplots aren't streched anymore and are aligned on the left?
Edit: Here is some code:
size=array([[3983,229],[3933,350],[3854,454],[3750,533],[3500,600]], dtype=np.float)
resolution=array([[1024,256],[1024,320],[1024,448],[1024,512],[1024,640]], dtype=np.float)
aspect_ratios=(resolution[:,0]/resolution[:,1])*(size[:,1]/size[:,0])
number_of_graphs=len(data)
fig, ax=plt.subplots(nrows=number_of_graphs, sharex=xshare)
fig.set_size_inches(12,figheight)
for i in range(number_of_graphs):
temp=np.rot90(np.loadtxt(path+'/'+data[i]))
img=ax[i].imshow(temp,
interpolation="none",
cmap=mapping,
norm=specific_norm,
aspect=aspect_ratios[i]
)
ax[i].set_adjustable('box-forced')
#Here I have to set some ticks and labels....
ax[i].xaxis.set_ticks(np.arange(0,int(size[i,0]),stepwidth_width)*resolution[i,0]/size[i,0])
ax[i].set_xticklabels((np.arange(0, int(size[i,0]), stepwidth_width)))
ax[i].yaxis.set_ticks(np.arange(0,int(size[i,1]),stepwidth_height)*resolution[i,1]/size[i,1])
ax[i].set_yticklabels((np.arange(0, int(size[i,1]), stepwidth_height)))
ax[i].set_title(str(mag[i]))
grid(True)
savefig(path+'/'+name+'all.pdf', bbox_inches='tight', pad_inches=0.05) #saves graph
Here are some examples:
If I plot different matrices in a for loop, the iPhython generates an output which is pretty much what I want. The y-distande between each subplot is constant, and the size of each figure is correct. You can see, that the x-labels match to each other:
When I plot the matrices in one figure using subplots, then this is not the case: The x-ticks do not fit together, and every subplot has the same size on the canvas (which means, that for thin subplots there is more white space reservated on the canvas...).
I simply want the first result from iPython in one output file using subplots.
Using GridSpec
After the community told me to use GridSpec to determine the size of my subplots directly I wrote a code for automatic plotting:
size=array([[3983,229],[3933,350],[3854,454],[3750,533],[3500,600]], dtype=np.float)
#total size of the figure
total_height=int(sum(size[:,1]))
total_width=int(size.max())
#determines steps of ticks
stepwidth_width=500
stepwidth_height=200
fig, ax=plt.subplots(nrows=len(size))
fig.set_size_inches(size.max()/300., total_height/200)
gs = GridSpec(total_height, total_width)
gs.update(left=0, right=0.91, hspace=0.2)
height=0
for i in range (len(size)):
ax[i] = plt.subplot(gs[int(height):int(height+size[i,1]), 0:int(size[i,0])])
temp=np.rot90(np.loadtxt(path+'/'+FFTs[i]))
img=ax[i].imshow(temp,
interpolation="none",
vmin=-100,
vmax=+100,
aspect=aspect_ratios[i],
)
#Some rescaling
ax[i].xaxis.set_ticks(np.arange(0,int(size[i,0]),stepwidth_width)*resolution[i,0]/size[i,0])
ax[i].set_xticklabels((np.arange(0, int(size[i,0]), stepwidth_width)))
ax[i].yaxis.set_ticks(np.arange(0,int(size[i,1]),stepwidth_height)*resolution[i,1]/size[i,1])
ax[i].set_yticklabels((np.arange(0, int(size[i,1]), stepwidth_height)))
ax[i].axvline(antenna[i]) #at the antenna position a vertical line is plotted
grid(True)
#colorbar
cbaxes = fig.add_axes([0.93, 0.2, 0.01, 0.6]) #[left, bottom, width, height]
cbar = plt.colorbar(img, cax = cbaxes, orientation='vertical')
tick_locator = ticker.MaxNLocator(nbins=3)
cbar.locator = tick_locator
cbar.ax.yaxis.set_major_locator(matplotlib.ticker.AutoLocator())
cbar.set_label('Intensity',
#fontsize=12
)
cbar.update_ticks()
height=height+size[i,1]
plt.show()
And here is the result....
Do you have any ideas?
What about using matplotlib.gridspec.GridSpec? Docs.
You could try something like
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
gs = GridSpec(8, 39)
ax1 = plt.subplot(gs[:6, :35])
ax2 = plt.subplot(gs[6:, :])
data1 = np.random.rand(6, 35)
data2 = np.random.rand(2, 39)
ax1.imshow(data1)
ax2.imshow(data2)
plt.show()