Matplotlib - duplicated axes - prevent gridlines from covering data - set right ylabel - python

I want to duplicate axes so that I can express an exponent in terms of its doubling time.
I think I am doing things right, but I have two problems
no label on the right hand side of the chart and
y-axis gridlines that are plotted above the data that I cannot shift to the bottom, nor remove.
Example code follows:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
MARGINS = 0.02
data = pd.Series(np.arange(0.05, 1.0, 0.1))
# preliminaries
plt.style.use('ggplot')
fig, ax = plt.subplots()
ax.figure.set_size_inches((8, 4))
ax.set_ylabel('$k$') # This works
ax.margins(MARGINS)
ax.set_axisbelow(True)
# duplicate the axes
axr = ax.twinx().twiny()
axr.margins(MARGINS)
axr.set_ylabel('Doubling time') # This does not work
# No x-ticks at the top
axr.xaxis.set_ticks([])
axr.xaxis.set_ticklabels([])
# plot the data
ax.plot(data.index, data)
# label right-hand y-axis
locations = ax.get_yticks()
new_labels = [f'{np.log(2)/x:,.2f}' if x != 0 else '∞' for x in locations ]
axr.yaxis.set_ticks(locations)
axr.yaxis.set_ticklabels(new_labels)
axr.set_axisbelow(True) # this does not work
# match the left and right ylim settings
axr.set_ylim(ax.get_ylim())
axr.set_xlim(ax.get_xlim())
# remove the grid
axr.grid(False, which='both')
axr.yaxis.grid(False, which='both') # this does not work
# finish-up
ax.set_title('Chart')
fig.tight_layout(pad=1.1)
plt.show()
plt.close('all')
Desired output:
Similar chart to above but with:
a right hand side y-axis label
no y-axis gridlines over the data line (but keep the horizontal gridlines under the dataline)

Change the order of twiny and twinx:
axr = ax.twiny().twinx()

Related

Matplotlib x-axis label re-positioning

How to move the x-axis label "number of patients" right below the x-tick value '0'?
The code I used to create the bar chart is as follows:
n_counts.plot.barh(stacked=True, width=0.7, figsize=(12,8), color=['navy','cornflowerblue']);
plt.draw()
# Get current tick positions and labels
pos, lab = plt.xticks()
# Edit labels
new_label = [int(x)*-1 if int(x)<0 else int(x) for x in pos]
# Set new labels
plt.xticks(pos, new_label)
plt.ylabel('neighbourhood', fontsize=13)
plt.xlabel('number of patients', fontsize=13)
plt.legend(['attended', 'not attended'], title='Attendance')
plt.title(' Attendance Counts in Top 20 Regions', loc='left', fontsize=16);
You need to transform the data coordinate of the x-label (0 in the example) to axes coordinates to place the label:
import matplotlib.pyplot as plt
import matplotlib as mpl
fig, ax = plt.subplots()
ax.set_xlim((-2, 10))
x = 0 # position of xlabel
ax.set_xlabel("xlabel", ha='left', x=ax.transAxes.inverted().transform(ax.transData.transform((x, 0)))[0])
transData transforms data coordinates into display coordinates that are then transformed back into Axes coordinates using the inverse transAxes Axes transformation. See the Transformations Tutorial for further details.
If you want to shift the x-axis label, you can use set_label_coords() to do this. Below is a stripped down version of your plot with the adjusted position.
The position of (0,0) is the bottom left corner of the box. As the x-axis ticks go from 0 to 9, with the 0-value tick at the second position from left corner, I used 2/9 = 0.22 as the x-coordinate. This will allow you to keep the text below the second tick. For y-coordinate, -0.05 suits this figure size and you can adjust it to a larger or smaller number based on the plot getting smaller or bigger respectively. You can fine tune it to suit your requirement...
fig,ax = plt.subplots(figsize=(12,8))
ax.set_xlim(-2000, 7000)
start, end = ax.get_xlim()
# Edit labels
labels = [-2000,-1000,0,1000,2000,3000,4000,5000,6000,7000]
ax.xaxis.set_ticks(np.arange(start, end+1, 1000))
ax.set_xticklabels(labels)
# Set new labels
ax.set_ylabel('neighbourhood', fontsize=13)
ax.set_xlabel('number of patients', fontsize=13)
ax.legend(['attended', 'not attended'], title='Attendance')
ax.set_title('Attendance Counts in Top 20 Regions', loc='left', fontsize=16);
ax.xaxis.set_label_coords(.22, -0.05)

Plotting axis ticks & labels outside the axes when the spine is moved

Following on from this question and some similar situations here, and here, how does one place the x- and y-axis tick marks and labels at the outside of the plot? There is one hack here, that attempts to address the problem since as #whatitis notes, you cannot know the pad in advance in all situations (e.g. ax.get_xaxis().set_tick_params(pad=5)); so it seems there must be a native API approach, but none of the attempts (commented) in the following MWE work
import numpy as np
import matplotlib.pyplot as plt
#data generation
x = np.arange(-10,20,0.2)
y = 1.0/(1.0+np.exp(-x)) # numpy does the calculation elementwise for you
fig, ax = plt.subplots()
ax.plot(x,y)
# Eliminate upper and right axes
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
# Show ticks on the left and lower axes only (and let them protrude in both directions)
ax.xaxis.set_tick_params(bottom='on', top=False, direction='inout')
ax.yaxis.set_tick_params(left='on', right=False, direction='inout')
# Make spines pass through zero of the other axis
ax.spines['bottom'].set_position('zero')
ax.spines['left'].set_position('zero')
ax.set_ylim(-0.4,1.0)
ax.set_ylabel(r'y-axis label')
ax.set_xlabel(r'x-axis label')
#
# How to place labels and ticks on the outside of the axes area
#
# Attempt 1 - has no effect
# ax.xaxis.tick_bottom()
# ax.yaxis.tick_left()
# Attempt 2 - has no effect - is this a bug? Why isn't 'top' & 'bottom' where it should be?
ax.yaxis.set_label_position('left')
ax.xaxis.set_label_position('bottom')
plt.show()

Seaborn: add counts to countplot? [duplicate]

I have a Pandas DataFrame with a column called "AXLES", which can take an integer value between 3-12. I am trying to use Seaborn's countplot() option to achieve the following plot:
left y axis shows the frequencies of these values occurring in the data. The axis extends are [0%-100%], tick marks at every 10%.
right y axis shows the actual counts, values correspond to tick marks determined by the left y axis (marked at every 10%.)
x axis shows the categories for the bar plots [3, 4, 5, 6, 7, 8, 9, 10, 11, 12].
Annotation on top of the bars show the actual percentage of that category.
The following code gives me the plot below, with actual counts, but I could not find a way to convert them into frequencies. I can get the frequencies using df.AXLES.value_counts()/len(df.index) but I am not sure about how to plug this information into Seaborn's countplot().
I also found a workaround for the annotations, but I am not sure if that is the best implementation.
Any help would be appreciated!
Thanks
plt.figure(figsize=(12,8))
ax = sns.countplot(x="AXLES", data=dfWIM, order=[3,4,5,6,7,8,9,10,11,12])
plt.title('Distribution of Truck Configurations')
plt.xlabel('Number of Axles')
plt.ylabel('Frequency [%]')
for p in ax.patches:
ax.annotate('%{:.1f}'.format(p.get_height()), (p.get_x()+0.1, p.get_height()+50))
EDIT:
I got closer to what I need with the following code, using Pandas' bar plot, ditching Seaborn. Feels like I'm using so many workarounds, and there has to be an easier way to do it. The issues with this approach:
There is no order keyword in Pandas' bar plot function as Seaborn's countplot() has, so I cannot plot all categories from 3-12 as I did in the countplot(). I need to have them shown even if there is no data in that category.
The secondary y-axis messes up the bars and the annotation for some reason (see the white gridlines drawn over the text and bars).
plt.figure(figsize=(12,8))
plt.title('Distribution of Truck Configurations')
plt.xlabel('Number of Axles')
plt.ylabel('Frequency [%]')
ax = (dfWIM.AXLES.value_counts()/len(df)*100).sort_index().plot(kind="bar", rot=0)
ax.set_yticks(np.arange(0, 110, 10))
ax2 = ax.twinx()
ax2.set_yticks(np.arange(0, 110, 10)*len(df)/100)
for p in ax.patches:
ax.annotate('{:.2f}%'.format(p.get_height()), (p.get_x()+0.15, p.get_height()+1))
You can do this by making a twinx axes for the frequencies. You can switch the two y axes around so the frequencies stay on the left and the counts on the right, but without having to recalculate the counts axis (here we use tick_left() and tick_right() to move the ticks and set_label_position to move the axis labels
You can then set the ticks using the matplotlib.ticker module, specifically ticker.MultipleLocator and ticker.LinearLocator.
As for your annotations, you can get the x and y locations for all 4 corners of the bar with patch.get_bbox().get_points(). This, along with setting the horizontal and vertical alignment correctly, means you don't need to add any arbitrary offsets to the annotation location.
Finally, you need to turn the grid off for the twinned axis, to prevent grid lines showing up on top of the bars (ax2.grid(None))
Here is a working script:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import matplotlib.ticker as ticker
# Some random data
dfWIM = pd.DataFrame({'AXLES': np.random.normal(8, 2, 5000).astype(int)})
ncount = len(dfWIM)
plt.figure(figsize=(12,8))
ax = sns.countplot(x="AXLES", data=dfWIM, order=[3,4,5,6,7,8,9,10,11,12])
plt.title('Distribution of Truck Configurations')
plt.xlabel('Number of Axles')
# Make twin axis
ax2=ax.twinx()
# Switch so count axis is on right, frequency on left
ax2.yaxis.tick_left()
ax.yaxis.tick_right()
# Also switch the labels over
ax.yaxis.set_label_position('right')
ax2.yaxis.set_label_position('left')
ax2.set_ylabel('Frequency [%]')
for p in ax.patches:
x=p.get_bbox().get_points()[:,0]
y=p.get_bbox().get_points()[1,1]
ax.annotate('{:.1f}%'.format(100.*y/ncount), (x.mean(), y),
ha='center', va='bottom') # set the alignment of the text
# Use a LinearLocator to ensure the correct number of ticks
ax.yaxis.set_major_locator(ticker.LinearLocator(11))
# Fix the frequency range to 0-100
ax2.set_ylim(0,100)
ax.set_ylim(0,ncount)
# And use a MultipleLocator to ensure a tick spacing of 10
ax2.yaxis.set_major_locator(ticker.MultipleLocator(10))
# Need to turn the grid on ax2 off, otherwise the gridlines end up on top of the bars
ax2.grid(None)
plt.savefig('snscounter.pdf')
I got it to work using core matplotlib's bar plot. I didn't have your data obviously, but adapting it to yours should be straight forward.
Approach
I used matplotlib's twin axis and plotted the data as bars on the second Axes object. The rest ist just some fiddeling around to get the ticks right and make annotations.
Hope this helps.
Code
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns
tot = np.random.rand( 1 ) * 100
data = np.random.rand( 1, 12 )
data = data / sum(data,1) * tot
df = pd.DataFrame( data )
palette = sns.husl_palette(9, s=0.7 )
### Left Axis
# Plot nothing here, autmatically scales to second axis.
fig, ax1 = plt.subplots()
ax1.set_ylim( [0,100] )
# Remove grid lines.
ax1.grid( False )
# Set ticks and add percentage sign.
ax1.yaxis.set_ticks( np.arange(0,101,10) )
fmt = '%.0f%%'
yticks = matplotlib.ticker.FormatStrFormatter( fmt )
ax1.yaxis.set_major_formatter( yticks )
### Right Axis
# Plot data as bars.
x = np.arange(0,9,1)
ax2 = ax1.twinx()
rects = ax2.bar( x-0.4, np.asarray(df.loc[0,3:]), width=0.8 )
# Set ticks on x-axis and remove grid lines.
ax2.set_xlim( [-0.5,8.5] )
ax2.xaxis.set_ticks( x )
ax2.xaxis.grid( False )
# Set ticks on y-axis in 10% steps.
ax2.set_ylim( [0,tot] )
ax2.yaxis.set_ticks( np.linspace( 0, tot, 11 ) )
# Add labels and change colors.
for i,r in enumerate(rects):
h = r.get_height()
r.set_color( palette[ i % len(palette) ] )
ax2.text( r.get_x() + r.get_width()/2.0, \
h + 0.01*tot, \
r'%d%%'%int(100*h/tot), ha = 'center' )
I think you can first set the y major ticks manually and then modify each label
dfWIM = pd.DataFrame({'AXLES': np.random.randint(3, 10, 1000)})
total = len(dfWIM)*1.
plt.figure(figsize=(12,8))
ax = sns.countplot(x="AXLES", data=dfWIM, order=[3,4,5,6,7,8,9,10,11,12])
plt.title('Distribution of Truck Configurations')
plt.xlabel('Number of Axles')
plt.ylabel('Frequency [%]')
for p in ax.patches:
ax.annotate('{:.1f}%'.format(100*p.get_height()/total), (p.get_x()+0.1, p.get_height()+5))
#put 11 ticks (therefore 10 steps), from 0 to the total number of rows in the dataframe
ax.yaxis.set_ticks(np.linspace(0, total, 11))
#adjust the ticklabel to the desired format, without changing the position of the ticks.
_ = ax.set_yticklabels(map('{:.1f}%'.format, 100*ax.yaxis.get_majorticklocs()/total))

adjust matplotlib subplot spacing after tight_layout

I would like to minimize white space in my figure. I have a row of sub plots where four plots share their y-axis and the last plot has a separate axis.
There are no ylabels or ticklabels for the shared axis middle panels.
tight_layout creates a lot of white space between the the middle plots as if leaving space for tick labels and ylabels but I would rather stretch the sub plots. Is this possible?
import matplotlib.gridspec as gridspec
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
fig = plt.figure()
gs = gridspec.GridSpec(1, 5, width_ratios=[4,1,4,1,2])
ax = fig.add_subplot(gs[0])
axes = [ax] + [fig.add_subplot(gs[i], sharey=ax) for i in range(1, 4)]
axes[0].plot(np.random.randint(0,100,100))
barlist=axes[1].bar([1,2],[1,20])
axes[2].plot(np.random.randint(0,100,100))
barlist=axes[3].bar([1,2],[1,20])
axes[0].set_ylabel('data')
axes.append(fig.add_subplot(gs[4]))
axes[4].plot(np.random.randint(0,5,100))
axes[4].set_ylabel('other data')
for ax in axes[1:4]:
plt.setp(ax.get_yticklabels(), visible=False)
sns.despine();
plt.tight_layout(pad=0, w_pad=0, h_pad=0);
Setting w_pad = 0 is not changing the default settings of tight_layout. You need to set something like w_pad = -2. Which produces the following figure:
You could go further, to say -3 but then you would start to get some overlap with your last plot.
Another way could be to remove plt.tight_layout() and set the boundaries yourself using
plt.subplots_adjust(left=0.065, right=0.97, top=0.96, bottom=0.065, wspace=0.14)
Though this can be a bit of a trial and error process.
Edit
A nice looking graph can be achieved by moving the ticks and the labels of the last plot to the right hand side. This answer shows you can do this by using:
ax.yaxis.tick_right()
ax.yaxis.set_label_position("right")
So for your example:
axes[4].yaxis.tick_right()
axes[4].yaxis.set_label_position("right")
In addition, you need to remove sns.despine(). Finally, there is now no need to set w_pad = -2, just use plt.tight_layout(pad=0, w_pad=0, h_pad=0)
Using this creates the following figure:

Creating a percentage countplot in python with pandas [duplicate]

I have a Pandas DataFrame with a column called "AXLES", which can take an integer value between 3-12. I am trying to use Seaborn's countplot() option to achieve the following plot:
left y axis shows the frequencies of these values occurring in the data. The axis extends are [0%-100%], tick marks at every 10%.
right y axis shows the actual counts, values correspond to tick marks determined by the left y axis (marked at every 10%.)
x axis shows the categories for the bar plots [3, 4, 5, 6, 7, 8, 9, 10, 11, 12].
Annotation on top of the bars show the actual percentage of that category.
The following code gives me the plot below, with actual counts, but I could not find a way to convert them into frequencies. I can get the frequencies using df.AXLES.value_counts()/len(df.index) but I am not sure about how to plug this information into Seaborn's countplot().
I also found a workaround for the annotations, but I am not sure if that is the best implementation.
Any help would be appreciated!
Thanks
plt.figure(figsize=(12,8))
ax = sns.countplot(x="AXLES", data=dfWIM, order=[3,4,5,6,7,8,9,10,11,12])
plt.title('Distribution of Truck Configurations')
plt.xlabel('Number of Axles')
plt.ylabel('Frequency [%]')
for p in ax.patches:
ax.annotate('%{:.1f}'.format(p.get_height()), (p.get_x()+0.1, p.get_height()+50))
EDIT:
I got closer to what I need with the following code, using Pandas' bar plot, ditching Seaborn. Feels like I'm using so many workarounds, and there has to be an easier way to do it. The issues with this approach:
There is no order keyword in Pandas' bar plot function as Seaborn's countplot() has, so I cannot plot all categories from 3-12 as I did in the countplot(). I need to have them shown even if there is no data in that category.
The secondary y-axis messes up the bars and the annotation for some reason (see the white gridlines drawn over the text and bars).
plt.figure(figsize=(12,8))
plt.title('Distribution of Truck Configurations')
plt.xlabel('Number of Axles')
plt.ylabel('Frequency [%]')
ax = (dfWIM.AXLES.value_counts()/len(df)*100).sort_index().plot(kind="bar", rot=0)
ax.set_yticks(np.arange(0, 110, 10))
ax2 = ax.twinx()
ax2.set_yticks(np.arange(0, 110, 10)*len(df)/100)
for p in ax.patches:
ax.annotate('{:.2f}%'.format(p.get_height()), (p.get_x()+0.15, p.get_height()+1))
You can do this by making a twinx axes for the frequencies. You can switch the two y axes around so the frequencies stay on the left and the counts on the right, but without having to recalculate the counts axis (here we use tick_left() and tick_right() to move the ticks and set_label_position to move the axis labels
You can then set the ticks using the matplotlib.ticker module, specifically ticker.MultipleLocator and ticker.LinearLocator.
As for your annotations, you can get the x and y locations for all 4 corners of the bar with patch.get_bbox().get_points(). This, along with setting the horizontal and vertical alignment correctly, means you don't need to add any arbitrary offsets to the annotation location.
Finally, you need to turn the grid off for the twinned axis, to prevent grid lines showing up on top of the bars (ax2.grid(None))
Here is a working script:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import matplotlib.ticker as ticker
# Some random data
dfWIM = pd.DataFrame({'AXLES': np.random.normal(8, 2, 5000).astype(int)})
ncount = len(dfWIM)
plt.figure(figsize=(12,8))
ax = sns.countplot(x="AXLES", data=dfWIM, order=[3,4,5,6,7,8,9,10,11,12])
plt.title('Distribution of Truck Configurations')
plt.xlabel('Number of Axles')
# Make twin axis
ax2=ax.twinx()
# Switch so count axis is on right, frequency on left
ax2.yaxis.tick_left()
ax.yaxis.tick_right()
# Also switch the labels over
ax.yaxis.set_label_position('right')
ax2.yaxis.set_label_position('left')
ax2.set_ylabel('Frequency [%]')
for p in ax.patches:
x=p.get_bbox().get_points()[:,0]
y=p.get_bbox().get_points()[1,1]
ax.annotate('{:.1f}%'.format(100.*y/ncount), (x.mean(), y),
ha='center', va='bottom') # set the alignment of the text
# Use a LinearLocator to ensure the correct number of ticks
ax.yaxis.set_major_locator(ticker.LinearLocator(11))
# Fix the frequency range to 0-100
ax2.set_ylim(0,100)
ax.set_ylim(0,ncount)
# And use a MultipleLocator to ensure a tick spacing of 10
ax2.yaxis.set_major_locator(ticker.MultipleLocator(10))
# Need to turn the grid on ax2 off, otherwise the gridlines end up on top of the bars
ax2.grid(None)
plt.savefig('snscounter.pdf')
I got it to work using core matplotlib's bar plot. I didn't have your data obviously, but adapting it to yours should be straight forward.
Approach
I used matplotlib's twin axis and plotted the data as bars on the second Axes object. The rest ist just some fiddeling around to get the ticks right and make annotations.
Hope this helps.
Code
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns
tot = np.random.rand( 1 ) * 100
data = np.random.rand( 1, 12 )
data = data / sum(data,1) * tot
df = pd.DataFrame( data )
palette = sns.husl_palette(9, s=0.7 )
### Left Axis
# Plot nothing here, autmatically scales to second axis.
fig, ax1 = plt.subplots()
ax1.set_ylim( [0,100] )
# Remove grid lines.
ax1.grid( False )
# Set ticks and add percentage sign.
ax1.yaxis.set_ticks( np.arange(0,101,10) )
fmt = '%.0f%%'
yticks = matplotlib.ticker.FormatStrFormatter( fmt )
ax1.yaxis.set_major_formatter( yticks )
### Right Axis
# Plot data as bars.
x = np.arange(0,9,1)
ax2 = ax1.twinx()
rects = ax2.bar( x-0.4, np.asarray(df.loc[0,3:]), width=0.8 )
# Set ticks on x-axis and remove grid lines.
ax2.set_xlim( [-0.5,8.5] )
ax2.xaxis.set_ticks( x )
ax2.xaxis.grid( False )
# Set ticks on y-axis in 10% steps.
ax2.set_ylim( [0,tot] )
ax2.yaxis.set_ticks( np.linspace( 0, tot, 11 ) )
# Add labels and change colors.
for i,r in enumerate(rects):
h = r.get_height()
r.set_color( palette[ i % len(palette) ] )
ax2.text( r.get_x() + r.get_width()/2.0, \
h + 0.01*tot, \
r'%d%%'%int(100*h/tot), ha = 'center' )
I think you can first set the y major ticks manually and then modify each label
dfWIM = pd.DataFrame({'AXLES': np.random.randint(3, 10, 1000)})
total = len(dfWIM)*1.
plt.figure(figsize=(12,8))
ax = sns.countplot(x="AXLES", data=dfWIM, order=[3,4,5,6,7,8,9,10,11,12])
plt.title('Distribution of Truck Configurations')
plt.xlabel('Number of Axles')
plt.ylabel('Frequency [%]')
for p in ax.patches:
ax.annotate('{:.1f}%'.format(100*p.get_height()/total), (p.get_x()+0.1, p.get_height()+5))
#put 11 ticks (therefore 10 steps), from 0 to the total number of rows in the dataframe
ax.yaxis.set_ticks(np.linspace(0, total, 11))
#adjust the ticklabel to the desired format, without changing the position of the ticks.
_ = ax.set_yticklabels(map('{:.1f}%'.format, 100*ax.yaxis.get_majorticklocs()/total))

Categories

Resources