How to increase the size of xticks in pandas plot - python

This is a follow up for a question which i asked here:
The code is as follows:
from pandas_datareader import data as web
import matplotlib.pyplot as plt
import matplotlib.dates as md
fig, (ax1, ax2) = plt.subplots(2, 1)
df = web.DataReader('F', 'yahoo')
df2 = web.DataReader('Fb', 'yahoo')
ax = df.plot(figsize=(35,15), ax=ax1)
df2.plot(y = 'Close', figsize=(35,15), ax=ax2)
plt.xticks(fontsize = 25)
for ax in (ax1, ax2):
ax.xaxis.set_major_locator(md.MonthLocator(bymonth = range(1, 13, 6)))
ax.xaxis.set_major_formatter(md.DateFormatter('%b\n%Y'))
ax.xaxis.set_minor_locator(md.MonthLocator())
plt.setp(ax.xaxis.get_majorticklabels(), rotation = 0 )
plt.show()
This produces this plot:
How can i increase the size of both the xticks in the two subplots as you can see the size was increased for the bottom one only.
[1]: https://stackoverflow.com/questions/62358966/adding-minor-ticks-to-pandas-plot

You can use the tick_params function on the ax instance to control the size of the tick-labels on the x-axis. If you want to control the size of both x and y axis, use axis='both'. You can additionally specify which='major' or which='minor' or which='both' depending on if you want to change major, minor or both tick labels.
for ax in (ax1, ax2):
# Rest of the code
ax.tick_params(axis='x', which='both', labelsize=25)

Related

How to customize the location of color bar in Seaborn heatmap?

I have the following code to create a heatmap. However, it creates an overlap of the color bar and the right axis text. The text has no problems, I want it to be in that length.
How can I locate the colorbar on the right/left side of the heatmap with no overlap?
I tried with "pad" parameter in cbar_kws but it didn't help.enter image description here
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
PT=pd.DataFrame(np.random.randn(300,3), columns=list('ABC'))
miniPT=PT.iloc[:,:-1]
SMALL_SIZE = 8
MEDIUM_SIZE = 80
BIGGER_SIZE = 120
plt.rc('font', size=MEDIUM_SIZE) # controls default text sizes
plt.rc('axes', titlesize=MEDIUM_SIZE) # fontsize of the axes title
plt.rc('axes', labelsize=MEDIUM_SIZE) # fontsize of the x and y labels
plt.rc('xtick', labelsize=MEDIUM_SIZE) # fontsize of the tick labels
plt.rc('ytick', labelsize=SMALL_SIZE) # fontsize of the tick labels
plt.rc('legend', fontsize=MEDIUM_SIZE) # legend fontsize
plt.rc('figure', titlesize=BIGGER_SIZE) # fontsize of the figure title
plt.figure(figsize=(10, miniPT.shape[0]/5.2))
ax =sns.heatmap(miniPT, annot=False, cmap='RdYlGn')
for _, spine in ax.spines.items():
spine.set_visible(True)
# second axis
asset_list=np.asarray(PT['C'])
asset_list=asset_list[::-1]
ax3 = ax.twinx()
ax3.set_ylim([0,ax.get_ylim()[1]])
ax3.set_yticks(ax.get_yticks())
ax3.set_yticklabels(asset_list, fontsize=MEDIUM_SIZE*0.6)
# colorbar
cbar = ax.collections[0].colorbar
cbar.ax.tick_params(labelsize=MEDIUM_SIZE)
One way to get the overlap automatically adjusted by matplotlib, is to explicitly create subplots: one for the heatmap and another for the colorbar. sns.heatmap's cbar_ax= parameter can be set to point to this subplot. gridspec_kws= is needed to set the relative sizes. At the end, plt.tight_layout() will adjust all the paddings to make everything fit nicely.
The question's code contains some strange settings (e.g. a fontsize of 80 is immense). Also, 300 rows will inevitably lead to overlapping text (the fontsize needs to be so small that non-overlapping text wouldn't be readable). Here is some more simplified example code:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
PT = pd.DataFrame(np.random.randn(100, 3), columns=list('ABC'))
fig, (ax, cbar_ax) = plt.subplots(ncols=2, figsize=(10, len(PT) / 5.2), gridspec_kw={'width_ratios': [10, 1]})
sns.heatmap(PT.iloc[:, :-1], annot=False, cmap='RdYlGn', cbar_ax=cbar_ax, ax=ax)
for _, spine in ax.spines.items():
spine.set_visible(True)
# second axis
asset_list = np.asarray(PT['C'])
ax3 = ax.twinx()
ax3.set_ylim(ax.get_ylim())
ax3.set_yticks(np.arange(len(PT)))
ax3.set_yticklabels(asset_list, fontsize=80)
# colorbar
cbar_ax.tick_params(labelsize=80)
plt.tight_layout()
plt.show()
As the plot is quite large, here only the bottom part is pasted, with a link to the full plot.
This is how it would look like with:
fontsize 80 (Note that font sizes are measured in "points per inch", standard 72 points per inch);
figure width of 20 inches (instead of 10);
300 rows
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
PT = pd.DataFrame(np.random.randn(300, 3), columns=list('ABC'))
fig, (ax, cbar_ax) = plt.subplots(ncols=2, figsize=(20, len(PT) / 5.2), gridspec_kw={'width_ratios': [15, 1]})
sns.heatmap(PT.iloc[:, :-1], annot=False, cmap='RdYlGn', cbar_ax=cbar_ax, ax=ax)
for _, spine in ax.spines.items():
spine.set_visible(True)
# second axis
asset_list = np.asarray(PT['C'])
ax3 = ax.twinx()
ax3.set_ylim(ax.get_ylim())
ax3.set_yticks(np.arange(len(PT)))
ax3.set_yticklabels(asset_list, fontsize=80)
# colorbar
cbar_ax.tick_params(labelsize=80)
plt.tight_layout()
plt.show()
My solution was eventually move the colorbar to left side. This is the code and the output:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
PT = pd.DataFrame(np.random.randn(300, 3), columns=list('ABC'))
fig, (ax0, ax1) = plt.subplots(ncols=2, figsize=(10, len(PT) / 5.2), gridspec_kw={'width_ratios': [15, 15]})
sns.heatmap(PT.iloc[:, :-1], annot=False, cmap='RdYlGn', cbar_ax=ax0, ax=ax1)
for _, spine in ax1.spines.items():
spine.set_visible(True)
# second axis
asset_list = np.asarray(PT['C'])
ax3 = ax1.twinx()
ax3.set_ylim(ax1.get_ylim())
ax3.set_yticks(np.arange(len(PT)))
ax3.set_yticklabels(asset_list, fontsize=80)
# colorbar
ax0.tick_params(labelsize=80)
plt.tight_layout()
plt.show()

python adding title to individual axis in Seaborn

Below is the output (subplots) from my codes
What I want to do is -
on the first graph (left most) instead of putting "Category1 y axis / Category2 x axis" at the top I wanted Category1 along the y axis and Category2 along the x-axis at the bottom.
Also can I move xticklabels for all the graphs to the top?
codes
import matplotlib.pyplot as plt
import seaborn as sns
dataf = np.random.randint(1,10,9).reshape(3,3)
dataA = np.random.randint(1,10,9).reshape(3,3)
dataB = np.random.randint(1,10,9).reshape(3,3)
fig, (ax1, ax2, ax3) = plt.subplots(1,3)
ax1.set_title("Category1 y axis / Category2 x axis")
ax2.set_title("Category1 average value")
ax3.set_title("Category2 average value")
sns.heatmap(dataf, ax=ax1,annot=True)
sns.heatmap(dataA, ax=ax2,annot=True)
sns.heatmap(dataB, ax=ax3,annot=True)
plt.show()
To set the x- and y-labels, use set_xlabel() and set_ylabel().
To move the x-ticks, use labeltop=True in tick_params().
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
dataf = np.random.randint(1,10,9).reshape(3,3)
dataA = np.random.randint(1,10,9).reshape(3,3)
dataB = np.random.randint(1,10,9).reshape(3,3)
fig, (ax1, ax2, ax3) = plt.subplots(1,3)
sns.heatmap(dataf, ax=ax1, annot=True)
sns.heatmap(dataA, ax=ax2, annot=True)
sns.heatmap(dataB, ax=ax3, annot=True)
# set leftmost ylabel
ax1.set_ylabel('Category 1')
# set xlabels
ax1.set_xlabel('Category 2')
ax2.set_xlabel('Category 1 average value')
ax3.set_xlabel('Category 2 average value')
# move xticklabels to top
ax1.tick_params(which='major', labelbottom=False, labeltop=True)
ax2.tick_params(which='major', labelbottom=False, labeltop=True)
ax3.tick_params(which='major', labelbottom=False, labeltop=True)
ax1.set_ylabel('Category1')
ax1.set_xlabel('Category2')

How to properly plot a line over bars?

This one used to work fine, but somehow it stopped working (I must have changed something mistakenly but I can't find the issue).
I'm plotting a set of 3 bars per date, plus a line that shows the accumulated value of one of them. But only one or another (either the bars or the line) is properly being plotted. If I left the code for the bars last, only the bars are plotted. If I left the code for the line last, only the line is plotted.
fig, ax = plt.subplots(figsize = (15,8))
df.groupby("date")["result"].sum().cumsum().plot(
ax=ax,
marker='D',
lw=2,
color="purple")
df.groupby("date")[selected_columns].sum().plot(
ax=ax,
kind="bar",
color=["blue", "red", "gold"])
ax.legend(["LINE", "X", "Y", "Z"])
Appreciate the help!
Pandas draws bar plots with the x-axis as categorical, so internally numbered 0, 1, 2, ... and then setting the label. The line plot uses dates as x-axis. To combine them, both need to be categorical. The easiest way is to drop the index from the line plot. Make sure that the line plot is draw first, enabling the labels to be set correctly by the bar plot.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
df = pd.DataFrame({'date': pd.date_range('20210101', periods=10),
'earnings': np.random.randint(100, 600, 10),
'costs': np.random.randint(0, 200, 10)})
df['result'] = df['earnings'] - df['costs']
fig, ax = plt.subplots(figsize=(15, 8))
df.groupby("date")["result"].sum().cumsum().reset_index(drop=True).plot(
ax=ax,
marker='D',
lw=2,
color="purple")
df.groupby("date")[['earnings', 'costs', 'result']].sum().plot(
ax=ax,
kind="bar",
rot=0,
width=0.8,
color=["blue", "red", "gold"])
ax.legend(['Cumul.result', 'earnings', 'costs', 'result'])
# shorten the tick labels to only the date
ax.set_xticklabels([tick.get_text()[:10] for tick in ax.get_xticklabels()])
ax.set_ylim(ymin=0) # bar plots are nicer when bars start at zero
plt.tight_layout()
plt.show()
Here I post the solution:
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
a=[11.3,222,22, 63.8,9]
b=[0.12,-1.0,1.82,16.67,6.67]
l=[i for i in range(5)]
plt.rcParams['font.sans-serif']=['SimHei']
fmt='%.1f%%'
yticks = mtick.FormatStrFormatter(fmt)
fig = plt.figure()
ax1 = fig.add_subplot(111)
ax1.plot(l, b,'og-',label=u'A')
ax1.yaxis.set_major_formatter(yticks)
for i,(_x,_y) in enumerate(zip(l,b)):
plt.text(_x,_y,b[i],color='black',fontsize=8,)
ax1.legend(loc=1)
ax1.set_ylim([-20, 30])
ax1.set_ylabel('ylabel')
plt.legend(prop={'family':'SimHei','size':8})
ax2 = ax1.twinx()
plt.bar(l,a,alpha=0.1,color='blue',label=u'label')
ax2.legend(loc=2)
plt.legend(prop={'family':'SimHei','size':8},loc="upper left")
plt.show()
The key to this is the command
ax2 = ax1.twinx()

Adjust y-axis in Seaborn multiplot

I'm plotting a CSV file from my simulation results. The plot has three graphs in the same figure fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(24, 6)).
However, for comparison purposes I want the y-axis in all graphs starting at zero and the ending at a specific value. I tried the solution mentioned here from the Seaborn author. I don't get any errors, but the solution also does not work for me.
Here's my script:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
fname = 'results/filename.csv'
def plot_file():
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(24, 6))
df = pd.read_csv(fname, sep='\t')
profits = \
df.groupby(['providerId', 'periods'], as_index=False)['profits'].sum()
# y-axis needs to start at zero and end at 10
g = sns.lineplot(x='periods',
y='profits',
data=profits,
hue='providerId',
legend='full',
ax=axes[0])
# y-axis need to start at zero and end at one
g = sns.scatterplot(x='periods',
y='price',
hue='providerId',
style='providerId',
data=df,
legend=False,
ax=axes[1])
# y-axis need to start at zero and end at one
g = sns.scatterplot(x='periods',
y='quality',
hue='providerId',
style='providerId',
data=df,
legend=False,
ax=axes[2])
g.set(ylim=(0, None))
plt.show()
print(g) # -> AxesSubplot(0.672059,0.11;0.227941x0.77)
The resulting figure is as follows:
How can I adjust each individual plot?
Based on the way you've written your code, you can refer to each subplot axis with g.axis and use g.axis.set_ylim(low,high). (A difference compared to the linked answer is that your graphs are not being plotted on a seaborn FacetGrid.)
An example using dummy data and different axis ranges to illustrate:
df = pd.DataFrame(np.random.uniform(0,10,(100,2)), columns=['a','b'])
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(8,4))
g = sns.lineplot(x='a',
y='b',
data=df.sample(10),
ax=axes[0])
g.axes.set_ylim(0,25)
g = sns.scatterplot(x='a',
y='b',
data=df.sample(10),
ax=axes[1])
g.axes.set_ylim(0,3.5)
g = sns.scatterplot(x='a',
y='b',
data=df.sample(10),
ax=axes[2])
g.axes.set_ylim(0,0.3)
plt.tight_layout()
plt.show()

Share axes in matplotlib for only part of the subplots

I am having a big plot where I initiated with:
import numpy as np
import matplotlib.pyplot as plt
fig, axs = plt.subplots(5, 4)
And I want to do share-x-axis between column 1 and 2; and do the same between column 3 and 4. However, column 1 and 2 does not share the same axis with column 3 and 4.
I was wondering that would there be anyway to do this, and not sharex=True and sharey=True across all figures?
PS: This tutorial does not help too much, because it is only about sharing x/y within each row/column; they cannot do axis sharing between different rows/columns (unless share them across all axes).
I'm not exactly sure what you want to achieve from your question. However, you can specify per subplot which axis it should share with which subplot when adding a subplot to your figure.
This can be done via:
import matplotlib.pylab as plt
fig = plt.figure()
ax1 = fig.add_subplot(5, 4, 1)
ax2 = fig.add_subplot(5, 4, 2, sharex = ax1)
ax3 = fig.add_subplot(5, 4, 3, sharex = ax1, sharey = ax1)
A slightly limited but much simpler option is available for subplots. The limitation is there for a complete row or column of subplots.
For example, if one wants to have common y axis for all the subplots but common x axis only for individual columns in a 3x2 subplot, one could specify it as:
import matplotlib.pyplot as plt
fig, ax = plt.subplots(3, 2, sharey=True, sharex='col')
One can manually manage axes sharing using a Grouper object, which can be accessed via ax._shared_x_axes and ax._shared_y_axes. For example,
import matplotlib.pyplot as plt
def set_share_axes(axs, target=None, sharex=False, sharey=False):
if target is None:
target = axs.flat[0]
# Manage share using grouper objects
for ax in axs.flat:
if sharex:
target._shared_x_axes.join(target, ax)
if sharey:
target._shared_y_axes.join(target, ax)
# Turn off x tick labels and offset text for all but the bottom row
if sharex and axs.ndim > 1:
for ax in axs[:-1,:].flat:
ax.xaxis.set_tick_params(which='both', labelbottom=False, labeltop=False)
ax.xaxis.offsetText.set_visible(False)
# Turn off y tick labels and offset text for all but the left most column
if sharey and axs.ndim > 1:
for ax in axs[:,1:].flat:
ax.yaxis.set_tick_params(which='both', labelleft=False, labelright=False)
ax.yaxis.offsetText.set_visible(False)
fig, axs = plt.subplots(5, 4)
set_share_axes(axs[:,:2], sharex=True)
set_share_axes(axs[:,2:], sharex=True)
To adjust the spacing between subplots in a grouped manner, please refer to this question.
I used Axes.sharex /sharey in a similar setting
https://matplotlib.org/stable/api/_as_gen/matplotlib.axes.Axes.sharex.html#matplotlib.axes.Axes.sharex
import matplotlib.pyplot as plt
fig, axd = plt.subplot_mosaic([list(range(3))] +[['A']*3, ['B']*3])
axd[0].plot([0,0.2])
axd['A'].plot([1,2,3])
axd['B'].plot([1,2,3,4,5])
axd['B'].sharex(axd['A'])
for i in [1,2]:
axd[i].sharey(axd[0])
plt.show()

Categories

Resources