How to clean up x-axis values in matplotlib? - python

So I have the following code:
fig, ax = plt.subplots(dpi=220)
data.plot(kind='bar', y='p_1', ax=ax, color ='red')
data.plot(kind='bar', y='value_1', ax=ax, color ='blue')
ax.set_xlabel("Index values")
ax.set_ylabel("Value 1 / P_1")
#ax.legend(["Value 1, P_1"])
plt.title('Line plots')
plt.show()
Which returns the following graph:
As you can see the x-axis has some crazy stuff going on. I was wondering what went wrong and how to fix this?

Here's what you're looking for I think plt.xticks(positions, labels)
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
fig, ax = plt.subplots(dpi=220)
### used this to generate and test a new plot
data = pd.DataFrame(np.array(
[np.arange(50),
np.arange(50)]
).T).rename(columns={0: 'value_1', 1:'p_1'})
print(data)
data.plot(kind='bar', y='p_1', ax=ax, color ='red')
data.plot(kind='bar', y='value_1', ax=ax, color ='blue')
ax.set_xlabel("Index values")
ax.set_ylabel("Value 1 / P_1")
### added new code here
ticks = range(0, 50, 5)
labels = ticks
plt.xticks(ticks, labels)
#ax.legend(["Value 1, P_1"])
plt.title('Line plots')

plt.xticks(np.arange(0, len(value_1)+1, 5), np.arange(0, len(value_1)+1, 5) )
creates a tick every 5 intervals and corresponding label.

Related

ytick descriptions in seaborn

I'm trying to make a heatmap a heatmap with extensive y axis descriptions.
I would like to know if there is anyways to have a second and a third layer on the y tick labels.
fig, ax = plt.subplots(figsize=(20,25))
sns.set(style="darkgrid")
colName = [r'A', r'B', r'C', r'D', r'E']
colTitile = 'Test'
rowName = [r'a', r'b', r'c', r'd']
rowsName = [r'Vegetables', r'Fruits', r'Meats', r'Cheese',
r'Candy', r'Other']
rowTitile = 'Groups'
heatmapdata= np.arange(100).reshape(24,5)
sns.heatmap(heatmapdata,
cmap = 'turbo',
cbar = True,
vmin=0,
vmax=100,
ax=ax,
xticklabels = colName,
yticklabels = rowName)
for x in np.arange(0,len(ax.get_yticks()),4):
ax.axhline(x, color = 'white', lw=2)
Is there any way to do this? Which function should I use?
Thanks!
The labels for the rows can be set up in the graph settings, but other than that, I think the annotation function is the only way to handle this. the second level group names are set using the annotation function, and the coordinate criteria are set using the axis criteria. Axis labels are added using the text function with axis criteria.
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
fig, ax = plt.subplots(figsize=(10,10))
sns.set(style="darkgrid")
colName = [r'A', r'B', r'C', r'D', r'E']
colTitile = 'Test'
rowName = [r'a', r'b', r'c', r'd']
rowsName = [r'Vegetables', r'Fruits', r'Meats', r'Cheese',
r'Candy', r'Other']
rowTitle = 'Groups'
heatmapdata= np.arange(120).reshape(24,5)
sns.heatmap(heatmapdata,
cmap='turbo',
cbar=True,
vmin=0,
vmax=100,
ax=ax,
xticklabels=colName,
yticklabels=np.tile(rowName, 6))
for x in np.arange(0,ax.get_ylim()[0],4):
ax.axhline(x, color = 'white', lw=2)
for idx,g in enumerate(rowsName[::-1]):
ax.annotate(g, xy=(-100, idx*90+45), xycoords='axes points', size=14)
ax.text(x=-0.3, y=0.5, s=rowTitle, ha='center', transform=ax.transAxes, rotation=90, font=dict(size=16))
plt.show()

How to customize the location of color bar in Seaborn heatmap?

I have the following code to create a heatmap. However, it creates an overlap of the color bar and the right axis text. The text has no problems, I want it to be in that length.
How can I locate the colorbar on the right/left side of the heatmap with no overlap?
I tried with "pad" parameter in cbar_kws but it didn't help.enter image description here
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
PT=pd.DataFrame(np.random.randn(300,3), columns=list('ABC'))
miniPT=PT.iloc[:,:-1]
SMALL_SIZE = 8
MEDIUM_SIZE = 80
BIGGER_SIZE = 120
plt.rc('font', size=MEDIUM_SIZE) # controls default text sizes
plt.rc('axes', titlesize=MEDIUM_SIZE) # fontsize of the axes title
plt.rc('axes', labelsize=MEDIUM_SIZE) # fontsize of the x and y labels
plt.rc('xtick', labelsize=MEDIUM_SIZE) # fontsize of the tick labels
plt.rc('ytick', labelsize=SMALL_SIZE) # fontsize of the tick labels
plt.rc('legend', fontsize=MEDIUM_SIZE) # legend fontsize
plt.rc('figure', titlesize=BIGGER_SIZE) # fontsize of the figure title
plt.figure(figsize=(10, miniPT.shape[0]/5.2))
ax =sns.heatmap(miniPT, annot=False, cmap='RdYlGn')
for _, spine in ax.spines.items():
spine.set_visible(True)
# second axis
asset_list=np.asarray(PT['C'])
asset_list=asset_list[::-1]
ax3 = ax.twinx()
ax3.set_ylim([0,ax.get_ylim()[1]])
ax3.set_yticks(ax.get_yticks())
ax3.set_yticklabels(asset_list, fontsize=MEDIUM_SIZE*0.6)
# colorbar
cbar = ax.collections[0].colorbar
cbar.ax.tick_params(labelsize=MEDIUM_SIZE)
One way to get the overlap automatically adjusted by matplotlib, is to explicitly create subplots: one for the heatmap and another for the colorbar. sns.heatmap's cbar_ax= parameter can be set to point to this subplot. gridspec_kws= is needed to set the relative sizes. At the end, plt.tight_layout() will adjust all the paddings to make everything fit nicely.
The question's code contains some strange settings (e.g. a fontsize of 80 is immense). Also, 300 rows will inevitably lead to overlapping text (the fontsize needs to be so small that non-overlapping text wouldn't be readable). Here is some more simplified example code:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
PT = pd.DataFrame(np.random.randn(100, 3), columns=list('ABC'))
fig, (ax, cbar_ax) = plt.subplots(ncols=2, figsize=(10, len(PT) / 5.2), gridspec_kw={'width_ratios': [10, 1]})
sns.heatmap(PT.iloc[:, :-1], annot=False, cmap='RdYlGn', cbar_ax=cbar_ax, ax=ax)
for _, spine in ax.spines.items():
spine.set_visible(True)
# second axis
asset_list = np.asarray(PT['C'])
ax3 = ax.twinx()
ax3.set_ylim(ax.get_ylim())
ax3.set_yticks(np.arange(len(PT)))
ax3.set_yticklabels(asset_list, fontsize=80)
# colorbar
cbar_ax.tick_params(labelsize=80)
plt.tight_layout()
plt.show()
As the plot is quite large, here only the bottom part is pasted, with a link to the full plot.
This is how it would look like with:
fontsize 80 (Note that font sizes are measured in "points per inch", standard 72 points per inch);
figure width of 20 inches (instead of 10);
300 rows
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
PT = pd.DataFrame(np.random.randn(300, 3), columns=list('ABC'))
fig, (ax, cbar_ax) = plt.subplots(ncols=2, figsize=(20, len(PT) / 5.2), gridspec_kw={'width_ratios': [15, 1]})
sns.heatmap(PT.iloc[:, :-1], annot=False, cmap='RdYlGn', cbar_ax=cbar_ax, ax=ax)
for _, spine in ax.spines.items():
spine.set_visible(True)
# second axis
asset_list = np.asarray(PT['C'])
ax3 = ax.twinx()
ax3.set_ylim(ax.get_ylim())
ax3.set_yticks(np.arange(len(PT)))
ax3.set_yticklabels(asset_list, fontsize=80)
# colorbar
cbar_ax.tick_params(labelsize=80)
plt.tight_layout()
plt.show()
My solution was eventually move the colorbar to left side. This is the code and the output:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
PT = pd.DataFrame(np.random.randn(300, 3), columns=list('ABC'))
fig, (ax0, ax1) = plt.subplots(ncols=2, figsize=(10, len(PT) / 5.2), gridspec_kw={'width_ratios': [15, 15]})
sns.heatmap(PT.iloc[:, :-1], annot=False, cmap='RdYlGn', cbar_ax=ax0, ax=ax1)
for _, spine in ax1.spines.items():
spine.set_visible(True)
# second axis
asset_list = np.asarray(PT['C'])
ax3 = ax1.twinx()
ax3.set_ylim(ax1.get_ylim())
ax3.set_yticks(np.arange(len(PT)))
ax3.set_yticklabels(asset_list, fontsize=80)
# colorbar
ax0.tick_params(labelsize=80)
plt.tight_layout()
plt.show()

How to properly plot a line over bars?

This one used to work fine, but somehow it stopped working (I must have changed something mistakenly but I can't find the issue).
I'm plotting a set of 3 bars per date, plus a line that shows the accumulated value of one of them. But only one or another (either the bars or the line) is properly being plotted. If I left the code for the bars last, only the bars are plotted. If I left the code for the line last, only the line is plotted.
fig, ax = plt.subplots(figsize = (15,8))
df.groupby("date")["result"].sum().cumsum().plot(
ax=ax,
marker='D',
lw=2,
color="purple")
df.groupby("date")[selected_columns].sum().plot(
ax=ax,
kind="bar",
color=["blue", "red", "gold"])
ax.legend(["LINE", "X", "Y", "Z"])
Appreciate the help!
Pandas draws bar plots with the x-axis as categorical, so internally numbered 0, 1, 2, ... and then setting the label. The line plot uses dates as x-axis. To combine them, both need to be categorical. The easiest way is to drop the index from the line plot. Make sure that the line plot is draw first, enabling the labels to be set correctly by the bar plot.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
df = pd.DataFrame({'date': pd.date_range('20210101', periods=10),
'earnings': np.random.randint(100, 600, 10),
'costs': np.random.randint(0, 200, 10)})
df['result'] = df['earnings'] - df['costs']
fig, ax = plt.subplots(figsize=(15, 8))
df.groupby("date")["result"].sum().cumsum().reset_index(drop=True).plot(
ax=ax,
marker='D',
lw=2,
color="purple")
df.groupby("date")[['earnings', 'costs', 'result']].sum().plot(
ax=ax,
kind="bar",
rot=0,
width=0.8,
color=["blue", "red", "gold"])
ax.legend(['Cumul.result', 'earnings', 'costs', 'result'])
# shorten the tick labels to only the date
ax.set_xticklabels([tick.get_text()[:10] for tick in ax.get_xticklabels()])
ax.set_ylim(ymin=0) # bar plots are nicer when bars start at zero
plt.tight_layout()
plt.show()
Here I post the solution:
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
a=[11.3,222,22, 63.8,9]
b=[0.12,-1.0,1.82,16.67,6.67]
l=[i for i in range(5)]
plt.rcParams['font.sans-serif']=['SimHei']
fmt='%.1f%%'
yticks = mtick.FormatStrFormatter(fmt)
fig = plt.figure()
ax1 = fig.add_subplot(111)
ax1.plot(l, b,'og-',label=u'A')
ax1.yaxis.set_major_formatter(yticks)
for i,(_x,_y) in enumerate(zip(l,b)):
plt.text(_x,_y,b[i],color='black',fontsize=8,)
ax1.legend(loc=1)
ax1.set_ylim([-20, 30])
ax1.set_ylabel('ylabel')
plt.legend(prop={'family':'SimHei','size':8})
ax2 = ax1.twinx()
plt.bar(l,a,alpha=0.1,color='blue',label=u'label')
ax2.legend(loc=2)
plt.legend(prop={'family':'SimHei','size':8},loc="upper left")
plt.show()
The key to this is the command
ax2 = ax1.twinx()

Add multiples graphs in one figure

I'm learning Python using Jupiter and I'm struggling trying to put the graphs into one figure. Here's what I have so far...
Code for my graphs(I have three of graphs, they only differ in color and lines vs. dot):
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
one = plt.figure()
plt.plot(x_v, y_v, '#008000') #change color using hex strings
plt.xlabel('x')
plt.ylabel('y')
plt.show()
two = plt.figure()
plt.plot(x_v, y_v, linestyle='none', marker='o', markersize=0.5)
plt.show()
three = plt.figure()
plt.plot(x_v, y_v, linestyle='none', marker='o', markersize=0.5, color = 'yellow')
plt.show()
Here's code that I have so far to make it one figure... I was wondering If I should should put it in a np.arange and plot it, but I can't seem to get it to work....
def f(x):
return one
def g(x):
return two
def h(x):
return three
If anyone can help, it'll be of great use! Thank you!
You can use plt.subplots:
fig, (ax1, ax2, ax3) = plt.subplots(figsize=(15, 5), ncols=3)
ax1.plot(x_v, y_v, '#008000')
ax1.set_xlabel('x')
ax1.set_ylabel('y')
ax2.plot(x_v, y_v, linestyle='none', marker='o', markersize=0.5)
ax3.plot(x_v, y_v, linestyle='none', marker='o', markersize=0.5, color = 'yellow')
Here is one way to approach multiple plots with plt.subplots. I think it is very easy to follow and also gives a lot of control over individual plots:
import numpy as np
import matplotlib.pyplot as plt
#generating test data
x = np.arange(0,9)
y = np.arange(1,10)
#defining figure layout (i.e. rows, columns, size, horizontal and vertical space between subplots
fig,ax = plt.subplots(nrows=2,ncols=2,figsize=(15,7))
plt.subplots_adjust(hspace=0.4,wspace=0.2)
#first subplot (numbering can be read as 1st plot in a grid of 2x2)
plt.subplot(2,2,1)
plt.plot(x,y)
#second subplot in a grid of 2x2
plt.subplot(2,2,2)
plt.plot(x,y,ls='--')
#third subplot in a grid of 2x2
plt.subplot(2,2,3)
plt.scatter(x,y)
#fourth subplot in a grid of 2x2
plt.subplot(2,2,4)
plt.plot(x,y)
plt.tight_layout()
plt.show()
Output:

Seaborn scatterplot legend showing true values and normalized continuous color

I have a dataframe that I'd like to use to build a scatterplot where different points have different colors:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
dat=pd.DataFrame(np.random.rand(20, 2), columns=['x','y'])
dat['c']=np.random.randint(0,100,20)
dat['c_norm']=(dat['c']-dat['c'].min())/(dat['c'].max()-dat['c'].min())
dat['group']=np.append(np.repeat('high',10), np.repeat('low',10))
As you can see, the column c_norm shows the c column has been normalized between 0 and 1. I would like to show a continuous legend whose color range reflect the normalized values, but labeled using the original c values as label. Say, the minimum (1), the maximum (86), and the median (49). I also want to have differing markers depending on group.
So far I was able to do this:
fig = plt.figure(figsize = (8,8))
ax = fig.add_subplot(1,1,1)
for row in dat.index:
if(dat.loc[row,'group']=='low'):
i_marker='.'
else:
i_marker='x'
ax.scatter(
x=dat.loc[row,'x'],
y=dat.loc[row,'y'],
s=50, alpha=0.5,
marker=i_marker
)
ax.legend(dat['c_norm'], loc='center right', bbox_to_anchor=(1.5, 0.5), ncol=1)
Questions:
- How to generate a continuous legend based on the values?
- How to adapt its ticks to show the original ticks in c, or at least a min, max, and mean or median?
Thanks in advance
Partial answer. Do you actually need to determine your marker colors based on the normed values? See the output of the snippet below.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
dat = pd.DataFrame(np.random.rand(20, 2), columns=['x', 'y'])
dat['c'] = np.random.randint(0, 100, 20)
dat['c_norm'] = (dat['c'] - dat['c'].min()) / (dat['c'].max() - dat['c'].min())
dat['group'] = np.append(np.repeat('high', 10), np.repeat('low', 10))
fig, (ax, bx) = plt.subplots(nrows=1, ncols=2, num=0, figsize=(16, 8))
mask = dat['group'] == 'low'
scat = ax.scatter(dat['x'][mask], dat['y'][mask], s=50, c=dat['c'][mask],
marker='s', vmin=np.amin(dat['c']), vmax=np.amax(dat['c']),
cmap='plasma')
ax.scatter(dat['x'][~mask], dat['y'][~mask], s=50, c=dat['c'][~mask],
marker='X', vmin=np.amin(dat['c']), vmax=np.amax(dat['c']),
cmap='plasma')
cbar = fig.colorbar(scat, ax=ax)
scat = bx.scatter(dat['x'][mask], dat['y'][mask], s=50, c=dat['c_norm'][mask],
marker='s', vmin=np.amin(dat['c_norm']),
vmax=np.amax(dat['c_norm']), cmap='plasma')
bx.scatter(dat['x'][~mask], dat['y'][~mask], s=50, c=dat['c_norm'][~mask],
marker='X', vmin=np.amin(dat['c_norm']),
vmax=np.amax(dat['c_norm']), cmap='plasma')
cbar2 = fig.colorbar(scat, ax=bx)
plt.show()
You could definitely modify the second colorbar so that it matches the first one, but is that necessary?

Categories

Resources