How to add legend on Seaborn facetgrid bar plot - python

I have the following code:
import numpy as np
import pandas as pd
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
matplotlib.style.use('ggplot')
import seaborn as sns
sns.set(style="white")
# Create a dataset with many short random walks
rs = np.random.RandomState(4)
pos = rs.randint(-1, 2, (10, 5)).cumsum(axis=1)
pos -= pos[:, 0, np.newaxis]
step = np.tile(range(5), 10)
walk = np.repeat(range(10), 5)
df = pd.DataFrame(np.c_[pos.flat, step, walk],
columns=["position", "step", "walk"])
# Initialize a grid of plots with an Axes for each walk
grid = sns.FacetGrid(df, col="walk", hue="walk", col_wrap=5, size=5,
aspect=1)
# Draw a bar plot to show the trajectory of each random walk
grid.map(sns.barplot, "step", "position", palette="Set3").add_legend();
grid.savefig("/Users/mymacmini/Desktop/test_fig.png")
#sns.plt.show()
Which makes this plot:
As you can see I get the legend wrong. How can I make it right?

Some how there is one legend item for each of the subplot. Looks like if we want to have legend corresponds to the bars in each of the subplot, we have to manually make them.
# Let's just make a 1-by-2 plot
df = df.head(10)
# Initialize a grid of plots with an Axes for each walk
grid = sns.FacetGrid(df, col="walk", hue="walk", col_wrap=2, size=5,
aspect=1)
# Draw a bar plot to show the trajectory of each random walk
bp = grid.map(sns.barplot, "step", "position", palette="Set3")
# The color cycles are going to all the same, doesn't matter which axes we use
Ax = bp.axes[0]
# Some how for a plot of 5 bars, there are 6 patches, what is the 6th one?
Boxes = [item for item in Ax.get_children()
if isinstance(item, matplotlib.patches.Rectangle)][:-1]
# There is no labels, need to define the labels
legend_labels = ['a', 'b', 'c', 'd', 'e']
# Create the legend patches
legend_patches = [matplotlib.patches.Patch(color=C, label=L) for
C, L in zip([item.get_facecolor() for item in Boxes],
legend_labels)]
# Plot the legend
plt.legend(handles=legend_patches)

When the legend doesn't work out you can always make your own easily like this:
import matplotlib
name_to_color = {
'Expected': 'green',
'Provided': 'red',
'Difference': 'blue',
}
patches = [matplotlib.patches.Patch(color=v, label=k) for k,v in name_to_color.items()]
matplotlib.pyplot.legend(handles=patches)

Related

Legend in matplotlib shows first entry of a list only

I'm trying to display a custom legend for a bar graph, but it is only displaying the first legend in the legend list. How can I display all the values in the legend?
df.time_to_travel_grouping.value_counts().plot(kind="bar",
color = ["b","tab:green","tab:red","c","m","y","tab:blue","tab:orange"],
xlabel="TTT", ylabel="Total Counts",
title="Fig4: Total Counts by Time to Travel Category (TTT)", figsize=(20,15))
plt.legend(["a","b","c","d","e","f","g","h"])
plt.subplots_adjust(bottom=0.15)
plt.subplots_adjust(left=0.15)
Let's get the patches handles from the axes using ax.get_legend_handles_labels:
s = pd.Series(np.arange(100,50,-5), index=[*'abcdefghij'])
ax = s.plot(kind="bar",
color = ["b","tab:green","tab:red","c","m","y","tab:blue","tab:orange"],
xlabel="TTT", ylabel="Total Counts",
title="Fig4: Total Counts by Time to Travel Category (TTT)", figsize=(20,15))
patches, _ = ax.get_legend_handles_labels()
labels = [*'abcdefghij']
ax.legend(*patches, labels, loc='best')
plt.subplots_adjust(bottom=0.15)
plt.subplots_adjust(left=0.15)
Output:
To create an automatic legend, matplotlib stores labels for graphical elements. In the case of this bar plot, the complete 'container' pandas assigns one label to the complete 'container'.
You could remove the label of the container (assigning a label starting with _), and assign individual labels to the bars. The xtick labels can be used, as they are already in the desired order.
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
df = pd.DataFrame({'time_to_travel_grouping': np.random.choice([*'abcdefgh'], 200)})
ax = df.time_to_travel_grouping.value_counts().plot(kind="bar",
color=["b", "tab:green", "tab:red", "c", "m", "y", "tab:blue", "tab:orange"],
xlabel="TTT", ylabel="Total Counts",
title="Fig4: Total Counts by Time to Travel Category (TTT)",
figsize=(20, 15))
ax.containers[0].set_label('_nolegend')
for bar, tick_label in zip(ax.containers[0], ax.get_xticklabels()):
bar.set_label(tick_label.get_text())
ax.legend()
plt.tight_layout()
plt.show()
With a little bit less internal manipulation, something similar can be obtained via seaborn:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
df = pd.DataFrame({'time_to_travel_grouping': np.random.choice([*'abcdefgh'], 200)})
plt.figure(figsize=(20, 15))
ax = sns.countplot(data=df, x='time_to_travel_grouping', hue='time_to_travel_grouping',
palette=["b", "tab:green", "tab:red", "c", "m", "y", "tab:blue", "tab:orange"],
order=df.time_to_travel_grouping.value_counts().index,
dodge=False)
plt.setp(ax, xlabel="TTT", ylabel="Total Counts", title="Fig4: Total Counts by Time to Travel Category (TTT)")
plt.tight_layout()
plt.show()
Just putting the strings in legend function does not work as you expected in matplotlib. So, for adding all desired legends to the plot, you can make the patch objects from them with colors and add by this way. This piece of code will do the job and I think more generalized than the other solutions:
## include this library
import matplotlib.patches as mpatches
## desired legends
legend_list = ["a","b","c","d","e","f","g","h"]
## corresponding colors in the same order
color_list = ["b","tab:green","tab:red","c","m","y","tab:blue","tab:orange"]
## make patches from the legends and corresponding colors
patch_list = []
i = 0
for each_legend in legend_list:
patch_list.append(mpatches.Patch(label=each_legend, color=color_list[i]))
i += 1
## add made patches to the plot
plt.legend(handles=patch_list, fontsize=12, loc=(1, 0))

Heatmap with circles indicating size of population

I would like to produce a heatmap in Python, similar to the one shown, where the size of the circle indicates the size of the sample in that cell. I looked in seaborn's gallery and couldn't find anything, and I don't think I can do this with matplotlib.
It's the inverse. While matplotlib can do pretty much everything, seaborn only provides a small subset of options.
So using matplotlib, you can plot a PatchCollection of circles as shown below.
Note: You could equally use a scatter plot, but since scatter dot sizes are in absolute units it would be rather hard to scale them into the grid.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.collections import PatchCollection
N = 10
M = 11
ylabels = ["".join(np.random.choice(list("PQRSTUVXYZ"), size=7)) for _ in range(N)]
xlabels = ["".join(np.random.choice(list("ABCDE"), size=3)) for _ in range(M)]
x, y = np.meshgrid(np.arange(M), np.arange(N))
s = np.random.randint(0, 180, size=(N,M))
c = np.random.rand(N, M)-0.5
fig, ax = plt.subplots()
R = s/s.max()/2
circles = [plt.Circle((j,i), radius=r) for r, j, i in zip(R.flat, x.flat, y.flat)]
col = PatchCollection(circles, array=c.flatten(), cmap="RdYlGn")
ax.add_collection(col)
ax.set(xticks=np.arange(M), yticks=np.arange(N),
xticklabels=xlabels, yticklabels=ylabels)
ax.set_xticks(np.arange(M+1)-0.5, minor=True)
ax.set_yticks(np.arange(N+1)-0.5, minor=True)
ax.grid(which='minor')
fig.colorbar(col)
plt.show()
Here's a possible solution using Bokeh Plots:
import pandas as pd
from bokeh.palettes import RdBu
from bokeh.models import LinearColorMapper, ColumnDataSource, ColorBar
from bokeh.models.ranges import FactorRange
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
import numpy as np
output_notebook()
d = dict(x = ['A','A','A', 'B','B','B','C','C','C','D','D','D'],
y = ['B','C','D', 'A','C','D','B','D','A','A','B','C'],
corr = np.random.uniform(low=-1, high=1, size=(12,)).tolist())
df = pd.DataFrame(d)
df['size'] = np.where(df['corr']<0, np.abs(df['corr']), df['corr'])*50
#added a new column to make the plot size
colors = list(reversed(RdBu[9]))
exp_cmap = LinearColorMapper(palette=colors,
low = -1,
high = 1)
p = figure(x_range = FactorRange(), y_range = FactorRange(), plot_width=700,
plot_height=450, title="Correlation",
toolbar_location=None, tools="hover")
p.scatter("x","y",source=df, fill_alpha=1, line_width=0, size="size",
fill_color={"field":"corr", "transform":exp_cmap})
p.x_range.factors = sorted(df['x'].unique().tolist())
p.y_range.factors = sorted(df['y'].unique().tolist(), reverse = True)
p.xaxis.axis_label = 'Values'
p.yaxis.axis_label = 'Values'
bar = ColorBar(color_mapper=exp_cmap, location=(0,0))
p.add_layout(bar, "right")
show(p)
One option is to use matplotlib's scatter plots with legends and grid. You can specify size of those circles with specifying the scales. You can also change the color of each circle. You should somehow specify X,Y values so that the circles sit straight on lines. This is an example I got from here:
volume = np.random.rayleigh(27, size=40)
amount = np.random.poisson(10, size=40)
ranking = np.random.normal(size=40)
price = np.random.uniform(1, 10, size=40)
fig, ax = plt.subplots()
# Because the price is much too small when being provided as size for ``s``,
# we normalize it to some useful point sizes, s=0.3*(price*3)**2
scatter = ax.scatter(volume, amount, c=ranking, s=0.3*(price*3)**2,
vmin=-3, vmax=3, cmap="Spectral")
# Produce a legend for the ranking (colors). Even though there are 40 different
# rankings, we only want to show 5 of them in the legend.
legend1 = ax.legend(*scatter.legend_elements(num=5),
loc="upper left", title="Ranking")
ax.add_artist(legend1)
# Produce a legend for the price (sizes). Because we want to show the prices
# in dollars, we use the *func* argument to supply the inverse of the function
# used to calculate the sizes from above. The *fmt* ensures to show the price
# in dollars. Note how we target at 5 elements here, but obtain only 4 in the
# created legend due to the automatic round prices that are chosen for us.
kw = dict(prop="sizes", num=5, color=scatter.cmap(0.7), fmt="$ {x:.2f}",
func=lambda s: np.sqrt(s/.3)/3)
legend2 = ax.legend(*scatter.legend_elements(**kw),
loc="lower right", title="Price")
plt.show()
Output:
I don't have enough reputation to comment on Delenges' excellent answer, so I'll leave my comment as an answer instead:
R.flat doesn't order the way we need it to, so the circles assignment should be:
circles = [plt.Circle((j,i), radius=R[j][i]) for j, i in zip(x.flat, y.flat)]
Here is an easy example to plot circle_heatmap.
from matplotlib import pyplot as plt
import pandas as pd
from sklearn.datasets import load_wine as load_data
from psynlig import plot_correlation_heatmap
plt.style.use('seaborn-talk')
data_set = load_data()
data = pd.DataFrame(data_set['data'], columns=data_set['feature_names'])
#data = df_corr_selected
kwargs = {
'heatmap': {
'vmin': -1,
'vmax': 1,
'cmap': 'viridis',
},
'figure': {
'figsize': (14, 10),
},
}
plot_correlation_heatmap(data, bubble=True, annotate=False, **kwargs)
plt.show()

Add Legend to Seaborn point plot

I am plotting multiple dataframes as point plot using seaborn. Also I am plotting all the dataframes on the same axis.
How would I add legend to the plot ?
My code takes each of the dataframe and plots it one after another on the same figure.
Each dataframe has same columns
date count
2017-01-01 35
2017-01-02 43
2017-01-03 12
2017-01-04 27
My code :
f, ax = plt.subplots(1, 1, figsize=figsize)
x_col='date'
y_col = 'count'
sns.pointplot(ax=ax,x=x_col,y=y_col,data=df_1,color='blue')
sns.pointplot(ax=ax,x=x_col,y=y_col,data=df_2,color='green')
sns.pointplot(ax=ax,x=x_col,y=y_col,data=df_3,color='red')
This plots 3 lines on the same plot. However the legend is missing. The documentation does not accept label argument .
One workaround that worked was creating a new dataframe and using hue argument.
df_1['region'] = 'A'
df_2['region'] = 'B'
df_3['region'] = 'C'
df = pd.concat([df_1,df_2,df_3])
sns.pointplot(ax=ax,x=x_col,y=y_col,data=df,hue='region')
But I would like to know if there is a way to create a legend for the code that first adds sequentially point plot to the figure and then add a legend.
Sample output :
I would suggest not to use seaborn pointplot for plotting. This makes things unnecessarily complicated.
Instead use matplotlib plot_date. This allows to set labels to the plots and have them automatically put into a legend with ax.legend().
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import numpy as np
date = pd.date_range("2017-03", freq="M", periods=15)
count = np.random.rand(15,4)
df1 = pd.DataFrame({"date":date, "count" : count[:,0]})
df2 = pd.DataFrame({"date":date, "count" : count[:,1]+0.7})
df3 = pd.DataFrame({"date":date, "count" : count[:,2]+2})
f, ax = plt.subplots(1, 1)
x_col='date'
y_col = 'count'
ax.plot_date(df1.date, df1["count"], color="blue", label="A", linestyle="-")
ax.plot_date(df2.date, df2["count"], color="red", label="B", linestyle="-")
ax.plot_date(df3.date, df3["count"], color="green", label="C", linestyle="-")
ax.legend()
plt.gcf().autofmt_xdate()
plt.show()
In case one is still interested in obtaining the legend for pointplots, here a way to go:
sns.pointplot(ax=ax,x=x_col,y=y_col,data=df1,color='blue')
sns.pointplot(ax=ax,x=x_col,y=y_col,data=df2,color='green')
sns.pointplot(ax=ax,x=x_col,y=y_col,data=df3,color='red')
ax.legend(handles=ax.lines[::len(df1)+1], labels=["A","B","C"])
ax.set_xticklabels([t.get_text().split("T")[0] for t in ax.get_xticklabels()])
plt.gcf().autofmt_xdate()
plt.show()
Old question, but there's an easier way.
sns.pointplot(x=x_col,y=y_col,data=df_1,color='blue')
sns.pointplot(x=x_col,y=y_col,data=df_2,color='green')
sns.pointplot(x=x_col,y=y_col,data=df_3,color='red')
plt.legend(labels=['legendEntry1', 'legendEntry2', 'legendEntry3'])
This lets you add the plots sequentially, and not have to worry about any of the matplotlib crap besides defining the legend items.
I tried using Adam B's answer, however, it didn't work for me. Instead, I found the following workaround for adding legends to pointplots.
import matplotlib.patches as mpatches
red_patch = mpatches.Patch(color='#bb3f3f', label='Label1')
black_patch = mpatches.Patch(color='#000000', label='Label2')
In the pointplots, the color can be specified as mentioned in previous answers. Once these patches corresponding to the different plots are set up,
plt.legend(handles=[red_patch, black_patch])
And the legend ought to appear in the pointplot.
This goes a bit beyond the original question, but also builds on #PSub's response to something more general---I do know some of this is easier in Matplotlib directly, but many of the default styling options for Seaborn are quite nice, so I wanted to work out how you could have more than one legend for a point plot (or other Seaborn plot) without dropping into Matplotlib right at the start.
Here's one solution:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# We will need to access some of these matplotlib classes directly
from matplotlib.lines import Line2D # For points and lines
from matplotlib.patches import Patch # For KDE and other plots
from matplotlib.legend import Legend
from matplotlib import cm
# Initialise random number generator
rng = np.random.default_rng(seed=42)
# Generate sample of 25 numbers
n = 25
clusters = []
for c in range(0,3):
# Crude way to get different distributions
# for each cluster
p = rng.integers(low=1, high=6, size=4)
df = pd.DataFrame({
'x': rng.normal(p[0], p[1], n),
'y': rng.normal(p[2], p[3], n),
'name': f"Cluster {c+1}"
})
clusters.append(df)
# Flatten to a single data frame
clusters = pd.concat(clusters)
# Now do the same for data to feed into
# the second (scatter) plot...
n = 8
points = []
for c in range(0,2):
p = rng.integers(low=1, high=6, size=4)
df = pd.DataFrame({
'x': rng.normal(p[0], p[1], n),
'y': rng.normal(p[2], p[3], n),
'name': f"Group {c+1}"
})
points.append(df)
points = pd.concat(points)
# And create the figure
f, ax = plt.subplots(figsize=(8,8))
# The KDE-plot generates a Legend 'as usual'
k = sns.kdeplot(
data=clusters,
x='x', y='y',
hue='name',
shade=True,
thresh=0.05,
n_levels=2,
alpha=0.2,
ax=ax,
)
# Notice that we access this legend via the
# axis to turn off the frame, set the title,
# and adjust the patch alpha level so that
# it closely matches the alpha of the KDE-plot
ax.get_legend().set_frame_on(False)
ax.get_legend().set_title("Clusters")
for lh in ax.get_legend().get_patches():
lh.set_alpha(0.2)
# You would probably want to sort your data
# frame or set the hue and style order in order
# to ensure consistency for your own application
# but this works for demonstration purposes
groups = points.name.unique()
markers = ['o', 'v', 's', 'X', 'D', '<', '>']
colors = cm.get_cmap('Dark2').colors
# Generate the scatterplot: notice that Legend is
# off (otherwise this legend would overwrite the
# first one) and that we're setting the hue, style,
# markers, and palette using the 'name' parameter
# from the data frame and the number of groups in
# the data.
p = sns.scatterplot(
data=points,
x="x",
y="y",
hue='name',
style='name',
markers=markers[:len(groups)],
palette=colors[:len(groups)],
legend=False,
s=30,
alpha=1.0
)
# Here's the 'magic' -- we use zip to link together
# the group name, the color, and the marker style. You
# *cannot* retreive the marker style from the scatterplot
# since that information is lost when rendered as a
# PathCollection (as far as I can tell). Anyway, this allows
# us to loop over each group in the second data frame and
# generate a 'fake' Line2D plot (with zero elements and no
# line-width in our case) that we can add to the legend. If
# you were overlaying a line plot or a second plot that uses
# patches you'd have to tweak this accordingly.
patches = []
for x in zip(groups, colors[:len(groups)], markers[:len(groups)]):
patches.append(Line2D([0],[0], linewidth=0.0, linestyle='',
color=x[1], markerfacecolor=x[1],
marker=x[2], label=x[0], alpha=1.0))
# And add these patches (with their group labels) to the new
# legend item and place it on the plot.
leg = Legend(ax, patches, labels=groups,
loc='upper left', frameon=False, title='Groups')
ax.add_artist(leg);
# Done
plt.show();
Here's the output:

Custom legend for Seaborn regplot (Python 3)

I've been trying to follow this How to make custom legend in matplotlib SO question but I think a few things are getting lost in translation. I used a custom color mapping for the different classes of points in my plot and I want to be able to put a table with those color-label pairs. I stored the info in a dictionary D_color_label and then made 2 parallel lists colors and labels. I tried using it in the ax.legend but it didn't seem to work.
np.random.seed(0)
# Create dataframe
DF_0 = pd.DataFrame(np.random.random((100,2)), columns=["x","y"])
# Label to colors
D_idx_color = {**dict(zip(range(0,25), ["#91FF61"]*25)),
**dict(zip(range(25,50), ["#BA61FF"]*25)),
**dict(zip(range(50,75), ["#916F61"]*25)),
**dict(zip(range(75,100), ["#BAF1FF"]*25))}
D_color_label = {"#91FF61":"label_0",
"#BA61FF":"label_1",
"#916F61":"label_2",
"#BAF1FF":"label_3"}
# Add color column
DF_0["color"] = pd.Series(list(D_idx_color.values()), index=list(D_idx_color.keys()))
# Plot
fig, ax = plt.subplots(figsize=(8,8))
sns.regplot(data=DF_0, x="x", y="y", scatter_kws={"c":DF_0["color"]}, ax=ax)
# Add custom legend
colors = list(set(DF_0["color"]))
labels = [D_color_label[x] for x in set(DF_0["color"])]
# If I do this, I get the following error:
# ax.legend(colors, labels)
# UserWarning: Legend does not support '#BA61FF' instances.
# A proxy artist may be used instead.
According to http://matplotlib.org/users/legend_guide.html you have to put to legend function artists which will be labeled. To use scatter_plot individually you have to group by your data by color and plot every data of one color individually to set its own label for every artist:
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
import seaborn as sns
np.random.seed(0)
# Create dataframe
DF_0 = pd.DataFrame(np.random.random((100, 2)), columns=["x", "y"])
DF_0['color'] = ["#91FF61"]*25 + ["#BA61FF"]*25 + ["#91FF61"]*25 + ["#BA61FF"]*25
#print DF_0
D_color_label = {"#91FF61": "label_0", "#BA61FF": "label_1",
"#916F61": "label_2", "#BAF1FF": "label_3"}
colors = list(DF_0["color"].uniqe())
labels = [D_color_label[x] for x in DF_0["color"].unique()]
ax = sns.regplot(data=DF_0, x="x", y="y", scatter_kws={'c': DF_0['color'], 'zorder':1})
# Make a legend
# groupby and plot points of one color
for i, grp in DF_0.groupby(['color']):
grp.plot(kind='scatter', x='x', y='y', c=i, ax=ax, label=labels[i+1], zorder=0)
ax.legend(loc=2)
plt.show()

Plotting legend with correct labels python

I have a data frame and I want to plot a legend with 'A', 'B', and 'C' however, what I have only produced a legend with an 'A' label:
data = {'A1_mean': [0.457, 1],
'A2_median': [0.391,1],
'A3_range': [0.645,1],
'A4_std': [0.111,1],
'B1_mean': [0.132,3],
'B2_median': [0.10,3],
'B3_range': [0.244,3],
'B4_std': [0.297,3],
'C1_mean': [0.286,2],
'C2_median': [0.231,2],
'C3_range': [0.554,2],
'C4_std': [0.147,2]}
df = pd.DataFrame(data).T
color = {1:'red',2:'green',3:'blue'}
ax=df[0].plot(kind='bar',color=df[1].map(color).tolist())
ax.legend(['A','B','C'])
gives:
How can I change this so that I have a legend with A B and C, with the appropriate color (A:red, B:blue, C:green) ?
Per the Legend guide you could place Proxy Artists in the legend:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
data = {'A1_mean': [0.457, 1],
'A2_median': [0.391,1],
'A3_range': [0.645,1],
'A4_std': [0.111,1],
'B1_mean': [0.132,3],
'B2_median': [0.10,3],
'B3_range': [0.244,3],
'B4_std': [0.297,3],
'C1_mean': [0.286,2],
'C2_median': [0.231,2],
'C3_range': [0.554,2],
'C4_std': [0.147,2]}
df = pd.DataFrame(data).T
color = {1:'red',2:'green',3:'blue'}
labels = ['A','C','B']
fig, ax = plt.subplots()
df[0].plot(ax=ax, kind='bar', color=df[1].map(color))
handles = []
for i, c in color.items():
handles.append(mpatches.Patch(color=c, label=labels[i-1]))
plt.legend(handles=handles, loc='best')
# auto-rotate xtick labels
fig.autofmt_xdate()
plt.show()

Categories

Resources