Add legend in Seaborn combo line bar chart - python

I'm trying to add a legend to my seaborn bar + line chart, but I only get the error "No handles with labels found to put in legend." whatever I try. How should I go about this?
from pathlib import Path
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
import matplotlib.dates as mdates
import numpy as np
dfGroup = pd.DataFrame({
'Year': [1910, 1911, 1912, 1913, 1914, 1915, 1916, 1917, 1918, 1919, 1920],
'Total Deaths': [0, 0, 2, 3, 2, 3, 4, 5, 6, 7, 8],
'Total Affected': [0, 1, 0, 2, 3, 6, 9, 8, 12, 13, 15]
})
# Add 3-year rolling average
dfGroup['rolling_3years'] = dfGroup['Total Deaths'].rolling(3).mean().shift(0)
dfGroup = dfGroup.fillna(0)
# Make a smooth line from the 3-year rolling average
from scipy.interpolate import make_interp_spline
X_Y_Spline = make_interp_spline(dfGroup['Year'], dfGroup['rolling_3years'])
# Returns evenly spaced numbers over a specified interval.
X_ = np.linspace(dfGroup['Year'].min(), dfGroup['Year'].max(), 500)
Y_ = X_Y_Spline(X_)
# Plot the data
a4_dims = (15, 10)
fig, ax1 = plt.subplots(figsize=a4_dims)
ax1 = sns.barplot(x = "Year", y = "Total Deaths",
data = dfGroup, color='#42b7bd')
ax2 = ax1.twinx()
ax2 = sns.lineplot(X_, Y_, marker='o')

As seaborn's barplot uses categorical positions, internally numbered 0,1,2,... both plots can be drawn on the same ax. This can be accomplished by recalculating the x values for the lineplot.
To obtain a legend, the label= keyword should be used. (Creating a legend on twinx axes is a bit more complicated and would involve creating custom handles.) Seaborn often automatically creates a legend. If you want to change its appearance, you can call ax1.legend(...) with customization parameters.
Here is some example code:
from pathlib import Path
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
import matplotlib.dates as mdates
import numpy as np
# Example data: one row per year.
dfGroup = pd.DataFrame({
'Year': [1910, 1911, 1912, 1913, 1914, 1915, 1916, 1917, 1918, 1919, 1920],
'Total Deaths': [0, 0, 2, 3, 2, 3, 4, 5, 6, 7, 8],
'Total Affected': [0, 1, 0, 2, 3, 6, 9, 8, 12, 13, 15]
})
# Add 3-year rolling average of 'Total Deaths' (shift(0) leaves the values in place)
dfGroup['rolling_3years'] = dfGroup['Total Deaths'].rolling(3).mean().shift(0)
# Replace the missing values (rows without a full 3-year window) by 0
dfGroup = dfGroup.fillna(0)
# Make a smooth line from the 3-year rolling average
from scipy.interpolate import make_interp_spline
X_Y_Spline = make_interp_spline(dfGroup['Year'], dfGroup['rolling_3years'])
# 500 evenly spaced x values between the first and the last year
X_ = np.linspace(dfGroup['Year'].min(), dfGroup['Year'].max(), 500)
# Evaluate the spline at those x values
Y_ = X_Y_Spline(X_)
# Plot the data
a4_dims = (15, 10)
fig, ax1 = plt.subplots(figsize=a4_dims)
# Draw the bars on the existing axes (ax=ax1); label= provides the legend entry
sns.barplot(x="Year", y="Total Deaths",
data=dfGroup, color='#42b7bd', label='Barplot label', ax=ax1)
# The bars sit at categorical positions 0, 1, 2, ..., so the spline samples are
# spread over 0 .. len(dfGroup) - 1 instead of over the years themselves
x_plot = np.linspace(0, len(dfGroup) - 1, len(X_))
# Same axes as the bars, so both labels end up in one legend
sns.lineplot(x=x_plot, y=Y_, marker='o', label='LinePlot label', ax=ax1)
ax1.set_ylim(ymin=0) # let bars touch the bottom of the plot
ax1.margins(x=0.02) # less margins left and right
# ax1.legend(title='legend title') # optionally change the legend
plt.show()
PS: If an ax is already created, it should be given as a parameter to seaborn's axes-level functions (so, sns.barplot(..., ax=ax1) instead of ax1 = sns.barplot(...)).

Related

How to map heatmap tick labels to a value and add those values as a legend

I want to create a heatmap in seaborn, and have a nice way to see the labels.
With ax.figure.tight_layout(), I am getting
which is obviously bad.
Without ax.figure.tight_layout(), the labels get cropped.
The code is
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sn
n_classes = 10
confusion = np.random.randint(low=0, high=100, size=(n_classes, n_classes))
label_length = 20
label_ind_by_names = {
"A"*label_length: 0,
"B"*label_length: 1,
"C"*label_length: 2,
"D"*label_length: 3,
"E"*label_length: 4,
"F"*label_length: 5,
"G"*label_length: 6,
"H"*label_length: 7,
"I"*label_length: 8,
"J"*label_length: 9,
}
# confusion matrix
df_cm = pd.DataFrame(
confusion,
index=label_ind_by_names.keys(),
columns=label_ind_by_names.keys()
)
plt.figure()
sn.set(font_scale=1.2)
ax = sn.heatmap(df_cm, annot=True, annot_kws={"size": 16}, fmt='d')
# ax.figure.tight_layout()
plt.show()
I would like to create an extra legend based on label_ind_by_names, then post an abbreviation on the heatmap itself, and be able to look up the abbreviation in the legend.
How can this be done in seaborn?
You can define your own legend handler, e.g. for integers:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sn
n_classes = 10
confusion = np.random.randint(low=0, high=100, size=(n_classes, n_classes))
label_length = 20
label_ind_by_names = {
"A"*label_length: 0,
"B"*label_length: 1,
"C"*label_length: 2,
"D"*label_length: 3,
"E"*label_length: 4,
"F"*label_length: 5,
"G"*label_length: 6,
"H"*label_length: 7,
"I"*label_length: 8,
"J"*label_length: 9,
}
# confusion matrix
df_cm = pd.DataFrame(
confusion,
index=label_ind_by_names.values(),
columns=label_ind_by_names.values()
)
fig, ax = plt.subplots(figsize=(10, 5))
fig.subplots_adjust(left=0.05, right=.65)
sn.set(font_scale=1.2)
sn.heatmap(df_cm, annot=True, annot_kws={"size": 16}, fmt='d', ax=ax)
class IntHandler:
    """Legend handler that shows a handle as text instead of a drawn marker.

    It is registered through ``handler_map={int: IntHandler()}``, so the
    integer "handles" passed to ``ax.legend`` are rendered as their own digits.
    """

    def legend_artist(self, legend, orig_handle, fontsize, handlebox):
        """Create the text artist for one legend entry.

        ``orig_handle`` is the object passed as the legend handle; its ``str()``
        is placed at the descent offsets of ``handlebox``. The created text
        artist is added to ``handlebox`` and returned.
        """
        x0, y0 = handlebox.xdescent, handlebox.ydescent
        text = plt.matplotlib.text.Text(x0, y0, str(orig_handle))
        handlebox.add_artist(text)
        return text
ax.legend(label_ind_by_names.values(),
label_ind_by_names.keys(),
handler_map={int: IntHandler()},
loc='upper left',
bbox_to_anchor=(1.2, 1))
plt.show()
Explanation of the hard-coded figures: the first two are the left and right extreme positions of the Axes in the figure, as fractions of the figure width (0.05 = 5 % of the figure width, etc.). 1.2 and 1 give the location of the upper left corner of the legend box relative to the Axes ((1, 1) is the upper right corner of the Axes; we add 0.2 to the x value to account for the space used by the colorbar). Ideally one would use a constrained layout instead of fiddling with these parameters, but it doesn't (yet) support figure legends, and if using an Axes legend, it places it between the Axes and the colorbar.

Plotting time series data with 30sec break point and color

I am new to Python programming. I can plot the input data shown in the figure with my code, but how can I plot the time series data as shown in the figure? Any code and suggestions would be appreciated.
My code is:
import matplotlib.pyplot as plt
import numpy as np
y_values = [5, 5, 1, 1, 5, 5, 1, 1, 5, 1, 1]
x_values = np.arange(30, 331, 30)
plt.figure()
plt.plot(x_values,y_values,"-x")
plt.show()
Although there is a way to draw a series of rectangular shapes, we used a more general method: a stacked horizontal bar chart. We keep a list of bar widths and place each bar to the right of the previous ones. The class values and the row titles are added as text annotations. You can try various other parameters.
import matplotlib.pyplot as plt
import numpy as np

# Class value of each consecutive interval; it selects the segment colour.
y_values = [5, 5, 1, 1, 5, 5, 1, 1, 5, 1, 1]
x_values = np.arange(30, 331, 30)  # kept from the question; not used for drawing
# Drawn width of every segment (all equal), one entry per class value.
y = [5] * len(y_values)

fig, ax = plt.subplots(figsize=(12, 1))

# Stack the segments left to right on a single horizontal bar. The running
# `left` offset replaces re-summing the previous widths for every segment.
left = 0
for width, value in zip(y, y_values):
    color = 'y' if value == 5 else 'm'
    ax.barh(y=0, left=left, height=1.0, width=width, color=color, edgecolor='k')
    # The bar covers y in [-0.5, 0.5]; y=0.6 puts the class value just above it,
    # starting at the horizontal middle of the segment.
    ax.text(x=left + width / 2, y=0.6, s=str(value))
    left += width

# Hide the frame and the ticks so only the coloured strip remains.
for s in ['top', 'bottom', 'left', 'right']:
    ax.spines[s].set_visible(False)
ax.set_xticks([])
ax.set_yticks([])

# Row titles: 'Class' for the annotation row, 'Time Interval' for the strip.
ax.text(-0.08, 1, 'Class', transform=ax.transAxes)
ax.set_ylabel('Time Interval', rotation=0, labelpad=40, loc='center')
plt.show()
Try:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# The first entry of each list is the row title shown in the leftmost column.
y_values = ['class', 5, 5, 1, 1, 5, 5, 1, 1, 5, 1, 1]
x_values = np.arange(30, 331, 30)
x_values = np.concatenate((['Time'], x_values))
df = pd.DataFrame(data={'class': y_values, 'Time': x_values})

# Background colour per class value; the title column has no class -> white.
colors = {5: 'gold', 1: 'darkviolet'}
# Assign the result instead of calling fillna(inplace=True) on df['colors']:
# the in-place call acts on an intermediate object and is not guaranteed to
# update df (it does not under pandas copy-on-write).
df['colors'] = df['class'].map(colors).fillna('white')
# Same reason for .loc instead of the chained df['Time'].iloc[1:] = '':
# keep only the row title, blank out the individual time values.
df.loc[1:, 'Time'] = ''
print(df)

fig, ax = plt.subplots(1, 1)
ax.axis('tight')
ax.axis('off')

# One table row ('Time') below a header row ('class'), one column per interval.
table = ax.table(cellText=[df['Time'].tolist()],
                 colLabels=df['class'].tolist(),
                 loc="center",
                 cellColours=[df['colors'].tolist()])
table.set_fontsize(14)
# Row 0 is the header row: remove the cell borders of the class values.
for i in range(len(df)):
    table[0, i].visible_edges = ''
# Also remove the border around the 'Time' title cell.
table[1, 0].visible_edges = ''
table.scale(1.5, 1.5)
plt.show()

seaborn swarmplot with markers for hue (categories) and colormap for values

I want to plot a seaborn.swarmplot with categories (hue) represented as markers, while the color of the scatterplots defines the value of an additional column.
I found this great answer to be quite similar, but instead I'd like to add another column to my dataframe, let's call it cat for a categorical variable, which is set as hue='cat' while the colormap is defined via the column c. Instead of colored categories, I want the categories to be marked with markers, e.g. markers=['x', 'o', 'd']. I adapted the above-mentioned answer so that at least the colormap depends on c (although it is also mapped by the category), but setting a marker for each category still fails:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.colorbar
import matplotlib.colors
import matplotlib.cm
from mpl_toolkits.axes_grid1 import make_axes_locatable
import seaborn as sns
# dataframe
df = pd.DataFrame(
data={'a': [1, 4, 5, 6, 3, 4, 5, 6],
'c': [12, 35, 12, 46, 78, 45, 34, 70],
'cat': [0, 1, 1, 1, 0, 1, 0, 1],
'key': [1, 2, 2, 1, 1, 2, 1, 2]}
)
# Create a matplotlib colormap from the sns seagreen color palette
cmap = sns.light_palette("seagreen", reverse=False, as_cmap=True)
# Normalize to the range of possible values from df["c"]
norm = matplotlib.colors.Normalize(vmin=df["c"].min(), vmax=df["c"].max())
# create a color dictionary (value in c : color from colormap)
colors = {}
for cat, cval in zip(df['cat'], df["c"]):
colors.update({'{0}-{1}'.format(cat, cval) : cmap(norm(cval))})
# save cat-col key as new reformed cat col:
df['cat-color'] = list(colors.keys())
fig = plt.figure(figsize=(5,2.8))
# plot the swarmplot with the colors dictionary as palette
m = sns.swarmplot(x='key', y='a', hue="cat-color", s=12, data=df,
palette=colors, markers=['x', 'o', 'd'])
## create colorbar ##
divider = make_axes_locatable(plt.gca())
ax_cb = divider.new_horizontal(size="5%", pad=0.05)
fig.add_axes(ax_cb)
cb1 = matplotlib.colorbar.ColorbarBase(
ax_cb, cmap=cmap, norm=norm, orientation='vertical')
plt.show()
Any idea how to integrate the markers and how to make the color only dependent on c and not on cat?
I also found this answer which deals with markers, but I can't see any way to combine it to have markers and colors independently.

How do I plot JUST the mean and stdev of data in seaborn?

I cannot for the life of me find a similar question to this, and I have been pulling my hair out trying to figure out how to do this. It seems like it should be a simple thing!
The setup: I have some X vs Y data grouped into bins, and each bin contains X and Y data points. For each bin, I would like to plot the mean of X vs mean of Y along with their respective stdevs, and most importantly: color code each bin using the Seaborn "colorblind" palette (this is mandatory).
What I've tried: Everything under the sun. Lineplot, scatterplot, catplot, pointplot. And when none of those were working, I tried to use matplotlib's "errorbars" but I apparently can't seem to export Seaborn's "colorblind" palette to matplotlib so that was a bust too.
Some dummy code:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
some_data = pd.DataFrame({'X':[9,10,11,12,39,40,41,42], 'Y':[99,100,110,111,499,500,510,511], 'Bin':[1,1,1,1,2,2,2,2]})
Results of some tries:
sns.pointplot(x="X", y="Y", data=some_data, legend='full', hue='Bin')
Pointplot completely screws up the x-axis scale, so that's another issue that I haven't been able to work around.
sns.lineplot(x="X", y="Y", data=some_data, legend='full', hue='Bin', err_style="band", estimator="mean", ci='sd')
Better but it's just drawing a line between the points instead of calculating the mean and stdev, which, I thought it would do when I specify an estimator and confidence interval method!!!!!.
sns.scatterplot(x="X", y="Y", data=some_data, legend='full', hue='Bin')
Scatterplot is fine, but it doesn't possess estimator functionality so I'm literally just plotting the raw data.
I'm just completely lost on what to do. I've been at this all night. It's 4:30 AM and I've barely slept for the past few nights. Any help would be appreciated!
The following approach draws an ellipse using the mean and sdevs:
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse
import pandas as pd
import seaborn as sns

df = pd.DataFrame({'X': [9, 10, 11, 12, 39, 40, 41, 42],
                   'Y': [99, 100, 110, 111, 499, 500, 510, 511],
                   'Bin': [1, 1, 1, 1, 2, 2, 2, 2]})
# Per-bin mean and standard deviation of X and Y, indexed by 'Bin'.
means = df.groupby('Bin').mean()
sdevs = df.groupby('Bin').std()

fig, ax = plt.subplots()
# One colour per bin from seaborn's "colorblind" palette (required by the
# question); the result is a plain list of RGB tuples that matplotlib accepts.
colors = sns.color_palette('colorblind', n_colors=len(means))
sns.scatterplot(x='X', y='Y', hue='Bin', palette=colors, data=df, ax=ax)
sns.scatterplot(x='X', y='Y', data=means, color='limegreen', label='means', ax=ax)

# means and sdevs share the same sorted 'Bin' index, so zipping their rows
# pairs each bin's mean with its own standard deviation and colour.
for (_, mean), (_, sdev), color in zip(means.iterrows(), sdevs.iterrows(), colors):
    # width/height are full diameters: 2 * sdev spans mean - sdev .. mean + sdev.
    ellipse = Ellipse((mean['X'], mean['Y']), width=2 * sdev['X'], height=2 * sdev['Y'],
                      facecolor=color, alpha=0.3)
    ax.add_patch(ellipse)
plt.show()
Here is a more elaborate example, showing ellipses for 1, 2 and 3 times the sdev.
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse
import pandas as pd
import numpy as np
import seaborn as sns

K = 5    # number of bins
N = 100  # points per bin
# Random test data: each bin gets its own random centre and a fixed spread.
df = pd.DataFrame({
    'X': np.random.normal(np.tile(np.random.uniform(10, 40, K), N), np.tile([3, 4, 7, 9, 10], N)),
    'Y': np.random.normal(np.tile(np.random.uniform(90, 500, K), N), np.tile([20, 25, 8, 22, 18], N)),
    'Bin': np.tile(np.arange(1, K + 1), N),
})
# Per-bin mean and standard deviation of X and Y, indexed by 'Bin'.
means = df.groupby('Bin').mean()
sdevs = df.groupby('Bin').std()

fig, axes = plt.subplots(ncols=2, figsize=(12, 4))
# One colour per bin from seaborn's "colorblind" palette (required by the question).
colors = sns.color_palette('colorblind', n_colors=K)

# Both subplots show the raw points and the per-bin means.
for ax in axes:
    sns.scatterplot(x='X', y='Y', hue='Bin', palette=colors, s=5, ec='none', data=df, ax=ax)
    sns.scatterplot(x='X', y='Y', marker='o', s=50, fc='none', ec='black', label='means', data=means, ax=ax)

# Only the right-hand subplot additionally gets the ellipses.
for (_, mean), (_, sdev), color in zip(means.iterrows(), sdevs.iterrows(), colors):
    for sdev_mult in [1, 2, 3]:
        # width/height are full diameters: this spans mean +/- sdev_mult * sdev.
        ellipse = Ellipse((mean['X'], mean['Y']), width=2 * sdev['X'] * sdev_mult,
                          height=2 * sdev['Y'] * sdev_mult,
                          facecolor=color, alpha=0.2 if sdev_mult == 1 else 0.1)
        axes[1].add_patch(ellipse)
plt.show()
I acknowledge this is not the full answer - but I hope it will help with the data stats and give you some direction with the plot. I'm not terribly good with matplot/seaborn, so to get this over to you, I've quickly written the graph in plotly. I hope it at least provides some direction for you ...
Mean / Std:
import pandas as pd
from plotly.offline import iplot
x = [9, 10, 11, 12, 39, 40, 41, 42]
y = [99, 100, 110, 111, 499, 500, 510, 511]
b = [1, 1, 1, 1, 2, 2, 2, 2]
df = pd.DataFrame({'x': x, 'y': y, 'bin': b})
df = df.groupby(['bin']).agg(['mean', 'std'])
df.columns = ['_'.join(c).rstrip('_') for c in df.columns.to_list()]
df.reset_index(inplace=True)
Output:
bin x_mean x_std y_mean y_std
0 1 10.5 1.290994 105 6.377042
1 2 40.5 1.290994 505 6.377042
Plotting:
# Build two single-point marker traces per bin: one located at the bin's
# (x_mean, y_mean) and one located at its (x_std, y_std).
data = []
for row in df.itertuples():
    data.append({'x': [row.x_mean],
                 'y': [row.y_mean],
                 'mode': 'markers',
                 'name': '{} mean'.format(row.bin),
                 'marker': {'size': 25}})
    data.append({'x': [row.x_std],
                 'y': [row.y_std],
                 'mode': 'markers',
                 'name': '{} std'.format(row.bin),
                 'marker': {'size': 25}})
iplot({'data': data})
Output:
Note that as the stds are the same, the red/purple dots overlay each other.
I hope this helps a bit ...

Is it possible to change the frequency of ticks on a pyplot INDEPENDENT of length of data set and zoom?

When I plot data using matplotlib I always have 5-9 ticks on my x-axis independent of the range I plot, and if I zoom on the x-axis the tick spacing decreases, so I still see 5-9 ticks.
however, I would like 20-30 ticks on my x-axis!
I can achieve this with the following:
from matplotlib import pyplot as plt
import numpy as np
x = [5, 10, 15, 20, 25, 30, 35, 40, 45, 50]
y = [1, 4, 3, 2, 7, 6, 9, 8, 10, 5]
number_of_ticks_on_x_axis = 20
plt.plot(x, y)
plt.xticks(np.arange(min(x), max(x)+1, (max(x) - min(x))/number_of_ticks_on_x_axis))
plt.show()
If I now zoom on the x-axis, no new ticks appear between the existing ones. I would like to still have ~20 ticks however much I zoom.
Assuming that you want to fix the no. of ticks on the X axis
...
from matplotlib.ticker import MaxNLocator
...
fig, ax = plt.subplots()
ax.xaxis.set_major_locator(MaxNLocator(15, min_n_ticks=15))
...
Please look at the docs for MaxNLocator
Example
In [36]: import numpy as np
...: import matplotlib.pyplot as plt
In [37]: from matplotlib.ticker import MaxNLocator
In [38]: fig, ax = plt.subplots(figsize=(10,4))
In [39]: ax.grid()
In [40]: ax.xaxis.set_major_locator(MaxNLocator(min_n_ticks=15))
In [41]: x = np.linspace(0, 1, 51)
In [42]: y = x*(1-x)
In [43]: plt.plot(x, y)
Out[43]: [<matplotlib.lines.Line2D at 0x7f9eab409e10>]
gives
and when I zoom into the maximum of the curve I get
You can link a callback function to an event in the canvas. In your case you can trigger a function that updates the axis when a redraw occurs.
from matplotlib import pyplot as plt
import numpy as np

x = [5, 10, 15, 20, 25, 30, 35, 40, 45, 50]
y = [1, 4, 3, 2, 7, 6, 9, 8, 10, 5]
n = 20  # number of x ticks to keep, whatever the zoom level

plt.plot(x, y)
# Initial ticks over the full data range.
plt.xticks(np.arange(min(x), max(x) + 1, (max(x) - min(x)) / n), rotation=90)


def on_zoom(event):
    """Re-space the x ticks so that n of them span the visible x range.

    Connected to 'draw_event' below, so it runs on every redraw of the
    figure, which includes the redraw that follows a zoom or pan.
    """
    ax = plt.gca()
    fig = plt.gcf()
    x_min, x_max = ax.get_xlim()
    ax.set_xticks(np.linspace(x_min, x_max, n))
    # had to add flush_events to get the ticks to redraw on the last update.
    fig.canvas.flush_events()


fig = plt.gcf()
# Connect first: cid only exists after mpl_connect returns, so disconnecting
# before this line raises a NameError on a fresh run.
cid = fig.canvas.mpl_connect('draw_event', on_zoom)
# To stop updating the ticks later on: fig.canvas.mpl_disconnect(cid)
plt.show()

Categories

Resources