Add legend to chart with data from different columns [duplicate] - python

This question already has answers here:
Add Legend to Seaborn point plot
(4 answers)
Closed 7 months ago.
How do I add a legend to a chart where I am showing data from more than one column? The legend isn't appearing automatically and if I use ax.legend() then I get an error message like this: [WARNING] No artists with labels found to put in legend.
This is what the code looks like:
fig, ax = plt.subplots()
sns.lineplot(x=data.index, y=data[('s', 'a')], color='black', marker='o')
sns.lineplot(x=data.index, y=data[('s', 'f')], color='firebrick', linewidth=1, linestyle='--')
ax.fill_between(x=data.index, y1=data[('s', 'l')], y2=data[('s', 'u')], color='firebrick', alpha=0.2)
ax.legend()
This is what data looks like (using dummy data):
pd.DataFrame(data=[[1,1.1,0.7,1.3],[2,1.9,1.7,2.3],[3,2.8,2.7,3.3]], index=['2022-01-01', '2022-02-01', '2022-03-01'], columns=pd.MultiIndex.from_tuples([('s','a'),('s','f'),('s','l'),('s','u')]))
Not sure what I'm doing wrong but I'd like a legend that shows what the black line, red dotted line and range are.

Add label to your plots like so:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
data = pd.DataFrame(data=[[1,1.1,0.7,1.3],[2,1.9,1.7,2.3],[3,2.8,2.7,3.3]], index=['2022-01-01', '2022-02-01', '2022-03-01'], columns=pd.MultiIndex.from_tuples([('s','a'),('s','f'),('s','l'),('s','u')]))
fig, ax = plt.subplots()
sns.lineplot(x=data.index, y=data[('s', 'a')], color='black', marker='o', label = "s a")
sns.lineplot(x=data.index, y=data[('s', 'f')], color='firebrick', linewidth=1, linestyle='--', label="s f")
ax.fill_between(x=data.index, y1=data[('s', 'l')], y2=data[('s', 'u')], color='firebrick', alpha=0.2, label="range")
ax.legend()
Output:

Related

Seaborn axvspan alterating x-axis

I'm trying to create some scatter plots, with seaborn with a specific area of each plot highlighted in red. However when I add the code for axvspan, it changes the x-axis. This is how the plots look prior to axvspan being applied.
When i apply the line for axvpsan:
fig, (ax0, ax1) = plt.subplots(2,1, figsize=(5,10))
ax0.axvspan("0.4", "0.8", color='red', alpha=0.3, label ='Problem Area')
sns.scatterplot(x='Values_1', y='Values_2', data=df3, color='green', ax=ax0)
sns.scatterplot(x='Values_3', y='Values_4', data=df3, color='green', ax=ax1)
plt.show()
It sends up looking like this:
Ultimately, the red section needs to only cover the data between 0.4 and 0.7, but by altering the x-axis it ends up covering all of it.
Any advice?
The unexpected behavior is resulting from passing the xmin and xmax arguments to matplotlib.pyplot.axvspan as str and not as float.
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
# generate data
rng = np.random.default_rng(12)
df3 = pd.DataFrame({"Values_2": rng.random(100), "Values_1": np.linspace(0., 0.6, 100)})
fig, ax0 = plt.subplots(1,1, figsize=(6, 4))
ax0.axvspan(0.4, 0.8, color='red', alpha=0.3, label ='Problem Area')
sns.scatterplot(x='Values_1', y='Values_2', data=df3, color='green', ax=ax0)
plt.show()
This gives:

Showing both boxplots when using split in seaborn violinplots

I would like to make split violin plots which also show the boxplots for both datasets, like in the figure of the question Seaborn: How to apply custom color to each seaborn violinplot? , problem is that when using split seaborn shows only one of them (and it's not even clear to me to which dataset it refers to) as you can see in the answer, is there a way to overcome this or should I use a different package?
Here is an example with an artificial dataset to show how the default inner='box' shows a simple boxplot-like box for the combined dataset.
The second plot shows how inner='quartile' looks like.
The rightmost plot shows an approach to explicitly draw separate boxplots (using width= to place them close to the center).
from matplotlib import pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
data = pd.DataFrame({'Value': (np.random.randn(4, 100).cumsum(axis=0) + np.array([[15], [5], [12], [7]])).ravel(),
'Set': np.repeat(['A', 'B', 'A', 'B'], 100),
'x': np.repeat([1, 2], 200)})
fig, (ax1, ax2, ax3) = plt.subplots(ncols=3, figsize=(15, 4))
palette = ['paleturquoise', 'yellow']
sns.violinplot(data=data, x='x', y='Value', hue='Set', split=True, inner='box', palette=palette, ax=ax1)
ax1.set_title('Default, inner="box"')
sns.violinplot(data=data, x='x', y='Value', hue='Set', split=True, inner='quartiles', palette=palette, ax=ax2)
ax2.set_title('Using inner="quartiles"')
sns.violinplot(data=data, x='x', y='Value', hue='Set', split=True, inner=None, palette=palette, ax=ax3)
sns.boxplot(data=data, x='x', y='Value', hue='Set', color='white', width=0.3, boxprops={'zorder': 2}, ax=ax3)
ax3.set_title('Explicitely drawing boxplots')
handles, labels = ax3.get_legend_handles_labels()
ax3.legend(handles[:2], labels[:2], title='Set')
plt.tight_layout()
plt.show()

How to add colors in stacked area chart

pls I need to add a area color to my code to show a plot similar to this one bellow:
My code is here:
import numpy as np
import pandas as pd
from pandas import DataFrame
import matplotlib.pyplot as plt
from matplotlib import pyplot as plt
df = pd.DataFrame({'Time': [1,2,3,4,5],
'T=0': [0.5,0.16,0,0.25,0],
'T=2': [0.5,0.5,1,1,1],
'T=10': [0.75,0.8,0.85,0.9,0.8]
})
plt.plot( 'Time', 'T=10', data=df, marker='d', color='black', markersize=5, linewidth=1.5, linestyle=':')
plt.plot( 'Time', 'T=2', data=df, marker='^', color='black', markersize=4, linewidth=1.5,linestyle='--')
plt.plot( 'Time', 'T=0', data=df, marker='o', color='black', markersize=4, linewidth=1.5,linestyle='-')
plt.legend()
plt.xlabel("Time")
plt.xticks([1,2,3,4,5])
plt.xlim(0.9, 5.02)
plt.ylabel("Average")
plt.ylim(0, 1.02)
plt.show()
The actual result:
Many thanks.
All you need to do is add the following 3 lines to your code:
plt.fill_between(df['Time'], df['T=0'], alpha = 0.3, color = 'steelblue')
plt.fill_between(df['Time'], df['T=0'], df['T=2'], alpha = 0.3, color = 'yellow')
plt.fill_between(df['Time'], df['T=2'], df['T=10'], alpha = 0.3, color = 'red')
You can also create a legend corresponding to the colors. However, in the case of your graph, since two plot lines cross, it is best to leave the legend assigned to the plot lines rather than the colors (as you have).

How to properly plot a line over bars?

This one used to work fine, but somehow it stopped working (I must have changed something mistakenly but I can't find the issue).
I'm plotting a set of 3 bars per date, plus a line that shows the accumulated value of one of them. But only one or another (either the bars or the line) is properly being plotted. If I left the code for the bars last, only the bars are plotted. If I left the code for the line last, only the line is plotted.
fig, ax = plt.subplots(figsize = (15,8))
df.groupby("date")["result"].sum().cumsum().plot(
ax=ax,
marker='D',
lw=2,
color="purple")
df.groupby("date")[selected_columns].sum().plot(
ax=ax,
kind="bar",
color=["blue", "red", "gold"])
ax.legend(["LINE", "X", "Y", "Z"])
Appreciate the help!
Pandas draws bar plots with the x-axis as categorical, so internally numbered 0, 1, 2, ... and then setting the label. The line plot uses dates as x-axis. To combine them, both need to be categorical. The easiest way is to drop the index from the line plot. Make sure that the line plot is draw first, enabling the labels to be set correctly by the bar plot.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
df = pd.DataFrame({'date': pd.date_range('20210101', periods=10),
'earnings': np.random.randint(100, 600, 10),
'costs': np.random.randint(0, 200, 10)})
df['result'] = df['earnings'] - df['costs']
fig, ax = plt.subplots(figsize=(15, 8))
df.groupby("date")["result"].sum().cumsum().reset_index(drop=True).plot(
ax=ax,
marker='D',
lw=2,
color="purple")
df.groupby("date")[['earnings', 'costs', 'result']].sum().plot(
ax=ax,
kind="bar",
rot=0,
width=0.8,
color=["blue", "red", "gold"])
ax.legend(['Cumul.result', 'earnings', 'costs', 'result'])
# shorten the tick labels to only the date
ax.set_xticklabels([tick.get_text()[:10] for tick in ax.get_xticklabels()])
ax.set_ylim(ymin=0) # bar plots are nicer when bars start at zero
plt.tight_layout()
plt.show()
Here I post the solution:
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
a=[11.3,222,22, 63.8,9]
b=[0.12,-1.0,1.82,16.67,6.67]
l=[i for i in range(5)]
plt.rcParams['font.sans-serif']=['SimHei']
fmt='%.1f%%'
yticks = mtick.FormatStrFormatter(fmt)
fig = plt.figure()
ax1 = fig.add_subplot(111)
ax1.plot(l, b,'og-',label=u'A')
ax1.yaxis.set_major_formatter(yticks)
for i,(_x,_y) in enumerate(zip(l,b)):
plt.text(_x,_y,b[i],color='black',fontsize=8,)
ax1.legend(loc=1)
ax1.set_ylim([-20, 30])
ax1.set_ylabel('ylabel')
plt.legend(prop={'family':'SimHei','size':8})
ax2 = ax1.twinx()
plt.bar(l,a,alpha=0.1,color='blue',label=u'label')
ax2.legend(loc=2)
plt.legend(prop={'family':'SimHei','size':8},loc="upper left")
plt.show()
The key to this is the command
ax2 = ax1.twinx()

python matplotlib markerscale for area plot [duplicate]

This question already has answers here:
increase the linewidth of the legend lines in matplotlib
(4 answers)
Closed 5 years ago.
What I want to do is a plot of generation and demand in an electricity grid with Matplotlib in Python. This is my code:
fig,ax = plt.subplots(figsize=(14,8))
generation.plot(kind="area", ax=ax, linewidth=1, alpha=0.9)
load.plot(kind="area", ax=ax, linewidth=1, alpha=0.9)
labels = ['Erzeugung', 'Last']
ax.legend(labels, ncol=4, loc="best", markerscale=10)
ax.set_ylabel("GW")
ax.set_xlabel("")
plt.tight_layout()
The result looks like this:
My question is about the markerscale: Why doesn't it work with this kind of plot? The problem is the bad visibility of the marker in the legend, it would be much better with a thicker line or even a box. And this without increasing the line width of the lines. Any ideas?
You can set the line size manually after creation as follows:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
fig, ax = plt.subplots(figsize=(14,8))
generation = pd.DataFrame(np.random.randint(10, 14, 10))
load = pd.DataFrame(np.random.randint(2, 5, 10))
generation.plot(kind="area", ax=ax, linewidth=1, alpha=0.9)
load.plot(kind="area", ax=ax, linewidth=1, alpha=0.9)
labels = ['Erzeugung', 'Last']
legend = ax.legend(labels, ncol=4, loc="best")
for handle in legend.legendHandles:
handle.set_linewidth(3.0)
ax.set_ylabel("GW")
ax.set_xlabel("")
plt.tight_layout()
plt.show()
Giving you something like:

Categories

Resources