Rolling Mean Legend Labels - python

I am trying to plot the rolling mean on a double-axis graph. However, I am unable to create my legend correctly. Any pointers?
fig, ax1 = plt.subplots()
ax2 = ax1.twinx()
# df6 = t100df5.rolling(window=12).mean()
lns1 = ax1.plot(
df6,
label = ['Alpha', 'Beta'], # how do I add 'Beta' label correctly?
linewidth = 2.0)
lns2 = ax2.plot(temp,
label = 'Dollars',
color='black')
lns = lns1+lns2
labs = [l.get_label() for l in lns]
L = ax1.legend(lns, labs, loc = 0, frameon = True)
df6 looks like this:
Alpha Beta
TIME
1990-01-01 NaN NaN
1990-02-01 NaN NaN
1990-03-01 NaN NaN
1990-04-01 NaN NaN
1990-05-01 NaN NaN
... ... ...
2019-08-01 10.012447 8.331901
2019-09-01 9.909044 8.263813
2019-10-01 9.810155 8.185539
2019-11-01 9.711690 8.085016
2019-12-01 9.619968 8.03533
And temp looks like this:
Dollars
date
1994-01-01 NaN
1994-02-01 NaN
1994-03-01 225.664248
1994-04-01 217.475670
1995-01-01 216.464499
... ...
2018-04-01 179.176545
2019-01-01 177.624369
2019-02-01 178.731035
2019-03-01 176.624608
2019-04-01 177.357060
Note that the datetime objects are the indices for the dataframes.
How can I add a legend with appropriate labels for the graph below? The black line is from temp and both of the other lines are from df6.

I just added another ax1.plot statement like this:
# Plot each column of df6 with its own ax1.plot call so that every line
# carries its own label, then build one combined legend for both y-axes.
fig, ax1 = plt.subplots()
ax2 = ax1.twinx()

lns1 = ax1.plot(
    df6.index, df6.Alpha,  # the comma after df6.Alpha was missing (SyntaxError)
    label='Alpha',
    linewidth=2.0)
lns1_5 = ax1.plot(df6.index, df6.Beta, label='Beta')
lns2 = ax2.plot(temp,
                label='Dollars',
                color='black')

# Each plot call returns a list of Line2D handles; concatenating the lists
# lets a single legend on ax1 describe the lines drawn on both axes.
lns = lns1 + lns1_5 + lns2
labs = [l.get_label() for l in lns]
L = ax1.legend(lns, labs, loc=0, frameon=True)

Related

python: complex plot with sns.FacetGrid() and secondary Y-axes

I have the following dataset:
trust_id cohort cnt
index_event_datetime
2017-06-01 chel sepsis 216.0
2017-07-01 chel sepsis 191.0
2017-08-01 chel sepsis 184.0
2017-09-01 chel sepsis 186.0
2017-10-01 chel sepsis 173.0
... ... ... ...
2022-02-01 ouh_ sepsis_thrombocytopenia 5.0
2022-03-01 ouh_ sepsis_thrombocytopenia NaN
2022-04-01 ouh_ sepsis_thrombocytopenia NaN
2022-05-01 ouh_ sepsis_thrombocytopenia NaN
2022-06-01 ouh_ sepsis_thrombocytopenia NaN
and I want to produce 4 plots for each trust with the count among three diseases:
grid = sns.FacetGrid(
incident_cnt_long.reset_index(),
col="trust_id",
hue="cohort",
col_wrap=2,
legend_out=True,
palette=["#FF4613", "#00FFAA", "#131E29"]
)
grid.map(sns.lineplot, "index_event_datetime", "cnt")
for ax in grid.axes:
# ax.xaxis.set_major_locator(mdates.MonthLocator((1, 7)))
# ax.xaxis.set_major_formatter(mdates.DateFormatter("%b-%Y"))
ax.xaxis.set_tick_params(rotation=90)
ax.set_xlabel(None)
ax.set_ylabel(None)
# grid.add_legend()
grid.fig.legend(["SEP", "STH", "SDI"])
grid.fig.supylabel("Patients (no)")
grid.fig.supxlabel("Date of index event", va="top")
grid.fig.set_size_inches(plt.rcParams["figure.figsize"])
grid.tight_layout()
plt.show()
fig = grid.fig
I get the following figure:
I want to plot the SDI cohort on a secondary y-axis in each plot. So far I have tried defining a twin_lineplot function as follows:
def twin_lineplot(x, y, color, **kwargs):
    """Draw a seaborn line plot of ``y`` against ``x`` on a new twin y-axis.

    Intended to be passed to ``FacetGrid.map``. Every call creates a fresh
    twin axes via ``plt.twinx()`` (presumably twinning the currently active
    axes -- confirm against the matplotlib docs) and draws on it.

    Args:
        x: Data for the x-axis.
        y: Data for the y-axis.
        color: Line colour forwarded to ``sns.lineplot``.
        **kwargs: Extra keyword arguments forwarded to ``sns.lineplot``.
    """
    twin_axes = plt.twinx()
    sns.lineplot(x=x, y=y, color=color, ax=twin_axes, **kwargs)
grid = sns.FacetGrid(
incident_cnt_long.reset_index(),
col="trust_id",
hue="cohort",
col_wrap=2,
legend_out=True,
palette=["#FF4613", "#00FFAA", "#131E29"]
)
grid.map(sns.lineplot, "index_event_datetime", "cnt")
grid.map(twin_lineplot, "index_event_datetime", "cnt")
for ax in grid.axes:
# ax.xaxis.set_major_locator(mdates.MonthLocator((1, 7)))
# ax.xaxis.set_major_formatter(mdates.DateFormatter("%b-%Y"))
ax.xaxis.set_tick_params(rotation=90)
ax.set_xlabel(None)
ax.set_ylabel(None)
# grid.add_legend()
grid.fig.legend(["SEP", "STH", "SDI"])
grid.fig.supylabel("Patients (no)")
grid.fig.supxlabel("Date of index event", va="top")
grid.fig.set_size_inches(plt.rcParams["figure.figsize"])
grid.tight_layout()
plt.show()
fig = grid.fig
but I am not getting the desired output:

Set color-palette in Seaborn Grouped Barplot depending on values

I have a dataframe with positive and negative values for three kinds of variables.
labels variable value
0 -10e5 nat -38
1 2e5 nat 50
2 10e5 nat 16
3 -10e5 agr -24
4 2e5 agr 35
5 10e5 agr 26
6 -10e5 art -11
7 2e5 art 43
8 10e5 art 20
when values are negative I want the barplot to follow the color sequence:
n_palette = ["#ff0000","#ff0000","#00ff00"]
Instead when positive I want it to reverse the palette:
p_palette = ["#00ff00","#00ff00","#ff0000"]
I've tried this:
palette = ["#ff0000","#ff0000","#00ff00",
"#00ff00","#00ff00","#ff00",
"#00ff00","#00ff00","#ff00"]
ax = sns.barplot(x=melted['labels'], y=melted['value'], hue = melted['variable'],
linewidth=1,
palette=palette)
But I get the following output:
what I'd like is the first two bars of the group to become green and the last one red when values are positive.
You seem to want to do the coloring depending on a criterion on two columns. It seems suitable to add a new column which uniquely labels that criterion.
Further, seaborn allows the palette to be a dictionary telling exactly which hue label gets which color. Adding barplot(..., order=[...]) would define a fixed order.
Here is some example code:
from matplotlib import pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
from io import StringIO
# Sample data as whitespace-separated text; the unnamed first column becomes
# the index when parsed.
data_str = ''' labels variable value
0 -10e5 nat -38
1 2e5 nat 50
2 10e5 nat 16
3 -10e5 agr -24
4 2e5 agr 35
5 10e5 agr 26
6 -10e5 art -11
7 2e5 art 43
8 10e5 art 20
'''
# sep=r'\s+' replaces the deprecated delim_whitespace=True; 'labels' is read
# as str so it is not parsed as a float.
melted = pd.read_csv(StringIO(data_str), sep=r'\s+', dtype={'labels': str})

# Build a helper column that combines the variable name with the sign of the
# value, e.g. 'nat-' or 'art+', so each combination can get its own colour.
melted['legend'] = np.where(melted['value'] < 0, '-', '+')
melted['legend'] = melted['variable'] + melted['legend']

# A dict palette maps every hue label to an explicit colour.
palette = {'nat-': "#ff0000", 'agr-': "#ff0000", 'art-': "#00ff00",
           'nat+': "#00ff00", 'agr+': "#00ff00", 'art+': "#ff0000"}
ax = sns.barplot(x=melted['labels'], y=melted['value'], hue=melted['legend'],
                 linewidth=1, palette=palette)
ax.axhline(0, color='black')  # reference line at zero
plt.show()
PS: To remove the legend: ax.legend_.remove(). Or to have a legend with multiple columns: ax.legend(ncol=3).
A different approach, directly with the original dataframe, is to create two bar plots: one for the negative values and one for the positive. For this to work well, it is necessary that the 'labels' column (the x=) is explicitly made categorical. Also adding pd.Categorical(..., categories=['nat', 'agr', 'art']) for the 'variable' column could fix an order.
This will generate a legend with the labels twice with different colors. Depending on what you want, you can remove it or create a more custom legend.
An idea is to add the labels under the positive and on top of the negative bars:
sns.set()
# sep=r'\s+' replaces the deprecated delim_whitespace=True.
melted = pd.read_csv(StringIO(data_str), sep=r'\s+', dtype={'labels': str})

# One palette per sign: the colours are swapped between positive and negative.
palette_pos = {'nat': "#00ff00", 'agr': "#00ff00", 'art': "#ff0000"}
palette_neg = {'nat': "#ff0000", 'agr': "#ff0000", 'art': "#00ff00"}

# 'labels' is made categorical explicitly so that the two bar plots drawn on
# the same axes work well together.
melted['labels'] = pd.Categorical(melted['labels'])

# First draw the negative values, then the non-negative ones on the same axes.
ax = sns.barplot(data=melted[melted['value'] < 0], x='labels', y='value', hue='variable',
                 linewidth=1, palette=palette_neg)
sns.barplot(data=melted[melted['value'] >= 0], x='labels', y='value', hue='variable',
            linewidth=1, palette=palette_pos, ax=ax)

# The automatic legend would list every variable twice (once per colour).
ax.legend_.remove()
ax.axhline(0, color='black')
ax.set_xlabel('')
ax.set_ylabel('')

# Annotate every bar with its variable name next to the zero line: on top of
# the negative bars and under the positive ones.
for bar_container in ax.containers:
    label = bar_container.get_label()
    for p in bar_container:
        x = p.get_x() + p.get_width() / 2
        h = p.get_height()
        if not np.isnan(h):  # skip bars without a height
            ax.text(x, 0, label + '\n\n' if h < 0 else '\n\n' + label, ha='center', va='center')
plt.show()
Still another option involves sns.catplot() which could be clearer when a lot of data is involved:
sns.set()
# sep=r'\s+' replaces the deprecated delim_whitespace=True.
melted = pd.read_csv(StringIO(data_str), sep=r'\s+', dtype={'labels': str})

# Helper column combining variable name and sign of the value ('nat-', 'art+', ...).
melted['legend'] = np.where(melted['value'] < 0, '-', '+')
melted['legend'] = melted['variable'] + melted['legend']
palette = {'nat-': "#ff0000", 'agr-': "#ff0000", 'art-': "#00ff00",
           'nat+': "#00ff00", 'agr+': "#00ff00", 'art+': "#ff0000"}

# One subplot column per 'labels' value; x-axes are independent, y is shared.
g = sns.catplot(kind='bar', data=melted, col='labels', y='value', x='legend',
                linewidth=1, palette=palette, sharex=False, sharey=True)
for ax in g.axes.flat:
    ax.axhline(0, color='black')
    ax.set_xlabel('')
    ax.set_ylabel('')
plt.show()

matplotlib data stack on Y axis

I have this code:
fig = plt.figure(figsize=(20,8))
ax1 = fig.add_subplot(111, ylabel='Price in $')
df['Close'].plot(ax=ax1, color='r', lw=2.)
signals[['short_mavg', 'long_mavg']].plot(ax=ax1, lw=2.)
ax1.plot(signals.loc[signals.positions == 1.0].index,
signals.short_mavg[signals.positions == 1.0],
'^', markersize=10, color='m')
ax1.plot(signals.loc[signals.positions == -1.0].index,
signals.short_mavg[signals.positions == -1.0],
'v', markersize=10, color='k')
plt.show()
The problem is that all of the '^' and 'v' markers and the 'Date' values from df are placed on the Y axis.
I have added all the code from my Jupyter notebook and a sample of the .csv data:
csv data:
2013.12.17,00:00,0.89469,0.89571,0.88817,0.88973,4
2013.12.18,00:00,0.88974,0.89430,0.88200,0.88595,4
code:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
df = pd.read_csv("AUDUSD.csv",header = None)
df.columns = ['Date', 'Time', 'Open', 'High', 'Low', 'Close', 'Volume']
df=df.set_index('Date')
second df:
short_window = 20
long_window = 90
signals = pd.DataFrame(index=df.index)
signals['signal'] = 0.0
# Calculate the short and long moving averages of the closing price.
signals['short_mavg'] = df['Close'].rolling(short_window).mean()
signals['long_mavg'] = df['Close'].rolling(long_window).mean()
# Set the signal to 1.0 where the short average is above the long one, from
# row `short_window` onwards. A single positional .iloc assignment is used
# instead of chained indexing (signals['signal'][n:] = ...), which does not
# update the frame when pandas Copy-on-Write is active.
signal_col = signals.columns.get_loc('signal')
signals.iloc[short_window:, signal_col] = np.where(
    signals['short_mavg'].iloc[short_window:] > signals['long_mavg'].iloc[short_window:],
    1.0, 0.0)
# Difference between consecutive signals: 1.0 / -1.0 mark a change of signal.
signals['positions'] = signals['signal'].diff()
I created a graph with your code using Yahoo Finance currency data. It may be that the time series data is not indexed. Please check your data and my data content.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
import yfinance as yf
ticker = yf.Ticker("AUDUSD=X")
df = ticker.history(start='2013-01-01', end='2021-01-01')
short_window = 20
long_window = 90
signals = pd.DataFrame(index=df.index)
signals['signal'] = 0.0
# Calculate the short and long moving averages of the closing price.
signals['short_mavg'] = df['Close'].rolling(short_window).mean()
signals['long_mavg'] = df['Close'].rolling(long_window).mean()
# Set the signal to 1.0 where the short average is above the long one, from
# row `short_window` onwards. A single positional .iloc assignment is used
# instead of chained indexing (signals['signal'][n:] = ...), which does not
# update the frame when pandas Copy-on-Write is active.
signal_col = signals.columns.get_loc('signal')
signals.iloc[short_window:, signal_col] = np.where(
    signals['short_mavg'].iloc[short_window:] > signals['long_mavg'].iloc[short_window:],
    1.0, 0.0)
# Difference between consecutive signals: 1.0 / -1.0 mark a change of signal.
signals['positions'] = signals['signal'].diff()
fig = plt.figure(figsize=(20,8))
ax1 = fig.add_subplot(111, ylabel='Price in $')
df['Close'].plot(ax=ax1, color='r', lw=2.)
signals[['short_mavg', 'long_mavg']].plot(ax=ax1, lw=2.)
ax1.plot(signals.loc[signals.positions == 1.0].index,
signals.short_mavg[signals.positions == 1.0],
'^', markersize=10, color='m')
ax1.plot(signals.loc[signals.positions == -1.0].index,
signals.short_mavg[signals.positions == -1.0],
'v', markersize=10, color='k')
plt.show()
signals
signal short_mavg long_mavg positions
Date
2012-12-31 0.0 NaN NaN NaN
2013-01-01 0.0 NaN NaN 0.0
2013-01-02 0.0 NaN NaN 0.0
2013-01-03 0.0 NaN NaN 0.0
2013-01-04 0.0 NaN NaN 0.0
... ... ... ... ...
2020-12-25 1.0 0.749732 0.727486 0.0
2020-12-28 1.0 0.750791 0.727987 0.0
2020-12-29 1.0 0.751951 0.728454 0.0
2020-12-30 1.0 0.753096 0.728910 0.0
2020-12-31 1.0 0.754453 0.729403 0.0

Plotting multiple lines in the same graph for every different entry in a column

My dataset looks like this:
Town week price sales
A 1 1.1 101
A 2 1.2 303
A 3 1.3 234
B 1 1.2 987
B 2 1.5 213
B 3 3.9 423
C 1 2.4 129
C 2 1.3 238
C 3 1.3 132
Now I need to make a single figure with 3 lines (each representing a different town), where I plot the sales and price per week. I know how to do it when I take the mean of the towns, but I can't figure out how to do it per Town.
data = pd.read_excel("data.xlsx")
dfEuroAvg = data[data['Product'] == "Euro"].groupby('Week').mean()
t = np.arange(1, 50, 1)
y3 = dfEuroAvg['Sales']
y4 = dfEuroAvg['Price']
fig, ax2 = plt.subplots()
color = 'tab:green'
ax2.set_xlabel('Week')
ax2.set_ylabel('Sales', color = color)
ax2.plot(t, y3, color = color)
ax2.tick_params(axis = 'y', labelcolor = color)
ax3 = ax2.twinx()
color = 'tab:orange'
ax3.set_ylabel('Price', color=color)
ax3.plot(t, y4, color=color)
ax3.tick_params(axis='y', labelcolor=color)
ax2.set_title("product = Euro, Sales vs. Price")
EDIT: On the X-axis are the weeks and on the Y-axis are the price and sales.
This is one way of doing it using groupby to form groups based on Town and then plot the price and sales using a secondary y axis
fig, ax = plt.subplots(figsize=(8, 6))
# Select the columns with a list (double brackets); selecting several columns
# with a bare tuple after groupby is not supported by current pandas.
df_group = data.groupby('Town')[['week', 'price', 'sales']]
ylabels = ['price', 'sales']
colors = ['r', 'g', 'b']

# One colour per town: solid line for price on the left axis, dashed line
# for sales on the secondary (right) axis.
for i, key in enumerate(df_group.groups.keys()):
    town_data = df_group.get_group(key)
    town_data.plot('week', 'price', color=colors[i], ax=ax, label=key)
    town_data.plot('week', 'sales', color=colors[i], linestyle='--', secondary_y=True, ax=ax)

# Replace the automatically generated legend with one built from the
# handles of the primary axes only (one entry per town).
handles, labels = ax.get_legend_handles_labels()
legends = ax.legend()
legends.remove()
plt.legend(handles, labels)

# The original referenced undefined names ax1/ax2. The primary axes is `ax`;
# pandas exposes the secondary axes created by secondary_y=True as ax.right_ax.
ax.set_ylabel('Price')
ax.right_ax.set_ylabel('Sales')
You will have to fetch the data for each town separately by filtering the dataframe.
# df = your dataframe with all the data
towns = ['A', 'B', 'C']
# Draw one price line per town by filtering the dataframe on the 'Town'
# column (capitalised as in the dataset; 'town' would raise a KeyError).
for town in towns:
    town_df = df[df['Town'] == town]
    plt.plot(town_df['week'], town_df['price'], label=town)
plt.legend()
plt.xlabel('Week')
plt.ylabel('Price')
plt.title('Price Graph')
plt.show()
Output:
I have done this for the price graph, you can similarly create a graph with Sales as the y-axis using the same steps
You may plot the pivoted data directly with pandas.
# pivot() arguments are passed by keyword: positional use fails on pandas >= 2.0.
# Each pivot yields one column per town, so .plot() draws one line per town.
ax = df.pivot(index="week", columns="Town", values="price").plot()
ax2 = df.pivot(index="week", columns="Town", values="sales").plot(secondary_y=True, ax=ax)
Complete example:
import io
import pandas as pd
import matplotlib.pyplot as plt
u = """Town week price sales
A 1 1.1 101
A 2 1.2 303
A 3 1.3 234
B 1 1.2 987
B 2 1.5 213
B 3 3.9 423
C 1 2.4 129
C 2 1.3 238
C 3 1.3 132"""
# sep=r"\s+" replaces the deprecated delim_whitespace=True.
df = pd.read_csv(io.StringIO(u), sep=r"\s+")

# Pivot to one column per town (keyword arguments are required on
# pandas >= 2.0) and plot the prices as dashed lines on the left axis.
ax = df.pivot(index="week", columns="Town", values="price").plot(linestyle="--", legend=False)
# Reset the colour cycle so the sales lines reuse the colours of the price lines.
ax.set_prop_cycle(None)
# Sales go on a secondary y-axis that shares the same x-axis.
ax2 = df.pivot(index="week", columns="Town", values="sales").plot(secondary_y=True, ax=ax, legend=False)
ax.set_ylabel('Price')
ax2.set_ylabel('Sales')
ax2.legend()
plt.show()

Horizontal lines not appearing on matplotlib plot

Here is the sample data:
Datetime Price Data1 Data2 ShiftedPrice
0 2017-11-05 09:20:01.134 2123.0 12.23 34.12 300.0
1 2017-11-05 09:20:01.789 2133.0 32.43 45.62 330.0
2 2017-11-05 09:20:02.238 2423.0 35.43 55.62 NaN
3 2017-11-05 09:20:02.567 3423.0 65.43 56.62 NaN
4 2017-11-05 09:20:02.948 2463.0 45.43 58.62 NaN
I am trying to draw a plot between Datetime and Shiftedprice columns and horizontal lines for mean, confidence intervals of the ShiftedPrice column.
Have a look at the code below:
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
df1 = df.dropna(subset=['ShiftedPrice'])
df1
fig = plt.figure(figsize=(20,10))
ax = fig.add_subplot(121)
ax = df1.plot(x='Datetime',y='ShiftedPrice')
# Plotting the mean
ax.axhline(y=df1['ShiftedPrice'].mean(), color='r', linestyle='--', lw=2)
plt.show()
# Plotting Confidence Intervals
ax.axhline(y=df1['ShiftedPrice'].mean() + 1.96*np.std(df1['ShiftedPrice'],ddof=1), color='g', linestyle=':', lw=2)
ax.axhline(y=df1['ShiftedPrice'].mean() - 1.96*np.std(df1['ShiftedPrice'],ddof=1), color='g', linestyle=':', lw=2)
plt.show()
My problem is that horizontal lines are not appearing. Instead, I get the following message
ax.axhline(y=df1['ShiftedPrice'].mean(), color='r', linestyle='--', lw=2)
Out[22]: <matplotlib.lines.Line2D at 0xccc5c18>

Categories

Resources