How to add xticks to plot with secondary y-axis - python

I have the following MWE, which plots two columns of a pandas dataframe in one single plot where each column has its own y-axis:
# Minimal example: two columns of one DataFrame in a single plot,
# each column with its own y-axis.
df = pd.DataFrame({'t': [2000, 2002, 2004, 2006],
                   'a': [2, 4, 6, 8],
                   'b': [100, 200, 300, 400]})
fig = plt.figure(figsize=(10, 10))
# Attempt to place the x-ticks at the years (the call this question is about).
plt.xticks(np.arange(2000, 2020, 2))
ax1 = df['b'].plot(label="b")
ax1.set_ylabel("b")
ax1.set_ylim(0, 500)
ax2 = df['a'].plot(secondary_y=True, label="a")
ax2.set_ylabel("a")
ax2.set_ylim(0, 5)
# Collect the legend entries of every axes in the figure into one legend.
handles, labels = [], []
for ax in fig.axes:
    for h, lbl in zip(*ax.get_legend_handles_labels()):
        handles.append(h)
        labels.append(lbl)
plt.legend(handles, labels)
However, the x-ticks are missing although I have tried to add them with this line of code: plt.xticks(np.arange(2000, 2020, 2)).
What command do I need to add them besides what I already have?

plt.xticks(np.arange(2000, 2020, 2)) sets the ticks to be at positions 2000, 2002, etc.
However, the x-axis of your plot only ranges from 0 to 3, because the data is plotted against the index of the dataframe.
Either set the index to the values of the "t" column,
# Make the years in "t" the index, so that they become the x-values of the plot.
df = df.set_index("t")
ax1 = df['b'].plot(label="b")
or plot the columns directly
# Name the x and y columns explicitly instead of plotting against the index.
ax1 = df.plot(x="t", y="b", label="b")

You need to specify the limits of the x-axis. The following solution can help:
df = pd.DataFrame({'t': [2000, 2002, 2004, 2006],
                   'a': [2, 4, 6, 8],
                   'b': [100, 200, 300, 400]})
# Use the years as the index so the lines are drawn at x = 2000..2006.
# With the default 0..3 index the data would lie outside the x-limits set below.
df = df.set_index('t')
fig = plt.figure(figsize=(10, 10))
ax1 = df['b'].plot(label="b")
ax1.set_ylabel("b")
ax1.set_ylim(0, 500)
ax2 = df['a'].plot(secondary_y=True, label="a")
ax2.set_ylabel("a")
ax2.set_ylim(0, 5)
# Label, limit and tick the x-axis once the data has been plotted.
ax1.set_xlabel("t")
ax1.set_xlim(2000, 2020)
ax1.set_xticks(np.arange(2000, 2020, 2))
# Collect the legend entries of every axes in the figure into one legend.
handles, labels = [], []
for ax in fig.axes:
    for h, lbl in zip(*ax.get_legend_handles_labels()):
        handles.append(h)
        labels.append(lbl)
plt.legend(handles, labels)

Related

Change part color/line of graph in subplots based on difference t and t+1

I have the following dataframe.
# Yearly sales per city, indexed by year.
data = {
    'Year': [2018, 2019, 2020, 2021],
    'Paris': [1020, 20200, 2000, 6000],
    'Tokyo': [102000, 320000, 402000, 100],
}
sales = pd.DataFrame(data).set_index('Year')

# One subplot per city, side by side, sharing the x-axis.
fig, a = plt.subplots(nrows=1, ncols=2, figsize=(10, 5), tight_layout=True, sharex='all')
sales.plot(ax=a, subplots=True, rot=90, color='grey', linewidth=1, linestyle='dashed')
which results in these graphs.
How can I change it so that the last part of each graph is drawn in another colour and thickness?
This should do the job:
import pandas as pd
import matplotlib.pyplot as plt

data = {
    'Year': [2018, 2019, 2020, 2021],
    'Paris': [1020, 20200, 2000, 6000],
    'Tokyo': [102000, 320000, 402000, 100],
}
sales = pd.DataFrame(data).set_index('Year')

fig, a = plt.subplots(nrows=1, ncols=2, figsize=(10, 5), tight_layout=True, sharex='all')
axs = sales.plot(ax=a, subplots=True, rot=90, color='grey', linewidth=1, linestyle='dashed')

# Draw the segment from 2020 onwards again, on top of the grey line,
# with a thicker stroke and a per-city colour.
recent = sales[sales.index >= 2020]
for axis, city, colour in zip(axs, ('Paris', 'Tokyo'), ('g', 'r')):
    axis.plot(recent.index, recent[city], color=colour, linewidth=3, linestyle='dashed')
plt.show()

adjust_text: set label distance to a line

I have the following dataframe:
# Two numeric columns, one labelled row per point.
d = {
    'a': [2, 3, 4.5],
    'b': [3, 2, 5],
}
row_labels = ["val1", "val2", "val3"]
df = pd.DataFrame(d, index=row_labels)
df.head()
a b
val1 2.0 3
val2 3.0 2
val3 4.5 5
I plotted this dataframe with the following code:
fig, ax = plt.subplots(figsize=(10, 10))
ax.scatter(df["a"], df["b"], s=1)
# Red diagonal line (y = x).
x1 = [0, 2512]
y1 = [0, 2512]
ax.plot(x1, y1, 'r-')
# set limits:
ax = plt.gca()
ax.set_xlim([0, 10])
ax.set_ylim([0, 10])
# add labels: one text per row, placed at the point itself
TEXTS = []
for idx, names in enumerate(df.index.values):
    x, y = df["a"].iloc[idx], df["b"].iloc[idx]
    TEXTS.append(ax.text(x, y, names, fontsize=12))
# Adjust text position and add lines
adjust_text(
    TEXTS,
    expand_points=(2.5, 2.5),
    expand_text=(2.5, 2),
    autoalign="xy",
    arrowprops=dict(arrowstyle="-", lw=1),
    ax=ax,
)
However, I can not find a way to push the labels away from the red diagonal line, in order to get this result:
You can use the regular matplotlib annotate function and change the direction of the offset depending on the position of the data point relative to the red line:
ax = df.plot.scatter('a', 'b')
ax.set_aspect(1)
ax.plot((0, 10), (0, 10), 'r-')
# Offset vector (-1, +1): up and to the left, i.e. away from the diagonal.
offset = np.array([-1, 1])
for s, xy in df.iterrows():
    xy = xy.to_numpy()
    # Points above the line (b > a) are labelled up-left, all others down-right.
    direction = 1 if xy[1] > xy[0] else -1
    ax.annotate(s, xy, xy + direction * offset, ha='center', va='center',
                arrowprops=dict(arrowstyle='-', lw=1))

Python Matplotlib bar chart with categories

I have data (duration of a certain activity) for two categories (Monday, Tuesday). I would like to generate a bar chart (see 1). Bars above a threshold (different for both categories) should have a different color; e.g. on Mondays data above 10 hours should be blue and on Tuesdays above 12 hours. Any ideas how I could implement this in seaborn or matplotlib?
Thank you very much.
# Activity durations (in hours, per the thresholds described above), one list per weekday.
Monday = [5,6,8,12,5,20,4, 8]
Tuesday=[3,5,8,12,4,17]
Goal
You could draw two barplots, using an array of booleans for the coloring (hue):
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

monday = np.array([5, 6, 8, 12, 5, 20, 4, 8])
tuesday = np.array([3, 5, 8, 12, 4, 17])

sns.set_style('whitegrid')
fig, (ax0, ax1) = plt.subplots(ncols=2, figsize=(10, 4), sharey=True)
# Bars at or above the day's threshold are coloured 'tomato', the rest 'skyblue'.
palette = {False: 'skyblue', True: 'tomato'}

# (axes, day name, durations, threshold) for each panel
panels = [
    (ax0, 'Monday', monday, 10),
    (ax1, 'Tuesday', tuesday, 12),
]
for axis, day, durations, threshold in panels:
    positions = np.arange(len(durations))
    above = durations >= threshold
    sns.barplot(x=positions, y=durations, hue=above, palette=palette, dodge=False, ax=axis)
    axis.set_xlabel(day, size=20)
    axis.set_xticks([])
    axis.legend_.remove()

sns.despine()
plt.tight_layout()
# Remove the horizontal gap so both panels read as one chart.
plt.subplots_adjust(wspace=0)
plt.show()

Heatmap with multiple colormaps by column

I have a dataframe where each column contains values considered "normal" if they fall within an interval, which is different for every column:
# The main df
df = pd.DataFrame(
    {
        "A": [20, 10, 7, 39],
        "B": [1, 8, 12, 9],
        "C": [780, 800, 1200, 250],
    }
)
The df_info represents the intervals for each column of df.
So for example df_info["A"][0] is the min for the column df["A"] and df_info["A"][1] represents the max for the column df["A"] and so on.
# Row 0 holds the lower bound and row 1 the upper bound of each column's "normal" interval.
df_info = pd.DataFrame(
    {
        "A": [22, 35],
        "B": [5, 10],
        "C": [850, 900],
    }
)
Thanks to this SO answer I was able to create a custom heatmap that prints values below the range in blue, values above the range in red, and values within the range in white. Just remember that each column has a different range, so I normalized according to this:
# Rescale each column so that its interval bounds map to 0 (min) and 1 (max).
# Values below the interval become negative, values above it exceed 1.
df_norm = pd.DataFrame()
for col in df:
    col_min = df_info[col][0]
    col_max = df_info[col][1]
    df_norm[col] = (df[col] - col_min) / (col_max - col_min)
And finally printed my heatmap
# Global colour range over all normalized columns.
vmin = df_norm.min().min()
vmax = df_norm.max().max()
# Positions of the normalized values 0 and 1 on the colormap's 0..1 scale.
norm_zero = (0 - vmin) / (vmax - vmin)
norm_one = (1 - vmin) / (vmax - vmin)
# Blue below the interval, white inside it, red above it.
colors = [[0, 'darkblue'],
          [norm_zero, 'white'],
          [norm_one, 'white'],
          [1, 'darkred']]
cmap = LinearSegmentedColormap.from_list('', colors)
fig, ax = plt.subplots()
# Plot the normalized frame; set_facecolor() returns None, so it is called
# separately rather than chained onto the assignment of `ax`.
sns.heatmap(data=df_norm,
            annot=True,
            annot_kws={'size': 'large'},
            mask=None,
            cmap=cmap,
            vmin=vmin,
            vmax=vmax,
            ax=ax)
ax.set_facecolor('white')
In the example you can see that the third column has values much higher/lower compared to the 0-1 interval (and to the first column), so they "absorb" all the shades of red and blue.
QUESTION:
What I want to obtain is use the entire shades of red/blue for each column or at least to reduce the perceptual difference between (for example) the first and third column.
I had thought of:
create a custom colormap where each colormap normalization is performed by column
use multiple colormaps, each one applied to a different column
applying a colormap mpl.colors.LogNorm but I'm not sure how to use it with my custom LinearSegmentedColormap
Using a mask per column, you could draw the heatmap column per column, each with its own colormap:
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
from matplotlib.cm import ScalarMappable

df = pd.DataFrame({"A": [20, 10, 7, 39],
                   "B": [1, 8, 12, 9],
                   "C": [780, 800, 1200, 250]})
df_info = pd.DataFrame({"A": [22, 35],
                        "B": [5, 10],
                        "C": [850, 900]})

# Rescale each column so that its interval bounds map to 0 (min) and 1 (max).
df_norm = pd.DataFrame()
for col in df:
    col_min = df_info[col][0]
    col_max = df_info[col][1]
    df_norm[col] = (df[col] - col_min) / (col_max - col_min)

fig, ax = plt.subplots()
# Draw the heatmap one column at a time, each with a colormap fitted to that column.
for col in df:
    vmin = df_norm[col].min()
    vmax = df_norm[col].max()
    # Positions of the normalized values 0 and 1 on this column's 0..1 colour scale.
    norm_zero = (0 - vmin) / (vmax - vmin)
    norm_one = (1 - vmin) / (vmax - vmin)
    colors = [[0, 'darkblue'],
              [norm_zero, 'white'],
              [norm_one, 'white'],
              [1, 'darkred']]
    cmap = LinearSegmentedColormap.from_list('', colors)
    # Mask every column except the current one, so only `col` is drawn in this pass.
    mask = df.copy()
    for col_m in mask:
        mask[col_m] = col != col_m
    # Colour by the normalized values, but annotate with the original numbers.
    sns.heatmap(data=df_norm,
                annot=df.to_numpy(), annot_kws={'size': 'large'}, fmt="g",
                mask=mask,
                cmap=cmap, vmin=vmin, vmax=vmax, cbar=False, ax=ax)
ax.set_facecolor('white')

# A single schematic colorbar shared by all columns, with evenly spaced ticks.
colors = [[0, 'darkblue'],
          [1 / 3, 'white'],
          [2 / 3, 'white'],
          [1, 'darkred']]
cmap = LinearSegmentedColormap.from_list('', colors)
cbar = plt.colorbar(ScalarMappable(cmap=cmap), ax=ax, ticks=[0, 1 / 3, 2 / 3, 1])
cbar.ax.yaxis.set_ticklabels(['min\nlimit', 'min', 'max', 'max\nlimit'])
plt.tight_layout()
plt.show()
You can re-scale your df_norm before plotting:
# alternative method to scale: row 0 of df_info is the lower bound, row 1 the upper bound
df_norm = (df - df_info.iloc[0]) / (df_info.iloc[1] - df_info.iloc[0])
# scale the norm: min-max rescale every column to the 0..1 range
df_plot = (df_norm - df_norm.min()) / (df_norm.max() - df_norm.min())
# heat map on the normalized `df_plot`
# use values in `df_norm` to annotate
# color bar doesn't make sense so we remove it
sns.heatmap(df_plot, annot=df_norm, cmap='RdBu_r', cbar=False)
Output:

Why does setting hue in seaborn plot change the size of a point?

The plot I am trying to make needs to achieve 3 things.
If a quiz is taken on the same day with the same score, that point needs to be bigger.
If two quiz scores overlap there needs to be some jitter so we can see all points.
Each quiz needs to have its own color
Here is how I am going about it.
import matplotlib.pyplot as plt  # needed for plt.show() below
import seaborn as sns
import pandas as pd

# One row per quiz result; 'Size' is the marker size to use for that point.
data = {'Quiz': [1, 1, 2, 1, 2, 1],
        'Score': [7.5, 5.0, 10, 10, 10, 10],
        'Day': [2, 5, 5, 5, 11, 11],
        'Size': [115, 115, 115, 115, 115, 355]}
df = pd.DataFrame.from_dict(data)
sns.lmplot(x='Day', y='Score', data=df, fit_reg=False, x_jitter=True,
           scatter_kws={'s': df.Size})
plt.show()
Setting the hue, which almost does everything I need, results in this.
import matplotlib.pyplot as plt  # needed for plt.show() below
import seaborn as sns
import pandas as pd

# One row per quiz result; 'Size' is the marker size to use for that point.
data = {'Quiz': [1, 1, 2, 1, 2, 1],
        'Score': [7.5, 5.0, 10, 10, 10, 10],
        'Day': [2, 5, 5, 5, 11, 11],
        'Size': [115, 115, 115, 115, 115, 355]}
df = pd.DataFrame.from_dict(data)
# Same plot as before, now coloured by quiz via `hue`.
sns.lmplot(x='Day', y='Score', data=df, fit_reg=False, hue='Quiz', x_jitter=True,
           scatter_kws={'s': df.Size})
plt.show()
Is there a way I can have hue while keeping the size of my points?
It doesn't work because when you are using hue, seaborn does two separate scatterplots and therefore the size argument you are passing using scatter_kws= no longer aligns with the content of the dataframe.
You can recreate the same effect by hand however:
import matplotlib.pyplot as plt
import numpy as np

x_col = 'Day'
y_col = 'Score'
hue_col = 'Quiz'
size_col = 'Size'
jitter = 0.2  # standard deviation of the horizontal noise added to each point

fig, ax = plt.subplots()
# One scatter call per hue group, so each group's sizes stay aligned with its own rows.
for q, temp in df.groupby(hue_col):
    n = len(temp[x_col])
    x = temp[x_col] + np.random.normal(scale=jitter, size=(n,))
    ax.scatter(x, temp[y_col], s=temp[size_col], label=q)
ax.set_xlabel(x_col)
ax.set_ylabel(y_col)
ax.legend(title=hue_col)

Categories

Resources