changing colors in barh(stacked) - python

I have one table with regions (y axes) and in x values. Each region has two bars.
I would like to change colors. I would like that each little bar in barh has a different color.
I have this code where colors repeat:
For example, after pink I would like another color, not red again. Is it possible to change the color scale, for example to "tab10"?
Furthermore, is it possible to get a legend in which each color denotes one year (2010,2011,2012,2013,2014,2015,2016,2017,2018,2019)?
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
# Apply the 'ggplot' style sheet to the figures created below.
matplotlib.style.use('ggplot')
# First table: one row per region code (the index), ten columns keyed "2010_y" ... "2019_m".
plotdata = pd.DataFrame({
"2010_y":[0.63,0.56,0.89,0.94,0.68,0.63,0.34,0.54,0.77,0.77,0.86,0.42,0.49,0.70,1.12,1.10,0.30,0.67,0.30,0.23],
"2011_y":[1.10,0.54,0.94,0.81,0.80,0.87,0.90,0.99,0.53,0.78,0.98,0.99,0.87,1.09,1.18,0.89,0.89,0.70,1.05,0.86],
"2012_y":[1.39,0.97,0.98,0.99,0.95,1.10,1.07,1.10,1.02,1.05,0.97,0.92,0.94,0.91,0.85,1.22,1.24,1.08,1.08,1.01],
"2013_m":[1.86,1.34,1.12,1.25,1.26,1.08,1.31,1.58,1.22,1.26,1.37,1.14,1.23,1.00,1.13,1.49,1.14,1.37,1.25,1.23],
"2014_m":[1.59,1.04,1.03,1.10,1.44,1.43,1.33,1.81,1.56,1.22,1.08,1.36,1.11,0.87,1.10,0.68,1.31,1.26,1.25,1.61],
"2015_m":[0.71,1.14,1.18,0.99,0.87,0.74,0.91,0.57,0.79,1.09,1.16,0.89,1.15,0.98,1.24,0.75,0.85,0.83,1.02,0.71],
"2016_m":[0.67,1.22,1.00,0.96,0.97,0.96,0.95,0.79,1.06,1.04,0.83,1.06,1.07,0.98,0.66,1.06,0.99,1.13,1.03,1.05],
"2017_m":[0.68,0.84,0.73,0.73,0.79,0.98,0.80,0.54,0.85,0.91,0.55,0.65,0.86,0.71,0.71,0.77,0.95,0.80,0.81,0.87],
"2018_m":[0.74,1.28,1.21,1.13,0.99,1.00,1.21,0.97,1.07,0.91,1.38,1.34,1.25,1.65,1.28,0.97,1.11,1.09,1.20,1.13],
"2019_m":[0.63,1.07,0.92,1.10,1.24,1.21,1.18,1.10,1.12,0.97,0.80,1.24,1.04,1.11,0.74,1.06,1.23,1.07,1.01,1.30]
}, index=["ABR", "BAS", "CAL", "CAM", "EMR","FVG","LAZ","LIG","LOM","MAR","MOL","PIE","PUG","SAR","SIC","TOS","TAA","UMB","VDA","VEN"]
)
# Second table with the same index and column keys as plotdata.
# Note: the "TAA" row is 0.00 in every column.
plotdata3 = pd.DataFrame({
"2010_y":[4.12,1.44,5.73,3.91,3.43,0.00,4.26,0.00,1.95,2.65,0.00,4.82,3.61,2.17,3.05,2.66,0.00,2.86,3.00,1.15],
"2011_y":[0.00,0.39,0.00,0.00,0.00,1.32,0.00,0.00,1.02,0.00,2.72,0.00,0.00,0.00,0.83,0.00,0.00,0.00,0.00,0.00],
"2012_y":[0.08,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00],
"2013_m":[0.00,0.00,0.00,0.00,0.45,0.00,0.00,2.58,0.00,0.00,0.00,0.00,0.00,1.90,0.00,0.00,0.00,0.51,0.00,0.00],
"2014_m":[0.05,0.03,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.40,0.00,0.01,0.00,0.00,0.42,0.00,0.00,0.00,0.00,0.00],
"2015_m":[1.45,0.16,0.27,0.09,0.64,1.68,0.20,2.42,0.47,0.41,0.28,0.32,0.09,0.93,0.18,0.52,0.00,0.67,0.00,0.00],
"2016_m":[0.00,0.89,0.00,0.00,1.11,0.00,0.00,0.00,0.00,0.44,0.00,0.03,0.00,0.00,0.00,0.00,0.00,0.22,0.00,0.00],
"2017_m":[0.00,0.00,0.00,0.00,0.95,0.00,0.00,0.00,0.09,1.85,0.00,0.00,0.11,0.00,0.07,0.35,0.00,1.74,0.00,0.00],
"2018_m":[0.00,0.00,0.00,0.00,0.06,0.00,0.11,0.00,0.00,0.00,0.00,0.25,0.00,0.00,0.01,0.00,0.00,0.00,0.00,0.00],
"2019_m":[0.30,3.09,0.00,0.00,0.34,0.00,0.42,0.00,1.48,0.25,0.00,0.58,0.19,0.00,2.44,0.46,0.00,0.00,0.00,0.85]
}, index=["ABR", "BAS", "CAL", "CAM", "EMR","FVG","LAZ","LIG","LOM","MAR","MOL","PIE","PUG","SAR","SIC","TOS","TAA","UMB","VDA","VEN"]
)
fig, ax = plt.subplots()
# The plotting calls below need `stacked_data` and `stacked_data3`, but their
# only definitions were commented out, so the script stopped with a NameError.
# Bind them to the raw tables.  The optional percentage normalisation stays
# disabled: the "TAA" row of plotdata3 is all zeros and would divide by zero.
stacked_data = plotdata
stacked_data3 = plotdata3
#stacked_data = plotdata.apply(lambda x: x*100/sum(x), axis=1)
#stacked_data2 = plotdata2.apply(lambda x: x*100/sum(x), axis=1)
#stacked_data3 = plotdata3.apply(lambda x: x*100/sum(x), axis=1)
# Two stacked horizontal bars per region: position=0 and position=1 place the
# two tables on opposite sides of each tick.
stacked_data.plot(kind="barh", stacked=True, width=0.4,
                  ax=ax, position=0, edgecolor='black')
#stacked_data2.plot(kind="barh", stacked=True, width=0.25,
# ax=ay, position=1, hatch='//',edgecolor='black')
stacked_data3.plot(kind="barh", stacked=True, width=0.4,
                   ax=ax, position=1, edgecolor='black')
ax.get_legend().remove()
ax.set_ylim(top=len(stacked_data)-0.1)
#ax.set_xlim(right=len(stacked_data)-0.5)
ax.set_facecolor('xkcd:white')
# displaying the title
plt.title("titla")
# draw all four axes spines in black
for side in ('bottom', 'top', 'right', 'left'):
    ax.spines[side].set_color('black')

You can specify your colormap when plotting. Also use different colormaps for your barplots if you like, for example:
# Same call as in the question, with an explicit colormap for the stacked segments.
# NOTE(review): 'Accent' presumably has fewer distinct colours than the ten
# columns plotted here; a ten-colour map such as 'tab10' may suit better - confirm.
stacked_data3.plot(kind="barh", stacked=True, width=0.4,
ax=ax, position=1,edgecolor='black', cmap='Accent')
A list of available colormaps is available on matplotlib.org

Related

Pie chart enclosed with a black line (rectangle)

Below you can see my data and facet plot in matplotlib.
import pandas as pd
import numpy as np
# Use the fully qualified option name: the bare 'max_columns' pattern can match
# more than one registered option in recent pandas and then raises OptionError.
pd.set_option('display.max_columns', None)
import matplotlib.pyplot as plt
import matplotlib as mpl
# Data: ten sale groups g_1 ... g_10 with their open / closed counts
data = {
    'type_sale': [f'g_{i}' for i in range(1, 11)],
    'open': [70, 20, 24, 150, 80, 90, 60, 90, 20, 20],
    'closed': [30, 14, 20, 10, 20, 40, 10, 10, 10, 10],
}
df = pd.DataFrame(data, columns=list(data))

# Overall structure (open vs. closed) used for the pie panel
data1 = {
    'type_sale': ['open', 'closed'],
    'structure': [70, 30],
}
df1 = pd.DataFrame(data1, columns=list(data1))

# Ploting: 2x2 grid, pie in the top-left panel and the same stacked bar chart
# in the three remaining panels
labels = ['open', 'closed']
fig, axs = plt.subplots(2, 2, figsize=(10, 8))
plt.subplots_adjust(wspace=0.2, hspace=0.6)
panel_title = 'Stacked Bar Graph by dataframe'
df1.plot(x='type_sale', y='structure', labels=labels, autopct='%1.1f%%',
         kind='pie', title=panel_title, ax=axs[0, 0])
for bar_ax in (axs[0, 1], axs[1, 0], axs[1, 1]):
    df.plot(x='type_sale', kind='bar', stacked=True, title=panel_title, ax=bar_ax)
plt.suptitle(t=panel_title, fontsize=16)
plt.show()
If you compare the first plot (the pie) with the others, you can spot a big difference: the pie plot is not enclosed by a black line (rectangle), while the others are.
So can anybody help me with how to solve this problem?
After playing around myself, it seems that this is working, but I think the pie gets stretched, which doesn't look that good.
EDIT
found a better solution with set_adjustable
Below are two options for creating the pie chart; the frame and ticks differ slightly between them.
# Option 1: draw the pie with matplotlib's Axes.pie, passing frame=True, and set the title by hand.
axs[0,0].pie(df1['structure'],labels=labels,autopct='%1.1f%%',frame=True,radius=10)
axs[0,0].set_title('Stacked Bar Graph by dataframe')
# Option 2: keep the pandas pie plot, then switch the axes frame on and set the adjustable to 'datalim'.
df1.plot(x='type_sale', y='structure',labels=labels,autopct='%1.1f%%',kind='pie', title='Stacked Bar Graph by dataframe',ax=axs[0,0])
axs[0,0].set_frame_on(True)
axs[0,0].set_adjustable('datalim')

Legends disappear when {"hist":False} in seaborn distplot

I have the following function:
Say hue="animals" has three categories (dog, bird, horse), and we have two dataframes, df_m and df_f, containing the data of male animals only and of female animals only, respectively.
The function plots three distplot of y (e.g y="weight") one for each hue={dog,bird,horse}. In each subplot we plot df_m[y] and df_f[y] such that I can compare the weight of male dogs/female dogs, male birds/female birds, male horses/female horses.
If I set distkwargs={"hist":False} when calling the function, the legends ["F","M"] disappear for some reason. With distkwargs={"hist":True} the legends are shown.
def plot_multi_kde_cat(self, dfs, y, hue, subkwargs=None, distkwargs=None, legends=None):
    """
    Create a subplot multi_kde with categories in the same plot

    dfs: List
        - DataFrames for each category e.g one for male and one for females
    y: string
        - column whose distribution is drawn
    hue: string
        - column for which each category is plotted (in each subplot);
          the categories are read from dfs[0][hue].cat.categories
    subkwargs: dict or None
        - extra keyword arguments forwarded to plt.subplots
    distkwargs: dict or None
        - extra keyword arguments forwarded to sns.distplot
    legends: list or None
        - legend labels, matched to `dfs` by position

    Returns the figure and the flattened array of axes.
    """
    # None defaults instead of shared mutable {} / [] defaults
    subkwargs = {} if subkwargs is None else subkwargs
    distkwargs = {} if distkwargs is None else distkwargs
    legends = [] if legends is None else list(legends)

    hues = dfs[0][hue].cat.categories
    if len(hues) == 2:  # Only two categories: one row, two panels
        n_rows, n_cols = 1, 2
    else:  # Otherwise: square grid, unused axes are hidden below
        n_rows = n_cols = int(np.ceil(np.sqrt(len(hues))))
    fig, axes = plt.subplots(n_rows, n_cols, **subkwargs)
    # A 1x1 grid comes back as a bare Axes; atleast_1d keeps flatten() valid
    axes = np.atleast_1d(axes).flatten()

    for ax, hu in zip(axes, hues):
        for idx, df in enumerate(dfs):
            plot_kwargs = dict(distkwargs)
            if idx < len(legends):
                # An explicit label gives each curve its own legend entry,
                # so the legend no longer vanishes with {"hist": False}
                plot_kwargs.setdefault("label", legends[idx])
            sns.distplot(df.loc[df[hue] == hu, y], ax=ax, **plot_kwargs)
        ax.set_title(f"Segment: {hu}")
        if legends:
            ax.legend()

    # Hide the grid cells that received no category
    for ax in axes[len(hues):]:
        ax.set_visible(False)
    fig.tight_layout()
    return fig, axes
You can work around the problem by explicitly providing the label to the distplot. This forces a legend entry for each distplot. ax.legend() then already gets the correct labels.
Here is some minimal sample code to illustrate how everything works together:
from matplotlib import pyplot as plt
import pandas as pd
import seaborn as sns
import numpy as np
def plot_multi_kde_cat(dfs, y, hue, subkwargs=None, distkwargs=None, legends=None):
    """
    Draw one distplot panel per distinct value of `hue`, overlaying every frame in `dfs`.

    dfs: list of DataFrames, all drawn into every panel
    y: column whose distribution is drawn
    hue: column whose distinct values (taken from dfs[0]) define the panels
    subkwargs: dict or None, forwarded to plt.subplots
    distkwargs: dict or None, forwarded to sns.distplot
    legends: list or None, legend labels matched to `dfs` by position

    Returns the figure and the flattened array of axes.
    """
    # None defaults instead of shared mutable {} / [] defaults
    subkwargs = {} if subkwargs is None else subkwargs
    distkwargs = {} if distkwargs is None else distkwargs
    legends = [] if legends is None else list(legends)

    hues = np.unique(dfs[0][hue])
    fig, axes = plt.subplots(1, len(hues), **subkwargs)
    # With a single panel plt.subplots returns a bare Axes, not an array
    axes = np.atleast_1d(axes).flatten()
    for ax, hu in zip(axes, hues):
        # Iterate over dfs (not zip(dfs, legends)) so that every frame is
        # still drawn when fewer labels than frames are supplied
        for idx, df in enumerate(dfs):
            legend_label = legends[idx] if idx < len(legends) else None
            sns.distplot(df.loc[df[hue] == hu, y], ax=ax, label=legend_label, **distkwargs)
        ax.set_title(f"Segment: {hu}")
        if legends:
            ax.legend()
    return fig, axes
# Demo: N random animals per sex, weights drawn from different uniform ranges
N = 20
animals_m = np.random.choice(['tiger', 'horse'], N)
weights_m = np.random.uniform(100, 200, N)
df_m = pd.DataFrame({'animal': animals_m, 'weight': weights_m})
animals_f = np.random.choice(['tiger', 'horse'], N)
weights_f = np.random.uniform(80, 160, N)
df_f = pd.DataFrame({'animal': animals_f, 'weight': weights_f})
# KDE curves only (hist disabled); one legend label per DataFrame
plot_multi_kde_cat(
    dfs=[df_m, df_f],
    y='weight',
    hue='animal',
    subkwargs={},
    distkwargs={'hist': False},
    legends=['male', 'female'],
)
plt.show()

half (not split!) violin plots in seaborn

Currently seaborn offers functionality for split violinplots by setting split=True, according to a hue variable. I would like to make a 'half' violin plot, i.e. a plot where half of each violin is omitted. Such a plot depicts something similar to a pdf for each continuous variable, plotted on one side of each vertical line of each categorical variable only.
I have managed to trick seaborn to plot this with an extra data point outside the plotted range of values and an extra dummy hue, but I would like to know if this can be done without actually altering the dataset, e.g. within sns.violinplot() arguments.
For instance, this graph:
Was created by this snippet:
# imports
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# load dataset from seaborn
datalist = sns.get_dataset_names()
dataset_name = 'iris'
if dataset_name not in datalist:
    # Everything below needs `df`; stop here instead of failing later with a NameError
    raise SystemExit("Dataset with name: " + dataset_name + " was not found in the available datasets online by seaborn.")
df = sns.load_dataset(dataset_name)
# prepare data
# One dummy observation far outside the plotted range, carrying its own hue
# level, so that split=True draws only one half of each real violin.
# DataFrame.append was removed in pandas 2.0; build the row with the frame's
# own columns and concatenate it instead.
dummy_row = pd.DataFrame([[-999, -999, -999, -999, 'setosa']], columns=df.columns)
df2 = pd.concat([df, dummy_row], ignore_index=True)
df2['huecol'] = 0.0
# Assign through .loc on the frame itself rather than chained df2['huecol'].iloc[-1]
df2.loc[df2.index[-1], 'huecol'] = -999
# plot
fig = plt.figure(figsize=(6,6))
sns.violinplot(x='species', y="sepal_width",
               split=True, hue='huecol', inner='quartile',
               palette="pastel", data=df2, legend=False)
plt.title('iris')
# remove hue legend
leg = plt.gca().legend()
leg.remove()
# the y-limits keep the dummy -999 observation out of view
plt.ylim([1,5.0])
plt.show()
I was looking for a solution similar to this but did not find anything satisfactory. I ended up calling seaborn.kdeplot multiple times as violinplot is essentially a one-sided kernel density plot.
Example
Function definition for categorical_kde_plot below
# Example call with horizontal=False; `df` must provide the "tip" and "day" columns named here.
categorical_kde_plot(
df,
variable="tip",
category="day",
category_order=["Thur", "Fri", "Sat", "Sun"],
horizontal=False,
)
with horizontal=True, the output would look like:
Code
import seaborn as sns
from matplotlib import pyplot as plt
def categorical_kde_plot(
    df,
    variable,
    category,
    category_order=None,
    horizontal=False,
    rug=True,
    figsize=None,
):
    """Draw a categorical KDE plot

    Parameters
    ----------
    df: pd.DataFrame
        The data to plot
    variable: str
        The column in the `df` to plot (continuous variable)
    category: str
        The column in the `df` to use for grouping (categorical variable)
    category_order: list or None
        The categories to draw, in drawing order. If None, the unique
        values of `df[category]` are used.
    horizontal: bool
        If True, draw density plots horizontally. Otherwise, draw them
        vertically.
    rug: bool
        If True, add also a sns.rugplot.
    figsize: tuple or None
        If None, use default figsize of (7, 1*len(categories)), swapped to
        (1*len(categories), 7) for the vertical layout.
        If tuple, use that figsize. Given to plt.subplots as an argument.
    """
    if category_order is None:
        categories = list(df[category].unique())
    else:
        categories = category_order[:]

    # Only fall back to the default size when the caller gave none; the
    # argument used to be overwritten unconditionally.
    if figsize is None:
        figsize = (7, 1.0 * len(categories))
        if not horizontal:
            figsize = figsize[::-1]

    fig, axes = plt.subplots(
        nrows=len(categories) if horizontal else 1,
        ncols=1 if horizontal else len(categories),
        figsize=figsize,
        sharex=horizontal,
        sharey=not horizontal,
        squeeze=False,  # always get an array, also for a single category
    )
    axes = axes.ravel()

    for i, (cat, ax) in enumerate(zip(categories, axes)):
        subset = df[df[category] == cat]
        sns.kdeplot(
            data=subset,
            x=variable if horizontal else None,
            y=None if horizontal else variable,
            # kde kwargs
            bw_adjust=0.5,
            clip_on=False,
            fill=True,
            alpha=1,
            linewidth=1.5,
            ax=ax,
            color="lightslategray",
        )

        # Only one panel keeps the axis of the continuous variable: the last
        # one in the horizontal layout, the first one in the vertical layout.
        keep_variable_axis = (i == len(axes) - 1) if horizontal else (i == 0)

        if rug:
            sns.rugplot(
                data=subset,
                x=variable if horizontal else None,
                y=None if horizontal else variable,
                ax=ax,
                color="black",
                height=0.025 if keep_variable_axis else 0.04,
            )

        _format_axis(
            ax,
            cat,
            horizontal,
            keep_variable_axis=keep_variable_axis,
        )

    plt.tight_layout()
    plt.show()
def _format_axis(ax, category, horizontal=False, keep_variable_axis=True):
    """Style one panel: hide spines and label it with a single centred tick.

    The top and right spines are always hidden. The axis across the density
    direction (y when `horizontal`, x otherwise) loses its label and gets one
    tick at the midpoint of its current limits, labelled with `category`.
    Unless `keep_variable_axis` is true, the other axis and its spine are
    hidden as well.
    """
    for side in ("top", "right"):
        ax.spines[side].set_visible(False)

    if horizontal:
        ax.set_ylabel(None)
        low, high = ax.get_ylim()
        ax.set_yticks([(low + high) / 2])
        ax.set_yticklabels([category])
        if keep_variable_axis:
            return
        ax.get_xaxis().set_visible(False)
        ax.spines["bottom"].set_visible(False)
        return

    ax.set_xlabel(None)
    low, high = ax.get_xlim()
    ax.set_xticks([(low + high) / 2])
    ax.set_xticklabels([category])
    if keep_variable_axis:
        return
    ax.get_yaxis().set_visible(False)
    ax.spines["left"].set_visible(False)
if __name__ == "__main__":
    # Demo: horizontal tip densities per weekday from seaborn's "tips" dataset
    df = sns.load_dataset("tips")
    weekday_order = ["Thur", "Fri", "Sat", "Sun"]
    categorical_kde_plot(
        df,
        "tip",
        "day",
        category_order=weekday_order,
        horizontal=True,
    )
The answer is simply, no, it's not possible with seaborn without tricking it into thinking there is a hue present.
This answer shows how to do it in matplotlib and in principle the same can be applied to seaborn violinplots as well, namely to cut out half of the violin path.
It's not necessary to modify the data:
# Passes the constant hue=True with hue_order=[True, False] and split=True;
# `tips` is not defined in this snippet and must already be loaded.
ax = sns.violinplot(
data=tips,
x="day", y="total_bill", hue=True,
hue_order=[True, False], split=True,
)
# Presumably discards the legend created for the hue - confirm against the seaborn version in use.
ax.legend_ = None

Matplotlib Different Scaled Y-Axes

I have a dataframe with the data below.
# Example data keyed by column name, 13 entries per column; 'ds' holds
# dt.date values for 2017-01-01 through 2017-01-13.
# Uses `dt` and `pd`, which this snippet does not import itself.
ex_dict = {'revenue': [613663, 1693667, 2145183, 2045065, 2036406,
1708862, 1068232, 1196899, 2185852, 2165778, 2144738, 2030337,
1784067],
'abs_percent_diff': [0.22279211315310588, 0.13248909660765254,
0.12044821447874667, 0.09438674840975962, 0.1193588387687364,
0.062100921139322744, 0.05875297161175445, 0.06240362963749895,
0.05085338590212515, 0.034877614941165744, 0.012263947005671703,
0.029227374323993634, 0.023411816504907524],
'ds': [dt.date(2017,1,1), dt.date(2017,1,2), dt.date(2017,1,3),
dt.date(2017,1,4), dt.date(2017,1,5), dt.date(2017,1,6),
dt.date(2017,1,7), dt.date(2017,1,8), dt.date(2017,1,9),
dt.date(2017,1,10), dt.date(2017,1,11), dt.date(2017,1,12),
dt.date(2017,1,13)],
'yhat_normal': [501853.9074623253, 1952329.3521464923, 1914575.7673396615,
1868685.8215084015, 1819261.1068672044, 1608945.031482406,
1008953.0123101478, 1126595.36037955, 2302965.598289115,
2244044.9351591542, 2171367.536396199, 2091465.0313570146,
1826836.562382966]}
# Build the frame from the dict; each key becomes a column.
df_vis=pd.DataFrame.from_dict(ex_dict)
I want to graph yhat_normal and revenue on the same y-axis and abs_percent_diff on a y-axis with a different scale.
# Use the dates as the index so they become the x-axis of the line plot.
df_vis = df_vis.set_index('ds')
# The column is called 'revenue' in ex_dict; selecting 'rev' raised a KeyError.
df_vis[['revenue', 'yhat_normal']].plot(figsize=(20, 12))
I can easily graph rev and yhat_normal with the code above, but I am struggling to get abs_percent_diff on a different y-axis scale. I tried converting my columns to numpy arrays and doing this, but it looks terrible.
# DataFrame.as_matrix no longer exists in current pandas; use to_numpy().
# After set_index('ds') the dates live in the index, not in a 'ds' column,
# and the revenue column is named 'revenue' (not 'rev').
npdate = df_vis.index.to_numpy()
nppredictions = df_vis['yhat_normal'].to_numpy()
npactuals = df_vis['revenue'].to_numpy()
npmape = df_vis['abs_percent_diff'].to_numpy()
fig, ax1 = plt.subplots()
# Second y-axis sharing the same x-axis, for the differently scaled series
ax2 = ax1.twinx()
fig.set_size_inches(20,10)
# Plain plot() handles date values; marker='o' keeps the point markers that
# plot_date drew by default.
ax1.plot(npdate, nppredictions, marker='o', ls='-', color='b')
ax1.plot(npdate, npactuals, marker='o', ls='-', color='g')
ax2.plot(npdate, npmape, 'r-')
ax1.set_xlabel('X data')
ax1.set_ylabel('Y1 data', color='g')
# Label colour matches the red line drawn on this axis (was 'b')
ax2.set_ylabel('Y2 data', color='r')
plt.show()
This is what I want. Where the red line is the abs_percent_diff. Obviously, I drew the line by hand so it is not accurate.
I'm not sure if I understood the problem correctly, but it seems you simply want to draw one of the dataframe columns at the bottom of the plot area.
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
# Example data keyed by column name, 13 entries per column; 'ds' holds
# dt.date values for 2017-01-01 through 2017-01-13.
ex_dict = {'revenue': [613663, 1693667, 2145183, 2045065, 2036406,
1708862, 1068232, 1196899, 2185852, 2165778, 2144738, 2030337,
1784067],
'abs_percent_diff': [0.22279211315310588, 0.13248909660765254,
0.12044821447874667, 0.09438674840975962, 0.1193588387687364,
0.062100921139322744, 0.05875297161175445, 0.06240362963749895,
0.05085338590212515, 0.034877614941165744, 0.012263947005671703,
0.029227374323993634, 0.023411816504907524],
'ds': [dt.date(2017,1,1), dt.date(2017,1,2), dt.date(2017,1,3),
dt.date(2017,1,4), dt.date(2017,1,5), dt.date(2017,1,6),
dt.date(2017,1,7), dt.date(2017,1,8), dt.date(2017,1,9),
dt.date(2017,1,10), dt.date(2017,1,11), dt.date(2017,1,12),
dt.date(2017,1,13)],
'yhat_normal': [501853.9074623253, 1952329.3521464923, 1914575.7673396615,
1868685.8215084015, 1819261.1068672044, 1608945.031482406,
1008953.0123101478, 1126595.36037955, 2302965.598289115,
2244044.9351591542, 2171367.536396199, 2091465.0313570146,
1826836.562382966]}
# Build the frame and index it by date in one step
df_vis = pd.DataFrame.from_dict(ex_dict).set_index('ds')
# Revenue and forecast share the primary y-axis
ax = df_vis[['revenue', 'yhat_normal']].plot(figsize=(13, 8))
# The relative error goes onto a secondary y-axis of the same plot, with its
# range fixed to 0..1
ax2 = df_vis['abs_percent_diff'].plot(secondary_y=True, ax=ax)
ax2.set_ylim(0, 1)
plt.show()

Custom legend for Seaborn regplot (Python 3)

I've been trying to follow this How to make custom legend in matplotlib SO question but I think a few things are getting lost in translation. I used a custom color mapping for the different classes of points in my plot and I want to be able to put a table with those color-label pairs. I stored the info in a dictionary D_color_label and then made 2 parallel lists colors and labels. I tried using it in the ax.legend but it didn't seem to work.
np.random.seed(0)
# Create dataframe: 100 random points
DF_0 = pd.DataFrame(np.random.random((100, 2)), columns=["x", "y"])
# Colors to labels
D_color_label = {
    "#91FF61": "label_0",
    "#BA61FF": "label_1",
    "#916F61": "label_2",
    "#BAF1FF": "label_3",
}
# Row index to color: consecutive blocks of 25 rows share one color
D_idx_color = {
    idx: color
    for block, color in enumerate(D_color_label)
    for idx in range(25 * block, 25 * (block + 1))
}
# Add color column (aligned on the row index)
DF_0["color"] = pd.Series(D_idx_color)
# Plot
fig, ax = plt.subplots(figsize=(8, 8))
sns.regplot(data=DF_0, x="x", y="y", scatter_kws={"c": DF_0["color"]}, ax=ax)
# Add custom legend: two parallel lists of colors and their labels
unique_colors = set(DF_0["color"])
colors = list(unique_colors)
labels = [D_color_label[color] for color in unique_colors]
# If I do this, I get the following error:
# ax.legend(colors, labels)
# UserWarning: Legend does not support '#BA61FF' instances.
# A proxy artist may be used instead.
According to http://matplotlib.org/users/legend_guide.html you have to pass the legend function the artists that are to be labeled. To get an individual scatter artist per class, group your data by color and plot the points of each color separately, so that every artist gets its own label:
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
import seaborn as sns
np.random.seed(0)
# Create dataframe
DF_0 = pd.DataFrame(np.random.random((100, 2)), columns=["x", "y"])
DF_0['color'] = ["#91FF61"]*25 + ["#BA61FF"]*25 + ["#91FF61"]*25 + ["#BA61FF"]*25
D_color_label = {"#91FF61": "label_0", "#BA61FF": "label_1",
                 "#916F61": "label_2", "#BAF1FF": "label_3"}
# Series.unique() - the misspelt .uniqe() raised an AttributeError
colors = list(DF_0["color"].unique())
labels = [D_color_label[color] for color in colors]
ax = sns.regplot(data=DF_0, x="x", y="y", scatter_kws={'c': DF_0['color'], 'zorder': 1})
# Make a legend
# groupby and plot points of one color
# Grouping by the plain column name makes the group key the colour string itself
for color, grp in DF_0.groupby('color'):
    # Look the label up by colour; labels[i+1] indexed a list with a string
    grp.plot(kind='scatter', x='x', y='y', c=color, ax=ax,
             label=D_color_label[color], zorder=0)
ax.legend(loc=2)
plt.show()

Categories

Resources