changing colors in barh(stacked) - python
I have a table with regions on the y-axis and values on the x-axis. Each region has two bars.
I would like to change colors. I would like that each little bar in barh has a different color.
I have this code where colors repeat:
For example, after pink I would like another color, not red again. Is it possible to change the color scale, for example by using "tab10"?
Furthermore, is it possible to get a legend where each color denotes one year (2010,2011,2012,2013,2014,2015,2016,2017,2018,2019)?
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
matplotlib.style.use('ggplot')
# First table: one row per region code (the index) and one column per year
# from 2010 to 2019.
# NOTE(review): the meaning of the "_y" / "_m" column suffixes is not shown
# here — confirm with the data source.
plotdata = pd.DataFrame({
"2010_y":[0.63,0.56,0.89,0.94,0.68,0.63,0.34,0.54,0.77,0.77,0.86,0.42,0.49,0.70,1.12,1.10,0.30,0.67,0.30,0.23],
"2011_y":[1.10,0.54,0.94,0.81,0.80,0.87,0.90,0.99,0.53,0.78,0.98,0.99,0.87,1.09,1.18,0.89,0.89,0.70,1.05,0.86],
"2012_y":[1.39,0.97,0.98,0.99,0.95,1.10,1.07,1.10,1.02,1.05,0.97,0.92,0.94,0.91,0.85,1.22,1.24,1.08,1.08,1.01],
"2013_m":[1.86,1.34,1.12,1.25,1.26,1.08,1.31,1.58,1.22,1.26,1.37,1.14,1.23,1.00,1.13,1.49,1.14,1.37,1.25,1.23],
"2014_m":[1.59,1.04,1.03,1.10,1.44,1.43,1.33,1.81,1.56,1.22,1.08,1.36,1.11,0.87,1.10,0.68,1.31,1.26,1.25,1.61],
"2015_m":[0.71,1.14,1.18,0.99,0.87,0.74,0.91,0.57,0.79,1.09,1.16,0.89,1.15,0.98,1.24,0.75,0.85,0.83,1.02,0.71],
"2016_m":[0.67,1.22,1.00,0.96,0.97,0.96,0.95,0.79,1.06,1.04,0.83,1.06,1.07,0.98,0.66,1.06,0.99,1.13,1.03,1.05],
"2017_m":[0.68,0.84,0.73,0.73,0.79,0.98,0.80,0.54,0.85,0.91,0.55,0.65,0.86,0.71,0.71,0.77,0.95,0.80,0.81,0.87],
"2018_m":[0.74,1.28,1.21,1.13,0.99,1.00,1.21,0.97,1.07,0.91,1.38,1.34,1.25,1.65,1.28,0.97,1.11,1.09,1.20,1.13],
"2019_m":[0.63,1.07,0.92,1.10,1.24,1.21,1.18,1.10,1.12,0.97,0.80,1.24,1.04,1.11,0.74,1.06,1.23,1.07,1.01,1.30]
}, index=["ABR", "BAS", "CAL", "CAM", "EMR","FVG","LAZ","LIG","LOM","MAR","MOL","PIE","PUG","SAR","SIC","TOS","TAA","UMB","VDA","VEN"]
)
# Second table with the same index and column labels as `plotdata`.
# Many entries are 0.00; the "TAA" row is 0.00 in every column.
plotdata3 = pd.DataFrame({
"2010_y":[4.12,1.44,5.73,3.91,3.43,0.00,4.26,0.00,1.95,2.65,0.00,4.82,3.61,2.17,3.05,2.66,0.00,2.86,3.00,1.15],
"2011_y":[0.00,0.39,0.00,0.00,0.00,1.32,0.00,0.00,1.02,0.00,2.72,0.00,0.00,0.00,0.83,0.00,0.00,0.00,0.00,0.00],
"2012_y":[0.08,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00],
"2013_m":[0.00,0.00,0.00,0.00,0.45,0.00,0.00,2.58,0.00,0.00,0.00,0.00,0.00,1.90,0.00,0.00,0.00,0.51,0.00,0.00],
"2014_m":[0.05,0.03,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.40,0.00,0.01,0.00,0.00,0.42,0.00,0.00,0.00,0.00,0.00],
"2015_m":[1.45,0.16,0.27,0.09,0.64,1.68,0.20,2.42,0.47,0.41,0.28,0.32,0.09,0.93,0.18,0.52,0.00,0.67,0.00,0.00],
"2016_m":[0.00,0.89,0.00,0.00,1.11,0.00,0.00,0.00,0.00,0.44,0.00,0.03,0.00,0.00,0.00,0.00,0.00,0.22,0.00,0.00],
"2017_m":[0.00,0.00,0.00,0.00,0.95,0.00,0.00,0.00,0.09,1.85,0.00,0.00,0.11,0.00,0.07,0.35,0.00,1.74,0.00,0.00],
"2018_m":[0.00,0.00,0.00,0.00,0.06,0.00,0.11,0.00,0.00,0.00,0.00,0.25,0.00,0.00,0.01,0.00,0.00,0.00,0.00,0.00],
"2019_m":[0.30,3.09,0.00,0.00,0.34,0.00,0.42,0.00,1.48,0.25,0.00,0.58,0.19,0.00,2.44,0.46,0.00,0.00,0.00,0.85]
}, index=["ABR", "BAS", "CAL", "CAM", "EMR","FVG","LAZ","LIG","LOM","MAR","MOL","PIE","PUG","SAR","SIC","TOS","TAA","UMB","VDA","VEN"]
)
# Draw two stacked horizontal bars per region (one per table) on shared axes.
fig, ax = plt.subplots()
# The plot calls below use `stacked_data` / `stacked_data3`, which were only
# defined in the commented-out normalisation lines, so the script stopped with
# a NameError. Bind them to the raw tables instead. The percentage
# normalisation stays disabled: the "TAA" row of plotdata3 sums to 0, so
# x*100/sum(x) would turn that whole row into NaN.
stacked_data = plotdata
stacked_data3 = plotdata3
#stacked_data = plotdata.apply(lambda x: x*100/sum(x), axis=1)
#stacked_data2 = plotdata2.apply(lambda x: x*100/sum(x), axis=1)
#stacked_data3 = plotdata3.apply(lambda x: x*100/sum(x), axis=1)
# position=0 and position=1 align the two bars on opposite sides of each
# region's tick, so they sit next to each other instead of overlapping.
stacked_data.plot(kind="barh", stacked=True, width=0.4,
ax=ax, position=0, edgecolor='black')
#stacked_data2.plot(kind="barh", stacked=True, width=0.25,
# ax=ay, position=1, hatch='//',edgecolor='black')
stacked_data3.plot(kind="barh", stacked=True, width=0.4,
ax=ax, position=1,edgecolor='black')
# Both plot calls add their columns to the same legend; drop it entirely.
ax.get_legend().remove()
# Extend the upper y-limit so the bar drawn above the last tick is not clipped.
ax.set_ylim(top=len(stacked_data)-0.1)
#ax.set_xlim(right=len(stacked_data)-0.5)
# Replace the grey 'ggplot' axes background with white.
ax.set_facecolor('xkcd:white')
# displaying the title
plt.title("titla")
# Draw all four axes borders in black.
ax.spines['bottom'].set_color('black')
ax.spines['top'].set_color('black')
ax.spines['right'].set_color('black')
ax.spines['left'].set_color('black')
You can specify your colormap when plotting. Also use different colormaps for your barplots if you like, for example:
# 'tab10' has exactly ten colours, one per year column. An 8-colour map such
# as 'Accent' would still repeat colours, because pandas samples the colormap
# at as many evenly spaced points as there are columns.
stacked_data3.plot(kind="barh", stacked=True, width=0.4,
ax=ax, position=1,edgecolor='black', cmap='tab10')
A list of available colormaps is available on matplotlib.org
Related
Pie chart enclosed with a black line (rectangle)
Below you can see my data and facet plot in matplotlib. import pandas as pd import numpy as np pd.set_option('max_columns', None) import matplotlib.pyplot as plt import matplotlib as mpl # Data data = { 'type_sale': ['g_1','g_2','g_3','g_4','g_5','g_6','g_7','g_8','g_9','g_10'], 'open':[70,20,24,150,80,90,60,90,20,20], 'closed':[30,14,20,10,20,40,10,10,10,10], } df = pd.DataFrame(data, columns = ['type_sale', 'open', 'closed', ]) data1 = { 'type_sale': [ 'open','closed'], 'structure':[70,30], } df1 = pd.DataFrame(data1, columns = ['type_sale', 'structure', ]) # Ploting labels = ['open','closed'] fig, axs = plt.subplots(2,2, figsize=(10,8)) plt.subplots_adjust(wspace=0.2, hspace=0.6) df1.plot(x='type_sale', y='structure',labels=labels,autopct='%1.1f%%',kind='pie', title='Stacked Bar Graph by dataframe',ax=axs[0,0]) df.plot(x='type_sale', kind='bar', stacked=True, title='Stacked Bar Graph by dataframe', ax=axs[0,1]) df.plot(x='type_sale', kind='bar', stacked=True, title='Stacked Bar Graph by dataframe',ax=axs[1,0]) df.plot(x='type_sale', kind='bar', stacked=True,title='Stacked Bar Graph by dataframe', ax=axs[1,1]) plt.suptitle(t='Stacked Bar Graph by dataframe', fontsize=16) plt.show() If you compare the first pie plot with others, you can spot a big difference. Namely, the first pie plot is not enclosed with a black line (rectangle), while the other is enclosed. So can anybody help me with how to solve this problem?
After playing around myself, it seems that this is working, but I think the pie gets stretched, which doesn't look that good. EDIT found a better solution with set_adjustable also two options how you create the piechart, the frame and ticks differ in a bit. # 1 axs[0,0].pie(df1['structure'],labels=labels,autopct='%1.1f%%',frame=True,radius=10) axs[0,0].set_title('Stacked Bar Graph by dataframe') # 2 df1.plot(x='type_sale', y='structure',labels=labels,autopct='%1.1f%%',kind='pie', title='Stacked Bar Graph by dataframe',ax=axs[0,0]) axs[0,0].set_frame_on(True) axs[0,0].set_adjustable('datalim')
Legends disappear when {"hist":False} in seaborn distplot
I have the following function: Say hue="animals have three categories dog,bird,horse and we have two dataframes df_m and df_f consisting of data of male animals and women animals only, respectively. The function plots three distplot of y (e.g y="weight") one for each hue={dog,bird,horse}. In each subplot we plot df_m[y] and df_f[y] such that I can compare the weight of male dogs/female dogs, male birds/female birds, male horses/female horses. If I set distkwargs={"hist":False} when calling the function the legends ["F","M"] disappears, for some reason. Having distkwargs={"hist":True}` shows the legends def plot_multi_kde_cat(self,dfs,y,hue,subkwargs={},distkwargs={},legends=[]): """ Create a subplot multi_kde with categories in the same plot dfs: List - DataFrames for each category e.g one for male and one for females hue: string - column for which each category is plotted (in each subplot) """ hues = dfs[0][hue].cat.categories if len(hues)==2: #Only two categories fig,axes = plt.subplots(1,2,**subkwargs) #Get axes and flatten them axes=axes.flatten() for ax,hu in zip(axes,hues): for df in dfs: sns.distplot(df.loc[df[hue]==hu,y],ax=ax,**distkwargs) ax.set_title(f"Segment: {hu}") ax.legend(legends) else: #More than two categories: create a square grid and remove unsused axes n_rows = int(np.ceil(np.sqrt(len(hues)))) #number of rows fig,axes = plt.subplots(n_rows,n_rows,**subkwargs) axes = axes.flatten() for ax,hu in zip(axes,hues): for df in dfs: sns.distplot(df.loc[df[hue]==hu,y],ax=ax,**distkwargs) ax.set_title(f"Segment: {hu}") ax.legend(legends) n_remove = len(axes)-len(hues) #number of axes to remove if n_remove>0: for ax in axes[-n_remove:]: ax.set_visible(False) fig.tight_layout() return fig,axes
You can work around the problem by explicitly providing the label to the distplot. This forces a legend entry for each distplot. ax.legend() then already gets the correct labels. Here is some minimal sample code to illustrate how everything works together: from matplotlib import pyplot as plt import pandas as pd import seaborn as sns import numpy as np def plot_multi_kde_cat(dfs, y, hue, subkwargs={}, distkwargs={}, legends=[]): hues = np.unique(dfs[0][hue]) fig, axes = plt.subplots(1, len(hues), **subkwargs) axes = axes.flatten() for ax, hu in zip(axes, hues): for df, legend_label in zip(dfs, legends): sns.distplot(df.loc[df[hue] == hu, y], ax=ax, label=legend_label, **distkwargs) ax.set_title(f"Segment: {hu}") ax.legend() N = 20 df_m = pd.DataFrame({'animal': np.random.choice(['tiger', 'horse'], N), 'weight': np.random.uniform(100, 200, N)}) df_f = pd.DataFrame({'animal': np.random.choice(['tiger', 'horse'], N), 'weight': np.random.uniform(80, 160, N)}) plot_multi_kde_cat([df_m, df_f], 'weight', 'animal', subkwargs={}, distkwargs={'hist': False}, legends=['male', 'female']) plt.show()
half (not split!) violin plots in seaborn
Currently seaborn offers functionality for split violinplots by setting split=True, according to a hue variable. I would like to make a 'half' violin plot, i.e. a plot where half of each violin is omitted. Such a plot depicts something similar to a pdf for each continuous variable, plotted on one side of each vertical line of each categorical variable only. I have managed to trick seaborn to plot this with an extra data point outside the plotted range of values and an extra dummy hue, but I would like to know if this can be done without actually altering the dataset, e.g. within sns.violinplot() arguments. For instance, this graph: Was created by this snippet: # imports import pandas as pd import seaborn as sns import matplotlib.pyplot as plt # load dataset from seaborn datalist = sns.get_dataset_names() dataset_name = 'iris' if dataset_name in datalist: df = sns.load_dataset(dataset_name) else: print("Dataset with name: " + dataset_name + " was not found in the available datasets online by seaborn.") # prepare data df2 = df.append([-999,-999,-999,-999,'setosa']) df2['huecol'] = 0.0 df2['huecol'].iloc[-1]= -999 # plot fig = plt.figure(figsize=(6,6)) sns.violinplot(x='species',y="sepal_width", split=True, hue ='huecol', inner = 'quartile', palette="pastel", data=df2, legend=False) plt.title('iris') # remove hue legend leg = plt.gca().legend() leg.remove() plt.ylim([1,5.0]) plt.show()
I was looking for a solution similar to this but did not find anything satisfactory. I ended up calling seaborn.kdeplot multiple times as violinplot is essentially a one-sided kernel density plot. Example Function definition for categorical_kde_plot below categorical_kde_plot( df, variable="tip", category="day", category_order=["Thur", "Fri", "Sat", "Sun"], horizontal=False, ) with horizontal=True, the output would look like: Code import seaborn as sns from matplotlib import pyplot as plt def categorical_kde_plot( df, variable, category, category_order=None, horizontal=False, rug=True, figsize=None, ): """Draw a categorical KDE plot Parameters ---------- df: pd.DataFrame The data to plot variable: str The column in the `df` to plot (continuous variable) category: str The column in the `df` to use for grouping (categorical variable) horizontal: bool If True, draw density plots horizontally. Otherwise, draw them vertically. rug: bool If True, add also a sns.rugplot. figsize: tuple or None If None, use default figsize of (7, 1*len(categories)) If tuple, use that figsize. Given to plt.subplots as an argument. 
""" if category_order is None: categories = list(df[category].unique()) else: categories = category_order[:] figsize = (7, 1.0 * len(categories)) fig, axes = plt.subplots( nrows=len(categories) if horizontal else 1, ncols=1 if horizontal else len(categories), figsize=figsize[::-1] if not horizontal else figsize, sharex=horizontal, sharey=not horizontal, ) for i, (cat, ax) in enumerate(zip(categories, axes)): sns.kdeplot( data=df[df[category] == cat], x=variable if horizontal else None, y=None if horizontal else variable, # kde kwargs bw_adjust=0.5, clip_on=False, fill=True, alpha=1, linewidth=1.5, ax=ax, color="lightslategray", ) keep_variable_axis = (i == len(fig.axes) - 1) if horizontal else (i == 0) if rug: sns.rugplot( data=df[df[category] == cat], x=variable if horizontal else None, y=None if horizontal else variable, ax=ax, color="black", height=0.025 if keep_variable_axis else 0.04, ) _format_axis( ax, cat, horizontal, keep_variable_axis=keep_variable_axis, ) plt.tight_layout() plt.show() def _format_axis(ax, category, horizontal=False, keep_variable_axis=True): # Remove the axis lines ax.spines["top"].set_visible(False) ax.spines["right"].set_visible(False) if horizontal: ax.set_ylabel(None) lim = ax.get_ylim() ax.set_yticks([(lim[0] + lim[1]) / 2]) ax.set_yticklabels([category]) if not keep_variable_axis: ax.get_xaxis().set_visible(False) ax.spines["bottom"].set_visible(False) else: ax.set_xlabel(None) lim = ax.get_xlim() ax.set_xticks([(lim[0] + lim[1]) / 2]) ax.set_xticklabels([category]) if not keep_variable_axis: ax.get_yaxis().set_visible(False) ax.spines["left"].set_visible(False) if __name__ == "__main__": df = sns.load_dataset("tips") categorical_kde_plot( df, variable="tip", category="day", category_order=["Thur", "Fri", "Sat", "Sun"], horizontal=True, )
The answer is simply, no, it's not possible with seaborn without tricking it into thinking there is a hue present. This answer shows how to do it in matplotlib and in principle the same can be applied to seaborn violinplots as well, namely to cut out half of the violin path.
It's not necessary to modify the data: ax = sns.violinplot( data=tips, x="day", y="total_bill", hue=True, hue_order=[True, False], split=True, ) ax.legend_ = None
Matplotlib Different Scaled Y-Axes
I have a dataframe with the data below. ex_dict = {'revenue': [613663, 1693667, 2145183, 2045065, 2036406, 1708862, 1068232, 1196899, 2185852, 2165778, 2144738, 2030337, 1784067], 'abs_percent_diff': [0.22279211315310588, 0.13248909660765254, 0.12044821447874667, 0.09438674840975962, 0.1193588387687364, 0.062100921139322744, 0.05875297161175445, 0.06240362963749895, 0.05085338590212515, 0.034877614941165744, 0.012263947005671703, 0.029227374323993634, 0.023411816504907524], 'ds': [dt.date(2017,1,1), dt.date(2017,1,2), dt.date(2017,1,3), dt.date(2017,1,4), dt.date(2017,1,5), dt.date(2017,1,6), dt.date(2017,1,7), dt.date(2017,1,8), dt.date(2017,1,9), dt.date(2017,1,10), dt.date(2017,1,11), dt.date(2017,1,12), dt.date(2017,1,13)], 'yhat_normal': [501853.9074623253, 1952329.3521464923, 1914575.7673396615, 1868685.8215084015, 1819261.1068672044, 1608945.031482406, 1008953.0123101478, 1126595.36037955, 2302965.598289115, 2244044.9351591542, 2171367.536396199, 2091465.0313570146, 1826836.562382966]} df_vis=pd.DataFrame.from_dict(ex_dict) I want to graph yhat_normal and revenue on the same y-axis and abs_percent_diff on a y-axis with a different scale. df_vis = df_vis.set_index('ds') df_vis[['rev', 'yhat_normal']].plot(figsize=(20, 12)) I can easily graph rev and yhat_normal with the code above, but I am struggling to get abs_percent_diff on a different y-axis scale. I tried converting my columns to numpy arrays and doing this, but it looks terrible. 
npdate = df_vis.as_matrix(columns= ['ds']) nppredictions = df_vis.as_matrix(columns= ['yhat_normal']) npactuals = df_vis.as_matrix(columns= ['rev']) npmape = df_vis.as_matrix(columns=['abs_percent_diff']) fig, ax1 = plt.subplots() ax2 = ax1.twinx() fig.set_size_inches(20,10) ax1.plot_date(npdate, nppredictions, ls= '-', color= 'b') ax1.plot_date(npdate, npactuals, ls='-', color='g') ax2.plot_date(npdate, npmape, 'r-') ax1.set_xlabel('X data') ax1.set_ylabel('Y1 data', color='g') ax2.set_ylabel('Y2 data', color='b') plt.show() This is what I want. Where the red line is the abs_percent_diff. Obviously, I drew the line by hand so it is not accurate.
I'm not sure if I got the problem correclty, but it seems you simply want to draw one of the dataframe columns at the bottom of the plot area. import pandas as pd import datetime as dt import matplotlib.pyplot as plt ex_dict = {'revenue': [613663, 1693667, 2145183, 2045065, 2036406, 1708862, 1068232, 1196899, 2185852, 2165778, 2144738, 2030337, 1784067], 'abs_percent_diff': [0.22279211315310588, 0.13248909660765254, 0.12044821447874667, 0.09438674840975962, 0.1193588387687364, 0.062100921139322744, 0.05875297161175445, 0.06240362963749895, 0.05085338590212515, 0.034877614941165744, 0.012263947005671703, 0.029227374323993634, 0.023411816504907524], 'ds': [dt.date(2017,1,1), dt.date(2017,1,2), dt.date(2017,1,3), dt.date(2017,1,4), dt.date(2017,1,5), dt.date(2017,1,6), dt.date(2017,1,7), dt.date(2017,1,8), dt.date(2017,1,9), dt.date(2017,1,10), dt.date(2017,1,11), dt.date(2017,1,12), dt.date(2017,1,13)], 'yhat_normal': [501853.9074623253, 1952329.3521464923, 1914575.7673396615, 1868685.8215084015, 1819261.1068672044, 1608945.031482406, 1008953.0123101478, 1126595.36037955, 2302965.598289115, 2244044.9351591542, 2171367.536396199, 2091465.0313570146, 1826836.562382966]} df_vis=pd.DataFrame.from_dict(ex_dict) df_vis = df_vis.set_index('ds') ax = df_vis[['revenue','yhat_normal']].plot(figsize=(13, 8)) ax2 = df_vis['abs_percent_diff'].plot(secondary_y=True, ax=ax) ax2.set_ylim(0,1) plt.show()
Custom legend for Seaborn regplot (Python 3)
I've been trying to follow this How to make custom legend in matplotlib SO question but I think a few things are getting lost in translation. I used a custom color mapping for the different classes of points in my plot and I want to be able to put a table with those color-label pairs. I stored the info in a dictionary D_color_label and then made 2 parallel lists colors and labels. I tried using it in the ax.legend but it didn't seem to work. np.random.seed(0) # Create dataframe DF_0 = pd.DataFrame(np.random.random((100,2)), columns=["x","y"]) # Label to colors D_idx_color = {**dict(zip(range(0,25), ["#91FF61"]*25)), **dict(zip(range(25,50), ["#BA61FF"]*25)), **dict(zip(range(50,75), ["#916F61"]*25)), **dict(zip(range(75,100), ["#BAF1FF"]*25))} D_color_label = {"#91FF61":"label_0", "#BA61FF":"label_1", "#916F61":"label_2", "#BAF1FF":"label_3"} # Add color column DF_0["color"] = pd.Series(list(D_idx_color.values()), index=list(D_idx_color.keys())) # Plot fig, ax = plt.subplots(figsize=(8,8)) sns.regplot(data=DF_0, x="x", y="y", scatter_kws={"c":DF_0["color"]}, ax=ax) # Add custom legend colors = list(set(DF_0["color"])) labels = [D_color_label[x] for x in set(DF_0["color"])] # If I do this, I get the following error: # ax.legend(colors, labels) # UserWarning: Legend does not support '#BA61FF' instances. # A proxy artist may be used instead.
According to http://matplotlib.org/users/legend_guide.html you have to put to legend function artists which will be labeled. To use scatter_plot individually you have to group by your data by color and plot every data of one color individually to set its own label for every artist: import pandas as pd import numpy as np import matplotlib.pylab as plt import seaborn as sns np.random.seed(0) # Create dataframe DF_0 = pd.DataFrame(np.random.random((100, 2)), columns=["x", "y"]) DF_0['color'] = ["#91FF61"]*25 + ["#BA61FF"]*25 + ["#91FF61"]*25 + ["#BA61FF"]*25 #print DF_0 D_color_label = {"#91FF61": "label_0", "#BA61FF": "label_1", "#916F61": "label_2", "#BAF1FF": "label_3"} colors = list(DF_0["color"].uniqe()) labels = [D_color_label[x] for x in DF_0["color"].unique()] ax = sns.regplot(data=DF_0, x="x", y="y", scatter_kws={'c': DF_0['color'], 'zorder':1}) # Make a legend # groupby and plot points of one color for i, grp in DF_0.groupby(['color']): grp.plot(kind='scatter', x='x', y='y', c=i, ax=ax, label=labels[i+1], zorder=0) ax.legend(loc=2) plt.show()