Draw a mean indexed bar chart? - python

How can I draw the following graph, which shows the difference against the average, using matplotlib, seaborn, Plotly, or any other framework?

I have found that some people call this plot a mean-indexed bar chart. Using seaborn, it can be drawn with code like the following:
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(style="white", context="talk")
f, ax1 = plt.subplots(figsize=(7, 5), sharex=True)
mean = df.mean()
y2 = mean - df["your column"]
sns.barplot(x=dfCopy.index, y=y2, palette="deep", ax=ax1)
ax1.axhline(0, color="k", clip_on=False)
ax1.set_ylabel("Diverging")
# Finalize the plot
sns.despine(bottom=True)
plt.setp(f.axes, yticks=[])
plt.tight_layout(h_pad=2)

Related

Combine 2 kde-functions in one plot in seaborn

I have the following code for plotting the histogram and the kde-functions (Kernel density estimation) of a training and validation dataset:
#Plot histograms
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
displot_dataTrain=sns.displot(data_train, bins='auto', kde=True)
displot_dataTrain._legend.remove()
plt.ylabel('Count')
plt.xlabel('Training Data')
plt.title("Histogram Training Data")
plt.show()
displot_dataValid =sns.displot(data_valid, bins='auto', kde=True)
displot_dataValid._legend.remove()
plt.ylabel('Count')
plt.xlabel('Validation Data')
plt.title("Histogram Validation Data")
plt.show()
# Try to plot the kde-functions together --> yields an AttributeError
X1 = np.linspace(data_train.min(), data_train.max(), 1000)
X2 = np.linspace(data_valid.min(), data_valid.max(), 1000)
fig, ax = plt.subplots(1,2, figsize=(12,6))
ax[0].plot(X1, displot_dataTest.kde.pdf(X1), label='train')
ax[1].plot(X2, displot_dataValid.kde.pdf(X1), label='valid')
Plotting the histogram and the KDE function together in one plot works without problems. Now I would like to have the two KDE functions in one plot, but when using the posted code I get the following error: AttributeError: 'FacetGrid' object has no attribute 'kde'
Do you have any idea how I can combine the two KDE functions in one plot (without the histograms)?
sns.displot() returns a FacetGrid. That doesn't work as input for ax.plot(). Also, displot_dataTest.kde.pdf is never valid. However, you can write sns.kdeplot(data=data_train, ax=ax[0]) to create a kdeplot inside the first subplot. See the docs; note the optional parameters cut= and clip= that can be used to adjust the limits.
If you only want one subplot, you can use fig, ax = plt.subplots(1, 1, figsize=(12,6)) and use ax=ax instead of ax=ax[0] as in that case ax is just a single subplot, not an array of subplots.
The following code has been tested using the latest seaborn version:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
fig, ax = plt.subplots(figsize=(12, 6))
sns.kdeplot(data=np.random.normal(0.1, 1, 100).cumsum(),
color='crimson', label='train', fill=True, ax=ax)
sns.kdeplot(data=np.random.normal(0.1, 1, 100).cumsum(),
color='limegreen', label='valid', fill=True, ax=ax)
ax.legend()
plt.tight_layout()
plt.show()

How to set ticklabel rotation and add bar annotations

I would like to draw the following bar plot with annotations, and I want to rotate the x-axis tick labels by 45 degrees so that they are easily readable. I am not sure why my code is not working. I have added the sample data and the desired bar plot as an attachment. I appreciate your suggestions! Thanks!
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
#sns.set(rc={"figure.dpi":300, 'savefig.dpi':300})
sns.set_context('notebook')
sns.set_style("ticks")
#sns.set_style('white')
sns.set_context("paper", font_scale = 2)
colors = ['b', 'g', 'r', 'c', 'm']
#sns.set(style="whitegrid")
#sns.set_palette(sns.color_palette(colors))
#fig, (ax1,ax2) = plt.subplots(1, 2, figsize=(16, 8))
#fig.subplots_adjust(wspace=0.3)
plots1 = sns.barplot(x="Model", y="G-mean", data=df_Aussel2014_5features, ax=ax1,palette='Spectral')
# Iterrating over the bars one-by-one
for bar in plots1.patches:
# Using Matplotlib's annotate function and
# passing the coordinates where the annotation shall be done
plots1.annotate(format(bar.get_height(), '.2f'),
(bar.get_x() + bar.get_width() / 2,
bar.get_height()), ha='center', va='center',
size=10, xytext=(0, 5),
textcoords='offset points')
plt.show()
# Save figure
#plt.savefig('Aussel2014_5features.png', dpi=300, transparent=False, bbox_inches='tight')
I got the following image.
You are using the object oriented interface (e.g. axes) so don't mix plt. and axes. methods
seaborn.barplot is an axes-level plot, which returns a matplotlib axes, p1 in this case.
Use the matplotlib.axes.Axes.tick_params to set the rotation of the axis, or a number of other parameters, as shown in the documentation.
Use matplotlib.pyplot.bar_label to add bar annotations.
See this answer with additional details and examples for using the method.
Adjust nrows, ncols and figsize as needed, and set sharex=False and sharey=False.
Tested in python 3.8.12, pandas 1.3.4, matplotlib 3.4.3, seaborn 0.11.2
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
# data
data = {'Model': ['QDA', 'LDA', 'DT', 'Bagging', 'NB'],
'G-mean': [0.703780, 0.527855, 0.330928, 0.294414, 0.278713]}
df = pd.DataFrame(data)
# create figure and axes
fig, ax1 = plt.subplots(nrows=1, ncols=1, figsize=(8, 8), sharex=False, sharey=False)
# plot
p1 = sns.barplot(x="Model", y="G-mean", data=df, palette='Spectral', ax=ax1)
p1.set(title='Performance Comparison based on G-mean')
# add annotation
p1.bar_label(p1.containers[0], fmt='%0.2f')
# add a space on y for the annotations
p1.margins(y=0.1)
# rotate the axis ticklabels
p1.tick_params(axis='x', rotation=45)
Alternatively, import matplotlib.pyplot as plt and call plt.xticks(rotation=45).
Example :
import matplotlib.pyplot as plt
plt.xticks(rotation=45)

Same scale for twinx() combo plot with seaborn

Let's use the classic example of weekly precipitation:
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from random import randint
data = {
'Week': [i for i in range(1,9)],
'Weekly Precipitation': [randint(1,10) for i in range(1,9)]
}
df = pd.DataFrame(data)
Let's also add a column with the cumulative precipitation:
df['Cumulative'] = df['Weekly Precipitation'].expanding(min_periods=2).sum()
Now, let's say I want a single chart with a barplot for the weekly precipitation, and a lineplot with the cumulative precipitation. So I do this:
fig, ax1 = plt.subplots(figsize=(10,5))
sns.barplot(x='Week', y='Weekly Precipitation', data=df, ax=ax1)
ax2 = ax1.twinx()
sns.lineplot(x='Week', y='Cumulative', data=df, ax=ax2)
Which yields this plot:
And you can see the problem: although both series are commensurate, the two y-axes use different scales, which distorts the visualization, as the line should always be higher than the bars.
So, instead of twin axes, I'm trying to put both plots on the same axes:
fig, ax1 = plt.subplots(figsize=(10,5))
ax1.set_facecolor('white')
sns.barplot(x='Week', y='Weekly Precipitation', data=df, ax=ax1)
sns.lineplot(x='Week', y='Cumulative', data=df, ax=ax1)
ax1.set_ylabel('Precipitation')
Now, of course, the scale is right (although I have to make do with a single y label), but... the second plot is shifted to the right by one tick!
How does that even make sense?!

Seaborn align plots in subplots

I'm using Seaborn to plot 3 graphs. I would like to know how I can vertically align the different plots.
This is my plot so far:
And this is my code:
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as plticker
import seaborn as sns
import numpy as np
flatui = ["#636EFA", "#EF553B", "#00CC96", "#AB63FA"]
fig, ax = plt.subplots(figsize=(17, 7))
plot=sns.lineplot(ax=ax,x="number of weeks", y="avg streams", hue="year", data=df, palette=flatui)
ax.yaxis.set_major_formatter(ticker.FuncFormatter(lambda x, pos: '{:,.2f}'.format(x/1000) + 'K'))
plot.set(title='Streams trend')
plot.xaxis.set_major_locator(ticker.MultipleLocator(2))
fig, ax =plt.subplots(1,2, figsize=(17,7))
plot = sns.barplot(x="Artist", y="Releases", data = result.head(10), ax=ax[0])
plot.set_xticklabels(
plot.get_xticklabels(),
rotation=90,
horizontalalignment='center',
fontweight='light',
fontsize='x-large'
)
plot=sns.barplot(x="Artist", y="Streams", data = result.head(10), ax=ax[1])
plot.set_xticklabels(
plot.get_xticklabels(),
rotation=90,
horizontalalignment='center',
fontweight='light',
fontsize='x-large'
)
Basically I create a figure where I plot the trend graph and then a figure with 2 subplots where I plot my 2 bar plots.
What I would like to do is to align the trend plot and the 2 barplots. As you might notice on the left, the trend plot and the first barplot are not aligned, I would like to make the two figures start from the same point (like at the ending of the trend plot and the second barplot, where the 2 graphs are aligned).
How could I do that?
Here is a solution using GridSpec
fig = plt.figure()
gs0 = matplotlib.gridspec.GridSpec(2,2, figure=fig)
ax1 = fig.add_subplot(gs0[0,:])
ax2 = fig.add_subplot(gs0[1,0])
ax3 = fig.add_subplot(gs0[1,1])
sns.lineplot(ax=ax1, ...)
sns.barplot(ax=ax2, ...)
sns.barplot(ax=ax3, ...)
If you have the newest version of matplotlib, you can also use the new semantic figure composition engine
axd = plt.figure(constrained_layout=True).subplot_mosaic(
"""
AA
BC
"""
)
sns.lineplot(ax=axd['A'], ...)
sns.barplot(ax=axd['B'], ...)
sns.barplot(ax=axd['C'], ...)

Plotting histogram in Python with frequency percentage

I have a list of ratings for which I am plotting a histogram. On the left (y-axis) it shows the frequency count. Is there a way for it to show the percentage (%) based on traffic instead?
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.hist(item['ratings'], bins = 5)
ax.legend()
ax.set_title("Ratings Frequency")
ax.set_xlabel("Ratings")
ax.set_ylabel("frequency")
ax.axhline(y=0, linestyle='--', color='k')
You can use countplot. Try the seaborn library; it makes data visualization very easy.
import seaborn as sns
sns.countplot()

Categories

Resources