I need to create a plot as close to this picture as possible (given the generated dataframe code below):
And here's the output plot of my code:
What I am having problems with is:
The edge of fill_between is not sharp as in the picture. What I have is some kind of white shadow. How do I change the line between the fillings to match a target picture?
How do I aling legend color lines to the center, but not to the left border which my code does?
Here's my code:
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import matplotlib.cm as cm
import numpy as np
import pandas as pd
ncols = 10
figsize = (20, 5)
fontsize = 14
dti = pd.date_range('2013-01-01', '2020-12-31', freq='2W')
probabilities_in_time = np.random.random((ncols, len(dti)))
probabilities_in_time = probabilities_in_time / \
probabilities_in_time.sum(axis=0)
probabilities_in_time = pd.DataFrame(probabilities_in_time).T
probabilities_in_time.index = dti
cm_subsection = np.linspace(0, 1, ncols)
colors = [cm.coolwarm(x) for x in cm_subsection]
def plot_time_probabilities(probabilities_in_time, figsize):
plt.figure(figsize=figsize)
plt.yticks(np.arange(0, 1.2, 0.2), fontsize=fontsize)
plt.xticks(fontsize=fontsize)
draw_stack_plot(colors, probabilities_in_time)
set_grid()
set_legend()
plt.show()
def draw_stack_plot(colors, probabilities_in_time):
for i, color in enumerate(colors):
if i == 0:
plt.plot(probabilities_in_time[i], color=color)
plt.fill_between(probabilities_in_time.index,
probabilities_in_time[0], color=color)
else:
probabilities_in_time[i] += probabilities_in_time[i-1]
plt.fill_between(probabilities_in_time.index,
probabilities_in_time[i], probabilities_in_time[i-1],
color=color)
plt.plot(probabilities_in_time[i], label=' Probability: {}'.format(
i), color=color)
def set_grid():
ax = plt.gca()
ax.set_axisbelow(False)
ax.xaxis.grid(True, linestyle='-', lw=1)
def set_legend():
leg = plt.legend(loc='lower left', fontsize=14, handlelength=1.3)
for i in leg.legendHandles:
i.set_linewidth(12)
plot_time_probabilities(probabilities_in_time, figsize)
To set the legend in the center, you can set loc='center', or you can put the legend outside. To avoid that the legend handles grow to larger, you can leave out .set_linewidth(12) (this sets a very wide edge width of 12 points).
Shifting the colors by one position can help to show the fill borders more pronounced. To still have a correct legend, the label should then be added to fill_between.
The code below also tries to simplify part of the calls:
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np
import pandas as pd
ncols = 10
figsize = (20, 5)
fontsize = 14
dti = pd.date_range('2013-01-01', '2020-12-31', freq='2W')
probabilities_in_time = np.random.random((ncols, len(dti)))
probabilities_in_time = probabilities_in_time / probabilities_in_time.sum(axis=0)
probabilities_in_time = pd.DataFrame(probabilities_in_time).T
probabilities_in_time.index = dti
cm_subsection = np.linspace(0, 1, ncols)
colors = cm.coolwarm(cm_subsection)
def plot_time_probabilities(probabilities_in_time, figsize):
plt.figure(figsize=figsize)
plt.yticks(np.arange(0, 1.2, 0.2), fontsize=fontsize)
plt.xticks(fontsize=fontsize)
draw_stack_plot(colors, probabilities_in_time)
set_grid()
set_legend()
# plt.margins(x=0, y=0)
plt.margins(x=0.02)
plt.tight_layout()
plt.show()
def draw_stack_plot(colors, probabilities_in_time):
current_probabilities = 0
for i, color in enumerate(colors):
plt.fill_between(probabilities_in_time.index,
probabilities_in_time[i] + current_probabilities, current_probabilities,
color=color, label=f' Probability: {i}')
current_probabilities += probabilities_in_time[i]
plt.plot(current_probabilities,
color=colors[0] if i <= 1 else colors[-1] if i >= 8 else colors[i - 1] if i < 5 else colors[i + 1])
def set_grid():
ax = plt.gca()
ax.set_axisbelow(False)
ax.xaxis.grid(True, linestyle='-', lw=1)
def set_legend():
leg = plt.legend(loc='lower left', fontsize=14, handlelength=1.3)
# leg = plt.legend(loc='upper left', bbox_to_anchor=(1.01, 1), fontsize=14, handlelength=1.3)
# for i in leg.legendHandles:
# i.set_linewidth(12)
plot_time_probabilities(probabilities_in_time, figsize)
Related
So let's say I have a vector of numbers.
np.random.randn(5).round(2).tolist()
[2.05, -1.57, 1.07, 1.37, 0.32]
I want a draw a rectangle that shows this elements as numbers in a rectangle.
Something like this:
Is there an easy way to do this in matplotlib?
A bit convoluted but you could take advantage of seaborn.heatmap, creating a white colormap:
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
data = np.random.randn(5).round(2).tolist()
linewidth = 2
ax = sns.heatmap([data], annot=True, cmap=LinearSegmentedColormap.from_list('', ['w', 'w'], N=1),
linewidths=linewidth, linecolor='black', square=True,
cbar=False, xticklabels=False, yticklabels=False)
plt.tight_layout()
plt.show()
In this case, the external lines won't be as thick as the internal ones. If needed, this can be fixed with:
ax.axhline(y=0, color='black', lw=linewidth*2)
ax.axhline(y=1, color='black', lw=linewidth*2)
ax.axvline(x=0, color='black', lw=linewidth*2)
ax.axvline(x=len(data), color='black', lw=linewidth*2)
Edit: avoid these lines and add clip_on=False to sns.heatmap (thanks/credit #JohanC)
Output:
We can add rectangles , and annotate them in a for loop.
from matplotlib import pyplot as plt
import numpy as np
# Our numbers
nums = np.random.randn(5).round(2).tolist()
# rectangle_size
rectangle_size = 2
# We want rectangles look squared, you can change if you want
plt.rcParams["figure.figsize"] = [rectangle_size * len(nums), rectangle_size]
plt.rcParams["figure.autolayout"] = True
fig = plt.figure()
ax = fig.add_subplot(111)
for i in range(len(nums)):
# We are adding rectangles
# You can change colors as you wish
plt.broken_barh([(rectangle_size * i, rectangle_size)], (0, rectangle_size), facecolors='white', edgecolor='black'
,linewidth = 1)
# We are calculating where to annotate numbers
cy = rectangle_size / 2.0
cx = rectangle_size * i + cy
# Annotation You can change color,font, etc ..
ax.annotate(str(nums[i]), (cx, cy), color='black', weight='bold', fontsize=20, ha='center', va='center')
# For squared look
plt.xlim([0, rectangle_size*len(nums)])
plt.ylim([0, rectangle_size])
# We dont want to show ticks
plt.axis('off')
plt.show()
One way using the Rectangle patch is:
import numpy as np
from matplotlib import pyplot as plt
from matplotlib.patches import Rectangle
x = np.random.randn(5).round(2).tolist()
fig, ax = plt.subplots(figsize=(9, 2)) # make figure
dx = 0.15 # edge size of box
buf = dx / 10 # buffer around edges
# set x and y limits
ax.set_xlim([0 - buf, len(x) * dx + buf])
ax.set_ylim([0 - buf, dx + buf])
# set axes as equal and turn off axis lines
ax.set_aspect("equal")
ax.axis("off")
# draw plot
for i in range(len(x)):
# create rectangle with linewidth=4
rect = Rectangle((dx * i, 0), dx, dx, facecolor="none", edgecolor="black", lw=4)
ax.add_patch(rect)
# get text position
x0, y0 = dx * i + dx / 2, dx / 2
# add text
ax.text(
x0, y0, f"{x[i]}", color="black", ha="center", va="center", fontsize=28, fontweight="bold"
)
fig.tight_layout()
fig.show()
which gives:
I'm trying to add the curved text to the Gauge plot using matplotlib in python
import os, sys
import matplotlib
import seaborn as sns
from matplotlib import cm
# from matplotlib import pyplot as plt
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
from matplotlib.patches import Circle, Wedge, Rectangle
# below some functions to deal with some basic trigonometry that we'll be using
# to draw the wedges (i.e. the sectors of the gauge) and to point the arrow to the right sector
def degree_range(n):
start = np.linspace(0,180,n+1, endpoint=True)[0:-1]
end = np.linspace(0,180,n+1, endpoint=True)[1::]
mid_points = start + ((end-start)/2.)
return np.c_[start, end], mid_points
def rot_text(ang):
rotation = np.degrees(np.radians(ang) * np.pi / np.pi - np.radians(90))
return rotation
def gauge(labels=['LOW','MEDIUM','HIGH','VERY HIGH','EXTREME'],
colors='jet_r',
arrow=1,
title='',
size = (6,3),
fname=False):
N = len(labels)
if arrow > N:
raise Exception("\n\nThe category ({}) is greated than \
the length\nof the labels ({})".format(arrow, N))
"""
if colors is a string, we assume it's a matplotlib colormap
and we discretize in N discrete colors
"""
if isinstance(colors, str):
cmap = cm.get_cmap(colors, N)
cmap = cmap(np.arange(N))
colors = cmap[::-1,:].tolist()
if isinstance(colors, list):
if len(colors) == N:
colors = colors[::-1]
else:
raise Exception("\n\nnumber of colors {} not equal \
to number of categories{}\n".format(len(colors), N))
"""
begins the plotting
"""
fig, ax = plt.subplots(figsize=size, dpi=100) # figsize=(5,5)
ang_range, mid_points = degree_range(N)
labels = labels[::-1]
"""
plots the sectors and the arcs
"""
patches = []
for ang, c in zip(ang_range, colors):
# sectors
patches.append(Wedge((0.,0.), .4, *ang, facecolor='w', lw=2))
# arcs
patches.append(Wedge((0.,0.), .4, *ang, width=0.10, facecolor=c, lw=2, alpha=0.5))
[ax.add_patch(p) for p in patches]
"""
set the labels (e.g. 'LOW','MEDIUM',...)
"""
for mid, lab in zip(mid_points, labels):
ax.text(0.35 * np.cos(np.radians(mid)),
0.35 * np.sin(np.radians(mid)),
lab,
horizontalalignment='center',
verticalalignment='center',
fontsize=8,
fontweight='bold',
rotation = rot_text(mid))
"""
set the bottom banner and the title
"""
r = Rectangle((-0.4,-0.1),0.8,0.1, facecolor='w', lw=2)
ax.add_patch(r)
ax.text(0,
-0.05,
title,
horizontalalignment='center',
verticalalignment='center',
fontsize=14,
fontweight='bold')
"""
plots the arrow now
"""
pos = mid_points[abs(arrow - N)]
ax.arrow(0,
0,
0.225 * np.cos(np.radians(pos)),
0.225 * np.sin(np.radians(pos)),
width=0.04,
head_width=0.09,
head_length=0.1,
fc='k',
ec='k')
ax.add_patch(Circle((0, 0), radius=0.02, facecolor='k'))
ax.add_patch(Circle((0, 0), radius=0.01, facecolor='w', zorder=11))
"""
removes frame and ticks, and makes axis equal and tight
"""
ax.set_frame_on(False)
ax.axes.set_xticks([])
ax.axes.set_yticks([])
ax.axis('equal')
plt.tight_layout()
if fname:
fig.savefig(fname, dpi=200)
gauge(labels=['LOW','MEDIUM','HIGH','VERY HIGH','EXTREME','CRITICAL'], \
colors=["#5F4690","#1D6996","#38A6A5","#0F8554","#73AF48","#EDAD08"], arrow=3, size=(5,3), title='drought severity index')
Here the text is rotated but not following the arc curve
How to annotate curved text in matplotlib figure?
I calculated the rttMeans and rttStds arrays. However, the value of rttStds makes the lower error less than 0.
rttStds = [3.330311915835426, 3.3189677330174883, 3.3319538853150386, 3.325173772304221, 3.3374145232695813]
How to set lower error to 0 instead of -#?
The python bar plot code is bellow.
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(rc={'figure.figsize':(18,16)},style='ticks',font_scale = 1.5,font='serif')
N = 5
ind = ['RSU1', 'RSU2', 'RSU3', 'RSU4', 'RSU5'] # the x locations for the groups
width = 0.4 # the width of the bars: can also be len(x) sequence
fig = plt.figure(figsize=(10,6))
ax = fig.add_subplot(111)
p1 = plt.bar(ind, rttMeans, width, yerr=rttStds, log=False, capsize = 16, color='green', hatch="/", error_kw=dict(elinewidth=3,ecolor='black'))
plt.margins(0.01, 0)
#Optional code - Make plot look nicer
plt.xticks(rotation=0)
i=0.18
for row in rttMeans:
plt.text(i, row, "{0:.1f}".format(row), color='black', ha="center")
i = i + 1
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
params = {'axes.titlesize':24,
'axes.labelsize':24,
'xtick.labelsize':28,
'ytick.labelsize':28,
'legend.fontsize': 24,
'axes.spines.right':False,
'axes.spines.top':False}
plt.rcParams.update(params)
plt.tick_params(axis="y", labelsize=28, labelrotation=20, labelcolor="black")
plt.tick_params(axis="x", labelsize=28, labelrotation=20, labelcolor="black")
plt.ylabel('RT Time (millisecond)', fontsize=24)
plt.title('# Participating RSUs', fontsize=24)
# plt.savefig('RSUs.pdf', bbox_inches='tight')
plt.show()
You can pass yerr as a pair [lower_errors, upper_errors] where you can control lower_errors :
lowers = np.minimum(rttStds,rttMeans)
p1 = plt.bar(ind, rttMeans, width, yerr=[lowers,rttStds], log=False, capsize = 16, color='green', hatch="/", error_kw=dict(elinewidth=3,ecolor='black'))
Output:
I need to plot a hist with bot logarithmic y and x-axis, but I'd like also to have hist's bins displayed of same size.
How can I achieve this result with the following code (the x used is very long so I have intentionally avoided to insert it):
import matplotlib as plt
import numpy as np
fig, ax1 = plt.subplots()
hist, bins, _ = ax1.hist(x, log=True, color="red", rwidth=0.5)
plt.xscale("log")
np_x = np.array(x)
print("np_x.mean() = " + str(np_x.mean()))
plt.axvline(np_x.mean() * 1.1, color='lime', linestyle='dashed', linewidth=3,
label='Mean: {:.2f}'.format(np_x.mean()))
handles, labels = ax1.get_legend_handles_labels()
binwidth = math.floor(bins[1] - bins[0])
mylabel = "Binwidth: {}".format(binwidth) + ", Bins: {}".format(len(hist))
red_patch = mpatches.Patch(color='red', label=mylabel)
handles = [red_patch] + handles
labels = [mylabel] + labels
ax1.legend(handles, labels)
plt.xlabel(x_label)
plt.ylabel(y_label)
plt.show()
I'm working with a data-set, so far i have made a histogram with a overlayed normal distribution curve.
I want to mark out the quartiles as in this image (the box plot is for reference).
This is the code i'm working with:
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats
depDelay.sort()
plt.hist(depDelay, bins=100, normed=True)
hmean = np.mean(depDelay)
hstd = np.std(depDelay)
pdf = stats.norm.pdf(depDelay, hmean, hstd)
markers = [np.percentile(depDelay,50)]
plt.plot(DepDelay, pdf,'-o',markevery=markers)
plt.title('Distribution of Departure Delay')
plt.xlabel('Departure Delay (in mins)')
plt.ylabel('Frequency')
plt.savefig('depDelayNormDist.png')
plt.show()
How can i plot the same using matplotlib ?
I've tried to replicate the referenced image somewhat. Not sure what precisely you meant by marking the quartiles, but I've put in labels for Q1 and Q3 at the pdf and percentages in between the quartiles.
import numpy as np
import scipy
import pandas as pd
from scipy.stats import norm
import matplotlib.pyplot as plt
from matplotlib.mlab import normpdf
# dummy data
mu = 0
sigma = 1
n_bins = 50
s = np.random.normal(mu, sigma, 1000)
fig, axes = plt.subplots(nrows=2, ncols=1, sharex=True)
#histogram
n, bins, patches = axes[1].hist(s, n_bins, normed=True, alpha=.1, edgecolor='black' )
pdf = 1/(sigma*np.sqrt(2*np.pi))*np.exp(-(bins-mu)**2/(2*sigma**2))
median, q1, q3 = np.percentile(s, 50), np.percentile(s, 25), np.percentile(s, 75)
print(q1, median, q3)
#probability density function
axes[1].plot(bins, pdf, color='orange', alpha=.6)
#to ensure pdf and bins line up to use fill_between.
bins_1 = bins[(bins >= q1-1.5*(q3-q1)) & (bins <= q1)] # to ensure fill starts from Q1-1.5*IQR
bins_2 = bins[(bins <= q3+1.5*(q3-q1)) & (bins >= q3)]
pdf_1 = pdf[:int(len(pdf)/2)]
pdf_2 = pdf[int(len(pdf)/2):]
pdf_1 = pdf_1[(pdf_1 >= norm(mu,sigma).pdf(q1-1.5*(q3-q1))) & (pdf_1 <= norm(mu,sigma).pdf(q1))]
pdf_2 = pdf_2[(pdf_2 >= norm(mu,sigma).pdf(q3+1.5*(q3-q1))) & (pdf_2 <= norm(mu,sigma).pdf(q3))]
#fill from Q1-1.5*IQR to Q1 and Q3 to Q3+1.5*IQR
axes[1].fill_between(bins_1, pdf_1, 0, alpha=.6, color='orange')
axes[1].fill_between(bins_2, pdf_2, 0, alpha=.6, color='orange')
print(norm(mu, sigma).cdf(median))
print(norm(mu, sigma).pdf(median))
#add text to bottom graph.
axes[1].annotate("{:.1f}%".format(100*norm(mu, sigma).cdf(q1)), xy=((q1-1.5*(q3-q1)+q1)/2, 0), ha='center')
axes[1].annotate("{:.1f}%".format(100*(norm(mu, sigma).cdf(q3)-norm(mu, sigma).cdf(q1))), xy=(median, 0), ha='center')
axes[1].annotate("{:.1f}%".format(100*(norm(mu, sigma).cdf(q3+1.5*(q3-q1)-q3)-norm(mu, sigma).cdf(q3))), xy=((q3+1.5*(q3-q1)+q3)/2, 0), ha='center')
axes[1].annotate('q1', xy=(q1, norm(mu, sigma).pdf(q1)), ha='center')
axes[1].annotate('q3', xy=(q3, norm(mu, sigma).pdf(q3)), ha='center')
axes[1].set_ylabel('probability')
#top boxplot
axes[0].boxplot(s, 0, 'gD', vert=False)
axes[0].axvline(median, color='orange', alpha=.6, linewidth=.5)
axes[0].axis('off')
plt.subplots_adjust(hspace=0)
plt.show()
FYI, I've answered this similar question as well.
I updated the answer in form of a function following similar posts including creating dashed lines on KDE plot having quantiles:
import numpy as np
import scipy
import pandas as pd
from scipy.stats import norm
import matplotlib.pyplot as plt
#from matplotlib.mlab import normpdf #check this: https://github.com/materialsproject/pymatgen/issues/1657
def KDE_hist_plot(df):
for col in df.columns:
n_bins = 50
fig, axes = plt.subplots(nrows=2, ncols=1, sharex=True, figsize=(10,5))
#histogram
n, bins, patches = axes[1].hist(df[col], n_bins, density=True, alpha=.1, edgecolor='black' )
#data = pd.Series(s)
mu = df[col].mean()
sigma = df[col].std()
pdf = 1/(sigma*np.sqrt(2*np.pi))*np.exp(-(bins-mu)**2/(2*sigma**2))
median, q1, q3 = np.percentile(df[col], 50), np.percentile(df[col], 25), np.percentile(df[col], 75)
#probability density function
axes[1].plot(bins, pdf, color='orange', alpha=.6)
#axes[1].figsize=(10,20)
#fill from Q1-1.5*IQR to Q1 and Q3 to Q3+1.5*IQR
iqr = 1.5 * (q3-q1)
x1 = np.linspace(q1 - iqr, q1)
x2 = np.linspace(q3, q3 + iqr)
pdf1 = 1/(sigma*np.sqrt(2*np.pi))*np.exp(-(x1-mu)**2/(2*sigma**2))
pdf2 = 1/(sigma*np.sqrt(2*np.pi))*np.exp(-(x2-mu)**2/(2*sigma**2))
axes[1].fill_between(x1, pdf1, 0, alpha=.6, color='orange')
axes[1].fill_between(x2, pdf2, 0, alpha=.6, color='orange')
#add text to bottom graph.
axes[1].annotate("{:.1f}%".format(100*(norm(mu, sigma).cdf(q1) -norm(mu, sigma).cdf(q1-iqr))), xy=(q1-iqr/2, 0), ha='center')
axes[1].annotate("{:.1f}%".format(100*(norm(mu, sigma).cdf(q3) -norm(mu, sigma).cdf(q1) )), xy=(median , 0), ha='center')
axes[1].annotate("{:.1f}%".format(100*(norm(mu, sigma).cdf(q3+iqr)-norm(mu, sigma).cdf(q3) )), xy=(q3+iqr/2, 0), ha='center')
axes[1].annotate('q1', xy=(q1, norm(mu, sigma).pdf(q1)), ha='center')
axes[1].annotate('q3', xy=(q3, norm(mu, sigma).pdf(q3)), ha='center')
#dashed lines
plt.axvline(df[col].quantile(0),color='b', linestyle='-.')
plt.axvline(df[col].quantile(0.25),color='g', linestyle='--')
plt.axvline(df[col].quantile(0.50),color='g', linestyle='--')
plt.axvline(df[col].quantile(0.75),color='b', linestyle='--')
plt.axvline(df[col].quantile(1),color='r', linestyle='-.')
axes[1].set_ylabel('Probability Density')
#top boxplot
axes[0].boxplot(df[col], 0, 'gD', vert=False)
axes[0].axvline(median, color='orange', alpha=.6, linewidth=.5)
axes[0].axis('off')
Please see the results below for df with 2 columns/attributes and working function in colab notebook:
KDE_hist_plot(df)