Hist wrong binwidth with logarithmix x and y axis - python

I need to plot a hist with bot logarithmic y and x-axis, but I'd like also to have hist's bins displayed of same size.
How can I achieve this result with the following code (the x used is very long so I have intentionally avoided to insert it):
import matplotlib as plt
import numpy as np
fig, ax1 = plt.subplots()
hist, bins, _ = ax1.hist(x, log=True, color="red", rwidth=0.5)
plt.xscale("log")
np_x = np.array(x)
print("np_x.mean() = " + str(np_x.mean()))
plt.axvline(np_x.mean() * 1.1, color='lime', linestyle='dashed', linewidth=3,
label='Mean: {:.2f}'.format(np_x.mean()))
handles, labels = ax1.get_legend_handles_labels()
binwidth = math.floor(bins[1] - bins[0])
mylabel = "Binwidth: {}".format(binwidth) + ", Bins: {}".format(len(hist))
red_patch = mpatches.Patch(color='red', label=mylabel)
handles = [red_patch] + handles
labels = [mylabel] + labels
ax1.legend(handles, labels)
plt.xlabel(x_label)
plt.ylabel(y_label)
plt.show()

Related

Gradient 2D plot using contourf

I did a test code brigging something I saw on stack on different topic, and try to assemble it to make what I need : a filled curve with gradient.
After validate this test code I will make a subplot (4 plots for 4 weeks) with the same min/max for all plot (it's a power consumption).
My code :
from matplotlib import pyplot as plt
import numpy as np
# random x
x = range(100)
# smooth random y
y = 0
result = []
for _ in x:
result.append(y)
y += np.random.normal(loc=0, scale=1)#, size=len(x))
y = result
y = list(map(abs, y))
# creation of z for contour
z1 = min(y)
z3 = max(y)/(len(x)+1)
z2 = max(y)-z3
z = [[z] * len(x) for z in np.arange(z1,z2,z3)]
num_bars = len(x) # more bars = smoother gradient
# plt.contourf(x, y, z, num_bars, cmap='greys')
plt.contourf(x, y, z, num_bars, cmap='cool', levels=101)
background_color = 'w'
plt.fill_between(
x,
y,
y2=max(y),
color=background_color
)
But everytime I make the code run, the result display a different gradient scale, that is not smooth neither even straight right.
AND sometime the code is in error : TypeError: Length of y (100) must match number of rows in z (101)
I'm on it since too many time, turning around, and can't figure where I'm wrong...
I finally find something particularly cool, how to :
have both filled gradient curves in a different color (thanks to JohanC in this topic)
use x axis with datetime (thanks to Ffisegydd in this topic)
Here the code :
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import matplotlib.dates as mdates
np.random.seed(2022)
st_date = '2022-11-01 00:00:00'
st_date = pd.to_datetime(st_date)
en_date = st_date + pd.DateOffset(days=7)
x = pd.date_range(start=st_date,end=en_date,freq='30min')
x = mdates.date2num(x)
y = np.random.normal(0.01, 1, len(x)).cumsum()
fig, ax = plt.subplots(figsize=(18, 5))
ax.plot(x, y, color='grey')
########################
# positives fill
#######################
grad1 = ax.imshow(
np.linspace(0, 1, 256).reshape(-1, 1),
cmap='Blues',
vmin=-0.5,
aspect='auto',
extent=[x.min(), x.max(), 0, y.max()],
# extent=[x[0], x[1], 0, y.max()],
origin='lower'
)
poly_pos = ax.fill_between(x, y.min(), y, alpha=0.1)
grad1.set_clip_path(
poly_pos.get_paths()[0],
transform=ax.transData
)
poly_pos.remove()
########################
# negatives fill
#######################
grad2 = ax.imshow(
np.linspace(0, 1, 256).reshape(-1, 1),
cmap='Reds',
vmin=-0.5,
aspect='auto',
extent=[x.min(), x.max(), y.min(), 0],
origin='upper'
)
poly_neg = ax.fill_between(x, y, y.max(), alpha=0.1)
grad2.set_clip_path(
poly_neg.get_paths()[0],
transform=ax.transData
)
poly_neg.remove()
########################
# decorations and formatting plot
########################
ax.xaxis_date()
date_format = mdates.DateFormatter('%d-%b %H:%M')
ax.xaxis.set_major_formatter(date_format)
fig.autofmt_xdate()
ax.grid(True)

How to plot just one label per ax.plot, when the argument of ax.plot is a list?

Having the following line in my plot code
ax.plot(x, pdf_individual, '--k', label = "single Gaussians")
, with pdf_individual being a list of lists, results in this picture:
Is there a way to just have "single Gaussians" once in the labels, instead of 6 times, which is the amount of single Gaussians for the Gaussian Mixture Model?
This is the whole post with the suggested solution
import matplotlib as mpl
import matplotlib.ticker as mtick
from matplotlib.lines import Line2D
mpl.rcParams['figure.dpi'] = 600
test_input = input_list # THIS IS A 1D LIST with a few hundred items
X = np.asarray(test_input).reshape(-1,1)
N = np.arange(1, 11)
models = [None for i in range(len(N))]
for i in range(len(N)):
models[i] = GaussianMixture(N[i]).fit(X)
# compute the AIC and the BIC
AIC = [m.aic(X) for m in models]
BIC = [m.bic(X) for m in models]
fig = plt.figure(figsize=(12, 4))
fig.subplots_adjust(left=0.1, right=0.9,
bottom=0.21, top=0.9, wspace=0.3)
ax = fig.add_subplot(131)
M_best = models[np.argmin(AIC)]
comp_count = str(M_best)
x = np.linspace(0, 0.1, 100)
logprob = M_best.score_samples(x.reshape(-1, 1))
responsibilities = M_best.predict_proba(x.reshape(-1, 1))
pdf = np.exp(logprob)
pdf_individual = responsibilities * pdf[:, np.newaxis]
left, width = .245, .5
bottom, height = .4, .5
right = left + width
top = bottom + height
plt.setp( ax.xaxis.get_majorticklabels(), rotation=-45, ha="left" )
ax.yaxis.set_major_formatter(mtick.PercentFormatter())
ax.hist(X, 30, density=True, histtype='stepfilled', alpha=0.4, label="Data")
ax.plot(x, pdf, '-k', color = "red", label='GMM')
for i, pdf_individual in enumerate(pdf_individual):
ax.plot(x, pdf_individual, '--k', label = "single Gaussians" if i == 0 else "")
#for pdf in pdf_individual[1:]: ax.plot(x, pdf, '--k')
ax.text(right, top, "Anzahl Komponenten: " + comp_count[-2],
horizontalalignment='center',
verticalalignment='bottom',
transform=ax.transAxes)
ax.set_xlabel('$x$')
ax.set_ylabel('$p(x)$')
plt.legend()
plt.show()
It results in this error:
ValueError: x and y must have same first dimension, but have shapes (100,) and (6,)
EDIT:
Putting
pdf_individual = np.transpose(pdf_individual)
makes the code above work

normal distribution curve doesn't fit well over histogram in subplots using matplotlib

I am using "plt.subplots(2, 2, sharex=True, sharey=True)" to draw a 2*2 subplots. Each subplot has two Y axis and contains normal distribution curve over a histogram. Noting I particularly set "sharex=True, sharey=True" here in order to make all subplots share the same X axis and Y axis.
After running my code, everything is fine except the second, three, and fourth subplots where the normal distribution curve doesn't fit the histogram very well (please see the figure here)
I did googling but failed to get this issue solved. However, if I set "sharex=True, sharey=False" in my code, then the figure looks correct, but all subplots use their own Y axix which isn't what I want. Please see the figure here
Hope this issue can be fixed by experts in StackOverflow. Many thanks in advance!
Below is my code:
import matplotlib.pyplot as plt
from scipy.stats import norm
def align_yaxis(ax1, v1, ax2, v2):
#adjust ax2 ylimit so that v2 in ax2 is aligned to v1 in ax1
_, y1 = ax1.transData.transform((0, v1))
_, y2 = ax2.transData.transform((0, v2))
inv = ax2.transData.inverted()
_, dy = inv.transform((0, 0)) - inv.transform((0, y1-y2))
miny, maxy = ax2.get_ylim()
ax2.set_ylim(miny+dy, maxy+dy)
def drawSingle(myax, mydf , title, offset):
num_bins = 200
xs = mydf["gap"]
x = np.linspace(-1,1,1000)
mu =np.mean(x)
sigma =np.std(xs)
n, bins, patche = myax.hist(xs, num_bins, alpha=0.8, facecolor='blue', density=False)
myax.set_ylabel('frequency',color="black",fontsize=12, weight = "bold")
myax.set_xlabel('X', fontsize=12, weight = "bold",horizontalalignment='center')
ax_twin = myax.twinx()
y_normcurve = norm.pdf(bins, mu, sigma)
ax_twin.plot(bins, y_normcurve, 'r--')
align_yaxis(myax,0,ax_twin,0)
peakpoint = norm.pdf(mu,loc=mu,scale=sigma)
plt.vlines(mu, 0, peakpoint, 'y', '--', label='example')
ax_twin.set_ylabel("probablility dense",color="black",fontsize=12, weight = "bold")
def drawSubplots(mydf1,mydf2,mydf3,mydf4, pos1,pos2,pos3,pos4, title, filename):
plt.rcParams['figure.figsize'] = (18,15 )
my_x_ticks = np.arange(-0.8, 0.8,0.1)
rows, cols = 2, 2
fig, ax = plt.subplots(2, 2, sharex=True, sharey=True)
drawSingle(ax[0][0], mydf1, "Subplot1", pos1)
drawSingle(ax[0][1], mydf2, "Subplot2", pos2)
drawSingle(ax[1][0], mydf3, "Subplot3", pos3)
drawSingle(ax[1][1], mydf4, "Subplot4", pos4)
plt.text(-1, -1, title, horizontalalignment='center', fontsize=18)
plt.show()
drawSubplots(df1, df2,df3,df4,3.2,3.1,2.7,2.85,"test9", "test9")
Here is an attempt to:
have the left y-axes being "frequency" (which is very uninformative in the case of the current bin widths) and shared among the 4 subplots
have the right y-axes be a "probability density"; note how the top of all gaussians is around y=0.02 (the twin axes can only be set at the end because the shared y axes can be updated via later subplots)
have the histogram and the normal curve aligned
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from scipy.stats import norm
def drawSingle(myax, mydf, title):
num_bins = 200
xs = mydf["gap"]
x = np.linspace(-1, 1, 1000)
mu = np.mean(x)
sigma = np.std(xs)
n, bins, patches = myax.hist(xs, num_bins, alpha=0.8, facecolor='blue', density=False)
myax.set_ylabel('frequency', color="black", fontsize=12, weight="bold")
myax.set_xlabel('X', fontsize=12, weight="bold", horizontalalignment='center')
normalization_factor = len(xs) * (bins[1] - bins[0])
y_normcurve = norm.pdf(x, mu, sigma) * normalization_factor
myax.plot(x, y_normcurve, 'r--')
myax.vlines(mu, 0, y_normcurve.max(), 'y', '--', color='lime', label='example')
return normalization_factor
def drawSubplots(mydf1, mydf2, mydf3, mydf4, title):
plt.rcParams['figure.figsize'] = (18, 15)
fig, ax = plt.subplots(nrows=2, ncols=2, sharex=True, sharey=True)
dfs = [mydf1, mydf2, mydf3, mydf4]
norm_factors = [drawSingle(ax_i, df, title)
for ax_i, df, title in zip(ax.ravel(), dfs, ["Subplot1", "Subplot2", "Subplot3", "Subplot4"])]
for ax_i, norm_factor in zip(ax.ravel(), norm_factors):
ax_twin = ax_i.twinx()
ymax = ax_i.get_ylim()[1]
ax_twin.set_ylim(0, ymax / norm_factor)
plt.suptitle(title, fontsize=18)
plt.tight_layout()
plt.show()
df1, df2, df3, df4 = [pd.DataFrame({"gap": np.random.normal(0, 0.2, n)}) for n in [6000, 4000, 1800, 1200]]
drawSubplots(df1, df2, df3, df4, "Title")
Many thanks JohanC, you are amazing.
Based on your code, I just added a few lines of code within drawSubplots function in order to make 95% of the Gaussian curve area shaded between the lower bound and upper bound for each subplot. The following is my try. It seems that ax_twin.fill_between doesn't work normally here. As you could see from the figure that the shaded area is out of the Gaussian curve enter image description here. What I want is only to shade the area under the Gaussian curve between the lower bound and upper bound. If you don't mind, would you please check it out my mistake? Thank you very much!
import matplotlib.pyplot as plt
import math
from scipy.stats import norm
def align_yaxis(ax1, v1, ax2, v2):
#adjust ax2 ylimit so that v2 in ax2 is aligned to v1 in ax1
_, y1 = ax1.transData.transform((0, v1))
_, y2 = ax2.transData.transform((0, v2))
inv = ax2.transData.inverted()
_, dy = inv.transform((0, 0)) - inv.transform((0, y1-y2))
miny, maxy = ax2.get_ylim()
ax2.set_ylim(miny+dy, maxy+dy)
def drawSingle(myax, mydf , title):
num_bins = 200
xs = mydf["gap"]
x = np.linspace(-1,1,1000)
mu =np.mean(xs)
sigma =np.std(xs)
n, bins, patches = myax.hist(xs, num_bins, alpha=0.8, facecolor='blue', density=False)
myax.set_ylabel('Frequency', color="black", fontsize=12, weight="bold")
myax.set_xlabel(title, fontsize=12, weight="bold", horizontalalignment='center')
normalization_factor = len(xs) * (bins[1] - bins[0])
y_normcurve = norm.pdf(x, mu, sigma) * normalization_factor
myax.plot(x, y_normcurve, 'r--')
myax.vlines(mu, 0, y_normcurve.max(), 'y', '--', color='lime', label='example')
plt.xlim(-0.8,0.8)
my_x_ticks = np.arange(-0.8, 0.8,0.1)
plt.xticks(my_x_ticks)
return normalization_factor, mu, sigma
def drawSubplots(mydf1,mydf2,mydf3,mydf4, title):
plt.rcParams['figure.figsize'] = (18,15 )
norm_factors = []
mus = []
sigmas = []
my_x_ticks = np.arange(-0.8, 0.8,0.1)
rows, cols = 2, 2
fig, ax = plt.subplots(nrows=rows, ncols=cols, sharex=True, sharey=True)
dfs = [mydf1, mydf2, mydf3, mydf4]
#norm_factors = [drawSingle(ax_i, df, title)
#for ax_i, df, title in zip(ax.ravel(), dfs, ["Subplot1", "Subplot2", "Subplot3", "Subplot4"])]
for ax_i, df, title in zip(ax.ravel(), dfs, ["Subplot1", "Subplot2", "Subplot3", "Subplot4"]):
norm_factor, mu, sigma = drawSingle(ax_i, df, title)
norm_factors.append(norm_factor)
mus.append(mu)
sigmas.append(sigma)
for ax_i, norm_factor, mu, sigma in zip(ax.ravel(), norm_factors, mus, sigmas ):
ax_twin = ax_i.twinx()
xmax = ax_i.get_xlim()[1]
ax_twin.set_ylim(0, xmax / norm_factor)
ax_twin.set_ylabel("probablility dense",color="black",fontsize=12, weight = "bold")
CI_95_lower = mu - (1.96*sigma)
CI_95_upper = mu + (1.96*sigma)
px_shaded = np.arange(CI_95_lower,CI_95_upper,0.1)
ax_twin.fill_between(px_shaded,norm.pdf(px_shaded,loc=mu,scale=sigma) * norm_factor,alpha=0.75, color='pink')
area_shaded_95_CI = norm.cdf(x=CI_95_upper, loc=mu, scale=sigma)-norm.cdf(x=CI_95_lower, loc=mu, scale=sigma)
ax_twin.text(-0.06,0.01,str(round(area_shaded_95_CI*100,1))+"%", fontsize=20)
ax_twin.annotate(s=f'lower bound= {CI_95_lower:.3f}',xy=(CI_95_lower,norm.pdf(CI_95_lower,loc=mu,scale=sigma)),xytext=(-0.75,0.01),weight='bold',color='blue',\
arrowprops=dict(arrowstyle='-|>',connectionstyle='arc3',color='green'),\
fontsize=12
)
ax_twin.annotate(s=f'upper bound= {CI_95_upper:.3f}',xy=(CI_95_upper,norm.pdf(CI_95_upper,loc=mu,scale=sigma)),xytext=(0.28,0.01),weight='bold',color='blue',\
arrowprops=dict(arrowstyle='-|>',connectionstyle='arc3',color='green'),\
fontsize=12
)
ax_twin.text(0.05, 0.03, r"$\mu=" + f'{mu:.6f}' + ", \sigma=" + f'{sigma:.6f}' + "$" + ", confidence interval=95%" ,
horizontalalignment='center', fontsize=15)
plt.suptitle(title, fontsize=18)
plt.tight_layout()
plt.show()
df1, df2, df3, df4 = [pd.DataFrame({"gap": np.random.normal(0, 0.2, n)}) for n in [6000, 4000, 1800, 1200]]
drawSubplots(df1, df2, df3, df4, "Title")

Matplotlib fill_between edge

I need to create a plot as close to this picture as possible (given the generated dataframe code below):
And here's the output plot of my code:
What I am having problems with is:
The edge of fill_between is not sharp as in the picture. What I have is some kind of white shadow. How do I change the line between the fillings to match a target picture?
How do I aling legend color lines to the center, but not to the left border which my code does?
Here's my code:
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import matplotlib.cm as cm
import numpy as np
import pandas as pd
ncols = 10
figsize = (20, 5)
fontsize = 14
dti = pd.date_range('2013-01-01', '2020-12-31', freq='2W')
probabilities_in_time = np.random.random((ncols, len(dti)))
probabilities_in_time = probabilities_in_time / \
probabilities_in_time.sum(axis=0)
probabilities_in_time = pd.DataFrame(probabilities_in_time).T
probabilities_in_time.index = dti
cm_subsection = np.linspace(0, 1, ncols)
colors = [cm.coolwarm(x) for x in cm_subsection]
def plot_time_probabilities(probabilities_in_time, figsize):
plt.figure(figsize=figsize)
plt.yticks(np.arange(0, 1.2, 0.2), fontsize=fontsize)
plt.xticks(fontsize=fontsize)
draw_stack_plot(colors, probabilities_in_time)
set_grid()
set_legend()
plt.show()
def draw_stack_plot(colors, probabilities_in_time):
for i, color in enumerate(colors):
if i == 0:
plt.plot(probabilities_in_time[i], color=color)
plt.fill_between(probabilities_in_time.index,
probabilities_in_time[0], color=color)
else:
probabilities_in_time[i] += probabilities_in_time[i-1]
plt.fill_between(probabilities_in_time.index,
probabilities_in_time[i], probabilities_in_time[i-1],
color=color)
plt.plot(probabilities_in_time[i], label=' Probability: {}'.format(
i), color=color)
def set_grid():
ax = plt.gca()
ax.set_axisbelow(False)
ax.xaxis.grid(True, linestyle='-', lw=1)
def set_legend():
leg = plt.legend(loc='lower left', fontsize=14, handlelength=1.3)
for i in leg.legendHandles:
i.set_linewidth(12)
plot_time_probabilities(probabilities_in_time, figsize)
To set the legend in the center, you can set loc='center', or you can put the legend outside. To avoid that the legend handles grow to larger, you can leave out .set_linewidth(12) (this sets a very wide edge width of 12 points).
Shifting the colors by one position can help to show the fill borders more pronounced. To still have a correct legend, the label should then be added to fill_between.
The code below also tries to simplify part of the calls:
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np
import pandas as pd
ncols = 10
figsize = (20, 5)
fontsize = 14
dti = pd.date_range('2013-01-01', '2020-12-31', freq='2W')
probabilities_in_time = np.random.random((ncols, len(dti)))
probabilities_in_time = probabilities_in_time / probabilities_in_time.sum(axis=0)
probabilities_in_time = pd.DataFrame(probabilities_in_time).T
probabilities_in_time.index = dti
cm_subsection = np.linspace(0, 1, ncols)
colors = cm.coolwarm(cm_subsection)
def plot_time_probabilities(probabilities_in_time, figsize):
plt.figure(figsize=figsize)
plt.yticks(np.arange(0, 1.2, 0.2), fontsize=fontsize)
plt.xticks(fontsize=fontsize)
draw_stack_plot(colors, probabilities_in_time)
set_grid()
set_legend()
# plt.margins(x=0, y=0)
plt.margins(x=0.02)
plt.tight_layout()
plt.show()
def draw_stack_plot(colors, probabilities_in_time):
current_probabilities = 0
for i, color in enumerate(colors):
plt.fill_between(probabilities_in_time.index,
probabilities_in_time[i] + current_probabilities, current_probabilities,
color=color, label=f' Probability: {i}')
current_probabilities += probabilities_in_time[i]
plt.plot(current_probabilities,
color=colors[0] if i <= 1 else colors[-1] if i >= 8 else colors[i - 1] if i < 5 else colors[i + 1])
def set_grid():
ax = plt.gca()
ax.set_axisbelow(False)
ax.xaxis.grid(True, linestyle='-', lw=1)
def set_legend():
leg = plt.legend(loc='lower left', fontsize=14, handlelength=1.3)
# leg = plt.legend(loc='upper left', bbox_to_anchor=(1.01, 1), fontsize=14, handlelength=1.3)
# for i in leg.legendHandles:
# i.set_linewidth(12)
plot_time_probabilities(probabilities_in_time, figsize)

How to limit lower error of bar plot to 0?

I calculated the rttMeans and rttStds arrays. However, the value of rttStds makes the lower error less than 0.
rttStds = [3.330311915835426, 3.3189677330174883, 3.3319538853150386, 3.325173772304221, 3.3374145232695813]
How to set lower error to 0 instead of -#?
The python bar plot code is bellow.
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(rc={'figure.figsize':(18,16)},style='ticks',font_scale = 1.5,font='serif')
N = 5
ind = ['RSU1', 'RSU2', 'RSU3', 'RSU4', 'RSU5'] # the x locations for the groups
width = 0.4 # the width of the bars: can also be len(x) sequence
fig = plt.figure(figsize=(10,6))
ax = fig.add_subplot(111)
p1 = plt.bar(ind, rttMeans, width, yerr=rttStds, log=False, capsize = 16, color='green', hatch="/", error_kw=dict(elinewidth=3,ecolor='black'))
plt.margins(0.01, 0)
#Optional code - Make plot look nicer
plt.xticks(rotation=0)
i=0.18
for row in rttMeans:
plt.text(i, row, "{0:.1f}".format(row), color='black', ha="center")
i = i + 1
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
params = {'axes.titlesize':24,
'axes.labelsize':24,
'xtick.labelsize':28,
'ytick.labelsize':28,
'legend.fontsize': 24,
'axes.spines.right':False,
'axes.spines.top':False}
plt.rcParams.update(params)
plt.tick_params(axis="y", labelsize=28, labelrotation=20, labelcolor="black")
plt.tick_params(axis="x", labelsize=28, labelrotation=20, labelcolor="black")
plt.ylabel('RT Time (millisecond)', fontsize=24)
plt.title('# Participating RSUs', fontsize=24)
# plt.savefig('RSUs.pdf', bbox_inches='tight')
plt.show()
You can pass yerr as a pair [lower_errors, upper_errors] where you can control lower_errors :
lowers = np.minimum(rttStds,rttMeans)
p1 = plt.bar(ind, rttMeans, width, yerr=[lowers,rttStds], log=False, capsize = 16, color='green', hatch="/", error_kw=dict(elinewidth=3,ecolor='black'))
Output:

Categories

Resources