This code:
# Draw two standard-normal samples of size N in wide form.
N = 1000
df = pd.DataFrame({
'distribution_1': np.random.randn(N),
'distribution_2': np.random.randn(N)
})
# Halve the second sample so the two distributions have different spreads.
df['distribution_2'] = df['distribution_2'] / 2
# Reshape to long form: a 'value' column plus a 'distribution' label column.
df = df.melt(value_vars=['distribution_1', 'distribution_2'], value_name='value', var_name='distribution')
# Plot the empirical CDF of 'value', coloured by 'distribution'.
g = sns.ecdfplot(data=df, x='value', hue='distribution')
# Leave headroom below 0 and above 1 on the y-axis.
g.set(ylim=(-.1, 1.1))
yields a figure like this:
The CDFs of both distributions do not extend to the limits of the x-axis. I would like to know how to do this. In ggplot2 there is a boolean flag called pad that does this (see e.g. REF).
Is this also possible in seaborn? (I was unable to find it ...)
sns.ecdfplot() doesn't seem to support such an option. As a workaround, you could loop through the generated lines and move the endpoints. (Also note that sns.ecdfplot doesn't return a FacetGrid, but a subplot. Naming the return value g might be a bit confusing when comparing with code in examples and tutorials.)
from matplotlib import pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
# Two samples of size N; the second has half the spread of the first.
N = 1000
df = pd.DataFrame({'distribution_1': np.random.randn(N),
                   'distribution_2': np.random.randn(N) / 2})
# Long form: one 'value' column plus a 'distribution' label column.
df = df.melt(value_vars=['distribution_1', 'distribution_2'],
             value_name='value', var_name='distribution')

ax = sns.ecdfplot(data=df, x='value', hue='distribution', palette='winter')
ax.set_ylim(-.1, 1.1)

# Read the autoscaled x-limits once, before touching any line, and pin them
# so that extending the curves cannot cause the axis to rescale.
xmin, xmax = ax.get_xlim()
ax.set_xlim(xmin, xmax)

for line in ax.lines:
    x, y = line.get_data()
    # Append a closing vertex at the right edge instead of overwriting the
    # last one: the last vertex is a real sample, and moving it would shift
    # the final step of the curve to the axis limit.
    x = np.append(x, xmax)
    y = np.append(y, y[-1])
    # The first vertex is the curve's left anchor; pull it to the left edge.
    x[0] = xmin
    line.set_data(x, y)

plt.show()
This question is adapted from this answer; however, the solution provided there does not work, and the following is my result. I am interested in adding an individual title on the right side of each subplot.
(P.S. No matter how large a y offset I provide, the title seems to stay at the same y-value.)
from matplotlib import pyplot as plt
import numpy as np
# Two stacked subplots, each with its own y-axis label.
fig, axes = plt.subplots(nrows=2)
ax0label = axes[0].set_ylabel('Axes 0')
ax1label = axes[1].set_ylabel('Axes 1')
title = axes[0].set_title('Title')
# Attempt to position the title relative to the first y-label (shifted by
# `offset`) and rotate it to read vertically.
# NOTE(review): the surrounding text reports that the y component of the
# offset has no visible effect on where the title ends up.
offset = np.array([-0.15, 0.0])
title.set_position(ax0label.get_position() + offset)
title.set_rotation(90)
fig.tight_layout()
plt.show()
Something like this? This is the only other way I can think of.
from matplotlib import pyplot as plt
import numpy as np
fig, axes = plt.subplots(nrows=2)

# Regular (left-hand) y-labels, one per subplot.
ax0label, ax1label = (
    axis.set_ylabel(text)
    for axis, text in zip(axes, ('Axes 0', 'Axes 1'))
)

# Give every subplot a twin y-axis and use the twin's y-label as the
# per-subplot side title.
ax01, ax02 = (axis.twinx() for axis in axes)
for twin in (ax01, ax02):
    twin.set_ylabel('title')

fig.tight_layout()
plt.show()
import matplotlib.pyplot as plt
import numpy as np
# Sample sin(pi * t) on a fine grid covering 0..5 seconds.
delta = 0.0001
t = np.arange(0, 5 + delta, delta)
xt = np.sin(np.pi * t)

fig = plt.figure(1)

# First panel of a 3x2 grid: red trace.
ax1 = plt.subplot(3, 2, 1)
ax1.plot(t, xt, "tab:red")
ax1.set(ylabel="Amplitude", xlabel='Time(s)', title='for n = 1')
ax1.grid()

# Second panel of the same grid: green trace.
ax2 = plt.subplot(3, 2, 2)
ax2.plot(t, xt, "tab:green")
ax2.set(ylabel="Amplitude", xlabel='Time(s)', title='for n = 2')
ax2.grid()

plt.tight_layout()
plt.show()
Hi, this is just a snippet of my code, but my problem is basically with the x axis of the subplots.
On the axis the tick values jump 0-2-4, and I need them to be 0-1-2-3-4-5.
Is there a way I can get those values to display on the x axis rather than just 0-2-4?
There are several possible ways of doing this. One of the simplest is to manually set the x ticks.
# Put a tick on every integer from 0 through 5 on both subplots.
for axis in (ax1, ax2):
    axis.set_xticks(np.arange(6))
Alternatively, you can set a locator for the x axis.
import matplotlib as mpl
# Import the submodule explicitly: `import matplotlib` alone does not
# guarantee that `mpl.ticker` has been loaded.
import matplotlib.ticker

# One major tick per unit step on the x axis of both subplots.
for axis in (ax1, ax2):
    axis.xaxis.set_major_locator(mpl.ticker.MultipleLocator(1))
When I use the matplotlib font_manager module to change the axis ticks to a custom font, only the first tick is updated. Can anyone help me figure out why this is?
I can loop through the tick labels in the axis and set the font for each one, but I'm wondering why the fontproperties argument isn't working.
Example code below, results in only the first x label having the correct font and size and the rest of the ticks with default formatting.
# Font properties built from a local TTF file, at size 25.
tick_prop = fm.FontProperties(fname="Jack.ttf", size=25)
ax.set_xticks([0,1,2])
# Reported behaviour: only the first x label ends up with `tick_prop` applied.
ax.set_xticklabels(x_labels,fontproperties=tick_prop)
I have the same problem, and I can reproduce it. I do not know what the underlying reason is. The code shown below recreates the problem for me: only the first tick on the x- and y-axis has the new font; the others do not.
import numpy as np
from matplotlib import font_manager as fm
import matplotlib.pyplot as plt
# --- Dummy data for reproducibility ---
N = 100
x = np.linspace(1, N, N)
# Random walk: cumulative sum of standard-normal steps.
ydata = np.cumsum(np.random.randn(N))
y_low, y_high = min(ydata), max(ydata)
# Four y ticks evenly spread from the lowest to the highest data value.
yticks = [i * (y_high - y_low) / 3 + y_low for i in range(4)]
xticks = list(range(0, 101, 10))
yticks_labels = ['very cold', 'chilly', 'nice', 'hot']
xticks_labels = xticks

# --- Generate plot ---
path = '/home/path/to/font/myfont.ttf'
tick_prop = fm.FontProperties(fname=path, size=25)
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(x, ydata, color='xkcd:red')
# Tick positions first, then the labels styled through `fontproperties`.
ax.set_xticks(xticks)
ax.set_yticks(yticks)
ax.set_yticklabels(yticks_labels, fontproperties=tick_prop)
ax.set_xticklabels(xticks_labels, fontproperties=tick_prop)
plt.show()
What works for me is the following approach, where I use the path to the font explicitly (use Path() to generate path from string). I have marked all new parts with comments.
import numpy as np
from matplotlib import font_manager as fm
import matplotlib.pyplot as plt
from pathlib import Path # new part
# --- Dummy data ---
N = 100
x = np.linspace(1, N, N)
# Random walk: cumulative sum of standard-normal steps.
ydata = np.cumsum(np.random.randn(N))
y_low, y_high = min(ydata), max(ydata)
# Four y ticks evenly spread from the lowest to the highest data value.
yticks = [i * (y_high - y_low) / 3 + y_low for i in range(4)]
xticks = list(range(0, 101, 10))
yticks_labels = ['very cold', 'chilly', 'nice', 'hot']
xticks_labels = xticks

# --- Generate plot ---
path = '/home/path/to/font/myfont.ttf'
fpath = Path(path)  # new part
tick_prop = fm.FontProperties(fname=path, size=25)
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(x, ydata, color='xkcd:red', label='temperature')
ax.set_xticks(xticks)
ax.set_yticks(yticks)
# Tick labels take the font file path directly; the legend still uses the
# FontProperties object.
ax.set_yticklabels(yticks_labels, font=fpath, fontsize=25)  # new part
ax.set_xticklabels(xticks_labels, font=fpath, fontsize=25)  # new part
ax.legend(prop=tick_prop)
plt.show()
This solution comes with its own problems. I do not like it, but it works. If anyone could give an explanation, or an alternative, I would be more than happy.
I am trying to make a profile plot for two columns of a pandas.DataFrame. I would not expect this to be in pandas directly but it seems there is nothing in matplotlib either. I have searched around and cannot find it in any package other than rootpy. Before I take the time to write this myself I thought I would ask if there was a small package that contained profile histograms, perhaps where they are known by a different name.
If you don't know what I mean by "profile histogram" have a look at the ROOT implementation. http://root.cern.ch/root/html/TProfile.html
You can easily do it using scipy.stats.binned_statistic.
import scipy.stats
import numpy
import matplotlib.pyplot as plt
# Uniform x in [0, 1) and y = x plus Gaussian noise (sigma = 0.2).
x = numpy.random.rand(10000)
noise = scipy.stats.norm(0, 0.2).rvs(10000)
y = x + noise

# Per-bin mean of y and of y**2 in one pass over 50 bins spanning (0, 1).
means_result = scipy.stats.binned_statistic(
    x, [y, y**2], bins=50, range=(0,1), statistic='mean'
)
means, means2 = means_result.statistic
bin_edges = means_result.bin_edges

# Standard deviation per bin from E[y^2] - E[y]^2.
standard_deviations = numpy.sqrt(means2 - numpy.square(means))
bin_centers = (bin_edges[1:] + bin_edges[:-1]) / 2.

plt.errorbar(
    x=bin_centers,
    y=means,
    yerr=standard_deviations,
    linestyle='none',
    marker='.',
)
Use seaborn. Data as in @MaxNoe's answer:
import numpy as np
import seaborn as sns
# just some random numbers to get started
x = np.random.uniform(-2, 2, 10000)
y = np.random.normal(x**2, np.abs(x) + 1)
# Bin x into 10 bins and plot the per-bin estimate of y. `fit_reg` is a
# boolean switch, so turn the regression line off with False rather than None.
sns.regplot(x=x, y=y, x_bins=10, fit_reg=False)
You can do much more (error bands are from bootstrap, you can change the estimator on the y-axis, add regression, ...)
While @Keith's answer seems to fit what you mean, it is quite a lot of code. I think this can be done much more simply, so that one gets the key concepts and can adjust and build on top of it.
Let me stress one thing: what ROOT is calling a ProfileHistogram is not a special kind of plot. It is an errorbar plot. Which can simply be done in matplotlib.
It is a special kind of computation, and that's not the task of a plotting library. This lies in the pandas realm, and pandas is great at stuff like this. It's symptomatic of ROOT, as the giant monolithic pile it is, to have an extra class for this.
So what you want to do is: discretize in some variable x and for each bin, calculate something in another variable y.
This can easily be done using np.digitize together with the pandas groupby and aggregate methods.
Putting it all together:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# just some random numbers to get started
x = np.random.uniform(-2, 2, 10000)
y = np.random.normal(x**2, np.abs(x) + 1)
df = pd.DataFrame({'x': x, 'y': y})

# calculate which bin each row belongs to, based on `x`
# `bins` holds the bin edges: 101 edges give 100 equally sized bins, which
# np.digitize numbers 1..100 (0 and 101 mean "outside the edges")
bins = np.linspace(-2, 2, 101)
df['bin'] = np.digitize(x, bins=bins)
bin_centers = 0.5 * (bins[:-1] + bins[1:])
bin_width = bins[1] - bins[0]

# group by bin, so we can calculate stuff
binned = df.groupby('bin')

# calculate mean and standard error of the mean for y in each bin
result = binned['y'].agg(['mean', 'sem'])

# Keep only real bins and look each centre up by its bin number. Assigning
# `bin_centers` wholesale would fail whenever a bin is empty or a value falls
# outside the edges, because the group count would then differ from 100.
inside = (result.index >= 1) & (result.index <= len(bin_centers))
result = result[inside].copy()
result['x'] = bin_centers[result.index.to_numpy() - 1]
result['xerr'] = bin_width / 2

# plot it
result.plot(
    x='x',
    y='mean',
    xerr='xerr',
    yerr='sem',
    linestyle='none',
    capsize=0,
    color='black',
)
plt.savefig('result.png', dpi=300)
Just like ROOT ;)
I made a module myself for this functionality.
import pandas as pd
from pandas import Series, DataFrame
import numpy as np
import matplotlib.pyplot as plt
def Profile(x, y, nbins, xmin, xmax, ax):
    """Draw a profile histogram of *y* versus *x* on *ax*.

    The interval from *xmin* to *xmax* is split into *nbins* equal-width
    bins. For every bin the mean of ``y`` is drawn at the bin centre with a
    vertical error bar equal to the standard error of the mean (sample
    standard deviation divided by the square root of the bin count) and a
    horizontal bar of half a bin width.

    Parameters
    ----------
    x, y : array-like or pandas.Series
        Paired observations; they are combined into one DataFrame.
    nbins : int
        Number of equal-width bins.
    xmin, xmax : float
        Lower and upper edge of the binned range. Observations with
        ``x < xmin`` or ``x >= xmax`` fall outside every bin and are ignored.
    ax : object
        Target providing an ``errorbar`` method (a matplotlib Axes).

    Returns
    -------
    The *ax* that was passed in.

    Notes
    -----
    Empty bins yield NaN for the mean and its error; bins with a single
    observation have a NaN error because the sample standard deviation is
    undefined there.
    """
    df = DataFrame({'x': x, 'y': y})

    bin_width = (xmax - xmin) / nbins
    binedges = xmin + bin_width * np.arange(nbins + 1)
    # np.digitize numbers the real bins 1..nbins; 0 and nbins + 1 mark values
    # that lie outside [xmin, xmax).
    df['bin'] = np.digitize(df['x'], binedges)
    bincenters = xmin + bin_width * np.arange(nbins) + (xmax - xmin) / (2 * nbins)

    # Per-bin count, mean and sample standard deviation in one grouped pass
    # (label-based; `.ix` no longer exists in current pandas).
    per_bin = df.groupby('bin')['y'].agg(['count', 'mean', 'std'])
    # Restrict to the real bins and make empty ones explicit NaN rows.
    per_bin = per_bin.reindex(range(1, nbins + 1))

    ProfileFrame = DataFrame(
        {
            'bincenters': bincenters,
            'N': per_bin['count'],
            'ymean': per_bin['mean'],
            'yStandDev': per_bin['std'],
        },
        index=range(1, nbins + 1),
    )
    ProfileFrame['yMeanError'] = ProfileFrame['yStandDev'] / np.sqrt(ProfileFrame['N'])

    # fmt='none' draws error bars only, without markers or a connecting line.
    ax.errorbar(
        ProfileFrame['bincenters'],
        ProfileFrame['ymean'],
        yerr=ProfileFrame['yMeanError'],
        xerr=(xmax - xmin) / (2 * nbins),
        fmt='none',
    )
    return ax
def Profile_Matrix(frame):
    """Draw a matrix of profile plots for the numeric columns of *frame*.

    For n numeric columns an n-by-n grid of axes is created. The panel in
    row ``i`` / column ``j`` shows the profile of the row column (y) binned
    in the column column (x), so the x variable is constant down a grid
    column and the y variable is constant along a grid row. Only the left
    column shows y axes and only the bottom row shows x axes.

    Parameters
    ----------
    frame : pandas.DataFrame
        Input data; non-numeric columns are ignored and missing values are
        masked out pairwise.

    Returns
    -------
    numpy.ndarray
        The 2-D array of axes.
    """
    # Layout adapted from the scatter-matrix code in
    # https://github.com/pydata/pandas/blob/master/pandas/tools/plotting.py
    # The private pandas helpers it relied on no longer exist, so their
    # effect is spelled out with public matplotlib / pandas calls.
    from matplotlib.artist import setp

    range_padding = 0.05
    nbins = 100

    df = frame._get_numeric_data()
    n = df.columns.size
    fig, axes = plt.subplots(nrows=n, ncols=n, squeeze=False)

    # no gaps between subplots
    fig.subplots_adjust(wspace=0, hspace=0)

    mask = df.notnull()

    # Padded (min, max) range per column, used as binning range and limits.
    boundaries_list = []
    for a in df.columns:
        values = df[a].values[mask[a].values]
        rmin_, rmax_ = np.min(values), np.max(values)
        rdelta_ext = (rmax_ - rmin_) * range_padding / 2.
        boundaries_list.append((rmin_ - rdelta_ext, rmax_ + rdelta_ext))

    for i, a in enumerate(df.columns):
        for j, b in enumerate(df.columns):
            # rows where both columns are present
            common = (mask[a] & mask[b]).values
            xmin, xmax = boundaries_list[j]
            ax = axes[i, j]

            # x is the grid-column variable `b`, y the grid-row variable `a`,
            # matching the labels and the shared-axis layout below.
            Profile(df[b][common], df[a][common], nbins, xmin, xmax, ax)

            # Inner axes are hidden, so every panel in a grid column / row
            # must use the same limits for the visible ticks to apply to it.
            ax.set_xlim(xmin, xmax)
            ax.set_ylim(boundaries_list[i])

            ax.set_xlabel(b)
            ax.xaxis.set_ticks_position('bottom')
            ax.xaxis.set_label_position('bottom')
            setp(ax.get_xticklabels(), rotation=90)

            ax.set_ylabel(a)
            ax.yaxis.set_ticks_position('left')
            ax.yaxis.set_label_position('left')

            if j != 0:
                ax.yaxis.set_visible(False)
            if i != n - 1:
                ax.xaxis.set_visible(False)

    for ax in axes.flat:
        setp(ax.get_xticklabels(), fontsize=8)
        setp(ax.get_yticklabels(), fontsize=8)

    return axes
To my knowledge, matplotlib still doesn't allow producing profile histograms directly.
You can instead give a look at Hippodraw, a package developed at SLAC, that can be used as a Python extension module.
Here there is a Profile histogram example:
http://www.slac.stanford.edu/grp/ek/hippodraw/datareps_root.html#datareps_profilehist