Set matplotlib tick locators, but specify spacing around them - python

For example if I have the following:
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
fig, ax = plt.subplots()
ax.set_xlim(left=0, right=11)
ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
Which looks as:
Here the range is set to include 0 and 11 so that there's some spacing around the plotted values, but the data only contains values of [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - so I'd like to not have 0 and 11 on the xaxis.
Looking at the documentation for ticker.MultipleLocator (here) it's not clear how this should be done (or if it can be done with a locator at all). I tried to use the view_limits method but it just seems to return a tuple.
Ideally the values 0 and 11 would be gone, and the plot would look as:
edit 1
The following "works"
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
fig, ax = plt.subplots()
ax.set_xlim(left=1e-2, right=11 - 1e-2)
ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
Though removing fractional amounts in order to not display something isn't a solution here as it's constrained to the base value of the MultipleLocator.
For example - what if I wanted the following:
ax.set_xlim(left=1e-2, right=11 - 1e-2)
ax.xaxis.set_major_locator(ticker.MultipleLocator(0.5))
Which looks as:
Then I have 10.5 and so on, whereas I might still like the spacing to be a particular value not limited to the MultipleLocator size.

Controlling the lower limit is a bit tricky and is not supported out of the box by any of the Matplotlib tickers. However, taking a look at the source code
(https://github.com/matplotlib/matplotlib/blob/v3.5.1/lib/matplotlib/ticker.py#L2734-L2751) gives a good hint how to implement this by deriving a new MyMultipleLocator class from MultipleLocator by yourself. Here is a piece of code that should work:
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator, _Edge_integer
import numpy as np
class MyMultipleLocator(MultipleLocator):
    """Tick locator placing ticks at ``offset + k * base`` for integer ``k``.

    Parameters
    ----------
    base : float
        Spacing between consecutive ticks.
    offset : float
        Constant shift applied to every tick location.
    """

    def __init__(self, base=1.0, offset=0.):
        # Let the parent class build its edge helper (``self._edge``) rather
        # than constructing the private ``_Edge_integer`` by hand.
        super().__init__(base)
        self._offset = offset

    def tick_values(self, vmin, vmax):
        """Return the tick locations for the view interval ``[vmin, vmax]``.

        One extra tick is generated on each side of the interval; the result
        is passed through ``raise_if_exceeds`` before being returned.
        """
        if vmax < vmin:
            vmin, vmax = vmax, vmin
        step = self._edge.step
        # Snap in offset-free coordinates so that ticks stay on the
        # ``offset + k * step`` grid for any offset.
        vmin = vmin - self._offset
        vmax = vmax - self._offset
        # ``ge`` yields the smallest integer n with n * step >= vmin; it must
        # be scaled by ``step`` to become a data coordinate again.
        vmin = self._edge.ge(vmin) * step
        n = (vmax - vmin + 0.001 * step) // step
        locs = self._offset + vmin - step + np.arange(n + 3) * step
        return self.raise_if_exceeds(locs)
fig, ax = plt.subplots()
x = np.arange(1, 11)
ax.plot(x, np.random.randint(-3, 3, size=x.size))
tick_spacing = 2
ax.xaxis.set_major_locator(MyMultipleLocator(base=tick_spacing, offset=0))
You can now additionally change the offset manually in the last line. You could even do this automatically by checking if the minimum (the nearest int, that is) is an odd or an even number, but if I understand the question correctly then this is not required here.
Output from the above code:
example 1
fig, ax = plt.subplots()
ax.xaxis.set_major_locator(MyMultipleLocator(base=1, offset=0))
ax.set_xlim(0.5, 10.5)
Which gives:
example 2
fig, ax = plt.subplots()
ax.xaxis.set_major_locator(MyMultipleLocator(base=2, offset=0.5))
ax.set_xlim(0., 11)
Which gives:

Related

How do I make my histogram of unequal bins show properly?

My data consists of the following:
Majority numbers < 60, and then a few outliers that are in the 2000s.
I want to display it in a histogram with the following bin ranges:
0-1, 1-2, 2-3, 3-4, ..., 59-60, 60-max
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.axes as axes
b = list(range(61)) + [2000] # will make [0, 1, ..., 60, 2000]
plt.hist(b, bins=b, edgecolor='black')
plt.xticks(b)
plt.show()
This shows the following:
Essentially what you see is all the numbers 0 .. 60 squished together on the left, and the 2000 on the right. This is not what I want.
So I remove the [2000] and get something like what I am looking for:
As you can see now it is better, but I still have the following problems:
How do I fix this such that the graph doesn't have any white space around (there's a big gap before 0 and after 60).
How do I fix this such that after 60, there is a 2000 tick that shows at the very end, while still keeping roughly the same spacing (not like the first?)
Here is one hacky solution using some random data. I still don't quite understand your second question but I tried to do something based on your wordings
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.axes as axes
fig, ax = plt.subplots(figsize=(12, 6))
data= np.random.normal(10, 5, 5000)
upper = 31
outlier = 2000
data = np.append(data, 100*[upper])
b = list(range(upper)) + [upper]
plt.hist(data, bins=b, edgecolor='black')
plt.xticks(b)
b[-1] = outlier
ax.set_xticklabels(b)
plt.xlim(0, upper)
plt.show()

Add meaningful minor ticks to a modified axis?

This example is specifically relating to plotting data as a function of log(redshift+1) and having a reference redshift axis but can be easily generalised to any functional modification.
I've written a neat little function (with the help of some question/answers on here) that allows me to easily add a redshift axis to the top of a log(1+redshift) plot. I am really struggling to get meaningful minor ticks (and would rather not share my dismal efforts!).
Here is the code, including example plot:
In this case, I would like redshifts at every 0.1 increment not occupied by a major tick, with the flexibility of changing that 0.1 in the function call.
import matplotlib.pyplot as plt
import numpy as np
def add_zaxis(axis, denomination):
    """Add a twin top x-axis labelled in redshift to a log10(1 + z) axis.

    Major ticks are placed at redshift multiples of ``denomination``, starting
    from the redshift at axis position 0 and ending at the largest multiple
    not exceeding the redshift at the right-hand limit. The x-limits of
    ``axis`` are restored before returning.

    Returns the newly created twin axis.
    """
    saved_limits = axis.get_xlim()
    # Temporarily pin the left limit to 0 so the tick range starts at z = 0.
    axis.set_xlim(0., None)
    z_low, z_high = [(10**edge) - 1 for edge in axis.get_xlim()]
    # Round the upper redshift down to a whole multiple of the spacing.
    z_high = int(np.floor(z_high / denomination)) * denomination
    n_ticks = len(np.arange(z_low, z_high, denomination)) + 1
    redshifts = np.linspace(z_low, z_high, n_ticks)
    tick_positions = [np.log10(1 + z) for z in redshifts]

    top_axis = axis.twiny()
    top_axis.set_xticks(tick_positions)
    top_axis.set_xticklabels(['{:g}'.format(z) for z in redshifts])
    top_axis.set_xlim(saved_limits)
    axis.set_xlim(saved_limits)
    return top_axis
data = np.random.randn(500)
data = data[data>0.]
fig, ax = plt.subplots(1)
plt.hist(np.log10(data+1), bins=22)
ax.set_xlabel('log(z+1)')
ax.minorticks_on()
axz = add_zaxis(ax,.3)
axz.set_xlabel('z')
axz.minorticks_on()
The idea would be to use a FixedLocator to position the ticks on the axis. You may then have one FixedLocator for the major ticks and one for the minor ticks.
import matplotlib.pyplot as plt
import matplotlib.ticker
import numpy as np
def add_zaxis(ax, d=0.3, dminor=0.1):
    """Add a twin top x-axis labelled in redshift z to a log10(z + 1) axis.

    Parameters
    ----------
    ax : matplotlib Axes
        Axes whose x coordinate is log10(z + 1).
    d : float
        Redshift spacing of the major ticks.
    dminor : float
        Redshift spacing of the minor ticks.

    Returns
    -------
    The twin axes carrying the redshift ticks.
    """
    def to_axis(z):
        return np.log10(z + 1)

    def to_redshift(x):
        return 10.0**x - 1.

    xlim = ax.get_xlim()
    zmax = to_redshift(xlim[1])
    axz = ax.twiny()
    axz.set_xlim(xlim)

    zs = np.arange(0, zmax, d)
    axz.xaxis.set_major_locator(matplotlib.ticker.FixedLocator(to_axis(zs)))
    # FixedFormatter takes label strings; '{:g}' also hides the float noise
    # that np.arange accumulates (e.g. 0.8999999999999999).
    axz.xaxis.set_major_formatter(
        matplotlib.ticker.FixedFormatter(['{:g}'.format(z) for z in zs]))

    zsminor = np.arange(0, zmax, dminor)
    axz.xaxis.set_minor_locator(
        matplotlib.ticker.FixedLocator(to_axis(zsminor)))
    # Booleans, not 'on'/'off': any non-empty string is truthy, so 'off'
    # would leave the ticks switched on.
    axz.tick_params(axis='x', which='minor', bottom=False, top=True)
    axz.set_xlabel('z')
    return axz
# Demo: histogram of log10(z + 1) with a redshift axis on top.
data = np.random.randn(400)
data = data[data > 0.]
fig, ax = plt.subplots(1)
plt.hist(np.log10(data + 1), bins=22)
ax.set_xlabel('log(z+1)')
add_zaxis(ax)
ax.minorticks_on()
# Pass booleans: the strings 'on'/'off' are both truthy, so top="off" would
# not hide the minor ticks on the top edge of the primary axis.
ax.tick_params(axis='x', which='minor', bottom=True, top=False)
plt.show()

Python matplotlib colorbar scientific notation base

I am trying to customise a colorbar on my matplotlib contourf plots. Whilst I am able to use scientific notation I am trying to change the base of the notation - essentially so that my ticks would be in the range of (-100,100) rather than (-10,10).
For example, this produces a simple plot...
import numpy as np
import matplotlib.pyplot as plt
z = (np.random.random((10,10)) - 0.5) * 0.2
fig, ax = plt.subplots()
plot = ax.contourf(z)
cbar = fig.colorbar(plot)
cbar.formatter.set_powerlimits((0, 0))
cbar.update_ticks()
plt.show()
like so:
However, I would like the label above the colorbar to be 1e-2 and the numbers to range from -10 to 10.
How would I go about this?
A possible solution can be to subclass the ScalarFormatter and fix the order of magnitude as in this question: Set scientific notation with fixed exponent and significant digits for multiple subplots
You would then call this formatter with the order of magnitude as the argument order, OOMFormatter(-2, mathText=False). mathText is set to false to obtain the notation from the question, i.e.
while setting it to True, would give .
You can then set the formatter to the colorbar via the colorbar's format argument.
import numpy as np; np.random.seed(0)
import matplotlib.pyplot as plt
import matplotlib.ticker
class OOMFormatter(matplotlib.ticker.ScalarFormatter):
    """ScalarFormatter with a fixed order of magnitude and tick format.

    ``order`` is the exponent used for the axis multiplier, ``fformat`` the
    %-style format applied to each tick value; ``offset`` and ``mathText``
    are forwarded to ScalarFormatter as ``useOffset`` and ``useMathText``.
    """

    def __init__(self, order=0, fformat="%1.1f", offset=True, mathText=True):
        self.oom = order
        self.fformat = fformat
        super().__init__(useOffset=offset, useMathText=mathText)

    def _set_order_of_magnitude(self):
        # Skip the automatic computation and always use the requested order.
        self.orderOfMagnitude = self.oom

    def _set_format(self, vmin=None, vmax=None):
        # Use the user-supplied format, wrapped for mathtext when enabled.
        if self._useMathText:
            self.format = r'$\mathdefault{%s}$' % self.fformat
        else:
            self.format = self.fformat
z = (np.random.random((10,10)) - 0.5) * 0.2
fig, ax = plt.subplots()
plot = ax.contourf(z)
cbar = fig.colorbar(plot, format=OOMFormatter(-2, mathText=False))
plt.show()
For matplotlib versions < 3.1 the class needs to look like this:
class OOMFormatter(matplotlib.ticker.ScalarFormatter):
    """ScalarFormatter with a fixed order of magnitude (matplotlib < 3.1).

    ``order`` is stored as the exponent to use, ``fformat`` as the %-style
    format for tick values; ``offset`` and ``mathText`` are forwarded to
    ScalarFormatter as ``useOffset`` and ``useMathText``.
    """
    def __init__(self, order=0, fformat="%1.1f", offset=True, mathText=True):
        self.oom = order
        self.fformat = fformat
        matplotlib.ticker.ScalarFormatter.__init__(self,useOffset=offset,useMathText=mathText)
    def _set_orderOfMagnitude(self, nothing):
        # The argument is ignored: the stored order is always used.
        self.orderOfMagnitude = self.oom
    def _set_format(self, vmin, vmax):
        # vmin/vmax are ignored; the stored format is used as-is, wrapped
        # for mathtext when that mode is enabled.
        self.format = self.fformat
        if self._useMathText:
            self.format = '$%s$' % matplotlib.ticker._mathdefault(self.format)
Similar to what @ImportanceOfBeingErnest described, you could use a FuncFormatter (docs) to which you just pass a function to determine the tick labels. This removes the auto generation of the 1e-2 header for your colorbar, but I imagine you can manually add that back in (I had trouble doing it, though was able to add it on the side). Using a FuncFormatter, you can just generate string tick values which has the advantage of not having to accept the way python thinks a number should be displayed.
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as tk
z = (np.random.random((10,10)) - 0.5) * 0.2
levels = list(np.linspace(-.1,.1,9))
fig, ax = plt.subplots()
plot = ax.contourf(z, levels=levels)
def my_func(x, pos):
    """Return the label for tick value ``x`` expressed in units of 1e-2.

    The label is derived from the tick value itself rather than from a
    lookup by ``pos``, so it stays correct when ``pos`` is None or when the
    ticks do not coincide one-to-one with the contour levels. ``pos`` is
    accepted only because FuncFormatter passes it.
    """
    # '{:g}' drops floating-point noise such as 7.500000000000001.
    return '{:g}'.format(x * 100)
fmt1 = tk.FuncFormatter(my_func)
cbar = fig.colorbar(plot, format=fmt1)
cbar.set_label("1e-2")
plt.show()
This will generate a plot which looks like this.

Changing the length of axis lines in matplotlib

I am trying to change the displayed length of the axis of matplotlib plot. This is my current code:
import matplotlib.pyplot as plt
import numpy as np
# Styling constants for the visible spines and ticks.
linewidth = 2
outward = 10
ticklength = 4
tickwidth = 1
fig, ax = plt.subplots()
ax.plot(np.arange(100))
# Use booleans to hide the right/top ticks: the string "off" is truthy and
# would leave them enabled.
ax.tick_params(right=False, top=False, length=ticklength, width=tickwidth,
               direction="out")
for side in ["top", "right"]:
    ax.spines[side].set_visible(False)
# Thicken the remaining spines and push them away from the data area.
for line in ["left", "bottom"]:
    ax.spines[line].set_linewidth(linewidth)
    ax.spines[line].set_position(("outward", outward))
Which generates the following plot:
I would like my plot to look like the following with axis line shortened:
I wasn't able to find this in ax[axis].spines method. I also wasn't able to plot this nicely using ax.axhline method.
You could add these lines to the end of your code:
ax.spines['left'].set_bounds(20, 80)
ax.spines['bottom'].set_bounds(20, 80)
for i in [0, -1]:
ax.get_yticklabels()[i].set_visible(False)
ax.get_xticklabels()[i].set_visible(False)
for i in [0, -2]:
ax.get_yticklines()[i].set_visible(False)
ax.get_xticklines()[i].set_visible(False)
To get this:

Plotting profile histograms in python

I am trying to make a profile plot for two columns of a pandas.DataFrame. I would not expect this to be in pandas directly but it seems there is nothing in matplotlib either. I have searched around and cannot find it in any package other than rootpy. Before I take the time to write this myself I thought I would ask if there was a small package that contained profile histograms, perhaps where they are known by a different name.
If you don't know what I mean by "profile histogram" have a look at the ROOT implementation. http://root.cern.ch/root/html/TProfile.html
You can easily do it using scipy.stats.binned_statistic.
import scipy.stats
import numpy
import matplotlib.pyplot as plt
x = numpy.random.rand(10000)
y = x + scipy.stats.norm(0, 0.2).rvs(10000)
means_result = scipy.stats.binned_statistic(x, [y, y**2], bins=50, range=(0,1), statistic='mean')
means, means2 = means_result.statistic
standard_deviations = numpy.sqrt(means2 - means**2)
bin_edges = means_result.bin_edges
bin_centers = (bin_edges[:-1] + bin_edges[1:])/2.
plt.errorbar(x=bin_centers, y=means, yerr=standard_deviations, linestyle='none', marker='.')
Use seaborn. Data as from #MaxNoe
import numpy as np
import seaborn as sns
# just some random numbers to get started
x = np.random.uniform(-2, 2, 10000)
y = np.random.normal(x**2, np.abs(x) + 1)
sns.regplot(x=x, y=y, x_bins=10, fit_reg=None)
You can do much more (error bands are from bootstrap, you can change the estimator on the y-axis, add regression, ...)
While #Keith's answer seems to fit what you mean, it is quite a lot of code. I think this can be done much simpler, so one gets the key concepts and can adjust and build on top of it.
Let me stress one thing: what ROOT is calling a ProfileHistogram is not a special kind of plot. It is an errorbar plot. Which can simply be done in matplotlib.
It is a special kind of computation and that's not the task of a plotting library. This lies in the pandas realm, and pandas is great at stuff like this. It's symptomatical for ROOT as the giant monolithic pile it is to have an extra class for this.
So what you want to do is: discretize in some variable x and for each bin, calculate something in another variable y.
This can easily be done using np.digitize together with the pandas groupby and aggregate methods.
Putting it all together:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# just some random numbers to get started
x = np.random.uniform(-2, 2, 10000)
y = np.random.normal(x**2, np.abs(x) + 1)
df = pd.DataFrame({'x': x, 'y': y})
# work out which bin each row belongs to, based on `x`;
# np.digitize needs the bin edges, so 101 edges give 100 equally sized bins
bins = np.linspace(-2, 2, 101)
df['bin'] = np.digitize(x, bins=bins)
bin_centers = 0.5 * (bins[:-1] + bins[1:])
bin_width = bins[1] - bins[0]
# group by bin, so we can calculate per-bin statistics
binned = df.groupby('bin')
# calculate mean and standard error of the mean for y in each bin
result = binned['y'].agg(['mean', 'sem'])
# keep exactly one row per regular bin (1..100): empty bins become NaN and
# under/overflow bins are dropped, so the rows always line up with bin_centers
result = result.reindex(np.arange(1, len(bins)))
result['x'] = bin_centers
result['xerr'] = bin_width / 2
# plot it
result.plot(
    x='x',
    y='mean',
    xerr='xerr',
    yerr='sem',
    linestyle='none',
    capsize=0,
    color='black',
)
plt.savefig('result.png', dpi=300)
Just like ROOT ;)
I made a module myself for this functionality.
import pandas as pd
from pandas import Series, DataFrame
import numpy as np
import matplotlib.pyplot as plt
def Profile(x, y, nbins, xmin, xmax, ax):
    """Draw a profile histogram of ``y`` versus ``x`` on ``ax``.

    ``x`` is split into ``nbins`` equal-width bins spanning ``[xmin, xmax)``.
    For each bin the mean of ``y`` is drawn with a vertical error bar equal
    to the standard error of the mean (sample standard deviation divided by
    the square root of the number of entries) and a horizontal error bar of
    half a bin width. Entries falling outside the bin range are ignored;
    empty bins yield NaN and are therefore not drawn.

    Parameters
    ----------
    x, y : array-like of equal length
    nbins : int
    xmin, xmax : float
    ax : object providing a matplotlib-style ``errorbar`` method

    Returns
    -------
    ``ax``, to allow chaining.
    """
    df = DataFrame({'x': x, 'y': y})
    binwidth = (xmax - xmin) / nbins
    binedges = xmin + binwidth * np.arange(nbins + 1)
    # np.digitize numbers the regular bins 1..nbins; 0 and nbins + 1 are the
    # underflow and overflow bins.
    df['bin'] = np.digitize(df['x'], binedges)
    bincenters = binedges[:-1] + binwidth / 2

    regular_bins = np.arange(1, nbins + 1)
    grouped = df.groupby('bin')
    profile = DataFrame({'bincenters': bincenters}, index=regular_bins)
    # reindex keeps only the regular bins and inserts NaN for empty ones.
    profile['N'] = grouped.size().reindex(regular_bins)
    profile['ymean'] = grouped['y'].mean().reindex(regular_bins)
    profile['yStandDev'] = grouped['y'].std().reindex(regular_bins)
    profile['yMeanError'] = profile['yStandDev'] / np.sqrt(profile['N'])

    # fmt='none' draws only the error bars, without markers or a line.
    ax.errorbar(profile['bincenters'], profile['ymean'],
                yerr=profile['yMeanError'], xerr=binwidth / 2, fmt='none')
    return ax
def Profile_Matrix(frame):
    """Draw an n x n grid of profile histograms for the numeric columns.

    The cell in row ``i`` and column ``j`` profiles column ``i`` (y axis)
    against column ``j`` (x axis), using 100 bins over the padded value range
    of column ``j``. Only the left-most column shows y axes and only the
    bottom row shows x axes.

    Returns the 2-D array of axes.
    """
    # Layout adapted from the pandas scatter_matrix implementation, using
    # public matplotlib/pandas calls instead of pandas plotting internals.
    from matplotlib.artist import setp

    range_padding = 0.05
    nbins = 100
    df = frame._get_numeric_data()
    n = df.columns.size
    fig, axes = plt.subplots(nrows=n, ncols=n, squeeze=False)
    # no gaps between subplots
    fig.subplots_adjust(wspace=0, hspace=0)
    mask = df.notnull()

    # Padded (min, max) range of every column, used as the x binning range.
    boundaries_list = []
    for a in df.columns:
        values = df[a].values[mask[a].values]
        rmin_, rmax_ = np.min(values), np.max(values)
        rdelta_ext = (rmax_ - rmin_) * range_padding / 2.
        boundaries_list.append((rmin_ - rdelta_ext, rmax_ + rdelta_ext))

    for i, a in enumerate(df.columns):
        for j, b in enumerate(df.columns):
            # Use only rows where both columns are non-null.
            common = (mask[a] & mask[b]).values
            xmin, xmax = boundaries_list[j]
            ax = axes[i, j]
            # Column variable on x, row variable on y, so that the data
            # agree with the axis labels set below.
            Profile(df[b][common], df[a][common], nbins, xmin, xmax, ax)

            ax.set_xlabel(b)
            ax.xaxis.set_ticks_position('bottom')
            ax.xaxis.set_label_position('bottom')
            setp(ax.get_xticklabels(), rotation=90)

            ax.set_ylabel(a)
            ax.yaxis.set_ticks_position('left')
            ax.yaxis.set_label_position('left')

            if j != 0:
                ax.yaxis.set_visible(False)
            if i != n - 1:
                ax.xaxis.set_visible(False)

    for ax in axes.flat:
        setp(ax.get_xticklabels(), fontsize=8)
        setp(ax.get_yticklabels(), fontsize=8)
    return axes
To my knowledge, matplotlib still doesn't allow producing profile histograms directly.
You can instead give a look at Hippodraw, a package developed at SLAC, that can be used as a Python extension module.
Here there is a Profile histogram example:
http://www.slac.stanford.edu/grp/ek/hippodraw/datareps_root.html#datareps_profilehist

Categories

Resources