import pandas as pd
from plotnine import *
df=pd.DataFrame({'A':[1,1,1,1],'B':[1,9,5,11],'C':['x','x','y','y'],'D':['a','b','a','b']})
p=ggplot(df,aes('A','B'))
p + geom_bar(stat="identity") + facet_grid('D~C',scales='free_y')
This will draw axes on both the left and right grids. Is there a way to omit the axes on the right grids, such that it is only visible on the left? I know I could use scales="fixed" but I would like them to vary between rows.
It is misleading to have free scales for all panels yet have ticks and labels for only some of them. But if you really want to, you have to drop into Matplotlib for that
import pandas as pd
from plotnine import *
df=pd.DataFrame({'A':[1,1,1,1],'B':[1,9,5,11],'C':['x','x','y','y'],'D':['a','b','a','b']})
p=ggplot(df,aes('A','B'))
p = p + geom_bar(stat="identity") + facet_grid('D~C',scales='free_y')
fig, p = p.draw(return_ggplot=True)
for i, ax in enumerate(p.axs):
if i % 2:
ax.set_yticklabels([])
ax.set_yticks([])
Related
This question is adapted from this answer, however the solution provided does not work and following is my result. I am interested in adding individual title on the right side for individual subgraphs.
(p.s no matter how much offset for y-axis i provide the title seems to stay at the same y-value)
from matplotlib import pyplot as plt
import numpy as np
fig, axes = plt.subplots(nrows=2)
ax0label = axes[0].set_ylabel('Axes 0')
ax1label = axes[1].set_ylabel('Axes 1')
title = axes[0].set_title('Title')
offset = np.array([-0.15, 0.0])
title.set_position(ax0label.get_position() + offset)
title.set_rotation(90)
fig.tight_layout()
plt.show()
Something like this? This is the only other way i can think of.
from matplotlib import pyplot as plt
import numpy as np
fig, axes = plt.subplots(nrows=2)
ax0label = axes[0].set_ylabel('Axes 0')
ax1label = axes[1].set_ylabel('Axes 1')
ax01 = axes[0].twinx()
ax02 = axes[1].twinx()
ax01.set_ylabel('title')
ax02.set_ylabel('title')
fig.tight_layout()
plt.show()
I have a bar plot with two static HLine, and would like to add a label to them (or a legend) so that they are defined on the plot. I tried something like:
eq = (
sr2.hvplot(
kind="bar",
groupby ="rl",
dynamic = False,)
* hv.HLine(0.35, name="IA1").opts(color='red')
* hv.HLine(0.2, label="IA2").opts(color='green')
)
but no label comes with the chart.
This answer to a similar question explains that it is not easy, and you need a workaround:
How do I get a full-height vertical line with a legend label in holoviews + bokeh?
You can also use parts of this solution:
https://discourse.holoviz.org/t/horizontal-spikes/117
Maybe the easiest is just to not use hv.HLine() when you would like a legend with your horizontal line, but create a manual line yourself with hv.Curve() instead:
# import libraries
import pandas as pd
import seaborn as sns
import holoviews as hv
import hvplot.pandas
hv.extension('bokeh')
# create sample dataset
df = sns.load_dataset('anscombe')
# create some horizontal lines manually defining start and end point
manual_horizontal_line = hv.Curve([[0, 10], [15, 10]], label='my_own_line')
another_horizontal_line = hv.Curve([[0, 5], [15, 5]], label='another_line')
# create scatterplot
scatter_plot = df.hvplot.scatter(x='x', y='y', groupby='dataset', dynamic=False)
# overlay manual horizontal lines on scatterplot
scatter_plot * manual_horizontal_line * another_horizontal_line
Resulting plot:
If you want text labels instead of a legend, you can use hv.Text:
import holoviews as hv
hv.extension('bokeh')
hv.Scatter(([4,6,8,10],[0.1,0.5,0.01,0.7])) * \
hv.HLine(0.35).opts(color='red') * hv.Text(9, 0.38, "IA1").opts(color='red') * \
hv.HLine(0.20).opts(color='green') * hv.Text(9, 0.23, "IA2").opts(color='green')
I am new to seaborn and I am trying to plot the bar chart. I am importing data from a csv file. I would like to have the bars small in width and grouped/join for a single category. But, I am getting a lot of spaces between these graphs. Is there a way where I can solve the above problem. Note that my y-axis range is from 80 to 100, and I need to maintain that.
Current code I am using is:
import seaborn as sns; sns.set()
import matplotlib.pyplot as plt
import pandas as pd
Data=pd.read_csv('S_Data.csv')
plt.figure(figsize=(20,5))
ax = sns.barplot(x="Techniques on data", y="Test Accuracy", hue="Epochs", order = ['WNO + FIFO', 'WNO', 'WNO + on_combined data', 'WNO + replace with gsd'], palette="Blues_d", data=Data)
def change_width(ax, new_value) :
for patch in ax.patches :
current_width = patch.get_width()
diff = current_width - new_value
# we change the bar width
patch.set_width(new_value)
# we recenter the bar
patch.set_x(patch.get_x() + diff * .5)
change_width(ax, .10)
ax.set_ylim(80,100)
ax.set_title('Result')
Thanks in advance.
Consider using countplot instead of the barplot function. [link]. I believe, based on your hue="Epochs", you will be able to achieve the slimmer bar style without using an additional function.
I've spent some time fruitlessly searching for an answer to my question, so I think a new question is in order. Consider this plot:
The axes labels use scientific notation. On the y-axis, all is well. However, I have tried and failed to get rid off the scaling factor that Python added in the lower-right corner. I would like to either remove this factor completely and simply indicate it by the units in the axis title or have it multiplied to every tick label. Everything would look better than this ugly 1e14.
Here's the code:
import numpy as np data_a = np.loadtxt('exercise_2a.txt')
import matplotlib as mpl
font = {'family' : 'serif',
'size' : 12}
mpl.rc('font', **font)
import matplotlib.pyplot as plt
fig = plt.figure()
subplot = fig.add_subplot(1,1,1)
subplot.plot(data_a[:,0], data_a[:,1], label='$T(t)$', linewidth=2)
subplot.set_yscale('log')
subplot.set_xlabel("$t[10^{14}s]$",fontsize=14)
subplot.set_ylabel("$T\,[K]$",fontsize=14)
plt.xlim(right=max(data_a [:,0]))
plt.legend(loc='upper right')
plt.savefig('T(t).pdf', bbox_inches='tight')
Update: Incorporating Will's implementation of scientificNotation into my script, the plot now looks like
Much nicer if you ask me. Here's the complete code for anyone wanting to adopt some part of it:
import numpy as np
data = np.loadtxt('file.txt')
import matplotlib as mpl
font = {'family' : 'serif',
'size' : 16}
mpl.rc('font', **font)
import matplotlib.pyplot as plt
fig = plt.figure()
subplot = fig.add_subplot(1,1,1)
subplot.plot(data[:,0], data[:,1], label='$T(t)$', linewidth=2)
subplot.set_yscale('log')
subplot.set_xlabel("$t[s]$",fontsize=20)
subplot.set_ylabel("$T\,[K]$",fontsize=20)
plt.xlim(right=max(data [:,0]))
plt.legend(loc='upper right')
def scientificNotation(value):
if value == 0:
return '0'
else:
e = np.log10(np.abs(value))
m = np.sign(value) * 10 ** (e - int(e))
return r'${:.0f} \cdot 10^{{{:d}}}$'.format(m, int(e))
formatter = mpl.ticker.FuncFormatter(lambda x, p: scientificNotation(x))
plt.gca().xaxis.set_major_formatter(formatter)
plt.savefig('T(t).pdf', bbox_inches='tight', transparent=True)
Just divide the x-values by 1e14:
subplot.plot(data_a[:,0] / 1e14, data_a[:,1], label='$T(t)$', linewidth=2)
If you want to add the label to each individual tick, you'll have to provide a custom formatter, like in tom's answer.
If you want it to look like as nice as the ticks on your y-axis, you could provide a function to format it with LaTeX:
def scientificNotation(value):
if value == 0:
return '0'
else:
e = np.log10(np.abs(value))
m = np.sign(value) * 10 ** (e - int(e))
return r'${:.0f} \times 10^{{{:d}}}$'.format(m, int(e))
# x is the tick value; p is the position on the axes.
formatter = mpl.ticker.FuncFormatter(lambda x, p: scientificNotation(x))
plt.gca().xaxis.set_major_formatter(formatter)
Of course, this will clutter your x-axis up quite a bit, so you might end up needing to display them at an angle, for example.
You can also change the tick formatter with the ticker module.
An example would be to use a FormatStrFormatter:
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
fig,ax = plt.subplots()
ax.semilogy(np.linspace(0,5e14,50),np.logspace(3,7,50),'b-')
ax.xaxis.set_major_formatter(ticker.FormatStrFormatter('%.0e'))
Also see the answers here with lots of good ideas for ways to solve this.
In addition to the good answer from Will Vousden, you can set what you write in your ticks with:
plt.xticks(range(6), range(6))
the first range(6) is the location and the second is the label.
I am trying to make a profile plot for two columns of a pandas.DataFrame. I would not expect this to be in pandas directly but it seems there is nothing in matplotlib either. I have searched around and cannot find it in any package other than rootpy. Before I take the time to write this myself I thought I would ask if there was a small package that contained profile histograms, perhaps where they are known by a different name.
If you don't know what I mean by "profile histogram" have a look at the ROOT implementation. http://root.cern.ch/root/html/TProfile.html
You can easily do it using scipy.stats.binned_statistic.
import scipy.stats
import numpy
import matplotlib.pyplot as plt
x = numpy.random.rand(10000)
y = x + scipy.stats.norm(0, 0.2).rvs(10000)
means_result = scipy.stats.binned_statistic(x, [y, y**2], bins=50, range=(0,1), statistic='mean')
means, means2 = means_result.statistic
standard_deviations = numpy.sqrt(means2 - means**2)
bin_edges = means_result.bin_edges
bin_centers = (bin_edges[:-1] + bin_edges[1:])/2.
plt.errorbar(x=bin_centers, y=means, yerr=standard_deviations, linestyle='none', marker='.')
Use seaborn. Data as from #MaxNoe
import numpy as np
import seaborn as sns
# just some random numbers to get started
x = np.random.uniform(-2, 2, 10000)
y = np.random.normal(x**2, np.abs(x) + 1)
sns.regplot(x=x, y=y, x_bins=10, fit_reg=None)
You can do much more (error bands are from bootstrap, you can change the estimator on the y-axis, add regression, ...)
While #Keith's answer seems to fit what you mean, it is quite a lot of code. I think this can be done much simpler, so one gets the key concepts and can adjust and build on top of it.
Let me stress one thing: what ROOT is calling a ProfileHistogram is not a special kind of plot. It is an errorbar plot. Which can simply be done in matplotlib.
It is a special kind of computation and that's not the task of a plotting library. This lies in the pandas realm, and pandas is great at stuff like this. It's symptomatical for ROOT as the giant monolithic pile it is to have an extra class for this.
So what you want to do is: discretize in some variable x and for each bin, calculate something in another variable y.
This can easily done using np.digitize together with the pandas groupy and aggregate methods.
Putting it all together:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# just some random numbers to get startet
x = np.random.uniform(-2, 2, 10000)
y = np.random.normal(x**2, np.abs(x) + 1)
df = pd.DataFrame({'x': x, 'y': y})
# calculate in which bin row belongs base on `x`
# bins needs the bin edges, so this will give as 100 equally sized bins
bins = np.linspace(-2, 2, 101)
df['bin'] = np.digitize(x, bins=bins)
bin_centers = 0.5 * (bins[:-1] + bins[1:])
bin_width = bins[1] - bins[0]
# grouby bin, so we can calculate stuff
binned = df.groupby('bin')
# calculate mean and standard error of the mean for y in each bin
result = binned['y'].agg(['mean', 'sem'])
result['x'] = bin_centers
result['xerr'] = bin_width / 2
# plot it
result.plot(
x='x',
y='mean',
xerr='xerr',
yerr='sem',
linestyle='none',
capsize=0,
color='black',
)
plt.savefig('result.png', dpi=300)
Just like ROOT ;)
I made a module myself for this functionality.
import pandas as pd
from pandas import Series, DataFrame
import numpy as np
import matplotlib.pyplot as plt
def Profile(x,y,nbins,xmin,xmax,ax):
df = DataFrame({'x' : x , 'y' : y})
binedges = xmin + ((xmax-xmin)/nbins) * np.arange(nbins+1)
df['bin'] = np.digitize(df['x'],binedges)
bincenters = xmin + ((xmax-xmin)/nbins)*np.arange(nbins) + ((xmax-xmin)/(2*nbins))
ProfileFrame = DataFrame({'bincenters' : bincenters, 'N' : df['bin'].value_counts(sort=False)},index=range(1,nbins+1))
bins = ProfileFrame.index.values
for bin in bins:
ProfileFrame.ix[bin,'ymean'] = df.ix[df['bin']==bin,'y'].mean()
ProfileFrame.ix[bin,'yStandDev'] = df.ix[df['bin']==bin,'y'].std()
ProfileFrame.ix[bin,'yMeanError'] = ProfileFrame.ix[bin,'yStandDev'] / np.sqrt(ProfileFrame.ix[bin,'N'])
ax.errorbar(ProfileFrame['bincenters'], ProfileFrame['ymean'], yerr=ProfileFrame['yMeanError'], xerr=(xmax-xmin)/(2*nbins), fmt=None)
return ax
def Profile_Matrix(frame):
#Much of this is stolen from https://github.com/pydata/pandas/blob/master/pandas/tools/plotting.py
import pandas.core.common as com
import pandas.tools.plotting as plots
from pandas.compat import lrange
from matplotlib.artist import setp
range_padding=0.05
df = frame._get_numeric_data()
n = df.columns.size
fig, axes = plots._subplots(nrows=n, ncols=n, squeeze=False)
# no gaps between subplots
fig.subplots_adjust(wspace=0, hspace=0)
mask = com.notnull(df)
boundaries_list = []
for a in df.columns:
values = df[a].values[mask[a].values]
rmin_, rmax_ = np.min(values), np.max(values)
rdelta_ext = (rmax_ - rmin_) * range_padding / 2.
boundaries_list.append((rmin_ - rdelta_ext, rmax_+ rdelta_ext))
for i, a in zip(lrange(n), df.columns):
for j, b in zip(lrange(n), df.columns):
common = (mask[a] & mask[b]).values
nbins = 100
(xmin,xmax) = boundaries_list[i]
ax = axes[i, j]
Profile(df[a][common],df[b][common],nbins,xmin,xmax,ax)
ax.set_xlabel('')
ax.set_ylabel('')
plots._label_axis(ax, kind='x', label=b, position='bottom', rotate=True)
plots._label_axis(ax, kind='y', label=a, position='left')
if j!= 0:
ax.yaxis.set_visible(False)
if i != n-1:
ax.xaxis.set_visible(False)
for ax in axes.flat:
setp(ax.get_xticklabels(), fontsize=8)
setp(ax.get_yticklabels(), fontsize=8)
return axes
To my knowledge matplotlib doesn't still allow to directly produce profile histograms.
You can instead give a look at Hippodraw, a package developed at SLAC, that can be used as a Python extension module.
Here there is a Profile histogram example:
http://www.slac.stanford.edu/grp/ek/hippodraw/datareps_root.html#datareps_profilehist