Matplotlib errorbar extra space at top and bottom - python

When I run the following lines, I get a plot with a large space at the top and the bottom with no bars.
How can I remove this extra space?
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
from matplotlib.transforms import Affine2D
random.seed(1)
df = pd.DataFrame(np.random.randn(50, 1), columns=["parameter"])
df["standard_error"]= ((df.parameter**2)**0.5)/2
name = "plot"
x = ["A"+str(x) for x in df.index.tolist()]
y1 = df.parameter
yerr1 = df.standard_error
fig, ax = plt.subplots()
fig.set_figheight(len(x))
plt.rc('axes', labelsize=22)
plt.grid(b=True, which='major', color='#666666', linestyle='-', alpha=0.2)
trans1 = Affine2D().translate(-0.1, 0.0) + ax.transData
trans2 = Affine2D().translate(+0.1, 0.0) + ax.transData
er1 = ax.errorbar(y1, x, xerr=yerr1, marker="o", linestyle="none", transform=trans1)
ax.axvline(x=0, color="black")
plt.savefig(name + '.png', bbox_inches='tight')

If you mean the extra space below and above your smallest and largest data points along the y-axis then you can simply use plt.ylim, e.g:
plt.ylim(0, 50)
Which will change the extent of the y-axis to the range 0 - 50. Similarly for the x-axis there's plt.xlim

Related

How to label quartiles in matplotlib boxplots?

I have a list of values which I want to plot the distribution for. I'm using a box-plot but it would be nice to add some dotted lines going from the boxplot quartiles to the axis. Also I want just the quartile values displayed on the x ticks.
Here's a rough idea but with values at the end instead of names.
import numpy as np
import pandas as pd
import matplotlib.pylab as plt
vel_arr = np.random.rand(1000,1)
fig = plt.figure(1, figsize=(9, 6))
ax = fig.add_subplot(111)
# Create the boxplot
ax.boxplot(vel_arr,vert=False, manage_ticks=True)
ax.set_xlabel('value')
plt.yticks([1], ['category'])
plt.show()
np.quantile calculates the desired quantiles.
ax.vlines draws vertical lines, for example from the center of the boxplot to y=0. zorder=0 makes sure these lines go behind the boxplot.
ax.set_ylim(0.5, 1.5) resets the ylims. Default, the vlines force the ylims with some extra padding.
ax.set_xticks(quantiles) sets xticks at the position of every quantile.
import numpy as np
import matplotlib.pylab as plt
vel_arr = np.random.rand(50, 1)
fig = plt.figure(1, figsize=(9, 6))
ax = fig.add_subplot(111)
ax.boxplot(vel_arr, vert=False, manage_ticks=True)
ax.set_xlabel('value')
ax.set_yticks([1])
ax.set_yticklabels(['category'])
quantiles = np.quantile(vel_arr, np.array([0.00, 0.25, 0.50, 0.75, 1.00]))
ax.vlines(quantiles, [0] * quantiles.size, [1] * quantiles.size,
color='b', ls=':', lw=0.5, zorder=0)
ax.set_ylim(0.5, 1.5)
ax.set_xticks(quantiles)
plt.show()

Double y axis for multiple columns in Matplotlib

I would like to display the following dataframe in barchart but with double y axis, I want to show areas columns on left side and prices columns on right side:
area1 area2 price1 price2
level
first 263.16 906.58 10443.32 35101.88
second 6879.83 14343.03 2077.79 4415.53
third 31942.75 60864.24 922.87 1774.47
I tried with code below, it works but only display left side.
import matplotlib.pyplot as plt
df.plot(kind='bar')
plt.xticks(rotation=45, fontproperties="SimHei")
plt.xlabel("")
plt.legend()
Thank you.
If I understood you correctly, one way could be this, but you have to "play" a bit with the values of width and position of the ticks:
import pandas as pd
import matplotlib.pyplot as plt
fig = plt.figure(figsize=(12,5))
ax = fig.add_subplot(111)
ax2 = ax.twinx()
width = 0.1
df.area1.plot(kind='bar', color='red', ax=ax, width=width, position=0 )
df.area2.plot(kind='bar', color='orange', ax=ax, width=width, position=1)
df.price1.plot(kind='bar', color='blue', ax=ax2, width=width, position=2)
df.price2.plot(kind='bar', color='green', ax=ax2, width=width, position=3)
ax.set_ylabel('Area')
ax2.set_ylabel('Price')
ax.legend(["Area1", "Area2"], bbox_to_anchor=(0.8,1.0))
ax2.legend(["Price1", "Price2"], bbox_to_anchor=(0.9,1.0))
plt.show()
Another way is this:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
fig = plt.figure(figsize=(10,5))
ax = fig.add_subplot(111)
ax2 = ax.twinx()
# ax.set_xticklabels(ax.get_xticklabels(),rotation=45) # Rotation 45 degrees
width = 0.1
ind = np.arange(len(df))
ax.set_ylabel('Area')
ax2.set_ylabel('Price')
ax.set_xlabel('Level')
ax.bar(ind, df.area1, width, color='red', label='area1')
ax.bar(ind + width, df.area2, width, color='orange', label='area2')
ax2.bar(ind + 2*width, df.price1, width, color='blue', label='price1')
ax2.bar(ind + 3*width, df.price2, width, color='green', label='price2')
ax.set(xticks=(ind + 1.5*width), xticklabels=df.index, xlim=[2*width - 1, len(df)])
ax.legend(["Area1", "Area2"], bbox_to_anchor=(1,1))
ax2.legend(["Price1", "Price2"], bbox_to_anchor=(1,0.87))
plt.show()

matplotlib matshow xtick labels on top and bottom

I'm trying to visualize a name co-occurrence matrix. This version works okay:
import pandas as pd
import numpy as np
import string
import matplotlib.pyplot as plt
n = 10
names = ['Long Name ' + suffix for suffix in string.ascii_uppercase[:n]]
df = pd.DataFrame(np.random.randint(0, 100, size=(n,n)),
columns=names, index=names)
fig = plt.figure()
ax = plt.gca()
im = ax.matshow(df, interpolation='none')
fig.colorbar(im)
ax.set_xticks(np.arange(n))
ax.set_xticklabels(names)
ax.set_yticks(np.arange(n))
ax.set_yticklabels(names)
ax.xaxis.set_ticks_position("bottom")
plt.setp(ax.get_xticklabels(), rotation=45,
ha="right", rotation_mode="anchor")
for (i,j), z in np.ndenumerate(df):
if z != 0:
ax.text(j, i, str(z), ha="center", va="center")
ax.set_title("Name Co-Occurrences")
fig.tight_layout()
plt.show()
The problem is that the actual matrix I have is fairly large, so I would like to display the names both on the top and the bottom. I've tried to do so by using twiny:
import pandas as pd
import numpy as np
import string
import matplotlib.pyplot as plt
n = 10
names = ['Long Name ' + suffix for suffix in string.ascii_uppercase[:n]]
df = pd.DataFrame(np.random.randint(0, 100, size=(n,n)),
columns=names, index=names)
fig = plt.figure()
botax = plt.gca()
im = botax.matshow(df, interpolation='none')
fig.colorbar(im)
topax = botax.twiny()
for ax, ha, pos in zip([topax, botax], ["left", "right"], ["top", "bottom"]):
ax.set_xticks(np.arange(n))
ax.set_xticklabels(names)
ax.set_yticks(np.arange(n))
ax.set_yticklabels(names)
ax.xaxis.set_ticks_position(pos)
plt.setp(ax.get_xticklabels(), rotation=45,
ha=ha, va="center", rotation_mode="anchor")
for (i,j), z in np.ndenumerate(df):
if z != 0:
botax.text(j, i, str(z), ha="center", va="center")
botax.set_title("Name Co-Occurrences")
fig.tight_layout()
plt.show()
Unfortunately the top labels aren't aligned correctly and I don't know why:
In order to label both, bottom and top of an axes, there is no need for a twin axes. This may make this all a bit easier. You can instead just turn the bottom and top ticks and labels on, and then rotate and align them separately.
import pandas as pd
import numpy as np
import string
import matplotlib.pyplot as plt
n = 10
names = ['Long Name ' + suffix for suffix in string.ascii_uppercase[:n]]
df = pd.DataFrame(np.random.randint(0, 100, size=(n,n)),
columns=names, index=names)
fig = plt.figure()
ax = plt.gca()
im = ax.matshow(df, interpolation='none')
fig.colorbar(im)
ax.set_xticks(np.arange(n))
ax.set_xticklabels(names)
ax.set_yticks(np.arange(n))
ax.set_yticklabels(names)
# Set ticks on both sides of axes on
ax.tick_params(axis="x", bottom=True, top=True, labelbottom=True, labeltop=True)
# Rotate and align bottom ticklabels
plt.setp([tick.label1 for tick in ax.xaxis.get_major_ticks()], rotation=45,
ha="right", va="center", rotation_mode="anchor")
# Rotate and align top ticklabels
plt.setp([tick.label2 for tick in ax.xaxis.get_major_ticks()], rotation=45,
ha="left", va="center",rotation_mode="anchor")
ax.set_title("Name Co-Occurrences", pad=55)
fig.tight_layout()
plt.show()
You will have to first set the aspect ratio of the upper x-axis to be the same as that of the lower x-axis. How to do this has been answered here. Then you can use y=1.3 to lift the title a bit upward so that it does not overlap with the upper x-axis tick labels.
topax = botax.twiny()
aspect0 = botax.get_aspect()
if aspect0 == 'equal':
aspect0 = 1.0
dy = np.abs(np.diff(botax.get_ylim()))
dx = np.abs(np.diff(botax.get_xlim()))
aspect = aspect0 / (float(dy) / dx)
topax.set_aspect(aspect)
for ax, ha, pos in zip([topax, botax], ["left", "right"], ["top", "bottom"]):
ax.set_xticks(np.arange(n))
.....
.....
botax.set_title("Name Co-Occurrences", y=1.3)

Matplotlib: Set manual x-axis labels given string label array input, but only on major ticks

I tried this but the labels are not printing in the right location. Some of the labels are not printed and are not printed in the right position.
I have an array of labels that correspond to each data point. I only want some of the labels to be printed and printed only on major ticks. But I do not know how to set major ticks and still keep the labels in correct positions.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
fig, ax1 = plt.subplots(1, 1)
top = np.arange(100)
btm = top-2
x = np.arange(len(top))
ax1.vlines(x, top, btm, color='r', linewidth=1)
labels = np.linspace(200,300,100).astype(np.int).astype(np.str)
factor = 10
labels = [label for i,label in enumerate(labels) if ((i+1)%factor==1)]
plt.xticks(x, labels, rotation='horizontal')
from matplotlib.ticker import MultipleLocator, FormatStrFormatter, FixedFormatter
majorLocator = MultipleLocator(factor)
majorFormatter = FixedFormatter(labels)
minorLocator = MultipleLocator(1)
ax1.xaxis.set_minor_locator(minorLocator)
ax1.xaxis.set_major_formatter(majorFormatter)
ax1.xaxis.set_major_locator(majorLocator)
plt.tick_params(axis='both', which='major', labelsize=9, length=10)
plt.tick_params(axis='both', which='minor', labelsize=5, length=4)
Help. Thanks.
EDIT:
The labels array is of the same length as the number of data points, which is equal to the length of the x axis. So for every increment in position of the x-axis I have the corresponding label. So for the ith position or tick on the x-axis should have either an empty label or the label equal to ith element of label array. It should be empty if it does not fall on a major tick. The labels are not simply integers, but strings. To be more specific, they are datetime strings.
Without a clear problem description, I need to guess that the following might be what you want:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.ticker import MultipleLocator
fig, ax1 = plt.subplots(1, 1)
top = np.arange(100)
btm = top-2
x = np.arange(len(top))
ax1.vlines(x+200, top, btm, color='r', linewidth=1)
majorLocator = MultipleLocator(10)
minorLocator = MultipleLocator(1)
ax1.xaxis.set_major_locator(majorLocator)
ax1.xaxis.set_minor_locator(minorLocator)
plt.tick_params(axis='both', which='major', labelsize=9, length=10)
plt.tick_params(axis='both', which='minor', labelsize=5, length=4)
plt.show()
You can also use a FuncFormatter for the ticklabels.
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.ticker import MultipleLocator, FuncFormatter
fig, ax1 = plt.subplots(1, 1)
top = np.arange(100)
btm = top-2
x = np.arange(len(top))
ax1.vlines(x, top, btm, color='r', linewidth=1)
majorLocator = MultipleLocator(10)
minorLocator = MultipleLocator(1)
ax1.xaxis.set_major_locator(majorLocator)
ax1.xaxis.set_minor_locator(minorLocator)
fmt = lambda x,pos : str(int(x+200))
ax1.xaxis.set_major_formatter(FuncFormatter(fmt))
plt.tick_params(axis='both', which='major', labelsize=9, length=10)
plt.tick_params(axis='both', which='minor', labelsize=5, length=4)
plt.show()
What I needed was the FixedLocator with the FixedFormatter, and also an array of integers, majorpos, which specify the indices where the major ticks are located.
The other answer using FuncFormatter would introduce some problems.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
fig, ax1 = plt.subplots(1, 1)
top = np.arange(100)
btm = top-2
x = np.arange(len(top))
ax1.vlines(x, top, btm, color='r', linewidth=1)
labels = np.linspace(200,300,100).astype(np.int).astype(np.str)
print(labels)
factor = 10
plt.xticks(x, labels, rotation='horizontal')
from matplotlib.ticker import MultipleLocator, FormatStrFormatter, FixedFormatter, FixedLocator
majorpos = np.arange(0,len(labels),int(len(labels)/10))
ax1.xaxis.set_major_locator(FixedLocator((majorpos)))
ax1.xaxis.set_major_formatter(FixedFormatter((labels[majorpos])))
ax1.xaxis.set_minor_locator(MultipleLocator(1))
plt.tick_params(axis='both', which='major', labelsize=9, length=10)
plt.tick_params(axis='both', which='minor', labelsize=5, length=4)

frequency trail in matplotlib

I'm looking into outliers detection. Brendan Gregg has a really nice article and I'm especially intrigued by his visualizations. One of the methods he uses are frequency trails.
I'm trying to reproduce this in matplotlib using this example. Which looks like this:
And the plot is based on this answer: https://stackoverflow.com/a/4152016/948369
Now my issue is, like described by Brendan, that I have a continuous line that masks the outlier (I simplified the input values so you can still see them):
Any help on making the line "non-continuous" for non existent values?
Seaborn also provides a very neat example:
They call it a joy/ridge plot however: https://seaborn.pydata.org/examples/kde_ridgeplot.html
#!/usr/bin/python
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})
# Create the data
rs = np.random.RandomState(1979)
x = rs.randn(500)
g = np.tile(list("ABCDEFGHIJ"), 50)
df = pd.DataFrame(dict(x=x, g=g))
m = df.g.map(ord)
df["x"] += m
# Initialize the FacetGrid object
pal = sns.cubehelix_palette(10, rot=-.25, light=.7)
g = sns.FacetGrid(df, row="g", hue="g", aspect=15, size=.5, palette=pal)
# Draw the densities in a few steps
g.map(sns.kdeplot, "x", clip_on=False, shade=True, alpha=1, lw=1.5, bw=.2)
g.map(sns.kdeplot, "x", clip_on=False, color="w", lw=2, bw=.2)
g.map(plt.axhline, y=0, lw=2, clip_on=False)
# Define and use a simple function to label the plot in axes coordinates
def label(x, color, label):
ax = plt.gca()
ax.text(0, .2, label, fontweight="bold", color=color,
ha="left", va="center", transform=ax.transAxes)
g.map(label, "x")
# Set the subplots to overlap
g.fig.subplots_adjust(hspace=-.25)
# Remove axes details that don't play will with overlap
g.set_titles("")
g.set(yticks=[])
g.despine(bottom=True, left=True)
I would stick with a flat 2D plot and displace each level by a set vertical amount. You'll have to play the the levels (in the code below I called it displace) to properly see the outliers, but this does a pretty good job at replicating your target image. The key, I think, is to set the "zero" values to None so pylab does not draw them.
import numpy as np
import pylab as plt
import itertools
k = 20
X = np.linspace(0, 20, 500)
Y = np.zeros((k,X.size))
# Add some fake data
MU = np.random.random(k)
for n in xrange(k):
Y[n] += np.exp(-(X-MU[n]*n)**2 / (1+n/3))
Y *= 50
# Add some outliers for show
Y += 2*np.random.random(Y.shape)
displace = Y.max()/4
# Add a cutoff
Y[Y<1.0] = None
face_colors = itertools.cycle(["#D3D820", "#C9CC54",
"#D7DA66", "#FDFE42"])
fig = plt.figure()
ax = fig.add_subplot(111, axisbg='black')
ax.xaxis.set_visible(False)
ax.yaxis.set_visible(False)
for n,y in enumerate(Y):
# Vertically displace each plot
y0 = np.ones(y.shape) * n * displace
y1 = y + n*displace
plt.fill_between(X, y0,y1,lw=1,
facecolor=face_colors.next(),
zorder=len(Y)-n)
plt.show()

Categories

Resources