I'm trying to visualize a name co-occurrence matrix. This version works okay:
import pandas as pd
import numpy as np
import string
import matplotlib.pyplot as plt
n = 10
names = ['Long Name ' + suffix for suffix in string.ascii_uppercase[:n]]
df = pd.DataFrame(np.random.randint(0, 100, size=(n,n)),
columns=names, index=names)
fig = plt.figure()
ax = plt.gca()
im = ax.matshow(df, interpolation='none')
fig.colorbar(im)
ax.set_xticks(np.arange(n))
ax.set_xticklabels(names)
ax.set_yticks(np.arange(n))
ax.set_yticklabels(names)
ax.xaxis.set_ticks_position("bottom")
plt.setp(ax.get_xticklabels(), rotation=45,
ha="right", rotation_mode="anchor")
for (i,j), z in np.ndenumerate(df):
if z != 0:
ax.text(j, i, str(z), ha="center", va="center")
ax.set_title("Name Co-Occurrences")
fig.tight_layout()
plt.show()
The problem is that the actual matrix I have is fairly large, so I would like to display the names both on the top and the bottom. I've tried to do so by using twiny:
import pandas as pd
import numpy as np
import string
import matplotlib.pyplot as plt
n = 10
names = ['Long Name ' + suffix for suffix in string.ascii_uppercase[:n]]
df = pd.DataFrame(np.random.randint(0, 100, size=(n,n)),
columns=names, index=names)
fig = plt.figure()
botax = plt.gca()
im = botax.matshow(df, interpolation='none')
fig.colorbar(im)
topax = botax.twiny()
for ax, ha, pos in zip([topax, botax], ["left", "right"], ["top", "bottom"]):
ax.set_xticks(np.arange(n))
ax.set_xticklabels(names)
ax.set_yticks(np.arange(n))
ax.set_yticklabels(names)
ax.xaxis.set_ticks_position(pos)
plt.setp(ax.get_xticklabels(), rotation=45,
ha=ha, va="center", rotation_mode="anchor")
for (i,j), z in np.ndenumerate(df):
if z != 0:
botax.text(j, i, str(z), ha="center", va="center")
botax.set_title("Name Co-Occurrences")
fig.tight_layout()
plt.show()
Unfortunately the top labels aren't aligned correctly and I don't know why:
In order to label both, bottom and top of an axes, there is no need for a twin axes. This may make this all a bit easier. You can instead just turn the bottom and top ticks and labels on, and then rotate and align them separately.
import pandas as pd
import numpy as np
import string
import matplotlib.pyplot as plt
n = 10
names = ['Long Name ' + suffix for suffix in string.ascii_uppercase[:n]]
df = pd.DataFrame(np.random.randint(0, 100, size=(n,n)),
columns=names, index=names)
fig = plt.figure()
ax = plt.gca()
im = ax.matshow(df, interpolation='none')
fig.colorbar(im)
ax.set_xticks(np.arange(n))
ax.set_xticklabels(names)
ax.set_yticks(np.arange(n))
ax.set_yticklabels(names)
# Set ticks on both sides of axes on
ax.tick_params(axis="x", bottom=True, top=True, labelbottom=True, labeltop=True)
# Rotate and align bottom ticklabels
plt.setp([tick.label1 for tick in ax.xaxis.get_major_ticks()], rotation=45,
ha="right", va="center", rotation_mode="anchor")
# Rotate and align top ticklabels
plt.setp([tick.label2 for tick in ax.xaxis.get_major_ticks()], rotation=45,
ha="left", va="center",rotation_mode="anchor")
ax.set_title("Name Co-Occurrences", pad=55)
fig.tight_layout()
plt.show()
You will have to first set the aspect ratio of the upper x-axis to be the same as that of the lower x-axis. How to do this has been answered here. Then you can use y=1.3 to lift the title a bit upward so that it does not overlap with the upper x-axis tick labels.
topax = botax.twiny()
aspect0 = botax.get_aspect()
if aspect0 == 'equal':
aspect0 = 1.0
dy = np.abs(np.diff(botax.get_ylim()))
dx = np.abs(np.diff(botax.get_xlim()))
aspect = aspect0 / (float(dy) / dx)
topax.set_aspect(aspect)
for ax, ha, pos in zip([topax, botax], ["left", "right"], ["top", "bottom"]):
ax.set_xticks(np.arange(n))
.....
.....
botax.set_title("Name Co-Occurrences", y=1.3)
Related
When I run the following lines, I get a plot with a large space at the top and the bottom with no bars.
How can I remove this extra space?
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
from matplotlib.transforms import Affine2D
random.seed(1)
df = pd.DataFrame(np.random.randn(50, 1), columns=["parameter"])
df["standard_error"]= ((df.parameter**2)**0.5)/2
name = "plot"
x = ["A"+str(x) for x in df.index.tolist()]
y1 = df.parameter
yerr1 = df.standard_error
fig, ax = plt.subplots()
fig.set_figheight(len(x))
plt.rc('axes', labelsize=22)
plt.grid(b=True, which='major', color='#666666', linestyle='-', alpha=0.2)
trans1 = Affine2D().translate(-0.1, 0.0) + ax.transData
trans2 = Affine2D().translate(+0.1, 0.0) + ax.transData
er1 = ax.errorbar(y1, x, xerr=yerr1, marker="o", linestyle="none", transform=trans1)
ax.axvline(x=0, color="black")
plt.savefig(name + '.png', bbox_inches='tight')
If you mean the extra space below and above your smallest and largest data points along the y-axis then you can simply use plt.ylim, e.g:
plt.ylim(0, 50)
Which will change the extent of the y-axis to the range 0 - 50. Similarly for the x-axis there's plt.xlim
I would like to add a sample y-axis tick on the right side of the Ridge plot, to know what is the range of values of all the plots. Preferably I would like to add it only to one of the subplots and not to all of them.
My plot is based on the seaborn 'ridge plot' example at: https://seaborn.pydata.org/examples/kde_ridgeplot.html
I've tried the following code with no luck:
g.set(yticks=[0,200])
g.set_y_label_position("right")
g.set_ylabels('[Range]',fontsize=9,fontweight="normal")
If you want to modify one particular axes from a FacetGrid, you can get a reference from the list g.axes
Here is how I would go about it
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})
# Create the data
rs = np.random.RandomState(1979)
x = rs.randn(500)
g = np.tile(list("ABCDEFGHIJ"), 50)
df = pd.DataFrame(dict(x=x, g=g))
m = df.g.map(ord)
df["x"] += m
# Initialize the FacetGrid object
pal = sns.cubehelix_palette(10, rot=-.25, light=.7)
g = sns.FacetGrid(df, row="g", hue="g", aspect=15, height=.5, palette=pal)
# Draw the densities in a few steps
g.map(sns.kdeplot, "x", clip_on=False, shade=True, alpha=1, lw=1.5, bw=.2)
g.map(sns.kdeplot, "x", clip_on=False, color="w", lw=2, bw=.2)
g.map(plt.axhline, y=0, lw=2, clip_on=False)
# Define and use a simple function to label the plot in axes coordinates
def label(x, color, label):
ax = plt.gca()
ax.text(0, .2, label, fontweight="bold", color=color,
ha="left", va="center", transform=ax.transAxes)
g.map(label, "x")
#
# Changes from seaborn example below this point
#
# Set the subplots to overlap
g.fig.subplots_adjust(hspace=-.25, right=0.9)
# Remove axes details that don't play well with overlap
g.set_titles("")
#g.set(yticks=[])
g.despine(bottom=True, left=True, right=False, top=True, offset=5)
for ax in g.axes.ravel():
if ax.is_first_row(): # can use .is_last_row() to show spine on the bottom plot instead
ax.yaxis.tick_right()
ax.yaxis.set_label_position("right")
ax.set_ylabel("MW")
else:
ax.spines['right'].set_visible(False)
[l.set_visible(False) for l in ax.get_yticklabels()] # necessary because y-axes are shared
I want to create a graph that has multiple y-axis on both left and right side.
The code below produces a graph with multiple y axis on the right side. I've tried changing the rspline setting to 'left', but it doesn't work.
How do I add y4 on the left side?
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import rcParams
%matplotlib inline
x = np.random.rand(20)
y1 = x*5
y2 = x*5 + 0.2
y3 = x*x*3.5 + 0.2*x
y4 = x*5 + 0.2*x
yLimMin = 0
yLimMax = 2.1
lineWidth = 1.0
fontSize = 24
subTitle = ""
plt.rcParams.update({'axes.labelsize': 'small'})
fig = plt.figure(figsize=(21,29.7))
ax11 = fig.add_subplot(411)
subplotAdjustRight = 0.90
mks = 19 # marker step
ax11.plot(x,y1, linestyle='-', linewidth=lineWidth, color = 'k',
marker='*', markevery=11*mks,
label="CO")
ax11.set_ylabel('CO [%]', color='k')
plt.ylim((0,5))
fig.suptitle(subTitle, fontsize = fontSize)
ax11.yaxis.grid()
plt.locator_params(axis='y', nbins=10)
ax12=ax11.twinx()
ax12.plot(x,y2,linestyle='-', linewidth=lineWidth, color='r',
marker='*', markevery=11*mks,
label="CO22")
ax12.set_ylabel('NO [%]', color='r')
plt.ylim((0,10))
plt.locator_params(axis='y', nbins=10)
ax13= ax11.twinx()
rspine = ax13.spines['right']
rspine.set_position(('axes', 1.05))
ax13.set_frame_on(True)
ax13.plot(x,y3,linestyle='-', linewidth=lineWidth, color='m',
marker='*', markevery=11*mks,
label="CO222")
ax13.set_ylabel('O [%] ', color='m')
plt.ylim((0,20))
plt.locator_params(axis='y', nbins=10)
The logic is always the same. You create a twin axes and offset the spine. The only difference when creating an axes which should appear at the left side of the plot is that you need to tell matplotlib to set the ticks and the ticklabel to the left (because by default a twin axes has them set to the right).
ax14= ax11.twinx()
ax14.yaxis.tick_left()
rspine = ax14.spines['left']
rspine.set_position(('axes', -0.15))
ax14.set_ylabel('blubb ', color='limegreen')
ax14.yaxis.set_label_position("left")
I tried this but the labels are not printing in the right location. Some of the labels are not printed and are not printed in the right position.
I have an array of labels that correspond to each data point. I only want some of the labels to be printed and printed only on major ticks. But I do not know how to set major ticks and still keep the labels in correct positions.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
fig, ax1 = plt.subplots(1, 1)
top = np.arange(100)
btm = top-2
x = np.arange(len(top))
ax1.vlines(x, top, btm, color='r', linewidth=1)
labels = np.linspace(200,300,100).astype(np.int).astype(np.str)
factor = 10
labels = [label for i,label in enumerate(labels) if ((i+1)%factor==1)]
plt.xticks(x, labels, rotation='horizontal')
from matplotlib.ticker import MultipleLocator, FormatStrFormatter, FixedFormatter
majorLocator = MultipleLocator(factor)
majorFormatter = FixedFormatter(labels)
minorLocator = MultipleLocator(1)
ax1.xaxis.set_minor_locator(minorLocator)
ax1.xaxis.set_major_formatter(majorFormatter)
ax1.xaxis.set_major_locator(majorLocator)
plt.tick_params(axis='both', which='major', labelsize=9, length=10)
plt.tick_params(axis='both', which='minor', labelsize=5, length=4)
Help. Thanks.
EDIT:
The labels array is of the same length as the number of data points, which is equal to the length of the x axis. So for every increment in position of the x-axis I have the corresponding label. So for the ith position or tick on the x-axis should have either an empty label or the label equal to ith element of label array. It should be empty if it does not fall on a major tick. The labels are not simply integers, but strings. To be more specific, they are datetime strings.
Without a clear problem description, I need to guess that the following might be what you want:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.ticker import MultipleLocator
fig, ax1 = plt.subplots(1, 1)
top = np.arange(100)
btm = top-2
x = np.arange(len(top))
ax1.vlines(x+200, top, btm, color='r', linewidth=1)
majorLocator = MultipleLocator(10)
minorLocator = MultipleLocator(1)
ax1.xaxis.set_major_locator(majorLocator)
ax1.xaxis.set_minor_locator(minorLocator)
plt.tick_params(axis='both', which='major', labelsize=9, length=10)
plt.tick_params(axis='both', which='minor', labelsize=5, length=4)
plt.show()
You can also use a FuncFormatter for the ticklabels.
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.ticker import MultipleLocator, FuncFormatter
fig, ax1 = plt.subplots(1, 1)
top = np.arange(100)
btm = top-2
x = np.arange(len(top))
ax1.vlines(x, top, btm, color='r', linewidth=1)
majorLocator = MultipleLocator(10)
minorLocator = MultipleLocator(1)
ax1.xaxis.set_major_locator(majorLocator)
ax1.xaxis.set_minor_locator(minorLocator)
fmt = lambda x,pos : str(int(x+200))
ax1.xaxis.set_major_formatter(FuncFormatter(fmt))
plt.tick_params(axis='both', which='major', labelsize=9, length=10)
plt.tick_params(axis='both', which='minor', labelsize=5, length=4)
plt.show()
What I needed was the FixedLocator with the FixedFormatter, and also an array of integers, majorpos, which specify the indices where the major ticks are located.
The other answer using FuncFormatter would introduce some problems.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
fig, ax1 = plt.subplots(1, 1)
top = np.arange(100)
btm = top-2
x = np.arange(len(top))
ax1.vlines(x, top, btm, color='r', linewidth=1)
labels = np.linspace(200,300,100).astype(np.int).astype(np.str)
print(labels)
factor = 10
plt.xticks(x, labels, rotation='horizontal')
from matplotlib.ticker import MultipleLocator, FormatStrFormatter, FixedFormatter, FixedLocator
majorpos = np.arange(0,len(labels),int(len(labels)/10))
ax1.xaxis.set_major_locator(FixedLocator((majorpos)))
ax1.xaxis.set_major_formatter(FixedFormatter((labels[majorpos])))
ax1.xaxis.set_minor_locator(MultipleLocator(1))
plt.tick_params(axis='both', which='major', labelsize=9, length=10)
plt.tick_params(axis='both', which='minor', labelsize=5, length=4)
I'm looking into outliers detection. Brendan Gregg has a really nice article and I'm especially intrigued by his visualizations. One of the methods he uses are frequency trails.
I'm trying to reproduce this in matplotlib using this example. Which looks like this:
And the plot is based on this answer: https://stackoverflow.com/a/4152016/948369
Now my issue is, like described by Brendan, that I have a continuous line that masks the outlier (I simplified the input values so you can still see them):
Any help on making the line "non-continuous" for non existent values?
Seaborn also provides a very neat example:
They call it a joy/ridge plot however: https://seaborn.pydata.org/examples/kde_ridgeplot.html
#!/usr/bin/python
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})
# Create the data
rs = np.random.RandomState(1979)
x = rs.randn(500)
g = np.tile(list("ABCDEFGHIJ"), 50)
df = pd.DataFrame(dict(x=x, g=g))
m = df.g.map(ord)
df["x"] += m
# Initialize the FacetGrid object
pal = sns.cubehelix_palette(10, rot=-.25, light=.7)
g = sns.FacetGrid(df, row="g", hue="g", aspect=15, size=.5, palette=pal)
# Draw the densities in a few steps
g.map(sns.kdeplot, "x", clip_on=False, shade=True, alpha=1, lw=1.5, bw=.2)
g.map(sns.kdeplot, "x", clip_on=False, color="w", lw=2, bw=.2)
g.map(plt.axhline, y=0, lw=2, clip_on=False)
# Define and use a simple function to label the plot in axes coordinates
def label(x, color, label):
ax = plt.gca()
ax.text(0, .2, label, fontweight="bold", color=color,
ha="left", va="center", transform=ax.transAxes)
g.map(label, "x")
# Set the subplots to overlap
g.fig.subplots_adjust(hspace=-.25)
# Remove axes details that don't play will with overlap
g.set_titles("")
g.set(yticks=[])
g.despine(bottom=True, left=True)
I would stick with a flat 2D plot and displace each level by a set vertical amount. You'll have to play the the levels (in the code below I called it displace) to properly see the outliers, but this does a pretty good job at replicating your target image. The key, I think, is to set the "zero" values to None so pylab does not draw them.
import numpy as np
import pylab as plt
import itertools
k = 20
X = np.linspace(0, 20, 500)
Y = np.zeros((k,X.size))
# Add some fake data
MU = np.random.random(k)
for n in xrange(k):
Y[n] += np.exp(-(X-MU[n]*n)**2 / (1+n/3))
Y *= 50
# Add some outliers for show
Y += 2*np.random.random(Y.shape)
displace = Y.max()/4
# Add a cutoff
Y[Y<1.0] = None
face_colors = itertools.cycle(["#D3D820", "#C9CC54",
"#D7DA66", "#FDFE42"])
fig = plt.figure()
ax = fig.add_subplot(111, axisbg='black')
ax.xaxis.set_visible(False)
ax.yaxis.set_visible(False)
for n,y in enumerate(Y):
# Vertically displace each plot
y0 = np.ones(y.shape) * n * displace
y1 = y + n*displace
plt.fill_between(X, y0,y1,lw=1,
facecolor=face_colors.next(),
zorder=len(Y)-n)
plt.show()