Seaborn: stripplot x-log scale collapses values - python

Hi I am trying to use stripplot in seaborn with log scale for the x-axis. It seems that the path I have taken does not work as intended. I would appreciate if someone could help me with that.
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
x = np.logspace(-8, -2, 10)
y = np.linspace(0, 100, 10)
sns.stripplot(x,y)
plt.gca().set_xscale('log')
all the xvalues are collapsed on the right edge of the plot (see plot). I works fine if I set the y-axis to be log.
PS: I would also need to restrict the number of x tick labels.
Thanks.

A scatter plot on a log scale using pyplot.scatter:
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
x = np.logspace(-8, -2, 10)
y = np.linspace(0, 100, 10)
c = np.random.rand(10)
s = 20+np.random.rand(10)*40
plt.scatter(x,y, c=c, s=s, cmap="jet")
plt.gca().set_xscale('log')
plt.xlim(5e-9, 5e-2)
plt.show()
The same scatter plot on a linear scale:
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
x = np.logspace(-8, -2, 10)
y = np.linspace(0, 100, 10)
c = np.random.rand(10)
s = 20+np.random.rand(10)*40
plt.scatter(x,y, c=c, s=s, cmap="jet")
plt.xlim(-0.003, 0.012)
plt.show()

Related

How to draw multiple lines with Seaborn?

I am trying to draw a plot with two lines. Both with different colors. And different labels. This is what I have come up with.
This is code that I have written.
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
data1 = pd.read_csv("/content/drive/MyDrive/Summer-2020/URMC/training_x_total_data_ones.csv", header=None)
data2 = pd.read_csv("/content/drive/MyDrive/Summer-2020/URMC/training_x_total_data_zeroes.csv", header=None)
sns.lineplot(data=data1, color="red")
sns.lineplot(data=data2)
What am I doing wrong?
Edit
This is how my dataset looks like
So, I just added another color in the second line and that seemed to work.
import random
import numpy as np
import seaborn as sns
mu, sigma = 0, 0.1
s = np.random.normal(mu, sigma, 100)
mu1, sigma1 = 0.5, 1
t = np.random.normal(mu1, sigma1, 100)
sns.lineplot(data= s, color = "red")
sns.lineplot(data= t, color ="blue")
Try specifying the x and y of the call to sns.lineplot?
import pandas as pd
import numpy as np
import seaborn as sns
x = np.arange(10)
df1 = pd.DataFrame({'x':x,
'y':np.sin(x)})
df2 = pd.DataFrame({'x':x,
'y':x**2})
sns.lineplot(data=df1, x='x', y='y', color="red")
sns.lineplot(data=df2, x='x', y='y')
Without doing so, I get a similar plot as yours.

How to color swarmplot dots depending on quartile?

I would like to create a plot where dots are overlaid depending on whether or not they are within the 1st-3rd quartiles in seaborn. What function to use?
Something similar to the figure:
The following code creates a Seaborn swarmplot and then recolors the dots depending on their quartile. Looping through the collections created by the swarmplot, the y-data are retrieved. np.percentile calculates the borders of the quartiles and np.digitize calculates the corresponding quartiles. These quartiles can be used to define the color.
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from matplotlib.colors import ListedColormap
sns.set(style="whitegrid")
tips = sns.load_dataset("tips")
# cmap = plt.get_cmap('tab10')
cmap = ListedColormap(['gold', 'crimson', 'teal', 'orange'])
ax = sns.swarmplot(x="day", y="total_bill", data=tips)
for col in ax.collections:
y = col.get_offsets()[:,1]
perc = np.percentile(y, [25, 50, 75])
col.set_cmap(cmap)
col.set_array(np.digitize(y, perc))
plt.show()
The same approach can be used for a stripplot (optionally without jitter) to create a plot similar to the one in the question.
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from matplotlib.colors import ListedColormap
sns.set(style="whitegrid")
N = 200
x = np.repeat(list('abcdefg'), N)
y = np.random.normal(np.repeat(np.random.uniform(11, 15, 7), N), 1)
cmap = ListedColormap(['grey', 'turquoise', 'grey'])
ax = sns.stripplot(x=x, y=y, jitter=False, alpha=0.2)
for col in ax.collections:
y = col.get_offsets()[:, 1]
perc = np.percentile(y, [25, 75])
col.set_cmap(cmap)
col.set_array(np.digitize(y, perc))
plt.show()

Use Seaborn to plot 1D time series as a line with marginal histogram along y-axis

I'm trying to recreate the broad features of the following figure:
(from E.M. Ozbudak, M. Thattai, I. Kurtser, A.D. Grossman, and A. van Oudenaarden, Nat Genet 31, 69 (2002))
seaborn.jointplot does most of what I need, but it seemingly can't use a line plot, and there's no obvious way to hide the histogram along the x-axis. Is there a way to get jointplot to do what I need? Barring that, is there some other reasonably simple way to create this kind of plot using Seaborn?
Here is a way to create roughly the same plot as shown in the question. You can share the axes between the two subplots and make the width-ratio asymmetric.
import matplotlib.pyplot as plt
import numpy as np; np.random.seed(42)
x = np.linspace(0,8, 300)
y = np.tanh(x)+np.random.randn(len(x))*0.08
fig, (ax, axhist) = plt.subplots(ncols=2, sharey=True,
gridspec_kw={"width_ratios" : [3,1], "wspace" : 0})
ax.plot(x,y, color="k")
ax.plot(x,np.tanh(x), color="k")
axhist.hist(y, bins=32, ec="k", fc="none", orientation="horizontal")
axhist.tick_params(axis="y", left=False)
plt.show()
It turns out that you can produce a modified jointplot with the needed characteristics by working directly with the underlying JointGrid object:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
x = np.linspace(0,8, 300)
y = (1 - np.exp(-x*5))*.5
ynoise= y + np.random.randn(len(x))*0.08
grid = sns.JointGrid(x, ynoise, ratio=3)
grid.plot_joint(plt.plot)
grid.ax_joint.plot(x, y, c='C0')
plt.sca(grid.ax_marg_y)
sns.distplot(grid.y, kde=False, vertical=True)
# override a bunch of the default JointGrid style options
grid.fig.set_size_inches(10,6)
grid.ax_marg_x.remove()
grid.ax_joint.spines['top'].set_visible(True)
Output:
You can use ax_marg_x.patches to affect the outcome.
Here, I use it to turn the x-axis plot white so that it cannot be seen (although the margin for it remains):
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style="white", color_codes=True)
x, y = np.random.multivariate_normal([2, 3], [[0.3, 0], [0, 0.5]], 1000).T
g = sns.jointplot(x=x, y=y, kind="hex", stat_func=None, marginal_kws={'color': 'green'})
plt.setp(g.ax_marg_x.patches, color="w", )
plt.show()
Output:

Plotting x-axis in log scale spacing but not labeling it in exponential form

I would like to plot say two values x = [0, 10,20,50,100] and y=[1,2,3,10,100] using pylab. I want to keep the spacing of x-axis in log form. But I want to tick at the values of x i'e at 10, 20, 50, 100 and print them as it not in the form of 10e1 or 10e2. I am doing it as follows:
import matplotlib.pylab as plt
plt.xscale('log')
plt.plot(x, y)
plt.xticks(x)
plt.grid()
But it keeps the values in the form of 10e1, 10e2.
Could you please help me out?
I think what you want is to change the major_formatter of the x axis?
import matplotlib.pylab as plt
import numpy as np
from matplotlib.ticker import ScalarFormatter
x = [0, 10,20,50,100]
y=[1,2,3,10,100]
plt.plot(x, y)
plt.xscale('log')
plt.grid()
ax = plt.gca()
ax.set_xticks(x[1:]) # note that with a log axis, you can't have x = 0 so that value isn't plotted.
ax.xaxis.set_major_formatter(ScalarFormatter())
plt.show()
The following
import matplotlib.pyplot as plt
x = [0,10,20,50,100]
y = [1,2,3,10,100]
f,ax = plt.subplots()
ax.plot(x,y)
ax.set_xscale('log')
ax.set_xticks(x)
ax.set_xticklabels(x)
ax.set_xlim([0,100])
will produce

Matplotlib: displaying only unique labels for the legend

Let's say I have an array of X (X.shape = [N, 2]) and labels (labels.shape = N).
I want to produce a scatter of X[:,0], X[:,1], color corresponding to the label, and only the unique labels displayed.
Code:
import numpy as np
from numpy.random import rand
import matplotlib
from matplotlib import pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set(context='poster', style='dark')
X = rand(500)
Y = rand(500)
labels = np.round(rand(500)*4).astype(int)
print(np.unique(labels)) # array([0, 1, 2, 3, 4])
plt.scatter(X, Y, c=labels, cmap='rainbow') # this does what I want minus the labels
plt.scatter(X, Y, c=labels, cmap='rainbow', label=labels)
plt.legend(loc='best') # this produces 500 labels instead of 5
You could plot each label individually. You need to choose its color from the cmap, which you need to normalize to your labels first.
import numpy as np
from numpy.random import rand
import matplotlib.pyplot as plt
from matplotlib import cm, colors
%matplotlib inline
import seaborn as sns
sns.set(context='poster', style='dark')
X = rand(500)
Y = rand(500)
labels = np.round(rand(500)*4).astype(int)
unique_labels=np.unique(labels) # array([0, 1, 2, 3, 4])
norm = colors.Normalize(vmin=unique_labels[0], vmax=unique_labels[-1])
m = cm.ScalarMappable(norm=norm, cmap=cm.rainbow)
for label in np.unique(labels):
color = m.to_rgba(label)
plt.scatter(X[labels==label], Y[labels==label], c=color, label=label)
plt.legend(loc='best')
produces this (without seaborn, as I don't have that installed, but you get the idea):

Categories

Resources