I tried to visualize my data with seaborn.heatmap.
However, the problem I have is that when I print it out in grayscale, the image is hard to read.
I followed the suggestions in many similar questions, but none of them worked.
Is there any way to add hatches over the cells in seaborn.heatmap?
My code is as below:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from matplotlib.colors import ListedColormap
import seaborn as sns
# Read the table from disk and draw it as a heatmap with square cells.
df = pd.read_csv("file.csv")
heatmap_style = {
    "annot": False,
    "fmt": '.0f',
    "square": True,
    "cmap": "coolwarm",
    "linewidths": 1,
    "cbar": False,
}
sns.heatmap(df, **heatmap_style)
plt.show()
You could create a loop, dividing the values into e.g. 4 groups and assign a hatch pattern to each of them via pcolor applied to the subset.
Here is an example starting from random test data:
from matplotlib import pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
# Random test data: one row per star, one column per 0.05 step from 0.00 to 1.50.
column_names = [f'{c:.2f}' for c in np.arange(0, 1.5001, 0.05)]
row_names = ['Alkaid', 'Mizar', 'Alioth', 'Megrez', 'Phecda', 'Merak', 'Dubhe']
df = pd.DataFrame(
    np.random.normal(0.3, 1, (len(row_names), len(column_names))).cumsum(axis=1) + 5,
    columns=column_names, index=row_names)

values = df.values
vmin = values.min()
vmax = values.max()

# One hatch pattern per value group; the groups split [vmin, vmax] into equal parts.
patterns = ['', 'oo', '////', 'XXX']
bounds = np.linspace(vmin, vmax, len(patterns) + 1)

sns.set_style('white')
fig, ax = plt.subplots(figsize=(12, 5))
sns.heatmap(data=df, linewidths=1, square=True, cmap='coolwarm', linecolor='white', cbar=False, ax=ax)

# Cell edges for pcolor: one more edge than there are cells along each axis.
x = np.arange(df.shape[1] + 1)
y = np.arange(df.shape[0] + 1)

handles = []
norm = plt.Normalize(vmin, vmax)
cmap = plt.get_cmap('coolwarm')
last_group = len(patterns) - 1
for group, (pattern, b0, b1) in enumerate(zip(patterns, bounds[:-1], bounds[1:])):
    # Groups are half-open [b0, b1), except the last one, which is closed so
    # that the maximum is hatched too. The bounds themselves stay untouched,
    # so the legend shows the real value range of every group.
    below_upper = (values <= b1) if group == last_group else (values < b1)
    in_group = (values >= b0) & below_upper
    ax.pcolor(x, y, np.where(in_group, values, np.nan), cmap=cmap, norm=norm,
              hatch=pattern, ec='black', lw=1)
    # Zero-sized proxy rectangle, used only as a legend entry for this group.
    handles.append(plt.Rectangle((0, 0), 0, 0, color=cmap(norm((b0 + b1) / 2)), ec='black',
                                 hatch=pattern, label=f'{b0:5.2f}-{b1:5.2f}'))

# White grid lines drawn after (and therefore over) the black pcolor cell edges.
ax.hlines(y, 0, x.max(), color='w', lw=2)
ax.vlines(x, 0, y.max(), color='w', lw=2)
ax.legend(handles=handles, bbox_to_anchor=(1.01, 1.02), loc='upper left',
          handlelength=2, handleheight=2, frameon=False)
plt.tight_layout()
plt.show()
Related
I have been trying this for several hours now.
I am essentially trying to get the scales of the colorbars to be the same.
I picked up this example from a previous post in which one suggestion was to use the kwargs = {'levels': np.arange(0, 0.15, 0.01)} line. I have included it, but I have not seen any changes, the scales remain the same.
This is the code that I am using:
import numpy as np
import seaborn as sns
import pandas
import matplotlib.pyplot as plt
from matplotlib import rcParams
np.random.seed(10)
sns.set(color_codes=True)
rcParams.update({'font.family': 'sans-serif', 'font.sans-serif': ['Arial']})
plt.ioff()

# Contour levels handed to both kdeplot calls, trying to get the colorbar scales to be the same.
kwargs = {'levels': np.arange(0, 0.15, 0.01)}

f, ax = plt.subplots(figsize=(7, 5))
ax.tick_params(axis='both', which='major', labelsize=22)

# Two clouds of 50 points each, drawn from different bivariate normals.
mean, cov = [0, 2], [(2, 1), (.5, 1)]
x1, y1 = np.random.multivariate_normal(mean, cov, size=50).T
mean, cov = [5, 7], [(3, 2), (7, 1)]
x2, y2 = np.random.multivariate_normal(mean, cov, size=50).T

# Options common to both density plots; only the data and colormap differ.
shared_options = dict(shade=True, shade_lowest=False, alpha=0.66,
                      legend=False, cbar=True, ax=ax)
for xs, ys, cmap_name in ((x1, y1, "Reds"), (x2, y2, "Greens")):
    sns.kdeplot(xs, ys, cmap=cmap_name, **shared_options, **kwargs)

plt.xlabel("foo", fontsize=22)
plt.ylabel("bar", fontsize=22)
I would really appreciate your help
With the clue from @mwaskom, this is the closest I managed to get to your expected outcome:
import numpy as np
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import rcParams
np.random.seed(10)
sns.set(color_codes=True)
rcParams['font.family'] = 'sans-serif'
rcParams['font.sans-serif'] = ['Arial']

# Extra keyword arguments forwarded to kdeplot; left empty here.
kwargs = {}

f, ax = plt.subplots(figsize=(7, 5))
ax.tick_params(axis='both', which='major', labelsize=22)

# Two clouds of 50 points each, drawn from different bivariate normals.
mean, cov = [0, 2], np.array([(2, 1), (.5, 1)])
x1, y1 = np.random.multivariate_normal(mean, cov, size=50).T
mean, cov = [5, 7], np.array([(3, 2), (7., 1)])
x2, y2 = np.random.multivariate_normal(mean, cov, size=50).T

# Stack both clouds into one long-form frame so a single kdeplot call can
# colour them by class through `hue`.
x = np.concatenate((x1, x2))
y = np.concatenate((y1, y2))
class_labels = ['class 1']*len(x1) + ['class 2']*len(x2)
df = pd.DataFrame({
    'x': x,
    'y': y,
    'class': class_labels,
})
palette = {
    'class 1': 'Red',
    'class 2': 'Green',
}

# `fill=True` is the current spelling of the deprecated `shade=True`.
sns.kdeplot(x='x', y='y', data=df, hue='class', palette=palette, fill=True, thresh=0.05,
            alpha=0.66, legend=False, cbar=True, **kwargs, ax=ax)
plt.xlabel("foo", fontsize=22)
plt.ylabel("bar", fontsize=22)
Given a line plot obtained with ax.plot(), I have the following handy code to plot the legend on a separate figure:
# Draw two labelled lines on a first figure.
fig, ax = plt.subplots()
ax.plot([0, 2], label='a', linestyle='--')
ax.plot([-1, 1], label='b', linestyle='dotted')

# Create a second, wide figure and give it a legend built from those lines.
fig = plt.figure(figsize=(30, 4), constrained_layout=True)
legend_labels = [line.get_label() for line in ax.lines]
fig.legend(ax.lines, legend_labels, loc="upper center")
However, if I do this with a histogram (using ax.hist()), ax.lines is empty and I cannot get the labels and styles used. Is it still possible to do that?
import matplotlib.pyplot as plt
import numpy as np
fig, ax = plt.subplots()
x = np.random.randn(1000, 3)
colors = ['red', 'tan', 'lime']

# Only the third item returned by hist() (the drawn patches) is needed.
_, _, patches = ax.hist(x, 10, color=colors, label=colors)

fig = plt.figure(figsize=(30, 4), constrained_layout=True)
# The first bar of every patch group serves as legend handle and label source.
first_bars = [group[0] for group in patches]
first_labels = [group[0].get_label() for group in patches]
fig.legend(first_bars, first_labels, loc="upper center")
UPDATE as per comment: you can achieve the same using ax.patches instead of patches returned by ax.hist like so:
import matplotlib.pyplot as plt
import numpy as np
fig, ax = plt.subplots()
x = np.random.randn(1000, 3)
colors = ['red', 'tan', 'lime']
n_bins = 10
ax.hist(x, n_bins, color=colors, label=colors)

fig = plt.figure(figsize=(3, 4), constrained_layout=True)
# Take every n_bins-th patch of the axes as a legend handle.
legend_handles = ax.patches[::n_bins]
legend_labels = [handle.get_label() for handle in ax.patches[::n_bins]]
fig.legend(legend_handles, legend_labels, loc="upper center")
I am trying to obtain the following plot from a pandas data frame.
I am not sure how to combine seaborn with pandas for that task.
This is the dataframe I want to use:
import numpy as np
import pandas as pd

# Three columns of 1000 random values each: two normal samples ('a' is shifted
# by +1) and one uniform sample shifted by +10.
data = pd.DataFrame({'a': np.random.randn(1000) + 1,
                     'b': np.random.randn(1000),
                     'c': np.random.rand(1000) + 10},
                    columns=['a', 'b', 'c'])

# Blank out 100 random entries of 'a' and 800 of 'b'.
# Assigning through .loc writes into the frame itself; the chained form
# `data.a[...] = ...` assigns into an intermediate Series and is not
# guaranteed to reach the frame. `np.nan` is the supported spelling
# (the `np.NaN` alias no longer exists in NumPy 2).
data.loc[data['a'].sample(100).index, 'a'] = np.nan
data.loc[data['b'].sample(800).index, 'b'] = np.nan
Notice that the frequency will need to be normalized (height of the histogram), as the number of data points and distributions differ significantly and the distributions will have different 'y scales'.
# Plot the frame with pandas' built-in histogram helper.
# NOTE(review): the trailing ';' looks like a notebook idiom for suppressing the echoed return value — confirm.
data.plot.hist();
This is the code of seaborn that generates the figure I used in the beginning.
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})

# Create the data: 150 normal draws tagged cyclically with the groups A, B, C
rs = np.random.RandomState(1979)
x = rs.randn(150)
g = np.tile(list("ABC"), 50)
df = pd.DataFrame(dict(x=x, g=g))
m = df.g.map(ord)  # group letters as code points; not used further in this snippet

# Initialize the FacetGrid object (this rebinds `g` from the label array to the grid)
pal = sns.cubehelix_palette(10, rot=-.25, light=.7)
g = sns.FacetGrid(df, row="g", hue="g", aspect=5, height=1, palette=pal)

# Draw the densities in a few steps: filled curve, white outline, then a baseline.
# `fill` and `bw_method` are the current names of the deprecated `shade` and `bw`.
g.map(sns.kdeplot, "x", clip_on=False, fill=True, alpha=1, lw=1.5, bw_method=.2)
g.map(sns.kdeplot, "x", clip_on=False, color="w", lw=2, bw_method=.2)
g.map(plt.axhline, y=0, lw=2, clip_on=False)
# Define and use a simple function to label the plot in axes coordinates
def label(x, color, label):
    """Write ``label`` in bold, coloured ``color``, at the left edge of the current axes.

    ``x`` is accepted but not used. The text is positioned in axes coordinates
    (0, 0.3).
    """
    current_axes = plt.gca()
    current_axes.text(
        0, .3, label,
        transform=current_axes.transAxes,
        fontweight="bold", color=color,
        ha="left", va="center",
    )
# Stamp every facet with its group label
g.map(label, "x")

# Strip the axes details that don't play well with overlapping facets
g.despine(bottom=True, left=True)
g.set(yticks=[])
g.set_titles("")

# Make the subplots overlap via a slightly negative vertical spacing
g.fig.subplots_adjust(hspace=-.0025)
Here is a function to create a grid of kde plots ("joyplot") with one plot per dataframe column.
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import gaussian_kde
def joyplot_from_dataframe(data, cmap=None):
    """Draw one filled Gaussian-KDE curve per column of ``data`` on stacked axes.

    All rows share a single x range: the overall (NaN-ignoring) min..max of the
    frame, widened on both sides by a fifth of its span. NaN entries are
    dropped column by column before the density is estimated.

    Parameters
    ----------
    data : DataFrame-like
        Object exposing ``.values``, ``.columns`` and ``data[column].values``.
    cmap : colormap or str, optional
        Colormap (or registered colormap name) the curve colours are sampled
        from, over the range 0.2..1. Defaults to ``"Blues"``.

    Returns
    -------
    (fig, axes)
        Exactly what ``plt.subplots(nrows=n, sharex=True)`` returned, so
        ``axes`` is a single Axes when the frame has one column.
    """
    mi, ma = np.nanmin(data.values), np.nanmax(data.values)
    pad = (ma - mi) / 5
    x = np.linspace(mi - pad, ma + pad, 1000)
    n = len(data.columns)

    if not cmap:
        cmap = "Blues"
    if isinstance(cmap, str):
        # pyplot.get_cmap is used because cm.get_cmap is gone from recent matplotlib.
        cmap = plt.get_cmap(cmap)
    colors = cmap(np.linspace(.2, 1, n))

    fig, axes = plt.subplots(nrows=n, sharex=True)
    # With n == 1, subplots() hands back a bare Axes rather than an array;
    # wrap it so the loop below works for any number of columns.
    for c, ax, color in zip(data.columns, np.atleast_1d(axes), colors):
        y = data[c].values
        y = y[~np.isnan(y)]
        kde = gaussian_kde(y)
        ax.fill_between(x, kde(x), color=color)

        # Keep only a coloured bottom spine as the baseline of each row.
        ax.yaxis.set_visible(False)
        for spine in ["left", "right", "top"]:
            ax.spines[spine].set_visible(False)
        ax.spines["bottom"].set_linewidth(2)
        ax.spines["bottom"].set_color(color)
        ax.margins(y=0)
        ax.tick_params(bottom=False)
    return fig, axes
Use it as
import pandas as pd
# Three columns of 1000 random values each, with different locations and spreads.
data = pd.DataFrame({'a': np.random.randn(1000) + 1,
                     'b': np.random.randn(1000),
                     'c': np.random.rand(1000) + 10},
                    columns=['a', 'b', 'c'])

# Blank out 100 random entries of 'a' and 800 of 'b'. Writing through .loc
# modifies the frame itself (the chained `data.a[...] = ...` form is not
# guaranteed to), and `np.nan` replaces the `np.NaN` alias removed in NumPy 2.
data.loc[data['a'].sample(100).index, 'a'] = np.nan
data.loc[data['b'].sample(800).index, 'b'] = np.nan

joyplot_from_dataframe(data)
plt.show()
Starting from the following example:
fig, ax = plt.subplots()
df = pd.DataFrame({
    'n1': [1, 2, 1, 3],
    'n2': [1, 3, 2, 1],
    'l': ['a', 'b', 'c', 'd'],
})
# One scatter call per label; each call is given the whole frame.
scatter_options = dict(kind='scatter', ax=ax, s=50, linewidth=0.1)
for label in df['l']:
    df.plot(x='n1', y='n2', label=label, **scatter_options)
what I obtained is the following scatterplot:
I'm now trying to set a different color for each of the four points. I know that I can loop over a set of, for instance, 4 colors in a list like:
# Four single-letter matplotlib colour codes.
colorlist = list('brcy')
but since my real dataset comprises at least 20 different points, I was looking for a sort of "color generator" to loop over.
The following method will create a list of colors as long as your dataframe, and then plot a point with a label with each color:
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
fig, ax = plt.subplots()
df = pd.DataFrame({
    'n1': [1, 2, 1, 3],
    'n2': [1, 3, 2, 1],
    'l': ['a', 'b', 'c', 'd'],
})

# Sample the colormap at evenly spaced positions in [0, 0.9], one per row,
# and store each colour as a hex string.
colormap = cm.viridis
sample_points = np.linspace(0, 0.9, len(df['l']))
colorlist = [colors.rgb2hex(colormap(point)) for point in sample_points]

# One scatter call per row so every point gets its own colour and legend entry.
for c, x, y, point_label in zip(colorlist, df['n1'], df['n2'], df['l']):
    ax.scatter(x, y, label=point_label, s=50, linewidth=0.1, c=c)

ax.legend()
plt.show()
IIUC you can do it this way:
import matplotlib.pyplot as plt
from matplotlib import colors
import pandas as pd
# All colour names known to matplotlib's colour converter.
colorlist = list(colors.ColorConverter.colors.keys())
fig, ax = plt.subplots()

# Plot one row at a time so each point gets its own label and colour. The
# modulo wraps around when there are more rows than colour names. A plain
# loop is used because the calls are made for their side effect only.
for i, l in enumerate(df.l):
    df.iloc[[i]].plot.scatter('n1', 'n2', ax=ax, s=50, label=l,
                              color=colorlist[i % len(colorlist)])
colorlist:
In [223]: colorlist
Out[223]: ['m', 'b', 'g', 'r', 'k', 'y', 'c', 'w']
PS colorlist[i % len(colorlist)] - should always remain in the list bounds
How about this,
Here is the source code,
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from matplotlib import cm
fig, ax = plt.subplots()
df = pd.DataFrame({
    'n1': [1, 2, 1, 3],
    'n2': [1, 3, 2, 1],
    'l': ['a', 'b', 'c', 'd'],
})

# One rainbow colour per row, evenly spaced over the whole colormap.
nrof_labels = len(df['l'])
colors = cm.rainbow(np.linspace(0, 1, nrof_labels))

# Draw every row as a single round marker carrying its own legend label.
for color, point in zip(colors, df.itertuples(index=False)):
    ax.plot(point.n1, point.n2, 'o', markersize=10, color=color,
            linewidth=0.1, label=point.l)

ax.set(xlim=(0.5, 3.5), ylim=(0.5, 3.5))
plt.legend(loc='best')
plt.show()
Additionally, if df['l'] has repeated elements and the colors have to be assigned per distinct label:
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
fig, ax = plt.subplots(figsize=(8, 8))
df = pd.DataFrame({
    'n1': [1, 2, 1, 3],
    'n2': [1, 3, 2, 1],
    'l': ['b', 'b', 'c', 'd'],
})

# One colour per distinct label, sampled evenly from [0, 0.9] of the colormap.
l_unq = df['l'].unique()
colormap = cm.viridis
colorlist = [colors.rgb2hex(colormap(i)) for i in np.linspace(0, 0.9, len(l_unq))]

# Draw all rows sharing a label in one scatter call, so they share a colour
# and a single legend entry.
for l, c in zip(l_unq, colorlist):
    rows = df[df.l == l]
    ax.scatter(rows.n1, rows.n2, label=l, s=50, linewidth=0.1, c=c)

ax.set_xlabel('n1')
ax.set_ylabel('n2')
ax.legend()
plt.show()
Starting from the following example:
fig, ax = plt.subplots()
df = pd.DataFrame({
    'n1': [1, 2, 1, 3],
    'n2': [1, 3, 2, 1],
    'l': ['a', 'b', 'c', 'd'],
})
# One scatter call per label; each call is given the whole frame.
scatter_options = dict(kind='scatter', ax=ax, s=50, linewidth=0.1)
for label in df['l']:
    df.plot(x='n1', y='n2', label=label, **scatter_options)
what I obtained is the following scatterplot:
I'm now trying to set a different color for each of the four points. I know that I can loop over a set of, for instance, 4 colors in a list like:
# Four single-letter matplotlib colour codes.
colorlist = list('brcy')
but since my real dataset comprises at least 20 different points, I was looking for a sort of "color generator" to loop over.
The following method will create a list of colors as long as your dataframe, and then plot a point with a label with each color:
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
fig, ax = plt.subplots()
df = pd.DataFrame({
    'n1': [1, 2, 1, 3],
    'n2': [1, 3, 2, 1],
    'l': ['a', 'b', 'c', 'd'],
})

# Sample the colormap at evenly spaced positions in [0, 0.9], one per row,
# and store each colour as a hex string.
colormap = cm.viridis
sample_points = np.linspace(0, 0.9, len(df['l']))
colorlist = [colors.rgb2hex(colormap(point)) for point in sample_points]

# One scatter call per row so every point gets its own colour and legend entry.
for c, x, y, point_label in zip(colorlist, df['n1'], df['n2'], df['l']):
    ax.scatter(x, y, label=point_label, s=50, linewidth=0.1, c=c)

ax.legend()
plt.show()
IIUC you can do it this way:
import matplotlib.pyplot as plt
from matplotlib import colors
import pandas as pd
# All colour names known to matplotlib's colour converter.
colorlist = list(colors.ColorConverter.colors.keys())
fig, ax = plt.subplots()

# Plot one row at a time so each point gets its own label and colour. The
# modulo wraps around when there are more rows than colour names. A plain
# loop is used because the calls are made for their side effect only.
for i, l in enumerate(df.l):
    df.iloc[[i]].plot.scatter('n1', 'n2', ax=ax, s=50, label=l,
                              color=colorlist[i % len(colorlist)])
colorlist:
In [223]: colorlist
Out[223]: ['m', 'b', 'g', 'r', 'k', 'y', 'c', 'w']
PS colorlist[i % len(colorlist)] - should always remain in the list bounds
How about this,
Here is the source code,
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from matplotlib import cm
fig, ax = plt.subplots()
df = pd.DataFrame({
    'n1': [1, 2, 1, 3],
    'n2': [1, 3, 2, 1],
    'l': ['a', 'b', 'c', 'd'],
})

# One rainbow colour per row, evenly spaced over the whole colormap.
nrof_labels = len(df['l'])
colors = cm.rainbow(np.linspace(0, 1, nrof_labels))

# Draw every row as a single round marker carrying its own legend label.
for color, point in zip(colors, df.itertuples(index=False)):
    ax.plot(point.n1, point.n2, 'o', markersize=10, color=color,
            linewidth=0.1, label=point.l)

ax.set(xlim=(0.5, 3.5), ylim=(0.5, 3.5))
plt.legend(loc='best')
plt.show()
Additionally, if df['l'] has repeated elements and the colors have to be assigned per distinct label:
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
fig, ax = plt.subplots(figsize=(8, 8))
df = pd.DataFrame({
    'n1': [1, 2, 1, 3],
    'n2': [1, 3, 2, 1],
    'l': ['b', 'b', 'c', 'd'],
})

# One colour per distinct label, sampled evenly from [0, 0.9] of the colormap.
l_unq = df['l'].unique()
colormap = cm.viridis
colorlist = [colors.rgb2hex(colormap(i)) for i in np.linspace(0, 0.9, len(l_unq))]

# Draw all rows sharing a label in one scatter call, so they share a colour
# and a single legend entry.
for l, c in zip(l_unq, colorlist):
    rows = df[df.l == l]
    ax.scatter(rows.n1, rows.n2, label=l, s=50, linewidth=0.1, c=c)

ax.set_xlabel('n1')
ax.set_ylabel('n2')
ax.legend()
plt.show()