Scatterplot legend not including all data [duplicate]

Scatterplot legend not including all data [duplicate] - python

Starting from the following example:
fig, ax = plt.subplots()
df = pd.DataFrame({'n1':[1,2,1,3], 'n2':[1,3,2,1], 'l':['a','b','c','d']})
for label in df['l']:
df.plot('n1','n2', kind='scatter', ax=ax, s=50, linewidth=0.1, label=label)
what I obtained is the following scatterplot:
I'm now trying to set a different color for each of the four points. I know that I can loop over a set of, for instance, 4 colors in a list like:
colorlist = ['b','r','c','y']
but since my real dataset comprise at least 20 different points, I was looking for a sort of "color generator" to loop within it.

The following method will create a list of colors as long as your dataframe, and then plot a point with a label with each color:
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
fig, ax = plt.subplots()
df = pd.DataFrame({'n1':[1,2,1,3], 'n2':[1,3,2,1], 'l':['a','b','c','d']})
colormap = cm.viridis
colorlist = [colors.rgb2hex(colormap(i)) for i in np.linspace(0, 0.9, len(df['l']))]
for i,c in enumerate(colorlist):
x = df['n1'][i]
y = df['n2'][i]
l = df['l'][i]
ax.scatter(x, y, label=l, s=50, linewidth=0.1, c=c)
ax.legend()
plt.show()

IIUC you can do it this way:
import matplotlib.pyplot as plt
from matplotlib import colors
import pandas as pd
colorlist = list(colors.ColorConverter.colors.keys())
fig, ax = plt.subplots()
[df.iloc[[i]].plot.scatter('n1', 'n2', ax=ax, s=50, label=l,
color=colorlist[i % len(colorlist)])
for i,l in enumerate(df.l)]
colorlist:
In [223]: colorlist
Out[223]: ['m', 'b', 'g', 'r', 'k', 'y', 'c', 'w']
PS colorlist[i % len(colorlist)] - should always remain in the list bounds

How about this,
Here is the source code,
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from matplotlib import cm
fig, ax = plt.subplots()
df = pd.DataFrame({'n1':[1,2,1,3], 'n2':[1,3,2,1], 'l':['a','b','c','d']})
#colors = ['b','r','c','y']
nrof_labels = len(df['l'])
colors = cm.rainbow(np.linspace(0, 1, nrof_labels)) # create a bunch of colors
for i, r in df.iterrows():
ax.plot(r['n1'], r['n2'], 'o', markersize=10, color=colors[i], linewidth=0.1, label=r['l'])
ax.set_xlim(0.5, 3.5)
ax.set_ylim(0.5, 3.5)
plt.legend(loc='best')
plt.show()

Additionally, if df[l] has repeated elements and if the colors have to be assigned accordingly:
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
fig, ax = plt.subplots(figsize=(8,8))
df = pd.DataFrame({'n1':[1,2,1,3], 'n2':[1,3,2,1], 'l':['b','b','c','d']})
l_unq = df['l'].unique()
colormap = cm.viridis
colorlist = [colors.rgb2hex(colormap(i)) for i in np.linspace(0, 0.9, len(l_unq))]
for i,c in enumerate(colorlist):
x = df[df.l==l_unq[i]].n1
y = df[df.l==l_unq[i]].n2
l = l_unq[i]
ax.scatter(x, y, label=l, s=50, linewidth=0.1, c=c)
ax.set_xlabel('n1')
ax.set_ylabel('n2')
ax.legend()
plt.show()

Related

how to add hatches to cells in seaborn.heatmap

I tried to visualize my data with seaborn.heatmap.
However, the problem I have is that when I print it out in grayscle, the image is hard to read.
I follow many similar questions but it didn't work.
Is there anyway to add hatches over the cells in seaborn.heatmap?
My code is as below:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from matplotlib.colors import ListedColormap
import seaborn as sns
df = pd.read_csv("file.csv")
sns.heatmap(df, annot=False, fmt='.0f', square=True,
cmap="coolwarm", linewidths=1, cbar=False)
plt.show()

You could create a loop, dividing the values into e.g. 4 groups and assign a hatch pattern to each of them via pcolor applied to the subset.
Here is an example starting from random test data:
from matplotlib import pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
column_names = [f'{c:.2f}' for c in np.arange(0, 1.5001, 0.05)]
row_names = ['Alkaid', 'Mizar', 'Alioth', 'Megrez', 'Phecda', 'Merak', 'Dubhe']
df = pd.DataFrame(np.random.normal(0.3, 1, (len(row_names), len(column_names))).cumsum(axis=1) + 5,
columns=column_names, index=row_names)
values = df.values
vmin = values.min()
vmax = values.max()
patterns = ['', 'oo', '////', 'XXX']
bounds = np.linspace(vmin, vmax, len(patterns) + 1)
bounds[-1] += 1
sns.set_style('white')
fig, ax = plt.subplots(figsize=(12, 5))
sns.heatmap(data=df, linewidths=1, square=True, cmap='coolwarm', linecolor='white', cbar=False, ax=ax)
x = np.arange(df.shape[1] + 1)
y = np.arange(df.shape[0] + 1)
handles = []
norm = plt.Normalize(vmin, vmax)
cmap = plt.get_cmap('coolwarm')
for pattern, b0, b1 in zip(patterns, bounds[:-1], bounds[1:]):
ax.pcolor(x, y, np.where((values >= b0) & (values < b1), values, np.nan), cmap=cmap, norm=norm,
hatch=pattern, ec='black', lw=1)
handles.append(plt.Rectangle((0, 0), 0, 0, color=cmap(norm((b0 + b1) / 2)), ec='black',
hatch=pattern, label=f'{b0:5.2f}-{b1:5.2f}'))
ax.hlines(y, 0, x.max(), color='w', lw=2)
ax.vlines(x, 0, y.max(), color='w', lw=2)
ax.legend(handles=handles, bbox_to_anchor=(1.01, 1.02), loc='upper left',
handlelength=2, handleheight=2, frameon=False)
plt.tight_layout()
plt.show()

How to add multiple trendlines pandas

I have plotted a graph with two y axes and would now like to add two separate trendlines for each of the y plots.
This is my code:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
%matplotlib inline
amp_costs=pd.read_csv('/Users/Ampicillin_Costs.csv', index_col=None, usecols=[0,1,2])
amp_costs.columns=['PERIOD', 'ITEMS', 'COST PER ITEM']
ax=amp_costs.plot(x='PERIOD', y='COST PER ITEM', color='Blue', style='.', markersize=10)
amp_costs.plot(x='PERIOD', y='ITEMS', secondary_y=True,
color='Red', style='.', markersize=10, ax=ax)
Any guidance as to how to plot these two trend lines to this graph would be much appreciated!

Here is a quick example of how to use sklearn.linear_model.LinearRegression to make the trend line.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
plt.style.use('ggplot')
%matplotlib inline
period = np.arange(10)
items = -2*period +1 + np.random.randint(-2,2,len(period))
cost = 35000*period +15000 + np.random.randint(-25000,25000,len(period))
data = np.vstack((period,items,cost)).T
df = pd.DataFrame(data, columns=\['P','ITEMS', 'COST'\]).set_index('P')
lmcost = LinearRegression().fit(period.reshape(-1,1), cost.reshape(-1,1))
lmitems = LinearRegression().fit(period.reshape(-1,1), items.reshape(-1,1))
df['ITEMS_LM'] = lmitems.predict(period.reshape(-1,1))
df['COST_LM'] = lmcost.predict(period.reshape(-1,1))
fig,ax = plt.subplots()
df.ITEMS.plot(ax = ax, color = 'b')
df.ITEMS_LM.plot(ax = ax,color= 'b', linestyle= 'dashed')
df.COST.plot(ax = ax, secondary_y=True, color ='g')
df.COST_LM.plot(ax = ax, secondary_y=True, color = 'g', linestyle='dashed')

Python: Sharing scale between matplotlib and seaborn

I'd like to create a seaborn heatmap which has also scatter plot color points. I'd like the final result to use the grid of the scatter plot, with the squares of the heatmap being "centered" on the scatter points.
Unfortunately, I don't find how to share scales between the two layers, as shown in the example below.
What can I do?
Thanks a lot for your help.
%matplotlib inline
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
npoints = 3
x = np.tile(np.arange(npoints), npoints)
df = pd.DataFrame({'x': np.tile(np.arange(npoints), npoints), 'y': np.repeat(np.arange(npoints), npoints)})
df['z'] = 0
df.loc[df['x'] == df['y'], 'z'] = df.loc[df['x'] == df['y'], 'x']
df['c'] = np.random.choice(np.arange(3) + 1, df.shape[0])
df.loc[df['x'] != df['y'], 'c'] = 0
sns.heatmap(df[['x', 'y', 'z']].set_index(['x', 'y'])['z'].unstack())
plt.gca().set_title('Heatmap only')
df.plot(x='x', y='y', color=df['c'], kind='scatter')
plt.gca().set_title('Scatter points only')
fig, ax = plt.subplots()
sns.heatmap(df[['x', 'y', 'z']].set_index(['x', 'y'])['z'].unstack(), ax=ax)
df.plot(x='x', y='y', ax=ax, color=df['c'], kind='scatter')
ax.set_title('Heatmap and scatter points - scales problem')

A workaround would be to shift your scatter data by 0.5:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
npoints = 3
x = np.tile(np.arange(npoints), npoints)
df = pd.DataFrame({'x': np.tile(np.arange(npoints), npoints), 'y': np.repeat(np.arange(npoints), npoints)})
df['z'] = 0
df.loc[df['x'] == df['y'], 'z'] = df.loc[df['x'] == df['y'], 'x']
df['c'] = np.random.choice(np.arange(3) + 1, df.shape[0])
df.loc[df['x'] != df['y'], 'c'] = 0
fig, ax = plt.subplots()
qp = sns.heatmap(df[['x', 'y', 'z']].set_index(['x', 'y'])['z'].unstack(), ax=ax)
# df.plot(x='x', y='y', ax=ax, color=df['c'], kind='scatter')
ax.scatter(df['x']+0.5,df['y']+0.5,c=df['c'])
ax.set_title('Heatmap and scatter points - scales problem')
plt.show()
result:

pandas - scatter plot with different color legend for each point

Starting from the following example:
fig, ax = plt.subplots()
df = pd.DataFrame({'n1':[1,2,1,3], 'n2':[1,3,2,1], 'l':['a','b','c','d']})
for label in df['l']:
df.plot('n1','n2', kind='scatter', ax=ax, s=50, linewidth=0.1, label=label)
what I obtained is the following scatterplot:
I'm now trying to set a different color for each of the four points. I know that I can loop over a set of, for instance, 4 colors in a list like:
colorlist = ['b','r','c','y']
but since my real dataset comprise at least 20 different points, I was looking for a sort of "color generator" to loop within it.

The following method will create a list of colors as long as your dataframe, and then plot a point with a label with each color:
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
fig, ax = plt.subplots()
df = pd.DataFrame({'n1':[1,2,1,3], 'n2':[1,3,2,1], 'l':['a','b','c','d']})
colormap = cm.viridis
colorlist = [colors.rgb2hex(colormap(i)) for i in np.linspace(0, 0.9, len(df['l']))]
for i,c in enumerate(colorlist):
x = df['n1'][i]
y = df['n2'][i]
l = df['l'][i]
ax.scatter(x, y, label=l, s=50, linewidth=0.1, c=c)
ax.legend()
plt.show()

IIUC you can do it this way:
import matplotlib.pyplot as plt
from matplotlib import colors
import pandas as pd
colorlist = list(colors.ColorConverter.colors.keys())
fig, ax = plt.subplots()
[df.iloc[[i]].plot.scatter('n1', 'n2', ax=ax, s=50, label=l,
color=colorlist[i % len(colorlist)])
for i,l in enumerate(df.l)]
colorlist:
In [223]: colorlist
Out[223]: ['m', 'b', 'g', 'r', 'k', 'y', 'c', 'w']
PS colorlist[i % len(colorlist)] - should always remain in the list bounds

How about this,
Here is the source code,
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from matplotlib import cm
fig, ax = plt.subplots()
df = pd.DataFrame({'n1':[1,2,1,3], 'n2':[1,3,2,1], 'l':['a','b','c','d']})
#colors = ['b','r','c','y']
nrof_labels = len(df['l'])
colors = cm.rainbow(np.linspace(0, 1, nrof_labels)) # create a bunch of colors
for i, r in df.iterrows():
ax.plot(r['n1'], r['n2'], 'o', markersize=10, color=colors[i], linewidth=0.1, label=r['l'])
ax.set_xlim(0.5, 3.5)
ax.set_ylim(0.5, 3.5)
plt.legend(loc='best')
plt.show()

Additionally, if df[l] has repeated elements and if the colors have to be assigned accordingly:
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
fig, ax = plt.subplots(figsize=(8,8))
df = pd.DataFrame({'n1':[1,2,1,3], 'n2':[1,3,2,1], 'l':['b','b','c','d']})
l_unq = df['l'].unique()
colormap = cm.viridis
colorlist = [colors.rgb2hex(colormap(i)) for i in np.linspace(0, 0.9, len(l_unq))]
for i,c in enumerate(colorlist):
x = df[df.l==l_unq[i]].n1
y = df[df.l==l_unq[i]].n2
l = l_unq[i]
ax.scatter(x, y, label=l, s=50, linewidth=0.1, c=c)
ax.set_xlabel('n1')
ax.set_ylabel('n2')
ax.legend()
plt.show()

Setting numpoints in matplotlib legend does not work

I am trying to have a single data point on a plot legend by following the suggestions here and it does not seem to work:
from pylab import scatter
import pylab
import matplotlib.pyplot as plt
fig = plt.figure()
ax = plt.gca()
ax.scatter(1,2,c = 'blue', marker = 'x')
ax.scatter(2,3, c= 'red', marker = 'o')
ax.legend(('1','2'), loc = 2, numpoints = 1)
plt.show()
Am I doing something completely stupid here? Some additional information:
In [147]: import matplotlib
print matplotlib.__version__
Out [147]: 1.1.1rc

For scatterplots, use the scatterpoints parameter:
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
ax.scatter(1, 2, c='blue', marker='x')
ax.scatter(2, 3, c='red', marker='o')
ax.legend(('1', '2'), loc=2, scatterpoints=1)
plt.show()

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Scatterplot legend not including all data [duplicate] - python

Related

how to add hatches to cells in seaborn.heatmap

How to add multiple trendlines pandas

Python: Sharing scale between matplotlib and seaborn

pandas - scatter plot with different color legend for each point

Setting numpoints in matplotlib legend does not work

Categories

Resources