Custom categorized legend matplotlib - python

I made a mock-up version of my dataset to illustrate my problem.
I have a graph like this:
import pandas as pd
import matplotlib.pyplot as plt
data = {'x': [0, 1, 2, 3],
'y': [3, 2, 1, 0],
'cat1': ['A', 'B', 'A', 'B'],
'cat2': ['f', 'g', 'h', 'i']}
df = pd.DataFrame(data)
colors = {'A':'tab:red',
'B':'tab:blue'}
markers = {'f':"o",
'g':"v",
'h':"s",
'i':"+"}
fig, ax = plt.subplots()
for i in range(df.shape[0]):
ax.scatter(df.loc[i,'x'],
df.loc[i,'y'],
color=colors[df.loc[i,'cat1']],
marker=markers[df.loc[i,'cat2']],
label = df.loc[i, 'cat2'])
ax.legend()
But I'm looking for a legend like this:
Could anyone give me some tips on how to solve this? Also, it would be better if the legend in the final plot were in a box outside the plot, on the right side.

To add additional lines (A,B), rearrange the order and move the legend outside the graph, these are the steps you can follow after plotting.
Create a custom legend entries for the new entries and existing one using Line2D
Plot the legend and use bbox_to_anchor to move the legend to where you need it. You can adjust the coordinates within bbox if you want to move the position
Adjust the labels for A, B as these are only text (no marker) so that align horizontally to the middle (you can adjust set_position() if you want to move it further to the left/right
Code
import pandas as pd
import matplotlib.pyplot as plt
data = {'x': [0, 1, 2, 3],
'y': [3, 2, 1, 0],
'cat1': ['A', 'B', 'A', 'B'],
'cat2': ['f', 'g', 'h', 'i']}
df = pd.DataFrame(data)
colors = {'A':'tab:red',
'B':'tab:blue'}
markers = {'f':"o",
'h':"s",
'g':"v",
'i':"+"}
fig, ax = plt.subplots()
for i in range(df.shape[0]):
ax.scatter(df.loc[i,'x'], df.loc[i,'y'],
color=colors[df.loc[i,'cat1']],
marker=markers[df.loc[i,'cat2']],
label = df.loc[i, 'cat2']
## Create legend handle entries for each of the items
from matplotlib.lines import Line2D
title = Line2D([0], [0], linestyle="none", marker="")
f = Line2D([0], [0], linestyle="none", marker="o", markersize=10, markeredgecolor='tab:red', markerfacecolor="tab:red")
g = Line2D([0], [0], linestyle="none", marker="v", markersize=10, markeredgecolor='tab:blue', markerfacecolor="tab:blue")
h = Line2D([0], [0], linestyle="none", marker="s", markersize=10, markeredgecolor='tab:red', markerfacecolor="tab:red")
i = Line2D([0], [0], linestyle="none", marker="+", markersize=10, markeredgecolor='tab:blue', markerfacecolor="tab:blue")
## Plot in order you want, bbox to set legend box outside
leg=ax.legend((title, f, h, title, g, i), ('A', 'f', 'h', 'B', 'g','i'), bbox_to_anchor=(1.16, 1.03))
## Adjust position of A and B so that they are in middle
for item, label in zip(leg.legendHandles, leg.texts):
if label._text in ['A', 'B']:
width=item.get_window_extent(fig.canvas.get_renderer()).width
label.set_ha('left')
label.set_position((-width/2,0)) ## Adjust here to move left/right
Plot

Related

How to generate proper legends for scatter plot in python

I am trying to prepare a box and scatter plot for 8 data points in python. I use the following code:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
x = [24.4, 6.7, 19.7, 16.0, 25.1, 19.5, 10, 22.1]
f, ax = plt.subplots()
ax.boxplot(x, vert=False, showmeans=True, showfliers=False)
x0 = np.random.normal(1, 0.05, len(x))
c = ['r', 'b', 'c', 'm', 'y', 'g', 'm', 'k']
lab = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H']
ax.scatter(x, x0, c=c, s=60, alpha=0.2)
ax.legend(labels=lab, loc="upper left", ncol=8)
It generate a image like the following:
It looks that the legend doesn't have the proper sphere symbols with different colors, which I expected. Beside the colors for the symbols are shallow and light.
So how to generate proper legends with correct symbols and how to make the colors of the symbols brighter and sharper?
I will deeply appreciate it if anyone can help.
Best regards
To make the colours brighter, just raise the alpha value.
For the legend, the order of the plotting matters here, it is better that the boxplot is plotted after the scatter plots. Also, to get for each point a place in the legend, it should b considered as a different graph, for that I used a loop to loop over the values of x, x0 and c. Here's the outcome:
import numpy as np
import matplotlib.pyplot as plt
# init figure
f, ax = plt.subplots()
# values
x = [24.4, 6.7, 19.7, 16.0, 25.1, 19.5, 10, 22.1]
x0 = np.random.normal(1, 0.05, len(x))
# labels and colours
c = ['r', 'b', 'c', 'm', 'y', 'g', 'm', 'k']
lab = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H']
# put the plots into a list
plots = []
for i in range(len(x)):
p = ax.scatter(x[i], x0[i], c=c[i], s=60, alpha=0.5) # raised the alpha to get sharper colors
plots.append(p)
# plot legends
plt.legend(plots,
labels=lab,
scatterpoints=1,
loc='upper left',
ncol=8,
fontsize=8)
# plot the box plot (the order here matters!)
ax.boxplot(x, vert=False, showmeans=True, showfliers=False)
# save the desired figure
plt.savefig('tt.png')
Output:

Circlify - change the colour of just one of the circles

I have this code:
import matplotlib.pyplot as plt
import pandas as pd
df = pd.DataFrame({
'Name': ['A', 'B', 'C', 'D', 'E', 'F'],
'Value': [10, 2, 23, 87, 12, 65]
})
circles = circlify.circlify(
df['Value'].tolist(),
show_enclosure=False,
target_enclosure=circlify.Circle(x=0, y=0, r=1)
)
# Create just a figure and only one subplot
fig, ax = plt.subplots(figsize=(10,10))
# Title
ax.set_title('Basic circular packing')
# Remove axes
ax.axis('off')
# Find axis boundaries
lim = max(
max(
abs(circle.x) + circle.r,
abs(circle.y) + circle.r,
)
for circle in circles
)
plt.xlim(-lim, lim)
plt.ylim(-lim, lim)
# list of labels
labels = df['Name']
# print circles
for circle, label in zip(circles, labels):
x, y, r = circle
ax.add_patch(plt.Circle((x, y), r, alpha=0.2, linewidth=2,color='#e6d4ff'))
plt.annotate(
label,
(x,y ) ,
va='center',
ha='center',
size=12
)
It produces this output:
I wanted to change the colour of just one of the circles (for example, the biggest circle).
I tried changing the colour from:
color='#e6d4ff'
to, for example, a list of colours:
color=['#e6d4ff','#e6d4ff','#e6d4ff','#e6d4ff','#e6d4ff','#ffc4c4']
with the error:
RGBA sequence should have length 3 or 4
I guess the error is saying if I'm providing a list, then the list should just be RGB dimensions.
Would someone be able to show me? (I couldn't see it in the python graph gallery e.g. [here][2] or the circlify doc here but maybe I've missed it?)
In each call to plt.Circle(...) you're only creating one circle, which has only one color. To assign different colors to different circles, the colors can be added into the for loop, e.g. : for circle, label, color in zip(circles, labels, colors):.
Note that circlify expects the list of values in sorted order, and that the returned list contains the circles sorted from smallest to largest. In your example code, D is the largest circle, but in your plot, you labeled it as F. Sorting the dataframe at the start and using that order helps to keep values and labels synchronized.
Here is the example code, having D as largest and with a different color (the code also changes a few plt. calls to ax. to be more consistent):
import matplotlib.pyplot as plt
import pandas as pd
import circlify
df = pd.DataFrame({'Name': ['A', 'B', 'C', 'D', 'E', 'F'],
'Value': [10, 2, 23, 87, 12, 65]})
df = df.sort_values('Value') # the order is now ['B', 'A', 'E', 'C', 'F', 'D']
circles = circlify.circlify(df['Value'].tolist(),
show_enclosure=False,
target_enclosure=circlify.Circle(x=0, y=0, r=1))
fig, ax = plt.subplots(figsize=(10, 10))
ax.set_title('Basic circular packing')
ax.axis('off')
ax.set_aspect('equal') # show circles as circles, not as ellipses
lim = max(max(abs(circle.x) + circle.r, abs(circle.y) + circle.r, )
for circle in circles)
ax.set_xlim(-lim, lim)
ax.set_ylim(-lim, lim)
labels = df['Name'] # ['B', 'A', 'E', 'C', 'F', 'D']
colors = ['#ffc4c4' if val == df['Value'].max() else '#e6d4ff' for val in df['Value']]
for circle, label, color in zip(circles, labels, colors):
x, y, r = circle
ax.add_patch(plt.Circle((x, y), r, alpha=0.7, linewidth=2, color=color))
ax.annotate(label, (x, y), va='center', ha='center', size=12)
plt.show()

How to change legend text when plotting 3D scatter plot with Matplotlib?

I have a 3D scatter plot which was produced using the following code
import seaborn as sns
import numpy as np
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.colors import ListedColormap
# Create an example dataframe
data = {'th': [1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2],
'pdvalue': [0.5, 0.5, 0.5, 0.5, 0.2,0.2,0.2,0.2,0.3,0.3,0.4,0.1,1,1.1,3,1],
'my_val': [1.2,3.2,4,5.1,1,2,5.1,1,2,4,1,3,6,6,2,3],
'name':['a','b','c','d','a','b','c','d','a','b','c','d','a','b','c','d']}
df = pd.DataFrame(data)
# convert unique str into unique int
order_dict = {k: i for i, k in enumerate ( df ['name'])}
df ['name_int'] = df ['name'].map ( order_dict )
data_np=df.to_numpy()
# generate data
x = data_np[:,0]
y = data_np[:,1]
z = data_np[:,2]
# axes instance
fig = plt.figure(figsize=(10,6))
ax = Axes3D(fig)
# get colormap from seaborn
cmap = ListedColormap(sns.color_palette("husl", 256).as_hex())
# plot
sc = ax.scatter(x, y, z, s=40, c=data_np[:,4], marker='o', cmap=cmap, alpha=1)
ax.set_xlabel('th')
ax.set_ylabel('pdvalue')
ax.set_zlabel('my_val')
# legend
plt.legend(*sc.legend_elements(), bbox_to_anchor=(1.05, 1), loc=2)
plt.show()
and this produce
In the above, I had to convert the name into integer type as the para c of the ax.scatter only accept number. As a result, the legend was map according thenumeric value instead of the original name.
May I know how to have the legend in term of name instead of the numerical representation?
The code can be simplified making use of pandas to do conversions and selections. By drawing the scatter plot for each 'name' separately, they each can be given a label for the legend.
Here is the adapted code:
import seaborn as sns
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
# Create an example dataframe
data = {'th': [1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2],
'pdvalue': [0.5, 0.5, 0.5, 0.5, 0.2, 0.2, 0.2, 0.2, 0.3, 0.3, 0.4, 0.1, 1, 1.1, 3, 1],
'my_val': [1.2, 3.2, 4, 5.1, 1, 2, 5.1, 1, 2, 4, 1, 3, 6, 6, 2, 3],
'name': ['a', 'b', 'c', 'd', 'a', 'b', 'c', 'd', 'a', 'b', 'c', 'd', 'a', 'b', 'c', 'd']}
df = pd.DataFrame(data)
# axes instance
fig = plt.figure(figsize=(10, 6))
ax = Axes3D(fig, auto_add_to_figure=False)
fig.add_axes(ax)
# find all the unique labels in the 'name' column
labels = np.unique(df['name'])
# get palette from seaborn
palette = sns.color_palette("husl", len(labels))
# plot
for label, color in zip(labels, palette):
df1 = df[df['name'] == label]
ax.scatter(df1['th'], df1['pdvalue'], df1['my_val'],
s=40, marker='o', color=color, alpha=1, label=label)
ax.set_xlabel('th')
ax.set_ylabel('pdvalue')
ax.set_zlabel('my_val')
# legend
plt.legend(bbox_to_anchor=(1.05, 1), loc=2)
plt.show()

Why does using ax.twiny shift the figure mapped to second axes rightward?

I'm trying to allow my figure to share the same y axis, but have different scales along x axis. The problem is that when I try to map the second figure to the second axes (ax1 = ax.twiny), the figure seems to move forward to the right from where it should be. Here is a minimal working example that demonstrates my problem.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rc
import pandas as pd
r = [0,1,2,3,4]
raw_data = {'greenBars': [20, 1.5, 7, 10, 5], 'orangeBars': [5, 15, 5, 10, 15],'blueBars': [2, 15, 18, 5, 10]}
df = pd.DataFrame(raw_data)
totals = [i+j+k for i,j,k in zip(df['greenBars'], df['orangeBars'], df['blueBars'])]
greenBars = [i / j * 100 for i,j in zip(df['greenBars'], totals)]
f, ax = plt.subplots(1, figsize=(6,6))
ax.barh(r, greenBars, color='#b5ffb9', edgecolor='white', height=0.85)
df = pd.DataFrame({'group':['A', 'B', 'C', 'D', 'E'], 'values':[300,250,150,50,10] })
ax1 = ax.twiny()
ax1.hlines(y=groups, xmin=0, xmax=df['values'], color='black', linewidth=1.5);
plt.show()
where my expected outcome is to have the ax1.hlines move left-ward to the frame (as shown by the arrows in the image below). Does anybody have any suggestions as to how to fix this behaviour?
barh usually sets lower limit at 0 while plot or others set at a little less value for aesthetic. To fix this, manually set xlim for ax1:
...
f, ax = plt.subplots(1, figsize=(6,6))
ax.barh(r, greenBars, color='#b5ffb9', edgecolor='white', height=0.85)
df = pd.DataFrame({'group':['A', 'B', 'C', 'D', 'E'], 'values':[300,250,150,50,10] })
ax1 = ax.twiny()
ax1.hlines(y=df['group'], xmin=0, xmax=df['values'], color='black', linewidth=1.5);
# this is added
ax1.set_xlim(0)
plt.show()
Output:

How to change marker and color in matplotlib for a specific column value?

I have a data file including 3 columns. The first 2 columns represent coordinates, the third one is a string value like 'foo', 'bar' or 'ter'.
I would like to display with python's matplotlib based on this label, different marker and color. Example:
foo => red circle
bar => green triangle
ter => black square
What I did till now is:
import numpy as np
import matplotlib.pyplot as plt
coordData = np.genfromtxt("mydata.csv", usecols=(0,1), delimiter=",", dtype=None)
coordLabels = np.genfromtxt("mydata.csv", usecols=2, delimiter=",", dtype=None)
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(coordData[:, 0], coordData[:, 1], c="r", marker="o")
plt.show()
How can I switch marker and color based on the coordLabels values?
SOLUTION
Based on the suggestion I made some changes:
coordData = np.genfromtxt("mydata.csv", usecols=(0, 1), delimiter=",", dtype=None)
coordLabels = np.genfromtxt("mydata.csv", usecols=2, delimiter=",", dtype=None)
fig = plt.figure()
ax = fig.add_subplot(111)
uniqueVals = np.unique(coordLabels)
markers = ['^', 'o', '*']
colors = { '^' : 'r',
'o' : 'b',
'*' : 'g'}
for marker, val in zip(markers, uniqueVals):
toUse = coordLabels == val
ax.scatter(coordData[toUse,0], coordData[toUse,1], c = colors[marker], marker=marker)
plt.show()
If you want the color to be dependent upon the label in coordLabels, you want to set the color equal to that variable instead of 'r' like you have.
ax.scatter(coordData[:, 0], coordData[:, 1], c=coordLabels, marker="o")
If you want different markers for each of the plots, you will need to create multiple scatter plots (one for each value in coordLabels
uniqueVals = ['foo', 'bar', 'ter']
# Create your own list of markers here (needs to be the same size as `uniqueVals`)
markers = ['o', '^', 's']
colors = ['r', 'g', 'b']
for color, marker, val in zip(colors, markers, uniqueVals):
toUse = coordLabels == val
ax.scatter(coordData[toUse,0], coordData[toUse,1], c=color, marker=marker)

Categories

Resources