How to generate proper legends for scatter plot in python - python

I am trying to prepare a box and scatter plot for 8 data points in python. I use the following code:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
x = [24.4, 6.7, 19.7, 16.0, 25.1, 19.5, 10, 22.1]
f, ax = plt.subplots()
ax.boxplot(x, vert=False, showmeans=True, showfliers=False)
x0 = np.random.normal(1, 0.05, len(x))
c = ['r', 'b', 'c', 'm', 'y', 'g', 'm', 'k']
lab = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H']
ax.scatter(x, x0, c=c, s=60, alpha=0.2)
ax.legend(labels=lab, loc="upper left", ncol=8)
It generate a image like the following:
It looks that the legend doesn't have the proper sphere symbols with different colors, which I expected. Beside the colors for the symbols are shallow and light.
So how to generate proper legends with correct symbols and how to make the colors of the symbols brighter and sharper?
I will deeply appreciate it if anyone can help.
Best regards

To make the colours brighter, just raise the alpha value.
For the legend, the order of the plotting matters here, it is better that the boxplot is plotted after the scatter plots. Also, to get for each point a place in the legend, it should b considered as a different graph, for that I used a loop to loop over the values of x, x0 and c. Here's the outcome:
import numpy as np
import matplotlib.pyplot as plt
# init figure
f, ax = plt.subplots()
# values
x = [24.4, 6.7, 19.7, 16.0, 25.1, 19.5, 10, 22.1]
x0 = np.random.normal(1, 0.05, len(x))
# labels and colours
c = ['r', 'b', 'c', 'm', 'y', 'g', 'm', 'k']
lab = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H']
# put the plots into a list
plots = []
for i in range(len(x)):
p = ax.scatter(x[i], x0[i], c=c[i], s=60, alpha=0.5) # raised the alpha to get sharper colors
plots.append(p)
# plot legends
plt.legend(plots,
labels=lab,
scatterpoints=1,
loc='upper left',
ncol=8,
fontsize=8)
# plot the box plot (the order here matters!)
ax.boxplot(x, vert=False, showmeans=True, showfliers=False)
# save the desired figure
plt.savefig('tt.png')
Output:

Related

Custom categorized legend matplotlib

I made a mock-up version of my dataset to illustrate my problem.
I have a graph like this:
import pandas as pd
import matplotlib.pyplot as plt
data = {'x': [0, 1, 2, 3],
'y': [3, 2, 1, 0],
'cat1': ['A', 'B', 'A', 'B'],
'cat2': ['f', 'g', 'h', 'i']}
df = pd.DataFrame(data)
colors = {'A':'tab:red',
'B':'tab:blue'}
markers = {'f':"o",
'g':"v",
'h':"s",
'i':"+"}
fig, ax = plt.subplots()
for i in range(df.shape[0]):
ax.scatter(df.loc[i,'x'],
df.loc[i,'y'],
color=colors[df.loc[i,'cat1']],
marker=markers[df.loc[i,'cat2']],
label = df.loc[i, 'cat2'])
ax.legend()
But I'm looking for a legend like this:
Could anyone give me some tips on how to solve this? Also, it would be better if the legend in the final plot were in a box outside the plot, on the right side.
To add additional lines (A,B), rearrange the order and move the legend outside the graph, these are the steps you can follow after plotting.
Create a custom legend entries for the new entries and existing one using Line2D
Plot the legend and use bbox_to_anchor to move the legend to where you need it. You can adjust the coordinates within bbox if you want to move the position
Adjust the labels for A, B as these are only text (no marker) so that align horizontally to the middle (you can adjust set_position() if you want to move it further to the left/right
Code
import pandas as pd
import matplotlib.pyplot as plt
data = {'x': [0, 1, 2, 3],
'y': [3, 2, 1, 0],
'cat1': ['A', 'B', 'A', 'B'],
'cat2': ['f', 'g', 'h', 'i']}
df = pd.DataFrame(data)
colors = {'A':'tab:red',
'B':'tab:blue'}
markers = {'f':"o",
'h':"s",
'g':"v",
'i':"+"}
fig, ax = plt.subplots()
for i in range(df.shape[0]):
ax.scatter(df.loc[i,'x'], df.loc[i,'y'],
color=colors[df.loc[i,'cat1']],
marker=markers[df.loc[i,'cat2']],
label = df.loc[i, 'cat2']
## Create legend handle entries for each of the items
from matplotlib.lines import Line2D
title = Line2D([0], [0], linestyle="none", marker="")
f = Line2D([0], [0], linestyle="none", marker="o", markersize=10, markeredgecolor='tab:red', markerfacecolor="tab:red")
g = Line2D([0], [0], linestyle="none", marker="v", markersize=10, markeredgecolor='tab:blue', markerfacecolor="tab:blue")
h = Line2D([0], [0], linestyle="none", marker="s", markersize=10, markeredgecolor='tab:red', markerfacecolor="tab:red")
i = Line2D([0], [0], linestyle="none", marker="+", markersize=10, markeredgecolor='tab:blue', markerfacecolor="tab:blue")
## Plot in order you want, bbox to set legend box outside
leg=ax.legend((title, f, h, title, g, i), ('A', 'f', 'h', 'B', 'g','i'), bbox_to_anchor=(1.16, 1.03))
## Adjust position of A and B so that they are in middle
for item, label in zip(leg.legendHandles, leg.texts):
if label._text in ['A', 'B']:
width=item.get_window_extent(fig.canvas.get_renderer()).width
label.set_ha('left')
label.set_position((-width/2,0)) ## Adjust here to move left/right
Plot

Circlify - change the colour of just one of the circles

I have this code:
import matplotlib.pyplot as plt
import pandas as pd
df = pd.DataFrame({
'Name': ['A', 'B', 'C', 'D', 'E', 'F'],
'Value': [10, 2, 23, 87, 12, 65]
})
circles = circlify.circlify(
df['Value'].tolist(),
show_enclosure=False,
target_enclosure=circlify.Circle(x=0, y=0, r=1)
)
# Create just a figure and only one subplot
fig, ax = plt.subplots(figsize=(10,10))
# Title
ax.set_title('Basic circular packing')
# Remove axes
ax.axis('off')
# Find axis boundaries
lim = max(
max(
abs(circle.x) + circle.r,
abs(circle.y) + circle.r,
)
for circle in circles
)
plt.xlim(-lim, lim)
plt.ylim(-lim, lim)
# list of labels
labels = df['Name']
# print circles
for circle, label in zip(circles, labels):
x, y, r = circle
ax.add_patch(plt.Circle((x, y), r, alpha=0.2, linewidth=2,color='#e6d4ff'))
plt.annotate(
label,
(x,y ) ,
va='center',
ha='center',
size=12
)
It produces this output:
I wanted to change the colour of just one of the circles (for example, the biggest circle).
I tried changing the colour from:
color='#e6d4ff'
to, for example, a list of colours:
color=['#e6d4ff','#e6d4ff','#e6d4ff','#e6d4ff','#e6d4ff','#ffc4c4']
with the error:
RGBA sequence should have length 3 or 4
I guess the error is saying if I'm providing a list, then the list should just be RGB dimensions.
Would someone be able to show me? (I couldn't see it in the python graph gallery e.g. [here][2] or the circlify doc here but maybe I've missed it?)
In each call to plt.Circle(...) you're only creating one circle, which has only one color. To assign different colors to different circles, the colors can be added into the for loop, e.g. : for circle, label, color in zip(circles, labels, colors):.
Note that circlify expects the list of values in sorted order, and that the returned list contains the circles sorted from smallest to largest. In your example code, D is the largest circle, but in your plot, you labeled it as F. Sorting the dataframe at the start and using that order helps to keep values and labels synchronized.
Here is the example code, having D as largest and with a different color (the code also changes a few plt. calls to ax. to be more consistent):
import matplotlib.pyplot as plt
import pandas as pd
import circlify
df = pd.DataFrame({'Name': ['A', 'B', 'C', 'D', 'E', 'F'],
'Value': [10, 2, 23, 87, 12, 65]})
df = df.sort_values('Value') # the order is now ['B', 'A', 'E', 'C', 'F', 'D']
circles = circlify.circlify(df['Value'].tolist(),
show_enclosure=False,
target_enclosure=circlify.Circle(x=0, y=0, r=1))
fig, ax = plt.subplots(figsize=(10, 10))
ax.set_title('Basic circular packing')
ax.axis('off')
ax.set_aspect('equal') # show circles as circles, not as ellipses
lim = max(max(abs(circle.x) + circle.r, abs(circle.y) + circle.r, )
for circle in circles)
ax.set_xlim(-lim, lim)
ax.set_ylim(-lim, lim)
labels = df['Name'] # ['B', 'A', 'E', 'C', 'F', 'D']
colors = ['#ffc4c4' if val == df['Value'].max() else '#e6d4ff' for val in df['Value']]
for circle, label, color in zip(circles, labels, colors):
x, y, r = circle
ax.add_patch(plt.Circle((x, y), r, alpha=0.7, linewidth=2, color=color))
ax.annotate(label, (x, y), va='center', ha='center', size=12)
plt.show()

How to add labels to the axes of subplots

I am plotting 8 subplots into a figure as follows:
import matplotlib.pyplot as plt
fig, axs = plt.subplots(8)
label = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H']
data = [0.6, 0.4, 1.3, 0.8, 0.9, 1.0, 1.6, 0.2]
plt.xlim(0,2)
for i in range(8):
axs[i].get_yaxis().set_visible(False)
axs[i].get_xaxis().set_visible(False)
axs[i].set_xlim([0, 2])
axs[i].axvline(data[i],linestyle='--')
axs[i].get_yaxis().set_visible(False)
axs[7].get_xaxis().set_visible(True)
plt.show()
This looks like:
In order to label the subplots I would like to write label[i] (see code above) to the left of subplot i. How can you do that?
(As a quick fix), you might just be able to use Axes.text, for example:
axs[i].text(-0.1,0.2,label[i])
Adjust the x and y arguments as needed depending on the length of the labels.
As mentioned in the comments, another (much better) option is to keep the y-axis visible, but then set the ticks to nothing:
axs[i].set_yticks(())
axs[i].set_ylabel(label[i], rotation=0, ha='right', va='center')
As I mentioned in the comments, the proper approach would be to not set the y axis off, and remove the ticks.
The trick is to remove the two lines with axs[i].get_yaxis().set_visible(False) and add the following two lines:
axs[i].tick_params(left=False, labelleft=False)
axs[i].set_ylabel(label[i])
Please, consider the following code as a full answer (edited to include bnaecker's suggestion):
import matplotlib.pyplot as plt
plt.close('all')
fig, axs = plt.subplots(8, sharex="col")
label = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H']
data = [0.6, 0.4, 1.3, 0.8, 0.9, 1.0, 1.6, 0.2]
plt.xlim(0, 2)
for i in range(8):
axs[i].set_xlim([0, 2])
axs[i].tick_params(left=False, labelleft=False)
axs[i].axvline(data[i], linestyle='--')
axs[i].set_ylabel(label[i])
plt.show()
The figure should look like this:

Linestyle graph cannot see x axis labels

I am testing a very simple exercise just plot the code below:
t = pd.Series([1,2,5,1,8], index=['a', 's', 'l', 'f', 'd' ])
t.plot(linestyle = '-', color = 'b', sharex = True)
but I cannot see the letters a, s, l, f, and d.
Any suggestions?
You can go like:
import pandas as pd
from matplotlib import pyplot as plt
t = pd.Series([1,2,5,1,8], index=['a', 's', 'l', 'f', 'd' ])
plt.plot(t.index, t.values,linestyle = '-', color = 'b')
plt.show()
Image is in the following link
Adapted from here

Why is my grid offset in this example?

I am trying to draw a randomly occupied grid with matplotlib. The grid looks offset from the blocks by a random amount:
Here is the code:
import matplotlib.pyplot as plt
import numpy as np
# Make a 10x10 grid...
nrows, ncols = 10,10
# Fill the cells randomly with 0s and 1s
image = np.random.randint(2, size = (nrows, ncols))
# Make grid
vgrid = []
for i in range(nrows + 1):
vgrid.append((i - 0.5, i - 0.5))
vgrid.append((- 0.5, 9.5))
hgrid = []
for i in range(ncols + 1):
hgrid.append((- 0.5, 9.5))
hgrid.append((i - 0.5, i - 0.5))
row_labels = range(nrows)
col_labels = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'j']
plt.matshow(image, cmap='Greys')
for i in range(11):
plt.plot(hgrid[2 * i], hgrid[2 * i + 1], 'k-')
plt.plot(vgrid[2 * i], vgrid[2 * i + 1], 'k-')
plt.axis([-0.5, 9.5, -0.5, 9.5])
plt.xticks(range(ncols), col_labels)
plt.yticks(range(nrows), row_labels)
plt.show()
The problem seems to happen when I enforce a plot area; this line:
plt.axis([-0.5, 9.5, -0.5, 9.5])
Also, please feel free to suggest a better method. I am new to pyplot.
You can use plt.grid() to plot the axes grid. Unfortunately it won't solve the issue. The misalignment of the grid is a known issue for imshow (a function that is called by matshow).
I suggest to play with the figure size and the linewidth of the grid, until you get something acceptable.
plt.figure(figsize=(5,5));
nrows, ncols = 10,10
image = np.random.randint(2, size = (nrows, ncols))
row_labels = range(nrows)
col_labels = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'j']
plt.matshow(image, cmap='Greys',fignum=1,interpolation="nearest")
#set x and y ticks and labels
plt.xticks(range(ncols), col_labels)
plt.yticks(range(nrows), row_labels);
#set minor axes in between the labels
ax=plt.gca()
ax.set_xticks([x-0.5 for x in range(1,ncols)],minor=True )
ax.set_yticks([y-0.5 for y in range(1,nrows)],minor=True)
#plot grid on minor axes
plt.grid(which="minor",ls="-",lw=2)
This is known behavior because, by default, matshow() calls imshow() with the argument interpolation="nearest". You should get better results by overriding the argument manually:
plt.matshow(image, cmap='Greys', interpolation="none")

Categories

Resources