I have a 4x3 grid. I have 1 broken horizontal bar plot in the first row followed by 9 scatter plots. The height of the bar plot needs to be 2x height of the scatter plots. I am using gridspec to achieve this. However, it doesn't plot the bar plot completely. See picture below:
The complete bar plot looks like this
I am not sure why this is happening. Any suggestions?
Here's my code:
import numpy as np
from matplotlib import pyplot as plt
from matplotlib import gridspec
#####Importing Data from csv file#####
dataset1 = np.genfromtxt('dataSet1.csv', dtype = float, delimiter = ',', skip_header = 1, names = ['a', 'b', 'c', 'x0'])
dataset2 = np.genfromtxt('dataSet2.csv', dtype = float, delimiter = ',', skip_header = 1, names = ['a', 'b', 'c', 'x0'])
dataset3 = np.genfromtxt('dataSet3.csv', dtype = float, delimiter = ',', skip_header = 1, names = ['a', 'b', 'c', 'x0'])
corr1 = np.corrcoef(dataset1['a'],dataset1['x0'])
corr2 = np.corrcoef(dataset1['b'],dataset1['x0'])
corr3 = np.corrcoef(dataset1['c'],dataset1['x0'])
corr4 = np.corrcoef(dataset2['a'],dataset2['x0'])
corr5 = np.corrcoef(dataset2['b'],dataset2['x0'])
corr6 = np.corrcoef(dataset2['c'],dataset2['x0'])
corr7 = np.corrcoef(dataset3['a'],dataset3['x0'])
corr8 = np.corrcoef(dataset3['b'],dataset3['x0'])
corr9 = np.corrcoef(dataset3['c'],dataset3['x0'])
fig = plt.figure(figsize = (8,8))
gs = gridspec.GridSpec(4, 3, height_ratios=[2,1,1,1])
def tornado1():
np.set_printoptions(precision=4)
variables = ['a1','b1','c1','a2','b2','c2','a3','b3','c3']
base = 0
values = np.array([corr1[0,1],corr2[0,1],corr3[0,1],
corr4[0,1],corr5[0,1],corr6[0,1],
corr7[0,1],corr8[0,1],corr9[0,1]])
variables=zip(*sorted(zip(variables, values),reverse = True, key=lambda x: abs(x[1])))[0]
values = sorted(values,key=abs, reverse=True)
# The y position for each variable
ys = range(len(values))[::-1] # top to bottom
# Plot the bars, one by one
for y, value in zip(ys, values):
high_width = base + value
# Each bar is a "broken" horizontal bar chart
ax1= plt.subplot(gs[1]).broken_barh(
[(base, high_width)],
(y - 0.4, 0.8),
facecolors=['red', 'red'], # Try different colors if you like
edgecolors=['black', 'black'],
linewidth=1,
)
# Draw a vertical line down the middle
plt.axvline(base, color='black')
# Position the x-axis on the top/bottom, hide all the other spines (=axis lines)
axes = plt.gca() # (gca = get current axes)
axes.spines['left'].set_visible(False)
axes.spines['right'].set_visible(False)
axes.spines['top'].set_visible(False)
axes.xaxis.set_ticks_position('bottom')
# Make the y-axis display the variables
plt.yticks(ys, variables)
plt.ylim(-2, len(variables))
plt.draw()
return
def correlation1():
corr1 = np.corrcoef(dataset1['a'],dataset1['x0'])
print corr1[0,1]
corr2 = np.corrcoef(dataset1['b'],dataset1['x0'])
print corr2[0,1]
corr3 = np.corrcoef(dataset1['c'],dataset1['x0'])
print corr3[0,1]
ax2=plt.subplot(gs[3])
ax2.scatter(dataset1['a'],dataset1['x0'],marker = '.')
ax2.set_xlabel('a1')
ax2.set_ylabel('x01')
ax3=plt.subplot(gs[4])
ax3.scatter(dataset1['b'],dataset1['x0'],marker = '.')
ax3.set_xlabel('b1')
#ax3.set_ylabel('x01')
ax4=plt.subplot(gs[5])
ax4.scatter(dataset1['c'],dataset1['x0'],marker = '.')
ax4.set_xlabel('c1')
#ax4.set_ylabel('x01')
ax5=fig.add_subplot(gs[6])
ax5.scatter(dataset2['a'],dataset2['x0'],marker = '.')
ax5.set_xlabel('a2')
ax5.set_ylabel('x02')
ax6=fig.add_subplot(gs[7])
ax6.scatter(dataset2['b'],dataset2['x0'],marker = '.')
ax6.set_xlabel('b2')
#ax6.set_ylabel('x02')
ax7=fig.add_subplot(gs[8])
ax7.scatter(dataset2['c'],dataset2['x0'],marker = '.')
ax7.set_xlabel('c2')
#ax7.set_ylabel('x02')
ax8=plt.subplot(gs[9])
ax8.scatter(dataset3['a'],dataset3['x0'],marker = '.')
ax8.set_xlabel('a3')
ax8.set_ylabel('x03')
ax9=plt.subplot(gs[10])
ax9.scatter(dataset3['b'],dataset3['x0'],marker = '.')
ax9.set_xlabel('b3')
#ax9.set_ylabel('x03')
ax10=plt.subplot(gs[11])
ax10.scatter(dataset3['c'],dataset3['x0'],marker = '.')
ax10.set_xlabel('c3')
#ax10.set_ylabel('x03')
plt.show()
return
tornado1()
correlation1()
plt.tight_layout()
plt.show()
Any help would be highly appreciated :-)
In the block of code:
# Plot the bars, one by one
for y, value in zip(ys, values):
high_width = base + value
# Each bar is a "broken" horizontal bar chart
ax1= plt.subplot(gs[1]).broken_barh(
[(base, high_width)],
(y - 0.4, 0.8),
facecolors=['red', 'red'], # Try different colors if you like
edgecolors=['black', 'black'],
linewidth=1,
)
You're reinitializing gs[1] on each loop so in the end, your plot only contains the last bar. You should try something like this instead:
# Plot the bars, one by one.
# Create the axes once and reuse it for every bar instead of calling
# plt.subplot(gs[1]) again on each iteration.
ax1 = plt.subplot(gs[1])
for y, value in zip(ys, values):
    # Each bar is a "broken" horizontal bar chart.
    # broken_barh expects (xmin, xwidth) pairs, so pass the bar's width
    # (the value itself) rather than its right-hand edge base + value;
    # the two only coincide while base == 0.
    ax1.broken_barh(
        [(base, value)],
        (y - 0.4, 0.8),  # (ymin, yheight): a 0.8-high bar centred on y
        facecolors='red',  # Try different colors if you like
        edgecolors='black',
        linewidth=1,
    )
Hope that helps.
Related
I have the following development that I am working on with ElementTree, Pandas and Matplotlib modules in Python:
def extract_name_value(signals_df):
#print(signals_df)
names_list = [name for name in signals_df['Name'].unique()]
num_names_list = len(names_list)
colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']
# Creation Graphic
fig = plt.figure(figsize=(18, 20))
plt.suptitle(f'File PXML: {rootXML}', fontsize=20, fontweight='bold', color='SteelBlue', position=(0.75, 0.90))
fig.tight_layout()
i = 1
for name in names_list:
# get data
data = signals_df[signals_df["Name"] == name]["Value"]
datax = signals_df["Name"]
# x = [n for n in range(len(data))]
x = [n for n in range(len(datax))]
print(x)
# get color
j = random.randint(0, len(colors) - 1)
# add subplots
ax = plt.subplot(num_names_list, 1, i)
ax.plot(x, data, drawstyle='steps', marker='o', color=colors[j], linewidth=3)
# plt.xticks(None)
ax.set_ylabel(name, fontsize=12, fontweight='bold', color='SteelBlue', rotation=50, labelpad=45)
ax.grid(alpha=0.4)
i += 1
plt.show()
I am getting the following error:
I have looked into the error, and I understand that x and y must have the same dimensions. But is it possible to make a graph where the x-axis has more points than the y-axis, and where the x-axis comes from a variable that is not related to the y-axis? How would this be done?
The x-axis is the count of all the Signal elements in the XML file (I have put the file here because it is so long), and this count is larger than the number of y values. How can I take into account the three values that I read from the XML: Signal Name, Signal Value as the y-axis, and Count of Signals as the x-axis? I really appreciate your comments and help.
IIUC, you are trying to plot several stepped values against their order of appearance (X-index) in the XML file. In that case you should plot against the original dataframe's index. I haven't changed your code much for style or such, just fixed a little.
import xml.etree.ElementTree as ET
import pandas as pd
from matplotlib import pyplot as plt
import random
file_xml = 'example_un_child4.xml'
def transfor_data_atri(rootXML):
    """Parse the XML file at *rootXML* and plot its ``Signal`` elements.

    Every ``Signal`` element found anywhere in the document contributes one
    row holding its ``Name`` attribute and the leading integer of its
    ``Value`` attribute (the text before the first space). The rows are
    collected into a DataFrame, which is handed to ``extract_name_value``.
    """
    tree = ET.parse(rootXML)
    rows = []
    for signal in tree.findall(".//Signal"):
        attributes = signal.attrib
        name = attributes["Name"]
        # Only the first space-separated token of "Value" is numeric.
        leading_token = attributes["Value"].split(' ')[0]
        rows.append({"Name": name, "Value": int(leading_token)})
    extract_name_value(pd.DataFrame(rows))
def extract_name_value(signals_df):
    """Plot each signal's values as a stepped line in its own subplot.

    One subplot row is created per unique entry of the ``Name`` column. Each
    row plots that signal's ``Value`` entries against their index in
    *signals_df*, and all rows share the x-axis. Line colors are picked at
    random. The figure title reads the module-level ``file_xml``.
    """
    names_list = list(signals_df['Name'].unique())
    colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']

    # squeeze=False always yields a 2-D array of Axes, so indexing also works
    # when there is only one signal name (plt.subplots would otherwise return
    # a bare Axes object, which cannot be subscripted).
    fig, axes = plt.subplots(nrows=len(names_list), figsize=(10, 15),
                             sharex=True, squeeze=False)
    plt.suptitle(f'File PXML: {file_xml}', fontsize=20, fontweight='bold',
                 color='SteelBlue', position=(0.75, 0.90))

    for ax, name in zip(axes[:, 0], names_list):
        # Rows of this signal only; their index keeps the position each row
        # had in the full dataframe, which is what goes on the x-axis.
        data = signals_df[signals_df["Name"] == name]["Value"]
        ax.plot(data.index, data, drawstyle='steps', marker='o',
                color=random.choice(colors), linewidth=3)
        ax.set_ylabel(name, fontsize=12, fontweight='bold', color='SteelBlue',
                      rotation=50, labelpad=45)
        ax.grid(alpha=0.4)

    fig.tight_layout()
    plt.show()
transfor_data_atri(file_xml)
I'm totally new at using Python for Power BI (or anything really).
I would like to add the value of the bar/scatter at the end of the line. (the datalabel)
Also to have a version where I could have the label inside of the scatter bubble would be cool.
Anyone who could help out here ?
All help appreciated
# libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Create a dataframe
df = pd.DataFrame({'group': dataset.Genre , 'values': dataset.Revenue})
val = list(dataset.SelectedGenre)
# Reorder it following the values:
ordered_df = df.sort_values(by='values')
my_range=range(1,len(df.index)+1)
# Create a color if the group is "B"
my_color=np.where(ordered_df ['group']== val, 'orange', 'skyblue')
my_size=np.where(ordered_df ['group']== val , 150, 150)
# The vertival plot is made using the hline function
# I load the seaborn library only to benefit the nice looking feature
import seaborn as sns
val = ordered_df['values']
plt.hlines(y=my_range, xmin=0, xmax=val, color=my_color, alpha=1 , linewidth=8)
plt.scatter(val, my_range, color=my_color, s=my_size, alpha=1)
# Add title and axis names
plt.yticks(my_range, ordered_df['group'])
plt.title("What about the B group?", loc='left')
plt.xlabel('Value of the variable')
plt.ylabel('Group')
plt.box(False) #Turn of Black bx around visual
plt.show()
Found it myself
import matplotlib.pyplot as plt
import numpy as np
# Data
x = dataset.Revenue
y = dataset.Genre
labels = dataset.Revenue
val = list(dataset.SelectedGenre)
# Create the figure and axes objects
fig, ax = plt.subplots(1, figsize=(10, 6))
fig.suptitle('Example Of Labelled Scatterpoints')
my_color=np.where(y == val, 'orange', 'skyblue')
my_size=np.where( y == val , 2000, 2000)
# Plot the scatter points
ax.scatter(x, y,
color= my_color, # Color of the dots
s=1000, # Size of the dots
alpha=1, # Alpha of the dots
linewidths=1) # Size of edge around the dots
ax.hlines(y, xmin=0, xmax=x, color= my_color, alpha=1 , linewidth=8)
def human_format(num):
    """Format *num* as a short string with a metric suffix.

    The value is scaled down by factors of 1000, rounded to a whole number
    and suffixed with '', 'K', 'M', 'G', 'T' or 'P', e.g. 1234567 -> '1M'.
    Values beyond the largest suffix keep that suffix ('1000P') instead of
    raising IndexError.
    """
    # add more suffixes if you need them
    suffixes = ['', 'K', 'M', 'G', 'T', 'P']
    largest = len(suffixes) - 1

    # Work in floats so the scaling is a true division on Python 2 as well.
    num = float(num)
    magnitude = 0
    while abs(num) >= 1000 and magnitude < largest:
        magnitude += 1
        num /= 1000.0

    rounded = round(num)
    # Rounding can push e.g. 999.9K up to 1000K; promote it to 1M instead.
    if abs(rounded) >= 1000 and magnitude < largest:
        magnitude += 1
        rounded = round(rounded / 1000.0)

    return '%.0f%s' % (rounded, suffixes[magnitude])
# Add the participant names as text labels for each point
for x_pos, y_pos, label in zip(x, y, labels):
ax.annotate(
human_format(label), # The label for this point
xy=(x_pos, y_pos), # Position of the corresponding point
xytext=(-8, 0), # Offset text by 7 points to the right
textcoords='offset points', # tell it to use offset points
ha='left', # Horizontally aligned to the left
va='center',
color = 'white') # Vertical alignment is centered
plt.box(False) #Turn of Black bx around visual
# Show the plot
plt.show()
I am trying to create a figure that is a dendrogram on top of a scatterplot, where the ends of the leaves on the dendrogram match up with the dots on the scatterplot, which in turn match up with the tick labels below. I have this working, but for some reason the tick labels appear twice. The labels in red and green are the ones I'm trying to keep.
This is my code:
import pandas as pd
from matplotlib import pyplot as plt
import scipy.cluster.hierarchy as sch
import numpy as np
import json
import random
def scatter_and_dendrogram(df, colors,wn='',label_x=False):
'''Args:
df (Pandas DataFrame): similarity matrix
colors (list of strs): list of colors
wn (str): window name
label_x=False(Bool): whether or not to label x axis
Returns: None
'''
norm = plt.Normalize(1,4)
dist_matrix = [] #linkage
for i in range(len(df)):
arr = []
for j in range(1,len(df.iloc[i])):
arr.append(df.iloc[i,j])
dist_matrix.append(list(arr))
X = np.asarray(dist_matrix)
Z = sch.linkage(X, 'ward')
sch.set_link_color_palette(['b'])
fig = plt.figure()
fig, axs = plt.subplots(2, 1, sharex='col', sharey='row',
gridspec_kw={'width_ratios': [1],
'height_ratios': [30, 1],
'hspace': 0, 'wspace': 0})
(ax1, ax2) = axs
dendrogram = sch.dendrogram(Z=Z, p=3,ax=ax1)
icoords = dendrogram['icoord']
dcoords = dendrogram['dcoord']
lst = [[],[],colors]
for i in range(len(icoords)):
ic = icoords[i]
dc = dcoords[i]
if dc.count(0) == 2:
lst[0].append(ic[0])
lst[0].append(ic[-1])
elif dc.count(0) == 1:
ind = dc.index(0)
lst[0].append(ic[ind])
lst[1] = [-0.1]*len(lst[0])
ax2.scatter(lst[0],lst[1],s=10,norm=norm, alpha=0.7)
fig.canvas.set_window_title(wn)
ax1.set_yticklabels([])
ax1.set_xticklabels([])
ax2.set_yticklabels([])
ax2.set_xticklabels([])
if label_x:
letters = list('ABCD')
labels = [letters[ind] for ind in dendrogram['leaves']]
c1 = '#ff0033' #red
c2 = '#006600'#green
xlbls = ax2.set_xticklabels(labels,fontsize=11,linespacing=3)
for lbl in xlbls:
t = lbl.get_text()
c = c2
if letters.index(t) < 2:
c = c1
print(c)
lbl.set_color(c)
ax1.set_title(wn)
ax1.set_ylabel('Aggregation Criterion',fontsize=15)
ax2.set_xlabel('Articles', fontsize=15)
plt.show()
l = ['A','B','C','D']
df = pd.DataFrame(index=l, columns=l)
for i in range(len(l)-1):
for j in range(i+1, len(l)):
r = random.randint(0, 10)
df.iloc[i,j] = r
df.iloc[j, i] = r
df.fillna(0,inplace=True)
print(df)
wn = 'Set C'
scatter_and_dendrogram(df, l, wn,True)
This is what it looks like:
According to matplotlib.pyplot.subplots about sharex and sharey
When subplots have a shared x-axis along a column, only the x tick
labels of the bottom subplot are created.
Similarly, when subplots have a shared y-axis along a row, only the y tick labels of the first column subplot are created.
To later turn other subplots' ticklabels on, use tick_params.
You need to add ax1.tick_params(axis='x', labelbottom=False) under xlbls = ax2.set_xticklabels.
Besides, the fig = plt.figure() call is unnecessary, because plt.subplots() creates its own figure right after it; remove it.
I have a data file including 3 columns. The first 2 columns represent coordinates, the third one is a string value like 'foo', 'bar' or 'ter'.
I would like to display with python's matplotlib based on this label, different marker and color. Example:
foo => red circle
bar => green triangle
ter => black square
What I did till now is:
import numpy as np
import matplotlib.pyplot as plt
coordData = np.genfromtxt("mydata.csv", usecols=(0,1), delimiter=",", dtype=None)
coordLabels = np.genfromtxt("mydata.csv", usecols=2, delimiter=",", dtype=None)
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(coordData[:, 0], coordData[:, 1], c="r", marker="o")
plt.show()
How can I switch marker and color based on the coordLabels values?
SOLUTION
Based on the suggestion I made some changes:
coordData = np.genfromtxt("mydata.csv", usecols=(0, 1), delimiter=",", dtype=None)
coordLabels = np.genfromtxt("mydata.csv", usecols=2, delimiter=",", dtype=None)
fig = plt.figure()
ax = fig.add_subplot(111)
uniqueVals = np.unique(coordLabels)
markers = ['^', 'o', '*']
colors = { '^' : 'r',
'o' : 'b',
'*' : 'g'}
for marker, val in zip(markers, uniqueVals):
toUse = coordLabels == val
ax.scatter(coordData[toUse,0], coordData[toUse,1], c = colors[marker], marker=marker)
plt.show()
If you want the color to be dependent upon the label in coordLabels, you want to set the color equal to that variable instead of 'r' like you have.
ax.scatter(coordData[:, 0], coordData[:, 1], c=coordLabels, marker="o")
If you want different markers for each of the plots, you will need to create multiple scatter plots (one for each value in coordLabels):
# One (color, marker) style per label, matching the mapping asked for:
# foo => red circle, bar => green triangle, ter => black square.
uniqueVals = ['foo', 'bar', 'ter']
# Create your own list of markers here (needs to be the same size as `uniqueVals`)
markers = ['o', '^', 's']
# 'k' is matplotlib's one-letter code for black ('b' would be blue).
colors = ['r', 'g', 'k']
for color, marker, val in zip(colors, markers, uniqueVals):
    # Boolean mask selecting the rows whose label equals `val`
    toUse = coordLabels == val
    ax.scatter(coordData[toUse, 0], coordData[toUse, 1], c=color, marker=marker)
What I want is like this:
What I get is this:
So how to merge the markers into one label?
The same goes for the lines. For the lines, of course, you can achieve it by not assigning a label to the second line while using the same line type, but for the markers you cannot, since they have different shapes.
Note that in recent versions of matplotlib you can achieve this using class matplotlib.legend_handler.HandlerTuple as illustrated in this answer and also in this guide:
import matplotlib.pyplot as plt
from matplotlib.legend_handler import HandlerTuple
fig, ax1 = plt.subplots(1, 1)
# First plot: two legend keys for a single entry
p2, = ax1.plot([3, 4], [2, 3], 'o', mfc="white", mec="k")
p1, = ax1.plot([1, 2], [5, 6], 's', mfc="gray", mec="gray")
# `plot` returns a list, but we want the handle - thus the comma on the left
p3, = ax1.plot([1, 5], [4, 4], "-k")
p4, = ax1.plot([2, 6], [3, 2], "-k")
# Assign two of the handles to the same legend entry by putting them in a tuple
# and using a generic handler map (which would be used for any additional
# tuples of handles like (p1, p3)).
l = ax1.legend([(p1, p2), p3], ['data', 'models'],
handler_map={tuple: HandlerTuple(ndivide=None)})
plt.savefig("demo.png")
I think it's best to use a full legend - otherwise, how will your readers know the difference between the two models, or the two datasets? I would do it this way:
But, if you really want to do it your way, you can use a custom legend as shown in this guide. You'll need to create your own class, like they do, that defines the legend_artist method, which then adds squares and circles as appropriate. Here is the plot generated and the code used to generate it:
#!/usr/bin/env python
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import numpy as np
# ==================================
# Define the form of the function
# ==================================
def model(x, A=190, k=1):
    """Return the exponential decay ``A * exp(-k * x / 50)`` evaluated at *x*."""
    exponent = -k*x/50
    decay = np.exp(exponent)
    return A * decay
# ==================================
# How many data points are generated
# ==================================
num_samples = 15
# ==================================
# Create data for plots
# ==================================
x_model = np.linspace(0, 130, 200)
x_data1 = np.random.rand(num_samples) * 130
x_data1.sort()
x_data2 = np.random.rand(num_samples) * 130
x_data2.sort()
data1 = model(x_data1, k=1) * (1 + np.random.randn(num_samples) * 0.2)
data2 = model(x_data2, k=2) * (1 + np.random.randn(num_samples) * 0.15)
model1 = model(x_model, k=1)
model2 = model(x_model, k=2)
# ==================================
# Plot everything normally
# ==================================
fig = plt.figure()
# The documented single-argument form is the three-digit integer 111;
# newer matplotlib releases no longer accept the string '111'.
ax = fig.add_subplot(111)
# Data sets as markers (open circles / grey squares), models as lines.
ax.plot(x_data1, data1, 'ok', markerfacecolor='none', label='Data (k=1)')
ax.plot(x_data2, data2, 'sk', markeredgecolor='0.5', markerfacecolor='0.5', label='Data (k=2)')
ax.plot(x_model, model1, '-k', label='Model (k=1)')
ax.plot(x_model, model2, '--k', label='Model (k=2)')
# ==================================
# Format plot
# ==================================
ax.set_xlabel('Distance from heated face($10^{-2}$ m)')
ax.set_ylabel('Temperature ($^\circ$C)')
ax.set_xlim((0, 130))
ax.set_title('Normal way to plot')
ax.legend()
fig.tight_layout()
plt.show()
# ==================================
# ==================================
# Do it again, but with custom
# legend
# ==================================
# ==================================
class AnyObject(object):
    """Empty placeholder type used as a legend handle key."""
class data_handler(object):
    """Legend handler that draws a grey square and an open black circle."""

    def legend_artist(self, legend, orig_handle, fontsize, handlebox):
        """Add both marker patches to *handlebox* and return the square patch.

        The square is anchored at the left edge of the handle box and the
        circle near its right edge. Both are sized from the box height and
        scaled by ``fontsize / 22``.
        """
        scale = fontsize / 22
        left, bottom = handlebox.xdescent, handlebox.ydescent
        box_width, box_height = handlebox.width, handlebox.height
        half_height = box_height/2
        side = box_height * scale

        # Grey filled square on the left, vertically centred in the box.
        square = mpatches.Rectangle(
            [left, bottom + half_height * (1 - scale)], side, side,
            facecolor='0.5', edgecolor='0.5',
            transform=handlebox.get_transform())
        # Unfilled black circle towards the right edge of the box.
        circle = mpatches.Circle(
            [left + box_width - half_height, bottom + half_height],
            half_height * scale,
            facecolor='none', edgecolor='black',
            transform=handlebox.get_transform())

        for patch in (square, circle):
            handlebox.add_artist(patch)
        return square
# ==================================
# Plot everything
# ==================================
fig = plt.figure()
# The documented single-argument form is the three-digit integer 111;
# newer matplotlib releases no longer accept the string '111'.
ax = fig.add_subplot(111)
# Labels follow the k value each data set was generated with
# (data1 uses k=1, data2 uses k=2).
d1 = ax.plot(x_data1, data1, 'ok', markerfacecolor='none', label='Data (k=1)')
d2 = ax.plot(x_data2, data2, 'sk', markeredgecolor='0.5', markerfacecolor='0.5', label='Data (k=2)')
# Both models share one solid line style so a single 'Model' entry covers them.
m1 = ax.plot(x_model, model1, '-k', label='Model (k=1)')
m2 = ax.plot(x_model, model2, '-k', label='Model (k=2)')
# The AnyObject instance is a stand-in handle: the handler map routes it to
# data_handler, which draws both data markers for the single 'Data' entry.
ax.legend([AnyObject(), m1[0]], ['Data', 'Model'], handler_map={AnyObject: data_handler()})
# ==================================
# Format plot
# ==================================
ax.set_xlabel('Distance from heated face($10^{-2}$ m)')
ax.set_ylabel('Temperature ($^\circ$C)')
ax.set_xlim((0, 130))
ax.set_title('Custom legend')
fig.tight_layout()
plt.show()
I also found this link very useful (code below), it's an easier way to handle this issue. It's basically using a list of legend handles to make one of the markers of the first handle invisible and overplot it with the marker of the second handle. This way, you have both markers next to each other with one label.
fig, ax = plt.subplots()
p1 = ax.scatter([0.1],[0.5],c='r',marker='s')
p2 = ax.scatter([0.3],[0.2],c='b',marker='o')
l = ax.legend([(p1,p2)],['points'],scatterpoints=2)
With the above code, a HandlerTuple is used to create legend handles which
simply overplot two handles (there are red squares behind the blue
circles if you look carefully). What you want to do is make the second
marker of the first handle and the first marker of the second handle
invisible. Unfortunately, the HandlerTuple is a rather recent addition
and you need a special function to get all the handles. Otherwise, you
can use the Legend.legendHandles attribute (it only shows the first
handle for the HandlerTuple).
def get_handle_lists(l):
    """Yield, for each legend entry, the list of handles drawn for it.

    NOTE(review): this walks the private ``_legend_box`` container of *l*
    (presumably a matplotlib Legend) -- confirm the layout against the
    installed matplotlib version.
    """
    handle_area = l._legend_box.get_children()[1]
    entries = (entry
               for column in handle_area.get_children()
               for entry in column.get_children())
    for entry in entries:
        # The first child of an entry holds the handles themselves.
        handle_box = entry.get_children()[0]
        yield handle_box.get_children()
handles_list = list(get_handle_lists(l))
handles = handles_list[0] # handles is a list of two PathCollection.
# The first one is for red squares, and the second
# is for blue circles.
handles[0].set_facecolors(["r", "none"]) # for the fist
# PathCollection, make the
# second marker invisible by
# setting their facecolor and
# edgecolor to "none."
handles[0].set_edgecolors(["k", "none"])
handles[1].set_facecolors(["none", "b"])
handles[1].set_edgecolors(["none", "k"])
fig
Here is a new solution that will plot any collection of markers with the same label. I have not figured out how to make it work with markers from a line plot, but you can probably do a scatter plot on top of a line plot if you need to.
from matplotlib import pyplot as plt
import matplotlib.collections as mcol
import matplotlib.transforms as mtransforms
import numpy as np
from matplotlib.legend_handler import HandlerPathCollection
from matplotlib import cm
class HandlerMultiPathCollection(HandlerPathCollection):
    """
    Handler for PathCollections, which are used by scatter
    """

    def create_collection(self, orig_handle, sizes, offsets, transOffset):
        """Build a collection of the same type as *orig_handle* from all of its paths."""
        collection_cls = type(orig_handle)
        return collection_cls(
            orig_handle.get_paths(),
            sizes=sizes,
            offsets=offsets,
            transOffset=transOffset,
        )
fig, ax = plt.subplots()
#make some data to plot
x = np.arange(0, 100, 10)
models = [.05 * x, 8 * np.exp(- .1 * x), np.log(x + 1), .01 * x]
tests = [model + np.random.rand(len(model)) - .5 for model in models]
#make colors and markers
colors = cm.brg(np.linspace(0, 1, len(models)))
markers = ['o', 'D', '*', 's']
markersize = 50
plots = []
#plot points and lines
# Iterate the parallel lists with zip() rather than the Python-2-only
# xrange() index loop, so this also runs on Python 3.
for model, test, color, marker in zip(models, tests, colors, markers):
    # `line` ends up holding the last model line drawn.
    line, = plt.plot(x, model, linestyle='dashed', color='black', label='Model')
    plot = plt.scatter(x, test, c=color, s=markersize, marker=marker)
    plots.append(plot)
#get attributes
paths = []
sizes = []
facecolors = []
edgecolors = []
for plot in plots:
paths.append(plot.get_paths()[0])
sizes.append(plot.get_sizes()[0])
edgecolors.append(plot.get_edgecolors()[0])
facecolors.append(plot.get_facecolors()[0])
#make proxy artist out of a collection of markers
PC = mcol.PathCollection(paths, sizes, transOffset = ax.transData, facecolors = colors, edgecolors = edgecolors)
PC.set_transform(mtransforms.IdentityTransform())
plt.legend([PC, line], ['Test', 'Model'], handler_map = {type(PC) : HandlerMultiPathCollection()}, scatterpoints = len(paths), scatteryoffsets = [.5], handlelength = len(paths))
plt.show()
I have a solution for you if you're willing to use all circles for markers and differentiate by color only. You can use a circle collection to represent the markers, and then have a legend label for the collection as a whole.
Example code:
import matplotlib.pyplot as plt
import matplotlib.collections as collections
from matplotlib import cm
import numpy as np
#make some data to plot
x = np.arange(0, 100, 10)
models = [.05 * x, 8 * np.exp(- .1 * x), np.log(x + 1), .01 * x]
tests = [model + np.random.rand(len(model)) - .5 for model in models]
#make colors
colors = cm.brg(np.linspace(0, 1, len(models)))
markersize = 50
#plot points and lines
# Iterate the parallel lists with zip() rather than the Python-2-only
# xrange() index loop, so this also runs on Python 3.
for model, test, color in zip(models, tests, colors):
    # `line` ends up holding the last model line drawn.
    line, = plt.plot(x, model, linestyle='dashed', color='black', label='Model')
    plt.scatter(x, test, c=color, s=markersize)
#create collection of circles corresponding to markers
circles = collections.CircleCollection([markersize] * len(models), facecolor = colors)
#make the legend -- scatterpoints needs to be the same as the number
#of markers so that all the markers show up in the legend
plt.legend([circles, line], ['Test', 'Model'], scatterpoints = len(models), scatteryoffsets = [.5], handlelength = len(models))
plt.show()
You can do this by plotting data without any label and then adding the label separately:
from matplotlib import pyplot as plt
from numpy import random
xs = range(10)
data = random.rand(10, 2)
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
kwargs = {'color': 'r', 'linewidth': 2, 'linestyle': '--'}
ax.plot(xs, data, **kwargs)
ax.plot([], [], label='Model', **kwargs)
ax.legend()
plt.show()