Tooltips using mpldatacursor in matplotlib - python

I have been pulling my hair out for a while over this. I am trying to use mpldatacursor along with matplotlib to provide a tooltip functionality on scatter plots. Each point has some data associated with it which I would like to show when the point is clicked.
Here is a minimal (not) working example:
# NOTE: this is the asker's "(not) working" example, kept as posted apart
# from restoring the loop indentation that was lost in formatting.
import numpy as np
import mpldatacursor
import string
import matplotlib
matplotlib.use('Qt5Agg')
from matplotlib import pyplot as mpl

nations = ['Russia', 'America', 'China', 'France']
data = list()
idx = list()
np.random.seed(42) #Seed for repeatability

# Random data: ten 2x1 points per nation, each with a "<nation>-<letters>" label
for (id, nation) in enumerate(nations):
    for i in range(0,10):
        data.append((id+1)*np.random.random((2,1)))
        name = list(string.ascii_uppercase[20:])
        np.random.shuffle(name)
        idx.append(nation + '-' + ''.join(name))

mpl.figure()
data = np.squeeze(np.asarray(data))
m, n = 0, 9

# Plot by group: one scatter call (one artist) per nation
for (id,nation) in enumerate(nations):
    mpl.scatter(data[m:n,0] , data[m:n,1] , label=nation)
    m = n + 1
    n += 10

formatter = lambda **kwargs: ', '.join(kwargs['point_label'])
mpl.legend()
# A single sequence of point labels is passed for all artists.
mpldatacursor.datacursor(formatter=formatter, point_labels=idx)
mpl.show(block=True)
But when I do this, the tooltips don't match the legends. Further only labels starting with Russia and USA show up in the plot. What am I doing wrong?

Usually you would have your data in a table or, for the sake of the example, several lists. One would hence probably create a single scatter plot from the data columns and use a mapping of names to numbers to create the colors in the scatter.
Then one can use the matplotlib pick_event to get the data out of the respective list, given the index of the point on which the click happened.
This all does not require any external packages like datacursor.
import string

import numpy as np
from matplotlib import pyplot as plt

np.random.seed(42)

nations = ['Russia', 'America', 'China', 'France']

# Create the "columns": nation per point, coordinates, and a label per point.
nat = np.random.choice(nations, 50)
data = np.random.rand(50, 2)
strings = ["".join(np.random.choice(list(string.ascii_uppercase), 7)) for _ in range(50)]
idx = ["{}-{}".format(n, w) for n, w in zip(nat, strings)]

# Map nation names to integer codes; the codes drive the scatter colours.
labels, i = np.unique(nat, return_inverse=True)

fig, ax = plt.subplots()
scatter = ax.scatter(data[:, 0], data[:, 1], c=i, cmap="RdYlGn", picker=5)


def rect(c):
    """Return a legend proxy patch coloured like scatter points with code *c*."""
    return plt.Rectangle((0, 0), 1, 1, color=scatter.cmap(scatter.norm(c)))


handles = [rect(c) for c in np.unique(i)]
plt.legend(handles, labels)

# Create the (initially hidden) annotation that acts as the tooltip.
annot = ax.annotate("", xy=(0, 0), xytext=(-20, 20), textcoords="offset points",
                    bbox=dict(boxstyle="round", fc="w"),
                    arrowprops=dict(arrowstyle="->"))
annot.set_visible(False)


def onpick(evt):
    """Show the label of the picked point; a pick with mouse button 3 hides it."""
    if evt.artist == scatter:
        ind = evt.ind[0]  # first of the picked point indices
        annot.xy = (data[ind])
        annot.set_text(idx[ind])
        annot.set_visible(True)
    if evt.mouseevent.button == 3:
        annot.set_visible(False)
    fig.canvas.draw_idle()


fig.canvas.mpl_connect("pick_event", onpick)
plt.show()

The issue was that each call to scatter by matplotlib was creating a new artist object. The workaround is based on the doc-string in the source code.
point_labels : sequence or dict, optional
Labels for "subitems" of an artist, passed to the formatter
function as the point_label kwarg. May be either a single
sequence (used for all artists) or a dict of artist:sequence pairs.
It does involve the import of a protected matplotlib module/member. This seems to work as I want:
import numpy as np
import mpldatacursor
import string
import matplotlib
from matplotlib import _pylab_helpers as pylab_helpers
matplotlib.use('Qt5Agg')
from matplotlib import pyplot as mpl

POINTS_PER_NATION = 10

nations = ['Russia', 'America', 'China', 'France']
data = list()
idx = list()
np.random.seed(42)

# Ten random 2x1 points per nation, each with a "<nation>-<letters>" label.
for (index, nation) in enumerate(nations):
    for _ in range(POINTS_PER_NATION):
        data.append((index + 1) * np.random.random((2, 1)))
        name = list(string.ascii_uppercase[20:])
        np.random.shuffle(name)
        idx.append(nation + '-' + ''.join(name))
data = np.squeeze(np.asarray(data))

artist_labels = list()
mpl.figure()
for (index, nation) in enumerate(nations):
    # Half-open slice covering exactly this nation's points. The earlier
    # bounds (0:9, 10:19, ...) dropped the last point and label of each group.
    start = index * POINTS_PER_NATION
    stop = start + POINTS_PER_NATION
    mpl.scatter(data[start:stop, 0], data[start:stop, 1], label=nation)
    artist_labels.append(idx[start:stop])


def plotted_artists(ax):
    """Return the lines, patches, collections, images and containers of *ax* as one list."""
    return [*ax.lines, *ax.patches, *ax.collections, *ax.images, *ax.containers]


def formatter(**kwargs):
    """Return the last of the point labels handed over by mpldatacursor."""
    # Index instead of pop() so the sequence passed in is not mutated.
    return kwargs['point_label'][-1]


# Collect every plotted artist of every open figure, in creation order, and
# pair each with its own label list (one scatter artist per nation).
managers = pylab_helpers.Gcf.get_all_fig_managers()
figs = [manager.canvas.figure for manager in managers]
axes = [ax for fig in figs for ax in fig.axes]
artists = [artist for ax in axes for artist in plotted_artists(ax)]
my_dict = dict(zip(artists, artist_labels))

mpldatacursor.datacursor(formatter=formatter, point_labels=my_dict)
mpl.legend()
mpl.show(block=True)

Assuming you simply want names, this seems to work correctly if you change the mpldatacursor.datacursor call to use '{label}' as in the first example on the mpldatacursor website,
mpldatacursor.datacursor(formatter='{label}'.format)
I think the problem is with kwargs and the lambda function. If you want further data in your tooltip, it may be best to add this to the label on plt.scatter, using a separate call for each point, e.g.
import numpy as np
import mpldatacursor
import string
import matplotlib
matplotlib.use('Qt5Agg')
from matplotlib import pyplot as plt

nations = ['Russia', 'America', 'China', 'France']
cDict = {'Russia':'r', 'America':'b', 'China':'g', 'France':'c'}
np.random.seed(42) #Seed for repeatability

# Random data: every point gets its own scatter call so that its artist
# label ("<nation>-<letters>") can be shown by the '{label}' formatter.
for (nation_number, nation) in enumerate(nations, start=1):
    for _ in range(0, 10):
        x = nation_number * np.random.random((2, 1))
        name = list(string.ascii_uppercase[20:])
        np.random.shuffle(name)
        plt.scatter(x[0], x[1], c=cDict[nation], label=nation + '-' + ''.join(name))

mpldatacursor.datacursor(formatter='{label}'.format)
plt.show(block=True)

Related

Choosing right data to encircle

I'm relatively new to python/mongodb and working on a project but ran into an issue.
I've been given a data set based on Game of Thrones and wanted to show, on a scatter plot, the relationship between how many people each character has killed and how many people they have been killed by, encircling the data points that belong to royals. I've created the columns counting the killed/killedBy entries, and have created an encircle data set that only has the rows that have royal=1, but I am not sure how to write the encircle() line in order to encircle all of that data. Here is the code that I've written below (apologies, as I know it's probably very inefficient).
from pymongo import MongoClient
import pandas as pd
from mpl_toolkits.mplot3d import axes3d
import matplotlib.pyplot as plt
from matplotlib import style
import numpy as np
from scipy.spatial import ConvexHull
client=MongoClient()
db=client.GoT
characters = db.characters
M = characters.find()
CDB = {}
for m in M:
CDB[m["characterName"]] = m
CDBdf = pd.DataFrame(CDB)
CDBdf = CDBdf.T
# Placeholder columns, filled in row by row below.
CDBdf["RoyalExists"] = ""
CDBdf["KilledByExists"] = ""
CDBdf["KilledExists"] = ""

# Write through CDBdf.at rather than into `row`: iterrows() may hand back a
# copy of each row, in which case assignments to `row` never reach CDBdf.
for index, row in CDBdf.iterrows():
    # 1 when the 'royal' field is present (not NA), otherwise 0.
    if (pd.isna(row['royal'])==False):
        CDBdf.at[index, 'RoyalExists'] = 1
    else:
        CDBdf.at[index, 'RoyalExists'] = 0

    # Number of 'killedBy' entries, or 0 when there is no non-NA entry.
    if (np.any(pd.isna(row['killedBy'])== False)==False):
        CDBdf.at[index, 'KilledByExists'] = 0
    else:
        CDBdf.at[index, 'KilledByExists'] = len(row['killedBy'])

    # Number of 'killed' entries, or 0 when there is no non-NA entry.
    if (np.any(pd.isna(row['killed'])== False)==False):
        CDBdf.at[index, 'KilledExists'] = 0
    else:
        CDBdf.at[index, 'KilledExists'] = len(row['killed'])
x= CDBdf["KilledExists"]
y= CDBdf["KilledByExists"]
fig = plt.figure(figsize=(16, 10), dpi= 80, facecolor='w', edgecolor='k')
plt.scatter(x, y)
def encircle(x,y, ax=None, **kw):
    """Draw the convex hull of the points (x, y) as a polygon patch.

    x, y : coordinate sequences of equal length.
    ax   : Axes to draw on; defaults to the current Axes.
    **kw : forwarded to plt.Polygon (e.g. ec, fc, alpha).
    """
    # Compare against None explicitly instead of relying on the truthiness
    # of an Axes object.
    if ax is None:
        ax = plt.gca()
    p = np.c_[x,y]  # stack x and y as the two columns of a point array
    hull = ConvexHull(p)
    poly = plt.Polygon(p[hull.vertices,:], **kw)
    ax.add_patch(poly)
encircle_data = CDBdf.loc[CDBdf["RoyalExists"]==1]
encircle(encircle_data.CDBdf["KilledExists"],encircle_data.CDBdf["KilledByExists"], ec="k", fc="gold", alpha=0.1)
plt.show()
encircle_data
Was just hoping for some clarification on how to write the line "encircle(encircle_data.CDBdf["KilledExists"],encircle_data.CDBdf["KilledByExists"], ec="k", fc="gold", alpha=0.1)" so that it circles all of the royals data points.

Add patch to Seaborn jointplot

I have created a patch and want to apply it to a jointplot in Seaborn. When I go to try to apply the patch, it either splits the plots into two graphics or, if I change the kind attribute in the jointplot function from kde to anything else, it throws an error inner got multiple values for keyword argument 'ax'.
When I try to apply this solution, the variable fg does not have the attribute axes and it does not work.
In the code below, if I use kind = "scatter" and omit the ax, I get a blank output and then the jointplot. If I use kind = "scatter" and add ax = ax, I get the above-mentioned error. If I use kind = "kde" and ax = ax, I get the following images:
My code:
import descartes
import fiona
import matplotlib.pyplot as plt
import seaborn as sns
from shapely.geometry import shape
import pandas as pd
import time
#
start_time = time.time()
input_csv = r"C:\path\to\a\csv\with\coordinates.csv"
shapefile = r"C:\path\to\a\fun\shapefile.shp"
df = pd.read_csv(input_csv, delimiter = ",")
df = df[df["Latitude"] > 37.70833]
lat = "Latitude"
lon = "Longitude"
fig = plt.figure()
ax = fig.add_subplot(111, frameon = False)
shp = fiona.open(shapefile)
pol = shp.next()
geom = shape(pol["geometry"])
un_sf = geom.envelope.symmetric_difference(geom)
un_sf_patch = descartes.PolygonPatch(un_sf)
ax.add_patch( un_sf_patch )
my_fig = sns.jointplot(x = lon, y = lat, data = df, color = "grey", kind = "scatter")
end_time = round(time.time() - start_time, 5)
print "Seconds elapsed: {0}".format(end_time)
How can I add the patch to my Seaborn jointplot in a single graphic?
A seaborn jointplot creates its own figure, together with 3 axes.
# sns.jointplot returns the JointGrid that owns the figure and its three axes.
g = sns.jointplot(x=lon, y=lat, data=df, color="grey", kind="scatter")
g.ax_joint # big axes in the middle
g.ax_marg_y # marginal axes
g.ax_marg_x
Here you want to add your patch to the ax_joint.
# sns.jointplot returns the JointGrid; draw the patch on its central axes.
g = sns.jointplot(x=lon, y=lat, data=df, color="grey", kind="scatter")
g.ax_joint.add_patch( un_sf_patch )

Add meaningful minor ticks to a modified axis?

This example is specifically relating to plotting data as a function of log(redshift+1) and having a reference redshift axis but can be easily generalised to any functional modification.
I've written a neat little function (with the help of some question/answers on here) that allows me to easily add a redshift axis to the top of a log(1+redshift) plot. I am really struggling to get meaningful minor ticks (and would rather not share my dismal efforts!).
Here is the code, including example plot:
In this case, I would like redshifts at every 0.1 increment not occupied by a major tick, with the flexibility of changing that 0.1 in the function call.
import matplotlib.pyplot as plt
import numpy as np
def add_zaxis(axis,denomination):
    """Add a top axis labelled in z to a plot whose x axis is log10(1 + z).

    axis         : the Axes to twin.
    denomination : spacing, in z, between the labelled ticks.
    Returns the twinned Axes.
    """
    oldx = axis.get_xlim()
    axis.set_xlim(0., None)
    # Convert the visible log10(1 + z) range back to z.
    zspan = [(10**x)-1 for x in axis.get_xlim()]
    denom = denomination
    # Largest multiple of denom that still fits in the visible range.
    zmax = int(np.floor(zspan[1]/denom))*denom
    zspan[1] = zmax
    k = len(np.arange(zspan[0],zspan[1],denom))+1
    zs = np.linspace(zspan[0],zspan[1],k)
    z_ticks = [np.log10(1+x) for x in zs]
    axz = axis.twiny()
    axz.set_xticks(z_ticks)
    axz.set_xticklabels(['{:g}'.format(y) for y in zs])
    # Restore the original limits on both axes so the ticks line up.
    axz.set_xlim(oldx)
    axis.set_xlim(oldx)
    return axz
data = np.random.randn(500)
data = data[data>0.]
fig, ax = plt.subplots(1)
plt.hist(np.log10(data+1), bins=22)
ax.set_xlabel('log(z+1)')
ax.minorticks_on()
axz = add_zaxis(ax,.3)
axz.set_xlabel('z')
axz.minorticks_on()
The idea would be to use a FixedLocator to position the ticks on the axis. You may then have one FixedLocator for the major ticks and one for the minor ticks.
import matplotlib.pyplot as plt
import matplotlib.ticker
import numpy as np


def add_zaxis(ax,d=0.3, dminor=0.1):
    """Add a top axis labelled in z to a plot whose x axis is log10(z + 1).

    ax     : the Axes to twin.
    d      : spacing, in z, between major (labelled) ticks.
    dminor : spacing, in z, between minor ticks.
    Returns the twinned Axes.
    """
    def f(x):
        return np.log10(x+1)

    def invf(x):
        return 10.0**x - 1.

    xlim = ax.get_xlim()
    zlim = [invf(x) for x in xlim]
    axz = ax.twiny()
    axz.set_xlim(xlim)

    # Major ticks: fixed positions in log space, labelled with the z value.
    zs = np.arange(0,zlim[1],d)
    zpos = f(zs)
    axz.xaxis.set_major_locator(matplotlib.ticker.FixedLocator(zpos))
    # Format the labels explicitly so float round-off from arange
    # (e.g. 0.8999999999999999) does not end up on the axis.
    axz.xaxis.set_major_formatter(
        matplotlib.ticker.FixedFormatter(['{:g}'.format(z) for z in zs]))

    # Minor ticks: same mapping with the finer spacing.
    zsminor = np.arange(0,zlim[1],dminor)
    zposminor = f(zsminor)
    axz.xaxis.set_minor_locator(matplotlib.ticker.FixedLocator(zposminor))
    # tick_params takes booleans; the strings 'on'/'off' are both truthy.
    axz.tick_params(axis='x',which='minor',bottom=False, top=True)
    axz.set_xlabel('z')
    return axz


data = np.random.randn(400)
data = data[data>0.]
fig, ax = plt.subplots(1)
plt.hist(np.log10(data+1), bins=22)
ax.set_xlabel('log(z+1)')
add_zaxis(ax)
ax.minorticks_on()
ax.tick_params(axis='x',which='minor',bottom=True, top=False)
plt.show()

How to assign a plot to a variable and use the variable as the return value in a Python function

I am creating two Python scripts to produce some plots for a technical report. In the first script I am defining functions that produce plots from raw data on my hard-disk. Each function produces one specific kind of plot that I need. The second script is more like a batch file which is supposed to loop around those functions and store the produced plots on my hard-disk.
What I need is a way to return a plot in Python. So basically I want to do this:
fig = some_function_that_returns_a_plot(args)
fig.savefig('plot_name')
But what I do not know is how to make a plot a variable that I can return. Is this possible? If so, how?
You can define your plotting functions like
import numpy as np
import matplotlib.pyplot as plt


# an example graph type
def fig_barh(ylabels, xvalues, title=''):
    """Create a horizontal bar chart on a new figure and return the figure.

    ylabels : one label per bar.
    xvalues : one bar length per label.
    title   : optional title; omitted when empty.
    """
    # create a new figure with its own Axes, so nothing depends on which
    # figure pyplot currently considers "current"
    fig = plt.figure()
    ax = fig.add_subplot(111)
    # centre each bar on an integer position and put its label there too
    # (the former 0.1 / 0.4 offsets assumed edge-aligned bars)
    positions = np.arange(len(ylabels))
    ax.barh(positions, xvalues, align='center')
    ax.set_yticks(positions)
    ax.set_yticklabels(ylabels)
    if title:
        ax.set_title(title)
    # return it
    return fig
then use them like
from matplotlib.backends.backend_pdf import PdfPages


def write_pdf(fname, figures):
    """Save every figure in *figures* as one page of the PDF file *fname*."""
    # The context manager closes the PDF even if a savefig call raises.
    with PdfPages(fname) as doc:
        for fig in figures:
            fig.savefig(doc, format='pdf')


def main():
    """Build two example bar charts and write them to test.pdf."""
    a = fig_barh(['a','b','c'], [1, 2, 3], 'Test #1')
    b = fig_barh(['x','y','z'], [5, 3, 1], 'Test #2')
    write_pdf('test.pdf', [a, b])


if __name__=="__main__":
    main()
If you don't want the picture to be displayed and only get a variable in return, then you can try the following (with some additional stuff to remove axis):
def myplot(t,x):
    """Render fill_between(t, x) off-screen and return the RGBA pixels as an array.

    The figure is attached to an Agg canvas directly, so nothing is displayed.
    """
    fig = Figure(figsize=(2,1), dpi=80)
    canvas = FigureCanvasAgg(fig)
    ax = fig.add_subplot()
    ax.fill_between(t,x)
    ax.autoscale(tight=True)
    ax.axis('off')  # drop the axis decorations, keep only the filled curve
    canvas.draw()
    buf = canvas.buffer_rgba()
    X = np.asarray(buf)
    return X
The returned variable X can be used with OpenCV for example and do a
cv2.imshow('',X)
These imports must be included:
from matplotlib.figure import Figure
from matplotlib.backends.backend_agg import FigureCanvasAgg
The currently accepted answer didn't work for me as such, as I was using scipy.stats.probplot() to plot. I used matplotlib.pyplot.gca() to access an Axes instance directly instead:
"""
For my plotting ideas, see:
https://pythonfordatascience.org/independent-t-test-python/
For the dataset, see:
https://github.com/Opensourcefordatascience/Data-sets
"""
# Import modules.
from scipy import stats
import matplotlib.pyplot as plt
import pandas as pd
from tempfile import gettempdir
from os import path
from slugify import slugify
# Define plot func.
def get_plots(df):
    """Return (P-P plot Axes, histogram Axes) for the values in *df*.

    Each plot is drawn on its own new figure.
    """
    # plt.figure(): Create a new P-P plot. If we're inside a loop, and want
    # a new plot for every iteration, this is important!
    plt.figure()
    # Plot the argument that was passed in, not the module-level `diff`.
    stats.probplot(df, plot=plt)
    plt.title('Sepal Width P-P Plot')
    pp_p = plt.gca() # Assign an Axes instance of the plot.
    # Plot histogram. This uses pandas.DataFrame.plot(), which returns
    # an instance of the Axes directly.
    hist_p = df.plot(kind = 'hist', title = 'Sepal Width Histogram Plot',
                     figure=plt.figure()) # Create a new plot again.
    return pp_p, hist_p
# Import raw data.
df = pd.read_csv('https://raw.githubusercontent.com/'
'Opensourcefordatascience/Data-sets/master//Iris_Data.csv')
# Subset the dataset.
setosa = df[(df['species'] == 'Iris-setosa')]
setosa.reset_index(inplace= True)
versicolor = df[(df['species'] == 'Iris-versicolor')]
versicolor.reset_index(inplace= True)
# Calculate a variable for analysis.
diff = setosa['sepal_width'] - versicolor['sepal_width']
# Create plots, save each of them to a temp file, and show them afterwards.
# As they're just Axes instances, we need to call get_figure() at first.
for plot in get_plots(diff):
    # File name is derived from the plot title and placed in the temp dir.
    outfn = path.join(gettempdir(), slugify(plot.title.get_text()) + '.png')
    print('Saving a plot to "' + outfn + '".')
    plot.get_figure().savefig(outfn)
    plot.get_figure().show()

Discrete colorbar in matplotlib [duplicate]

How does one set the color of a line in matplotlib with scalar values provided at run time using a colormap (say jet)? I tried a couple of different approaches here and I think I'm stumped. values[] is a sorted array of scalars, curves is a set of 1-d arrays, and labels is an array of text strings. Each of the arrays has the same length.
# NOTE: the asker's code, kept as posted apart from restoring the loop
# indentation; the Colormap('jet') line is what the question is about.
fig = plt.figure()
ax = fig.add_subplot(111)
jet = colors.Colormap('jet')
cNorm = colors.Normalize(vmin=0, vmax=values[-1])
scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=jet)
lines = []
for idx in range(len(curves)):
    line = curves[idx]
    colorVal = scalarMap.to_rgba(values[idx])
    retLine, = ax.plot(line, color=colorVal)
    #retLine.set_color()
    lines.append(retLine)
ax.legend(lines, labels, loc='upper right')
ax.grid()
plt.show()
The error you are receiving is due to how you define jet. You are creating the base class Colormap with the name 'jet', but this is very different from getting the default definition of the 'jet' colormap. This base class should never be created directly, and only the subclasses should be instantiated.
What you've found with your example is a buggy behavior in Matplotlib. There should be a clearer error message generated when this code is run.
This is an updated version of your example:
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import matplotlib.cm as cmx
import numpy as np

# define some random data that emulates your intended code:
NCURVES = 10
np.random.seed(101)
curves = [np.random.random(20) for i in range(NCURVES)]
values = range(NCURVES)

fig = plt.figure()
ax = fig.add_subplot(111)
# replace the next line
#jet = colors.Colormap('jet')
# with
jet = plt.get_cmap('jet')
# Map the scalar range [0, values[-1]] onto the colormap.
cNorm = colors.Normalize(vmin=0, vmax=values[-1])
scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=jet)
print(scalarMap.get_clim())

lines = []
for idx, line in enumerate(curves):
    colorVal = scalarMap.to_rgba(values[idx])
    colorText = (
        'color: (%4.2f,%4.2f,%4.2f)'%(colorVal[0],colorVal[1],colorVal[2])
    )
    retLine, = ax.plot(line,
                       color=colorVal,
                       label=colorText)
    lines.append(retLine)

#added this to get the legend to work
handles,labels = ax.get_legend_handles_labels()
ax.legend(handles, labels, loc='upper right')
ax.grid()
plt.show()
Resulting in:
Using a ScalarMappable is an improvement over the approach presented in my related answer:
creating over 20 unique legend colors using matplotlib
I thought it would be beneficial to include what I consider to be a more simple method using numpy's linspace coupled with matplotlib's cm-type object. It's possible that the above solution is for an older version. I am using the python 3.4.3, matplotlib 1.4.3, and numpy 1.9.3., and my solution is as follows.
import matplotlib.pyplot as plt
from matplotlib import cm
from numpy import linspace

# Fraction of the colormap to use: 0.0-1.0 spans the whole map.
start = 0.0
stop = 1.0
number_of_lines= 1000
cm_subsection = linspace(start, stop, number_of_lines)

# One RGBA colour per line, sampled evenly from the chosen subsection.
colors = [ cm.jet(x) for x in cm_subsection ]

for i, color in enumerate(colors):
    plt.axhline(i, color=color)

plt.ylabel('Line Number')
plt.show()
This results in 1000 uniquely-colored lines that span the entire cm.jet colormap as pictured below. If you run this script you'll find that you can zoom in on the individual lines.
Now say I want my 1000 line colors to just span the greenish portion between lines 400 to 600. I simply change my start and stop values to 0.4 and 0.6 and this results in using only 20% of the cm.jet color map between 0.4 and 0.6.
So in a one line summary you can create a list of rgba colors from a matplotlib.cm colormap accordingly:
colors = [ cm.jet(x) for x in linspace(start, stop, number_of_lines) ]
In this case I use the commonly invoked map named jet but you can find the complete list of colormaps available in your matplotlib version by invoking:
>>> from matplotlib import cm
>>> dir(cm)
A combination of line styles, markers, and qualitative colors from matplotlib:
import itertools
import matplotlib as mpl
import matplotlib.pyplot as plt

N = 8*4+10
l_styles = ['-','--','-.',':']
m_styles = ['','.','o','^','*']
colormap = mpl.cm.Dark2.colors # Qualitative colormap

# zip with range(N) stops after the first N (marker, linestyle, color)
# combinations produced by itertools.product.
for i,(marker,linestyle,color) in zip(range(N),itertools.product(m_styles,l_styles, colormap)):
    plt.plot([0,1,2],[0,2*i,2*i], color=color, linestyle=linestyle,marker=marker,label=i)

plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.,ncol=4)
UPDATE: Supporting not only ListedColormap, but also LinearSegmentedColormap
import itertools
import matplotlib.pyplot as plt

Ncolors = 8
#colormap = plt.cm.Dark2# ListedColormap
colormap = plt.cm.viridis# LinearSegmentedColormap
# Never ask for more colours than the colormap has entries.
Ncolors = min(colormap.N,Ncolors)
# Sample Ncolors evenly spaced entries by integer index into the colormap.
mapcolors = [colormap(int(x*colormap.N/Ncolors)) for x in range(Ncolors)]
N = Ncolors*4+10
l_styles = ['-','--','-.',':']
m_styles = ['','.','o','^','*']

fig,ax = plt.subplots(gridspec_kw=dict(right=0.6))
for i,(marker,linestyle,color) in zip(range(N),itertools.product(m_styles,l_styles, mapcolors)):
    ax.plot([0,1,2],[0,2*i,2*i], color=color, linestyle=linestyle,marker=marker,label=i)

ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.,ncol=3,prop={'size': 8})
You may do as I wrote from my now-deleted account (it was banned from making new posts). It is rather simple and nice looking.
I usually use the third of these three versions; I have not checked versions 1 and 2.
from matplotlib.pyplot import cm
import numpy as np

# n is the number of curves to plot, e.g. n = len(array_of_curves_to_plot).
# x, y, ax1 and plt are assumed to be defined by the surrounding plotting code.

#version 1:
color=cm.rainbow(np.linspace(0,1,n))
for i,c in zip(range(n),color):
    ax1.plot(x, y,c=c)

#or version 2: - faster and better:
color=iter(cm.rainbow(np.linspace(0,1,n)))
c=next(color)
plt.plot(x,y,c=c)

#or version 3:
color=iter(cm.rainbow(np.linspace(0,1,n)))
for i in range(n):
    c=next(color)
    ax1.plot(x, y,c=c)
example of 3:
Ship RAO of Roll vs Ikeda damping in function of Roll amplitude A44

Categories

Resources