Add patch to Seaborn jointplot - python

I have created a patch and want to apply it to a jointplot in Seaborn. When I try to apply the patch, it either splits the plots into two graphics or, if I change the kind attribute in the jointplot function from kde to anything else, it throws the error "inner() got multiple values for keyword argument 'ax'".
When I try to apply this solution, the variable fg does not have the attribute axes and it does not work.
In the code below, if I use kind = "scatter" and omit the ax, I get a blank output and then the jointplot. If I use kind = "scatter" and add ax = ax, I get the above-mentioned error. If I use kind = "kde" and ax = ax, I get the following images:
My code:
import descartes
import fiona
import matplotlib.pyplot as plt
import seaborn as sns
from shapely.geometry import shape
import pandas as pd
import time
#
start_time = time.time()
input_csv = r"C:\path\to\a\csv\with\coordinates.csv"
shapefile = r"C:\path\to\a\fun\shapefile.shp"
df = pd.read_csv(input_csv, delimiter = ",")
df = df[df["Latitude"] > 37.70833]
lat = "Latitude"
lon = "Longitude"
fig = plt.figure()
ax = fig.add_subplot(111, frameon = False)
shp = fiona.open(shapefile)
pol = shp.next()
geom = shape(pol["geometry"])
un_sf = geom.envelope.symmetric_difference(geom)
un_sf_patch = descartes.PolygonPatch(un_sf)
ax.add_patch( un_sf_patch )
my_fig = sns.jointplot(x = lon, y = lat, data = df, color = "grey", kind = "scatter")
end_time = round(time.time() - start_time, 5)
print "Seconds elapsed: {0}".format(end_time)
How can I add the patch to my Seaborn jointplot in a single graphic?

A seaborn jointplot creates its own figure, together with 3 axes.
g = sns.jointplot(...)  # returns a seaborn JointGrid; fill in your own arguments
g.ax_joint # big axes in the middle
g.ax_marg_y # marginal axes
g.ax_marg_x
Here you want to add your patch to the ax_joint.
g = sns.jointplot(...)  # returns a seaborn JointGrid; fill in your own arguments
# draw the patch on the central (joint) axes of the figure seaborn created
g.ax_joint.add_patch( un_sf_patch )

Related

Getting a colorbar programmatically from an axis object

Consider the following code in which data is being plotted within a loop. In order to not plot multiple colorbars, I remove them before plotting new ones. However, I am tracking these colorbars manually. Is there a way to get a reference to them using, say, something like ax[0].get_colorbar. That would make my code a lot simpler.
import matplotlib.pyplot as plt
import numpy as np
# How to get a colorbar from an axis?
nrows = 1
ncols = 2
nstep = 5
fig, ax = plt.subplots(nrows=nrows,ncols=ncols)
cb0 = None
cb1 = None
for istep in range(nstep):
data = np.random.random(size=(5,5))
imu0 = ax[0].pcolormesh(data)
imu1 = ax[1].pcolormesh(data)
# this code is for removing previously drawn colorbars
# I would like to get a reference to the colorbar cb0 from ax0
# and then remove it.
# I do not want to keep track of the colorbars manually
if cb0 is not None:
cb0.remove()
if cb1 is not None:
cb1.remove()
cb0 = plt.colorbar(imu0,ax=ax[0])
cb1 = plt.colorbar(imu1,ax=ax[1])
The following seems to do what I want:
import matplotlib.pyplot as plt
import numpy as np
# How to get a colorbar from an axis?
nrows = 1
ncols = 2
nstep = 10
fig, ax = plt.subplots(nrows=nrows,ncols=ncols)
cb0 = None
cb1 = None
for istep in range(nstep):
data = np.random.random(size=(5,5))+istep
imu0 = ax[0].pcolormesh(data)
imu1 = ax[1].pcolormesh(data)
if ax[0].collections[0].colorbar is None:
cb0 = plt.colorbar(imu0,ax=ax[0])
else:
ax[0].collections[0].colorbar.update_normal(imu0)
if ax[1].collections[0].colorbar is None:
cb1 = plt.colorbar(imu1,ax=ax[1])
else:
ax[1].collections[0].colorbar.update_normal(imu1)

Tooltips using mpldatacursor in matplotlib

I have been pulling my hair out for a while over this. I am trying to use mpldatacursor along with matplotlib to provide a tooltip functionality on scatter plots. Each point has some data associated with it which I would like to show when the point is clicked.
Here is a minimal (not) working example:
import numpy as np
import mpldatacursor
import string
import matplotlib
matplotlib.use('Qt5Agg')
from matplotlib import pyplot as mpl
nations = ['Russia', 'America', 'China', 'France']
data = list()
idx = list()
np.random.seed(42) #Seed for repeatability
# Random data
for (id, nation) in enumerate(nations):
for i in range(0,10):
data.append((id+1)*np.random.random((2,1)))
name = list(string.ascii_uppercase[20:])
np.random.shuffle(name)
idx.append(nation + '-' + ''.join(name))
mpl.figure()
data = np.squeeze(np.asarray(data))
m, n = 0, 9
# Plot by group
for (id,nation) in enumerate(nations):
mpl.scatter(data[m:n,0] , data[m:n,1] , label=nation)
m = n + 1
n += 10
formatter = lambda **kwargs: ', '.join(kwargs['point_label'])
mpl.legend()
mpldatacursor.datacursor(formatter=formatter, point_labels=idx)
mpl.show(block=True)
But when I do this, the tooltips don't match the legends. Further only labels starting with Russia and USA show up in the plot. What am I doing wrong?
Usually you would have your data in a table or, for the sake of the example, several lists. One would hence probably create a single scatter plot from the data columns and use a mapping of names to numbers to create the colors in the scatter.
Then one can use the matplotlib pick_event to get the data out of the respective list, given the index of the point on which the click happened.
This all does not require any external packages like datacursor.
import numpy as np; np.random.seed(42)
import string
from matplotlib import pyplot as plt
nations = ['Russia', 'America', 'China', 'France']
#Create lists data, nat, idx
nat = np.random.choice(nations, 50)
data = np.random.rand(50,2)
strings = ["".join(np.random.choice(list(string.ascii_uppercase), 7)) for _ in range(50)]
idx = ["{}-{}".format(n,w) for n,w in zip(nat,strings)]
labels, i = np.unique(nat, return_inverse=True)
fig, ax = plt.subplots()
scatter = ax.scatter(data[:,0], data[:,1], c=i, cmap="RdYlGn", picker=5)
rect = lambda c: plt.Rectangle((0,0),1,1, color=scatter.cmap(scatter.norm(c)))
handles = [rect(c) for c in np.unique(i)]
plt.legend(handles, labels)
#Create annotation
annot = ax.annotate("", xy=(0,0), xytext=(-20,20),textcoords="offset points",
bbox=dict(boxstyle="round", fc="w"),
arrowprops=dict(arrowstyle="->"))
annot.set_visible(False)
#Create event handler
def onpick(evt):
    """Show the label of the picked scatter point; hide it again on right-click.

    evt: the matplotlib pick event delivered through the "pick_event"
         connection.
    """
    if evt.artist == scatter:
        # several points may be under the cursor; use the first one reported
        ind = evt.ind[0]
        annot.xy = (data[ind])
        annot.set_text(idx[ind])
        annot.set_visible(True)
    # mouse button 3 hides the annotation again
    if evt.mouseevent.button == 3:
        annot.set_visible(False)
    fig.canvas.draw_idle()
fig.canvas.mpl_connect("pick_event", onpick)
plt.show()
The issue was that each call to scatter by matplotlib was creating a new artist object. The workaround is based on the doc-string in the source code.
point_labels : sequence or dict, optional
Labels for "subitems" of an artist, passed to the formatter
function as the point_label kwarg. May be either a single
sequence (used for all artists) or a dict of artist:sequence pairs.
It does involve the import of a protected matplotlib module/member. This seems to work as I want:
import numpy as np
import mpldatacursor
import string
import matplotlib
from matplotlib import _pylab_helpers as pylab_helpers
matplotlib.use('Qt5Agg')
from matplotlib import pyplot as mpl
nations = ['Russia', 'America', 'China', 'France']
data = list()
idx = list()
np.random.seed(42)
for (index, nation) in enumerate(nations):
for i in range(0,10):
data.append((index + 1) * np.random.random((2, 1)))
name = list(string.ascii_uppercase[20:])
np.random.shuffle(name)
idx.append(nation + '-' + ''.join(name))
data = np.squeeze(np.asarray(data))
# The loop above appended 10 rows per nation, in nation order, so nation
# number `index` owns rows [index*10, index*10 + 10) of `data` and `idx`.
points_per_nation = 10
artist_labels = list()
mpl.figure()
for (index, nation) in enumerate(nations):
    m = index * points_per_nation
    n = m + points_per_nation
    mpl.scatter(data[m:n,0] , data[m:n,1] ,label=nation)
    # keep the labels of exactly the points drawn by this scatter call
    artist_labels.append(idx[m:n])
def plotted_artists(ax):
    """Return one list with the lines, patches, collections, images and
    containers of *ax*, in that order.

    ax: an object exposing the ``lines``, ``patches``, ``collections``,
        ``images`` and ``containers`` attributes as iterables.
    """
    # unpack rather than chain `+` so any iterable container type works
    all_artists = [*ax.lines, *ax.patches, *ax.collections,
                   *ax.images, *ax.containers]
    return all_artists
def formatter (**kwargs):
    """Return the tooltip text: the last entry of the ``point_label`` kwarg.

    Note that ``pop()`` also removes that entry from the list passed in.
    """
    return kwargs['point_label'].pop()
managers = pylab_helpers.Gcf.get_all_fig_managers()
figs = [manager.canvas.figure for manager in managers]
axes = [ax for fig in figs for ax in fig.axes]
artists = [artist for ax in axes for artist in plotted_artists(ax)]
my_dict = dict(zip(artists, artist_labels))
mpldatacursor.datacursor(formatter=formatter, point_labels=my_dict)
mpl.legend()
mpl.show(block=True)
Assuming you simply want names, this seems to work correctly if you change the mpldatacursor.datacursor call to use '{label}' as in the first example on the mpldatacursor website,
mpldatacursor.datacursor(formatter='{label}'.format)
I think the problem is with kwargs and the lambda function. If you want further data in your tooltip, it may be best to add this to the label on plt.scatter, using a separate call for each point, e.g.
import numpy as np
import mpldatacursor
import string
import matplotlib
matplotlib.use('Qt5Agg')
from matplotlib import pyplot as plt
nations = ['Russia', 'America', 'China', 'France']
cDict = {'Russia':'r', 'America':'b', 'China':'g', 'France':'c'}
np.random.seed(42) #Seed for repeatability
# Random data
for (id, nation) in enumerate(nations):
for i in range(0,10):
x = (id+1)*np.random.random((2,1))
name = list(string.ascii_uppercase[20:])
np.random.shuffle(name)
plt.scatter(x[0], x[1], c=cDict[nation], label=nation + '-' + ''.join(name))
mpldatacursor.datacursor(formatter='{label}'.format)
plt.show(block=True)

Python Basemap Inset Bug

I am attempting to create two Basemaps, the second of which is inside an inset. I can plot just fine inside the second Basemap, however fillcontinents, drawcoastlines, and drawstates are seemingly ignored at the expense of setting the inset xlim and ylim. Oddly enough, drawmapboundary works fine. If the xlim and ylim arguments for the inset are commented out, Basemap functions are plotted properly, however mark_inset is not in the correct location. I followed the example by Basemap (http://basemaptutorial.readthedocs.io/en/latest/locator.html) and tried changing my map projection from lcc to cyl (as in their example). For some reason, plotting lines and Basemap functions works correctly, which leads me to believe there is some bug associated with the map projection. Any help would be really appreciated!
from IPython import get_ipython
get_ipython().magic('reset -sf')
import matplotlib.pyplot as P
from pylab import * ## import scientific database
close("all") ## close all windows
import netCDF4
import numpy as np
from mpl_toolkits.basemap import Basemap
from matplotlib.font_manager import FontProperties
from mpl_toolkits.axes_grid1.inset_locator import zoomed_inset_axes
from mpl_toolkits.axes_grid1.inset_locator import mark_inset
# Main Basemap lat/lon boundaries
ll_lon = -131.4
ll_lat = 22.8
ur_lon = -104.1
ur_lat = 44.1
# Center point of map
ref_lat = (ll_lat + ur_lat)/2.
ref_lon = -(abs(ll_lon) + abs(ur_lon))/2.
# Create first Basemap
fig = P.figure(figsize=(20,15))
m = Basemap(llcrnrlon=ll_lon,llcrnrlat=ll_lat,urcrnrlon=ur_lon,urcrnrlat=ur_lat,lon_0=ref_lon,lat_0=ref_lat,projection='lcc',resolution='h')
# Define corners for drawing inner domain
ll_x = 591679.34684650064
ul_x = 594344.51484522165
ll_y = 592839.05223913607
ul_y = 1806478.1475523338
ur_x = 1857338.6486264155
lr_x = 1807752.1722138769
ur_y = 1779256.3653244921
lr_y = 566631.12743243692
# Plot inner domain
m.plot([ll_x,ul_x],[ll_y,ul_y],linewidth=3,color='k')
m.plot([ul_x,ur_x],[ul_y,ur_y],linewidth=3,color='k')
m.plot([ur_x,lr_x],[ur_y,lr_y],linewidth=3,color='k')
m.plot([lr_x,ll_x],[lr_y,ll_y],linewidth=3,color='k')
# Customize map
m.drawmapboundary(fill_color='aqua')
m.fillcontinents(color='coral',lake_color='aqua')
m.drawcoastlines(linewidth=1.5)
m.drawparallels(np.arange(20.,55.,5.),labels=[True,False,False,False],fontsize=18)
m.drawmeridians(np.arange(-145.,-95.,5.),labels=[False,False,False,True],fontsize=18)
m.drawstates(linewidth=1.0)
m.drawcountries(linewidth=1.0)
##################################
########## Add inset #############
##################################
# Define new axis for inset
ax = fig.add_subplot(111)
axins = zoomed_inset_axes(ax, 5, loc=4)
# Secondary Basemap lat/lon boundaries
ll_lata = 33.374725341796875
ur_lata = 35.076236724853516
ll_lona = -121.02678680419922
ur_lona = -118.92620086669922
# Center point of map
ref_lat = (ll_lata + ur_lata)/2.
ref_lon = -(abs(ll_lona) + abs(ur_lona))/2.
# Create second Basemap
m2 = Basemap(llcrnrlon=ll_lona,llcrnrlat=ll_lata,urcrnrlon=ur_lona,urcrnrlat=ur_lata,lon_0=ref_lon,lat_0=ref_lat,ax=axins)
# Define corners for drawing inner domain
ll_x = 1180454.8544887367
ul_x = 1181076.4843945887
ll_y = 1172180.6247499525
ul_y = 1202324.6929654693
ur_x = 1245128.2633578097
lr_x = 1244450.5310503689
ur_y = 1200944.1870238895
lr_y = 1170801.3279816376
# Plot inner domain
m2.plot([ll_x,ul_x],[ll_y,ul_y],linewidth=3,color='k')
m2.plot([ul_x,ur_x],[ul_y,ur_y],linewidth=3,color='k')
m2.plot([ur_x,lr_x],[ur_y,lr_y],linewidth=3,color='k')
m2.plot([lr_x,ll_x],[lr_y,ll_y],linewidth=3,color='k')
# Define inset parameters
mark_inset(ax,axins,loc1=1,loc2=3,lw=2,fc="none")
# Set xlim and ylim for mark_inset
ll_x = 1114487.9924679566
ul_x = 1117995.9771456306
ll_y = 1094044.0480909257
ul_y = 1283251.5520485893
ur_x = 1311637.0004658864
lr_x = 1306989.5479092139
ur_y = 1279078.0132717683
lr_y = 1089895.0682288238
axins.set_xlim((ll_x+ul_x)/2.,(lr_x+ur_x)/2.)
axins.set_ylim((ll_y+lr_y)/2.,(ul_y+ur_y)/2.)
# Customize map --> this appears to be ignored with the exception of drawmapboundary
m2.drawmapboundary(fill_color='aqua')
m2.fillcontinents(color='coral',lake_color='aqua')
m2.drawcoastlines(linewidth=1.5)
m2.drawstates(linewidth=1.0)
P.show()

Change the facecolor of boxplot in pandas

I need to change the colors of the boxplot drawn using pandas utility function. I can change most properties using the color argument but can't figure out how to change the facecolor of the box. Someone knows how to do it?
import pandas as pd
import numpy as np
data = np.random.randn(100, 4)
labels = list("ABCD")
df = pd.DataFrame(data, columns=labels)
props = dict(boxes="DarkGreen", whiskers="DarkOrange", medians="DarkBlue", caps="Gray")
df.plot.box(color=props)
While I still recommend seaborn and raw matplotlib over the plotting interface in pandas, it turns out that you can pass patch_artist=True as a kwarg to df.plot.box, which will pass it as a kwarg to df.plot, which will pass it as a kwarg to matplotlib.axes.Axes.boxplot.
import pandas as pd
import numpy as np
data = np.random.randn(100, 4)
labels = list("ABCD")
df = pd.DataFrame(data, columns=labels)
props = dict(boxes="DarkGreen", whiskers="DarkOrange", medians="DarkBlue", caps="Gray")
df.plot.box(color=props, patch_artist=True)
As suggested, I ended up creating a function to plot this, using raw matplotlib.
def plot_boxplot(data, ax):
    """Draw a colour-customised boxplot of *data* on *ax*.

    data: object providing ``.values`` (the numbers to plot) and
          ``.columns`` (used as x tick labels), e.g. a pandas DataFrame.
    ax:   the axes to draw on.
    """
    # patch_artist=True is what allows the boxes to take a facecolor
    bp = ax.boxplot(data.values, patch_artist=True)
    for box in bp['boxes']:
        box.set(color='DarkGreen')
        box.set(facecolor='DarkGreen')
    for whisker in bp['whiskers']:
        whisker.set(color="DarkOrange")
    for cap in bp['caps']:
        cap.set(color="Gray")
    for median in bp['medians']:
        median.set(color="white")
    # dotted horizontal reference line at y = 0
    ax.axhline(0, color="DarkBlue", linestyle=":")
    ax.set_xticklabels(data.columns)
I suggest using df.plot.box with patch_artist=True and return_type='both' (which returns the matplotlib axes the boxplot is drawn on and a dictionary whose values are the matplotlib Lines of the boxplot) in order to have the best customization possibilities.
For example, given this data:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
df = pd.DataFrame(
data=np.random.randn(100, 4),
columns=list("ABCD")
)
you can set a specific color for all the boxes:
fig,ax = plt.subplots(figsize=(9,6))
ax,props = df.plot.box(patch_artist=True, return_type='both', ax=ax)
for patch in props['boxes']:
patch.set_facecolor('lime')
plt.show()
you can set a specific color for each box:
colors = ['green','blue','yellow','red']
fig,ax = plt.subplots(figsize=(9,6))
ax,props = df.plot.box(patch_artist=True, return_type='both', ax=ax)
for patch,color in zip(props['boxes'],colors):
patch.set_facecolor(color)
plt.show()
you can easily integrate a colormap:
colors = np.random.randint(0,10, 4)
cm = plt.cm.get_cmap('rainbow')
colors_cm = [cm((c-colors.min())/(colors.max()-colors.min())) for c in colors]
fig,ax = plt.subplots(figsize=(9,6))
ax,props = df.plot.box(patch_artist=True, return_type='both', ax=ax)
for patch,color in zip(props['boxes'],colors_cm):
patch.set_facecolor(color)
# to add colorbar
fig.colorbar(plt.cm.ScalarMappable(
plt.cm.colors.Normalize(min(colors),max(colors)),
cmap='rainbow'
), ax=ax, cmap='rainbow')
plt.show()

How to assign a plot to a variable and use the variable as the return value in a Python function

I am creating two Python scripts to produce some plots for a technical report. In the first script I am defining functions that produce plots from raw data on my hard-disk. Each function produces one specific kind of plot that I need. The second script is more like a batch file which is supposed to loop around those functions and store the produced plots on my hard-disk.
What I need is a way to return a plot in Python. So basically I want to do this:
fig = some_function_that_returns_a_plot(args)
fig.savefig('plot_name')
But what I do not know is how to make a plot a variable that I can return. Is this possible? If so, how?
You can define your plotting functions like
import numpy as np
import matplotlib.pyplot as plt
# an example graph type
def fig_barh(ylabels, xvalues, title=''):
    """Create a new figure holding one horizontal bar chart and return it.

    ylabels: one tick label per bar.
    xvalues: bar lengths, in the same order as ylabels.
    title:   optional title; skipped when empty.
    """
    # create a new figure
    fig = plt.figure()
    ax = fig.add_subplot(111)
    # plot to it; bars are anchored at their bottom edge so that the
    # +0.4 tick offset below lands on the middle of each 0.8-high bar
    bar_bottoms = 0.1 + np.arange(len(ylabels))
    ax.barh(bar_bottoms, xvalues, height=0.8, align='edge')
    ax.set_yticks(bar_bottoms + 0.4)
    ax.set_yticklabels(ylabels)
    if title:
        ax.set_title(title)
    # return it
    return fig
then use them like
from matplotlib.backends.backend_pdf import PdfPages
def write_pdf(fname, figures):
    """Save every figure in *figures* to the PDF document *fname*.

    fname:   path of the PDF file to write.
    figures: iterable of figures; each is saved with ``savefig``.
    """
    # the context manager closes the document even if a savefig call raises
    with PdfPages(fname) as doc:
        for fig in figures:
            fig.savefig(doc, format='pdf')
def main():
    """Build two example bar charts and write both to 'test.pdf'."""
    a = fig_barh(['a','b','c'], [1, 2, 3], 'Test #1')
    b = fig_barh(['x','y','z'], [5, 3, 1], 'Test #2')
    write_pdf('test.pdf', [a, b])


if __name__=="__main__":
    main()
If you don't want the picture to be displayed and only get a variable in return, then you can try the following (with some additional stuff to remove axis):
def myplot(t,x):
    """Draw ``fill_between(t, x)`` on an off-screen Agg canvas and return
    the canvas buffer as a NumPy array (nothing is displayed).

    t, x: the values passed straight to ``ax.fill_between``.
    """
    fig = Figure(figsize=(2,1), dpi=80)
    canvas = FigureCanvasAgg(fig)
    ax = fig.add_subplot()
    ax.fill_between(t,x)
    ax.autoscale(tight=True)
    # hide the axis decorations so only the filled shape is rendered
    ax.axis('off')
    canvas.draw()
    buf = canvas.buffer_rgba()
    X = np.asarray(buf)
    return X
The returned variable X can be used with OpenCV for example and do a
cv2.imshow('',X)
These imports must be included:
from matplotlib.figure import Figure
from matplotlib.backends.backend_agg import FigureCanvasAgg
The currently accepted answer didn't work for me as such, as I was using scipy.stats.probplot() to plot. I used matplotlib.pyplot.gca() to access an Axes instance directly instead:
"""
For my plotting ideas, see:
https://pythonfordatascience.org/independent-t-test-python/
For the dataset, see:
https://github.com/Opensourcefordatascience/Data-sets
"""
# Import modules.
from scipy import stats
import matplotlib.pyplot as plt
import pandas as pd
from tempfile import gettempdir
from os import path
from slugify import slugify
# Define plot func.
def get_plots(df):
    """Create a probability plot and a histogram of *df*, each on its own
    new figure, and return the two plot objects as ``(pp_p, hist_p)``.

    df: the data to plot; it is handed to ``stats.probplot`` and its
        ``.plot(kind='hist')`` method is called.
    """
    # plt.figure(): Create a new P-P plot. If we're inside a loop, and want
    # a new plot for every iteration, this is important!
    plt.figure()
    # use the argument, not the module-level `diff`, so the function plots
    # whatever the caller passes in
    stats.probplot(df, plot=plt)
    plt.title('Sepal Width P-P Plot')
    pp_p = plt.gca() # Assign an Axes instance of the plot.
    # Plot histogram. This uses pandas.DataFrame.plot(), which returns
    # an instance of the Axes directly.
    hist_p = df.plot(kind = 'hist', title = 'Sepal Width Histogram Plot',
                     figure=plt.figure()) # Create a new plot again.
    return pp_p, hist_p
# Import raw data.
df = pd.read_csv('https://raw.githubusercontent.com/'
'Opensourcefordatascience/Data-sets/master//Iris_Data.csv')
# Subset the dataset.
setosa = df[(df['species'] == 'Iris-setosa')]
setosa.reset_index(inplace= True)
versicolor = df[(df['species'] == 'Iris-versicolor')]
versicolor.reset_index(inplace= True)
# Calculate a variable for analysis.
diff = setosa['sepal_width'] - versicolor['sepal_width']
# Create plots, save each of them to a temp file, and show them afterwards.
# As they're just Axes instances, we need to call get_figure() at first.
for plot in get_plots(diff):
outfn = path.join(gettempdir(), slugify(plot.title.get_text()) + '.png')
print('Saving a plot to "' + outfn + '".')
plot.get_figure().savefig(outfn)
plot.get_figure().show()

Categories

Resources