Seaborn clustermap fixed cell size - python

I am using the seaborn clustermap function and I would like to make multiple plots where the cell sizes are exactly identical. Also the size of the axis labels should be the same. This means figure size and aspect ratio will need to change, the rest needs to stay identical.
import pandas
import seaborn
import numpy as np
dataFrameA = pd.DataFrame([ [1,2],[3,4] ])
dataFrameB = pd.DataFrame( np.arange(3*6).reshape(3,-1))
Then decide how big the clustermap itself needs to be, something along the lines of:
dpi = 72
cellSizePixels = 150
This decides that dataFrameA should be should be 300 by 300 pixels. I think that those need to be converted to the size units of the figure, which will be cellSizePixels/dpi units per pixel. So for dataFrameA that will be a heatmap size of ~2.01 inches. Here I am introducing a problem: there is stuff around the heatmap, which will also take up some space, and I don't know how much space those will exactly take.
I tried to parametrize the heatmap function with a guess of the image size using the formula above:
def fixedWidthClusterMap( dpi, cellSizePixels, dataFrame):
clustermapParams = {
'square':False # Tried to set this to True before. Don't: the dendograms do not scale well with it.
}
figureWidth = (cellSizePixels/dpi)*dataFrame.shape[1]
figureHeight= (cellSizePixels/dpi)*dataFrame.shape[0]
return sns.clustermap( dataFrame, figsize=(figureWidth,figureHeight), **clustermapParams)
fixedWidthClusterMap(dpi, cellSizePixels, dataFrameA)
plt.show()
fixedWidthClusterMap(dpi, cellSizePixels, dataFrameB)
plt.show()
This yields:
My question: how do I obtain square cells which are exactly the size I want?

This is a bit tricky, because there are quite a few things to take into consideration, and in the end, it depends how "exact" you need the sizes to be.
Looking at the code for clustermap the heatmap part is designed to have a ratio of 0.8 compared to the axes used for the dendrograms. But we also need to take into account the margins used to place the axes. If one knows the size of the heatmap axes, one should therefore be able to calculate the desired figure size that would produce the right shape.
dpi = matplotlib.rcParams['figure.dpi']
marginWidth = matplotlib.rcParams['figure.subplot.right']-matplotlib.rcParams['figure.subplot.left']
marginHeight = matplotlib.rcParams['figure.subplot.top']-matplotlib.rcParams['figure.subplot.bottom']
Ny,Nx = dataFrame.shape
figWidth = (Nx*cellSizePixels/dpi)/0.8/marginWidth
figHeigh = (Ny*cellSizePixels/dpi)/0.8/marginHeight
Unfortunately, it seems matplotlib must adjust things a bit during plotting, because that was not enough the get perfectly square heatmap cells. So I choose to resize the various axes create by clustermap after the fact, starting with the heatmap, then the dendrogram axes.
I think the resulting image is pretty close to what you were trying to get, but my tests sometime show some errors by 1-2 px, which I attribute to rounding errors due to all the conversions between sizes in inches and pixels.
dataFrameA = pd.DataFrame([ [1,2],[3,4] ])
dataFrameB = pd.DataFrame( np.arange(3*6).reshape(3,-1))
def fixedWidthClusterMap(dataFrame, cellSizePixels=50):
# Calulate the figure size, this gets us close, but not quite to the right place
dpi = matplotlib.rcParams['figure.dpi']
marginWidth = matplotlib.rcParams['figure.subplot.right']-matplotlib.rcParams['figure.subplot.left']
marginHeight = matplotlib.rcParams['figure.subplot.top']-matplotlib.rcParams['figure.subplot.bottom']
Ny,Nx = dataFrame.shape
figWidth = (Nx*cellSizePixels/dpi)/0.8/marginWidth
figHeigh = (Ny*cellSizePixels/dpi)/0.8/marginHeight
# do the actual plot
grid = sns.clustermap(dataFrame, figsize=(figWidth, figHeigh))
# calculate the size of the heatmap axes
axWidth = (Nx*cellSizePixels)/(figWidth*dpi)
axHeight = (Ny*cellSizePixels)/(figHeigh*dpi)
# resize heatmap
ax_heatmap_orig_pos = grid.ax_heatmap.get_position()
grid.ax_heatmap.set_position([ax_heatmap_orig_pos.x0, ax_heatmap_orig_pos.y0,
axWidth, axHeight])
# resize dendrograms to match
ax_row_orig_pos = grid.ax_row_dendrogram.get_position()
grid.ax_row_dendrogram.set_position([ax_row_orig_pos.x0, ax_row_orig_pos.y0,
ax_row_orig_pos.width, axHeight])
ax_col_orig_pos = grid.ax_col_dendrogram.get_position()
grid.ax_col_dendrogram.set_position([ax_col_orig_pos.x0, ax_heatmap_orig_pos.y0+axHeight,
axWidth, ax_col_orig_pos.height])
return grid # return ClusterGrid object
grid = fixedWidthClusterMap(dataFrameA, cellSizePixels=75)
plt.show()
grid = fixedWidthClusterMap(dataFrameB, cellSizePixels=75)
plt.show()

Not a complete answer (not dealing with pixels) but I suspect OP has moved on after 4 years.
def reshape_clustermap(cmap, cell_width=0.02, cell_height=0.02):
ny, nx = cmap.data2d.shape
hmap_width = nx * cell_width
hmap_height = ny * cell_height
hmap_orig_pos = cmap.ax_heatmap.get_position()
cmap.ax_heatmap.set_position(
[hmap_orig_pos.x0, hmap_orig_pos.y0, hmap_width, hmap_height]
)
top_dg_pos = cmap.ax_col_dendrogram.get_position()
cmap.ax_col_dendrogram.set_position(
[hmap_orig_pos.x0, hmap_orig_pos.y0 + hmap_height, hmap_width, top_dg_pos.height]
)
left_dg_pos = cmap.ax_row_dendrogram.get_position()
cmap.ax_row_dendrogram.set_position(
[left_dg_pos.x0, left_dg_pos.y0, left_dg_pos.width, hmap_height]
)
if cmap.ax_cbar:
cbar_pos = cmap.ax_cbar.get_position()
hmap_pos = cmap.ax_heatmap.get_position()
cmap.ax_cbar.set_position(
[cbar_pos.x0, hmap_pos.y1, cbar_pos.width, cbar_pos.height]
)
cmap = sns.clustermap(dataFrameA)
reshape_clustermap(cmap)

Related

Holoviews/Datashaded map overlay not displaying

I am using the code below to get a Panel dashboard with a dropdown select box, a histogram and a map.
import pandas as pd
import holoviews as hv
from holoviews.operation.datashader import datashade, rasterize, shade
import panel as pn
from holoviews.element.tiles import OSM
import hvplot.pandas
df = pd.read_parquet('cleanedFiles/AllMNO.parquet')
mno = pn.widgets.Select(options=df['mnc'].unique().tolist())
#pn.depends(mno)
def mnoStats(operator):
return'### Operator {} has {} samples'.format(operator, len(df[df['mnc'] == operator]))
#pn.depends(mno)
def plotMap(mno):
opts = dict(width=700, height=300, tools=['hover'])
tiles = OSM().opts(alpha=0.4, xaxis=None, yaxis=None)
points = hv.Points(df[df['mnc'] == mno], ['latitude', 'longitude'])
rasterized = shade(rasterize(points, x_sampling=1, y_sampling=1)).opts(**opts)
return tiles*rasterized
def plotHist(df):
return df.hvplot.hist(y='rsrp', by='mnc', bins=20)
pn.Row(pn.Column(pn.WidgetBox('## Ofcom scanner data', mno, mnoStats)),
pn.Column(plotHist(df))).servable()
pn.Row(plotMap).servable()
The dropdown selector and histogram appear as expected, however I get a 'blocky' image for the map as below. I wanted to get the locations (lat/longs) of the measurements each coloured / datashaded by the signal level denoted by the column 'rsrp'
Please advice how this can be corrected.
According to the holoviews docs, hv.rasterize is a high-level resampling interface and passes parameters to several internal methods:
holoviews.core.operation.Operation: group, input_ranges
holoviews.operation.datashader.LinkableOperation: link_inputs
holoviews.operation.datashader.ResamplingOperation: dynamic, streams, expand, height, width, x_range, y_range, x_sampling, y_sampling, target, element_type, precompute
holoviews.operation.datashader.AggregationOperation: vdim_prefix
Based on this, it looks like your arguments x_sampling and y_sampling are passed to ResamplingOperation, which are described:
x_sampling = param.Number(allow_None=True, inclusive_bounds=(True, True), label=’X sampling’)
Specifies the smallest allowed sampling interval along the x axis.
y_sampling = param.Number(allow_None=True, inclusive_bounds=(True, True), label=’Y sampling’)
Specifies the smallest allowed sampling interval along the y axis.
So, I'd guess that the issue is that providing the arguments x_sampling=1, y_sampling=1 to rasterize has the effect of aggregating all of your data to 1 degree, or approximately 110 km/70 mile blocks, which is causing the blockiness in your figure. Changing these parameters to a smaller value, such as 0.1 or smaller, should resolve the issue, as long as your data itself has sufficient resolution.

How do I reduce the number of ticks on an Altair graph?

I am using Altair to create a graph, but for some weird reason it's seems to be generating a tick for each of the points. Creating a graph like this Altair Graph
If I filter the dataframe, it produces weird axis values. Altair graph
Is there a way to reduce the amount of ticks? I tried tickCount in the y axis paramater and it didn't work since it seems to require integers.I also tried setting the axis value parameter to a list [0,0.2,0.4,0.6,0.8,1] and that didn't work either. Here is my code (sorry it's so lengthy!). Thank you in advance!
a = alt.Chart(df_filtered).mark_point().encode(x =alt.X('Process_Time_(mins)', axis = alt.Axis(title='Process Time (mins)')),
y = alt.Y('Heavy_Phase_%SS',axis=alt.Axis(title='Heavy Phase %SS', tickCount = 10),sort = 'descending'),
color = alt.Color('DSP_Lot', legend = alt.Legend(title = 'DSP_Lot')),
shape = alt.Shape('Strain', scale = alt.Scale(range = ["circle", "square", "cross", "diamond", "triangle-up", "triangle-down", "triangle-right", "triangle-left"])),
tooltip = [alt.Tooltip('DSP_Lot',title = 'Lot'), alt.Tooltip('Heavy_Phase_%SS', title = 'Heavy Phase %SS'),
alt.Tooltip('Process_Time_(mins)', title = 'Process Time (mins)'), alt.Tooltip('Purpose', title = 'Purpose'), alt.Tooltip('Strain', title = 'Strain'),
alt.Tooltip('Trial', title = 'Trial')]).properties(width = 1000, height = 500)
It's hard to tell without a reproducible example but I suspect the issue is that your y axis is defaulting to a nominal encoding type, in which case you get one tick mark per unique value. If you specify a quantitative type in the Y encoding, it may improve things:
y = alt.Y('Heavy_Phase_%SS:Q', ...)
The reason it defaults to nominal is probably because the associated column in the pandas dataframe has a string type rather than a numerical type.

How to plot dotted lines from a shapefile in python?

I am not sure on how to plot a dotted line from a shapefile in Python. It appears that readshapefile() does not have any linestyle for me to set. Below I have a working code where I take a shapefile and plot it, but it only plots a solid line. Any ideas to set me in the right direction? Thanks!
The shapefile can be found here: http://www.natice.noaa.gov/products/daily_products.html, where the Start Date is Feb 15th, end date is Feb 17th, and the Date Types is Ice Edge. It should be the first link.
#!/awips2/python/bin/python
from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt
map = Basemap(llcrnrlon=-84.37,llcrnrlat=42.11,urcrnrlon=-20.93,urcrnrlat=66.48,
resolution='i', projection='tmerc', lat_0 = 55., lon_0 = -50.)
map.drawmapboundary(fill_color='aqua')
map.fillcontinents(color='#ddaa66',lake_color='aqua')
map.drawcoastlines(zorder = 3)
map.readshapefile('nic_autoc2018046n_pl_a', 'IceEdge', zorder = 2, color = 'blue')
plt.show()
From the Basemap documentation:
A tuple (num_shapes, type, min, max) containing shape file info is
returned. num_shapes is the number of shapes, type is the type code
(one of the SHPT* constants defined in the shapelib module, see
http://shapelib.maptools.org/shp_api.html) and min and max are
4-element lists with the minimum and maximum values of the vertices.
If drawbounds=True a matplotlib.patches.LineCollection object is
appended to the tuple.
drawbounds is True by default, so all you have to do is collect the return value of readshapefile and alter the linestyle of the returned LineCollection object, which can be done with LineCollection.set_linestyle(). So in principle you can change the linestyle of your plotted shape file with something like this:
result = m.readshapefile('shapefiles/nic_autoc2018046n_pl_a', 'IceEdge', zorder = 10, color = 'blue')#, drawbounds = False)
col = result[-1]
col.set_linestyle('dotted')
plt.show()
However, your shapefile contains 5429 separate line segments of different length and somehow matplotlib does not seem to be able to deal with this large amount of non-continuous lines. At least on my machine the plotting did not finish within one hour, so I interrupted the process. I played a bit with your file and it seems like many of the lines are broken into segments unnecessarily (I'm guessing this is because the ice sheet outlines are somehow determined on tiles and then pieced together afterwards, but only the providers will really know). Maybe it would help to piece together adjacent pieces, but I'm not sure.
I was also wondering whether the result would even look that great with a dotted line, because there are so many sharp bends. Below I show a picture where I only plot the 100 longest line segments (leaving out drawcoastlines and with thicker lines) using this code:
import numpy as np
result = m.readshapefile('shapefiles/nic_autoc2018046n_pl_a', 'IceEdge', zorder = 10, color = 'blue')#, drawbounds = False)
col = result[-1]
segments = col.get_segments()
seglens = [len(seg) for seg in col.get_segments()]
segments = np.array(segments)
seglens = np.array(seglens)
idx = np.argsort(seglens)
seglens = seglens[idx]
segments = segments[idx]
col.remove()
new_col = LineCollection(segments[-100:],linewidths = 2, linestyles='dotted', colors='b')
ax.add_collection(new_col)
plt.show()
And the result looks like this:

Add subplots row header that scales properly in matplotlib

How can I use annotate() (or any other command for that matter) to add a second "ylabel" to the right of a figure which makes the text "scale" the same way as the other texts (axis x,y-label and title)? With scaling I mean that I don't want to hack text offsets manually or have a solution which fails as soon as I rescale the figure/add more plots/add a colorbar or similar. I don't want to use twinx, because I'm not plotting any additional data, and I don't need another axis.
Here's an image of what I want to achieve:
Here is my code to produce this image, I want to change the ax.annotate part:
import numpy as np
import matplotlib.pyplot as plt
numPlotsY = 3
numPlotsX = 3
f, ax_grid = plt.subplots(numPlotsY,numPlotsX,sharex=True,sharey=True)
A = np.arange(numPlotsY)+1.0 # Amplitude
phi = np.arange(numPlotsX) # Phase shift
x = np.linspace(0,2.0,100) # x
for y_i in range(0,numPlotsY):
for x_i in range(0,numPlotsX):
ax = ax_grid[y_i,x_i]
y = A[y_i]*np.sin(x*np.pi + phi[x_i])
ax.plot(x,y,lw=2.0)
# Add xlabel to the left column
if x_i == 0:
ax.set_ylabel(r'$y$')
ax.set_yticks([-4,-2,0,2,4])
# Add ylabel below bottom row
if y_i == numPlotsY-1:
ax.set_xlabel(r'$x/\pi$')
ax.set_xticks([0.5,1.0,1.5])
# Add Phi label above top row
if y_i == 0:
ax.set_title(r'$\phi=%s$' % phi[x_i])
# Add amplitude label to the right... how??
if x_i == numPlotsX-1:
ax.annotate(r'$A=%d$' % A[x_i], xy=(1.1,0.5), rotation=90,
ha='center',va='center',xycoords='axes fraction')
f.subplots_adjust(wspace=0,hspace=0)
plt.suptitle(r'$A\cdot\sin\left(2\pi x + \phi\right)$',fontsize=18)
plt.show()
I've seen this topic discussed several times without an elegant solution. There's always so much hacking involved. I really think this boils down to the way matplotlib treats the axes. Why can't there be one label for each of the four sides of the figure, that behave the same way?

Change color depending on height in Mayavi iso_surface

Is is possible to change the colour of an iso-surface depending on height of the points (in python / mayavi) ?
I can create an iso-surface visualization with my script, but I don't know how to make the iso_surface change colour with z axis so that it will be let's say black at the bottom and white at the top of the plot.
I need this in order to make sense of the visualization when it is viewed from directly above the graph.
If you know any other way to achieve this, please let me know as well.
I only want to show one iso_surface plot.
I managed to do this by combining some code from examples http://docs.enthought.com/mayavi/mayavi/auto/example_atomic_orbital.html#example-atomic-orbital and http://docs.enthought.com/mayavi/mayavi/auto/example_custom_colormap.html . Basically you must create a surface as in atomic-orbital example and then make it change colour depending on x. You must create an array of values for x. My code is (the relevant part) :
#src.image_data.point_data.add_array(np.indices(list(self.data.shape)[self.nx,self.ny,self.nz])[2].T.ravel())
src.image_data.point_data.add_array(np.indices(list(self.data.shape))[0].T.ravel())
src.image_data.point_data.get_array(1).name = 'z'
# Make sure that the dataset is up to date with the different arrays:
src.image_data.point_data.update()
# We select the 'scalar' attribute, ie the norm of Phi
src2 = mlab.pipeline.set_active_attribute(src, point_scalars='scalar')
# Cut isosurfaces of the norm
contour = mlab.pipeline.contour(src2)
# contour.filter.contours=[plotIsoSurfaceContours]
# contour.filter.contours=[plotIsoSurfaceContours[0]]
min_c = min(contour.filter._data_min * 1.05,contour.filter._data_max)
max_c = max(contour.filter._data_max * 0.95,contour.filter._data_min)
plotIsoSurfaceContours = [ max(min(max_c,x),min_c) for x in plotIsoSurfaceContours ]
contour.filter.contours= plotIsoSurfaceContours
# Now we select the 'angle' attribute, ie the phase of Phi
contour2 = mlab.pipeline.set_active_attribute(contour, point_scalars='z')
# And we display the surface. The colormap is the current attribute: the phase.
# mlab.pipeline.surface(contour2, colormap='hsv')
xxx = mlab.pipeline.surface(contour2, colormap='gist_ncar')
colorbar = xxx.module_manager.scalar_lut_manager
colorbar.reverse_lut = True
lut = xxx.module_manager.scalar_lut_manager.lut.table.to_array()
lut[:,-1] = int(plotIsoSurfaceOpacity * 254)
xxx.module_manager.scalar_lut_manager.lut.table = lut
# mlab.colorbar(title='Phase', orientation='vertical', nb_labels=3)
self.data is my data, and for unknown reasons if you want to set opacity of your surface you must reverse the lut first and then set the opacity. Multiplication by 254 instead of 255 is done to avoid a possible bug in mayavi.
I hope this helps someone.

Categories

Resources