matplotlib preserve aspect ratio in Wedge patches (pie charts) - python

I want to plot colored pie charts at specific positions without distorting their circular aspect ratio. I'm using Wedge patches because I could not find a better solution. Here is the code
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import patches, collections
fig, axes = plt.subplots()
for i in range(20):
x = np.random.uniform(low=0, high=1, size=10).cumsum()
axes.scatter(x=x, y=np.repeat(i, x.shape[0]), c='gray', s=1)
pies = []
N = 4
cmap = plt.cm.get_cmap("hsv", N + 1)
colors = list(map(cmap, range(N)))
print(colors)
for i in range(2, 2 + N):
thetas = np.linspace(0, 360, num=i)
assert len(thetas) - 1 <= len(colors)
for theta1, theta2, c in zip(thetas[:-1], thetas[1:], colors):
wedge = patches.Wedge((i, i), r=i / 10, theta1=theta1, theta2=theta2,
color=c)
pies.append(wedge)
axes.add_collection(collections.PatchCollection(pies,
match_original=True))
plt.show()
How to preserve the aspect ratio of pie charts? Setting axes.set_aspect("equal") is NOT an option because it squeezes the plot completely when I have more data points.
I've been looking at how to draw circles and preserve the aspect ratio but the solution cannot be adopted here - I'm plotting Wedges/pie charts, not Circles.
I also looked at matplotlib transforms but couldn't find the answer there either.

I tried the same thing, and matplotlib really doesn't try to make this easy for you, but I found a solution that you should be able to use.
You need to separate the centers from the wedges and add them to the PatchCollection as offsets. Then you can apply different transforms to the offsets (transOffset) and shape (transform).
Notice that I have changed the r-value (radius). This value is no longer in data coordinates, so it should always be the same size, regardless of how much you zoom, but it is too small to be visible at i/10.
from matplotlib import patches, collections, transforms
offsets = []
for i in range(2, 2 + N):
thetas = np.linspace(0, 360, num=i)
assert len(thetas) - 1 <= len(colors)
for theta1, theta2, c in zip(thetas[:-1], thetas[1:], colors):
wedge = patches.Wedge((0, 0), r=10, theta1=theta1, theta2=theta2,
color=c)
offsets.append((i, i))
pies.append(wedge)
coll = collections.PatchCollection(
pies, match_original=True, offsets=offsets,
transform=transforms.IdentityTransform(),
transOffset=axes.transData
)

It works fine for me when I set set_aspect('equal'):
Image is narrowed because y-range is longer than x-range I think.
If you'd set y_lim between 0 and a number lower than y_max, you'd see it better:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import patches, collections
fig, axes = plt.subplots()
for i in range(20):
x = np.random.uniform(low=0, high=1, size=10).cumsum()
axes.scatter(x=x, y=np.repeat(i, x.shape[0]), c='gray', s=1)
pies = []
N = 4
cmap = plt.cm.get_cmap("hsv", N + 1)
colors = list(map(cmap, range(N)))
print(colors)
for i in range(2, 2 + N):
thetas = np.linspace(0, 360, num=i)
assert len(thetas) - 1 <= len(colors)
for theta1, theta2, c in zip(thetas[:-1], thetas[1:], colors):
wedge = patches.Wedge((i, i), r=i / 10, theta1=theta1, theta2=theta2,
color=c)
pies.append(wedge)
axes.add_collection(collections.PatchCollection(pies,
match_original=True))
axes.set_aspect('equal')
axes.set_ylim(0,7.5)
plt.show()

Related

Python pcolormesh with separate alpha value for each bin

Lets say I have the following dataset:
import numpy as np
import matplotlib.pyplot as plt
x_bins = np.arange(10)
y_bins = np.arange(10)
z = np.random.random((9,9))
I can easily plot this data with
plt.pcolormesh(x_bins, y_bins, z, cmap = 'viridis)
However, let's say I now add some alpha value for each point:
a = np.random.random((9,9))
How can I change the alpha value of each box in the pcolormesh plot to match the corresponding value in array "a"?
The mesh created by pcolormesh can only have one alpha for the complete mesh. To set an individual alpha for each cell, the cells need to be created one by one as rectangles.
The code below shows the pcolormesh without alpha at the left, and the mesh of rectangles with alpha at the right. Note that on the spots where the rectangles touch, the semi-transparency causes some unequal overlap. This can be mitigated by not drawing the cell edge (edgecolor='none'), or by longer black lines to separate the cells.
The code below changes the x dimension so easier verify that x and y aren't mixed up. relim and autoscale are needed because with matplotlib's default behavior the x and y limits aren't changed by adding patches.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle, Patch
x_bins = np.arange(12)
y_bins = np.arange(10)
z = np.random.random((9, 11))
a = np.random.random((9, 11))
cmap = plt.get_cmap('inferno')
norm = plt.Normalize(z.min(), z.max())
fig, (ax1, ax2) = plt.subplots(ncols=2)
ax1.pcolormesh(x_bins, y_bins, z, cmap=cmap, norm=norm)
for i in range(len(x_bins) - 1):
for j in range(len(y_bins) - 1):
rect = Rectangle((x_bins[i], y_bins[j]), x_bins[i + 1] - x_bins[i], y_bins[j + 1] - y_bins[j],
facecolor=cmap(norm(z[j, i])), alpha=a[j, i], edgecolor='none')
ax2.add_patch(rect)
# ax2.vlines(x_bins, y_bins.min(), y_bins.max(), edgecolor='black')
# ax2.hlines(y_bins, x_bins.min(), x_bins.max(), edgecolor='black')
ax2.relim()
ax2.autoscale(enable=True, tight=True)
plt.show()

draw a color grid based on points density using python matplotlib

The question is to read 10,000 coordinate points from a file and create a colored grid based on the density of each block on the grid. The range of x-axis is [-73.59, -73.55] and the y-axis is [45.49,45.530]. My code will plot a grid with many different colors, now I need a feature to only color the grid that has a specific density n, for example, The n = 100, only the grid with 100 points or higher will be colored to yellow, and other grids will be black.
I just added a link to my shapefile
https://drive.google.com/open?id=1H-8FhfonnPrYW9y7RQZDtiNLxVEiC6R8
import numpy as np
import matplotlib.pyplot as plt
import shapefile
grid_size = 0.002
x1 = np.arange(-73.59,-73.55,grid_size)
y1 = np.arange(45.49,45.530,grid_size)
shape = shapefile.Reader("Shape/crime_dt.shp",encoding='ISO-8859-1')
shapeRecords = shape.shapeRecords()
x_coordinates=[]
y_coordinates=[]
# read all points in .shp file, and store them in 2 lists.
for k in range(len(shapeRecords)):
x = float(shapeRecords[k].shape.__geo_interface__["coordinates"][0])
y = float(shapeRecords[k].shape.__geo_interface__["coordinates"][1])
x_coordinates.append(x)
y_coordinates.append(y)
plt.hist2d(x_coordinates,y_coordinates,bins=[x1,y1])
plt.show()
You can create a colormap with just two colors, and set vmin and vmax to be symmetrical around your desired pivot value.
Optionally you put the value of each bin inside the cells, while the pivot value decides the text color.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
grid_size = 0.002
x1 = np.arange(-73.59, -73.55, grid_size)
y1 = np.arange(45.49, 45.530, grid_size)
# read coordinates from file and put them into two lists, similar to this
x_coordinates = np.random.uniform(x1.min(), x1.max(), size=40000)
y_coordinates = np.random.uniform(y1.min(), y1.max(), size=40000)
pivot_value = 100
# create a colormap with two colors, vmin and vmax are chosen so that their center is the pivot value
cmap = ListedColormap(['indigo', 'gold'])
# create a 2d histogram with xs and ys as bin boundaries
binvalues, _, _, _ = plt.hist2d(x_coordinates, y_coordinates, bins=[x1, y1], cmap=cmap, vmin=0, vmax=2*pivot_value)
binvalues = binvalues.astype(np.int)
for i in range(len(x1) - 1):
for j in range(len(y1) - 1):
plt.text((x1[i] + x1[i + 1]) / 2, (y1[j] + y1[j + 1]) / 2, binvalues[i, j],
color='white' if binvalues[i, j] < pivot_value else 'black',
ha='center', va='center', size=8)
plt.show()
PS: If the bin values are very important, you can add them all as ticks. Then, their positions can also be used to draw gridlines as a division between the cells.
plt.yticks(y1)
plt.xticks(x1, rotation=90)
plt.grid(True, ls='-', lw=1, color='black')
To obtain contours based on these data, you could plt.contourf with the generated matrix. (You might want to use np.histogram2d to directly create the matrix.)
plt.contourf((x1[1:]+x1[:-1])/2, (y1[1:]+y1[:-1])/2, binvalues.T, levels=[0,100,1000], cmap=cmap)

plot with polycollection disappears when polygons get too small

I'm using a PolyCollection to plot data of various sizes. Sometimes the polygons are very small. If they are too small, they don't get plotted at all. I would expect the outline at least to show up so you'd have an idea that some data is there. Is there a a setting to control this?
Here's some code to reproduce the problem, as well as the output image:
import matplotlib.pyplot as plt
from matplotlib.collections import PolyCollection
from matplotlib import colors
fig = plt.figure()
ax = fig.add_subplot(111)
verts = []
edge_col = colors.colorConverter.to_rgb('lime')
face_col = [(2.0 + val) / 3.0 for val in edge_col] # a little lighter
for i in range(10):
w = 0.5 * 10**(-i)
xs = [i - w, i - w, i + w, i - w]
ys = [-w, w, 0, -w]
verts.append(list(zip(xs, ys)))
ax.set_xlim(-1, 11)
ax.set_ylim(-2, 2)
ax.add_collection(PolyCollection(verts, lw=3, alpha=0.5, edgecolor=edge_col, facecolor=face_col))
plt.savefig('out.png')
Notice that only six polygons are visible, whereas there should be ten.
Edit: I understand I could zoom in to see the others, but I was hoping to see a dot or the outline or something without doing this.
Edit 2: Here's a way to get the desired effect, by plotting the faces using a PolyCollection and then the edges using a series of Line2D plots with markers, based on Patol75's answer. My application is a matplotlib animation with lots of polygons, so I'd prefer to avoid Line2D for efficiency, and it would be cleaner if I didn't need to plot things twice, so I'm still hoping for a better answer.
ax.add_collection(PolyCollection(verts, lw=3, alpha=0.5, edgecolor=None, facecolor=face_col, zorder=1))
for pts in verts:
ax.add_line(Line2D([pt[0] for pt in pts], [pt[1] for pt in pts], lw=3, alpha=0.5, color=edge_col,
marker='.', ms=1, mec=edge_col, solid_capstyle='projecting', zorder=2))
Zooming in your plotting window, you would notice that your two remaining polygons are being plotted. They are just too small for you to see them. One way to be convinced of this is to replace
ax.set_xlim(-1, 6)
ax.set_ylim(-2, 2)
by
ax.set_xlim(1e-1, 1e1)
ax.set_ylim(1e-5, 1e0)
ax.set_xscale('log')
ax.set_yscale('log')
ax.set_aspect('equal')
Your five polygons are now visible, but on the downside the log scale restrains you to the positive side of the axes.
Now to propose an answer to your problem. If you keep a linear axis, as your polygons sizes span multiple orders of magnitude, you will not be able to see them all. What you can do is add an artist on your plot which specifies their location. This can be done with a marker, an arrow, etc... If we take the example of a marker, as you said, we only want to see this marker if we cannot see the polygon. The keyword zorder in the call to plot() allows to specify which artist should have the display priority on the figure. Please consider the example below.
import matplotlib.pyplot as plt
from matplotlib.collections import PolyCollection
fig = plt.figure()
ax = fig.add_subplot(111)
verts = []
for i in range(5):
w = 0.5 * 10**(-i)
xs = [i - w, i - w, i + w, i + w, i - w]
ys = [-w, w, w, -w, -w]
ax.plot((xs[2] + xs[1]) / 2, (ys[1] + ys[0]) / 2, linestyle='none',
marker='o', color='xkcd:crimson', markersize=1, zorder=-1)
verts.append(list(zip(xs, ys)))
ax.set_xlim(-1, 6)
ax.set_ylim(-2, 2)
poly = PolyCollection(verts, lw=5, edgecolor='black', facecolor='gray')
ax.add_collection(poly)
plt.show()
which produces
You would notice that if you zoom on the last two dots in the matplotlib figure, you actually do not see the markers, but rather the polygons.
You may introduce some minimal unit minw, which is the smallest size a shape can have.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.collections import PolyCollection
from matplotlib import colors
fig = plt.figure()
ax = fig.add_subplot(111)
verts = []
edge_col = colors.colorConverter.to_rgb('lime')
face_col = [(2.0 + val) / 3.0 for val in edge_col] # a little lighter
ax.set_xlim(-1, 11)
ax.set_ylim(-2, 2)
u = np.diff(np.array([ax.get_xlim(), ax.get_ylim()]), axis=1).min()
px = np.max(fig.get_size_inches())*fig.dpi
minw = u/px/2
for i in range(10):
w = 0.5 * 10**(-i)
if w < minw:
w = minw
xs = [i - w, i - w, i + w, i - w]
ys = [-w, w, 0, -w]
verts.append(list(zip(xs, ys)))
ax.add_collection(PolyCollection(verts, lw=3, alpha=0.5, edgecolor=edge_col, facecolor=face_col))
plt.savefig('out.png')
plt.show()

Creating a colour bar for a plot made with plt.fill

I'm new to Python (was an IDL user before hand) so I hope that I'm asking this in an understandable way. I've been trying to create a polar plot with x number of bins where the data in the bin is averaged and given a colour associated with that value. This seems to work fine while using the plt.fill command where I can define the bin and then the fill colour. The problem comes when I then try to make a colour bar to go with it. I keep getting errors that state AttributeError: 'Figure' object has no attribute 'autoscale_None'
Any advice would be helpful thanks.
import matplotlib.pyplot as plt
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from matplotlib.pyplot import figure, show, rc, grid
import pylab
r = np.arange(50)/5.
rstep = r[1] - r[0]
theta = np.arange(50)/50.*2.*np.pi
tstep = theta[1] - theta[0]
colorv = np.arange(50)/50.
# force square figure and square axes looks better for polar, IMO
width, height = mpl.rcParams['figure.figsize']
size = min(width, height)
# make a square figure
fig = figure(figsize=(size, size))
ax = fig.add_axes([0.1, 0.1, .8, .8])#, polar=True)
my_cmap = cm.jet
for j in range(len(r)):
rbox = np.array([r[j], r[j], r[j]+ rstep, r[j] + rstep])
for i in range(len(theta)):
thetabox = np.array([theta[i], theta[i] + tstep, theta[i] + tstep, theta[i]])
x = rbox*np.cos(thetabox)
y = rbox*np.sin(thetabox)
plt.fill(x,y, facecolor = my_cmap(colorv[j]))
# Add colorbar, make sure to specify tick locations to match desired ticklabels
cbar = fig.colorbar(fig, ticks=[np.min(colorv), np.max(colorv)])
cb = plt.colorbar()
plt.show()
* here is a slightly better example of my real data, there are holes missing everywhere, so in this example I've just made a big one in a quarter of the circle. When I've tried meshing, the code seems to try to interpolate over these regions.
r = np.arange(50)/50.*7. + 3.
rstep = r[1] - r[0]
theta = np.arange(50)/50.*1.5*np.pi - np.pi
tstep = theta[1] - theta[0]
colorv = np.sin(r/10.*np.pi)
# force square figure and square axes looks better for polar, IMO
width, height = mpl.rcParams['figure.figsize']
size = min(width, height)
# make a square figure
fig = figure(figsize=(size, size))
ax = fig.add_axes([0.1, 0.1, .8, .8])#, polar=True)
my_cmap = cm.jet
for j in range(len(r)):
rbox = np.array([r[j], r[j], r[j]+ rstep, r[j] + rstep])
for i in range(len(theta)):
thetabox = np.array([theta[i], theta[i] + tstep, theta[i] + tstep, theta[i]])
x = rbox*np.cos(thetabox)
y = rbox*np.sin(thetabox)
plt.fill(x,y, facecolor = my_cmap(colorv[j]))
# Add colorbar, make sure to specify tick locations to match desired ticklabels
#cbar = fig.colorbar(fig, ticks=[np.min(colorv), np.max(colorv)])
#cb = plt.colorbar()
plt.show()
And then with a meshing involved...
from matplotlib.mlab import griddata
r = np.arange(50)/5.
rstep = r[1] - r[0]
theta = np.arange(50)/50.*1.5*np.pi - np.pi
tstep = theta[1] - theta[0]
colorv = np.sin(r/10.*np.pi)
# force square figure and square axes looks better for polar, IMO
width, height = mpl.rcParams['figure.figsize']
size = min(width, height)
# make a square figure
fig = figure(figsize=(size, size))
ax = fig.add_axes([0.1, 0.1, .8, .8])#, polar=True)
my_cmap = cm.jet
x = r*np.cos(theta)
y = r*np.sin(theta)
X,Y = np.meshgrid(x,y)
data = griddata(x,y,colorv,X,Y)
cax = plt.contourf(X,Y, data)
plt.colorbar()
# Add colorbar, make sure to specify tick locations to match desired ticklabels
#cbar = fig.colorbar(fig, ticks=[np.min(colorv), np.max(colorv)])
#cb = plt.colorbar()
plt.show()
colorbar needs things to be an instance of ScalarMappable in order to make a colorbar from them.
Because you're manually setting each tile, there's nothing that essentially has a colorbar.
There are a number of ways to fake it from your colormap, but in this case there's a much simpler solution.
pcolormesh does exactly what you want, and will be much faster.
As an example:
import numpy as np
import matplotlib.pyplot as plt
# Linspace makes what you're doing _much_ easier (and includes endpoints)
r = np.linspace(0, 10, 50)
theta = np.linspace(0, 2*np.pi, 50)
fig = plt.figure()
ax = fig.add_subplot(111, projection='polar')
# "Grid" r and theta into 2D arrays (see the docs for meshgrid)
r, theta = np.meshgrid(r, theta)
cax = ax.pcolormesh(theta, r, r, edgecolors='black', antialiased=True)
# We could just call `plt.colorbar`, but I prefer to be more explicit
# and pass in the artist that I want it to extract colors from.
fig.colorbar(cax)
plt.show()
Or, if you'd prefer non-polar axes, as in your example code:
import numpy as np
import matplotlib.pyplot as plt
r = np.linspace(0, 10, 50)
theta = np.linspace(0, 2*np.pi, 50)
# "Grid" r and theta and convert them to cartesian coords...
r, theta = np.meshgrid(r, theta)
x, y = r * np.cos(theta), r * np.sin(theta)
fig = plt.figure()
ax = fig.add_subplot(111)
ax.axis('equal')
cax = ax.pcolormesh(x, y, r, edgecolors='black', antialiased=True)
fig.colorbar(cax)
plt.show()
Note: If you'd prefer the boundary lines a bit less dark, just specify linewidth=0.5 or something similar to pcolormesh.
Finally, if you did want to directly make the colorbar from the colormap in your original code, you'd create an instance of ScalarMappable from it and pass this to colorbar. It's easier than it sounds, but it's a bit verbose.
As an example, in your original code, if you do something like the following:
cax = cm.ScalarMappable(cmap=my_cmap)
cax.set_array(colorv)
fig.colorbar(cax)
It should do what you want.
So I've found a workaround. Since I know of a region where I definitely won't have data, I've plotted some there. I've made sure that the data covers the entire range of what I'm potting. I then cover it up (this region was going to be covered anyway, it shows where the "earth" is located). Now I can go ahead and use plt.fill as I had originally and use the colour bar from the randomly potted data. I know this isn't probably the correct way, but it works and doesn't try to interpolate my data.
Thanks so much for helping get this sorted. and if you know of a better way, I'd be happy to hear it!
hid = plt.pcolormesh(X,Y, data, antialiased=True)
#here we cover up the region that we just plotted in
r3 = [1 for i in range(360)]
theta3 = np.arange(360)*np.pi/180.
plt.fill(theta3, r3, 'w')
#now we can go through and fill in all the regions
for j in range(len(r)):
rbox = np.array([r[j], r[j], r[j]+ rstep, r[j] + rstep])
for i in range(len(theta)):
thetabox = np.array([theta[i], theta[i] + tstep, theta[i] + tstep, theta[i]])
x = rbox*np.cos(thetabox)
y = rbox*np.sin(thetabox)
colorv = np.sin(r[j]/10.*np.pi)
plt.fill(thetabox,rbox, facecolor = my_cmap(colorv))
#And now we can plot the color bar that fits the data Tada :)
plt.colorbar()
plt.show()

Generate a heatmap using a scatter data set

I have a set of X,Y data points (about 10k) that are easy to plot as a scatter plot but that I would like to represent as a heatmap.
I looked through the examples in Matplotlib and they all seem to already start with heatmap cell values to generate the image.
Is there a method that converts a bunch of x, y, all different, to a heatmap (where zones with higher frequency of x, y would be "warmer")?
If you don't want hexagons, you can use numpy's histogram2d function:
import numpy as np
import numpy.random
import matplotlib.pyplot as plt
# Generate some test data
x = np.random.randn(8873)
y = np.random.randn(8873)
heatmap, xedges, yedges = np.histogram2d(x, y, bins=50)
extent = [xedges[0], xedges[-1], yedges[0], yedges[-1]]
plt.clf()
plt.imshow(heatmap.T, extent=extent, origin='lower')
plt.show()
This makes a 50x50 heatmap. If you want, say, 512x384, you can put bins=(512, 384) in the call to histogram2d.
Example:
In Matplotlib lexicon, i think you want a hexbin plot.
If you're not familiar with this type of plot, it's just a bivariate histogram in which the xy-plane is tessellated by a regular grid of hexagons.
So from a histogram, you can just count the number of points falling in each hexagon, discretiize the plotting region as a set of windows, assign each point to one of these windows; finally, map the windows onto a color array, and you've got a hexbin diagram.
Though less commonly used than e.g., circles, or squares, that hexagons are a better choice for the geometry of the binning container is intuitive:
hexagons have nearest-neighbor symmetry (e.g., square bins don't,
e.g., the distance from a point on a square's border to a point
inside that square is not everywhere equal) and
hexagon is the highest n-polygon that gives regular plane
tessellation (i.e., you can safely re-model your kitchen floor with hexagonal-shaped tiles because you won't have any void space between the tiles when you are finished--not true for all other higher-n, n >= 7, polygons).
(Matplotlib uses the term hexbin plot; so do (AFAIK) all of the plotting libraries for R; still i don't know if this is the generally accepted term for plots of this type, though i suspect it's likely given that hexbin is short for hexagonal binning, which is describes the essential step in preparing the data for display.)
from matplotlib import pyplot as PLT
from matplotlib import cm as CM
from matplotlib import mlab as ML
import numpy as NP
n = 1e5
x = y = NP.linspace(-5, 5, 100)
X, Y = NP.meshgrid(x, y)
Z1 = ML.bivariate_normal(X, Y, 2, 2, 0, 0)
Z2 = ML.bivariate_normal(X, Y, 4, 1, 1, 1)
ZD = Z2 - Z1
x = X.ravel()
y = Y.ravel()
z = ZD.ravel()
gridsize=30
PLT.subplot(111)
# if 'bins=None', then color of each hexagon corresponds directly to its count
# 'C' is optional--it maps values to x-y coordinates; if 'C' is None (default) then
# the result is a pure 2D histogram
PLT.hexbin(x, y, C=z, gridsize=gridsize, cmap=CM.jet, bins=None)
PLT.axis([x.min(), x.max(), y.min(), y.max()])
cb = PLT.colorbar()
cb.set_label('mean value')
PLT.show()
Edit: For a better approximation of Alejandro's answer, see below.
I know this is an old question, but wanted to add something to Alejandro's anwser: If you want a nice smoothed image without using py-sphviewer you can instead use np.histogram2d and apply a gaussian filter (from scipy.ndimage.filters) to the heatmap:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from scipy.ndimage.filters import gaussian_filter
def myplot(x, y, s, bins=1000):
heatmap, xedges, yedges = np.histogram2d(x, y, bins=bins)
heatmap = gaussian_filter(heatmap, sigma=s)
extent = [xedges[0], xedges[-1], yedges[0], yedges[-1]]
return heatmap.T, extent
fig, axs = plt.subplots(2, 2)
# Generate some test data
x = np.random.randn(1000)
y = np.random.randn(1000)
sigmas = [0, 16, 32, 64]
for ax, s in zip(axs.flatten(), sigmas):
if s == 0:
ax.plot(x, y, 'k.', markersize=5)
ax.set_title("Scatter plot")
else:
img, extent = myplot(x, y, s)
ax.imshow(img, extent=extent, origin='lower', cmap=cm.jet)
ax.set_title("Smoothing with $\sigma$ = %d" % s)
plt.show()
Produces:
The scatter plot and s=16 plotted on top of eachother for Agape Gal'lo (click for better view):
One difference I noticed with my gaussian filter approach and Alejandro's approach was that his method shows local structures much better than mine. Therefore I implemented a simple nearest neighbour method at pixel level. This method calculates for each pixel the inverse sum of the distances of the n closest points in the data. This method is at a high resolution pretty computationally expensive and I think there's a quicker way, so let me know if you have any improvements.
Update: As I suspected, there's a much faster method using Scipy's scipy.cKDTree. See Gabriel's answer for the implementation.
Anyway, here's my code:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
def data_coord2view_coord(p, vlen, pmin, pmax):
dp = pmax - pmin
dv = (p - pmin) / dp * vlen
return dv
def nearest_neighbours(xs, ys, reso, n_neighbours):
im = np.zeros([reso, reso])
extent = [np.min(xs), np.max(xs), np.min(ys), np.max(ys)]
xv = data_coord2view_coord(xs, reso, extent[0], extent[1])
yv = data_coord2view_coord(ys, reso, extent[2], extent[3])
for x in range(reso):
for y in range(reso):
xp = (xv - x)
yp = (yv - y)
d = np.sqrt(xp**2 + yp**2)
im[y][x] = 1 / np.sum(d[np.argpartition(d.ravel(), n_neighbours)[:n_neighbours]])
return im, extent
n = 1000
xs = np.random.randn(n)
ys = np.random.randn(n)
resolution = 250
fig, axes = plt.subplots(2, 2)
for ax, neighbours in zip(axes.flatten(), [0, 16, 32, 64]):
if neighbours == 0:
ax.plot(xs, ys, 'k.', markersize=2)
ax.set_aspect('equal')
ax.set_title("Scatter Plot")
else:
im, extent = nearest_neighbours(xs, ys, resolution, neighbours)
ax.imshow(im, origin='lower', extent=extent, cmap=cm.jet)
ax.set_title("Smoothing over %d neighbours" % neighbours)
ax.set_xlim(extent[0], extent[1])
ax.set_ylim(extent[2], extent[3])
plt.show()
Result:
Instead of using np.hist2d, which in general produces quite ugly histograms, I would like to recycle py-sphviewer, a python package for rendering particle simulations using an adaptive smoothing kernel and that can be easily installed from pip (see webpage documentation). Consider the following code, which is based on the example:
import numpy as np
import numpy.random
import matplotlib.pyplot as plt
import sphviewer as sph
def myplot(x, y, nb=32, xsize=500, ysize=500):
xmin = np.min(x)
xmax = np.max(x)
ymin = np.min(y)
ymax = np.max(y)
x0 = (xmin+xmax)/2.
y0 = (ymin+ymax)/2.
pos = np.zeros([len(x),3])
pos[:,0] = x
pos[:,1] = y
w = np.ones(len(x))
P = sph.Particles(pos, w, nb=nb)
S = sph.Scene(P)
S.update_camera(r='infinity', x=x0, y=y0, z=0,
xsize=xsize, ysize=ysize)
R = sph.Render(S)
R.set_logscale()
img = R.get_image()
extent = R.get_extent()
for i, j in zip(xrange(4), [x0,x0,y0,y0]):
extent[i] += j
print extent
return img, extent
fig = plt.figure(1, figsize=(10,10))
ax1 = fig.add_subplot(221)
ax2 = fig.add_subplot(222)
ax3 = fig.add_subplot(223)
ax4 = fig.add_subplot(224)
# Generate some test data
x = np.random.randn(1000)
y = np.random.randn(1000)
#Plotting a regular scatter plot
ax1.plot(x,y,'k.', markersize=5)
ax1.set_xlim(-3,3)
ax1.set_ylim(-3,3)
heatmap_16, extent_16 = myplot(x,y, nb=16)
heatmap_32, extent_32 = myplot(x,y, nb=32)
heatmap_64, extent_64 = myplot(x,y, nb=64)
ax2.imshow(heatmap_16, extent=extent_16, origin='lower', aspect='auto')
ax2.set_title("Smoothing over 16 neighbors")
ax3.imshow(heatmap_32, extent=extent_32, origin='lower', aspect='auto')
ax3.set_title("Smoothing over 32 neighbors")
#Make the heatmap using a smoothing over 64 neighbors
ax4.imshow(heatmap_64, extent=extent_64, origin='lower', aspect='auto')
ax4.set_title("Smoothing over 64 neighbors")
plt.show()
which produces the following image:
As you see, the images look pretty nice, and we are able to identify different substructures on it. These images are constructed spreading a given weight for every point within a certain domain, defined by the smoothing length, which in turns is given by the distance to the closer nb neighbor (I've chosen 16, 32 and 64 for the examples). So, higher density regions typically are spread over smaller regions compared to lower density regions.
The function myplot is just a very simple function that I've written in order to give the x,y data to py-sphviewer to do the magic.
If you are using 1.2.x
import numpy as np
import matplotlib.pyplot as plt
x = np.random.randn(100000)
y = np.random.randn(100000)
plt.hist2d(x,y,bins=100)
plt.show()
Seaborn now has the jointplot function which should work nicely here:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# Generate some test data
x = np.random.randn(8873)
y = np.random.randn(8873)
sns.jointplot(x=x, y=y, kind='hex')
plt.show()
Here's Jurgy's great nearest neighbour approach but implemented using scipy.cKDTree. In my tests it's about 100x faster.
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from scipy.spatial import cKDTree
def data_coord2view_coord(p, resolution, pmin, pmax):
dp = pmax - pmin
dv = (p - pmin) / dp * resolution
return dv
n = 1000
xs = np.random.randn(n)
ys = np.random.randn(n)
resolution = 250
extent = [np.min(xs), np.max(xs), np.min(ys), np.max(ys)]
xv = data_coord2view_coord(xs, resolution, extent[0], extent[1])
yv = data_coord2view_coord(ys, resolution, extent[2], extent[3])
def kNN2DDens(xv, yv, resolution, neighbours, dim=2):
"""
"""
# Create the tree
tree = cKDTree(np.array([xv, yv]).T)
# Find the closest nnmax-1 neighbors (first entry is the point itself)
grid = np.mgrid[0:resolution, 0:resolution].T.reshape(resolution**2, dim)
dists = tree.query(grid, neighbours)
# Inverse of the sum of distances to each grid point.
inv_sum_dists = 1. / dists[0].sum(1)
# Reshape
im = inv_sum_dists.reshape(resolution, resolution)
return im
fig, axes = plt.subplots(2, 2, figsize=(15, 15))
for ax, neighbours in zip(axes.flatten(), [0, 16, 32, 63]):
if neighbours == 0:
ax.plot(xs, ys, 'k.', markersize=5)
ax.set_aspect('equal')
ax.set_title("Scatter Plot")
else:
im = kNN2DDens(xv, yv, resolution, neighbours)
ax.imshow(im, origin='lower', extent=extent, cmap=cm.Blues)
ax.set_title("Smoothing over %d neighbours" % neighbours)
ax.set_xlim(extent[0], extent[1])
ax.set_ylim(extent[2], extent[3])
plt.savefig('new.png', dpi=150, bbox_inches='tight')
and the initial question was... how to convert scatter values to grid values, right?
histogram2d does count the frequency per cell, however, if you have other data per cell than just the frequency, you'd need some additional work to do.
x = data_x # between -10 and 4, log-gamma of an svc
y = data_y # between -4 and 11, log-C of an svc
z = data_z #between 0 and 0.78, f1-values from a difficult dataset
So, I have a dataset with Z-results for X and Y coordinates. However, I was calculating few points outside the area of interest (large gaps), and heaps of points in a small area of interest.
Yes here it becomes more difficult but also more fun. Some libraries (sorry):
from matplotlib import pyplot as plt
from matplotlib import cm
import numpy as np
from scipy.interpolate import griddata
pyplot is my graphic engine today,
cm is a range of color maps with some initeresting choice.
numpy for the calculations,
and griddata for attaching values to a fixed grid.
The last one is important especially because the frequency of xy points is not equally distributed in my data. First, let's start with some boundaries fitting to my data and an arbitrary grid size. The original data has datapoints also outside those x and y boundaries.
#determine grid boundaries
gridsize = 500
x_min = -8
x_max = 2.5
y_min = -2
y_max = 7
So we have defined a grid with 500 pixels between the min and max values of x and y.
In my data, there are lots more than the 500 values available in the area of high interest; whereas in the low-interest-area, there are not even 200 values in the total grid; between the graphic boundaries of x_min and x_max there are even less.
So for getting a nice picture, the task is to get an average for the high interest values and to fill the gaps elsewhere.
I define my grid now. For each xx-yy pair, i want to have a color.
xx = np.linspace(x_min, x_max, gridsize) # array of x values
yy = np.linspace(y_min, y_max, gridsize) # array of y values
grid = np.array(np.meshgrid(xx, yy.T))
grid = grid.reshape(2, grid.shape[1]*grid.shape[2]).T
Why the strange shape? scipy.griddata wants a shape of (n, D).
Griddata calculates one value per point in the grid, by a predefined method.
I choose "nearest" - empty grid points will be filled with values from the nearest neighbor. This looks as if the areas with less information have bigger cells (even if it is not the case). One could choose to interpolate "linear", then areas with less information look less sharp. Matter of taste, really.
points = np.array([x, y]).T # because griddata wants it that way
z_grid2 = griddata(points, z, grid, method='nearest')
# you get a 1D vector as result. Reshape to picture format!
z_grid2 = z_grid2.reshape(xx.shape[0], yy.shape[0])
And hop, we hand over to matplotlib to display the plot
fig = plt.figure(1, figsize=(10, 10))
ax1 = fig.add_subplot(111)
ax1.imshow(z_grid2, extent=[x_min, x_max,y_min, y_max, ],
origin='lower', cmap=cm.magma)
ax1.set_title("SVC: empty spots filled by nearest neighbours")
ax1.set_xlabel('log gamma')
ax1.set_ylabel('log C')
plt.show()
Around the pointy part of the V-Shape, you see I did a lot of calculations during my search for the sweet spot, whereas the less interesting parts almost everywhere else have a lower resolution.
Make a 2-dimensional array that corresponds to the cells in your final image, called say heatmap_cells and instantiate it as all zeroes.
Choose two scaling factors that define the difference between each array element in real units, for each dimension, say x_scale and y_scale. Choose these such that all your datapoints will fall within the bounds of the heatmap array.
For each raw datapoint with x_value and y_value:
heatmap_cells[floor(x_value/x_scale),floor(y_value/y_scale)]+=1
Very similar to #Piti's answer, but using 1 call instead of 2 to generate the points:
import numpy as np
import matplotlib.pyplot as plt
pts = 1000000
mean = [0.0, 0.0]
cov = [[1.0,0.0],[0.0,1.0]]
x,y = np.random.multivariate_normal(mean, cov, pts).T
plt.hist2d(x, y, bins=50, cmap=plt.cm.jet)
plt.show()
Output:
Here's one I made on a 1 Million point set with 3 categories (colored Red, Green, and Blue). Here's a link to the repository if you'd like to try the function. Github Repo
histplot(
X,
Y,
labels,
bins=2000,
range=((-3,3),(-3,3)),
normalize_each_label=True,
colors = [
[1,0,0],
[0,1,0],
[0,0,1]],
gain=50)
I'm afraid I'm a little late to the party but I had a similar question a while ago. The accepted answer (by #ptomato) helped me out but I'd also want to post this in case it's of use to someone.
''' I wanted to create a heatmap resembling a football pitch which would show the different actions performed '''
import numpy as np
import matplotlib.pyplot as plt
import random
#fixing random state for reproducibility
np.random.seed(1234324)
fig = plt.figure(12)
ax1 = fig.add_subplot(121)
ax2 = fig.add_subplot(122)
#Ratio of the pitch with respect to UEFA standards
hmap= np.full((6, 10), 0)
#print(hmap)
xlist = np.random.uniform(low=0.0, high=100.0, size=(20))
ylist = np.random.uniform(low=0.0, high =100.0, size =(20))
#UEFA Pitch Standards are 105m x 68m
xlist = (xlist/100)*10.5
ylist = (ylist/100)*6.5
ax1.scatter(xlist,ylist)
#int of the co-ordinates to populate the array
xlist_int = xlist.astype (int)
ylist_int = ylist.astype (int)
#print(xlist_int, ylist_int)
for i, j in zip(xlist_int, ylist_int):
#this populates the array according to the x,y co-ordinate values it encounters
hmap[j][i]= hmap[j][i] + 1
#Reversing the rows is necessary
hmap = hmap[::-1]
#print(hmap)
im = ax2.imshow(hmap)
Here's the result
None of these solutions worked for my application, so this is what I came up with. Essentially I am placing a 2D Gaussian at every single point:
import cv2
import numpy as np
import matplotlib.pyplot as plt
def getGaussian2D(ksize, sigma, norm=True):
oneD = cv2.getGaussianKernel(ksize=ksize, sigma=sigma)
twoD = np.outer(oneD.T, oneD)
return twoD / np.sum(twoD) if norm else twoD
def pt2heat(pts, shape, kernel=16, sigma=5):
heat = np.zeros(shape)
k = getGaussian2D(kernel, sigma)
for y,x in pts:
x, y = int(x), int(y)
for i in range(-kernel//2, kernel//2):
for j in range(-kernel//2, kernel//2):
if 0 <= x+i < shape[0] and 0 <= y+j < shape[1]:
heat[x+i, y+j] = heat[x+i, y+j] + k[i+kernel//2, j+kernel//2]
return heat
heat = pts2heat(pts, img.shape[:2])
plt.imshow(heat, cmap='heat')
Here are the points overlayed ontop of it's associated image, along with the resulting heat map:

Categories

Resources