How to plot vectors in Python using matplotlib

I am taking a course on linear algebra and I want to visualize the vectors in action, such as vector addition, normal vector, so on.
For instance:
V = np.array([[1,1],[-2,2],[4,-7]])
In this case I want to plot 3 vectors V1 = (1,1), V2 = (-2,2), V3 = (4,-7).
Then I should be able to add V1 and V2 to plot a new vector V12 (all together in one figure).
When I use the following code, the plot is not as intended:
import numpy as np
import matplotlib.pyplot as plt
M = np.array([[1,1],[-2,2],[4,-7]])
print("vector:1")
print(M[0,:])
# print("vector:2")
# print(M[1,:])
rows,cols = M.T.shape
print(cols)
for i,l in enumerate(range(0,cols)):
    print("Iteration: {}-{}".format(i,l))
    print("vector:{}".format(i))
    print(M[i,:])
    v1 = [0,0],[M[i,0],M[i,1]]
    # v1 = [M[i,0]],[M[i,1]]
    print(v1)
    plt.figure(i)
    plt.plot(v1)
plt.show()

How about something like
import numpy as np
import matplotlib.pyplot as plt
V = np.array([[1,1], [-2,2], [4,-7]])
origin = np.array([[0, 0, 0],[0, 0, 0]]) # origin point
plt.quiver(*origin, V[:,0], V[:,1], color=['r','b','g'], scale=21)
plt.show()
Then to add up any two vectors and plot them to the same figure, do so before you call plt.show(). Something like:
plt.quiver(*origin, V[:,0], V[:,1], color=['r','b','g'], scale=21)
v12 = V[0] + V[1] # adding up the 1st (red) and 2nd (blue) vectors
plt.quiver(*origin, v12[0], v12[1])
plt.show()
NOTE: in Python2 use origin[0], origin[1] instead of *origin

This may also be achieved using matplotlib.pyplot.quiver, as noted in the linked answer:
plt.quiver([0, 0, 0], [0, 0, 0], [1, -2, 4], [1, 2, -7], angles='xy', scale_units='xy', scale=1)
plt.xlim(-10, 10)
plt.ylim(-10, 10)
plt.show()

Your main problem is that you create new figures in your loop, so each vector gets drawn on a different figure. Here's what I came up with; let me know if it's still not what you expect:
CODE:
import numpy as np
import matplotlib.pyplot as plt
M = np.array([[1,1],[-2,2],[4,-7]])
rows,cols = M.T.shape
#Get absolute maxes for axis ranges to center origin
#This is optional
maxes = 1.1*np.amax(abs(M), axis = 0)
for i,l in enumerate(range(0,cols)):
    xs = [0,M[i,0]]
    ys = [0,M[i,1]]
    plt.plot(xs,ys)
plt.plot(0,0,'ok') #<-- plot a black point at the origin
plt.axis('equal') #<-- set the axes to the same scale
plt.xlim([-maxes[0],maxes[0]]) #<-- set the x axis limits
plt.ylim([-maxes[1],maxes[1]]) #<-- set the y axis limits
plt.legend(['V'+str(i+1) for i in range(cols)]) #<-- give a legend
plt.grid(visible=True, which='major') #<-- plot grid lines ('b=' was renamed 'visible=' in newer matplotlib)
plt.show()
OUTPUT:
EDIT CODE:
import numpy as np
import matplotlib.pyplot as plt
M = np.array([[1,1],[-2,2],[4,-7]])
rows,cols = M.T.shape
#Get absolute maxes for axis ranges to center origin
#This is optional
maxes = 1.1*np.amax(abs(M), axis = 0)
colors = ['b','r','k']
ax = plt.gca() # plt.axes() creates a fresh axes on each call in newer matplotlib, so reuse the current one
for i,l in enumerate(range(0,cols)):
    ax.arrow(0,0,M[i,0],M[i,1],head_width=0.05,head_length=0.1,color=colors[i])
plt.plot(0,0,'ok') #<-- plot a black point at the origin
plt.axis('equal') #<-- set the axes to the same scale
plt.xlim([-maxes[0],maxes[0]]) #<-- set the x axis limits
plt.ylim([-maxes[1],maxes[1]]) #<-- set the y axis limits
plt.grid(visible=True, which='major') #<-- plot grid lines
plt.show()
EDIT OUTPUT:

What did you expect the following to do?
v1 = [0,0],[M[i,0],M[i,1]]
v1 = [M[i,0]],[M[i,1]]
This makes two different tuples, and the second assignment overwrites the first... Anyway, matplotlib does not understand what a "vector" is in the sense you are using. You have to be explicit, and plot "arrows":
In [5]: ax = plt.axes()
In [6]: ax.arrow(0, 0, *v1, head_width=0.05, head_length=0.1)
Out[6]: <matplotlib.patches.FancyArrow at 0x114fc8358>
In [7]: ax.arrow(0, 0, *v2, head_width=0.05, head_length=0.1)
Out[7]: <matplotlib.patches.FancyArrow at 0x115bb1470>
In [8]: plt.ylim(-5,5)
Out[8]: (-5, 5)
In [9]: plt.xlim(-5,5)
Out[9]: (-5, 5)
In [10]: plt.show()
Result:
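For reference, here is a self-contained sketch of the same session, assuming v1 and v2 are the first two rows of the M array from the question:
import matplotlib.pyplot as plt

v1, v2 = [1, 1], [-2, 2]  # first two rows of M from the question

ax = plt.axes()
ax.arrow(0, 0, *v1, head_width=0.05, head_length=0.1)
ax.arrow(0, 0, *v2, head_width=0.05, head_length=0.1)
plt.xlim(-5, 5)
plt.ylim(-5, 5)
plt.show()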

Thanks to everyone, each of your posts helped me a lot.
rbierman's code was pretty straightforward for my question. I have modified it a bit and created a function to plot vectors from given arrays. I'd love to see any suggestions to improve it further.
import numpy as np
import matplotlib.pyplot as plt
def plotv(M):
    rows,cols = M.T.shape
    print(rows,cols)
    #Get absolute maxes for axis ranges to center origin
    #This is optional
    maxes = 1.1*np.amax(abs(M), axis = 0)
    colors = ['b','r','k']
    fig = plt.figure()
    fig.suptitle('Vectors', fontsize=10, fontweight='bold')
    ax = fig.add_subplot(111)
    fig.subplots_adjust(top=0.85)
    ax.set_title('Vector operations')
    ax.set_xlabel('x')
    ax.set_ylabel('y')
    for i,l in enumerate(range(0,cols)):
        # print(i)
        # draw on the existing axes rather than plt.axes(), which would create a new one
        ax.arrow(0,0,M[i,0],M[i,1],head_width=0.2,head_length=0.1,zorder=3)
        ax.text(M[i,0],M[i,1], str(M[i]), style='italic',
                bbox={'facecolor':'red', 'alpha':0.5, 'pad':0.5})
    plt.plot(0,0,'ok') #<-- plot a black point at the origin
    # plt.axis('equal') #<-- set the axes to the same scale
    plt.xlim([-maxes[0],maxes[0]]) #<-- set the x axis limits
    plt.ylim([-maxes[1],maxes[1]]) #<-- set the y axis limits
    plt.grid(visible=True, which='major') #<-- plot grid lines
    plt.show()
r = np.random.randint(4,size=[2,2])
print(r[0,:])
print(r[1,:])
r12 = np.add(r[0,:],r[1,:])
print(r12)
plotv(np.vstack((r,r12)))
Vector addition performed on random vectors

All nice solutions; borrowing and improvising for a special case: if you want to add a label near the arrowhead:
arr = [2, 3]
txt = "Vector X"
ax = plt.axes()  # assumes matplotlib.pyplot is imported as plt
ax.annotate(txt, arr)
ax.arrow(0, 0, *arr, head_width=0.05, head_length=0.1)

In order to match the vector length and angle with the x, y coordinates of the plot, you can pass the following options to plt.quiver:
plt.figure(figsize=(5,2), dpi=100)
plt.quiver(0,0,250,100, angles='xy', scale_units='xy', scale=1)
plt.xlim(0,250)
plt.ylim(0,100)

Quiver is a good method once you figure out its annoying nuances, like not plotting vectors in their original scales. As far as I can tell, you must pass these params to the quiver call, as many have pointed out: angles='xy', scale_units='xy', scale=1. You should also set your plt.xlim and plt.ylim such that you get a square or near-square grid; that is the only way I have gotten it to consistently plot the way I want. For instance, passing an origin as *[0,0] and U, V as *[5,3] means the resulting plot should be a vector anchored at the origin that goes over 5 units to the right on the x-axis and 3 units up on the y-axis.
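To illustrate, here is a minimal sketch of that recipe; the vector [5, 3] and the limits are arbitrary choices for the example:
import matplotlib.pyplot as plt

origin = [0, 0]
vec = [5, 3]  # arbitrary example vector

plt.quiver(*origin, *vec, angles='xy', scale_units='xy', scale=1)
plt.xlim(-6, 6)  # near-square limits so the angle isn't distorted
plt.ylim(-6, 6)
plt.gca().set_aspect('equal')
plt.show()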

Related

Python matplotlib polar coordinate is not plotting as it is supposed to be

I am plotting from a CSV file that contains Cartesian coordinates, and I want to change them to polar coordinates and then plot using the polar coordinates.
Here is the code
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
df = pd.read_csv('test_for_plotting.csv',index_col = 0)
x_temp = df['x'].values
y_temp = df['y'].values
df['radius'] = np.sqrt( np.power(x_temp,2) + np.power(y_temp,2) )
df['theta'] = np.arctan2(y_temp,x_temp)
df['degrees'] = np.degrees(df['theta'].values)
df['radians'] = np.radians(df['degrees'].values)
ax = plt.axes(polar = True)
ax.set_aspect('equal')
ax.axis("off")
sns.set(rc={'axes.facecolor':'white', 'figure.facecolor':'white','figure.figsize':(10,10)})
# sns.scatterplot(data = df, x = 'x',y = 'y', s= 1,alpha = 0.1, color = 'black',ax = ax)
sns.scatterplot(data = df, x = 'radians',y = 'radius', s= 1,alpha = 0.1, color = 'black',ax = ax)
plt.tight_layout()
plt.show()
Here is the dataset
If you run this code with polar = False and use this line to plot, sns.scatterplot(data = df, x = 'x',y = 'y', s= 1,alpha = 0.1, color = 'black',ax = ax), it will result in this picture.
Now, after setting polar = True and running this line to plot, sns.scatterplot(data = df, x = 'radians',y = 'radius', s= 1,alpha = 0.1, color = 'black',ax = ax), it is supposed to give you this.
But it is not working: if you run the actual code, the shape in the polar format is the same as in Cartesian, which does not make sense and does not match the picture I showed you for polar. (If you are wondering where I got the second picture from, I plotted it using R.)
I would appreciate your help and insights and thanks in advance!
For a polar plot, the "x-axis" represents the angle in radians. So, you need to switch x and y, and convert the angles to radians (I also added ax=ax, as the axes was created explicitly):
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
data = {'radius': [0, 0.5, 1, 1.5, 2, 2.5], 'degrees': [0, 25, 75, 155, 245, 335]}
df_temp = pd.DataFrame(data)
ax = plt.axes(polar=True)
sns.scatterplot(x=np.radians(df_temp['degrees']), y=df_temp['radius'].to_numpy(),
                s=100, alpha=1, color='black', ax=ax)
for deg, y in zip(df_temp['degrees'], df_temp['radius']):
    x = np.radians(deg)
    ax.axvline(x, color='skyblue', ls=':')
    ax.text(x, y, f' {deg}', color='crimson')
ax.set_rlabel_position(-15) # Move radial labels away from plotted dots
plt.tight_layout()
plt.show()
About your new question: if you have an xy plot, and you convert these xy values to polar coordinates, and then plot these on a polar plot, you'll get again the same plot.
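As a minimal sketch of why that happens (the curve here is just an arbitrary example):
import numpy as np
import matplotlib.pyplot as plt

t = np.linspace(0, 2 * np.pi, 300)
x = np.cos(t) + 0.3 * np.cos(5 * t)
y = np.sin(t) + 0.3 * np.sin(5 * t)

fig = plt.figure()
ax1 = fig.add_subplot(121)
ax2 = fig.add_subplot(122, polar=True)
ax1.plot(x, y)
ax1.set_aspect('equal')
# converting to polar coordinates and plotting on polar axes
# draws the same shape again
ax2.plot(np.arctan2(y, x), np.hypot(x, y))
plt.show()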
After some more testing with the data, I decided to create the plot directly with matplotlib, as seaborn makes some changes that don't have exactly equal effects across seaborn and matplotlib versions.
What seems to be happening in R:
The angles (given by "x") are spread out to fill the range (0, 2 pi). This either requires rescaling x or changing how the x-values are mapped to angles. One way to get this is to subtract the minimum, then divide the result by its new maximum and multiply by 2 pi.
The 0 of the angles is at the top, and the angles go clockwise.
The following code should create the plot with Python. You might want to experiment with alpha and with s in the scatter plot options. (By default the scatter dots get an outline, which often isn't desired when working with very small dots; it can be removed with lw=0.)
ax = plt.axes(polar=True)
ax.set_aspect('equal')
ax.axis('off')
x_temp = df['x'].to_numpy()
y_temp = df['y'].to_numpy()
x_temp -= x_temp.min()
x_temp = x_temp / x_temp.max() * 2 * np.pi
ax.scatter(x=x_temp, y=y_temp, s=0.05, alpha=1, color='black', lw=0)
ax.set_rlim(y_temp.min(), y_temp.max())
ax.set_theta_zero_location("N") # set zero at the north (top)
ax.set_theta_direction(-1) # go clockwise
plt.show()
At the left the resulting image, at the right using the y-values for coloring (ax.scatter(..., c=y_temp, s=0.05, alpha=1, cmap='plasma_r', lw=0)):

Python pcolormesh with separate alpha value for each bin

Lets say I have the following dataset:
import numpy as np
import matplotlib.pyplot as plt
x_bins = np.arange(10)
y_bins = np.arange(10)
z = np.random.random((9,9))
I can easily plot this data with
plt.pcolormesh(x_bins, y_bins, z, cmap='viridis')
However, let's say I now add some alpha value for each point:
a = np.random.random((9,9))
How can I change the alpha value of each box in the pcolormesh plot to match the corresponding value in array "a"?
The mesh created by pcolormesh can only have one alpha for the complete mesh. To set an individual alpha for each cell, the cells need to be created one by one as rectangles.
The code below shows the pcolormesh without alpha at the left, and the mesh of rectangles with alpha at the right. Note that on the spots where the rectangles touch, the semi-transparency causes some unequal overlap. This can be mitigated by not drawing the cell edge (edgecolor='none'), or by drawing black grid lines to separate the cells.
The code below changes the x dimension so it's easier to verify that x and y aren't mixed up. relim and autoscale are needed because, with matplotlib's default behavior, the x and y limits aren't changed by adding patches.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle, Patch
x_bins = np.arange(12)
y_bins = np.arange(10)
z = np.random.random((9, 11))
a = np.random.random((9, 11))
cmap = plt.get_cmap('inferno')
norm = plt.Normalize(z.min(), z.max())
fig, (ax1, ax2) = plt.subplots(ncols=2)
ax1.pcolormesh(x_bins, y_bins, z, cmap=cmap, norm=norm)
for i in range(len(x_bins) - 1):
    for j in range(len(y_bins) - 1):
        rect = Rectangle((x_bins[i], y_bins[j]), x_bins[i + 1] - x_bins[i], y_bins[j + 1] - y_bins[j],
                         facecolor=cmap(norm(z[j, i])), alpha=a[j, i], edgecolor='none')
        ax2.add_patch(rect)
# ax2.vlines(x_bins, y_bins.min(), y_bins.max(), color='black')
# ax2.hlines(y_bins, x_bins.min(), x_bins.max(), color='black')
ax2.relim()
ax2.autoscale(enable=True, tight=True)
plt.show()

Shade 'cells' in polar plot with matplotlib

I've got a bunch of regularly distributed points (θ = n*π/6, r=1...8), each having a value in [0, 1]. I can plot them with their values in matplotlib using
polar(thetas, rs, c=values)
But rather than having just a meagre little dot I'd like to shade the corresponding 'cell' (i.e. everything until halfway to the adjacent points) with the colour corresponding to the point's value:
(Note that here my values are just [0, .5, 1]; in reality they will be everything between 0 and 1.) Is there any straightforward way of realising this (or something close enough) with matplotlib? Maybe it's easier to think about it as a 2D histogram?
This can be done quite nicely by treating it as a polar stacked barchart:
import matplotlib.pyplot as plt
import numpy as np
from random import choice
fig = plt.figure()
ax = fig.add_axes([0.1, 0.1, 0.8, 0.8], polar=True)
for i in range(12 * 8):
    color = choice(['navy','maroon','lightgreen'])
    # i // 12 keeps the integer division the original Python 2 code relied on
    ax.bar(i * 2 * np.pi / 12, 1, width=2 * np.pi / 12, bottom=i // 12,
           color=color, edgecolor=color)
plt.ylim(0,10)
ax.set_yticks([])
plt.show()
Produces:
Sure! Just use pcolormesh on a polar axes.
E.g.
import matplotlib.pyplot as plt
import numpy as np
# Generate some data...
# Note that all of these are _2D_ arrays, so that we can use meshgrid
# You'll need to "grid" your data to use pcolormesh if it's un-ordered points
theta, r = np.mgrid[0:2*np.pi:20j, 0:1:10j]
z = np.random.random(theta.size).reshape(theta.shape)
fig, (ax1, ax2) = plt.subplots(ncols=2, subplot_kw=dict(projection='polar'))
ax1.scatter(theta.flatten(), r.flatten(), c=z.flatten())
ax1.set_title('Scattered Points')
ax2.pcolormesh(theta, r, z)
ax2.set_title('Cells')
for ax in [ax1, ax2]:
    ax.set_ylim([0, 1])
    ax.set_yticklabels([])
plt.show()
If your data isn't already on a regular grid, then you'll need to grid it to use pcolormesh.
It looks like it's on a regular grid from your plot, though. In that case, gridding it is quite simple. If it's already ordered, it may be as simple as calling reshape. Otherwise, a simple loop or exploiting numpy.histogram2d with your z values as weights will do what you need.
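As a hedged sketch of the histogram2d route: summing the z values per (theta, r) bin and dividing by the bin counts gives a per-cell mean that pcolormesh can draw. The bin counts and the sample data here are arbitrary choices for the example:
import numpy as np
import matplotlib.pyplot as plt

# unordered sample points (theta, r), each with a value z
rng = np.random.default_rng(0)
theta = rng.uniform(0, 2 * np.pi, 500)
r = rng.uniform(0, 1, 500)
z = np.cos(theta) * r

# grid the scattered values: per-bin sum of z divided by per-bin count
sums, t_edges, r_edges = np.histogram2d(theta, r, bins=(12, 8), weights=z)
counts, _, _ = np.histogram2d(theta, r, bins=(t_edges, r_edges))
means = sums / np.where(counts > 0, counts, 1)  # avoid division by zero

ax = plt.subplot(projection='polar')
ax.pcolormesh(t_edges, r_edges, means.T)  # transpose: histogram2d output is (x, y) indexed
plt.show()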
Well, it's fairly unpolished overall, but here's a version that rounds out the sections.
from matplotlib.pylab import *
ax = subplot(111, projection='polar')
# set up grid and colors
th = array([pi/6 * n for n in range(13)]) # so n = 0..12, allowing for full wrapping
r = array(range(9)) # r = 0..8
c = array([[random_integers(0, 10)/10 for y in range(th.size)] for x in range(r.size)])
# The smoothing
# (note: cbook.simple_linear_interpolation has been removed from recent matplotlib releases)
TH = cbook.simple_linear_interpolation(th, 10)
# Properly padding out C so the colors go with the right sectors (can't remember the proper word for such segments of wedges)
# A much more elegant version could probably be created using stuff from itertools or functools
C = zeros((r.size, TH.size))
oldfill = 0
TH_ = TH.tolist()
for i in range(th.size):
    fillto = TH_.index(th[i])
    for j, x in enumerate(c[:,i]):
        C[j, oldfill:fillto].fill(x)
    oldfill = fillto
# The plotting
th, r = meshgrid(TH, r)
ax.pcolormesh(th, r, C)
show()

Pyplot - rescaling y axis after limiting x axis

I'm trying to plot some data using pyplot, and then 'zoom in' by using xlim() on the x axis. However, the new plot doesn't rescale the y axis when I do this - am I doing something wrong?
Example - in this code, the plot's y-axis range still goes up to 20, rather than 10:
from pylab import *
x = range(20)
y = range(20)
xlim(0,10)
autoscale(enable=True, axis='y', tight=None)
scatter(x,y)
show()
close()
Realize this is an ancient question, but this is how I've (messily) gotten around the issue:
use .plot() instead of .scatter()
access plot data later (even after a figure is returned somewhere) with ax.get_lines()[0].get_xydata()
use that data to rescale y axis to xlims
Snippet should work:
import matplotlib.pyplot as plt
import numpy as np
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
x = range(20)
y = range(20)
xlims = [0, 10]
ax.set_xlim(xlims)
ax.plot(x, y, marker='.', ls='')
# pull plot data
data = ax.get_lines()[0].get_xydata()
# cut out data in xlims window
data = data[np.logical_and(data[:, 0] >= xlims[0], data[:, 0] <= xlims[1])]
# rescale y
ax.set_ylim(np.min(data[:, 1]), np.max(data[:, 1]))
plt.show()
I don't know, though you could try manually filtering the points with
xy = [(a, b) for a, b in zip(x, y) if 0 < a < 10]
scatter(*zip(*xy))  # scatter needs x and y as separate sequences

Generate a heatmap using a scatter data set

I have a set of X,Y data points (about 10k) that are easy to plot as a scatter plot but that I would like to represent as a heatmap.
I looked through the examples in Matplotlib and they all seem to already start with heatmap cell values to generate the image.
Is there a method that converts a bunch of x, y, all different, to a heatmap (where zones with higher frequency of x, y would be "warmer")?
If you don't want hexagons, you can use numpy's histogram2d function:
import numpy as np
import numpy.random
import matplotlib.pyplot as plt
# Generate some test data
x = np.random.randn(8873)
y = np.random.randn(8873)
heatmap, xedges, yedges = np.histogram2d(x, y, bins=50)
extent = [xedges[0], xedges[-1], yedges[0], yedges[-1]]
plt.clf()
plt.imshow(heatmap.T, extent=extent, origin='lower')
plt.show()
This makes a 50x50 heatmap. If you want, say, 512x384, you can put bins=(512, 384) in the call to histogram2d.
Example:
In Matplotlib lexicon, I think you want a hexbin plot.
If you're not familiar with this type of plot, it's just a bivariate histogram in which the xy-plane is tessellated by a regular grid of hexagons.
So from a histogram, you can just count the number of points falling in each hexagon, discretize the plotting region as a set of windows, assign each point to one of these windows, and finally map the windows onto a color array, and you've got a hexbin diagram.
Though less commonly used than, e.g., circles or squares, it is intuitive that hexagons are a better choice for the geometry of the binning container:
hexagons have nearest-neighbor symmetry (square bins don't; e.g., the distance from a point on a square's border to a point inside that square is not everywhere equal), and
the hexagon is the highest-n polygon that gives a regular plane tessellation (i.e., you can safely re-model your kitchen floor with hexagonal tiles because you won't have any void space between the tiles when you are finished; this is not true for other higher-n polygons, n >= 7).
(Matplotlib uses the term hexbin plot; so do, AFAIK, all of the plotting libraries for R; still, I don't know if this is the generally accepted term for plots of this type, though I suspect it's likely, given that hexbin is short for hexagonal binning, which describes the essential step in preparing the data for display.)
from matplotlib import pyplot as PLT
from matplotlib import cm as CM
from matplotlib import mlab as ML
import numpy as NP
n = 1e5
x = y = NP.linspace(-5, 5, 100)
X, Y = NP.meshgrid(x, y)
# note: mlab.bivariate_normal was removed from newer matplotlib releases
Z1 = ML.bivariate_normal(X, Y, 2, 2, 0, 0)
Z2 = ML.bivariate_normal(X, Y, 4, 1, 1, 1)
ZD = Z2 - Z1
x = X.ravel()
y = Y.ravel()
z = ZD.ravel()
gridsize=30
PLT.subplot(111)
# if 'bins=None', then color of each hexagon corresponds directly to its count
# 'C' is optional--it maps values to x-y coordinates; if 'C' is None (default) then
# the result is a pure 2D histogram
PLT.hexbin(x, y, C=z, gridsize=gridsize, cmap=CM.jet, bins=None)
PLT.axis([x.min(), x.max(), y.min(), y.max()])
cb = PLT.colorbar()
cb.set_label('mean value')
PLT.show()
Edit: For a better approximation of Alejandro's answer, see below.
I know this is an old question, but wanted to add something to Alejandro's answer: if you want a nice smoothed image without using py-sphviewer you can instead use np.histogram2d and apply a gaussian filter (from scipy.ndimage) to the heatmap:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from scipy.ndimage import gaussian_filter
def myplot(x, y, s, bins=1000):
    heatmap, xedges, yedges = np.histogram2d(x, y, bins=bins)
    heatmap = gaussian_filter(heatmap, sigma=s)
    extent = [xedges[0], xedges[-1], yedges[0], yedges[-1]]
    return heatmap.T, extent
fig, axs = plt.subplots(2, 2)
# Generate some test data
x = np.random.randn(1000)
y = np.random.randn(1000)
sigmas = [0, 16, 32, 64]
for ax, s in zip(axs.flatten(), sigmas):
    if s == 0:
        ax.plot(x, y, 'k.', markersize=5)
        ax.set_title("Scatter plot")
    else:
        img, extent = myplot(x, y, s)
        ax.imshow(img, extent=extent, origin='lower', cmap=cm.jet)
        ax.set_title(r"Smoothing with $\sigma$ = %d" % s)
plt.show()
Produces:
The scatter plot and s=16 plotted on top of each other for Agape Gal'lo (click for better view):
One difference I noticed between my gaussian filter approach and Alejandro's approach is that his method shows local structures much better than mine. Therefore I implemented a simple nearest-neighbour method at pixel level. This method calculates, for each pixel, the inverse sum of the distances of the n closest points in the data. At high resolutions this method is pretty computationally expensive, and I think there's a quicker way, so let me know if you have any improvements.
Update: As I suspected, there's a much faster method using scipy.spatial.cKDTree. See Gabriel's answer for the implementation.
Anyway, here's my code:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
def data_coord2view_coord(p, vlen, pmin, pmax):
    dp = pmax - pmin
    dv = (p - pmin) / dp * vlen
    return dv

def nearest_neighbours(xs, ys, reso, n_neighbours):
    im = np.zeros([reso, reso])
    extent = [np.min(xs), np.max(xs), np.min(ys), np.max(ys)]
    xv = data_coord2view_coord(xs, reso, extent[0], extent[1])
    yv = data_coord2view_coord(ys, reso, extent[2], extent[3])
    for x in range(reso):
        for y in range(reso):
            xp = (xv - x)
            yp = (yv - y)
            d = np.sqrt(xp**2 + yp**2)
            im[y][x] = 1 / np.sum(d[np.argpartition(d.ravel(), n_neighbours)[:n_neighbours]])
    return im, extent
n = 1000
xs = np.random.randn(n)
ys = np.random.randn(n)
resolution = 250
fig, axes = plt.subplots(2, 2)
for ax, neighbours in zip(axes.flatten(), [0, 16, 32, 64]):
    if neighbours == 0:
        ax.plot(xs, ys, 'k.', markersize=2)
        ax.set_aspect('equal')
        ax.set_title("Scatter Plot")
    else:
        im, extent = nearest_neighbours(xs, ys, resolution, neighbours)
        ax.imshow(im, origin='lower', extent=extent, cmap=cm.jet)
        ax.set_title("Smoothing over %d neighbours" % neighbours)
        ax.set_xlim(extent[0], extent[1])
        ax.set_ylim(extent[2], extent[3])
plt.show()
Result:
Instead of using np.histogram2d, which in general produces quite ugly histograms, I would like to recycle py-sphviewer, a python package for rendering particle simulations using an adaptive smoothing kernel, which can be easily installed from pip (see the webpage documentation). Consider the following code, which is based on the example:
import numpy as np
import numpy.random
import matplotlib.pyplot as plt
import sphviewer as sph
def myplot(x, y, nb=32, xsize=500, ysize=500):
    xmin = np.min(x)
    xmax = np.max(x)
    ymin = np.min(y)
    ymax = np.max(y)
    x0 = (xmin + xmax) / 2.
    y0 = (ymin + ymax) / 2.
    pos = np.zeros([len(x), 3])
    pos[:, 0] = x
    pos[:, 1] = y
    w = np.ones(len(x))
    P = sph.Particles(pos, w, nb=nb)
    S = sph.Scene(P)
    S.update_camera(r='infinity', x=x0, y=y0, z=0,
                    xsize=xsize, ysize=ysize)
    R = sph.Render(S)
    R.set_logscale()
    img = R.get_image()
    extent = R.get_extent()
    for i, j in zip(range(4), [x0, x0, y0, y0]):
        extent[i] += j
    print(extent)
    return img, extent
fig = plt.figure(1, figsize=(10,10))
ax1 = fig.add_subplot(221)
ax2 = fig.add_subplot(222)
ax3 = fig.add_subplot(223)
ax4 = fig.add_subplot(224)
# Generate some test data
x = np.random.randn(1000)
y = np.random.randn(1000)
#Plotting a regular scatter plot
ax1.plot(x,y,'k.', markersize=5)
ax1.set_xlim(-3,3)
ax1.set_ylim(-3,3)
heatmap_16, extent_16 = myplot(x,y, nb=16)
heatmap_32, extent_32 = myplot(x,y, nb=32)
heatmap_64, extent_64 = myplot(x,y, nb=64)
ax2.imshow(heatmap_16, extent=extent_16, origin='lower', aspect='auto')
ax2.set_title("Smoothing over 16 neighbors")
ax3.imshow(heatmap_32, extent=extent_32, origin='lower', aspect='auto')
ax3.set_title("Smoothing over 32 neighbors")
#Make the heatmap using a smoothing over 64 neighbors
ax4.imshow(heatmap_64, extent=extent_64, origin='lower', aspect='auto')
ax4.set_title("Smoothing over 64 neighbors")
plt.show()
which produces the following image:
As you see, the images look pretty nice, and we are able to identify different substructures in them. These images are constructed by spreading a given weight for every point within a certain domain, defined by the smoothing length, which in turn is given by the distance to the nb-th closest neighbor (I've chosen 16, 32 and 64 for the examples). So, higher density regions typically are spread over smaller regions compared to lower density regions.
The function myplot is just a very simple function that I've written in order to give the x,y data to py-sphviewer to do the magic.
If you are using matplotlib 1.2.x or newer, plt.hist2d is available:
import numpy as np
import matplotlib.pyplot as plt
x = np.random.randn(100000)
y = np.random.randn(100000)
plt.hist2d(x,y,bins=100)
plt.show()
Seaborn now has the jointplot function which should work nicely here:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# Generate some test data
x = np.random.randn(8873)
y = np.random.randn(8873)
sns.jointplot(x=x, y=y, kind='hex')
plt.show()
Here's Jurgy's great nearest-neighbour approach, but implemented using scipy.spatial.cKDTree. In my tests it's about 100x faster.
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from scipy.spatial import cKDTree
def data_coord2view_coord(p, resolution, pmin, pmax):
    dp = pmax - pmin
    dv = (p - pmin) / dp * resolution
    return dv
n = 1000
xs = np.random.randn(n)
ys = np.random.randn(n)
resolution = 250
extent = [np.min(xs), np.max(xs), np.min(ys), np.max(ys)]
xv = data_coord2view_coord(xs, resolution, extent[0], extent[1])
yv = data_coord2view_coord(ys, resolution, extent[2], extent[3])
def kNN2DDens(xv, yv, resolution, neighbours, dim=2):
    """Grid the inverse of the summed distances to the nearest neighbours."""
    # Create the tree
    tree = cKDTree(np.array([xv, yv]).T)
    # Find the closest nnmax-1 neighbors (first entry is the point itself)
    grid = np.mgrid[0:resolution, 0:resolution].T.reshape(resolution**2, dim)
    dists = tree.query(grid, neighbours)
    # Inverse of the sum of distances to each grid point.
    inv_sum_dists = 1. / dists[0].sum(1)
    # Reshape
    im = inv_sum_dists.reshape(resolution, resolution)
    return im
fig, axes = plt.subplots(2, 2, figsize=(15, 15))
for ax, neighbours in zip(axes.flatten(), [0, 16, 32, 63]):
    if neighbours == 0:
        ax.plot(xs, ys, 'k.', markersize=5)
        ax.set_aspect('equal')
        ax.set_title("Scatter Plot")
    else:
        im = kNN2DDens(xv, yv, resolution, neighbours)
        ax.imshow(im, origin='lower', extent=extent, cmap=cm.Blues)
        ax.set_title("Smoothing over %d neighbours" % neighbours)
        ax.set_xlim(extent[0], extent[1])
        ax.set_ylim(extent[2], extent[3])
plt.savefig('new.png', dpi=150, bbox_inches='tight')
plt.savefig('new.png', dpi=150, bbox_inches='tight')
And the initial question was... how to convert scatter values to grid values, right?
histogram2d does count the frequency per cell; however, if you have other data per cell than just the frequency, you'll need some additional work.
x = data_x # between -10 and 4, log-gamma of an svc
y = data_y # between -4 and 11, log-C of an svc
z = data_z #between 0 and 0.78, f1-values from a difficult dataset
So, I have a dataset with Z-results for X and Y coordinates. However, I was calculating a few points outside the area of interest (large gaps), and heaps of points in a small area of interest.
Yes, here it becomes more difficult, but also more fun. Some libraries (sorry):
from matplotlib import pyplot as plt
from matplotlib import cm
import numpy as np
from scipy.interpolate import griddata
pyplot is my graphic engine today;
cm is a range of color maps with some interesting choices;
numpy is for the calculations;
and griddata is for attaching values to a fixed grid.
The last one is important especially because the frequency of xy points is not equally distributed in my data. First, let's start with some boundaries fitting to my data and an arbitrary grid size. The original data has datapoints also outside those x and y boundaries.
#determine grid boundaries
gridsize = 500
x_min = -8
x_max = 2.5
y_min = -2
y_max = 7
So we have defined a grid with 500 pixels between the min and max values of x and y.
In my data, there are lots more than the 500 values available in the area of high interest; whereas in the low-interest area, there are not even 200 values in the total grid; between the graphic boundaries of x_min and x_max there are even fewer.
So for getting a nice picture, the task is to get an average for the high interest values and to fill the gaps elsewhere.
I define my grid now. For each xx-yy pair, I want to have a color.
xx = np.linspace(x_min, x_max, gridsize) # array of x values
yy = np.linspace(y_min, y_max, gridsize) # array of y values
grid = np.array(np.meshgrid(xx, yy.T))
grid = grid.reshape(2, grid.shape[1]*grid.shape[2]).T
Why the strange shape? scipy.griddata wants a shape of (n, D).
Griddata calculates one value per point in the grid, by a predefined method.
I choose "nearest" - empty grid points will be filled with values from the nearest neighbor. This looks as if the areas with less information have bigger cells (even if it is not the case). One could choose to interpolate "linear", then areas with less information look less sharp. Matter of taste, really.
points = np.array([x, y]).T # because griddata wants it that way
z_grid2 = griddata(points, z, grid, method='nearest')
# you get a 1D vector as result. Reshape to picture format!
z_grid2 = z_grid2.reshape(xx.shape[0], yy.shape[0])
And hop, we hand over to matplotlib to display the plot
fig = plt.figure(1, figsize=(10, 10))
ax1 = fig.add_subplot(111)
ax1.imshow(z_grid2, extent=[x_min, x_max, y_min, y_max],
           origin='lower', cmap=cm.magma)
ax1.set_title("SVC: empty spots filled by nearest neighbours")
ax1.set_xlabel('log gamma')
ax1.set_ylabel('log C')
plt.show()
Around the pointy part of the V-Shape, you see I did a lot of calculations during my search for the sweet spot, whereas the less interesting parts almost everywhere else have a lower resolution.
Make a 2-dimensional array that corresponds to the cells in your final image, called say heatmap_cells and instantiate it as all zeroes.
Choose two scaling factors that define the difference between each array element in real units, for each dimension, say x_scale and y_scale. Choose these such that all your datapoints will fall within the bounds of the heatmap array.
For each raw datapoint with x_value and y_value:
heatmap_cells[floor(x_value/x_scale),floor(y_value/y_scale)]+=1
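A minimal numpy sketch of that recipe (the scale factors, array size, and sample data are arbitrary choices for illustration):
import numpy as np
import matplotlib.pyplot as plt

# example data; any set of x, y points works
x_values = np.random.randn(10000)
y_values = np.random.randn(10000)

x_scale = y_scale = 0.1                  # real units per heatmap cell
x0, y0 = x_values.min(), y_values.min()  # shift so every point gets a non-negative index
heatmap_cells = np.zeros((100, 100))     # large enough for the shifted data at this scale
nx, ny = heatmap_cells.shape

for x_value, y_value in zip(x_values, y_values):
    i = int(np.floor((x_value - x0) / x_scale))
    j = int(np.floor((y_value - y0) / y_scale))
    if i < nx and j < ny:
        heatmap_cells[i, j] += 1

plt.imshow(heatmap_cells.T, origin='lower')  # transpose so x runs along the horizontal axis
plt.show()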
Very similar to @Piti's answer, but using one call instead of two to generate the points:
import numpy as np
import matplotlib.pyplot as plt
pts = 1000000
mean = [0.0, 0.0]
cov = [[1.0,0.0],[0.0,1.0]]
x,y = np.random.multivariate_normal(mean, cov, pts).T
plt.hist2d(x, y, bins=50, cmap=plt.cm.jet)
plt.show()
Output:
Here's one I made on a 1-million-point set with 3 categories (colored red, green, and blue). Here's a link to the repository if you'd like to try the function. Github Repo
histplot(
    X,
    Y,
    labels,
    bins=2000,
    range=((-3, 3), (-3, 3)),
    normalize_each_label=True,
    colors=[
        [1, 0, 0],
        [0, 1, 0],
        [0, 0, 1]],
    gain=50)
I'm afraid I'm a little late to the party, but I had a similar question a while ago. The accepted answer (by @ptomato) helped me out, but I'd also want to post this in case it's of use to someone.
''' I wanted to create a heatmap resembling a football pitch which would show the different actions performed '''
import numpy as np
import matplotlib.pyplot as plt
import random
#fixing random state for reproducibility
np.random.seed(1234324)
fig = plt.figure(12)
ax1 = fig.add_subplot(121)
ax2 = fig.add_subplot(122)
#Ratio of the pitch with respect to UEFA standards
hmap= np.full((6, 10), 0)
#print(hmap)
xlist = np.random.uniform(low=0.0, high=100.0, size=(20))
ylist = np.random.uniform(low=0.0, high =100.0, size =(20))
#UEFA Pitch Standards are 105m x 68m
xlist = (xlist/100)*10.5
ylist = (ylist/100)*6.5
ax1.scatter(xlist,ylist)
#int of the co-ordinates to populate the array
xlist_int = xlist.astype(int)
ylist_int = ylist.astype(int)
#print(xlist_int, ylist_int)
for i, j in zip(xlist_int, ylist_int):
    # this populates the array according to the x,y co-ordinate values it encounters
    hmap[j][i] = hmap[j][i] + 1
#Reversing the rows is necessary
hmap = hmap[::-1]
#print(hmap)
im = ax2.imshow(hmap)
Here's the result
None of these solutions worked for my application, so this is what I came up with. Essentially I am placing a 2D Gaussian at every single point:
import cv2
import numpy as np
import matplotlib.pyplot as plt
def getGaussian2D(ksize, sigma, norm=True):
    oneD = cv2.getGaussianKernel(ksize=ksize, sigma=sigma)
    twoD = np.outer(oneD.T, oneD)
    return twoD / np.sum(twoD) if norm else twoD

def pts2heat(pts, shape, kernel=16, sigma=5):
    heat = np.zeros(shape)
    k = getGaussian2D(kernel, sigma)
    for y, x in pts:
        x, y = int(x), int(y)
        for i in range(-kernel//2, kernel//2):
            for j in range(-kernel//2, kernel//2):
                if 0 <= x+i < shape[0] and 0 <= y+j < shape[1]:
                    heat[x+i, y+j] = heat[x+i, y+j] + k[i+kernel//2, j+kernel//2]
    return heat

# pts and img are the user's own points and image
heat = pts2heat(pts, img.shape[:2])
plt.imshow(heat, cmap='hot')  # 'heat' is not a matplotlib colormap; 'hot' is
Here are the points overlaid on top of their associated image, along with the resulting heatmap:
