Weighted sum of adjacent values in numpy array - python

What is the easiest/fastest way to take a weighted sum of values in a numpy array?
Example: Solving the heat equation with the Euler method
from numpy import zeros, ndenumerate

length_l = 10
time_l = 10
u = zeros((length_l, length_l))  # (x, y)
u[:, 0] = 1
u[:, -1] = 1
print(u)

def dStep(ALPHA=0.1):
    for position, value in ndenumerate(u):
        D2u = (u[position + (1, 0)] - 2*value + u[position + (-1, 0)])/(1**2) \
            + (u[position + (0, 1)] - 2*value + u[position + (0, -1)])/(1**2)
        value += ALPHA*D2u

while True:
    dStep()
    print(u)
D2u should be the second central difference in two dimensions. This would work if I could add indices element-wise, like (1,4)+(1,3)=(2,7). Unfortunately, Python concatenates tuples instead: (1,4)+(1,3)=(1,4,1,3).
Note that computing D2u is equivalent to taking a dot product with this kernel centered around the current position:
0, 1, 0
1,-4, 1
0, 1, 0
Can this be vectorised as a dot product?
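A quick illustration of that tuple behaviour, and one way to get element-wise index arithmetic (assuming import numpy as np):

>>> (1, 4) + (1, 3)                 # tuples concatenate, they don't add element-wise
(1, 4, 1, 3)
>>> tuple(np.add((1, 4), (1, 3)))   # numpy adds element-wise
(2, 7)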

I think you want something like:
import numpy as np
from scipy.ndimage import convolve

length_l = 10
time_l = 10
u = np.zeros((length_l, length_l))  # (x, y)
u[:, 0] = 1
u[:, -1] = 1
alpha = .1
weights = np.array([[0,  1, 0],
                    [1, -4, 1],
                    [0,  1, 0]])

for i in range(5):
    u += alpha * convolve(u, weights)
    print(u)
You could reduce down a bit by doing:
weights = alpha * weights
weights[1, 1] = weights[1, 1] + 1

for i in range(5):
    u = convolve(u, weights)
    print(u)
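If you'd rather not depend on scipy, the same update can be written with plain numpy slicing. Here is a minimal sketch (my addition, not from the original answer) that only updates interior points, so the boundary handling differs slightly from convolve's default mode:

import numpy as np

length_l = 10
u = np.zeros((length_l, length_l))
u[:, 0] = 1
u[:, -1] = 1
alpha = 0.1

for i in range(5):
    # Five-point Laplacian of the interior, computed with shifted slices
    lap = (u[2:, 1:-1] + u[:-2, 1:-1] + u[1:-1, 2:] + u[1:-1, :-2]
           - 4 * u[1:-1, 1:-1])
    u[1:-1, 1:-1] += alpha * lap
    print(u)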


ValueError: setting an array element with a sequence for generating a weighted data set?

This is the code I'm trying to run to generate a data set with 3 sample populations, where one class is drawn from a combined Gaussian distribution with two sets of means and covariances -- hence the addition of the two multivariate_normal.rvs calls that fill the indices of the 'blank' data set. I'm not sure how to combine them without the result being treated as a sequence.
import numpy as np
from scipy.stats import norm, multivariate_normal

N_valid = 10000

def generate_data_from_gmm(N, pdf_params, fig_ax=None):
    # Determine dimensionality from mixture PDF parameters
    n = pdf_params['mu'].shape[1]
    print(n)
    # Determine number of classes/mixture components
    C = len(pdf_params['priors'])
    # Output samples and labels
    X = np.zeros([N, n])
    labels = np.zeros(N)
    # Decide randomly which samples will come from each component
    # u_i ~ Uniform(0, 1) for i = 1, ..., N (or 0, ..., N-1 in code)
    u = np.random.rand(N)
    # Determine the thresholds based on the mixture weights/priors for the GMM,
    # which need to sum up to 1
    thresholds = np.cumsum(pdf_params['priors'])
    thresholds = np.insert(thresholds, 0, 0)  # For intervals of classes
    marker_shapes = 'ox+*.'  # Accommodates up to C=5
    marker_colors = 'brgmy'
    Y = np.array(range(1, C + 1))
    for y in Y:
        # Get randomly sampled indices for this component
        indices = np.argwhere((thresholds[y-1] <= u) & (u <= thresholds[y]))[:, 0]
        # No. of samples in this component
        Ny = len(indices)
        labels[indices] = y * np.ones(Ny) - 1
        if n == 1:
            X[indices, 0] = norm.rvs(pdf_params['mu'][y-1], pdf_params['Sigma'][y-1], Ny)
        else:
            X[indices, :] = (multivariate_normal.rvs(pdf_params['mu'][y-1], pdf_params['Sigma'][y-1], Ny)
                             + multivariate_normal.rvs(pdf_params['mu'][y], pdf_params['Sigma'][y], Ny))

gmm_pdf = {}
# Likelihood of each distribution to be selected AND class priors!!!
gmm_pdf['priors'] = np.array([0.65, 0.35])
gmm_pdf['mu'] = np.array([[3, 0],
                          [0, 3],
                          [2, 2]])  # Gaussian distribution means
gmm_pdf['Sigma'] = np.array([[[2, 0],
                              [0, 1]],
                             [[1, 0],
                              [0, 2]],
                             [[1, 0],
                              [0, 1]]])  # Gaussian distribution covariance matrices
This specifically happens in this line:
X[indices, :] = (multivariate_normal.rvs(pdf_params['mu'][y-1], pdf_params['Sigma'][y-1], Ny)
                 + multivariate_normal.rvs(pdf_params['mu'][y], pdf_params['Sigma'][y], Ny))
Any ideas?

Is there a vectorized way to sample multiple times with np.random.choice() with different p?

I'm trying to implement a variation ratio, and I need T samples from an array C, but each sample has different weights p_t.
I'm using this:
import numpy as np
from scipy import stats

batch_size = 1
T = 3
C = np.array(['A', 'B', 'C'])
# p_batch_T dimensions: (batch, sample, class)
p_batch_T = np.array([[[0.01, 0.98, 0.01],
                       [0.3,  0.15, 0.55],
                       [0.85, 0.1,  0.05]]])

def variation_ratio(C, p_T):
    # This function works only with one sample from the batch.
    Y_T = np.array([np.random.choice(C, size=1, p=p_t) for p_t in p_T])  # vectorize this
    C_mode, f = stats.mode(Y_T)
    T = len(Y_T)
    return 1.0 - (f / T)

def variation_ratio_batch(C, p_batch_T):
    return np.array([variation_ratio(C, p_T) for p_T in p_batch_T])  # and vectorize this
Is there a way to implement these functions without any for loops?
Instead of sampling directly from the given distribution p_T, we can sample uniformly on [0, 1] and compare against the cumulative distribution:
Let's start with Y_T, say for p_T = p_batch_T[0]
cum_dist = p_batch_T.cumsum(axis=-1)
idx_T = (np.random.rand(len(C),1) < cum_dist[0]).argmax(-1)
Y_T = C[idx_T[...,None]]
_, f = stats.mode(Y_T) # here axis=0 is default
Now let's take that to variation_ratio_batch:
idx_T = (np.random.rand(len(p_batch_T), len(C),1) < cum_dist).argmax(-1)
Y = C[idx_T[...,None]]
_, f = stats.mode(Y, axis=1) # notice axis 0 is batch
out = 1 - (f/T)
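Putting those pieces together into one runnable snippet (my consolidation of the idea above, using the shapes from the question and taking the mode over the integer indices rather than the labels, which is equivalent here):

import numpy as np
from scipy import stats

C = np.array(['A', 'B', 'C'])
p_batch_T = np.array([[[0.01, 0.98, 0.01],
                       [0.3,  0.15, 0.55],
                       [0.85, 0.1,  0.05]]])
batch, T, n_classes = p_batch_T.shape

# Inverse-CDF sampling: one uniform draw per (batch, sample), compared
# against the cumulative distribution along the class axis.
cum_dist = p_batch_T.cumsum(axis=-1)
idx = (np.random.rand(batch, T, 1) < cum_dist).argmax(-1)   # shape (batch, T)
Y = C[idx]                                                  # sampled class labels

# Variation ratio per batch row: 1 minus the modal frequency.
_, f = stats.mode(idx, axis=1)
out = 1 - f / T
print(Y, out)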
You could do it this way:
First, create a 2D weights array of shape (T, len(C)) and take the cumulative sum:
n_rows = 5
n_cols = 3
weights = np.random.rand(n_rows, n_cols)
cum_weights = (weights / weights.sum(axis=1, keepdims=True)).cumsum(axis=1)
cum_weights might look like this:
array([[0.09048919, 0.58962127, 1.        ],
       [0.36333997, 0.58380885, 1.        ],
       [0.28761923, 0.63413879, 1.        ],
       [0.39446498, 0.98760834, 1.        ],
       [0.27862476, 0.79715149, 1.        ]])
Next, we can compare cum_weights to an appropriately sized output of np.random.rand. Taking argmin then gives, for each row, the first index at which the cumulative weight reaches or exceeds that row's random number:
indices = (cum_weights < np.random.rand(n_rows, 1)).argmin(axis=1)
We can then use indices to index an array of values of shape (n_cols,), which is len(C) in your original example.
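For example, if the values are the C array from the question (this indexing step is my addition for illustration):

C = np.array(['A', 'B', 'C'])
samples = C[indices]   # one weighted draw per row of cum_weights
print(samples)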
np.vectorize should work:
from functools import partial
import numpy as np

@partial(np.vectorize, excluded=['rng'], signature='(),(k)->()')
def choice_batched(rng, probs):
    return rng.choice(a=probs.shape[-1], p=probs)
then
num_classes = 3
batch_size = 5
alpha = .5 # Dirichlet prior hyperparameter.
rng = np.random.default_rng()
probs = np.random.dirichlet(alpha=np.full(fill_value=alpha, shape=num_classes), size=batch_size)
# Check each row sums to 1.
assert np.allclose(probs.sum(axis=-1), 1)
print(choice_batched(rng, probs))
print(choice_batched(rng, probs))
print(choice_batched(rng, probs))
print(choice_batched(rng, probs))
gives
[2 0 0 0 1]
[1 0 0 0 1]
[2 0 2 0 1]
[1 0 0 0 0]
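Note that np.vectorize is essentially a Python-level for loop under the hood (the numpy docs describe it as a convenience rather than a performance tool), so for large batches the cumulative-sum approaches above will typically be faster.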
Here is my implementation of Quang's and gmds' solutions:
def sample(ws, k):
    """Weighted sample of k elements along the last axis.

    ws -- Tensor of probabilities, shape (*, n)
    k  -- Number of elements to sample.

    Returns a tensor of shape (*, k) with values in {0, ..., n-1}.
    """
    assert np.allclose(ws.sum(-1), 1)
    cs = ws.cumsum(-1)
    ps = np.random.random(ws.shape[:-1] + (k,))
    return (cs[..., None, :] < ps[..., None]).sum(-1)
Say we have some stuff
>>> stuff = array([[0, 1, 2],
...                [3, 4, 5],
...                [6, 7, 8]])
And some weights / sampling probabilities.
>>> ws = array([[0.41296038, 0.36070229, 0.22633733],
...             [0.37576672, 0.14518771, 0.47904557],
...             [0.14742326, 0.29182459, 0.56075215]])
And we want to sample 2 elements along each row. Then we do
>>> ids = sample(ws, 2)
[[2, 0],
 [1, 2],
 [2, 2]]
And we can retrieve the sampled values from stuff using np.take_along_axis:
>>> np.take_along_axis(stuff, ids, axis=1)
[[2, 0],
 [4, 5],
 [8, 8]]
The code could be generalized to sampling along an axis other than the last one, but I got confused about broadcasting, so somebody else should have a stab at it!
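One possible way to do that generalization (my own sketch, not verified beyond the shapes): move the target axis to the end with np.moveaxis, reuse sample, then move the new sample axis back.

def sample_along_axis(ws, k, axis=-1):
    """Weighted sample of k indices along `axis` of ws (rough sketch)."""
    ws_last = np.moveaxis(ws, axis, -1)   # put the probability axis last
    ids = sample(ws_last, k)              # reuse sample() from above
    return np.moveaxis(ids, -1, axis)     # the sample axis replaces the probability axis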

Factoring a polynomial with respect to specific terms

import numpy as np
import sympy as sp
from sympy import *
init_printing()
uVars = list(symbols(', '.join([f'u{n}' for n in range(1, 3 + 1)])))
aVars = list(symbols(', '.join([f'a{n}' for n in range(1, 3 + 1)])))
lambda1, mu = symbols('lambda, mu')
U = np.array([ [0, -uVars[2], uVars[1]], [uVars[2], 0, -uVars[0]], [-uVars[1], uVars[0], 0] ])
a = np.array([ [aVars[0], 0, 0], [0, aVars[1], 0], [0, 0, aVars[2]] ])
I = np.eye(3)
L = a*lambda1 + U
preCharPoly = L - mu*I
preCharPoly_sym = sp.Matrix(preCharPoly)
factor(preCharPoly_sym.det())
The above code outputs the determinant as a factored polynomial (shown as an image in the original post). However, I require the polynomial to be factored with respect to the variables lambda and mu, as in the desired form (also shown as an image). I have been examining the documentation at https://docs.sympy.org/latest/modules/simplify/simplify.html but cannot figure out how to do what is desired. How do I tell factor() or simplify() to perform their tasks with respect to lambda and mu?
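One direction worth trying (a suggestion of mine, not confirmed to reproduce the exact desired form) is sympy's collect, which groups an expanded expression by powers of chosen symbols rather than fully factoring it:

charPoly = sp.expand(preCharPoly_sym.det())
# collect() groups terms by powers of the given symbols instead of factoring fully.
collected = sp.collect(charPoly, [lambda1, mu])
print(collected)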

How to convert a rectangular matrix into a stochastic and irreducible matrix?

I have written the following code to convert a matrix into a stochastic and irreducible matrix, following the paper "Deeper Inside PageRank". The code works well for a square matrix but gives an error for rectangular matrices. How can I modify it to convert rectangular matrices into stochastic, irreducible matrices?
My Code:
import numpy as np

P = np.array([[0, 1/2, 1/2, 0, 0, 0],
              [0, 0, 0, 0, 0, 0],
              [1/3, 1/3, 0, 0, 1/3, 0],
              [0, 0, 0, 0, 1/2, 1/2],
              [0, 0, 0, 1/2, 0, 1/2]])
# P is the original matrix containing zero rows

col_len = len(P[0])
row_len = len(P)

eT = np.ones(shape=(1, col_len))  # Row vector of ones to replace a row of zeros
e = eT.transpose()                # Column vector e
eT_n = np.array(eT / col_len)     # Row vector of ones divided by the order of the matrix

Rsum = 0
for i in range(row_len):
    for j in range(col_len):
        Rsum = Rsum + P[i][j]
    if Rsum == 0:
        P[i] = eT_n
    Rsum = 0

P_bar = P.astype(float)  # P_bar is the stochastic matrix obtained by replacing zero rows with eT_n
alpha = 0.85
P_dbar = alpha * P_bar + (1 - alpha) * e * (eT_n)  # P_dbar is the irreducible matrix
print("The stochastic and irreducible matrix P_dbar is:\n", P_dbar)
Expected output:
A rectangular stochastic and irreducible matrix.
Actual output:
Traceback (most recent call last):
File "C:/Users/admin/PycharmProjects/Recommender/StochasticMatrix_11Aug19_BSK_v3.py", line 13, in <module>
P_dbar = alpha * P_bar + (1 - alpha) * e * (eT_n) #P_dbar is the irreducible matrix
ValueError: operands could not be broadcast together with shapes (5,6) (6,6)
You are trying to multiply two arrays of different shapes. That will not work, since one array has 30 elements, and the other has 36 elements.
You have to make sure the array e * eT_n has the same shape as your input array P.
You are not using the row_len value when constructing e. But if e has the correct number of rows, your code will run.
# e = eT.transpose() # this will only work when the input array is square
e = np.ones(shape=(row_len, 1)) # this also works with a rectangular P
You can check that the shape is correct:
(e * eT_n).shape == P.shape
You should study the numpy documentation and tutorials to learn how to use the ndarray data structure. It's very powerful, but also quite different from the native python data types.
For example, you can replace this verbose and very slow nested python loop with a vectorized array operation.
Original code (with fixed indentation):
for i in range(row_len):
    Rsum = 0
    for j in range(col_len):
        Rsum = Rsum + P[i][j]
    if Rsum == 0:
        P[i] = eT_n
Idiomatic numpy code:
P[P.sum(axis=1) == 0] = eT_n
Furthermore, you don't need to create the array eT_n. Since it's just a single value repeated, you can assign the scalar 1/6 directly instead.
# eT = np.ones(shape=(1, col_len))
# eT_n = np.array(eT / col_len)
P[P.sum(axis=1) == 0] = 1 / P.shape[1]
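Putting those suggestions together, a compact version of the whole conversion might look like this (a sketch assembled from the fixes above, not code from the original post):

import numpy as np

P = np.array([[0, 1/2, 1/2, 0, 0, 0],
              [0, 0, 0, 0, 0, 0],
              [1/3, 1/3, 0, 0, 1/3, 0],
              [0, 0, 0, 0, 1/2, 1/2],
              [0, 0, 0, 1/2, 0, 1/2]], dtype=float)
row_len, col_len = P.shape

# Replace zero rows with a uniform distribution to make P row-stochastic.
P[P.sum(axis=1) == 0] = 1 / col_len

# Damping step from "Deeper Inside PageRank": mix with a uniform rank-one matrix.
alpha = 0.85
e = np.ones((row_len, 1))                # column vector sized to the number of rows
eT_n = np.ones((1, col_len)) / col_len   # uniform row vector
P_dbar = alpha * P + (1 - alpha) * e * eT_n

print("The stochastic and irreducible matrix P_dbar is:\n", P_dbar)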

Pythonic way to vectorize double summation

I'm attempting to convert a double summation formula into code, but can't figure out the correct matrix/vector representation of it.
The first summation runs over i from 1 to n, and the second over j from i+1 to n.
I'm guessing there is a much more efficient & pythonic way of writing this?
I resorted to nested for loops to just get it working but, as expected, it runs very slowly with a large dataset:
def wapc_denom(weights, vols):
    x = []
    y = []
    for i, wi in enumerate(weights):
        for j, wj in enumerate(weights):
            if j > i:
                x.append(wi * wj * vols[i] * vols[j])
        y.append(np.sum(x))
    return np.sum(y)
Edit:
Using guidance from smci's answer I think I have a potential solution:
def wapc_denom2(weights, vols):
    return np.sum(np.tril(np.outer(weights, vols.T)**2, k=-1))
Assuming you want to count every term only once (for that you have to move the x = [] into the outer loop), one cheap way of computing the sum would be:
Create mock data
weights = np.random.random(10)
vols = np.random.random(10)
Do the calculation
wv = weights * vols
result = (wv.sum()**2 - wv@wv) / 2
Check that it's the same
def wapc_denom(weights, vols):
    y = []
    for i, wi in enumerate(weights):
        x = []
        for j, wj in enumerate(weights):
            if j > i:
                x.append(wi * wj * vols[i] * vols[j])
        y.append(np.sum(x))
    return np.sum(y)

assert np.allclose(result, wapc_denom(weights, vols))
Why does it work?
We compute the sum of the full matrix, subtract the diagonal, and divide by two. This is cheap because the sum of an outer product is just the product of the summed factors.
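In symbols, with c_i = weights_i * vols_i, the identity being used is

\sum_{i<j} c_i c_j = \tfrac{1}{2}\Big[\big(\sum_i c_i\big)^2 - \sum_i c_i^2\Big],

which is exactly (wv.sum()**2 - wv@wv) / 2.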
wi * wj * vols[i] * vols[j] is a telltale: vols is just another vector, so first you want to compute the vector wv = weights * vols.
Then (wj * vols[j]) * (wi * vols[i]) = wv^T * wv is your (matrix outer product) expression; that's a column vector times a row vector. But you only want the sum, so there is no need to build a vector with y.append(np.sum(x)) when you're only going to sum it anyway with np.sum(y).
Also, the if j > i part means you only want the sum of the strictly triangular part, excluding the diagonal.
EDIT: the result is fully determined just from wv; I didn't think we needed the matrix to get the sum, and we didn't need the diagonal. @PaulPanzer found the most compact expression.
You can use triangular masks in numpy; check np.triu and np.meshgrid. Do:
np.product(np.triu(np.meshgrid(weights,weights), 1) * np.triu(np.meshgrid(vols,vols), 1),0).sum(1).cumsum().sum()
Example:
w = np.arange(4) + 1
v = np.array([1, 3, 2, 2])

print(np.triu(np.meshgrid(w, w), k=1))
>> array([[[0, 2, 3, 4],
           [0, 0, 3, 4],
           [0, 0, 0, 4],
           [0, 0, 0, 0]],

          [[0, 1, 1, 1],
           [0, 0, 2, 2],
           [0, 0, 0, 3],
           [0, 0, 0, 0]]])
# example of product + triu + meshgrid (your x values):
print(np.product(np.triu(np.meshgrid(w, w), 1) * np.triu(np.meshgrid(v, v), 1), 0))
>> array([[ 0,  6,  6,  8],
          [ 0,  0, 36, 48],
          [ 0,  0,  0, 48],
          [ 0,  0,  0,  0]])
print(np.product(np.triu(np.meshgrid(w,w), 1) * np.triu(np.meshgrid(v,v), 1),0).sum(1).cumsum().sum())
>> 428
print(wapc_denom(w, v))
>> 428
