Python PCA dimensionality reduction

I'm learning to use PCA for dimensionality reduction (Python 3.6), but I get very similar yet different results when using two different methods. Here's my code:
from numpy import *
from sklearn.decomposition import PCA

data_set = [[-1., -2.],
            [-1., 0.],
            [0., 0.],
            [2., 1.],
            [0., 1.]]

# Method 1: sklearn's PCA
pca_sk = PCA(n_components=1)
newmat = pca_sk.fit_transform(data_set)
print(newmat)

# Method 2: manual eigendecomposition of the covariance matrix
meanVals = mean(data_set, axis=0)
meanRemoved = data_set - meanVals
covMat = cov(meanRemoved, rowvar=0)
eigVals, eigVects = linalg.eig(mat(covMat))
eigValInd = argsort(eigVals)
eigValInd = eigValInd[:-(1 + 1):-1]      # indices of the top n_components eigenvalues
redEigVects = eigVects[:, eigValInd]     # corresponding eigenvectors
lowDDataMat = meanRemoved * redEigVects  # project the centered data
print(lowDDataMat)
The first one outputs:
[[ 2.12132034]
[ 0.70710678]
[-0. ]
[-2.12132034]
[-0.70710678]]
but the other one outputs:
[[-2.12132034]
[-0.70710678]
[ 0. ]
[ 2.12132034]
[ 0.70710678]]
Why does it happen?
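For what it's worth, a quick check (my addition, reusing the variables from the snippet above) confirms that the two projections agree up to an overall sign:

import numpy as np
# newmat comes from sklearn, lowDDataMat from the manual eigendecomposition
print(np.allclose(np.asarray(newmat), -np.asarray(lowDDataMat)))  # expected: True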

Related

Is there a way to trace grads through the self.put_variable method in flax?

I would like to trace the grads through self.put_variable. Is there any way to make that possible, or another way to update a param supplied to the module in a way that is traced?
import jax
from jax import numpy as jnp
from jax import grad, random, jit, vmap
import flax
from flax import linen as nn

class network(nn.Module):
    input_size: int
    output_size: int

    @nn.compact
    def __call__(self, x):
        W = self.param('W', nn.initializers.normal(), (self.input_size, self.output_size))
        b = self.param('b', nn.initializers.normal(), (self.output_size,))
        self.put_variable("params", "b", (x @ W + b).reshape(5,))
        return jnp.sum(x + b)

if __name__ == "__main__":
    key = random.PRNGKey(0)
    key_x, key_param, key = random.split(key, 3)
    x = random.normal(key_x, (1, 5))
    module = network(5, 5)
    param = module.init(key_param, x)
    print(param)
    #x, param = module.apply(param, x, mutable=["params"])
    #print(param)
    print(grad(module.apply, has_aux=True)(param, x, mutable=["params"]))
My output grads are:
FrozenDict({
    params: {
        W: DeviceArray([[0., 0., 0., 0., 0.],
                        [0., 0., 0., 0., 0.],
                        [0., 0., 0., 0., 0.],
                        [0., 0., 0., 0., 0.],
                        [0., 0., 0., 0., 0.]], dtype=float32),
        b: DeviceArray([1., 1., 1., 1., 1.], dtype=float32),
    },
})
This shows that it doesn't trace the grads through the self.put_variable method: the grads of W are all zero, while b clearly relies upon W.
As @jakevdp noted, the test above is incorrect, as b is still tied to the previous b.
https://github.com/google/flax/discussions/2215 says that self.put_variable is traced.
Testing if that is actually the case using the code below:
import jax
from jax import numpy as jnp
from jax import grad, random, jit, vmap
import flax
from flax import linen as nn

class network(nn.Module):
    input_size: int
    output_size: int

    @nn.compact
    def __call__(self, x):
        W = self.param('W', nn.initializers.normal(), (self.input_size, self.output_size))
        b = self.param('b', nn.initializers.normal(), (self.output_size,))
        b = x @ W + b  # update the b variable, else it is still tied to the previous one
        self.put_variable("params", "b", b.reshape(5,))
        return jnp.sum(x + b)

def test_update(param, x):
    _, param = module.apply(param, x, mutable=["params"])
    return jnp.sum(param["params"]["b"] + x), param

if __name__ == "__main__":
    key = random.PRNGKey(0)
    key_x, key_param, key = random.split(key, 3)
    x = random.normal(key_x, (1, 5))
    module = network(5, 5)
    param = module.init(key_param, x)
    print(param)
    print(grad(test_update, has_aux=True)(param, x))
output:
FrozenDict({
    params: {
        W: DeviceArray([[ 0.01678762,  0.00234134,  0.00906202,  0.00027337,  0.00599653],
                        [-0.00729604, -0.00417799,  0.00172333, -0.00566238,  0.0097266 ],
                        [ 0.00378883, -0.00901531,  0.01898266, -0.01733185, -0.00616944],
                        [-0.00806503,  0.00409351,  0.0179838 , -0.00238476,  0.00252594],
                        [ 0.00398197,  0.00030245, -0.00640218, -0.00145424,  0.00956188]], dtype=float32),
        b: DeviceArray([-0.00905032, -0.00574646,  0.01621638, -0.01165553, -0.0285466 ], dtype=float32),
    },
})
(FrozenDict({
    params: {
        W: DeviceArray([[-1.1489547 , -1.1489547 , -1.1489547 , -1.1489547 , -1.1489547 ],
                        [-2.0069852 , -2.0069852 , -2.0069852 , -2.0069852 , -2.0069852 ],
                        [ 0.98777294,  0.98777294,  0.98777294,  0.98777294,  0.98777294],
                        [ 0.9311977 ,  0.9311977 ,  0.9311977 ,  0.9311977 ,  0.9311977 ],
                        [-0.2883922 , -0.2883922 , -0.2883922 , -0.2883922 , -0.2883922 ]], dtype=float32),
        b: DeviceArray([1., 1., 1., 1., 1.], dtype=float32),
    },
}), FrozenDict({
    params: {
        W: DeviceArray([[ 0.01678762,  0.00234134,  0.00906202,  0.00027337,  0.00599653],
                        [-0.00729604, -0.00417799,  0.00172333, -0.00566238,  0.0097266 ],
                        [ 0.00378883, -0.00901531,  0.01898266, -0.01733185, -0.00616944],
                        [-0.00806503,  0.00409351,  0.0179838 , -0.00238476,  0.00252594],
                        [ 0.00398197,  0.00030245, -0.00640218, -0.00145424,  0.00956188]], dtype=float32),
        b: DeviceArray([-0.01861148, -0.00523183,  0.03968921, -0.01952654, -0.06145691], dtype=float32),
    },
}))
The first FrozenDict is the original parameters.
The second FrozenDict is the grads, clearly being traced through self.put_variable.
The last FrozenDict is the parameters, where we can see that b is correctly updated.
The output of your model is jnp.sum(x + b), which has no dependence on W, which in turn implies that the gradient with respect to W should be zero. With this in mind, the output you show above looks to be correct.
Edit: It sounds like you're expecting the result of x @ W + b that you used in your variable to be reflected in the value of b used in the return statement; perhaps you want something like this?
def __call__(self, x):
    W = self.param('W', nn.initializers.normal(), (self.input_size, self.output_size))
    b = self.param('b', nn.initializers.normal(), (self.output_size,))
    b = x @ W + b
    self.put_variable("params", "b", b.reshape(5,))
    return jnp.sum(x + b)
That said, it's unclear to me from the question what your ultimate goal is, and given that you're asking about such an uncommon construct, I suspect this may be an XY problem. Perhaps you can edit your question to say more about what you're trying to accomplish.
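As a minimal follow-up sketch (my addition, assuming the corrected __call__ above replaces the original one and that module and param are re-created as in the question's snippet): because the return value now depends on W, its gradient should no longer be all zeros.

# differentiate through apply exactly as in the original snippet
grads, mutated_vars = grad(module.apply, has_aux=True)(param, x, mutable=["params"])
print(grads["params"]["W"])  # expected: non-zero entries now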

openCV solvePnP returns wrong results

With cv2.solvePnP I try to do a pose estimation in pyvista, which is a Python wrapper for VTK.
The results of solvePnP seem wrong to me, i.e. the resulting translation and rotation. For simplicity I try to "undo" a translation of the camera. I expect the inverse of the translation to be the result of solvePnP.
The translation is just
Translation = np.array([[ 1., 0., 0., 1000.],
                        [ 0., 1., 0., 0.],
                        [ 0., 0., 1., 0.],
                        [ 0., 0., 0., 1.]])
i.e. a shift along one axis. But the resulting rvec, tvec are
rvec = array([ 0., 0., -3.142])
tvec = array([ 707.107, 408.248, 8882.736])
The resulting translation and rotation seem nonsensical to me. Since the translation does no rotation, I expect that only tvec has non-zero entries, to undo the translation in the opposite direction. Concretely, I expect tvec = [-1000, 0, 0] and rvec = [0, 0, 0]. If I then apply this (wrong) result to the camera, all points disappear completely.
Here is what I do:
import cv2
import pyvista as pv
from pyvista import examples
import pyvistaqt
from vtk import vtkMatrix4x4, vtkMatrix3x3, vtkTransform
from vtk.util.numpy_support import vtk_to_numpy
import numpy as np

np.set_printoptions(suppress=True, precision=3)

def getCamMatrix():
    narray = np.eye(4)
    vmatrix = plotter.camera.GetModelViewTransformMatrix()
    vmatrix.DeepCopy(narray.ravel(), vmatrix)
    return narray

def toVTK(m, n=4):
    if n == 4:
        newMatrixVTK = vtkMatrix4x4()
    else:
        newMatrixVTK = vtkMatrix3x3()
    for i in range(n):
        for j in range(n):
            newMatrixVTK.SetElement(i, j, m[i, j])
    return newMatrixVTK

def applyMatrixToCam(newMatrix):
    global plotter
    newMatrixVTK = toVTK(newMatrix)
    transform = vtkTransform()
    transform.SetMatrix(newMatrixVTK)
    transform.Update()
    plotter.camera.ApplyTransform(transform)
    pass

print("Setting up points in world coordinates")
Points = np.array([[ 2918.972, -887.573, 416.331, 1],
                   [ 2338.002, -702.07, 1039.864, 1],
                   [ 1458.473, -707.246, 1005.19, 1],
                   [ 1219.4, -890.161, 377.004, 1],
                   [ 1318.727, -1017.829, -156.537, 1],
                   [ 2529.132, -1026.888, -169.222, 1]])

pMesh = pv.PolyData(Points[:, :3])       # vtk object to hold the six points
plotter = pyvistaqt.BackgroundPlotter()  # setting up the plotting function
plotter.enable_trackball_style()
plotter.add_mesh(pMesh)

print("Transforming from World to Image Coordinates")
# Rotating the points towards a camera at the origin, i.e. applying the default camera transform
projected = (getCamMatrix() @ Points.T)[:3, :].T

print("store original image points")
image_points = projected.copy()[:, :2]

print("Applying the perspective transform, i.e. division by the Z-coordinate")
image_points /= projected[:, -1].reshape(-1, 1)

print("Setting up a simple translation of the camera position")
Translation = np.array([[ 1., 0., 0., 1000.],
                        [ 0., 1., 0., 0.],
                        [ 0., 0., 1., 0.],
                        [ 0., 0., 0., 1.]])
applyMatrixToCam(Translation)

print("Apply the new Camera Matrix to the six points")
projected_shift = (getCamMatrix() @ Points.T)[:3, :].T

retval, rvec, tvec = cv2.solvePnP(projected_shift, np.array(image_points), np.eye(3), None, None, None, False, cv2.SOLVEPNP_EPNP)
R = cv2.Rodrigues(rvec)[0]
extrinsicReal = np.vstack([np.hstack([R.T, -R.T @ tvec]), [0, 0, 0, 1]])
applyMatrixToCam(extrinsicReal)
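A hedged sanity check one could append here (my addition, not part of the original snippet): reproject the 3D points with the pose returned by solvePnP and compare against the 2D points that were passed in. A small reprojection error means the recovered pose is at least consistent with the correspondences it was given.

# hypothetical reprojection check, reusing the identity camera matrix from above
reprojected, _ = cv2.projectPoints(projected_shift, rvec, tvec, np.eye(3), None)
print("max reprojection error:", np.abs(reprojected.squeeze() - np.asarray(image_points)).max())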

Implement Fourier Transform of cross correlation using numpy

I am trying to implement FT of cross correlation to see if the convolution theorem holds true.
I am stuck because right now my convolution and my Fourier-transform multiplication are not equal, but they contain the same elements, just in a different order.
Can somebody help me out? I tried flipping the filter, I tried conjugating it and I tried transposing it.
My inspiration is from https://dsp.stackexchange.com/questions/29761/convolution-theorem-for-cross-correlation
Code
conv: tensor([[[[ 0.6908, 1.2291, -0.7020, 1.0186, -1.2585],
[ 0.3773, 1.8541, 0.8134, 0.2672, -0.5912],
[ 0.1427, 0.6185, 1.0000, -0.8145, -0.4732],
[ 1.9047, -0.0119, 0.8849, -1.2321, -0.5614],
[ 0.0910, 1.0923, -0.6255, -1.9881, 1.9961]]]])
FT result: [[-1.2321 -0.5614 1.9047 -0.0119 0.8849]
[-1.9881 1.9961 0.091 1.0923 -0.6255]
[ 1.0186 -1.2585 0.6908 1.2291 -0.702 ]
[ 0.2672 -0.5912 0.3773 1.8541 0.8134]
[-0.8145 -0.4732 0.1427 0.6185 1. ]]
added code:
import torch
import torch.nn.functional as F
from scipy.fft import fft2, fftshift, ifft2
import numpy as np
nb_channels = 1
h, w = 5, 5
x = [[[[ 0.6908, 1.2291, -0.7020, 1.0186, -1.2585],
[ 0.3773, 1.8541, 0.8134, 0.2672, -0.5912],
[ 0.1427, 0.6185, 1.000, -0.8145, -0.4732],
[ 1.9047, -0.0119, 0.8849, -1.2321, -0.5614],
[ 0.0910, 1.0923, -0.6255, -1.9881, 1.9961]]]]
x = torch.tensor(x)
x = x.view(1, nb_channels, h, w)
weights1 = torch.tensor([[0., 0., 0.],
[0., 1., 0.],
[0., 0., 0.]])
weights = weights1.view(1, 1, 3, 3).repeat(1, nb_channels, 1, 1)
output = F.conv2d(x, weights, padding='same')
print("conv:", output)
################### FT numpy
a = x.numpy()
mac = a.copy()
img = np.squeeze(mac)
fft_in1 = fft2(img)
w_numpy = weights1.numpy()
weights_numpy = w_numpy.copy()
pad_weights = np.pad(weights_numpy, ((1, 1), (1, 1)), mode='constant', constant_values=(0, 0))
fft_weights = fft2(pad_weights)
pad_weights = fft_weights
out1 = fft_in1 * pad_weights
output_n = out1
output_numpy = ifft2(output_n)
output_numpy = output_numpy.real
output_numpy = np.around(output_numpy, decimals = 4)
print("after IFFT:", output_numpy)

How do I average an irregularly spaced x & y coordinate tensor into a grid with a specific cell size?

I have an algorithm that generates a tensor of irregularly spaced x and y coordinates (ex: torch.Size([3600, 2])), and I need to average the points into grid cells of a specific size (ex: 8 by 8). The resulting grid needs to be either an array or tensor.
It's not required, but I would also like to be able to determine if any of the resulting cells have less than a specified number of points in them.
For example I can graph the tensor using matplotlib's plt.scatter, and it looks like this:
In the above example, 100,000 points exist but the number of points can sometimes be in the tens of millions.
I've tried using histogram approaches, and most of them use a specific number of cells vs a specific cell size. Matplotlib can seemingly do it in a graph, but that doesn't help me get an array or tensor.
Edit:
This code might work, if it can be made to work properly.
def grid_torch(x_coords, y_coords, grid_size=(8,8), x_extent=(0., 1.), y_extent=(0., 1.)):
    x_coords = ((x_coords - x_extent[0]) / (x_extent[1] - x_extent[0])) * grid_size[0]
    y_coords = ((y_coords - y_extent[0]) / (y_extent[1] - y_extent[0])) * grid_size[1]
    x_list = []
    for x in range(grid_size[0]):
        x = torch.ones_like(x_coords) * x
        y_list = []
        for y in range(grid_size[1]):
            y = torch.ones_like(y_coords) * y
            in_bounds_x = torch.logical_and(x <= x_coords, x_coords <= x + 1)
            in_bounds_y = torch.logical_and(y <= y_coords, y_coords <= y + 1)
            in_bounds = torch.logical_and(in_bounds_x, in_bounds_y)
            in_bounds_indices = torch.where(in_bounds)
            print(in_bounds_indices)
            y_list.append(in_bounds_indices)
        x_list.append(torch.stack(y_list))
    return torch.stack(x_list)

out = grid_torch(xy_tensor[:,0], xy_tensor[:,1])
print(out.shape)

def create_grid(grid_layout, activ, grid_size=(8,8), min_density=8):
    cells = []
    for x in range(grid_size[0]):
        for y in range(grid_size[1]):
            indices = grid_layout[x, y]
            if len(indices) > min_density:
                average_activation = torch.mean(activ[indices])
                cells.append((average_activation, x, y))
                print(average_activation, x, y)
    return torch.stack(cells)

grid_test = create_grid(out, xy_tensor, grid_size=(8,8))
I think this code would give you a good starting point.
import numpy as np
import torch

def grid_torch(x_coords, y_coords, grid_size=(8,8), x_extent=(0., 1.), y_extent=(0., 1.)):
    # This part converts coordinates to bin numbers (like (2,5), (7,7) etc)
    x_bin = (((x_coords - x_extent[0]) / (x_extent[1] - x_extent[0])) * grid_size[0]).int()
    y_bin = (((y_coords - y_extent[0]) / (y_extent[1] - y_extent[0])) * grid_size[1]).int()

    counts = torch.zeros(grid_size)
    means = torch.zeros(list(grid_size) + [2])

    for x in range(grid_size[0]):
        for y in range(grid_size[1]):
            # these tensors are 1 where (x_bin == x and y_bin == y), 0 elsewhere
            x_where = 1 * (x_bin == x)
            y_where = 1 * (y_bin == y)
            p_where = (x_where * y_where)
            cnt = p_where.sum()
            counts[x, y] = cnt

            # we'll average both x and y coords separately.
            # you can embed min_density logic here.
            if cnt > 0:
                means[x, y, 0] = (x_coords * p_where).sum() / p_where.sum()
                means[x, y, 1] = (y_coords * p_where).sum() / p_where.sum()

    return counts, means

# Generate sample points
points = torch.tensor(np.concatenate([
    np.random.normal(loc=0.2, scale=0.1, size=(1000, 2)),
    np.random.normal(loc=0.6, scale=0.1, size=(1000, 2))
]).clip(0,1)).float()

# plt.scatter(points[:,0], points[:,1])
# plt.grid()

counts, means = grid_torch(points[:,0], points[:,1])
counts
>>>
tensor([[ 47., 114., 75., 10., 0., 0., 0., 0.],
[102., 204., 141., 27., 0., 0., 0., 0.],
[ 60., 101., 74., 16., 7., 4., 1., 0.],
[ 5., 17., 9., 23., 72., 51., 10., 0.],
[ 1., 1., 4., 54., 186., 141., 28., 3.],
[ 0., 0., 3., 47., 154., 117., 14., 0.],
[ 0., 0., 0., 9., 37., 24., 4., 0.],
[ 0., 0., 0., 2., 0., 1., 0., 0.]])
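If the point count reaches into the millions, the nested loop above may get slow. Here is a loop-free sketch of the same binning (my addition, not from the original answer), built on torch.bincount:

import torch

def grid_torch_vectorized(x_coords, y_coords, grid_size=(8, 8), x_extent=(0., 1.), y_extent=(0., 1.)):
    # same bin computation as above, clamped so points exactly on the upper edge stay in range
    x_bin = (((x_coords - x_extent[0]) / (x_extent[1] - x_extent[0])) * grid_size[0]).long().clamp(0, grid_size[0] - 1)
    y_bin = (((y_coords - y_extent[0]) / (y_extent[1] - y_extent[0])) * grid_size[1]).long().clamp(0, grid_size[1] - 1)
    flat = x_bin * grid_size[1] + y_bin                  # one flat cell index per point
    n_cells = grid_size[0] * grid_size[1]
    counts = torch.bincount(flat, minlength=n_cells).reshape(grid_size).float()
    sum_x = torch.bincount(flat, weights=x_coords, minlength=n_cells).reshape(grid_size)
    sum_y = torch.bincount(flat, weights=y_coords, minlength=n_cells).reshape(grid_size)
    means = torch.stack([sum_x, sum_y], dim=-1) / counts.clamp(min=1).unsqueeze(-1)
    return counts, means

A min_density filter then becomes a simple mask, e.g. counts >= min_density.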

Why are full stops appearing in the printed statement of an array?

I am currently learning how to use Python and Jupyter Notebook. I want to create my own dataset. The code for that is as follows (taken from this question: How to create my own datasets using in scikit-learn?):
import numpy as np
import csv
from sklearn.datasets.base import Bunch

def load_movies_dataset():
    with open('Documents/movies_dataset.csv') as csv_file:
        data_file = csv.reader(csv_file)
        temp = next(data_file)
        n_samples = int(temp[0])
        n_features = int(temp[1])
        data = np.empty((n_samples, n_features))
        target = np.empty((n_samples,), dtype=np.int)
        for i, sample in enumerate(data_file):
            data[i] = np.asarray(sample[:-1], dtype=np.int)
            target[i] = np.asarray(sample[-1], dtype=np.int)
    return Bunch(data=data, target=target)
This is the csv file that I'm using:
"6","2","numKicks","numKisses"
"3","104","0"
"2","100","0"
"1","81","0"
"101","10","1"
"99","5","1"
"98","2","1"
This example determines whether a movie is a romance (0) or action (1) based on the number of kicks and the number of kisses.
This is the code I'm using to test the creation of the dataset:
md = load_movies_dataset()
X = md.data
y = md.target
X
And this is the output:
array([[ 3., 104.],
[ 2., 100.],
[ 1., 81.],
[101., 10.],
[ 99., 5.],
[ 98., 2.]])
My question is, why are there full stops in the array display?
They are decimal points. It is an array of floats:
>>> x
array([[ 3., 104.],
[ 2., 100.],
[ 1., 81.],
[101., 10.],
[ 99., 5.],
[ 98., 2.]])
>>> y
array([0, 0, 0, 1, 1, 1])
>>> x.dtype
dtype('float64')
>>>
The default data-type for numpy.empty is numpy.float64.
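If the trailing decimal points are unwanted for display, the array can be cast to an integer dtype (a small addition to the answer above):

>>> x.astype(int)
array([[  3, 104],
       [  2, 100],
       [  1,  81],
       [101,  10],
       [ 99,   5],
       [ 98,   2]])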
