I got this working code snippet:
import numpy as np
from matplotlib import pyplot as plt
in_raster = np.random.randn(36, 3, 2151)
matrix = np.reshape(in_raster, [(np.shape(in_raster)[0] * np.shape(in_raster)[1]), np.shape(in_raster)[2]])
# reshaping the matrix to prepare loop
out_raster = np.empty([np.shape(in_raster)[0]/3, np.shape(in_raster)[1]/3, np.shape(in_raster)[2]])
# creating empty output matrix
i = 0
j = 0
while i <= len(in_raster)-9 or j < len(out_raster):
if i % 9 == 0:
avg_in_raster = np.nanmean(matrix[i:i+9, :], axis=0)
out_raster[j] = avg_in_raster
i += 9
j += 1
out_raster = np.reshape(out_raster, [np.shape(out_raster)[0], np.shape(in_raster)[1]/3, np.shape(in_raster)[2]])
# plot example
low = 0
high = 50
for row in range(0, 3):
for col in range(np.shape(in_raster)[1]):
plt.plot(range(low,high), (in_raster[row, col, low:high]))
plt.plot(range(low,high), (out_raster[0,0,low:high]), 'k')
plt.show()
The program averages (aggregates) 3x3 slices of the input matrix (a raster image) and sets up a new one maintainig the dimensionality of the original matrix.
Now I got the feeling that there must be an easier way to achieve this.
Does somebody have an idea how to obtain the same result in a more pythonic way?
Thank you!
To my knowledge, there is no easier or quicker way to perform blockwise averaging. Your code might look big, but most of it is just preparation of arrays and resizing or plotting stuff. Your main function is a well-placed while-loop and the averaging itself you leave to numpy which is already a shortcut and should run quickly.
I don't see any reason to further shorten this, without losing readability.
If you just want to make it look shorter and "more pythonic" but less readable, go for this:
import numpy as np
from matplotlib import pyplot as plt
in_raster = np.random.randn(36, 3, 2151)
size=3
matrix=np.array([in_raster[:,:,i].flatten() for i in np.arange(in_raster.shape[2])]).transpose()
out_raster2 = np.array([np.nanmean(matrix[i:i+size**2, :], axis=0) for i in np.arange(len(matrix)) if not i%size**2]).reshape(np.shape(in_raster)[0]/size, np.shape(in_raster)[1]/size, np.shape(in_raster)[2])
# plot example
low = 0
high = 50
for row in range(0, 3):
for col in range(np.shape(in_raster)[1]):
plt.plot(range(low,high), (in_raster[row, col, low:high]))
plt.plot(range(low,high), (out_raster2[0,0,low:high]), 'k')
plt.show()
#plt.plot((out_raster2-out_raster)[0,0,low:high]) # should be all 0s
#plt.show()
And you could make it a function/method with the attribute size = 3 and quality checks (first and second dimension can be divided by size, etc.).
You should be able to do it by extending the shape in one direction and averaging it in that dimension. Like so:
out_raster1 = np.nanmean(in_raster.reshape(36*3//9, -1, 2151 ), axis=1).reshape(12, 1, -1)
To check for consistency,
>>> out_raster1-out_raster
array([[[ 0., 0., 0., ..., 0., 0., 0.]],
[[ 0., 0., 0., ..., 0., 0., 0.]],
[[ 0., 0., 0., ..., 0., 0., 0.]],
...,
[[ 0., 0., 0., ..., 0., 0., 0.]],
[[ 0., 0., 0., ..., 0., 0., 0.]],
[[ 0., 0., 0., ..., 0., 0., 0.]]])
Related
With cv2.solvePnP I try to do pose a estimation in pyvista, which is a python wrapper for vtk.
The results of solvePnP seem wrong to me, i.e. the resulting translation and rotation. For simplicity I try to "undo" a translation of the camera. I expect the inverse of the translation to be the result of solvePnP.
The translation is just
Translation = np.array([[ 1., 0., 0., 1000.],
[ 0., 1., 0., 0.],
[ 0., 0., 1., 0.],
[ 0., 0., 0., 1.]])
i.e. a shift along one axis. But the resulting rvec,tvec are
rvec = array([ 0., 0., -3.142]),
tvec = array([ 707.107, 408.248, 8882.736])
The resulting translation and rotation seem nonsensical to me. Since the translation does no rotation, I expect that only tvec has non zero entries to undo the translation in the opposite direction. Concretely, I expect tvec= [-1000,0,0] and rvec=[0,0,0]. If I then apply this (wrong) result to the camera, all points disappear completely.
Here is What I do:
import cv2
import pyvista as pv
from pyvista import examples
import pyvistaqt
from vtk import vtkMatrix4x4, vtkMatrix3x3, vtkTransform
from vtk.util.numpy_support import vtk_to_numpy
import numpy as np
np.set_printoptions(suppress=True,precision=3)
def getCamMatrix():
narray = np.eye(4)
vmatrix = plotter.camera.GetModelViewTransformMatrix()
vmatrix.DeepCopy(narray.ravel(), vmatrix)
return narray
def toVTK(m,n =4):
if n == 4:
newMatrixVTK = vtkMatrix4x4()
else:
newMatrixVTK = vtkMatrix3x3()
for i in range(n):
for j in range(n):
newMatrixVTK.SetElement(i,j, m[i,j])
return newMatrixVTK
def applyMatrixToCam(newMatrix):
global plotter
newMatrixVTK = toVTK(newMatrix)
transform = vtkTransform()
transform.SetMatrix(newMatrixVTK)
transform.Update()
plotter.camera.ApplyTransform(transform)
pass
print("Setting up points in world coordinates")
Points = np.array([[ 2918.972, -887.573, 416.331,1],
[ 2338.002, -702.07 , 1039.864,1],
[ 1458.473, -707.246, 1005.19,1 ],
[ 1219.4 , -890.161, 377.004,1],
[ 1318.727, -1017.829, -156.537,1],
[ 2529.132, -1026.888, -169.222,1]])
pMesh = pv.PolyData(Points[:,:3]) # vtk object to hold the six points
plotter = pyvistaqt.BackgroundPlotter() # setting up the plotting function
plotter.enable_trackball_style()
plotter.add_mesh(pMesh)
print("Transforming from World to Image Coordinates")
# Rotating the points towards a camera at the origin, i.e. applying the default camera transform
projected = (getCamMatrix() # Points.T)[:3,:].T
print("store original image points")
image_points = projected.copy()[:,:2]
print("Applying the perspective transform, i.e. division by the Z-coordinate")
image_points /= projected[:,-1].reshape(-1,1)
print("Setting up a simple translation of the camera position")
Translation = np.array([[ 1., 0., 0., 1000.],
[ 0., 1., 0., 0.],
[ 0., 0., 1., 0.],
[ 0., 0., 0., 1.]])
applyMatrixToCam(Translation)
print("Apply the new Camera Matrix to the six points")
projected_shift = (getCamMatrix() # Points.T)[:3,:].T
retval, rvec, tvec = cv2.solvePnP(projected_shift, np.array(image_points), np.eye(3), None, None, None, False, cv2.SOLVEPNP_EPNP)
R = cv2.Rodrigues(rvec)[0]
extrinsicReal = np.vstack([np.hstack([R.T, -R.T#tvec]), [0,0,0,1]])
applyMatrixToCam(extrinsicReal)
I have a structured numpy array in shared memory, that's only one "layer" of a higher dimensional array.
And I have a list of tuples whose values I want to copy to this (sub) array.
I've found how to make a new numpy structured array out of a list of tuples.
But I can't find out how to convert this list of tuples to an EXISTING numpy (sub) array.
The sizes already match, of course.
Of course I can copy elementwise in a Python for-loop, but this seems awfully inefficient. I'd like the looping to be done in the C++ that underlies numpy.
Explanation: The reason my array is in shared memory is that I use this as a common datatructure with a C++ process, guarded by mutex semaphores.
My list of tuples looks like:
[(25141156064, 5.3647, 221.32287846), (25141157138, 5.3647, 73.70348602), (25141155120, 5.3646, 27.77147382), (25141160388, 5.3643, 55.5000024), (25141160943, 5.3636, 166.49511561), (25141154452, 5.3578, 92), (25141154824, 5.3539, 37.22246003), (25141155187, 5.3504, 37.22246003), (25141157611, 5.34, 915), (25141157598, 5.3329, 1047.32982582), (25140831246, 5.3053, 915), (25141165780, 5.2915, 2000), (25141165781, 5.2512, 2000), (25140818946, 5.2483, 915), (25138992274, 5.1688, 458), (25121724934, 5.1542, 458), (25121034787, 4.8993, 3.47518861), (24402133353, 2.35, 341), (24859679064, 0.8, 1931.25), (24046377720, 0.5, 100), (25141166091, 5.3783, -650.51242432), (25141165779, 5.3784, -1794.28608778), (25141157632, 5.3814, -2000), (25141157601, 5.3836, -2000), (25141164181, 5.3846, -499.65636506), (25141164476, 5.4025, -91), (25141157766, 5.4026, -634.80061236), (25141153364, 5.4034, -2000), (25141107806, 5.4035, -1601.88882309), (25141157694, 5.4136, -1047.32982582), (25141148874, 5.4278, -266), (25141078136, 5.4279, -48.4864096), (25141165317, 5.4283, -2000), (25141097109, 5.4284, -914), (25141110492, 5.4344, -774.75614589), (25141110970, 5.4502, -928.32048159), (25141166045, 5.4527, -2000), (25141166041, 5.493, -2000), (25139832350, 5.5, -10.2273)]
My numpy array has elements that are defined as follows:
Id = np.uint64
Price = np.float64
Amount = np.float64
Quotation = np.dtype ([
('id', Id),
('price', Price),
('amount', Amount),
])
self._contents = np.ndarray (
shape = (
maxNrOfMarkets,
maxNrOfItemKindsPerMarket,
maxNrOfQuotationsPerItemKind
)
dtype = Quotation,
buffer = self.sharedMemory.buf,
offset = offset
)
Same way you'd do it if the array wasn't backed by shared memory. Just make sure you synchronize access properly.
your_array[:] = your_list
Say you have an array of shape (list_length, tuples_length).
Is this what you're looking for?
my_sub_array[:] = my_list_of_tuples
As an example :
my_sub_array = np.zeros((5, 3))
my_list_of_tuples = [(i, i + 1, i + 2) for i in range(5)]
my_sub_array
array([[0., 0., 0.],
[0., 0., 0.],
[0., 0., 0.],
[0., 0., 0.],
[0., 0., 0.]])
my_sub_array[:] = my_list_of_tuples
my_sub_array
array([[0., 1., 2.],
[1., 2., 3.],
[2., 3., 4.],
[3., 4., 5.],
[4., 5., 6.]])
I bet I am doing something very simple wrong. I want to start with an empty 2D numpy array and append arrays to it (with dimensions 1 row by 4 columns).
open_cost_mat_train = np.matrix([])
for i in xrange(10):
open_cost_mat = np.array([i,0,0,0])
open_cost_mat_train = np.vstack([open_cost_mat_train,open_cost_mat])
my error trace is:
File "/Users/me/anaconda/lib/python2.7/site-packages/numpy/core/shape_base.py", line 230, in vstack
return _nx.concatenate([atleast_2d(_m) for _m in tup], 0)
ValueError: all the input array dimensions except for the concatenation axis must match exactly
What am I doing wrong? I have tried append, concatenate, defining the empty 2D array as [[]], as [], array([]) and many others.
You need to reshape your original matrix so that the number of columns match the appended arrays:
open_cost_mat_train = np.matrix([]).reshape((0,4))
After which, it gives:
open_cost_mat_train
# matrix([[ 0., 0., 0., 0.],
# [ 1., 0., 0., 0.],
# [ 2., 0., 0., 0.],
# [ 3., 0., 0., 0.],
# [ 4., 0., 0., 0.],
# [ 5., 0., 0., 0.],
# [ 6., 0., 0., 0.],
# [ 7., 0., 0., 0.],
# [ 8., 0., 0., 0.],
# [ 9., 0., 0., 0.]])
If open_cost_mat_train is large I would encourage you to replace the for loop by a vectorized algorithm. I will use the following funtions to show how efficiency is improved by vectorizing loops:
def fvstack():
import numpy as np
np.random.seed(100)
ocmt = np.matrix([]).reshape((0, 4))
for i in xrange(10):
x = np.random.random()
ocm = np.array([x, x + 1, 10*x, x/10])
ocmt = np.vstack([ocmt, ocm])
return ocmt
def fshape():
import numpy as np
from numpy.matlib import empty
np.random.seed(100)
ocmt = empty((10, 4))
for i in xrange(ocmt.shape[0]):
ocmt[i, 0] = np.random.random()
ocmt[:, 1] = ocmt[:, 0] + 1
ocmt[:, 2] = 10*ocmt[:, 0]
ocmt[:, 3] = ocmt[:, 0]/10
return ocmt
I've assumed that the values that populate the first column of ocmt (shorthand for open_cost_mat_train) are obtained from a for loop, and the remaining columns are a function of the first column, as stated in your comments to my original answer. As real costs data are not available, in the forthcoming example the values in the first column are random numbers, and the second, third and fourth columns are the functions x + 1, 10*x and x/10, respectively, where x is the corresponding value in the first column.
In [594]: fvstack()
Out[594]:
matrix([[ 5.43404942e-01, 1.54340494e+00, 5.43404942e+00, 5.43404942e-02],
[ 2.78369385e-01, 1.27836939e+00, 2.78369385e+00, 2.78369385e-02],
[ 4.24517591e-01, 1.42451759e+00, 4.24517591e+00, 4.24517591e-02],
[ 8.44776132e-01, 1.84477613e+00, 8.44776132e+00, 8.44776132e-02],
[ 4.71885619e-03, 1.00471886e+00, 4.71885619e-02, 4.71885619e-04],
[ 1.21569121e-01, 1.12156912e+00, 1.21569121e+00, 1.21569121e-02],
[ 6.70749085e-01, 1.67074908e+00, 6.70749085e+00, 6.70749085e-02],
[ 8.25852755e-01, 1.82585276e+00, 8.25852755e+00, 8.25852755e-02],
[ 1.36706590e-01, 1.13670659e+00, 1.36706590e+00, 1.36706590e-02],
[ 5.75093329e-01, 1.57509333e+00, 5.75093329e+00, 5.75093329e-02]])
In [595]: np.allclose(fvstack(), fshape())
Out[595]: True
In order for the calls to fvstack() and fshape() produce the same results, the random number generator is initialized in both functions through np.random.seed(100). Notice that the equality test has been performed using numpy.allclose instead of fvstack() == fshape() to avoid the round off errors associated to floating point artihmetic.
As for efficiency, the following interactive session shows that initializing ocmt with its final shape is significantly faster than repeatedly stacking rows:
In [596]: import timeit
In [597]: timeit.timeit('fvstack()', setup="from __main__ import fvstack", number=10000)
Out[597]: 1.4884241055042366
In [598]: timeit.timeit('fshape()', setup="from __main__ import fshape", number=10000)
Out[598]: 0.8819408006311278
I am trying to build a dataset similar to mnist.pkl.gz provided in theano logistic_sgd.py implementation. Following is my code snippet.
import numpy as np
import csv
from PIL import Image
import gzip, cPickle
import theano
from theano import tensor as T
def load_dir_data(csv_file=""):
print(" reading: %s" %csv_file)
dataset=[]
labels=[]
cr=csv.reader(open(csv_file,"rb"))
for row in cr:
print row[0], row[1]
try:
image=Image.open(row[0]+'.jpg').convert('LA')
pixels=[f[0] for f in list(image.getdata())]
dataset.append(pixels)
labels.append(row[1])
del image
except:
print("image not found")
ret_val=np.array(dataset,dtype=theano.config.floatX)
return ret_val,np.array(labels).astype(float)
def generate_pkl_file(csv_file=""):
Data, y =load_dir_data(csv_file)
train_set_x = Data[:1500]
val_set_x = Data[1501:1750]
test_set_x = Data[1751:1900]
train_set_y = y[:1500]
val_set_y = y[1501:1750]
test_set_y = y[1751:1900]
# Divided dataset into 3 parts. I had 2000 images.
train_set = train_set_x, train_set_y
val_set = val_set_x, val_set_y
test_set = test_set_x, val_set_y
dataset = [train_set, val_set, test_set]
f = gzip.open('file.pkl.gz','wb')
cPickle.dump(dataset, f, protocol=2)
f.close()
if __name__=='__main__':
generate_pkl_file("trainLabels.csv")
Error Message:
Traceback (most recent call last):
File "convert_dataset_pkl_file.py", line 50, in <module>
generate_pkl_file("trainLabels.csv")
File "convert_dataset_pkl_file.py", line 29, in generate_pkl_file
Data, y =load_dir_data(csv_file)
File "convert_dataset_pkl_file.py", line 24, in load_dir_data
ret_val=np.array(dataset,dtype=theano.config.floatX)
ValueError: setting an array element with a sequence.
csv file contains two fields.. image name, classification label
when is run this in python interpreter, it seems to be working for me.. as follows.. I dont get error saying setting an array element with a sequence here..
---------python interpreter output----------
image=Image.open('sample.jpg').convert('LA')
pixels=[f[0] for f in list(image.getdata())]
dataset=[]
dataset.append(pixels)
dataset.append(pixels)
dataset.append(pixels)
dataset.append(pixels)
dataset.append(pixels)
b=numpy.array(dataset,dtype=theano.config.floatX)
b
array([[ 2., 0., 0., ..., 0., 0., 0.],
[ 2., 0., 0., ..., 0., 0., 0.],
[ 2., 0., 0., ..., 0., 0., 0.],
[ 2., 0., 0., ..., 0., 0., 0.],
[ 2., 0., 0., ..., 0., 0., 0.]])
Even though i am running same set of instruction (logically), when i run sample.py, i get valueError: setting an array element with a sequence.. I trying to understand this behavior.. any help would be great..
The problem is probably similar to that of this question.
You're trying to create a matrix of pixel values with a row per image. But each image has a different size so the number of pixels in each row is different.
You can't create a "jagged" float typed array in numpy -- every row must be of the same length.
You'll need to pad each row to the length of the largest image.
For my astronomy homework, I need to simulate the elliptical orbit of a planet around a sun. To do this, I need to use a for loop to repeatedly calculate the motion of the planet. However, every time I try to run the program, I get the following error:
RuntimeWarning: invalid value encountered in power
r=(x**2+y**2)**1.5
Traceback (most recent call last):
File "planetenstelsel3-4.py", line 25, in <module>
ax[i] = a(x[i],y[i])*x[i]
ValueError: cannot convert float NaN to integer
I've done some testing, and I think the problem lies in the fact that the values that are calculated are greater than what fits in an integer, and the arrays are defined as int arrays. So if there was a way do define them as float arrays, maybe it would work. Here is my code:
import numpy as np
import matplotlib.pyplot as plt
dt = 3600 #s
N = 5000
x = np.tile(0, N)
y = np.tile(0, N)
x[0] = 1.496e11 #m
y[0] = 0.0
vx = np.tile(0, N)
vy = np.tile(0, N)
vx[0] = 0.0
vy[0] = 28000 #m/s
ax = np.tile(0, N)
ay = np.tile(0, N)
m1 = 1.988e30 #kg
G = 6.67e-11 #Nm^2kg^-2
def a(x,y):
r=(x**2+y**2)**1.5
return (-G*m1)/r
for i in range (0,N):
r = x[i],y[i]
ax[i] = a(x[i],y[i])*x[i]
ay[i] = a(x[i],y[i])*y[i]
vx[i+1] = vx[i] + ax[i]*dt
vy[i+1] = vy[i] + ay[i]*dt
x[i+1] = x[i] + vx[i]*dt
y[i+1] = y[i] + vy[i]*dt
plt.plot(x,y)
plt.show()
The first few lines are just some starting parameters.
Thanks for the help in advance!
When you are doing physics simulations you should definitely use floats for everything. 0 is an integer constant in Python, and thus np.tile creates integer arrays; use 0.0 as the argument to np.tile to do floating point arrays; or preferably use the np.zeros(N) instead:
You can check the datatype of any array variable from its dtype member:
>>> np.tile(0, 10).dtype
dtype('int64')
>>> np.tile(0.0, 10).dtype
dtype('float64')
>>> np.zeros(10)
array([ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
>>> np.zeros(10).dtype
dtype('float64')
To get a zeroed array of float32 you'd need to give a float32 as the argument:
>>> np.tile(np.float32(0), 10)
array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)
or, preferably, use zeros with a defined dtype:
>>> np.zeros(10, dtype='float32')
array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)
You need x = np.zeros(N), etc.: this declares the arrays as float arrays.
This is the standard way of putting zeros in an array (np.tile() is convenient for creating a tiling with a fixed array).