Related
Actually, I need to feed the values returned by the function global_displacement(X) into another running loop.
Can someone please tell me how to obtain the required output,
and what idiotic mistake I have been making?
Every time, it gives me only the first value ([ 0, 0, X[0], X[1]]) OR
the last value ([ X[20], X[21], X[53], X[54]]) in the output,
depending on the (wrong) indentation of "return j" in the code written below.
import numpy as np
X = [ 0.19515612, 0.36477665, 0.244737, 0.42873321, 0.16864666, 0.08636661, 0.05376605, -0.57201897, -0.00935055, -1.24923862, 0., -1.53111525, 0.00935055, -1.24923862, -0.05376605, -0.57201897, -0.1686466,
0.08636661, -0.244737, 0.42873321, -0.19515612, 0.36477665, 0.02279911, 0. , 0.3563355 , 0.01379104, 0. , 0.42289958, -0.00747999, 0. , 0.0825908, -0.02949519 , 0. , -0.57435396,
-0.04074819, 0. , -1.25069528 ,-0.02972642, 0. , -1.53227704, -0. , 0. , -1.25069528 , 0.02972642 , 0. , -0.57435396 , 0.04074819 , 0. , 0.0825908, 0.02949519, 0. ,
0.42289958, 0.00747999 , 0. , 0.3563355 , -0.01379104, -0.02279911]
# Builds a 2-D array of 4-value rows picked from X (23 rows as written), then
# reshapes rows into 4x1 column arrays inside a loop.
# NOTE(review): indentation is missing in this listing. The function contains a
# single `return j`, so one call can only ever hand back ONE 4x1 array: the
# first row if `return j` sits inside the loop body, the last row if it sits
# after the loop. It never returns all rows.
def global_displacement(X):
global_displacements = np.array( [[ 0, 0, X[0], X[1]], [ X[0], X[1], X[2], X[3]], [ X[2], X[3],X[4], X[5]], [ X[4],X[5],X[6], X[7]],[ X[6],X[7],X[8],X[9]], [ X[8],X[9],X[10], X[11] ], [ X[10], X[11],X[12], X[13]], [ X[12], X[13],X[14], X[15]],[ X[14], X[15],X[16], X[17]],[ X[16], X[17],X[18], X[19]], [ X[18], X[19],X[20], X[21]],[ X[20], X[21], 0, 0],
[ X[0], X[1], X[23], X[24]], [ X[2], X[3], X[26],X[27]], [ X[4], X[5], X[29],X[30]], [ X[6], X[7], X[32],X[33]], [ X[8],X[9],X[35], X[36]], [ X[10], X[11], X[38], X[39]], [ X[12], X[13], X[41], X[42]] ,[ X[14], X[15], X[44], X[45]],[ X[16], X[17], X[47], X[48]],[ X[18], X[19], X[50], X[51]], [ X[20], X[21], X[53], X[54]] ] )
# Each `i` is one 4-element row of the table.
for i in (global_displacements):
# Turn the row into a 4x1 column; `j` is overwritten on every iteration.
j = i.reshape(4,1)
return j
print(global_displacement(X))
this is the expected output, and I need to put these values in another loop, by calling this function.
[[0. ]
[0. ]
[0.19515612]
[0.36477665]]
[[0.19515612]
[0.36477665]
[0.244737 ]
[0.42873321]]
[[0.244737 ]
[0.42873321]
[0.16864666]
[0.08636661]]
[[ 0.16864666]
[ 0.08636661]
[ 0.05376605]
[-0.57201897]]
[[ 0.05376605]
[-0.57201897]
[-0.00935055]
[-1.24923862]]
[[-0.00935055]
[-1.24923862]
[ 0. ]
[-1.53111525]]
[[ 0. ]
[-1.53111525]
[ 0.00935055]
[-1.24923862]]
[[ 0.00935055]
[-1.24923862]
[-0.05376605]
[-0.57201897]]
[[-0.05376605]
[-0.57201897]
[-0.1686466 ]
[ 0.08636661]]
[[-0.1686466 ]
[ 0.08636661]
[-0.244737 ]
[ 0.42873321]]
[[-0.244737 ]
[ 0.42873321]
[-0.19515612]
[ 0.36477665]]
[[-0.19515612]
[ 0.36477665]
[ 0. ]
[ 0. ]]
[[0.19515612]
[0.36477665]
[0. ]
[0.3563355 ]]
[[0.244737 ]
[0.42873321]
[0. ]
[0.42289958]]
[[0.16864666]
[0.08636661]
[0. ]
[0.0825908 ]]
[[ 0.05376605]
[-0.57201897]
[ 0. ]
[-0.57435396]]
[[-0.00935055]
[-1.24923862]
[ 0. ]
[-1.25069528]]
[[ 0. ]
[-1.53111525]
[ 0. ]
[-1.53227704]]
[[ 0.00935055]
[-1.24923862]
[ 0. ]
[-1.25069528]]
[[-0.05376605]
[-0.57201897]
[ 0. ]
[-0.57435396]]
[[-0.1686466 ]
[ 0.08636661]
[ 0. ]
[ 0.0825908 ]]
[[-0.244737 ]
[ 0.42873321]
[ 0. ]
[ 0.42289958]]
[[-0.19515612]
[ 0.36477665]
[ 0. ]
[ 0.3563355 ]]
Your function already converts everything into the right format except that the inner values should be stored into a list. For this you can use numpy.newaxis. It is used to add a new dimension to your array (good post about its functionality).
import numpy as np
def global_displacement(X):
    """Arrange selected entries of ``X`` into 23 column vectors of shape (4, 1).

    The rows follow two index patterns, built from the eleven consecutive
    pairs ``(X[2n], X[2n + 1])`` for ``n = 0..10``:

    * rows 0-11: each pair preceded by its predecessor, with a ``0, 0`` pair
      padding both ends, i.e. ``[0, 0, X[0], X[1]]``,
      ``[X[0], X[1], X[2], X[3]]``, ..., ``[X[20], X[21], 0, 0]``;
    * rows 12-22: pair ``n`` followed by ``X[23 + 3n]`` and ``X[24 + 3n]``.

    Parameters
    ----------
    X : sequence of numbers
        Must be indexable up to ``X[54]``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(23, 4, 1)``; ``result[i]`` is row ``i`` as a 4x1
        column, so the result can be iterated directly in another loop.

    Raises
    ------
    IndexError
        If ``X`` has fewer than 55 entries.
    """
    # The eleven (X[2n], X[2n + 1]) pairs shared by both groups of rows.
    pairs = [[X[2 * n], X[2 * n + 1]] for n in range(11)]

    # Rows 0-11: slide a two-pair window over the zero-padded pair sequence.
    padded = [[0, 0], *pairs, [0, 0]]
    chained = [head + tail for head, tail in zip(padded, padded[1:])]

    # Rows 12-22: pair n combined with X[23 + 3n] and X[24 + 3n].
    coupled = [
        pair + [X[23 + 3 * n], X[24 + 3 * n]]
        for n, pair in enumerate(pairs)
    ]

    global_displacements = np.array(chained + coupled)
    # Appending an axis turns the (23, 4) table into 23 stacked 4x1 columns.
    return global_displacements[:, :, np.newaxis]
X = [ 0.19515612, 0.36477665, 0.244737, 0.42873321, 0.16864666, 0.08636661, 0.05376605, -0.57201897, -0.00935055, -1.24923862, 0., -1.53111525, 0.00935055, -1.24923862, -0.05376605, -0.57201897, -0.1686466,
0.08636661, -0.244737, 0.42873321, -0.19515612, 0.36477665, 0.02279911, 0. , 0.3563355 , 0.01379104, 0. , 0.42289958, -0.00747999, 0. , 0.0825908, -0.02949519 , 0. , -0.57435396,
-0.04074819, 0. , -1.25069528 ,-0.02972642, 0. , -1.53227704, -0. , 0. , -1.25069528 , 0.02972642 , 0. , -0.57435396 , 0.04074819 , 0. , 0.0825908, 0.02949519, 0. ,
0.42289958, 0.00747999 , 0. , 0.3563355 , -0.01379104, -0.02279911]
result = global_displacement(X)
print(result)
Output:
[[[ 0. ]
[ 0. ]
[ 0.19515612]
[ 0.36477665]]
[[ 0.19515612]
[ 0.36477665]
[ 0.244737 ]
[ 0.42873321]]
[[ 0.244737 ]
[ 0.42873321]
[ 0.16864666]
[ 0.08636661]]
[[ 0.16864666]
[ 0.08636661]
[ 0.05376605]
[-0.57201897]]
[[ 0.05376605]
[-0.57201897]
[-0.00935055]
[-1.24923862]]
[[-0.00935055]
[-1.24923862]
[ 0. ]
[-1.53111525]]
[[ 0. ]
[-1.53111525]
[ 0.00935055]
[-1.24923862]]
[[ 0.00935055]
[-1.24923862]
[-0.05376605]
[-0.57201897]]
[[-0.05376605]
[-0.57201897]
[-0.1686466 ]
[ 0.08636661]]
[[-0.1686466 ]
[ 0.08636661]
[-0.244737 ]
[ 0.42873321]]
[[-0.244737 ]
[ 0.42873321]
[-0.19515612]
[ 0.36477665]]
[[-0.19515612]
[ 0.36477665]
[ 0. ]
[ 0. ]]
[[ 0.19515612]
[ 0.36477665]
[ 0. ]
[ 0.3563355 ]]
[[ 0.244737 ]
[ 0.42873321]
[ 0. ]
[ 0.42289958]]
[[ 0.16864666]
[ 0.08636661]
[ 0. ]
[ 0.0825908 ]]
[[ 0.05376605]
[-0.57201897]
[ 0. ]
[-0.57435396]]
[[-0.00935055]
[-1.24923862]
[ 0. ]
[-1.25069528]]
[[ 0. ]
[-1.53111525]
[ 0. ]
[-1.53227704]]
[[ 0.00935055]
[-1.24923862]
[ 0. ]
[-1.25069528]]
[[-0.05376605]
[-0.57201897]
[ 0. ]
[-0.57435396]]
[[-0.1686466 ]
[ 0.08636661]
[ 0. ]
[ 0.0825908 ]]
[[-0.244737 ]
[ 0.42873321]
[ 0. ]
[ 0.42289958]]
[[-0.19515612]
[ 0.36477665]
[ 0. ]
[ 0.3563355 ]]]
First off, you don't need .reshape to transform a 1D array of N elements into a 2D array that's N by 1. You can just add a dimension to the array.
Second, you generally don't want to write loops to handle a Numpy array. You want to use Numpy tools to process everything at once. Just think about the problem in the full number of dimensions: you want to transform a 2D array that's M by N, into a 3D one that's M by N by 1. That's... still just adding a dimension to the array.
So:
global_displacements = np.array(...)
return global_displacements[..., np.newaxis]
I have this output:
[[[-0.015, -0.1533, 1. ]]
[[-0.0069, 0.1421, 1. ]]
...
[[ 0.1318, -0.4406, 1. ]]
[[ 0.2059, -0.3854, 1. ]]]
But I would like to remove the square brackets that are leftover resulting as this:
[[-0.015 -0.1533 1. ]
[-0.0069 0.1421 1. ]
...
[ 0.1318 -0.4406 1. ]
[ 0.2059 -0.3854 1. ]]
My code is this:
# Collect 4000 random samples of the form [x, y, label], where x and y are
# drawn uniformly from [-0.5, 0.5] and rounded to 4 decimals.
# The accumulator must be named `Xy`: the original created `XY` but appended
# to `Xy`, which raised NameError on the first iteration.
Xy = []
for _ in range(4000):
    Xy_1 = [round(random.uniform(-0.5, 0.5), 4), round(random.uniform(-0.5, 0.5), 4), 1]
    Xy_0 = [round(random.uniform(-0.5, 0.5), 4), round(random.uniform(-0.5, 0.5), 4), 0]
    # random.choices returns a list of picks (one pick here), so every appended
    # item is itself a one-element list. That wrapper is the source of the
    # extra bracket level in the printed array.
    Xy.append(random.choices(population=(Xy_0, Xy_1), weights=(0.15, 0.85)))
Xy = np.asarray(Xy)
You can use numpy.squeeze to remove the length-1 dimension from the array:
>>> np.squeeze(Xy)
array([[ 0.3609, 0.2378, 0. ],
[-0.2432, -0.2043, 1. ],
[ 0.3081, -0.2457, 1. ],
...,
[ 0.311 , 0.03 , 1. ],
[-0.0572, -0.317 , 1. ],
[ 0.3026, 0.1829, 1. ]])
Or
reshape the array using numpy.reshape:
>>> Xy.reshape(4000,3)
array([[ 0.3609, 0.2378, 0. ],
[-0.2432, -0.2043, 1. ],
[ 0.3081, -0.2457, 1. ],
...,
[ 0.311 , 0.03 , 1. ],
[-0.0572, -0.317 , 1. ],
[ 0.3026, 0.1829, 1. ]])
>>>
Try extend method.
Xy.extend(random.choices(population=(Xy_0, Xy_1), weights=(0.15, 0.85)))
You can use this one random.choices(population=(Xy_0, Xy_1), weights=(0.15, 0.85))[0]
# Collect 4000 random samples of the form [x, y, label], where x and y are
# drawn uniformly from [-0.5, 0.5] and rounded to 4 decimals.
# The accumulator must be named `Xy`: the original created `XY` but appended
# to `Xy`, which raised NameError on the first iteration.
Xy = []
for _ in range(4000):
    Xy_1 = [round(random.uniform(-0.5, 0.5), 4), round(random.uniform(-0.5, 0.5), 4), 1]
    Xy_0 = [round(random.uniform(-0.5, 0.5), 4), round(random.uniform(-0.5, 0.5), 4), 0]
    # random.choices returns a one-element list; taking [0] stores the chosen
    # sample itself, so the final array is 2-D (4000 x 3) without a wrapper axis.
    Xy.append(random.choices(population=(Xy_0, Xy_1), weights=(0.15, 0.85))[0])
Xy = np.asarray(Xy)
print(Xy)
Output
[[ 0.3948 0.0915 1. ]
[ 0.4197 -0.344 1. ]
[-0.4541 0.3192 1. ]
[ 0.3285 0.0453 1. ]
[-0.0171 -0.3088 1. ]
[ 0.2958 -0.2757 1. ]
[-0.1303 0.1581 0. ]
[-0.4146 -0.4454 1. ]
[ 0.0247 0.325 1. ]
[-0.227 0.139 1. ]]
You can try this to remove 1dim using sum.
# Nested-list form of the data: every row is wrapped in an extra one-element list.
# NOTE: the bare `...` line is a placeholder for omitted rows (presumably taken
# from the truncated printout); put real rows there before running this.
a=[ [[-0.015, -0.1533, 1. ]],
[[-0.0069, 0.1421, 1. ]],
...
[[ 0.1318, -0.4406, 1. ]],
[[ 0.2059, -0.3854, 1. ]] ]
# sum with an empty-list start value concatenates the one-element wrappers,
# which removes one level of nesting.
sum(a,[])
'''
[[-0.015, -0.1533, 1. ],
[-0.0069, 0.1421, 1. ],
...
[ 0.1318, -0.4406, 1. ],
[ 0.2059, -0.3854, 1. ]]
'''
My code so far is:
import numpy as np
data=np.genfromtxt('filename')
print(data)
which prints:
[[ 0.723 1. ]
[ 0.433 2. ]
[ 0.258 1. ]
[ 1.52 2. ]
[ 0.083 2. ]
[ 2.025 1. ]
[ 3.928 1. ]]
How do I split the data into two groups, based on whether the line has a 1 or a 2 in the second column?
A simple solution is to use np.where which returns results of a conditional statement in the form of a tuple of arrays, which can be directly used with numpy's advanced slice notation to slice that data into a new variable.
import numpy as np
# Sample table; the second column carries the group label (1 or 2).
data = np.array([
    [0.723, 1.0],
    [0.433, 2.0],
    [0.258, 1.0],
    [1.52, 2.0],
    [0.083, 2.0],
    [2.025, 1.0],
    [3.928, 1.0],
])

# np.where(condition) gives a tuple of index arrays; indexing `data` with that
# tuple selects the rows where the condition holds.
rows_labelled_1 = np.where(data[:, 1] == 1)
rows_labelled_2 = np.where(data[:, 1] == 2)
data1 = data[rows_labelled_1]
data2 = data[rows_labelled_2]

print(data1)
print(data2)
How about something like this:
import numpy as np
# Sample table; the second column carries the group label (1. or 2.).
data = np.asarray([
    [0.723, 1.0],
    [0.433, 2.0],
    [0.258, 1.0],
    [1.520, 2.0],
    [0.083, 2.0],
    [2.025, 1.0],
    [3.928, 1.0],
])

# One boolean row mask per label, applied in label order: split_data[0] holds
# the rows labelled 1, split_data[1] the rows labelled 2.
split_data = [data[data[:, 1] == label] for label in (1., 2.)]

print(f'data:\n{data}')
print(f'split_data:\n{split_data}')
Explanation:
data[:,1] references the value in the 2nd "column" per se.
Output:
data:
[[0.723 1. ]
[0.433 2. ]
[0.258 1. ]
[1.52 2. ]
[0.083 2. ]
[2.025 1. ]
[3.928 1. ]]
split_data:
[array([[0.723, 1. ],
[0.258, 1. ],
[2.025, 1. ],
[3.928, 1. ]]),
array([[0.433, 2. ],
[1.52 , 2. ],
[0.083, 2. ]])]
Your question was rather brief, so I didn't quite catch the data format, but I tried replicating it with:
foo = [[ 0.723, 1 ], [ 0.433, 2 ], [ 0.258, 1 ], [ 1.52, 2 ],
[ 0.083, 2 ], [ 2.025, 1 ], [ 3.928, 1 ]]
In case you want to filter this list foo so that it only contains the entries whose second element matches a certain number, you could use the following list comprehensions:
foo_is_1 = [e for e in foo if e[1] == 1]
foo_is_2 = [e for e in foo if e[1] == 2]
print(foo_is_1)
print(foo_is_2)
In case you know nothing about the second argument and just want to split your list up in a list of lists with unique second arguments you could use:
# Split foo into one sub-list per distinct second element.
# dict.fromkeys removes duplicate keys while keeping first-seen order, so the
# groups come out in a reproducible order (iterating a set guarantees none),
# and the key scan uses its own variable instead of shadowing `a`.
list_of_lists = [
    [e for e in foo if e[1] == a]
    for a in dict.fromkeys(row[1] for row in foo)
]
for entry in list_of_lists:
    print(entry)
Which is basically two list comprehensions, one for each unique second argument a, and one for each entry e in foo.
I have the following numpy array:
foo = np.array([[0.0, 10.0], [0.13216, 12.11837], [0.25379, 42.05027], [0.30874, 13.11784]])
which yields:
[[ 0. 10. ]
[ 0.13216 12.11837]
[ 0.25379 42.05027]
[ 0.30874 13.11784]]
How can I normalize the Y component of this array, so that it gives me something like:
[[ 0. 0. ]
[ 0.13216 0.06 ]
[ 0.25379 1 ]
[ 0.30874 0.097]]
Referring to this Cross Validated Link, How to normalize data to 0-1 range?, it looks like you can perform min-max normalisation on the last column of foo.
v = foo[:, 1] # foo[:, -1] for the last column
foo[:, 1] = (v - v.min()) / (v.max() - v.min())
foo
array([[ 0. , 0. ],
[ 0.13216 , 0.06609523],
[ 0.25379 , 1. ],
[ 0.30874 , 0.09727968]])
Another option for performing normalisation (as suggested by OP) is using sklearn.preprocessing.normalize, which yields slightly different results -
from sklearn.preprocessing import normalize
foo[:, [-1]] = normalize(foo[:, -1, None], norm='max', axis=0)
foo
array([[ 0. , 0.2378106 ],
[ 0.13216 , 0.28818769],
[ 0.25379 , 1. ],
[ 0.30874 , 0.31195614]])
sklearn.preprocessing.MinMaxScaler can also be used (feature_range=(0, 1) is default):
from sklearn import preprocessing
# MinMaxScaler expects 2-D input of shape (n_samples, n_features). Selecting
# the column with a list index keeps it as an (n, 1) array; the 1-D slice
# foo[:, 1] is rejected by fit_transform.
min_max_scaler = preprocessing.MinMaxScaler()
v = foo[:, [1]]
v_scaled = min_max_scaler.fit_transform(v)
# v_scaled is (n, 1), so write it back through the same (n, 1) column selection.
foo[:, [1]] = v_scaled
print(foo)
Output:
[[ 0. 0. ]
[ 0.13216 0.06609523]
[ 0.25379 1. ]
[ 0.30874 0.09727968]]
Advantage is that scaling to any range can be done.
I think you want this:
foo[:,1] = (foo[:,1] - foo[:,1].min()) / (foo[:,1].max() - foo[:,1].min())
You are trying to min-max scale between 0 and 1 only the second column.
Using sklearn.preprocessing.minmax_scale, should easily solve your problem.
e.g.:
from sklearn.preprocessing import minmax_scale
column_1 = foo[:,0] #first column you don't want to scale
column_2 = minmax_scale(foo[:,1], feature_range=(0,1)) #second column you want to scale
foo_norm = np.stack((column_1, column_2), axis=1) #stack both columns to get a 2d array
Should yield
array([[0. , 0. ],
[0.13216 , 0.06609523],
[0.25379 , 1. ],
[0.30874 , 0.09727968]])
Maybe you want to min-max scale between 0 and 1 both columns. In this case, use:
foo_norm = minmax_scale(foo, feature_range=(0,1), axis=0)
Which yields
array([[0. , 0. ],
[0.42806245, 0.06609523],
[0.82201853, 1. ],
[1. , 0.09727968]])
note: Not to be confused with the operation that scales the norm (length) of a vector to a certain value (usually 1), which is also commonly referred to as normalization.
In R, I can easily get the performance of a random forest like the following.
How can I get the similar stuff in Python easily? Thanks a lot.
Summary of the Random Forest Model
==================================
Number of observations used to build the model: 35
Missing value imputation is active.
Call:
randomForest(formula = rank ~ .,
data = crs$dataset[crs$sample, c(crs$input, crs$target)],
ntree = 500, mtry = 3, importance = TRUE, replace = FALSE, na.action = na.roughfix)
Type of random forest: regression
Number of trees: 500
No. of variables tried at each split: 3
Mean of squared residuals: 5.578147
% Var explained: 97.22
Variable Importance
Here is a simple example using sklearn's random forest: http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html#sklearn.ensemble.RandomForestClassifier
You can easily get the values that you are looking for.
In this example we are using an input matrix X with 2 variables and a binary output y.
from sklearn.ensemble import RandomForestClassifier
fr = RandomForestClassifier(n_estimators=100, oob_score=True).fit(X, y)
fr.n_estimators
Out[10]: 100
fr.oob_decision_function_
Out[11]:
array([[ 0.14285714, 0.85714286],
[ 0.86666667, 0.13333333],
[ 0.02631579, 0.97368421],
[ 1. , 0. ],
[ 0.97826087, 0.02173913],
[ 0.97826087, 0.02173913],
[ 0.20512821, 0.79487179],
[ 0.97368421, 0.02631579],
[ 0.77777778, 0.22222222],
[ 0. , 1. ],
[ 0. , 1. ],
[ 1. , 0. ],
[ 0.52380952, 0.47619048],
[ 0.43243243, 0.56756757],
[ 1. , 0. ],
[ 0. , 1. ],
[ 0.05714286, 0.94285714],
[ 0. , 1. ],
[ 1. , 0. ],
[ 0.76470588, 0.23529412],
[ 1. , 0. ],
[ 0. , 1. ],
[ 0.95454545, 0.04545455],
[ 0.9 , 0.1 ],
[ 0.02222222, 0.97777778],
[ 0.875 , 0.125 ],
[ 0.02857143, 0.97142857],
[ 1. , 0. ],
[ 0.58823529, 0.41176471],
[ 0. , 1. ],
[ 0.20512821, 0.79487179],
[ 0.97435897, 0.02564103],
[ 0.91176471, 0.08823529],
[ 0. , 1. ],
[ 0.30232558, 0.69767442],
[ 1. , 0. ],
[ 0.94444444, 0.05555556],
[ 0. , 1. ],
[ 0.075 , 0.925 ],
[ 0.05263158, 0.94736842],
[ 1. , 0. ],
[ 0. , 1. ],
[ 0.02702703, 0.97297297],
[ 0.91176471, 0.08823529],
[ 0.43243243, 0.56756757],
[ 0.08333333, 0.91666667],
[ 0.10526316, 0.89473684],
[ 0.93548387, 0.06451613],
[ 0.02857143, 0.97142857],
[ 0.53658537, 0.46341463],
[ 0.5 , 0.5 ],
[ 0.66666667, 0.33333333],
[ 1. , 0. ],
[ 0.55555556, 0.44444444],
[ 0.96666667, 0.03333333],
[ 0.97142857, 0.02857143],
[ 0. , 1. ],
[ 0. , 1. ],
[ 1. , 0. ],
[ 0.05882353, 0.94117647],
[ 0.94594595, 0.05405405],
[ 0.11904762, 0.88095238],
[ 0.92307692, 0.07692308],
[ 0.69767442, 0.30232558],
[ 1. , 0. ],
[ 0.12121212, 0.87878788],
[ 1. , 0. ],
[ 0.97727273, 0.02272727],
[ 1. , 0. ],
[ 0.87878788, 0.12121212],
[ 0.02380952, 0.97619048],
[ 0. , 1. ],
[ 0. , 1. ],
[ 0. , 1. ],
[ 0.10810811, 0.89189189],
[ 1. , 0. ],
[ 1. , 0. ],
[ 0.97619048, 0.02380952],
[ 0.54545455, 0.45454545],
[ 0.02380952, 0.97619048],
[ 0.07317073, 0.92682927],
[ 0.94285714, 0.05714286],
[ 0.25714286, 0.74285714],
[ 0. , 1. ],
[ 0. , 1. ],
[ 0.97560976, 0.02439024],
[ 0.11111111, 0.88888889],
[ 1. , 0. ],
[ 1. , 0. ],
[ 0.02857143, 0.97142857],
[ 0.97916667, 0.02083333],
[ 0. , 1. ],
[ 0.02564103, 0.97435897],
[ 0. , 1. ],
[ 0.32258065, 0.67741935],
[ 0.56410256, 0.43589744],
[ 1. , 0. ],
[ 0.92682927, 0.07317073],
[ 1. , 0. ],
[ 0.08823529, 0.91176471]])
fr.oob_score_
Out[12]: 0.87
fr.feature_importances_
Out[13]: array([ 0.82407373, 0.17592627])