FCBF Python feature selection technique - python

I want to use FCBF technique from github https://github.com/shiralkarprashant/FCBF
The problem i faced is that i am working on Python 3 ad the module is implemented for python 2 users . I got the following error that describes that name 'xrange' is not defined because i work on python3
i think to solve the issue just by changing range by xrange
from FCBF_module import FCBF, FCBFK, FCBFiP, get_i
from sklearn.datasets import load_digits
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
import time
from sklearn.grid_search import GridSearchCV
classifiers = [('DecisionTree', DecisionTreeClassifier(), {'max_depth' : [5, 10, 15]}),
('LogisticRegression', LogisticRegression(), {'C' : [0.1, 1, 10]})]
n_features = dataCAD.shape[1]
npieces = get_i(n_features)
The module code contains just one xrange occurence i tried to change it by range but it does not solve the problem:
# -*- coding: utf-8 -*-
import numpy as np
def count_vals(x):
vals = np.unique(x)
occ = np.zeros(shape = vals.shape)
for i in range(vals.size):
occ[i] = np.sum(x == vals[i])
return occ
def entropy(x):
n = float(x.shape[0])
ocurrence = count_vals(x)
px = ocurrence / n
return -1* np.sum(px*np.log2(px))
def symmetricalUncertain(x,y):
n = float(y.shape[0])
vals = np.unique(y)
# Computing Entropy for the feature x.
Hx = entropy(x)
# Computing Entropy for the feature y.
Hy = entropy(y)
#Computing Joint entropy between x and y.
partial = np.zeros(shape = (vals.shape[0]))
for i in range(vals.shape[0]):
partial[i] = entropy(x[y == vals[i]])
partial[np.isnan(partial)==1] = 0
py = count_vals(y).astype(dtype = 'float64') / n
Hxy = np.sum(py[py > 0]*partial)
IG = Hx-Hxy
return 2*IG/(Hx+Hy)
def suGroup(x, n):
m = x.shape[0]
x = np.reshape(x, (n,m/n)).T
m = x.shape[1]
SU_matrix = np.zeros(shape = (m,m))
for j in range(m-1):
x2 = x[:,j+1::]
y = x[:,j]
temp = np.apply_along_axis(symmetricalUncertain, 0, x2, y)
for k in range(temp.shape[0]):
SU_matrix[j,j+1::] = temp
SU_matrix[j+1::,j] = temp
return 1/float(m-1)*np.sum(SU_matrix, axis = 1)
def isprime(a):
return all(a % i for i in xrange(2, a))
"""
get
"""
def get_i(a):
if isprime(a):
a -= 1
return filter(lambda x: a % x == 0, range(2,a))
"""
FCBF - Fast Correlation Based Filter
L. Yu and H. Liu. Feature Selection for High‐Dimensional Data: A Fast Correlation‐Based Filter Solution.
In Proceedings of The Twentieth International Conference on Machine Leaning (ICML‐03), 856‐863.
Washington, D.C., August 21‐24, 2003.
"""
class FCBF:
idx_sel = []
def __init__(self, th = 0.01):
'''
Parameters
---------------
th = The initial threshold
'''
self.th = th
def fit(self, x, y):
'''
This function executes FCBF algorithm and saves indexes
of selected features in self.idx_sel
Parameters
---------------
x = dataset [NxM]
y = label [Nx1]
'''
self.idx_sel = []
"""
First Stage: Computing the SU for each feature with the response.
"""
SU_vec = np.apply_along_axis(symmetricalUncertain, 0, x, y)
SU_list = SU_vec[SU_vec > self.th]
SU_list[::-1].sort()
m = x[:,SU_vec > self.th].shape
x_sorted = np.zeros(shape = m)
for i in range(m[1]):
ind = np.argmax(SU_vec)
SU_vec[ind] = 0
x_sorted[:,i] = x[:,ind].copy()
self.idx_sel.append(ind)
"""
Second Stage: Identify relationships between feature to remove redundancy.
"""
j = 0
while True:
"""
Stopping Criteria:The search finishes
"""
if j >= x_sorted.shape[1]: break
y = x_sorted[:,j].copy()
x_list = x_sorted[:,j+1:].copy()
if x_list.shape[1] == 0: break
SU_list_2 = SU_list[j+1:]
SU_x = np.apply_along_axis(symmetricalUncertain, 0,
x_list, y)
comp_SU = SU_x >= SU_list_2
to_remove = np.where(comp_SU)[0] + j + 1
if to_remove.size > 0:
x_sorted = np.delete(x_sorted, to_remove, axis = 1)
SU_list = np.delete(SU_list, to_remove, axis = 0)
to_remove.sort()
for r in reversed(to_remove):
self.idx_sel.remove(self.idx_sel[r])
j = j + 1
def fit_transform(self, x, y):
'''
This function fits the feature selection
algorithm and returns the resulting subset.
Parameters
---------------
x = dataset [NxM]
y = label [Nx1]
'''
self.fit(x, y)
return x[:,self.idx_sel]
def transform(self, x):
'''
This function applies the selection
to the vector x.
Parameters
---------------
x = dataset [NxM]
'''
return x[:, self.idx_sel]
"""
FCBF# - Fast Correlation Based Filter
B. Senliol, G. Gulgezen, et al. Fast Correlation Based Filter (FCBF) with a Different Search Strategy.
In Computer and Information Sciences (ISCIS ‘08) 23rd International Symposium on, pages 1‐4.
Istanbul, October 27‐29, 2008.
"""
class FCBFK(FCBF):
idx_sel = []
def __init__(self, k = 10):
'''
Parameters
---------------
k = Number of features to include in the
subset.
'''
self.k = k
def fit(self, x, y):
'''
This function executes FCBFK algorithm and saves indexes
of selected features in self.idx_sel
Parameters
---------------
x = dataset [NxM]
y = label [Nx1]
'''
self.idx_sel = []
"""
First Stage: Computing the SU for each feature with the response.
"""
SU_vec = np.apply_along_axis(symmetricalUncertain, 0, x, y)
SU_list = SU_vec[SU_vec > 0]
SU_list[::-1].sort()
m = x[:,SU_vec > 0].shape
x_sorted = np.zeros(shape = m)
for i in range(m[1]):
ind = np.argmax(SU_vec)
SU_vec[ind] = 0
x_sorted[:,i] = x[:,ind].copy()
self.idx_sel.append(ind)
"""
Second Stage: Identify relationships between features to remove redundancy with stopping
criteria (features in x_best == k).
"""
j = 0
while True:
y = x_sorted[:,j].copy()
SU_list_2 = SU_list[j+1:]
x_list = x_sorted[:,j+1:].copy()
"""
Stopping Criteria:The search finishes
"""
if x_list.shape[1] == 0: break
SU_x = np.apply_along_axis(symmetricalUncertain, 0,
x_list, y)
comp_SU = SU_x >= SU_list_2
to_remove = np.where(comp_SU)[0] + j + 1
if to_remove.size > 0 and x.shape[1] > self.k:
for i in reversed(to_remove):
x_sorted = np.delete(x_sorted, i, axis = 1)
SU_list = np.delete(SU_list, i, axis = 0)
self.idx_sel.remove(self.idx_sel[i])
if x_sorted.shape[1] == self.k: break
if x_list.shape[1] == 1 or x_sorted.shape[1] == self.k:
break
j = j + 1
if len(self.idx_sel) > self.k:
self.idx_sel = self.idx_sel[:self.k]
"""
FCBFiP - Fast Correlation Based Filter in Pieces
"""
class FCBFiP(FCBF):
idx_sel = []
def __init__(self, k = 10, npieces = 2):
'''
Parameters
---------------
k = Number of features to include in the
subset.
npieces = Number of pieces to divide the
feature space.
'''
self.k = k
self.npieces = npieces
def fit(self, x, y):
'''
This function executes FCBF algorithm and saves indexes
of selected features in self.idx_sel
Parameters
---------------
x = dataset [NxM]
y = label [Nx1]
'''
"""
First Stage: Computing the SU for each feature with the response. We sort the
features. When we have a prime number of features we remove the last one from the
sorted features list.
"""
m = x.shape
nfeaturesPieces = int(m[1] / float(self.npieces))
SU_vec = np.apply_along_axis(symmetricalUncertain, 0, x, y)
x_sorted = np.zeros(shape = m, dtype = 'float64')
idx_sorted = np.zeros(shape = m[1], dtype = 'int64')
for i in range(m[1]):
ind = np.argmax(SU_vec)
SU_vec[ind] = -1
idx_sorted[i]= ind
x_sorted[:,i] = x[:,ind].copy()
if isprime(m[1]):
x_sorted = np.delete(x_sorted, m[1]-1, axis = 1 )
ind_prime = idx_sorted[m[1]-1]
idx_sorted = np.delete(idx_sorted, m[1]-1)
#m = x_sorted.shape
"""
Second Stage: Identify relationships between features into its vecinity
to remove redundancy with stopping criteria (features in x_best == k).
"""
x_2d = np.reshape(x_sorted.T, (self.npieces, nfeaturesPieces*m[0])).T
SU_x = np.apply_along_axis(suGroup, 0, x_2d, nfeaturesPieces)
SU_x = np.reshape(SU_x.T, (self.npieces*nfeaturesPieces,))
idx_sorted2 = np.zeros(shape = idx_sorted.shape, dtype = 'int64')
SU_x[np.isnan(SU_x)] = 1
for i in range(idx_sorted.shape[0]):
ind = np.argmin(SU_x)
idx_sorted2[i] = idx_sorted[ind]
SU_x[ind] = 10
"""
Scoring step
"""
self.scores = np.zeros(shape = m[1], dtype = 'int64')
for i in range(m[1]):
if i in idx_sorted:
self.scores[i] = np.argwhere(i == idx_sorted) + np.argwhere(i == idx_sorted2)
if isprime(m[1]):
self.scores[ind_prime] = 2*m[1]
self.set_k(self.k)
def set_k(self, k):
self.k = k
scores_temp = -1*self.scores
self.idx_sel = np.zeros(shape = self.k, dtype = 'int64')
for i in range(self.k):
ind = np.argmax(scores_temp)
scores_temp[ind] = -100000000
self.idx_sel[i] = ind

Try using 2to3 package for python to automatically convert the files. Worked for me!
https://docs.python.org/2/library/2to3.html

Related

JAX Tridiagonal Jacobians

What is the most efficient implementation of a scalable autonomous tridiagonal system using JAX?
import functools as ft
import jax as jx
import jax.numpy as jnp
import jax.random as jrn
import jax.lax as jlx
def make_T(m):
# Create a psuedo-random tridiagonal Jacobian and store band
T = jnp.zeros((3,m), dtype='f8')
T = T.at[0, 1: ].set(jrn.normal(jrn.PRNGKey(0), shape=(m-1,)))
T = T.at[1, : ].set(jrn.normal(jrn.PRNGKey(1), shape=(m ,)))
T = T.at[2, :-1].set(jrn.normal(jrn.PRNGKey(2), shape=(m-1,)))
return T
def make_y(m):
# Create a pseudo-random state array
y = jrn.normal(jrn.PRNGKey(3), shape=(m ,))
return y
def calc_f_base(y, T):
# Calculate the rate given the current state
f = T[1,:]*y
f = f.at[ 1: ].set(f[ 1: ]+T[0, 1: ]*y[ :-1])
f = f.at[ :-1].set(f[ :-1]+T[2, :-1]*y[ 1: ])
return f
m = 2**22 # potentially exhausts resources
T = make_T(m)
y = make_y(m)
calc_f = ft.partial(calc_f_base, T=T)
Using jax.jacrev or jax.jacfwd will generate a full Jacobian which limits the size of the system.
One attempt to overcome this limitation is as follows
#ft.partial(jx.jit, static_argnums=(0,))
def calc_jacfwd_trid(calc_f, y):
# Determine the Jacobian (forward-mode) tridiagonal band
def scan_body(carry, i):
t, T = carry
t = t.at[i ].set(1.0)
f, dfy = jx.jvp(calc_f, (y,), (t,))
T = T.at[2,i-1].set(dfy[i-1])
T = T.at[1,i ].set(dfy[i ])
T = T.at[0,i+1].set(dfy[i+1])
t = t.at[i-1].set(0.0)
return (t, T), None
# Initialise
m = y.size
t = jnp.zeros_like(y)
T = jnp.zeros((3,m), dtype=y.dtype)
# Differentiate wrt y[0]
t = t.at[0].set(1.0)
f, dfy = jx.jvp(calc_f, (y,), (t,))
idxs = jnp.array([1,0]), jnp.array([0,1])
T = T.at[idxs].set(dfy[0:2])
# Differentiate wrt y[1:-1]
(t, T), empty = jlx.scan(scan_body, (t,T), jnp.arange(1,m-1))
# Differentiate wrt y[-1]
t = t.at[m-2:].set(jnp.array([0.0,1.0]))
f, dfy = jx.jvp(calc_f, (y,), (t,))
idxs = jnp.array([2,1]), jnp.array([m-2,m-1])
T = T.at[idxs].set(dfy[-2:])
return T
which permits
T = jacfwd_trid(calc_f, y)
df = jrn.normal(jrn.PRNGKey(4), shape=y.shape)
dx = jlx.linalg.tridiagonal_solve(*T,df[:,None]).flatten()
Is there a better approach and/or can the time complexity of calc_jacfwd_trid be reduced further?
EDIT
The following implementation is more compact, but run times are slightly slower
#ft.partial(jx.jit, static_argnums=(0,))
def calc_jacfwd_trid_map(calc_f, y):
# Determine the Jacobian (forward-mode) tridiagonal band with lax map
def map_body(i, t):
t = t.at[i-1].set(0.0)
f, dfy = jx.jvp(calc_f, (y,), (t,))
im1 = jnp.where(i > 0, i-1, 0)
Ti = jlx.dynamic_slice(dfy, (im1,), (3,))
Ti = jnp.where(i > 0, Ti, jnp.roll(Ti, shift=+1))
Ti = jnp.where(i < m-1, Ti, jnp.roll(Ti, shift=-1))
t = t.at[i ].set(1.0)
return Ti
# Initialise
m = y.size
t = jnp.zeros_like(y)
# Differentiate wrt y[:]
T = jlx.map(lambda i : map_body(i, t=t), jnp.arange(m))
# Correct the orientation of T
T = T.transpose()
T = jnp.flip(T, axis=0)
T = T.at[0,:].set(jnp.roll(T[0,:], shift=+1))
T = T.at[2,:].set(jnp.roll(T[2,:], shift=-1))
return T

My Neural Network algorithm is not working mnist numbers

I could use a second set of eyes on my neural network.
This is the mnist number recognition project.
I'm not sure where the issue is.
I previously implemented the ai with tensor flow successfully.
I'm not looking to use an api as a solution.
I would appreciate any help anyone can give.
Here's the project on github, it's only an init file and then the neural_network.
https://github.com/nealchawn/ai_trial_2
class NeuralNetwork(object):
def __init__(self, sizes):
self.activations = []
self.outputs = []
self.weights = []
self.biases = []
self.sizes = sizes
self.set_random_weights()
self.set_random_biases()
def set_random_weights(self):
for layer_index, layer_size in enumerate(self.sizes[1:], start=1):
layer_weights = []
for size in range(layer_size):
for size in range(self.sizes[layer_index-1]):
layer_weights.append(random.uniform(-5.0, 5.0))
self.weights.append(layer_weights)
def set_random_biases(self):
total_biases = 0
# add extra zero bias to help future indexing
#self.biases.append(0)
for index, size in enumerate(self.sizes[0:-1], start=1):
total_biases += 1
for x in range(total_biases):
self.biases.append(random.uniform(-5.0, 5.0))
def train_network(self, training_data, training_labels):
if len(training_data) != len(training_labels):
print("Error data and labels must be the same length")
data = list(zip(training_data, training_labels))
self.sgd(data)
def sgd(self, data, mini_batch_size = 1000):
# first we'll create batches of training data
n = len(data)
data_batches = [
data[k:k + mini_batch_size]
for k in range(0, n, mini_batch_size)
]
print(len(data_batches))
i = 0
for mini_batch in data_batches:
print("Batch: " + str(i))
i += 1
self.update_mini_batch(mini_batch)
self.network_outputs()
print("Finished All training data!")
def update_mini_batch(self, mini_data_batch):
weight_gradients = []
bias_gradients = []
i = 0
for training_input in mini_data_batch:
training_object, training_label = training_input
self.feedforward(training_object)
weights_gradient, bias_gradient = self.backpropogation(training_label)
weight_gradients.append(weights_gradient)
bias_gradients.append(bias_gradient)
# average gradients
weights_gradient = np.average(weight_gradients,axis=0)
biases_gradient = np.average(bias_gradients, axis=0)
# may need to convert to list
weights_gradient_list = []
for weight_gradient in weights_gradient:
weights_gradient_list.append(weight_gradient.tolist())
#weights_gradient = weights_gradient.tolist()
biases_gradient = biases_gradient.tolist()
for x in range(len(self.biases)):
self.biases[x] -= 0.1*biases_gradient[x]
weight_gradient_index = 0
for layer_index, layer_weights in enumerate(self.weights, start=0):
for weight_index, weight in enumerate(layer_weights):
self.weights[layer_index][weight_index] = weight - 0.1*weights_gradient_list[layer_index][weight_index]
weight_gradient_index += 1
def feedforward(self, training_object):
# set inputs
self.outputs = []
self.activations = []
temp_activations = []
for index in range(self.sizes[0]):
temp_activations.append(training_object[index])
self.activations.append(temp_activations)
for layer_index, layer_size in enumerate(self.sizes[1:], start=0):
layer_weights = self.weights[layer_index]
layer_inputs = self.activations[layer_index]
weight_index = 0
layer_outputs = []
layer_activations = []
for node_index in range(layer_size):
node_weights = []
# get node weights
#print(f"layer size: {layer_size}, previous_layer_size: {self.sizes[layer_index]}, layer weights: {len(layer_weights)}")
for x in range(self.sizes[layer_index]):
node_weights.append(layer_weights[weight_index])
weight_index += 1
output = 0
for indx in range(len(node_weights)):
output += layer_inputs[indx]*node_weights[indx]
output = output + self.biases[layer_index]
layer_outputs.append(output)
layer_activations.append(self.sigmoid(output))
self.outputs.append(layer_outputs)
self.activations.append(layer_activations)
def backpropogation(self, training_label):
costs = []
output_layer_activations = self.activations[-1]
output_layer_outputs = self.outputs[-1]
correct_labels = self.translate_label_to_array(training_label)
costs.append(self.compute_cost_derivative(correct_labels, output_layer_activations))
for cost_index, cost in enumerate(costs[0]):
costs[0][cost_index] = cost*self.sigmoid_prime(output_layer_outputs[cost_index])
# calculate costs for layers
for layer_index, layer_size in enumerate(self.sizes[::-1][1:-1], start=1):
layer_costs = []
layer_weights = self.weights[-layer_index]
layer_outputs = self.outputs[-(layer_index+1)]
previous_layer_costs = costs[layer_index-1]
next_layer_size = self.sizes[::-1][1:][layer_index]
layer_weights_formatted = []
for x in range(layer_size):
layer_weights_formatted.append([])
for weight_index, weight in enumerate(layer_weights, start=0):
#print(f"weight index:{weight_index % next_layer_size} layer_index: {weight_index}")
layer_weights_formatted[weight_index%layer_size].append(layer_weights[weight_index])
#print(f"next_layer_size:{layer_size} costs: {len(previous_layer_costs)}, layer_weights_formatted: {layer_weights_formatted}")
for x in range(layer_size):
node_cost = 0
for y, cost in enumerate(previous_layer_costs,start=0):
node_cost += layer_weights_formatted[x][y]*cost
layer_costs.append(node_cost)
# layer_costs same order as next layer's activations
for cost_index, cost in enumerate(layer_costs):
layer_costs[cost_index] = cost * self.sigmoid_prime(layer_outputs[cost_index])
costs.append(layer_costs)
# calculate weight errors
weight_errors = []
bias_errors = []
for layer_index, layer_costs in enumerate(costs[::-1]):
layer_activations = self.activations[layer_index]
layer_weight_errors = []
for cost_index, cost in enumerate(layer_costs,start=0):
for activation in layer_activations:
layer_weight_errors.append(activation * cost)
weight_errors.append(np.array(layer_weight_errors))
bias_errors.append(sum(layer_costs))
return weight_errors, bias_errors
# conversion tool
def translate_label_to_array(self, y):
translated_label = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
translated_label[y] = 1
return np.array(translated_label)
# output tools
def network_outputs(self):
print("Output layer: ")
for x in range(self.sizes[-1]):
print("node " + str(x) + ": " + str(self.activations[-1][x]))
def total_activations(self):
print(len(self.activations))
def compute_cost_derivative(self, y, output_activations):
"""Return the vector of partial derivatives \partial C_x /
\partial a for the output activations."""
return (output_activations - y)
def sigmoid(self, z):
""""The sigmoid function."""
return (1.0 / (1.0 + np.exp(-z)))
def sigmoid_prime(self, z):
return (self.sigmoid(z) * (1 - self.sigmoid(z)))

Logistic regression - strange behaviour of the decision boundary when additional parameters are added

I am trying to build a logistic regression model for a dataset consisting of two parameters
x1 and x2, but instead of analyzing just the two of them, I have added their squares as well - x12, x22 and x1· x2.
At the first glance everything looks fine and the error function is decreasing, but whilist drawing the plot of the decision boundary I have noticed, that after circa 500 iterations something strange happens to it.
Here is an animation of the error function as a function of iterations and a respective plot of the decision boundary:
Now,I interpret the decision boundary as a quadratic function
x2=f(x1), where
the relation between both parameters is given like this:
0.5 = θ0 + θ1x1 + θ2x2 + θ3x12 + θ4x1x2
+ θ5x22
Here is the python code I use to do everything:
#!/usr/bin/python3
import numpy as np
import matplotlib.pyplot as plt
from math import log
from matplotlib.animation import FuncAnimation
def sigmoid(x):
return 1.0 / (1.0 + np.exp(-x))
def loadData(filepath):
source=""
try:
f = open(filepath, "r")
source = f.read()
f.close()
except IOError:
print("Error while reading file (" + filepath + ")")
return ""
raw_data = source.split("\n")
raw_data = [x.split(",") for x in raw_data if x !=""]
raw_data = np.matrix(raw_data).astype(float)
return (raw_data[:,:np.size(raw_data,1)-1], raw_data[:,np.size(raw_data, 1)-1:])
def standardize(dataset, skipfirst=True):
means = np.amin(dataset, 0)
deviation = np.std(dataset, 0)
if skipfirst:
dataset[:,1:] -= means[:,1:]
dataset[:,1:] /= deviation[:,1:]
return dataset
else:
dataset -= means
dataset /= deviation
return dataset
def error(X, Y, Theta):
"Calculates error values"
v_sigm = np.vectorize(sigmoid)
h_x = X # Theta
sigmo = v_sigm(h_x)
partial_vect = (Y-1).T # np.log(1-sigmo) - Y.T # np.log(sigmo)
return 1/(2*np.size(Y, axis=0))*np.sum(partial_vect)
def gradientStep(X, Y, Theta, LR):
"Returns new theta Values"
v_sigm = np.vectorize(sigmoid)
h_x = X # Theta
modif = -1*LR/np.size(Y, 0)*(h_x-Y)
sums = np.sum(modif.T # X, axis = 0)
return Theta + sums.T
X, Y = loadData("ex2data1.txt")
#add bias to X
X = np.append(np.ones((np.size(X, 0), 1)), X, axis=1)
added_params = [[x[1]**2, x[1]*x[2], x[2]**2] for x in np.array(X)]
X = np.append(X, np.matrix(added_params), axis=1)
#standardize X
X = standardize(X)
#create vector of parameters
Theta=np.zeros((np.size(X, 1), 1))
iterations = 3000
Theta_vals = []
Error_vals = []
for i in range(0, iterations):
Theta_vals.append(np.asarray(Theta).flatten())
Error_vals.append(error(X, Y, Theta))
Theta = gradientStep(X, Y, Theta, 0.07)
#CALCULATING FINISHES HERE
#plot data:
fig = plt.figure()
def_ax = fig.add_subplot(211)
def_ax.set_xlim(np.amin(X[:,1:2]), np.amax(X[:,1:2]))
def_ax.set_ylim(np.amin(X[:,2:3]), np.amax(X[:,2:3]))
err_ax = fig.add_subplot(212)
err_ax.set_ylim(0, error(X, Y, Theta))
err_ax.set_xlim(0, iterations)
positive_X1 = []
positive_X2 = []
negative_X1 = []
negative_X2 = []
for i in range(0, np.size(Y, 0)):
if(Y[i, 0] == 1):
positive_X1.append(X[i, 1])
positive_X2.append(X[i, 2])
else:
negative_X1.append(X[i, 1])
negative_X2.append(X[i, 2])
err_ax.set_ylim(np.amin(Error_vals), np.amax(Error_vals))
def animation(frame):
global Theta_vals, Error_vals, def_ax, err_ax, positive_X1, positive_X2, negative_X1, negative_X2
def_limX = def_ax.get_xlim()
def_limY = def_ax.get_ylim()
err_limX = err_ax.get_xlim()
err_limY = err_ax.get_ylim()
def_ax.clear()
err_ax.clear()
def_ax.set_xlim(def_limX)
def_ax.set_ylim(def_limY)
err_ax.set_xlim(err_limX)
err_ax.set_ylim(err_limY)
def_ax.scatter(positive_X1, positive_X2, marker="^")
def_ax.scatter(negative_X1, negative_X2, marker="o")
Theta = Theta_vals[frame]
res_x = np.linspace(*def_ax.get_xlim(), num=5)
delta_x = [(Theta[4]*x+Theta[2])**2-4*Theta[5]*(Theta[3]*x**2+Theta[1]*x+Theta[0]-0.5) for x in res_x]
delta_x = [np.sqrt(x) if x >= 0 else 0 for x in delta_x]
minb = [-(Theta[4]*x+Theta[2]) for x in res_x]
res_1 = []
res_2 = []
for i in range(0, len(res_x)):
if Theta[5] == 0:
res_1.append(0)
res_2.append(0)
else:
res_1.append((minb[i]+delta_x[i])/(2*Theta[5]))
res_2.append((minb[i]-+delta_x[i])/(2*Theta[5]))
def_ax.plot(res_x, res_1)
def_ax.plot(res_x, res_2)
err_x = np.linspace(0, frame, frame)
err_y = Error_vals[0:frame]
err_ax.plot(err_x, err_y)
anim = FuncAnimation(fig, animation, frames=iterations, interval=3, repeat_delay=2000)
print(error(X, Y, Theta))
anim.save("anim.mp4")
What could be the reason of such a strange behaviour?

Seeking advice for classifying 3d mobile data

Project
I'm working on a project who's end goal will be to classify user input/behaviour on a smartphone, based on readings of 3D (accelerometer and gyroscope) movements (displacement, velocity, acceleration and jerk) as well as 2D interactions (velocity, acceleration, number of "touches" etc). My classifier will have to output whether the user is engaged/frustrated/perhaps other emotions subject to this paper. These aren't necessary at this step however.
Data
The smartphone produces JSON files, with the position of the phone in terms of x, y and z, as well as the timestamp. There's a new timestamp every 20milliseconds.
Idea
I've done some Python tutorials and produced the code at the bottom so far. I've been advised to use NumPy and SciPy to make my life easier. Aside from the obvious kinematics, I need to include a filtering process. Google suggested Kalman filters.
Question
If anyone has experience in the matter, can they recommend an approach to this, perhaps you've encountered a similar project with a nice methodology.
import sqlalchemy
import json
import ReferenceFrame, get_motion_params, dynamicsymbols, symbols
con = sqlalchemy.create_engine('postgresql+psycopg2://postgres:#localhost/airlib')
meta = sqlalchemy.MetaData(bind=con, reflect=True)
dataObjects = []
dataAccObjects = []
dataGyroObjects = []
displacementChangeInX = []
displacementChangeInY = []
displacementChangeInZ = []
t = [] #time
v = [] #velocity
a = [] #acceleration
j = [] #jerk
results = meta.tables['rawmobiledata']
class DataEntry(object):
item = 0
time = 0
x = 0
y = 0
z = 0
def __init__(self, item, time, x, y, z):
self.item = item
self.time = time
self.x = x
self.y = y
self.z = z
def make_accelerometerEntry(item, time, x, y, z):
dataAcc = DataEntry(item ,time, x, y, z)
#print str(dataAccObjects)
return dataAcc
def make_gyroEntry(item, time, x, y, z):
dataGyro = DataEntry(item, time, x, y, z)
#print str(dataGyroObjects)
return dataGyro
for row in con.execute(results.select()):
r = row[1]
r = json.dumps(r)
loaded_r = json.loads(r)
#print(loaded_r)
if loaded_r['sensor'] == 'accelerometer':
for item in range(0, 250):
time = loaded_r['data'][item]['time']
x = loaded_r['data'][item]['x']
y = loaded_r['data'][item]['y']
z = loaded_r['data'][item]['z']
dataAccObjects.append(make_accelerometerEntry(item, time, x, y, z))
#print "this is an accelerometerObj"
elif loaded_r['sensor'] == 'gyroscope':
for item in range(0, 250):
time = loaded_r['data'][item]['time']
x = loaded_r['data'][item]['x']
y = loaded_r['data'][item]['y']
z = loaded_r['data'][item]['z']
dataGyroObjects.append(make_gyroEntry(item, time, x, y, z))
#print "gyroObj with time " + str(time) + ", reading " + str(item+1) + "/250, test#" + str((row[0]/2)+1)
for row in con.execute(results.select()):
r = row[1]
r = json.dumps(r)
loaded_r = json.loads(r)
if loaded_r['sensor'] == 'gyroscope':
for item in range (0,250):
#rate of change
# ti = |t2-t1|
t[item] = abs(int(dataGyroObjects[item].time) - int(dataGyroObjects[item+1].time))
#calculate change in displacement over the 3 axes
displacementChangeInX[item] = abs(int(dataGyroObjects[item].x) - int(dataGyroObjects[item+1].x))
displacementChangeInY[item] = abs(int(dataGyroObjects[item].y) - int(dataGyroObjects[item+1].y))
displacementChangeInZ[item] = abs(int(dataGyroObjects[item].z) - int(dataGyroObjects[item+1].z))
# v = dx/dt
v[item] = abs(t[item] - 0)
#calculate acceleration
# a = dv/dx
#calculate jerk
# j = da/dt

Python: Logistic regression - inputing my data into my algorithm

I'm trying to implement a logistic regression algorithm in python, but i'm not used to using python.
I followed a tutorial to create my algorithm:
import matplotlib.pyplot as plt
import seaborn as sns
#matplotlib inline
sns.set(style='ticks', palette='Set2')
import pandas as pd
import math
from numpy import *
def logistic_func(theta, X):
return float(1) / (1 + math.e**(-X.dot(theta))) #for x in x_values]
def log_gradient(theta, X, Y):
first_calc = logistic_func(theta, X) - np.squeeze(Y) #by attribute gives Beta(i)
final_calc = first_calc.T.dot(X)
return final_calc
def cost_func(theta, X, Y):
log_func_v = logistic_func(theta,X)
Y = np.squeeze(Y)
step1 = Y * np.log(log_func_v)
step2 = (1.5-Y) * np.log(1.5 - log_func_v)
step3 = (1-Y) * np.log(1 - log_func_v)
final = -step1 - step2 - step3
return np.mean(final)
def grad_desc(theta_values, X, Y, lr=.001, converge_change=.001):
#normalize
X = (X - np.mean(X, axis=0)) / np.std(X, axis=0)
#setup cost iter
cost_iter = []
cost = cost_func(theta_values, X, Y)
cost_iter.append([0, cost])
change_cost = 1
i = 1
while(change_cost > converge_change):
old_cost = cost
theta_values = theta_values - (lr * log_gradient(theta_values, X, Y))
cost = cost_func(theta_values, X, X)
cost_iter.append([i, cost])
change_cost = old_cost - cost
i+=1
return theta_values, np.array(cost_iter)
def pred_values(theta, X, hard=True):
#normalize
X = (X - np.mean(X, axis=0)) / np.std(X, axis=0)
pred_prob = logistic_func(theta, X)
p red_value = np.where(pred_prob >= .5, 1, 0)
if hard:
return pred_value
return pred_prob
the algorithm is supposed to predict 3 classifiers.
I can read in the data:
data = pd.read_csv('filepath')
data.loc[data["type"] == "type1", "type"] = 0
data.loc[data["type"] == "type2", "type"] = 1
data.loc[data["type"] == "type2", "type"] = 2
att1= [];
att2=[];
att3= [];
att4= [];
type=[];
for d in data["attribute1"]:
att1.append(d)
for d in data["attribute2"]:
att2.append(d)
for d in data["attribute3"]:
att3.append(d)
for d in data["attribute4"]:
att4.append(d)
for d in data["type"]:
type.append(d)
combinedClassArray = np.array([att1,att2,att3,att4])
X = combinedClassArray.T
y = type
#totalCount = type.count()
type1= data.loc[data["type"] == 0, "type"].count()
type2= data.loc[data["type"] == 1, "type"].count()
type3= data.loc[data["type"] == 1, "type"].count()
totalCount = type1+type2+type3
p = type1+type2
What i'm sure about is how i can insert my data to the algorithm.
Am I very far off?
You need a main function:
def main():
# your code here would be the calls to the algorithm with the parameters (your data)
if __name__ == "__main__":
main()

Categories

Resources