Jupyter script suddenly does not recognize class anymore (NameError) - python

I am trying to run a Jupyter script in PyCharm. The script takes about 2 hours to run. The first time I ran it, my machine ran out of memory and I had to exit PyCharm. I deleted some files from my PC and launched PyCharm again. When I opened my script and ran the code again, it raised an error: it no longer recognizes my class 'ProgressBar', although it did recognize it the first time I ran the script. I did not have this error before. Does anyone know what is going on here?
This is the script:
import sys
import collections
import itertools
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import mode
from scipy.spatial.distance import squareform
import pandas as p
plt.style.use('bmh')
%matplotlib inline
try:
from IPython.display import clear_output
have_ipython = True
except ImportError:
have_ipython = False
class KnnDtw(object):
    """K-nearest neighbor classifier using dynamic time warping
    as the distance measure between pairs of time series arrays

    Arguments
    ---------
    n_neighbors : int, optional (default = 5)
        Number of neighbors to use by default for KNN
    max_warping_window : int, optional (default = 10000)
        Maximum warping window allowed by the DTW dynamic
        programming function
    subsample_step : int, optional (default = 1)
        Step size for the timeseries array. By setting subsample_step = 2,
        the timeseries length will be reduced by 50% because every second
        item is skipped. Implemented by x[:, ::subsample_step]
    """

    def __init__(self, n_neighbors=5, max_warping_window=10000, subsample_step=1):
        self.n_neighbors = n_neighbors
        self.max_warping_window = max_warping_window
        self.subsample_step = subsample_step

    def fit(self, x, l):
        """Fit the model using x as training data and l as class labels

        Arguments
        ---------
        x : array of shape [n_samples, n_timepoints]
            Training data set for input into KNN classifier
        l : array of shape [n_samples]
            Training labels for input into KNN classifier
        """
        # Coerce to arrays: predict() indexes the labels with an integer
        # index matrix (self.l[knn_idx]), which plain lists do not support.
        self.x = np.asarray(x)
        self.l = np.asarray(l)

    def _dtw_distance(self, ts_a, ts_b, d=lambda x, y: abs(x - y)):
        """Returns the DTW distance between two timeseries.

        Arguments
        ---------
        ts_a, ts_b : array-like sequences of samples
            The two timeseries to compare; they are indexed one sample
            at a time (ts_a[i], ts_b[j]).
        d : callable (default = abs(x-y))
            The distance measure used for A_i - B_j in the
            DTW dynamic programming function

        Returns
        -------
        DTW distance between A and B
        """
        # Create cost matrix filled with a large sentinel value; cells
        # outside the warping window keep this value.
        ts_a, ts_b = np.array(ts_a), np.array(ts_b)
        M, N = len(ts_a), len(ts_b)
        cost = sys.maxsize * np.ones((M, N))

        # Initialize the first row and column
        cost[0, 0] = d(ts_a[0], ts_b[0])
        for i in range(1, M):
            cost[i, 0] = cost[i - 1, 0] + d(ts_a[i], ts_b[0])
        for j in range(1, N):
            cost[0, j] = cost[0, j - 1] + d(ts_a[0], ts_b[j])

        # Populate rest of cost matrix within window
        for i in range(1, M):
            for j in range(max(1, i - self.max_warping_window),
                           min(N, i + self.max_warping_window)):
                choices = cost[i - 1, j - 1], cost[i, j - 1], cost[i - 1, j]
                cost[i, j] = min(choices) + d(ts_a[i], ts_b[j])

        # Return DTW distance given window
        return cost[-1, -1]

    def _dist_matrix(self, x, y):
        """Computes the M x N distance matrix between two datasets
        using the DTW distance measure

        Arguments
        ---------
        x : array of shape [n_samples, n_timepoints]
        y : array of shape [n_samples, n_timepoints]

        Returns
        -------
        Distance matrix between each item of x and y with
        shape [x_n_samples, y_n_samples]
        """
        # Running count of computed distances, fed to the progress bar
        dm_count = 0

        # Compute condensed distance matrix (upper triangle) of pairwise
        # dtw distances when x and y are the same array
        if np.array_equal(x, y):
            x_s = np.shape(x)
            dm = np.zeros((x_s[0] * (x_s[0] - 1)) // 2, dtype=np.double)
            progress = ProgressBar(np.shape(dm)[0])
            for i in range(0, x_s[0] - 1):
                for j in range(i + 1, x_s[0]):
                    dm[dm_count] = self._dtw_distance(x[i, ::self.subsample_step],
                                                      y[j, ::self.subsample_step])
                    dm_count += 1
                    progress.animate(dm_count)
            # Convert to squareform
            dm = squareform(dm)
            return dm

        # Compute full distance matrix of dtw distances between x and y
        else:
            x_s = np.shape(x)
            y_s = np.shape(y)
            dm = np.zeros((x_s[0], y_s[0]))
            dm_size = x_s[0] * y_s[0]
            progress = ProgressBar(dm_size)
            for i in range(0, x_s[0]):
                for j in range(0, y_s[0]):
                    dm[i, j] = self._dtw_distance(x[i, ::self.subsample_step],
                                                  y[j, ::self.subsample_step])
                    # Update progress bar
                    dm_count += 1
                    progress.animate(dm_count)
            return dm

    def predict(self, x):
        """Predict the class labels or probability estimates for
        the provided data

        Arguments
        ---------
        x : array of shape [n_samples, n_timepoints]
            Array containing the testing data set to be classified

        Returns
        -------
        2 arrays representing:
            (1) the predicted class labels
            (2) the knn label count probability
        """
        dm = self._dist_matrix(x, self.x)

        # Identify the k nearest neighbors
        knn_idx = dm.argsort()[:, :self.n_neighbors]

        # Identify k nearest labels
        knn_labels = self.l[knn_idx]

        # Model Label: most frequent label among the neighbours, and the
        # fraction of the neighbours that voted for it
        mode_data = mode(knn_labels, axis=1)
        mode_label = mode_data[0]
        mode_proba = mode_data[1] / self.n_neighbors

        return mode_label.ravel(), mode_proba.ravel()
class ProgressBar:
    """Text progress bar (taken from PYMC).

    Renders a fixed-width bar such as ``[*****   50%        ]`` followed by
    an "N of M complete" suffix. ``animate`` is bound in ``__init__`` to the
    IPython or the plain-terminal variant, depending on the module-level
    ``have_ipython`` flag.
    """

    def __init__(self, iterations):
        self.iterations = iterations
        self.prog_bar = '[]'
        self.fill_char = '*'
        self.width = 40
        self.__update_amount(0)
        if have_ipython:
            self.animate = self.animate_ipython
        else:
            self.animate = self.animate_noipython

    def animate_ipython(self, iter):
        """Redraw the bar in place (carriage return, no newline)."""
        # Print first, then flush: flush() must not be passed to print(),
        # where it would run before the output and print as "None".
        print('\r', self, end='')
        sys.stdout.flush()
        self.update_iteration(iter + 1)

    def animate_noipython(self, iter):
        """Redraw the bar on a plain terminal."""
        if sys.platform.lower().startswith('win'):
            print(self, '\r', end='')
        else:
            # ESC [ A moves the cursor up one line so the next draw overwrites.
            print(self, chr(27) + '[A')
        self.update_iteration(iter + 1)

    def update_iteration(self, elapsed_iter):
        """Recompute the bar for `elapsed_iter` completed iterations."""
        self.__update_amount((elapsed_iter / float(self.iterations)) * 100.0)
        self.prog_bar += ' %d of %s complete' % (elapsed_iter, self.iterations)

    def __update_amount(self, new_amount):
        """Rebuild `prog_bar` for a completion percentage `new_amount` (0-100)."""
        percent_done = int(round((new_amount / 100.0) * 100.0))
        all_full = self.width - 2
        num_hashes = int(round((percent_done / 100.0) * all_full))
        self.prog_bar = '[' + self.fill_char * num_hashes + ' ' * (all_full - num_hashes) + ']'
        # Overlay the percentage text near the middle of the bar.
        pct_place = (len(self.prog_bar) // 2) - len(str(percent_done))
        pct_string = '%d%%' % percent_done
        self.prog_bar = self.prog_bar[0:pct_place] + \
            (pct_string + self.prog_bar[pct_place + len(pct_string):])

    def __str__(self):
        return str(self.prog_bar)
# --- DTW sanity check on two phase-shifted sine waves ---
time = np.linspace(0, 20, 1000)
amplitude_a = 5 * np.sin(time)
amplitude_b = 3 * np.sin(time + 1)

m = KnnDtw()
distance = m._dtw_distance(amplitude_a, amplitude_b)

fig = plt.figure(figsize=(12, 4))
_ = plt.plot(time, amplitude_a, label='A')
_ = plt.plot(time, amplitude_b, label='B')
_ = plt.title('DTW distance between A and B is %.2f' % distance)
_ = plt.ylabel('Amplitude')
_ = plt.xlabel('Time')
_ = plt.legend()

#m._dist_matrix(np.random.random((4,50)), np.random.random((4,50)))

# --- Import the HAR dataset ---
# Mapping table for classes
labels = {1: 'WALKING', 2: 'WALKING UPSTAIRS', 3: 'WALKING DOWNSTAIRS',
          4: 'SITTING', 5: 'STANDING', 6: 'LAYING'}

# Each X line is a whitespace-separated row of floats; each y line is one
# integer label. Context managers close the files as soon as they are read.
with open('UCI HAR Dataset/train/X_train.txt', 'r') as x_train_file:
    x_train = [[float(ts) for ts in line.split()] for line in x_train_file]
with open('UCI HAR Dataset/train/y_train.txt', 'r') as y_train_file:
    y_train = [int(line.rstrip('\n')) for line in y_train_file]
with open('UCI HAR Dataset/test/X_test.txt', 'r') as x_test_file:
    x_test = [[float(ts) for ts in line.split()] for line in x_test_file]
with open('UCI HAR Dataset/test/y_test.txt', 'r') as y_test_file:
    y_test = [int(line.rstrip('\n')) for line in y_test_file]

# Convert to numpy for efficiency
x_train = np.array(x_train)
y_train = np.array(y_train)
x_test = np.array(x_test)
y_test = np.array(y_test)

# --- 1-NN classification on every 10th sample ---
m = KnnDtw(n_neighbors=1, max_warping_window=10)
m.fit(x_train[::10], y_train[::10])
label, proba = m.predict(x_test[::10])

from sklearn.metrics import classification_report, confusion_matrix
# print(classification_report(label, y_test[::10], target_names=[l for l in labels.values()]))

conf_mat = confusion_matrix(label, y_test[::10])

fig = plt.figure(figsize=(6, 6))
width = np.shape(conf_mat)[1]
height = np.shape(conf_mat)[0]

res = plt.imshow(np.array(conf_mat), cmap=plt.cm.summer, interpolation='nearest')
# Annotate every non-empty cell with its count
for i, row in enumerate(conf_mat):
    for j, c in enumerate(row):
        if c > 0:
            plt.text(j - .2, i + .1, c, fontsize=16)

cb = fig.colorbar(res)
plt.title('Confusion Matrix')
_ = plt.xticks(range(6), [l for l in labels.values()], rotation=90)
_ = plt.yticks(range(6), [l for l in labels.values()])

# --- Execution time as a function of the warping window ---
# NOTE: this import rebinds the name `time`, which held the linspace array
# of the sine-wave demo above.
import time

time_taken = []
windows = [1, 2, 5, 10, 50, 100, 500, 1000, 5000]

for w in windows:
    begin = time.time()

    t = KnnDtw(n_neighbors=1, max_warping_window=w)
    t.fit(x_train[:20], y_train[:20])
    label, proba = t.predict(x_test[:20])

    end = time.time()
    time_taken.append(end - begin)

fig = plt.figure(figsize=(12, 5))
# 400 = 20 test samples x 20 training samples, i.e. the number of DTW
# computations per run, so the curve shows the time per DTW computation.
_ = plt.plot(windows, [elapsed / 400. for elapsed in time_taken], lw=4)
plt.title('DTW Execution Time with \nvarying Max Warping Window')
plt.ylabel('Execution Time (seconds)')
plt.xlabel('Max Warping Window')
plt.xscale('log')
Error code:
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-3-4c142a5156b6> in <module>
188 return mode_label.ravel(), mode_proba.ravel()
189
--> 190 class ProgressBar:
191 """This progress bar was taken from PYMC
192 """
<ipython-input-3-4c142a5156b6> in ProgressBar()
279 m = KnnDtw(n_neighbors=1, max_warping_window=10)
280 m.fit(x_train[::10], y_train[::10])
--> 281 label, proba = m.predict(x_test[::10])
282
283 from sklearn.metrics import classification_report, confusion_matrix
<ipython-input-3-4c142a5156b6> in predict(self, x)
173 """
174
--> 175 dm = self._dist_matrix(x, self.x)
176
177 # Identify the k nearest neighbors
<ipython-input-3-4c142a5156b6> in _dist_matrix(self, x, y)
145 dm_size = x_s[0]*y_s[0]
146
--> 147 p = ProgressBar(dm_size)
148
149 for i in range(0, x_s[0]):
NameError: name 'ProgressBar' is not defined

Related

How to implement self-adaptive weights in a neural network in PyTorch

I want to develop a Physics Informed Neural Network model in Pytorch. My network should be trained based on two losses: boundary condition (BC) and partial derivative equation (PDE). I am adding these two losses but the problem is that the BC is controlling the main loss, like the following figure:
This is how I make a simple finite-difference calculation for my 1D heat conduction problem:
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import numpy as np
from pyDOE import lhs
######### Finite difference solution
# Explicit (forward-time, centred-space) solution of the 1D heat equation
#     dT/dt = alpha * d2T/dx2
# on a rod with fixed end temperatures. One snapshot is stored per output
# time step `dt`.
# geometry:
L = 1  # length of the rod
# mesh:
dx = 0.01
nx = int(L/dx) + 1
x = np.linspace(0, L, nx)
# temporal grid:
t_sim = 1
dt = 0.01
nt = int(t_sim/dt)
# parametrization
alpha = 0.14340344168260039
# IC
t_ic = 4
# BC
t_left = 5   # temperature imposed at the left end
t_right = 3  # temperature imposed at the right end
# The explicit scheme is only stable for alpha*dt/dx**2 <= 0.5. The output
# step `dt` violates that, so each output step is split into `n_sub` equal
# sub-steps that satisfy the bound.
n_sub = max(1, int(np.ceil(alpha * dt / (0.5 * dx**2))))
r = alpha * (dt / n_sub) / dx**2
# Results
T = np.ones(nx) * t_ic
all_T = []
for i in range(0, nt):
    # Snapshot of the state at the start of output step i (all_T[0] is the
    # uniform initial condition).
    all_T.append(T.copy())
    for _ in range(n_sub):
        Tn = T.copy()
        T[1:-1] = Tn[1:-1] + r * (Tn[2:] - 2*Tn[1:-1] + Tn[0:-2])
        T[0] = t_left
        T[-1] = t_right
Then, the data is prepared for the PINN model in the next block of code:
# Build the training/test tensors for the PINN from the finite-difference
# results (`all_T`) and the grid parameters defined earlier.
x = torch.linspace(0, L, nx, dtype=torch.float32)
t = torch.linspace(0, t_sim, nt, dtype=torch.float32)
# NOTE: this rebinds T (previously the temperature array) to the time grid.
# Presumably T and X have shape (nt, nx), i.e. default 'ij' indexing — TODO confirm.
T, X = torch.meshgrid(t,x)
# Stack the nt stored snapshots into an (nt, nx) array.
Temps = np.concatenate (all_T).reshape(nt,nx)
# Two-column tensor of (x, t) pairs built from the transposed grids.
x_test = torch.hstack((X.transpose(1,0).flatten()[:,None], T.transpose(1,0).flatten()[:,None]))
y_test = torch.from_numpy(Temps) # finite-difference temperatures, used as reference values
lb = x_test[0] # first (x, t) row, used as lower bound
ub = x_test[-1] # last (x, t) row, used as upper bound
left_x = torch.hstack((X[:,0][:,None], T[:,0][:,None])) # x and t of left boundary
left_y = torch.ones(left_x.shape[0], 1) * t_left # Temperature of left boundary
left_y[0,0] = t_ic # first entry of the boundary is set to the initial temperature
right_x = torch.hstack((X[:,-1][:,None], T[:,0][:,None])) # x and t of right boundary
right_y = torch.ones(right_x.shape[0], 1) * t_right # Temperature of right boundary
right_y[0,0] = t_ic # first entry of the boundary is set to the initial temperature
bottom_x = torch.hstack((X[0,1:-1][:,None], T[0,1:-1][:,None])) # x and t of IC
bottom_y = torch.ones(bottom_x.shape[0], 1) * t_ic # Temperature of IC
No_BC = 1 # fraction of the BC points used for training (1 = all of them)
No_IC = 1 # fraction of the IC points used for training (1 = all of them)
# Random subsets (without replacement) of the boundary / initial points.
idx_l = np.random.choice(left_x.shape[0], int (left_x.shape[0]*No_BC), replace=False)
idx_r = np.random.choice(right_x.shape[0], int (right_x.shape[0]*No_BC), replace=False)
idx_b = np.random.choice(bottom_x.shape[0], int (bottom_x.shape[0]*No_IC), replace=False)
X_train_No = torch.vstack([left_x[idx_l,:], right_x[idx_r,:], bottom_x[idx_b,:]])
Y_train_No = torch.vstack([left_y[idx_l,:], right_y[idx_r,:], bottom_y[idx_b,:]])
# Collocation points for the PDE residual: N_f Latin-hypercube samples
# scaled into the box [lb, ub].
# NOTE(review): lhs presumably returns a NumPy array, so this mixes a torch
# tensor with an ndarray — confirm the resulting type is what lossPDE expects.
N_f = 5000
X_train_Nf = lb + (ub-lb)*lhs(2,N_f)
f_hat = torch.zeros(X_train_Nf.shape[0], 1, dtype=torch.float32) # zero array for loss of PDE
This is my script for PINN and I very much appreciate your help:
class FCN(nn.Module):
    """Fully connected tanh network used as a PINN for the 1D heat equation.

    Arguments
    ---------
    layers : sequence of int
        Layer widths, input first and output last, e.g. [2, 50, 50, 1].

    The PDE residual in `lossPDE` reads the module-level diffusivity `alpha`.
    """

    def __init__(self, layers):
        super().__init__()  # call __init__ from parent class
        self.activation = nn.Tanh()
        self.loss_function = nn.MSELoss(reduction='mean')
        # Initialise neural network as a list using nn.ModuleList
        self.linears = nn.ModuleList(
            [nn.Linear(int(layers[i]), int(layers[i + 1]))
             for i in range(len(layers) - 1)])
        self.iter = 0
        # Xavier Normal Initialization
        for linear in self.linears:
            nn.init.xavier_normal_(linear.weight.data, gain=1.0)
            nn.init.zeros_(linear.bias.data)

    def forward(self, x):
        """Forward pass; accepts a tensor or a NumPy array."""
        if not torch.is_tensor(x):
            x = torch.from_numpy(x)
        a = x.float()
        # Hidden layers come from the module's own layer list rather than a
        # global `layers` variable, so the network works for any instance.
        for linear in self.linears[:-1]:
            a = self.activation(linear(a))
        # Output layer has no activation.
        return self.linears[-1](a)

    # Loss Functions
    def lossBC(self, x_BC, y_BC):
        """Mean squared error between the prediction at x_BC and y_BC."""
        loss_BC = self.loss_function(self.forward(x_BC), y_BC)
        return loss_BC.float()

    def lossPDE(self, x_PDE):
        """Mean squared residual of f_t - alpha * f_xx at the points x_PDE.

        Column 0 of x_PDE is treated as x and column 1 as t.
        """
        g = x_PDE.clone()
        g.requires_grad = True  # Enable differentiation
        f = self.forward(g)
        # First derivatives [f_x, f_t]; ones_like keeps shape and device in
        # step with the network output.
        f_x_t = torch.autograd.grad(f, g, torch.ones_like(f),
                                    retain_graph=True, create_graph=True)[0]
        f_x = f_x_t[:, [0]]
        f_t = f_x_t[:, [1]]
        # Differentiate f_x alone: differentiating the summed [f_x, f_t]
        # columns would add the mixed derivative f_xt to f_xx.
        f_xx = torch.autograd.grad(f_x, g, torch.ones_like(f_x),
                                   create_graph=True)[0][:, [0]]
        residual = f_t - alpha * f_xx
        # The target residual is zero everywhere.
        return self.loss_function(residual, torch.zeros_like(residual)).float()

    def loss(self, x_BC, y_BC, x_PDE):
        """Sum of the boundary-condition loss and the PDE loss."""
        loss_bc = self.lossBC(x_BC.float(), y_BC.float())
        loss_pde = self.lossPDE(x_PDE.float())
        return loss_bc.float() + loss_pde.float()
And this is how I make the model, arrays representing losses and finally the plot:
layers = np.array([2, 50, 50, 50, 50, 50, 1])
PINN = FCN(layers)
optimizer = torch.optim.Adam(PINN.parameters(), lr=0.001)


def closure():
    """Compute both losses and accumulate their gradients for one step."""
    optimizer.zero_grad()
    loss_p = PINN.lossPDE(X_train_Nf)
    loss_p.backward()
    loss_b = PINN.lossBC(X_train_No, Y_train_No)
    loss_b.backward()
    return loss_b + loss_p


# Reference temperatures for the test points. x_test lists the (x, t) pairs
# position-major (built from the transposed grids), whereas y_test is stored
# as (time, position), so it is transposed before flattening to line up
# row for row with x_test.
y_test_column = y_test.T.reshape(-1, 1).float()

# Collect the histories in lists; they are converted to arrays once at the
# end instead of re-allocating an array on every iteration.
total_history = []
BC_history = []
PDE_history = []
test_BC_history = []

for i in range(10000):
    loss = optimizer.step(closure)
    total_history.append(loss.item())
    PDE_history.append(PINN.lossPDE(X_train_Nf).item())
    BC_history.append(PINN.lossBC(X_train_No, Y_train_No).item())
    with torch.no_grad():
        test_loss = PINN.lossBC(x_test, y_test_column)
    test_BC_history.append(test_loss.item())

total_l = np.array(total_history)
BC_l = np.array(BC_history)
PDE_l = np.array(PDE_history)
test_BC_l = np.array(test_BC_history)

import matplotlib.pyplot as plt
fig, ax = plt.subplots(1, 1, figsize=(9, 9))
ax.plot(PDE_l, c='g', lw=2, label='PDE loss in train')
ax.plot(BC_l, c='k', lw=2, label='BC loss in train')
ax.plot(test_BC_l, c='r', lw=2, label='BC loss in test')
ax.plot(total_l, c='b', lw=2, label='total loss in train')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
plt.legend()
plt.show()
You should not add the boundary-condition loss and the PDE loss together when performing backpropagation. Instead, backpropagate in turn on the PDE loss and on each of the boundary conditions used (Dirichlet or Neumann). When you add them, the network does not learn anything about the BC, because most of the loss is generated by the PDE term. The network therefore learns mostly from the PDE loss and almost nothing from the BC, as is clearly evident from your graph.
The loss function should be something like this :
for _ in different_loss_types: 1) PDE loss (backprop) on PDE 2) BC loss (backprop on BC)

How to plot gradient descent using plotly

I have been trying to replicate some work similar to the code below, but when I use the data from this link https://raw.githubusercontent.com/plotly/datasets/master/api_docs/mt_bruno_elevation.csv it throws an error. I think it is because of the shape of the data, but I don't know exactly how to modify it.
It will be great, if you help me to resolve the issue.
Here is my Code
from IPython.core.display import HTML
import plotly
import plotly.graph_objects as go
import noise
import numpy as np
import matplotlib
from mpl_toolkits.mplot3d import axes3d
%matplotlib inline
import pandas as pd
# Load the Mt. Bruno elevation grid from the plotly datasets repository.
data = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/api_docs/mt_bruno_elevation.csv')
# NOTE(review): z is a pandas DataFrame here; the code further down indexes
# it like a 2-D array and compares z.min() with a scalar — confirm whether a
# NumPy array is expected.
z = data
import numpy as np
from numpy.lib.stride_tricks import as_strided
def sliding_window(arr, window_size):
    """Construct a sliding window view of the array.

    Arguments
    ---------
    arr : 2-D array-like
    window_size : int, must be positive

    Returns
    -------
    A 4-D strided view `w` in which `w[i, j]` is the
    `window_size` x `window_size` window whose top-left cell is `arr[i, j]`.
    Along an axis that is shorter than the window, a single window covering
    the whole axis is returned instead.

    Raises
    ------
    ValueError if `arr` is not 2-D or `window_size` is not positive.
    """
    arr = np.asarray(arr)
    window_size = int(window_size)
    if arr.ndim != 2:
        raise ValueError("need 2-D input")
    if not (window_size > 0):
        raise ValueError("need a positive window size")
    shape = (arr.shape[0] - window_size + 1,
             arr.shape[1] - window_size + 1,
             window_size, window_size)
    if shape[0] <= 0:
        shape = (1, shape[1], arr.shape[0], shape[3])
    if shape[1] <= 0:
        shape = (shape[0], 1, shape[2], arr.shape[1])
    # Reuse the array's own strides for both the window-position axes and
    # the within-window axes. Deriving them from shape and itemsize is only
    # valid for C-contiguous input and silently reads the wrong cells for
    # Fortran-ordered or sliced arrays.
    strides = arr.strides + arr.strides
    return as_strided(arr, shape=shape, strides=strides)
def cell_neighbours(arr, i, j, d):
    """Return the flattened cells of `arr` within distance d of cell (i, j).

    The neighbourhood is the (2*d+1)-sized window centred on (i, j); near
    the edges of the array it is cropped to the cells that exist.
    """
    windows = sliding_window(arr, 2 * d + 1)
    n_row_windows, n_col_windows, window_rows, window_cols = windows.shape

    # Window whose top-left corner is closest to (i - d, j - d).
    row_window = np.clip(i - d, 0, n_row_windows - 1)
    col_window = np.clip(j - d, 0, n_col_windows - 1)

    # Crop the part of that window lying further than d from (i, j).
    top = max(0, i - d - row_window)
    left = max(0, j - d - col_window)
    bottom = window_rows - max(0, d - i + row_window)
    right = window_cols - max(0, d - j + col_window)

    patch = windows[row_window, col_window]
    return patch[top:bottom, left:right].ravel()
from dataclasses import dataclass
# The decorator is required: without it the class has no generated
# __init__, and descent_step(value, x, y) raises TypeError.
@dataclass
class descent_step:
    """Class for storing each step taken in gradient descent"""
    value: float    # array value at the step
    x_index: float  # column index of the step
    y_index: float  # row index of the step
def gradient_descent_3d(array, x_start, y_start, steps=50, step_size=1, plot=False):
    """Greedy descent on a 2-D grid: repeatedly move to the lowest cell
    within `step_size` of the current cell.

    Arguments
    ---------
    array : 2-D array indexed as array[y][x]
    x_start, y_start : starting column / row
    steps : maximum number of moves
    step_size : neighbourhood radius searched on every move
    plot : when True, draw the grid and the path with matplotlib

    Returns
    -------
    (value of the last cell selected, list of descent_step records)
    """
    # Initial point to start gradient descent at
    step = descent_step(array[y_start][x_start], x_start, y_start)

    # Store each step taken in gradient descent in a list
    step_history = [step]

    # Plot 2D representation of array with starting point as a red marker
    if plot:
        matplotlib.pyplot.imshow(array, origin='lower', cmap='terrain')
        matplotlib.pyplot.plot(x_start, y_start, 'ro')

    current_x = x_start
    current_y = y_start
    # Bound up front so the return value exists even when steps == 0.
    next_step = step.value

    # Loop through specified number of steps of gradient descent to take
    for i in range(steps):
        prev_x = current_x
        prev_y = current_y

        # Extract array of neighbouring cells around current step location
        neighbours = cell_neighbours(array, current_y, current_x, step_size)

        # Locate minimum in array (steepest slope from current point)
        next_step = neighbours.min()
        rows, cols = np.where(array == next_step)

        # The same value may occur elsewhere in the grid; take the first
        # match that lies inside the searched neighbourhood, so a step never
        # jumps further than step_size.
        nearby = ((np.abs(rows - current_y) <= step_size)
                  & (np.abs(cols - current_x) <= step_size))
        pick = int(np.argmax(nearby)) if nearby.any() else 0

        # Update current point to now be the next point after stepping
        current_x, current_y = cols[pick], rows[pick]
        step = descent_step(array[current_y][current_x], current_x, current_y)
        step_history.append(step)

        # Plot each step taken as a black line to the current point,
        # which is marked in red
        if plot:
            matplotlib.pyplot.plot([prev_x, current_x], [prev_y, current_y], 'k-')
            matplotlib.pyplot.plot(current_x, current_y, 'ro')

        # Stepping to the same location as before means convergence
        if prev_y == current_y and prev_x == current_x:
            print(f"Converged in {i} steps")
            break

    return next_step, step_history
# Search for the smallest neighbourhood radius (step_size) with which the
# descent, started from one random cell, ends on the global minimum of z.
np.random.seed(42)
global_minimum = z.min()
indices = np.where(z == global_minimum)
print(f"Target: {global_minimum} # {indices}")
step_size = 0
# Sentinel so that the loop below runs at least once.
found_minimum = 99999
# Random starting point
# NOTE(review): the bound 50 is hard-coded — presumably z is expected to be
# at least 50 x 50; confirm against the data actually loaded.
start_x = np.random.randint(0,50)
start_y = np.random.randint(0,50)
# Increase step size until convergence on global minimum
# NOTE(review): the comparison assumes global_minimum is a scalar, and the
# loop has no upper bound on step_size.
while found_minimum != global_minimum:
    step_size += 1
    found_minimum,steps = gradient_descent_3d(z,start_x,start_y,step_size=step_size,plot=False)
print(f"Optimal step size {step_size}")
# Repeat the successful run with plotting enabled.
found_minimum,steps = gradient_descent_3d(z,start_x,start_y,step_size=step_size,plot=True)
print(f"Steps: {steps}")
def multiDimenDist(point1, point2):
    """Return the Euclidean distance between two points.

    The number of dimensions is taken from `point1`; the Pythagorean
    theorem holds in any dimension, so the squared coordinate differences
    are summed and the square root of the total is returned.
    """
    squared_total = sum(
        (point2[axis] - point1[axis]) ** 2 for axis in range(len(point1)))
    return squared_total ** (1 / 2)
def findVec(point1, point2, unitSphere=False):
    """Return the vector pointing from `point1` to `point2`.

    With unitSphere=True the vector is divided by the distance between the
    two points, so it has length one instead of its original length.
    """
    # Per-coordinate difference (x, y, z, ...)
    finalVector = [point2[axis] - coord for axis, coord in enumerate(point1)]
    if not unitSphere:
        return finalVector
    totalDist = multiDimenDist(point1, point2)
    return [component / totalDist for component in finalVector]
def generate_3d_plot(step_history):
    """Build a plotly figure showing the descent path over the terrain.

    Arguments
    ---------
    step_history : list of step records with x_index, y_index and value

    Returns
    -------
    plotly Figure with a cone per step (direction to the next step), a red
    line through the steps, and the terrain surface.
    """
    # Initialise empty lists for markers
    step_markers_x = []
    step_markers_y = []
    step_markers_z = []
    step_markers_u = []
    step_markers_v = []
    step_markers_w = []

    # Use the step_history argument throughout (not a module-level list),
    # so the figure always describes the path that was passed in.
    last_index = len(step_history) - 1
    for index, step in enumerate(step_history):
        step_markers_x.append(step.x_index)
        step_markers_y.append(step.y_index)
        step_markers_z.append(step.value)

        # If we haven't reached the final step, calculate the vector
        # between the current step and the next step
        if index < last_index:
            following = step_history[index + 1]
            vec1 = [step.x_index, step.y_index, step.value]
            vec2 = [following.x_index, following.y_index, following.value]
            result_vector = findVec(vec1, vec2)
            step_markers_u.append(result_vector[0])
            step_markers_v.append(result_vector[1])
            step_markers_w.append(result_vector[2])
        else:
            # The final step has no successor: use a small placeholder vector
            step_markers_u.append(0.1)
            step_markers_v.append(0.1)
            step_markers_w.append(0.1)

    # Include cones at each marker to show direction of step, scatter3d is
    # to show the red line between points and surface for the terrain.
    # NOTE(review): `terrain` and `world` are not defined in the code shown
    # here — presumably a colorscale and the elevation array; confirm.
    fig = go.Figure(data=[
        go.Cone(
            x=step_markers_x,
            y=step_markers_y,
            z=step_markers_z,
            u=step_markers_u,
            v=step_markers_v,
            w=step_markers_w,
            sizemode="absolute",
            sizeref=2,
            anchor='tail'),
        go.Scatter3d(
            x=step_markers_x,
            y=step_markers_y,
            z=step_markers_z,
            mode='lines',
            line=dict(
                color='red',
                width=2
            )),
        go.Surface(colorscale=terrain, z=world, opacity=0.5)])

    # Z axis is limited to the extent of the terrain array
    fig.update_layout(
        title='Gradient Descent Steps',
        scene=dict(zaxis=dict(range=[world.min(), world.max()],),),)
    return fig
# Generate 3D plot from previous random starting location
fig = generate_3d_plot(steps)
# Write the figure to an HTML file (plotly.js referenced from the CDN) and
# hand the return value of plotly.offline.plot to IPython's HTML display.
# NOTE(review): presumably plot() returns the output file path by default
# rather than markup — confirm this displays as intended.
HTML(plotly.offline.plot(fig, filename='random_starting_point_3d_gradient_descent.html',include_plotlyjs='cdn'))
The error is occurring because found_minimum is an int, but global_minimum is a Series. I think the tutorial you're referencing assumes that the data is loaded as a numpy array, but it is never explicitly stated.
So, z = data.to_numpy() solves one problem and reveals another which is that the tutorial dataset is 50x50 and your data is 25x25. It's tempting to just change the limits of the random starting point, but this doesn't end up working well. The dataset is just too small for this implementation of gradient descent to appropriately converge.
To get around this issue, I just altered your dataset to manufacture a 50x50 set:
data_arr = data.to_numpy()
double_arr = np.append(data_arr, 1.5*data_arr + 50, axis=0)
quad_arr = np.append(double_arr, 1.5*double_arr + 50, axis=1)
Passing this quad_arr as needed throughout the code as well as updating the plotly colorscale to go.Surface(colorscale=Earth) gives:
Full, copy-pastable code:
from IPython.core.display import HTML
import plotly
import plotly.graph_objects as go
import noise
import numpy as np
import matplotlib
from mpl_toolkits.mplot3d import axes3d
%matplotlib inline
import pandas as pd
# Load the Mt. Bruno elevation grid from the plotly datasets repository.
data = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/api_docs/mt_bruno_elevation.csv')
data_arr = data.to_numpy()
# Manufacture a larger grid: first stack a scaled and shifted copy below the
# original (doubling the rows), then a scaled and shifted copy of that
# result beside it (doubling the columns).
double_arr = np.append(data_arr, 1.5*data_arr + 50, axis=0)
quad_arr = np.append(double_arr, 1.5*double_arr + 50, axis=1)
z = quad_arr
matplotlib.pyplot.imshow(z,origin='lower',cmap='terrain')
# Find maximum value index in numpy array
indices = np.where(z == z.max())
# np.where returns (rows, cols); the first match is used and x is the column.
max_z_x_location, max_z_y_location = (indices[1][0],indices[0][0])
matplotlib.pyplot.plot(max_z_x_location,max_z_y_location,'ro',markersize=15)
# Find minimum value index in numpy array
indices = np.where(z == z.min())
min_z_x_location, min_z_y_location = (indices[1][0],indices[0][0])
matplotlib.pyplot.plot(min_z_x_location,min_z_y_location,'yo',markersize=15)
import numpy as np
from numpy.lib.stride_tricks import as_strided
def sliding_window(arr, window_size):
    """Construct a sliding window view of the array.

    Arguments
    ---------
    arr : 2-D array-like
    window_size : int, must be positive

    Returns
    -------
    A 4-D strided view `w` in which `w[i, j]` is the
    `window_size` x `window_size` window whose top-left cell is `arr[i, j]`.
    Along an axis that is shorter than the window, a single window covering
    the whole axis is returned instead.

    Raises
    ------
    ValueError if `arr` is not 2-D or `window_size` is not positive.
    """
    arr = np.asarray(arr)
    window_size = int(window_size)
    if arr.ndim != 2:
        raise ValueError("need 2-D input")
    if not (window_size > 0):
        raise ValueError("need a positive window size")
    shape = (arr.shape[0] - window_size + 1,
             arr.shape[1] - window_size + 1,
             window_size, window_size)
    if shape[0] <= 0:
        shape = (1, shape[1], arr.shape[0], shape[3])
    if shape[1] <= 0:
        shape = (shape[0], 1, shape[2], arr.shape[1])
    # Take the strides from the array itself: computing them from the shape
    # and itemsize assumes C-contiguous memory and would address the wrong
    # cells for Fortran-ordered or sliced input.
    strides = arr.strides + arr.strides
    return as_strided(arr, shape=shape, strides=strides)
def cell_neighbours(arr, i, j, d):
    """Return the flattened cells of `arr` within distance d of cell (i, j).

    The neighbourhood is the (2*d+1)-sized window centred on (i, j); near
    the edges of the array it is cropped to the cells that exist.
    """
    views = sliding_window(arr, 2 * d + 1)
    row_count, col_count, view_height, view_width = views.shape

    # Pick the window whose top-left corner is closest to (i - d, j - d).
    view_row = np.clip(i - d, 0, row_count - 1)
    view_col = np.clip(j - d, 0, col_count - 1)

    # Bounds inside that window that keep only cells within d of (i, j).
    first_row = max(0, i - d - view_row)
    first_col = max(0, j - d - view_col)
    end_row = view_height - max(0, d - i + view_row)
    end_col = view_width - max(0, d - j + view_col)

    selected = views[view_row, view_col]
    return selected[first_row:end_row, first_col:end_col].ravel()
from dataclasses import dataclass
# The decorator is required: it generates the __init__ that lets
# descent_step(value, x_index, y_index) be constructed.
@dataclass
class descent_step:
    """Class for storing each step taken in gradient descent"""
    value: float    # array value at the step
    x_index: float  # column index of the step
    y_index: float  # row index of the step
def gradient_descent_3d(array, x_start, y_start, steps=50, step_size=1, plot=False):
    """Greedy descent on a 2-D grid: repeatedly move to the lowest cell
    within `step_size` of the current cell.

    Arguments
    ---------
    array : 2-D array indexed as array[y][x]
    x_start, y_start : starting column / row
    steps : maximum number of moves
    step_size : neighbourhood radius searched on every move
    plot : when True, draw the grid and the path with matplotlib

    Returns
    -------
    (value of the last cell selected, list of descent_step records)
    """
    # Initial point to start gradient descent at
    step = descent_step(array[y_start][x_start], x_start, y_start)

    # Store each step taken in gradient descent in a list
    step_history = [step]

    # Plot 2D representation of array with starting point as a red marker
    if plot:
        matplotlib.pyplot.imshow(array, origin='lower', cmap='terrain')
        matplotlib.pyplot.plot(x_start, y_start, 'ro')

    current_x = x_start
    current_y = y_start
    # Bound up front so the return value exists even when steps == 0.
    next_step = step.value

    # Loop through specified number of steps of gradient descent to take
    for i in range(steps):
        prev_x = current_x
        prev_y = current_y

        # Extract array of neighbouring cells around current step location
        neighbours = cell_neighbours(array, current_y, current_x, step_size)

        # Locate minimum in array (steepest slope from current point)
        next_step = neighbours.min()
        rows, cols = np.where(array == next_step)

        # A value can repeat across the grid; restrict the lookup to the
        # searched neighbourhood so the path cannot jump to a distant cell
        # that merely holds the same value.
        nearby = ((np.abs(rows - current_y) <= step_size)
                  & (np.abs(cols - current_x) <= step_size))
        pick = int(np.argmax(nearby)) if nearby.any() else 0

        # Update current point to now be the next point after stepping
        current_x, current_y = cols[pick], rows[pick]
        step = descent_step(array[current_y][current_x], current_x, current_y)
        step_history.append(step)

        # Plot each step taken as a black line to the current point,
        # which is marked in red
        if plot:
            matplotlib.pyplot.plot([prev_x, current_x], [prev_y, current_y], 'k-')
            matplotlib.pyplot.plot(current_x, current_y, 'ro')

        # Stepping to the same location as before means convergence
        if prev_y == current_y and prev_x == current_x:
            print(f"Converged in {i} steps")
            break

    return next_step, step_history
# Search for the smallest neighbourhood radius (step_size) with which the
# descent, started from one random cell, ends on the global minimum of z.
np.random.seed(42)
global_minimum = z.min()
indices = np.where(z == global_minimum)
print(f"Target: {global_minimum} # {indices}")

step_size = 0
# Sentinel so that the loop below runs at least once.
found_minimum = 99999

# Random starting point, bounded by the grid that was actually loaded
# (columns for x, rows for y).
start_x = np.random.randint(0, z.shape[1])
start_y = np.random.randint(0, z.shape[0])

# Increase step size until convergence on global minimum
print('==========================')
print(found_minimum)
print(global_minimum)
print('==========================')
# A radius as large as the grid already covers every cell, so searching
# beyond it cannot help; the cap keeps the loop from running forever.
max_step_size = max(z.shape)
while found_minimum != global_minimum and step_size < max_step_size:
    step_size += 1
    try:
        found_minimum, steps = gradient_descent_3d(
            z, start_x, start_y, step_size=step_size, plot=True)
    except ValueError as err:
        # Best effort: skip radii the descent cannot handle, but say so
        # instead of hiding the failure.
        print(f"step_size {step_size} skipped: {err}")

if found_minimum != global_minimum:
    print(f"No step size up to {max_step_size} reached the global minimum")
print(f"Optimal step size {step_size}")
found_minimum, steps = gradient_descent_3d(
    z, start_x, start_y, step_size=step_size, plot=True)
print(f"Steps: {steps}")
def multiDimenDist(point1, point2):
    """Return the straight-line distance between `point1` and `point2`.

    Works for any number of dimensions (taken from `point1`): the squared
    differences of the coordinates are accumulated and the square root of
    the total is returned.
    """
    total = 0
    for axis in range(len(point1)):
        delta = point2[axis] - point1[axis]
        total += delta ** 2
    return total ** (1 / 2)
def findVec(point1, point2, unitSphere=False):
    """Return the vector that leads from `point1` to `point2`.

    Setting unitSphere to True scales the vector down to length one
    (dividing by the distance between the points) instead of keeping its
    original length.
    """
    # Total difference for every coordinate (x, y, z, ...)
    finalVector = [point2[axis] - start for axis, start in enumerate(point1)]
    if unitSphere:
        totalDist = multiDimenDist(point1, point2)
        return [part / totalDist for part in finalVector]
    return finalVector
def generate_3d_plot(step_history):
    """Build a plotly figure showing the descent path over the terrain.

    Arguments
    ---------
    step_history : list of step records with x_index, y_index and value

    Returns
    -------
    plotly Figure with a cone per step (direction to the next step), a red
    line through the steps, and the module-level `quad_arr` as the surface.
    """
    # Initialise empty lists for markers
    step_markers_x = []
    step_markers_y = []
    step_markers_z = []
    step_markers_u = []
    step_markers_v = []
    step_markers_w = []

    # Use the step_history argument throughout (not a module-level list),
    # so the figure always describes the path that was passed in.
    last_index = len(step_history) - 1
    for index, step in enumerate(step_history):
        step_markers_x.append(step.x_index)
        step_markers_y.append(step.y_index)
        step_markers_z.append(step.value)

        # If we haven't reached the final step, calculate the vector
        # between the current step and the next step
        if index < last_index:
            following = step_history[index + 1]
            vec1 = [step.x_index, step.y_index, step.value]
            vec2 = [following.x_index, following.y_index, following.value]
            result_vector = findVec(vec1, vec2)
            step_markers_u.append(result_vector[0])
            step_markers_v.append(result_vector[1])
            step_markers_w.append(result_vector[2])
        else:
            # The final step has no successor: use a small placeholder vector
            step_markers_u.append(0.1)
            step_markers_v.append(0.1)
            step_markers_w.append(0.1)

    # Include cones at each marker to show direction of step, scatter3d is
    # to show the red line between points and surface for the terrain
    fig = go.Figure(data=[
        go.Cone(
            x=step_markers_x,
            y=step_markers_y,
            z=step_markers_z,
            u=step_markers_u,
            v=step_markers_v,
            w=step_markers_w,
            sizemode="absolute",
            sizeref=2,
            anchor='tail'),
        go.Scatter3d(
            x=step_markers_x,
            y=step_markers_y,
            z=step_markers_z,
            mode='lines',
            line=dict(
                color='red',
                width=2
            )),
        go.Surface(colorscale='Earth', z=quad_arr, opacity=0.5)])

    # Z axis is limited to the extent of the terrain array
    fig.update_layout(
        title='Gradient Descent Steps',
        scene=dict(zaxis=dict(range=[quad_arr.min(), quad_arr.max()],),),)
    return fig
# Generate 3D plot from previous random starting location
fig = generate_3d_plot(steps)
# Write the figure to an HTML file (plotly.js referenced from the CDN) and
# pass the return value of plotly.offline.plot to IPython's HTML display.
# NOTE(review): presumably plot() returns the output file path by default
# rather than markup — confirm this displays as intended.
HTML(plotly.offline.plot(fig, filename='random_starting_point_3d_gradient_descent.html',include_plotlyjs='cdn'))

Something wrong with Sigmoid curve for Logistic Regression

I'm trying to use logistic regression on the popularity of hit songs on Spotify from 2010-2019 based on their durations, using data collected from a .csv file. Since the popularity value of each song is numerical, I have converted each of them to a binary value, "0" or "1": if the popularity of a hit song is 70 or less, I replace its value with 0, and if it is more than 70, I replace it with 1. The rest of my code is a fairly standard logistic regression with a sigmoid function, yet for some reason the end result is a straight line instead of a sigmoid curve.
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Load the song table; the path is resolved relative to the working directory.
df = pd.read_csv('top10s [SubtitleTools.com] (2).csv')

# Copy each feature column of interest into its own NumPy array.
BPM = np.array(df.bpm)
Energy = np.array(df.nrgy)
Dance = np.array(df.dnce)
dB = np.array(df.dB)
Live = np.array(df.live)
Valence = np.array(df.val)
Acous = np.array(df.acous)
Speech = np.array(df.spch)

# Binarise the popularity column in place: <= 70 becomes 0, > 70 becomes 1.
# The order matters: the first assignment only produces zeros, which the
# second condition (> 70) can never match.
df.loc[df['popu'] <= 70, 'popu'] = 0
df.loc[df['popu'] > 70, 'popu'] = 1
def Logistic_Regression(X, y, iterations, alpha):
    """Fit logistic-regression coefficients by gradient ascent on the log-likelihood.

    Arguments
    ---------
    X : array
        Feature values. A row of ones is stacked on top of ``X`` and the result
        is transposed, so ``X`` must be 1-D (n_samples,) or shaped
        (n_features, n_samples).
    y : array of shape [n_samples]
        Binary targets.
    iterations : int
        Number of update steps to run.
    alpha : float
        Step size applied to the (unnormalised) gradient.

    Returns
    -------
    b : array
        Coefficients, intercept first.
    """
    intercept_row = np.ones((X.shape[0], ))
    design = np.vstack((intercept_row, X)).T
    b = np.zeros(design.shape[1])
    for step in range(iterations):
        p_hat = sigmoid(np.dot(design, b))
        # Gradient of the log-likelihood is X^T (y - p); step uphill.
        b = b + alpha * np.dot(design.T, (y - p_hat))
        if step % 1000 == 0:
            # Progress report every 1000th step.
            print('LL, i ', log_likelihood(design, y, b), step)
    return b
def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)); element-wise for arrays."""
    denominator = 1 + np.exp(-z)
    return 1 / denominator
def log_likelihood(X, y, b):
    """Return the Bernoulli log-likelihood of coefficients ``b``.

    Arguments
    ---------
    X : array of shape [n_samples, n_coefficients]
        Design matrix (including the intercept column, if any).
    y : array of shape [n_samples]
        Binary targets.
    b : array of shape [n_coefficients]
        Coefficient vector.

    Returns
    -------
    LL : float
        sum(y * z - log(1 + exp(z))) with z = X . b
    """
    z = np.dot(X, b)
    # log(1 + exp(z)) is written as logaddexp(0, z): the naive form overflows
    # to inf once z exceeds ~709, turning the whole sum into -inf.
    LL = np.sum(y * z - np.logaddexp(0, z))
    return LL
def LR1():
    """Fit binarised popularity against song duration and plot the result.

    Reads the ``dur`` and ``popu`` columns of the module-level ``df``, scatters
    the raw points coloured by class, fits the coefficients with
    ``Logistic_Regression`` and overlays the predicted probability curve.
    """
    Dur = np.array(df.dur)
    Pop = np.array([int(value) for value in df.popu])

    plt.figure(figsize=(10, 8))
    colormap = np.array(['r', 'b'])
    # Class 0 is drawn red, class 1 blue.
    plt.scatter(Dur, Pop, c=colormap[Pop], alpha=.4)

    b = Logistic_Regression(Dur, Pop, iterations=8000, alpha=0.00005)
    print('Done')

    # b[0] is the intercept, b[1] the duration coefficient.
    p_hat = sigmoid(np.dot(Dur, b[1]) + b[0])
    # Sort by duration so the curve is drawn left to right.
    order = np.argsort(Dur)
    plt.plot(Dur[order], p_hat[order])
    plt.show()
# Run the duration-vs-popularity fit and show the plot.
LR1()
# Bare expression: has no effect in a plain script; presumably left here so a
# notebook cell displays the DataFrame.
df
Your logistic-regression parameters aren't coming out correctly, so something is wrong in your gradient descent.
If I do
from sklearn.linear_model import LogisticRegression

# Small sample of the data: binarised popularity and song duration.
df = pd.DataFrame({'popu': [0, 1, 0, 1, 1, 0, 0, 1, 0, 0],
                   'dur': [217, 283, 200, 295, 221, 176, 206, 260, 217, 213]})
# Take the columns from this frame so the snippet does not rely on variables
# that only exist as locals inside LR1().
Dur = df['dur'].values
Pop = df['popu'].values
logreg = LogisticRegression()
# fit() takes X as (n_samples, n_features) and y as a 1-D vector.
logreg.fit(Dur.reshape(-1, 1), Pop)
print(logreg.coef_)
print(logreg.intercept_)
I get [0.86473507, -189.79655798]
whereas your params (b) come out [0.012136874150412973 -0.2430389407767768] for this data.
Plot of your vs scikit logregs here

Fastest way to perform sparse matrix multiplications in Python

BACKGROUND: I am trying to build a real-time drum simulation model, for which I need really fast matrix-vector products. My matrices are of the size ~5000-10000 rows/cols, out of which only 6 entries per row are non-zero, hence I am inclined to use sparse matrices. I am using scipy.sparse module. The iterations are as below.
# One time step of the update. Per the accompanying text, the *_sparse operands
# and Vj_mat are CSR matrices, Vj is a NumPy array and Uj holds one row per step.
Vjk_plus_sparse = Vjk_minus_sparse.transpose()
# Sparse matrix times the dense `constant` operand.
Vj = Vjk_plus_sparse.dot(constant)
# Zero the entries of Vj at the indices listed in Nr.
np.put(Vj, Nr, 0.0)
# Row t of Uj is row t-1 advanced by Vj / fs.
Uj[t] = Uj[t-1] + np.transpose(Vj)/fs
# The two multiply() lines below are the ones reported as the bottleneck.
Vj_mat = adj_mat_sparse.multiply(Vj)
Vjk_minus_sparse = Vj_mat-Vjk_plus_sparse.multiply(end_gain)
Here, Vjk_plus_sparse, Vjk_minus_sparse and Vj_mat are sparse CSR matrices, Vj is a numpy array, and Uj is a numpy matrix where each row represents Uj(t). end_gain is an array which is a static numpy array for dampening of vibrations.
THE ISSUE: A single iteration takes about 3 ms for size = 4250, with the most significant steps being the last 2 lines, which together take about 2.5 ms.
I would ideally need it to run in 0.1 ms, which would be more than a 10x speedup. This is the maximum extent of vectorization possible for the problem, and I cannot parallelize across time steps because I am marching in time; doing so would not be physically accurate.
ATTEMPTS: I tried fiddling with the sparse data structures, and found best performance with all of them being CSR (Compressed Sparse Row), with the values as quoted above. I also tried to replace the multiply() method with a matrix multiplication, by repeating Vj, but that worsened the time, as the resultant operation would be a sparse*dense operation.
How can I speed this up within python itself? I am open to trying c++ as well, though migrating now would be a major pain. Also, since scipy is essentially based in c, would it even give that much of a speedup?
Added a complete runnable example
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.patches
import math
from mpl_toolkits import mplot3d
import numpy as np
import scipy.sparse as sp
import scipy.fftpack as spf
import matplotlib.animation as animation
import time
sqrt_3 = 1.73205080757
class Pt:
    """A planar point that can serve as a mesh node.

    Attributes
    ----------
    x_0, y_0 : coordinates of the point
    id : integer id, -1 until a caller assigns one
    neighbours : list that callers fill with the ids of adjacent nodes
    distance : Euclidean distance of the point from the origin (0, 0)
    """

    def __init__(self, x_0, y_0):
        self.x_0, self.y_0 = x_0, y_0
        self.id = -1
        self.neighbours = []
        squared_norm = x_0**2 + y_0**2
        self.distance = squared_norm**0.5
class Circle:
    """Lattice mesh of ``Pt`` nodes filling a disc, each node having up to six neighbours.

    Attributes
    ----------
    radius : points farther than this from the origin are not added to the mesh
    center : the ``Pt`` the mesh is grown from (stored at index 0 of ``nodes``)
    nodes : list of ``Pt``; a node's index in this list is the id that other
        nodes store in their ``neighbours`` lists
    """

    def __init__(self, radius, center):
        self.radius = radius
        self.center = center
        self.nodes = []

    @staticmethod
    def _site_key(pt):
        """Return a dictionary key identifying the lattice site of ``pt``."""
        # Coordinates are rounded so that a site reached along different paths
        # (and therefore with different floating-point error) maps to one key.
        return (round(pt.x_0, 5), round(pt.y_0, 5))

    def construct_mesh(self, unit):
        """Grow the mesh breadth-first from ``center``.

        Neighbouring sites are ``2 * unit`` apart in six directions spaced
        60 degrees from each other. Distances are measured from the origin,
        so the mesh is only a disc around ``center`` when ``center`` is (0, 0).

        Arguments
        ---------
        unit : float
            Half of the spacing between adjacent nodes.
        """
        from collections import deque

        step = 2 * unit
        half_sqrt3 = (3**0.5) / 2
        delta = [(1., 0.), (1./2, half_sqrt3), (-1./2, half_sqrt3),
                 (-1., 0.), (-1./2, -half_sqrt3), (1./2, -half_sqrt3)]

        self.center.distance = 0
        self.center.id = 0
        self.nodes.append(self.center)
        # Maps a rounded site position to the id of the node stored there.
        node_dict = {self._site_key(self.center): 0}
        curr_id = 1

        # deque gives O(1) pops from the front; list.pop(0) is O(n).
        queue = deque([self.center])
        while queue:
            curr_pt = queue.popleft()
            for dx, dy in delta:
                temp_pt = Pt(curr_pt.x_0 + step*dx, curr_pt.y_0 + step*dy)
                temp_pt.distance = (temp_pt.x_0 ** 2 + temp_pt.y_0 ** 2)**0.5
                if not temp_pt.distance <= self.radius:
                    # Outside the disc: neither a node nor a neighbour.
                    continue
                site = self._site_key(temp_pt)
                if site not in node_dict:
                    # First visit of this site: register it as a new node.
                    temp_pt.id = curr_id
                    self.nodes.append(temp_pt)
                    node_dict[site] = curr_id
                    curr_id += 1
                    queue.append(temp_pt)
                curr_pt.neighbours.append(node_dict[site])

    def plot_neighbours(self, pt):
        """Scatter-plot ``pt`` together with the nodes listed in its ``neighbours``."""
        x = [pt.x_0] + [self.nodes[i].x_0 for i in pt.neighbours]
        y = [pt.y_0] + [self.nodes[i].y_0 for i in pt.neighbours]
        plt.scatter(x, y)
        plt.axis('scaled')

    def boundary_node_ids(self):
        """Return the indices of all nodes that have fewer than six neighbours."""
        return [j for j, node in enumerate(self.nodes)
                if len(node.neighbours) < 6]

    def add_rim(self, boundary_node_ids, unit):
        """Attach one extra node radially outside each given boundary node.

        Each new node is placed ``unit`` farther from the origin along the ray
        through its boundary node, appended to ``nodes`` and linked to that
        boundary node in both directions.

        Arguments
        ---------
        boundary_node_ids : list of int
            Indices into ``nodes`` of the boundary nodes.
        unit : float
            Radial offset of the new nodes.

        Returns
        -------
        rim_ids : list of int
            Indices of the newly added nodes, in the order of ``boundary_node_ids``.

        Raises
        ------
        ZeroDivisionError
            If a boundary node has ``distance`` 0 (no direction to push along).
        """
        rim_ids = []
        N = len(self.nodes)
        for i, boundary_id in enumerate(boundary_node_ids):
            boundary = self.nodes[boundary_id]
            d = boundary.distance
            xp = boundary.x_0
            yp = boundary.y_0
            new_point = Pt(xp + xp*unit/d, yp + yp*unit/d)
            # Nodes are appended one per iteration, so this is also the index
            # the new node gets in self.nodes.
            new_point.id = N + i
            rim_ids.append(new_point.id)
            self.nodes.append(new_point)
            boundary.neighbours.append(new_point.id)
            new_point.neighbours.append(boundary_id)
        return rim_ids
def find_nearest_point(mesh, pt):
    """Return the index of the mesh node whose ``distance`` is closest to ``pt.distance``.

    Only the stored ``distance`` attributes are compared, not the x/y
    coordinates; among several equally good nodes the lowest index wins.

    Arguments
    ---------
    mesh : object with a ``nodes`` list whose items have a ``distance`` attribute
    pt : object with a ``distance`` attribute

    Returns
    -------
    closest_point_id : integer index into ``mesh.nodes``

    Raises
    ------
    ValueError
        If ``mesh.nodes`` is empty.
    """
    # Built with a comprehension instead of an xrange() loop, which does not
    # exist on Python 3.
    distances_from_center = np.array(
        [node.distance for node in mesh.nodes], dtype=float)
    target_distance = pt.distance
    closest_point_id = np.argmin(np.abs(distances_from_center - target_distance))
    return closest_point_id
def init_impulse(mesh, impulse, Vj, poi, roi, rim_ids=None):
    """Initialise the node values with a cone-shaped impulse around one node.

    Node ``i`` receives ``impulse * max(0, 1 - r / roi)``, where ``r`` is its
    distance from node ``poi``; rim nodes are forced to zero. The clamp is
    applied to the shape factor rather than to the product, so a negative
    ``impulse`` yields the same cone with the opposite sign.

    Arguments
    ---------
    mesh : object with a ``nodes`` list (items expose x_0, y_0, neighbours)
    impulse : float
        Peak value, reached at node ``poi``.
    Vj : array of length len(mesh.nodes) (1-D or a column)
        Filled in place.
    poi : int
        Index of the node at the centre of the impulse.
    roi : float
        Radius beyond which the impulse is zero.
    rim_ids : iterable of int, optional
        Node indices to hold at zero. Defaults to the module-level ``Nr``.

    Returns
    -------
    Vj : the array passed in
    data : list of float
        For every node, in order, half of its value repeated once per neighbour.
    """
    if rim_ids is None:
        rim_ids = Nr
    rim = set(rim_ids)  # O(1) membership tests inside the loop
    hit = mesh.nodes[poi]
    data = []
    for i in range(len(Vj)):
        node = mesh.nodes[i]
        if i in rim:
            Vj[i] = 0.
        else:
            r = ((node.x_0 - hit.x_0)**2 + (node.y_0 - hit.y_0)**2)**0.5
            Vj[i] = impulse * max(0., 1. - (r/roi))
        # .item() replaces np.asscalar, which newer NumPy releases no longer provide.
        half = np.asarray(Vj[i]).item() / 2.
        data.extend([half] * len(node.neighbours))
    return Vj, data
# --- Physical and discretisation parameters ---
r = 0.1016  # Radius of drum head
# rho = 2500  # Density of drum head (disabled)
thickness = 0.001  # Thickness of membrane
# tension = 1500  # Tension in membrane in N (disabled)
param = 0.9
c = (param/thickness)**(0.5)  # Speed of wave in string
duration = 0.25
fs = 4000
delta = c/fs

# --- Mesh construction ---
center = Pt(0,0)
center.id = 0
point_of_impact = Pt(r/2., 0)
mesh = Circle(r,center)
mesh.construct_mesh(delta)
N = len(mesh.nodes)
# Nodes with fewer than six neighbours lie on the edge of the mesh.
Nb = [j for j in range(N) if len(mesh.nodes[j].neighbours) < 6]
# Add one rim node per edge node; Nr holds the ids of the rim nodes.
Nr = mesh.add_rim(Nb, delta)
N = len(mesh.nodes)  # node count now includes the rim
print(N)
# Edge list of the mesh: one (row, col) entry per neighbour link of each node.
row_ind = []
col_ind = []
for j in range(N):
    for k in mesh.nodes[j].neighbours:
        row_ind.append(j)
        col_ind.append(k)
data = np.ones(len(col_ind))
adj_mat_sparse = sp.csr_matrix((data, (row_ind, col_ind)), shape = (N,N))
# An empty N x N sparse matrix is requested with a shape *tuple*; passing the
# list [N, N] would build a 1 x 2 matrix holding the two values instead.
Vjk_plus = sp.csr_matrix((N, N))
Vj = np.zeros([N,1])
Uj = np.zeros([int(duration*fs), N])
Vj_mat = sp.csc_matrix((N, N))
closest_point_id = find_nearest_point(mesh, point_of_impact)
Vj, Vjk_data = init_impulse(mesh, -10.0, Vj, closest_point_id, r/10.)
# Vjk_data has one value per neighbour link, in the same order as the edge list.
Vjk_minus_sparse = sp.csr_matrix((Vjk_data, (row_ind, col_ind)), shape = (N,N))
constant = (1./3)*np.ones([N,1])
# First step, written out explicitly: it reuses the Vj produced by
# init_impulse instead of recomputing it from Vjk_plus.
Vjk_plus = Vjk_minus_sparse.transpose()
np.put(Vj, Nr, 0.0)
Uj[1] = Uj[0] + np.transpose(Vj)/fs
Vj_mat = adj_mat_sparse.multiply(Vj)
Vjk_minus_sparse = Vj_mat - Vjk_plus
end_gain = np.ones([N,1])
# Gain applied at the rim nodes; with the value 1.0 this assignment changes nothing.
end_gain[Nr] = 1.0
# March forward one step at a time; rows 0 and 1 of Uj are already set.
n_steps = int(duration*fs)
for t in range(2, n_steps):
    Vjk_plus = Vjk_minus_sparse.transpose()
    Vj = Vjk_plus.dot(constant)
    # Hold the rim nodes at zero.
    np.put(Vj, Nr, 0.0)
    Uj[t] = Uj[t-1] + np.transpose(Vj)/fs
    Vj_mat = adj_mat_sparse.multiply(Vj)
    scaled_plus = Vjk_plus.multiply(end_gain)
    Vjk_minus_sparse = Vj_mat - scaled_plus

Questions on Logistic Regression

I'm now using the training set from OpenClassroom(http://openclassroom.stanford.edu/MainFolder/DocumentPage.php?course=DeepLearning&doc=exercises/ex4/ex4.html) to give it a try on Logistic Regression, and I only use LR,unlike that page which uses LR and Newton's methods.
below is my code:
from numpy import *
import matplotlib.pyplot as plt
def loadDataSet(x_path='../ex4x.dat', y_path='../ex4y.dat'):
    """Read the feature and label files into two Python lists.

    Each non-blank line of ``x_path`` must hold two whitespace-separated
    numbers; each non-blank line of ``y_path`` must start with one number.

    Returns
    -------
    dataMat : list of [1.0, x1, x2] rows (the leading 1.0 is the bias term)
    labelMat : list of float labels
    """
    dataMat = []
    labelMat = []
    # `with` closes the files even if parsing fails part-way through.
    with open(x_path) as frX:
        for line1 in frX:
            lineArr1 = line1.split()
            if not lineArr1:
                continue  # skip blank lines instead of failing on them
            dataMat.append([1.0, float(lineArr1[0]), float(lineArr1[1])])
    with open(y_path) as frY:
        for line2 in frY:
            lineArr2 = line2.split()
            if not lineArr2:
                continue
            labelMat.append(float(lineArr2[0]))
    return dataMat, labelMat
def sigmoid(inX):
    """Return the logistic function 1 / (1 + exp(-inX)); element-wise for arrays."""
    denominator = 1 + exp(-inX)
    return 1.0 / denominator
# def autoNorm(dataSet):
# # newValue = (oldValue-min)/(max-min)
# minVals = min(dataSet)
# maxVals = max(dataSet)
# ranges = list(map(lambda x: x[0]-x[1], zip(maxVals, minVals)))
# normDataSet = zeros(shape(dataSet))
# m,n = shape(dataSet)
# normDataSet = list(map(lambda x: x[0]-x[1], zip(dataSet,tile(minVals, (m,1)))))
# normDataSet = normDataSet/tile(ranges, (m,1))
# return normDataSet, ranges, minVals
def gradDescent(dataMatIn, classLabels, alpha=0.001, maxCycles=100000):
    """Fit logistic-regression weights by batch gradient descent.

    Arguments
    ---------
    dataMatIn : list of rows, shape [m, n]
        Design matrix including the bias column.
    classLabels : flat list of m labels (0 or 1)
    alpha : float, optional
        Learning rate.
    maxCycles : int, optional
        Number of iterations.

    Returns
    -------
    theta : matrix of shape [n, 1]
    """
    # asmatrix is used because the short `mat` alias is gone in NumPy 2.
    x = asmatrix(dataMatIn)
    y = asmatrix(classLabels).transpose()
    m, n = shape(x)
    # Start from zeros: with all-ones weights and unscaled features the sigmoid
    # saturates at 1, so log(1 - h) below evaluates log(0).
    theta = asmatrix(zeros((n, 1)))
    for k in range(maxCycles):
        h = sigmoid(x * theta)
        error = h - y
        # Mean cross-entropy; [0, 0] pulls the scalar out of the 1x1 matrix.
        cost = (-1*dot(log(h).T, y) - dot((1-y).T, log(1-h)))[0, 0] / m
        print("Iteration %d | Cost: %f" % (k, cost))
        theta = theta - alpha * (x.transpose() * error / m)
    return theta
def plotBestFit(weights):
    """Plot the data set, coloured by label, with the fitted decision line.

    The data is re-read with ``loadDataSet()``. The line drawn is
    ``weights[0] + weights[1]*x1 + weights[2]*x2 = 0``.

    Arguments
    ---------
    weights : array of shape [3, 1]
        Bias weight followed by the two feature weights.
    """
    dataMat, labelMat = loadDataSet()
    dataArr = array(dataMat)
    n = shape(dataArr)[0]
    xcord1 = []; ycord1 = []
    xcord2 = []; ycord2 = []
    for i in range(n):
        if int(labelMat[i]) == 1:
            xcord1.append(dataArr[i, 1]); ycord1.append(dataArr[i, 2])
        else:
            xcord2.append(dataArr[i, 1]); ycord2.append(dataArr[i, 2])
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(xcord1, ycord1, s=30, c='red', marker='s')
    ax.scatter(xcord2, ycord2, s=30, c='green')
    # Plain scalar bounds from the first feature column, rather than 1x1
    # matrices obtained by running the builtin min/max over matrix rows.
    min_x = dataArr[:, 1].min()
    max_x = dataArr[:, 1].max()
    x = arange(min_x, max_x, 1)
    y = (-weights[0]-weights[1]*x)/weights[2]
    ax.plot(x, y)
    plt.xlabel('X1'); plt.ylabel('X2');
    plt.show()
# Train on the full data set, show the fitted weights and plot the boundary.
dataMat, classLabel = loadDataSet()
weights = gradDescent(dataMat, classLabel)
# Function-call form of print runs on both Python 2 and Python 3.
print(weights)
# getA() converts the weight matrix to a plain ndarray for plotting.
plotBestFit(weights.getA())
Here are my questions:
1. I trained it for 100,000 iterations, printing the error at each iteration, but I didn't see it converge; actually, I'm not sure about this.
2. I'm not sure how to plot the classifier correctly with matplotlib. When maxCycles is 200,000 I get a somewhat reasonable classifier, whereas when maxCycles is 100,000 the plot does not seem reasonable at all.
maxCycle is 100,000
UPDATE CODE:
# Count training-set misclassifications using a 0.5 probability threshold.
count = 0
n_samples = len(dataMat)  # derived from the data instead of a hard-coded 80
for i in range(n_samples):
    result = sigmoid(dataMat[i] * weights)
    a = 1 if result > 0.5 else 0
    # classLabel[i] is indexed with [0], i.e. each label is expected to be a
    # one-element sequence here.
    if float(a) != classLabel[i][0]:
        count += 1
errorRate = (float(count)/n_samples)
# Function-call form of print runs on both Python 2 and Python 3.
print("error count is: %f, error rate is: %f" % (count, errorRate))
Your code is actually fine! Here are some remarks:
You initialized the thetas with all ones. I would not do so in this example. The first call of the sigmoid function will return values close to 1, because the product of theta and x gives very large numbers. The computation of log(1 - h) can result in error, because log is not defined at 0. I prefer to initialize thetas with 0's.
When calculating the cost function you missed the division by m. It does not matter for the algorithm, but it's better to follow the theory.
It's a good idea to plot the cost function, and not just print its values. The correct trend can be seen very clearly.
In order to converge, this particular example needs many more iterations. I reached a good result at 500,000 iterations.
The post has been updated, see the UPDATE below
Here are my plots:
As you can see the resulting separation line matches the plot shown in your tutorial very well.
Here is my code. It differs a little bit from yours, but they are very similar.
import numpy as np
import matplotlib.pyplot as plt
def loadDataSet(x_path='../ex4x.dat', y_path='../ex4y.dat'):
    """Read the feature and label files into two Python lists.

    Each non-blank line of ``x_path`` must hold two whitespace-separated
    numbers; each non-blank line of ``y_path`` must start with one number.

    Returns
    -------
    dataMat : list of [1.0, x1, x2] rows (the leading 1.0 is the bias term)
    labelMat : list of one-element lists ``[label]``
    """
    dataMat = []
    labelMat = []
    # `with` closes the files even if parsing fails part-way through.
    with open(x_path) as frX:
        for line1 in frX:
            lineArr1 = line1.split()
            if not lineArr1:
                continue  # skip blank lines instead of failing on them
            dataMat.append([1.0, float(lineArr1[0]), float(lineArr1[1])])
    with open(y_path) as frY:
        for line2 in frY:
            lineArr2 = line2.split()
            if not lineArr2:
                continue
            labelMat.append([float(lineArr2[0])])
    return dataMat, labelMat
def sigmoid(inX):
    """Return the logistic function 1 / (1 + exp(-inX)); element-wise for arrays."""
    denominator = 1 + np.exp(-inX)
    return 1.0 / denominator
def gradDescent(dataMatIn, classLabels, alpha, maxCycles):
    """Fit logistic-regression weights by batch gradient descent and plot the cost.

    Arguments
    ---------
    dataMatIn : list of rows, shape [m, n]
        Design matrix including the bias column.
    classLabels : list of m one-element lists ``[label]``
    alpha : float
        Learning rate.
    maxCycles : int
        Number of iterations.

    Returns
    -------
    theta : weights of shape [n, 1]
    """
    # np.asmatrix is used because the short np.mat alias is gone in NumPy 2.
    x = np.asmatrix(dataMatIn)
    y = np.asmatrix(classLabels)
    m, n = np.shape(x)
    theta = np.zeros((n, 1))  # one weight per column, bias included
    cost_history = []  # cost sampled on every 1000th iteration
    for k in range(maxCycles):
        h = sigmoid(x*theta)
        if (k % 1000) == 0:
            # The cost is only needed for the samples that are kept, so it is
            # not evaluated on the other iterations.
            cost = ((-np.multiply(y, np.log(h)) - np.multiply(1-y, np.log(1-h))).sum(axis=0)/m)[0, 0]
            cost_history.append(cost)
        grad = (x.transpose() * (h - y))/m
        theta = theta - alpha*grad
    plot_cost = 1
    if plot_cost == 1:
        plt.plot(cost_history)
        plt.title("Cost")
        plt.show()
    return theta
def plotBestFit(dataMat, classLabel, weights):
    """Scatter the two classes and draw the fitted separation line.

    The line drawn is ``w0 + w1*x1 + w2*x2 = 0``.

    Arguments
    ---------
    dataMat : rows of [1.0, x1, x2]
    classLabel : one-element rows ``[label]`` with label 0 or 1
    weights : weights of shape [3, 1], indexable as ``weights[i, 0]``
    """
    arrY = np.asarray(classLabel)
    arrX = np.asarray(dataMat)
    # Row indices of the positive and negative samples.
    ind1 = np.where(arrY == 1)[0]
    ind0 = np.where(arrY == 0)[0]
    # Plain scalar bounds from the x1 column, rather than 1x1 matrices obtained
    # by running the builtin min/max over np.mat rows.
    min_x1 = arrX[:, 1].min()
    max_x1 = arrX[:, 1].max()
    x1_val = np.arange(min_x1, max_x1, 1)
    x2_val = (-weights[0, 0]-weights[1, 0]*x1_val)/weights[2, 0]
    plt.scatter(arrX[ind1, 1], arrX[ind1, 2], s=30, c='red', marker='s')
    plt.scatter(arrX[ind0, 1], arrX[ind0, 2], s=30, c='blue', marker='s')
    plt.plot(x1_val, x2_val)
    plt.xlabel('X1', fontsize=18)
    plt.ylabel('X2', fontsize=18)
    plt.title("Separation border")
    plt.show()
# Load the data, fit with learning rate 0.0014 for 500000 iterations, then
# print the fitted weights and plot the separation line.
dataMat, classLabel = loadDataSet()
weights = gradDescent(dataMat, classLabel, 0.0014, 500000)
print(weights)
plotBestFit(dataMat, classLabel, weights)
UPDATE
After reading your questions in the comments to the first edition of the post, I tried to optimize the code so that the cost function converges in a much smaller number of iterations.
Indeed, feature standardization works wonders :)
An even better result was achieved after only 30 iterations!
Here are the new plots:
Because of the standardization you need to scale each new test example, in order to classify it.
Here is the new code. I changed some data types to avoid unnecessary data type conversions.
import numpy as np
import matplotlib.pyplot as plt
def loadDataSet(x_path='../ex4x.dat', y_path='../ex4y.dat'):
    """Read the feature and label files into two NumPy arrays.

    Each non-blank line of ``x_path`` must hold two whitespace-separated
    numbers; each non-blank line of ``y_path`` must start with one number.

    Returns
    -------
    dataMat : array of [1.0, x1, x2] rows (the leading 1.0 is the bias term)
    labelMat : array of one-element rows ``[label]``
    """
    dataMat = []
    labelMat = []
    # `with` closes the files even if parsing fails part-way through.
    with open(x_path) as frX:
        for line1 in frX:
            lineArr1 = line1.split()
            if not lineArr1:
                continue  # skip blank lines instead of failing on them
            dataMat.append([1.0, float(lineArr1[0]), float(lineArr1[1])])
    with open(y_path) as frY:
        for line2 in frY:
            lineArr2 = line2.split()
            if not lineArr2:
                continue
            labelMat.append([float(lineArr2[0])])
    return np.asarray(dataMat), np.asarray(labelMat)
def sigmoid(inX):
    """Return the logistic function 1 / (1 + exp(-inX)); element-wise for arrays."""
    denominator = 1 + np.exp(-inX)
    return 1.0 / denominator
def gradDescent(x, y, alpha, maxCycles):
    """Fit logistic-regression weights by batch gradient descent and plot the cost.

    Arguments
    ---------
    x : array of shape [m, n]
        Design matrix including the bias column.
    y : array of shape [m, 1]
        Labels (0 or 1).
    alpha : float
        Learning rate.
    maxCycles : int
        Number of iterations.

    Returns
    -------
    theta : array of shape [n, 1]
    """
    m, n_columns = np.shape(x)
    theta = np.zeros((n_columns, 1))  # one weight per column, bias included
    cost_iter = []
    cost_history = []
    for k in range(maxCycles):
        h = sigmoid(np.dot(x, theta))
        per_sample = -np.multiply(y, np.log(h)) - np.multiply(1-y, np.log(1-h))
        # The cost is recorded on every iteration.
        cost_history.append(np.sum(per_sample)/m)
        cost_iter.append(k)
        grad = np.dot(x.transpose(), (h - y))/m
        theta = theta - alpha*grad
    plt.plot(cost_iter, cost_history)
    plt.title("Cost")
    plt.show()
    return theta
def plotBestFit(arrX, arrY, weights):
    """Scatter the two classes and draw the fitted separation line.

    The line drawn is ``w0 + w1*x1 + w2*x2 = 0``.

    Arguments
    ---------
    arrX : array of [1.0, x1, x2] rows
    arrY : array of one-element rows ``[label]`` with label 0 or 1
    weights : array of shape [3, 1]
    """
    # Row indices of the positive and negative samples.
    ind1 = np.where(arrY == 1)[0]
    ind0 = np.where(arrY == 0)[0]
    # Scalar bounds from the x1 column. Running the builtin min/max over the
    # 2-D slice arrX[:, 1:2] yields one-element arrays, which np.arange then
    # has to coerce to scalars.
    min_x1 = arrX[:, 1].min()
    max_x1 = arrX[:, 1].max()
    x1_val = np.arange(min_x1, max_x1, 0.1)
    x2_val = (-weights[0, 0]-weights[1, 0]*x1_val)/weights[2, 0]
    plt.scatter(arrX[ind1, 1], arrX[ind1, 2], s=30, c='red', marker='s')
    plt.scatter(arrX[ind0, 1], arrX[ind0, 2], s=30, c='blue', marker='s')
    plt.plot(x1_val, x2_val)
    plt.xlabel('X1', fontsize=18)
    plt.ylabel('X2', fontsize=18)
    plt.title("Separation border")
    plt.show()
dataMat, classLabel = loadDataSet()
m, _ = np.shape(dataMat)

# Standardization: per-column mean and standard deviation, repeated for every
# row so they line up element-wise with the data.
dataMatMean = np.mean(dataMat, axis=0)
dataMatStd = np.std(dataMat, axis=0)
dataMatMean_m = np.tile(dataMatMean, (m, 1))
dataMatStd_m = np.tile(dataMatStd, (m, 1))

# Only columns 1 and 2 are rescaled; column 0 is left untouched.
feature_cols = slice(1, 3)
dataMatStand = np.copy(dataMat)
centered = dataMatStand[:, feature_cols] - dataMatMean_m[:, feature_cols]
dataMatStand[:, feature_cols] = np.divide(centered, dataMatStd_m[:, feature_cols])

# Fit on the standardized data with learning rate 1.0 for 30 iterations.
weights = gradDescent(dataMatStand, classLabel, 1.0, 30)
print(weights)
plotBestFit(dataMatStand, classLabel, weights)

Categories

Resources