I wrote a GA program with python with 1 input, output and it works fine. But I want to find a solution with more input and output but I don't know how.
Example from https://pygad.readthedocs.io/:
Given function: y = f(w1:w6) = w1x1 + w2x2 + w3x3 + w4x4 + w5x5 + 6wx6
with
input(x1:x6)=(4,-2,3.5,5,-11,-4.7) and y=44
solution = (w1:w6)
But I want to find a solution with more input and output like input1 = (1,5,-3,5,-1,-4), y1 = 50.
Thanks for using PyGAD.
You can find the example you are looking for at this script. Here is the code:
import pygad
import numpy
"""
Given the following function:
y = f(w1:w6) = w1x1 + w2x2 + w3x3 + w4x4 + w5x5 + 6wx6
where (x1,x2,x3,x4,x5,x6)=(4,-2,3.5,5,-11,-4.7) and y=44
What are the best values for the 6 weights (w1 to w6)? We are going to use the genetic algorithm to optimize this function.
"""
function_inputs = [4,-2,3.5,5,-11,-4.7] # Function inputs.
desired_output = 44 # Function output.
def fitness_func(solution, solution_idx):
# Calculating the fitness value of each solution in the current population.
# The fitness function calulates the sum of products between each input and its corresponding weight.
output = numpy.sum(solution*function_inputs)
# The value 0.000001 is used to avoid the Inf value when the denominator numpy.abs(output - desired_output) is 0.0.
fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001)
return fitness
fitness_function = fitness_func
num_generations = 100 # Number of generations.
num_parents_mating = 10 # Number of solutions to be selected as parents in the mating pool.
# To prepare the initial population, there are 2 ways:
# 1) Prepare it yourself and pass it to the initial_population parameter. This way is useful when the user wants to start the genetic algorithm with a custom initial population.
# 2) Assign valid integer values to the sol_per_pop and num_genes parameters. If the initial_population parameter exists, then the sol_per_pop and num_genes parameters are useless.
sol_per_pop = 20 # Number of solutions in the population.
num_genes = len(function_inputs)
init_range_low = -2
init_range_high = 5
parent_selection_type = "sss" # Type of parent selection.
keep_parents = -1 # Number of parents to keep in the next population. -1 means keep all parents and 0 means keep nothing.
crossover_type = "single_point" # Type of the crossover operator.
# Parameters of the mutation operation.
mutation_type = "random" # Type of the mutation operator.
mutation_percent_genes = 10 # Percentage of genes to mutate. This parameter has no action if the parameter mutation_num_genes exists or when mutation_type is None.
last_fitness = 0
def callback_generation(ga_instance):
global last_fitness
print("Generation = {generation}".format(generation=ga_instance.generations_completed))
print("Fitness = {fitness}".format(fitness=ga_instance.best_solution()[1]))
print("Change = {change}".format(change=ga_instance.best_solution()[1] - last_fitness))
last_fitness = ga_instance.best_solution()[1]
# Creating an instance of the GA class inside the ga module. Some parameters are initialized within the constructor.
ga_instance = pygad.GA(num_generations=num_generations,
num_parents_mating=num_parents_mating,
fitness_func=fitness_function,
sol_per_pop=sol_per_pop,
num_genes=num_genes,
init_range_low=init_range_low,
init_range_high=init_range_high,
parent_selection_type=parent_selection_type,
keep_parents=keep_parents,
crossover_type=crossover_type,
mutation_type=mutation_type,
mutation_percent_genes=mutation_percent_genes,
on_generation=callback_generation)
# Running the GA to optimize the parameters of the function.
ga_instance.run()
# After the generations complete, some plots are showed that summarize the how the outputs/fitenss values evolve over generations.
ga_instance.plot_result()
# Returning the details of the best solution.
solution, solution_fitness, solution_idx = ga_instance.best_solution()
print("Parameters of the best solution : {solution}".format(solution=solution))
print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness))
print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx))
prediction = numpy.sum(numpy.array(function_inputs)*solution)
print("Predicted output based on the best solution : {prediction}".format(prediction=prediction))
if ga_instance.best_solution_generation != -1:
print("Best fitness value reached after {best_solution_generation} generations.".format(best_solution_generation=ga_instance.best_solution_generation))
If you have any questions, please let me know!
Related
I'm trying to find the global minimum of the function from the hundred digit hundred dollars challenge, question #4 as an exercise for simulated annealing.
As the basis of my understanding and approach to writing the code, I refer to the global optimization algorithms version 3 book which is found for free online.
Consequently, I've initially come up with the following code:
The noisy func:
def noisy_func(x, y):
return (math.exp(math.sin(50*x)) +
math.sin(60*math.exp(y)) +
math.sin(70*math.sin(x)) +
math.sin(math.sin(80*y)) -
math.sin(10*(x + y)) +
0.25*(math.pow(x, 2) +
math.pow(y, 2)))
The function used to mutate the values:
def mutate(X_Value, Y_Value):
mutationResult_X = X_Value + randomNumForInput()
mutationResult_Y = Y_Value + randomNumForInput()
while mutationResult_X > 4 or mutationResult_X < -4:
mutationResult_X = X_Value + randomNumForInput()
while mutationResult_Y > 4 or mutationResult_Y < -4:
mutationResult_Y = Y_Value + randomNumForInput()
mutationResults = [mutationResult_X, mutationResult_Y]
return mutationResults
randomNumForInput simply returns a random number between 4 and -4. (Interval Limits for the search.) Hence it is equivalent to random.uniform(-4, 4).
This is the central function of the program.
def simulated_annealing(f):
"""Peforms simulated annealing to find a solution"""
#Start by initializing the current state with the initial state
#acquired by a random generation of a number and then using it
#in the noisy func, also set solution(best_state) as current_state
#for a start
pCurSelect = [randomNumForInput(),randomNumForInput()]
current_state = f(pCurSelect[0],pCurSelect[1])
best_state = current_state
#Begin time monitoring, this will represent the
#Number of steps over time
TimeStamp = 1
#Init current temp via the func, using such values as to get the initial temp
initial_temp = 100
final_temp = .1
alpha = 0.001
num_of_steps = 1000000
#calculates by how much the temperature should be tweaked
#each iteration
#suppose the number of steps is linear, we'll send in 100
temp_Delta = calcTempDelta(initial_temp, final_temp, num_of_steps)
#set current_temp via initial temp
current_temp = getTemperature(initial_temp, temp_Delta)
#max_iterations = 100
#initial_temp = get_Temperature_Poly(TimeStamp)
#current_temp > final_temp
while current_temp > final_temp:
#get a mutated value from the current value
#hence being a 'neighbour' value
#with it, acquire the neighbouring state
#to the current state
neighbour_values = mutate(pCurSelect[0], pCurSelect[1])
neighbour_state = f(neighbour_values[0], neighbour_values[1])
#calculate the difference between the newly mutated
#neighbour state and the current state
delta_E_Of_States = neighbour_state - current_state
# Check if neighbor_state is the best state so far
# if the new solution is better (lower), accept it
if delta_E_Of_States <= 0:
pCurSelect = neighbour_values
current_state = neighbour_state
if current_state < best_state:
best_state = current_state
# if the new solution is not better, accept it with a probability of e^(-cost/temp)
else:
if random.uniform(0, 1) < math.exp(-(delta_E_Of_States) / current_temp):
pCurSelect = neighbour_values
current_state = neighbour_state
# Here, we'd decrement the temperature or increase the timestamp, normally
"""current_temp -= alpha"""
#print("Run number: " + str(TimeStamp) + " current_state = " + str(current_state) )
#increment TimeStamp
TimeStamp = TimeStamp + 1
# calc temp for next iteration
current_temp = getTemperature(current_temp, temp_Delta)
#print("Iteration Count: " + str(TimeStamp))
return best_state
alpha is not used for this implementation, however temperature is moderated linearly using the following funcs:
def calcTempDelta(T_Initial, T_Final, N):
return((T_Initial-T_Final)/N)
def getTemperature(T_old, T_new):
return (T_old - T_new)
This is how I implemented the solution described in page 245 of the book. However, this implementation does not return to me the global minimum of the noisy function, but rather, one of its near-by local minimum.
The reasons I implemented the solution in this way is two fold:
It has been provided to me as a working example of a linear temperature moderation, and thus a working template.
Although I have tried to understand the other forms of temperature moderation laid out in the book in pages 248-249, it is not entirely clear to me how the variable "Ts" is calculated, and even after trying to look through some of the cited sources the book references, it remains esoteric for me still. Thus I figured, I'd rather try to make this "simple" solution work correctly first, before proceeding to attempt other approaches of temperature quenching (logarithmic, exponential, etc).
Since then I have tried in numerous ways to acquire the global minimum of the noisy func through various different iterations of the code, which would be too much to post here all at once. I've tried different rewrites of this code:
Decrease the randomly rolled number over each iteration as in order to search within a smaller scope every time, this has resulted in more consistent but still incorrect results.
Mutate by different increments, so lets say, between -1 and 1, etc. Same effect.
Rewrite mutate as in order to examine the neighbouring points to the current point via some step size, and examine neighboring points by adding/reducing said step size from the current point's x/y values, checking the differences between the newly generated point and the current point (the delta of E's, basically), and return the appropriate values with whichever one produced the lowest distance to the current function, thus being its closest proximity neighbour.
Reduce the intervals limits over which the search occurs.
It is in these, the solutions involving step-size/reducing limits/checking neighbours by quadrants that I have used movements comprised of some constant alpha times the time_stamp.
These and other solutions which I've attempted have not worked, either producing even less accurate results (albeit in some cases more consistent results) or in one case, not working at all.
Therefore I must be missing something, whether its to do with the temperature moderation, or the precise way (formula) by which I'm supposed to make the next step (mutate) in the algorithm.
I know its a lot to take in and look at, but I'd appreciate any constructive criticism/help/advice you can provide me.
If it will be of any help to showcase code bits of the other solution attempts, I'll post them if asked.
It is important that you keep track of what you are doing.
I have put a few important tips on frigidum
The alpha cooling generally works well, it makes sure you don't speed through the interesting sweet-spot, where about 0.1 of the proposals are accepted.
Make sure your proposals are not too coarse, I have put a example where I only change x or y, but never both. The idea is that annealing will take whats best, or take a tour, and let the scheme decide.
I use the package frigidum for the algo, but its pretty much the same are your code. Also notice I have 2 proposals, a large change and a small change, combinations usually work well.
Finally, I noticed its hopping a lot. A small variation would be to pick the best-so-far before you go in the last 5% of your cooling.
I use/install frigidum
!pip install frigidum
And made a small change to make use of numpy arrays;
import math
def noisy_func(X):
x, y = X
return (math.exp(math.sin(50*x)) +
math.sin(60*math.exp(y)) +
math.sin(70*math.sin(x)) +
math.sin(math.sin(80*y)) -
math.sin(10*(x + y)) +
0.25*(math.pow(x, 2) +
math.pow(y, 2)))
import frigidum
import numpy as np
import random
def random_start():
return np.random.random( 2 ) * 4
def random_small_step(x):
if np.random.random() < .5:
return np.clip( x + np.array( [0, 0.02 * (random.random() - .5)] ), -4,4)
else:
return np.clip( x + np.array( [0.02 * (random.random() - .5), 0] ), -4,4)
def random_big_step(x):
if np.random.random() < .5:
return np.clip( x + np.array( [0, 0.5 * (random.random() - .5)] ), -4,4)
else:
return np.clip( x + np.array( [0.5 * (random.random() - .5), 0] ), -4,4)
local_opt = frigidum.sa(random_start=random_start,
neighbours=[random_small_step, random_big_step],
objective_function=noisy_func,
T_start=10**2,
T_stop=0.00001,
repeats=10**4,
copy_state=frigidum.annealing.copy)
The output of the above was
---
Neighbour Statistics:
(proportion of proposals which got accepted *and* changed the objective function)
random_small_step : 0.451045
random_big_step : 0.268002
---
(Local) Minimum Objective Value Found:
-3.30669277
With the above code sometimes I get below -3, but I also noticed sometimes it has found something around -2, than it is stuck in the last phase.
So a small tweak would be to re-anneal the last phase of the annealing, with the best-found-so-far.
Hope that helps, let me know if any questions.
The following is the objective function:
The idea is that a mean-variance optimization has already been done on a universe of securities. This gives us the weights for a target portfolio. Now suppose the investor already is holding a portfolio and does not want to change their entire portfolio to the target one.
Let w_0 = [w_0(1),w_0(2),...,w_0(N)] be the initial portfolio, where w_0(i) is the fraction of the portfolio invested in
stock i = 1,...,N. Let w_t = [w_t(1), w_t(2),...,w_t(N)] be the target portfolio, i.e., the portfolio
that it is desirable to own after rebalancing. This target portfolio may be constructed using quadratic optimization techniques such as variance minimization.
The objective is to decide the final portfolio w_f = [w_f (1), w_f (2),..., w_f(N)] that satisfies the
following characteristics:
(1) The final portfolio is close to our target portfolio
(2) The number of transactions from our initial portfolio is sufficiently small
(3) The return of the final portfolio is high
(4) The final portfolio does not hold many more securities that our initial portfolio
An objective function which is to be minimized is created by summing together the characteristic terms 1 through 4.
The first term is captured by summing the absolute difference in weights from the final and the target portfolio.
The second term is captured by the sum of an indicator function multiplied by a user specified penalty. The indicator function is y_{transactions}(i) where it is 1 if the weight of security i is different in the initial portfolio and the final portfolio, and 0 otherwise.
The third term is captured by the total final portfolio return multiplied by a negative user specified penalty since the objective is minimization.
The final term is the count of assets in the final portfolio (ie. sum of an indicator function counting the number of positive weights in the final portfolio), multiplied by a user specified penalty.
Assuming that we already have the target weights as target_w how do I setup this optimization problem in docplex python library? Or if anyone is familiar with mixed integer programming in NAG it would be helpful to know how to setup such a problem there as well.
`
final_w = [0.]*n
final_w = np.array(final_w)
obj1 = np.sum(np.absolute(final_w - target_w))
pen_trans = 1.2
def ind_trans(final,inital):
list_trans = []
for i in range(len(final)):
if abs(final[i]-inital[i]) == 0:
list_trans.append(0)
else:
list_trans.append(1)
return list_trans
obj2 = pen_trans*sum(ind_trans(final_w,initial_w))
pen_returns = 0.6
returns_np = np.array(df_secs['Return'])
obj3 = (-1)*np.dot(returns_np,final_w)
pen_count = 1.
def ind_count(final):
list_count = []
for i in range(len(final)):
if final[i] == 0:
list_count.append(0)
else:
list_count.append(1)
return list_count
obj4 = sum(ind_count(final_w))
objective = obj1 + obj2 + obj3 + obj4
The main issue in your code is that final_w is not a an array of variables but an array of data. So there will be nothing to optimize. To create an array of variables in docplex you have to do something like this:
from docplex.mp.model import Model
with Model() as m:
final = m.continuous_var_list(n, 0.0, 1.0)
That creates n variables that can take values between 0 and 1. With that in hand you can start things. For example:
obj1 = m.sum(m.abs(initial[i] - final[i]) for i in range(n))
For the next objective things become harder since you need indicator constraints. To simplify definition of these constraints first define a helper variable delta that gives the absolute difference between stocks:
delta = m.continuous_var_list(n, 0.0, 1.0)
m.add_constraints(delta[i] == m.abs(initial[i] - final[i]) for i in range(n))
Next you need an indicator variable that is 1 if a transaction is required to adjust stock i:
needtrans = m.binary_var_list(n)
for i in range(n):
# If needtrans[i] is 0 then delta[i] must be 0.
# Since needtrans[i] is penalized in the objective, the solver will
# try hard to set it to 0. It will only set it to 1 if delta[i] != 0.
# That is exactly what we want
m.add_indicator(needtrans[i], delta[i] == 0, 0)
With that you can define the second objective:
obj2 = pen_trans * m.sum(needtrans)
once all objectives have been defined, you can add their sum to the model:
m.minimize(obj1 + obj2 + obj3 + obj4)
and then solve the model and display its solution:
m.solve()
print(m.solution.get_values(final))
If any of the above is not (yet) clear to you then I suggest you take a look at the many examples that ship with docplex and also at the (reference) documentation.
I am writing a scientific code in python to calculate the energy of a system.
Here is my function : cte1, cte2, cte3, cte4 are constants previously computed; pii is np.pi (calculated beforehand, since it slows the loop otherwise). I calculate the 3 components of the total energy, then sum them up.
def calc_energy(diam):
Energy1 = cte2*((pii*diam**2/4)*t)
Energy2 = cte4*(pii*diam)*t
d=diam/t
u=np.sqrt((d)**2/(1+d**2))
cc= u**2
E = sp.special.ellipe(cc)
K = sp.special.ellipk(cc)
Id=cte3*d*(d**2+(1-d**2)*E/u-K/u)
Energy3 = cte*t**3*Id
total_energy = Energy1+Energy2+Energy3
return (total_energy,Energy1)
My first idea was to simply loop over all values of the diameter :
start_diam, stop_diam, step_diam = 1e-10, 500e-6, 1e-9 #Diametre
diametres = np.arange(start_diam,stop_diam,step_diam)
for d in diametres:
res1,res2 = calc_energy(d)
totalEnergy.append(res1)
Energy1.append(res2)
In an attempt to speed up calculations, I decided to use numpy to vectorize, as shown below :
diams = diametres.reshape(-1,1) #If not reshaped, calculations won't run
r1 = np.apply_along_axis(calc_energy,1,diams)
However, the "vectorized" solution does not properly work. When timing I get 5 seconds for the first solution and 18 seconds for the second one.
I guess I'm doing something the wrong way but can't figure out what.
With your current approach, you're applying a Python function to each element of your array, which carries additional overhead. Instead, you can pass the whole array to your function and get an array of answers back. Your existing function appears to work fine without any modification.
import numpy as np
from scipy import special
cte = 2
cte1 = 2
cte2 = 2
cte3 = 2
cte4 = 2
pii = np.pi
t = 2
def calc_energy(diam):
Energy1 = cte2*((pii*diam**2/4)*t)
Energy2 = cte4*(pii*diam)*t
d=diam/t
u=np.sqrt((d)**2/(1+d**2))
cc= u**2
E = special.ellipe(cc)
K = special.ellipk(cc)
Id=cte3*d*(d**2+(1-d**2)*E/u-K/u)
Energy3 = cte*t**3*Id
total_energy = Energy1+Energy2+Energy3
return (total_energy,Energy1)
start_diam, stop_diam, step_diam = 1e-10, 500e-6, 1e-9 #Diametre
diametres = np.arange(start_diam,stop_diam,step_diam)
a = calc_energy(diametres) # Pass the whole array
"""Some simulations to predict the future portfolio value based on past distribution. x is
a numpy array that contains past returns.The interpolated_returns are the returns
generated from the cdf of the past returns to simulate future returns. The portfolio
starts with a value of 100. portfolio_value is filled up progressively as
the program goes through every loop. The value is multiplied by the returns in that
period and a dollar is removed."""
portfolio_final = []
for i in range(10000):
portfolio_value = [100]
rand_values = np.random.rand(600)
interpolated_returns = np.interp(rand_values,cdf_values,x)
interpolated_returns = np.add(interpolated_returns,1)
for j in range(1,len(interpolated_returns)+1):
portfolio_value.append(interpolated_returns[j-1]*portfolio_value[j-1])
portfolio_value[j] = portfolio_value[j]-1
portfolio_final.append(portfolio_value[-1])
print (np.mean(portfolio_final))
I couldn't find a way to write this code using numpy. I was having a look at iterations using nditer but I was unable to move ahead with that.
I guess the easiest way to figure out how you can vectorize your stuff would be to look at the equations that govern your evolution and see how your portfolio actually iterates, finding patterns that could be vectorized instead of trying to vectorize the code you already have. You would have noticed that the cumprod actually appears quite often in your iterations.
Nevertheless you can find the semi-vectorized code below. I included your code as well such that you can compare the results. I also included a simple loop version of your code which is much easier to read and translatable into mathematical equations. So if you share this code with somebody else I would definitely use the simple loop option. If you want some fancy-pants vectorizing you can use the vector version. In case you need to keep track of your single steps you can also add an array to the simple loop option and append the pv at every step.
Hope that helps.
Edit: I have not tested anything for speed. That's something you can easily do yourself with timeit.
import numpy as np
from scipy.special import erf
# Prepare simple return model - Normal distributed with mu &sigma = 0.01
x = np.linspace(-10,10,100)
cdf_values = 0.5*(1+erf((x-0.01)/(0.01*np.sqrt(2))))
# Prepare setup such that every code snippet uses the same number of steps
# and the same random numbers
nSteps = 600
nIterations = 1
rnd = np.random.rand(nSteps)
# Your code - Gives the (supposedly) correct results
portfolio_final = []
for i in range(nIterations):
portfolio_value = [100]
rand_values = rnd
interpolated_returns = np.interp(rand_values,cdf_values,x)
interpolated_returns = np.add(interpolated_returns,1)
for j in range(1,len(interpolated_returns)+1):
portfolio_value.append(interpolated_returns[j-1]*portfolio_value[j-1])
portfolio_value[j] = portfolio_value[j]-1
portfolio_final.append(portfolio_value[-1])
print (np.mean(portfolio_final))
# Using vectors
portfolio_final = []
for i in range(nIterations):
portfolio_values = np.ones(nSteps)*100.0
rcp = np.cumprod(np.interp(rnd,cdf_values,x) + 1)
portfolio_values = rcp * (portfolio_values - np.cumsum(1.0/rcp))
portfolio_final.append(portfolio_values[-1])
print (np.mean(portfolio_final))
# Simple loop
portfolio_final = []
for i in range(nIterations):
pv = 100
rets = np.interp(rnd,cdf_values,x) + 1
for i in range(nSteps):
pv = pv * rets[i] - 1
portfolio_final.append(pv)
print (np.mean(portfolio_final))
Forget about np.nditer. It does not improve the speed of iterations. Only use if you intend to go one and use the C version (via cython).
I'm puzzled about that inner loop. What is it supposed to be doing special? Why the loop?
In tests with simulated values these 2 blocks of code produce the same thing:
interpolated_returns = np.add(interpolated_returns,1)
for j in range(1,len(interpolated_returns)+1):
portfolio_value.append(interpolated_returns[j-1]*portfolio[j-1])
portfolio_value[j] = portfolio_value[j]-1
interpolated_returns = (interpolated_returns+1)*portfolio - 1
portfolio_value = portfolio_value + interpolated_returns.tolist()
I assuming that interpolated_returns and portfolio are 1d arrays of the same length.
I'm working on a ANN, and got it working on simple AND/XOR problems using gradient descent, but I am finding it much more difficult to find the optimal weights for more complex problems. My solution was to use Scipy's minimum function instead, but I am having some trouble.
My ANN is a class, and when ANN object is created, weights are created upon initialization as the data member self.__weights, this is useful because I don't actually have to pass a weights parameter into any of the methods used on the class, and it worked in my earlier non-Scipy using implementation.
I use the self.__weights as the initial guess for the minimum function, and pass in the other arguments(input data, the correct output values) needed to make the function work. But it seems, that the optimizer keeps passing the weights into the methods(Cost Function/FeedForward/Backprop) as a parameter when there is no need to since it can be called with self.__weights. For instance, when my FeedForward method is called, it passes in the weights, and states that the input isn't of the correct size, this is because the weights were passed in instead. And the input data is passed into the minimum's optional 'arg=' parameter.
So, all explanation done, must the value to be optimized actually be a parameter of the methods being used to optimized it, and not just a class data member that can be called from anywhere within in the class? I've tried to find examples using a class they pass in there 'weights' or other value to be optimized depending on there use, not a data member of the class.
EDIT:
This is code a snippet is located in my train method. It's parameters are input_data and target.
opt_theta = minimize(self.CostFunction,x0=self.__weights,args = (input_data,target),method='Newton-CG',jac= self.Weight_Grad)
print(opt_theta)
self.__weights = opt_theta.x
self.CostFunction and self.Weight_Grad both take those same two parameters. Both of these latter two functions use a Feedforward method to get the output of the network, and do there respective task with that info. Whenever I call the minimize function it seems it passes self.__weights into self.CostFunction as the input_data parameter, which then gets passed into the FeedForward method as input and I get an error. IF i print the "input" passed into Feedforward it is the value of self.__weights. This is how I know self.__weights is getting passed in as a parameter when it shouldn't.
So, I thought I would create a "dummy"? parameter to pass the the self.__weights value into for all of the methods('self.CostFunction,'self.Weight_Grad,Feedforward) and an error did not occur but there was no change in the weights or the output. What do I need to do to get self.__weights to update.
Here are the methods in case it helps:
def Feedforward(self,input):
#Code to take self.__weights and convert to list of matrices.
weight_matrix_list = []
prev_val = 0
for i in range(len(self.__weight_sizes)):
curr_w_size = self.__weight_sizes[i]
weight_count = curr_w_size[0]*curr_w_size[1]
matrix_elements = self.__weights[prev_val:prev_val+weight_count]
weight_matrix_list.append(matrix_elements.reshape(curr_w_size))
self.__input_cases = np.shape(input)[0]
#Empty list to hold the output of every layer.
output_list = []
#Appends the output of the the 1st input layer.
output_list.append(input)
for i in range(self.__layers-1):
if i == 0:
print(self.__input_cases)
print(input)
X = np.concatenate((np.ones((self.__input_cases,1)),input),1)
output = self.sigmoid(np.dot(X,weight_matrix_list[0].T))
output_list.append(output)
else:
output = self.sigmoid(np.dot(np.concatenate((np.ones((self.__input_cases,1)),output),1),weight_matrix_list[i].T))
output_list.append(output)
return output_list
def CostFunction(self,input_data,target,error_func=1):
"""Gives the cost of using a particular weight matrix
based off of the input and targeted output"""
print("Cost")
#Run the network to get output using current theta matrices.
output = self.Feedforward(input_data)[-1]
#Determines number of input/training examples
m = np.shape(input_data)[0]
#####Allows user to choose Cost Functions.#####
#
#Log Based Error Function
#
if error_func == 0:
error = np.multiply(-target,np.log(output))-np.multiply((1-target),np.log(1-output))
total_error = np.sum(np.sum(error))
#
#Squared Error Cost Function
#
elif error_func == 1:
error = (target - output)**2
total_error = (1/(2*m)) * np.sum(np.sum(error))
return total_error
def Weight_Grad(self,input_data,target):
print('Grad')
weight_matrix_list = []
prev_val = 0
for i in range(len(self.__weight_sizes)):
curr_w_size = self.__weight_sizes[i]
weight_count = curr_w_size[0]*curr_w_size[1]
matrix_elements = self.__weights[prev_val:prev_val+weight_count]
weight_matrix_list.append(matrix_elements.reshape(curr_w_size))
output_list = self.Feedforward(theta,input_data)
#Finds the Deltas for Each Layer
#
deltas = []
for i in range(self.__layers - 1):
#Finds Error Delta for the last layer
if i == 0:
error = (target-output_list[-1])
error_delta = -1*np.multiply(error,np.multiply(output_list[-1],(1-output_list[-1])))
deltas.append(error_delta)
#Finds Error Delta for the hidden layers
else:
#Weight matrices have bias values removed
error_delta = np.multiply(np.dot(deltas[-1],weight_matrix_list[-i][:,1:]),output_list[-i-1]*(1-output_list[-i-1]))
deltas.append(error_delta)
#
#Finds the Deltas for each Weight Matrix
#
Weight_Delta_List = []
deltas.reverse()
for i in range(len(weight_matrix_list)):
current_weight_delta = (1/self.__input_cases) * np.dot(deltas[i].T,np.concatenate((np.ones((self.__input_cases,1)),output_list[i]),1))
Weight_Delta_List.append(current_weight_delta)
#
#Converts Weight Delta List to a single vector
#
Weight_Delta_Vector = np.array([])
for i in range(len(Weight_Delta_List)):
Weight_Delta_Vector = np.concatenate((Weight_Delta_Vector,Weight_Delta_List[i].flatten()))
print("WDV Shape:",np.shape(Weight_Delta_Vector))
return Weight_Delta_Vector
def Train(self,input_data,target):
opt_theta = minimize(self.CostFunction,x0=self.__weights,args = (input_data,target),method='Newton-CG',jac= self.Weight_Grad)
print(opt_theta)
self.__weights = opt_theta.x
print("Done")