I've been tinkering with neural networks and have some simple code that almost works. The only problem is that my network will not mutate properly. I've tested the Network class on its own and it mutates fine, but it doesn't seem to want to mutate when it's used as a member of another class.
%matplotlib inline
#for jupyter
import matplotlib #import for plotting results
import matplotlib.pyplot as plt
import numpy as np #np for random and exp
from datetime import datetime as dt #for time seed
#GLOBALS
sp = 100.0 #y setpoint
seconds = 120 #simulation length
timescale = 0.1 #timestep
generations = 10000 #generations to simulate
population = 20 #number of ships per generation
debug = False #unused
#NN class
class Network:
    #create array of neuron placeholder values for feedforward function
    def initNeurons(self):
        neuronList = []
        for i in range(len(self.layers)):
            neuronList.append([])
            for j in range(self.layers[i]):
                neuronList[i].append(0)
        self.neurons = neuronList
        #print(self.neurons)
    #randomly generate weights for each neuron based on number of neurons in previous layers
    def initWeights(self):
        weightsList = []
        #for each layer
        for i in range(1,len(self.layers)):
            layerWeights = []
            neuronsInPrevLayer = self.layers[i-1] #number of neurons in previous layer
            #for each neuron in layer
            for j in range(len(self.neurons[i])):
                neuronWeights = []
                #for each neuron in previous layer
                for k in range(neuronsInPrevLayer):
                    neuronWeights.append(2*(np.random.rand()-0.5)) #generate random weight (-1 to 1)
                layerWeights.append(neuronWeights)
            weightsList.append(layerWeights)
        self.weights = weightsList
    #calculate the values of each neuron and return output neurons
    def feedForward(self, netinputs):
        for i in range(len(netinputs)):
            #print(self.neurons[0])
            self.neurons[0][i] = netinputs[i]
        for i in range(1,len(self.layers)):
            for j in range(len(self.neurons[i])):
                value = 0.25
                for k in range(len(self.neurons[i-1])):
                    value += self.weights[i-1][j][k] * self.neurons[i-1][k]
                self.neurons[i][j] = (1/(1+np.exp(-value)))
        return self.neurons[-1]
    #randomly mutate weights while iterating through them
    def mutate(self):
        change = False
        for i in range(len(self.weights)):
            for j in range(len(self.weights[i])):
                for k in range(len(self.weights[i][j])):
                    weight = self.weights[i][j][k]
                    #print(weight)
                    randnum = np.random.rand() * 1000
                    if randnum <= 20:
                        weight *= -1
                        change = True
                    elif randnum <= 40:
                        weight = np.random.rand() - 0.5
                        change = True
                    elif randnum <= 80:
                        weight *= np.random.rand()
                        change = True
                    self.weights[i][j][k] = weight
        if change and debug:
            #print('mutation!')
            pass
    #iterate through weights and copy
    def copyWeights(self, copyWeight):
        for i in range(len(self.weights)):
            for j in range(len(self.weights[i])):
                for k in range(len(self.weights[i][j])):
                    self.weights[i][j][k] = copyWeight[i][j][k]
    #copies the weights from a passed NN
    def Network(self, copyNetwork):
        self.layers = []
        self.neurons = []
        self.weights = []
        self.fitness = -9000.0
        for i in range(len(copyNetwork.layers)):
            self.layers.append(copyNetwork.layers[i])
        np.random.seed(dt.now().microsecond)
        self.initNeurons()
        self.initWeights()
        self.copyWeights(copyNetwork.weights)
    #INITIALIZATION FUNCTION
    #initializes NN given an array of neuron counts EX: [3,5,3,1] 3 input neurons 1 output neuron with 2 hidden layers
    def __init__(self, inLayers ):
        self.layers = [] #array with neurons per layer ex: [2,4,2]
        self.neurons = [] #placeholder array for neuron values for feedforward
        self.weights = [] #weight values for each layer and neuron
        self.fitness = -9000.0 #initial fitness set to nonsense value
        for i in range(len(inLayers)):
            self.layers.append(inLayers[i])
        np.random.seed(dt.now().microsecond) #seed for RNG
        self.initNeurons() #create arrays for storing neuron values
        self.initWeights() #create weights for calculating neuron values
#environment for ship simulation
class environment(object):
    def __init__(self): # initialize self when created
        self.objects = []
        self.t = 0
        self.dt = timescale
        self.seconds = seconds
    def init(self): #initialize ships
        for p in self.objects:
            p.init()
    def start(self): #iterate through time and call step for each ship
        for i in range(0, self.seconds*100, int(self.dt*100)):
            self.t += self.dt
            #if i % 10 == 0:
                #print(self.t)
            for p in self.objects:
                p.step()
class ship():
    def __init__(self, m, x, y, v, thrust_max, throttle, env):
        self.brain = Network([3,8,4,3,2]) #create NN for throttle control with 3 inputs and 2 outputs
        self.deltasp = [] #array for difference from setpoint per step for plotting and analysis
        self.yset = [] #array for y coord (height) per step for plotting and analysis
        self.mass = m #mass for force and acceleration calculation
        self.x = x #initial x value
        self.y = y #initial y value
        self.velocity = v #initial velocity
        self.thrust_max = thrust_max #maximum thrust
        self.throttle = throttle/100.0 #initial throttle
        self.a = 0.0 #initial acceleration
        env.objects.append(self) #add ship to environment objects
        self.env = env
    def init(self):
        self.deltasp = [] #array for difference from setpoint per step for plotting and analysis
        self.yset = [] #array for y coord (height) per step for plotting and analysis
        self.x = 0 #x coord for plotting (unused)
        self.y = 0 #y coord (height)
        self.velocity = 0 #ship velocity
        self.a = 0 #ship acceleration
    #calculate acceleration based on thrust, throttle and mass, minus acceleration due to gravity
    def acc(self):
        return ((self.thrust_max*self.throttle)/(self.mass))-9.8
    def step(self): #ship step
        #get outputs from NN
        self.outputs = self.brain.feedForward([self.throttle,self.a,((self.y)-sp)])
        #if output 1 is high increase thrust by variable amount based on certainty
        if self.outputs[0] >= 0.6:
            self.throttle += self.outputs[0]-0.5
            #print('throttle up')
        #if output 2 is high decrease thrust by variable amount based on certainty
        elif self.outputs[1] >= 0.6:
            self.throttle -= self.outputs[1]-0.5
        #throttle limiting between 0% and 100% (0-1)
        if self.throttle <= 0:
            self.throttle = 0
        elif self.throttle >= 1:
            self.throttle = 1
        #store delta from setpoint to array for plotting and analysis
        self.deltasp.append(abs(self.y-sp))
        #increase x arbitrarily (legacy code from turtle version, could be useful later)
        self.x += 0.1
        #increase y (height) by timestep times velocity
        self.y += self.env.dt*self.velocity
        #calculate new acceleration
        self.a = self.acc()
        #limit height to 0
        #floor collision detection
        if self.y < 0:
            self.a = 0
            self.velocity = 0
            self.y = 0
        #store y coord (height) for plotting and analysis
        self.yset.append(self.y)
        #calculate new velocity based on acceleration and timestep
        self.velocity = self.velocity + self.env.dt*self.a
        #calculate fitness as summation of difference from setpoint
        self.brain.fitness = sum(self.deltasp)
    #mutate NN for evolution
    def evolve(self):
        self.brain.mutate()
def bubble_sort(seq): #modified bubblesort borrowed from http://python3.codes/popular-sorting-algorithms/
    for ob in seq:
        #print(ob.brain.weights)
        pass
    changed = True
    while changed:
        changed = False
        for i in range(len(seq) - 1):
            if abs(seq[i].brain.fitness) < abs(seq[i+1].brain.fitness):
                seq[i], seq[i+1] = seq[i+1], seq[i]
                changed = True
    return None
def reproduce(ships): #make new ships based on fitness
    mute_ships = []
    return_ships = []
    for o in ships:
        mute_ships.append(o)
    bubble_sort(mute_ships) #sort ships by fitness
    for i in range(len(ships)): #create array of mutated best ship
        mute_ships[-1].evolve()
        return_ships.append(mute_ships[-1])
    return return_ships #array of mutated ships
def main(): #main loop
    new_ships = [] #mutated ships container
    for gen in range(generations): #loop for generations
        ships = []
        env = environment()
        if gen == 0: #if first generation, generate initial population
            for i in range(population):
                np.random.seed(dt.now().microsecond)
                shp = ship(500.0, -100.0, 0.0, 0.0, 9800.0, 0.0, env)
                ships.append(shp)
        else: #if not first generation, copy ships from mutated ships
            ships = new_ships
            for o in ships:
                o.env.objects.append(o)
        new_ships = []
        env.init() #initialize environment
        env.start() #start environment simulation
        for o in reproduce(ships): #mutate ships
            new_ships.append(o)
        del env
        ### DEBUGGING ###
        #print(len(new_ships))
        #print(ships[0].brain.weights == new_ships[0].brain.weights)
        #if ships[0].brain.weights == new_ships[0].brain.weights:
            #print('no mutations')
        #print("generation: ", gen + 1)
        #for o in ships:
            #print(o.brain.fitness)
            #plt.plot(range(len(o.yset)),o.yset)
        #print(ships[0].brain.fitness)
    for o in ships: #plot different statistics
        #print(o.brain.fitness)
        plt.plot(range(len(o.yset)),o.yset)
        #plt.plot(range(len(o.deltasp)),o.deltasp)
    return "done"

if __name__ == '__main__':
    main()
At this point I'm pretty stuck. Sorry for some of the spaghetti code. I've tried to clean it up a bit.
Ok, here's what I see:
In reproduce() you are doing a bunch of weird things:
you sort the list (which has no effect on the mutation), then mutate the ships (which probably destroys the sortedness);
you iterate with an index into the array, which is usually un-Pythonic, but then operate on mute_ships[-1] each time; with 20 ships, that means you are mutating the last ship 20 times and the others not at all!
.evolve() seems to be an in-place operation, but you copy the results into a new list to return; then in the calling function, main(), you copy the result into a new list again using a loop (the slow way) instead of using the list() constructor (simpler and faster);
that makes no difference anyway, because the list only contains references to the same ship instances!
Instead, try
def reproduce(ships):
    for ship in ships:
        ship.evolve()
Naming conventions: PEP 8 says classes should be capitalized and methods lowercase. Also, some of the variable names are nasty (o.env.objects.append(o)?)
You don't need bubblesort; you can just do mute_ships = sorted(ships, key=lambda ship: ship.brain.fitness). That replaces about 16 lines of code.
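For example, here is a sketch (not a drop-in fix, just one way to do it) of a reproduce() that keeps the best network and refills the rest with mutated copies, using only your class's own copyWeights() and mutate():

def reproduce(ships):
    #rank best-first: fitness is a summed |delta| from the setpoint, so smaller is better
    ranked = sorted(ships, key=lambda s: abs(s.brain.fitness))
    best = ranked[0]
    #keep the champion's weights; overwrite every other brain with a mutated copy of them
    for s in ranked[1:]:
        s.brain.copyWeights(best.brain.weights)
        s.brain.mutate()
    return ships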
You aren't evolving the ships or environments at all, so separate classes for them are kind of overkill. I would probably rename Network to ShipController, and stick the whole simulation into a ShipController.evaluate() method.
np.random seeds itself quite happily; the only good reason to seed it yourself is if you want to be able to repeat a run by giving it the same seed again. Also, np.random.seed() accepts a value in 0 .. 4.3 billion, but you are giving it a value in 0 .. 1 million. By doing so, you are greatly reducing the actual randomness of your algorithm.
You have a global sp but also an sp in main() which never gets used, which will confuse you if you ever try to change it.
You are not really using numpy properly; it works best on vectorized blocks of calculations, not lots of little split-up calculations inside Python loops.
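For illustration, here is a sketch of what a vectorized feedforward could look like, with each layer's weights held in one 2-D array instead of nested lists (the 0.25 matches the bias your feedForward starts each neuron with):

import numpy as np

def feed_forward_vectorized(layer_weights, inputs):
    #layer_weights: list of 2-D arrays, one per layer, shaped (neurons, prev_neurons)
    a = np.asarray(inputs, dtype=float)
    for W in layer_weights:
        a = 1.0 / (1.0 + np.exp(-(W @ a + 0.25))) #whole layer in one matrix product
    return a

#one-time conversion of an existing Network's nested lists:
#W_list = [np.asarray(layer) for layer in net.weights]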
Related
In this paper, a very simple model is described to illustrate how the ant colony algorithm works. In short, it assumes two nodes which are connected via two links, one of which is shorter. Then, given a pheromone increment and pheromone evaporation dynamics, one expects that all ants will eventually pick the shorter path.
Now, I'm trying to replicate the simulation from this paper for the scenario above; the result should look (more or less) like the plot below.
Here is my implementation (using the same specification as the test above).
import random
import matplotlib.pyplot as plt

N = 10
l1 = 1
l2 = 2
ru = 0.5
Q = 1
tau1 = 0.5
tau2 = 0.5
epochs = 150

success = [0 for x in range(epochs)]

def compute_probability(tau1, tau2):
    return tau1/(tau1 + tau2), tau2/(tau1 + tau2)

def select_path(prob1, prob2):
    if prob1 > prob2:
        return 1
    if prob1 < prob2:
        return 2
    if prob1 == prob2:
        return random.choice([1,2])

def update_accumulation(link_id):
    global tau1
    global tau2
    if link_id == 1:
        tau1 += Q / l1
        return tau1
    if link_id == 2:
        tau2 += Q / l2
        return tau2

def update_evapuration():
    global tau1
    global tau2
    tau1 *= (1-ru)
    tau2 *= (1-ru)
    return tau1, tau2

def report_results(success):
    plt.plot(success)
    plt.show()

for epoch in range(epochs-1):
    temp = 0
    for ant in range(N-1):
        prob1, prob2 = compute_probability(tau1, tau2)
        selected_path = select_path(prob1,prob2)
        if selected_path == 1:
            temp += 1
        update_accumulation(selected_path)
        update_evapuration()
    success[epoch] = temp

report_results(success)
However, what I get is fairly weird, as shown below.
It seems that my understanding of how the pheromone should be updated is flawed.
Can anyone point out what I am missing in this implementation?
Three problems in the proposed approach:
As @Mark mentioned in his comment, you need a weighted random choice. Otherwise the proposed approach will likely always pick one of the paths and the plot will result in a straight line, as you show above. However, this is only part of the solution: even with it, you will likely still get a straight line because of early convergence, which leads to problem two.
Ant Colony Optimization is a metaheuristic that needs several (hyper)parameters configured to guide the search for a certain solution (e.g., tau from above, or the number of ants). Fine-tuning these parameters is important because you can converge early on a particular result (which is fine to some extent, if you want to use it as a heuristic). But the purpose of a metaheuristic is to provide some middle ground between exact and heuristic algorithms, which makes continuous exploration/exploitation an important part of its workings. This means the parameters need to be carefully optimised for your problem size/type.
Given that ACO uses a probabilistic approach to guide the search (and as the plot from the referenced paper shows), you will need to run the experiment several times and compute some statistic on those numbers. In my case below, I computed the average over 100 samples.
import random
import matplotlib.pyplot as plt

N = 10
l1 = 1.1
l2 = 1.5
ru = 0.05
Q = 1
tau1 = 0.5
tau2 = 0.5
samples = 10
epochs = 150

success = [0 for x in range(epochs)]

def compute_probability(tau1, tau2):
    return tau1/(tau1 + tau2), tau2/(tau1 + tau2)

def weighted_random_choice(choices):
    max = sum(choices.values())
    pick = random.uniform(0, max)
    current = 0
    for key, value in choices.items():
        current += value
        if current > pick:
            return key

def select_path(prob1, prob2):
    choices = {1: prob1, 2: prob2}
    return weighted_random_choice(choices)

def update_accumulation(link_id):
    global tau1
    global tau2
    if link_id == 1:
        tau1 += Q / l1
    else:
        tau2 += Q / l2

def update_evaporation():
    global tau1
    global tau2
    tau1 *= (1-ru)
    tau2 *= (1-ru)

def report_results(success):
    plt.ylim(0.0, 1.0)
    plt.xlim(0, 150)
    plt.plot(success)
    plt.show()

for sample in range(samples):
    for epoch in range(epochs):
        temp = 0
        for ant in range(N):
            prob1, prob2 = compute_probability(tau1, tau2)
            selected_path = select_path(prob1, prob2)
            if selected_path == 1:
                temp += 1
            update_accumulation(selected_path)
            update_evaporation()
        ratio = ((temp + 0.0) / N)
        success[epoch] += ratio
    # reset pheromone values here to evaluate new sample
    tau1 = 0.5
    tau2 = 0.5

success = [x / samples for x in success]
for x in success:
    print(x)

report_results(success)
The code above should return something close to the desired plot.
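As a side note, on Python 3.6+ the hand-rolled weighted_random_choice can be replaced with the standard library's random.choices, for example:

def select_path(prob1, prob2):
    #random.choices draws with the given weights and returns a list, so take item 0
    return random.choices([1, 2], weights=[prob1, prob2])[0]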
I'm trying to solve the CartPole-v1 problem from OpenAI by using backprop on a one-layer neural network, while updating the model at every time step using state-action values (Q(s,a)). I'm unable to get the average reward to go up beyond about 42 steps per episode. Could anyone help? Is my approach even correct: is it even possible for the agent to learn the optimal solution if I'm updating the Q-values every time step, instead of doing batch updates at the end of each episode? It seems like it should be possible in theory.
Details: After playing around and experimenting with activation functions and stochastic policies, and finally settling on a deterministic policy with a linear activation function and the parameters mentioned below, I'm able to get my agent to consistently converge (in about 100-300 episodes) to an average reward of about 42 steps. But it doesn't go beyond 45. Adjusting the parameters (epsilon, discount_rate, and learning rate) in the program below does not have a huge impact on this.
I've tried looking for a similar solution online, but none of them seem to fit the approach I'm following. Almost all of the solutions involve learning at the end of each episode (by storing SARS' data).
Increasing the number of hidden layers doesn't help either. I also think it is unlikely that the algorithm will converge to a better value in the future, as I've run it for 10000+ episodes and the average reward is still around 40.
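For concreteness, the per-step target this approach computes is the textbook online Q-learning bootstrap; a minimal sketch with illustrative names (note that my code below uses -1 as the terminal target rather than the raw reward):

# target = r + gamma * max_a' Q(s', a'), with no bootstrapping past a terminal state
def td_target(reward, next_q_values, done, gamma=0.9):
    if done:
        return reward
    return reward + gamma * max(next_q_values)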
First, the hyperparameters:
epsilon = 0.5
lr = 0.05
discount_rate=0.9
# number of features in environment observations
num_inputs = 4
hidden_layer_nodes = 6
num_outputs = 2
The q function:
def calculateNNOutputs(observation, m1, m2):
    scaled_observation = scaleFeatures(observation)
    hidden_layer = np.dot(scaled_observation, m1) # 1x4 X 4x6 -> 1x6
    outputs = np.dot(hidden_layer, m2) # 1x6 X 6x2
    return np.asmatrix(outputs) # 1x2
Action selection (policy):
def selectAction(observation):
    #explore
    global epsilon
    if random.uniform(0,1) < epsilon:
        return random.randint(0,1)
    #exploit
    outputs = calculateNNOutputs(observation)
    print(outputs)
    if (outputs[0,0] > outputs[0,1]):
        return 0
    else:
        return 1
Backprop:
def backProp(prev_obs, m1, m2, experimental_values):
    global lr
    scaled_observation = np.asmatrix(scaleFeatures(prev_obs))
    hidden_layer = np.asmatrix(np.dot(scaled_observation, m1)) # 1x4 X 4x6 = 1x6
    outputs = np.asmatrix(np.dot(hidden_layer, m2)) # 1x6 X 6x2
    delta_out = np.asmatrix((outputs-experimental_values)) # 1x2
    delta_2 = np.transpose(np.dot(m2,np.transpose(delta_out))) # 6x2 X 2x1 = 6x1_T = 1x6
    GRADIENT_2 = (np.transpose(hidden_layer))*delta_out # 6x1 X 1x2 = 6x2 - same as w2
    GRADIENT_1 = np.multiply(np.transpose(scaled_observation), delta_2) # 4x6 - same as w1
    m1 = m1 - lr*GRADIENT_1
    m2 = m2 - lr*GRADIENT_2
    return m1, m2
Q-learning:
def updateWeights(prev_obs, action, obs, reward, done):
    global weights_1, weights_2
    calculated_value = calculateNNOutputs(prev_obs)
    if done:
        experimental_value = -1
    else:
        actionValues = calculateNNOutputs(obs) # 1x2
        experimental_value = reward + discount_rate*(np.amax(actionValues, axis = 1)[0,0])
    if action == 0:
        weights_1, weights_2 = backProp(prev_obs, weights_1, weights_2, np.array([[experimental_value, calculated_value[0,1]]]))
    else:
        weights_1, weights_2 = backProp(prev_obs, weights_1, weights_2, np.array([[calculated_value[0,0], experimental_value]]))
EDIT: the main loop -
record = 0
total = 0
for i_episode in range(num_episodes):
    if (i_episode%10 == 0):
        print("W1 = ", weights_1)
        print("W2 = ", weights_2)
    observation = env.reset()
    epsilon = max(epsilon*0.9,0.01)
    lr = max(lr*0.9, 0.01)
    print("Average steps = ", total/(i_episode+1))
    print("Record = ", record)
    for t in range(1000):
        action_taken = selectAction(observation)
        print(action_taken)
        previous_observation = observation
        observation, reward, done, info = env.step(action_taken) # take the selected action
        updateWeights(previous_observation, action_taken, observation, reward, done) # perform backprop to update the action value
        if done:
            total = total+t
            if t > record:
                record = t
            print("Episode {} finished after {} timesteps".format(i_episode,t+1))
            break
Do I need to make any changes in approach/implementation/parameter tuning?
I have written some Python code to solve the N-body problem using the Euler method. The code runs without problems and seems to give a reasonable answer (e.g. if there are two particles then they start moving towards each other). However, when I run this simulation over a large number of iterations I see that the particles (say I run it with two particles) pass by each other (I do not consider collisions) and keep going in their directions indefinitely. This violates the conservation of energy, so there must be a flaw in my code, but I am unable to find it. Could anyone please find it and explain my mistake?
Thank you.
Thanks to @samgak for pointing out that I was updating the particles twice. I have now fixed this but the problem persists. Here is the output I get when I run the simulation with two stationary particles at (0,0) and (1,0), a time step of 1 second, and 100000 iterations:
Particle with mass: 1 and position: [234.8268420043934, 0.0] and velocity: [0.011249111128594091, 0.0]
Particle with mass: 1 and position: [-233.82684200439311, 0.0] and velocity: [-0.011249111128594091, 0.0]
Also thanks to @PM2Ring for pointing out some optimizations I could make and the perils of using the Euler method.
Code:
import math

class Particle:
    """
    Class to represent a single particle
    """
    def __init__(self,mass,position,velocity):
        """
        Initialize the particle
        """
        self.G = 6.67408*10**-11 #fixed throughout the simulation
        self.time_interval = 10**0 #fixed throughout the simulation, gives the interval between updates
        self.mass = mass
        self.position = position #should be a list
        self.velocity = velocity #should be a list
        self.updated_position = position
        self.updated_velocity = velocity
    def __str__(self):
        """
        String representation of particle
        """
        return "Particle with mass: " + str(self.mass) + " and position: " + str(self.position) + " and velocity: " + str(self.velocity)
    def get_mass(self):
        """
        Returns the mass of the particle
        """
        return self.mass
    def get_position(self):
        """
        Returns the position of the particle
        """
        return self.position
    def get_velocity(self):
        """
        Returns the velocity of the particle
        """
        return self.velocity
    def get_updated_position(self):
        """
        Calculates the future position of the particle
        """
        for i in range(len(self.position)):
            self.updated_position[i] = self.updated_position[i] + self.time_interval*self.velocity[i]
    def update_position(self):
        """
        Updates the position of the particle
        """
        self.position = self.updated_position.copy()
    def get_distance(self,other_particle):
        """
        Returns the distance between the particle and another given particle
        """
        tot = 0
        other = other_particle.get_position()
        for i in range(len(self.position)):
            tot += (self.position[i]-other[i])**2
        return math.sqrt(tot)
    def get_updated_velocity(self,other_particle):
        """
        Updates the future velocity of the particle due to the acceleration
        by another particle
        """
        distance_vector = []
        other = other_particle.get_position()
        for i in range(len(self.position)):
            distance_vector.append(self.position[i]-other[i])
        distance_squared = 0
        for item in distance_vector:
            distance_squared += item**2
        distance = math.sqrt(distance_squared)
        force = -self.G*self.mass*other_particle.get_mass()/(distance_squared)
        for i in range(len(self.velocity)):
            self.updated_velocity[i] = self.updated_velocity[i]+self.time_interval*force*(distance_vector[i])/(self.mass*(distance))
    def update_velocity(self):
        """
        Updates the velocity of the particle
        """
        self.velocity = self.updated_velocity.copy()

def update_particles(particle_list):
    """
    Updates the position of all the particles
    """
    for i in range(len(particle_list)):
        for j in range(i+1,len(particle_list)):
            particle_list[i].get_updated_velocity(particle_list[j])
            particle_list[j].get_updated_velocity(particle_list[i])
    for i in range(len(particle_list)):
        particle_list[i].update_velocity()
        particle_list[i].get_updated_position()
    for i in range(len(particle_list)):
        particle_list[i].update_position()

#the list of particles
partList = [Particle(1,[0,0],[0,0]),Particle(1,[1,0],[0,0])]
#how many iterations I perform
for i in range(100000):
    update_particles(partList)
#prints out the final position of all the particles
for item in partList:
    print(item)
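A quick way to confirm the energy blow-up is to track the total energy as the loop runs; here is a diagnostic sketch built only on the Particle methods above (G is hard-coded to match the class):

def total_energy(particles, G=6.67408e-11):
    #kinetic energy: sum of (1/2) m v^2
    ke = sum(0.5*p.get_mass()*sum(v**2 for v in p.get_velocity()) for p in particles)
    #potential energy: -G m1 m2 / r over each unordered pair
    pe = 0.0
    for i in range(len(particles)):
        for j in range(i+1, len(particles)):
            pe -= G*particles[i].get_mass()*particles[j].get_mass() / particles[i].get_distance(particles[j])
    return ke + pe

#e.g. print(total_energy(partList)) every few thousand iterations; it should stay roughly constant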
Further Edit:
I decided to implement the Leapfrog method, and I have developed some code that once again runs and seems to work well (at least in the command line). However, when I added plotting functionality and analysed it, there seemed to be another problem. Again the system goes too far and the energy increases without bound. I have attached a picture of the output to showcase the problem. If I again have just two particles of equal mass, they pass each other and continue away from each other without stopping. Thus there must be a bug in my code that I am not finding.
If anyone can help it will be much appreciated.
My Code:
import math
import matplotlib.pyplot as plt

class Particle:
    """
    Represents a single particle
    """
    def __init__(self,mass,position,velocity):
        """
        Initialize the particle
        """
        self.G = 6.67408*10**-11
        self.time_step = 10**2
        self.mass = mass
        self.dimensions = len(position)
        self.position = position
        self.velocity = velocity
        self.acceleration = [0 for i in range(len(position))]
        self.next_position = position
        self.next_velocity = velocity
        self.next_acceleration = [0 for i in range(len(position))]
    def __str__(self):
        """
        A string representation of the particle
        """
        return "A Particle with mass: " + str(self.mass) + " and position: " + str(self.position) + " and velocity:" + str(self.velocity)
    def get_mass(self):
        return self.mass
    def get_position(self):
        return self.position
    def get_velocity(self):
        return self.velocity
    def get_acceleration(self):
        return self.acceleration
    def get_next_position(self):
        return self.next_position
    def put_next_position(self):
        for i in range(self.dimensions):
            self.next_position[i] = self.position[i] + self.time_step*self.velocity[i]+0.5*self.time_step**2*self.acceleration[i]
    def put_next_velocity(self):
        for i in range(self.dimensions):
            self.next_velocity[i] = self.velocity[i] + 0.5*self.time_step*(self.acceleration[i]+self.next_acceleration[i])
    def update_position(self):
        self.position = self.next_position.copy()
    def update_velocity(self):
        self.velocity = self.next_velocity.copy()
    def update_acceleration(self):
        self.acceleration = self.next_acceleration.copy()
    def reset_acceleration(self):
        self.acceleration = [0 for i in range(self.dimensions)]
    def reset_future_acceleration(self):
        self.next_acceleration = [0 for i in range(self.dimensions)]
    def calculate_acceleration(self,other_particle):
        """
        Increments the acceleration of the particle due to the force from
        a single other particle
        """
        distances = []
        other = other_particle.get_position()
        distance_squared = 0
        for i in range(self.dimensions):
            distance_squared += (self.position[i]-other[i])**2
            distances.append(self.position[i]-other[i])
        distance = math.sqrt(distance_squared)
        force = -self.G*self.mass*other_particle.get_mass()/distance_squared
        acc = []
        for i in range(self.dimensions):
            acc.append(force*distances[i]/(distance*self.mass))
        for i in range(self.dimensions):
            self.acceleration[i] += acc[i]
    def calculate_future_acceleration(self,other_particle):
        """
        Increments the future acceleration of the particle due to the force from
        a single other particle
        """
        distances = []
        other = other_particle.get_next_position()
        distance_squared = 0
        for i in range(self.dimensions):
            distance_squared += (self.next_position[i]-other[i])**2
            distances.append(self.next_position[i]-other[i])
        distance = math.sqrt(distance_squared)
        force = -self.G*self.mass*other_particle.get_mass()/distance_squared
        acc = []
        for i in range(self.dimensions):
            acc.append(force*distances[i]/(distance*self.mass))
        for i in range(self.dimensions):
            self.next_acceleration[i] += acc[i]

def update_all(particleList):
    for i in range(len(particleList)):
        particleList[i].reset_acceleration()
        for j in range(len(particleList)):
            if i != j:
                particleList[i].calculate_acceleration(particleList[j])
    for i in range(len(particleList)):
        particleList[i].put_next_position()
    for i in range(len(particleList)):
        particleList[i].reset_future_acceleration()
        for j in range(len(particleList)):
            if i != j:
                particleList[i].calculate_future_acceleration(particleList[j])
    for i in range(len(particleList)):
        particleList[i].put_next_velocity()
    for i in range(len(particleList)):
        particleList[i].update_position()
        particleList[i].update_velocity()

partList = [Particle(1,[0,0],[0,0]),Particle(1,[1,0],[0,0])]
Alist = [[],[]]
Blist = [[],[]]
for i in range(10000):
    Alist[0].append(partList[0].get_position()[0])
    Alist[1].append(partList[0].get_position()[1])
    Blist[0].append(partList[1].get_position()[0])
    Blist[1].append(partList[1].get_position()[1])
    update_all(partList)
plt.scatter(Alist[0],Alist[1],color="r")
plt.scatter(Blist[0],Blist[1],color="b")
plt.grid()
plt.show()
for item in partList:
    print(item)
Could someone please tell me where the error in my code is?
The main problem with the code is that it uses the Euler method, which becomes quite inaccurate as the number of iterations increases (its error is O(h), compared to other methods which can be O(h^4) or even better). Fixing this would require a fundamental restructuring of the code, so I would say that this code is not really suitable for an N-body simulation (it misbehaves for 2 particles, and as I add more and more particles the error can only increase).
Thanks to @samgak and @PM2Ring for helping me remove a bug and optimize my code, but overall this code is unusable...
EDIT: I have implemented the leapfrog method mentioned in the comments from scratch and have found it to work perfectly. It is very simple to understand and implement, and it works too!
Further EDIT: I thought I had the leapfrog method working. It turns out there was another bug in it that I only saw once I added GUI functionality.
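For reference, the core of the scheme can be sketched as a kick-drift-kick step like this (illustrative names, not my exact code):

def leapfrog_step(pos, vel, acc_func, dt):
    #kick-drift-kick: time-symmetric, so energy stays bounded over long runs
    vel = [v + 0.5*dt*a for v, a in zip(vel, acc_func(pos))] #half kick
    pos = [x + dt*v for x, v in zip(pos, vel)]               #full drift
    vel = [v + 0.5*dt*a for v, a in zip(vel, acc_func(pos))] #half kick
    return pos, vel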
I'm trying to write a Python class that creates a matrix of zeros, then uses a random number generator to pick spots on the matrix, changing the zero in that spot to a one until the matrix is all ones. Can someone critique/correct my code? (I also want the generator to check its proximity on the matrix, and try 3 times to find a spot that is 2 spots away from any ones.)
import random
import numpy as np

#agents is amount of agents available to fill grid
class Gridmodel():
    def __init__(self, gridsize, agents):
        self.gridsize = gridsize
        self.agents = agents
        self.gridmodel = np.zeros([self.gridsize, self.gridsize], dtype=int)
    def foundspot(self):
        foundspot = False
        tries = 0
        while foundspot == False and tries <= 3:
            x = random.randint(0, self.gridsize - 1) #randint is inclusive at both ends, so stop at gridsize - 1
            y = random.randint(0, self.gridsize - 1)
            if self.gridmodel[x][y] < 0:
                foundspot = True
            else:
                tries += 1
    def goodspot(self, x, y):
        goodspot = self.gridmodel[x][y]
        for i in range(-1, 2):
            for j in range(-1, 2):
                print(i, j, self.gridmodel[i][j])
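For comparison, here is a minimal sketch of the fill loop described above (a hypothetical standalone function; the 3-try proximity check is left out):

import random
import numpy as np

def fill_grid(gridsize):
    grid = np.zeros((gridsize, gridsize), dtype=int)
    #every cell starts empty; pick random empty cells until none remain
    empty = [(x, y) for x in range(gridsize) for y in range(gridsize)]
    while empty:
        x, y = empty.pop(random.randrange(len(empty)))
        grid[x, y] = 1
    return grid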
import numpy as np
import matplotlib.pylab as plt

class Buffon_needle_problem:
    def __init__(self,x,y,n,m):
        self.x = x #width of the needle
        self.y = y #width of the space
        self.r = [] #coordinates of the centre of the needle
        self.z = [] #measure of the alignment of the needle
        self.n = n #no of throws
        self.m = m #no of simulations
        self.pi_approx = []
    def samples(self):
        # throwing the needles
        for i in range(self.n):
            self.r.append(np.random.uniform(0,self.y))
            self.z.append(np.random.uniform(0,self.x/2.0))
        return [self.r,self.z]
    def simulation(self):
        self.samples()
        # m simulations
        for j in range(self.m):
            # n throws
            hits = 0 #setting the successes to 0
            for i in range(self.n):
                #condition for a hit
                if self.r[i]+self.z[i]>=self.y or self.r[i]-self.z[i] <= 0.0:
                    hits += 1
                else:
                    continue
            hits = 2*(self.x/self.y)*float(self.n/hits)
            self.pi_approx.append(hits)
        return self.pi_approx

y = Buffon_needle_problem(1,2,40000,5)
print (y.simulation())
For those unfamiliar with Buffon's problem, here is a description: http://mathworld.wolfram.com/BuffonsNeedleProblem.html
or a simulation implementing the same idea (and output): http://pythonfiddle.com/historically-accurate-buffons-needle/
My expected output should be the value of pi, but my code gives me around 4. Can anyone point out the logical error?
The sampling of the needle's alignment should be a uniform cosine. See the following link for the method: http://pdg.lbl.gov/2012/reviews/rpp2012-rev-monte-carlo-techniques.pdf
Also, there were a few logical problems with the program. Here is a working version.
#!/bin/python
import numpy as np

def sample_cosine():
    rr=2.
    while rr > 1.:
        u1=np.random.uniform(0,1.)
        u2=np.random.uniform(0,1.)
        v1=2*u1-1.
        rr=v1*v1+u2*u2
    cc=(v1*v1-u2*u2)/rr
    return cc

class Buffon_needle_problem:
    def __init__(self,x,y,n,m):
        self.x = float(x) #width of the needle
        self.y = float(y) #width of the space
        self.r = [] #coordinates of the centre of the needle
        self.z = [] #measure of the alignment of the needle
        self.n = n #no of throws
        self.m = m #no of simulations
        self.p = self.x/self.y
        self.pi_approx = []
    def samples(self):
        # throwing the needles
        for i in range(self.n):
            self.r.append(np.random.uniform(0,self.y))
            C=sample_cosine()
            self.z.append(C*self.x/2.)
        return [self.r,self.z]
    def simulation(self):
        # m simulations
        for j in range(self.m):
            self.r=[]
            self.z=[]
            self.samples()
            # n throws
            hits = 0 #setting the successes to 0
            for i in range(self.n):
                #condition for a hit
                if self.r[i]+self.z[i]>=self.y or self.r[i]-self.z[i]<0.:
                    hits += 1
                else:
                    continue
            est = self.p*float(self.n)/float(hits)
            self.pi_approx.append(est)
        return self.pi_approx

y = Buffon_needle_problem(1,2,80000,5)
print (y.simulation())
Buffon's needle works accurately only when the distance between the two lines is double the length of the needle. Make sure to cross-check this.
I have seen many Buffon's needle simulations online that make this mistake: they just take the distance between two adjacent lines to be equal to the needle's length. That's their main logical error.
I would say that the problem is that you are defining the alignment of the needle by a simple linear function, when in fact the effective length of the needle from its centre is defined by a sinusoidal function.
You want to calculate the effective length of the needle (at 90° to the lines) by using a function that will calculate it from its angle.
Something like:
self.z.append(np.cos(np.random.uniform(-np.pi/2, np.pi/2))*self.x)
This will give the cosine of a random angle between -90° and +90°, times the length of the needle.
For reference, cos(±90°) = 0 and cos(0°) = 1, so at 90° the needle will have effectively zero length, and at 0° its full length.
I have neither matplotlib nor numpy installed on this machine, so I can't check whether this fixes it, but it's definitely necessary.
Looks like you were committing a simple rounding error. The code below works, though the results are not very close to pi...
import numpy as np
import matplotlib.pylab as plt

class Buffon_needle_problem:
    def __init__(self,x,y,n,m):
        self.x = x #width of the needle
        self.y = y #width of the space
        self.r = [] #coordinates of the centre of the needle
        self.z = [] #measure of the alignment of the needle
        self.n = n #no of throws
        self.m = m #no of simulations
        self.pi_approx = []
    def samples(self):
        # throwing the needles
        for i in range(self.n):
            self.r.append(np.random.uniform(0,self.y))
            self.z.append(np.random.uniform(0,self.x/2.0))
        return [self.r,self.z]
    def simulation(self):
        #self.samples()
        # m simulations
        for j in range(self.m):
            self.r=[]
            self.z=[]
            for i in range(self.n):
                self.r.append(np.random.uniform(0,self.y))
                self.z.append(np.random.uniform(0,self.x/2.0))
            # n throws
            hits = 0 # setting the successes to 0
            for i in range(self.n):
                # condition for a hit
                if self.r[i]+self.z[i]>=self.y or self.r[i]-self.z[i] <= 0.0:
                    hits += 1
                else:
                    continue
            hits = 2.0*(float(self.x)/self.y)*float(self.n)/float(hits)
            self.pi_approx.append(hits)
        return self.pi_approx

y = Buffon_needle_problem(1,2,40000,5)
print (y.simulation())
Also note that you were using the same sample for all simulations!
I used Python turtle to approximate the value of Pi:
from turtle import *
from random import *

setworldcoordinates(-100, -200, 200, 200)
ht(); speed(0); color('blue')
drops = 20 # increase number of drops for better approximation
hits = 0 # hits counter
# draw parallel lines with distance 20 between adjacent lines
for i in range(0, 120, 20):
    pu(); setpos(0, i); pd()
    fd(100) # length of line
# throw needles
color('red')
for j in range(drops):
    pu()
    goto(randrange(10, 90), randrange(0,100))
    y1 = ycor() # keep ycor of start point
    seth(360*random())
    pd(); fd(20) # draw needle of length 20
    y2 = ycor() # keep ycor of end point
    if y1//20 != y2//20: # decisive test: if it is a hit then ...
        hits += 1 # increase the hits counter by 1
print(2 * drops / hits)
Output samples:
With 50 drops: 3.225806451612903
With 200 drops: 3.3057851239669422
With 1000 drops: 3.1645569620253164
NOT an answer to the original question; if you just want the pi estimate, here's some simple code from a computational revision exercise I did yesterday at Uni Sydney (Australia). Against my early inclinations, to reduce complexity we only modelled a random point between zero and the distance between the lines, and a random angle from zero to 90 degrees.
import random
from numpy import pi, sin

def buffon(L, D, N):
    '''
    BUFFON takes L (needle length),
    D = distance between lines and N = number of drops,
    returns probability of hitting a line.
    Generate a random number 'd' between 0 and D,
    generate theta between 0 and pi/2;
    hit when L*sin(theta) is greater than D - d.
    '''
    hit = 0
    for loop in range(N):
        theta = pi*random.random()/2
        if L * sin(theta) > D - D*random.random(): # d = random*D
            hit += 1
    return hit/N

#% Prob_hit = 2*L/(D*pi) hence: Pi_est = 2*L / (P_hit*D)
L = 1
D = 4
N = int(1e8)
Pi_est = 2*L / (buffon(L,D,N)*D)
The original was in MATLAB; I wanted to try it in Python and see if I could use any list comprehensions. Any ideas to speed this up are welcome.
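One option is to vectorize the drops with NumPy so the Python loop disappears; a sketch that should be statistically equivalent to the function above:

import numpy as np

def buffon_numpy(L, D, N):
    theta = np.pi * np.random.random(N) / 2 #angles in [0, pi/2)
    d = D * np.random.random(N)             #random offsets, playing the role of D - D*random
    hits = np.count_nonzero(L * np.sin(theta) > d)
    return hits / N

L, D, N = 1, 4, 10**7 #smaller N than 1e8 keeps memory use modest
print(2*L / (buffon_numpy(L, D, N) * D)) #estimate of pi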
It should be noted that the Monte Carlo method is not the best for this kind of calculation (approximating the number pi). One way or another, you have to throw quite a lot of needles (or points, in the case of the quarter circle) to get a more accurate pi. The main disadvantage of the Monte Carlo method is its unpredictability.
https://github.com/Battle-Of-Two-K/Buffon-s-Noodle-Problem
https://github.com/Battle-Of-Two-K/Calculating-Pi-by-Monte-Carlo-Method