Using NEAT can I get a better result for snake game? - python

I have created snake game in python and am using NEAT to create an Neural network to play the game. I have put a lot of time into playing around with the config file and fitness functions but the average fitness doesn't increase. I would really love if someone could give some advice.
I have attached the python file for the game and also the NEAT config file I used and commented the code for readability.
import os
import random
import pygame
import neat
from scipy.spatial import distance
gen = 0
snakes = []
snacks = []
rows = 20
def draw_grid(w, surface):
size_btwn = w // rows
x, y = 0, 0
for l in range(rows):
x = x + size_btwn
y = y + size_btwn
pygame.draw.line(surface, (255, 255, 255), (x, 0), (x, w))
pygame.draw.line(surface, (255, 255, 255), (0, y), (w, y))
def redraw_window(surface):
global rows, snakes, snacks
surface.fill((0, 0, 0))
for i, s1 in enumerate(snakes):
s1.draw(surface)
s1.snack.draw(surface)
draw_grid(width, surface)
pygame.display.update()
class Cube:
rows = 20
w = 500
def __init__(self, position, color=(255, 0, 0)):
self.pos = position
self.color = color
def draw(self, surface):
dis = self.w // self.rows
i = self.pos[0]
j = self.pos[1]
pygame.draw.rect(surface, self.color, (i * dis + 1, j * dis + 1, dis - 2, dis - 2))
class Snake:
def __init__(self, pos):
self.head = Cube(pos)
self.body = []
self.body.append(self.head)
self.dirnx = 0
self.dirny = 1
self.added_cube = False
self.snack = Cube(randomSnack(rows), color=(0, 255, 0))
self.time = 50
def change_dir(self, direction_x, direction_y):
self.dirnx = direction_x
self.dirny = direction_y
def move(self):
self.head = self.body[-1]
# new_x, new_y = (self.head.pos[0] + self.dirnx) % rows, (self.head.pos[1] + self.dirny) % rows # add this to stop death from wall hit
new_x, new_y = (self.head.pos[0] + self.dirnx), (self.head.pos[1] + self.dirny)
c1 = Cube([new_x, new_y])
self.body.append(c1)
if not self.added_cube:
del self.body[0]
self.added_cube = False
def add_cube(self):
self.added_cube = True
def draw(self, surface):
for i, cube in enumerate(self.body):
cube.draw(surface)
def randomSnack(rows):
x = random.randrange(rows)
y = random.randrange(rows)
return [x, y]
def check_dir_changed():
for event in pygame.event.get():
keys = pygame.key.get_pressed()
for snake in snakes:
for key in keys:
if keys[pygame.K_LEFT]:
snake.change_dir(-1, 0)
elif keys[pygame.K_RIGHT]:
snake.change_dir(1, 0)
elif keys[pygame.K_UP]:
snake.change_dir(0, -1)
elif keys[pygame.K_DOWN]:
snake.change_dir(0, 1)
def eval_genomes(genomes, config):
global width, rows, snakes, snacks, gen
gen += 1
width = 500
rows = 20
win = pygame.display.set_mode((width, width))
nets = []
snakes = []
snacks = []
ge = []
for genome_id, genome in genomes:
genome.fitness = 1 # start with fitness level of 1
net = neat.nn.FeedForwardNetwork.create(genome, config)
nets.append(net)
start_position_x, start_position_y = random.randrange(0, 20), random.randrange(0, 20)
snakes.append(Snake([start_position_x, start_position_y]))
ge.append(genome)
clock = pygame.time.Clock()
while True and len(snakes) > 0:
pygame.time.delay(50)
clock.tick(10)
check_dir_changed()
for i, snake in enumerate(snakes):
# send info and determine from network what direction to go
output = nets[snakes.index(snake)].activate(
(snake.dirnx, snake.dirny, snake.snack.pos[0], snake.snack.pos[1],
snake.head.pos[0], snake.head.pos[1], distance.euclidean(snake.snack.pos, snake.head.pos),
distance.euclidean(snake.snack.pos[0], snake.head.pos[0]),
distance.euclidean(snake.snack.pos[1], snake.head.pos[1])))
# get the right move to make
max_output = -2
best_output = 0
for j, out in enumerate(output):
if out > max_output:
max_output = out
best_output = j
# 0 is right, 1 is left, 2 is down, 3 is up
if best_output == 0:
snake.change_dir(1, 0)
elif best_output == 1:
snake.change_dir(-1, 0)
elif best_output == 2:
snake.change_dir(0, 1)
elif best_output == 3:
snake.change_dir(0, -1)
snake.move()
# take 1 from the current snake's time, this stops snakes running around forever
snake.time -= 1
# add fitness depending how close snake is to the snack
ge[snakes.index(snake)].fitness += 20 - distance.euclidean(snake.snack.pos, snake.head.pos)
# if snake head eats snack
if snake.body[-1].pos == snake.snack.pos:
ge[snakes.index(snake)].fitness += 1000
snake.time += 40 # give snake more time since they got a snack
snake.add_cube()
snake.snack = Cube(randomSnack(rows), color=(0, 255, 0))
# if snake ran out of time without getting snack
if snake.time < 1:
ge[snakes.index(snake)].fitness -= 5
nets.pop(snakes.index(snake))
ge.pop(snakes.index(snake))
snakes.pop(snakes.index(snake))
break
# if snake hits a wall
if snake.body[-1].pos[0] > 20 or snake.body[-1].pos[0] < 0 or snake.body[-1].pos[1] > 20 or snake.body[-1].pos[1] < 0:
ge[snakes.index(snake)].fitness -= 1000
nets.pop(snakes.index(snake))
ge.pop(snakes.index(snake))
snakes.pop(snakes.index(snake))
break
# if snake eats itself
for x in range(len(snake.body) - 1):
if snake.body[x].pos == snake.body[-1].pos:
ge[snakes.index(snake)].fitness -= 10
nets.pop(snakes.index(snake))
ge.pop(snakes.index(snake))
snakes.pop(snakes.index(snake))
print('Score: ', len(snake.body))
break
redraw_window(win)
pass
def run(config_file):
config = neat.config.Config(neat.DefaultGenome, neat.DefaultReproduction,
neat.DefaultSpeciesSet, neat.DefaultStagnation,
config_file)
# Create the population, which is the top-level object for a NEAT run.
p = neat.Population(config)
# Add a stdout reporter to show progress in the terminal.
p.add_reporter(neat.StdOutReporter(True))
stats = neat.StatisticsReporter()
p.add_reporter(stats)
# p.add_reporter(neat.Checkpointer(5))
# Run for up to 50 generations.
winner = p.run(eval_genomes, 100)
# show final stats
print('\nBest genome:\n{!s}'.format(winner))
if __name__ == '__main__':
local_dir = os.path.dirname(__file__)
config_path = os.path.join(local_dir, 'config-feedforward.txt')
run(config_path)
[NEAT]
fitness_criterion = max
fitness_threshold = 10000000
pop_size = 20
reset_on_extinction = True
[DefaultGenome]
# node activation options
activation_default = sigmoid
activation_mutate_rate = 0.05
activation_options = sigmoid gauss
#abs clamped cube exp gauss hat identity inv log relu sigmoid sin softplus square tanh
# node aggregation options
aggregation_default = random
aggregation_mutate_rate = 0.05
aggregation_options = sum product min max mean median maxabs
# node bias options
bias_init_mean = 0.05
bias_init_stdev = 1.0
bias_max_value = 30.0
bias_min_value = -30.0
bias_mutate_power = 0.5
bias_mutate_rate = 0.7
bias_replace_rate = 0.1
# genome compatibility options
compatibility_disjoint_coefficient = 1.0
compatibility_weight_coefficient = 0.5
# connection add/remove rates
conn_add_prob = 0.5
conn_delete_prob = 0.5
# connection enable options
enabled_default = True
enabled_mutate_rate = 0.5
feed_forward = False
#initial_connection = unconnected
initial_connection = partial_nodirect 0.5
# node add/remove rates
node_add_prob = 0.5
node_delete_prob = 0.2
# network parameters
num_hidden = 0
num_inputs = 9
num_outputs = 4
# node response options
response_init_mean = 1.0
response_init_stdev = 0.05
response_max_value = 30.0
response_min_value = -30.0
response_mutate_power = 0.1
response_mutate_rate = 0.75
response_replace_rate = 0.1
# connection weight options
weight_init_mean = 0.1
weight_init_stdev = 1.0
weight_max_value = 30
weight_min_value = -30
weight_mutate_power = 0.5
weight_mutate_rate = 0.8
weight_replace_rate = 0.1
[DefaultSpeciesSet]
compatibility_threshold = 2.5
[DefaultStagnation]
species_fitness_func = max
max_stagnation = 50
species_elitism = 0
[DefaultReproduction]
elitism = 3
survival_threshold = 0.3

Related

Neat Snake. AI training stands still, can't find the problem

I cannot solve a problem in this code and find a mistake in it.
The essence of the problem: There is a snake script, is not very intricate, it bolted Neat library. The script, one by one, in a loop for i, runs 20 snakes and I hope to evolve. I motivate them by the fact that approaching the food +1 to reward, going beyond (death) -10 reward, if you ate food +5. But with each new generation of training stands still...
import random, pygame, sys, neat
from pygame.locals import *
FPS = 25
WINDOWWIDTH = 640
WINDOWHEIGHT = 480
CELLSIZE = 20
assert WINDOWWIDTH % CELLSIZE == 0, "Window width must be a multiple of cell size."
assert WINDOWHEIGHT % CELLSIZE == 0, "Window height must be a multiple of cell size."
CELLWIDTH = int(WINDOWWIDTH / CELLSIZE)
CELLHEIGHT = int(WINDOWHEIGHT / CELLSIZE)
# R G B
WHITE = (255, 255, 255)
BLACK = ( 0, 0, 0)
RED = (255, 0, 0)
GREEN = ( 0, 255, 0)
DARKGREEN = ( 0, 155, 0)
DARKGRAY = ( 40, 40, 40)
BGCOLOR = BLACK
txt_on = 1
UP = 'up'
DOWN = 'down'
LEFT = 'left'
RIGHT = 'right'
HEAD = 0 # syntactic sugar: index of the worm's head
info_list = [0,0,0,0,0,0]
list_apple = []
score_outzero = ['0']
location = {}
was_reward_x = 0
was_reward_y = 0
generation = 0
def main_proc(genomes, config):
global FPSCLOCK, DISPLAYSURF, BASICFONT, generation
pygame.init()
FPSCLOCK = pygame.time.Clock()
DISPLAYSURF = pygame.display.set_mode((WINDOWWIDTH, WINDOWHEIGHT))
BASICFONT = pygame.font.Font('freesansbold.ttf', 18)
pygame.display.set_caption('Wormy')
nets = []
ge = [] # Змейки
for i, g in genomes:
net = neat.nn.FeedForwardNetwork.create(g, config)
nets.append(net)
g.fitness = 0
#print('NETS:', len(nets))
ge.append(g)
print(g)
generation += 1
for ka in range(len(ge)): # as long as there are live snakes on the list
k = 19 - ka # a descending variable in order to cut the snake from the list without running into an index error in line 190
wormCoords = [{'x': 15, 'y': 15},
{'x': 15 - 1, 'y': 15},
{'x': 15 - 2, 'y': 15}]
direction = RIGHT
# Start the apple in a random place.
apple = getRandomLocation(k, len(wormCoords) - 3)
savex = 0
savey = 0
rewx = []
rewy = []
print('NETS:', len(nets))
for l in range(601): # чтобы бесконечно не крутились у них 601 попытка
def minus_stata(xa):
pass
genomes[k][1].fitness -= xa
genomes.pop(k)
nets.pop(k)
for event in pygame.event.get(): # event handling loop
if event.type == QUIT:
terminate()
elif event.type == KEYDOWN:
if (event.key == K_LEFT or event.key == K_a) and direction != RIGHT:
direction = LEFT
elif (event.key == K_RIGHT or event.key == K_d) and direction != LEFT:
direction = RIGHT
elif (event.key == K_UP or event.key == K_w) and direction != DOWN:
direction = UP
elif (event.key == K_DOWN or event.key == K_s) and direction != UP:
direction = DOWN
elif event.key == K_ESCAPE:
terminate()
#print(info_list, k, len(nets))
# check if the worm has hit itself or the edge
#print(output)
count_de = ['0'] # переменная смерти
def death_check():
if wormCoords[HEAD]['x'] == -1 or wormCoords[HEAD]['x'] == CELLWIDTH or wormCoords[HEAD]['y'] == -1 or \
wormCoords[HEAD]['y'] == CELLHEIGHT:
count_de.clear()
count_de.append('1')
for wormBody in wormCoords[1:]:
if wormBody['x'] == wormCoords[HEAD]['x'] and wormBody['y'] == wormCoords[HEAD]['y']:
count_de.clear()
count_de.append('1')
#print(wormCoords[HEAD]['x'], wormCoords[HEAD]['y'], CELLWIDTH, CELLHEIGHT)
# check if worm has eaten an apply
if wormCoords[HEAD]['x'] == apple['x'] and wormCoords[HEAD]['y'] == apple['y']:
# don't remove worm's tail segment
print('Я ВЗЯЛ ЯБЛОКО ИЧКИ', len(wormCoords) - 2)
apple = getRandomLocation(k, len(wormCoords) - 2) # set a new apple somewhere
genomes[k][1].fitness += 600
rewx.clear()
rewy.clear()
else:
del wormCoords[-1] # remove worm's tail segment
w_app_x1 = int(apple['x']) - int(wormCoords[HEAD]['x'])
w_app_y1 = int(apple['y']) - int(wormCoords[HEAD]['y'])
if w_app_x1 == 0:
w_app_x1 = w_app_x1 * -1
#print(genomes[k])
genomes[k][1].fitness += 10
if w_app_y1 == 0:
w_app_y1 = w_app_y1 * -1
genomes[k][1].fitness += 10
#time.sleep(1)
def check_pos_to_reward(): # Функция выдачи награды
global was_reward_x, was_reward_y
if savex < w_app_x1:
pass
#print('отдаляюсь по Х', savex)
genomes[k][1].fitness -= 10
elif savex == w_app_x1:
#print('Без изменений Х', savex)
pass
else:
if was_reward_x == 0 and savex not in rewx:
#print('Приближаюсь Х', savex)
genomes[k][1].fitness += 40
was_reward_x += 1
was_reward_y *= 0
rewx.append(savex)
if savey < w_app_y1:
pass
#print('отдаляюсь по y', savey)
genomes[k][1].fitness -= 10
elif savey == w_app_y1:
pass
#print('Без изменений Y', savey)
else:
if was_reward_y == 0 and savey not in rewy:
#print('Приближаюсь Y', savey)
genomes[k][1].fitness += 40
rewy.append(savey)
was_reward_y += 1
was_reward_x *= 0
# print(w_app_x1, w_app_y1 )
w_app_x = int(apple['x'])
w_app_y = int(apple['y'])
#print(w_app_x1, w_app_y1, w_app_x, w_app_y)
def get_info(): # Тут добавляются значения с карты (где змейка, еда, сколько поинтоа до еды и тд и тп)
info_list.clear()
info_list1 = [wormCoords[HEAD]['x'], wormCoords[HEAD]['y'], w_app_x1, w_app_y1, w_app_x, w_app_y]
for i in range(len(info_list1)):
info_list.append(info_list1[i])
get_info()
#print(info_list, nets)
mjh = 19 - k
#print(len(nets), k)
output = nets[k].activate(info_list)
#genomes[k].fitness += 1
death_check()
if l == 600:
minus_stata(20)
break
if int(count_de[0]) == 1:
minus_stata(30)
break
if max(output) == output[0] and direction != DOWN and direction != UP:
if txt_on == 1:
pass
#genomes[k].fitness -= 10
#print('Выбираю нопку ВВЕРХ', output, action_button)
direction = UP
if l == 0:
pass
elif max(output) == output[1] and direction != UP and direction != DOWN:
if txt_on == 1:
#genomes[k].fitness -= 10
pass
#print('Выбираю нопку ВНИЗ', output, action_button)
direction = DOWN
if l == 0:
pass
check_pos_to_reward()
#get_info()
elif max(output) == output[2] and direction != LEFT and direction != RIGHT:
if txt_on == 1:
pass
#genomes[k].fitness -= 10
#print('Выбираю нопку ВПРАВО', output, action_button)
direction = RIGHT
if l == 0:
pass
check_pos_to_reward()
#get_info()
# move the worm by adding a segment in the direction it is moving
if direction == UP:
newHead = {'x': wormCoords[HEAD]['x'], 'y': wormCoords[HEAD]['y'] - 1}
elif direction == DOWN:
newHead = {'x': wormCoords[HEAD]['x'], 'y': wormCoords[HEAD]['y'] + 1}
elif direction == LEFT:
newHead = {'x': wormCoords[HEAD]['x'] - 1, 'y': wormCoords[HEAD]['y']}
elif direction == RIGHT:
newHead = {'x': wormCoords[HEAD]['x'] + 1, 'y': wormCoords[HEAD]['y']}
wormCoords.insert(0, newHead)
check_pos_to_reward()
savex = w_app_x1
savey = w_app_y1
DISPLAYSURF.fill(BGCOLOR)
drawGrid()
drawWorm(wormCoords)
drawApple(apple)
drawScore(len(wormCoords) - 3, generation, k)
pygame.display.update()
FPSCLOCK.tick(FPS)
print(len(genomes))
#minus_stata()
def drawPressKeyMsg():
pressKeySurf = BASICFONT.render('Press a key to play.', True, DARKGRAY)
pressKeyRect = pressKeySurf.get_rect()
pressKeyRect.topleft = (WINDOWWIDTH - 200, WINDOWHEIGHT - 30)
DISPLAYSURF.blit(pressKeySurf, pressKeyRect)
def checkForKeyPress():
if len(pygame.event.get(QUIT)) > 0:
terminate()
keyUpEvents = pygame.event.get(KEYUP)
if len(keyUpEvents) == 0:
return None
if keyUpEvents[0].key == K_ESCAPE:
terminate()
return keyUpEvents[0].key
def showStartScreen():
titleFont = pygame.font.Font('freesansbold.ttf', 100)
titleSurf1 = titleFont.render('Wormy!', True, WHITE, DARKGREEN)
titleSurf2 = titleFont.render('Wormy!', True, GREEN)
degrees1 = 0
degrees2 = 0
while True:
DISPLAYSURF.fill(BGCOLOR)
rotatedSurf1 = pygame.transform.rotate(titleSurf1, degrees1)
rotatedRect1 = rotatedSurf1.get_rect()
rotatedRect1.center = (WINDOWWIDTH / 2, WINDOWHEIGHT / 2)
DISPLAYSURF.blit(rotatedSurf1, rotatedRect1)
rotatedSurf2 = pygame.transform.rotate(titleSurf2, degrees2)
rotatedRect2 = rotatedSurf2.get_rect()
rotatedRect2.center = (WINDOWWIDTH / 2, WINDOWHEIGHT / 2)
DISPLAYSURF.blit(rotatedSurf2, rotatedRect2)
drawPressKeyMsg()
if checkForKeyPress():
pygame.event.get() # clear event queue
return
pygame.display.update()
FPSCLOCK.tick(FPS)
degrees1 += 3 # rotate by 3 degrees each frame
degrees2 += 7 # rotate by 7 degrees each frame
def terminate():
pygame.quit()
sys.exit()
score_count = 0
xrandom = []
yrandom = []
start_game = 0
def getRandomLocation(i, b,): # Случайное появление еды, раз в генерацию
global score_count, start_game
#m = {'x': random.randint(0, CELLWIDTH - 1), 'y': random.randint(0, CELLHEIGHT - 1)}
#return {'x': random.randint(0, CELLWIDTH - 1), 'y': random.randint(0, CELLHEIGHT - 1)}
#print('MAX', score_count, 'СЧЕТ: ',b)
#if True:
#if start_game == 0 or score_count < b:
print(i)
if i == 19:
print('СМЕНА КООРДИНАТ ЕДЫ')
xrandom.insert(b, random.randint(0, CELLWIDTH - 1))
yrandom.insert(b, random.randint(0, CELLHEIGHT - 1))
if score_count < b:
score_count += 1
xrandom.insert(b, random.randint(0, CELLWIDTH - 1))
yrandom.insert(b, random.randint(0, CELLHEIGHT - 1))
m = {'x': xrandom[b], 'y': yrandom[b]}
location.clear()
location.update(m)
#print(i,location)
return location
def showGameOverScreen():
gameOverFont = pygame.font.Font('freesansbold.ttf', 150)
gameSurf = gameOverFont.render('Game', True, WHITE)
overSurf = gameOverFont.render('Over', True, WHITE)
gameRect = gameSurf.get_rect()
overRect = overSurf.get_rect()
gameRect.midtop = (WINDOWWIDTH / 2, 10)
overRect.midtop = (WINDOWWIDTH / 2, gameRect.height + 10 + 25)
DISPLAYSURF.blit(gameSurf, gameRect)
DISPLAYSURF.blit(overSurf, overRect)
drawPressKeyMsg()
pygame.display.update()
pygame.time.wait(500)
#checkForKeyPress() # clear out any key presses in the event queue
while True:
if checkForKeyPress():
pygame.event.get() # clear event queue
return
def drawScore(score, generation, k):
scoreSurf = BASICFONT.render('Score: %s' % (score), True, WHITE)
scoreRect = scoreSurf.get_rect()
scoreRect.topleft = (WINDOWWIDTH - 120, 10)
DISPLAYSURF.blit(scoreSurf, scoreRect)
gener = BASICFONT.render('WORMS LEFT: %s' % k, True, WHITE)
gener1 = gener.get_rect()
gener1.topright = (WINDOWWIDTH - 220, 10)
DISPLAYSURF.blit(gener, gener1)
gener3 = BASICFONT.render('GENERATION: %s' % (generation), True, WHITE)
gener4 = gener3.get_rect()
gener4.topright = (WINDOWWIDTH - 420, 10)
DISPLAYSURF.blit(gener3, gener4)
def drawWorm(wormCoords):
for coord in wormCoords:
x = coord['x'] * CELLSIZE
y = coord['y'] * CELLSIZE
wormSegmentRect = pygame.Rect(x, y, CELLSIZE, CELLSIZE)
pygame.draw.rect(DISPLAYSURF, DARKGREEN, wormSegmentRect)
wormInnerSegmentRect = pygame.Rect(x + 4, y + 4, CELLSIZE - 8, CELLSIZE - 8)
pygame.draw.rect(DISPLAYSURF, GREEN, wormInnerSegmentRect)
def drawApple(coord):
x = coord['x'] * CELLSIZE
y = coord['y'] * CELLSIZE
appleRect = pygame.Rect(x, y, CELLSIZE, CELLSIZE)
pygame.draw.rect(DISPLAYSURF, WHITE, appleRect)
def drawGrid():
for x in range(0, WINDOWWIDTH, CELLSIZE): # draw vertical lines
pygame.draw.line(DISPLAYSURF, DARKGRAY, (x, 0), (x, WINDOWHEIGHT))
for y in range(0, WINDOWHEIGHT, CELLSIZE): # draw horizontal lines
pygame.draw.line(DISPLAYSURF, DARKGRAY, (0, y), (WINDOWWIDTH, y))
if __name__ == '__main__':
config_path = "./config-feedforward.txt"
config = neat.config.Config(neat.DefaultGenome, neat.DefaultReproduction, neat.DefaultSpeciesSet,
neat.DefaultStagnation, config_path)
# init NEAT
p = neat.Population(config)
p.add_reporter(neat.StdOutReporter(True))
# run NEAT
p.run(main_proc, 100)
#main()
[NEAT]
fitness_criterion = max
fitness_threshold = 10000
pop_size = 20
reset_on_extinction = False
[DefaultGenome]
# node activation options
activation_default = tanh
activation_mutate_rate = 0.01
activation_options = tanh
# node aggregation options
aggregation_default = sum
aggregation_mutate_rate = 0.01
aggregation_options = sum
# node bias options
bias_init_mean = 0.0
bias_init_stdev = 1.0
bias_max_value = 30.0
bias_min_value = -30.0
bias_mutate_power = 0.5
bias_mutate_rate = 0.7
bias_replace_rate = 0.1
# genome compatibility options
compatibility_disjoint_coefficient = 1.0
compatibility_weight_coefficient = 0.5
# connection add/remove rates
conn_add_prob = 0.5
conn_delete_prob = 0.5
# connection enable options
enabled_default = True
enabled_mutate_rate = 0.01
feed_forward = True
initial_connection = full
# node add/remove rates
node_add_prob = 0.2
node_delete_prob = 0.2
# network parameters
num_hidden = 0
num_inputs = 6
num_outputs = 4
# node response options
response_init_mean = 1.0
response_init_stdev = 0.0
response_max_value = 30.0
response_min_value = -30.0
response_mutate_power = 0.0
response_mutate_rate = 0.0
response_replace_rate = 0.0
# connection weight options
weight_init_mean = 0.0
weight_init_stdev = 1.0
weight_max_value = 30
weight_min_value = -30
weight_mutate_power = 0.5
weight_mutate_rate = 0.8
weight_replace_rate = 0.1
[DefaultSpeciesSet]
compatibility_threshold = 3.0
[DefaultStagnation]
species_fitness_func = max
max_stagnation = 20
species_elitism = 2
[DefaultReproduction]
elitism = 3
survival_threshold = 0.2
Config File NEAT ./config-feedforward.txt
Still the same snake does not respond to:
Going out of the wall is death and should be feared.
Taking food does not motivate the next generation to strive for it
Approaching the food, does not motivate the next generation tends to move towards it.
I read that I may not be motivating the snake correctly. BUT... I have changed the values of the reward, I have removed the motivation of approaching the food and I have also removed and all motivation at all, except eating food - all to no avail. I am inclined to the fact that in the list of generation is not happening model addition of experience and the subsequent evolution, but I can not find where I made this mistake.

How can I connect two points with a series of circles?

I am trying to make realistic water in pygame:
This is till now my code:
from random import randint
import pygame
WIDTH = 700
HEIGHT = 500
win = pygame.display.set_mode((WIDTH, HEIGHT))
WHITE = (255, 255, 255)
BLACK = (0, 0, 0)
AQUA = 'aqua'
RADIUS = 1
x, y = 0, HEIGHT//2
K = 1
FORCE = 100
VELOCITY = 0.5
run = True
class Molecule:
def __init__(self, x, y, radius, force, k):
self.x = x
self.y = y
self.radius = radius
self.force = force
self.k = k
self.max_amplitude = y + force/k
self.min_amplitude = y - force/k
self.up = False
self.down = True
self.restore = False
def draw(self, win):
pygame.draw.circle(win, BLACK, (self.x, self.y), self.radius)
def oscillate(self):
if self.y <= self.max_amplitude and self.down == True:
self.y += VELOCITY
if self.y == self.max_amplitude or self.up:
self.up = True
self.down = False
self.y -= VELOCITY
if self.y == self.min_amplitude:
self.up = False
self.down = True
molecules = []
for i in range(100):
FORCE = randint(10, 20)
molecules.append(Molecule(x, y, RADIUS, FORCE, K))
x += 10
while run:
for event in pygame.event.get():
if event.type == pygame.QUIT:
run = False
win.fill(WHITE)
for molecule in molecules:
molecule.draw(win)
molecule.oscillate()
for i in range(len(molecules)):
try:
pygame.draw.line(win, BLACK, (molecules[i].x, molecules[i].y), (molecules[i+1].x, molecules[i+1].y))
pygame.draw.line(win, AQUA, (molecules[i].x, molecules[i].y), (molecules[i+1].x, HEIGHT))
except:
pass
pygame.display.flip()
pygame.quit()
But as may expected the water curve is not smooth:
Look at it:
Sample Img1
I want to connect the two randomly added wave points using a set of circles not line like in this one so that a smooth curve could occur.
And in this way i could add the water color to it such that it will draw aqua lines or my desired color line from the point to the end of screen and all this will end up with smooth water flowing simulation.
Now the question is how could i make the points connect together smoothly into a smooth curve by drawing point circles at relative points?
I suggest sticking the segments with a Bézier curves. Bézier curves can be drawn with pygame.gfxdraw.bezier
Calculate the slopes of the tangents to the points along the wavy waterline:
ts = []
for i in range(len(molecules)):
pa = molecules[max(0, i-1)]
pb = molecules[min(len(molecules)-1, i+1)]
ts.append((pb.y-pa.y) / (pb.x-pa.x))
Use the the tangents to define 4 control points for each segment and draw the curve with pygame.gfxdraw.bezier:
for i in range(len(molecules)-1):
p0 = molecules[i].x, molecules[i].y
p3 = molecules[i+1].x, molecules[i+1].y
p1 = p0[0] + 10, p0[1] + 10 * ts[i]
p2 = p3[0] - 10, p3[1] - 10 * ts[i+1]
pygame.gfxdraw.bezier(win, [p0, p1, p2, p3], 4, BLACK)
Complete example:
from random import randint
import pygame
import pygame.gfxdraw
WIDTH = 700
HEIGHT = 500
win = pygame.display.set_mode((WIDTH, HEIGHT))
WHITE = (255, 255, 255)
BLACK = (0, 0, 0)
AQUA = 'aqua'
RADIUS = 1
x, y = 0, HEIGHT//2
K = 1
FORCE = 100
VELOCITY = 0.5
class Molecule:
def __init__(self, x, y, radius, force, k):
self.x = x
self.y = y
self.radius = radius
self.force = force
self.k = k
self.max_amplitude = y + force/k
self.min_amplitude = y - force/k
self.up = False
self.down = True
self.restore = False
def draw(self, win):
pygame.draw.circle(win, BLACK, (self.x, self.y), self.radius)
def oscillate(self):
if self.y <= self.max_amplitude and self.down == True:
self.y += VELOCITY
if self.y == self.max_amplitude or self.up:
self.up = True
self.down = False
self.y -= VELOCITY
if self.y == self.min_amplitude:
self.up = False
self.down = True
molecules = []
for i in range(50):
FORCE = randint(10, 20)
molecules.append(Molecule(x, y, RADIUS, FORCE, K))
x += 20
clock = pygame.time.Clock()
run = True
while run:
clock.tick(100)
for event in pygame.event.get():
if event.type == pygame.QUIT:
run = False
win.fill(WHITE)
for molecule in molecules:
molecule.draw(win)
molecule.oscillate()
ts = []
for i in range(len(molecules)):
pa = molecules[max(0, i-1)]
pb = molecules[min(len(molecules)-1, i+1)]
ts.append((pb.y-pa.y) / (pb.x-pa.x))
for i in range(len(molecules)-1):
p0 = molecules[i].x, molecules[i].y
p3 = molecules[i+1].x, molecules[i+1].y
p1 = p0[0] + 10, p0[1] + 10 * ts[i]
p2 = p3[0] - 10, p3[1] - 10 * ts[i+1]
pygame.gfxdraw.bezier(win, [p0, p1, p2, p3], 4, BLACK)
for i in range(len(molecules)-1):
pygame.draw.line(win, AQUA, (molecules[i].x, molecules[i].y), (molecules[i].x, HEIGHT))
pygame.display.flip()
pygame.quit()
If you want to "fill" the water, you must calculate the points along the Bézier line and draw a filled polygon. How to calculate a Bézier curve is explained in Trying to make a Bezier Curve on PyGame library How Can I Make a Thicker Bezier in Pygame? and "X". You can use the following function:
def ptOnCurve(b, t):
q = b.copy()
for k in range(1, len(b)):
for i in range(len(b) - k):
q[i] = (1-t) * q[i][0] + t * q[i+1][0], (1-t) * q[i][1] + t * q[i+1][1]
return round(q[0][0]), round(q[0][1])
def bezier(b, samples):
return [ptOnCurve(b, i/samples) for i in range(samples+1)]
Use the bezier to stitch the wavy water polygon:
ts = []
for i in range(len(molecules)):
pa = molecules[max(0, i-1)]
pb = molecules[min(len(molecules)-1, i+1)]
ts.append((pb.y-pa.y) / (pb.x-pa.x))
pts = [(WIDTH, HEIGHT), (0, HEIGHT)]
for i in range(len(molecules)-1):
p0 = molecules[i].x, molecules[i].y
p3 = molecules[i+1].x, molecules[i+1].y
p1 = p0[0] + 10, p0[1] + 10 * ts[i]
p2 = p3[0] - 10, p3[1] - 10 * ts[i+1]
pts += bezier([p0, p1, p2, p3], 4)
Draw the polygon with pygame.draw.polygon():
pygame.draw.polygon(win, AQUA, pts)
Complete example:
from random import randint
import pygame
class Node:
def __init__(self, x, y, force, k, v):
self.x = x
self.y = y
self.y0 = y
self.force = force
self.k = k
self.v = v
self.direction = 1
def oscillate(self):
self.y += self.v * self.direction
if self.y0 - self.force / self.k > self.y or self.y0 + self.force / self.k < self.y:
self.direction *= -1
def draw(self, surf):
pygame.draw.circle(surf, "black", (self.x, self.y), 3)
window = pygame.display.set_mode((700, 500))
clock = pygame.time.Clock()
width, height = window.get_size()
no_of_nodes = 25
dx = width / no_of_nodes
nodes = [Node(i*dx, height//2, randint(15, 30), 1, 0.5) for i in range(no_of_nodes+1)]
def ptOnCurve(b, t):
q = b.copy()
for k in range(1, len(b)):
for i in range(len(b) - k):
q[i] = (1-t) * q[i][0] + t * q[i+1][0], (1-t) * q[i][1] + t * q[i+1][1]
return round(q[0][0]), round(q[0][1])
def bezier(b, samples):
return [ptOnCurve(b, i/samples) for i in range(samples+1)]
run = True
while run:
clock.tick(100)
for event in pygame.event.get():
if event.type == pygame.QUIT:
run = False
for molecule in nodes:
molecule.oscillate()
ts = []
for i in range(len(nodes)):
pa = nodes[max(0, i-1)]
pb = nodes[min(len(nodes)-1, i+1)]
ts.append((pb.y-pa.y) / (pb.x-pa.x))
pts = [(width, height), (0, height)]
for i in range(len(nodes)-1):
p0 = nodes[i].x, nodes[i].y
p3 = nodes[i+1].x, nodes[i+1].y
p1 = p0[0] + 10, p0[1] + 10 * ts[i]
p2 = p3[0] - 10, p3[1] - 10 * ts[i+1]
pts += bezier([p0, p1, p2, p3], 4)
window.fill("white")
pygame.draw.polygon(window, 'aqua', pts)
for molecule in nodes:
molecule.draw(window)
pygame.display.flip()
pygame.quit()
exit()

python NEAT neural network not evolving

I've been trying to programm a "game" where drones collect points floating around using a neural network with NEAT. I've tried tinkering a bit with the config file, but the drones just don't seem to evolve...
Here's the main.py:
import pygame, sys, time, random, math, neat, os
from pygame.locals import *
pygame.init()
pygame.display.set_caption("Drones")
SCREEN_WIDTH = 1100
SCREEN_HEIGHT = 600
SCREEN = pygame.display.set_mode((SCREEN_WIDTH,SCREEN_HEIGHT))
clock = pygame.time.Clock()
scoreFont = pygame.font.Font(None,50)
statFont = pygame.font.Font(None,20)
fireImg = pygame.transform.scale(pygame.image.load("images/Fire.png").convert_alpha(), [35,120])
spaceshipImg = pygame.transform.scale(pygame.image.load("images/Spaceship.png").convert_alpha(), [55,55])
pointImg = pygame.transform.scale(pygame.image.load("images/Spaceship.png").convert_alpha(), [55,55])
FONT = pygame.font.Font(None, 20)
dragCoefficientX = 0.0075
dragCoefficientY = 0.003
gravityForce = -0.2/2
accVerticalForce = 0.5/4
accHorizontalForce = 0.02/2
fps = 150
BestFitness = 0
def GenerateRandomPositionList():
randomPositionList = []
for i in range(0,1000):
n = (random.randint(100, SCREEN_WIDTH - 100), random.randint(100, SCREEN_HEIGHT - 100) )
randomPositionList.append(n)
return randomPositionList
posList = GenerateRandomPositionList()
def statistics():
global Spaceships, BestFitness
for i, genome in enumerate(ge):
if genome.fitness > BestFitness:
BestFitness = genome.fitness
text_1 = FONT.render(f'Spaceships Alive: {str(len(Spaceships))}', True, (0, 0, 0))
text_2 = FONT.render(f'Generation: {pop.generation+1}', True, (0, 0, 0))
text_4 = FONT.render(f'Best Fitness: {str(BestFitness)}', True, (0, 0, 0))
SCREEN.blit(text_1, (50, 450))
SCREEN.blit(text_2, (50, 480))
SCREEN.blit(text_4, (50, 510))
def distance(pos_a, pos_b):
dx = pos_a[0] - pos_b[0]
dy = pos_a[1] - pos_b[1]
return math.sqrt(dx**2 + dy**2)
def blitRotateCenter(surf, image, angle, pos):
rotated_image = pygame.transform.rotate(image, angle)
new_rect = rotated_image.get_rect(center = image.get_rect(topleft = pos).center)
new_rect.center = pos
surf.blit(rotated_image, new_rect)
class Spaceship:
#Initiall screen position
X_POS = SCREEN_WIDTH//2
Y_POS = SCREEN_HEIGHT//2
def __init__(self, img = spaceshipImg, fireImg = fireImg):
self.image = img
self.fireImg = fireImg
self.rightPressed = False
self.leftPressed = False
self.upPressed = False
self.biasLeft = 0
self.biasRight = 0
self.biasUp = 0
self.rect = pygame.Rect(self.X_POS, self.Y_POS, img.get_width(), img.get_height())
self.points = 0
self.accX = 0
self.accY = 0
self.velX = 0
self.velY = 0
def Update(self):
#Check keypresses and set directionbiases
self.biasLeft = -1 if self.leftPressed else 0
self.biasRight = 1 if self.rightPressed else 0
self.biasUp = -1 if self.upPressed else 0
#Handle physics
self.accX += ((self.biasLeft + self.biasRight) * accHorizontalForce)
self.accY += (self.biasUp * accVerticalForce) - gravityForce
self.velX += self.accX
self.velY += self.accY
self.rect.x += self.velX
self.rect.y += self.velY
def Draw(self, SCREEN):
#Draw potential thrusterfires
if self.leftPressed:
blitRotateCenter(SCREEN, self.fireImg, -90, [self.rect.x, self.rect.y])
if self.rightPressed:
blitRotateCenter(SCREEN, self.fireImg, 90, [self.rect.x, self.rect.y])
if self.upPressed:
blitRotateCenter(SCREEN, self.fireImg, 180, [self.rect.x, self.rect.y])
#Draw Spaceship
blitRotateCenter(SCREEN, self.image, 0, [self.rect.x, self.rect.y])
class Point:
def __init__(self, img = pointImg, randPosList = posList):
self.posIndex = 0
self.randPosList = randPosList
self.image = img
self.rect = pygame.Rect(self.randPosList[self.posIndex][0], self.randPosList[self.posIndex][1], img.get_width(), img.get_height())
self.color = (random.randint(0,255), random.randint(0,255), random.randint(0,255))
self.pointsTaken = 0
def updatePos(self):
self.posIndex += 1
self.pointsTaken += 1
self.rect.x = self.randPosList[self.posIndex][0]
self.rect.y = self.randPosList[self.posIndex][1]
def draw(self, SCREEN):
pygame.draw.circle(SCREEN, self.color, [self.rect.x, self.rect.y], 20)
def eval_genomes(genomes, config):
global Points, Spaceships, ge, nets, fps
timer = 0
timeSinceStart = time.time()
clock = pygame.time.Clock()
Points = []
Spaceships = []
ge = []
nets = []
for genome_id, genome in genomes:
Spaceships.append(Spaceship())
Points.append(Point())
ge.append(genome)
net = neat.nn.FeedForwardNetwork.create(genome, config)
nets.append(net)
genome.fitness = 1
run = True
while run:
MousePos = pygame.mouse.get_pos()
clock.tick(fps)
for event in pygame.event.get():
if event.type == pygame.QUIT:
pygame.quit()
sys.exit()
if event.type == pygame.KEYDOWN:
if event.key == pygame.K_o:
fps = 150
if event.type == pygame.KEYDOWN:
if event.key == pygame.K_p:
fps = 6
timer += 1
SCREEN.fill((255,255,255))
if len(Spaceships) == 0 or time.time() - timeSinceStart > 10:
break
for spaceship in Spaceships:
spaceship.Update()
spaceship.Draw(SCREEN)
for point in Points:
point.draw(SCREEN)
for i, spaceship in enumerate(Spaceships):
pygame.draw.line(SCREEN, Points[i].color, (Points[i].rect.x, Points[i].rect.y), (Spaceships[i].rect.x, Spaceships[i].rect.y), 3)
if distance((Points[i].rect.x, Points[i].rect.y), (Spaceships[i].rect.x, Spaceships[i].rect.y)) < 60:
Points[i].updatePos()
ge[i].fitness += Points[i].pointsTaken*50
inBounds = pygame.Rect(0, 0, SCREEN_WIDTH, SCREEN_HEIGHT).collidepoint(Spaceships[i].rect.x, Spaceships[i].rect.y)
if inBounds != True:
ge[i].fitness -= 1
Spaceships.pop(i)
ge.pop(i)
Points.pop(i)
for i, spaceship in enumerate(Spaceships):
inputs = [
Spaceships[i].velX, Spaceships[i].velY,
Points[i].rect.x - Spaceships[i].rect.x, Points[i].rect.y - Spaceships[i].rect.y,
Spaceships[i].rect.x, Spaceships[i].rect.y]
#Store outputs
output = nets[i].activate(inputs)
#outputs
Spaceships[i].leftPressed = True if output[0] < 0.5 else False
Spaceships[i].rightPressed = True if output[1] < 0.5 else False
Spaceships[i].upPressed = True if output[2] < 0.5 else False
statistics()
pygame.display.update()
def run(config_path):
global pop
config = neat.config.Config(
neat.DefaultGenome,
neat.DefaultReproduction,
neat.DefaultSpeciesSet,
neat.DefaultStagnation,
config_path
)
pop = neat.Population(config)
stats = neat.StatisticsReporter()
pop.add_reporter(stats)
pop.run(eval_genomes, n=None)
if __name__ == '__main__':
local_dir = os.path.dirname(__file__)
config_path = os.path.join(local_dir, 'config.txt')
run(config_path)
And here's the config.txt file:
[NEAT]
fitness_criterion = max
fitness_threshold = 100000
pop_size = 50
reset_on_extinction = False
[DefaultGenome]
# node activation options
activation_default = sigmoid
activation_mutate_rate = 0.0
activation_options = sigmoid
# node aggregation options
aggregation_default = sum
aggregation_mutate_rate = 0.0
aggregation_options = sum
# node bias options
bias_init_mean = 0.0
bias_init_stdev = 1.0
bias_max_value = 30.0
bias_min_value = -30.0
bias_mutate_power = 0.5
bias_mutate_rate = 0.7
bias_replace_rate = 0.1
# genome compatibility options
compatibility_disjoint_coefficient = 1.0
compatibility_weight_coefficient = 0.5
# connection add/remove rates
conn_add_prob = 0.5
conn_delete_prob = 0.5
# connection enable options
enabled_default = True
enabled_mutate_rate = 0.01
feed_forward = True
initial_connection = full_nodirect
# node add/remove rates
node_add_prob = 0.2
node_delete_prob = 0.2
# network parameters
num_hidden = 5
num_inputs = 6
num_outputs = 3
# node response options
response_init_mean = 1.0
response_init_stdev = 0.0
response_max_value = 30.0
response_min_value = -30.0
response_mutate_power = 0.0
response_mutate_rate = 0.0
response_replace_rate = 0.0
# connection weight options
weight_init_mean = 0.0
weight_init_stdev = 1.0
weight_max_value = 30
weight_min_value = -30
weight_mutate_power = 0.5
weight_mutate_rate = 0.8
weight_replace_rate = 0.1
[DefaultSpeciesSet]
compatibility_threshold = 3.0
[DefaultStagnation]
species_fitness_func = max
max_stagnation = 20
species_elitism = 2
[DefaultReproduction]
elitism = 2
survival_threshold = 0.2
I guess i just want to know what i'm doing wrong, since nothing appears to be "evolving" when i run the program...
(NEAT v0.92 and python v3.9.2 btw)
link to the github repo is here aswell

Python treating all instances of an object as the same

I'm making a game with pygame and pymunk as a physics engine. I'm trying to kill a bullet whenever it hits a player or goes past its lifetime.
When I tried to space.remove(self.shape) and the second bullet hits the player, it gives me an "AssertionError: shape not in space, already removed. I simply changed it to teleport the bullets away, and then learned of the real error.
When I have more than one bullet in the space and a bullet hits the enemy player, all the current bullets teleport away, which means that when I tried to remove one bullet, it called the remove on all the bullets and thats why I had the initial error.
However the problem still remains that one bullet is being treated as every bullet.
Why is something that should be a non-static variable being called as a static variable?
I even tried to use deepcopy to see if that fixed it, but to no avail
This is my chunk of code, apologies since I don't know what is needed to understand it.
The key parts are most likely the Bullet class, the shoot() function in the Player class, and the drawBulletCollision() function
# PyGame template.
# Import modules.
import sys, random, math, time, copy
from typing import List
import pygame
from pygame.locals import *
from pygame import mixer
import pymunk
import pymunk.pygame_util
from pymunk.shapes import Segment
from pymunk.vec2d import Vec2d
pygame.mixer.pre_init(44110, -16, 2, 512)
mixer.init()
# Set up the window.
width, height = 1440, 640
screen = pygame.display.set_mode((width, height))
bg = pygame.image.load("space.png")
def draw_bg():
screen.blit(bg, (0, 0))
#load sounds
#death_fx = pygame.mixer.Sound("")
#death_fx.set_volume(0.25)
shoot_fx = mixer.Sound("shot.wav")
shoot_fx.set_volume(0.25)
#mixer.music.load("video.mp3")
#mixer.music.play()
#time.sleep(2)
#mixer.music.stop()
#gun_mode_fx = pygame.mixer.Sound("")
#gun_mode_fx.set_volume(0.25)
#thrust_mode_fx = pygame.mixer.Sound("")
#thrust_mode_fx.set_volume(0.25)
collision_fx = mixer.Sound("thump.wav")
collision_fx.set_volume(0.25)
ship_group = pygame.sprite.Group()
space = pymunk.Space()
space.gravity = 0, 0
space.damping = 0.6
draw_options = pymunk.pygame_util.DrawOptions(screen)
bulletList = []
playerList = []
environmentList = []
arbiterList = []
b0 = space.static_body
segmentBot = pymunk.Segment(b0, (0,height), (width, height), 4)
segmentTop = pymunk.Segment(b0, (0,0), (width, 0), 4)
segmentLef = pymunk.Segment(b0, (width,0), (width, height), 4)
segmentRit = pymunk.Segment(b0, (0,0), (0, height), 4)
walls = [segmentBot,segmentLef,segmentRit,segmentTop]
for i in walls:
i.elasticity = 1
i.friction = 0.5
i.color = (255,255,255,255)
environmentList.append(i)
class Player(object):
radius = 30
def __init__(self, position, space, color):
self.body = pymunk.Body(mass=5,moment=10)
self.mode = 0 # 0 is gun, 1 is thrust, ? 2 is shield
self.body.position = position
self.shape = pymunk.Circle(self.body, radius = self.radius)
#self.image
#self.shape.friction = 0.9
self.shape.elasticity= 0.2
space.add(self.body,self.shape)
self.angleGun = 0
self.angleThrust = 0
self.health = 100
self.speed = 500
self.gearAngle = 0
self.turningSpeed = 5
self.shape.body.damping = 1000
self.cooldown = 0
self.fireRate = 30
self.shape.collision_type = 1
self.shape.color = color
playerList.append(self)
def force(self,force):
self.shape.body.apply_force_at_local_point(force,(0,0))
def rocketForce(self):
radians = self.angleThrust * math.pi/180
self.shape.body.apply_force_at_local_point((-self.speed * math.cos(radians),-self.speed * math.sin(radians)),(0,0))
def draw(self):
gear = pygame.image.load("gear.png")
gearBox = gear.get_rect(center=self.shape.body.position)
gearRotated = pygame.transform.rotate(gear, self.gearAngle)
#gearRotated.rect.center=self.shape.body.position
x,y = self.shape.body.position
radianGun = self.angleGun * math.pi/180
radianThrust = self.angleThrust * math.pi/180
radiyus = 30 *(100-self.health)/100
screen.blit(gearRotated,gearBox)
self.gearAngle += 1
if radiyus == 30:
radiyus = 32
pygame.draw.circle(screen,self.shape.color,self.shape.body.position,radiyus,0)
pygame.draw.circle(screen,(0,0,0),self.shape.body.position,radiyus,0)
pygame.draw.line(
screen,(0,255,0),
(self.radius * math.cos(radianGun) * 1.5 + x,self.radius * math.sin(radianGun) * 1.5 + y),
(x,y), 5
)
pygame.draw.line(
screen,(200,200,0),
(self.radius * math.cos(radianThrust) * 1.5 + x,self.radius * math.sin(radianThrust) * 1.5 + y),
(x,y), 5
)
#more
def targetAngleGun(self,tAngle):
tempTAngle = tAngle - self.angleGun
tempTAngle = tempTAngle % 360
if(tempTAngle < 180 and not tempTAngle == 0):
self.angleGun -= self.turningSpeed
elif(tempTAngle >= 180 and not tempTAngle == 0):
self.angleGun += self.turningSpeed
self.angleGun = self.angleGun % 360
#print(tAngle, "target Angle")
#print(self.angleGun, "selfangleGun")
#print(tempTAngle, "tempTAngle")
def targetAngleThrust(self,tAngle):
tempTAngle = tAngle - self.angleThrust
tempTAngle = tempTAngle % 360
if(tempTAngle < 180 and not tempTAngle == 0):
self.angleThrust -= self.turningSpeed
elif(tempTAngle >= 180 and not tempTAngle == 0):
self.angleThrust += self.turningSpeed
self.angleThrust = self.angleThrust % 360
#print(tAngle, "target Angle")
#print(self.angleThrust, "selfangleGun")
#print(tempTAngle, "tempTAngle")
def targetAngle(self,tAngle):
if(self.mode == 0):
self.targetAngleGun(tAngle)
elif(self.mode == 1):
self.targetAngleThrust(tAngle)
def shoot(self):
if(self.cooldown == self.fireRate):
x,y = self.shape.body.position
radianGun = self.angleGun * math.pi/180
spawnSpot = (self.radius * math.cos(radianGun) * 1.5 + x,self.radius * math.sin(radianGun)*1.5+y)
self.shape.body.apply_impulse_at_local_point((-20 * math.cos(radianGun),-20 * math.sin(radianGun)),(0,0))
print(spawnSpot)
bT = Bullet(spawnSpot, 5, 50,self.shape.color)
b = copy.deepcopy(bT)
bulletList.append(b)
space.add(b.shape,b.shape.body)
b.getShot(self.angleGun)
self.cooldown = 0
print('pew')
shoot_fx.play()
# HEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEREEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE
def tick(self):
self.draw()
if(self.cooldown < self.fireRate):
self.cooldown += 1
#for o in playerList:
# c = self.shape.shapes_collide(o.shape)
# if(len(c.points)>0):
# self.damage(c.points[0].distance/10)
for o in bulletList:
c = self.shape.shapes_collide(o.shape)
#print(c)
for o in walls:
c = self.shape.shapes_collide(o)
if(len(c.points)>0):
self.damage(c.points[0].distance * 3)
def damage(self, damage):
self.health -= abs(damage)
if self.health < 0:
self.health = 0
#maybe make it part of the player class
def drawWallCollision(arbiter, space, data):
for c in arbiter.contact_point_set.points:
r = max(3, abs(c.distance * 5))
r = int(r)
p = tuple(map(int, c.point_a))
pygame.draw.circle(data["surface"], pygame.Color("red"), p, r, 0)
print('magnitude', math.sqrt(arbiter.total_impulse[0]**2 + arbiter.total_impulse[1]**2))
#print('position', p)
#print(data)
print("its all arbitrary")
s1, s2 = arbiter.shapes
collision_fx.play()
def drawBulletCollision(arbiter, space, data):
s1, s2 = arbiter.shapes
for c in arbiter.contact_point_set.points:
magnitude = math.sqrt(arbiter.total_impulse[0]**2 + arbiter.total_impulse[1]**2)
for p in playerList:
avr = ((c.point_a[0] + c.point_b[0])/2, (c.point_a[1] + c.point_b[1])/2)
distance = (math.sqrt((avr[0] - p.shape.body.position[0]) **2 + (avr[1] - p.shape.body.position[1]) **2 ))
if(distance < Bullet.explosionRadius + Player.radius):
if not(s1.color == s2.color):
p.damage(magnitude)
for b in bulletList:
avr = ((c.point_a[0] + c.point_b[0])/2, (c.point_a[1] + c.point_b[1])/2)
distance = (math.sqrt((avr[0] - p.shape.body.position[0]) **2 + (avr[1] - p.shape.body.position[1]) **2 ))
if(distance < Bullet.explosionRadius + Player.radius):
if not(s1.color == s2.color):
b.damage(magnitude)
pygame.draw.circle(data["surface"], pygame.Color("red"), tuple(map(int, c.point_a)), 10, 0)
print('magnitude', magnitude)
#print('position', p)
#print(data)
print("its all arbitrary")
def drawArbitraryCollision(arbiter, space, data):
collision_fx.play()
class Ship(pygame.sprite.Sprite):
def __init__(self, x, y):
pygame.sprite.Sprite.__init__(self)
self.image = pygame.image.load("gear.png")
self.rect = self.image.get_rect()
self.rect.center = [x, y]
def rotate(self):
self.image = pygame.transform.rotate(self.image,1)
class Bullet(object):
damage = 2
explosionRadius = 5
def __init__(self, position, size, speed,color):
pts = [(-size, -size), (size, -size), (size, size), (-size, size)]
self.body = copy.deepcopy(pymunk.Body(mass=0.1,moment=1))
self.shape = copy.deepcopy(pymunk.Poly(self.body, pts))
self.shape.body.position = position
self.shape.friction = 0.5
self.shape.elasticity = 1
self.shape.color = color
self.speed = speed
self.size = size
self.shape.collision_type = 2
#space.add(self.body,self.shape)
#bulletList.append(self)
self.lifetime = 0
def getShot(self,angle):
radians = angle * math.pi/180
self.shape.body.apply_impulse_at_local_point((self.speed * math.cos(radians),self.speed * math.sin(radians)),(0,0))
def tick(self):
self.lifetime += 1
if(self.lifetime > 300):
self.shape.body.position = (10000,30)
def damage(self, damage):
self.lifetime = 300
#VELOCITY OF BULLET STARTS WITH VELOCITY OF PLAYER
#MAKE VOLUME OF SOUND DEPEND ON THE IMPULSE FOR THE IMPACTS
#error on purpose so you notice this
#INSTANCES NOT WORKING????
def runPyGame():
# Initialise PyGame.
pygame.init()
# Set up the clock. This will tick every frame and thus maintain a relatively constant framerate. Hopefully.
fps = 60.0
fpsClock = pygame.time.Clock()
running = True
font = pygame.font.SysFont("Arial", 16)
p1 = Player((240,240),space,(132, 66, 245,255))
p2 = Player((1200,400),space,(47, 247, 184,255))
space.add(segmentBot,segmentTop,segmentLef,segmentRit)
# Main game loop.
ch = space.add_collision_handler(1, 0)
ch.data["surface"] = screen
ch.post_solve = drawWallCollision
ch = space.add_collision_handler(1, 2)
ch.data["surface"] = screen
ch.post_solve = drawBulletCollision
ch = space.add_collision_handler(0, 2)
ch.data["surface"] = screen
ch.post_solve = drawArbitraryCollision
dt = 1/fps # dt is the time since last frame.
while True: # Loop forever!
keys = pygame.key.get_pressed()
for event in pygame.event.get():
# We need to handle these events. Initially the only one you'll want to care
# about is the QUIT event, because if you don't handle it, your game will crash
# whenever someone tries to exit.
if event.type == QUIT:
pygame.quit() # Opposite of pygame.init
sys.exit() # Not including this line crashes the script on Windows.
if event.type == KEYDOWN:
if event.key == pygame.K_s:
p1.mode = -(p1.mode - 0.5) + 0.5
print(p1.mode)
if (event.key == pygame.K_k and p1.mode == 0):
p1.shoot()
if event.key == pygame.K_KP_5:
p2.mode = -(p2.mode - 0.5) + 0.5
print(p2.mode)
if (event.key == pygame.K_m and p2.mode == 0):
p2.shoot()
#b = Bullet((200,200),51,51)
if(keys[K_w]):
p1.targetAngle(90)
if(keys[K_q]):
p1.targetAngle(45)
if(keys[K_a]):
p1.targetAngle(0)
if(keys[K_z]):
p1.targetAngle(315)
if(keys[K_x]):
p1.targetAngle(270)
if(keys[K_c]):
p1.targetAngle(225)
if(keys[K_d]):
p1.targetAngle(180)
if(keys[K_e]):
p1.targetAngle(135)
if(keys[K_k] and p1.mode == 1):
p1.rocketForce()
if(keys[K_KP_8]):
p2.targetAngle(90)
if(keys[K_KP_7]):
p2.targetAngle(45)
if(keys[K_KP_4]):
p2.targetAngle(0)
if(keys[K_KP_1]):
p2.targetAngle(315)
if(keys[K_KP_2]):
p2.targetAngle(270)
if(keys[K_KP_3]):
p2.targetAngle(225)
if(keys[K_KP_6]):
p2.targetAngle(180)
if(keys[K_KP_9]):
p2.targetAngle(135)
if(keys[K_m] and p2.mode == 1):
p2.rocketForce()
# Handle other events as you wish.
screen.fill((250, 250, 250)) # Fill the screen with black.
# Redraw screen here.
### Draw stuff
draw_bg()
space.debug_draw(draw_options)
for i in playerList:
i.tick()
screen.blit(
font.render("P1 Health: " + str(p1.health), True, pygame.Color("white")),
(50, 10),
)
screen.blit(
font.render("P2 Health: " + str(p2.health), True, pygame.Color("white")),
(50, 30),
)
for i in bulletList:
i.tick()
ship_group.draw(screen)
# Flip the display so that the things we drew actually show up.
pygame.display.update()
dt = fpsClock.tick(fps)
space.step(0.01)
pygame.display.update()
runPyGame()
I cant point to the exact error since the code is quite long and depends on files I dont have. But here is a general advice for troubleshooting:
Try to give a name to each shape when you create them, and then print it out. Also print out the name of each shape that you add or remove from the space. This should show which shape you are actually removing and will probably make it easy to understand whats wrong.
For example:
...
self.shape = pymunk.Circle(self.body, radius = self.radius)
self.shape.name = "circle 1"
print("Created", self.shape.name)
...
print("Adding", self.shape.name)
space.add(self.body,self.shape)
...
(Note that you need to reset the name of shapes you copy, since otherwise the copy will have the same name.)

DQN doesn't make any progress after a little while

Here is my code, its a simple DQN that learns to play snake and i dont know why it stops learning after a little while, for example. it learns that the snake head should hit the wall, but it doesnt learn to eat the fruit, even though i give a reward for getting closer to the fruit and give a GREATER negative reward for going farther away (this is to make the snake understand that it should aim to go for the fruit). But for some reason, the score never goes beyond a 1 or a 2:
"""
########################################################
#MAIN.py
# -*- coding: utf-8 -*-
"""
Created on Mon Aug 10 13:04:45 2020
#author: Ryan
"""
from dq_learning import Agent
import numpy as np
import tensorflow as tf
import snake
import sys
import pygame
import gym
if __name__ == '__main__':
observation_space = 31
action_space = 4
lr = 0.001
n_games = 50000
steps = 1000
#env = gym.make("LunarLander-v2")
#observation_space = env.observation_space.shape
#action_space = env.action_space.n
agent = Agent(gamma=0.99, epsilon=1.0, lr=lr,
input_dims=observation_space,
n_actions=action_space,
batch_size=64)
scores = []
eps_history = []
r = False
for i in range(n_games):
score = 0
#first observation
observation = [0 for i in range(observation_space)]
#observation = env.reset()
for j in range(steps):
# env.render()
for evt in pygame.event.get():
if evt.type == pygame.QUIT:
pygame.quit()
sys.exit()
#actions go from 0 to n_actions - based on the model prediction or random choice
#action space is the list of all the possible actions
action = agent.choose_action(observation)
#print("action: ", action)
#env.step(action) returns -> new observation, reward, done, info
observation_, reward, done, info = snake.step(action, 25)
#observation_, reward, done, info = env.step(action)
#print(observation_, reward, done, info)
score += reward
agent.store_transition(observation, action, reward, observation_, done)
observation = observation_
agent.learn()
if done:
break
print("NEXT GAME")
done = False
eps_history.append(agent.epsilon)
scores.append(score)
avg_score = np.mean(scores[-100:])
print("episode: ", i, " scores %.2f" %score,
"average score: %.2f" %avg_score,
" epsilon %.2f" %agent.epsilon)
print("last score: ", scores[-1])
#####################################
#DQ_LEARNING.PY
# -*- coding: utf-8 -*-
"""
Created on Tue Aug 4 12:23:14 2020
#author: Ryan
"""
import numpy as np
import tensorflow as tf
from tensorflow import keras
class ReplayBuffer:
def __init__(self, max_size, input_dims):
self.mem_size = max_size
self.mem_cntr = 0
"""
print("self.mem_size: ", self.mem_size)
print("*input_dims: ", *input_dims)
"""
self.state_memory = np.zeros((self.mem_size, input_dims), dtype=np.float32)
self.new_state_memory = np.zeros((self.mem_size, input_dims), dtype=np.float32)
self.action_memory = np.zeros(self.mem_size, np.int32)
self.reward_memory = np.zeros(self.mem_size, np.float32)
self.terminal_memory = np.zeros(self.mem_size, np.int32) #done flags
def store_transitions(self, state, action, reward, state_, done):
"""print("storing transactions...")
print("mem_cntr: ", self.mem_cntr)
print("mem_size: ", self.mem_size)
"""
index = self.mem_cntr % self.mem_size
self.state_memory[index] = state
self.new_state_memory[index] = state_
self.reward_memory[index] = reward
self.action_memory[index] = action
self.terminal_memory[index] = 1 - int(done)
self.mem_cntr += 1
def sample_buffer(self, batch_size):
#print("sampling buffer...")
max_mem = min(self.mem_cntr, self.mem_size)
batch = np.random.choice(max_mem, batch_size, replace=False)
#print("batch:", batch)
states = self.state_memory[batch]
states_ = self.new_state_memory[batch]
rewards = self.reward_memory[batch]
actions = self.action_memory[batch]
terminal = self.terminal_memory[batch]
#print("self.action_mem: ", self.action_memory)
#print("actions: ", actions)
#print("state action rewards state_, terminal", (states, actions, rewards, states_, terminal))
return states, actions, rewards, states_, terminal
def build_dqn(lr, n_actions, input_dims, fc1_dims, fc2_dims):
model = keras.Sequential()
model.add(keras.layers.Dense(fc1_dims, activation='relu'))
model.add(keras.layers.Dense(fc2_dims, activation='relu'))
model.add(keras.layers.Dense(n_actions))
opt = keras.optimizers.Adam(learning_rate=lr)
model.compile(optimizer=opt, loss='mean_squared_error')
return model
class Agent():
def __init__(self, lr, gamma, n_actions, epsilon, batch_size,
input_dims, epsilon_dec=1e-3, epsilon_end=1e-2,
mem_size=1e6, fname='dqn_model.h5'):
self.action_space = [i for i in range(n_actions)]
self.gamma = gamma
self.epsilon = epsilon
self.eps_min = epsilon_end
self.eps_dec = epsilon_dec
self.batch_size = batch_size
self.model_file = fname
self.memory = ReplayBuffer(int(mem_size), input_dims)
self.q_eval = build_dqn(lr, n_actions, input_dims, 256, 256)
def store_transition(self, state, action, reward, new_state, done):
self.memory.store_transitions(state, action, reward, new_state, done)
def choose_action(self, observation):
if np.random.random() < self.epsilon:
action = np.random.choice(self.action_space)
else:
state = np.array([observation])
actions = self.q_eval.predict(state)
action = np.argmax(actions)
return action
def learn(self):
if self.memory.mem_cntr < self.batch_size:
return
states, actions, rewards, states_, dones = \
self.memory.sample_buffer(self.batch_size)
q_eval = self.q_eval.predict(states)
q_next = self.q_eval.predict(states_)
q_target = np.copy(q_eval)
batch_index = np.arange(self.batch_size, dtype=np.int32)
q_target[batch_index, actions] = rewards + \
self.gamma * np.max(q_next, axis=1)*dones
self.q_eval.train_on_batch(states, q_target)
self.epsilon = self.epsilon - self.eps_dec if self.epsilon > \
self.eps_min else self.eps_min
def save_model(self):
self.q_eval.save(self.model_file)
def load_model(self):
self.q_eval = keras.models.load_model(self.model_file)
##########################################
# snake.py
# -*- coding: utf-8 -*-
"""
Created on Fri Sep 4 14:32:30 2020
#author: Ryan
"""
import pygame
import random
from math import sqrt
import time
class Snakehead:
def __init__(self, posx, posy, width, height):
self.posx = posx
self.posy = posy
self.width = width
self.height = height
self.movement = 'null'
self.speed = 16
self.gameover = False
def draw(self, Display): #RGB #coordinates/dimentions
pygame.draw.rect(Display, [0, 0, 0], [self.posx, self.posy, self.width, self.height])
def read_input(self, key):
if key == 0 and key != 1:
self.movement = 'left'
elif key == 1 and key != 0:
self.movement = 'right'
elif key == 2 and key != 3:
self.movement = 'up'
elif key == 3 and key != 2:
self.movement = 'down'
print(self.movement)
def get_pos(self):
return self.posx, self.posy
def get_movement(self):
return self.movement
def restart(self, ScreenW, ScreenH):
self.posx = ScreenW / 2 - 16/2
self.posy = ScreenH / 2 - 16/2
def move(self, SW, SH):
if self.movement == 'right':
self.posx += self.speed # self.posx = self.posx + self.speed
elif self.movement == 'left':
self.posx -= self.speed # self.posx = self.posx - self.speed
elif self.movement == 'up':
self.posy -= self.speed # self.posy = self.posy - self.speed
elif self.movement == 'down':
self.posy += self.speed # self.posy = self.posy + self.speed
class Food:
def __init__(self, posx, posy, width, height):
self.posx = posx
self.posy = posy
self.width = width
self.height = height
self.red = random.randint(155, 255)
def draw(self, Display):
pygame.draw.rect(Display, [self.red, 0, 0], [self.posx, self.posy, self.width, self.height])
def get_pos(self):
return self.posx, self.posy
def respawn(self, ScreenW, ScreenH):
self.posx = random.randint(1, (ScreenW - 16)/16) * 16
self.posy = random.randint(1, (ScreenH - 16)/16) * 16
self.red = random.randint(155, 255)
class Tail:
def __init__(self, posx, posy, width, height):
self.width = width
self.height = height
self.posx = posx
self.posy = posy
self.RGB = [random.randint(0, 255) for i in range(3)]
def draw(self, Diplay):
pygame.draw.rect(Diplay, self.RGB, [self.posx, self.posy, 16, 16])
def move(self, px, py):
self.posx = px
self.posy = py
def get_pos(self):
return self.posx, self.posy
ScreenW = 720
ScreenH = 720
sheadX = 0
sheadY = 0
fX = 0
fY = 0
counter = 0
pygame.init()
pygame.display.set_caption("Snake Game")
Display = pygame.display.set_mode([ScreenW, ScreenH])
Display.fill([255, 255, 255]) #RGB white
black = [0, 0, 0]
font = pygame.font.SysFont(None, 30)
score = font.render("Score: 0", True, black)
shead = Snakehead(ScreenW / 2 - 16/2, ScreenH / 2 - 16/2, 16, 16)
f = Food(random.randint(0, (ScreenW - 16)/16) * 16 - 8, random.randint(0, (ScreenH - 16)/16) * 16, 16, 16)
tails = []
Fps = 60
timer_clock = pygame.time.Clock()
previous_distance = 0
d = 0
def step(action, observation_space):
global score, counter, tails, shead, gameover, previous_distance, d
shead.gameover = False
observation_, reward, done, info = [0 for i in range(observation_space+6)], 0, 0, 0
Display.fill([255, 255, 255])
shead.read_input(action)
sheadX, sheadY = shead.get_pos()
fX, fY = f.get_pos()
#detect collision
if sheadX + 16 > fX and sheadX < fX + 16:
if sheadY + 16 > fY and sheadY < fY + 16:
#collision
f.respawn(ScreenW, ScreenH)
counter += 1 # counter = counter + 1
score = font.render("Score: " + str(counter), True, black)
if len(tails) == 0:
tails.append(Tail(sheadX, sheadY, 16, 16))
#tails.append(tail.Tail(sheadX, sheadY, 16, 16, shead.get_movement()))
else:
tX, tY = tails[-1].get_pos()
tails.append(Tail(tX, tY, 16, 16))
reward = 100
print(tails)
for i in range(len(tails)):
try:
tX, tY = tails[i].get_pos()
#print("tx: ", tX, " ty: ", tY)
sX, sY = shead.get_pos()
#print("Sx: ", sX, " sy: ", sY)
if i != 0 and i != 1:
#print("more than 2 tails")
if tX == sX and tY == sY:
print("collision")
#collision
shead.restart(ScreenW, ScreenH)
tails.clear()
counter = 0
Display.blit(score, (10, 10))
pygame.display.flip()
pygame.display.update()
reward = -300
shead.gameover = True
print("lost-3")
except:
shead.restart(ScreenW, ScreenH)
tails.clear()
counter = 0
reward = -300
shead.gameover = True
print("lost-0")
sX, sY = shead.get_pos()
if sX < 0 or sX + 16 > ScreenW:
shead.restart(1280, 720)
counter = 0
Display.blit(score, (10, 10))
pygame.display.flip()
pygame.display.update()
tails.clear()
print("lost-1")
reward = -200
shead.gameover = True
#restart
elif sY < 0 or sY + 16 > ScreenH:
shead.restart(1280, 720)
counter = 0
Display.blit(score, (10, 10))
pygame.display.flip()
pygame.display.update()
tails.clear()
reward = -200
shead.gameover = True
print("lost-2")
#restart
for i in range(1, len(tails)):
tX, tY = tails[len(tails) - i - 1].get_pos() # y = b - x
tails[len(tails) - i].move(tX, tY)
if len(tails) > 0:
tX, tY = shead.get_pos()
tails[0].move(tX, tY)
shead.move(ScreenW, ScreenH)
shead.draw(Display)
Display.blit(score, (10, 10))
for tail in tails:
tail.draw(Display)
f.draw(Display)
pygame.display.flip()
pygame.display.update()
timer_clock.tick(Fps)
#observation, done
done = shead.gameover
hx, hy = shead.get_pos()
hx /= ScreenW
hy /= ScreenH
fx, fy = f.get_pos()
fx /= ScreenW
fy /= ScreenH
observation_[0] = abs(hx - fx)
observation_[1] = abs(hy - fy)
previous_distance = d
d = sqrt((fx - hx)**2 + (fy - hy)**2)
#print("distance: ", d)
observation_[2] = d
observation_[3] = 0
#print("observation_[4]: ", observation_[4])
observation_[4] = hx
observation_[5] = hy
c = 6
xlist = []
ylist = []
for t in tails:
tx, ty = t.get_pos()
tx /= 16
ty /= 16
xlist.append(tx)
ylist.append(ty)
l = int(sqrt(observation_space))
startX, startY = shead.get_pos()
startX /= 16
startY /= 16
m = (l-1)/2
#print("xlist:" , xlist)
#print("ylist:", ylist)
#print("startX: ", startX)
#print("startY: ", startY)
#print("m: ", m)
#print("l: ", l)
for x in range(l):
for y in range(l):
found = False
#print("position: (", int(startX) - m + x, ",", int(startY) - m + y, ")")
for i in range(len(xlist)):
"""print("i:", i)
print("pos: ", startX - m + x)
print("j: ", j)
print("pos: ", startY - m + y)
"""
#print("current iteration: (", int(xlist[i]), ",", int(ylist[i]), ")")
if int(xlist[i]) == int(startX) - m + x and int(ylist[i]) == int(startY) - m + y:
#print("found a match")
observation_[c] = 1
#print("c is: ", c)
#print("observation_[c] is: ", observation_[c])
found = True
break
if not found:
#print("set to 0")
observation_[c] = 0
#print("increasing c...")
c += 1
print("reward: ", reward)
print("c_reward: ", counter*10)
d_reward = 10 if d < previous_distance else - 100
print("d_reward: ", d_reward)
print(observation_, reward + d_reward + counter*10, done, 0)
return observation_, reward, done, 0
The reward function looks fine to me.
However, you say "I give a reward for getting closer to the fruit and give a GREATER negative reward for going farther away" but in the code it does not look like you use d_reward:
print("reward: ", reward)
print("c_reward: ", counter*10)
d_reward = 10 if d < previous_distance else - 100
print("d_reward: ", d_reward)
print(observation_, reward + d_reward + counter*10, done, 0)
return observation_, reward, done, 0
This is fine, as d_reward is definitely not necessary. Only giving positive reward for eating the apple, negative for dying and 0 otherwise is enough.
I suspect that the issue is in your state representation. Only by looking at your state, it is impossible for your agent to know which direction it should go, as the information of the apple position relative to the head is given with absolute values.
As an example lets say that your board is as follows:
[food, head, empty]
Your observation would be:
[1, 0, 1, 0, 1, 0]
But if your board is:
[empty, head, food]
The observation is the same:
[1, 0, 1, 0, 1, 0]
This is a problem. With a given input, the same action could be good or bad, whithout any way of knowing it. This makes learning impossible. In our example, for the input [1, 0, 1, 0, 1, 0], our network could move towards (or away) from both: left and right , never converging in any action.
This is because in your training data you will have examples of that input where moving to the left is good, others where it is neutral, others where it is bad, and examples of that input where right is good, neutral, bad etc.
I would recommend to encode more information in your state (or observation). I suggest something like this (which I took from a project of mine, you'll need to adapt it):
def get_state(self):
head = self.snake[0]
danger_top = head.y == self.board_dim.y - 1 or Point(head.x, head.y + 1) in self.snake
danger_bot = head.y == 0 or Point(head.x, head.y - 1) in self.snake
danger_right = head.x == self.board_dim.x - 1 or Point(head.x + 1, head.y) in self.snake
danger_left = head.x == 0 or Point(head.x - 1, head.y) in self.snake
apple_top = head.y < self.apple.y
apple_bot = head.y > self.apple.y
apple_right = head.x < self.apple.x
apple_left = head.x > self.apple.x
return np.array([
danger_top,
danger_bot,
danger_right,
danger_left,
apple_top,
apple_bot,
apple_right,
apple_left], dtype=int)
Please, let me know if I did miss some part of your code or if you have any doubt. Thank you in advance.

Categories

Resources