I originally posted this on code-review (hence the lengthy code) but failed to get an answer.
My model is based on this game https://en.wikipedia.org/wiki/Ultimatum_game . I won't go into the intuition behind it but generally speaking it functions as follows:
The game consists of a n x n lattice on which an agent is placed at each node.
During each time step, each player on each node plays against a random neighbour by playing a particular strategy.
Each of their strategies (a value between 1-9) has a propensity attached to it (which is randomly assigned and is just some number). The propensity then in turn determines the probability of playing that strategy. The probability is calculated as the propensity of that strategy over the sum of the propensities of all strategies.
If a game results in a positive payoff, then the payoffs from that game get added to the propensities for those strategies.
These propensities then determine the probabilities for their strategies in the next time step, and so on.
The simulation ends after time step N is reached.
For games with large lattices and large time steps, my code runs really really slowly. I ran cProfiler to check where the bottleneck(s) are, and as I suspected the update_probabilitiesand play_rounds functions seem to be taking up a lot time. I want to be able to run the game with gridsize of about 40x40 for about 100000+ time steps, but right now that is not happening.
So what would be a more efficient way to calculate and update the probabilities/propensities of each player in the grid? I've considered implementing NumPy arrays but I am not sure if it would be worth the hassle here?
import numpy as np
import random
from random import randint
from numpy.random import choice
from numpy.random import multinomial
import cProfile
mew = 0.001
error = 0.05
def create_grid(row, col):
return [[0 for j in range(col)] for i in range(row)]
def create_random_propensities():
propensities = {}
pre_propensities = [random.uniform(0, 1) for i in range(9)]
a = np.sum(pre_propensities)
for i in range(1, 10):
propensities[i] = (pre_propensities[i - 1]/a) * 10 # normalize sum of propensities to 10
return propensities
class Proposer:
def __init__(self):
self.propensities = create_random_propensities()
self.probabilites = []
self.demand = 0 # the amount the proposer demands for themselves
def pick_strat(self, n_trials): # gets strategy, an integer in the interval [1, 9]
results = multinomial(n_trials, self.probabilites)
i, = np.where(results == max(results))
if len(i) > 1:
return choice(i) + 1
else:
return i[0] + 1
def calculate_probability(self, dict_data, index, total_sum): # calculates probability for particular strat, taking propensity
return dict_data[index]/total_sum # of that strat as input
def calculate_sum(self, dict_data):
return sum(dict_data.values())
def initialize(self):
init_sum = self.calculate_sum(self.propensities)
for strategy in range(1, 10):
self.probabilites.append(self.calculate_probability(self.propensities, strategy, init_sum))
self.demand = self.pick_strat(1)
def update_strategy(self):
self.demand = self.pick_strat(1)
def update_probablities(self):
for i in range(9):
self.propensities[1 + i] *= 1 - mew
pensity_sum = self.calculate_sum(self.propensities)
for i in range(9):
self.probabilites[i] = self.calculate_probability(self.propensities, 1 + i, pensity_sum)
def update(self):
self.update_probablities()
self.update_strategy()
class Responder: # methods same as proposer class, can skip-over
def __init__(self):
self.propensities = create_random_propensities()
self.probabilites = []
self.max_thresh = 0 # the maximum demand they are willing to accept
def pick_strat(self, n_trials):
results = multinomial(n_trials, self.probabilites)
i, = np.where(results == max(results))
if len(i) > 1:
return choice(i) + 1
else:
return i[0] + 1
def calculate_probability(self, dict_data, index, total_sum):
return dict_data[index]/total_sum
def calculate_sum(self, dict_data):
return sum(dict_data.values())
def initialize(self):
init_sum = self.calculate_sum(self.propensities)
for strategy in range(1, 10):
self.probabilites.append(self.calculate_probability(self.propensities, strategy, init_sum))
self.max_thresh = self.pick_strat(1)
def update_strategy(self):
self.max_thresh = self.pick_strat(1)
def update_probablities(self):
for i in range(9):
self.propensities[1 + i] *= 1 - mew # stops sum of propensites from growing without bound
pensity_sum = self.calculate_sum(self.propensities)
for i in range(9):
self.probabilites[i] = self.calculate_probability(self.propensities, 1 + i, pensity_sum)
def update(self):
self.update_probablities()
self.update_strategy()
class Agent:
def __init__(self):
self.prop_side = Proposer()
self.resp_side = Responder()
self.prop_side.initialize()
self.resp_side.initialize()
def update_all(self):
self.prop_side.update()
self.resp_side.update()
class Grid:
def __init__(self, rowsize, colsize):
self.rowsize = rowsize
self.colsize = colsize
def make_lattice(self):
return [[Agent() for j in range(self.colsize)] for i in range(self.rowsize)]
#staticmethod
def von_neumann_neighbourhood(array, row, col, wrapped=True): # gets up, bottom, left, right neighbours of some node
neighbours = set([])
if row + 1 <= len(array) - 1:
neighbours.add(array[row + 1][col])
if row - 1 >= 0:
neighbours.add(array[row - 1][col])
if col + 1 <= len(array[0]) - 1:
neighbours.add(array[row][col + 1])
if col - 1 >= 0:
neighbours.add(array[row][col - 1])
#if wrapped is on, conditions for out of bound points
if row - 1 < 0 and wrapped == True:
neighbours.add(array[-1][col])
if col - 1 < 0 and wrapped == True:
neighbours.add(array[row][-1])
if row + 1 > len(array) - 1 and wrapped == True:
neighbours.add(array[0][col])
if col + 1 > len(array[0]) - 1 and wrapped == True:
neighbours.add(array[row][0])
return neighbours
def get_error_term(pay, strategy):
index_strat_2, index_strat_8 = 2, 8
if strategy == 1:
return (1 - (error/2)) * pay, error/2 * pay, index_strat_2
if strategy == 9:
return (1 - (error/2)) * pay, error/2 * pay, index_strat_8
else:
return (1 - error) * pay, error/2 * pay, 0
class Games:
def __init__(self, n_rows, n_cols, n_rounds):
self.rounds = n_rounds
self.rows = n_rows
self.cols = n_cols
self.lattice = Grid(self.rows, self.cols).make_lattice()
self.lookup_table = np.full((self.rows, self.cols), False, dtype=bool) # if player on grid has updated their strat, set to True
def reset_look_tab(self):
self.lookup_table = np.full((self.rows, self.cols), False, dtype=bool)
def run_game(self):
n = 0
while n < self.rounds:
for r in range(self.rows):
for c in range(self.cols):
if n != 0:
self.lattice[r][c].update_all()
self.lookup_table[r][c] = True
self.play_rounds(self.lattice, r, c)
self.reset_look_tab()
n += 1
def play_rounds(self, grid, row, col):
neighbours = Grid.von_neumann_neighbourhood(grid, row, col)
neighbour = random.sample(neighbours, 1).pop()
neighbour_index = [(ix, iy) for ix, row in enumerate(self.lattice) for iy, i in enumerate(row) if i == neighbour]
if self.lookup_table[neighbour_index[0][0]][neighbour_index[0][1]] == False: # see if neighbour has already updated their strat
neighbour.update_all()
player = grid[row][col]
coin_toss = randint(0, 1) # which player acts as proposer or responder in game
if coin_toss == 1:
if player.prop_side.demand <= neighbour.resp_side.max_thresh: # postive payoff
payoff, adjacent_payoff, index = get_error_term(player.prop_side.demand, player.prop_side.demand)
if player.prop_side.demand == 1 or player.prop_side.demand == 9: # extreme strategies get bonus payoffs
player.prop_side.propensities[player.prop_side.demand] += payoff
player.prop_side.propensities[index] += adjacent_payoff
else:
player.prop_side.propensities[player.prop_side.demand] += payoff
player.prop_side.propensities[player.prop_side.demand - 1] += adjacent_payoff
player.prop_side.propensities[player.prop_side.demand + 1] += adjacent_payoff
else:
return 0 # if demand > max thresh -> both get zero
if coin_toss != 1:
if neighbour.prop_side.demand <= player.resp_side.max_thresh:
payoff, adjacent_payoff, index = get_error_term(10 - neighbour.prop_side.demand, player.resp_side.max_thresh)
if player.resp_side.max_thresh == 1 or player.resp_side.max_thresh == 9:
player.resp_side.propensities[player.resp_side.max_thresh] += payoff
player.resp_side.propensities[index] += adjacent_payoff
else:
player.resp_side.propensities[player.resp_side.max_thresh] += payoff
player.resp_side.propensities[player.resp_side.max_thresh - 1] += adjacent_payoff
player.resp_side.propensities[player.resp_side.max_thresh + 1] += adjacent_payoff
else:
return 0
#pr = cProfile.Profile()
#pr.enable()
my_game = Games(10, 10, 2000) # (rowsize, colsize, n_steps)
my_game.run_game()
#pr.disable()
#pr.print_stats(sort='time')
(For those who might be wondering, the get_error_term just returns the propensities for strategies that are next to strategies that receive a positive payoff, for example if the strategy 8 works, then 7 and 9's propensities also get adjusted upwards and this is calculated by said function. And the first for loop inside update_probabilities just makes sure that the sum of propensities don't grow without bound).
Related
I recently tried to code an AI to solve the connect-four game. I've come quite far but I am now stuck with a minor mistake in the code that I just cant locate. Generally the algorithm played great but sometimes the algorithms ignores a row of 3 pieces the opponent has which results in the loss of the algorithm. As you will see, I have constructed the evaluation function so that positions like that should be rated with a extremely low score, which it does. Also the score of a position where the algorithm has lost is always rated with -inf. Therefore I can't imagine why the algorithm would be unable to counter positions like that.
The bot relies on a framework which I can't upload here thus I am sorry that the code itself without changes cant be simply executed.
from aiagent import AiAgent
import math
import copy
import numpy as np
class MinMaxAgent(AiAgent):
def __init__(self):
'''
Creates the board.
'''
self.board = [[0,0,0,0,0,0],
[0,0,0,0,0,0],
[0,0,0,0,0,0],
[0,0,0,0,0,0],
[0,0,0,0,0,0],
[0,0,0,0,0,0],
[0,0,0,0,0,0]]
def getNextMove(self):
'''
Calculate the index of the player move and store it on the board. Return that value.
'''
self._getMinMax(board=self.board)
self.board[self.bestIndexCol][self._getRowIndex(self.bestIndexCol, self.board)] = 1
print(f'Eval: {self._evaluate(self.board)}')
return (self.bestIndexCol, self._getRowIndex(self.bestIndexCol, self.board))
def handleNextMove(self, indexCol):
'''
Store the index of the enemy move in the board.
'''
self.board[indexCol[0]][self._getRowIndex(indexCol[0], self.board)] = -1
print(f'Eval: {self._evaluate(self.board)}')
def _getRowIndex(self, indexCol, board):
'''
Get the index of the row of a column within a board.
'''
for indexRow, elementRow in enumerate(board[indexCol]):
if elementRow == 0:
return indexRow
def _getValidIndex(self, board):
'''
Get all the valid indices of a board.
'''
validMoves = []
for indexCol, col in enumerate(board):
if col.count(0) != 0:
validMoves.append(indexCol)
return validMoves
def _getMinMax(self, board, depth=6, player=1, alpha=-math.inf, beta=math.inf):
'''
Calculates the best move within a specific depth.
'''
if depth == 0:
return self._evaluate(board)
elif self._isTerminalState(board) == 0:
return self._evaluate(board)
elif self._isTerminalState(board) == -1:
return -math.inf
elif self._isTerminalState(board) == 1:
return math.inf
if player == 1:
resultMax = -math.inf
self.bestIndexCol = None
for indexCol in self._getValidIndex(board):
# Mutate the board
self.nextBoard = copy.deepcopy(board)
self.nextBoard[indexCol][self._getRowIndex(indexCol, board)] = 1
# Calls itself with a by one decremented depth and the change of player
self.resultMinMax = self._getMinMax(board=self.nextBoard, depth=depth-1, player=-1, alpha=alpha, beta=beta)
# Take the board state with the most amount of points
if self.resultMinMax > resultMax:
resultMax = self.resultMinMax
self.bestIndexCol = indexCol
# Change alpha if the boardstate is evaluated with more points
if self.resultMinMax > alpha:
alpha = self.resultMinMax
# Break the loop if on a alphaboundry
if alpha >= beta:
break
return resultMax
elif player == -1:
resultMin = math.inf
for indexCol in self._getValidIndex(board):
# Mutate the board
self.nextBoard = copy.deepcopy(board)
self.nextBoard[indexCol][self._getRowIndex(indexCol, board)] = -1
# Calls itself with a by one decremented depth and the change of player
self.resultMinMax = self._getMinMax(board=self.nextBoard, depth=depth-1, player=1, alpha=alpha, beta=beta)
# Take the board state with the least amount of points
if self.resultMinMax < resultMin:
resultMin = self.resultMinMax
# Change beta if the boardstate is evaluated with less points
if self.resultMinMax < beta:
beta = self.resultMinMax
# Break the loop if on a betaboundry
if alpha >= beta:
break
return resultMin
def _isTerminalState(self, board):
'''
Checks the board for a terminal state of the board:
Return: 0 for a draw;
1 for a victory;
-1 for a defeat;
'''
# Evaluate draw
if [board[col].count(0) for col in range(7)] == [0,0,0,0,0,0,0]:
return 0
# Evaluate vertical for terminal state
for col in range(7): # columns
for row in range(3): # rows
if [board[col][row + i] for i in range(4)] == [1,1,1,1]:
return 1
elif [board[col][row + i] for i in range(4)] == [-1,-1,-1,-1]:
return -1
# Evaluate horizontal for terminal state
for col in range(4): # columns
for row in range(6): # rows
if [board[col + i][row] for i in range(4)] == [1,1,1,1]:
return 1
elif [board[col + i][row] for i in range(4)] == [-1,-1,-1,-1]:
return -1
# Evaluate diagonal for terminal state
for col in range(4): # columns
for row in range(3): # rows
if [board[col + i][row + i] for i in range(4)] == [1,1,1,1]:
return 1
elif [board[col + i][row + i] for i in range(4)] == [-1,-1,-1,-1]:
return -1
for col in range(4): # columns
for row in range(3, 6): # rows
if [board[col + i][row - i] for i in range(4)] == [1,1,1,1]:
return 1
elif [board[col + i][row - i] for i in range(4)] == [-1,-1,-1,-1]:
return -1
def _evaluateSection(self, section):
'''
Evaluates every section of the board and adds points according to the amount of elements of the same actor in a section:
PLAYER: 4-in-a-row: +inf
ENEMY: 4-in-a-row: -inf
PLAYER: 3-in-a-row: +1000
ENEMY: 3-in-a-row: -3000
PLAYER: 2-in-a-row: +200
ENEMY: 2-in-a-row: -600
'''
self.section_evaluation = 0
if section.count(1) == 4:
self.section_evaluation += math.inf
elif section.count(-1) == 4:
self.section_evaluation -= math.inf
elif section.count(1) == 3 and section.count(0) == 1:
self.section_evaluation += 1000
elif section.count(-1) == 3 and section.count(0) == 1:
self.section_evaluation -= 3000
elif section.count(1) == 2 and section.count(0) == 2:
self.section_evaluation += 200
elif section.count(-1) == 2 and section.count(0) == 2:
self.section_evaluation -= 600
return self.section_evaluation
def _evaluate(self, board):
'''
Takes sections of the board to evaluate.
'''
self.evaluation = 0
# Evaluate vertical sections
for col in range(7): # columns
for row in range(3): # rows
self.section = [board[col][row + i] for i in range(4)]
self.evaluation += self._evaluateSection(self.section)
# Evaluate horizontal sections
for col in range(4): # columns
for row in range(6): # rows
self.section = [board[col + i][row] for i in range(4)]
self.evaluation += self._evaluateSection(self.section)
# Evaluate diagonal sections
for col in range(4): # columns
for row in range(3): # rows
self.section = [board[col + i][row + i] for i in range(4)]
self.evaluation += self._evaluateSection(self.section)
for col in range(4): # columns
for row in range(3, 6): # rows
self.section = [board[col + i][row - i] for i in range(4)]
self.evaluation += self._evaluateSection(self.section)
return self.evaluation
I already redesigned the evaluation function and checked the minmax-algorithm which should include all the possible sources of this error but I wasn't able to find any satisfying answer.
I'm trying to solve the 15-Puzzle problem using IDA* algorithm and Manhattan heuristic.
I already implemented the algorithm from the pseudocode in this Wikipedia page (link).
Here's my code so far :
def IDA(initial_state, goal_state):
initial_node = Node(initial_state)
goal_node = Node(goal_state)
threshold = manhattan_heuristic(initial_state, goal_state)
path = [initial_node]
while 1:
tmp = search(path, goal_state, 0, threshold)
if tmp == True:
return path, threshold
elif tmp == float('inf'):
return False
else:
threshold = tmp
def search(path, goal_state, g, threshold):
node = path[-1]
f = g + manhattan_heuristic(node.state, goal_state)
if f > threshold:
return f
if np.array_equal(node.state, goal_state):
return True
minimum = float('inf')
for n in node.nextnodes():
if n not in path:
path.append(n)
tmp = search(path, goal_state, g + 1, threshold)
if tmp == True:
return True
if tmp < minimum:
minimum = tmp
path.pop()
return minimum
def manhattan_heuristic(state1, state2):
size = range(1, len(state1) ** 2)
distances = [count_distance(num, state1, state2) for num in size]
return sum(distances)
def count_distance(number, state1, state2):
position1 = np.where(state1 == number)
position2 = np.where(state2 == number)
return manhattan_distance(position1, position2)
def manhattan_distance(a, b):
return abs(b[0] - a[0]) + abs(b[1] - a[1])
class Node():
def __init__(self, state):
self.state = state
def nextnodes(self):
zero = np.where(self.state == 0)
y,x = zero
y = int(y)
x = int(x)
up = (y - 1, x)
down = (y + 1, x)
right = (y, x + 1)
left = (y, x - 1)
arr = []
for direction in (up, down, right, left):
if len(self.state) - 1 >= direction[0] >= 0 and len(self.state) - 1 >= direction[1] >= 0:
tmp = np.copy(self.state)
tmp[direction[0], direction[1]], tmp[zero] = tmp[zero], tmp[direction[0], direction[1]]
arr.append(Node(tmp))
return arr
I'm testing this code with a 3x3 Puzzle and here's the infinite loop! Due to the recursion I have some trouble testing my code...
I think the error might be here : tmp = search(path, goal_state, g + 1, threshold) (in the search function). I'm adding only one to the g cost value. It should be correct though, because I can only move a tile 1 place away.
Here's how to call the IDA() function:
initial_state = np.array([8 7 3],[4 1 2],[0 5 6])
goal_state = np.array([1 2 3],[8 0 4],[7 6 5])
IDA(initial_state, goal_state)
Can someone help me on this ?
There are couple of issues in your IDA* implementation. First, what is the purpose of the variable path? I found two purposes of path in your code:
Use as a flag/map to keep the board-states that is already been visited.
Use as a stack to manage recursion states.
But, it is not possible to do both of them by using a single data structure. So, the first modification that your code requires:
Fix-1: Pass current node as a parameter to the search method.
Fix-2: flag should be a data structure that can perform a not in query efficiently.
Now, fix-1 is easy as we can just pass the current visiting node as the parameter in the search method. For fix-2, we need to change the type of flag from list to set as:
list's average case complexity for x in s is: O(n)
set's
Average case complexity for x in s is: O(1)
Worst case complexity for x in s is: O(n)
You can check more details about performance for testing memberships: list vs sets for more details.
Now, to keep the Node information into a set, you need to implement __eq__ and __hash__ in your Node class. In the following, I have attached the modified code.
import timeit
import numpy as np
def IDA(initial_state, goal_state):
initial_node = Node(initial_state)
goal_node = Node(goal_state)
threshold = manhattan_heuristic(initial_state, goal_state)
#print("heuristic threshold: {}".format(threshold))
loop_counter = 0
while 1:
path = set([initial_node])
tmp = search(initial_node, goal_state, 0, threshold, path)
#print("tmp: {}".format(tmp))
if tmp == True:
return True, threshold
elif tmp == float('inf'):
return False, float('inf')
else:
threshold = tmp
def search(node, goal_state, g, threshold, path):
#print("node-state: {}".format(node.state))
f = g + manhattan_heuristic(node.state, goal_state)
if f > threshold:
return f
if np.array_equal(node.state, goal_state):
return True
minimum = float('inf')
for n in node.nextnodes():
if n not in path:
path.add(n)
tmp = search(n, goal_state, g + 1, threshold, path)
if tmp == True:
return True
if tmp < minimum:
minimum = tmp
return minimum
def manhattan_heuristic(state1, state2):
size = range(1, len(state1) ** 2)
distances = [count_distance(num, state1, state2) for num in size]
return sum(distances)
def count_distance(number, state1, state2):
position1 = np.where(state1 == number)
position2 = np.where(state2 == number)
return manhattan_distance(position1, position2)
def manhattan_distance(a, b):
return abs(b[0] - a[0]) + abs(b[1] - a[1])
class Node():
def __init__(self, state):
self.state = state
def __repr__(self):
return np.array_str(self.state.flatten())
def __hash__(self):
return hash(self.__repr__())
def __eq__(self, other):
return self.__hash__() == other.__hash__()
def nextnodes(self):
zero = np.where(self.state == 0)
y,x = zero
y = int(y)
x = int(x)
up = (y - 1, x)
down = (y + 1, x)
right = (y, x + 1)
left = (y, x - 1)
arr = []
for direction in (up, down, right, left):
if len(self.state) - 1 >= direction[0] >= 0 and len(self.state) - 1 >= direction[1] >= 0:
tmp = np.copy(self.state)
tmp[direction[0], direction[1]], tmp[zero] = tmp[zero], tmp[direction[0], direction[1]]
arr.append(Node(tmp))
return arr
initial_state = np.array([[8, 7, 3],[4, 1, 2],[0, 5, 6]])
goal_state = np.array([[1, 2, 3],[8, 0, 4],[7, 6, 5]])
start = timeit.default_timer()
is_found, th = IDA(initial_state, goal_state)
stop = timeit.default_timer()
print('Time: {} seconds'.format(stop - start))
if is_found is True:
print("Solution found with heuristic-upperbound: {}".format(th))
else:
print("Solution not found!")
Node: Please double check your Node.nextnodes() and manhattan_heuristic() methods as I did not pay much attention in those areas. You can check this GitHub repository for other algorithmic implementations (i.e., A*, IDS, DLS) to solve this problem.
References:
Python Wiki: Time Complexity
TechnoBeans: Performance for testing memberships: list vs tuples vs sets
GitHub: Puzzle Solver (by using problem solving techniques)
I'm currently working on making a Game of Life program (UNI related, beginner course), by using nested lists.
However I can't seem to get the update() method to work properly, I've no clue what's wrong. The generation of the first board is okay, but the update leaves only the cornercells alive, and the rest dead.
All methodcalls in this class originates from other .py files, which works well.
from random import randint
from cell import *
class Spillebrett:
def __init__(self, rows, columns):
self.genNumber = 0
self._rows = rows
self._columns = columns
self._grid = []
for i in range(self._rows):
self._grid.append([])
for j in range(self._columns):
self._grid[i].append(cell())
self.generate()
def drawBoard(self):
for i in self._grid:
print(" ".join(map(str, i)))
print()
#Method updates genNumber, checks if cells are alive or dead and updates the board accordingly
#Currently only yield board with corner-cells alive
def updateBoard(self):
self.genNumber += 1
toLive = []
toDie = []
for x, row in enumerate(self._grid):
for y, cell in enumerate(rad):
if cell.areAlive() is True:
counter = len(self.findNeighbour(x, y))
if counter < 2 or counter > 3:
toDie.append(cell)
elif counter == 2 or counter == 3:
toLive.append(cell)
elif cell.areAlive() is False:
counter = len(self.findNeighbour(x, y))
if counter == 3:
toLive.append(cell)
for i in toDie:
i.setDead()
for i in toLive:
i.setAlive()
return self.genNumber
#Code given by Uni
def generate(self):
for i in range(self._rows):
for j in range(self._columns):
rand = randint(0, 3)
if rand == 3:
self._grid[i][j].setAlive()
#Code given by Uni
def findNeighbour(self, row, column):
neighbourList = []
for i in range(-1, 2):
for j in range(-1, 2):
neighbourRow = rad + i
neighbourcolumn = column + j
if(neighbourRow == rad and neighbourcolumn == column) is not True:
if(neighbourRow < 0 or neighbourcolumn < 0 or neighbourRow >
self._rows - 1 or neighbourcolumn > self._columns - 1) is not True:
neighbourList.append(self._grid[neighbourRow][neighbourcolumn])
return neighbourList
def findAllAlive(self):
self._alive = 0
for i in range(self._rows):
for j in range(self._columns):
if self._grid[i][j].areAlive() is True:
self._alive += 1
return self._alive
my code below
I have a little knight's tour problem I'm trying to solve: find the smallest number of moves from point A to point B on an N*N chess board.
I created a board, and used a simple algorithm:
1. add point A to candidate list and start loop:
2. pop first element in candidate list and check it:
3. if end - return counter
4. else - add the candidate 's "sons" to end of candidate list
5. go to step 2 (counter is incremented after all previous level sons are popped)
This algorithm works as I expected (used it on a few test cases), but it was very slow:
The call f = Find_route(20, Tile(4,4), Tile(14,11)) (20 is the board dimensions, Tile(4,4) and Tile(14,11) are the start & end positions, respectively) checked 201590 (!!) tiles before reaching the answer.
I tried optimizing it by sorting the candidates list with sorted(tiles, key = lambda e : abs(e.x - end.x)+abs(e.y - end.y)) where tiles is the candidates list. That works for some of the cases but for some it is kind of useless.
helpful cases:
f = Find_route(20, Tile(1,4), Tile(1,10)) from 459 to 309 (~33% !!)
f = Find_route(20, Tile(7,0), Tile(1,11)) from 87738 to 79524 (~10% :( )
unhelpful cases:
f = Find_route(20, Tile(4,4), Tile(14,11)): from 201891 to 201590
f = Find_route(20, Tile(1,4), Tile(1,11)) from 2134 to 2111
I want eventually to have a list of near-end cases, from which the algorithm would know exactly what to do, (something like a 5 tiles radius), and I think that could help, but I am more interested in how to improve my optimize_list method. Any tips?
Code
class Tile(object):
def __init__(self, x, y):
self.x = x
self.y = y
def __str__(self):
tmp = '({0},{1})'.format(self.x, self.y)
return tmp
def __eq__(self, new):
return self.x == new.x and self.y == new.y
def get_horse_jumps(self, max_x , max_y):
l = [(1,2), (1,-2), (-1,2), (-1,-2), (2,1), (2,-1), (-2,1), (-2,-1)]
return [Tile(self.x + i[0], self.y + i[1]) for i in l if (self.x + i[0]) >= 0 and (self.y + i[1]) >= 0 and (self.x + i[0]) < max_x and (self.y + i[1]) < max_y]
class Board(object):
def __init__(self, n):
self.dimension = n
self.mat = [Tile(x,y) for y in range(n) for x in range(n)]
def show_board(self):
print('-'*20, 'board', '-'*20)
n = self.dimension
s = ''
for i in range(n):
for j in range(n):
s += self.mat[i*n + j].__str__()
s += '\n'
print(s,end = '')
print('-'*20, 'board', '-'*20)
class Find_route(Board):
def __init__(self, n, start, end):
super(Find_route, self).__init__(n)
#self.show_board()
self.start = start
self.end = end
def optimize_list(self, tiles, end):
return sorted(tiles, key = lambda e : abs(e.x - end.x)+abs(e.y - end.y))
def find_shortest_path(self, optimize = False):
counter = 0
sons = [self.start]
next_lvl = []
num_of_checked = 0
while True:
curr = sons.pop(0)
num_of_checked += 1
if curr == self.end:
print('checked: ', num_of_checked)
return counter
else: # check sons
next_lvl += curr.get_horse_jumps(self.dimension, self.dimension)
# sons <- next_lvl (optimize?)
# next_lvl <- []
if sons == []:
counter += 1
if optimize:
sons = self.optimize_list(next_lvl, self.end)
else:
sons = next_lvl
next_lvl = []
optimize = True
f = Find_route(20, Tile(7,0), Tile(1,11))
print(f.find_shortest_path(optimize))
print(f.find_shortest_path())
EDIT
I added another optimization level - optimize list at any insertion of new candidate tiles, and it seems to work like a charm, for some cases:
if optimize == 2:
if sons == []:
#counter += 1
sons = self.optimize_list(next_lvl, self.end)
else:
sons = self.optimize_list(sons + next_lvl, self.end)
else:
if sons == []:
counter += 1
if optimize == 1:
sons = self.optimize_list(next_lvl, self.end)
else:
sons = next_lvl
next_lvl = []
optimize = 2
f = Find_route(20, Tile(1,4), Tile(8,18)) # from 103761 to 8 ( optimal!!! )
print(f.find_shortest_path(optimize))
print(f.find_shortest_path())
I have a problem with calculating the number-of-jumps because I don't know when to increment the counter (maybe at each check?), but it seems to at least converge faster. Also, for other cases (e.g. f = Find_route(20, Tile(1,4), Tile(8,17))) it does not improve at all (not sure if it stops...)
Don't reinvent the wheel.
Build a graph with tiles as vertices. Connect tiles with an edge if a knight can get from one tile to another in one step.
Use a standard path finding algorithm. The breadth-first search looks like the best option in you're looking for a shortest path in an unweighted graph.
Someone told me that this problem should be easy to solve with a genetic algorithm.
I read some stuff about this topic (I hadn't heard about it before), and wrote (and copied) some code.
The results I get are close to optimum, but not close enough.
I'd like to have some help with it:
import time
import math
import random
def f(n, k):
return math.exp(k / n) - 1
def individual(length, min, max):
'Create a member of the population.'
return [random.randint(min, max) for x in range(length)]
def population(count, length, min, max):
"""
Create a number of individuals (i.e. a population).
count: the number of individuals in the population
length: the number of values per individual
min: the minimum possible value in an individual's list of values
max: the maximum possible value in an individual's list of values
"""
return [individual(length, min, max) for x in range(count)]
def fitness(individual, target):
def get_best_last_element(a, b, c):
s = math.pi - f(eu461.BASE, a) - f(eu461.BASE, b) - f(eu461.BASE, c)
s += 1
if s > 1:
return round(math.log(s) * eu461.BASE)
else:
return 0
def getg():
return get_best_last_element
"""
Determine the fitness of an individual. Higher is better.
individual: the individual to evaluate
target: the target number individuals are aiming for
"""
l = get_best_last_element(individual[0], individual[1], individual[2])
return abs(target - sum([f(eu461.BASE, k) for k in individual]) - f(eu461.BASE, l))
def grade(pop, target):
'Find average fitness for a population.'
return sum([fitness(x, target) for x in pop]) / (len(pop))
def evolve(pop, target, retain=0.2, random_select=0.05, mutate=0.01):
graded = [(fitness(x, target), x) for x in pop]
graded = [x[1] for x in sorted(graded)]
retain_length = int(len(graded) * retain)
parents = graded[:retain_length]
# randomly add other individuals to
# promote genetic diversity
for individual in graded[retain_length:]:
if random_select > random.random():
parents.append(individual)
# mutate some individuals
for individual in parents:
if mutate > random.random():
pos_to_mutate = random.randint(0, len(individual) - 1)
# this mutation is not ideal, because it
# restricts the range of possible values,
# but the function is unaware of the min/max
# values used to create the individuals,
individual[pos_to_mutate] = random.randint(min(individual), max(individual))
# crossover parents to create children
parents_length = len(parents)
desired_length = len(pop) - parents_length
children = []
while len(children) < desired_length:
male = random.randint(0, parents_length - 1)
female = random.randint(0, parents_length - 1)
if male != female:
male = parents[male]
female = parents[female]
half = len(male) // 2
if random.randint(0, 1):
child = male[:half] + female[half:]
else:
child = female[:half] + male[half:]
children.append(child)
parents.extend(children)
return parents
def get_best_last_element(a, b, c):
s = math.pi - f(eu461.BASE, a) - f(eu461.BASE, b) - f(eu461.BASE, c)
s += 1
if s > 0:
return round(math.log(s) * eu461.BASE)
else:
return 0
def eu461():
target = math.pi
p_count = 10000
i_length = 3
i_min = 0
i_max = round(eu461.BASE * math.log(math.pi + 1))
p = population(p_count, i_length, i_min, i_max)
fitness_history = [grade(p, target),]
for i in range(150):
p = evolve(p, target)
fitness_history.append(grade(p, target))
for datum in fitness_history:
pass #print (datum)
return p[0], get_best_last_element(p[0][0], p[0][1], p[0][2]), sum([f(eu461.BASE, k) for k in p[0]]) + f(eu461.BASE, get_best_last_element(p[0][0], p[0][1], p[0][2]))
eu461.BASE = 200
if __name__ == "__main__":
startTime = time.clock()
print (eu461())
elapsedTime = time.clock() - startTime
print ("Time spent in (", __name__, ") is: ", elapsedTime, " sec")