Minimax Recursion Timeout - python

I am trying to write a minimax algorithm for a Pacman game. I am having a problem with the recursion: sometimes my algorithm 'works' (it doesn't crash, but it still returns the wrong value), and sometimes it crashes with a RecursionError saying the maximum recursion depth was exceeded. Can someone shed some light on what I am doing wrong here? Thank you!
def minimax(self, gamestate, depth, agent):
    if depth == self.depth or gamestate.isWin() or gamestate.isLose():
        return self.evaluationFunction(gamestate), Directions.STOP
    best_move = None
    if agent == 0:  ## Pacman is max
        best_val = float('-inf')
    else:  ## ghosties are min
        best_val = float('inf')
    actions = gamestate.getLegalActions(agent)
    for action in actions:
        next_agent = agent + 1  ## this goes here to set next_agent back each iteration, because it also gets changed below
        successor = gamestate.generateSuccessor(agent, action)  ## generate successor gamestate (i.e. new board)
        if next_agent == gamestate.getNumAgents():
            next_agent = 0  ## once we reach the last agent, set back to 0
            depth += 1  ## increment depth
        v = self.minimax(successor, depth, next_agent)
        ## here is where we set the max and min values based on the agent
        if agent == 0:  ## pacman - max
            if v > best_val:
                best_val = v
                best_move = action
        else:  ## ghost - min
            if v < best_val:
                best_val = v
                best_move = action
    return best_move, best_val
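Two inconsistencies stand out above: the base case returns (value, move) while the recursive case returns (move, value), so v is a tuple being compared against the float best_val; and depth is mutated inside the action loop, so sibling actions after a wrap search at different depths. A minimal sketch with a single (value, move) convention and per-call depth bookkeeping, assuming the Berkeley Pacman framework's gamestate API:

def minimax(self, gamestate, depth, agent):
    # Both return paths use (value, move) so callers can unpack safely.
    if depth == self.depth or gamestate.isWin() or gamestate.isLose():
        return self.evaluationFunction(gamestate), Directions.STOP
    next_agent = (agent + 1) % gamestate.getNumAgents()
    next_depth = depth + 1 if next_agent == 0 else depth  # one ply deeper per full round of agents
    best_val = float('-inf') if agent == 0 else float('inf')
    best_move = Directions.STOP
    for action in gamestate.getLegalActions(agent):
        successor = gamestate.generateSuccessor(agent, action)
        val, _ = self.minimax(successor, next_depth, next_agent)  # unpack the value, ignore the move
        if (agent == 0 and val > best_val) or (agent != 0 and val < best_val):
            best_val, best_move = val, action
    return best_val, best_move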

Related

Q Learning Average Return Is Fluctuating Instead of Increasing

So I have created a Q-learning network in TensorFlow to solve a Rubik's Cube. The problem is that my average return is oscillating instead of increasing to a positive value. A positive value means that the AI is correctly predicting the next step. What could be the reason for this?
Here is the step code I'm running:
def step(self, action):
    if self._episode_ended:
        return self.reset()
    last_action = self.action_chain.pop()  # Action chain not matching
    inverse_action = self.cube.get_inverse_action(last_action)
    last_state = self.observation_chain.pop()
    # Performs the Rubik's Cube action
    # print(self.cube.to_matrix())
    self.cube.action(inverse_action)
    for index in range(0, len(self.cube.to_matrix())):  # For some reason the values are not correct sometimes
        a = self.cube.to_matrix()[index]
        # b = self.observation_chain[-1][index]
        b = last_state[index]
        if a != b:
            raise ValueError("Inverse action is not going to correct state.")
    done = self.cube.is_solved()
    if done:
        self._episode_ended = True
    reward = 0
    # Default rewards and punishments
    reward = (20 if action == inverse_action else -20)
    # print("ACTION " + str(action))
    # print("INVERSE_ACTION " + str(inverse_action))
    # print("RIGHT ACTION " + str(action == inverse_action))
    # Rewards the AI if it finds a way to get closer towards a future state
    # counter = 0
    # for index in range(len(self.observation_chain), 0):
    #     self.observation_chain[index]
    #     is_equal = np.array_equal(self.observation_chain[index], self.cube.to_matrix())
    #     if is_equal: reward = counter * 20
    #     counter += 1
    for _ in range(0, counter):
        self.observation_chain.pop()
        self.action_chain.pop()
    self._state = self.cube.to_matrix()
    return ts.termination(self._state, reward=reward)
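One detail worth flagging in the snippet above: it always returns ts.termination, even on mid-episode steps, which tells the agent that every step ends the episode. A minimal sketch of the usual TF-Agents pattern, assuming a py_environment.PyEnvironment subclass; _apply_action is a hypothetical helper that mutates the cube, updates self._state, and returns the reward:

from tf_agents.trajectories import time_step as ts

def _step(self, action):
    if self._episode_ended:
        return self.reset()  # start a fresh episode after a terminal step
    reward = self._apply_action(action)  # hypothetical: mutate the cube, compute the reward
    if self._episode_ended:
        return ts.termination(self._state, reward=reward)  # terminal time step
    return ts.transition(self._state, reward=reward, discount=1.0)  # mid-episode time step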

Move ordering for chess engine

Part 3 of my Python programming project. Find it here.
Since the last post, I've managed to work out the transposition tables and have started creating the move ordering function.
The move function starts by checking if there's a move in the opening books; if there isn't one, it executes the move ordering function, and lastly, if no move was found, it calculates the best move in the position.
This is what I have so far:
def domove(depth):
    try:
        move = chess.polyglot.MemoryMappedReader("C:/Users/bruno/Desktop/chess/books/pecg_book.bin").weighted_choice(board).move()
        move = chess.polyglot.MemoryMappedReader("C:/Users/bruno/Desktop/chess/books/human.bin").weighted_choice(board).move()
        move = chess.polyglot.MemoryMappedReader("C:/Users/bruno/Desktop/chess/books/computer.bin").weighted_choice(board).move()
        movehistory.append(move)
        return move
    except:
        orderMoves(move)
        bestMove = move
        movehistory.append(bestMove)
        return bestMove
    finally:
        bestMove = chess.Move.null()
        bestValue = -9999
        alpha = -10000
        beta = 10000
        for move in board.legal_moves:
            make_move(move)
            boardValue = -alphabeta(-beta, -alpha, depth - 1)
            if boardValue > bestValue:
                bestValue = boardValue
                bestMove = move
            if boardValue > alpha:
                alpha = boardValue
            unmake_move()
        movehistory.append(bestMove)
        return bestMove
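One caveat about the structure above: a finally clause always runs, so the full search at the bottom of domove executes (and its return wins) even when a book move was already found. A sketch of the book probing with explicit fall-through instead, assuming the python-chess polyglot API, where weighted_choice raises IndexError for a position with no book entry (in recent python-chess versions entry.move is an attribute rather than a method):

import chess.polyglot

BOOK_PATHS = [
    "C:/Users/bruno/Desktop/chess/books/pecg_book.bin",
    "C:/Users/bruno/Desktop/chess/books/human.bin",
    "C:/Users/bruno/Desktop/chess/books/computer.bin",
]

def probe_books(board):
    # Try each opening book in turn; return the first weighted choice found.
    for path in BOOK_PATHS:
        try:
            with chess.polyglot.open_reader(path) as reader:
                return reader.weighted_choice(board).move
        except IndexError:
            continue  # no entry for this position in this book
    return None  # no book move: fall through to the regular search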
The orderMoves function checks for 3 different things in the current position:
negamax function - searches the transposition tables
moves that lead to checkmate
captures that win material
def orderMoves(board, bestValue, material, move):
    try:
        negamax(board)
        bestMove = move
        movehistory.append(bestMove)
        return bestMove
    except:
        for move in board.legal_moves:
            if board.is_checkmate():
                bestValue
        return bestValue
    finally:
        for move in board.legal_moves:
            if move == board.is_capture():
                if newmaterial >= material:
                    newmaterial = material
        return bestValue
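For comparison, here is a sketch that applies two of those three criteria (mate-in-one first, then captures weighted by victim value, a simplification of "captures that win material") in a single sorting pass; the transposition-table probe belongs inside negamax itself. It assumes a python-chess board and the usual centipawn piece values, neither taken from the original code:

import chess

PIECE_VALUES = {chess.PAWN: 100, chess.KNIGHT: 320, chess.BISHOP: 330,
                chess.ROOK: 500, chess.QUEEN: 900, chess.KING: 0}

def order_moves(board):
    def score(move):
        board.push(move)
        is_mate = board.is_checkmate()
        board.pop()
        if is_mate:
            return 10000  # moves that deliver checkmate come first
        if board.is_capture(move):
            victim = board.piece_at(move.to_square)
            # en passant captures land on an empty square, so fall back to a pawn
            return PIECE_VALUES[victim.piece_type] if victim else PIECE_VALUES[chess.PAWN]
        return 0  # quiet moves last
    return sorted(board.legal_moves, key=score, reverse=True)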
The negamax function works by storing and looking up previously stored hashes.
def negamax(node, depth, alpha, beta, score, bestValue):
    alphaOrig = alpha
    EXACT = score
    LOWERBOUND = alpha
    UPPERBOUND = beta
    ## Transposition table lookup; node is the lookup key for ttEntry
    ttEntry = transpositionTableLookup(node)
    if ttEntry.is_valid is True:
        if ttEntry.depth >= depth:
            if ttEntry.flag == EXACT:
                return ttEntry.value
            elif ttEntry.flag == LOWERBOUND:
                alpha = max(alpha, ttEntry.value)
            elif ttEntry.flag == UPPERBOUND:
                beta = min(beta, ttEntry.value)
            if alpha >= beta:
                return ttEntry.value
    elif depth == 0 or node == terminal_node():
        return bestValue
    childNodes = domove(node)
    childNodes = orderMoves(childNodes)
    bestValue = -99999
    for child in childNodes:
        bestValue = max(bestValue, -negamax(child, depth - 1, -beta, -alpha))
        alpha = max(alpha, bestValue)
        if alpha >= beta:
            break
    ## Transposition table store; node is the lookup key for ttEntry
    ttEntry.value = bestValue
    if bestValue <= alphaOrig:
        ttEntry.flag = UPPERBOUND
    if bestValue >= beta:
        ttEntry.flag = LOWERBOUND
    else:
        ttEntry.flag = EXACT
    ttEntry.depth = depth
    transpositionTableStore(node, ttEntry)
    return bestValue
There's probably a better way of implementing this function, but this was the best I could manage.
After a few hours of test runs, the results were the same as when I didn't have move ordering: 7 out of the 24 test positions were correct.
What changes could I make to get a cleaner implementation and make it work properly?
Great question.
The Andoma Python chess engine uses this move ordering function in movegeneration.py, which I've also used for my Ramses Chess engine (Python):
from typing import List

import chess

def get_ordered_moves(board: chess.Board) -> List[chess.Move]:
    """
    Get legal moves.
    Attempt to sort moves by best to worst.
    Use piece values (and positional gains/losses) to weight captures.
    """
    end_game = check_end_game(board)

    def orderer(move):
        return move_value(board, move, end_game)

    in_order = sorted(
        board.legal_moves, key=orderer, reverse=(board.turn == chess.WHITE)
    )
    return list(in_order)
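For context, a hypothetical sketch of wiring this ordering into the search loop from domove (alphabeta, make_move, and unmake_move are the asker's own helpers, not part of python-chess):

for move in get_ordered_moves(board):  # search the most promising moves first
    make_move(move)
    boardValue = -alphabeta(-beta, -alpha, depth - 1)
    unmake_move()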

Minimax algorithm in python using tic tac toe

I'm trying to make a tic-tac-toe AI which plays the game optimally by using the minimax algorithm. I got it to work, only to notice that it does not make optimal moves, and putting it against itself always results in a win for the 'X' player (it should result in a draw).
Here is my code for the algorithm:
def getBestMove(state, player):
    '''
    Minimax Algorithm
    '''
    winner_loser, done = check_current_state(state)
    if done == "Done" and winner_loser == 'O':  # If AI won
        return 1
    elif done == "Done" and winner_loser == 'X':  # If Human won
        return -1
    elif done == "Draw":  # Draw condition
        return 0
    moves = []
    empty_cells = []
    for i in range(3):
        for j in range(3):
            if state[i][j] is ' ':
                empty_cells.append(i*3 + (j+1))
    for empty_cell in empty_cells:
        move = {}
        move['index'] = empty_cell
        new_state = copy_game_state(state)
        play_move(new_state, player, empty_cell)
        if player == 'O':  # If AI
            result = getBestMove(new_state, 'X')  # make deeper tree for human
            move['score'] = result
        else:
            result = getBestMove(new_state, 'O')  # make deeper tree for AI
            move['score'] = result
        moves.append(move)
    # Find best move
    best_move = None
    if player == 'O':  # If AI player
        best = -infinity
        for move in moves:
            if move['score'] > best:
                best = move['score']
                best_move = move['index']
    else:
        best = infinity
        for move in moves:
            if move['score'] < best:
                best = move['score']
                best_move = move['index']
    return best_move
What can I do here to fix it?
I think it is easier if you follow the standard minimax algorithm, which you can find, for example, here. I also suggest adding alpha-beta pruning to make it a bit faster, even though it is not really necessary for tic-tac-toe. Here is an example from a game I made long ago that you can use for inspiration; it is basically taken from the linked Wikipedia page, with some minor tweaks like the if beta <= alpha check for the alpha-beta pruning:
import copy
import math

def minimax(board, depth, alpha, beta, maximizing_player):
    if depth == 0 or board.is_winner() or board.is_board_full():
        return None, evaluate(board)
    children = board.get_possible_moves(board)
    best_move = children[0]
    if maximizing_player:
        max_eval = -math.inf
        for child in children:
            board_copy = copy.deepcopy(board)
            board_copy.board[child[0]][child[1]].player = 'O'
            current_eval = minimax(board_copy, depth - 1, alpha, beta, False)[1]
            if current_eval > max_eval:
                max_eval = current_eval
                best_move = child
            alpha = max(alpha, current_eval)
            if beta <= alpha:
                break
        return best_move, max_eval
    else:
        min_eval = math.inf
        for child in children:
            board_copy = copy.deepcopy(board)
            board_copy.board[child[0]][child[1]].player = 'X'
            current_eval = minimax(board_copy, depth - 1, alpha, beta, True)[1]
            if current_eval < min_eval:
                min_eval = current_eval
                best_move = child
            beta = min(beta, current_eval)
            if beta <= alpha:
                break
        return best_move, min_eval

def evaluate(board):
    if board.is_winner('X'):
        return -1
    if board.is_winner('O'):
        return 1
    return 0

# Example call: search 8 plies ahead from the maximizing player's perspective.
move, evaluation = minimax(board, 8, -math.inf, math.inf, True)
Note that it is important to make a deepcopy of the board (or to use an unmake-move step after the recursive minimax call); otherwise you are changing the state of the original board and will get some strange behaviours.
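A sketch of that unmake-move alternative, under the same assumptions about the board structure (square objects exposing a player attribute, assumed None when empty):

for child in children:
    square = board.board[child[0]][child[1]]
    previous = square.player           # remember what was there (assumed None for empty)
    square.player = 'O'                # make the move in place
    current_eval = minimax(board, depth - 1, alpha, beta, False)[1]
    square.player = previous           # unmake: restore the square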

Monte Carlo Selection phase maximum recursion exceeded

I'm trying to implement Monte Carlo tree search for an ultimate tic-tac-toe game (it's like tic-tac-toe, but the board is larger) and am having trouble in the selection phase of the algorithm. My search tree is made up of individual nodes, and the first part of the algorithm tries to select one of them to expand. However, I keep getting an error that the maximum recursion depth has been exceeded:
def select_node(node, board, state, identity):
    """ Traverses the tree until the end criteria are met.
    Args:
        node: A tree node from which the search is traversing.
        board: The game setup.
        state: The state of the game.
        identity: The player's identity, either 1 or 2.
    Returns: A node from which the next stage of the search can proceed.
    """
    if len(node.child_nodes) == 0 and len(node.untried_actions) > 0:  # Node has untried actions, choose it
        return node
    best_child = None
    if board.current_player(state) == identity:  # it's our turn
        best_child_utc = -inf
        for move in node.child_nodes:
            child_node = node.child_nodes[move]
            if child_node.visits == 0:
                best_child = child_node
                break
            child_UCB_score = (child_node.wins / child_node.visits) + 1.41 * sqrt(log(node.visits) / child_node.visits)
            if child_UCB_score > best_child_utc:
                best_child = child_node
                best_child_utc = child_UCB_score
    else:  # it's the opponent's turn
        best_child_utc = -inf
        for move in node.child_nodes:
            child_node = node.child_nodes[move]
            if child_node.visits == 0:
                best_child = child_node
                break
            child_UCB_score = 1 - ((child_node.wins / child_node.visits) + 1.41 * sqrt(log(node.visits) / child_node.visits))
            if child_UCB_score > best_child_utc:
                best_child = child_node
                best_child_utc = child_UCB_score
    if best_child is not None and len(best_child.child_nodes) > 0 and not board.is_ended(best_child.state):
        return select_node(best_child, board, best_child.state, identity)
    else:
        return best_child
Is there any way to transform my algorithm from being recursive to using some sort of loop so it doesn't hit the recursion limit? This is being played on a 9x9 board, so the maximum depth of recursion it could theoretically hit is 81.
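Since the recursive call is in tail position, it can be replaced by updating node and state in a loop. A sketch under the same assumptions about the node fields (child_nodes, untried_actions, visits, wins, state) and the board API, with the two mirrored branches folded into one:

from math import sqrt, log, inf

def select_node(node, board, state, identity):
    while True:
        # Node has untried actions: stop here and let the expansion phase take over.
        if len(node.child_nodes) == 0 and len(node.untried_actions) > 0:
            return node
        best_child, best_uct = None, -inf
        our_turn = board.current_player(state) == identity
        for child in node.child_nodes.values():
            if child.visits == 0:
                best_child = child  # always try unvisited children first
                break
            exploit = child.wins / child.visits
            if not our_turn:
                exploit = 1 - exploit  # the opponent prefers moves that are bad for us
            uct = exploit + 1.41 * sqrt(log(node.visits) / child.visits)
            if uct > best_uct:
                best_child, best_uct = child, uct
        if best_child is None or len(best_child.child_nodes) == 0 or board.is_ended(best_child.state):
            return best_child
        node, state = best_child, best_child.state  # descend one level instead of recursing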

Python - Alpha-Beta Pruning for Minimax

I want to implement an agent for the 3-Men's Morris game (which is very similar to tic-tac-toe), and I want to use the minimax strategy with alpha-beta pruning. Here's my code in Python, based on this post and this post on Stack Overflow, but it doesn't work! It gives a wrong solution, even when one of the successors of the current state is a solution.
def alpha_beta(state, alpha, beta, turn, depth):
    if int(terminal_test(state, turn)) == int(MY_NUMBER):
        return 1  # win
    elif (int(terminal_test(state, turn)) != 0) and (int(terminal_test(state, turn)) != int(MY_NUMBER)):
        return -1  # lose
    else:
        if int(depth) == 13:
            return 0  # reached limit
        moves = successors(state, turn, int(depth))  # valid moves for player based on rules
        for move in moves:
            state = make_move(state, move, turn)
            current_eval = -alpha_beta(state, -beta, -alpha, 2 - int(turn), int(depth) + 1)
            state = undo_move(state, move, turn)
            if current_eval >= beta:
                return beta
            if current_eval > alpha:
                alpha = current_eval
        return alpha

def rootAlphaBeta(state, depth, turn):
    best_move = None
    max_eval = float('-infinity')
    moves = successors(state, turn, int(depth))
    alpha = float('infinity')
    for move in moves:
        state = make_move(state, move, turn)
        alpha = -alpha_beta(state, float('-infinity'), alpha, 2 - int(turn), int(depth) + 1)
        state = undo_move(state, move, turn)
        if alpha > max_eval:
            max_eval = alpha
            best_move = move
    # best_move which is selected here is not really the best move!
    return best_move
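For reference, a conventional negamax root keeps one (alpha, beta) window and lets alpha track the best score found so far, rather than seeding alpha with +infinity and feeding each child's score back in as the next bound; negamax also expects terminal_test to score positions from the perspective of the side to move. A sketch under the same assumptions about the helpers (successors, make_move, undo_move, and alpha_beta as above):

def rootAlphaBeta(state, depth, turn):
    best_move = None
    alpha, beta = float('-inf'), float('inf')
    for move in successors(state, turn, int(depth)):
        state = make_move(state, move, turn)
        score = -alpha_beta(state, -beta, -alpha, 2 - int(turn), int(depth) + 1)
        state = undo_move(state, move, turn)
        if score > alpha:  # alpha doubles as "best score so far"
            alpha = score
            best_move = move
    return best_move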
