Return a move from alpha-beta - python

I'm trying to use minimax with alpha-beta pruning to return a valid move for my board. The algorithm returns the correct value, but I have no idea how to return the move as well. In the case of this code, I would want to return the child from get_successor_states whenever bestValue is greater than the current alpha. I thought about returning two values at the end of the max and min branches, like return bestValue, child, but I don't see how to make that work with the other recursive calls.
def alpha_beta_pruning(board, depth, alpha, beta, isMaximizingPlayer):
    if depth == 0:
        return evaluate_state(board)
    if isMaximizingPlayer:
        bestValue = -sys.maxint - 1
        bestMove = None
        for child in get_successor_states(board):
            temp_board = copy.deepcopy(board)
            temp_board.move_queen(child[0], child[1])
            temp_board.shoot_arrow(child[2])
            bestValue = max(bestValue, alpha_beta_pruning(temp_board, depth-1, alpha, beta, False))
            del temp_board
            alpha = max(alpha, bestValue)
            if beta <= alpha:
                break
        return bestValue
    else:
        bestValue = sys.maxint
        for child in get_successor_states(board):
            temp_board = copy.deepcopy(board)
            temp_board.move_queen(child[0], child[1])
            temp_board.shoot_arrow(child[2])
            bestValue = min(bestValue, alpha_beta_pruning(temp_board, depth-1, alpha, beta, True))
            del temp_board
            beta = min(beta, bestValue)
            if beta <= alpha:
                break
        return bestValue
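
One common pattern is to have the recursive function return a (value, move) pair and unpack it at every call site. Below is a minimal sketch of that idea, assuming the same helpers as in the question (get_successor_states, evaluate_state, move_queen, shoot_arrow) and keeping the Python 2 style of the original:

import copy
import sys

def alpha_beta_pruning(board, depth, alpha, beta, isMaximizingPlayer):
    if depth == 0:
        return evaluate_state(board), None  # leaf node: no move to report
    bestMove = None
    if isMaximizingPlayer:
        bestValue = -sys.maxint - 1
        for child in get_successor_states(board):
            temp_board = copy.deepcopy(board)
            temp_board.move_queen(child[0], child[1])
            temp_board.shoot_arrow(child[2])
            value, _ = alpha_beta_pruning(temp_board, depth - 1, alpha, beta, False)
            if value > bestValue:
                # remember which child produced the best value
                bestValue, bestMove = value, child
            alpha = max(alpha, bestValue)
            if beta <= alpha:
                break
        return bestValue, bestMove
    else:
        bestValue = sys.maxint
        for child in get_successor_states(board):
            temp_board = copy.deepcopy(board)
            temp_board.move_queen(child[0], child[1])
            temp_board.shoot_arrow(child[2])
            value, _ = alpha_beta_pruning(temp_board, depth - 1, alpha, beta, True)
            if value < bestValue:
                bestValue, bestMove = value, child
            beta = min(beta, bestValue)
            if beta <= alpha:
                break
        return bestValue, bestMove

# at the top level you would then unpack both parts:
# value, move = alpha_beta_pruning(board, depth, -sys.maxint - 1, sys.maxint, True)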

Move ordering for chess engine

Part 3 of my python programming project. Find it here.
Since the last post, I've managed to work out the transposition tables and started creating the move ordering function.
The move function starts by checking if there's a move in the opening books; if there isn't one, it executes the move ordering function, and lastly, if no move was found, it calculates the best move in the position.
This is what I have so far:
def domove(depth):
    try:
        move = chess.polyglot.MemoryMappedReader("C:/Users/bruno/Desktop/chess/books/pecg_book.bin").weighted_choice(board).move()
        move = chess.polyglot.MemoryMappedReader("C:/Users/bruno/Desktop/chess/books/human.bin").weighted_choice(board).move()
        move = chess.polyglot.MemoryMappedReader("C:/Users/bruno/Desktop/chess/books/computer.bin").weighted_choice(board).move()
        movehistory.append(move)
        return move

    except:
        orderMoves(move)
        bestMove = move
        movehistory.append(bestMove)
        return bestMove

    finally:
        bestMove = chess.Move.null()
        bestValue = -9999
        alpha = -10000
        beta = 10000
        for move in board.legal_moves:
            make_move(move)
            boardValue = -alphabeta(-beta, -alpha, depth-1)
            if boardValue > bestValue:
                bestValue = boardValue
                bestMove = move
            if boardValue > alpha:
                alpha = boardValue
            unmake_move()
        movehistory.append(bestMove)
        return bestMove
The orderMoves function checks for 3 different things in the current position:
negamax function - searches the transposition tables
Moves that lead to checkmate
Captures that win material
def orderMoves(board, bestValue, material, move):
    try:
        negamax(board)
        bestMove = move
        movehistory.append(bestMove)
        return bestMove

    except:
        for move in board.legal_moves:
            if board.is_checkmate():
                bestValue
                return bestValue

    finally:
        for move in board.legal_moves:
            if move == board.is_capture():
                if newmaterial >= material:
                    newmaterial = material
                    return bestValue
The negamax function works by storing and looking up previously stored hashes.
def negamax(node, depth, alpha, beta, score, bestValue):
    alphaOrig = alpha
    EXACT = score
    LOWERBOUND = alpha
    UPPERBOUND = beta

    ## Transposition Table Lookup; node is the lookup key for ttEntry
    ttEntry = transpositionTableLookup(node)
    if ttEntry.is_valid is True:
        if ttEntry.depth >= depth:
            if ttEntry.flag == EXACT:
                return ttEntry.value
            elif ttEntry.flag == LOWERBOUND:
                alpha = max(alpha, ttEntry.value)
            elif ttEntry.flag == UPPERBOUND:
                beta = min(beta, ttEntry.value)
            if alpha >= beta:
                return ttEntry.value

    elif depth == 0 or node == terminal_node():
        return bestValue

    childNodes = domove(node)
    childNodes = orderMoves(childNodes)
    bestValue = -99999
    for child in childNodes:
        bestValue = max(bestValue, -negamax(child, depth - 1, -beta, -alpha))
        alpha = max(alpha, bestValue)
        if alpha >= beta:
            break

    ## Transposition Table Store; node is the lookup key for ttEntry
    ttEntry.value = bestValue
    if bestValue <= alphaOrig:
        ttEntry.flag = UPPERBOUND
    if bestValue >= beta:
        ttEntry.flag = LOWERBOUND
    else:
        ttEntry.flag = EXACT
    ttEntry.depth = depth
    transpositionTableStore(node, ttEntry)

    return bestValue
There's probably a better way of implementing this function, but this was the best I could manage.
After a few hours of testing, the results were the same as when I didn't have move ordering: 7 out of the 24 test positions were correct.
What changes could I make to get a cleaner implementation and make it work properly?
Great question.
The Andoma Python chess engine uses this move ordering function in movegeneration.py, which I've also used for my Ramses Chess engine (Python):
def get_ordered_moves(board: chess.Board) -> List[chess.Move]:
    """
    Get legal moves.
    Attempt to sort moves by best to worst.
    Use piece values (and positional gains/losses) to weight captures.
    """
    end_game = check_end_game(board)

    def orderer(move):
        return move_value(board, move, end_game)

    in_order = sorted(
        board.legal_moves, key=orderer, reverse=(board.turn == chess.WHITE)
    )
    return list(in_order)
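
As a rough illustration of how this would slot into your search, the root loop in domove could iterate over the ordered list instead of board.legal_moves. This is only a sketch: the function name find_best_move is made up, and it assumes your existing make_move, unmake_move and alphabeta helpers keep their current signatures:

def find_best_move(depth):
    # hypothetical root search using the ordered move list above
    bestMove = chess.Move.null()
    bestValue = -9999
    alpha, beta = -10000, 10000
    for move in get_ordered_moves(board):  # best-to-worst instead of board.legal_moves
        make_move(move)
        boardValue = -alphabeta(-beta, -alpha, depth - 1)
        unmake_move()
        if boardValue > bestValue:
            bestValue = boardValue
            bestMove = move
        alpha = max(alpha, boardValue)  # cutoffs come sooner when good moves are tried first
    return bestMove

Trying the likely-best moves first is what makes alpha-beta cut off earlier; the ordering function itself does not change which move is best, only how quickly the search proves it.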

Minimax algorithm in python using tic tac toe

I'm trying to make a tic-tac-toe AI which plays the game optimally using the minimax algorithm. I got it to work, only to notice it does not make optimal moves, and playing it against itself always results in a win for the 'X' player (it should result in a draw).
Here is my code for algorithm:
def getBestMove(state, player):
    '''
    Minimax Algorithm
    '''
    winner_loser, done = check_current_state(state)
    if done == "Done" and winner_loser == 'O':  # If AI won
        return 1
    elif done == "Done" and winner_loser == 'X':  # If Human won
        return -1
    elif done == "Draw":  # Draw condition
        return 0

    moves = []
    empty_cells = []
    for i in range(3):
        for j in range(3):
            if state[i][j] is ' ':
                empty_cells.append(i*3 + (j+1))

    for empty_cell in empty_cells:
        move = {}
        move['index'] = empty_cell
        new_state = copy_game_state(state)
        play_move(new_state, player, empty_cell)
        if player == 'O':  # If AI
            result = getBestMove(new_state, 'X')  # make more depth tree for human
            move['score'] = result
        else:
            result = getBestMove(new_state, 'O')  # make more depth tree for AI
            move['score'] = result
        moves.append(move)

    # Find best move
    best_move = None
    if player == 'O':  # If AI player
        best = -infinity
        for move in moves:
            if move['score'] > best:
                best = move['score']
                best_move = move['index']
    else:
        best = infinity
        for move in moves:
            if move['score'] < best:
                best = move['score']
                best_move = move['index']
    return best_move
What can I do here to fix it?
I think it is easier if you follow the standard minimax algorithm, which you can find for example here. I also suggest adding alpha-beta pruning to make it a bit faster, even though it is not really necessary in Tic Tac Toe. Here is an example from a game I made long ago that you can use for inspiration; it is basically taken from the linked Wikipedia page, with some minor tweaks like if beta <= alpha for the alpha-beta pruning:
move, evaluation = minimax(board, 8, -math.inf, math.inf, True)

def minimax(board, depth, alpha, beta, maximizing_player):
    if depth == 0 or board.is_winner() or board.is_board_full():
        return None, evaluate(board)

    children = board.get_possible_moves(board)
    best_move = children[0]

    if maximizing_player:
        max_eval = -math.inf
        for child in children:
            board_copy = copy.deepcopy(board)
            board_copy.board[child[0]][child[1]].player = 'O'
            current_eval = minimax(board_copy, depth - 1, alpha, beta, False)[1]
            if current_eval > max_eval:
                max_eval = current_eval
                best_move = child
            alpha = max(alpha, current_eval)
            if beta <= alpha:
                break
        return best_move, max_eval
    else:
        min_eval = math.inf
        for child in children:
            board_copy = copy.deepcopy(board)
            board_copy.board[child[0]][child[1]].player = 'X'
            current_eval = minimax(board_copy, depth - 1, alpha, beta, True)[1]
            if current_eval < min_eval:
                min_eval = current_eval
                best_move = child
            beta = min(beta, current_eval)
            if beta <= alpha:
                break
        return best_move, min_eval

def evaluate(board):
    if board.is_winner('X'):
        return -1
    if board.is_winner('O'):
        return 1
    return 0
Note that it is important to make a deepcopy of the board (or to use an unmake-move function after the recursive minimax call); otherwise you are changing the state of the original board and will get some strange behaviours.
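
For reference, here is a minimal sketch of the unmake-move alternative mentioned above. The make_move/unmake_move methods are hypothetical names for illustration; they are not part of the code above:

# inside the maximizing branch, instead of deep-copying the board:
for child in children:
    board.make_move(child, 'O')      # hypothetical: apply the move in place
    current_eval = minimax(board, depth - 1, alpha, beta, False)[1]
    board.unmake_move(child)         # hypothetical: restore the previous state
    if current_eval > max_eval:
        max_eval = current_eval
        best_move = child
    alpha = max(alpha, current_eval)
    if beta <= alpha:
        break

The trade-off is speed versus simplicity: undoing a move avoids copying the whole board at every node, but you must be careful to restore the state exactly, including on the break path.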

Minimax Recursion Timeout

I am trying to write a minimax algorithm for a Pacman game. I am having a problem with the recursion: sometimes my algorithm 'works' (it does not crash, but still returns the wrong value), and sometimes it crashes with a recursion error saying the maximum recursion depth was exceeded. Can someone shed some light on what I am doing wrong here? Thank you!
def minimax(self, gamestate, depth, agent):
    if depth == self.depth or gamestate.isWin() or gamestate.isLose():
        return self.evaluationFunction(gamestate), Directions.STOP

    best_move = None
    if agent == 0:  ## Pacman is max
        best_val = float('-inf')
    else:  ## ghosties are min
        best_val = float('inf')

    actions = gamestate.getLegalActions(agent)
    for action in actions:
        next_agent = agent + 1  ## quit moving me! ## this goes here to set next_agent to be same as agent each iteration, because it also gets changed below
        successor = gamestate.generateSuccessor(agent, action)  ## generate successor gamestate (ie new board)
        if next_agent == gamestate.getNumAgents():
            next_agent = 0  ## Once we reach the last agent, set back to 0
            depth += 1  ## increment depth
        v = self.minimax(successor, depth, next_agent)

        ## here is where we set the max and min values based on the agent
        if agent == 0:  ## pacman - max
            if v > best_val:
                best_val = v
                best_move = action
        else:  ## ghost - min
            if v < best_val:
                best_val = v
                best_move = action

    return best_move, best_val
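
As posted, minimax returns a tuple (a value paired with a move, in a different order at the terminal case than at the end), yet v is compared against best_val as if it were a plain number, and depth is mutated inside the loop. A sketch of how the return value could be unpacked consistently, keeping the question's gamestate API as-is:

def minimax(self, gamestate, depth, agent):
    # return (value, move) consistently from every branch
    if depth == self.depth or gamestate.isWin() or gamestate.isLose():
        return self.evaluationFunction(gamestate), Directions.STOP

    best_val = float('-inf') if agent == 0 else float('inf')
    best_move = None

    for action in gamestate.getLegalActions(agent):
        next_agent = agent + 1
        next_depth = depth
        if next_agent == gamestate.getNumAgents():
            next_agent = 0           # back to Pacman
            next_depth = depth + 1   # one full round of moves completes a ply
        successor = gamestate.generateSuccessor(agent, action)
        val, _ = self.minimax(successor, next_depth, next_agent)  # unpack the value only

        if agent == 0 and val > best_val:      # Pacman maximizes
            best_val, best_move = val, action
        elif agent != 0 and val < best_val:    # ghosts minimize
            best_val, best_move = val, action

    return best_val, best_move

Using a separate next_depth also keeps later iterations of the same loop from searching deeper than intended.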

How do these two methods have different time complexity in Python?

I am trying to solve the Leetcode word search problem.
I have two solutions which to me are identical, although one of them results in Time Limit Exceeded.
I have read all the resources around this and could not understand what is happening behind the scenes.
Solution 1 with optimised time complexity:
class Solution(object):
    def bfs(self, position, visited, board, word):
        # print(position)
        if not word:
            return True
        destination = [(-1,0), (1,0), (0,1), (0,-1)]
        res = False
        for x, y in destination:
            new_postion_x, new_postion_y = position[0] + x, position[1] + y
            if (0 <= new_postion_x < len(board)) and (0 <= new_postion_y < len(board[0])) and (new_postion_x, new_postion_y) not in visited:
                if board[new_postion_x][new_postion_y] == word[0]:
                    new_visited = visited.copy()
                    new_visited[(new_postion_x, new_postion_y)] = 1
                    res = res or self.bfs((new_postion_x, new_postion_y), new_visited, board, word[1:])
        return res

    def exist(self, board, word):
        for i in xrange(len(board)):
            for j in xrange(len(board[0])):
                if board[i][j] == word[0]:
                    t = self.bfs((i,j), {(i,j): 1}, board, word[1:])
                    if t:
                        return True
        return False
Solution 2, which results in TLE for large inputs:
class Solution(object):
    def bfs(self, position, visited, board, word):
        print(position)
        if not word:
            return True
        destination = [(-1,0), (1,0), (0,1), (0,-1)]
        res = False
        for x, y in destination:
            new_postion_x, new_postion_y = position[0] + x, position[1] + y
            if (0 <= new_postion_x < len(board)) and (0 <= new_postion_y < len(board[0])) and (new_postion_x, new_postion_y) not in visited:
                if board[new_postion_x][new_postion_y] == word[0]:
                    new_visited = visited.copy()
                    new_visited[(new_postion_x, new_postion_y)] = 1
                    t = self.bfs((new_postion_x, new_postion_y), new_visited, board, word[1:])
                    res = res or t
        return res

    def exist(self, board, word):
        for i in xrange(len(board)):
            for j in xrange(len(board[0])):
                if board[i][j] == word[0]:
                    t = self.bfs((i,j), {(i,j): 1}, board, word[1:])
                    if t:
                        return True
        return False
The key line is here:
res = res or self.bfs((new_postion_x, new_postion_y), new_visited, board, word[1:])
Once res becomes True, this line will never execute self.bfs() again because the expression after the or does not need to be evaluated. This is called short-circuiting.
Thus, once a solution is found you don't make any more recursive calls and quickly return the True all the way back to the top.
An equivalent way of writing this, which might clarify it, would be:
if not res:
    res = self.bfs(...)
In the other solution:
t = self.bfs((new_postion_x, new_postion_y), new_visited, board, word[1:])
res = res or t
This will always call self.bfs() even when it's not needed anymore, with the net effect of always searching through the entire graph even if a solution has already been found.
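
A tiny standalone demonstration of that short-circuiting behaviour (the search function here is a made-up stand-in, not part of the solutions above):

def search(x):
    print("search called with {}".format(x))  # visible side effect so we can count calls
    return x

res = False
res = res or search(True)   # prints: res is False, so "or" must evaluate search()
res = res or search(True)   # prints nothing: res is already True, search() is skipped

res = False
t = search(True)            # always prints: the call happens before the "or" is reached
res = res or t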

Python - Alpha-Beta Pruning for Minimax

I want to implement an agent for the Three Men's Morris game (which is very similar to tic-tac-toe), and I want to use the minimax strategy with alpha-beta pruning. Here's my code in Python, based on this post and this post on StackOverflow, but it doesn't work! It gives a wrong solution, even when one of the successors of the current state is a solution.
def alpha_beta(state, alpha, beta, turn, depth):
    if int(terminal_test(state, turn)) == int(MY_NUMBER):
        return 1  # win
    elif (int(terminal_test(state, turn)) != 0) and (int(terminal_test(state, turn)) != int(MY_NUMBER)):
        return -1  # lose
    else:
        if int(depth) == 13:
            return 0  # reached limit

        moves = successors(state, turn, int(depth))
        # valid moves for player based on rules
        for move in moves:
            state = make_move(state, move, turn)
            current_eval = -alpha_beta(state, -beta, -alpha, 2-int(turn), int(depth)+1)
            state = undo_move(state, move, turn)

            if current_eval >= beta:
                return beta
            if current_eval > alpha:
                alpha = current_eval
        return alpha

def rootAlphaBeta(state, depth, turn):
    best_move = None
    max_eval = float('-infinity')

    moves = successors(state, turn, int(depth))
    alpha = float('infinity')
    for move in moves:
        state = make_move(state, move, turn)
        alpha = -alpha_beta(state, float('-infinity'), alpha, 2-int(turn), int(depth)+1)
        state = undo_move(state, move, turn)

        if alpha > max_eval:
            max_eval = alpha
            best_move = move

    # best_move which is selected here is not really the best move!
    return best_move
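
One thing worth comparing against is a conventional negamax root, where alpha starts at negative infinity and the window passed down is (-beta, -alpha). The sketch below uses only the helpers already present in the question (successors, make_move, undo_move, alpha_beta) and is meant for illustration rather than as a drop-in fix:

def root_alpha_beta(state, depth, turn):
    # conventional negamax root: alpha starts low and only increases
    best_move = None
    alpha = float('-inf')
    beta = float('inf')
    for move in successors(state, turn, int(depth)):
        state = make_move(state, move, turn)
        value = -alpha_beta(state, -beta, -alpha, 2 - int(turn), int(depth) + 1)
        state = undo_move(state, move, turn)
        if value > alpha:
            alpha = value
            best_move = move  # remember which root move produced the best score
    return best_move, alpha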
