I'm trying to make tic tac toe AI, which plays the game optimally by using minimax algorithm. I got it to work only to notice it does not make optimal moves and putting it against itself results always win for 'X' player (It should result in draw).
Here is my code for algorithm:
def getBestMove(state, player):
'''
Minimax Algorithm
'''
winner_loser , done = check_current_state(state)
if done == "Done" and winner_loser == 'O': # If AI won
return 1
elif done == "Done" and winner_loser == 'X': # If Human won
return -1
elif done == "Draw": # Draw condition
return 0
moves = []
empty_cells = []
for i in range(3):
for j in range(3):
if state[i][j] is ' ':
empty_cells.append(i*3 + (j+1))
for empty_cell in empty_cells:
move = {}
move['index'] = empty_cell
new_state = copy_game_state(state)
play_move(new_state, player, empty_cell)
if player == 'O': # If AI
result = getBestMove(new_state, 'X') # make more depth tree for human
move['score'] = result
else:
result = getBestMove(new_state, 'O') # make more depth tree for AI
move['score'] = result
moves.append(move)
# Find best move
best_move = None
if player == 'O': # If AI player
best = -infinity
for move in moves:
if move['score'] > best:
best = move['score']
best_move = move['index']
else:
best = infinity
for move in moves:
if move['score'] < best:
best = move['score']
best_move = move['index']
return best_move
What can I do here to fix it?
I think it is easier if you follow the standard minimax algorithm which you can find for example here. I also suggest adding alpha-beta pruning to make it a bit faster, even though it is not really necessary in Tic Tac Toe. Here is an example of a game I made long ago that you can use for inspiration, it is basicall taken from the linked Wikipedia page, with some minor tweaks like if beta <= alpha for the alpha-beta pruning:
move, evaluation = minimax(board, 8, -math.inf, math.inf, True)
def minimax(board, depth, alpha, beta, maximizing_player):
if depth == 0 or board.is_winner() or board.is_board_full():
return None, evaluate(board)
children = board.get_possible_moves(board)
best_move = children[0]
if maximizing_player:
max_eval = -math.inf
for child in children:
board_copy = copy.deepcopy(board)
board_copy.board[child[0]][child[1]].player = 'O'
current_eval = minimax(board_copy, depth - 1, alpha, beta, False)[1]
if current_eval > max_eval:
max_eval = current_eval
best_move = child
alpha = max(alpha, current_eval)
if beta <= alpha:
break
return best_move, max_eval
else:
min_eval = math.inf
for child in children:
board_copy = copy.deepcopy(board)
board_copy.board[child[0]][child[1]].player = 'X'
current_eval = minimax(board_copy, depth - 1, alpha, beta, True)[1]
if current_eval < min_eval:
min_eval = current_eval
best_move = child
beta = min(beta, current_eval)
if beta <= alpha:
break
return best_move, min_eval
def evaluate(board):
if board.is_winner('X'):
return -1
if board.is_winner('O'):
return 1
return 0
Note that it is important to make a deepcopy of the board (or an unmake move function after the recursive minimax call), otherwise you are changing the state of the original board and will get some strange behaviours.
Related
Part 3 of my python programming project. Find it here.
Since the last post, I've managed to get work out the transposition tables and started creating the move ordering function.
The move function starts by checking if there's a move in the opening books, if there isn't one then it executes the move ordering function and lastly if there was no move found, it calculates the best move in the position.
This is what I have so far:
def domove(depth):
try:
move = chess.polyglot.MemoryMappedReader("C:/Users/bruno/Desktop/chess/books/pecg_book.bin").weighted_choice(board).move()
move = chess.polyglot.MemoryMappedReader("C:/Users/bruno/Desktop/chess/books/human.bin").weighted_choice(board).move()
move = chess.polyglot.MemoryMappedReader("C:/Users/bruno/Desktop/chess/books/computer.bin").weighted_choice(board).move()
movehistory.append(move)
return move
except:
orderMoves(move)
bestMove = move
movehistory.append(bestMove)
return bestMove
finally:
bestMove = chess.Move.null()
bestValue = -9999
alpha = -10000
beta = 10000
for move in board.legal_moves:
make_move(move)
boardValue = -alphabeta(-beta, -alpha, depth-1)
if boardValue > bestValue:
bestValue = boardValue
bestMove = move
if( boardValue > alpha ):
alpha = boardValue
unmake_move()
movehistory.append(bestMove)
return bestMove
The orderMoves function checks for 3 different things in the current position:
negamax function - searches the transposition tables
Moves that lead to checkmate
Captures that win material
.
def orderMoves(board, bestValue, material, move):
try:
negamax(board)
bestMove = move
movehistory.append(bestMove)
return bestMove
except:
for move in board.legal_moves:
if board.is_checkmate():
bestValue
return bestValue
finally:
for move in board.legal_moves:
if move == board.is_capture():
if newmaterial >= material:
newmaterial = material
return bestValue
The negamax function works via storing and looking up previously stored hashes.
def negamax(node, depth, alpha, beta, score, bestValue):
alphaOrig = alpha
EXACT = score
LOWERBOUND = alpha
UPPERBOUND = beta
## Transposition Table Lookup; node is the lookup key for ttEntry
ttEntry = transpositionTableLookup(node)
if ttEntry.is_valid is True :
if ttEntry.depth >= depth:
if ttEntry.flag == EXACT :
return ttEntry.value
elif ttEntry.flag == LOWERBOUND:
alpha = max(alpha, ttEntry.value)
elif ttEntry.flag == UPPERBOUND:
beta = min(beta, ttEntry.value)
if alpha >= beta:
return ttEntry.value
elif depth == 0 or node == terminal_node():
return bestValue
childNodes = domove(node)
childNodes = orderMoves(childNodes)
bestValue = -99999
for child in childNodes:
bestValue = max(bestValue, -negamax(child, depth - 1, -beta, -alpha))
alpha = max(alpha, bestValue)
if alpha >= beta:
break
##Transposition Table Store; node is the lookup key for ttEntry
ttEntry.value = bestValue
if bestValue <= alphaOrig:
ttEntry.flag = UPPERBOUND
if bestValue >= beta:
ttEntry.flag = LOWERBOUND
else:
ttEntry.flag = EXACT
ttEntry.depth = depth
transpositionTableStore(node, ttEntry)
return bestValue
There's probably a better way of implementing this function, but this was the best I could manage.
After testing this for a few hours of running the code, the results were the same as when I didn't have move ordering. 7 out of the 24 test positions were correct.
What changes could I make to get a cleaner implementation and make it work properly?
Great question.
The Andoma Python chess engine uses this move ordering function in movegeneration.py, that I've also use for my Ramses Chess engine (Python) :
def get_ordered_moves(board: chess.Board) -> List[chess.Move]:
"""
Get legal moves.
Attempt to sort moves by best to worst.
Use piece values (and positional gains/losses) to weight captures.
"""
end_game = check_end_game(board)
def orderer(move):
return move_value(board, move, end_game)
in_order = sorted(
board.legal_moves, key=orderer, reverse=(board.turn == chess.WHITE)
)
return list(in_order)
I've been working on a MCTS AI for a couple days now. I tried to implement it on Tic-Tac-Toe, the least complex game I could think of, but for some reason, my AI keeps making bad decisions. I've tried change the values of UCB1's exploration constant, the number of iterations per search, and even the points awarded to winning, losing, and getting to tie the game (trying to make a tie more rewarding, as this AI only plays second, and try to get a draw, win otherwise). As of now, the code looks like this:
import random
import math
import copy
class tree:
def __init__(self, board):
self.board = board
self.visits = 0
self.score = 0
self.children = []
class mcts:
def search(self, mx, player,):
root = tree(mx)
for i in range(1200):
leaf = mcts.expand(self, root.board, player, root)
result = mcts.rollout(self, leaf)
mcts.backpropagate(self, leaf, root, result)
return mcts.best_child(self, root).board
def expand(self, mx, player, root):
plays = mcts.generate_states(self, mx, player) #all possible plays
if root.visits == 0:
for j in plays:
root.children.append(j) #create child_nodes in case they havent been created yet
for j in root.children:
if j.visits == 0:
return j #first iterations of the loop
for j in plays:
if mcts.final(self, j.board, player):
return j
return mcts.best_child(self, root) #choose the one with most potential
def rollout(self, leaf):
mx = leaf.board
aux = 1
while mcts.final(self, mx, "O") != True:
if aux == 1: # "X" playing
possible_states = []
possible_nodes = mcts.generate_states(self, mx, "X")
for i in possible_nodes:
possible_states.append(i.board)
if len(possible_states) == 1: mx = possible_states[0]
else:
choice = random.randrange(0, len(possible_states) - 1)
mx = possible_states[choice]
if mcts.final(self, mx, "X"): #The play by "X" finished the game
break
elif aux == 0: # "O" playing
possible_states = []
possible_nodes = mcts.generate_states(self, mx, "O")
for i in possible_nodes:
possible_states.append(i.board)
if len(possible_states) == 1: mx = possible_states[0]
else:
choice = random.randrange(0, len(possible_states) - 1)
mx = possible_states[choice]
aux += 1
aux = aux%2
if mcts.final(self, mx, "X"):
for i in range(len(mx)):
for k in range(len(mx[i])):
if mx[i][k] == "-":
return -1 #loss
return 0 #tie
elif mcts.final(self, mx, "O"):
for i in range(len(mx)):
for k in range(len(mx[i])):
if mx[i][k] == "-":
return 1 #win
def backpropagate(self, leaf, root, result): # updating our prospects stats
leaf.score += result
leaf.visits += 1
root.visits += 1
def generate_states(self, mx, player):
possible_states = [] #generate child_nodes
for i in range(len(mx)):
for k in range(len(mx[i])):
if mx[i][k] == "-":
option = copy.deepcopy(mx)
option[i][k] = player
child_node = tree(option)
possible_states.append(child_node)
return possible_states
def final(self,mx, player): #check if game is won
possible_draw = True
win = False
for i in mx: #lines
if i == [player, player, player]:
win = True
possible_draw = False
if mx[0][0] == player: #diagonals
if mx[1][1] == player:
if mx[2][2] == player:
win = True
possible_draw = False
if mx[0][2] == player:
if mx[1][1] == player:
if mx[2][0] == player:
win = True
possible_draw = False
for i in range(3): #columns
if mx[0][i] == player and mx[1][i] == player and mx[2][i] == player:
win = True
possible_draw = False
for i in range(3):
for k in range(3):
if mx[i][k] == "-":
possible_draw = False
if possible_draw:
return possible_draw
return win
def calculate_score(self, score, child_visits, parent_visits, c): #UCB1
return score / child_visits + c * math.sqrt(math.log(parent_visits) / child_visits)
def best_child(self, root): #returns most promising node
treshold = -1*10**6
for j in root.children:
potential = mcts.calculate_score(self, j.score, j.visits, root.visits, 2)
if potential > treshold:
win_choice = j
treshold = potential
return win_choice
#todo the AI takes too long for each play, optimize that by finding the optimal approach in the rollout phase
First off, the purpose of this AI is to return an altered matrix, with the best play he could make in that circunstance. I find myself questioning if the MCTS algorithm is the reason behind all these broken plays, due to some possible mistakes in its implementation. With that said, in my eyes, the code does the following:
Check if the root already has its children, in case it has, choose the most promising.
Rollout a random simulation and save the result.
Update the leaf's score, its number of visits and the root's number of visits.
Repeat for 1200 iterations, in my example
Return the best move (matrix, child_node) possible.
Why is it not working? Why is it choosing bad plays instead of the optimal one? Is the algorithm wrongly implemented?
My mistake was choosing the node with the most visits in the expansion phase, when it should have been the one with the most potential according to the UCB1 formula. I also had some errors when it came to implementing some if clauses, as all the losses weren't being counted.
Can anyone tell me why my code prints 1 and not 8? It seems to not be going through very single state. Why is that?
using the minimax algorithm find the best possible move to make based on a game state, a possible tic tac toe board. Usually, it would branch off into a large tree of game states, each new branch called when the game doesn't end on an ending state, repeated, then finding the best possible move by recursively going down the tree finding the best moves for each player.
I was following the "tutorial" at http://giocc.com/concise-implementation-of-minimax-through-higher-order-functions.html.
My code:
#!/usr/bin/env python3
'''Minimax finds the best possible moves by applying a set of rules.
A win = 1, tie = 0, loss = -1 (for us). Assuming that each player chooses the best move
(we choose 1 if possible, opponent chooses -1). Starting at the top of a 'game tree',
generate the possible moves we can make. If It reaches a terminal state, stop. Otherwise keep searching in depth.
We find max.
'''
#[0,1,2,3,4,5,6,7,8]
class GameState: #a game state is a certain state of the board
#http://stackoverflow.com/questions/1537202/variables-inside-and-outside-of-a-class-init-function
x_went_first = True
def __init__(self,board):
self.board = board
self.winning_combos = [[0,1,2],[3,4,5],[6,7,8],[0,3,6],[1,4,7],[2,5,8],[0,4,8],[2,4,8]]
def is_gameover(self):
if self.board.count('X') + self.board.count('O') == 9:
return True
for combo in self.winning_combos:
if (self.board[combo[0]] == 'X' and self.board[combo[1]] == 'X' and self.board[combo[2]] == 'X') or (self.board[combo[0]] == 'O' and self.board[combo[1]] == 'O' and self.board[combo[2]] == 'O'):
return True
return False
def get_possible_moves(self):
squares = []
for square in self.board:
if square != 'X' and square != 'O':
squares.append(int(square))
return squares
def get_next_state(self, move):
copy = self.board
num_of_x = copy.count('X')
num_of_o = copy.count('O')
#x starts, o's turn 1 > 0 o's turn
#o starts, x's turn 1 < 0 x's turn
#x starts, x's turn 1 > 1
#o starts, o's turn 1 < 1
if (self.x_went_first and num_of_x > num_of_o) or (self.x_went_first is not True and num_of_o == num_of_x):
copy[move] = 'O'
else:
copy[move] = 'X'
return GameState(copy)
def evals(game_state):
for combo in [[0,1,2],[3,4,5],[6,7,8],[0,3,6],[1,4,7],[2,5,8],[0,4,8],[2,4,8]]:
if game_state.board[0] == 'X' and game_state.board[1] == 'X' and game_state.board[2] == 'X':
return 1
elif game_state.board[0] == 'O' and game_state.board[1] == 'O' and game_state.board[2] == 'O':
return -1
else:
return 0
def min_play(game_state):
if game_state.is_gameover():
return evals(game_state)
moves = game_state.get_possible_moves()
best_move = moves[0]
best_score = 2 #not possible, best score is -1
for move in moves:
clone = game_state.get_next_state(move)
score = max_play(clone)
if score < best_score:
best_move = move
best_score = score
return best_score
def max_play(game_state):
if game_state.is_gameover():
return evals(game_state)
moves = game_state.get_possible_moves()
best_score = -2 #not possible, best score is 1
for move in moves:
clone = game_state.get_next_state(move)
score = min_play(clone)
if score > best_score:
best_move = move
best_score = score
return best_score
def minimax(game_state):
moves = game_state.get_possible_moves()
best_move = moves[0]
best_score = -2
for move in moves:
clone = game_state.get_next_state(move)
score = min_play(clone)
if score > best_score:
best_move = move
best_score = score
return best_move
game = GameState(['X',1,2,
3,'O',5,
6,7,8])
print(minimax(game))
My evals was always returning 0, and one of the winning combinations was messed up. New evals:
def evals:
for combo in [[0,1,2],[3,4,5],[6,7,8],[0,3,6],[1,4,7],[2,5,8],[0,4,8],[2,4,6]]:
if game_state.board[0] == 'X' and game_state.board[1] == 'X' and game_state.board[2] == 'X':
return 1
elif game_state.board[0] == 'O' and game_state.board[1] == 'O' and game_state.board[2] == 'O':
return -1
return 0
I also modified it so the index isn't in every empty slot. View the full code at https://github.com/retep-mathwizard/pyai/blob/master/minimax_ttt
I'm trying to use the alpha-beta minimax pruning algorithm to return a valid move from my board. The algorithm returns the correct value, but I have no idea how I would return the move as well. In the case of this code, I would want to return the child in get_successor_states when the value of bestValue is more than the current alpha. I thought about returning two values at the end of the max and min like return bestValue, child but I have no idea how I would get that to work with the other calls
def alpha_beta_pruning(board, depth, alpha, beta, isMaximizingPlayer):
if depth == 0:
return evaluate_state(board)
if isMaximizingPlayer:
bestValue = -sys.maxint - 1
bestMove = None
for child in get_successor_states(board):
temp_board = copy.deepcopy(board)
temp_board.move_queen(child[0], child[1])
temp_board.shoot_arrow(child[2])
bestValue = max(bestValue, alpha_beta_pruning(temp_board, depth-1, alpha, beta, False))
del temp_board
alpha = max(alpha, bestValue)
if beta <= alpha:
break
return bestValue
else:
bestValue = sys.maxint
for child in get_successor_states(board):
temp_board = copy.deepcopy(board)
temp_board.move_queen(child[0], child[1])
temp_board.shoot_arrow(child[2])
bestValue = min(bestValue, alpha_beta_pruning(temp_board, depth-1, alpha, beta, True))
del temp_board
beta = min(beta, bestValue)
if beta <= alpha:
break
return bestValue
I want to implement an agent for 3-Men's Morris game-which is very similar to tic-tac-toe game- and i want to use Minimax strategy with Alpha-Beta Pruning, here's my code in Python based on this post and this post on StackOverflow , but it doesn't work!! it gives a wrong solution,even when one of successors of current state is solution
def alpha_beta(state,alpha,beta,turn,depth):
if int(terminal_test(state,turn)) == int(MY_NUMBER):
return 1 #win
elif (int(terminal_test(state,turn))!=0) and (int(terminal_test(state,turn))!=int(MY_NUMBER)) :
return -1 #loose
else:
if int(depth) == 13:
return 0 #reached limit
moves = successors(state,turn,int(depth))
#valid moves for player based on rules
for move in moves:
state = make_move(state,move,turn)
current_eval = -alpha_beta(state, -beta, -alpha, 2-int(turn),int(depth)+1)
state = undo_move(state,move,turn)
if current_eval >= beta:
return beta
if current_eval > alpha:
alpha = current_eval
return alpha
def rootAlphaBeta(state,depth, turn):
best_move = None
max_eval = float('-infinity')
moves = successors(state,turn,int(depth))
alpha = float('infinity')
for move in moves:
state = make_move(state,move,turn)
alpha = -alpha_beta(state, float('-infinity'), alpha, 2-int(turn),int(depth)+1)
state = undo_move(state,move,turn)
if alpha > max_eval:
max_eval = alpha
best_move = move
#best_move which is selected here is not really the best move!
return best_move