Can anyone tell me why my code prints 1 and not 8? It seems to not be going through very single state. Why is that?
using the minimax algorithm find the best possible move to make based on a game state, a possible tic tac toe board. Usually, it would branch off into a large tree of game states, each new branch called when the game doesn't end on an ending state, repeated, then finding the best possible move by recursively going down the tree finding the best moves for each player.
I was following the "tutorial" at http://giocc.com/concise-implementation-of-minimax-through-higher-order-functions.html.
My code:
#!/usr/bin/env python3
'''Minimax finds the best possible moves by applying a set of rules.
A win = 1, tie = 0, loss = -1 (for us). Assuming that each player chooses the best move
(we choose 1 if possible, opponent chooses -1). Starting at the top of a 'game tree',
generate the possible moves we can make. If It reaches a terminal state, stop. Otherwise keep searching in depth.
We find max.
'''
#[0,1,2,3,4,5,6,7,8]
class GameState: #a game state is a certain state of the board
#http://stackoverflow.com/questions/1537202/variables-inside-and-outside-of-a-class-init-function
x_went_first = True
def __init__(self,board):
self.board = board
self.winning_combos = [[0,1,2],[3,4,5],[6,7,8],[0,3,6],[1,4,7],[2,5,8],[0,4,8],[2,4,8]]
def is_gameover(self):
if self.board.count('X') + self.board.count('O') == 9:
return True
for combo in self.winning_combos:
if (self.board[combo[0]] == 'X' and self.board[combo[1]] == 'X' and self.board[combo[2]] == 'X') or (self.board[combo[0]] == 'O' and self.board[combo[1]] == 'O' and self.board[combo[2]] == 'O'):
return True
return False
def get_possible_moves(self):
squares = []
for square in self.board:
if square != 'X' and square != 'O':
squares.append(int(square))
return squares
def get_next_state(self, move):
copy = self.board
num_of_x = copy.count('X')
num_of_o = copy.count('O')
#x starts, o's turn 1 > 0 o's turn
#o starts, x's turn 1 < 0 x's turn
#x starts, x's turn 1 > 1
#o starts, o's turn 1 < 1
if (self.x_went_first and num_of_x > num_of_o) or (self.x_went_first is not True and num_of_o == num_of_x):
copy[move] = 'O'
else:
copy[move] = 'X'
return GameState(copy)
def evals(game_state):
for combo in [[0,1,2],[3,4,5],[6,7,8],[0,3,6],[1,4,7],[2,5,8],[0,4,8],[2,4,8]]:
if game_state.board[0] == 'X' and game_state.board[1] == 'X' and game_state.board[2] == 'X':
return 1
elif game_state.board[0] == 'O' and game_state.board[1] == 'O' and game_state.board[2] == 'O':
return -1
else:
return 0
def min_play(game_state):
if game_state.is_gameover():
return evals(game_state)
moves = game_state.get_possible_moves()
best_move = moves[0]
best_score = 2 #not possible, best score is -1
for move in moves:
clone = game_state.get_next_state(move)
score = max_play(clone)
if score < best_score:
best_move = move
best_score = score
return best_score
def max_play(game_state):
if game_state.is_gameover():
return evals(game_state)
moves = game_state.get_possible_moves()
best_score = -2 #not possible, best score is 1
for move in moves:
clone = game_state.get_next_state(move)
score = min_play(clone)
if score > best_score:
best_move = move
best_score = score
return best_score
def minimax(game_state):
moves = game_state.get_possible_moves()
best_move = moves[0]
best_score = -2
for move in moves:
clone = game_state.get_next_state(move)
score = min_play(clone)
if score > best_score:
best_move = move
best_score = score
return best_move
game = GameState(['X',1,2,
3,'O',5,
6,7,8])
print(minimax(game))
My evals was always returning 0, and one of the winning combinations was messed up. New evals:
def evals:
for combo in [[0,1,2],[3,4,5],[6,7,8],[0,3,6],[1,4,7],[2,5,8],[0,4,8],[2,4,6]]:
if game_state.board[0] == 'X' and game_state.board[1] == 'X' and game_state.board[2] == 'X':
return 1
elif game_state.board[0] == 'O' and game_state.board[1] == 'O' and game_state.board[2] == 'O':
return -1
return 0
I also modified it so the index isn't in every empty slot. View the full code at https://github.com/retep-mathwizard/pyai/blob/master/minimax_ttt
Related
I am trying to use minimax algo to find a best move for a game.
The game is 2 players take turns to pick a number from a pile [0,1,2,,,8]
Winning Condition: player wins the game if the player contains three numbers and their sum is 14.
The initial game state is given by the argument like:
4 1 2 3 4
The first number indicate the total length and the following numbers are the numbers taken by player each turn. In this example the player A has [1,3] and player B has [2,4]
My problem is the algo can't give the correct answer:
Like when the input is 4 4 1 7 8 my program cannot gives the correct move: it takes 0 not 3 for next move.
import sys
import math
totalMoves = int(sys.argv[1])
pile = [0, 1, 2, 3, 4, 5, 6, 7, 8]
# who are u
you = []
opponent = []
# cards in hand
playerA = []
playerB = []
# get all the cards into hand
for i in range(totalMoves):
# save card into playerB
if i % 2 != 0:
playerB.append(int(sys.argv[i+2]))
# save card into playerA
if i % 2 == 0:
playerA.append(int(sys.argv[i+2]))
# remove sent card in list
pile.remove(int(sys.argv[i+2]))
# identify which player you are
if totalMoves % 2 == 0:
you = playerA
opponent = playerB
else:
you = playerB
opponent = playerA
def find3Numbers(player): # calcluate the card in hand is equal to 14
arr_size = len(player)
if (len(player) >= 3):
for i in range(0, arr_size-2):
for j in range(i + 1, arr_size-1):
for k in range(j + 1, arr_size):
if (player[i] + player[j] + player[k] == 14):
return True
else:
return False
def isNumberLeft(board):
if (len(board) == 0):
return False
else:
return True
def evaluate(player, oppo):
if (find3Numbers(player) == True):
return 10
elif (find3Numbers(oppo) == True):
return -10
else:
return 0
def minimax(board, player, oppo, depth, isMax):
score = evaluate(player, oppo)
if (score == 10):
return score
if (score == -10):
return score
if (isNumberLeft(board) == False):
return 0
if (isMax):
best = -math.inf
for card in board:
player.append(card)
board.remove(card)
best = max(best, minimax(board, player, oppo, depth+1, not isMax))
board.append(card)
player.remove(card)
return best
else:
best = +math.inf
for card in board:
oppo.append(card)
board.remove(card)
best = min(best, minimax(board, player, oppo, depth+1, not isMax))
board.append(card)
oppo.remove(card)
return best
def bestMove():
bestScore = -math. inf
bestNum = -math.inf
for card in pile:
you.append(card)
pile.remove(card)
score = minimax(pile, you, opponent, 0, False)
pile.append(card)
you.remove(card)
if (bestScore < score):
bestNum = card
bestScore = score
return bestNum
you.append(bestMove())
I've been working on a MCTS AI for a couple days now. I tried to implement it on Tic-Tac-Toe, the least complex game I could think of, but for some reason, my AI keeps making bad decisions. I've tried change the values of UCB1's exploration constant, the number of iterations per search, and even the points awarded to winning, losing, and getting to tie the game (trying to make a tie more rewarding, as this AI only plays second, and try to get a draw, win otherwise). As of now, the code looks like this:
import random
import math
import copy
class tree:
def __init__(self, board):
self.board = board
self.visits = 0
self.score = 0
self.children = []
class mcts:
def search(self, mx, player,):
root = tree(mx)
for i in range(1200):
leaf = mcts.expand(self, root.board, player, root)
result = mcts.rollout(self, leaf)
mcts.backpropagate(self, leaf, root, result)
return mcts.best_child(self, root).board
def expand(self, mx, player, root):
plays = mcts.generate_states(self, mx, player) #all possible plays
if root.visits == 0:
for j in plays:
root.children.append(j) #create child_nodes in case they havent been created yet
for j in root.children:
if j.visits == 0:
return j #first iterations of the loop
for j in plays:
if mcts.final(self, j.board, player):
return j
return mcts.best_child(self, root) #choose the one with most potential
def rollout(self, leaf):
mx = leaf.board
aux = 1
while mcts.final(self, mx, "O") != True:
if aux == 1: # "X" playing
possible_states = []
possible_nodes = mcts.generate_states(self, mx, "X")
for i in possible_nodes:
possible_states.append(i.board)
if len(possible_states) == 1: mx = possible_states[0]
else:
choice = random.randrange(0, len(possible_states) - 1)
mx = possible_states[choice]
if mcts.final(self, mx, "X"): #The play by "X" finished the game
break
elif aux == 0: # "O" playing
possible_states = []
possible_nodes = mcts.generate_states(self, mx, "O")
for i in possible_nodes:
possible_states.append(i.board)
if len(possible_states) == 1: mx = possible_states[0]
else:
choice = random.randrange(0, len(possible_states) - 1)
mx = possible_states[choice]
aux += 1
aux = aux%2
if mcts.final(self, mx, "X"):
for i in range(len(mx)):
for k in range(len(mx[i])):
if mx[i][k] == "-":
return -1 #loss
return 0 #tie
elif mcts.final(self, mx, "O"):
for i in range(len(mx)):
for k in range(len(mx[i])):
if mx[i][k] == "-":
return 1 #win
def backpropagate(self, leaf, root, result): # updating our prospects stats
leaf.score += result
leaf.visits += 1
root.visits += 1
def generate_states(self, mx, player):
possible_states = [] #generate child_nodes
for i in range(len(mx)):
for k in range(len(mx[i])):
if mx[i][k] == "-":
option = copy.deepcopy(mx)
option[i][k] = player
child_node = tree(option)
possible_states.append(child_node)
return possible_states
def final(self,mx, player): #check if game is won
possible_draw = True
win = False
for i in mx: #lines
if i == [player, player, player]:
win = True
possible_draw = False
if mx[0][0] == player: #diagonals
if mx[1][1] == player:
if mx[2][2] == player:
win = True
possible_draw = False
if mx[0][2] == player:
if mx[1][1] == player:
if mx[2][0] == player:
win = True
possible_draw = False
for i in range(3): #columns
if mx[0][i] == player and mx[1][i] == player and mx[2][i] == player:
win = True
possible_draw = False
for i in range(3):
for k in range(3):
if mx[i][k] == "-":
possible_draw = False
if possible_draw:
return possible_draw
return win
def calculate_score(self, score, child_visits, parent_visits, c): #UCB1
return score / child_visits + c * math.sqrt(math.log(parent_visits) / child_visits)
def best_child(self, root): #returns most promising node
treshold = -1*10**6
for j in root.children:
potential = mcts.calculate_score(self, j.score, j.visits, root.visits, 2)
if potential > treshold:
win_choice = j
treshold = potential
return win_choice
#todo the AI takes too long for each play, optimize that by finding the optimal approach in the rollout phase
First off, the purpose of this AI is to return an altered matrix, with the best play he could make in that circunstance. I find myself questioning if the MCTS algorithm is the reason behind all these broken plays, due to some possible mistakes in its implementation. With that said, in my eyes, the code does the following:
Check if the root already has its children, in case it has, choose the most promising.
Rollout a random simulation and save the result.
Update the leaf's score, its number of visits and the root's number of visits.
Repeat for 1200 iterations, in my example
Return the best move (matrix, child_node) possible.
Why is it not working? Why is it choosing bad plays instead of the optimal one? Is the algorithm wrongly implemented?
My mistake was choosing the node with the most visits in the expansion phase, when it should have been the one with the most potential according to the UCB1 formula. I also had some errors when it came to implementing some if clauses, as all the losses weren't being counted.
I have recently being try to make a tic tac toe game using the mini max algorithm. I first created a board, then two player. Afterwards, I changed one of the players into the algorithm. I tried using something similar to this javascript implementation. I am not getting an syntax error, just the algorithm is not working.
For example, take the following game sequence.
The algorithm starts the game and places an "X" at the top right of the board or in index[0].
I, the player place "O" in the top right of the board or in index[2].
The algorithm places an "X" at the top center of the board or in index [1].
I, the player place "O" in the middle center of the board or in index[4].
The algorithm places an "X" at the middle left of the board or in index [3].
The error is that rather than stopping the win, the algorithm is playing the next free position.
In the min-max algorithm, this is a minimizing agent (seeks the least score) and the maximizing agent (the higher score) (in this case the AI). Below is the code, can you help me find the issue? Or how should I go along? Because I have been trying for the last two days. Hopefully the explanation above made sense.
board = ["."] * 9
winning_comb = [[0,1,2],[3,4,5],[6,7,8],[0,3,6],[1,4,7],[2,5,8],[3,4,6],[0,4,8]]
game = True
def new_board():
print (board[0] + "|" + board[1] + "|" + board[2])
print (board[3] + "|" + board[4] + "|" + board[5])
print (board[6] + "|" + board[7] + "|" + board[8])
new_board()
def winning(comb):
global game
for l in range(len(winning_comb)):
a = winning_comb[l][0]
f = winning_comb[l][1]
v = winning_comb[l][2]
if comb[a] == comb[f] == comb[v] == "O" or "x" == comb[a] == comb[f] == comb[v]:
game = False
if comb[a] == "x":
return 1
else:
return -1
break
else:
game = True
def minmax(board,depth, ismax):
if winning(board) != None:
h = winning(board)
return h
else:
if ismax == True:
bestscore = float('-inf')
for k in range(len(board)):
if board[k] == ".":
board[k] = "x"
score = minmax(board,depth+1,False)
board[k] = '.'
bestscore = max(bestscore, score)
return bestscore
else:
bestscore = float('inf')
for k in range(len(board)):
if board[k] == ".":
board[k] = "O"
score = minmax(board,depth+1,True)
board[k] = '.'
bestscore = min(bestscore,score)
return bestscore
def player1() :
bestscore = float('-inf')
bestmove = 0
for k in range(len(board)):
if board[k] == ".":
board[k] = "x"
score = minmax(board, 0, False)
board[k] = "."
if score > bestscore:
bestscore = score
bestmove = k
board[bestmove] = "x"
new_board()
def player2():
number = int(input("Please enter your poistion?") )
board[number - 1 ] = "O"
new_board()
winning(board)
while game==True:
player1()
player2()
[3,4,6] shouldn't be a winning combination. It should be [2,4,6]
Some issues:
[3,4,6] should be [2,4,6]
Indentation of return bestscore is wrong in the second instance: currently it interrupts the loop.
If you backtrack after game = False is executed, that assignment should be undone. For that reason it is probably easier to not use that variable at all, and just call the function winning when needed. The main loop could then be:
while winning(board) is None:
player1()
if winning(board) is not None: # need to exit
break
player2()
bestscore will be (-)infinity when there is no more free cell in the grid, and the game really is a draw. In that case bestscore should be 0, so this draw is considered better than a loss, and worse than a win. So make sure winning does not return None in that case, but 0:
if not "." in comb:
return 0
Not an error, but it is a bit odd that some functions take the board as argument, and others not. Make this consistent, and always pass the board as argument (player1(board), player2(board) and winning(board)).
With these changes, the AI will play the best play, although the calculation for the first move takes quite some time. You could improve by applying alpha-beta pruning.
I have an excercise to do and I'm stuck. It's the board game Alak, not much known, that I have to code in python. I can link the execrcise with the rules so you can help me better. The code has the main part and the library with all the procedures and function.
from Library_alak import *
n = 0
while n < 1:
n = int(input('Saisir nombre de case strictement positif : '))
loop = True
player = 1
player2 = 2
removed = [-1]
board = newboard(n)
display(board, n)
while loop:
i = select(board, n, player, removed)
print(i)
board = put(board, player, i)
display(board, n)
capture(board, n, player, player2)
loop = True if again(board, n, player, removed) is True else False
if player == 1 and loop:
player, player2 = 2, 1
elif player == 2 and loop:
player, player2 = 1, 2
win(board, n)
print(win(board, n))
And here is the library:
def newboard(n):
board = ([0] * n)
return board
def display(board, n):
for i in range(n):
if board[i] == 1:
print('X', end=' ')
elif board[i] == 2:
print('O', end=' ')
else:
print(' . ', end=' ')
def capture(board, n, player, player2):
for place in range(n):
if place == player:
place_beginning = place
while board[place] != player:
place_end = place
if board[place + x] == player:
return board
else:
return board
def again(board, n, player, removed):
for p in board(0):
if p == 0:
if p not in removed:
return True
else:
return False
def possible(n, removed, player, i, board):
for p in range(n + 1):
if p == 1:
if board[p-1] == 0:
if p not in removed:
return True
else:
return False
def win(board, n):
piecesp1 = 0
piecesp2 = 0
for i in board(0):
if i == 1:
piecesp1 += 1
else:
piecesp2 += 1
if piecesp1 > piecesp2:
print('Victory : Player 1')
elif piecesp2 > piecesp1:
print('Victory : Player 2')
else:
return 'Equality'
def select(board, n, player, removed):
loop = True
while loop:
print('player', player)
i = int(input('Enter number of boxes : '))
loop = False if possible(n, removed, player, i, board)is True else True
return i
def put(board, player, i):
i -= 1
if board[i] == 0:
if player == 1:
board[i] = 1
return board
else:
board[i] = 2
return board
else:
put(board, player, i)
So my problems here are that I have few errors, the first one is that when I enter the number '1' when asked to enter a number of boxes ( which is the place to play on ) nothing happens. Then when entering any other number, either the error is : if board[place + x] == player:
NameError: name 'x' is not defined
or there seems to be a problem with the : if board[place + x] == player:
NameError: name 'x' is not defined
I would appreciate a lot if anyone could help me. I'm conscious that it might not be as detailed as it should be and that you maybe don't get it all but you can contact me for more.
Rules of the Alak game:
Black and white take turns placing stones on the line. Unlike Go, this placement is compulsory if a move is available; if no move is possible, the game is over.
No stone may be placed in a location occupied by another stone, or in a location where a stone of your own colour has just been removed. The latter condition keeps the game from entering a neverending loop of stone placement and capture, known in Go as ko.
If placing a stone causes one or two groups of enemy stones to no longer have any adjacent empty spaces--liberties, as in Go--then those stones are removed. As the above rule states, the opponent may not play in those locations on their following turn.
If placing a stone causes one or two groups of your own colour to no longer have any liberties, the stones are not suicided, but instead are safe and not removed from play.
You shouldn't use "player2" as a variable, there's an easier way, just use "player" which take the value 1 or 2 according to the player. You know, something like that : player = 1 if x%2==0 else 2
and x is just a increasing int from 0 until the end of the game.
I've been trying out to solve the monty hall problem in Python in order to advance in coding, which is why I tried to randomize everything. The thing is: I've been running into some trouble. As most of you probably know the monty problem is supposed to show that changing the door has a higher winrate (66%) than staying on the chosen door (33%). For some odd reason though my simulation shows a 33% winrate for both cases and I am not really sure why.
Here's the code:
from random import *
def doorPriceRandomizer():
door1 = randint(0,2) #If a door is defined 0, it has a price in it
door2 = randint(0,2) #If a door is defined either 1 or 2, it has a goat in it.
door3 = randint(0,2)
while door2 == door1:
door2 = randint(0,2)
while door3 == door2 or door3 == door1:
door3 = randint(0,2)
return door1,door2,door3 #This random placement generator seems to be working fine.
while True:
loopStart = 0
amountWin = 0
amountLose = 0
try:
loopEnd = int(input("How often would you like to run this simulation: "))
if loopEnd < 0:
raise ValueError
doorChangeUser = int(input("[0] = Do not change door; [1] = Change door: "))
if doorChangeUser not in range(0,2):
raise ValueError
except ValueError:
print("Invalid input. Try again.\n")
else:
while loopStart != loopEnd:
gameDoors = doorPriceRandomizer()
inputUser = randint(0,2)
if doorChangeUser == 0:
if gameDoors[inputUser] == 0:
amountWin += 1
loopStart += 1
else:
amountLose += 1
loopStart += 1
elif doorChangeUser == 1:
ChangeRandom = 0
while gameDoors[ChangeRandom] == gameDoors[inputUser]:
ChangeRandom = randint(0,2)
if gameDoors[ChangeRandom] == 0:
amountWin += 1
loopStart += 1
else:
amountLose += 1
loopStart += 1
print("Win amount: ",amountWin,"\tLose amount: ",amountLose)
What am I doing wrong? I really appreciate all help! Thanks in advance!
ChangeRandom = 0
while gameDoors[ChangeRandom] == gameDoors[inputUser]:
ChangeRandom = randint(0,2)
This doesn't do what you think it does. Instead of checking if the ChangeRandom door is the same as the inputUser door, this checks if the ChangeRandom door and the inputUser door have the same value -- that is to say they're either both winners or both losers.
That said, that's not even what you want to do. What you want to do is to find a door that's not the user's input that IS a loser door, then switch to the OTHER one that isn't the user's input. This could be implemented with minimal change to your code as:
other_wrong_door = next(c for c, v in enumerate(gameDoors) if v != 0 and c != inputUser)
new_door = next(c for c, _ in enumerate(gameDoors) if c != inputUser and c != other_wrong_door)
But honestly this merits a re-examining of your code's structure. Give me a few minutes to work something up, and I'll edit this answer to give you an idea of how I'd implement this.
import random
DOORS = [1, 0, 0]
def runonce(switch=False):
user_choice = random.choice(DOORS)
if user_choice == 1:
# immediate winner
if switch:
# if you won before and switch doors, you must lose now
return False
else:
new_doors = [0, 0] # remove the user-selected winner
new_doors = [0] # remove another loser
return bool(random.choice(new_doors))
# of course, this is always `0`, but
# sometimes it helps to show it. In production you
# wouldn't bother writing the extra lines and just return False
else:
if switch:
new_doors = [1, 0] # remove the user-selected loser
new_doors = [1] # remove another loser
return bool(random.choice(new_doors))
# as above: this is always True, but....
else:
return False # if you lost before and don't switch, well, you lost.
num_trials = int(input("How many trials?"))
no_switch_raw = [run_once(switch=False) for _ in range(num_trials)]
switch_raw = [run_once(switch=True) for _ in range(num_trials)]
no_switch_wins = sum(1 for r in no_switch_raw if r)
switch_wins = sum(1 for r in switch_raw if r)
no_switch_prob = no_switch_wins / num_trials * 100.0
switch_prob = switch_wins / num_trials * 100.0
print( " WINS LOSSES %\n"
f"SWITCH: {switch_wins:>4} {num_trials-switch_wins:>6} {switch_prob:.02f}\n"
f"NOSWITCH:{no_switch_wins:>4} {num_trials-no_switch_wins:>6} {no_switch_prob:.02f}")
You have gotten the mechanics of the problem wrong so you are getting the wrong result. I have rewritten the choice mechanics, but I am leaving the user input stuff to you so that you can continue to learn python. This is one of many ways to solve the problem, but hopefully it demonstrates some things to you.
def get_choices():
valid_choices = [0, 1, 2] # these are the values for a valid sample
shuffle(valid_choices) # now randomly shuffle that list
return valid_choices # return the shuffled list
def get_door(user_choice):
return user_choice.index(0)
def monty_sim(n, kind):
"""
:param n: number of runs in this simulation
:param kind: whether to change the door or not, 0 - don't change, 1 = change door
:return: (win_rate, 1 - win_rate)
"""
wins = 0
for i in range(0, n):
game_doors = get_choices()
user_choice = get_door(get_choices()) # use the same method and find user door choice
# so there are two branches.
# In both, a door with a goat (game_door = 1) is chosen, which reduce the result to
# a choice between two doors, rather than 3.
if kind == 0:
if user_choice == game_doors.index(0):
wins += 1
elif kind == 1:
# so now, the user chooses to change the door
if user_choice != game_doors.index(0):
wins += 1
# Because the original choice wasn't the right one, then the new
# must be correct because the host already chose the other wrong one.
win_rate = (wins / n) * 100
return win_rate, 100 - win_rate
if __name__ == '__main__':
n = 1000
kind = 1
wins, loses = monty_sim(n, kind)
print(f'In a simulation of {n} experiments, of type {kind} user won {wins:02f} of the time, lost {loses:02f} of the time')