├── .gitignore ├── LICENSE ├── README.md ├── gamestate.py ├── gtp.py ├── gui.py ├── image ├── hex.png ├── ssss.png ├── uut.png ├── uut_2.png └── uut_3.jpg ├── main.py ├── meta.py ├── paper └── CONFITC04_172.pdf ├── playtest.py ├── qb_mctsagent.py ├── rave_mctsagent.py ├── resources └── demo.gif ├── tournament.py ├── ucb1_tuned_mctsagent.py ├── uct_mcstsagent.py └── unionfind.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### Python template 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | pip-wheel-metadata/ 26 | share/python-wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .nox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | *.py,cover 53 | .hypothesis/ 54 | .pytest_cache/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | local_settings.py 63 | db.sqlite3 64 | db.sqlite3-journal 65 | 66 | # Flask stuff: 67 | instance/ 68 | .webassets-cache 69 | 70 | # Scrapy stuff: 71 | .scrapy 72 | 73 | # Sphinx documentation 74 | docs/_build/ 75 | 76 | # PyBuilder 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # IPython 83 | profile_default/ 84 | ipython_config.py 85 | 86 | # pyenv 87 | .python-version 88 | 89 | # pipenv 90 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 91 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 92 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 93 | # install all needed dependencies. 94 | #Pipfile.lock 95 | 96 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 97 | __pypackages__/ 98 | 99 | # Celery stuff 100 | celerybeat-schedule 101 | celerybeat.pid 102 | 103 | # SageMath parsed files 104 | *.sage.py 105 | 106 | # Environments 107 | .env 108 | .venv 109 | env/ 110 | venv/ 111 | ENV/ 112 | env.bak/ 113 | venv.bak/ 114 | 115 | # Spyder project settings 116 | .spyderproject 117 | .spyproject 118 | 119 | # Rope project settings 120 | .ropeproject 121 | 122 | # mkdocs documentation 123 | /site 124 | 125 | # mypy 126 | .mypy_cache/ 127 | .dmypy.json 128 | dmypy.json 129 | 130 | # Pyre type checker 131 | .pyre/ 132 | 133 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Kenny Young 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # HexPy 2 | 3 | ### Monte Carlo Tree Search Agent for the game of HEX 4 | 5 | ## Demo: 6 | 7 | ![Demo of MCTS General Game Player](https://github.com/masouduut94/MCTS-agent-python/blob/master/resources/demo.gif) 8 | 9 | ## Description 10 | This code belongs to this paper **:link: [IMPROVING MONTE CARLO TREE SEARCH BY COMBINING 11 | RAVE AND QUALITY-BASED REWARDS ALGORITHMS](https://github.com/masouduut94/MCTS-agent-python/blob/master/paper/CONFITC04_172.pdf)**. 12 | 13 | ### what is Monte Carlo Tree Search(MCTS)? 14 | MONTE Carlo Tree Search (MCTS) is a method for finding optimal decisions in a given domain by 15 | taking random samples in the decision space and building a search tree according to the results. 16 | It has already had a profound impact on Artificial Intelligence (AI) approaches for domains that 17 | can be represented as trees of sequential decisions, particularly games and planning problems. 18 | In this project I used different simulation strategies to enhance the agent policy to explore the environment. 
19 | 20 | from :link: [A Survey of Monte Carlo Tree Search Methods](http://ieeexplore.ieee.org/abstract/document/6145622/) 21 | 22 | ### About contribution 23 | Before you go through the details, I recommend you to get familiar with the framework reading these medium articles: 24 | - [A simple no math included introduction to reinforcement learning](https://towardsdatascience.com/monte-carlo-tree-search-a-case-study-along-with-implementation-part-1-ebc7753a5a3b) 25 | - [A simple introduction to Monte Carlo Tree Search](https://towardsdatascience.com/monte-carlo-tree-search-implementing-reinforcement-learning-in-real-time-game-player-25b6f6ac3b43) 26 | - [Details of MCTS implementation on game of HEX](https://towardsdatascience.com/monte-carlo-tree-search-implementing-reinforcement-learning-in-real-time-game-player-a9c412ebeff5) 27 | 28 | So if you are familiar with the whole concept of MCTS and UCT algorithm, you must know that in practice it suffers from 29 | sparse rewards. it takes so much time to warm up the tree with simple UCT algorithm. So in this case we **first implemented 30 | the RAVE algorithm** that helps warm up tree faster. then implemented **several simulation strategy like Last Good Reply, 31 | PoolRAVE, Decisive Move and also UCB1-Tuned**. 32 | 33 | Then we applied **quality based rewards** in [Quality-based Rewards for Monte-Carlo Tree Search Simulations](https://dl.acm.org/doi/10.5555/3006652.3006771) 34 | which basically it asserts that we can apply discounted rewards by **knowing the length of simulation and the 35 | maximum number of actions allowed to take in environment** for each player (In some games, the game ends after limited number of moves. because there is no more movements). 36 | 37 | After that we used **HRAVE and GRAVE in the paper [Comparison of rapid action value estimation variants for general game playing 2018 - Chiara F. Sironi; Mark H. M. 
Winands](https://ieeexplore.ieee.org/document/7860429)** 38 | which basically states that we can use the **global information of the game to guide the simulations**. 39 | We also tested the **leaf threading on UCT**. 40 | 41 | all of above algorithms are addressed below. 42 | 43 | ### Original repo 44 | 45 | - MopyHex: Authored by Kenny Young [here](https://github.com/kenjyoung/mopyhex) 46 | 47 | ### Contributions to the original repo: 48 | - implementing Generalized Rapid Action Value Estimation 49 | - implementing HRAVE and GRAVE algorithms in [Comparison of rapid action value estimation variants for general game playing 2018 - Chiara F. Sironi; Mark H. M. Winands](https://ieeexplore.ieee.org/document/7860429) 50 | - implementing Quality-based rewards in [Quality-based Rewards for Monte-Carlo Tree Search Simulations](https://dl.acm.org/doi/10.5555/3006652.3006771) 51 | - implementing leaf-threading on basic No frills UCT. 52 | 53 | **This project has a further optimized version in [here](https://github.com/masouduut94/MCTS-agent-cythonized) which optimized by cython.** 54 | 55 | ### Researches have been done in **Urmia University of Technology**. 56 |

57 | 58 | 59 | 60 |

61 | 62 | #### Authors: 63 | - Masoud Masoumi Moghadam (Me :sunglasses:) 64 | - Prof: Mohammad Pourmahmood Aghababa [profile](https://bit.ly/3dV23Be) 65 | - Prof: Jamshid Bagherzadeh [profile](https://bit.ly/3dPX4Sc) 66 | 67 | ## What is monte carlo tree search anyway? 68 | 69 | 70 | # Requirements 71 | - OS: Windows and Ubuntu 72 | - tkinter 73 | - Numpy 74 | 75 | # To run it: 76 | You can :running: (run) program using this command: 77 | 78 | python main.py 79 | 80 | Also you can run tests for comparing two mcts-based algorithms against 81 | each other using the `playtest.py`. 82 | 83 | ## :closed_book: To know more about MCTS: 84 | 85 | This one is highly recommended: 86 | 87 | 88 | ## Algorithms used for boosting MCTS in this framework: 89 | 90 | - Upper Confidence Bounds (UCT) 91 | - UCB1-Tuned 92 | - Rapid Action Value Estimation (RAVE) 93 | - Decisive Move 94 | - Quality Based Rewards 95 | - Pool RAVE 96 | - Last Good Reply 97 | 98 | 99 | # References 100 | - [1] A Survey of Monte Carlo Tree Search Methods, Cameron B. Browne et al, 2012 [Link to paper](https://ieeexplore.ieee.org/document/6145622) 101 | - [2] Generalized Rapid Action Value Estimation, Tristan Cazenave, 2017 [Link to paper](https://www.ijcai.org/Proceedings/15/Papers/112.pdf) 102 | - [3] Comparison of rapid action value estimation variants for general game playing, C. 
from numpy import zeros, int_
from unionfind import UnionFind
from meta import GameMeta


class GameState:
    """
    Stores information representing the current state of a game of hex, namely
    the board and the current turn. Also provides functions for playing the game.

    Each player owns a pair of opposite board edges (EDGE1/EDGE2 in the
    union-find structures); a player wins when their two edges are connected.
    """

    def __init__(self, size):
        """
        Initialize the game board and give white first turn.
        Also create our union find structures for win checking.

        Args:
            size (int): The board size
        """
        self.size = size
        self.to_play = GameMeta.PLAYERS['white']
        # integer board: 0 = empty, 1 = white, 2 = black (see GameMeta.PLAYERS)
        self.board = zeros((size, size))
        self.board = int_(self.board)
        self.white_played = 0
        self.black_played = 0
        # union-find structures used to detect an edge-to-edge connection
        self.white_groups = UnionFind()
        self.black_groups = UnionFind()
        self.white_groups.set_ignored_elements([GameMeta.EDGE1, GameMeta.EDGE2])
        self.black_groups.set_ignored_elements([GameMeta.EDGE1, GameMeta.EDGE2])

    def play(self, cell: tuple) -> None:
        """
        Play a stone of the player that owns the current turn in input cell,
        then pass the turn to the other player.

        Args:
            cell (tuple): row and column of the cell
        """
        if self.to_play == GameMeta.PLAYERS['white']:
            self.place_white(cell)
            self.to_play = GameMeta.PLAYERS['black']
        elif self.to_play == GameMeta.PLAYERS['black']:
            self.place_black(cell)
            self.to_play = GameMeta.PLAYERS['white']

    def get_num_played(self) -> dict:
        """Return the number of stones each colour has played so far."""
        return {'white': self.white_played, 'black': self.black_played}

    def get_white_groups(self) -> dict:
        """
        Returns (dict): groups of connected white stones for unionfind check
        """
        return self.white_groups.get_groups()

    def get_black_groups(self) -> dict:
        """
        Returns (dict): groups of connected black stones for unionfind check
        """
        return self.black_groups.get_groups()

    def place_white(self, cell: tuple) -> None:
        """
        Place a white stone regardless of whose turn it is.

        Args:
            cell (tuple): row and column of the cell

        Raises:
            ValueError: if the cell is already occupied
        """
        if self.board[cell] == GameMeta.PLAYERS['none']:
            self.board[cell] = GameMeta.PLAYERS['white']
            self.white_played += 1
        else:
            raise ValueError("Cell occupied")
        # if the placed cell touches a white edge connect it appropriately
        if cell[0] == 0:
            self.white_groups.join(GameMeta.EDGE1, cell)
        if cell[0] == self.size - 1:
            self.white_groups.join(GameMeta.EDGE2, cell)
        # join any groups connected by the new white stone
        for n in self.neighbors(cell):
            if self.board[n] == GameMeta.PLAYERS['white']:
                self.white_groups.join(n, cell)

    def place_black(self, cell: tuple) -> None:
        """
        Place a black stone regardless of whose turn it is.

        Args:
            cell (tuple): row and column of the cell

        Raises:
            ValueError: if the cell is already occupied
        """
        if self.board[cell] == GameMeta.PLAYERS['none']:
            self.board[cell] = GameMeta.PLAYERS['black']
            self.black_played += 1
        else:
            raise ValueError("Cell occupied")
        # if the placed cell touches a black edge connect it appropriately
        if cell[1] == 0:
            self.black_groups.join(GameMeta.EDGE1, cell)
        if cell[1] == self.size - 1:
            self.black_groups.join(GameMeta.EDGE2, cell)
        # join any groups connected by the new black stone
        for n in self.neighbors(cell):
            if self.board[n] == GameMeta.PLAYERS['black']:
                self.black_groups.join(n, cell)

    def would_lose(self, cell: tuple, color: int) -> bool:
        """
        Return True if the move indicated by cell and color would lose the game,
        False otherwise.

        The move loses when it would connect the OPPONENT's two edges, i.e. the
        cell touches (directly or through opponent groups) both EDGE1 and EDGE2
        of the given opponent color.
        """
        connect1 = False
        connect2 = False
        if color == GameMeta.PLAYERS['black']:
            if cell[1] == 0:
                connect1 = True
            elif cell[1] == self.size - 1:
                connect2 = True
            for n in self.neighbors(cell):
                if self.black_groups.connected(GameMeta.EDGE1, n):
                    connect1 = True
                elif self.black_groups.connected(GameMeta.EDGE2, n):
                    connect2 = True
        elif color == GameMeta.PLAYERS['white']:
            if cell[0] == 0:
                connect1 = True
            elif cell[0] == self.size - 1:
                connect2 = True
            for n in self.neighbors(cell):
                if self.white_groups.connected(GameMeta.EDGE1, n):
                    connect1 = True
                elif self.white_groups.connected(GameMeta.EDGE2, n):
                    connect2 = True

        return connect1 and connect2

    def turn(self) -> int:
        """
        Return the player with the next move.
        """
        return self.to_play

    def set_turn(self, player: int) -> None:
        """
        Set the player to take the next move.

        Raises:
            ValueError: if player turn is not 1 or 2
        """
        if player in GameMeta.PLAYERS.values() and player != GameMeta.PLAYERS['none']:
            self.to_play = player
        else:
            raise ValueError('Invalid turn: ' + str(player))

    @property
    def winner(self) -> int:
        """
        Return a number corresponding to the winning player,
        or none if the game is not over.
        """
        if self.white_groups.connected(GameMeta.EDGE1, GameMeta.EDGE2):
            return GameMeta.PLAYERS['white']
        elif self.black_groups.connected(GameMeta.EDGE1, GameMeta.EDGE2):
            return GameMeta.PLAYERS['black']
        else:
            return GameMeta.PLAYERS['none']

    def neighbors(self, cell: tuple) -> list:
        """
        Return list of neighbors of the passed cell, clipped to the board.

        Args:
            cell (tuple): row and column of the cell
        """
        x, y = cell
        return [(n[0] + x, n[1] + y) for n in GameMeta.NEIGHBOR_PATTERNS
                if (0 <= n[0] + x < self.size and 0 <= n[1] + y < self.size)]

    def moves(self) -> list:
        """
        Get a list of all moves possible on the current board.
        """
        # same enumeration order as the original nested loops (y outer, x inner)
        return [(x, y)
                for y in range(self.size)
                for x in range(self.size)
                if self.board[x, y] == GameMeta.PLAYERS['none']]

    def __str__(self):
        """
        Print an ascii representation of the game board.
        Notes:
            Used for gtp interface
        """
        white = 'W'
        black = 'B'
        empty = '.'
        ret = '\n'
        coord_size = len(str(self.size))
        offset = 1
        ret += ' ' * (offset + 1)
        for x in range(self.size):
            ret += chr(ord('A') + x) + ' ' * offset * 2
        ret += '\n'
        for y in range(self.size):
            ret += str(y + 1) + ' ' * (offset * 2 + coord_size - len(str(y + 1)))
            for x in range(self.size):
                if self.board[x, y] == GameMeta.PLAYERS['white']:
                    ret += white
                elif self.board[x, y] == GameMeta.PLAYERS['black']:
                    ret += black
                else:
                    ret += empty
                ret += ' ' * offset * 2
            ret += white + "\n" + ' ' * offset * (y + 1)
        ret += ' ' * (offset * 2 + 1) + (black + ' ' * offset * 2) * self.size
        return ret
10 | The interface contains an agent which decides which moves to make on request 11 | along with a GameState which holds the current state of the game. 12 | 13 | """ 14 | 15 | def __init__(self, agent): 16 | """ 17 | Initilize the list of available commands, binding appropriate names to the 18 | functions defined in this file. 19 | """ 20 | commands = {"size": self.gtp_boardsize, "reset": self.gtp_clear, "play": self.gtp_play, 21 | "genmove": self.gtp_genmove, "print": self.gtp_show, "set_time": self.gtp_time, 22 | "winner": self.gtp_winner} 23 | self.commands = commands 24 | self.game = GameState(8) 25 | self.agent = agent 26 | self.agent.set_GameState(self.game) 27 | self.move_time = 10 28 | self.last_move = None 29 | 30 | def send_command(self, command): 31 | """ 32 | Parse the given command into a function name and arguments, execute it 33 | then return the response. 34 | 35 | """ 36 | parsed_command = command.split() 37 | # first word specifies function to call, the rest are args 38 | name = parsed_command[0] 39 | args = parsed_command[1:] 40 | if name in self.commands: 41 | return self.commands[name](args) 42 | else: 43 | return False, "Unrecognized command" 44 | 45 | def gtp_boardsize(self, args): 46 | """ 47 | Set the size of the game board (will also clear the board). 48 | 49 | """ 50 | if len(args) < 1: 51 | return False, "Not enough arguments" 52 | try: 53 | size = int(args[0]) 54 | except ValueError: 55 | return False, "Argument is not a valid size" 56 | if size < 1: 57 | return False, "Argument is not a valid size" 58 | 59 | self.game = GameState(size) 60 | self.agent.set_GameState(self.game) 61 | self.last_move = None 62 | return True, "" 63 | 64 | def gtp_clear(self, args): 65 | """ 66 | Clear the game board. 67 | 68 | """ 69 | self.game = GameState(self.game.size) 70 | self.agent.set_GameState(self.game) 71 | self.last_move = None 72 | return True, "" 73 | 74 | def gtp_play(self, args): 75 | """ 76 | Play a stone of a given colour in a given cell. 
77 | 1st arg = colour (white/w or black/b) 78 | 2nd arg = cell (i.e. g5) 79 | 80 | Note: play order is not enforced but out of order turns will cause the 81 | search tree to be reset 82 | 83 | """ 84 | if len(args) < 2: 85 | return False, "Not enough arguments" 86 | try: 87 | x = ord(args[1][0].lower()) - ord('a') 88 | y = int(args[1][1:]) - 1 89 | 90 | if x < 0 or y < 0 or x >= self.game.size or y >= self.game.size: 91 | return False, "Cell out of bounds" 92 | 93 | if args[0][0].lower() == 'w': 94 | self.last_move = (x, y) 95 | if self.game.turn() == GameMeta.PLAYERS["white"]: 96 | self.game.play((x, y)) 97 | self.agent.move((x, y)) 98 | else: 99 | self.game.place_white((x, y)) 100 | self.agent.set_GameState(self.game) 101 | 102 | elif args[0][0].lower() == 'b': 103 | self.last_move = (x, y) 104 | if self.game.turn() == GameMeta.PLAYERS["black"]: 105 | self.game.play((x, y)) 106 | self.agent.move((x, y)) 107 | else: 108 | self.game.place_black((x, y)) 109 | self.agent.set_GameState(self.game) 110 | else: 111 | return False, "Player not recognized" 112 | 113 | except ValueError: 114 | return False, "Malformed arguments" 115 | 116 | def gtp_genmove(self, args): 117 | """ 118 | Allow the agent to play a stone of the given colour (white/w or black/b) 119 | 120 | Note: play order is not enforced but out of order turns will cause the 121 | agents search tree to be reset 122 | 123 | """ 124 | # if user specifies a player generate the appropriate move 125 | # otherwise just go with the current turn 126 | if self.gtp_winner([])[1] == 'none': 127 | if len(args) > 0: 128 | if args[0][0].lower() == 'w': 129 | if self.game.turn() != GameMeta.PLAYERS["white"]: 130 | self.game.set_turn(GameMeta.PLAYERS["white"]) 131 | self.agent.set_GameState(self.game) 132 | 133 | elif args[0][0].lower() == 'b': 134 | if self.game.turn() != GameMeta.PLAYERS["black"]: 135 | self.game.set_turn(GameMeta.PLAYERS["black"]) 136 | self.agent.set_GameState(self.game) 137 | else: 138 | return False, 
"Player not recognized" 139 | 140 | move = None 141 | self.agent.search(self.move_time) 142 | 143 | if move is None: 144 | move = self.agent.best_move() 145 | 146 | if move == GameMeta.GAME_OVER: 147 | return (False, "The game is already over" + 148 | '\n' + 'The winner is ----> ' + str(self.send_command('winner')[1]), 0) 149 | self.game.play(move) 150 | self.agent.move(move) 151 | return True, chr(ord('a') + move[0]) + str(move[1] + 1), self.agent.statistics()[0] 152 | else: 153 | return (False, "The game is already over" + 154 | '\n' + 'The winner is ----> ' + str(self.send_command('winner')[1]), 0) 155 | 156 | def gtp_time(self, args): 157 | """ 158 | Change the time per move allocated to the search agent (in units of secounds) 159 | 160 | """ 161 | if len(args) < 1: 162 | return False, "Not enough arguments" 163 | try: 164 | time = int(args[0]) 165 | except ValueError: 166 | return False, "Argument is not a valid time limit" 167 | if time < 1: 168 | return False, "Argument is not a valid time limit" 169 | self.move_time = time 170 | return True, "" 171 | 172 | def gtp_show(self, args): 173 | """ 174 | Return an ascii representation of the current state of the game board. 175 | 176 | """ 177 | return True, str(self.game) 178 | 179 | def gtp_winner(self, args): 180 | """ 181 | Return the winner of the current game (black or white), none if undecided. 
182 | 183 | """ 184 | if self.game.winner == GameMeta.PLAYERS["white"]: 185 | return True, "white" 186 | elif self.game.winner == GameMeta.PLAYERS["black"]: 187 | return True, "black" 188 | else: 189 | return True, "none" 190 | -------------------------------------------------------------------------------- /gui.py: -------------------------------------------------------------------------------- 1 | from tkinter import (Frame, Canvas, ttk, HORIZONTAL, VERTICAL, IntVar, Scale, Button, Label, PhotoImage, BOTH, LEFT, Y, 2 | X, TOP, messagebox) 3 | 4 | from numpy import int_ 5 | 6 | from gamestate import GameState 7 | from meta import GameMeta 8 | from rave_mctsagent import (RaveMctsAgent, LGRMctsAgent, PoolRaveMctsAgent, DecisiveMoveMctsAgent) 9 | from ucb1_tuned_mctsagent import UCB1TunedMctsAgent 10 | from uct_mcstsagent import UctMctsAgent 11 | 12 | 13 | class Gui: 14 | """ 15 | This class is built to let the user have a better interaction with 16 | game. 17 | inputs => 18 | root = Tk() => an object which inherits the traits of Tkinter class 19 | agent = an object which inherit the traits of mctsagent class. 
20 | 21 | """ 22 | 23 | agent_type = {1: "UCT", 2: "RAVE", 3: "LAST-GOOD-REPLY", 4: "POOLRAVE", 5: "DECISIVE-MOVE", 6: "UCB1-TUNED"} 24 | 25 | AGENTS = {"UCT": UctMctsAgent, 26 | "RAVE": RaveMctsAgent, 27 | "LAST-GOOD-REPLY": LGRMctsAgent, 28 | "POOLRAVE": PoolRaveMctsAgent, 29 | "DECISIVE-MOVE": DecisiveMoveMctsAgent, 30 | "UCB1-TUNED": UCB1TunedMctsAgent} 31 | 32 | def __init__(self, root, agent_name='UCT'): 33 | self.root = root 34 | self.root.geometry('1366x690+0+0') 35 | self.agent_name = agent_name 36 | try: 37 | self.agent = self.AGENTS[agent_name]() 38 | except KeyError: 39 | print("Unknown agent defaulting to basic") 40 | self.agent_name = "uct" 41 | self.agent = self.AGENTS[agent_name]() 42 | self.game = GameState(8) 43 | self.agent.set_gamestate(self.game) 44 | self.time = 1 45 | self.root.configure(bg='#363636') 46 | self.colors = {'white': '#ffffff', 47 | 'milk': '#e9e5e5', 48 | 'red': '#9c0101', 49 | 'orange': '#ee7600', 50 | 'yellow': '#f4da03', 51 | 'green': '#00ee76', 52 | 'cyan': '#02adfd', 53 | 'blue': '#0261fd', 54 | 'purple': '#9c02fd', 55 | 'gray1': '#958989', 56 | 'gray2': '#3e3e3e', 57 | 'black': '#000000'} 58 | global BG 59 | BG = self.colors['gray2'] 60 | self.last_move = None 61 | self.frame_board = Frame(self.root) # main frame for the play board 62 | self.canvas = Canvas(self.frame_board, bg=BG) 63 | self.scroll_y = ttk.Scrollbar(self.frame_board, orient=VERTICAL) 64 | self.scroll_x = ttk.Scrollbar(self.frame_board, orient=HORIZONTAL) 65 | 66 | # the notebook frame which holds the left panel frames 67 | 68 | self.notebook = ttk.Notebook(self.frame_board, width=350) 69 | self.panel_game = Frame(self.notebook, highlightbackground=self.colors['white']) 70 | self.developers = Frame(self.notebook, highlightbackground=self.colors['white']) 71 | 72 | # Registering variables for: 73 | 74 | self.game_size_value = IntVar() # size of the board 75 | self.game_time_value = IntVar() # time of CPU player 76 | self.game_turn_value = IntVar() # defines 
whose turn is it 77 | 78 | self.switch_agent_value = IntVar() # defines which agent to play against 79 | self.switch_agent_value.set(1) 80 | 81 | self.game_turn_value.set(1) 82 | self.turn = {1: 'white', 2: 'black'} 83 | 84 | self.game_size = Scale(self.panel_game) 85 | self.game_time = Scale(self.panel_game) 86 | self.game_turn = Scale(self.panel_game) 87 | self.generate = Button(self.panel_game) 88 | self.reset_board = Button(self.panel_game) 89 | 90 | self.switch_agent = Scale(self.panel_game) 91 | self.agent_show = Label(self.panel_game, font=('Calibri', 14, 'bold'), fg='white', justify=LEFT, 92 | bg=BG, text='Agent Policy: ' + self.agent_name + '\n') 93 | 94 | self.hex_board = [] 95 | # Holds the IDs of hexagons in the main board for implementing the click and play functions 96 | self.game_size_value.set(8) 97 | self.game_time_value.set(1) 98 | self.size = self.game_size_value.get() 99 | self.time = self.game_time_value.get() 100 | self.board = self.game.board 101 | self.board = int_(self.board).tolist() 102 | self.gameboard2hexagons(self.board) # building the game board 103 | self.logo = PhotoImage(file='image/hex.png') 104 | self.uut_logo = PhotoImage(file='image/uut_2.png') 105 | self.generate_black_edge() 106 | self.generate_white_edge() 107 | 108 | # Frame_content 109 | 110 | self.frame_board.configure(bg=BG, width=1366, height=760) 111 | self.frame_board.pack(fill=BOTH) 112 | self.notebook.add(self.panel_game, text=' Game ') 113 | self.notebook.add(self.developers, text=' Developers ') 114 | self.notebook.pack(side=LEFT, fill=Y) 115 | self.canvas.configure(width=980, bg=BG, cursor='hand2') 116 | self.canvas.pack(side=LEFT, fill=Y) 117 | self.canvas.configure(yscrollcommand=self.scroll_y.set) 118 | self.scroll_y.configure(command=self.canvas.yview) 119 | self.scroll_x.configure(command=self.canvas.xview) 120 | self.scroll_y.place(x=387, y=482) 121 | self.scroll_x.place(x=370, y=500) 122 | 123 | # Frame_left_panel 124 | 125 | """ 126 | the left panel 
notebook ----> Game 127 | 128 | """ 129 | self.panel_game.configure(bg=BG) 130 | Label(self.panel_game, text='Board size', 131 | font=('Calibri', 14, 'bold'), 132 | foreground='white', bg=BG, pady=10).pack(fill=X, side=TOP) # label ---> Board size 133 | self.game_size.configure(from_=3, to=20, tickinterval=1, bg=BG, fg='white', 134 | orient=HORIZONTAL, variable=self.game_size_value) 135 | self.game_size.pack(side=TOP, fill=X) 136 | Label(self.panel_game, text='Time', 137 | font=('Calibri', 14, 'bold'), 138 | foreground='white', bg=BG, pady=10).pack(side=TOP, fill=X) # label ---> Time 139 | self.game_time.configure(from_=1, to=20, tickinterval=1, bg=BG, fg='white', 140 | orient=HORIZONTAL, variable=self.game_time_value) 141 | self.game_time.pack(side=TOP, fill=X) 142 | Label(self.panel_game, text='Player', 143 | font=('Calibri', 14, 'bold'), 144 | foreground='white', bg=BG, pady=10).pack(side=TOP, fill=X) # label ---> Turn 145 | self.game_turn.configure(from_=1, to=2, tickinterval=1, bg=BG, fg='white', 146 | orient=HORIZONTAL, variable=self.game_turn_value) 147 | self.game_turn.pack(side=TOP) 148 | Label(self.panel_game, text=' ', 149 | font=('Calibri', 14, 'bold'), 150 | foreground='white', bg=BG).pack(side=TOP, fill=X) 151 | 152 | # ################################## AGENT CONTROLS ############################# 153 | 154 | self.agent_show.pack(fill=X, side=TOP) 155 | self.switch_agent.configure(from_=1, to=len(self.agent_type), tickinterval=1, bg=BG, fg='white', 156 | orient=HORIZONTAL, variable=self.switch_agent_value, ) 157 | self.switch_agent.pack(side=TOP, fill=X) 158 | 159 | # ################################## MOVE LABELS ################################ 160 | self.move_label = Label(self.panel_game, font=('Calibri', 15, 'bold'), height=5, fg='white', justify=LEFT, 161 | bg=BG, text='PLAY : CLICK A CELL ON GAME BOARD \nMCTS BOT: CLICK GENERATE') 162 | self.move_label.pack(side=TOP, fill=X) 163 | 164 | self.reset_board.configure(text='Reset Board', pady=10, 
165 | cursor='hand2', width=22, 166 | font=('Calibri', 12, 'bold')) 167 | self.reset_board.pack(side=LEFT) 168 | self.generate.configure(text='Generate', pady=10, 169 | cursor='hand2', width=22, 170 | font=('Calibri', 12, 'bold')) 171 | self.generate.pack(side=LEFT) 172 | 173 | """ 174 | the left panel notebook ---> Developers 175 | 176 | """ 177 | self.developers.configure(bg=BG) 178 | Label(self.developers, 179 | text='HEXPY', 180 | font=('Calibri', 18, 'bold'), 181 | foreground='white', bg=BG, pady=5).pack(side=TOP, fill=X) 182 | Label(self.developers, 183 | text='DEVELOPED BY:\n' 184 | + 'Masoud Masoumi Moghadam\n\n' 185 | + 'SUPERVISED BY:\n' 186 | + 'Dr.Pourmahmoud Aghababa\n' 187 | + 'Dr.Bagherzadeh\n\n' 188 | + 'SPECIAL THANKS TO:\n' 189 | + 'Nemat Rahmani\n', 190 | font=('Calibri', 16, 'bold'), justify=LEFT, 191 | foreground='white', bg=BG, pady=10).pack(side=TOP, fill=X) 192 | Label(self.developers, image=self.uut_logo, bg=BG).pack(side=TOP, fill=X) 193 | Label(self.developers, text='Summer 2016', 194 | font=('Calibri', 17, 'bold'), wraplength=350, justify=LEFT, 195 | foreground='white', bg=BG, pady=30).pack(side=TOP, fill=X) 196 | 197 | # Binding Actions 198 | 199 | """ 200 | Binding triggers for the actions defined in the class. 201 | 202 | """ 203 | self.canvas.bind('<1>', self.click2play) 204 | self.game_size.bind('', self.set_size) 205 | self.game_time.bind('', self.set_time) 206 | self.generate.bind('', self.click_to_bot_play) 207 | self.reset_board.bind('', self.reset) 208 | self.switch_agent.bind('', self.set_agent) 209 | 210 | @staticmethod 211 | def top_left_hexagon(): 212 | """ 213 | Returns the points which the first hexagon has to be created based on. 
214 | 215 | """ 216 | return [[85, 50], [105, 65], [105, 90], [85, 105], [65, 90], [65, 65]] 217 | 218 | def hexagon(self, points, color): 219 | """ 220 | Creates a hexagon by getting a list of points and their assigned colors 221 | according to the game board 222 | """ 223 | match color: 224 | case 0: 225 | # if color == 0: 226 | hx = self.canvas.create_polygon(points[0], points[1], points[2], 227 | points[3], points[4], points[5], 228 | fill=self.colors['gray1'], outline='black', width=2, activefill='cyan') 229 | case 1: 230 | hx = self.canvas.create_polygon(points[0], points[1], points[2], 231 | points[3], points[4], points[5], 232 | fill=self.colors['yellow'], outline='black', width=2, activefill='cyan') 233 | case 2: 234 | hx = self.canvas.create_polygon(points[0], points[1], points[2], 235 | points[3], points[4], points[5], 236 | fill=self.colors['red'], outline='black', width=2, activefill='cyan') 237 | case 3: 238 | hx = self.canvas.create_polygon(points[0], points[1], points[2], 239 | points[3], points[4], points[5], 240 | fill=self.colors['black'], outline='black', width=2) 241 | case _: 242 | hx = self.canvas.create_polygon(points[0], points[1], points[2], 243 | points[3], points[4], points[5], 244 | fill=self.colors['white'], outline='black', width=2) 245 | # if color == 0: 246 | # hx = self.canvas.create_polygon(points[0], points[1], points[2], 247 | # points[3], points[4], points[5], 248 | # fill=self.colors['gray1'], outline='black', width=2, activefill='cyan') 249 | # elif color is 1: 250 | # hx = self.canvas.create_polygon(points[0], points[1], points[2], 251 | # points[3], points[4], points[5], 252 | # fill=self.colors['yellow'], outline='black', width=2, activefill='cyan') 253 | # elif color is 2: 254 | # hx = self.canvas.create_polygon(points[0], points[1], points[2], 255 | # points[3], points[4], points[5], 256 | # fill=self.colors['red'], outline='black', width=2, activefill='cyan') 257 | # elif color is 3: 258 | # hx = 
self.canvas.create_polygon(points[0], points[1], points[2], 259 | # points[3], points[4], points[5], 260 | # fill=self.colors['black'], outline='black', width=2) 261 | # else: 262 | # hx = self.canvas.create_polygon(points[0], points[1], points[2], 263 | # points[3], points[4], points[5], 264 | # fill=self.colors['white'], outline='black', width=2) 265 | return hx 266 | 267 | def generate_row(self, points, colors): 268 | """ 269 | By getting a list of points as the starting point of each row and a list of 270 | colors as the dedicated color for each item in row, it generates a row of 271 | hexagons by calling hexagon functions multiple times. 272 | """ 273 | x_offset = 40 274 | row = [] 275 | temp_array = [] 276 | for i in range(len(colors)): 277 | for point in points: 278 | temp_points_x = point[0] + x_offset * i 279 | temp_points_y = point[1] 280 | temp_array.append([temp_points_x, temp_points_y]) 281 | match colors[i]: 282 | case 0: 283 | hx = self.hexagon(temp_array, 0) 284 | case 1: 285 | hx = self.hexagon(temp_array, 4) 286 | case _: 287 | hx = self.hexagon(temp_array, 3) 288 | # if colors[i] is 0: 289 | # hx = self.hexagon(temp_array, 0) 290 | # elif colors[i] is 1: 291 | # hx = self.hexagon(temp_array, 4) 292 | # else: 293 | # hx = self.hexagon(temp_array, 3) 294 | row.append(hx) 295 | temp_array = [] 296 | return row 297 | 298 | def gameboard2hexagons(self, array): 299 | """ 300 | Simply gets the game_board and generates the hexagons by their dedicated colors. 301 | """ 302 | initial_offset = 20 303 | y_offset = 40 304 | temp = [] 305 | for i in range(len(array)): 306 | points = self.top_left_hexagon() 307 | for point in points: 308 | point[0] += initial_offset * i 309 | point[1] += y_offset * i 310 | temp.append([point[0], point[1]]) 311 | row = self.generate_row(temp, self.board[i]) 312 | temp.clear() 313 | self.hex_board.append(row) 314 | 315 | def generate_white_edge(self): 316 | """ 317 | Generates the white zones in the left and right of the board. 
318 | 319 | """ 320 | init_points = self.top_left_hexagon() 321 | for pt in init_points: 322 | pt[0] -= 40 323 | for pt in init_points: 324 | pt[0] -= 20 325 | pt[1] -= 40 326 | label_x, label_y = 0, 0 327 | init_offset = 20 328 | y_offset = 40 329 | temp_list = [] 330 | for i in range(len(self.board)): 331 | for pt in range(len(init_points)): 332 | init_points[pt][0] += init_offset 333 | init_points[pt][1] += y_offset 334 | label_x += init_points[pt][0] 335 | label_y += init_points[pt][1] 336 | label_x /= 6 337 | label_y /= 6 338 | self.hexagon(init_points, 4) 339 | self.canvas.create_text(label_x, label_y, fill=self.colors['black'], font="Times 20 bold", 340 | text=chr(ord('A') + i)) 341 | label_x, label_y = 0, 0 342 | for j in init_points: 343 | temp_list.append([j[0] + (len(self.board) + 1) * 40, j[1]]) 344 | self.hexagon(temp_list, 4) 345 | temp_list.clear() 346 | 347 | def generate_black_edge(self): 348 | """ 349 | Generates the black zones in the top and bottom of the board. 350 | 351 | """ 352 | init_points = self.top_left_hexagon() 353 | label_x, label_y = 0, 0 354 | temp_list = [] 355 | for pt in init_points: 356 | pt[0] -= 60 357 | pt[1] -= 40 358 | for t in range(len(init_points)): 359 | init_points[t][0] += 40 360 | label_x += init_points[t][0] 361 | label_y += init_points[t][1] 362 | label_x /= 6 363 | label_y /= 6 364 | for i in range(len(self.board)): 365 | self.hexagon(init_points, 3) 366 | self.canvas.create_text(label_x, label_y, fill=self.colors['white'], font="Times 20 bold", text=i + 1) 367 | label_x, label_y = 0, 0 368 | for pt in init_points: 369 | temp_list.append([pt[0] + (len(self.board) + 1) * 20, pt[1] + (len(self.board) + 1) * 40]) 370 | self.hexagon(temp_list, 3) 371 | temp_list.clear() 372 | for j in range(len(init_points)): 373 | init_points[j][0] += 40 374 | label_x += init_points[j][0] 375 | label_y += init_points[j][1] 376 | label_x /= 6 377 | label_y /= 6 378 | 379 | def click2play(self, event): 380 | """ 381 | Whenever any of 
the hexagons in the board is clicked, depending 382 | on the player turns, it changes the color of hexagon to the player 383 | assigned color. 384 | 385 | """ 386 | if self.winner() == 'none': 387 | x = self.canvas.canvasx(event.x) 388 | y = self.canvas.canvasy(event.y) 389 | idd = self.canvas.find_overlapping(x, y, x, y) 390 | idd = list(idd) 391 | if len(idd) is not 0: 392 | clicked_cell = idd[0] 393 | if any([clicked_cell in x for x in self.hex_board]): 394 | coordinated_cell = clicked_cell - self.hex_board[0][0] 395 | col = (coordinated_cell % self.size) 396 | turn = self.turn[self.game_turn_value.get()] 397 | if coordinated_cell % self.size == 0: 398 | row = int(coordinated_cell / self.size) 399 | else: 400 | row = int(coordinated_cell / self.size) 401 | cell = str(chr(65 + row)) + str(col + 1) 402 | self.move_label.configure(text=str(turn) + ' played ' + cell, justify=LEFT, height=5) 403 | if self.board[row][col] == 0: 404 | self.board[row][col] = self.game_turn_value.get() 405 | if self.game_turn_value.get() == 1: 406 | self.game_turn_value.set(2) 407 | else: 408 | self.game_turn_value.set(1) 409 | self.refresh() 410 | y = row 411 | x = col 412 | if turn[0].lower() == 'w': 413 | self.last_move = (x, y) 414 | if self.game.turn() == GameMeta.PLAYERS["white"]: 415 | self.game.play((x, y)) 416 | self.agent.move((x, y)) 417 | if self.winner() != 'none': 418 | messagebox.showinfo(" GAME OVER", " Wow, You won! \n Winner is %s" % self.winner()) 419 | return 420 | else: 421 | self.game.place_white((x, y)) 422 | self.agent.set_gamestate(self.game) 423 | if self.winner() != 'none': 424 | messagebox.showinfo(" GAME OVER", " Wow, You won! \n Winner is %s" % self.winner()) 425 | return 426 | elif turn[0].lower() == 'b': 427 | self.last_move = (x, y) 428 | if self.game.turn() == GameMeta.PLAYERS["black"]: 429 | self.game.play((x, y)) 430 | self.agent.move((x, y)) 431 | if self.winner() != 'none': 432 | messagebox.showinfo(" GAME OVER", " Wow, You won! 
\n Winner is %s" % self.winner()) 433 | return 434 | else: 435 | self.game.place_black((x, y)) 436 | self.agent.set_gamestate(self.game) 437 | if self.winner() != 'none': 438 | messagebox.showinfo(" GAME OVER", " Wow, You won! \n Winner is %s" % self.winner()) 439 | return 440 | else: 441 | messagebox.showinfo(" GAME OVER ", " The game is already over! Winner is %s" % self.winner()) 442 | 443 | def set_size(self, event): 444 | """ 445 | It changes the board size and reset the whole game. 446 | 447 | """ 448 | self.canvas.delete('all') 449 | self.size = self.game_size_value.get() 450 | self.game = GameState(self.size) 451 | self.agent.set_gamestate(self.game) 452 | self.board = self.game.board 453 | self.board = int_(self.board).tolist() 454 | self.last_move = None 455 | self.move_label.config(text='PLAY : CLICK A CELL ON GAME BOARD \nMCTS BOT: CLICK GENERATE', justify='left', 456 | height=5) 457 | self.refresh() 458 | 459 | def set_time(self, event) -> None: 460 | """ 461 | It changes the time for CPU player to think and generate a move. 462 | 463 | """ 464 | self.time = self.game_time_value.get() 465 | print('The CPU time = ', self.time, ' seconds') 466 | 467 | def set_agent(self, event) -> None: 468 | """ 469 | It changes the time for CPU player to think and generate a move. 470 | 471 | """ 472 | agent_num = self.switch_agent_value.get() 473 | self.agent_name = self.agent_type[agent_num] 474 | self.agent = self.AGENTS[self.agent_name](self.game) 475 | self.agent_show.config(font=('Calibri', 14, 'bold'), justify=LEFT, 476 | text='Agent Policy: ' + self.agent_name + '\n') 477 | 478 | def winner(self) -> str: 479 | """ 480 | Return the winner of the current game (black or white), none if undecided. 
481 | 482 | """ 483 | if self.game.winner == GameMeta.PLAYERS["white"]: 484 | return "white" 485 | elif self.game.winner == GameMeta.PLAYERS["black"]: 486 | return "black" 487 | else: 488 | return "none" 489 | 490 | def click_to_bot_play(self, event): 491 | """ 492 | By pushing the generate button, It produces an appropriate move 493 | by using monte carlo tree search algorithm for the player which 494 | turn is his/hers! . 495 | 496 | """ 497 | if self.winner() == 'none': 498 | self.agent.search(self.time) 499 | num_rollouts, node_count, run_time = self.agent.statistics() 500 | move = self.agent.best_move() # the move is tuple like (3, 1) 501 | self.game.play(move) 502 | self.agent.move(move) 503 | row, col = move # Relating the 'move' tuple with index of self.board 504 | self.board[col][row] = self.game_turn_value.get() 505 | if self.game_turn_value.get() == 1: # change the turn of players 506 | self.game_turn_value.set(2) 507 | else: 508 | self.game_turn_value.set(1) 509 | self.refresh() 510 | player = self.turn[self.game_turn_value.get()] 511 | cell = chr(ord('A') + move[1]) + str(move[0] + 1) 512 | self.move_label.config(font=('Calibri', 15, 'bold'), justify='left', 513 | text=str(num_rollouts) + ' Game Simulations ' + '\n' 514 | + 'In ' + str(run_time) + ' seconds ' + '\n' 515 | + 'Node Count : ' + str(node_count) + '\n' 516 | + player + ' played at ' + cell, height=5) 517 | print('move = ', cell) 518 | if self.winner() != 'none': 519 | messagebox.showinfo(" GAME OVER", " Oops!\n You lost! \n Winner is %s" % self.winner()) 520 | else: 521 | messagebox.showinfo(" GAME OVER", " The game is already over! 
Winner is %s" % self.winner()) 522 | 523 | def refresh(self): 524 | """ 525 | Delete the whole world and recreate it again 526 | 527 | """ 528 | self.canvas.delete('all') 529 | self.hex_board.clear() 530 | self.gameboard2hexagons(self.board) 531 | self.generate_black_edge() 532 | self.generate_white_edge() 533 | 534 | def reset(self, event): 535 | """ 536 | By clicking on the Reset button game board would be cleared 537 | for a new game 538 | 539 | """ 540 | self.game = GameState(self.game.size) 541 | self.agent.set_gamestate(self.game) 542 | self.set_size(event) 543 | self.last_move = None 544 | self.game_turn_value.set(1) 545 | self.move_label.config(text='PLAY : CLICK A CELL ON GAME BOARD \nMCTS BOT: CLICK GENERATE', justify='left', 546 | height=5) 547 | -------------------------------------------------------------------------------- /image/hex.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/masouduut94/MCTS-agent-python/bf53bd78d90a7381287b8d6b7c95082245936ab8/image/hex.png -------------------------------------------------------------------------------- /image/ssss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/masouduut94/MCTS-agent-python/bf53bd78d90a7381287b8d6b7c95082245936ab8/image/ssss.png -------------------------------------------------------------------------------- /image/uut.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/masouduut94/MCTS-agent-python/bf53bd78d90a7381287b8d6b7c95082245936ab8/image/uut.png -------------------------------------------------------------------------------- /image/uut_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/masouduut94/MCTS-agent-python/bf53bd78d90a7381287b8d6b7c95082245936ab8/image/uut_2.png 
from tkinter import Tk

from gui import Gui


def main():
    """Create the Tk root window, attach the Hex GUI and enter the main loop."""
    root = Tk()
    Gui(root)
    root.mainloop()


if __name__ == "__main__":
    main()


class MCTSMeta:
    """Tunable constants shared by the various MCTS agent flavours."""
    EXPLORATION = 0.5
    RAVE_CONST = 300
    RANDOMNESS = 0.5
    POOLRAVE_CAPACITY = 10
    K_CONST = 10
    A_CONST = 0.25
    WARMUP_ROLLOUTS = 7


class GameMeta:
    """Board- and game-level constants shared across modules."""
    # player encodings: 0 = empty, 1 = white, 2 = black
    PLAYERS = {'none': 0, 'white': 1, 'black': 2}
    INF = float('inf')
    GAME_OVER = -1
    EDGE1 = 1
    EDGE2 = 2
    # the six hex-grid neighbour offsets
    NEIGHBOR_PATTERNS = ((-1, 0), (0, -1), (-1, 1), (0, 1), (1, 0), (1, -1))
def main():
    """
    Run three tournaments between two GTP agents and append the results
    (win counts, simulation statistics, total time) to results/result.txt.
    """
    interface1 = GTPInterface(UctMctsAgent())
    interface2 = GTPInterface(RaveMctsAgent())
    address = 'results/result.txt'
    # BUGFIX: use a context manager instead of manual open/close; the old
    # code also called file.close() inside a `with` block (redundant).
    with open(address, 'a') as f:
        f.write('Tournament between QB UCTRAVE , UCT \n')
    print('Tournament between QB RAVE , UCT \n')
    for i in range(3):
        result = tournament(interface1, interface2, game_number, move_time, boardsize, opening_moves)
        with open(address, 'a') as file:
            file.write('Result of tournament %a \n' % i)
            file.write('player 1 wins = %a games \n' % result[0])
            file.write('player 2 wins = %a games \n' % result[1])
            file.write("Simulations : \nAvg [ %a ] max = [ %a ] min = [ %a ] \n" % result[2])
            file.write("Total time : %a \n\n\n" % result[3])


def shutdown():
    """Schedule a Windows shutdown in 90 seconds (used after long overnight runs)."""
    import os
    os.system("shutdown /s /t 90")


if __name__ == "__main__":
    main()
    # shutdown()
class QBMctsAgent(UctMctsAgent):
    """
    UCT MCTS agent with Quality-Based (QB) rewards: each rollout's reward
    receives a bonus or penalty derived from how the simulated game's
    length compares to the running distribution of playout lengths.
    """

    def __init__(self, state: GameState = GameState(8)):
        super(QBMctsAgent, self).__init__(state=state)
        moves_number, size = len(self.root_state.moves()), self.root_state.size
        # seed the playout-length statistics with one plausible length
        initial_member = randint(moves_number // size, moves_number // 2)
        self.pl_list = asarray([[initial_member, initial_member]])

    def search(self, time_budget: int) -> None:
        """
        Search and update the search tree for a specified amount of time
        in seconds.

        Args:
            time_budget: How much time to think
        """
        start_time = clock()
        num_rollouts = 0
        # do until we exceed our time budget
        while clock() - start_time < time_budget:
            node, state = self.select_node()
            turn = state.turn()
            outcome = self.roll_out(state)
            self.backup(node, turn, outcome, state)
            num_rollouts += 1
        self.run_time = clock() - start_time
        self.node_count = self.tree_size()
        self.num_rollouts = num_rollouts

    def roll_out(self, state: GameState) -> int:
        """
        Simulate an entirely random game from the passed state.

        Returns:
            int: the winning player (white or black).
            (BUGFIX: was annotated ``-> tuple`` but only the winner is returned.)
        """
        moves = state.moves()
        while state.winner == GameMeta.PLAYERS['none']:
            move = choice(moves)
            state.play(move)
            moves.remove(move)
        return state.winner

    def modify_reward(self, pl_length: list) -> dict:
        """
        Compute the quality-based bonus for each colour from the playout
        length of the finished simulation.

        Args:
            pl_length: [white_moves, black_moves] of the finished playout.
                (BUGFIX: was annotated ``dict`` but callers pass a list.)

        Returns:
            dict: per-colour bonus in (-1, 1) to add to the plain reward.
        """
        mean_ = mean(self.pl_list, axis=0)
        mean_offset = asarray([mean_[0] - pl_length[0], mean_[1] - pl_length[1]])
        deviation = std(self.pl_list, axis=0)
        # z-score of the playout length; guard against zero deviation
        landa = asarray(list(map(lambda x, y: x / y if y != 0 else 0, mean_offset, deviation)))
        bonus = -1 + (2 / (1 + exp(-MCTSMeta.K_CONST * landa)))
        return {'white': bonus[0], 'black': bonus[1]}

    def backup(self, node, turn, outcome, state: GameState):
        """
        Propagate the (quality-adjusted) simulation result from ``node`` up
        to the root.

        Careful: the reward is calculated for the player who just played at
        the node, not the next player to move.
        """
        pl_length = [state.get_num_played()['white'], state.get_num_played()['black']]
        self.pl_list = append(self.pl_list, [pl_length], axis=0)
        bonus = self.modify_reward(pl_length)
        reward = -1 if outcome == turn else 1

        while node is not None:
            node.N += 1
            max_moves_played = max(state.get_num_played().values())
            color = 'black' if turn == GameMeta.PLAYERS['black'] else 'white'
            # apply the QB bonus only after enough playouts have been seen
            if max_moves_played >= MCTSMeta.WARMUP_ROLLOUTS:
                qb_reward = reward + (reward * MCTSMeta.A_CONST * bonus[color])
            else:
                qb_reward = reward
            node.Q += qb_reward
            # BUGFIX: the old code toggled ``turn`` between 0 and 1, but
            # players are encoded as 1 (white) / 2 (black) in
            # GameMeta.PLAYERS, so the bonus colour was wrong for every
            # ancestor node.  Toggle between the real player codes, matching
            # RaveMctsAgent.backup.
            turn = (GameMeta.PLAYERS['white'] if turn == GameMeta.PLAYERS['black']
                    else GameMeta.PLAYERS['black'])
            node = node.parent
            reward = -reward

    def move(self, move):
        """
        Advance the tree root by the given move (manual play is allowed, so
        the move need not be the best one) and re-seed the playout-length
        statistics for the new position.
        """
        if move in self.root.children:
            child = self.root.children[move]
            child.parent = None
            self.root = child
            self.root_state.play(child.move)
            moves_number, size = len(self.root_state.moves()), self.root_state.size
            initial_member = randint(moves_number // size, moves_number // 2)
            self.pl_list = asarray([[initial_member, initial_member]])
            return

        # if for whatever reason the move is not in the children of
        # the root just throw out the tree and start over
        self.root_state.play(move)
        self.root = Node()
class RaveNode(Node):
    """MCTS tree node extended with AMAF (RAVE) visit/value statistics."""

    def __init__(self, move=None, parent=None):
        """
        Initialize a new node with optional move and parent and initially
        empty children list and rollout statistics and unspecified outcome.
        """
        super(RaveNode, self).__init__(move, parent)

    @property
    def value(self, explore: float = MCTSMeta.EXPLORATION, rave_const: float = MCTSMeta.RAVE_CONST) -> float:
        """
        RAVE-blended UCT value of this node relative to its parent.

        ``explore`` trades exploitation for exploration; ``alpha`` fades the
        AMAF estimate out as the visit count approaches ``rave_const``.
        NOTE(review): because this is a property, callers can never actually
        pass ``explore``/``rave_const`` — they always take their defaults.
        """
        # unless explore is set to zero, maximally favor unexplored nodes
        # BUGFIX: was `explore is 0` and `self.N_RAVE is not 0` — identity
        # comparisons with int literals are implementation-defined and emit
        # SyntaxWarning on CPython >= 3.8; use equality comparisons.
        if self.N == 0:
            return 0 if explore == 0 else GameMeta.INF
        alpha = max(0, (rave_const - self.N) / rave_const)
        uct = self.Q / self.N + explore * sqrt(2 * log(self.parent.N) / self.N)
        amaf = self.Q_RAVE / self.N_RAVE if self.N_RAVE != 0 else 0
        return (1 - alpha) * uct + alpha * amaf


class RaveMctsAgent(UctMctsAgent):
    """UCT MCTS agent whose node values blend in AMAF (RAVE) statistics."""

    def __init__(self, state: GameState = GameState(8)):
        self.root_state = deepcopy(state)
        self.root = RaveNode()
        self.run_time = 0
        self.node_count = 0
        self.num_rollouts = 0

    def set_gamestate(self, state: GameState) -> None:
        """
        Set the root_state of the tree to the passed gamestate; this clears
        all information stored in the tree since none of it applies to the
        new state.
        """
        self.root_state = deepcopy(state)
        self.root = RaveNode()

    def move(self, move: tuple) -> None:
        """
        Make the passed move and update the tree appropriately. Designed to
        let the player choose an action manually (which might not be the
        best action).
        """
        if move in self.root.children:
            child = self.root.children[move]
            child.parent = None
            self.root = child
            self.root_state.play(child.move)
            return

        # if for whatever reason the move is not in the children of
        # the root just throw out the tree and start over
        self.root_state.play(move)
        self.root = RaveNode()

    def search(self, time_budget: int) -> None:
        """Run simulations until ``time_budget`` seconds have elapsed."""
        start_time = clock()
        num_rollouts = 0
        while clock() - start_time < time_budget:
            node, state = self.select_node()
            turn = state.turn()
            outcome, black_rave_pts, white_rave_pts = self.roll_out(state)
            self.backup(node, turn, outcome, black_rave_pts, white_rave_pts)
            num_rollouts += 1
        self.run_time = clock() - start_time
        self.node_count = self.tree_size()
        self.num_rollouts = num_rollouts

    def select_node(self) -> tuple:
        """Select a node in the tree to perform a single simulation from."""
        node = self.root
        state = deepcopy(self.root_state)

        # descend until we reach a leaf node
        while len(node.children) != 0:
            max_value = max(node.children.values(), key=lambda n: n.value).value
            # descend to the maximum-value node, breaking ties at random
            max_nodes = [n for n in node.children.values() if n.value == max_value]
            node = choice(max_nodes)
            state.play(node.move)

        # an unexplored node is simulated once before being expanded
        if node.N == 0:
            return node, state

        # expand the leaf (unless terminal) and pick one of its children
        if self.expand(node, state):
            node = choice(list(node.children.values()))
            state.play(node.move)
        return node, state

    @staticmethod
    def expand(parent: RaveNode, state: GameState) -> bool:
        """Add a child for every legal move; return False if the state is terminal."""
        if state.winner != GameMeta.PLAYERS["none"]:
            # game is over at this node so nothing to expand
            return False
        parent.add_children([RaveNode(move, parent) for move in state.moves()])
        return True

    @staticmethod
    def roll_out(state: GameState) -> tuple:
        """
        Play uniformly random moves until the game ends; return the winner
        together with the cells finally occupied by each colour (for AMAF).
        """
        moves = state.moves()
        while state.winner == GameMeta.PLAYERS["none"]:
            move = choice(moves)
            state.play(move)
            moves.remove(move)

        black_rave_pts = []
        white_rave_pts = []
        for x in range(state.size):
            for y in range(state.size):
                owner = state.board[(x, y)]
                if owner == GameMeta.PLAYERS["black"]:
                    black_rave_pts.append((x, y))
                elif owner == GameMeta.PLAYERS["white"]:
                    white_rave_pts.append((x, y))
        return state.winner, black_rave_pts, white_rave_pts

    def backup(self, node: RaveNode, turn: int, outcome: int, black_rave_pts: list, white_rave_pts: list) -> None:
        """
        Propagate the simulation result from ``node`` to the root, updating
        both plain and RAVE statistics.  Note that the reward is calculated
        for the player who just played at the node, not the next to move.
        """
        reward = -1 if outcome == turn else 1

        while node is not None:
            rave_pts = white_rave_pts if turn == GameMeta.PLAYERS["white"] else black_rave_pts
            for point in rave_pts:
                if point in node.children:
                    node.children[point].Q_RAVE += -reward
                    node.children[point].N_RAVE += 1
            node.N += 1
            node.Q += reward
            turn = GameMeta.PLAYERS['white'] if turn == GameMeta.PLAYERS['black'] else GameMeta.PLAYERS['black']
            reward = -reward
            node = node.parent
class DecisiveMoveMctsAgent(RaveMctsAgent):
    """RAVE agent whose rollouts prefer moves that do not lose immediately."""

    def roll_out(self, state: GameState) -> tuple:
        """
        Simulate a game, but on each turn try candidate moves that are not
        immediate losses first, falling back to any remaining move.
        Returns the winner plus the cells occupied by each colour.
        """
        moves = state.moves()
        good_moves = moves.copy()
        good_opponent_moves = moves.copy()
        to_play = state.turn()

        while state.winner == GameMeta.PLAYERS["none"]:
            played = False
            # try non-losing candidates first
            while good_moves and not played:
                candidate = choice(good_moves)
                good_moves.remove(candidate)
                if not state.would_lose(candidate, to_play):
                    state.play(candidate)
                    moves.remove(candidate)
                    if candidate in good_opponent_moves:
                        good_opponent_moves.remove(candidate)
                    played = True

            if not played:
                # every candidate loses: play any remaining move
                candidate = choice(moves)
                state.play(candidate)
                moves.remove(candidate)
                if candidate in good_opponent_moves:
                    good_opponent_moves.remove(candidate)

            # swap perspective for the next player
            good_moves, good_opponent_moves = good_opponent_moves, good_moves

        black_rave_pts = []
        white_rave_pts = []
        for x in range(state.size):
            for y in range(state.size):
                owner = state.board[(x, y)]
                if owner == GameMeta.PLAYERS["black"]:
                    black_rave_pts.append((x, y))
                elif owner == GameMeta.PLAYERS["white"]:
                    white_rave_pts.append((x, y))
        return state.winner, black_rave_pts, white_rave_pts


class LGRMctsAgent(RaveMctsAgent):
    """
    RAVE agent with Last-Good-Reply rollouts: remember, per colour, the
    reply that worked against each opponent move and try it again.
    """

    def __init__(self, state: GameState = GameState(8)):
        super().__init__(state)
        self.black_reply = {}
        self.white_reply = {}

    def set_gamestate(self, state: GameState) -> None:
        """Reset the tree and forget all stored replies."""
        super().set_gamestate(state)
        self.white_reply = {}
        self.black_reply = {}

    def roll_out(self, state: GameState) -> tuple:
        """
        Simulate a game, reusing each colour's last good reply to the
        previous move when available (kept with probability RANDOMNESS),
        then update the reply tables from the winner's move sequence.
        """
        moves = state.moves()
        first = state.turn()
        if first == GameMeta.PLAYERS["black"]:
            current_reply, other_reply = self.black_reply, self.white_reply
        else:
            current_reply, other_reply = self.white_reply, self.black_reply
        black_moves = []
        white_moves = []
        last_move = None
        while state.winner == GameMeta.PLAYERS["none"]:
            if last_move in current_reply:
                move = current_reply[last_move]
                # discard the stored reply if illegal, or randomly
                if move not in moves or random() > MCTSMeta.RANDOMNESS:
                    move = choice(moves)
            else:
                move = choice(moves)
            if state.turn() == GameMeta.PLAYERS["black"]:
                black_moves.append(move)
            else:
                white_moves.append(move)
            current_reply, other_reply = other_reply, current_reply
            state.play(move)
            moves.remove(move)
            last_move = move

        black_rave_pts = []
        white_rave_pts = []
        for x in range(state.size):
            for y in range(state.size):
                owner = state.board[(x, y)]
                if owner == GameMeta.PLAYERS["black"]:
                    black_rave_pts.append((x, y))
                elif owner == GameMeta.PLAYERS["white"]:
                    white_rave_pts.append((x, y))

        # Pair each loser move with the winner's reply that followed it:
        # ``offset`` skips the winner's opening move when the winner moved
        # first, and ``skip`` drops the loser's final unanswered move.
        offset = 0
        skip = 0
        if state.winner == GameMeta.PLAYERS["black"]:
            if first == GameMeta.PLAYERS["black"]:
                offset = 1
            if state.turn() == GameMeta.PLAYERS["black"]:
                skip = 1
            for i in range(len(white_moves) - skip):
                self.black_reply[white_moves[i]] = black_moves[i + offset]
        else:
            if first == GameMeta.PLAYERS["white"]:
                offset = 1
            if state.turn() == GameMeta.PLAYERS["white"]:
                skip = 1
            for i in range(len(black_moves) - skip):
                self.white_reply[black_moves[i]] = white_moves[i + offset]

        return state.winner, black_rave_pts, white_rave_pts


class PoolRaveMctsAgent(RaveMctsAgent):
    """
    RAVE agent whose rollouts are biased toward a small pool of cells with
    the best historical RAVE scores for each colour.
    """

    def __init__(self, state: GameState = GameState(8)):
        super().__init__(state)
        self.black_rave = {}
        self.white_rave = {}

    def set_gamestate(self, state: GameState) -> None:
        """Reset the tree and clear both per-cell RAVE score tables."""
        super().set_gamestate(state)
        self.black_rave = {}
        self.white_rave = {}

    def roll_out(self, state: GameState) -> tuple:
        """
        Simulate a game preferring moves from each colour's RAVE pool, then
        update the per-cell RAVE scores from the result.
        """
        moves = state.moves()
        black_rave_moves = sorted(self.black_rave.keys(), key=lambda cell: self.black_rave[cell])
        white_rave_moves = sorted(self.white_rave.keys(), key=lambda cell: self.white_rave[cell])
        black_pool = []
        white_pool = []

        # fill each pool with up to POOLRAVE_CAPACITY still-legal cells
        i = 0
        while len(black_pool) < MCTSMeta.POOLRAVE_CAPACITY and i < len(black_rave_moves):
            if black_rave_moves[i] in moves:
                black_pool.append(black_rave_moves[i])
            i += 1
        i = 0
        while len(white_pool) < MCTSMeta.POOLRAVE_CAPACITY and i < len(white_rave_moves):
            if white_rave_moves[i] in moves:
                white_pool.append(white_rave_moves[i])
            i += 1

        num_pool = 0
        while state.winner == GameMeta.PLAYERS["none"]:
            move = None
            if black_pool and state.turn() == GameMeta.PLAYERS["black"]:
                move = choice(black_pool)
                num_pool += 1
            elif white_pool:
                move = choice(white_pool)
                num_pool += 1
            # fall back to a uniform move randomly or when the pool move is stale
            if random() > MCTSMeta.RANDOMNESS or not move or move not in moves:
                move = choice(moves)
                num_pool -= 1
            state.play(move)
            moves.remove(move)

        black_rave_pts = []
        white_rave_pts = []
        black_won = state.winner == GameMeta.PLAYERS["black"]
        white_won = state.winner == GameMeta.PLAYERS["white"]
        for x in range(state.size):
            for y in range(state.size):
                cell = (x, y)
                owner = state.board[cell]
                if owner == GameMeta.PLAYERS["black"]:
                    black_rave_pts.append(cell)
                    self.black_rave[cell] = self.black_rave.get(cell, 0) + (1 if black_won else -1)
                elif owner == GameMeta.PLAYERS["white"]:
                    white_rave_pts.append(cell)
                    self.white_rave[cell] = self.white_rave.get(cell, 0) + (1 if white_won else -1)

        return state.winner, black_rave_pts, white_rave_pts
355 | 356 | """ 357 | moves = state.moves() 358 | black_rave_moves = sorted(self.black_rave.keys(), 359 | key=lambda cell: self.black_rave[cell]) 360 | white_rave_moves = sorted(self.white_rave.keys(), 361 | key=lambda cell: self.white_rave[cell]) 362 | black_pool = [] 363 | white_pool = [] 364 | 365 | i = 0 366 | while len(black_pool) < MCTSMeta.POOLRAVE_CAPACITY and i < len(black_rave_moves): 367 | if black_rave_moves[i] in moves: 368 | black_pool.append(black_rave_moves[i]) 369 | i += 1 370 | i = 0 371 | while len(white_pool) < MCTSMeta.POOLRAVE_CAPACITY and i < len(white_rave_moves): 372 | if white_rave_moves[i] in moves: 373 | white_pool.append(white_rave_moves[i]) 374 | i += 1 375 | num_pool = 0 376 | while state.winner == GameMeta.PLAYERS["none"]: 377 | move = None 378 | if len(black_pool) > 0 and state.turn() == GameMeta.PLAYERS["black"]: 379 | move = choice(black_pool) 380 | num_pool += 1 381 | elif len(white_pool) > 0: 382 | move = choice(white_pool) 383 | num_pool += 1 384 | if random() > MCTSMeta.RANDOMNESS or not move or move not in moves: 385 | move = choice(moves) 386 | num_pool -= 1 387 | 388 | state.play(move) 389 | moves.remove(move) 390 | 391 | black_rave_pts = [] 392 | white_rave_pts = [] 393 | 394 | for x in range(state.size): 395 | for y in range(state.size): 396 | if state.board[(x, y)] == GameMeta.PLAYERS["black"]: 397 | black_rave_pts.append((x, y)) 398 | if state.winner == GameMeta.PLAYERS["black"]: 399 | if (x, y) in self.black_rave: 400 | self.black_rave[(x, y)] += 1 401 | else: 402 | self.black_rave[(x, y)] = 1 403 | else: 404 | if (x, y) in self.black_rave: 405 | self.black_rave[(x, y)] -= 1 406 | else: 407 | self.black_rave[(x, y)] = -1 408 | elif state.board[(x, y)] == GameMeta.PLAYERS["white"]: 409 | white_rave_pts.append((x, y)) 410 | if state.winner == GameMeta.PLAYERS["white"]: 411 | if (x, y) in self.white_rave: 412 | self.white_rave[(x, y)] += 1 413 | else: 414 | self.white_rave[(x, y)] = 1 415 | else: 416 | if (x, y) in 
from meta import GameMeta
import sys
import time


def print_game(game):
    """Print the move sequence of one finished game on a single line."""
    for move in game:
        print(move, end=' ')
    print()


def tournament(interface1, interface2, game_number=100, movetime=10, size=8, opening_moves=[]):
    """
    Run some number of games between two agents, alternating who has first move
    each time. Return the winrate for the first of the two agents. Print games
    played along the way.

    Args:
        interface1, interface2: GTP interfaces wrapping the two agents.
        game_number: number of games to play.
        movetime: per-move time budget (seconds) handed to both agents.
        size: board size passed to gtp_boardsize.
        opening_moves: kept for API compatibility; currently unused.
            NOTE(review): mutable default argument -- never mutate it here.

    Returns:
        tuple: (player 1 wins, player 2 wins,
                (mean, max, min) rollouts per genmove, elapsed seconds)
    """
    # BUG FIX: time.clock() was removed in Python 3.8; perf_counter() is the
    # documented replacement for measuring elapsed wall-clock intervals.
    begin = time.perf_counter()
    p1_score = 0  # score for player 1
    p2_score = 0  # score for player 2
    interface1.gtp_time([movetime])
    interface2.gtp_time([movetime])
    interface1.gtp_boardsize([size])
    interface2.gtp_boardsize([size])
    # These accumulate over BOTH agents' moves and feed the final
    # "Average Simulations" figure. (The original also declared per-game
    # rollouts_2/genmove_calls_2 counters that were never used; removed.)
    rollouts_1 = 0
    genmove_calls_1 = 0
    list_of_rollouts = []
    print('Tournament Started ...')
    print("%a games will be running between agents ..." % game_number)
    for i in range(game_number):
        interface1.gtp_clear([])
        interface2.gtp_clear([])
        turn = interface1.game.turn()
        c1 = 'w' if turn == GameMeta.PLAYERS["white"] else 'b'
        c2 = 'b' if turn == GameMeta.PLAYERS["white"] else 'w'
        game = []

        if i % 2 == 0:
            # Even games: interface1 plays the side to move first.
            while interface1.gtp_winner([])[1] == "none":
                move = interface1.gtp_genmove([c1])
                rollouts_1 += move[2]
                genmove_calls_1 += 1
                list_of_rollouts.append(move[2])
                if move[0]:
                    interface2.gtp_play([c1, move[1]])
                    game.append(move[1])
                move = interface2.gtp_genmove([c2])
                rollouts_1 += move[2]
                genmove_calls_1 += 1
                list_of_rollouts.append(move[2])
                if move[0]:
                    interface1.gtp_play([c2, move[1]])
                    game.append(move[1])

            if interface1.gtp_winner([])[1][0] == c1:
                p1_score += 1
                print("GAME OVER, WINNER : PLAYER 1 (" + c1 + ")\n")
                print("Games played = [ %i / %g ]" % (i + 1, game_number))
                print("Wins | Player 1 = [%a] | Player 2 = [%s] " % (p1_score, p2_score))
            else:
                p2_score += 1
                print("GAME OVER, WINNER : PLAYER 2 (" + c2 + ")\n")
                print("Games played = [ %i / %g ] " % (i + 1, game_number))
                print("Wins | Player 1 = [%a] | Player 2 = [%s] " % (p1_score, p2_score))

        else:
            # Odd games: swap roles so neither agent always opens.
            while interface1.gtp_winner([])[1] == "none":
                move = interface2.gtp_genmove([c1])
                rollouts_1 += move[2]
                genmove_calls_1 += 1
                list_of_rollouts.append(move[2])
                if move[0]:
                    interface1.gtp_play([c1, move[1]])
                    game.append(move[1])
                move = interface1.gtp_genmove([c2])
                rollouts_1 += move[2]
                genmove_calls_1 += 1
                list_of_rollouts.append(move[2])
                if move[0]:
                    interface2.gtp_play([c2, move[1]])
                    game.append(move[1])

            if interface1.gtp_winner([])[1][0] == c2:
                p1_score += 1
                print("GAME OVER, WINNER : PLAYER 1 (" + c2 + ")\n")
                print("Games played = [ %i / %g ] " % (i + 1, game_number))
                print("Wins | Player 1 = [%a] | Player 2 = [%s] " % (p1_score, p2_score))
            else:
                p2_score += 1
                print("GAME OVER, WINNER : PLAYER 2 (" + c1 + ")\n")
                print("Games played = [ %i / %g ] " % (i + 1, game_number))
                print("Wins | Player 1 = [%a] | Player 2 = [%s] " % (p1_score, p2_score))
        sys.stdout.flush()  # flush buffer so intermediate results can be viewed
    list_of_rollouts = list(filter(lambda a: a != 0, list_of_rollouts))
    p1 = (p1_score / game_number) * 100
    p2 = (p2_score / game_number) * 100
    # NOTE(review): raises ZeroDivisionError if every genmove reported 0
    # rollouts; preserved to keep the return contract unchanged.
    rollouts_info = (round(sum(list_of_rollouts) / len(list_of_rollouts)),
                     max(list_of_rollouts),
                     min(list_of_rollouts))
    print('\n\n\n')
    print('player 1 wins = ', p1, ' %')
    print('player 2 wins = ', p2, ' %')
    print("Average Simulations = [ %a ] " % (rollouts_1 / genmove_calls_1))
    print('Finished in %i seconds' % (time.perf_counter() - begin))
    return p1_score, p2_score, rollouts_info, time.perf_counter() - begin
from math import sqrt, log
from copy import deepcopy
from uct_mcstsagent import Node, UctMctsAgent
from gamestate import GameState
from meta import *


class UCB1TunedNode(Node):
    """
    Node for the MCTS. Stores the move applied to reach this node from its parent,
    stats for the associated game position, children, parent and outcome
    (outcome==none unless the position ends the game).
    Differs from Node only in the selection value: UCB1-Tuned instead of UCB1.
    """

    @property
    def value(self, explore: float = MCTSMeta.EXPLORATION) -> float:
        """
        UCB1-Tuned value of this node relative to its parent. "explore"
        weights exploration of rarely-visited nodes against exploitation of
        high win-rate nodes; because this is a property, callers always get
        the MCTSMeta.EXPLORATION default (the parameter cannot be passed).
        """
        # Unvisited nodes get infinite value so they are tried first.
        # BUG FIX: the original used "explore is 0" -- identity comparison
        # against a literal is a SyntaxWarning on CPython 3.8+ and is False
        # for explore == 0.0; use numeric equality.
        if self.N == 0:
            return 0 if explore == 0 else GameMeta.INF
        avg = self.Q / self.N
        # Bernoulli variance estimate of the win rate (rewards are 0/1).
        variance = avg * (1 - avg)
        # UCB1-Tuned (Auer et al. 2002): cap the per-arm term at 1/4, the
        # maximum variance of a Bernoulli random variable.
        return avg + explore * sqrt(log(self.parent.N) / self.N) * min(0.25, variance + sqrt(
            2 * log(self.parent.N) / self.N))


class UCB1TunedMctsAgent(UctMctsAgent):
    """
    Implementation of an agent that performs MCTS for hex with UCB1-Tuned evaluation.
    """

    def __init__(self, state=GameState(8)):
        self.root_state = deepcopy(state)
        self.root = UCB1TunedNode()
        self.run_time = 0
        self.node_count = 0
        self.num_rollouts = 0

    @staticmethod
    def expand(parent: Node, state: GameState) -> bool:
        """
        Generate the children of the passed "parent" node based on the available
        moves in the passed gamestate and add them to the tree.

        Returns:
            bool: False if the game has ended at this node (leaf), True otherwise.
        """
        if state.winner != GameMeta.PLAYERS['none']:
            # game is over at this node so nothing to expand
            return False

        # Overrides the base expand only to create UCB1TunedNode children.
        children = [UCB1TunedNode(move, parent) for move in state.moves()]
        parent.add_children(children)
        return True

    def move(self, move: tuple) -> None:
        """
        Make the passed move and update the tree appropriately. It is
        designed to let the player choose an action manually (which might
        not be the best action).
        """
        if move in self.root.children:
            child = self.root.children[move]
            child.parent = None  # detach so the old tree can be collected
            self.root = child
            self.root_state.play(child.move)
            return

        # if for whatever reason the move is not in the children of
        # the root just throw out the tree and start over
        self.root_state.play(move)
        self.root = UCB1TunedNode()

    def set_gamestate(self, state: GameState) -> None:
        """
        Set the root_state of the tree to the passed gamestate, this clears all
        the information stored in the tree since none of it applies to the new
        state.
        """
        # FIX: the original annotated state as GameMeta; it is a GameState.
        self.root_state = deepcopy(state)
        self.root = UCB1TunedNode()
from math import sqrt, log
from copy import deepcopy
from queue import Queue
from random import choice
from time import time as clock
from meta import GameMeta, MCTSMeta
from gamestate import GameState


class Node:
    """
    Node for the MCTS. Stores the move applied to reach this node from its parent,
    stats for the associated game position, children, parent and outcome
    (outcome==none unless the position ends the game).
    Attributes:
        move (tuple): move applied to reach this node from its parent
        parent (Node): parent node, None for the root
        N (int): times this position was visited
        Q (int): average reward (wins-losses) from this position
        Q_RAVE (int): times this move has been critical in a rollout
        N_RAVE (int): times this move has appeared in a rollout
        children (dict): move -> successor Node
        outcome (int): if the node is a leaf, the winner; else PLAYERS['none']
    """

    def __init__(self, move: tuple = None, parent: object = None):
        """
        Initialize a new node with optional move and parent and initially empty
        children list and rollout statistics and unspecified outcome.
        """
        self.move = move
        self.parent = parent
        self.N = 0  # times this position was visited
        self.Q = 0  # average reward (wins-losses) from this position
        self.Q_RAVE = 0  # times this move has been critical in a rollout
        self.N_RAVE = 0  # times this move has appeared in a rollout
        self.children = {}
        self.outcome = GameMeta.PLAYERS['none']

    def add_children(self, children: list) -> None:
        """
        Add a list of nodes to the children of this node, keyed by their move.
        (FIX: the original annotated the parameter as dict; it is iterated as
        a list of Node objects.)
        """
        for child in children:
            self.children[child.move] = child

    @property
    def value(self, explore: float = MCTSMeta.EXPLORATION) -> float:
        """
        UCT value of this node relative to its parent. "explore" weights
        exploration of rarely-visited nodes against exploitation of nodes
        with a high win rate. NOTE(review): because this is a property the
        parameter can never be passed -- the effective value is always
        MCTSMeta.EXPLORATION (the old docstring's claim of "0.5" was only
        true for one particular configuration).
        """
        # Unvisited nodes get infinite value so they are tried first.
        if self.N == 0:
            return 0 if explore == 0 else GameMeta.INF
        # exploitation + exploration (UCB1)
        return self.Q / self.N + explore * sqrt(2 * log(self.parent.N) / self.N)
class UctMctsAgent:
    """
    Basic no frills implementation of an agent that performs MCTS for hex.
    Attributes:
        root_state (GameState): game simulator holding the current position
        root (Node): root of the search tree
        run_time (float): wall-clock seconds used by the last search
        node_count (int): number of nodes in the tree after the last search
        num_rollouts (int): rollouts performed by the last search
    """

    def __init__(self, state=GameState(8)):
        # Deep-copy so external mutation of `state` cannot corrupt the tree.
        self.root_state = deepcopy(state)
        self.root = Node()
        self.run_time = 0
        self.node_count = 0
        self.num_rollouts = 0

    def search(self, time_budget: int) -> None:
        """
        Search and update the search tree for a
        specified amount of time in seconds.
        """
        start_time = clock()
        num_rollouts = 0

        # do until we exceed our time budget
        while clock() - start_time < time_budget:
            node, state = self.select_node()
            turn = state.turn()
            outcome = self.roll_out(state)
            self.backup(node, turn, outcome)
            num_rollouts += 1
        run_time = clock() - start_time
        node_count = self.tree_size()
        self.run_time = run_time
        self.node_count = node_count
        self.num_rollouts = num_rollouts

    def select_node(self) -> tuple:
        """
        Select a node in the tree to perform a single simulation from.

        Returns:
            tuple: (selected node, game state played forward to that node)
        """
        node = self.root
        state = deepcopy(self.root_state)

        # descend until we reach a leaf node
        while len(node.children) != 0:
            # descend to the maximum value node, break ties at random.
            # PERF FIX: `value` is a computed property (sqrt/log each access);
            # the original evaluated it twice per child. Compute each child's
            # value once and reuse it for both the max and the tie list.
            children = list(node.children.values())
            values = [child.value for child in children]
            max_value = max(values)
            max_nodes = [child for child, val in zip(children, values)
                         if val == max_value]
            node = choice(max_nodes)
            state.play(node.move)

            # if some child node has not been explored select it before
            # expanding other children
            if node.N == 0:
                return node, state

        # if we reach a leaf node generate its children and return one of them
        # if the node is terminal, just return the terminal node
        if self.expand(node, state):
            node = choice(list(node.children.values()))
            state.play(node.move)
        return node, state

    @staticmethod
    def expand(parent: Node, state: GameState) -> bool:
        """
        Generate the children of the passed "parent" node based on the available
        moves in the passed gamestate and add them to the tree.

        Returns:
            bool: False if the game has ended at this node (leaf), True otherwise.
        """
        if state.winner != GameMeta.PLAYERS['none']:
            # game is over at this node so nothing to expand
            return False

        children = [Node(move, parent) for move in state.moves()]
        parent.add_children(children)
        return True

    @staticmethod
    def roll_out(state: GameState) -> int:
        """
        Simulate an entirely random game from the passed state and return the
        winning player.

        Args:
            state: game state (mutated in place by the playout)

        Returns:
            int: winner of the game
        """
        moves = state.moves()  # all legal moves in the current position

        while state.winner == GameMeta.PLAYERS['none']:
            move = choice(moves)
            state.play(move)
            moves.remove(move)

        return state.winner

    @staticmethod
    def backup(node: Node, turn: int, outcome: int) -> None:
        """
        Update the node statistics on the path from the passed node to root to
        reflect the outcome of a randomly simulated playout.

        Args:
            node: leaf node the playout started from
            turn: player to move at that node
            outcome: winner of the playout
        """
        # Careful: the reward is calculated for the player who just played
        # at the node, not the next player to play, hence the inversion.
        reward = 0 if outcome == turn else 1

        while node is not None:
            node.N += 1
            node.Q += reward
            node = node.parent
            reward = 1 - reward  # alternate perspective each ply up the tree

    def best_move(self) -> tuple:
        """
        Return the best move according to the current tree.
        Returns:
            best move in terms of the most simulations number unless the game is over
        """
        if self.root_state.winner != GameMeta.PLAYERS['none']:
            return GameMeta.GAME_OVER

        # choose the move of the most simulated node breaking ties randomly
        max_value = max(self.root.children.values(), key=lambda n: n.N).N
        max_nodes = [n for n in self.root.children.values() if n.N == max_value]
        best_child = choice(max_nodes)
        return best_child.move

    def move(self, move: tuple) -> None:
        """
        Make the passed move and update the tree appropriately. It is
        designed to let the player choose an action manually (which might
        not be the best action).

        Args:
            move: move to play from the root position
        """
        if move in self.root.children:
            child = self.root.children[move]
            child.parent = None  # detach so the discarded tree can be collected
            self.root = child
            self.root_state.play(child.move)
            return

        # if for whatever reason the move is not in the children of
        # the root just throw out the tree and start over
        self.root_state.play(move)
        self.root = Node()

    def set_gamestate(self, state: GameState) -> None:
        """
        Set the root_state of the tree to the passed gamestate, this clears all
        the information stored in the tree since none of it applies to the new
        state.
        """
        self.root_state = deepcopy(state)
        self.root = Node()

    def statistics(self) -> tuple:
        """Return (num_rollouts, node_count, run_time) of the last search."""
        return self.num_rollouts, self.node_count, self.run_time

    def tree_size(self) -> int:
        """
        Count nodes in tree by BFS.
        """
        Q = Queue()
        count = 0
        Q.put(self.root)
        while not Q.empty():
            node = Q.get()
            count += 1
            for child in node.children.values():
                Q.put(child)
        return count
class UnionFind:
    """
    Notes:
        unionfind data structure specialized for finding hex connections.
        Implementation inspired by UAlberta CMPUT 275 2015 class notes.

    Attributes:
        parent (dict): Each group parent
        rank (dict): Each group rank
        groups (dict): Stores the groups and chain of cells
        ignored (list): The neighborhood of board edges has to be ignored
    """

    def __init__(self) -> None:
        """
        Initialize parent and rank as empty dictionaries, we will
        lazily add items as necessary.
        """
        self.parent = {}
        self.rank = {}
        self.groups = {}
        self.ignored = []

    def join(self, x, y) -> bool:
        """
        Merge the groups of x and y if they were not already,
        return False if they were already merged, true otherwise.
        (Refactor: the original duplicated the merge bookkeeping in all
        three rank branches; it is now written once.)

        Args:
            x (tuple): game board cell
            y (tuple): game board cell
        """
        rep_x = self.find(x)
        rep_y = self.find(y)

        if rep_x == rep_y:
            return False

        # Union by rank: make rep_y the surviving root.
        if self.rank[rep_x] > self.rank[rep_y]:
            rep_x, rep_y = rep_y, rep_x
        elif self.rank[rep_x] == self.rank[rep_y]:
            self.rank[rep_y] += 1

        self.parent[rep_x] = rep_y
        self.groups[rep_y].extend(self.groups[rep_x])
        del self.groups[rep_x]
        return True

    def find(self, x):
        """
        Get the representative element associated with the set in
        which element x resides. Uses grandparent compression to compress
        the tree on each find operation so that future find operations are faster.
        Args:
            x (tuple): game board cell
        """
        if x not in self.parent:
            # Lazily register the element as its own singleton group;
            # ignored cells (board-edge padding) are kept out of groups.
            self.parent[x] = x
            self.rank[x] = 0
            self.groups[x] = [] if x in self.ignored else [x]

        px = self.parent[x]
        if x == px:
            return x

        gx = self.parent[px]
        if gx == px:
            return px

        # Grandparent compression: hop x past its parent, then recurse.
        self.parent[x] = gx
        return self.find(gx)

    def connected(self, x, y) -> bool:
        """
        Check if two elements are in the same group.

        Args:
            x (tuple): game board cell
            y (tuple): game board cell
        """
        return self.find(x) == self.find(y)

    def set_ignored_elements(self, ignore):
        """
        Elements in ignored, edges has to be ignored.
        """
        self.ignored = ignore

    def get_groups(self) -> dict:
        """
        Returns:
            dict: mapping of group representative -> list of member cells
        """
        return self.groups