├── requirements.txt ├── GamesAI ├── div │ ├── __init__.py │ ├── GameContent.py │ └── utils.py ├── games │ ├── __init__.py │ ├── connect4.py │ └── tictactoe.py ├── algorithms │ ├── __init__.py │ └── MCTS.py ├── __init__.py ├── Game.py └── Player.py ├── example ├── context.py ├── main.py └── evaluate_agent.py ├── setup.py ├── LICENSE ├── .gitignore └── README.md /requirements.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /GamesAI/div/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /GamesAI/games/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /GamesAI/algorithms/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /GamesAI/games/connect4.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | -------------------------------------------------------------------------------- /GamesAI/__init__.py: -------------------------------------------------------------------------------- 1 | from GamesAI import Game, Player, games 2 | from GamesAI.div import GameContent -------------------------------------------------------------------------------- /example/context.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) 4 | 5 | import GamesAI -------------------------------------------------------------------------------- /example/main.py: 
-------------------------------------------------------------------------------- 1 | import context 2 | from GamesAI.Player import Player, RandomPlayer, HumanPlayer, Minimax, AlphaBeta, MinimaxPlus, MonteCarloTreeSearch 3 | from GamesAI.games.tictactoe import TicTacToeGame, TicTacToeRandomGame 4 | 5 | #Define agents ie dictionnary with key being game name and value being either agent class or tuple of agent class and kwargs for initializing the class 6 | def h(state): 7 | return {"X" : 0, "O" : 0} 8 | agents = {"X" : HumanPlayer, "O" : (MinimaxPlus, {'max_depth': 4, 'heuristic': h})} 9 | # agents = {"O" : RandomPlayer, "X" : (MonteCarloTreeSearch, {'n_rollouts': 300})} 10 | 11 | #Create the game object. 12 | game = TicTacToeRandomGame(agents) 13 | 14 | #Play a game. 15 | game.play_game(verbose=2) -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_namespace_packages 2 | 3 | with open("requirements.txt", "r") as f: 4 | requirements = [package.replace("\n", "") for package in f.readlines()] 5 | 6 | setup( 7 | name="GamesAI", 8 | url="https://github.com/tboulet/AI-Agents-for-Games", 9 | author="Timothé Boulet", 10 | author_email="timothe.boulet0@gmail.com", 11 | 12 | packages=find_namespace_packages(), 13 | # Needed for dependencies 14 | install_requires=requirements[1:], 15 | dependency_links=requirements[:1], 16 | # package_data={"configs": "*.yaml"}, 17 | version="0.0.1", 18 | license="MIT", 19 | description="GamesAI is a library of AI agents for games.", 20 | long_description=open('README.md').read(), 21 | ) -------------------------------------------------------------------------------- /GamesAI/div/GameContent.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import Union, TypeVar 3 | 4 | class State(ABC): 5 | 
"""The class for defining a STATE of a GAME problem.""" 6 | @abstractmethod 7 | def __str__(self) -> str: 8 | pass 9 | 10 | @abstractmethod 11 | def __hash__(self) -> int: 12 | pass 13 | 14 | @abstractmethod 15 | def __eq__(self, other) -> bool: 16 | pass 17 | 18 | class Percept(ABC): 19 | @abstractmethod 20 | def __str__(self) -> str: 21 | pass 22 | 23 | @abstractmethod 24 | def __hash__(self) -> int: 25 | pass 26 | 27 | @abstractmethod 28 | def __eq__(self, other) -> bool: 29 | pass 30 | 31 | 32 | class GameType: pass 33 | class Action: pass 34 | -------------------------------------------------------------------------------- /example/evaluate_agent.py: -------------------------------------------------------------------------------- 1 | import context 2 | from GamesAI.Player import Player, RandomPlayer, HumanPlayer, Minimax, AlphaBeta, MinimaxPlus, MonteCarloTreeSearch 3 | from GamesAI.games.tictactoe import TicTacToeGame, TicTacToeRandomGame 4 | 5 | def h(state): 6 | return {"X" : 0, "O" : 0} 7 | 8 | agents = {"X" : RandomPlayer, 9 | "O" : (MinimaxPlus, {'max_depth': 4, 'heuristic': h})} 10 | # agents = {"X" : RandomPlayer, 11 | # "O" : (MonteCarloTreeSearch, {'n_rollouts': 100})} 12 | 13 | ut_mean = 0 14 | n_test = 20 15 | for _ in range(n_test): 16 | #Create the game object. 17 | game = TicTacToeRandomGame(agents) 18 | 19 | #Play a game. 
20 | final_state = game.play_game(verbose=0) 21 | ut_mean += game.get_utilities(final_state)[game.players["O"]] / n_test 22 | print("Mean utility of player ", game.players["O"], "against random:", ut_mean) -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Timothé Boulet 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /GamesAI/div/utils.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | from typing import Iterable, Callable 3 | 4 | class Constant(Enum): 5 | DRAW = "DRAW" 6 | RANDOM = "RANDOM" 7 | 8 | def argmax(indexes : Iterable, func : Callable, return_value : bool = False) -> object: 9 | """Return the object in values with the highest function. 10 | indexes : an iterable of indexes to consider. 11 | func : a function taking an index as argument and returning a value.""" 12 | if len(indexes) == 0: 13 | raise ValueError("No element in indexes") 14 | max_value = float("-inf") 15 | for idx in indexes: 16 | value = func(idx) 17 | if value > max_value: 18 | max_value = value 19 | max_idx = idx 20 | if return_value: return max_idx, max_value 21 | return max_idx 22 | 23 | def argmin(indexes : Iterable, func : Callable, return_value : bool = False) -> object: 24 | """Return the object in values with the lowest function. 25 | indexes : an iterable of indexes to consider.
26 | func : a function taking an index as argument and returning a value.""" 27 | if not return_value: 28 | return argmax(indexes, lambda idx : -func(idx)) 29 | else: 30 | idx_min, minus_value_min = argmax(indexes, lambda idx : -func(idx), return_value = True) 31 | return idx_min, -minus_value_min -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | example/ 2 | 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | pip-wheel-metadata/ 26 | share/python-wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .nox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | *.py,cover 53 | .hypothesis/ 54 | .pytest_cache/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | local_settings.py 63 | db.sqlite3 64 | db.sqlite3-journal 65 | 66 | # Flask stuff: 67 | instance/ 68 | .webassets-cache 69 | 70 | # Scrapy stuff: 71 | .scrapy 72 | 73 | # Sphinx documentation 74 | docs/_build/ 75 | 76 | # PyBuilder 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # IPython 83 | profile_default/ 84 | ipython_config.py 85 | 86 | # pyenv 87 | .python-version 88 | 89 | # pipenv 90 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 91 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 92 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 93 | # install all needed dependencies. 94 | #Pipfile.lock 95 | 96 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 97 | __pypackages__/ 98 | 99 | # Celery stuff 100 | celerybeat-schedule 101 | celerybeat.pid 102 | 103 | # SageMath parsed files 104 | *.sage.py 105 | 106 | # Environments 107 | .env 108 | .venv 109 | env/ 110 | venv/ 111 | ENV/ 112 | env.bak/ 113 | venv.bak/ 114 | 115 | # Spyder project settings 116 | .spyderproject 117 | .spyproject 118 | 119 | # Rope project settings 120 | .ropeproject 121 | 122 | # mkdocs documentation 123 | /site 124 | 125 | # mypy 126 | .mypy_cache/ 127 | .dmypy.json 128 | dmypy.json 129 | 130 | # Pyre type checker 131 | .pyre/ 132 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GamesAI 2 | An implementation in python of some game agents such as AlphaBeta or MCTS, that can be applied to any n-player non deterministic game object that implements the game interface. 3 | 4 | ## Installation : 5 | Install the package using : 6 | 7 | pip install git+https://github.com/tboulet/AI-Agents-for-Games 8 | 9 | ## Using the package : 10 | Import games and players (agents) for performing on it. 11 | The agents dictionnary given to a game object must be a dictionnary with the name being the names of the roles and values being either the class or a tuple class/kwargs for the agent. 12 | ```python 13 | from GamesAI.Player import RandomPlayer, HumanPlayer, AlphaBeta 14 | from GamesAI.games.tictactoe import TicTacToeGame 15 | 16 | agents = {'X' : HumanPlayer, 17 | 'O' : (MonteCarloTreeSearch, {'n_rollouts': 200}), 18 | } 19 | game = TicTacToeGame(agents = agents) 20 | game.play_game() 21 | ``` 22 | 23 | ## Players/Game agents 24 | Some AI agents that can solve games that implements the Game interface can be found in GamesAI.Player. 
25 | 26 | ```python 27 | from GamesAI.Player import RandomPlayer, HumanPlayer, Minimax, MinimaxPlus, AlphaBeta, MonteCarloTreeSearch 28 | ``` 29 | 30 | You can also create your own player classes by inheriting the Player class or NonDeterministicPlayer if your class can deal with non-deterministic games. 31 | 32 | It should have an agent_name as static attribute and implements the method get_action(state): 33 | ```python 34 | from GamesAI.Player import Player, NonDeterministicPlayer 35 | 36 | class RandomPlayer(NonDeterministicPlayer): 37 | """A player that plays randomly.""" 38 | agent_name = "RandomPlayer" 39 | 40 | def __init__(self, game : object, game_name: str, agent_name: str) -> None: 41 | super().__init__(game, game_name, agent_name) 42 | 43 | def get_action(self, state: State) -> object: 44 | """Return a random available action.""" 45 | return random.choice(self.game.get_actions(state)) 46 | ``` 47 | ## Creating a game 48 | For now only tic-tac-toe and a random version of it (where a box is randomly erased after each O's turn) are implemented in GamesAI.games, but you can create your own game object by inheriting the Game class. 49 | 50 | ```python 51 | from GamesAI.Game import Game, NonDeterministicGame, State 52 | 53 | class YourState(State): 54 | pass 55 | 56 | class YourGame(Game): 57 | names = {"Blue", "Red", "Yellow"} 58 | pass 59 | ``` 60 | 61 | Your game object will use a subclass of State class that must implements the __hash__, __eq__ and __str__ methods. A state define the complete information of the game at a certain instant. 
62 | 63 | A subclass of Game should have a set of names as static attribute and implement the following methods : 64 | 65 | - get_start_state() : return the initial state of the game 66 | - get_player_playing(state) : return the player playing at the given state 67 | - get_actions(state) : return the list of actions available at the given state 68 | - get_result(state, action) : return the state reached by the given action in the given state 69 | - is_terminal_state(state) : return True if the given state is a terminal state 70 | - get_utilites(state) : return the utilities of the players at the given state 71 | 72 | If you want to create a non deterministic game, where randomness is involved, you should inherit the NonDeterministicGame class and implements the get_random_action_distribution(state) method. 73 | 74 | -------------------------------------------------------------------------------- /GamesAI/games/tictactoe.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | from GamesAI.Game import Game, Player, State, NonDeterministicGame 3 | 4 | class TicTacToeState(State): 5 | 6 | def __init__(self, player_playing : Player, board : list[str]) -> None: 7 | super().__init__() 8 | self.board = board 9 | self.player_playing = player_playing 10 | 11 | def __str__(self) -> str: 12 | res = "" 13 | for i in range(3): 14 | for j in range(3): 15 | elem = self.board[3*i + j] 16 | if elem == 0: 17 | res += '.' 
18 | else: 19 | res += elem 20 | res += '\n' 21 | return res 22 | 23 | def __eq__(self, other) -> bool: 24 | return self.board == other.board 25 | 26 | def __hash__(self) -> int: 27 | return hash(tuple(self.board)) 28 | 29 | 30 | class TicTacToeGame(Game): 31 | names = {"X", "O"} 32 | 33 | def __init__(self, agents : dict[str, type]) -> None: 34 | super().__init__(agents) 35 | board = [0, 0, 0, 0, 0, 0, 0, 0, 0] 36 | self.initial_state = TicTacToeState(self.players["X"], board) 37 | 38 | def get_start_state(self) -> TicTacToeState: 39 | return self.initial_state 40 | 41 | def get_player_playing(self, state: TicTacToeState) -> Player: 42 | return state.player_playing 43 | 44 | def get_actions(self, state: TicTacToeState) -> list[object]: 45 | actions = [] 46 | for i in range(9): 47 | if state.board[i] == 0: 48 | actions.append(i) 49 | return actions 50 | 51 | def get_result(self, state: TicTacToeState, action: object) -> TicTacToeState: 52 | board = state.board.copy() 53 | board[action] = state.player_playing.game_name 54 | next_name = "X" if state.player_playing.game_name == "O" else "O" 55 | next_player = self.players[next_name] 56 | return TicTacToeState(next_player, board) 57 | 58 | def is_terminal_state(self, state: State) -> bool: 59 | return (self.get_actions(state) == []) or (self.get_winner(state) is not None) 60 | 61 | def get_utilities(self, state: TicTacToeState) -> dict[Player, float]: 62 | winner = self.get_winner(state) 63 | if winner is None: 64 | return {player : 0 for player in self.get_players().values()} 65 | return {player : 1 if player == winner else -1 for player in self.get_players().values()} 66 | 67 | 68 | # specific methods to TicTacToe 69 | def get_winner_string(self, state: TicTacToeState) -> Union[str, None]: 70 | """Returns the winner of the board, or None if no winner""" 71 | board = state.board 72 | # Check rows 73 | for i in range(3): 74 | if board[3*i] == board[3*i+1] == board[3*i+2] != 0: 75 | return board[3*i] 76 | # Check columns 77 |
for i in range(3): 78 | if board[i] == board[i+3] == board[i+6] != 0: 79 | return board[i] 80 | # Check diagonals 81 | if board[0] == board[4] == board[8] != 0: 82 | return board[0] 83 | if board[2] == board[4] == board[6] != 0: 84 | return board[2] 85 | return None 86 | 87 | def get_winner(self, state: TicTacToeState) -> Union[Player, None]: 88 | winner_str = self.get_winner_string(state) 89 | if winner_str is None: return None 90 | return self.players[winner_str] 91 | 92 | 93 | 94 | class TicTacToeRandomGame(TicTacToeGame, NonDeterministicGame): 95 | """TicTacToe game where a random agent randomly reset one of the box.""" 96 | 97 | def __init__(self, agents: dict[str, type]) -> None: 98 | TicTacToeGame.__init__(self, agents) 99 | 100 | def get_actions(self, state: TicTacToeState) -> list[object]: 101 | if state.player_playing is not None: 102 | return TicTacToeGame.get_actions(self, state) 103 | else: 104 | return [k for k in range(9)] 105 | 106 | def get_result(self, state: TicTacToeState, action: object) -> TicTacToeState: 107 | player_playing = state.player_playing 108 | if player_playing is None: 109 | board = state.board.copy() 110 | board[action] = 0 111 | next_player = self.players['X'] 112 | return TicTacToeState(next_player, board) 113 | 114 | else: 115 | board = state.board.copy() 116 | board[action] = state.player_playing.game_name 117 | if player_playing.game_name == "X": 118 | next_player = self.players['O'] 119 | elif player_playing.game_name == "O": 120 | next_player = None 121 | else: 122 | raise Exception("Invalid player") 123 | return TicTacToeState(next_player, board) 124 | 125 | def get_random_action_distribution(self, state: TicTacToeState) -> dict[object, float]: 126 | if state.player_playing is None: 127 | return {action: 1/9 for action in [k for k in range(9)]} 128 | else: 129 | raise Exception("The state is not a random state.") -------------------------------------------------------------------------------- /GamesAI/algorithms/MCTS.py: 
-------------------------------------------------------------------------------- 1 | #Inspired by following MCTS implementation : https://gist.github.com/qpwo/c538c6f73727e254fdc7fab81024f6e1 2 | import random 3 | from collections import defaultdict 4 | import math 5 | from typing import Iterable 6 | 7 | from GamesAI.div.GameContent import State, GameType, Action 8 | from GamesAI.Player import Player 9 | from GamesAI.div.utils import argmax 10 | 11 | class Node: 12 | """ 13 | A representation of a single state. 14 | MCTS works by constructing a tree of these Nodes. 15 | """ 16 | def __init__(self, state : State, game : GameType, player : Player) -> None: 17 | self.state = state 18 | self.game = game 19 | self.player = player 20 | 21 | def find_children(self) -> Iterable["Node"]: 22 | "Return all possible successors of this node" 23 | childrens = list() 24 | for action in self.game.get_actions(self.state): 25 | state = self.game.get_result(self.state, action) 26 | childrens.append(Node(state = state, game = self.game, player = self.player)) 27 | return childrens 28 | 29 | def find_random_child(self) -> "Node": 30 | "Random successor of this board state (for more efficient simulation)" 31 | actions = self.game.get_actions(self.state) 32 | action = random.choice(actions) 33 | state = state = self.game.get_result(self.state, action) 34 | return Node(state = state, game = self.game, player = self.game.get_player_playing(state)) 35 | 36 | def is_terminal(self) -> bool: 37 | "Returns True if the node has no children" 38 | return self.game.is_terminal_state(self.state) 39 | 40 | def __hash__(self) -> int: 41 | "Nodes must be hashable" 42 | return hash(self.state) 43 | 44 | def __eq__(self, node2 : "Node") -> bool: 45 | "Nodes must be comparable" 46 | return self.state == node2.state 47 | 48 | 49 | 50 | class MonteCarloTreeSearch(Player): 51 | """Player that uses Monte-Carlo Tree Search method for evaluating node. 
52 | Only works for 2 player games, deterministic, zero-sum game.""" 53 | agent_name = "MCTS" 54 | 55 | def __init__(self, game: GameType, game_name: str, agent_name: str, n_rollouts : int = 50) -> None: 56 | super().__init__(game, game_name, agent_name) 57 | self.n_rollouts = n_rollouts 58 | if len(game.names) != 2: 59 | raise ValueError("MCTS can only be used for 2 player games") 60 | self.Q = defaultdict(int) # total utility of each node 61 | self.N = defaultdict(int) # total visit count for each node 62 | self.children = dict() # children of each node 63 | self.exploration_weight = 1.4 # exploration weight, should scale with the typical variational utility. Utility variation of 1 <=> exploration_weight of 1.4. 64 | 65 | 66 | def get_action(self, state: State) -> Action: 67 | 68 | "Choose the best successor of node. (Choose a move in the game)" 69 | node = Node(state = state, game = self.game, player = self) 70 | if node.is_terminal(): 71 | raise RuntimeError(f"choose called on terminal node {node}") 72 | 73 | for _ in range(self.n_rollouts): 74 | self.do_rollout(node) 75 | 76 | if node not in self.children: 77 | return node.find_random_child() 78 | 79 | def score(action : Action): 80 | n = Node(state = self.game.get_result(state, action), game = self.game, player = self) 81 | if self.N[n] == 0: 82 | return float("-inf") # avoid unseen moves 83 | return self.Q[n] / self.N[n] # average utility 84 | 85 | return argmax(indexes = self.game.get_actions(state), func = score) 86 | 87 | def do_rollout(self, node : Node) -> None: 88 | "Make the tree one layer better. (Train for one iteration.)" 89 | path = self.select(node) 90 | leaf = path[-1] 91 | self.expand(leaf) 92 | utilities = self.simulate(leaf) 93 | self.backpropagate(path, utilities) 94 | 95 | def select(self, node): 96 | "Find an unexplored descendent of a node, return the path leading from node to this descendent." 
97 | path = [] 98 | while True: 99 | path.append(node) 100 | if node not in self.children or not self.children[node]: 101 | # node is either unexplored or terminal 102 | return path 103 | unexplored = self.children[node] - self.children.keys() 104 | if unexplored: 105 | n = unexplored.pop() 106 | path.append(n) 107 | return path 108 | node = self.uct_select(node) # descend a layer deeper 109 | 110 | def expand(self, node : Node) -> None: 111 | "Update the `children` dict with the children of `node`" 112 | if node in self.children: 113 | return # already expanded 114 | self.children[node] = node.find_children() 115 | 116 | def simulate(self, node : Node) -> dict[Player, float]: 117 | "Returns the utilities for a random simulation of a node" 118 | while True: 119 | if node.is_terminal(): 120 | return self.game.get_utilities(node.state) 121 | node = node.find_random_child() 122 | 123 | def backpropagate(self, path : list[Node], utilities : dict[Player, float]) -> None: 124 | "Send the utilities back up to the ancestors of the leaf" 125 | for node in reversed(path): 126 | self.N[node] += 1 127 | self.Q[node] += utilities[node.player] 128 | 129 | def uct_select(self, node : Node) -> Node: 130 | "Select a child of node, balancing exploration & exploitation" 131 | 132 | # All children of node should already be expanded: 133 | assert all(n in self.children for n in self.children[node]) 134 | 135 | log_N_vertex = math.log(self.N[node]) 136 | 137 | def uct(n): 138 | "Upper confidence bound for trees" 139 | return self.Q[n] / self.N[n] + self.exploration_weight * math.sqrt( 140 | log_N_vertex / self.N[n] 141 | ) 142 | 143 | return max(self.children[node], key=uct) -------------------------------------------------------------------------------- /GamesAI/Game.py: -------------------------------------------------------------------------------- 1 | """Module for defining the Game object. 2 | 3 | Games should be define as subclasses of Game or NonDeterministicGame. 
They should implement abstract methods as defined in the Game abstract class. 4 | """ 5 | 6 | #Tool imports 7 | from abc import ABC, abstractmethod 8 | from time import sleep 9 | from typing import Union 10 | import random 11 | #Game solving module imports 12 | from GamesAI.div.utils import Constant 13 | from GamesAI.div.GameContent import State, Percept, Action 14 | from GamesAI.Player import Player, NonDeterministicPlayer, NonFullyObservablePlayer 15 | 16 | 17 | class Game(ABC): 18 | """The class for defining a GAME problem. 19 | Standard games are deterministic, observable, turn-taking. 20 | """ 21 | 22 | def __init__(self, agents : dict[str, Union[type, tuple[type, dict]]]) -> None: 23 | """Creation of a GAME object. 24 | agents is a dictionnary with game_name as keys and a class of player as values. 25 | If the player class initializer requires arguments, the value can instead be a tuple (PlayerClass, kwargs). 26 | 27 | A subclass of Game should implement the following methods : 28 | - get_start_state() : return the initial state of the game 29 | - get_player_playing(state) : return the player playing at the given state 30 | - get_actions(state) : return the list of actions available at the given state 31 | - get_result(state, action) : return the state reached by the given action in the given state 32 | - is_terminal_state(state) : return True if the given state is a terminal state 33 | - get_utilites(state) : return the utilities of the players at the given state 34 | """ 35 | 36 | if not hasattr(self, 'names'): raise Exception("Game class must define class attribute .names") 37 | if self.names != set(agents.keys()): raise Exception("Game class names does not match agents names (agents keys)") 38 | 39 | self.players = dict() 40 | for game_name, PlayerClass_or_tuple in agents.items(): 41 | if isinstance(PlayerClass_or_tuple, type): 42 | self.players[game_name] = PlayerClass_or_tuple(game = self, 43 | game_name = game_name, 44 | agent_name = 
PlayerClass_or_tuple.agent_name) 45 | elif isinstance(PlayerClass_or_tuple, tuple): 46 | PlayerClass, kwargs = PlayerClass_or_tuple 47 | self.players[game_name] = PlayerClass(game = self, 48 | game_name = game_name, 49 | agent_name = PlayerClass.agent_name, 50 | **kwargs) 51 | else: 52 | raise Exception("Game class must define agents as dict[str, type] or dict[str, tuple[type, dict]]") 53 | 54 | @abstractmethod 55 | def get_start_state(self) -> State: 56 | """Return the initial state of the game""" 57 | pass 58 | 59 | @abstractmethod 60 | def get_player_playing(self, state : State) -> Union[Player, None]: 61 | """Return the player playing in the given state. Return None if no player should play, ie if randomness plays.""" 62 | pass 63 | 64 | @abstractmethod 65 | def get_actions(self, state : State) -> list[Action]: 66 | """Return the list of actions available in the given state for the player playing in the state""" 67 | pass 68 | 69 | @abstractmethod 70 | def get_result(self, state : State, action : Action) -> State: 71 | """Return the state reached by the game after having played the given action in the given state""" 72 | pass 73 | 74 | @abstractmethod 75 | def is_terminal_state(self, state : State) -> bool: 76 | """Return True if the given state is a terminal state, False otherwise""" 77 | pass 78 | 79 | @abstractmethod 80 | def get_utilities(self, state : State) -> dict[Player, float]: 81 | """Return the utilities of each player.""" 82 | pass 83 | 84 | #Permanent methods 85 | def get_players(self) -> dict[str, Player]: 86 | """Return the players of the game with their game names as keys""" 87 | return self.players 88 | 89 | def get_names(self) -> set: 90 | """Return the set of the game names""" 91 | if not hasattr(self, 'names'): raise Exception("Game class must define class attribute .names") 92 | return self.names 93 | 94 | def play_game(self, verbose : int, wait_time : float = 0) -> State: 95 | """Play the game until the end, print the information, return 
the final state. 96 | verbose = 0 : no print 97 | verbose = 1 : print game result (utilities for each player) 98 | verbose = 2 : print game result and state at each step 99 | """ 100 | state = self.get_start_state() 101 | if verbose >= 1: print("Starting game ...") 102 | while True: 103 | sleep(wait_time) 104 | if verbose >= 2: 105 | print(state) 106 | if self.is_terminal_state(state): 107 | if verbose >= 1: 108 | print("\tEnd of game, utilities of players :") 109 | for player in self.players.values(): 110 | print(player, ": ", self.get_utilities(state)[player]) 111 | return state 112 | player = self.get_player_playing(state) 113 | if player is None: 114 | distribution = self.get_random_action_distribution(state) 115 | action = random.choices(list(distribution.keys()), weights = list(distribution.values()))[0] 116 | if verbose >= 2: 117 | print(f"Random action : {action}") 118 | else: 119 | action = player.get_action(state) 120 | if verbose >= 2: 121 | print(f"{player} action : {action}") 122 | state = self.get_result(state, action) 123 | 124 | 125 | class NonDeterministicGame(Game): 126 | """Non deterministic game, where randomness happens at some node. 127 | 128 | Subclasses should implement the methods of Game as well as the get_random_action_distribution method. 129 | - get_random_action_distribution(state) : return the distribution of actions available at the given state for the player playing at the state. 
130 | """ 131 | def __init__(self, agents: dict[str, Union[type, tuple[type, dict]]]) -> None: 132 | for player_class in agents.values(): 133 | if isinstance(player_class, tuple): 134 | player_class = player_class[0] 135 | if not issubclass(player_class, NonDeterministicPlayer): 136 | raise Exception(f"Non deterministic game must have only NonDeterministicPlayer players (player inheriting NonDeterministicPlayer class) but {player_class.agent_name} is not.") 137 | super().__init__(agents) 138 | 139 | @abstractmethod 140 | def get_random_action_distribution(self, state : State) -> dict[Action, float]: 141 | """Return the action distribution for the actions available in the given random state""" 142 | if self.get_player_playing is not None: 143 | raise Exception("The state is not a random state.") 144 | actions = self.get_actions(state) 145 | if len(actions) == 0: 146 | raise Exception("The state has no action available.") 147 | return {action : 1 / len(actions) for action in actions} 148 | 149 | 150 | 151 | 152 | class NonFullyObservableGame(Game): 153 | """Non fully observable game are games where each agent does not have access to the complete state but rather only some information called a percept. 154 | 155 | Sub classes should implement the methods of Game as well as the get_percept method. 
156 | - get_percept_method(state, player) : return the percept of the given state for a certain player 157 | """ 158 | def __init__(self, agents: dict[str, Union[type, tuple[type, dict]]]) -> None: 159 | for player_class in agents.values(): 160 | if isinstance(player_class, tuple): 161 | player_class = player_class[0] 162 | if not issubclass(player_class, NonFullyObservablePlayer): 163 | raise Exception(f"Non fully observable game must have only NonFullyObservablePlayer players (player inheriting NonFullyObservablePlayer class) but {player_class.agent_name} is not.") 164 | super().__init__(agents) 165 | 166 | @abstractmethod 167 | def get_percept(self, state : State, player : NonFullyObservablePlayer) -> Percept: 168 | """Return the percept of the given state for a certain player""" 169 | 170 | def play_game(self, verbose : int, wait_time : float = 0) -> State: 171 | """Play the game until the end, print the information, return the final state. 172 | verbose = 0 : no print 173 | verbose = 1 : print game result (utilities for each player) 174 | verbose = 2 : print game result and state at each step 175 | """ 176 | state = self.get_start_state() 177 | if verbose >= 1: print("Starting game ...") 178 | while True: 179 | sleep(wait_time) 180 | player = self.get_player_playing(state) 181 | percept = self.get_percept(state, player) 182 | if verbose >= 2: 183 | print("State: ", state) 184 | print("Percept: ", percept) 185 | if self.is_terminal_state(state): 186 | if verbose >= 1: 187 | print("\tEnd of game, utilities of players :") 188 | for player in self.players.values(): 189 | print(player, ": ", self.get_utilities(state)[player]) 190 | return state 191 | 192 | if player is None: 193 | distribution = self.get_random_action_distribution(state) 194 | action = random.choices(list(distribution.keys()), weights = list(distribution.values()))[0] 195 | if verbose >= 2: 196 | print(f"Random action : {action}") 197 | else: 198 | distribution = 
player.get_action_distribution(percept) 199 | action = random.choices(list(distribution.keys()), weights = list(distribution.values()))[0] 200 | if verbose >= 2: 201 | print(f"{player} action : {action}") 202 | state = self.get_result(state, action) -------------------------------------------------------------------------------- /GamesAI/Player.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | import random 3 | from time import time 4 | from typing import Callable, Union 5 | 6 | from GamesAI.div.GameContent import State, Percept, GameType, Action 7 | from GamesAI.div.utils import argmin, argmax 8 | 9 | 10 | #Basic algorithm for games. 11 | class Player(ABC): 12 | """The class for defining a PLAYER in a GAME. A player has a game_name inside the game and an agent_name that explains its strategy. 13 | It is defined by its get_action method. 14 | Two players are considered equal (inside a game) if their game name is equal.""" 15 | agent_name = "BasicPlayer" 16 | 17 | def __init__(self, game : GameType, game_name : str, agent_name : str) -> None: 18 | """A player instance. 
19 | 20 | Args: 21 | game (GameType): the Game object in which the player will play 22 | game_name (str): the name of the player in the game eg "X", "Red", "Player1" etc 23 | agent_name (str): the name of the agent (AI) that is used eg Minimax, AlphaBeta etc 24 | """ 25 | self.game = game 26 | self.agent_name = agent_name 27 | self.game_name = game_name 28 | 29 | def __repr__(self) -> str: 30 | return f"[Player '{self.game_name}' ({self.agent_name})]" 31 | 32 | def __hash__(self) -> int: 33 | return hash(self.game_name) 34 | 35 | def __eq__(self, other) -> bool: 36 | if other is None: 37 | return False 38 | return self.game_name == other.game_name 39 | 40 | @abstractmethod 41 | def get_action(self, state: State) -> Action: 42 | """Return the action to be played in the given state""" 43 | pass 44 | 45 | 46 | class NonDeterministicPlayer(Player): 47 | """A player that can play in non deterministic games.""" 48 | agent_name = "NonDeterministicPlayer" 49 | 50 | class NonFullyObservablePlayer(Player): 51 | """A player that can play in non fully observable games.""" 52 | agent_name = "NonFullyObservablePlayer" 53 | 54 | class RandomPlayer(NonDeterministicPlayer, NonFullyObservablePlayer): 55 | """A player that plays randomly.""" 56 | agent_name = "RandomPlayer" 57 | 58 | def __init__(self, game : GameType, game_name: str, agent_name: str) -> None: 59 | super().__init__(game, game_name, agent_name) 60 | 61 | def get_action(self, state: State) -> Action: 62 | """Return a random available action.""" 63 | return random.choice(self.game.get_actions(state)) 64 | 65 | 66 | class HumanPlayer(NonDeterministicPlayer, NonFullyObservablePlayer): 67 | """A player asking for input for actions to take. 
Adapted to int and str actions.""" 68 | agent_name = "Human" 69 | 70 | def __init__(self, game: GameType, game_name: str, agent_name: str) -> None: 71 | super().__init__(game, game_name, agent_name) 72 | 73 | def get_action(self, state: State) -> Action: 74 | while True: 75 | actions = self.game.get_actions(state) 76 | print(f"\t{self.game_name}'s actions : {actions}") 77 | action = input("Enter action: ") 78 | if action in actions: 79 | return action 80 | elif action == '': 81 | continue 82 | elif action.isdigit() and int(action) in actions: 83 | return int(action) 84 | else: 85 | print("Invalid action") 86 | 87 | 88 | class Minimax(Player): 89 | """A player that uses the minimax algorithm to choose its actions. Only works for 2 player, zero sum games. 90 | For big state tree, the algorithm can't explore all tree and need to have a max_depth and a heuristic associated.""" 91 | agent_name = "Minimax" 92 | 93 | def __init__(self, game: GameType, game_name: str, agent_name: str, max_depth: int = float("inf"), heuristic : Callable[[State], dict[str, float]] = None) -> None: 94 | super().__init__(game, game_name, agent_name) 95 | if (max_depth == float("inf")) != (heuristic is None): 96 | raise ValueError("Heuristic and max_depth are either inf/None (default) or non_inf/non_None") 97 | if len(game.names) != 2: 98 | raise ValueError("Minimax can only be used for 2 player games") 99 | self.max_depth = max_depth 100 | self.heuristic = heuristic 101 | 102 | def get_action(self, state: State) -> Action: 103 | """Return the action that maximize Max (the player) utility.""" 104 | return argmax(indexes = self.game.get_actions(state), func = lambda action: self.min_value(self.game.get_result(state, action), depth = 1)) 105 | 106 | 107 | def min_value(self, state : State, depth : int) -> float: 108 | """Return the minimum utility of the next states of the given state after the given action.""" 109 | if self.game.is_terminal_state(state): 110 | return 
self.game.get_utilities(state)[self] 111 | elif depth >= self.max_depth: 112 | return self.heuristic(state)[self.game_name] 113 | else: 114 | return min([self.max_value(self.game.get_result(state, action), depth = depth + 1) for action in self.game.get_actions(state)]) 115 | 116 | def max_value(self, state : State, depth : int) -> float: 117 | """Return the maximum utility of the next states of the given state after the given action.""" 118 | if self.game.is_terminal_state(state): 119 | return self.game.get_utilities(state)[self] 120 | elif depth >= self.max_depth: 121 | return self.heuristic(state)[self.game_name] 122 | else: 123 | return max([self.min_value(self.game.get_result(state, action), depth = depth + 1) for action in self.game.get_actions(state)]) 124 | 125 | 126 | 127 | class AlphaBeta(Player): 128 | """AlphaBeta provide the same solution as Minimax but compute faster by pruning branches that are useless to explore (according to the heuristic)""" 129 | agent_name = "AlphaBeta" 130 | 131 | def __init__(self, game: GameType, game_name: str, agent_name: str, max_depth: int = float("inf"), heuristic : Callable[[State], dict[str, float]] = None) -> None: 132 | super().__init__(game, game_name, agent_name) 133 | if (max_depth == float("inf")) != (heuristic is None): 134 | raise ValueError("Heuristic and max_depth are either inf/None or non_inf/non_None") 135 | if len(game.names) != 2: 136 | raise ValueError("AlphaBeta can only be used for 2 player games") 137 | self.max_depth = max_depth 138 | self.heuristic = heuristic 139 | 140 | def get_action(self, state: State) -> Action: 141 | """Return the action that maximize Max (the player) utility.""" 142 | return argmax(indexes = self.game.get_actions(state), func = lambda action: self.min_value(self.game.get_result(state, action), depth = 1, alpha = float("-inf"), beta = float("inf"))) 143 | 144 | def min_value(self, state : State, depth : int, alpha : float, beta : float) -> float: 145 | """Return the minimum 
utility of the next states of the given state after the given action.""" 146 | if self.game.is_terminal_state(state): 147 | return self.game.get_utilities(state)[self] 148 | elif depth >= self.max_depth: 149 | return self.heuristic(state) 150 | else: 151 | value = float('inf') 152 | for action in self.game.get_actions(state): 153 | successor_state = self.game.get_result(state, action) 154 | value = min(value, self.max_value(successor_state, depth = depth + 1, alpha = alpha, beta = beta)) 155 | if value <= alpha: 156 | return value 157 | beta = min(beta, value) 158 | return value 159 | 160 | def max_value(self, state : State, depth : int, alpha : float, beta : float) -> float: 161 | """Return the maximum utility of the next states of the given state after the given action.""" 162 | if self.game.is_terminal_state(state): 163 | return self.game.get_utilities(state)[self] 164 | elif depth >= self.max_depth: 165 | return self.heuristic(state) 166 | else: 167 | value = float('-inf') 168 | for action in self.game.get_actions(state): 169 | successor_state = self.game.get_result(state, action) 170 | value = max(value, self.min_value(successor_state, depth = depth + 1, alpha = alpha, beta = beta)) 171 | if value >= beta: 172 | return value 173 | alpha = max(alpha, value) 174 | return value 175 | 176 | 177 | 178 | class MinimaxPlus(NonDeterministicPlayer): 179 | """A generalization of Minimax to games wtih any number of players and with randomness.""" 180 | agent_name = "MinimaxPlus" 181 | 182 | def __init__(self, game: GameType, game_name: str, agent_name: str, max_depth: int = float("inf"), heuristic: Callable[[State], dict[str, float]] = None) -> None: 183 | if (max_depth == float("inf")) != (heuristic is None): 184 | raise ValueError("Heuristic and max_depth are either inf/None or non_inf/non_None") 185 | super().__init__(game, game_name, agent_name) 186 | self.max_depth = max_depth 187 | self.heuristic = heuristic 188 | 189 | def get_action(self, state: State) -> Action: 
190 | """Return action that maximizes the expected utility.""" 191 | def func_to_optimize(action): 192 | next_state = self.game.get_result(state, action) 193 | return self.best_utilities_for_player(next_state, depth = 1)[self] 194 | return argmax(indexes = self.game.get_actions(state), 195 | func = func_to_optimize) 196 | 197 | def best_utilities_for_player(self, state : State, depth : int) -> dict[Player, float]: 198 | """Return the best predicted final utilities of a state according to a given player playing as Expectiminimax and assuming each other player plays as Expectiminimax.""" 199 | if self.game.is_terminal_state(state): 200 | return self.game.get_utilities(state) 201 | elif depth >= self.max_depth: 202 | return {self.game.get_players()[game_name] : utility for game_name, utility in self.heuristic(state).items()} 203 | player_playing = self.game.get_player_playing(state) 204 | 205 | if player_playing is None: 206 | # The state is a random state 207 | utilities = {player : 0 for player in self.game.get_players().values()} 208 | for action, prob in self.game.get_random_action_distribution(state).items(): 209 | next_state = self.game.get_result(state, action) 210 | next_utilities = self.best_utilities_for_player(next_state, depth = depth + 1) 211 | for player in utilities: 212 | utilities[player] += prob * next_utilities[player] 213 | return utilities 214 | 215 | else: 216 | #The state is a deterministic state, a Player has to play 217 | best_value = float("-inf") 218 | for action in self.game.get_actions(state): 219 | next_state = self.game.get_result(state, action) 220 | next_utilities = self.best_utilities_for_player(next_state, depth = depth + 1) 221 | value = next_utilities[player_playing] 222 | if value > best_value: 223 | best_value = value 224 | best_utilities = next_utilities 225 | return best_utilities 226 | 227 | 228 | 229 | #Algorithm from other modules that requires their own modules. 
230 | from GamesAI.algorithms.MCTS import MonteCarloTreeSearch --------------------------------------------------------------------------------