├── requirements.txt ├── GamesAI ├── div │ ├── __init__.py │ ├── GameContent.py │ └── utils.py ├── games │ ├── __init__.py │ ├── connect4.py │ └── tictactoe.py ├── algorithms │ ├── __init__.py │ └── MCTS.py ├── __init__.py ├── Game.py └── Player.py ├── example ├── context.py ├── main.py └── evaluate_agent.py ├── setup.py ├── LICENSE ├── .gitignore └── README.md /requirements.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /GamesAI/div/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /GamesAI/games/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /GamesAI/algorithms/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /GamesAI/games/connect4.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | -------------------------------------------------------------------------------- /GamesAI/__init__.py: -------------------------------------------------------------------------------- 1 | from GamesAI import Game, Player, games 2 | from GamesAI.div import GameContent -------------------------------------------------------------------------------- /example/context.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) 4 | 5 | import GamesAI -------------------------------------------------------------------------------- /example/main.py: 
-------------------------------------------------------------------------------- 1 | import context 2 | from GamesAI.Player import Player, RandomPlayer, HumanPlayer, Minimax, AlphaBeta, MinimaxPlus, MonteCarloTreeSearch 3 | from GamesAI.games.tictactoe import TicTacToeGame, TicTacToeRandomGame 4 | 5 | #Define agents ie dictionnary with key being game name and value being either agent class or tuple of agent class and kwargs for initializing the class 6 | def h(state): 7 | return {"X" : 0, "O" : 0} 8 | agents = {"X" : HumanPlayer, "O" : (MinimaxPlus, {'max_depth': 4, 'heuristic': h})} 9 | # agents = {"O" : RandomPlayer, "X" : (MonteCarloTreeSearch, {'n_rollouts': 300})} 10 | 11 | #Create the game object. 12 | game = TicTacToeRandomGame(agents) 13 | 14 | #Play a game. 15 | game.play_game(verbose=2) -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_namespace_packages 2 | 3 | with open("requirements.txt", "r") as f: 4 | requirements = [package.replace("\n", "") for package in f.readlines()] 5 | 6 | setup( 7 | name="GamesAI", 8 | url="https://github.com/tboulet/AI-Agents-for-Games", 9 | author="Timothé Boulet", 10 | author_email="timothe.boulet0@gmail.com", 11 | 12 | packages=find_namespace_packages(), 13 | # Needed for dependencies 14 | install_requires=requirements[1:], 15 | dependency_links=requirements[:1], 16 | # package_data={"configs": "*.yaml"}, 17 | version="0.0.1", 18 | license="MIT", 19 | description="GamesAI is a library of AI agents for games.", 20 | long_description=open('README.md').read(), 21 | ) -------------------------------------------------------------------------------- /GamesAI/div/GameContent.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import Union, TypeVar 3 | 4 | class State(ABC): 5 | 
"""The class for defining a STATE of a GAME problem.""" 6 | @abstractmethod 7 | def __str__(self) -> str: 8 | pass 9 | 10 | @abstractmethod 11 | def __hash__(self) -> int: 12 | pass 13 | 14 | @abstractmethod 15 | def __eq__(self, other) -> bool: 16 | pass 17 | 18 | class Percept(ABC): 19 | @abstractmethod 20 | def __str__(self) -> str: 21 | pass 22 | 23 | @abstractmethod 24 | def __hash__(self) -> int: 25 | pass 26 | 27 | @abstractmethod 28 | def __eq__(self, other) -> bool: 29 | pass 30 | 31 | 32 | class GameType: pass 33 | class Action: pass 34 | -------------------------------------------------------------------------------- /example/evaluate_agent.py: -------------------------------------------------------------------------------- 1 | import context 2 | from GamesAI.Player import Player, RandomPlayer, HumanPlayer, Minimax, AlphaBeta, MinimaxPlus, MonteCarloTreeSearch 3 | from GamesAI.games.tictactoe import TicTacToeGame, TicTacToeRandomGame 4 | 5 | def h(state): 6 | return {"X" : 0, "O" : 0} 7 | 8 | agents = {"X" : RandomPlayer, 9 | "O" : (MinimaxPlus, {'max_depth': 4, 'heuristic': h})} 10 | # agents = {"X" : RandomPlayer, 11 | # "O" : (MonteCarloTreeSearch, {'n_rollouts': 100})} 12 | 13 | ut_mean = 0 14 | n_test = 20 15 | for _ in range(n_test): 16 | #Create the game object. 17 | game = TicTacToeRandomGame(agents) 18 | 19 | #Play a game. 
20 | final_state = game.play_game(verbose=0) 21 | ut_mean += game.get_utilities(final_state)[game.players["O"]] / n_test 22 | print("Mean utility of player ", game.players["O"], "against random:", ut_mean) -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Timothé Boulet 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /GamesAI/div/utils.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | from typing import Iterable, Callable 3 | 4 | class Constant(Enum): 5 | DRAW = "DRAW" 6 | RANDOM = "RANDOM" 7 | 8 | def argmax(indexes : Iterable, func : Callable, return_value : bool = False) -> object: 9 | """Return the object in values with the highest function. 10 | indexes : an iterable of indexes to consider. 11 | func : a function taking an index as argument and returning a value.""" 12 | if len(indexes) == 0: 13 | raise ValueError("No element in indexes") 14 | max_value = float("-inf") 15 | for idx in indexes: 16 | value = func(idx) 17 | if value > max_value: 18 | max_value = value 19 | max_idx = idx 20 | if return_value: return max_idx, max_value 21 | return max_idx 22 | 23 | def argmin(indexes : Iterable, func : Callable, return_value : bool = False) -> object: 24 | """Return the object in values with the lowest function. 25 | indexes : an iterable of indexes to consider.
26 | func : a function taking an index as argument and returning a value.""" 27 | if not return_value: 28 | return argmax(indexes, lambda idx : -func(idx)) 29 | else: 30 | idx_min, minus_value_min = argmax(indexes, lambda idx : -func(idx), return_value = True) 31 | return idx_min, -minus_value_min -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | example/ 2 | 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | pip-wheel-metadata/ 26 | share/python-wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .nox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | *.py,cover 53 | .hypothesis/ 54 | .pytest_cache/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | local_settings.py 63 | db.sqlite3 64 | db.sqlite3-journal 65 | 66 | # Flask stuff: 67 | instance/ 68 | .webassets-cache 69 | 70 | # Scrapy stuff: 71 | .scrapy 72 | 73 | # Sphinx documentation 74 | docs/_build/ 75 | 76 | # PyBuilder 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # IPython 83 | profile_default/ 84 | ipython_config.py 85 | 86 | # pyenv 87 | .python-version 88 | 89 | # pipenv 90 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 91 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 92 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 93 | # install all needed dependencies. 94 | #Pipfile.lock 95 | 96 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 97 | __pypackages__/ 98 | 99 | # Celery stuff 100 | celerybeat-schedule 101 | celerybeat.pid 102 | 103 | # SageMath parsed files 104 | *.sage.py 105 | 106 | # Environments 107 | .env 108 | .venv 109 | env/ 110 | venv/ 111 | ENV/ 112 | env.bak/ 113 | venv.bak/ 114 | 115 | # Spyder project settings 116 | .spyderproject 117 | .spyproject 118 | 119 | # Rope project settings 120 | .ropeproject 121 | 122 | # mkdocs documentation 123 | /site 124 | 125 | # mypy 126 | .mypy_cache/ 127 | .dmypy.json 128 | dmypy.json 129 | 130 | # Pyre type checker 131 | .pyre/ 132 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GamesAI 2 | An implementation in python of some game agents such as AlphaBeta or MCTS, that can be applied to any n-player non deterministic game object that implements the game interface. 3 | 4 | ## Installation : 5 | Install the package using : 6 | 7 | pip install git+https://github.com/tboulet/AI-Agents-for-Games 8 | 9 | ## Using the package : 10 | Import games and players (agents) for performing on it. 11 | The agents dictionnary given to a game object must be a dictionnary with the name being the names of the roles and values being either the class or a tuple class/kwargs for the agent. 12 | ```python 13 | from GamesAI.Player import RandomPlayer, HumanPlayer, AlphaBeta 14 | from GamesAI.games.tictactoe import TicTacToeGame 15 | 16 | agents = {'X' : HumanPlayer, 17 | 'O' : (MonteCarloTreeSearch, {'n_rollouts': 200}), 18 | } 19 | game = TicTacToeGame(agents = agents) 20 | game.play_game() 21 | ``` 22 | 23 | ## Players/Game agents 24 | Some AI agents that can solve games that implements the Game interface can be found in GamesAI.Player. 
25 | 26 | ```python 27 | from GamesAI.Player import RandomPlayer, HumanPlayer, Minimax, MinimaxPlus, AlphaBeta, MonteCarloTreeSearch 28 | ``` 29 | 30 | You can also create your own player classes by inheriting the Player class or NonDeterministicPlayer if your class can deal with non-deterministic games. 31 | 32 | It should have an agent_name as static attribute and implements the method get_action(state): 33 | ```python 34 | from GamesAI.Player import Player, NonDeterministicPlayer 35 | 36 | class RandomPlayer(NonDeterministicPlayer): 37 | """A player that plays randomly.""" 38 | agent_name = "RandomPlayer" 39 | 40 | def __init__(self, game : object, game_name: str, agent_name: str) -> None: 41 | super().__init__(game, game_name, agent_name) 42 | 43 | def get_action(self, state: State) -> object: 44 | """Return a random available action.""" 45 | return random.choice(self.game.get_actions(state)) 46 | ``` 47 | ## Creating a game 48 | For now only tic-tac-toe and a random version of it (where a box is randomly erased after each O's turn) are implemented in GamesAI.games, but you can create your own game object by inheriting the Game class. 49 | 50 | ```python 51 | from GamesAI.Game import Game, NonDeterministicGame, State 52 | 53 | class YourState(State): 54 | pass 55 | 56 | class YourGame(Game): 57 | names = {"Blue", "Red", "Yellow"} 58 | pass 59 | ``` 60 | 61 | Your game object will use a subclass of State class that must implements the __hash__, __eq__ and __str__ methods. A state define the complete information of the game at a certain instant. 
62 | 63 | A subclass of Game should have a set of names as static attribute and implement the following methods : 64 | 65 | - get_start_state() : return the initial state of the game 66 | - get_player_playing(state) : return the player playing at the given state 67 | - get_actions(state) : return the list of actions available at the given state 68 | - get_result(state, action) : return the state reached by the given action in the given state 69 | - is_terminal_state(state) : return True if the given state is a terminal state 70 | - get_utilites(state) : return the utilities of the players at the given state 71 | 72 | If you want to create a non deterministic game, where randomness is involved, you should inherit the NonDeterministicGame class and implements the get_random_action_distribution(state) method. 73 | 74 | -------------------------------------------------------------------------------- /GamesAI/games/tictactoe.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | from GamesAI.Game import Game, Player, State, NonDeterministicGame 3 | 4 | class TicTacToeState(State): 5 | 6 | def __init__(self, player_playing : Player, board : list[str]) -> None: 7 | super().__init__() 8 | self.board = board 9 | self.player_playing = player_playing 10 | 11 | def __str__(self) -> str: 12 | res = "" 13 | for i in range(3): 14 | for j in range(3): 15 | elem = self.board[3*i + j] 16 | if elem == 0: 17 | res += '.' 
18 | else: 19 | res += elem 20 | res += '\n' 21 | return res 22 | 23 | def __eq__(self, other) -> bool: 24 | return self.board == other.board 25 | 26 | def __hash__(self) -> int: 27 | return hash(tuple(self.board)) 28 | 29 | 30 | class TicTacToeGame(Game): 31 | names = {"X", "O"} 32 | 33 | def __init__(self, agents : dict[str, type]) -> None: 34 | super().__init__(agents) 35 | board = [0, 0, 0, 0, 0, 0, 0, 0, 0] 36 | self.initial_state = TicTacToeState(self.players["X"], board) 37 | 38 | def get_start_state(self) -> TicTacToeState: 39 | return self.initial_state 40 | 41 | def get_player_playing(self, state: TicTacToeState) -> Player: 42 | return state.player_playing 43 | 44 | def get_actions(self, state: TicTacToeState) -> list[object]: 45 | actions = [] 46 | for i in range(9): 47 | if state.board[i] == 0: 48 | actions.append(i) 49 | return actions 50 | 51 | def get_result(self, state: TicTacToeState, action: object) -> TicTacToeState: 52 | board = state.board.copy() 53 | board[action] = state.player_playing.game_name 54 | next_name = "X" if state.player_playing.game_name == "O" else "O" 55 | next_player = self.players[next_name] 56 | return TicTacToeState(next_player, board) 57 | 58 | def is_terminal_state(self, state: State) -> bool: 59 | return (self.get_actions(state) == []) or (self.get_winner(state) is not None) 60 | 61 | def get_utilities(self, state: TicTacToeState) -> dict[Player, float]: 62 | winner = self.get_winner(state) 63 | if winner is None: 64 | return {player : 0 for player in self.get_players().values()} 65 | return {player : 1 if player == winner else -1 for player in self.get_players().values()} 66 | 67 | 68 | # specific methods to TicTacToe 69 | def get_winner_string(self, state: TicTacToeState) -> Union[str, None]: 70 | """Returns the winner of the board, or None if no winner""" 71 | board = state.board 72 | # Check rows 73 | for i in range(3): 74 | if board[3*i] == board[3*i+1] == board[3*i+2] != 0: 75 | return board[3*i] 76 | # Check columns 77 |
for i in range(3): 78 | if board[i] == board[i+3] == board[i+6] != 0: 79 | return board[i] 80 | # Check diagonals 81 | if board[0] == board[4] == board[8] != 0: 82 | return board[0] 83 | if board[2] == board[4] == board[6] != 0: 84 | return board[2] 85 | return None 86 | 87 | def get_winner(self, state: TicTacToeState) -> Union[Player, None]: 88 | winner_str = self.get_winner_string(state) 89 | if winner_str is None: return None 90 | return self.players[winner_str] 91 | 92 | 93 | 94 | class TicTacToeRandomGame(TicTacToeGame, NonDeterministicGame): 95 | """TicTacToe game where a random agent randomly reset one of the box.""" 96 | 97 | def __init__(self, agents: dict[str, type]) -> None: 98 | TicTacToeGame.__init__(self, agents) 99 | 100 | def get_actions(self, state: TicTacToeState) -> list[object]: 101 | if state.player_playing is not None: 102 | return TicTacToeGame.get_actions(self, state) 103 | else: 104 | return [k for k in range(9)] 105 | 106 | def get_result(self, state: TicTacToeState, action: object) -> TicTacToeState: 107 | player_playing = state.player_playing 108 | if player_playing is None: 109 | board = state.board.copy() 110 | board[action] = 0 111 | next_player = self.players['X'] 112 | return TicTacToeState(next_player, board) 113 | 114 | else: 115 | board = state.board.copy() 116 | board[action] = state.player_playing.game_name 117 | if player_playing.game_name == "X": 118 | next_player = self.players['O'] 119 | elif player_playing.game_name == "O": 120 | next_player = None 121 | else: 122 | raise Exception("Invalid player") 123 | return TicTacToeState(next_player, board) 124 | 125 | def get_random_action_distribution(self, state: TicTacToeState) -> dict[object, float]: 126 | if state.player_playing is None: 127 | return {action: 1/9 for action in [k for k in range(9)]} 128 | else: 129 | raise Exception("The state is not a random state.") -------------------------------------------------------------------------------- /GamesAI/algorithms/MCTS.py: 
-------------------------------------------------------------------------------- 1 | #Inspired by following MCTS implementation : https://gist.github.com/qpwo/c538c6f73727e254fdc7fab81024f6e1 2 | import random 3 | from collections import defaultdict 4 | import math 5 | from typing import Iterable 6 | 7 | from GamesAI.div.GameContent import State, GameType, Action 8 | from GamesAI.Player import Player 9 | from GamesAI.div.utils import argmax 10 | 11 | class Node: 12 | """ 13 | A representation of a single state. 14 | MCTS works by constructing a tree of these Nodes. 15 | """ 16 | def __init__(self, state : State, game : GameType, player : Player) -> None: 17 | self.state = state 18 | self.game = game 19 | self.player = player 20 | 21 | def find_children(self) -> Iterable["Node"]: 22 | "Return all possible successors of this node" 23 | childrens = list() 24 | for action in self.game.get_actions(self.state): 25 | state = self.game.get_result(self.state, action) 26 | childrens.append(Node(state = state, game = self.game, player = self.player)) 27 | return childrens 28 | 29 | def find_random_child(self) -> "Node": 30 | "Random successor of this board state (for more efficient simulation)" 31 | actions = self.game.get_actions(self.state) 32 | action = random.choice(actions) 33 | state = state = self.game.get_result(self.state, action) 34 | return Node(state = state, game = self.game, player = self.game.get_player_playing(state)) 35 | 36 | def is_terminal(self) -> bool: 37 | "Returns True if the node has no children" 38 | return self.game.is_terminal_state(self.state) 39 | 40 | def __hash__(self) -> int: 41 | "Nodes must be hashable" 42 | return hash(self.state) 43 | 44 | def __eq__(self, node2 : "Node") -> bool: 45 | "Nodes must be comparable" 46 | return self.state == node2.state 47 | 48 | 49 | 50 | class MonteCarloTreeSearch(Player): 51 | """Player that uses Monte-Carlo Tree Search method for evaluating node. 
52 | Only works for 2 player games, deterministic, zero-sum game.""" 53 | agent_name = "MCTS" 54 | 55 | def __init__(self, game: GameType, game_name: str, agent_name: str, n_rollouts : int = 50) -> None: 56 | super().__init__(game, game_name, agent_name) 57 | self.n_rollouts = n_rollouts 58 | if len(game.names) != 2: 59 | raise ValueError("MCTS can only be used for 2 player games") 60 | self.Q = defaultdict(int) # total utility of each node 61 | self.N = defaultdict(int) # total visit count for each node 62 | self.children = dict() # children of each node 63 | self.exploration_weight = 1.4 # exploration weight, should scale with the typical variational utility. Utility variation of 1 <=> exploration_weight of 1.4. 64 | 65 | 66 | def get_action(self, state: State) -> Action: 67 | 68 | "Choose the best successor of node. (Choose a move in the game)" 69 | node = Node(state = state, game = self.game, player = self) 70 | if node.is_terminal(): 71 | raise RuntimeError(f"choose called on terminal node {node}") 72 | 73 | for _ in range(self.n_rollouts): 74 | self.do_rollout(node) 75 | 76 | if node not in self.children: 77 | return node.find_random_child() 78 | 79 | def score(action : Action): 80 | n = Node(state = self.game.get_result(state, action), game = self.game, player = self) 81 | if self.N[n] == 0: 82 | return float("-inf") # avoid unseen moves 83 | return self.Q[n] / self.N[n] # average utility 84 | 85 | return argmax(indexes = self.game.get_actions(state), func = score) 86 | 87 | def do_rollout(self, node : Node) -> None: 88 | "Make the tree one layer better. (Train for one iteration.)" 89 | path = self.select(node) 90 | leaf = path[-1] 91 | self.expand(leaf) 92 | utilities = self.simulate(leaf) 93 | self.backpropagate(path, utilities) 94 | 95 | def select(self, node): 96 | "Find an unexplored descendent of a node, return the path leading from node to this descendent." 
97 | path = [] 98 | while True: 99 | path.append(node) 100 | if node not in self.children or not self.children[node]: 101 | # node is either unexplored or terminal 102 | return path 103 | unexplored = self.children[node] - self.children.keys() 104 | if unexplored: 105 | n = unexplored.pop() 106 | path.append(n) 107 | return path 108 | node = self.uct_select(node) # descend a layer deeper 109 | 110 | def expand(self, node : Node) -> None: 111 | "Update the `children` dict with the children of `node`" 112 | if node in self.children: 113 | return # already expanded 114 | self.children[node] = node.find_children() 115 | 116 | def simulate(self, node : Node) -> dict[Player, float]: 117 | "Returns the utilities for a random simulation of a node" 118 | while True: 119 | if node.is_terminal(): 120 | return self.game.get_utilities(node.state) 121 | node = node.find_random_child() 122 | 123 | def backpropagate(self, path : list[Node], utilities : dict[Player, float]) -> None: 124 | "Send the utilities back up to the ancestors of the leaf" 125 | for node in reversed(path): 126 | self.N[node] += 1 127 | self.Q[node] += utilities[node.player] 128 | 129 | def uct_select(self, node : Node) -> Node: 130 | "Select a child of node, balancing exploration & exploitation" 131 | 132 | # All children of node should already be expanded: 133 | assert all(n in self.children for n in self.children[node]) 134 | 135 | log_N_vertex = math.log(self.N[node]) 136 | 137 | def uct(n): 138 | "Upper confidence bound for trees" 139 | return self.Q[n] / self.N[n] + self.exploration_weight * math.sqrt( 140 | log_N_vertex / self.N[n] 141 | ) 142 | 143 | return max(self.children[node], key=uct) -------------------------------------------------------------------------------- /GamesAI/Game.py: -------------------------------------------------------------------------------- 1 | """Module for defining the Game object. 2 | 3 | Games should be define as subclasses of Game or NonDeterministicGame. 
They should implement abstract methods as defined in the Game abstract class. 4 | """ 5 | 6 | #Tool imports 7 | from abc import ABC, abstractmethod 8 | from time import sleep 9 | from typing import Union 10 | import random 11 | #Game solving module imports 12 | from GamesAI.div.utils import Constant 13 | from GamesAI.div.GameContent import State, Percept, Action 14 | from GamesAI.Player import Player, NonDeterministicPlayer, NonFullyObservablePlayer 15 | 16 | 17 | class Game(ABC): 18 | """The class for defining a GAME problem. 19 | Standard games are deterministic, observable, turn-taking. 20 | """ 21 | 22 | def __init__(self, agents : dict[str, Union[type, tuple[type, dict]]]) -> None: 23 | """Creation of a GAME object. 24 | agents is a dictionnary with game_name as keys and a class of player as values. 25 | If the player class initializer requires arguments, the value can instead be a tuple (PlayerClass, kwargs). 26 | 27 | A subclass of Game should implement the following methods : 28 | - get_start_state() : return the initial state of the game 29 | - get_player_playing(state) : return the player playing at the given state 30 | - get_actions(state) : return the list of actions available at the given state 31 | - get_result(state, action) : return the state reached by the given action in the given state 32 | - is_terminal_state(state) : return True if the given state is a terminal state 33 | - get_utilites(state) : return the utilities of the players at the given state 34 | """ 35 | 36 | if not hasattr(self, 'names'): raise Exception("Game class must define class attribute .names") 37 | if self.names != set(agents.keys()): raise Exception("Game class names does not match agents names (agents keys)") 38 | 39 | self.players = dict() 40 | for game_name, PlayerClass_or_tuple in agents.items(): 41 | if isinstance(PlayerClass_or_tuple, type): 42 | self.players[game_name] = PlayerClass_or_tuple(game = self, 43 | game_name = game_name, 44 | agent_name = 
PlayerClass_or_tuple.agent_name) 45 | elif isinstance(PlayerClass_or_tuple, tuple): 46 | PlayerClass, kwargs = PlayerClass_or_tuple 47 | self.players[game_name] = PlayerClass(game = self, 48 | game_name = game_name, 49 | agent_name = PlayerClass.agent_name, 50 | **kwargs) 51 | else: 52 | raise Exception("Game class must define agents as dict[str, type] or dict[str, tuple[type, dict]]") 53 | 54 | @abstractmethod 55 | def get_start_state(self) -> State: 56 | """Return the initial state of the game""" 57 | pass 58 | 59 | @abstractmethod 60 | def get_player_playing(self, state : State) -> Union[Player, None]: 61 | """Return the player playing in the given state. Return None if no player should play, ie if randomness plays.""" 62 | pass 63 | 64 | @abstractmethod 65 | def get_actions(self, state : State) -> list[Action]: 66 | """Return the list of actions available in the given state for the player playing in the state""" 67 | pass 68 | 69 | @abstractmethod 70 | def get_result(self, state : State, action : Action) -> State: 71 | """Return the state reached by the game after having played the given action in the given state""" 72 | pass 73 | 74 | @abstractmethod 75 | def is_terminal_state(self, state : State) -> bool: 76 | """Return True if the given state is a terminal state, False otherwise""" 77 | pass 78 | 79 | @abstractmethod 80 | def get_utilities(self, state : State) -> dict[Player, float]: 81 | """Return the utilities of each player.""" 82 | pass 83 | 84 | #Permanent methods 85 | def get_players(self) -> dict[str, Player]: 86 | """Return the players of the game with their game names as keys""" 87 | return self.players 88 | 89 | def get_names(self) -> set: 90 | """Return the set of the game names""" 91 | if not hasattr(self, 'names'): raise Exception("Game class must define class attribute .names") 92 | return self.names 93 | 94 | def play_game(self, verbose : int, wait_time : float = 0) -> State: 95 | """Play the game until the end, print the information, return 
the final state. 96 | verbose = 0 : no print 97 | verbose = 1 : print game result (utilities for each player) 98 | verbose = 2 : print game result and state at each step 99 | """ 100 | state = self.get_start_state() 101 | if verbose >= 1: print("Starting game ...") 102 | while True: 103 | sleep(wait_time) 104 | if verbose >= 2: 105 | print(state) 106 | if self.is_terminal_state(state): 107 | if verbose >= 1: 108 | print("\tEnd of game, utilities of players :") 109 | for player in self.players.values(): 110 | print(player, ": ", self.get_utilities(state)[player]) 111 | return state 112 | player = self.get_player_playing(state) 113 | if player is None: 114 | distribution = self.get_random_action_distribution(state) 115 | action = random.choices(list(distribution.keys()), weights = list(distribution.values()))[0] 116 | if verbose >= 2: 117 | print(f"Random action : {action}") 118 | else: 119 | action = player.get_action(state) 120 | if verbose >= 2: 121 | print(f"{player} action : {action}") 122 | state = self.get_result(state, action) 123 | 124 | 125 | class NonDeterministicGame(Game): 126 | """Non deterministic game, where randomness happens at some node. 127 | 128 | Subclasses should implement the methods of Game as well as the get_random_action_distribution method. 129 | - get_random_action_distribution(state) : return the distribution of actions available at the given state for the player playing at the state. 
130 | """ 131 | def __init__(self, agents: dict[str, Union[type, tuple[type, dict]]]) -> None: 132 | for player_class in agents.values(): 133 | if isinstance(player_class, tuple): 134 | player_class = player_class[0] 135 | if not issubclass(player_class, NonDeterministicPlayer): 136 | raise Exception(f"Non deterministic game must have only NonDeterministicPlayer players (player inheriting NonDeterministicPlayer class) but {player_class.agent_name} is not.") 137 | super().__init__(agents) 138 | 139 | @abstractmethod 140 | def get_random_action_distribution(self, state : State) -> dict[Action, float]: 141 | """Return the action distribution for the actions available in the given random state""" 142 | if self.get_player_playing is not None: 143 | raise Exception("The state is not a random state.") 144 | actions = self.get_actions(state) 145 | if len(actions) == 0: 146 | raise Exception("The state has no action available.") 147 | return {action : 1 / len(actions) for action in actions} 148 | 149 | 150 | 151 | 152 | class NonFullyObservableGame(Game): 153 | """Non fully observable game are games where each agent does not have access to the complete state but rather only some information called a percept. 154 | 155 | Sub classes should implement the methods of Game as well as the get_percept method. 
156 | - get_percept_method(state, player) : return the percept of the given state for a certain player 157 | """ 158 | def __init__(self, agents: dict[str, Union[type, tuple[type, dict]]]) -> None: 159 | for player_class in agents.values(): 160 | if isinstance(player_class, tuple): 161 | player_class = player_class[0] 162 | if not issubclass(player_class, NonFullyObservablePlayer): 163 | raise Exception(f"Non fully observable game must have only NonFullyObservablePlayer players (player inheriting NonFullyObservablePlayer class) but {player_class.agent_name} is not.") 164 | super().__init__(agents) 165 | 166 | @abstractmethod 167 | def get_percept(self, state : State, player : NonFullyObservablePlayer) -> Percept: 168 | """Return the percept of the given state for a certain player""" 169 | 170 | def play_game(self, verbose : int, wait_time : float = 0) -> State: 171 | """Play the game until the end, print the information, return the final state. 172 | verbose = 0 : no print 173 | verbose = 1 : print game result (utilities for each player) 174 | verbose = 2 : print game result and state at each step 175 | """ 176 | state = self.get_start_state() 177 | if verbose >= 1: print("Starting game ...") 178 | while True: 179 | sleep(wait_time) 180 | player = self.get_player_playing(state) 181 | percept = self.get_percept(state, player) 182 | if verbose >= 2: 183 | print("State: ", state) 184 | print("Percept: ", percept) 185 | if self.is_terminal_state(state): 186 | if verbose >= 1: 187 | print("\tEnd of game, utilities of players :") 188 | for player in self.players.values(): 189 | print(player, ": ", self.get_utilities(state)[player]) 190 | return state 191 | 192 | if player is None: 193 | distribution = self.get_random_action_distribution(state) 194 | action = random.choices(list(distribution.keys()), weights = list(distribution.values()))[0] 195 | if verbose >= 2: 196 | print(f"Random action : {action}") 197 | else: 198 | distribution = 
player.get_action_distribution(percept) 199 | action = random.choices(list(distribution.keys()), weights = list(distribution.values()))[0] 200 | if verbose >= 2: 201 | print(f"{player} action : {action}") 202 | state = self.get_result(state, action) -------------------------------------------------------------------------------- /GamesAI/Player.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | import random 3 | from time import time 4 | from typing import Callable, Union 5 | 6 | from GamesAI.div.GameContent import State, Percept, GameType, Action 7 | from GamesAI.div.utils import argmin, argmax 8 | 9 | 10 | #Basic algorithm for games. 11 | class Player(ABC): 12 | """The class for defining a PLAYER in a GAME. A player has a game_name inside the game and an agent_name that explains its strategy. 13 | It is defined by its get_action method. 14 | Two players are considered equal (inside a game) if their game name is equal.""" 15 | agent_name = "BasicPlayer" 16 | 17 | def __init__(self, game : GameType, game_name : str, agent_name : str) -> None: 18 | """A player instance. 
19 | 20 | Args: 21 | game (GameType): the Game object in which the player will play 22 | game_name (str): the name of the player in the game eg "X", "Red", "Player1" etc 23 | agent_name (str): the name of the agent (AI) that is used eg Minimax, AlphaBeta etc 24 | """ 25 | self.game = game 26 | self.agent_name = agent_name 27 | self.game_name = game_name 28 | 29 | def __repr__(self) -> str: 30 | return f"[Player '{self.game_name}' ({self.agent_name})]" 31 | 32 | def __hash__(self) -> int: 33 | return hash(self.game_name) 34 | 35 | def __eq__(self, other) -> bool: 36 | if other is None: 37 | return False 38 | return self.game_name == other.game_name 39 | 40 | @abstractmethod 41 | def get_action(self, state: State) -> Action: 42 | """Return the action to be played in the given state""" 43 | pass 44 | 45 | 46 | class NonDeterministicPlayer(Player): 47 | """A player that can play in non deterministic games.""" 48 | agent_name = "NonDeterministicPlayer" 49 | 50 | class NonFullyObservablePlayer(Player): 51 | """A player that can play in non fully observable games.""" 52 | agent_name = "NonFullyObservablePlayer" 53 | 54 | class RandomPlayer(NonDeterministicPlayer, NonFullyObservablePlayer): 55 | """A player that plays randomly.""" 56 | agent_name = "RandomPlayer" 57 | 58 | def __init__(self, game : GameType, game_name: str, agent_name: str) -> None: 59 | super().__init__(game, game_name, agent_name) 60 | 61 | def get_action(self, state: State) -> Action: 62 | """Return a random available action.""" 63 | return random.choice(self.game.get_actions(state)) 64 | 65 | 66 | class HumanPlayer(NonDeterministicPlayer, NonFullyObservablePlayer): 67 | """A player asking for input for actions to take. 
Adapted to int and str actions.""" 68 | agent_name = "Human" 69 | 70 | def __init__(self, game: GameType, game_name: str, agent_name: str) -> None: 71 | super().__init__(game, game_name, agent_name) 72 | 73 | def get_action(self, state: State) -> Action: 74 | while True: 75 | actions = self.game.get_actions(state) 76 | print(f"\t{self.game_name}'s actions : {actions}") 77 | action = input("Enter action: ") 78 | if action in actions: 79 | return action 80 | elif action == '': 81 | continue 82 | elif action.isdigit() and int(action) in actions: 83 | return int(action) 84 | else: 85 | print("Invalid action") 86 | 87 | 88 | class Minimax(Player): 89 | """A player that uses the minimax algorithm to choose its actions. Only works for 2 player, zero sum games. 90 | For big state tree, the algorithm can't explore all tree and need to have a max_depth and a heuristic associated.""" 91 | agent_name = "Minimax" 92 | 93 | def __init__(self, game: GameType, game_name: str, agent_name: str, max_depth: int = float("inf"), heuristic : Callable[[State], dict[str, float]] = None) -> None: 94 | super().__init__(game, game_name, agent_name) 95 | if (max_depth == float("inf")) != (heuristic is None): 96 | raise ValueError("Heuristic and max_depth are either inf/None (default) or non_inf/non_None") 97 | if len(game.names) != 2: 98 | raise ValueError("Minimax can only be used for 2 player games") 99 | self.max_depth = max_depth 100 | self.heuristic = heuristic 101 | 102 | def get_action(self, state: State) -> Action: 103 | """Return the action that maximize Max (the player) utility.""" 104 | return argmax(indexes = self.game.get_actions(state), func = lambda action: self.min_value(self.game.get_result(state, action), depth = 1)) 105 | 106 | 107 | def min_value(self, state : State, depth : int) -> float: 108 | """Return the minimum utility of the next states of the given state after the given action.""" 109 | if self.game.is_terminal_state(state): 110 | return 
self.game.get_utilities(state)[self] 111 | elif depth >= self.max_depth: 112 | return self.heuristic(state)[self.game_name] 113 | else: 114 | return min([self.max_value(self.game.get_result(state, action), depth = depth + 1) for action in self.game.get_actions(state)]) 115 | 116 | def max_value(self, state : State, depth : int) -> float: 117 | """Return the maximum utility of the next states of the given state after the given action.""" 118 | if self.game.is_terminal_state(state): 119 | return self.game.get_utilities(state)[self] 120 | elif depth >= self.max_depth: 121 | return self.heuristic(state)[self.game_name] 122 | else: 123 | return max([self.min_value(self.game.get_result(state, action), depth = depth + 1) for action in self.game.get_actions(state)]) 124 | 125 | 126 | 127 | class AlphaBeta(Player): 128 | """AlphaBeta provide the same solution as Minimax but compute faster by pruning branches that are useless to explore (according to the heuristic)""" 129 | agent_name = "AlphaBeta" 130 | 131 | def __init__(self, game: GameType, game_name: str, agent_name: str, max_depth: int = float("inf"), heuristic : Callable[[State], dict[str, float]] = None) -> None: 132 | super().__init__(game, game_name, agent_name) 133 | if (max_depth == float("inf")) != (heuristic is None): 134 | raise ValueError("Heuristic and max_depth are either inf/None or non_inf/non_None") 135 | if len(game.names) != 2: 136 | raise ValueError("AlphaBeta can only be used for 2 player games") 137 | self.max_depth = max_depth 138 | self.heuristic = heuristic 139 | 140 | def get_action(self, state: State) -> Action: 141 | """Return the action that maximize Max (the player) utility.""" 142 | return argmax(indexes = self.game.get_actions(state), func = lambda action: self.min_value(self.game.get_result(state, action), depth = 1, alpha = float("-inf"), beta = float("inf"))) 143 | 144 | def min_value(self, state : State, depth : int, alpha : float, beta : float) -> float: 145 | """Return the minimum 
utility of the next states of the given state after the given action.""" 146 | if self.game.is_terminal_state(state): 147 | return self.game.get_utilities(state)[self] 148 | elif depth >= self.max_depth: 149 | return self.heuristic(state) 150 | else: 151 | value = float('inf') 152 | for action in self.game.get_actions(state): 153 | successor_state = self.game.get_result(state, action) 154 | value = min(value, self.max_value(successor_state, depth = depth + 1, alpha = alpha, beta = beta)) 155 | if value <= alpha: 156 | return value 157 | beta = min(beta, value) 158 | return value 159 | 160 | def max_value(self, state : State, depth : int, alpha : float, beta : float) -> float: 161 | """Return the maximum utility of the next states of the given state after the given action.""" 162 | if self.game.is_terminal_state(state): 163 | return self.game.get_utilities(state)[self] 164 | elif depth >= self.max_depth: 165 | return self.heuristic(state) 166 | else: 167 | value = float('-inf') 168 | for action in self.game.get_actions(state): 169 | successor_state = self.game.get_result(state, action) 170 | value = max(value, self.min_value(successor_state, depth = depth + 1, alpha = alpha, beta = beta)) 171 | if value >= beta: 172 | return value 173 | alpha = max(alpha, value) 174 | return value 175 | 176 | 177 | 178 | class MinimaxPlus(NonDeterministicPlayer): 179 | """A generalization of Minimax to games wtih any number of players and with randomness.""" 180 | agent_name = "MinimaxPlus" 181 | 182 | def __init__(self, game: GameType, game_name: str, agent_name: str, max_depth: int = float("inf"), heuristic: Callable[[State], dict[str, float]] = None) -> None: 183 | if (max_depth == float("inf")) != (heuristic is None): 184 | raise ValueError("Heuristic and max_depth are either inf/None or non_inf/non_None") 185 | super().__init__(game, game_name, agent_name) 186 | self.max_depth = max_depth 187 | self.heuristic = heuristic 188 | 189 | def get_action(self, state: State) -> Action: 
190 | """Return action that maximizes the expected utility.""" 191 | def func_to_optimize(action): 192 | next_state = self.game.get_result(state, action) 193 | return self.best_utilities_for_player(next_state, depth = 1)[self] 194 | return argmax(indexes = self.game.get_actions(state), 195 | func = func_to_optimize) 196 | 197 | def best_utilities_for_player(self, state : State, depth : int) -> dict[Player, float]: 198 | """Return the best predicted final utilities of a state according to a given player playing as Expectiminimax and assuming each other player plays as Expectiminimax.""" 199 | if self.game.is_terminal_state(state): 200 | return self.game.get_utilities(state) 201 | elif depth >= self.max_depth: 202 | return {self.game.get_players()[game_name] : utility for game_name, utility in self.heuristic(state).items()} 203 | player_playing = self.game.get_player_playing(state) 204 | 205 | if player_playing is None: 206 | # The state is a random state 207 | utilities = {player : 0 for player in self.game.get_players().values()} 208 | for action, prob in self.game.get_random_action_distribution(state).items(): 209 | next_state = self.game.get_result(state, action) 210 | next_utilities = self.best_utilities_for_player(next_state, depth = depth + 1) 211 | for player in utilities: 212 | utilities[player] += prob * next_utilities[player] 213 | return utilities 214 | 215 | else: 216 | #The state is a deterministic state, a Player has to play 217 | best_value = float("-inf") 218 | for action in self.game.get_actions(state): 219 | next_state = self.game.get_result(state, action) 220 | next_utilities = self.best_utilities_for_player(next_state, depth = depth + 1) 221 | value = next_utilities[player_playing] 222 | if value > best_value: 223 | best_value = value 224 | best_utilities = next_utilities 225 | return best_utilities 226 | 227 | 228 | 229 | #Algorithm from other modules that requires their own modules. 
230 | from GamesAI.algorithms.MCTS import MonteCarloTreeSearch --------------------------------------------------------------------------------