├── .coveralls.yml ├── .github └── workflows │ └── python-package.yml ├── .gitignore ├── CONTRIBUTING.md ├── LICENSE.md ├── README.md ├── README.zh-CN.md ├── docs ├── README.md ├── adding-models.md ├── adding-new-environments.md ├── algorithms.md ├── customizing-environments.md ├── developping-algorithms.md ├── games.md ├── high-level-design.md └── toy-examples.md ├── examples ├── evaluate.py ├── human │ ├── blackjack_human.py │ ├── gin_rummy_human.py │ ├── leduc_holdem_human.py │ ├── limit_holdem_human.py │ ├── nolimit_holdem_human.py │ └── uno_human.py ├── pettingzoo │ ├── README.md │ ├── run_dmc.py │ └── run_rl.py ├── run_cfr.py ├── run_dmc.py ├── run_random.py ├── run_rl.py └── scripts │ ├── dmc_doudizhu_1_gpu.sh │ └── dmc_doudizhu_4_gpu.sh ├── rlcard ├── __init__.py ├── agents │ ├── __init__.py │ ├── cfr_agent.py │ ├── dmc_agent │ │ ├── __init__.py │ │ ├── file_writer.py │ │ ├── model.py │ │ ├── pettingzoo_model.py │ │ ├── pettingzoo_utils.py │ │ ├── trainer.py │ │ └── utils.py │ ├── dqn_agent.py │ ├── human_agents │ │ ├── __init__.py │ │ ├── blackjack_human_agent.py │ │ ├── gin_rummy_human_agent │ │ │ ├── __init__.py │ │ │ ├── gin_rummy_human_agent.py │ │ │ ├── gui_cards │ │ │ │ ├── __init__.py │ │ │ │ └── card_image.py │ │ │ └── gui_gin_rummy │ │ │ │ ├── Gin-Rummy-GUI-Design.md │ │ │ │ ├── __init__.py │ │ │ │ ├── canvas_item.py │ │ │ │ ├── configurations.py │ │ │ │ ├── env_thread.py │ │ │ │ ├── game_app.py │ │ │ │ ├── game_canvas.py │ │ │ │ ├── game_canvas_debug.py │ │ │ │ ├── game_canvas_getter.py │ │ │ │ ├── game_canvas_post_doing_action.py │ │ │ │ ├── game_canvas_query.py │ │ │ │ ├── game_canvas_updater.py │ │ │ │ ├── game_frame.py │ │ │ │ ├── game_options.ini │ │ │ │ ├── handling_tap.py │ │ │ │ ├── handling_tap_discard_pile.py │ │ │ │ ├── handling_tap_held_pile.py │ │ │ │ ├── handling_tap_player_pane.py │ │ │ │ ├── handling_tap_stock_pile.py │ │ │ │ ├── handling_tap_to_arrange_held_pile.py │ │ │ │ ├── info_messaging.py │ │ │ │ ├── menu_bar.py │ │ │ │ ├── player_type.py │ │ │ │ ├── preferences_window.py │ │ │ │ ├── starting_new_game.py │ │ │ │ ├── status_messaging.py │ │ │ │ ├── utils.py │ │ │ │ └── utils_extra.py │ │ ├── leduc_holdem_human_agent.py │ │ ├── limit_holdem_human_agent.py │ │ ├── nolimit_holdem_human_agent.py │ │ └── uno_human_agent.py │ ├── nfsp_agent.py │ ├── pettingzoo_agents.py │ └── random_agent.py ├── envs │ ├── __init__.py │ ├── blackjack.py │ ├── bridge.py │ ├── doudizhu.py │ ├── env.py │ ├── gin_rummy.py │ ├── leducholdem.py │ ├── limitholdem.py │ ├── mahjong.py │ ├── nolimitholdem.py │ ├── registration.py │ └── uno.py ├── games │ ├── __init__.py │ ├── base.py │ ├── blackjack │ │ ├── __init__.py │ │ ├── dealer.py │ │ ├── game.py │ │ ├── judger.py │ │ └── player.py │ ├── bridge │ │ ├── __init__.py │ │ ├── dealer.py │ │ ├── game.py │ │ ├── judger.py │ │ ├── player.py │ │ ├── round.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── action_event.py │ │ │ ├── bridge_card.py │ │ │ ├── move.py │ │ │ ├── tray.py │ │ │ └── utils.py │ ├── doudizhu │ │ ├── __init__.py │ │ ├── dealer.py │ │ ├── game.py │ │ ├── jsondata.zip │ │ ├── judger.py │ │ ├── player.py │ │ ├── round.py │ │ └── utils.py │ ├── gin_rummy │ │ ├── __init__.py │ │ ├── dealer.py │ │ ├── game.py │ │ ├── judge.py │ │ ├── player.py │ │ ├── round.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── action_event.py │ │ │ ├── gin_rummy_error.py │ │ │ ├── melding.py │ │ │ ├── move.py │ │ │ ├── scorers.py │ │ │ ├── settings.py │ │ │ ├── thinker.py │ │ │ └── utils.py │ ├── leducholdem │ │ ├── __init__.py │ │ ├── 
card2index.json │ │ ├── dealer.py │ │ ├── game.py │ │ ├── judger.py │ │ ├── player.py │ │ └── round.py │ ├── limitholdem │ │ ├── __init__.py │ │ ├── card2index.json │ │ ├── dealer.py │ │ ├── game.py │ │ ├── judger.py │ │ ├── player.py │ │ ├── round.py │ │ └── utils.py │ ├── mahjong │ │ ├── __init__.py │ │ ├── card.py │ │ ├── dealer.py │ │ ├── game.py │ │ ├── judger.py │ │ ├── player.py │ │ ├── round.py │ │ └── utils.py │ ├── nolimitholdem │ │ ├── __init__.py │ │ ├── dealer.py │ │ ├── game.py │ │ ├── judger.py │ │ ├── player.py │ │ └── round.py │ └── uno │ │ ├── __init__.py │ │ ├── card.py │ │ ├── dealer.py │ │ ├── game.py │ │ ├── jsondata │ │ └── action_space.json │ │ ├── judger.py │ │ ├── player.py │ │ ├── round.py │ │ └── utils.py ├── models │ ├── __init__.py │ ├── bridge_rule_models.py │ ├── doudizhu_rule_models.py │ ├── gin_rummy_rule_models.py │ ├── leducholdem_rule_models.py │ ├── limitholdem_rule_models.py │ ├── model.py │ ├── pretrained │ │ └── leduc_holdem_cfr │ │ │ ├── average_policy.pkl │ │ │ ├── iteration.pkl │ │ │ ├── policy.pkl │ │ │ └── regrets.pkl │ ├── pretrained_models.py │ ├── registration.py │ └── uno_rule_models.py └── utils │ ├── __init__.py │ ├── logger.py │ ├── pettingzoo_utils.py │ ├── seeding.py │ └── utils.py ├── setup.py └── tests ├── __init__.py ├── agents ├── __init__.py ├── test_cfr.py ├── test_dqn.py ├── test_leduc_human.py ├── test_nfsp.py └── test_uno_human.py ├── envs ├── __init__.py ├── determism_util.py ├── test_blackjack_env.py ├── test_doudizhu_env.py ├── test_gin_rummy_env.py ├── test_leducholdem_env.py ├── test_limitholdem_env.py ├── test_mahjong.py ├── test_nolimitholdem_env.py ├── test_registration.py └── test_uno_env.py ├── games ├── __init__.py ├── test_blackjack_game.py ├── test_bridge_game.py ├── test_doudizhu_game.py ├── test_doudizhu_judger.py ├── test_gin_rummy_game.py ├── test_leducholdem_game.py ├── test_limitholdem_game.py ├── test_mahjong_game.py ├── test_nolimitholdem_game.py ├── test_nolimitholdem_judger.py └── test_uno_game.py ├── models ├── __init__.py ├── test_model_registeration.py └── test_models.py └── utils ├── __init__.py ├── test_holdem_utils.py ├── test_logger.py └── test_utils.py /.coveralls.yml: -------------------------------------------------------------------------------- 1 | repo_token: a9eSNI8pkeeDAKwGtKKBSUPCaFIiQGvYU 2 | service_name: travis-ci -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Testing 5 | 6 | on: 7 | push: 8 | branches: [ master ] 9 | pull_request: 10 | branches: [ master ] 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | strategy: 17 | fail-fast: false 18 | matrix: 19 | python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"] 20 | 21 | steps: 22 | - uses: actions/checkout@v2 23 | - name: Set up Python ${{ matrix.python-version }} 24 | uses: actions/setup-python@v2 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | - name: Install dependencies 28 | run: | 29 | python -m pip install --upgrade pip 30 | pip install python-coveralls 31 | pip install pytest-cover 32 | - name: Install package 33 | run: | 34 | pip install -e .[torch] 35 | - name: Test with pytest 36 | run: | 37 | 
py.test tests/ --cov=rlcard 38 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | **/.DS_Store 2 | __pycache__/ 3 | *.pyc 4 | /*.egg-info 5 | .idea/ 6 | *.swp 7 | *.wsn 8 | *.swo 9 | .scannerwork/ 10 | .vscode/ 11 | htmlcov/ 12 | sonar-project.properties 13 | .coverage* 14 | docs/rst 15 | docs/sphinx 16 | experiments/ 17 | dist/ 18 | rlcard/games/doudizhu/jsondata/ 19 | rlcard/agents/gin_rummy_human_agent/gui_cards/cards_png 20 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guide 2 | Contribution to this project is greatly appreciated! If you find any bugs or have any feedback, please create an issue or send a pull request to fix the bug. If you want to contribute code for new features, please contact [daochen.zha@tamu.edu](mailto:daochen.zha@tamu.edu) or [khlai@tamu.edu](mailto:khlai@tamu.edu). We currently have several plans, listed in the roadmap below. Please create an issue or contact us through email if you have other suggestions. 3 | 4 | ## Roadmaps 5 | 6 | * **Game-Specific Configurations.** We plan to gradually support game-specific configurations. Currently, we only support specifying the number of players in Blackjack. 7 | * **Rule-based Agents and Pre-trained Models.** Provide more rule-based agents and pre-trained models to benchmark the evaluation. We currently have several models in `/models`. 8 | * **More Games and Algorithms.** Develop more games and algorithms. 9 | * **Hyperparameter Search.** Search hyperparameters for each environment and update the examples with the best ones. 10 | 11 | ## How to Create a Pull Request 12 | 13 | If this is your first time contributing to a project, kindly follow the instructions below. You may find [Creating a pull request](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request) helpful. Mainly, you need to take the following steps to send a pull request: 14 | 15 | * Click **Fork** in the upper-right corner of the project main page to create a copy of the repository under your GitHub account. 16 | * Clone the forked repo from your GitHub account to your computer. 17 | * Make changes on your computer. 18 | * Commit and push your local changes to your fork. 19 | * Send a pull request to merge your branch into the RLCard project. 20 | 21 | ## Testing Your Code 22 | 23 | We strongly encourage you to write testing code in parallel with your development. We use `unittest` in RLCard. An example is [Blackjack environment testing](tests/envs/test_blackjack_env.py). 24 | 25 | ## Making Configurable Environments 26 | We take Blackjack as an example to show how we can define game-specific configurations in RLCard. The key points are highlighted as follows: 27 | 28 | * We add a `DEFAULT_GAME_CONFIG` in [Blackjack Env](rlcard/envs/blackjack.py) to define the default values of the game configurations. Each field should start with `game_`. 29 | * Modify the game and environment according to the configurations. For example, we need to support multiple players in Blackjack. 30 | * Modify [Env](rlcard/envs/env.py) to add your game to the `supported_envs`. 31 | * When making the environment, we pass the newly defined fields in `config`. For example, we pass `config={'game_num_players': 2}` for Blackjack, as shown in the sketch below.
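A minimal sketch of that last step, assuming the Blackjack environment that ships with RLCard (the `game_num_players` field matches the one used in `examples/human/blackjack_human.py`):

```python
import rlcard

# Make a two-player Blackjack game by overriding a `game_` field
# defined in DEFAULT_GAME_CONFIG (see rlcard/envs/blackjack.py)
env = rlcard.make('blackjack', config={'game_num_players': 2})
print(env.num_players)  # -> 2
```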
32 | 33 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright (c) 2019 DATA Lab at Texas A&M University 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # Documents of RLCard 2 | 3 | ## Overview 4 | The toolkit wraps each game in an `Env` class with easy-to-use interfaces. The goal of this toolkit is to enable the users to focus on algorithm development without worrying about the environment. The following design principles are applied when developing the toolkit: 5 | * **Reproducible.** Results on the environments can be reproduced. The same result should be obtained with the same random seed in different runs. 6 | * **Accessible.** The experiences are collected and well organized after each game with easy-to-use interfaces. Users can conveniently configure state representation, action encoding, reward design, or even the game rules. 7 | * **Scalable.** New card environments can be added conveniently into the toolkit with the above design principles. We also try to minimize the dependencies in the toolkit so that the codes can be easily maintained. 8 | 9 | ## User Guide 10 | 11 | * [Toy examples](toy-examples.md) 12 | * [RLCard high-level design](high-level-design.md) 13 | * [Games in RLCard](games.md) 14 | * [Algorithms in RLCard](algorithms.md) 15 | 16 | ## Developer Guide 17 | 18 | * [Developing new algorithms](developping-algorithms.md) 19 | * [Adding new environments](adding-new-environments.md) 20 | * [Customizing environments](customizing-environments.md) 21 | * [Adding pre-trained/rule-based models](adding-models.md) 22 | 23 | ## Application Programming Interface (API) 24 | The API documents are available at the [Official Website](http://www.rlcard.org). 25 | -------------------------------------------------------------------------------- /docs/adding-models.md: -------------------------------------------------------------------------------- 1 | # Adding Pre-trained/Rule-based models 2 | You can add your own pre-trained/rule-based models to the toolkit by following several steps: 3 | 4 | * **Develop models.** You can either design a rule-based model or save a neural network model.
For each game, you need to develop agents for all the players at the same time. You need to wrap each agent as an `Agent` class and make sure that `step`, `eval_step` and `use_raw` work correctly. 5 | * **Wrap models.** You need to inherit the `Model` class in `rlcard/models/model.py`. Then put all the agents into a list. Rewrite the `agents` property to return this list. 6 | * **Register the model.** Register the model in `rlcard/models/__init__.py`. 7 | * **Load the model in environment.** An example of loading the `leduc-holdem-nfsp` model is as follows: 8 | ```python 9 | from rlcard import models 10 | leduc_nfsp_model = models.load('leduc-holdem-nfsp') 11 | ``` 12 | Then use `leduc_nfsp_model.agents` to obtain all the agents for the game. 13 | -------------------------------------------------------------------------------- /docs/adding-new-environments.md: -------------------------------------------------------------------------------- 1 | # Adding New Environments 2 | To add a new environment to the toolkit, generally you should take the following steps: 3 | * **Implement a game.** Card games usually have similar structures so that they can be implemented with `Game`, `Round`, `Dealer`, `Judger`, `Player`, as in existing games. The easiest way is to inherit the classes in [rlcard/games/base.py](../rlcard/games/base.py) and implement the functions. 4 | * **Wrap the game with an environment.** The easiest way is to inherit `Env` in [rlcard/envs/env.py](../rlcard/envs/env.py). You need to implement `_extract_state` which encodes the state, `_decode_action` which decodes actions from the id to the text string, and `get_payoffs` which calculates the payoffs of the players. 5 | * **Register the game.** Now it is time to tell the toolkit where to locate the new environment. Go to [rlcard/envs/\_\_init\_\_.py](../rlcard/envs/__init__.py), and indicate the name of the game and its entry point. 6 | 7 | To test whether the new environment is set up successfully: 8 | ```python 9 | import rlcard 10 | rlcard.make(#the new environment#) 11 | ``` 12 | -------------------------------------------------------------------------------- /docs/algorithms.md: -------------------------------------------------------------------------------- 1 | # Index 2 | 3 | * [DMC](algorithms.md#deep-monte-carlo) 4 | * [Deep-Q Learning](algorithms.md#deep-q-learning) 5 | * [NFSP](algorithms.md#nfsp) 6 | * [CFR (chance sampling)](algorithms.md#cfr) 7 | 8 | ## Deep Monte-Carlo 9 | Deep Monte-Carlo (DMC) is a very effective algorithm for card games. It is the only algorithm that shows human-level performance on complex games such as Dou Dizhu. 10 | 11 | ## Deep-Q Learning 12 | Deep-Q Learning (DQN) [[paper]](https://arxiv.org/abs/1312.5602) is a basic reinforcement learning (RL) algorithm. We wrap DQN as an example to show how RL algorithms can be connected to the environments. In the DQN agent, the following classes are implemented: 13 | 14 | * `DQNAgent`: The agent class that interacts with the environment. 15 | * `Memory`: A memory buffer that manages the storing and sampling of transitions. 16 | * `Estimator`: The neural network that is used to make predictions. 17 | 18 | ## NFSP 19 | Neural Fictitious Self-Play (NFSP) [[paper]](https://arxiv.org/abs/1603.01121) is an end-to-end approach to solving card games with deep reinforcement learning. NFSP has an inner RL agent and a supervised agent that is trained on the data generated by the RL agent. In the toolkit, we use DQN as the RL agent.
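To make the connection concrete, here is a minimal sketch of plugging the DQN agent into an environment and feeding it transitions. It follows the pattern of `examples/run_rl.py`; the constructor arguments shown are indicative rather than authoritative, so check `rlcard/agents/dqn_agent.py` for the exact signature:

```python
import rlcard
from rlcard.agents import DQNAgent, RandomAgent
from rlcard.utils import get_device, reorganize  # reorganize is assumed to be exported from rlcard.utils

device = get_device()
env = rlcard.make('leduc-holdem')

# The agent only needs the action and state dimensions of the environment
agent = DQNAgent(
    num_actions=env.num_actions,
    state_shape=env.state_shape[0],
    mlp_layers=[64, 64],
    device=device,
)
env.set_agents([agent, RandomAgent(num_actions=env.num_actions)])

for episode in range(1000):
    # Generate one game and feed the transitions of player 0 into DQN
    trajectories, payoffs = env.run(is_training=True)
    trajectories = reorganize(trajectories, payoffs)
    for ts in trajectories[0]:
        agent.feed(ts)
```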
20 | 21 | ## CFR (chance sampling) 22 | Counterfactual Regret Minimization (CFR) [[paper]](http://papers.nips.cc/paper/3306-regret-minimization-in-games-with-incomplete-information.pdf) is a regret minimization method for solving imperfect information games. 23 | -------------------------------------------------------------------------------- /docs/customizing-environments.md: -------------------------------------------------------------------------------- 1 | # Customizing Environments 2 | In addition to the default state representation and action encoding, we also allow customizing an environment. In this document, we use Limit Texas Hold'em as an example to describe how to modify the state representation, action encoding, reward calculation, or even the game rules. 3 | 4 | ## State Representation 5 | To define our own state representation, we can modify the ``_extract_state`` function in [/rlcard/envs/limitholdem.py](../rlcard/envs/limitholdem.py#L33). 6 | 7 | ## Action Encoding 8 | To define our own action encoding, we can modify the ``_decode_action`` function in [/rlcard/envs/limitholdem.py](../rlcard/envs/limitholdem.py#L75). 9 | 10 | ## Reward Calculation 11 | To define our own reward calculation, we can modify the ``get_payoffs`` function in [/rlcard/envs/limitholdem.py](../rlcard/envs/limitholdem.py#L67). 12 | 13 | ## Modifying Game 14 | We can change the parameters of a game to adjust its difficulty. For example, we can change the number of players and the number of allowed raises in Limit Texas Hold'em in the ``__init__`` function in [rlcard/games/limitholdem/game.py](../rlcard/games/limitholdem/game.py#L11). 15 | -------------------------------------------------------------------------------- /docs/developping-algorithms.md: -------------------------------------------------------------------------------- 1 | # Developing Algorithms 2 | Although users are free to design and try their algorithms however they like, we recommend wrapping a new algorithm as an `Agent` class, as in the [example agent](../rlcard/agents/random_agent.py). To be compatible with the basic interfaces, the agent should have the following functions and attribute: 3 | * `step`: Given the current state, predict the next action. 4 | * `eval_step`: Similar to `step`, but for evaluation purposes. Reinforcement learning algorithms usually add some noise for better exploration in training. In evaluation, no noise is added when making predictions. 5 | * `use_raw`: A boolean attribute. `True` if the agent uses raw states to do reasoning; `False` if the agent uses numerical values to play (such as neural networks). 6 | -------------------------------------------------------------------------------- /docs/high-level-design.md: -------------------------------------------------------------------------------- 1 | # RLCard High-level Design 2 | This document introduces the high-level design for the environments, the games, and the agents (algorithms). 3 | 4 | ## Environments 5 | We wrap each game with an `Env` class. The responsibility of `Env` is to help you generate trajectories of the games. For developing Reinforcement Learning (RL) algorithms, we recommend using the following interfaces: 6 | 7 | * `set_agents`: This function tells the `Env` what agents will be used to perform actions in the game. Different games may have a different number of agents. The input of the function is a list of `Agent` objects.
For example, `env.set_agents([RandomAgent(num_actions=env.num_actions) for _ in range(2)])` indicates that two random agents will be used to generate the trajectories. 8 | * `run`: After setting the agents, this interface will run a complete trajectory of the game, calculate the reward for each transition, and reorganize the data so that it can be directly fed into an RL algorithm. 9 | 10 | For advanced access to the environment, such as traversal of the game tree, we provide the following interfaces: 11 | 12 | * `step`: Given the current state, the environment takes one step forward, and returns the next state and the next player. 13 | * `step_back`: Takes one step backward. The environment will restore to the last state. `step_back` is turned off by default since it requires expensive recording of previous states. To turn it on, set `allow_step_back = True` when you `make` the environment. 14 | * `get_payoffs`: At the end of the game, this function can be called to obtain the payoffs for each player. 15 | 16 | ## Games 17 | Card games usually have similar structures. We abstract some concepts in card games and follow the same design pattern. In this way, users/developers can easily dig into the code and change the rules for research purposes. Specifically, the following classes are used in all the games: 18 | 19 | * `Game`: A game is defined as a complete sequence starting from one of the non-terminal states to a terminal state. 20 | * `Round`: A round is a part of the sequence of a game. Most card games can be naturally divided into multiple rounds. 21 | * `Dealer`: A dealer is responsible for shuffling and allocating a deck of cards. 22 | * `Judger`: A judger is responsible for making major decisions at the end of a round or a game. 23 | * `Player`: A player is a role who plays cards following a strategy. 24 | 25 | To summarize, in one `Game`, a `Dealer` deals the cards for each `Player`. In each `Round` of the game, a `Judger` makes major decisions about the next round and, at the end of the game, the payoffs. 26 | 27 | ## Agents 28 | We provide examples of several representative algorithms and wrap them as `Agent` classes to show how a learning algorithm can be connected to the toolkit. The first example is DQN, a representative of the Reinforcement Learning (RL) algorithms category. The second example is NFSP, a representative of RL with self-play. We also provide CFR (chance sampling) and DeepCFR, which belong to the Counterfactual Regret Minimization (CFR) category. Other algorithms from these three categories can be connected in similar ways, as sketched below.
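Tying the pieces together, the following is a minimal sketch of a custom agent that satisfies the `step`, `eval_step`, and `use_raw` interfaces described in [Developing Algorithms](developping-algorithms.md). It is modeled on `rlcard/agents/random_agent.py`; the uniform-random policy is only a placeholder for your own algorithm:

```python
import numpy as np

class MyAgent(object):
    ''' A minimal agent satisfying the basic RLCard interfaces. '''

    use_raw = False  # this agent reasons over the numerical state encoding

    def __init__(self, num_actions):
        self.num_actions = num_actions

    def step(self, state):
        # Training-time policy: pick uniformly among the legal action ids.
        # A real algorithm would put its exploratory policy here.
        return np.random.choice(list(state['legal_actions'].keys()))

    def eval_step(self, state):
        # Evaluation-time policy without exploration noise. The second
        # return value can carry extra info such as action probabilities.
        return self.step(state), {}
```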
29 | -------------------------------------------------------------------------------- /examples/evaluate.py: -------------------------------------------------------------------------------- 1 | ''' An example of evaluating the trained models in RLCard 2 | ''' 3 | import os 4 | import argparse 5 | 6 | import rlcard 7 | from rlcard.agents import ( 8 | DQNAgent, 9 | RandomAgent, 10 | ) 11 | from rlcard.utils import ( 12 | get_device, 13 | set_seed, 14 | tournament, 15 | ) 16 | 17 | def load_model(model_path, env=None, position=None, device=None): 18 | if os.path.isfile(model_path): # Torch model 19 | import torch 20 | agent = torch.load(model_path, map_location=device) 21 | agent.set_device(device) 22 | elif os.path.isdir(model_path): # CFR model 23 | from rlcard.agents import CFRAgent 24 | agent = CFRAgent(env, model_path) 25 | agent.load() 26 | elif model_path == 'random': # Random model 27 | from rlcard.agents import RandomAgent 28 | agent = RandomAgent(num_actions=env.num_actions) 29 | else: # A model in the model zoo 30 | from rlcard import models 31 | agent = models.load(model_path).agents[position] 32 | 33 | return agent 34 | 35 | def evaluate(args): 36 | 37 | # Check whether GPU is available 38 | device = get_device() 39 | 40 | # Seed numpy, torch, random 41 | set_seed(args.seed) 42 | 43 | # Make the environment with seed 44 | env = rlcard.make(args.env, config={'seed': args.seed}) 45 | 46 | # Load models 47 | agents = [] 48 | for position, model_path in enumerate(args.models): 49 | agents.append(load_model(model_path, env, position, device)) 50 | env.set_agents(agents) 51 | 52 | # Evaluate 53 | rewards = tournament(env, args.num_games) 54 | for position, reward in enumerate(rewards): 55 | print(position, args.models[position], reward) 56 | 57 | if __name__ == '__main__': 58 | parser = argparse.ArgumentParser("Evaluation example in RLCard") 59 | parser.add_argument( 60 | '--env', 61 | type=str, 62 | default='leduc-holdem', 63 | choices=[ 64 | 'blackjack', 65 | 'leduc-holdem', 66 | 'limit-holdem', 67 | 'doudizhu', 68 | 'mahjong', 69 | 'no-limit-holdem', 70 | 'uno', 71 | 'gin-rummy', 72 | ], 73 | ) 74 | parser.add_argument( 75 | '--models', 76 | nargs='*', 77 | default=[ 78 | 'experiments/leduc_holdem_dqn_result/model.pth', 79 | 'random', 80 | ], 81 | ) 82 | parser.add_argument( 83 | '--cuda', 84 | type=str, 85 | default='', 86 | ) 87 | parser.add_argument( 88 | '--seed', 89 | type=int, 90 | default=42, 91 | ) 92 | parser.add_argument( 93 | '--num_games', 94 | type=int, 95 | default=10000, 96 | ) 97 | 98 | args = parser.parse_args() 99 | 100 | os.environ["CUDA_VISIBLE_DEVICES"] = args.cuda 101 | evaluate(args) 102 | 103 | -------------------------------------------------------------------------------- /examples/human/blackjack_human.py: -------------------------------------------------------------------------------- 1 | ''' A toy example of playing Blackjack against a random agent 2 | ''' 3 | 4 | import rlcard 5 | from rlcard.agents import RandomAgent 6 | from rlcard.agents import BlackjackHumanAgent as HumanAgent 7 | from rlcard.utils.utils import print_card 8 | 9 | # Make environment 10 | num_players = 2 11 | env = rlcard.make( 12 | 'blackjack', 13 | config={ 14 | 'game_num_players': num_players, 15 | }, 16 | ) 17 | human_agent = HumanAgent(env.num_actions) 18 | random_agent = RandomAgent(env.num_actions) 19 | env.set_agents([ 20 | human_agent, 21 | random_agent, 22 | ]) 23 | 24 | print(">> Blackjack human agent") 25 | 26 | while (True): 27 | print(">> Start a new game") 28 | 29 | 
trajectories, payoffs = env.run(is_training=False) 30 | # If the human does not take the final action, we need to 31 | # print other players' actions 32 | 33 | if len(trajectories[0]) != 0: 34 | final_state = [] 35 | action_record = [] 36 | state = [] 37 | _action_list = [] 38 | 39 | for i in range(num_players): 40 | final_state.append(trajectories[i][-1]) 41 | state.append(final_state[i]['raw_obs']) 42 | 43 | action_record.append(final_state[i]['action_record']) 44 | for i in range(1, len(action_record) + 1): 45 | _action_list.insert(0, action_record[-i]) 46 | 47 | for pair in _action_list[0]: 48 | print('>> Player', pair[0], 'chooses', pair[1]) 49 | 50 | # Let's take a look at the dealer's hand 51 | print('=============== Dealer Hand ===============') 52 | print_card(state[0]['state'][1]) 53 | 54 | for i in range(num_players): 55 | print('=============== Player {} Hand ==============='.format(i)) 56 | print_card(state[i]['state'][0]) 57 | 58 | print('=============== Result ===============') 59 | for i in range(num_players): 60 | if payoffs[i] == 1: 61 | print('Player {} wins {} chip!'.format(i, payoffs[i])) 62 | elif payoffs[i] == 0: 63 | print('Player {} ties'.format(i)) 64 | else: 65 | print('Player {} loses {} chip!'.format(i, -payoffs[i])) 66 | print('') 67 | 68 | input("Press any key to continue...") 69 | -------------------------------------------------------------------------------- /examples/human/gin_rummy_human.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Project: Gui Gin Rummy 3 | File name: gin_rummy_human.py 4 | Author: William Hale 5 | Date created: 3/14/2020 6 | ''' 7 | 8 | # You need to install tkinter if it is not already installed. 9 | # Tkinter is Python's de facto standard GUI (Graphical User Interface) package. 10 | # It is a thin object-oriented layer on top of Tcl/Tk. 11 | # Note that the name of the module is 'tkinter'. 12 | # 13 | # If you are using anaconda: 14 | # -- I have version 8.6.11 working with version 3.6 of Python. 15 | # -- In the "Installed" window for your environment, search for "tk". 16 | # -- If it is found, make sure you have at least version 8.6.11. 17 | # -- Otherwise, go to the "Not installed" window, search for "tk", select it, and apply it. 18 | # 19 | # If you are using Ubuntu: 20 | # -- You can install it with apt-get install python-tk. 21 | # 22 | # For other cases, you can search on Google to see how to install tkinter. 
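# A quick way to check that tkinter is usable (a hedged sketch; TkVersion is
# part of the standard tkinter module):
# >>> import tkinter
# >>> print(tkinter.TkVersion)  # should print 8.6 or later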
23 | 24 | # from __future__ import annotations 25 | from typing import TYPE_CHECKING 26 | if TYPE_CHECKING: 27 | from rlcard.envs.gin_rummy import GinRummyEnv 28 | 29 | import rlcard 30 | 31 | from rlcard.agents import RandomAgent 32 | from rlcard.models.gin_rummy_rule_models import GinRummyNoviceRuleAgent 33 | from rlcard.agents.human_agents.gin_rummy_human_agent.gin_rummy_human_agent import HumanAgent 34 | 35 | from rlcard.agents.human_agents.gin_rummy_human_agent.gui_gin_rummy.game_app import GameApp 36 | 37 | from rlcard.games.gin_rummy.utils import scorers 38 | 39 | 40 | def make_gin_rummy_env() -> 'GinRummyEnv': 41 | gin_rummy_env = rlcard.make('gin-rummy') 42 | # north_agent = RandomAgent(num_actions=gin_rummy_env.num_actions) 43 | north_agent = GinRummyNoviceRuleAgent() 44 | south_agent = HumanAgent(gin_rummy_env.num_actions) 45 | gin_rummy_env.set_agents([ 46 | north_agent, 47 | south_agent 48 | ]) 49 | gin_rummy_env.game.judge.scorer = scorers.GinRummyScorer(get_payoff=scorers.get_payoff_gin_rummy_v0) 50 | return gin_rummy_env 51 | 52 | 53 | # Play game 54 | gin_rummy_app = GameApp(make_gin_rummy_env=make_gin_rummy_env) -------------------------------------------------------------------------------- /examples/human/leduc_holdem_human.py: -------------------------------------------------------------------------------- 1 | ''' A toy example of playing against a pretrained AI on Leduc Hold'em 2 | ''' 3 | 4 | import rlcard 5 | from rlcard import models 6 | from rlcard.agents import LeducholdemHumanAgent as HumanAgent 7 | from rlcard.utils import print_card 8 | 9 | # Make environment 10 | env = rlcard.make('leduc-holdem') 11 | human_agent = HumanAgent(env.num_actions) 12 | cfr_agent = models.load('leduc-holdem-cfr').agents[0] 13 | env.set_agents([ 14 | human_agent, 15 | cfr_agent, 16 | ]) 17 | 18 | print(">> Leduc Hold'em pre-trained model") 19 | 20 | while (True): 21 | print(">> Start a new game") 22 | 23 | trajectories, payoffs = env.run(is_training=False) 24 | # If the human does not take the final action, we need to 25 | # print other players' actions 26 | final_state = trajectories[0][-1] 27 | action_record = final_state['action_record'] 28 | state = final_state['raw_obs'] 29 | _action_list = [] 30 | for i in range(1, len(action_record)+1): 31 | if action_record[-i][0] == state['current_player']: 32 | break 33 | _action_list.insert(0, action_record[-i]) 34 | for pair in _action_list: 35 | print('>> Player', pair[0], 'chooses', pair[1]) 36 | 37 | # Let's take a look at the agent's card 38 | print('=============== CFR Agent ===============') 39 | print_card(env.get_perfect_information()['hand_cards'][1]) 40 | 41 | print('=============== Result ===============') 42 | if payoffs[0] > 0: 43 | print('You win {} chips!'.format(payoffs[0])) 44 | elif payoffs[0] == 0: 45 | print('It is a tie.') 46 | else: 47 | print('You lose {} chips!'.format(-payoffs[0])) 48 | print('') 49 | 50 | input("Press any key to continue...") 51 | -------------------------------------------------------------------------------- /examples/human/limit_holdem_human.py: -------------------------------------------------------------------------------- 1 | ''' A toy example of playing against a random agent on Limit Hold'em 2 | ''' 3 | 4 | import rlcard 5 | from rlcard.agents import LimitholdemHumanAgent as HumanAgent 6 | from rlcard.agents import RandomAgent 7 | from rlcard.utils.utils import print_card 8 | 9 | # Make environment 10 | env = rlcard.make('limit-holdem') 11 | human_agent = 
HumanAgent(env.num_actions) 12 | agent_0 = RandomAgent(num_actions=env.num_actions) 13 | env.set_agents([ 14 | human_agent, 15 | agent_0, 16 | ]) 17 | 18 | print(">> Limit Hold'em random agent") 19 | 20 | while (True): 21 | print(">> Start a new game") 22 | 23 | trajectories, payoffs = env.run(is_training=False) 24 | # If the human does not take the final action, we need to 25 | # print other players' actions 26 | if len(trajectories[0]) != 0: 27 | final_state = trajectories[0][-1] 28 | action_record = final_state['action_record'] 29 | state = final_state['raw_obs'] 30 | _action_list = [] 31 | for i in range(1, len(action_record)+1): 32 | """ 33 | if action_record[-i][0] == state['current_player']: 34 | break 35 | """ 36 | _action_list.insert(0, action_record[-i]) 37 | for pair in _action_list: 38 | print('>> Player', pair[0], 'chooses', pair[1]) 39 | 40 | # Let's take a look at the agent's card 41 | print('============= Random Agent ============') 42 | print_card(env.get_perfect_information()['hand_cards'][1]) 43 | 44 | print('=============== Result ===============') 45 | if payoffs[0] > 0: 46 | print('You win {} chips!'.format(payoffs[0])) 47 | elif payoffs[0] == 0: 48 | print('It is a tie.') 49 | else: 50 | print('You lose {} chips!'.format(-payoffs[0])) 51 | print('') 52 | 53 | input("Press any key to continue...") 54 | -------------------------------------------------------------------------------- /examples/human/nolimit_holdem_human.py: -------------------------------------------------------------------------------- 1 | ''' A toy example of playing No-Limit Hold'em with two human agents 2 | ''' 3 | from rlcard.agents import RandomAgent 4 | 5 | import rlcard 6 | from rlcard import models 7 | from rlcard.agents import NolimitholdemHumanAgent as HumanAgent 8 | from rlcard.utils import print_card 9 | 10 | # Make environment 11 | env = rlcard.make('no-limit-holdem') 12 | 13 | human_agent = HumanAgent(env.num_actions) 14 | human_agent2 = HumanAgent(env.num_actions) 15 | # random_agent = RandomAgent(num_actions=env.num_actions) 16 | 17 | env.set_agents([human_agent, human_agent2]) 18 | 19 | 20 | while (True): 21 | print(">> Start a new game") 22 | 23 | trajectories, payoffs = env.run(is_training=False) 24 | # If the human does not take the final action, we need to 25 | # print other players' actions 26 | final_state = trajectories[0][-1] 27 | action_record = final_state['action_record'] 28 | state = final_state['raw_obs'] 29 | _action_list = [] 30 | for i in range(1, len(action_record)+1): 31 | if action_record[-i][0] == state['current_player']: 32 | break 33 | _action_list.insert(0, action_record[-i]) 34 | for pair in _action_list: 35 | print('>> Player', pair[0], 'chooses', pair[1]) 36 | 37 | # Let's take a look at all players' cards 38 | print('=============== Cards of All Players ===============') 39 | for hands in env.get_perfect_information()['hand_cards']: 40 | print_card(hands) 41 | 42 | print('=============== Result ===============') 43 | if payoffs[0] > 0: 44 | print('You win {} chips!'.format(payoffs[0])) 45 | elif payoffs[0] == 0: 46 | print('It is a tie.') 47 | else: 48 | print('You lose {} chips!'.format(-payoffs[0])) 49 | print('') 50 | 51 | input("Press any key to continue...") 52 | -------------------------------------------------------------------------------- /examples/human/uno_human.py: -------------------------------------------------------------------------------- 1 | ''' A toy example of playing against a rule-based bot on UNO 2 | ''' 3 | 4 | import rlcard 5 | from 
rlcard import models 6 | from rlcard.agents.human_agents.uno_human_agent import HumanAgent, _print_action 7 | 8 | # Make environment 9 | env = rlcard.make('uno') 10 | human_agent = HumanAgent(env.num_actions) 11 | rule_agent = models.load('uno-rule-v1').agents[0] 12 | env.set_agents([ 13 | human_agent, 14 | rule_agent, 15 | ]) 16 | 17 | print(">> UNO rule model V1") 18 | 19 | while (True): 20 | print(">> Start a new game") 21 | 22 | trajectories, payoffs = env.run(is_training=False) 23 | # If the human does not take the final action, we need to 24 | # print other players' actions 25 | final_state = trajectories[0][-1] 26 | action_record = final_state['action_record'] 27 | state = final_state['raw_obs'] 28 | _action_list = [] 29 | for i in range(1, len(action_record)+1): 30 | if action_record[-i][0] == state['current_player']: 31 | break 32 | _action_list.insert(0, action_record[-i]) 33 | for pair in _action_list: 34 | print('>> Player', pair[0], 'chooses ', end='') 35 | _print_action(pair[1]) 36 | print('') 37 | 38 | print('=============== Result ===============') 39 | if payoffs[0] > 0: 40 | print('You win!') 41 | else: 42 | print('You lose!') 43 | print('') 44 | input("Press any key to continue...") 45 | -------------------------------------------------------------------------------- /examples/pettingzoo/README.md: -------------------------------------------------------------------------------- 1 | # Train agents on PettingZoo Environments 2 | 3 | RLCard environments are also wrapped by [PettingZoo](https://www.pettingzoo.ml/) which 4 | implements the Agent Environment Cycle (AEC) games model. PettingZoo is a library with 5 | diverse sets of multi-agent environments, developed with the goal of accelerating 6 | research in Multi-Agent Reinforcement Learning (MARL). 7 | 8 | ## Setup 9 | 10 | First, install PettingZoo with the classic games. 11 | 12 | ```bash 13 | pip3 install pettingzoo[classic] 14 | ``` 15 | 16 | PettingZoo has RLCard as a dependency, so if you already have RLCard installed in your 17 | Python environment, it may get replaced by the version required by PettingZoo; in that 18 | case, you may need to re-install RLCard. 19 | 20 | ## Train Agents 21 | 22 | Training scripts for DQN, NFSP, and DMC are provided. 
The following trains a DQN agent 23 | on the Leduc Hold'em environment: 24 | 25 | ```bash 26 | python run_rl.py 27 | ``` 28 | -------------------------------------------------------------------------------- /examples/pettingzoo/run_dmc.py: -------------------------------------------------------------------------------- 1 | ''' An example of training a Deep Monte-Carlo (DMC) Agent on PettingZoo environments 2 | wrapping RLCard 3 | ''' 4 | import os 5 | import argparse 6 | 7 | from pettingzoo.classic import ( 8 | leduc_holdem_v4, 9 | texas_holdem_v4, 10 | dou_dizhu_v4, 11 | mahjong_v4, 12 | texas_holdem_no_limit_v6, 13 | uno_v4, 14 | gin_rummy_v4, 15 | ) 16 | 17 | from rlcard.agents.dmc_agent import DMCTrainer 18 | 19 | 20 | env_name_to_env_func = { 21 | "leduc-holdem": leduc_holdem_v4, 22 | "limit-holdem": texas_holdem_v4, 23 | "doudizhu": dou_dizhu_v4, 24 | "mahjong": mahjong_v4, 25 | "no-limit-holdem": texas_holdem_no_limit_v6, 26 | "uno": uno_v4, 27 | "gin-rummy": gin_rummy_v4, 28 | } 29 | 30 | 31 | def train(args): 32 | # Make the environment 33 | env_func = env_name_to_env_func[args.env] 34 | env = env_func.env() 35 | env.reset() 36 | 37 | # Initialize the DMC trainer 38 | trainer = DMCTrainer( 39 | env, 40 | is_pettingzoo_env=True, 41 | load_model=args.load_model, 42 | xpid=args.xpid, 43 | savedir=args.savedir, 44 | save_interval=args.save_interval, 45 | num_actor_devices=args.num_actor_devices, 46 | num_actors=args.num_actors, 47 | training_device=args.training_device, 48 | total_frames=args.total_frames, 49 | ) 50 | 51 | # Train DMC Agents 52 | trainer.start() 53 | 54 | if __name__ == '__main__': 55 | parser = argparse.ArgumentParser("DMC example in RLCard") 56 | parser.add_argument( 57 | '--env', 58 | type=str, 59 | default='leduc-holdem', 60 | choices=[ 61 | 'blackjack', 62 | 'leduc-holdem', 63 | 'limit-holdem', 64 | 'doudizhu', 65 | 'mahjong', 66 | 'no-limit-holdem', 67 | 'uno', 68 | 'gin-rummy', 69 | ] 70 | ) 71 | parser.add_argument( 72 | '--cuda', 73 | type=str, 74 | default='', 75 | ) 76 | parser.add_argument( 77 | '--load_model', 78 | action='store_true', 79 | help='Load an existing model', 80 | ) 81 | parser.add_argument( 82 | '--xpid', 83 | default='leduc_holdem', 84 | help='Experiment id (default: leduc_holdem)', 85 | ) 86 | parser.add_argument( 87 | '--savedir', 88 | default='experiments/dmc_result', 89 | help='Root dir where experiment data will be saved', 90 | ) 91 | parser.add_argument( 92 | '--save_interval', 93 | default=30, 94 | type=int, 95 | help='Time interval (in minutes) at which to save the model', 96 | ) 97 | parser.add_argument( 98 | '--num_actor_devices', 99 | default=1, 100 | type=int, 101 | help='The number of devices used for simulation', 102 | ) 103 | parser.add_argument( 104 | '--num_actors', 105 | default=5, 106 | type=int, 107 | help='The number of actors for each simulation device', 108 | ) 109 | parser.add_argument( 110 | '--total_frames', 111 | default=1e11, 112 | type=int, 113 | help='The total number of frames to train for', 114 | ) 115 | parser.add_argument( 116 | '--training_device', 117 | default=0, 118 | type=int, 119 | help='The index of the GPU used for training models', 120 | ) 121 | 122 | args = parser.parse_args() 123 | 124 | os.environ["CUDA_VISIBLE_DEVICES"] = args.cuda 125 | train(args) 126 | 127 | -------------------------------------------------------------------------------- /examples/run_cfr.py: -------------------------------------------------------------------------------- 1 | ''' An example of solving Leduc Hold'em with CFR 
(chance sampling) 2 | ''' 3 | import os 4 | import argparse 5 | 6 | import rlcard 7 | from rlcard.agents import ( 8 | CFRAgent, 9 | RandomAgent, 10 | ) 11 | from rlcard.utils import ( 12 | set_seed, 13 | tournament, 14 | Logger, 15 | plot_curve, 16 | ) 17 | 18 | def train(args): 19 | # Make environments, CFR only supports Leduc Hold'em 20 | env = rlcard.make( 21 | 'leduc-holdem', 22 | config={ 23 | 'seed': 0, 24 | 'allow_step_back': True, 25 | } 26 | ) 27 | eval_env = rlcard.make( 28 | 'leduc-holdem', 29 | config={ 30 | 'seed': 0, 31 | } 32 | ) 33 | 34 | # Seed numpy, torch, random 35 | set_seed(args.seed) 36 | 37 | # Initialize CFR Agent 38 | agent = CFRAgent( 39 | env, 40 | os.path.join( 41 | args.log_dir, 42 | 'cfr_model', 43 | ), 44 | ) 45 | agent.load() # If we have a saved model, we first load the model 46 | 47 | # Evaluate CFR against random 48 | eval_env.set_agents([ 49 | agent, 50 | RandomAgent(num_actions=env.num_actions), 51 | ]) 52 | 53 | # Start training 54 | with Logger(args.log_dir) as logger: 55 | for episode in range(args.num_episodes): 56 | agent.train() 57 | print('\rIteration {}'.format(episode), end='') 58 | # Evaluate the performance. Play with Random agents. 59 | if episode % args.evaluate_every == 0: 60 | agent.save() # Save model 61 | logger.log_performance( 62 | episode, 63 | tournament( 64 | eval_env, 65 | args.num_eval_games 66 | )[0] 67 | ) 68 | 69 | # Get the paths 70 | csv_path, fig_path = logger.csv_path, logger.fig_path 71 | # Plot the learning curve 72 | plot_curve(csv_path, fig_path, 'cfr') 73 | 74 | if __name__ == '__main__': 75 | parser = argparse.ArgumentParser("CFR example in RLCard") 76 | parser.add_argument( 77 | '--seed', 78 | type=int, 79 | default=42, 80 | ) 81 | parser.add_argument( 82 | '--num_episodes', 83 | type=int, 84 | default=5000, 85 | ) 86 | parser.add_argument( 87 | '--num_eval_games', 88 | type=int, 89 | default=2000, 90 | ) 91 | parser.add_argument( 92 | '--evaluate_every', 93 | type=int, 94 | default=100, 95 | ) 96 | parser.add_argument( 97 | '--log_dir', 98 | type=str, 99 | default='experiments/leduc_holdem_cfr_result/', 100 | ) 101 | 102 | args = parser.parse_args() 103 | 104 | train(args) 105 | 106 | -------------------------------------------------------------------------------- /examples/run_dmc.py: -------------------------------------------------------------------------------- 1 | ''' An example of training a Deep Monte-Carlo (DMC) Agent on the environments in RLCard 2 | ''' 3 | import os 4 | import argparse 5 | 6 | import torch 7 | 8 | import rlcard 9 | from rlcard.agents.dmc_agent import DMCTrainer 10 | 11 | def train(args): 12 | 13 | # Make the environment 14 | env = rlcard.make(args.env) 15 | 16 | # Initialize the DMC trainer 17 | trainer = DMCTrainer( 18 | env, 19 | cuda=args.cuda, 20 | load_model=args.load_model, 21 | xpid=args.xpid, 22 | savedir=args.savedir, 23 | save_interval=args.save_interval, 24 | num_actor_devices=args.num_actor_devices, 25 | num_actors=args.num_actors, 26 | training_device=args.training_device, 27 | ) 28 | 29 | # Train DMC Agents 30 | trainer.start() 31 | 32 | if __name__ == '__main__': 33 | parser = argparse.ArgumentParser("DMC example in RLCard") 34 | parser.add_argument( 35 | '--env', 36 | type=str, 37 | default='leduc-holdem', 38 | choices=[ 39 | 'blackjack', 40 | 'leduc-holdem', 41 | 'limit-holdem', 42 | 'doudizhu', 43 | 'mahjong', 44 | 'no-limit-holdem', 45 | 'uno', 46 | 'gin-rummy' 47 | ], 48 | ) 49 | parser.add_argument( 50 | '--cuda', 51 | type=str, 52 | default='', 53 | ) 54 | 
parser.add_argument( 55 | '--load_model', 56 | action='store_true', 57 | help='Load an existing model', 58 | ) 59 | parser.add_argument( 60 | '--xpid', 61 | default='leduc_holdem', 62 | help='Experiment id (default: leduc_holdem)', 63 | ) 64 | parser.add_argument( 65 | '--savedir', 66 | default='experiments/dmc_result', 67 | help='Root dir where experiment data will be saved' 68 | ) 69 | parser.add_argument( 70 | '--save_interval', 71 | default=30, 72 | type=int, 73 | help='Time interval (in minutes) at which to save the model', 74 | ) 75 | parser.add_argument( 76 | '--num_actor_devices', 77 | default=1, 78 | type=int, 79 | help='The number of devices used for simulation', 80 | ) 81 | parser.add_argument( 82 | '--num_actors', 83 | default=5, 84 | type=int, 85 | help='The number of actors for each simulation device', 86 | ) 87 | parser.add_argument( 88 | '--training_device', 89 | default="0", 90 | type=str, 91 | help='The index of the GPU used for training models', 92 | ) 93 | 94 | args = parser.parse_args() 95 | 96 | os.environ["CUDA_VISIBLE_DEVICES"] = args.cuda 97 | train(args) 98 | 99 | -------------------------------------------------------------------------------- /examples/run_random.py: -------------------------------------------------------------------------------- 1 | ''' An example of playing randomly in RLCard 2 | ''' 3 | import argparse 4 | import pprint 5 | 6 | import rlcard 7 | from rlcard.agents import RandomAgent 8 | from rlcard.utils import set_seed 9 | 10 | def run(args): 11 | # Make environment 12 | env = rlcard.make( 13 | args.env, 14 | config={ 15 | 'seed': 42, 16 | } 17 | ) 18 | 19 | # Seed numpy, torch, random 20 | set_seed(42) 21 | 22 | # Set agents 23 | agent = RandomAgent(num_actions=env.num_actions) 24 | env.set_agents([agent for _ in range(env.num_players)]) 25 | 26 | # Generate data from the environment 27 | trajectories, player_wins = env.run(is_training=False) 28 | # Print out the trajectories 29 | print('\nTrajectories:') 30 | print(trajectories) 31 | print('\nSample raw observation:') 32 | pprint.pprint(trajectories[0][0]['raw_obs']) 33 | print('\nSample raw legal_actions:') 34 | pprint.pprint(trajectories[0][0]['raw_legal_actions']) 35 | 36 | if __name__ == '__main__': 37 | parser = argparse.ArgumentParser("Random example in RLCard") 38 | parser.add_argument( 39 | '--env', 40 | type=str, 41 | default='leduc-holdem', 42 | choices=[ 43 | 'blackjack', 44 | 'leduc-holdem', 45 | 'limit-holdem', 46 | 'doudizhu', 47 | 'mahjong', 48 | 'no-limit-holdem', 49 | 'uno', 50 | 'gin-rummy', 51 | 'bridge', 52 | ], 53 | ) 54 | 55 | args = parser.parse_args() 56 | 57 | run(args) 58 | 59 | -------------------------------------------------------------------------------- /examples/scripts/dmc_doudizhu_1_gpu.sh: -------------------------------------------------------------------------------- 1 | python3 examples/run_dmc.py --env doudizhu --xpid doudizhu --cuda 0 --num_actor_devices 1 --training_device 0 --num_actors 8 --save_interval 30 2 | -------------------------------------------------------------------------------- /examples/scripts/dmc_doudizhu_4_gpu.sh: -------------------------------------------------------------------------------- 1 | python3 examples/run_dmc.py --env doudizhu --xpid doudizhu --cuda 0,1,2,3 --num_actor_devices 3 --training_device 3 --num_actors 8 --save_interval 30 2 | -------------------------------------------------------------------------------- /rlcard/__init__.py: -------------------------------------------------------------------------------- 1 | 
name = "rlcard" 2 | __version__ = "1.2.0" 3 | 4 | from rlcard.envs import make 5 | -------------------------------------------------------------------------------- /rlcard/agents/__init__.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import sys 3 | from distutils.version import LooseVersion 4 | 5 | reqs = subprocess.check_output([sys.executable, '-m', 'pip', 'freeze']) 6 | installed_packages = [r.decode().split('==')[0] for r in reqs.split()] 7 | 8 | if 'torch' in installed_packages: 9 | from rlcard.agents.dqn_agent import DQNAgent as DQNAgent 10 | from rlcard.agents.nfsp_agent import NFSPAgent as NFSPAgent 11 | 12 | from rlcard.agents.cfr_agent import CFRAgent 13 | from rlcard.agents.human_agents.limit_holdem_human_agent import HumanAgent as LimitholdemHumanAgent 14 | from rlcard.agents.human_agents.nolimit_holdem_human_agent import HumanAgent as NolimitholdemHumanAgent 15 | from rlcard.agents.human_agents.leduc_holdem_human_agent import HumanAgent as LeducholdemHumanAgent 16 | from rlcard.agents.human_agents.blackjack_human_agent import HumanAgent as BlackjackHumanAgent 17 | from rlcard.agents.human_agents.uno_human_agent import HumanAgent as UnoHumanAgent 18 | from rlcard.agents.random_agent import RandomAgent 19 | -------------------------------------------------------------------------------- /rlcard/agents/dmc_agent/__init__.py: -------------------------------------------------------------------------------- 1 | from .trainer import DMCTrainer 2 | -------------------------------------------------------------------------------- /rlcard/agents/dmc_agent/pettingzoo_model.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | from .model import DMCAgent 4 | from rlcard.utils.pettingzoo_utils import wrap_state 5 | 6 | 7 | class DMCAgentPettingZoo(DMCAgent): 8 | def step(self, state): 9 | return super().step(wrap_state(state)) 10 | 11 | def eval_step(self, state): 12 | return super().eval_step(wrap_state(state)) 13 | 14 | def feed(self, ts): 15 | state, action, reward, next_state, done = tuple(ts) 16 | state = wrap_state(state) 17 | next_state = wrap_state(next_state) 18 | ts = (state, action, reward, next_state, done) 19 | return super().feed(ts) 20 | 21 | 22 | class DMCModelPettingZoo: 23 | def __init__( 24 | self, 25 | env, 26 | mlp_layers=[512,512,512,512,512], 27 | exp_epsilon=0.01, 28 | device="0" 29 | ): 30 | self.agents = OrderedDict() 31 | for agent_name in env.agents: 32 | agent = DMCAgentPettingZoo( 33 | env.observation_space(agent_name)["observation"].shape, 34 | (env.action_space(agent_name).n,), 35 | mlp_layers, 36 | exp_epsilon, 37 | device, 38 | ) 39 | self.agents[agent_name] = agent 40 | 41 | def share_memory(self): 42 | for agent in self.agents.values(): 43 | agent.share_memory() 44 | 45 | def eval(self): 46 | for agent in self.agents.values(): 47 | agent.eval() 48 | 49 | def parameters(self, index): 50 | return list(self.agents.values())[index].parameters() 51 | 52 | def get_agent(self, index): 53 | return list(self.agents.values())[index] 54 | 55 | def get_agents(self): 56 | return list(self.agents.values()) 57 | -------------------------------------------------------------------------------- /rlcard/agents/human_agents/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/datamllab/rlcard/d7d0a957baf4cc7225a50522adb0164bf130a9d0/rlcard/agents/human_agents/__init__.py -------------------------------------------------------------------------------- /rlcard/agents/human_agents/blackjack_human_agent.py: -------------------------------------------------------------------------------- 1 | from rlcard.utils.utils import print_card 2 | 3 | 4 | class HumanAgent(object): 5 | ''' A human agent for Blackjack. It can be used to play alone to understand how the Blackjack code runs. 6 | ''' 7 | 8 | def __init__(self, num_actions): 9 | ''' Initialize the human agent 10 | 11 | Args: 12 | num_actions (int): the size of the output action space 13 | ''' 14 | self.use_raw = True 15 | self.num_actions = num_actions 16 | 17 | @staticmethod 18 | def step(state): 19 | ''' Human agent will display the state and make decisions through interfaces 20 | 21 | Args: 22 | state (dict): A dictionary that represents the current state 23 | 24 | Returns: 25 | action (int): The action decided by the human 26 | ''' 27 | _print_state(state['raw_obs'], state['raw_legal_actions'], state['action_record']) 28 | action = int(input('>> You choose action (integer): ')) 29 | while action < 0 or action >= len(state['legal_actions']): 30 | print('Action illegal...') 31 | action = int(input('>> Re-choose action (integer): ')) 32 | return state['raw_legal_actions'][action] 33 | 34 | def eval_step(self, state): 35 | ''' Predict the action given the current state for evaluation. The same as step here. 36 | 37 | Args: 38 | state (numpy.array): a numpy array that represents the current state 39 | 40 | Returns: 41 | action (int): the action chosen by the human agent 42 | ''' 43 | return self.step(state), {} 44 | 45 | def _print_state(state, raw_legal_actions, action_record): 46 | ''' Print out the state 47 | 48 | Args: 49 | state (dict): A dictionary of the raw state 50 | action_record (list): A list of each player's historical actions 51 | ''' 52 | _action_list = [] 53 | for i in range(1, len(action_record)+1): 54 | _action_list.insert(0, action_record[-i]) 55 | for pair in _action_list: 56 | print('>> Player', pair[0], 'chooses', pair[1]) 57 | 58 | print('\n============= Dealer Hand ===============') 59 | print_card(state['dealer hand']) 60 | 61 | num_players = len(state) - 3 62 | 63 | for i in range(num_players): 64 | print('=============== Player {} Hand ==============='.format(i)) 65 | print_card(state['player' + str(i) + ' hand']) 66 | 67 | print('\n=========== Actions You Can Choose ===========') 68 | print(', '.join([str(index) + ': ' + action for index, action in enumerate(raw_legal_actions)])) 69 | print('') 70 | -------------------------------------------------------------------------------- /rlcard/agents/human_agents/gin_rummy_human_agent/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datamllab/rlcard/d7d0a957baf4cc7225a50522adb0164bf130a9d0/rlcard/agents/human_agents/gin_rummy_human_agent/__init__.py -------------------------------------------------------------------------------- /rlcard/agents/human_agents/gin_rummy_human_agent/gin_rummy_human_agent.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Project: Gui Gin Rummy 3 | File name: gin_rummy_human_agent.py 4 | Author: William Hale 5 | Date created: 3/14/2020 6 | ''' 7 | 8 | import time 9 | 10 | from rlcard.games.gin_rummy.utils.action_event import ActionEvent 11 | 
from rlcard.games.gin_rummy.utils.gin_rummy_error import GinRummyProgramError 12 | 13 | 14 | class HumanAgent(object): 15 | ''' A human agent for Gin Rummy. It can be used to play against trained models. 16 | ''' 17 | 18 | def __init__(self, num_actions): 19 | ''' Initialize the human agent 20 | 21 | Args: 22 | num_actions (int): the size of the output action space 23 | ''' 24 | self.use_raw = True 25 | self.num_actions = num_actions 26 | self.is_choosing_action_id = False 27 | self.chosen_action_id = None # type: int or None 28 | self.state = None 29 | 30 | def step(self, state): 31 | ''' Human agent will display the state and make decisions through interfaces 32 | 33 | Args: 34 | state (dict): A dictionary that represents the current state 35 | 36 | Returns: 37 | action (int): The action decided by the human 38 | ''' 39 | if self.is_choosing_action_id: 40 | raise GinRummyProgramError("self.is_choosing_action_id must be False.") 41 | if self.state is not None: 42 | raise GinRummyProgramError("self.state must be None.") 43 | if self.chosen_action_id is not None: 44 | raise GinRummyProgramError("self.chosen_action_id={} must be None.".format(self.chosen_action_id)) 45 | self.state = state 46 | self.is_choosing_action_id = True 47 | while not self.chosen_action_id: 48 | time.sleep(0.001) 49 | if self.chosen_action_id is None: 50 | raise GinRummyProgramError("self.chosen_action_id cannot be None.") 51 | chosen_action_event = ActionEvent.decode_action(action_id=self.chosen_action_id) 52 | self.state = None 53 | self.is_choosing_action_id = False 54 | self.chosen_action_id = None 55 | return chosen_action_event 56 | 57 | def eval_step(self, state): 58 | ''' Predict the action given the current state for evaluation. The same as step here. 59 | 60 | Args: 61 | state (numpy.array): a numpy array that represents the current state 62 | 63 | Returns: 64 | action (int): the action chosen by the human agent 65 | ''' 66 | return self.step(state), {} 67 | -------------------------------------------------------------------------------- /rlcard/agents/human_agents/gin_rummy_human_agent/gui_cards/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datamllab/rlcard/d7d0a957baf4cc7225a50522adb0164bf130a9d0/rlcard/agents/human_agents/gin_rummy_human_agent/gui_cards/__init__.py -------------------------------------------------------------------------------- /rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datamllab/rlcard/d7d0a957baf4cc7225a50522adb0164bf130a9d0/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/__init__.py -------------------------------------------------------------------------------- /rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/canvas_item.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Project: Gui Gin Rummy 3 | File name: canvas_item.py 4 | Author: William Hale 5 | Date created: 3/14/2020 6 | ''' 7 | 8 | # from __future__ import annotations 9 | from typing import TYPE_CHECKING 10 | if TYPE_CHECKING: 11 | from rlcard.agents.gin_rummy_human_agent.gui_gin_rummy.game_canvas import GameCanvas 12 | from rlcard.agents.gin_rummy_human_agent.gui_cards.card_image import CardImage 13 | 14 | 15 | class CanvasItem(object): 16 | 17 | def __init__(self, item_id: int, game_canvas: 
'GameCanvas'): 18 | self.item_id = item_id 19 | self.game_canvas = game_canvas 20 | 21 | def __eq__(self, other): 22 | if isinstance(other, int): # FIXME: temporary kludge to convert all item_id to CanvasItem 23 | return other == self.item_id 24 | return isinstance(other, CanvasItem) and self.item_id == other.item_id 25 | 26 | def __hash__(self): 27 | return hash(self.item_id) 28 | 29 | def get_tags(self): 30 | return self.game_canvas.gettags(self.item_id) 31 | 32 | 33 | class CardItem(CanvasItem): 34 | 35 | def __init__(self, item_id: int, card_id: int, card_image: 'CardImage', game_canvas: 'GameCanvas'): 36 | super().__init__(item_id=item_id, game_canvas=game_canvas) 37 | self.card_id = card_id 38 | self.card_image = card_image 39 | 40 | def is_face_up(self) -> bool: 41 | return self.card_image.face_up 42 | 43 | def set_card_id_face_up(self, face_up: bool): 44 | if self.card_image.face_up != face_up: 45 | target_image = self.card_image if face_up else self.game_canvas.card_back_image 46 | self.game_canvas.itemconfig(self.item_id, image=target_image) 47 | self.card_image.face_up = face_up 48 | 49 | def flip_over(self): 50 | self.card_image.face_up = not self.card_image.face_up 51 | target_image = self.card_image if self.card_image.face_up else self.game_canvas.card_back_image 52 | self.game_canvas.itemconfig(self.item_id, image=target_image) 53 | -------------------------------------------------------------------------------- /rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/configurations.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Project: Gui Gin Rummy 3 | File name: configurations.py 4 | Author: William Hale 5 | Date created: 3/14/2020 6 | ''' 7 | 8 | import os 9 | 10 | from configparser import ConfigParser 11 | 12 | # 13 | # Gin Rummy parameters 14 | # 15 | 16 | GOING_OUT_DEADWOOD_COUNT = 10 17 | 18 | # 19 | # RLCard Gin Rummy parameters 20 | # 21 | 22 | MAX_DRAWN_CARD_COUNT = 52 23 | 24 | DISCARD_PILE_TAG = "discard_pile" 25 | STOCK_PILE_TAG = "stock_pile" 26 | NORTH_HELD_PILE_TAG = "north_held_pile" 27 | SOUTH_HELD_PILE_TAG = "south_held_pile" 28 | PLAYER_HELD_PILE_TAGS = [NORTH_HELD_PILE_TAG, SOUTH_HELD_PILE_TAG] 29 | 30 | DRAWN_TAG = "drawn" 31 | JOGGED_TAG = "jogged" 32 | SELECTED_TAG = "selected" 33 | 34 | SCORE_PLAYER_0_ACTION_ID = 0 35 | SCORE_PLAYER_1_ACTION_ID = 1 36 | DRAW_CARD_ACTION_ID = 2 37 | PICK_UP_DISCARD_ACTION_ID = 3 38 | DECLARE_DEAD_HAND_ACTION_ID = 4 39 | GIN_ACTION_ID = 5 40 | DISCARD_ACTION_ID = 6 41 | KNOCK_ACTION_ID = DISCARD_ACTION_ID + 52 42 | 43 | # 44 | # Not User Modifiable Options 45 | # 46 | 47 | IS_KEEP_TURN_WHEN_DISCARDING_CARD_PICKED_UP = False # TODO: make True the default value 48 | 49 | # 50 | # User Modifiable Options 51 | # 52 | 53 | config_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'game_options.ini') # Note this 54 | 55 | config = ConfigParser() 56 | found = config.read(config_path) 57 | 58 | # settings section 59 | settings_section = "settings" 60 | show_status_messages_option = "show_status_messages" 61 | warning_as_option = 'warning_as' 62 | game_background_color_option = 'game_background_color' 63 | window_size_factor_option = 'window_size_factor' 64 | is_show_tips_option = "is_show_tips" 65 | is_debug_option = "is_debug" 66 | 67 | SHOW_STATUS_MESSAGES = config.get(section=settings_section, option=show_status_messages_option, fallback="verbose") 68 | WARNINGS_AS = config.get(section=settings_section, option=warning_as_option, 
fallback="alert_messages") 69 | GAME_BACKGROUND_COLOR = config.get(section=settings_section, option=game_background_color_option, fallback="#007F00") 70 | WINDOW_SIZE_FACTOR = config.getint(section=settings_section, option=window_size_factor_option, fallback=75) 71 | IS_SHOW_TIPS = config.getboolean(section=settings_section, option=is_show_tips_option, fallback=True) 72 | # Note: IS_DEBUG always starts off as False; must explicitly update via preference window 73 | # IS_DEBUG = config.getboolean(section=settings_section, option=is_debug_option, fallback=False) 74 | IS_DEBUG = False 75 | -------------------------------------------------------------------------------- /rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_app.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Project: Gui Gin Rummy 3 | File name: game_app.py 4 | Author: William Hale 5 | Date created: 3/14/2020 6 | ''' 7 | 8 | # from __future__ import annotations 9 | from typing import TYPE_CHECKING 10 | if TYPE_CHECKING: 11 | from rlcard.envs.gin_rummy import GinRummyEnv 12 | 13 | from typing import Callable 14 | 15 | import tkinter as tk 16 | 17 | import rlcard 18 | 19 | from rlcard.agents.random_agent import RandomAgent 20 | 21 | from ..gin_rummy_human_agent import HumanAgent 22 | 23 | from .game_frame import GameFrame 24 | from .menu_bar import MenuBar 25 | 26 | 27 | class GameApp(object): 28 | 29 | def __init__(self, make_gin_rummy_env: Callable[[], 'GinRummyEnv'] = None): 30 | self.make_gin_rummy_env = make_gin_rummy_env if make_gin_rummy_env else GameApp._make_gin_rummy_env 31 | root = tk.Tk() 32 | root.resizable(False, False) 33 | self.game_frame = GameFrame(root=root, game_app=self) 34 | self.menu_bar = MenuBar(root, game_frame=self.game_frame) 35 | root.mainloop() 36 | 37 | @staticmethod 38 | def _make_gin_rummy_env() -> 'GinRummyEnv': 39 | gin_rummy_env = rlcard.make('gin-rummy') 40 | north_agent = RandomAgent(num_actions=gin_rummy_env.num_actions) 41 | south_agent = HumanAgent(gin_rummy_env.num_actions) 42 | gin_rummy_env.set_agents([north_agent, south_agent]) 43 | return gin_rummy_env 44 | -------------------------------------------------------------------------------- /rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_canvas_debug.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Project: Gui Gin Rummy 3 | File name: game_canvas_debug.py 4 | Author: William Hale 5 | Date created: 3/14/2020 6 | ''' 7 | 8 | # from __future__ import annotations 9 | from typing import TYPE_CHECKING 10 | if TYPE_CHECKING: 11 | from .game_canvas import GameCanvas 12 | 13 | from . 
import configurations 14 | 15 | from rlcard.games.gin_rummy.player import GinRummyPlayer 16 | 17 | import rlcard.games.gin_rummy.utils.utils as gin_rummy_utils 18 | 19 | 20 | class GameCanvasDebug(object): 21 | 22 | def __init__(self, game_canvas: 'GameCanvas'): 23 | self.game_canvas = game_canvas 24 | 25 | def get_card_name(self, card_item_id: int) -> str: 26 | card_id = self.game_canvas.card_item_ids.index(card_item_id) 27 | card = gin_rummy_utils.card_from_card_id(card_id=card_id) 28 | return str(card) 29 | 30 | def description(self): 31 | game_canvas = self.game_canvas 32 | card_name = self.get_card_name 33 | dealer_id = game_canvas.dealer_id 34 | current_player_id = game_canvas.current_player_id 35 | stock_pile_item_ids = game_canvas.find_withtag(configurations.STOCK_PILE_TAG) 36 | discard_pile_items = game_canvas.find_withtag(configurations.DISCARD_PILE_TAG) 37 | north_held_pile_item_ids = game_canvas.getter.get_held_pile_item_ids(player_id=0) 38 | south_held_pile_item_ids = game_canvas.getter.get_held_pile_item_ids(player_id=1) 39 | lines = [] 40 | lines.append("dealer: {}".format(GinRummyPlayer.short_name_of(player_id=dealer_id))) 41 | lines.append("current_player: {}".format(GinRummyPlayer.short_name_of(player_id=current_player_id))) 42 | lines.append("north hand: {}".format([card_name(card_item_id) for card_item_id in north_held_pile_item_ids])) 43 | lines.append("stockpile: {}".format([card_name(card_item_id) for card_item_id in stock_pile_item_ids])) 44 | lines.append("discard pile: {}".format([card_name(card_item_id) for card_item_id in discard_pile_items])) 45 | lines.append("south hand: {}".format([card_name(card_item_id) for card_item_id in south_held_pile_item_ids])) 46 | return "\n".join(lines) 47 | -------------------------------------------------------------------------------- /rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_options.ini: -------------------------------------------------------------------------------- 1 | [settings] 2 | show_status_messages = verbose 3 | warning_as = alert messages 4 | game_background_color = #007f00 5 | window_size_factor = 75 6 | is_debug = True 7 | is_show_tips = True 8 | 9 | -------------------------------------------------------------------------------- /rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Project: Gui Gin Rummy 3 | File name: handling_tap.py 4 | Author: William Hale 5 | Date created: 3/14/2020 6 | ''' 7 | 8 | # from __future__ import annotations 9 | from typing import TYPE_CHECKING 10 | if TYPE_CHECKING: 11 | from .game_canvas import GameCanvas 12 | 13 | from . import configurations 14 | from . 
import starting_new_game 15 | 16 | from .canvas_item import CanvasItem 17 | from .handling_tap_stock_pile import handle_tap_stock_pile 18 | from .handling_tap_discard_pile import handle_tap_discard_pile 19 | from .handling_tap_held_pile import handle_tap_held_pile 20 | from .handling_tap_player_pane import handle_tap_player_pane 21 | 22 | from rlcard.games.gin_rummy.utils.gin_rummy_error import GinRummyProgramError 23 | 24 | 25 | def on_game_canvas_tap(event): 26 | widget = event.widget 27 | hit_item_ids = widget.find_withtag("current") 28 | if hit_item_ids: 29 | if not len(hit_item_ids) == 1: 30 | raise GinRummyProgramError("len(hit_item_ids)={} must be 1.".format(len(hit_item_ids))) 31 | hit_item_id = hit_item_ids[0] 32 | hit_item = None 33 | for canvas_item in widget.canvas_items: 34 | if canvas_item.item_id == hit_item_id: 35 | hit_item = canvas_item 36 | if hit_item: 37 | if not widget.query.is_game_over(): 38 | _handle_tap(hit_item=hit_item, event=event, game_canvas=widget) 39 | else: 40 | top_discard_pile_item_id = widget.getter.get_top_discard_pile_item_id() 41 | if hit_item_id == top_discard_pile_item_id: 42 | starting_new_game.start_new_game(game_canvas=widget) 43 | 44 | 45 | def _handle_tap(hit_item: CanvasItem, event, game_canvas: 'GameCanvas'): 46 | hit_item_tags = hit_item.get_tags() 47 | if configurations.STOCK_PILE_TAG in hit_item_tags: 48 | current_player_id = game_canvas.current_player_id 49 | current_player_is_human = game_canvas.query.is_human(player_id=current_player_id) 50 | if current_player_is_human: 51 | handle_tap_stock_pile(hit_item=hit_item, game_canvas=game_canvas) 52 | elif configurations.DISCARD_PILE_TAG in hit_item_tags or hit_item == game_canvas.discard_pile_box_item: 53 | current_player_id = game_canvas.current_player_id 54 | current_player_is_human = game_canvas.query.is_human(player_id=current_player_id) 55 | if current_player_is_human: 56 | handle_tap_discard_pile(hit_item=hit_item, game_canvas=game_canvas) 57 | elif game_canvas.held_pile_tags[0] in hit_item_tags: 58 | pass # north player is never human player 59 | elif game_canvas.held_pile_tags[1] in hit_item_tags: 60 | handle_tap_held_pile(hit_item=hit_item, game_canvas=game_canvas) 61 | elif hit_item == game_canvas.player_panes[0]: 62 | pass # north player is never human player 63 | elif hit_item == game_canvas.player_panes[1]: 64 | handle_tap_player_pane(hit_item=hit_item, event=event, game_canvas=game_canvas) 65 | -------------------------------------------------------------------------------- /rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap_held_pile.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Project: Gui Gin Rummy 3 | File name: handling_tap_held_pile.py 4 | Author: William Hale 5 | Date created: 3/14/2020 6 | ''' 7 | 8 | # from __future__ import annotations 9 | from typing import TYPE_CHECKING 10 | if TYPE_CHECKING: 11 | from .game_canvas import GameCanvas 12 | 13 | from rlcard.games.gin_rummy.utils.gin_rummy_error import GinRummyProgramError 14 | 15 | from .player_type import PlayerType 16 | from .canvas_item import CanvasItem 17 | 18 | from . import configurations 19 | from . import info_messaging 20 | from . 
import utils 21 | 22 | 23 | def handle_tap_held_pile(hit_item: CanvasItem, game_canvas: 'GameCanvas'): 24 | hit_item_tags = hit_item.get_tags() 25 | if game_canvas.held_pile_tags[0] in hit_item_tags: 26 | player_id = 0 27 | elif game_canvas.held_pile_tags[1] in hit_item_tags: 28 | player_id = 1 29 | else: 30 | raise GinRummyProgramError("handle_tap_held_pile: unknown held_pile.") 31 | player_is_human = game_canvas.player_types[player_id] is PlayerType.human_player 32 | can_draw_from_stock_pile = game_canvas.query.can_draw_from_stock_pile(player_id=player_id) 33 | can_draw_from_discard_pile = game_canvas.query.can_draw_from_discard_pile(player_id=player_id) 34 | is_game_over = game_canvas.query.is_game_over() 35 | if is_game_over: 36 | pass 37 | elif game_canvas.query.can_discard_card(player_id=player_id): # hit_item is source 38 | if player_is_human: 39 | utils.toggle_held_pile_item_selected(item=hit_item, game_canvas=game_canvas) 40 | elif can_draw_from_stock_pile or can_draw_from_discard_pile: # hit_item is target 41 | drawn_card_item_id = None 42 | drawn_card_item_tag = None 43 | if not drawn_card_item_id and can_draw_from_stock_pile: 44 | top_stock_pile_item_id = game_canvas.getter.get_top_stock_pile_item_id() 45 | top_stock_pile_item_tags = game_canvas.getter.get_tags(top_stock_pile_item_id) 46 | if configurations.DRAWN_TAG in top_stock_pile_item_tags: 47 | drawn_card_item_id = top_stock_pile_item_id 48 | drawn_card_item_tag = configurations.STOCK_PILE_TAG 49 | if not drawn_card_item_id and can_draw_from_discard_pile: 50 | top_discard_pile_item_id = game_canvas.getter.get_top_discard_pile_item_id() 51 | top_discard_pile_item_tags = game_canvas.getter.get_tags(top_discard_pile_item_id) 52 | if configurations.DRAWN_TAG in top_discard_pile_item_tags: 53 | drawn_card_item_id = top_discard_pile_item_id 54 | drawn_card_item_tag = configurations.DISCARD_PILE_TAG 55 | if drawn_card_item_id: 56 | if player_id == 1: # remove info_message if south player 57 | info_messaging.blank_info_message_label(game_canvas=game_canvas) 58 | game_canvas.post_doing_action.post_do_get_card_action(player_id=player_id, 59 | drawn_card_item_id=drawn_card_item_id, 60 | hit_item_id=hit_item.item_id, 61 | drawn_card_item_tag=drawn_card_item_tag) 62 | else: 63 | utils.toggle_held_pile_item_selected(item=hit_item, game_canvas=game_canvas) 64 | else: 65 | if player_is_human: 66 | utils.toggle_held_pile_item_selected(item=hit_item, game_canvas=game_canvas) # arranging hand 67 | -------------------------------------------------------------------------------- /rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap_player_pane.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Project: Gui Gin Rummy 3 | File name: handling_tap_player_pane.py 4 | Author: William Hale 5 | Date created: 3/14/2020 6 | ''' 7 | 8 | # from __future__ import annotations 9 | from typing import TYPE_CHECKING 10 | if TYPE_CHECKING: 11 | from .game_canvas import GameCanvas 12 | 13 | from . import configurations 14 | from . 
import utils 15 | 16 | from .canvas_item import CanvasItem 17 | 18 | 19 | def handle_tap_player_pane(hit_item: CanvasItem, event, game_canvas: 'GameCanvas'): 20 | # un-select and un-jog all held cards 21 | player_id = None 22 | if game_canvas.player_panes[0] == hit_item: 23 | player_id = 0 24 | elif game_canvas.player_panes[1] == hit_item: 25 | player_id = 1 26 | if player_id is not None and game_canvas.query.is_human(player_id): 27 | held_pile_item_ids = game_canvas.getter.get_held_pile_item_ids(player_id) 28 | for item_id in held_pile_item_ids: 29 | game_canvas.dtag(item_id, configurations.JOGGED_TAG) 30 | game_canvas.dtag(item_id, configurations.SELECTED_TAG) 31 | utils.fan_held_pile(player_id=player_id, game_canvas=game_canvas) 32 | -------------------------------------------------------------------------------- /rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap_stock_pile.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Project: Gui Gin Rummy 3 | File name: handling_tap_stock_pile.py 4 | Author: William Hale 5 | Date created: 3/14/2020 6 | ''' 7 | 8 | # from __future__ import annotations 9 | from typing import TYPE_CHECKING 10 | if TYPE_CHECKING: 11 | from .game_canvas import GameCanvas 12 | 13 | from .canvas_item import CanvasItem 14 | 15 | from . import configurations 16 | from . import info_messaging 17 | from . import utils 18 | 19 | 20 | def handle_tap_stock_pile(hit_item: CanvasItem, game_canvas: 'GameCanvas'): # hit_item is source 21 | # Normal case is can_draw_from_stock_pile. 22 | # hit_item must not be drawn. 23 | # hit_item must be top card of stock_pile. 24 | # reset top card of discard pile if drawn. 25 | # reset all selected cards in held_pile 26 | player_id = game_canvas.current_player_id 27 | if game_canvas.query.is_game_over(): 28 | pass 29 | elif game_canvas.query.can_discard_card(player_id=player_id): 30 | pass 31 | elif game_canvas.query.can_declare_dead_hand(player_id=player_id): 32 | pass 33 | elif game_canvas.query.can_draw_from_stock_pile(player_id=player_id): 34 | current_player_id = game_canvas.current_player_id 35 | hit_item_tags = hit_item.get_tags() 36 | if configurations.DRAWN_TAG not in hit_item_tags: 37 | top_stock_pile_item_id = game_canvas.getter.get_top_stock_pile_item_id() 38 | if hit_item == top_stock_pile_item_id: 39 | utils.toggle_stock_pile_item_selected(game_canvas) 40 | # reset drawn top card of discard_pile if needed 41 | top_discard_pile_item_id = game_canvas.getter.get_top_discard_pile_item_id() 42 | top_discard_pile_item_tags = game_canvas.getter.get_tags(top_discard_pile_item_id) 43 | if configurations.DRAWN_TAG in top_discard_pile_item_tags: 44 | utils.toggle_discard_pile_item_selected(game_canvas=game_canvas) 45 | # reset selected cards of held_pile of current_player 46 | held_pile_item_ids = game_canvas.getter.get_held_pile_item_ids(player_id=current_player_id) 47 | for held_pile_item_id in held_pile_item_ids: 48 | held_pile_item_tags = game_canvas.getter.get_tags(item_id=held_pile_item_id) 49 | if configurations.SELECTED_TAG in held_pile_item_tags: 50 | held_pile_item = game_canvas.canvas_item_by_item_id.get(held_pile_item_id, None) 51 | if held_pile_item: 52 | utils.toggle_held_pile_item_selected(item=held_pile_item, game_canvas=game_canvas) 53 | # remove info_message if south player 54 | if player_id == 1: 55 | info_messaging.blank_info_message_label(game_canvas=game_canvas) -------------------------------------------------------------------------------- 
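The three tap handlers above are all dispatched from on_game_canvas_tap in handling_tap.py; the actual event binding lives in game_canvas.py, which is not reproduced in this listing. For orientation only, the wiring plausibly looks like the sketch below (the binding target is an assumption; in rlcard it is the GameCanvas subclass, not a bare Canvas):

    # Sketch: attaching the tap dispatcher to a Tk canvas. In rlcard this is
    # done inside GameCanvas (not shown here), so treat this as illustrative.
    import tkinter as tk
    from rlcard.agents.human_agents.gin_rummy_human_agent.gui_gin_rummy.handling_tap import on_game_canvas_tap

    root = tk.Tk()
    canvas = tk.Canvas(root)
    canvas.bind('<Button-1>', on_game_canvas_tap)  # event.widget will be this canvas
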
/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/menu_bar.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Project: Gui Gin Rummy 3 | File name: menu_bar.py 4 | Author: William Hale 5 | Date created: 3/14/2020 6 | ''' 7 | 8 | # from __future__ import annotations 9 | from typing import TYPE_CHECKING 10 | if TYPE_CHECKING: 11 | from .game_frame import GameFrame 12 | 13 | import tkinter as tk 14 | from tkinter import messagebox 15 | 16 | from .preferences_window import PreferencesWindow 17 | 18 | 19 | class MenuBar(tk.Menu): 20 | 21 | def __init__(self, root: tk.Tk, game_frame: 'GameFrame'): 22 | super().__init__(root) 23 | self.game_frame = game_frame 24 | 25 | # create file menu 26 | file_menu = tk.Menu(self, tearoff=False) 27 | file_menu.add_command(label="New Game", command=self.on_new_game_menu_clicked) 28 | self.add_cascade(label="File", menu=file_menu) 29 | 30 | # create edit menu 31 | edit_menu = tk.Menu(self, tearoff=False) 32 | edit_menu.add_command(label="Preferences", command=self.on_preference_menu_clicked) 33 | self.add_cascade(label="Edit", menu=edit_menu) 34 | 35 | # create about menu 36 | help_menu = tk.Menu(self, tearoff=False) 37 | help_menu.add_command(label="About", command=self.on_about_menu_clicked) 38 | self.add_cascade(label="Help", menu=help_menu) 39 | 40 | # configure menuBar 41 | root.configure(menu=self) 42 | 43 | def on_new_game_menu_clicked(self): 44 | self.game_frame.start_new_game() 45 | 46 | def on_preference_menu_clicked(self): 47 | PreferencesWindow(self.game_frame) 48 | 49 | @staticmethod 50 | def on_about_menu_clicked(): 51 | messagebox.showinfo(title="Info", message="Gin Rummy\nVersion 1.0") 52 | -------------------------------------------------------------------------------- /rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/player_type.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Project: Gui Gin Rummy 3 | File name: player_type.py 4 | Author: William Hale 5 | Date created: 3/14/2020 6 | ''' 7 | 8 | import enum 9 | 10 | 11 | class PlayerType(int, enum.Enum): 12 | 13 | computer_player = 1 14 | human_player = 2 15 | demo_player = 3 16 | -------------------------------------------------------------------------------- /rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/utils_extra.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Project: Gui Gin Rummy 3 | File name: utils_extra.py 4 | Author: William Hale 5 | Date created: 3/14/2020 6 | ''' 7 | 8 | from PIL import Image, ImageDraw, ImageFilter 9 | 10 | 11 | def rounded_rectangle(self: ImageDraw, xy, corner_radius, fill=None, outline=None): # FIXME: not used 12 | upper_left_point = xy[0] 13 | bottom_right_point = xy[1] 14 | self.rectangle( 15 | [ 16 | (upper_left_point[0], upper_left_point[1] + corner_radius), 17 | (bottom_right_point[0], bottom_right_point[1] - corner_radius) 18 | ], 19 | fill=fill, 20 | outline=outline 21 | ) 22 | self.rectangle( 23 | [ 24 | (upper_left_point[0] + corner_radius, upper_left_point[1]), 25 | (bottom_right_point[0] - corner_radius, bottom_right_point[1]) 26 | ], 27 | fill=fill, 28 | outline=outline 29 | ) 30 | self.pieslice( 31 | [upper_left_point, (upper_left_point[0] + corner_radius * 2, upper_left_point[1] + corner_radius * 2)], 32 | 180, 33 | 270, 34 | fill=fill, 35 | outline=outline 36 | ) 37 | self.pieslice( 38 | [(bottom_right_point[0] - corner_radius * 2, bottom_right_point[1] - 
corner_radius * 2), bottom_right_point],
39 |         0,
40 |         90,
41 |         fill=fill,
42 |         outline=outline
43 |     )
44 |     self.pieslice([(upper_left_point[0], bottom_right_point[1] - corner_radius * 2),
45 |                    (upper_left_point[0] + corner_radius * 2, bottom_right_point[1])],
46 |                   90,
47 |                   180,
48 |                   fill=fill,
49 |                   outline=outline
50 |                   )
51 |     self.pieslice([(bottom_right_point[0] - corner_radius * 2, upper_left_point[1]),
52 |                    (bottom_right_point[0], upper_left_point[1] + corner_radius * 2)],
53 |                   270,
54 |                   360,
55 |                   fill=fill,
56 |                   outline=outline
57 |                   )
58 | 
59 | 
60 | ImageDraw.rounded_rectangle = rounded_rectangle  # FIXME: not used
61 | 
62 | 
63 | def mask_rounded_rectangle_transparent(pil_img, corner_radius=8):  # FIXME: not used
64 |     blur_radius = 0  # FIXME: what is this for ??? wch
65 |     mask = Image.new("L", pil_img.size, 0)
66 |     draw = ImageDraw.Draw(mask)
67 |     rounded_rectangle(draw, xy=((0, 0), (pil_img.size[0], pil_img.size[1])), corner_radius=corner_radius, fill=255)
68 | 
69 |     mask = mask.filter(ImageFilter.GaussianBlur(blur_radius))
70 |     result = pil_img.copy()
71 |     result.putalpha(mask)
72 |     return result
73 | 
--------------------------------------------------------------------------------
/rlcard/agents/human_agents/leduc_holdem_human_agent.py:
--------------------------------------------------------------------------------
1 | from rlcard.utils.utils import print_card
2 | 
3 | 
4 | class HumanAgent(object):
5 |     ''' A human agent for Leduc Holdem. It can be used to play against trained models
6 |     '''
7 | 
8 |     def __init__(self, num_actions):
9 |         ''' Initialize the human agent
10 | 
11 |         Args:
12 |             num_actions (int): the size of the output action space
13 |         '''
14 |         self.use_raw = True
15 |         self.num_actions = num_actions
16 | 
17 |     @staticmethod
18 |     def step(state):
19 |         ''' Human agent will display the state and make decisions through interfaces
20 | 
21 |         Args:
22 |             state (dict): A dictionary that represents the current state
23 | 
24 |         Returns:
25 |             action (int): The action decided by human
26 |         '''
27 |         _print_state(state['raw_obs'], state['action_record'])
28 |         action = int(input('>> You choose action (integer): '))
29 |         while action < 0 or action >= len(state['legal_actions']):
30 |             print('Action illegal...')
31 |             action = int(input('>> Re-choose action (integer): '))
32 |         return state['raw_legal_actions'][action]
33 | 
34 |     def eval_step(self, state):
35 |         ''' Predict the action given the current state for evaluation. The same as step here.
36 | 
37 |         Args:
38 |             state (numpy.array): a numpy array that represents the current state
39 | 
40 |         Returns:
41 |             action (int): the action decided by the human
42 |         '''
43 |         return self.step(state), {}
44 | 
45 | def _print_state(state, action_record):
46 |     ''' Print out the state
47 | 
48 |     Args:
49 |         state (dict): A dictionary of the raw state
50 |         action_record (list): A list of the historical actions
51 |     '''
52 |     _action_list = []
53 |     for i in range(1, len(action_record)+1):
54 |         if action_record[-i][0] == state['current_player']:
55 |             break
56 |         _action_list.insert(0, action_record[-i])
57 |     for pair in _action_list:
58 |         print('>> Player', pair[0], 'chooses', pair[1])
59 | 
60 |     print('\n=============== Community Card ===============')
61 |     print_card(state['public_card'])
62 |     print('=============== Your Hand ===============')
63 |     print_card(state['hand'])
64 |     print('=============== Chips ===============')
65 |     print('Yours: ', end='')
66 |     for _ in range(state['my_chips']):
67 |         print('+', end='')
68 |     print('')
69 |     for i in range(len(state['all_chips'])):
70 |         if i != state['current_player']:
71 |             print('Agent {}: '.format(i), end='')
72 |             for _ in range(state['all_chips'][i]):
73 |                 print('+', end='')
74 |     print('\n=========== Actions You Can Choose ===========')
75 |     print(', '.join([str(index) + ': ' + action for index, action in enumerate(state['legal_actions'])]))
76 |     print('')
77 | 
--------------------------------------------------------------------------------
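These console agents all follow the same pattern: print the raw state, read an action index from stdin, and return the matching raw action. A condensed sketch of a human-vs-CFR Leduc session (modeled on examples/human/leduc_holdem_human.py; the pretrained model id 'leduc-holdem-cfr' is assumed from rlcard/models/pretrained_models.py):

    import rlcard
    from rlcard import models
    from rlcard.agents.human_agents.leduc_holdem_human_agent import HumanAgent

    env = rlcard.make('leduc-holdem')
    human = HumanAgent(env.num_actions)
    cfr_agent = models.load('leduc-holdem-cfr').agents[0]  # pretrained CFR policy
    env.set_agents([human, cfr_agent])
    trajectories, payoffs = env.run(is_training=False)
    print('Your payoff:', payoffs[0])
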
/rlcard/agents/human_agents/limit_holdem_human_agent.py:
--------------------------------------------------------------------------------
1 | from rlcard.utils.utils import print_card
2 | 
3 | 
4 | class HumanAgent(object):
5 |     ''' A human agent for Limit Holdem. It can be used to play against trained models
6 |     '''
7 | 
8 |     def __init__(self, num_actions):
9 |         ''' Initialize the human agent
10 | 
11 |         Args:
12 |             num_actions (int): the size of the output action space
13 |         '''
14 |         self.use_raw = True
15 |         self.num_actions = num_actions
16 | 
17 |     @staticmethod
18 |     def step(state):
19 |         ''' Human agent will display the state and make decisions through interfaces
20 | 
21 |         Args:
22 |             state (dict): A dictionary that represents the current state
23 | 
24 |         Returns:
25 |             action (int): The action decided by human
26 |         '''
27 |         _print_state(state['raw_obs'], state['action_record'])
28 |         action = int(input('>> You choose action (integer): '))
29 |         while action < 0 or action >= len(state['legal_actions']):
30 |             print('Action illegal...')
31 |             action = int(input('>> Re-choose action (integer): '))
32 |         return state['raw_legal_actions'][action]
33 | 
34 |     def eval_step(self, state):
35 |         ''' Predict the action given the current state for evaluation. The same as step here.
36 | 
37 |         Args:
38 |             state (numpy.array): a numpy array that represents the current state
39 | 
40 |         Returns:
41 |             action (int): the action decided by the human
42 |         '''
43 |         return self.step(state), {}
44 | 
45 | def _print_state(state, action_record):
46 |     ''' Print out the state
47 | 
48 |     Args:
49 |         state (dict): A dictionary of the raw state
50 |         action_record (list): A list of each player's historical actions
51 |     '''
52 |     _action_list = []
53 |     for i in range(1, len(action_record)+1):
54 |         _action_list.insert(0, action_record[-i])
55 |     for pair in _action_list:
56 |         print('>> Player', pair[0], 'chooses', pair[1])
57 | 
58 |     print('\n=============== Community Card ===============')
59 |     print_card(state['public_cards'])
60 |     print('=============== Your Hand ===============')
61 |     print_card(state['hand'])
62 |     print('=============== Chips ===============')
63 |     print('Yours: ', end='')
64 |     for _ in range(state['my_chips']):
65 |         print('+', end='')
66 |     print('')
67 |     for i in range(len(state['all_chips'])):
68 |         for _ in range(state['all_chips'][i]):
69 |             print('+', end='')
70 |     print('\n=========== Actions You Can Choose ===========')
71 |     print(', '.join([str(index) + ': ' + action for index, action in enumerate(state['legal_actions'])]))
72 |     print('')
73 | 
--------------------------------------------------------------------------------
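One caveat shared by all of these console agents: int(input(...)) raises ValueError as soon as the user types anything non-numeric, which aborts the hand. A more forgiving prompt loop might look like this (illustrative sketch, not part of the source):

    def prompt_action_index(num_choices):
        # Keep asking until we get an integer in [0, num_choices).
        while True:
            raw = input('>> You choose action (integer): ')
            try:
                action = int(raw)
            except ValueError:
                print('Please enter an integer...')
                continue
            if 0 <= action < num_choices:
                return action
            print('Action illegal...')
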
/rlcard/agents/human_agents/nolimit_holdem_human_agent.py:
--------------------------------------------------------------------------------
1 | from rlcard.utils.utils import print_card
2 | 
3 | 
4 | class HumanAgent(object):
5 |     ''' A human agent for No Limit Holdem. It can be used to play against trained models
6 |     '''
7 | 
8 |     def __init__(self, num_actions):
9 |         ''' Initialize the human agent
10 | 
11 |         Args:
12 |             num_actions (int): the size of the output action space
13 |         '''
14 |         self.use_raw = True
15 |         self.num_actions = num_actions
16 | 
17 |     @staticmethod
18 |     def step(state):
19 |         ''' Human agent will display the state and make decisions through interfaces
20 | 
21 |         Args:
22 |             state (dict): A dictionary that represents the current state
23 | 
24 |         Returns:
25 |             action (int): The action decided by human
26 |         '''
27 |         _print_state(state['raw_obs'], state['action_record'])
28 |         action = int(input('>> You choose action (integer): '))
29 |         while action < 0 or action >= len(state['legal_actions']):
30 |             print('Action illegal...')
31 |             action = int(input('>> Re-choose action (integer): '))
32 |         return state['raw_legal_actions'][action]
33 | 
34 |     def eval_step(self, state):
35 |         ''' Predict the action given the current state for evaluation. The same as step here.
36 | 
37 |         Args:
38 |             state (numpy.array): a numpy array that represents the current state
39 | 
40 |         Returns:
41 |             action (int): the action decided by the human
42 |         '''
43 |         return self.step(state), {}
44 | 
45 | def _print_state(state, action_record):
46 |     ''' Print out the state
47 | 
48 |     Args:
49 |         state (dict): A dictionary of the raw state
50 |         action_record (list): A list of the historical actions
51 |     '''
52 |     _action_list = []
53 |     for i in range(1, len(action_record)+1):
54 |         if action_record[-i][0] == state['current_player']:
55 |             break
56 |         _action_list.insert(0, action_record[-i])
57 |     for pair in _action_list:
58 |         print('>> Player', pair[0], 'chooses', pair[1])
59 | 
60 |     print('\n=============== Community Card ===============')
61 |     print_card(state['public_cards'])
62 | 
63 |     print('============= Player', state["current_player"], '- Hand =============')
64 |     print_card(state['hand'])
65 | 
66 |     print('=============== Chips ===============')
67 |     print('In Pot:', state["pot"])
68 |     print('Remaining:', state["stakes"])
69 | 
70 |     print('\n=========== Actions You Can Choose ===========')
71 |     print(', '.join([str(index) + ': ' + str(action) for index, action in enumerate(state['legal_actions'])]))
72 |     print('')
73 |     print(state)  # also dumps the raw state dict
--------------------------------------------------------------------------------
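A sketch of seating this agent in a live no-limit game against a random bot (in the spirit of examples/human/nolimit_holdem_human.py; the exact options used there may differ):

    import rlcard
    from rlcard.agents import RandomAgent
    from rlcard.agents.human_agents.nolimit_holdem_human_agent import HumanAgent

    env = rlcard.make('no-limit-holdem')
    env.set_agents([HumanAgent(env.num_actions), RandomAgent(num_actions=env.num_actions)])
    trajectories, payoffs = env.run(is_training=False)
    print('Your payoff:', payoffs[0])
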
/rlcard/agents/human_agents/uno_human_agent.py:
--------------------------------------------------------------------------------
1 | from rlcard.games.uno.card import UnoCard
2 | 
3 | class HumanAgent(object):
4 |     ''' A human agent for UNO. It can be used to play against trained models
5 |     '''
6 | 
7 |     def __init__(self, num_actions):
8 |         ''' Initialize the human agent
9 | 
10 |         Args:
11 |             num_actions (int): the size of the output action space
12 |         '''
13 |         self.use_raw = True
14 |         self.num_actions = num_actions
15 | 
16 |     @staticmethod
17 |     def step(state):
18 |         ''' Human agent will display the state and make decisions through interfaces
19 | 
20 |         Args:
21 |             state (dict): A dictionary that represents the current state
22 | 
23 |         Returns:
24 |             action (int): The action decided by human
25 |         '''
26 |         print(state['raw_obs'])
27 |         _print_state(state['raw_obs'], state['action_record'])
28 |         action = int(input('>> You choose action (integer): '))
29 |         while action < 0 or action >= len(state['legal_actions']):
30 |             print('Action illegal...')
31 |             action = int(input('>> Re-choose action (integer): '))
32 |         return state['raw_legal_actions'][action]
33 | 
34 |     def eval_step(self, state):
35 |         ''' Predict the action given the current state for evaluation. The same as step here.
36 | 
37 |         Args:
38 |             state (numpy.array): a numpy array that represents the current state
39 | 
40 |         Returns:
41 |             action (int): the action decided by the human
42 |         '''
43 |         return self.step(state), {}
44 | 
45 | def _print_state(state, action_record):
46 |     ''' Print out the state
47 |     Args:
48 |         state (dict): A dictionary of the raw state
49 |         action_record (list): A list of each player's historical actions
50 |     '''
51 |     _action_list = []
52 |     for i in range(1, len(action_record)+1):
53 |         if action_record[-i][0] == state['current_player']:
54 |             break
55 |         _action_list.insert(0, action_record[-i])
56 |     for pair in _action_list:
57 |         print('>> Player', pair[0], 'chooses ', end='')
58 |         _print_action(pair[1])
59 |         print('')
60 | 
61 |     print('\n=============== Your Hand ===============')
62 |     UnoCard.print_cards(state['hand'])
63 |     print('')
64 |     print('=============== Last Card ===============')
65 |     UnoCard.print_cards(state['target'], wild_color=True)
66 |     print('')
67 |     print('========== Players Card Number ===========')
68 |     for i in range(state['num_players']):
69 |         if i != state['current_player']:
70 |             print('Player {} has {} cards.'.format(i, state['num_cards'][i]))
71 |     print('======== Actions You Can Choose =========')
72 |     for i, action in enumerate(state['legal_actions']):
73 |         print(str(i)+': ', end='')
74 |         UnoCard.print_cards(action, wild_color=True)
75 |         if i < len(state['legal_actions']) - 1:
76 |             print(', ', end='')
77 |     print('\n')
78 | 
79 | def _print_action(action):
80 |     ''' Print out an action in a nice form
81 | 
82 |     Args:
83 |         action (str): A string of an action
84 |     '''
85 |     UnoCard.print_cards(action, wild_color=True)
86 | 
--------------------------------------------------------------------------------
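UnoCard.print_cards accepts either a single card string or a list of them, as the calls above show, and wild_color=True colorizes wild cards. Card strings follow rlcard's UNO encoding (e.g. 'r-5' for a red 5; the exact trait names are best checked against rlcard/games/uno/jsondata/action_space.json):

    from rlcard.games.uno.card import UnoCard

    UnoCard.print_cards('r-5')                      # one card string
    UnoCard.print_cards(['r-5', 'g-skip'])          # or a list of them
    UnoCard.print_cards('r-wild', wild_color=True)  # render the wild card's chosen color
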
/rlcard/agents/pettingzoo_agents.py:
--------------------------------------------------------------------------------
1 | from rlcard.agents.nfsp_agent import NFSPAgent
2 | from rlcard.agents.dqn_agent import DQNAgent
3 | from rlcard.agents.random_agent import RandomAgent
4 | from rlcard.utils.pettingzoo_utils import wrap_state
5 | 
6 | 
7 | class NFSPAgentPettingZoo(NFSPAgent):
8 |     def step(self, state):
9 |         return super().step(wrap_state(state))
10 | 
11 |     def eval_step(self, state):
12 |         return super().eval_step(wrap_state(state))
13 | 
14 |     def feed(self, ts):
15 |         state, action, reward, next_state, done = tuple(ts)
16 |         state = wrap_state(state)
17 |         next_state = wrap_state(next_state)
18 |         ts = (state, action, reward, next_state, done)
19 |         return super().feed(ts)
20 | 
21 | 
22 | class DQNAgentPettingZoo(DQNAgent):
23 |     def step(self, state):
24 |         return super().step(wrap_state(state))
25 | 
26 |     def eval_step(self, state):
27 |         return super().eval_step(wrap_state(state))
28 | 
29 |     def feed(self, ts):
30 |         state, action, reward, next_state, done = tuple(ts)
31 |         state = wrap_state(state)
32 |         next_state = wrap_state(next_state)
33 |         ts = (state, action, reward, next_state, done)
34 |         return super().feed(ts)
35 | 
36 | 
37 | class RandomAgentPettingZoo(RandomAgent):
38 |     def step(self, state):
39 |         return super().step(wrap_state(state))
40 | 
41 |     def eval_step(self, state):
42 |         return super().eval_step(wrap_state(state))
43 | 
--------------------------------------------------------------------------------
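These wrappers exist so the standard agents can consume PettingZoo-style observations (wrap_state does the translation); examples/pettingzoo/run_rl.py and run_dmc.py in this repo drive them end to end. A heavily condensed sketch (assumes the pettingzoo package is installed; the env id and its version suffix are assumptions and vary by release):

    from pettingzoo.classic import leduc_holdem_v4  # version suffix is an assumption
    from rlcard.agents.pettingzoo_agents import RandomAgentPettingZoo

    env = leduc_holdem_v4.env()
    env.reset(seed=42)
    agent = RandomAgentPettingZoo(num_actions=env.action_space('player_0').n)
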
The random agent is for running toy examples on the card games
6 |     '''
7 | 
8 |     def __init__(self, num_actions):
9 |         ''' Initialize the random agent
10 | 
11 |         Args:
12 |             num_actions (int): The size of the output action space
13 |         '''
14 |         self.use_raw = False
15 |         self.num_actions = num_actions
16 | 
17 |     @staticmethod
18 |     def step(state):
19 |         ''' Predict the action given the current state when generating training data.
20 | 
21 |         Args:
22 |             state (dict): A dictionary that represents the current state
23 | 
24 |         Returns:
25 |             action (int): The action predicted (randomly chosen) by the random agent
26 |         '''
27 |         return np.random.choice(list(state['legal_actions'].keys()))
28 | 
29 |     def eval_step(self, state):
30 |         ''' Predict the action given the current state for evaluation.
31 |             Since the random agent is not trained, this function is equivalent to the step function.
32 | 
33 |         Args:
34 |             state (dict): A dictionary that represents the current state
35 | 
36 |         Returns:
37 |             action (int): The action predicted (randomly chosen) by the random agent
38 |             probs (list): The list of action probabilities
39 |         '''
40 |         probs = [0 for _ in range(self.num_actions)]
41 |         for i in state['legal_actions']:
42 |             probs[i] = 1/len(state['legal_actions'])
43 | 
44 |         info = {}
45 |         info['probs'] = {state['raw_legal_actions'][i]: probs[list(state['legal_actions'].keys())[i]] for i in range(len(state['legal_actions']))}
46 | 
47 |         return self.step(state), info
48 | 
--------------------------------------------------------------------------------
/rlcard/envs/__init__.py:
--------------------------------------------------------------------------------
1 | ''' Register new environments
2 | '''
3 | from rlcard.envs.env import Env
4 | from rlcard.envs.registration import register, make
5 | 
6 | register(
7 |     env_id='blackjack',
8 |     entry_point='rlcard.envs.blackjack:BlackjackEnv',
9 | )
10 | 
11 | register(
12 |     env_id='doudizhu',
13 |     entry_point='rlcard.envs.doudizhu:DoudizhuEnv',
14 | )
15 | 
16 | register(
17 |     env_id='limit-holdem',
18 |     entry_point='rlcard.envs.limitholdem:LimitholdemEnv',
19 | )
20 | 
21 | register(
22 |     env_id='no-limit-holdem',
23 |     entry_point='rlcard.envs.nolimitholdem:NolimitholdemEnv',
24 | )
25 | 
26 | register(
27 |     env_id='leduc-holdem',
28 |     entry_point='rlcard.envs.leducholdem:LeducholdemEnv'
29 | )
30 | 
31 | register(
32 |     env_id='uno',
33 |     entry_point='rlcard.envs.uno:UnoEnv',
34 | )
35 | 
36 | register(
37 |     env_id='mahjong',
38 |     entry_point='rlcard.envs.mahjong:MahjongEnv',
39 | )
40 | 
41 | register(
42 |     env_id='gin-rummy',
43 |     entry_point='rlcard.envs.gin_rummy:GinRummyEnv',
44 | )
45 | 
46 | register(
47 |     env_id='bridge',
48 |     entry_point='rlcard.envs.bridge:BridgeEnv',
49 | )
50 | 
--------------------------------------------------------------------------------
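Downstream projects register their own games the same way; the entry_point is a 'module:ClassName' string resolved with importlib, exactly as EnvSpec does below. A sketch with placeholder names:

    from rlcard.envs.registration import register

    # 'my_package.envs.my_game:MyGameEnv' is a hypothetical entry point.
    register(
        env_id='my-game',
        entry_point='my_package.envs.my_game:MyGameEnv',
    )

    # Afterwards the usual factory works: env = rlcard.make('my-game')
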
/rlcard/envs/registration.py:
--------------------------------------------------------------------------------
1 | import importlib
2 | 
3 | # Default Config
4 | DEFAULT_CONFIG = {
5 |     'allow_step_back': False,
6 |     'seed': None,
7 | }
8 | 
9 | class EnvSpec(object):
10 |     ''' A specification for a particular instance of the environment.
11 |     '''
12 | 
13 |     def __init__(self, env_id, entry_point=None):
14 |         ''' Initialize
15 | 
16 |         Args:
17 |             env_id (string): The name of the environment
18 |             entry_point (string): A string that indicates the location of the environment class
19 |         '''
20 |         self.env_id = env_id
21 |         mod_name, class_name = entry_point.split(':')
22 |         self._entry_point = getattr(importlib.import_module(mod_name), class_name)
23 | 
24 |     def make(self, config=DEFAULT_CONFIG):
25 |         ''' Instantiate an instance of the environment
26 |         Args:
27 |             config (dict): A dictionary of the environment settings
28 |         Returns:
29 |             env (Env): An instance of the environment
30 |         '''
31 |         env = self._entry_point(config)
32 |         return env
33 | 
34 | class EnvRegistry(object):
35 |     ''' Register an environment (game) by ID
36 |     '''
37 | 
38 |     def __init__(self):
39 |         ''' Initialize
40 |         '''
41 |         self.env_specs = {}
42 | 
43 |     def register(self, env_id, entry_point):
44 |         ''' Register an environment
45 | 
46 |         Args:
47 |             env_id (string): The name of the environment
48 |             entry_point (string): A string that indicates the location of the environment class
49 |         '''
50 |         if env_id in self.env_specs:
51 |             raise ValueError('Cannot re-register env_id: {}'.format(env_id))
52 |         self.env_specs[env_id] = EnvSpec(env_id, entry_point)
53 | 
54 |     def make(self, env_id, config=DEFAULT_CONFIG):
55 |         ''' Create an environment instance
56 | 
57 |         Args:
58 |             env_id (string): The name of the environment
59 |             config (dict): A dictionary of the environment settings
60 |         '''
61 |         if env_id not in self.env_specs:
62 |             raise ValueError('Cannot find env_id: {}'.format(env_id))
63 |         return self.env_specs[env_id].make(config)
64 | 
65 | # Have a global registry
66 | registry = EnvRegistry()
67 | 
68 | def register(env_id, entry_point):
69 |     ''' Register an environment
70 | 
71 |     Args:
72 |         env_id (string): The name of the environment
73 |         entry_point (string): A string that indicates the location of the environment class
74 |     '''
75 |     return registry.register(env_id, entry_point)
76 | 
77 | def make(env_id, config={}):
78 |     ''' Create an environment instance
79 | 
80 |     Args:
81 |         env_id (string): The name of the environment
82 |         config (dict): A dictionary of the environment settings
83 | 
84 |     '''
85 |     _config = DEFAULT_CONFIG.copy()
86 |     for key in config:
87 |         _config[key] = config[key]
88 | 
89 |     return registry.make(env_id, _config)
--------------------------------------------------------------------------------
/rlcard/envs/uno.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from collections import OrderedDict
3 | 
4 | from rlcard.envs import Env
5 | from rlcard.games.uno import Game
6 | from rlcard.games.uno.utils import encode_hand, encode_target
7 | from rlcard.games.uno.utils import ACTION_SPACE, ACTION_LIST
8 | from rlcard.games.uno.utils import cards2list
9 | 
10 | DEFAULT_GAME_CONFIG = {
11 |     'game_num_players': 2,
12 | }
13 | 
14 | class UnoEnv(Env):
15 | 
16 |     def __init__(self, config):
17 |         self.name = 'uno'
18 |         self.default_game_config = DEFAULT_GAME_CONFIG
19 |         self.game = Game()
20 |         super().__init__(config)
21 |         self.state_shape = [[4, 4, 15] for _ in range(self.num_players)]
22 |         self.action_shape = [None for _ in range(self.num_players)]
23 | 
24 |     def _extract_state(self, state):
25 |         obs = np.zeros((4, 4, 15), dtype=int)
26 |         encode_hand(obs[:3], state['hand'])
27 |         encode_target(obs[3], state['target'])
28 |         legal_action_id = self._get_legal_actions()
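        # Gloss (not in the source): obs is a 4 x 4 x 15 tensor. The first three
        # planes come from encode_hand and describe the player's hand over the
        # 4 colors x 15 traits grid; the last plane, from encode_target, marks
        # the current target card. See rlcard/games/uno/utils.py for the exact
        # plane semantics.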
extracted_state = {'obs': obs, 'legal_actions': legal_action_id} 30 | extracted_state['raw_obs'] = state 31 | extracted_state['raw_legal_actions'] = [a for a in state['legal_actions']] 32 | extracted_state['action_record'] = self.action_recorder 33 | return extracted_state 34 | 35 | def get_payoffs(self): 36 | 37 | return np.array(self.game.get_payoffs()) 38 | 39 | def _decode_action(self, action_id): 40 | legal_ids = self._get_legal_actions() 41 | if action_id in legal_ids: 42 | return ACTION_LIST[action_id] 43 | # if (len(self.game.dealer.deck) + len(self.game.round.played_cards)) > 17: 44 | # return ACTION_LIST[60] 45 | return ACTION_LIST[np.random.choice(legal_ids)] 46 | 47 | def _get_legal_actions(self): 48 | legal_actions = self.game.get_legal_actions() 49 | legal_ids = {ACTION_SPACE[action]: None for action in legal_actions} 50 | return OrderedDict(legal_ids) 51 | 52 | def get_perfect_information(self): 53 | ''' Get the perfect information of the current state 54 | 55 | Returns: 56 | (dict): A dictionary of all the perfect information of the current state 57 | ''' 58 | state = {} 59 | state['num_players'] = self.num_players 60 | state['hand_cards'] = [cards2list(player.hand) 61 | for player in self.game.players] 62 | state['played_cards'] = cards2list(self.game.round.played_cards) 63 | state['target'] = self.game.round.target.str 64 | state['current_player'] = self.game.round.current_player 65 | state['legal_actions'] = self.game.round.get_legal_actions( 66 | self.game.players, state['current_player']) 67 | return state 68 | -------------------------------------------------------------------------------- /rlcard/games/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datamllab/rlcard/d7d0a957baf4cc7225a50522adb0164bf130a9d0/rlcard/games/__init__.py -------------------------------------------------------------------------------- /rlcard/games/base.py: -------------------------------------------------------------------------------- 1 | ''' Game-related base classes 2 | ''' 3 | class Card: 4 | ''' 5 | Card stores the suit and rank of a single card 6 | 7 | Note: 8 | The suit variable in a standard card game should be one of [S, H, D, C, BJ, RJ] meaning [Spades, Hearts, Diamonds, Clubs, Black Joker, Red Joker] 9 | Similarly the rank variable should be one of [A, 2, 3, 4, 5, 6, 7, 8, 9, T, J, Q, K] 10 | ''' 11 | suit = None 12 | rank = None 13 | valid_suit = ['S', 'H', 'D', 'C', 'BJ', 'RJ'] 14 | valid_rank = ['A', '2', '3', '4', '5', '6', '7', '8', '9', 'T', 'J', 'Q', 'K'] 15 | 16 | def __init__(self, suit, rank): 17 | ''' Initialize the suit and rank of a card 18 | 19 | Args: 20 | suit: string, suit of the card, should be one of valid_suit 21 | rank: string, rank of the card, should be one of valid_rank 22 | ''' 23 | self.suit = suit 24 | self.rank = rank 25 | 26 | def __eq__(self, other): 27 | if isinstance(other, Card): 28 | return self.rank == other.rank and self.suit == other.suit 29 | else: 30 | # don't attempt to compare against unrelated types 31 | return NotImplemented 32 | 33 | def __hash__(self): 34 | suit_index = Card.valid_suit.index(self.suit) 35 | rank_index = Card.valid_rank.index(self.rank) 36 | return rank_index + 100 * suit_index 37 | 38 | def __str__(self): 39 | ''' Get string representation of a card. 40 | 41 | Returns: 42 | string: the combination of rank and suit of a card. Eg: AS, 5H, JD, 3C, ... 
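            For example, str(Card('S', 'A')) gives 'AS', while get_index() gives 'SA'.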
43 | ''' 44 | return self.rank + self.suit 45 | 46 | def get_index(self): 47 | ''' Get index of a card. 48 | 49 | Returns: 50 | string: the combination of suit and rank of a card. Eg: 1S, 2H, AD, BJ, RJ... 51 | ''' 52 | return self.suit+self.rank 53 | -------------------------------------------------------------------------------- /rlcard/games/blackjack/__init__.py: -------------------------------------------------------------------------------- 1 | from rlcard.games.blackjack.dealer import BlackjackDealer as Dealer 2 | from rlcard.games.blackjack.judger import BlackjackJudger as Judger 3 | from rlcard.games.blackjack.player import BlackjackPlayer as Player 4 | from rlcard.games.blackjack.game import BlackjackGame as Game 5 | 6 | -------------------------------------------------------------------------------- /rlcard/games/blackjack/dealer.py: -------------------------------------------------------------------------------- 1 | from rlcard.utils import init_standard_deck 2 | import numpy as np 3 | 4 | class BlackjackDealer: 5 | 6 | def __init__(self, np_random, num_decks=1): 7 | ''' Initialize a Blackjack dealer class 8 | ''' 9 | self.np_random = np_random 10 | self.num_decks = num_decks 11 | self.deck = init_standard_deck() 12 | if self.num_decks not in [0, 1]: # 0 indicates infinite decks of cards 13 | self.deck = self.deck * self.num_decks # copy m standard decks of cards 14 | self.shuffle() 15 | self.hand = [] 16 | self.status = 'alive' 17 | self.score = 0 18 | 19 | def shuffle(self): 20 | ''' Shuffle the deck 21 | ''' 22 | shuffle_deck = np.array(self.deck) 23 | self.np_random.shuffle(shuffle_deck) 24 | self.deck = list(shuffle_deck) 25 | 26 | def deal_card(self, player): 27 | ''' Distribute one card to the player 28 | 29 | Args: 30 | player_id (int): the target player's id 31 | ''' 32 | idx = self.np_random.choice(len(self.deck)) 33 | card = self.deck[idx] 34 | if self.num_decks != 0: # If infinite decks, do not pop card from deck 35 | self.deck.pop(idx) 36 | # card = self.deck.pop() 37 | player.hand.append(card) 38 | -------------------------------------------------------------------------------- /rlcard/games/blackjack/judger.py: -------------------------------------------------------------------------------- 1 | 2 | class BlackjackJudger: 3 | def __init__(self, np_random): 4 | ''' Initialize a BlackJack judger class 5 | ''' 6 | self.np_random = np_random 7 | self.rank2score = {"A":11, "2":2, "3":3, "4":4, "5":5, "6":6, "7":7, "8":8, "9":9, "T":10, "J":10, "Q":10, "K":10} 8 | 9 | def judge_round(self, player): 10 | ''' Judge the target player's status 11 | 12 | Args: 13 | player (int): target player's id 14 | 15 | Returns: 16 | status (str): the status of the target player 17 | score (int): the current score of the player 18 | ''' 19 | score = self.judge_score(player.hand) 20 | if score <= 21: 21 | return "alive", score 22 | else: 23 | return "bust", score 24 | 25 | def judge_game(self, game, game_pointer): 26 | ''' Judge the winner of the game 27 | 28 | Args: 29 | game (class): target game class 30 | ''' 31 | ''' 32 | game.winner['dealer'] doesn't need anymore if we change code like this 33 | 34 | player bust (whether dealer bust or not) => game.winner[playerX] = -1 35 | player and dealer tie => game.winner[playerX] = 1 36 | dealer bust and player not bust => game.winner[playerX] = 2 37 | player get higher score than dealer => game.winner[playerX] = 2 38 | dealer get higher score than player => game.winner[playerX] = -1 39 | game.winner[playerX] = 0 => the game is still ongoing 40 
| ''' 41 | 42 | if game.players[game_pointer].status == 'bust': 43 | game.winner['player' + str(game_pointer)] = -1 44 | elif game.dealer.status == 'bust': 45 | game.winner['player' + str(game_pointer)] = 2 46 | else: 47 | if game.players[game_pointer].score > game.dealer.score: 48 | game.winner['player' + str(game_pointer)] = 2 49 | elif game.players[game_pointer].score < game.dealer.score: 50 | game.winner['player' + str(game_pointer)] = -1 51 | else: 52 | game.winner['player' + str(game_pointer)] = 1 53 | 54 | def judge_score(self, cards): 55 | ''' Judge the score of a given cards set 56 | 57 | Args: 58 | cards (list): a list of cards 59 | 60 | Returns: 61 | score (int): the score of the given cards set 62 | ''' 63 | score = 0 64 | count_a = 0 65 | for card in cards: 66 | card_score = self.rank2score[card.rank] 67 | score += card_score 68 | if card.rank == 'A': 69 | count_a += 1 70 | while score > 21 and count_a > 0: 71 | count_a -= 1 72 | score -= 10 73 | return score 74 | -------------------------------------------------------------------------------- /rlcard/games/blackjack/player.py: -------------------------------------------------------------------------------- 1 | 2 | class BlackjackPlayer: 3 | 4 | def __init__(self, player_id, np_random): 5 | ''' Initialize a Blackjack player class 6 | 7 | Args: 8 | player_id (int): id for the player 9 | ''' 10 | self.np_random = np_random 11 | self.player_id = player_id 12 | self.hand = [] 13 | self.status = 'alive' 14 | self.score = 0 15 | 16 | def get_player_id(self): 17 | ''' Return player's id 18 | ''' 19 | return self.player_id 20 | -------------------------------------------------------------------------------- /rlcard/games/bridge/__init__.py: -------------------------------------------------------------------------------- 1 | from rlcard.games.bridge.game import BridgeGame as Game 2 | -------------------------------------------------------------------------------- /rlcard/games/bridge/dealer.py: -------------------------------------------------------------------------------- 1 | ''' 2 | File name: bridge/dealer.py 3 | Author: William Hale 4 | Date created: 11/25/2021 5 | ''' 6 | 7 | from typing import List 8 | 9 | from .player import BridgePlayer 10 | from .utils.bridge_card import BridgeCard 11 | 12 | 13 | class BridgeDealer: 14 | ''' Initialize a BridgeDealer dealer class 15 | ''' 16 | def __init__(self, np_random): 17 | ''' set shuffled_deck, set stock_pile 18 | ''' 19 | self.np_random = np_random 20 | self.shuffled_deck: List[BridgeCard] = BridgeCard.get_deck() # keep a copy of the shuffled cards at start of new hand 21 | self.np_random.shuffle(self.shuffled_deck) 22 | self.stock_pile: List[BridgeCard] = self.shuffled_deck.copy() 23 | 24 | def deal_cards(self, player: BridgePlayer, num: int): 25 | ''' Deal some cards from stock_pile to one player 26 | 27 | Args: 28 | player (BridgePlayer): The BridgePlayer object 29 | num (int): The number of cards to be dealt 30 | ''' 31 | for _ in range(num): 32 | player.hand.append(self.stock_pile.pop()) 33 | -------------------------------------------------------------------------------- /rlcard/games/bridge/game.py: -------------------------------------------------------------------------------- 1 | ''' 2 | File name: bridge/game.py 3 | Author: William Hale 4 | Date created: 11/25/2021 5 | ''' 6 | 7 | from typing import List 8 | 9 | import numpy as np 10 | 11 | from .judger import BridgeJudger 12 | from .round import BridgeRound 13 | from .utils.action_event import ActionEvent, CallActionEvent, 
PlayCardAction 14 | 15 | 16 | class BridgeGame: 17 | ''' Game class. This class will interact with outer environment. 18 | ''' 19 | 20 | def __init__(self, allow_step_back=False): 21 | '''Initialize the class BridgeGame 22 | ''' 23 | self.allow_step_back: bool = allow_step_back 24 | self.np_random = np.random.RandomState() 25 | self.judger: BridgeJudger = BridgeJudger(game=self) 26 | self.actions: [ActionEvent] = [] # must reset in init_game 27 | self.round: BridgeRound or None = None # must reset in init_game 28 | self.num_players: int = 4 29 | 30 | def init_game(self): 31 | ''' Initialize all characters in the game and start round 1 32 | ''' 33 | board_id = self.np_random.choice([1, 2, 3, 4]) 34 | self.actions: List[ActionEvent] = [] 35 | self.round = BridgeRound(num_players=self.num_players, board_id=board_id, np_random=self.np_random) 36 | for player_id in range(4): 37 | player = self.round.players[player_id] 38 | self.round.dealer.deal_cards(player=player, num=13) 39 | current_player_id = self.round.current_player_id 40 | state = self.get_state(player_id=current_player_id) 41 | return state, current_player_id 42 | 43 | def step(self, action: ActionEvent): 44 | ''' Perform game action and return next player number, and the state for next player 45 | ''' 46 | if isinstance(action, CallActionEvent): 47 | self.round.make_call(action=action) 48 | elif isinstance(action, PlayCardAction): 49 | self.round.play_card(action=action) 50 | else: 51 | raise Exception(f'Unknown step action={action}') 52 | self.actions.append(action) 53 | next_player_id = self.round.current_player_id 54 | next_state = self.get_state(player_id=next_player_id) 55 | return next_state, next_player_id 56 | 57 | def get_num_players(self) -> int: 58 | ''' Return the number of players in the game 59 | ''' 60 | return self.num_players 61 | 62 | @staticmethod 63 | def get_num_actions() -> int: 64 | ''' Return the number of possible actions in the game 65 | ''' 66 | return ActionEvent.get_num_actions() 67 | 68 | def get_player_id(self): 69 | ''' Return the current player that will take actions soon 70 | ''' 71 | return self.round.current_player_id 72 | 73 | def is_over(self) -> bool: 74 | ''' Return whether the current game is over 75 | ''' 76 | return self.round.is_over() 77 | 78 | def get_state(self, player_id: int): # wch: not really used 79 | ''' Get player's state 80 | 81 | Return: 82 | state (dict): The information of the state 83 | ''' 84 | state = {} 85 | if not self.is_over(): 86 | state['player_id'] = player_id 87 | state['current_player_id'] = self.round.current_player_id 88 | state['hand'] = self.round.players[player_id].hand 89 | else: 90 | state['player_id'] = player_id 91 | state['current_player_id'] = self.round.current_player_id 92 | state['hand'] = self.round.players[player_id].hand 93 | return state 94 | -------------------------------------------------------------------------------- /rlcard/games/bridge/judger.py: -------------------------------------------------------------------------------- 1 | ''' 2 | File name: bridge/judger.py 3 | Author: William Hale 4 | Date created: 11/25/2021 5 | ''' 6 | 7 | from typing import List 8 | 9 | from typing import TYPE_CHECKING 10 | if TYPE_CHECKING: 11 | from .game import BridgeGame 12 | 13 | from .utils.action_event import PlayCardAction 14 | from .utils.action_event import ActionEvent, BidAction, PassAction, DblAction, RdblAction 15 | from .utils.move import MakeBidMove, MakeDblMove, MakeRdblMove 16 | from .utils.bridge_card import BridgeCard 17 | 18 | 19 | class 
BridgeJudger: 20 | 21 | ''' 22 | Judger decides legal actions for current player 23 | ''' 24 | 25 | def __init__(self, game: 'BridgeGame'): 26 | ''' Initialize the class BridgeJudger 27 | :param game: BridgeGame 28 | ''' 29 | self.game: BridgeGame = game 30 | 31 | def get_legal_actions(self) -> List[ActionEvent]: 32 | """ 33 | :return: List[ActionEvent] of legal actions 34 | """ 35 | legal_actions: List[ActionEvent] = [] 36 | if not self.game.is_over(): 37 | current_player = self.game.round.get_current_player() 38 | if not self.game.round.is_bidding_over(): 39 | legal_actions.append(PassAction()) 40 | last_make_bid_move: MakeBidMove or None = None 41 | last_dbl_move: MakeDblMove or None = None 42 | last_rdbl_move: MakeRdblMove or None = None 43 | for move in reversed(self.game.round.move_sheet): 44 | if isinstance(move, MakeBidMove): 45 | last_make_bid_move = move 46 | break 47 | elif isinstance(move, MakeRdblMove): 48 | last_rdbl_move = move 49 | elif isinstance(move, MakeDblMove) and not last_rdbl_move: 50 | last_dbl_move = move 51 | first_bid_action_id = ActionEvent.first_bid_action_id 52 | next_bid_action_id = last_make_bid_move.action.action_id + 1 if last_make_bid_move else first_bid_action_id 53 | for bid_action_id in range(next_bid_action_id, first_bid_action_id + 35): 54 | action = BidAction.from_action_id(action_id=bid_action_id) 55 | legal_actions.append(action) 56 | if last_make_bid_move and last_make_bid_move.player.player_id % 2 != current_player.player_id % 2 and not last_dbl_move and not last_rdbl_move: 57 | legal_actions.append(DblAction()) 58 | if last_dbl_move and last_dbl_move.player.player_id % 2 != current_player.player_id % 2: 59 | legal_actions.append(RdblAction()) 60 | else: 61 | trick_moves = self.game.round.get_trick_moves() 62 | hand = self.game.round.players[current_player.player_id].hand 63 | legal_cards = hand 64 | if trick_moves and len(trick_moves) < 4: 65 | led_card: BridgeCard = trick_moves[0].card 66 | cards_of_led_suit = [card for card in hand if card.suit == led_card.suit] 67 | if cards_of_led_suit: 68 | legal_cards = cards_of_led_suit 69 | for card in legal_cards: 70 | action = PlayCardAction(card=card) 71 | legal_actions.append(action) 72 | return legal_actions 73 | -------------------------------------------------------------------------------- /rlcard/games/bridge/player.py: -------------------------------------------------------------------------------- 1 | ''' 2 | File name: bridge/player.py 3 | Author: William Hale 4 | Date created: 11/25/2021 5 | ''' 6 | 7 | from typing import List 8 | 9 | from .utils.bridge_card import BridgeCard 10 | 11 | 12 | class BridgePlayer: 13 | 14 | def __init__(self, player_id: int, np_random): 15 | ''' Initialize a BridgePlayer player class 16 | 17 | Args: 18 | player_id (int): id for the player 19 | ''' 20 | if player_id < 0 or player_id > 3: 21 | raise Exception(f'BridgePlayer has invalid player_id: {player_id}') 22 | self.np_random = np_random 23 | self.player_id: int = player_id 24 | self.hand: List[BridgeCard] = [] 25 | 26 | def remove_card_from_hand(self, card: BridgeCard): 27 | self.hand.remove(card) 28 | 29 | def __str__(self): 30 | return ['N', 'E', 'S', 'W'][self.player_id] 31 | -------------------------------------------------------------------------------- /rlcard/games/bridge/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datamllab/rlcard/d7d0a957baf4cc7225a50522adb0164bf130a9d0/rlcard/games/bridge/utils/__init__.py 
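To see the pieces above working together: a BridgeGame owns a BridgeJudger, and the judger can be asked for the legal calls at any point. A minimal usage sketch (hypothetical usage, with the printed count only illustrative):

```python
from rlcard.games.bridge.game import BridgeGame

game = BridgeGame()
state, current_player_id = game.init_game()
legal_actions = game.judger.get_legal_actions()
# On the opening call there is no prior bid, so the options are
# PassAction plus all 35 bids; Dbl and Rdbl are not yet available.
print(len(legal_actions))  # 36
```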
-------------------------------------------------------------------------------- /rlcard/games/bridge/utils/bridge_card.py: -------------------------------------------------------------------------------- 1 | ''' 2 | File name: bridge/utils/bridge_card.py 3 | Author: William Hale 4 | Date created: 11/25/2021 5 | ''' 6 | 7 | from rlcard.games.base import Card 8 | 9 | 10 | class BridgeCard(Card): 11 | 12 | suits = ['C', 'D', 'H', 'S'] 13 | ranks = ['2', '3', '4', '5', '6', '7', '8', '9', 'T', 'J', 'Q', 'K', 'A'] 14 | 15 | @staticmethod 16 | def card(card_id: int): 17 | return _deck[card_id] 18 | 19 | @staticmethod 20 | def get_deck() -> [Card]: 21 | return _deck.copy() 22 | 23 | def __init__(self, suit: str, rank: str): 24 | super().__init__(suit=suit, rank=rank) 25 | suit_index = BridgeCard.suits.index(self.suit) 26 | rank_index = BridgeCard.ranks.index(self.rank) 27 | self.card_id = 13 * suit_index + rank_index 28 | 29 | def __str__(self): 30 | return f'{self.rank}{self.suit}' 31 | 32 | def __repr__(self): 33 | return f'{self.rank}{self.suit}' 34 | 35 | 36 | # deck is always in order from 2C, ... KC, AC, 2D, ... KD, AD, 2H, ... KH, AH, 2S, ... KS, AS 37 | _deck = [BridgeCard(suit=suit, rank=rank) for suit in BridgeCard.suits for rank in BridgeCard.ranks] # want this to be read-only 38 | -------------------------------------------------------------------------------- /rlcard/games/bridge/utils/move.py: -------------------------------------------------------------------------------- 1 | ''' 2 | File name: bridge/utils/move.py 3 | Author: William Hale 4 | Date created: 11/25/2021 5 | ''' 6 | 7 | # 8 | # These classes are used to keep a move_sheet history of the moves in a round. 9 | # 10 | 11 | from .action_event import ActionEvent, BidAction, PassAction, DblAction, RdblAction, PlayCardAction 12 | from .bridge_card import BridgeCard 13 | 14 | from ..player import BridgePlayer 15 | 16 | 17 | class BridgeMove(object): # Interface 18 | pass 19 | 20 | 21 | class PlayerMove(BridgeMove): # Interface 22 | 23 | def __init__(self, player: BridgePlayer, action: ActionEvent): 24 | super().__init__() 25 | self.player = player 26 | self.action = action 27 | 28 | 29 | class CallMove(PlayerMove): # Interface 30 | 31 | def __init__(self, player: BridgePlayer, action: ActionEvent): 32 | super().__init__(player=player, action=action) 33 | 34 | 35 | class DealHandMove(BridgeMove): 36 | 37 | def __init__(self, dealer: BridgePlayer, shuffled_deck: [BridgeCard]): 38 | super().__init__() 39 | self.dealer = dealer 40 | self.shuffled_deck = shuffled_deck 41 | 42 | def __str__(self): 43 | shuffled_deck_text = " ".join([str(card) for card in self.shuffled_deck]) 44 | return f'{self.dealer} deal shuffled_deck=[{shuffled_deck_text}]' 45 | 46 | 47 | class MakePassMove(CallMove): 48 | 49 | def __init__(self, player: BridgePlayer): 50 | super().__init__(player=player, action=PassAction()) 51 | 52 | def __str__(self): 53 | return f'{self.player} {self.action}' 54 | 55 | 56 | class MakeDblMove(CallMove): 57 | 58 | def __init__(self, player: BridgePlayer): 59 | super().__init__(player=player, action=DblAction()) 60 | 61 | def __str__(self): 62 | return f'{self.player} {self.action}' 63 | 64 | 65 | class MakeRdblMove(CallMove): 66 | 67 | def __init__(self, player: BridgePlayer): 68 | super().__init__(player=player, action=RdblAction()) 69 | 70 | def __str__(self): 71 | return f'{self.player} {self.action}' 72 | 73 | 74 | class MakeBidMove(CallMove): 75 | 76 | def __init__(self, player: BridgePlayer, bid_action: BidAction): 77 | 
super().__init__(player=player, action=bid_action) 78 | self.action = bid_action # Note: keep type as BidAction rather than ActionEvent 79 | 80 | def __str__(self): 81 | return f'{self.player} bids {self.action}' 82 | 83 | 84 | class PlayCardMove(PlayerMove): 85 | 86 | def __init__(self, player: BridgePlayer, action: PlayCardAction): 87 | super().__init__(player=player, action=action) 88 | self.action = action # Note: keep type as PlayCardAction rather than ActionEvent 89 | 90 | @property 91 | def card(self): 92 | return self.action.card 93 | 94 | def __str__(self): 95 | return f'{self.player} plays {self.action}' 96 | -------------------------------------------------------------------------------- /rlcard/games/bridge/utils/tray.py: -------------------------------------------------------------------------------- 1 | ''' 2 | File name: bridge/utils/tray.py 3 | Author: William Hale 4 | Date created: 11/28/2021 5 | ''' 6 | 7 | 8 | class Tray(object): 9 | 10 | def __init__(self, board_id: int): 11 | if board_id <= 0: 12 | raise Exception(f'Tray: invalid board_id={board_id}') 13 | self.board_id = board_id 14 | 15 | @property 16 | def dealer_id(self): 17 | return (self.board_id - 1) % 4 18 | 19 | @property 20 | def vul(self): 21 | vul_none = [0, 0, 0, 0] 22 | vul_n_s = [1, 0, 1, 0] 23 | vul_e_w = [0, 1, 0, 1] 24 | vul_all = [1, 1, 1, 1] 25 | basic_vuls = [vul_none, vul_n_s, vul_e_w, vul_all] 26 | offset = (self.board_id - 1) // 4 27 | return basic_vuls[(self.board_id - 1 + offset) % 4] 28 | 29 | def __str__(self): 30 | return f'{self.board_id}: dealer_id={self.dealer_id} vul={self.vul}' 31 | -------------------------------------------------------------------------------- /rlcard/games/bridge/utils/utils.py: -------------------------------------------------------------------------------- 1 | ''' 2 | File name: bridge/utils/utils.py 3 | Author: William Hale 4 | Date created: 11/26/2021 5 | ''' 6 | 7 | from typing import List 8 | 9 | import numpy as np 10 | 11 | from .bridge_card import BridgeCard 12 | 13 | 14 | def encode_cards(cards: List[BridgeCard]) -> np.ndarray: # Note: not used ?? 15 | plane = np.zeros(52, dtype=int) 16 | for card in cards: 17 | plane[card.card_id] = 1 18 | return plane 19 | -------------------------------------------------------------------------------- /rlcard/games/doudizhu/__init__.py: -------------------------------------------------------------------------------- 1 | from rlcard.games.doudizhu.dealer import DoudizhuDealer as Dealer 2 | from rlcard.games.doudizhu.judger import DoudizhuJudger as Judger 3 | from rlcard.games.doudizhu.player import DoudizhuPlayer as Player 4 | from rlcard.games.doudizhu.round import DoudizhuRound as Round 5 | from rlcard.games.doudizhu.game import DoudizhuGame as Game 6 | 7 | -------------------------------------------------------------------------------- /rlcard/games/doudizhu/dealer.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | ''' Implement Doudizhu Dealer class 3 | ''' 4 | import functools 5 | 6 | from rlcard.utils import init_54_deck 7 | from rlcard.games.doudizhu.utils import cards2str, doudizhu_sort_card 8 | 9 | class DoudizhuDealer: 10 | ''' Dealer will shuffle, deal cards, and determine players' roles 11 | ''' 12 | def __init__(self, np_random): 13 | '''Give dealer the deck 14 | 15 | Notes: 16 | 1. 
deck with 54 cards including black joker and red joker 17 | ''' 18 | self.np_random = np_random 19 | self.deck = init_54_deck() 20 | self.deck.sort(key=functools.cmp_to_key(doudizhu_sort_card)) 21 | self.landlord = None 22 | 23 | def shuffle(self): 24 | ''' Randomly shuffle the deck 25 | ''' 26 | self.np_random.shuffle(self.deck) 27 | 28 | def deal_cards(self, players): 29 | ''' Deal cards to players 30 | 31 | Args: 32 | players (list): list of DoudizhuPlayer objects 33 | ''' 34 | hand_num = (len(self.deck) - 3) // len(players) 35 | for index, player in enumerate(players): 36 | current_hand = self.deck[index*hand_num:(index+1)*hand_num] 37 | current_hand.sort(key=functools.cmp_to_key(doudizhu_sort_card)) 38 | player.set_current_hand(current_hand) 39 | player.initial_hand = cards2str(player.current_hand) 40 | 41 | def determine_role(self, players): 42 | ''' Determine landlord and peasants according to players' hand 43 | 44 | Args: 45 | players (list): list of DoudizhuPlayer objects 46 | 47 | Returns: 48 | int: landlord's player_id 49 | ''' 50 | # deal cards 51 | self.shuffle() 52 | self.deal_cards(players) 53 | players[0].role = 'landlord' 54 | self.landlord = players[0] 55 | players[1].role = 'peasant' 56 | players[2].role = 'peasant' 57 | #players[0].role = 'peasant' 58 | #self.landlord = players[0] 59 | 60 | ## determine 'landlord' 61 | #max_score = get_landlord_score( 62 | # cards2str(self.landlord.current_hand)) 63 | #for player in players[1:]: 64 | # player.role = 'peasant' 65 | # score = get_landlord_score( 66 | # cards2str(player.current_hand)) 67 | # if score > max_score: 68 | # max_score = score 69 | # self.landlord = player 70 | #self.landlord.role = 'landlord' 71 | 72 | # give the 'landlord' the three cards 73 | self.landlord.current_hand.extend(self.deck[-3:]) 74 | self.landlord.current_hand.sort(key=functools.cmp_to_key(doudizhu_sort_card)) 75 | self.landlord.initial_hand = cards2str(self.landlord.current_hand) 76 | return self.landlord.player_id 77 | -------------------------------------------------------------------------------- /rlcard/games/doudizhu/jsondata.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datamllab/rlcard/d7d0a957baf4cc7225a50522adb0164bf130a9d0/rlcard/games/doudizhu/jsondata.zip -------------------------------------------------------------------------------- /rlcard/games/gin_rummy/__init__.py: -------------------------------------------------------------------------------- 1 | from rlcard.games.gin_rummy.game import GinRummyGame as Game 2 | -------------------------------------------------------------------------------- /rlcard/games/gin_rummy/dealer.py: -------------------------------------------------------------------------------- 1 | ''' 2 | File name: gin_rummy/dealer.py 3 | Author: William Hale 4 | Date created: 2/12/2020 5 | ''' 6 | 7 | from .player import GinRummyPlayer 8 | from .utils import utils as utils 9 | 10 | 11 | class GinRummyDealer: 12 | ''' Initialize a GinRummy dealer class 13 | ''' 14 | def __init__(self, np_random): 15 | ''' Empty discard_pile, set shuffled_deck, set stock_pile 16 | ''' 17 | self.np_random = np_random 18 | self.discard_pile = [] # type: List[Card] 19 | self.shuffled_deck = utils.get_deck() # keep a copy of the shuffled cards at start of new hand 20 | self.np_random.shuffle(self.shuffled_deck) 21 | self.stock_pile = self.shuffled_deck.copy() # type: List[Card] 22 | 23 | def deal_cards(self, player: GinRummyPlayer, num: int): 24 | ''' Deal some 
cards from stock_pile to one player 25 | 26 | Args: 27 | player (GinRummyPlayer): The GinRummyPlayer object 28 | num (int): The number of cards to be dealt 29 | ''' 30 | for _ in range(num): 31 | player.hand.append(self.stock_pile.pop()) 32 | player.did_populate_hand() 33 | -------------------------------------------------------------------------------- /rlcard/games/gin_rummy/utils/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rlcard/games/gin_rummy/utils/gin_rummy_error.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Project: Gin Rummy 3 | File name: gin_rummy/utils/gin_rummy_error.py 4 | Author: William Hale 5 | Date created: 4/29/2020 6 | ''' 7 | 8 | 9 | class GinRummyError(Exception): 10 | pass 11 | 12 | 13 | class GinRummyProgramError(GinRummyError): 14 | pass 15 | -------------------------------------------------------------------------------- /rlcard/games/gin_rummy/utils/scorers.py: -------------------------------------------------------------------------------- 1 | ''' 2 | File name: gin_rummy/scorers.py 3 | Author: William Hale 4 | Date created: 2/15/2020 5 | ''' 6 | 7 | from typing import TYPE_CHECKING 8 | if TYPE_CHECKING: 9 | from ..game import GinRummyGame 10 | 11 | from typing import Callable 12 | 13 | from .action_event import * 14 | from ..player import GinRummyPlayer 15 | from .move import ScoreNorthMove, ScoreSouthMove 16 | from .gin_rummy_error import GinRummyProgramError 17 | 18 | from rlcard.games.gin_rummy.utils import melding 19 | from rlcard.games.gin_rummy.utils import utils 20 | 21 | 22 | class GinRummyScorer: 23 | 24 | def __init__(self, name: str = None, get_payoff: Callable[[GinRummyPlayer, 'GinRummyGame'], int or float] = None): 25 | self.name = name if name is not None else "GinRummyScorer" 26 | self.get_payoff = get_payoff if get_payoff else get_payoff_gin_rummy_v1 27 | 28 | def get_payoffs(self, game: 'GinRummyGame'): 29 | payoffs = [0, 0] 30 | for i in range(2): 31 | player = game.round.players[i] 32 | payoff = self.get_payoff(player=player, game=game) 33 | payoffs[i] = payoff 34 | return payoffs 35 | 36 | 37 | def get_payoff_gin_rummy_v0(player: GinRummyPlayer, game: 'GinRummyGame') -> int: 38 | ''' Get the payoff of player: deadwood_count of player 39 | 40 | Returns: 41 | payoff (int or float): payoff for player (lower is better) 42 | ''' 43 | moves = game.round.move_sheet 44 | if player.player_id == 0: 45 | score_player_move = moves[-2] 46 | if not isinstance(score_player_move, ScoreNorthMove): 47 | raise GinRummyProgramError("score_player_move must be ScoreNorthMove.") 48 | else: 49 | score_player_move = moves[-1] 50 | if not isinstance(score_player_move, ScoreSouthMove): 51 | raise GinRummyProgramError("score_player_move must be ScoreSouthMove.") 52 | deadwood_count = score_player_move.deadwood_count 53 | return deadwood_count 54 | 55 | 56 | def get_payoff_gin_rummy_v1(player: GinRummyPlayer, game: 'GinRummyGame') -> float: 57 | ''' Get the payoff of player: 58 | a) 1.0 if player gins 59 | b) 0.2 if player knocks 60 | c) -deadwood_count / 100 otherwise 61 | 62 | Returns: 63 | payoff (int or float): payoff for player (higher is better) 64 | ''' 65 | # payoff is 1.0 if player gins 66 | # payoff is 0.2 if player knocks 67 | # payoff is -deadwood_count / 100 if otherwise 68 | # The goal is to have the agent learn how to knock and gin. 
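# For example, under this scheme a losing hand whose best meld cluster
# leaves deadwood_count == 35 receives payoff -0.35, while knocking would
# have earned 0.2 and gin 1.0, so shedding deadwood is always rewarded.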
69 | # The negative payoff when the agent fails to knock or gin should encourage the agent to form melds. 70 | # The payoff is scaled to lie between -1 and 1. 71 | going_out_action = game.round.going_out_action 72 | going_out_player_id = game.round.going_out_player_id 73 | if going_out_player_id == player.player_id and isinstance(going_out_action, KnockAction): 74 | payoff = 0.2 75 | elif going_out_player_id == player.player_id and isinstance(going_out_action, GinAction): 76 | payoff = 1 77 | else: 78 | hand = player.hand 79 | best_meld_clusters = melding.get_best_meld_clusters(hand=hand) 80 | best_meld_cluster = [] if not best_meld_clusters else best_meld_clusters[0] 81 | deadwood_count = utils.get_deadwood_count(hand, best_meld_cluster) 82 | payoff = -deadwood_count / 100 83 | return payoff 84 | -------------------------------------------------------------------------------- /rlcard/games/gin_rummy/utils/thinker.py: -------------------------------------------------------------------------------- 1 | ''' 2 | File name: gin_rummy/thinker.py 3 | Author: William Hale 4 | Date created: 3/28/2020 5 | ''' 6 | 7 | from typing import List 8 | 9 | from rlcard.games.base import Card 10 | 11 | from . import melding 12 | from . import utils 13 | 14 | 15 | class Thinker(object): 16 | 17 | def __init__(self, hand: List[Card]): 18 | self.hand = hand 19 | 20 | # simple thinking 21 | def get_meld_piles_with_discard_card(self, discard_card: Card) -> List[List[Card]]: 22 | next_hand = self.hand + [discard_card] 23 | meld_clusters = melding.get_meld_clusters(hand=next_hand) 24 | best_deadwood_count = 999 25 | best_deadwoods = [] # type: List[List[Card]] 26 | best_meld_clusters = [] # type: List[List[List[Card]]] 27 | for meld_cluster in meld_clusters: 28 | meld_cards = [card for meld_pile in meld_cluster for card in meld_pile] 29 | deadwood = [card for card in next_hand if card not in meld_cards] 30 | deadwood_count = self._get_deadwood_count(deadwood=deadwood) 31 | if deadwood_count < best_deadwood_count: 32 | best_deadwood_count = deadwood_count 33 | best_deadwoods = [deadwood] 34 | best_meld_clusters = [meld_cluster] 35 | elif deadwood_count == best_deadwood_count: 36 | best_deadwoods.append(deadwood) 37 | best_meld_clusters.append(meld_cluster) 38 | want_discard_card = False 39 | for deadwood in best_deadwoods: 40 | if discard_card in deadwood: 41 | want_discard_card = False 42 | break 43 | else: 44 | want_discard_card = True 45 | result = [] # type: List[List[Card]] 46 | if want_discard_card: 47 | for meld_cluster in best_meld_clusters: 48 | for meld_pile in meld_cluster: 49 | if discard_card in meld_pile: 50 | result.append(meld_pile) 51 | return result 52 | 53 | @staticmethod 54 | def _get_deadwood_count(deadwood: List[Card]) -> int: 55 | deadwood_values = [utils.get_deadwood_value(card) for card in deadwood] 56 | return sum(deadwood_values) 57 | -------------------------------------------------------------------------------- /rlcard/games/leducholdem/__init__.py: -------------------------------------------------------------------------------- 1 | from rlcard.games.leducholdem.dealer import LeducholdemDealer as Dealer 2 | from rlcard.games.leducholdem.judger import LeducholdemJudger as Judger 3 | from rlcard.games.leducholdem.player import LeducholdemPlayer as Player 4 | from rlcard.games.leducholdem.round import LeducholdemRound as Round 5 | from rlcard.games.leducholdem.game import LeducholdemGame as Game 6 | 7 | -------------------------------------------------------------------------------- 
/rlcard/games/leducholdem/card2index.json: -------------------------------------------------------------------------------- 1 | {"SJ": 0, "SQ": 1, "SK": 2, "HJ": 0, "HQ": 1, "HK": 2} 2 | -------------------------------------------------------------------------------- /rlcard/games/leducholdem/dealer.py: -------------------------------------------------------------------------------- 1 | from rlcard.games.base import Card 2 | from rlcard.games.limitholdem import Dealer 3 | 4 | class LeducholdemDealer(Dealer): 5 | 6 | def __init__(self, np_random): 7 | ''' Initialize a leducholdem dealer class 8 | ''' 9 | self.np_random = np_random 10 | self.deck = [Card('S', 'J'), Card('H', 'J'), Card('S', 'Q'), Card('H', 'Q'), Card('S', 'K'), Card('H', 'K')] 11 | self.shuffle() 12 | self.pot = 0 13 | -------------------------------------------------------------------------------- /rlcard/games/leducholdem/judger.py: -------------------------------------------------------------------------------- 1 | from rlcard.utils.utils import rank2int 2 | 3 | class LeducholdemJudger: 4 | ''' The Judger class for Leduc Hold'em 5 | ''' 6 | def __init__(self, np_random): 7 | ''' Initialize a judger class 8 | ''' 9 | self.np_random = np_random 10 | 11 | @staticmethod 12 | def judge_game(players, public_card): 13 | ''' Judge the winner of the game. 14 | 15 | Args: 16 | players (list): The list of players who play the game 17 | public_card (object): The public card that is seen by all the players 18 | 19 | Returns: 20 | (list): Each entry of the list is the payoff of the player at that position 21 | ''' 22 | # Judge who are the winners 23 | winners = [0] * len(players) 24 | fold_count = 0 25 | ranks = [] 26 | # If every player folds except one, the alive player is the winner 27 | for idx, player in enumerate(players): 28 | ranks.append(rank2int(player.hand.rank)) 29 | if player.status == 'folded': 30 | fold_count += 1 31 | elif player.status == 'alive': 32 | alive_idx = idx 33 | if fold_count == (len(players) - 1): 34 | winners[alive_idx] = 1 35 | 36 | # Otherwise, any player whose hand matches the public card wins 37 | if sum(winners) < 1: 38 | for idx, player in enumerate(players): 39 | if player.hand.rank == public_card.rank: 40 | winners[idx] = 1 41 | break 42 | 43 | # If none of the above conditions holds, the winners are the players with the highest card rank 44 | if sum(winners) < 1: 45 | max_rank = max(ranks) 46 | max_index = [i for i, j in enumerate(ranks) if j == max_rank] 47 | for idx in max_index: 48 | winners[idx] = 1 49 | 50 | # Compute the total chips 51 | total = 0 52 | for p in players: 53 | total += p.in_chips 54 | 55 | each_win = float(total) / sum(winners) 56 | 57 | payoffs = [] 58 | for i, _ in enumerate(players): 59 | if winners[i] == 1: 60 | payoffs.append(each_win - players[i].in_chips) 61 | else: 62 | payoffs.append(float(-players[i].in_chips)) 63 | 64 | return payoffs 65 | 
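The payoff computation above is zero-sum: the pot is split evenly among the winners, and each player's own contribution is subtracted back out. A worked sketch of the arithmetic with hypothetical numbers (not library code):

```python
in_chips = [4, 4]   # chips each player has committed
winners = [1, 0]    # player 0 wins, e.g. because player 1 folded
total = sum(in_chips)                    # 8 chips in the pot
each_win = float(total) / sum(winners)   # 8.0 to the lone winner
payoffs = [each_win - c if w else float(-c)
           for w, c in zip(winners, in_chips)]
print(payoffs)      # [4.0, -4.0]: gains and losses cancel
```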
-------------------------------------------------------------------------------- /rlcard/games/leducholdem/player.py: -------------------------------------------------------------------------------- 1 | class LeducholdemPlayer: 2 | 3 | def __init__(self, player_id, np_random): 4 | ''' Initialize a player. 5 | 6 | Args: 7 | player_id (int): The id of the player 8 | ''' 9 | self.np_random = np_random 10 | self.player_id = player_id 11 | self.status = 'alive' 12 | self.hand = None 13 | 14 | # The chips that this player has put in until now 15 | self.in_chips = 0 16 | 17 | def get_state(self, public_card, all_chips, legal_actions): 18 | ''' Encode the state for the player 19 | 20 | Args: 21 | public_card (object): The public card that is seen by all the players 22 | all_chips (int): The chips that all players have put in 23 | 24 | Returns: 25 | (dict): The state of the player 26 | ''' 27 | state = {} 28 | state['hand'] = self.hand.get_index() 29 | state['public_card'] = public_card.get_index() if public_card else None 30 | state['all_chips'] = all_chips 31 | state['my_chips'] = self.in_chips 32 | state['legal_actions'] = legal_actions 33 | return state 34 | 35 | def get_player_id(self): 36 | ''' Return the id of the player 37 | ''' 38 | return self.player_id 39 | -------------------------------------------------------------------------------- /rlcard/games/leducholdem/round.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | ''' Implement Leduc Hold'em Round class 3 | ''' 4 | 5 | from rlcard.games.limitholdem import Round 6 | 7 | class LeducholdemRound(Round): 8 | ''' Round can call other classes' functions to keep the game running 9 | ''' 10 | 11 | def __init__(self, raise_amount, allowed_raise_num, num_players, np_random): 12 | ''' Initialize the round class 13 | 14 | Args: 15 | raise_amount (int): the raise amount for each raise 16 | allowed_raise_num (int): The number of raises allowed 17 | num_players (int): The number of players 18 | ''' 19 | super(LeducholdemRound, self).__init__(raise_amount, allowed_raise_num, num_players, np_random=np_random) 20 | -------------------------------------------------------------------------------- /rlcard/games/limitholdem/__init__.py: -------------------------------------------------------------------------------- 1 | from rlcard.games.limitholdem.dealer import LimitHoldemDealer as Dealer 2 | from rlcard.games.limitholdem.judger import LimitHoldemJudger as Judger 3 | from rlcard.games.limitholdem.player import LimitHoldemPlayer as Player 4 | from rlcard.games.limitholdem.player import PlayerStatus 5 | from rlcard.games.limitholdem.round import LimitHoldemRound as Round 6 | from rlcard.games.limitholdem.game import LimitHoldemGame as Game 7 | 8 | -------------------------------------------------------------------------------- /rlcard/games/limitholdem/card2index.json: -------------------------------------------------------------------------------- 1 | {"SA": 0, "S2": 1, "S3": 2, "S4": 3, "S5": 4, "S6": 5, "S7": 6, "S8": 7, "S9": 8, "ST": 9, "SJ": 10, "SQ": 11, "SK": 12, "HA": 13, "H2": 14, "H3": 15, "H4": 16, "H5": 17, "H6": 18, "H7": 19, "H8": 20, "H9": 21, "HT": 22, "HJ": 23, "HQ": 24, "HK": 25, "DA": 26, "D2": 27, "D3": 28, "D4": 29, "D5": 30, "D6": 31, "D7": 32, "D8": 33, "D9": 34, "DT": 35, "DJ": 36, "DQ": 37, "DK": 38, "CA": 39, "C2": 40, "C3": 41, "C4": 42, "C5": 43, "C6": 44, "C7": 45, "C8": 46, "C9": 47, "CT": 48, "CJ": 49, "CQ": 50, "CK": 51} 2 | -------------------------------------------------------------------------------- /rlcard/games/limitholdem/dealer.py: -------------------------------------------------------------------------------- 1 | from rlcard.utils.utils import init_standard_deck 2 | 3 | 4 | class LimitHoldemDealer: 5 | def __init__(self, np_random): 6 | 
self.np_random = np_random 7 | self.deck = init_standard_deck() 8 | self.shuffle() 9 | self.pot = 0 10 | 11 | def shuffle(self): 12 | self.np_random.shuffle(self.deck) 13 | 14 | def deal_card(self): 15 | """ 16 | Deal one card from the deck 17 | 18 | Returns: 19 | (Card): The drawn card from the deck 20 | """ 21 | return self.deck.pop() 22 | -------------------------------------------------------------------------------- /rlcard/games/limitholdem/player.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class PlayerStatus(Enum): 5 | ALIVE = 0 6 | FOLDED = 1 7 | ALLIN = 2 8 | 9 | 10 | class LimitHoldemPlayer: 11 | 12 | def __init__(self, player_id, np_random): 13 | """ 14 | Initialize a player. 15 | 16 | Args: 17 | player_id (int): The id of the player 18 | """ 19 | self.np_random = np_random 20 | self.player_id = player_id 21 | self.hand = [] 22 | self.status = PlayerStatus.ALIVE 23 | 24 | # The chips that this player has put in until now 25 | self.in_chips = 0 26 | 27 | def get_state(self, public_cards, all_chips, legal_actions): 28 | """ 29 | Encode the state for the player 30 | 31 | Args: 32 | public_cards (list): A list of public cards that are seen by all the players 33 | all_chips (int): The chips that all players have put in 34 | 35 | Returns: 36 | (dict): The state of the player 37 | """ 38 | return { 39 | 'hand': [c.get_index() for c in self.hand], 40 | 'public_cards': [c.get_index() for c in public_cards], 41 | 'all_chips': all_chips, 42 | 'my_chips': self.in_chips, 43 | 'legal_actions': legal_actions 44 | } 45 | 46 | def get_player_id(self): 47 | return self.player_id 48 | 
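For reference, the dictionary built by get_state above has roughly the following shape (a hypothetical mid-game snapshot; the exact values of all_chips and legal_actions depend on what the round passes in):

```python
state = {
    'hand': ['SA', 'HK'],                # hole cards as index strings
    'public_cards': ['D2', 'C7', 'CT'],  # board cards as index strings
    'all_chips': [4, 4],                 # chips committed, as supplied by the round
    'my_chips': 4,
    'legal_actions': ['call', 'raise', 'fold'],
}
```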
-------------------------------------------------------------------------------- /rlcard/games/mahjong/__init__.py: -------------------------------------------------------------------------------- 1 | from rlcard.games.mahjong.dealer import MahjongDealer as Dealer 2 | from rlcard.games.mahjong.card import MahjongCard as Card 3 | from rlcard.games.mahjong.player import MahjongPlayer as Player 4 | from rlcard.games.mahjong.judger import MahjongJudger as Judger 5 | from rlcard.games.mahjong.round import MahjongRound as Round 6 | from rlcard.games.mahjong.game import MahjongGame as Game 7 | 8 | -------------------------------------------------------------------------------- /rlcard/games/mahjong/card.py: -------------------------------------------------------------------------------- 1 | 2 | class MahjongCard: 3 | 4 | info = {'type': ['dots', 'bamboo', 'characters', 'dragons', 'winds'], 5 | 'trait': ['1', '2', '3', '4', '5', '6', '7', '8', '9', 'green', 'red', 'white', 'east', 'west', 'north', 'south'] 6 | } 7 | 8 | def __init__(self, card_type, trait): 9 | ''' Initialize the class of MahjongCard 10 | 11 | Args: 12 | card_type (str): The type of card 13 | trait (str): The trait of card 14 | ''' 15 | self.type = card_type 16 | self.trait = trait 17 | self.index_num = 0 18 | 19 | def get_str(self): 20 | ''' Get the string representation of card 21 | 22 | Return: 23 | (str): The string of the card's type and trait 24 | ''' 25 | return self.type + '-' + self.trait 26 | 27 | def set_index_num(self, index_num): 28 | 29 | self.index_num = index_num 30 | 31 | 32 | -------------------------------------------------------------------------------- /rlcard/games/mahjong/dealer.py: -------------------------------------------------------------------------------- 1 | from rlcard.games.mahjong.utils import init_deck 2 | 3 | 4 | class MahjongDealer: 5 | ''' Initialize a mahjong dealer class 6 | ''' 7 | def __init__(self, np_random): 8 | self.np_random = np_random 9 | self.deck = init_deck() 10 | self.shuffle() 11 | self.table = [] 12 | 13 | def shuffle(self): 14 | ''' Shuffle the deck 15 | ''' 16 | self.np_random.shuffle(self.deck) 17 | 18 | def deal_cards(self, player, num): 19 | ''' Deal some cards from deck to one player 20 | 21 | Args: 22 | player (object): The object of MahjongPlayer 23 | num (int): The number of cards to be dealt 24 | ''' 25 | for _ in range(num): 26 | player.hand.append(self.deck.pop()) 27 | 28 | 29 | ## For test 30 | #if __name__ == '__main__': 31 | # dealer = MahjongDealer() 32 | # for card in dealer.deck: 33 | # print(card.get_str()) 34 | # print(len(dealer.deck)) 35 | -------------------------------------------------------------------------------- /rlcard/games/mahjong/player.py: -------------------------------------------------------------------------------- 1 | 2 | class MahjongPlayer: 3 | 4 | def __init__(self, player_id, np_random): 5 | ''' Initialize a player. 6 | 7 | Args: 8 | player_id (int): The id of the player 9 | ''' 10 | self.np_random = np_random 11 | self.player_id = player_id 12 | self.hand = [] 13 | self.pile = [] 14 | 15 | def get_player_id(self): 16 | ''' Return the id of the player 17 | ''' 18 | 19 | return self.player_id 20 | 21 | def print_hand(self): 22 | ''' Print the cards in hand as strings. 23 | ''' 24 | print([c.get_str() for c in self.hand]) 25 | 26 | def print_pile(self): 27 | ''' Print the cards in the player's pile as strings. 28 | ''' 29 | print([[c.get_str() for c in s] for s in self.pile]) 30 | 31 | def play_card(self, dealer, card): 32 | ''' Play one card 33 | Args: 34 | dealer (object): Dealer 35 | card (object): The card to be played. 36 | ''' 37 | card = self.hand.pop(self.hand.index(card)) 38 | dealer.table.append(card) 39 | 40 | def chow(self, dealer, cards): 41 | ''' Perform Chow 42 | Args: 43 | dealer (object): Dealer 44 | cards (list): The cards to be melded by the chow. 45 | ''' 46 | last_card = dealer.table.pop(-1) 47 | for card in cards: 48 | if card in self.hand and card != last_card: 49 | self.hand.pop(self.hand.index(card)) 50 | self.pile.append(cards) 51 | 52 | def gong(self, dealer, cards): 53 | ''' Perform Gong 54 | Args: 55 | dealer (object): Dealer 56 | cards (list): The cards to be melded by the gong. 57 | ''' 58 | for card in cards: 59 | if card in self.hand: 60 | self.hand.pop(self.hand.index(card)) 61 | self.pile.append(cards) 62 | 63 | def pong(self, dealer, cards): 64 | ''' Perform Pong 65 | Args: 66 | dealer (object): Dealer 67 | cards (list): The cards to be melded by the pong. 
68 | ''' 69 | for card in cards: 70 | if card in self.hand: 71 | self.hand.pop(self.hand.index(card)) 72 | self.pile.append(cards) 73 | -------------------------------------------------------------------------------- /rlcard/games/mahjong/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from rlcard.games.mahjong.card import MahjongCard as Card 3 | 4 | 5 | card_encoding_dict = {} 6 | num = 0 7 | for _type in ['bamboo', 'characters', 'dots']: 8 | for _trait in ['1', '2', '3', '4', '5', '6', '7', '8', '9']: 9 | card = _type+"-"+_trait 10 | card_encoding_dict[card] = num 11 | num += 1 12 | for _trait in ['green', 'red', 'white']: 13 | card = 'dragons-'+_trait 14 | card_encoding_dict[card] = num 15 | num += 1 16 | 17 | for _trait in ['east', 'west', 'north', 'south']: 18 | card = 'winds-'+_trait 19 | card_encoding_dict[card] = num 20 | num += 1 21 | card_encoding_dict['pong'] = num 22 | card_encoding_dict['chow'] = num + 1 23 | card_encoding_dict['gong'] = num + 2 24 | card_encoding_dict['stand'] = num + 3 25 | 26 | card_decoding_dict = {card_encoding_dict[key]: key for key in card_encoding_dict.keys()} 27 | 28 | def init_deck(): 29 | deck = [] 30 | info = Card.info 31 | for _type in info['type']: 32 | index_num = 0 33 | if _type != 'dragons' and _type != 'winds': 34 | for _trait in info['trait'][:9]: 35 | card = Card(_type, _trait) 36 | card.set_index_num(index_num) 37 | index_num = index_num + 1 38 | deck.append(card) 39 | elif _type == 'dragons': 40 | for _trait in info['trait'][9:12]: 41 | card = Card(_type, _trait) 42 | card.set_index_num(index_num) 43 | index_num = index_num + 1 44 | deck.append(card) 45 | else: 46 | for _trait in info['trait'][12:]: 47 | card = Card(_type, _trait) 48 | card.set_index_num(index_num) 49 | index_num = index_num + 1 50 | deck.append(card) 51 | deck = deck * 4 52 | return deck 53 | 54 | 55 | def pile2list(pile): 56 | cards_list = [] 57 | for each in pile: 58 | cards_list.extend(each) 59 | return cards_list 60 | 61 | def cards2list(cards): 62 | cards_list = [] 63 | for each in cards: 64 | cards_list.append(each.get_str()) 65 | return cards_list 66 | 67 | 68 | def encode_cards(cards): 69 | plane = np.zeros((34,4), dtype=int) 70 | cards = cards2list(cards) 71 | for card in list(set(cards)): 72 | index = card_encoding_dict[card] 73 | num = cards.count(card) 74 | plane[index][:num] = 1 75 | return plane 76 | -------------------------------------------------------------------------------- /rlcard/games/nolimitholdem/__init__.py: -------------------------------------------------------------------------------- 1 | from rlcard.games.nolimitholdem.dealer import NolimitholdemDealer as Dealer 2 | from rlcard.games.nolimitholdem.judger import NolimitholdemJudger as Judger 3 | from rlcard.games.nolimitholdem.player import NolimitholdemPlayer as Player 4 | from rlcard.games.nolimitholdem.round import Action 5 | from rlcard.games.nolimitholdem.round import NolimitholdemRound as Round 6 | from rlcard.games.nolimitholdem.game import NolimitholdemGame as Game 7 | 8 | -------------------------------------------------------------------------------- /rlcard/games/nolimitholdem/dealer.py: -------------------------------------------------------------------------------- 1 | from rlcard.games.limitholdem import Dealer 2 | 3 | 4 | class NolimitholdemDealer(Dealer): 5 | pass 6 | -------------------------------------------------------------------------------- /rlcard/games/nolimitholdem/judger.py: 
-------------------------------------------------------------------------------- 1 | from rlcard.games.limitholdem import Judger 2 | 3 | 4 | class NolimitholdemJudger(Judger): 5 | pass 6 | -------------------------------------------------------------------------------- /rlcard/games/nolimitholdem/player.py: -------------------------------------------------------------------------------- 1 | from rlcard.games.limitholdem import Player 2 | 3 | 4 | class NolimitholdemPlayer(Player): 5 | def __init__(self, player_id, init_chips, np_random): 6 | """ 7 | Initialize a player. 8 | 9 | Args: 10 | player_id (int): The id of the player 11 | init_chips (int): The number of chips the player has initially 12 | """ 13 | super().__init__(player_id, np_random) 14 | self.remained_chips = init_chips 15 | 16 | def bet(self, chips): 17 | quantity = chips if chips <= self.remained_chips else self.remained_chips 18 | self.in_chips += quantity 19 | self.remained_chips -= quantity 20 | -------------------------------------------------------------------------------- /rlcard/games/uno/__init__.py: -------------------------------------------------------------------------------- 1 | from rlcard.games.uno.dealer import UnoDealer as Dealer 2 | from rlcard.games.uno.judger import UnoJudger as Judger 3 | from rlcard.games.uno.player import UnoPlayer as Player 4 | from rlcard.games.uno.round import UnoRound as Round 5 | from rlcard.games.uno.game import UnoGame as Game 6 | 7 | -------------------------------------------------------------------------------- /rlcard/games/uno/card.py: -------------------------------------------------------------------------------- 1 | from termcolor import colored 2 | 3 | class UnoCard: 4 | 5 | info = {'type': ['number', 'action', 'wild'], 6 | 'color': ['r', 'g', 'b', 'y'], 7 | 'trait': ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 8 | 'skip', 'reverse', 'draw_2', 'wild', 'wild_draw_4'] 9 | } 10 | 11 | def __init__(self, card_type, color, trait): 12 | ''' Initialize the class of UnoCard 13 | 14 | Args: 15 | card_type (str): The type of card 16 | color (str): The color of card 17 | trait (str): The trait of card 18 | ''' 19 | self.type = card_type 20 | self.color = color 21 | self.trait = trait 22 | self.str = self.get_str() 23 | 24 | def get_str(self): 25 | ''' Get the string representation of card 26 | 27 | Return: 28 | (str): The string of card's color and trait 29 | ''' 30 | return self.color + '-' + self.trait 31 | 32 | 33 | @staticmethod 34 | def print_cards(cards, wild_color=False): 35 | ''' Print out cards in a nice form 36 | 37 | Args: 38 | cards (str or list): A single UNO card as a string, or a list of such strings 39 | wild_color (boolean): True if a color should be assigned to wild cards 40 | ''' 41 | if isinstance(cards, str): 42 | cards = [cards] 43 | for i, card in enumerate(cards): 44 | if card == 'draw': 45 | trait = 'Draw' 46 | else: 47 | color, trait = card.split('-') 48 | if trait == 'skip': 49 | trait = 'Skip' 50 | elif trait == 'reverse': 51 | trait = 'Reverse' 52 | elif trait == 'draw_2': 53 | trait = 'Draw-2' 54 | elif trait == 'wild': 55 | trait = 'Wild' 56 | elif trait == 'wild_draw_4': 57 | trait = 'Wild-Draw-4' 58 | 59 | if trait == 'Draw' or (trait[:4] == 'Wild' and not wild_color): 60 | print(trait, end='') 61 | elif color == 'r': 62 | print(colored(trait, 'red'), end='') 63 | elif color == 'g': 64 | print(colored(trait, 'green'), end='') 65 | elif color == 'b': 66 | print(colored(trait, 'blue'), end='') 67 | elif color == 'y': 68 | print(colored(trait, 'yellow'), end='') 69 | 70 | if i < len(cards) - 1: 71 | print(', ', end='') 72 | -------------------------------------------------------------------------------- /rlcard/games/uno/dealer.py: -------------------------------------------------------------------------------- 1 | 2 | from rlcard.games.uno.utils import init_deck 3 | 4 | 5 | class UnoDealer: 6 | ''' Initialize a uno dealer class 7 | ''' 8 | def __init__(self, np_random): 9 | self.np_random = np_random 10 | self.deck = init_deck() 11 | self.shuffle() 12 | 13 | def shuffle(self): 14 | ''' Shuffle the deck 15 | ''' 16 | self.np_random.shuffle(self.deck) 17 | 18 | def deal_cards(self, player, num): 19 | ''' Deal some cards from deck to one player 20 | 21 | Args: 22 | player (object): The object of UnoPlayer 23 | num (int): The number of cards to be dealt 24 | ''' 25 | for _ in range(num): 26 | player.hand.append(self.deck.pop()) 27 | 28 | def flip_top_card(self): 29 | ''' Flip top card when a new game starts 30 | 31 | Returns: 32 | (object): The object of UnoCard at the top of the deck 33 | ''' 34 | top_card = self.deck.pop() 35 | while top_card.trait == 'wild_draw_4': 36 | self.deck.append(top_card) 37 | self.shuffle() 38 | top_card = self.deck.pop() 39 | return top_card 40 | 
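Note that flip_top_card above keeps reshuffling until the starting card is not a Wild-Draw-4, matching the standard UNO rule for the first flip. A hypothetical usage sketch:

```python
import numpy as np
from rlcard.games.uno.dealer import UnoDealer

dealer = UnoDealer(np.random.RandomState(42))
top_card = dealer.flip_top_card()
assert top_card.trait != 'wild_draw_4'  # guaranteed by the loop above
print(top_card.str)                     # e.g. 'r-7' in color-trait form
```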
-------------------------------------------------------------------------------- /rlcard/games/uno/jsondata/action_space.json: -------------------------------------------------------------------------------- 1 | {"r-0": 0, "r-1": 1, "r-2": 2, "r-3": 3, "r-4": 4, "r-5": 5, "r-6": 6, "r-7": 7, "r-8": 8, "r-9": 9, "r-skip": 10, "r-reverse": 11, "r-draw_2": 12, "r-wild": 13, "r-wild_draw_4": 14, "g-0": 15, "g-1": 16, "g-2": 17, "g-3": 18, "g-4": 19, "g-5": 20, "g-6": 21, "g-7": 22, "g-8": 23, "g-9": 24, "g-skip": 25, "g-reverse": 26, "g-draw_2": 27, "g-wild": 28, "g-wild_draw_4": 29, "b-0": 30, "b-1": 31, "b-2": 32, "b-3": 33, "b-4": 34, "b-5": 35, "b-6": 36, "b-7": 37, "b-8": 38, "b-9": 39, "b-skip": 40, "b-reverse": 41, "b-draw_2": 42, "b-wild": 43, "b-wild_draw_4": 44, "y-0": 45, "y-1": 46, "y-2": 47, "y-3": 48, "y-4": 49, "y-5": 50, "y-6": 51, "y-7": 52, "y-8": 53, "y-9": 54, "y-skip": 55, "y-reverse": 56, "y-draw_2": 57, "y-wild": 58, "y-wild_draw_4": 59, "draw": 60} -------------------------------------------------------------------------------- /rlcard/games/uno/judger.py: -------------------------------------------------------------------------------- 1 | 2 | class UnoJudger: 3 | 4 | @staticmethod 5 | def judge_winner(players, np_random): 6 | ''' Judge the winner of the game 7 | 8 | Args: 9 | players (list): The list of players who play the game 10 | 11 | Returns: 12 | (list): The player id(s) of the winner(s) 13 | ''' 14 | # np_random is unused: judge_winner is a @staticmethod (there is no self) and the result is deterministic 15 | count_1 = len(players[0].hand) 16 | count_2 = len(players[1].hand) 17 | if count_1 == count_2: 18 | return [0, 1] 19 | if count_1 < count_2: 20 | return [0] 21 | return [1] 22 | 
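So the winner is simply whoever holds fewer cards when the game ends, with a tie returning both ids. A hypothetical sketch (the _Stub class is a stand-in for UnoPlayer, providing only the hand attribute the judger reads):

```python
from rlcard.games.uno.judger import UnoJudger

class _Stub:
    def __init__(self, num_cards):
        self.hand = [None] * num_cards

print(UnoJudger.judge_winner([_Stub(3), _Stub(5)], np_random=None))  # [0]
print(UnoJudger.judge_winner([_Stub(4), _Stub(4)], np_random=None))  # [0, 1]
```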
-------------------------------------------------------------------------------- /rlcard/games/uno/player.py: -------------------------------------------------------------------------------- 1 | 2 | class UnoPlayer: 3 | 4 | def __init__(self, player_id, np_random): 5 | ''' Initialize a player. 6 | 7 | Args: 8 | player_id (int): The id of the player 9 | ''' 10 | self.np_random = np_random 11 | self.player_id = player_id 12 | self.hand = [] 13 | self.stack = [] 14 | 15 | def get_player_id(self): 16 | ''' Return the id of the player 17 | ''' 18 | 19 | return self.player_id 20 | -------------------------------------------------------------------------------- /rlcard/models/__init__.py: -------------------------------------------------------------------------------- 1 | ''' Register rule-based models or pre-trained models 2 | ''' 3 | from rlcard.models.registration import register, load 4 | 5 | register( 6 | model_id = 'leduc-holdem-cfr', 7 | entry_point='rlcard.models.pretrained_models:LeducHoldemCFRModel') 8 | 9 | register( 10 | model_id = 'leduc-holdem-rule-v1', 11 | entry_point='rlcard.models.leducholdem_rule_models:LeducHoldemRuleModelV1') 12 | 13 | register( 14 | model_id = 'leduc-holdem-rule-v2', 15 | entry_point='rlcard.models.leducholdem_rule_models:LeducHoldemRuleModelV2') 16 | 17 | register( 18 | model_id = 'uno-rule-v1', 19 | entry_point='rlcard.models.uno_rule_models:UNORuleModelV1') 20 | 21 | register( 22 | model_id = 'limit-holdem-rule-v1', 23 | entry_point='rlcard.models.limitholdem_rule_models:LimitholdemRuleModelV1') 24 | 25 | register( 26 | model_id = 'doudizhu-rule-v1', 27 | entry_point='rlcard.models.doudizhu_rule_models:DouDizhuRuleModelV1') 28 | 29 | register( 30 | model_id='gin-rummy-novice-rule', 31 | entry_point='rlcard.models.gin_rummy_rule_models:GinRummyNoviceRuleModel') 32 | -------------------------------------------------------------------------------- /rlcard/models/bridge_rule_models.py: -------------------------------------------------------------------------------- 1 | ''' 2 | File name: models/bridge_rule_models.py 3 | Author: William Hale 4 | Date created: 11/27/2021 5 | 6 | Bridge rule models 7 | ''' 8 | 9 | import numpy as np 10 | 11 | from rlcard.games.bridge.utils.action_event import ActionEvent 12 | 13 | 14 | class BridgeDefenderNoviceRuleAgent(object): 15 | ''' 16 | Agent always passes during bidding 17 | ''' 18 | 19 | def __init__(self): 20 | self.use_raw = False 21 | 22 | @staticmethod 23 | def step(state) -> int: 24 | ''' Predict the action given the current state. 25 | Defender Novice strategy: 26 | Case during make call: 27 | Always choose PassAction. 28 | Case during play card: 29 | Choose a random action. 30 | 31 | Args: 32 | state (numpy.array): a numpy array that represents the current state 33 | 34 | Returns: 35 | action_id (int): the action_id predicted 36 | ''' 37 | legal_action_ids = state['raw_legal_actions'] 38 | if ActionEvent.pass_action_id in legal_action_ids: 39 | selected_action_id = ActionEvent.pass_action_id 40 | else: 41 | selected_action_id = np.random.choice(legal_action_ids) 42 | return selected_action_id 43 | 44 | def eval_step(self, state): 45 | ''' Predict the action given the current state for evaluation. 46 | Since the agent is not trained, this function is equivalent to the step function. 
47 | 48 | Args: 49 | state (numpy.array): a numpy array that represents the current state 50 | 51 | Returns: 52 | action_id (int): the action_id predicted by the agent 53 | probabilities (list): The list of action probabilities 54 | ''' 55 | probabilities = [] 56 | return self.step(state), probabilities 57 | -------------------------------------------------------------------------------- /rlcard/models/model.py: -------------------------------------------------------------------------------- 1 | 2 | class Model(object): 3 | ''' The base model class 4 | ''' 5 | 6 | def __init__(self): 7 | ''' Load the model here 8 | ''' 9 | pass 10 | 11 | @property 12 | def agents(self): 13 | ''' Get a list of agents for each position in the game 14 | 15 | Returns: 16 | agents (list): A list of agents 17 | 18 | Note: Each agent should be just like RL agent with step and eval_step 19 | functioning well. 20 | ''' 21 | raise NotImplementedError 22 | -------------------------------------------------------------------------------- /rlcard/models/pretrained/leduc_holdem_cfr/average_policy.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datamllab/rlcard/d7d0a957baf4cc7225a50522adb0164bf130a9d0/rlcard/models/pretrained/leduc_holdem_cfr/average_policy.pkl -------------------------------------------------------------------------------- /rlcard/models/pretrained/leduc_holdem_cfr/iteration.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datamllab/rlcard/d7d0a957baf4cc7225a50522adb0164bf130a9d0/rlcard/models/pretrained/leduc_holdem_cfr/iteration.pkl -------------------------------------------------------------------------------- /rlcard/models/pretrained/leduc_holdem_cfr/policy.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datamllab/rlcard/d7d0a957baf4cc7225a50522adb0164bf130a9d0/rlcard/models/pretrained/leduc_holdem_cfr/policy.pkl -------------------------------------------------------------------------------- /rlcard/models/pretrained/leduc_holdem_cfr/regrets.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datamllab/rlcard/d7d0a957baf4cc7225a50522adb0164bf130a9d0/rlcard/models/pretrained/leduc_holdem_cfr/regrets.pkl -------------------------------------------------------------------------------- /rlcard/models/pretrained_models.py: -------------------------------------------------------------------------------- 1 | ''' Wrappers of pretrained models. 2 | ''' 3 | import os 4 | 5 | import rlcard 6 | from rlcard.agents import CFRAgent 7 | from rlcard.models.model import Model 8 | 9 | # Root path of pretrained models 10 | ROOT_PATH = os.path.join(rlcard.__path__[0], 'models/pretrained') 11 | 12 | class LeducHoldemCFRModel(Model): 13 | ''' A pretrained model on Leduc Holdem with CFR (chance sampling) 14 | ''' 15 | def __init__(self): 16 | ''' Load pretrained model 17 | ''' 18 | env = rlcard.make('leduc-holdem') 19 | self.agent = CFRAgent(env, model_path=os.path.join(ROOT_PATH, 'leduc_holdem_cfr')) 20 | self.agent.load() 21 | 22 | @property 23 | def agents(self): 24 | ''' Get a list of agents for each position in the game 25 | 26 | Returns: 27 | agents (list): A list of agents 28 | 29 | Note: Each agent should be just like RL agent with step and eval_step 30 | functioning well. 31 | ''' 32 | return [self.agent, self.agent] 33 | 34 | 
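In practice this class is not constructed directly; it is fetched through the model registry under the id registered in rlcard/models/__init__.py. A usage sketch (hypothetical; assumes the pretrained files ship with the package, as the setup.py package_data suggests):

```python
import rlcard
from rlcard import models

model = models.load('leduc-holdem-cfr')  # instantiates LeducHoldemCFRModel
env = rlcard.make('leduc-holdem')
env.set_agents(model.agents)             # the same CFR agent for both seats
```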
-------------------------------------------------------------------------------- /rlcard/models/registration.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | 3 | class ModelSpec(object): 4 | ''' A specification for a particular Model. 5 | ''' 6 | def __init__(self, model_id, entry_point=None): 7 | ''' Initialize 8 | 9 | Args: 10 | model_id (string): the name of the model 11 | entry_point (string): a string that indicates the location of the model class 12 | ''' 13 | self.model_id = model_id 14 | mod_name, class_name = entry_point.split(':') 15 | self._entry_point = getattr(importlib.import_module(mod_name), class_name) 16 | 17 | def load(self): 18 | ''' Instantiates an instance of the model 19 | 20 | Returns: 21 | Model (Model): an instance of the Model 22 | ''' 23 | model = self._entry_point() 24 | return model 25 | 26 | 27 | class ModelRegistry(object): 28 | ''' Register a model by ID 29 | ''' 30 | 31 | def __init__(self): 32 | ''' Initialize 33 | ''' 34 | self.model_specs = {} 35 | 36 | def register(self, model_id, entry_point): 37 | ''' Register a model 38 | 39 | Args: 40 | model_id (string): the name of the model 41 | entry_point (string): a string that indicates the location of the model class 42 | ''' 43 | if model_id in self.model_specs: 44 | raise ValueError('Cannot re-register model_id: {}'.format(model_id)) 45 | self.model_specs[model_id] = ModelSpec(model_id, entry_point) 46 | 47 | def load(self, model_id): 48 | ''' Create a model instance 49 | 50 | Args: 51 | model_id (string): the name of the model 52 | ''' 53 | if model_id not in self.model_specs: 54 | raise ValueError('Cannot find model_id: {}'.format(model_id)) 55 | return self.model_specs[model_id].load() 56 | 57 | # Have a global registry 58 | model_registry = ModelRegistry() 59 | 60 | 61 | def register(model_id, entry_point): 62 | ''' Register a model 63 | 64 | Args: 65 | model_id (string): the name of the model 66 | entry_point (string): a string that indicates the location of the model class 67 | ''' 68 | return model_registry.register(model_id, entry_point) 69 | 70 | def load(model_id): 71 | ''' Create a model instance 72 | 73 | Args: 74 | model_id (string): the name of the model 75 | ''' 76 | return model_registry.load(model_id) 77 | 
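The same mechanism supports user-defined models: register an id with an entry point of the form '<module>:<class>', then load it by id. A hypothetical sketch (my_package.my_models:MyModel is a placeholder, not part of rlcard):

```python
from rlcard.models.registration import register, load

register(model_id='my-rule-model',
         entry_point='my_package.my_models:MyModel')
model = load('my-rule-model')  # imports the module and instantiates MyModel
```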
-------------------------------------------------------------------------------- /rlcard/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from rlcard.utils.logger import Logger 2 | from rlcard.utils import seeding 3 | from rlcard.utils.utils import * 4 | from rlcard.utils.pettingzoo_utils import * 5 | -------------------------------------------------------------------------------- /rlcard/utils/logger.py: -------------------------------------------------------------------------------- 1 | import os 2 | import csv 3 | 4 | class Logger(object): 5 | ''' Logger saves the running results and helps make plots from the results 6 | ''' 7 | 8 | def __init__(self, log_dir): 9 | ''' Initialize the labels, legend and paths of the plot and log file. 10 | 11 | Args: 12 | log_dir (str): The path to the directory where the log files are saved 13 | ''' 14 | self.log_dir = log_dir 15 | 16 | def __enter__(self): 17 | self.txt_path = os.path.join(self.log_dir, 'log.txt') 18 | self.csv_path = os.path.join(self.log_dir, 'performance.csv') 19 | self.fig_path = os.path.join(self.log_dir, 'fig.png') 20 | 21 | if not os.path.exists(self.log_dir): 22 | os.makedirs(self.log_dir) 23 | 24 | self.txt_file = open(self.txt_path, 'w') 25 | self.csv_file = open(self.csv_path, 'w') 26 | fieldnames = ['episode', 'reward'] 27 | self.writer = csv.DictWriter(self.csv_file, fieldnames=fieldnames) 28 | self.writer.writeheader() 29 | 30 | return self 31 | 32 | def log(self, text): 33 | ''' Write the text to log file then print it. 34 | Args: 35 | text(string): text to log 36 | ''' 37 | self.txt_file.write(text+'\n') 38 | self.txt_file.flush() 39 | print(text) 40 | 41 | def log_performance(self, episode, reward): 42 | ''' Log a point in the curve 43 | Args: 44 | episode (int): the episode of the current point 45 | reward (float): the reward of the current point 46 | ''' 47 | self.writer.writerow({'episode': episode, 'reward': reward}) 48 | print('') 49 | self.log('----------------------------------------') 50 | self.log(' episode | ' + str(episode)) 51 | self.log(' reward | ' + str(reward)) 52 | self.log('----------------------------------------') 53 | 54 | def __exit__(self, type, value, traceback): 55 | if self.txt_path is not None: 56 | self.txt_file.close() 57 | if self.csv_path is not None: 58 | self.csv_file.close() 59 | print('\nLogs saved in', self.log_dir) 60 | 
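Logger is meant to be used as a context manager: __enter__ opens log.txt and performance.csv under log_dir, and __exit__ closes them. A short usage sketch (the reward value is a placeholder):

```python
from rlcard.utils import Logger

with Logger('experiments/demo') as logger:
    for episode in range(0, 500, 100):
        reward = 0.0  # e.g. the result of a tournament evaluation
        logger.log_performance(episode, reward)
# on exit, the files are closed and their location is printed
```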
48 | 49 | ''' 50 | new_trajectories = defaultdict(list) 51 | for agent_name, trajectory in trajectories.items(): 52 | for i in range(0, len(trajectory)-2, 2): 53 | transition = [ 54 | trajectory[i][0], # obs 55 | trajectory[i+1], # action 56 | trajectory[i+2][1], # reward 57 | trajectory[i+2][0], # next_obs 58 | trajectory[i+2][2], # done 59 | ] 60 | new_trajectories[agent_name].append(transition) 61 | return new_trajectories 62 | 63 | 64 | def tournament_pettingzoo(env, agents, num_episodes): 65 | total_rewards = defaultdict(float) 66 | for _ in range(num_episodes): 67 | trajectories = run_game_pettingzoo(env, agents) 68 | trajectories = reorganize_pettingzoo(trajectories) 69 | for agent_name, trajectory in trajectories.items(): 70 | reward = sum([t[2] for t in trajectory]) 71 | total_rewards[agent_name] += reward 72 | return {k: v / num_episodes for (k, v) in total_rewards.items()} 73 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | with open("README.md", "r", encoding="utf8") as fh: 4 | long_description = fh.read() 5 | 6 | extras = { 7 | 'torch': ['torch', 'GitPython', 'gitdb2', 'matplotlib'], 8 | } 9 | 10 | def _get_version(): 11 | with open('rlcard/__init__.py') as f: 12 | for line in f: 13 | if line.startswith('__version__'): 14 | g = {} 15 | exec(line, g) 16 | return g['__version__'] 17 | raise ValueError('`__version__` not defined') 18 | 19 | VERSION = _get_version() 20 | 21 | setuptools.setup( 22 | name="rlcard", 23 | version=VERSION, 24 | author="Data Analytics at Texas A&M (DATA) Lab", 25 | author_email="daochen.zha@tamu.edu", 26 | description="A Toolkit for Reinforcement Learning in Card Games", 27 | long_description=long_description, 28 | long_description_content_type="text/markdown", 29 | url="https://github.com/datamllab/rlcard", 30 | keywords=["Reinforcement Learning", "game", "RL", "AI"], 31 | packages=setuptools.find_packages(exclude=('tests',)), 32 | package_data={ 33 | 'rlcard': ['models/pretrained/leduc_holdem_cfr/*', 34 | 'games/uno/jsondata/action_space.json', 35 | 'games/limitholdem/card2index.json', 36 | 'games/leducholdem/card2index.json', 37 | 'games/doudizhu/jsondata.zip', 38 | 'games/uno/jsondata/*', 39 | ]}, 40 | install_requires=[ 41 | 'numpy>=1.16.3', 42 | 'termcolor' 43 | ], 44 | extras_require=extras, 45 | python_requires='>=3.7', 46 | classifiers=[ 47 | "Programming Language :: Python :: 3.11", 48 | "Programming Language :: Python :: 3.10", 49 | "Programming Language :: Python :: 3.9", 50 | "Programming Language :: Python :: 3.8", 51 | "Programming Language :: Python :: 3.7", 52 | "License :: OSI Approved :: MIT License", 53 | "Operating System :: OS Independent", 54 | ], 55 | ) 56 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datamllab/rlcard/d7d0a957baf4cc7225a50522adb0164bf130a9d0/tests/__init__.py -------------------------------------------------------------------------------- /tests/agents/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datamllab/rlcard/d7d0a957baf4cc7225a50522adb0164bf130a9d0/tests/agents/__init__.py -------------------------------------------------------------------------------- /tests/agents/test_cfr.py:
-------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | 4 | import rlcard 5 | from rlcard.agents.cfr_agent import CFRAgent 6 | 7 | class TestCFR(unittest.TestCase): 8 | 9 | def test_train(self): 10 | 11 | env = rlcard.make('leduc-holdem', config={'allow_step_back':True}) 12 | agent = CFRAgent(env, model_path='experiments/cfr_model') 13 | 14 | for _ in range(100): 15 | agent.train() 16 | 17 | state = {'obs': np.array([1., 1., 0., 0., 0., 0.]), 'legal_actions': {0: None, 2: None}, 'raw_legal_actions': ['call', 'fold']} 18 | action, _ = agent.eval_step(state) 19 | 20 | self.assertIn(action, [0, 2]) 21 | 22 | def test_save_and_load(self): 23 | env = rlcard.make('leduc-holdem', config={'allow_step_back':True}) 24 | agent = CFRAgent(env, model_path='experiments/cfr_model') 25 | 26 | for _ in range(100): 27 | agent.train() 28 | 29 | agent.save() 30 | 31 | new_agent = CFRAgent(env, model_path='experiments/cfr_model') 32 | new_agent.load() 33 | self.assertEqual(len(agent.policy), len(new_agent.policy)) 34 | self.assertEqual(len(agent.average_policy), len(new_agent.average_policy)) 35 | self.assertEqual(len(agent.regrets), len(new_agent.regrets)) 36 | self.assertEqual(agent.iteration, new_agent.iteration) 37 | 38 | -------------------------------------------------------------------------------- /tests/agents/test_dqn.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import torch 3 | import numpy as np 4 | 5 | from rlcard.agents.dqn_agent import DQNAgent 6 | 7 | class TestDQN(unittest.TestCase): 8 | 9 | def test_init(self): 10 | 11 | agent = DQNAgent(replay_memory_size=0, 12 | replay_memory_init_size=0, 13 | update_target_estimator_every=0, 14 | discount_factor=0, 15 | epsilon_start=0, 16 | epsilon_end=0, 17 | epsilon_decay_steps=0, 18 | batch_size=0, 19 | num_actions=2, 20 | state_shape=[1], 21 | mlp_layers=[10,10], 22 | device=torch.device('cpu')) 23 | 24 | self.assertEqual(agent.replay_memory_init_size, 0) 25 | self.assertEqual(agent.update_target_estimator_every, 0) 26 | self.assertEqual(agent.discount_factor, 0) 27 | self.assertEqual(agent.epsilon_decay_steps, 0) 28 | self.assertEqual(agent.batch_size, 0) 29 | self.assertEqual(agent.num_actions, 2) 30 | 31 | def test_train(self): 32 | 33 | memory_init_size = 100 34 | num_steps = 500 35 | 36 | agent = DQNAgent(replay_memory_size=200, 37 | replay_memory_init_size=memory_init_size, 38 | update_target_estimator_every=100, 39 | state_shape=[2], 40 | mlp_layers=[10,10], 41 | device=torch.device('cpu')) 42 | 43 | predicted_action, _ = agent.eval_step({'obs': np.random.random_sample((2,)), 'legal_actions': {0: None, 1: None}, 'raw_legal_actions': ['call', 'raise']}) 44 | self.assertGreaterEqual(predicted_action, 0) 45 | self.assertLessEqual(predicted_action, 1) 46 | 47 | for _ in range(num_steps): 48 | ts = [{'obs': np.random.random_sample((2,)), 'legal_actions': {0: None, 1: None}}, np.random.randint(2), 0, {'obs': np.random.random_sample((2,)), 'legal_actions': {0: None, 1: None}, 'raw_legal_actions': ['call', 'raise']}, True] 49 | agent.feed(ts) 50 | 51 | predicted_action = agent.step({'obs': np.random.random_sample((2,)), 'legal_actions': {0: None, 1: None}}) 52 | self.assertGreaterEqual(predicted_action, 0) 53 | self.assertLessEqual(predicted_action, 1) 54 | -------------------------------------------------------------------------------- /tests/agents/test_leduc_human.py:
-------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from rlcard.agents.human_agents.leduc_holdem_human_agent import _print_state 4 | 5 | class TestLeducHuman(unittest.TestCase): 6 | 7 | def test_print_state(self): 8 | raw_state = {'my_chips': 1, 'current_player': 0, 'all_chips': [1, 1], 'public_card': None, 'hand': 'SQ', 'legal_actions': ['raise', 'fold', 'check']} 9 | action_record = [] 10 | _print_state(raw_state, action_record) 11 | 12 | -------------------------------------------------------------------------------- /tests/agents/test_nfsp.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import torch 3 | import numpy as np 4 | 5 | from rlcard.agents.nfsp_agent import NFSPAgent 6 | 7 | class TestNFSP(unittest.TestCase): 8 | 9 | def test_init(self): 10 | 11 | agent = NFSPAgent(num_actions=10, 12 | state_shape=[10], 13 | hidden_layers_sizes=[10,10], 14 | q_mlp_layers=[10,10], 15 | device=torch.device('cpu')) 16 | 17 | self.assertEqual(agent._num_actions, 10) 18 | 19 | def test_train(self): 20 | 21 | memory_init_size = 20 22 | num_steps = 1000 23 | 24 | agent = NFSPAgent(num_actions=2, 25 | state_shape=[2], 26 | hidden_layers_sizes=[10,10], 27 | reservoir_buffer_capacity=50, 28 | batch_size=4, 29 | min_buffer_size_to_learn=memory_init_size, 30 | q_replay_memory_size=50, 31 | q_replay_memory_init_size=memory_init_size, 32 | q_batch_size=4, 33 | q_mlp_layers=[10,10], 34 | device=torch.device('cpu')) 35 | 36 | predicted_action, _ = agent.eval_step({'obs': np.random.random_sample((2,)), 'legal_actions': {0: None, 1: None}, 'raw_legal_actions': ['call', 'raise']}) 37 | self.assertGreaterEqual(predicted_action, 0) 38 | self.assertLessEqual(predicted_action, 1) 39 | 40 | for _ in range(num_steps): 41 | agent.sample_episode_policy() 42 | predicted_action = agent.step({'obs': np.random.random_sample((2,)), 'legal_actions': {0: None, 1: None}}) 43 | self.assertGreaterEqual(predicted_action, 0) 44 | self.assertLessEqual(predicted_action, 1) 45 | 46 | ts = [{'obs': np.random.random_sample((2,)), 'legal_actions': {0: None, 1: None}}, np.random.randint(2), 0, {'obs': np.random.random_sample((2,)), 'legal_actions': {0: None, 1: None}, 'raw_legal_actions': ['call', 'raise']}, True] 47 | agent.feed(ts) 48 | -------------------------------------------------------------------------------- /tests/agents/test_uno_human.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from rlcard.agents.human_agents.uno_human_agent import _print_state, _print_action 4 | 5 | class TestUnoHuman(unittest.TestCase): 6 | 7 | def test_print_state(self): 8 | raw_state = {'target': 'r-reverse', 'current_player': 0, 'legal_actions': ['r-2', 'r-draw_2'], 'hand': ['y-skip', 'y-draw_2', 'r-2', 'b-3', 'b-6', 'g-wild_draw_4', 'r-draw_2'], 'played_cards': ['g-reverse', 'r-reverse'], 'num_players': 2, 'others_hand': ['y-4', 'g-6', 'b-reverse', 'b-5', 'b-reverse', 'r-9'], 'num_cards': [7, 6]} 9 | action_record = [] 10 | _print_state(raw_state, action_record) 11 | 12 | def test_print_action(self): 13 | _print_action('r-8') 14 | -------------------------------------------------------------------------------- /tests/envs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datamllab/rlcard/d7d0a957baf4cc7225a50522adb0164bf130a9d0/tests/envs/__init__.py
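The environment test modules that follow all exercise the same core loop: make an environment, reset it, draw actions from state['legal_actions'], and step until the game is over. A minimal sketch of that loop with the random agent (the 'leduc-holdem' id is only an arbitrary example; any registered environment works):

import rlcard
from rlcard.agents.random_agent import RandomAgent

# Play one episode, choosing uniformly among the legal actions each turn.
env = rlcard.make('leduc-holdem')
agent = RandomAgent(env.num_actions)
state, player_id = env.reset()
while not env.is_over():
    action, _ = agent.eval_step(state)
    state, player_id = env.step(action)
print(env.get_payoffs())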
-------------------------------------------------------------------------------- /tests/envs/determism_util.py: -------------------------------------------------------------------------------- 1 | import rlcard 2 | from rlcard.agents.random_agent import RandomAgent 3 | import random 4 | import numpy as np 5 | import warnings 6 | def hash_observation(obs): 7 | try: 8 | val = hash(obs.tobytes()) 9 | return val 10 | except AttributeError: 11 | try: 12 | return hash(obs) 13 | except TypeError: 14 | warnings.warn("Observation not an int or a NumPy array") 15 | return 0 16 | 17 | def rand_iter(n): 18 | for _ in range(n+1): 19 | random.randint(0, 1000) 20 | np.random.normal(size=100) 21 | 22 | def gather_observations(env, actions, num_rand_steps): 23 | rand_iter(num_rand_steps) 24 | state, player_id = env.reset() 25 | rand_iter(num_rand_steps) 26 | 27 | action_idx = 0 28 | observations = [] 29 | while not env.is_over() and action_idx < len(actions): 30 | # Agent plays 31 | rand_iter(num_rand_steps) 32 | legals = list(state['legal_actions'].keys()) 33 | action = legals[actions[action_idx]%len(legals)] 34 | # Environment steps 35 | next_state, next_player_id = env.step(action) 36 | # Set the state and player 37 | state = next_state 38 | player_id = next_player_id 39 | 40 | action_idx += 1 41 | # Save state. 42 | if not env.game.is_over(): 43 | observations.append(state) 44 | 45 | return observations 46 | 47 | def is_deterministic(env_name): 48 | env = rlcard.make(env_name) 49 | 50 | NUM_STEPS = 25 51 | 52 | actions = [random.randrange(env.game.get_num_actions()) for _ in range(NUM_STEPS)] 53 | base_seed = 12941 54 | hashes = [] 55 | for rand_iters in range(2): 56 | env = rlcard.make(env_name, config={'seed': base_seed}) 57 | 58 | hashes.append(hash(tuple([hash_observation(obs['obs']) for obs in gather_observations(env, actions, rand_iters)]))) 59 | 60 | return hashes[0] == hashes[1] 61 | -------------------------------------------------------------------------------- /tests/envs/test_blackjack_env.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | 4 | import rlcard 5 | from rlcard.agents.random_agent import RandomAgent 6 | from .determism_util import is_deterministic 7 | 8 | class TestBlackjackEnv(unittest.TestCase): 9 | 10 | def test_init_and_extract_state(self): 11 | env = rlcard.make('blackjack') 12 | state, _ = env.reset() 13 | for score in state['obs']: 14 | self.assertLessEqual(score, 30) 15 | 16 | def test_is_deterministic(self): 17 | self.assertTrue(is_deterministic('blackjack')) 18 | 19 | def test_decode_action(self): 20 | env = rlcard.make('blackjack') 21 | self.assertEqual(env._decode_action(0), 'hit') 22 | self.assertEqual(env._decode_action(1), 'stand') 23 | 24 | def test_get_legal_actions(self): 25 | env = rlcard.make('blackjack') 26 | actions = env._get_legal_actions() 27 | self.assertEqual(len(actions), 2) 28 | self.assertEqual(actions[0], 0) 29 | self.assertEqual(actions[1], 1) 30 | 31 | def test_get_payoffs(self): 32 | env = rlcard.make('blackjack') 33 | for _ in range(100): 34 | env.reset() 35 | while not env.is_over(): 36 | action = np.random.choice([0, 1]) 37 | env.step(action) 38 | payoffs = env.get_payoffs() 39 | for payoff in payoffs: 40 | self.assertIn(payoff, [-1, 1, 0]) 41 | 42 | def test_step_back(self): 43 | env = rlcard.make('blackjack', config={'allow_step_back':True}) 44 | _, player_id = env.reset() 45 | env.step(1) 46 | _, back_player_id = env.step_back() 47 | self.assertEqual(player_id, back_player_id) 48
| self.assertEqual(env.step_back(), False) 49 | 50 | env = rlcard.make('blackjack') 51 | with self.assertRaises(Exception): 52 | env.step_back() 53 | 54 | def test_multiplayers(self): 55 | env = rlcard.make('blackjack', config={'game_num_players':5}) 56 | num_players = env.game.get_num_players() 57 | self.assertEqual(num_players, 5) 58 | 59 | def test_run(self): 60 | env = rlcard.make('blackjack') 61 | env.set_agents([RandomAgent(env.num_actions)]) 62 | trajectories, _ = env.run(is_training=False) 63 | self.assertEqual(len(trajectories), 1) 64 | trajectories, _ = env.run(is_training=True) 65 | self.assertEqual(len(trajectories), 1) 66 | 67 | if __name__ == '__main__': 68 | unittest.main() 69 | -------------------------------------------------------------------------------- /tests/envs/test_doudizhu_env.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import rlcard 4 | from rlcard.agents.random_agent import RandomAgent 5 | from .determism_util import is_deterministic 6 | 7 | 8 | class TestDoudizhuEnv(unittest.TestCase): 9 | 10 | def test_reset_and_extract_state(self): 11 | env = rlcard.make('doudizhu') 12 | state, _ = env.reset() 13 | self.assertEqual(state['obs'].size, 790) 14 | 15 | def test_is_deterministic(self): 16 | self.assertTrue(is_deterministic('doudizhu')) 17 | 18 | def test_get_legal_actions(self): 19 | env = rlcard.make('doudizhu') 20 | env.set_agents([RandomAgent(env.num_actions) for _ in range(env.num_players)]) 21 | env.reset() 22 | legal_actions = env._get_legal_actions() 23 | for legal_action in legal_actions: 24 | self.assertLessEqual(legal_action, env.num_actions-1) 25 | 26 | def test_step(self): 27 | env = rlcard.make('doudizhu') 28 | _, player_id = env.reset() 29 | player = env.game.players[player_id] 30 | _, next_player_id = env.step(env.num_actions-2) 31 | self.assertEqual(next_player_id, (player.player_id+1)%len(env.game.players)) 32 | 33 | def test_step_back(self): 34 | env = rlcard.make('doudizhu', config={'allow_step_back':True}) 35 | _, player_id = env.reset() 36 | env.step(2) 37 | _, back_player_id = env.step_back() 38 | self.assertEqual(player_id, back_player_id) 39 | self.assertEqual(env.step_back(), False) 40 | 41 | env = rlcard.make('doudizhu') 42 | with self.assertRaises(Exception): 43 | env.step_back() 44 | 45 | def test_run(self): 46 | env = rlcard.make('doudizhu') 47 | env.set_agents([RandomAgent(env.num_actions) for _ in range(env.num_players)]) 48 | trajectories, payoffs = env.run(is_training=False) 49 | self.assertEqual(len(trajectories), 3) 50 | win = [] 51 | for player_id, payoff in enumerate(payoffs): 52 | if payoff == 1: 53 | win.append(player_id) 54 | if len(win) == 1: 55 | self.assertEqual(env.game.players[win[0]].role, 'landlord') 56 | if len(win) == 2: 57 | self.assertEqual(env.game.players[win[0]].role, 'peasant') 58 | self.assertEqual(env.game.players[win[1]].role, 'peasant') 59 | 60 | def test_decode_action(self): 61 | env = rlcard.make('doudizhu') 62 | env.reset() 63 | env.game.state['actions'] = ['33366', '33355'] 64 | env.game.judger.playable_cards[0] = ['5', '6', '55', '555', '33366', '33355'] 65 | decoded = env._decode_action(3) 66 | self.assertEqual(decoded, '6') 67 | env.game.state['actions'] = ['444', '44466', '44455'] 68 | decoded = env._decode_action(29) 69 | self.assertEqual(decoded, '444') 70 | 71 | def test_get_perfect_information(self): 72 | env = rlcard.make('doudizhu') 73 | _, player_id = env.reset() 74 | self.assertEqual(player_id,
env.get_perfect_information()['current_player']) 75 | if __name__ == '__main__': 76 | unittest.main() 77 | -------------------------------------------------------------------------------- /tests/envs/test_gin_rummy_env.py: -------------------------------------------------------------------------------- 1 | ''' 2 | File name: tests/envs/test_gin_rummy_env.py 3 | Author: William Hale 4 | Date created: 4/20/2020 5 | ''' 6 | 7 | import unittest 8 | import numpy as np 9 | 10 | import rlcard 11 | from rlcard.agents.random_agent import RandomAgent 12 | from .determism_util import is_deterministic 13 | 14 | 15 | class TestGinRummyEnv(unittest.TestCase): 16 | 17 | def test_reset_and_extract_state(self): 18 | env = rlcard.make('gin-rummy') 19 | state, _ = env.reset() 20 | self.assertEqual(state['obs'].size, 5 * 52) 21 | 22 | def test_is_deterministic(self): 23 | self.assertTrue(is_deterministic('gin-rummy')) 24 | 25 | def test_get_legal_actions(self): 26 | env = rlcard.make('gin-rummy') 27 | env.set_agents([RandomAgent(env.num_actions) for _ in range(env.num_players)]) 28 | env.reset() 29 | legal_actions = env._get_legal_actions() 30 | for legal_action in legal_actions: 31 | self.assertLessEqual(legal_action, env.num_actions-1) 32 | 33 | def test_step(self): 34 | env = rlcard.make('gin-rummy') 35 | state, _ = env.reset() 36 | action = np.random.choice(list(state['legal_actions'].keys())) 37 | _, player_id = env.step(action) 38 | current_player_id = env.game.round.get_current_player().player_id 39 | self.assertEqual(player_id, current_player_id) 40 | 41 | def test_run(self): 42 | env = rlcard.make('gin-rummy') 43 | env.set_agents([RandomAgent(env.num_actions) for _ in range(env.num_players)]) 44 | trajectories, payoffs = env.run(is_training=False) 45 | self.assertEqual(len(trajectories), 2) 46 | for payoff in payoffs: 47 | self.assertLessEqual(-1, payoff) 48 | self.assertLessEqual(payoff, 1) 49 | trajectories, payoffs = env.run(is_training=True) 50 | for payoff in payoffs: 51 | self.assertLessEqual(-1, payoff) 52 | self.assertLessEqual(payoff, 1) 53 | 54 | 55 | if __name__ == '__main__': 56 | unittest.main() 57 | -------------------------------------------------------------------------------- /tests/envs/test_leducholdem_env.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | 4 | import rlcard 5 | from rlcard.agents.random_agent import RandomAgent 6 | from .determism_util import is_deterministic 7 | 8 | 9 | class TestLeducholdemEnv(unittest.TestCase): 10 | 11 | def test_reset_and_extract_state(self): 12 | env = rlcard.make('leduc-holdem') 13 | state, _ = env.reset() 14 | self.assertEqual(state['obs'].size, 36) 15 | for action in state['legal_actions']: 16 | self.assertLess(action, env.num_actions) 17 | 18 | def test_is_deterministic(self): 19 | self.assertTrue(is_deterministic('leduc-holdem')) 20 | 21 | def test_get_legal_actions(self): 22 | env = rlcard.make('leduc-holdem') 23 | env.reset() 24 | legal_actions = env._get_legal_actions() 25 | for action in legal_actions: 26 | self.assertIn(action, env.actions) 27 | 28 | def test_decode_action(self): 29 | env = rlcard.make('leduc-holdem') 30 | state, _ = env.reset() 31 | for action in state['legal_actions']: 32 | decoded = env._decode_action(action) 33 | self.assertIn(decoded, env.actions) 34 | 35 | def test_step(self): 36 | env = rlcard.make('leduc-holdem') 37 | state, player_id = env.reset() 38 | self.assertEqual(player_id, env.get_player_id()) 39 | action = 
list(state['legal_actions'].keys())[0] 40 | _, player_id = env.step(action) 41 | self.assertEqual(player_id, env.get_player_id()) 42 | 43 | def test_step_back(self): 44 | env = rlcard.make('leduc-holdem', config={'allow_step_back':True}) 45 | _, player_id = env.reset() 46 | env.step(0) 47 | _, back_player_id = env.step_back() 48 | self.assertEqual(player_id, back_player_id) 49 | self.assertEqual(env.step_back(), False) 50 | 51 | env = rlcard.make('leduc-holdem') 52 | with self.assertRaises(Exception): 53 | env.step_back() 54 | 55 | def test_run(self): 56 | env = rlcard.make('leduc-holdem') 57 | agents = [RandomAgent(env.num_actions) for _ in range(env.num_players)] 58 | env.set_agents(agents) 59 | trajectories, payoffs = env.run(is_training=False) 60 | self.assertEqual(len(trajectories), 2) 61 | total = 0 62 | for payoff in payoffs: 63 | total += payoff 64 | self.assertEqual(total, 0) 65 | 66 | def test_get_perfect_information(self): 67 | env = rlcard.make('leduc-holdem') 68 | _, player_id = env.reset() 69 | self.assertEqual(player_id, env.get_perfect_information()['current_player']) 70 | 71 | 72 | if __name__ == '__main__': 73 | unittest.main() 74 | -------------------------------------------------------------------------------- /tests/envs/test_limitholdem_env.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import rlcard 4 | from rlcard.agents.random_agent import RandomAgent 5 | from .determism_util import is_deterministic 6 | 7 | 8 | class TestLimitholdemEnv(unittest.TestCase): 9 | 10 | def test_reset_and_extract_state(self): 11 | env = rlcard.make('limit-holdem') 12 | state, _ = env.reset() 13 | self.assertEqual(state['obs'].size, 72) 14 | for action in state['legal_actions']: 15 | self.assertLess(action, env.num_actions) 16 | 17 | def test_is_deterministic(self): 18 | self.assertTrue(is_deterministic('limit-holdem')) 19 | 20 | def test_get_legal_actions(self): 21 | env = rlcard.make('limit-holdem') 22 | env.reset() 23 | legal_actions = env._get_legal_actions() 24 | for action in legal_actions: 25 | self.assertIn(action, env.actions) 26 | 27 | def test_decode_action(self): 28 | env = rlcard.make('limit-holdem') 29 | state, _ = env.reset() 30 | for action in state['legal_actions']: 31 | decoded = env._decode_action(action) 32 | self.assertIn(decoded, env.actions) 33 | 34 | decoded = env._decode_action(3) 35 | self.assertEqual(decoded, 'fold') 36 | 37 | env.step(0) 38 | decoded = env._decode_action(0) 39 | self.assertEqual(decoded, 'check') 40 | 41 | def test_step(self): 42 | env = rlcard.make('limit-holdem') 43 | state, player_id = env.reset() 44 | self.assertEqual(player_id, env.get_player_id()) 45 | action = list(state['legal_actions'].keys())[0] 46 | _, player_id = env.step(action) 47 | self.assertEqual(player_id, env.get_player_id()) 48 | 49 | def test_step_back(self): 50 | env = rlcard.make('limit-holdem', config={'allow_step_back':True}) 51 | _, player_id = env.reset() 52 | env.step(0) 53 | _, back_player_id = env.step_back() 54 | self.assertEqual(player_id, back_player_id) 55 | self.assertEqual(env.step_back(), False) 56 | 57 | env = rlcard.make('limit-holdem') 58 | with self.assertRaises(Exception): 59 | env.step_back() 60 | 61 | def test_run(self): 62 | env = rlcard.make('limit-holdem') 63 | agents = [RandomAgent(env.num_actions) for _ in range(env.num_players)] 64 | env.set_agents(agents) 65 | trajectories, payoffs = env.run(is_training=False) 66 | self.assertEqual(len(trajectories), 2) 67 | total = 0 68 | for 
payoff in payoffs: 69 | total += payoff 70 | self.assertEqual(total, 0) 71 | 72 | def test_get_perfect_information(self): 73 | env = rlcard.make('limit-holdem') 74 | _, player_id = env.reset() 75 | self.assertEqual(player_id, env.get_perfect_information()['current_player']) 76 | 77 | def test_multiplayers(self): 78 | env = rlcard.make('limit-holdem', config={'game_num_players':5}) 79 | num_players = env.game.get_num_players() 80 | self.assertEqual(num_players, 5) 81 | 82 | if __name__ == '__main__': 83 | unittest.main() 84 | -------------------------------------------------------------------------------- /tests/envs/test_mahjong.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | 4 | import rlcard 5 | from rlcard.agents.random_agent import RandomAgent 6 | from .determism_util import is_deterministic 7 | 8 | class TestMahjongEnv(unittest.TestCase): 9 | 10 | def test_reset_and_extract_state(self): 11 | env = rlcard.make('mahjong') 12 | state, _ = env.reset() 13 | self.assertEqual(state['obs'].size, 816) 14 | 15 | def test_is_deterministic(self): 16 | self.assertTrue(is_deterministic('mahjong')) 17 | 18 | def test_get_legal_actions(self): 19 | env = rlcard.make('mahjong') 20 | env.set_agents([RandomAgent(env.num_actions) for _ in range(env.num_players)]) 21 | env.reset() 22 | legal_actions = env._get_legal_actions() 23 | for legal_action in legal_actions: 24 | self.assertLessEqual(legal_action, env.num_actions-1) 25 | 26 | def test_step(self): 27 | env = rlcard.make('mahjong') 28 | state, _ = env.reset() 29 | action = np.random.choice(list(state['legal_actions'].keys())) 30 | _, player_id = env.step(action) 31 | self.assertEqual(player_id, env.game.round.current_player) 32 | 33 | def test_step_back(self): 34 | env = rlcard.make('mahjong', config={'allow_step_back':True}) 35 | state, player_id = env.reset() 36 | action = np.random.choice(list(state['legal_actions'].keys())) 37 | env.step(action) 38 | env.step_back() 39 | self.assertEqual(env.game.round.current_player, player_id) 40 | 41 | env = rlcard.make('mahjong', config={'allow_step_back':False}) 42 | state, player_id = env.reset() 43 | action = np.random.choice(list(state['legal_actions'].keys())) 44 | env.step(action) 45 | # env.step_back() 46 | self.assertRaises(Exception, env.step_back) 47 | 48 | def test_run(self): 49 | env = rlcard.make('mahjong') 50 | env.set_agents([RandomAgent(env.num_actions) for _ in range(env.num_players)]) 51 | trajectories, payoffs = env.run(is_training=False) 52 | trajectories, payoffs = env.run(is_training=True) 53 | 54 | if __name__ == '__main__': 55 | unittest.main() 56 | -------------------------------------------------------------------------------- /tests/envs/test_nolimitholdem_env.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import rlcard 4 | from rlcard.agents.random_agent import RandomAgent 5 | from rlcard.games.nolimitholdem.round import Action 6 | from .determism_util import is_deterministic 7 | 8 | 9 | class TestNolimitholdemEnv(unittest.TestCase): 10 | 11 | def test_reset_and_extract_state(self): 12 | env = rlcard.make('no-limit-holdem') 13 | state, _ = env.reset() 14 | self.assertEqual(state['obs'].size, 54) 15 | 16 | def test_is_deterministic(self): 17 | self.assertTrue(is_deterministic('no-limit-holdem')) 18 | 19 | def test_get_legal_actions(self): 20 | env = rlcard.make('no-limit-holdem') 21 | env.reset() 22 | legal_actions = 
env._get_legal_actions() 23 | for action in legal_actions: 24 | self.assertIn(action, env.actions) 25 | 26 | def test_decode_action(self): 27 | env = rlcard.make('no-limit-holdem') 28 | state, _ = env.reset() 29 | for action in state['legal_actions']: 30 | decoded = env._decode_action(action) 31 | self.assertIn(decoded, env.actions) 32 | 33 | decoded = env._decode_action(Action.FOLD.value) 34 | self.assertEqual(decoded, Action.FOLD) 35 | 36 | env.step(0) 37 | decoded = env._decode_action(1) 38 | self.assertEqual(decoded, Action.CHECK_CALL) 39 | 40 | def test_step(self): 41 | env = rlcard.make('no-limit-holdem') 42 | state, player_id = env.reset() 43 | self.assertEqual(player_id, env.get_player_id()) 44 | action = list(state['legal_actions'].keys())[0] 45 | _, player_id = env.step(action) 46 | self.assertEqual(player_id, env.get_player_id()) 47 | 48 | def test_step_back(self): 49 | env = rlcard.make('no-limit-holdem', config={'allow_step_back':True}) 50 | _, player_id = env.reset() 51 | env.step(0) 52 | _, back_player_id = env.step_back() 53 | self.assertEqual(player_id, back_player_id) 54 | self.assertEqual(env.step_back(), False) 55 | 56 | env = rlcard.make('no-limit-holdem') 57 | with self.assertRaises(Exception): 58 | env.step_back() 59 | 60 | def test_run(self): 61 | env = rlcard.make('no-limit-holdem') 62 | agents = [RandomAgent(env.num_actions) for _ in range(env.num_players)] 63 | env.set_agents(agents) 64 | trajectories, payoffs = env.run(is_training=False) 65 | self.assertEqual(len(trajectories), 2) 66 | total = 0 67 | for payoff in payoffs: 68 | total += payoff 69 | self.assertEqual(total, 0) 70 | 71 | def test_get_perfect_information(self): 72 | env = rlcard.make('no-limit-holdem') 73 | _, player_id = env.reset() 74 | self.assertEqual(player_id, env.get_perfect_information()['current_player']) 75 | 76 | def test_multiplayers(self): 77 | env = rlcard.make('no-limit-holdem', config={'game_num_players':5}) 78 | num_players = env.game.get_num_players() 79 | self.assertEqual(num_players, 5) 80 | 81 | def test_config_chips(self): 82 | env = rlcard.make('no-limit-holdem', config={'game_num_players':5, 'chips_for_each':100}) 83 | env.game.init_game() 84 | players = env.game.players 85 | chips = [] 86 | for i in range(5): 87 | chips.append(players[i].remained_chips + players[i].in_chips) 88 | self.assertEqual(chips, [100, 100, 100, 100, 100]) 89 | 90 | if __name__ == '__main__': 91 | unittest.main() 92 | -------------------------------------------------------------------------------- /tests/envs/test_registration.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import rlcard 4 | from rlcard.envs.registration import register, make 5 | from .determism_util import is_deterministic 6 | 7 | 8 | class TestRegistration(unittest.TestCase): 9 | 10 | def test_register(self): 11 | register(env_id='test_reg', entry_point='rlcard.envs.blackjack:BlackjackEnv') 12 | with self.assertRaises(ValueError): 13 | register(env_id='test_reg', entry_point='rlcard.envs.blackjack:BlackjackEnv') 14 | 15 | def test_make(self): 16 | register(env_id='test_make', entry_point='rlcard.envs.blackjack:BlackjackEnv') 17 | env = rlcard.make('test_make') 18 | _, player = env.reset() 19 | self.assertEqual(player, 0) 20 | with self.assertRaises(ValueError): 21 | make('test_random_make') 22 | 23 | def test_make_modes(self): 24 | register(env_id='test_env', entry_point='rlcard.envs.blackjack:BlackjackEnv') 25 | 26 | if __name__ == '__main__': 27 | unittest.main() 28 
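The register/make round trip tested above is also the pattern for exposing a custom environment: register an id that points at an Env subclass, then create instances through rlcard.make. A minimal sketch (the id 'my-blackjack' is a hypothetical example, with BlackjackEnv standing in for a user-defined class):

import rlcard
from rlcard.envs.registration import register

# Register the id once; registering it a second time raises ValueError,
# exactly as test_register above checks.
register(env_id='my-blackjack', entry_point='rlcard.envs.blackjack:BlackjackEnv')
env = rlcard.make('my-blackjack')
state, player_id = env.reset()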
-------------------------------------------------------------------------------- /tests/envs/test_uno_env.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | 4 | import rlcard 5 | from rlcard.agents.random_agent import RandomAgent 6 | from rlcard.games.uno.utils import ACTION_LIST 7 | from .determism_util import is_deterministic 8 | 9 | 10 | class TestUnoEnv(unittest.TestCase): 11 | 12 | def test_reset_and_extract_state(self): 13 | env = rlcard.make('uno') 14 | state, _ = env.reset() 15 | self.assertEqual(state['obs'].size, 240) 16 | 17 | def test_is_deterministic(self): 18 | self.assertTrue(is_deterministic('uno')) 19 | 20 | def test_get_legal_actions(self): 21 | env = rlcard.make('uno') 22 | env.set_agents([RandomAgent(env.num_actions) for _ in range(env.num_players)]) 23 | env.reset() 24 | legal_actions = env._get_legal_actions() 25 | for legal_action in legal_actions: 26 | self.assertLessEqual(legal_action, 60) 27 | 28 | def test_step(self): 29 | env = rlcard.make('uno') 30 | state, _ = env.reset() 31 | action = np.random.choice(list(state['legal_actions'].keys())) 32 | _, player_id = env.step(action) 33 | self.assertEqual(player_id, env.game.round.current_player) 34 | 35 | def test_step_back(self): 36 | env = rlcard.make('uno', config={'allow_step_back':True}) 37 | state, player_id = env.reset() 38 | action = np.random.choice(list(state['legal_actions'].keys())) 39 | env.step(action) 40 | env.step_back() 41 | self.assertEqual(env.game.round.current_player, player_id) 42 | 43 | env = rlcard.make('uno', config={'allow_step_back':False}) 44 | state, player_id = env.reset() 45 | action = np.random.choice(list(state['legal_actions'].keys())) 46 | env.step(action) 47 | # env.step_back() 48 | self.assertRaises(Exception, env.step_back) 49 | 50 | def test_run(self): 51 | env = rlcard.make('uno') 52 | env.set_agents([RandomAgent(env.num_actions) for _ in range(env.num_players)]) 53 | trajectories, payoffs = env.run(is_training=False) 54 | self.assertEqual(len(trajectories), 2) 55 | total = 0 56 | for payoff in payoffs: 57 | total += payoff 58 | self.assertEqual(total, 0) 59 | trajectories, payoffs = env.run(is_training=True) 60 | total = 0 61 | for payoff in payoffs: 62 | total += payoff 63 | self.assertEqual(total, 0) 64 | 65 | def test_decode_action(self): 66 | env = rlcard.make('uno') 67 | env.reset() 68 | legal_actions = env._get_legal_actions() 69 | for legal_action in legal_actions: 70 | decoded = env._decode_action(legal_action) 71 | self.assertEqual(decoded, ACTION_LIST[legal_action]) 72 | 73 | def test_get_perfect_information(self): 74 | env = rlcard.make('uno') 75 | _, player_id = env.reset() 76 | self.assertEqual(player_id, env.get_perfect_information()['current_player']) 77 | if __name__ == '__main__': 78 | unittest.main() 79 | -------------------------------------------------------------------------------- /tests/games/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datamllab/rlcard/d7d0a957baf4cc7225a50522adb0164bf130a9d0/tests/games/__init__.py -------------------------------------------------------------------------------- /tests/games/test_blackjack_game.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | 4 | from rlcard.games.blackjack.game import BlackjackGame as Game 5 | from rlcard.envs.blackjack import DEFAULT_GAME_CONFIG 6 | 7 |
class TestBlackjackGame(unittest.TestCase): 8 | 9 | def test_get_num_players(self): 10 | game = Game() 11 | game.configure(DEFAULT_GAME_CONFIG) 12 | num_players = game.get_num_players() 13 | self.assertEqual(num_players, 1) 14 | 15 | def test_get_num_actions(self): 16 | game = Game() 17 | game.configure(DEFAULT_GAME_CONFIG) 18 | num_actions = game.get_num_actions() 19 | self.assertEqual(num_actions, 2) 20 | 21 | def test_init_game(self): 22 | game = Game() 23 | game.configure(DEFAULT_GAME_CONFIG) 24 | state, current_player = game.init_game() 25 | self.assertEqual(len(game.history), 0) 26 | self.assertEqual(current_player, 0) 27 | self.assertEqual(game.winner['dealer'], 0) 28 | self.assertEqual(len(state['state'][0]), len(state['state'][1])+1) 29 | 30 | def test_step(self): 31 | game = Game() 32 | game.configure(DEFAULT_GAME_CONFIG) 33 | game.init_game() 34 | next_state, next_player = game.step('hit') 35 | self.assertEqual(next_player, 0) 36 | if game.players[0].status != 'bust': 37 | self.assertEqual(len(game.dealer.hand), len(next_state['state'][1])+1) 38 | else: 39 | self.assertEqual(len(game.dealer.hand), len(next_state['state'][1])) 40 | next_state, _ = game.step('stand') 41 | self.assertEqual(len(next_state['state'][0]), len(game.players[0].hand)) 42 | 43 | def test_proceed_game(self): 44 | game = Game() 45 | game.configure(DEFAULT_GAME_CONFIG) 46 | game.init_game() 47 | while not game.is_over(): 48 | action = np.random.choice(['hit', 'stand']) 49 | state, _ = game.step(action) 50 | self.assertEqual(len(state['state'][1]), len(game.dealer.hand)) 51 | 52 | def test_step_back(self): 53 | game = Game(allow_step_back=True) 54 | game.configure(DEFAULT_GAME_CONFIG) 55 | state, _ = game.init_game() 56 | init_hand = state['state'][0] 57 | game.step('hit') 58 | game.step_back() 59 | test_hand = game.get_state(0)['state'][0] 60 | self.assertEqual(init_hand, test_hand) 61 | self.assertEqual(len(game.history), 0) 62 | success = game.step_back() 63 | self.assertEqual(success, False) 64 | 65 | def test_get_state(self): 66 | game = Game() 67 | game.configure(DEFAULT_GAME_CONFIG) 68 | game.init_game() 69 | self.assertEqual(len(game.get_state(0)['state'][1]), 1) 70 | game.step('stand') 71 | self.assertGreater(len(game.get_state(0)['state'][1]), 1) 72 | 73 | if __name__ == '__main__': 74 | unittest.main() 75 | -------------------------------------------------------------------------------- /tests/games/test_mahjong_game.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | 4 | from rlcard.games.mahjong.game import MahjongGame as Game 5 | from rlcard.games.mahjong.player import MahjongPlayer as Player 6 | 7 | class TestMahjongMethods(unittest.TestCase): 8 | 9 | def test_get_num_players(self): 10 | game = Game() 11 | num_players = game.get_num_players() 12 | self.assertEqual(num_players, 4) 13 | 14 | def test_get_num_actions(self): 15 | game = Game() 16 | num_actions = game.get_num_actions() 17 | self.assertEqual(num_actions, 38) 18 | 19 | def test_init_game(self): 20 | game = Game() 21 | state, _ = game.init_game() 22 | total_cards = list(state['current_hand']) 23 | self.assertGreaterEqual(len(total_cards), 14) 24 | 25 | def test_get_player_id(self): 26 | game = Game() 27 | _, player_id = game.init_game() 28 | current = game.get_player_id() 29 | self.assertEqual(player_id, current) 30 | 31 | 32 | def test_get_legal_actions(self): 33 | game = Game() 34 | state, _ = game.init_game() 35 | actions = game.get_legal_actions(state)
36 | for action in actions: 37 | self.assertIn(action, state['current_hand']) 38 | 39 | def test_step(self): 40 | game = Game() 41 | state, _ = game.init_game() 42 | action = np.random.choice(game.get_legal_actions(state)) 43 | state, next_player_id = game.step(action) 44 | current = game.round.current_player 45 | self.assertLessEqual(len(state['current_hand']), 14) 46 | self.assertEqual(next_player_id, current) 47 | 48 | def test_get_payoffs(self): 49 | game = Game() 50 | state, _ = game.init_game() 51 | while not game.is_over(): 52 | actions = game.get_legal_actions(state) 53 | action = np.random.choice(actions) 54 | state, _ = game.step(action) 55 | total_cards = len(state['current_hand']) 56 | self.assertLessEqual(total_cards, 14) 57 | win = game.is_over() 58 | self.assertEqual(win, True) 59 | 60 | def test_step_back(self): 61 | game = Game(allow_step_back=True) 62 | state, player_id = game.init_game() 63 | action = np.random.choice(game.get_legal_actions(state)) 64 | game.step(action) 65 | game.step_back() 66 | self.assertEqual(game.round.current_player, player_id) 67 | self.assertEqual(len(game.history), 0) 68 | success = game.step_back() 69 | self.assertEqual(success, False) 70 | 71 | def test_player_get_player_id(self): 72 | player = Player(0, np.random.RandomState()) 73 | self.assertEqual(0, player.get_player_id()) 74 | 75 | if __name__ == '__main__': 76 | unittest.main() 77 | -------------------------------------------------------------------------------- /tests/games/test_nolimitholdem_judger.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from rlcard.games.nolimitholdem.player import NolimitholdemPlayer as Player 4 | from rlcard.games.base import Card 5 | from rlcard.games.limitholdem.judger import LimitHoldemJudger as Judger 6 | from rlcard.games.limitholdem.utils import Hand 7 | 8 | 9 | rand_state = np.random.RandomState() 10 | 11 | class TestNolimitholdemGame(unittest.TestCase): 12 | 13 | def get_players(self, num_players=2): 14 | players = [] 15 | 16 | for i in range(num_players): 17 | players.append(Player(i, 100 + 100*i, rand_state)) 18 | players[i].bet(players[i].remained_chips) # All in 19 | 20 | return players 21 | 22 | def get_hands(self, player_hands, public_card): 23 | hands = [] 24 | for hand in player_hands: 25 | hands.append(hand + public_card) 26 | return hands 27 | 28 | def test_judge_with_4_players(self): 29 | 30 | ''' 31 | suit_list = ['S', 'H', 'D', 'C'] 32 | rank_list = ['A', '2', '3', '4', '5', '6', '7', '8', '9', 'T', 'J', 'Q', 'K'] 33 | ''' 34 | players = self.get_players(4) 35 | 36 | 37 | public_card = [Card('S', 'A'), Card('S', 'K'), Card('S', 'Q'), Card('S', '2'), Card('S', '3')] 38 | hands = [[Card('S', 'J'), Card('S', 'T')], 39 | [Card('S', '4'), Card('S', '5')], 40 | [Card('S', '9'), Card('C', 'T')], 41 | [Card('H', 'T'), Card('C', 'J')]] 42 | 43 | payoffs = Judger(rand_state).judge_game(players, self.get_hands(hands, public_card)) 44 | self.assertEqual(payoffs, [300, 100, -100, -300]) 45 | 46 | public_card = [Card('H', 'A'), Card('H', 'K'), Card('S', 'Q'), Card('S', 'T'), Card('S', '9')] 47 | 48 | hands = [[Card('S', 'A'), Card('H', '4')], 49 | [Card('D', 'A'), Card('H', '5')], 50 | [Card('D', 'K'), Card('H', '6')], 51 | [Card('S', 'K'), Card('H', '7')]] 52 | 53 | payoffs = Judger(rand_state).judge_game(players, self.get_hands(hands, public_card)) 54 | self.assertEqual(payoffs, [100, 300, -200, -200]) 55 | 56 | def test_judge_with_6_players(self): 57 | rand_state = 
np.random.RandomState() 58 | 59 | public_card = [Card('S', 'A'), Card('S', 'K'), Card('D', 'Q'), Card('D', 'T'), Card('C', '9')] 60 | players = self.get_players(6) 61 | 62 | hands = [[Card('C', 'A'), Card('H', '2')], 63 | [Card('D', 'A'), Card('H', '3')], 64 | [Card('C', 'K'), Card('C', '2')], 65 | [Card('D', 'K'), Card('C', '3')], 66 | [Card('C', 'Q'), Card('S', '2')], 67 | [Card('D', 'Q'), Card('S', '3')]] 68 | 69 | payoffs = Judger(rand_state).judge_game(players, self.get_hands(hands, public_card)) 70 | self.assertEqual(payoffs, [200, 600, -100, 100, -400, -400]) 71 | 72 | 73 | if __name__ == '__main__': 74 | unittest.main() 75 | -------------------------------------------------------------------------------- /tests/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datamllab/rlcard/d7d0a957baf4cc7225a50522adb0164bf130a9d0/tests/models/__init__.py -------------------------------------------------------------------------------- /tests/models/test_model_registeration.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from rlcard import models 4 | from rlcard.models.registration import register, load 5 | 6 | 7 | class TestRegistration(unittest.TestCase): 8 | 9 | def test_register(self): 10 | register(model_id='test_reg', entry_point='rlcard.models.pretrained_models:LeducHoldemCFRModel') 11 | with self.assertRaises(ValueError): 12 | register(model_id='test_reg', entry_point='rlcard.models.pretrained_models:LeducHoldemCFRModel') 13 | 14 | def test_load(self): 15 | register(model_id='test_load', entry_point='rlcard.models.pretrained_models:LeducHoldemCFRModel') 16 | models.load('test_load') 17 | with self.assertRaises(ValueError): 18 | load('test_random_make') 19 | 20 | if __name__ == '__main__': 21 | unittest.main() 22 | -------------------------------------------------------------------------------- /tests/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datamllab/rlcard/d7d0a957baf4cc7225a50522adb0164bf130a9d0/tests/utils/__init__.py -------------------------------------------------------------------------------- /tests/utils/test_logger.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import os 3 | import shutil 4 | 5 | from rlcard.utils.logger import Logger 6 | 7 | class TestLogger(unittest.TestCase): 8 | 9 | def test_log(self): 10 | log_dir = "experiments/newtest/test_log.txt" 11 | if os.path.exists(log_dir): 12 | shutil.rmtree(log_dir) 13 | with Logger(log_dir) as logger: 14 | logger.log("test text") 15 | logger.log_performance(1, 1) 16 | logger.log_performance(2, 2) 17 | logger.log_performance(3, 3) 18 | 19 | if __name__ == '__main__': 20 | unittest.main() 21 | -------------------------------------------------------------------------------- /tests/utils/test_utils.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from rlcard.utils.utils import init_54_deck, init_standard_deck, rank2int, print_card, elegent_form, reorganize, tournament 4 | import rlcard 5 | from rlcard.agents.random_agent import RandomAgent 6 | 7 | class TestUtils(unittest.TestCase): 8 | 9 | def test_init_standard_deck(self): 10 | self.assertEqual(len(init_standard_deck()), 52) 11 | 12 | def test_init_54_deck(self): 13 | 
self.assertEqual(len(init_54_deck()), 54) 14 | 15 | def test_rank2int(self): 16 | self.assertEqual(rank2int('A'), 14) 17 | self.assertEqual(rank2int(''), -1) 18 | self.assertEqual(rank2int('3'), 3) 19 | self.assertEqual(rank2int('T'), 10) 20 | self.assertEqual(rank2int('J'), 11) 21 | self.assertEqual(rank2int('Q'), 12) 22 | self.assertEqual(rank2int('1000'), None) 23 | self.assertEqual(rank2int('abc123'), None) 24 | self.assertEqual(rank2int('K'), 13) 25 | 26 | def test_print_cards(self): 27 | self.assertEqual(len(elegent_form('S9')), 2) 28 | self.assertEqual(len(elegent_form('ST')), 3) 29 | 30 | print_card(None) 31 | print_card('S9') 32 | print_card('ST') 33 | 34 | def test_reorganize(self): 35 | trajectories = reorganize([[[1,2],1,[4,5]]], [1]) 36 | self.assertEqual(len(trajectories), 1) 37 | self.assertEqual(len(trajectories[0]), 1) 38 | self.assertEqual(len(trajectories[0][0]), 5) 39 | 40 | def test_tournament(self): 41 | env = rlcard.make('leduc-holdem') 42 | env.set_agents([RandomAgent(env.num_actions), RandomAgent(env.num_actions)]) 43 | payoffs = tournament(env, 1000) 44 | self.assertEqual(len(payoffs), 2) 45 | 46 | if __name__ == '__main__': 47 | unittest.main() 48 | --------------------------------------------------------------------------------
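Taken together, these utilities form the evaluation loop used throughout the examples: attach agents to an environment, average payoffs over a tournament, and record the curve with Logger. A minimal sketch combining them (the log directory and the episode count are arbitrary choices):

import rlcard
from rlcard.agents.random_agent import RandomAgent
from rlcard.utils.logger import Logger
from rlcard.utils.utils import tournament

# Evaluate two random agents head to head and log the average payoff.
env = rlcard.make('leduc-holdem')
env.set_agents([RandomAgent(env.num_actions) for _ in range(env.num_players)])
with Logger('experiments/random_leduc') as logger:
    payoffs = tournament(env, 100)  # average payoff per player over 100 episodes
    logger.log_performance(episode=0, reward=payoffs[0])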