├── .coveralls.yml ├── .github └── workflows │ └── python-package.yml ├── .gitignore ├── CONTRIBUTING.md ├── LICENSE.md ├── README.md ├── README.zh-CN.md ├── docs ├── README.md ├── adding-models.md ├── adding-new-environments.md ├── algorithms.md ├── customizing-environments.md ├── developping-algorithms.md ├── games.md ├── high-level-design.md └── toy-examples.md ├── examples ├── evaluate.py ├── human │ ├── blackjack_human.py │ ├── gin_rummy_human.py │ ├── leduc_holdem_human.py │ ├── limit_holdem_human.py │ ├── nolimit_holdem_human.py │ └── uno_human.py ├── pettingzoo │ ├── README.md │ ├── run_dmc.py │ └── run_rl.py ├── run_cfr.py ├── run_dmc.py ├── run_random.py ├── run_rl.py └── scripts │ ├── dmc_doudizhu_1_gpu.sh │ └── dmc_doudizhu_4_gpu.sh ├── rlcard ├── __init__.py ├── agents │ ├── __init__.py │ ├── cfr_agent.py │ ├── dmc_agent │ │ ├── __init__.py │ │ ├── file_writer.py │ │ ├── model.py │ │ ├── pettingzoo_model.py │ │ ├── pettingzoo_utils.py │ │ ├── trainer.py │ │ └── utils.py │ ├── dqn_agent.py │ ├── human_agents │ │ ├── __init__.py │ │ ├── blackjack_human_agent.py │ │ ├── gin_rummy_human_agent │ │ │ ├── __init__.py │ │ │ ├── gin_rummy_human_agent.py │ │ │ ├── gui_cards │ │ │ │ ├── __init__.py │ │ │ │ └── card_image.py │ │ │ └── gui_gin_rummy │ │ │ │ ├── Gin-Rummy-GUI-Design.md │ │ │ │ ├── __init__.py │ │ │ │ ├── canvas_item.py │ │ │ │ ├── configurations.py │ │ │ │ ├── env_thread.py │ │ │ │ ├── game_app.py │ │ │ │ ├── game_canvas.py │ │ │ │ ├── game_canvas_debug.py │ │ │ │ ├── game_canvas_getter.py │ │ │ │ ├── game_canvas_post_doing_action.py │ │ │ │ ├── game_canvas_query.py │ │ │ │ ├── game_canvas_updater.py │ │ │ │ ├── game_frame.py │ │ │ │ ├── game_options.ini │ │ │ │ ├── handling_tap.py │ │ │ │ ├── handling_tap_discard_pile.py │ │ │ │ ├── handling_tap_held_pile.py │ │ │ │ ├── handling_tap_player_pane.py │ │ │ │ ├── handling_tap_stock_pile.py │ │ │ │ ├── handling_tap_to_arrange_held_pile.py │ │ │ │ ├── info_messaging.py │ │ │ │ ├── menu_bar.py │ │ │ │ ├── player_type.py │ │ │ │ ├── preferences_window.py │ │ │ │ ├── starting_new_game.py │ │ │ │ ├── status_messaging.py │ │ │ │ ├── utils.py │ │ │ │ └── utils_extra.py │ │ ├── leduc_holdem_human_agent.py │ │ ├── limit_holdem_human_agent.py │ │ ├── nolimit_holdem_human_agent.py │ │ └── uno_human_agent.py │ ├── nfsp_agent.py │ ├── pettingzoo_agents.py │ └── random_agent.py ├── envs │ ├── __init__.py │ ├── blackjack.py │ ├── bridge.py │ ├── doudizhu.py │ ├── env.py │ ├── gin_rummy.py │ ├── leducholdem.py │ ├── limitholdem.py │ ├── mahjong.py │ ├── nolimitholdem.py │ ├── registration.py │ └── uno.py ├── games │ ├── __init__.py │ ├── base.py │ ├── blackjack │ │ ├── __init__.py │ │ ├── dealer.py │ │ ├── game.py │ │ ├── judger.py │ │ └── player.py │ ├── bridge │ │ ├── __init__.py │ │ ├── dealer.py │ │ ├── game.py │ │ ├── judger.py │ │ ├── player.py │ │ ├── round.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── action_event.py │ │ │ ├── bridge_card.py │ │ │ ├── move.py │ │ │ ├── tray.py │ │ │ └── utils.py │ ├── doudizhu │ │ ├── __init__.py │ │ ├── dealer.py │ │ ├── game.py │ │ ├── jsondata.zip │ │ ├── judger.py │ │ ├── player.py │ │ ├── round.py │ │ └── utils.py │ ├── gin_rummy │ │ ├── __init__.py │ │ ├── dealer.py │ │ ├── game.py │ │ ├── judge.py │ │ ├── player.py │ │ ├── round.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── action_event.py │ │ │ ├── gin_rummy_error.py │ │ │ ├── melding.py │ │ │ ├── move.py │ │ │ ├── scorers.py │ │ │ ├── settings.py │ │ │ ├── thinker.py │ │ │ └── utils.py │ ├── leducholdem │ │ ├── __init__.py │ │ ├── 
card2index.json │ │ ├── dealer.py │ │ ├── game.py │ │ ├── judger.py │ │ ├── player.py │ │ └── round.py │ ├── limitholdem │ │ ├── __init__.py │ │ ├── card2index.json │ │ ├── dealer.py │ │ ├── game.py │ │ ├── judger.py │ │ ├── player.py │ │ ├── round.py │ │ └── utils.py │ ├── mahjong │ │ ├── __init__.py │ │ ├── card.py │ │ ├── dealer.py │ │ ├── game.py │ │ ├── judger.py │ │ ├── player.py │ │ ├── round.py │ │ └── utils.py │ ├── nolimitholdem │ │ ├── __init__.py │ │ ├── dealer.py │ │ ├── game.py │ │ ├── judger.py │ │ ├── player.py │ │ └── round.py │ └── uno │ │ ├── __init__.py │ │ ├── card.py │ │ ├── dealer.py │ │ ├── game.py │ │ ├── jsondata │ │ └── action_space.json │ │ ├── judger.py │ │ ├── player.py │ │ ├── round.py │ │ └── utils.py ├── models │ ├── __init__.py │ ├── bridge_rule_models.py │ ├── doudizhu_rule_models.py │ ├── gin_rummy_rule_models.py │ ├── leducholdem_rule_models.py │ ├── limitholdem_rule_models.py │ ├── model.py │ ├── pretrained │ │ └── leduc_holdem_cfr │ │ │ ├── average_policy.pkl │ │ │ ├── iteration.pkl │ │ │ ├── policy.pkl │ │ │ └── regrets.pkl │ ├── pretrained_models.py │ ├── registration.py │ └── uno_rule_models.py └── utils │ ├── __init__.py │ ├── logger.py │ ├── pettingzoo_utils.py │ ├── seeding.py │ └── utils.py ├── setup.py └── tests ├── __init__.py ├── agents ├── __init__.py ├── test_cfr.py ├── test_dqn.py ├── test_leduc_human.py ├── test_nfsp.py └── test_uno_human.py ├── envs ├── __init__.py ├── determism_util.py ├── test_blackjack_env.py ├── test_doudizhu_env.py ├── test_gin_rummy_env.py ├── test_leducholdem_env.py ├── test_limitholdem_env.py ├── test_mahjong.py ├── test_nolimitholdem_env.py ├── test_registration.py └── test_uno_env.py ├── games ├── __init__.py ├── test_blackjack_game.py ├── test_bridge_game.py ├── test_doudizhu_game.py ├── test_doudizhu_judger.py ├── test_gin_rummy_game.py ├── test_leducholdem_game.py ├── test_limitholdem_game.py ├── test_mahjong_game.py ├── test_nolimitholdem_game.py ├── test_nolimitholdem_judger.py └── test_uno_game.py ├── models ├── __init__.py ├── test_model_registeration.py └── test_models.py └── utils ├── __init__.py ├── test_holdem_utils.py ├── test_logger.py └── test_utils.py /.coveralls.yml: -------------------------------------------------------------------------------- 1 | repo_token: a9eSNI8pkeeDAKwGtKKBSUPCaFIiQGvYU 2 | service_name: travis-ci -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Testing 5 | 6 | on: 7 | push: 8 | branches: [ master ] 9 | pull_request: 10 | branches: [ master ] 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | strategy: 17 | fail-fast: false 18 | matrix: 19 | python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"] 20 | 21 | steps: 22 | - uses: actions/checkout@v2 23 | - name: Set up Python ${{ matrix.python-version }} 24 | uses: actions/setup-python@v2 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | - name: Install dependencies 28 | run: | 29 | python -m pip install --upgrade pip 30 | pip install python-coveralls 31 | pip install pytest-cover 32 | - name: Install package 33 | run: | 34 | pip install -e .[torch] 35 | - name: Test with pytest 36 | run: | 37 | 
py.test tests/ --cov=rlcard 38 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | **/.DS_Store 2 | __pycache__/ 3 | *.pyc 4 | /*.egg-info 5 | .idea/ 6 | *.swp 7 | *.wsn 8 | *.swo 9 | .scannerwork/ 10 | .vscode/ 11 | htmlcov/ 12 | sonar-project.properties 13 | .coverage* 14 | docs/rst 15 | docs/sphinx 16 | experiments/ 17 | dist/ 18 | rlcard/games/doudizhu/jsondata/ 19 | rlcard/agents/gin_rummy_human_agent/gui_cards/cards_png 20 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guide 2 | Contribution to this project is greatly appreciated! If you find any bugs or have any feedback, please create an issue or send a pull request to fix the bug. If you want to contribute code for new features, please contact [daochen.zha@tamu.edu](mailto:daochen.zha@tamu.edu) or [khlai@tamu.edu](mailto:khlai@tamu.edu). We currently have several plans, listed in the roadmap below. Please create an issue or contact us through email if you have other suggestions. 3 | 4 | ## Roadmaps 5 | 6 | * **Game-Specific Configurations.** We plan to gradually support game-specific configurations. Currently, we only support specifying the number of players in Blackjack. 7 | * **Rule-based Agents and Pre-trained Models.** Provide more rule-based agents and pre-trained models to benchmark the evaluation. We currently have several models in `/models`. 8 | * **More Games and Algorithms.** Develop more games and algorithms. 9 | * **Hyperparameter Search.** Search hyperparameters for each environment and update the examples with the best ones. 10 | 11 | ## How to Create a Pull Request 12 | 13 | If this is your first time contributing to a project, kindly follow the instructions below. You may find [Creating a pull request](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request) helpful. Mainly, you need to take the following steps to send a pull request: 14 | 15 | * Click **Fork** in the upper-right corner of the project main page to create a copy of the repository under your GitHub account. 16 | * Clone the forked repo from your GitHub account to your computer. 17 | * Make changes on your computer. 18 | * Commit and push your local changes to your fork. 19 | * Send a pull request to merge your branch into the RLCard project. 20 | 21 | ## Testing Your Code 22 | 23 | We strongly encourage you to write testing code in parallel with your development. We use `unittest` in RLCard. An example is [Blackjack environment testing](tests/envs/test_blackjack_env.py). 24 | 25 | ## Making Configurable Environments 26 | We take Blackjack as an example to show how we can define game-specific configurations in RLCard. The key points are highlighted as follows: 27 | 28 | * We add a `DEFAULT_GAME_CONFIG` in [Blackjack Env](rlcard/envs/blackjack.py) to define the default values of the game configurations. Each field should start with `game_`. 29 | * Modify the game and environment according to the configurations. For example, we need to support multiple players in Blackjack. 30 | * Modify [Env](rlcard/envs/env.py) to add your game to the `supported_envs`. 31 | * When making the environment, we pass the newly defined fields in `config`. For example, we pass `config={'game_num_players': 2}` for Blackjack, as shown in the sketch below.
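A minimal sketch of that last step, assuming the Blackjack environment that ships with RLCard (the `game_num_players` field matches the one used in `examples/human/blackjack_human.py`):

```python
import rlcard

# Make a two-player Blackjack game by overriding a `game_` field
# defined in DEFAULT_GAME_CONFIG (see rlcard/envs/blackjack.py)
env = rlcard.make('blackjack', config={'game_num_players': 2})
print(env.num_players)  # -> 2
```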
32 | 33 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright (c) 2019 DATA Lab at Texas A&M University 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # Documents of RLCard 2 | 3 | ## Overview 4 | The toolkit wraps each game in an `Env` class with easy-to-use interfaces. The goal of this toolkit is to enable the users to focus on algorithm development without worrying about the environment. The following design principles are applied when developing the toolkit: 5 | * **Reproducible.** Results on the environments can be reproduced. The same result should be obtained with the same random seed in different runs. 6 | * **Accessible.** The experiences are collected and well organized after each game with easy-to-use interfaces. Users can conveniently configure state representation, action encoding, reward design, or even the game rules. 7 | * **Scalable.** New card environments can be added conveniently into the toolkit with the above design principles. We also try to minimize the dependencies in the toolkit so that the codes can be easily maintained. 8 | 9 | ## User Guide 10 | 11 | * [Toy examples](toy-examples.md) 12 | * [RLCard high-level design](high-level-design.md) 13 | * [Games in RLCard](games.md) 14 | * [Algorithms in RLCard](algorithms.md) 15 | 16 | ## Developer Guide 17 | 18 | * [Developing new algorithms](developping-algorithms.md) 19 | * [Adding new environments](adding-new-environments.md) 20 | * [Customizing environments](customizing-environments.md) 21 | * [Adding pre-trained/rule-based models](adding-models.md) 22 | 23 | ## Application Programming Interface (API) 24 | The API documents are available at the [Official Website](http://www.rlcard.org). 25 | -------------------------------------------------------------------------------- /docs/adding-models.md: -------------------------------------------------------------------------------- 1 | # Adding Pre-trained/Rule-based models 2 | You can add your own pre-trained/rule-based models to the toolkit by following several steps: 3 | 4 | * **Develop models.** You can either design a rule-based model or save a neural network model.
For each game, you need to develop agents for all the players at the same time. You need to wrap each agent as an `Agent` class and make sure that `step`, `eval_step` and `use_raw` work correctly. 5 | * **Wrap models.** You need to inherit the `Model` class in `rlcard/models/model.py`. Then put all the agents into a list. Rewrite the `agents` property to return this list. 6 | * **Register the model.** Register the model in `rlcard/models/__init__.py`. 7 | * **Load the model in environment.** An example of loading the `leduc-holdem-nfsp` model is as follows: 8 | ```python 9 | from rlcard import models 10 | leduc_nfsp_model = models.load('leduc-holdem-nfsp') 11 | ``` 12 | Then use `leduc_nfsp_model.agents` to obtain all the agents for the game. 13 | -------------------------------------------------------------------------------- /docs/adding-new-environments.md: -------------------------------------------------------------------------------- 1 | # Adding New Environments 2 | To add a new environment to the toolkit, generally you should take the following steps: 3 | * **Implement a game.** Card games usually have similar structures so that they can be implemented with `Game`, `Round`, `Dealer`, `Judger`, `Player`, as in existing games. The easiest way is to inherit the classes in [rlcard/games/base.py](../rlcard/games/base.py) and implement the functions. 4 | * **Wrap the game with an environment.** The easiest way is to inherit `Env` in [rlcard/envs/env.py](../rlcard/envs/env.py). You need to implement `_extract_state` which encodes the state, `_decode_action` which decodes actions from the id to the text string, and `get_payoffs` which calculates the payoffs of the players. 5 | * **Register the game.** Now it is time to tell the toolkit where to locate the new environment. Go to [rlcard/envs/\_\_init\_\_.py](../rlcard/envs/__init__.py), and indicate the name of the game and its entry point. 6 | 7 | To test whether the new environment is set up successfully: 8 | ```python 9 | import rlcard 10 | rlcard.make(#the new environment#) 11 | ``` 12 | -------------------------------------------------------------------------------- /docs/algorithms.md: -------------------------------------------------------------------------------- 1 | # Index 2 | 3 | * [DMC](algorithms.md#deep-monte-carlo) 4 | * [Deep-Q Learning](algorithms.md#deep-q-learning) 5 | * [NFSP](algorithms.md#nfsp) 6 | * [CFR (chance sampling)](algorithms.md#cfr) 7 | 8 | ## Deep Monte-Carlo 9 | Deep Monte-Carlo (DMC) is a very effective algorithm for card games. It is the only algorithm that shows human-level performance on complex games such as Dou Dizhu. 10 | 11 | ## Deep-Q Learning 12 | Deep-Q Learning (DQN) [[paper]](https://arxiv.org/abs/1312.5602) is a basic reinforcement learning (RL) algorithm. We wrap DQN as an example to show how RL algorithms can be connected to the environments. In the DQN agent, the following classes are implemented: 13 | 14 | * `DQNAgent`: The agent class that interacts with the environment. 15 | * `Memory`: A memory buffer that manages the storing and sampling of transitions. 16 | * `Estimator`: The neural network that is used to make predictions. 17 | 18 | ## NFSP 19 | Neural Fictitious Self-Play (NFSP) [[paper]](https://arxiv.org/abs/1603.01121) is an end-to-end approach to solving card games with deep reinforcement learning. NFSP has an inner RL agent and a supervised agent that is trained on the data generated by the RL agent. In the toolkit, we use DQN as the RL agent.
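To make the connection concrete, here is a minimal sketch of plugging the DQN agent into an environment and feeding it transitions. It follows the pattern of `examples/run_rl.py`; the constructor arguments shown are indicative rather than authoritative, so check `rlcard/agents/dqn_agent.py` for the exact signature:

```python
import rlcard
from rlcard.agents import DQNAgent, RandomAgent
from rlcard.utils import get_device, reorganize  # reorganize is assumed to be exported from rlcard.utils

device = get_device()
env = rlcard.make('leduc-holdem')

# The agent only needs the action and state dimensions of the environment
agent = DQNAgent(
    num_actions=env.num_actions,
    state_shape=env.state_shape[0],
    mlp_layers=[64, 64],
    device=device,
)
env.set_agents([agent, RandomAgent(num_actions=env.num_actions)])

for episode in range(1000):
    # Generate one game and feed the transitions of player 0 into DQN
    trajectories, payoffs = env.run(is_training=True)
    trajectories = reorganize(trajectories, payoffs)
    for ts in trajectories[0]:
        agent.feed(ts)
```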
20 | 21 | ## CFR (chance sampling) 22 | Counterfactual Regret Minimization (CFR) [[paper]](http://papers.nips.cc/paper/3306-regret-minimization-in-games-with-incomplete-information.pdf) is a regret minimization method for solving imperfect information games. 23 | -------------------------------------------------------------------------------- /docs/customizing-environments.md: -------------------------------------------------------------------------------- 1 | # Customizing Environments 2 | In addition to the default state representation and action encoding, we also allow customizing an environment. In this document, we use Limit Texas Hold'em as an example to describe how to modify the state representation, action encoding, reward calculation, or even the game rules. 3 | 4 | ## State Representation 5 | To define our own state representation, we can modify the ``_extract_state`` function in [/rlcard/envs/limitholdem.py](../rlcard/envs/limitholdem.py#L33). 6 | 7 | ## Action Encoding 8 | To define our own action encoding, we can modify the ``_decode_action`` function in [/rlcard/envs/limitholdem.py](../rlcard/envs/limitholdem.py#L75). 9 | 10 | ## Reward Calculation 11 | To define our own reward calculation, we can modify the ``get_payoffs`` function in [/rlcard/envs/limitholdem.py](../rlcard/envs/limitholdem.py#L67). 12 | 13 | ## Modifying Game 14 | We can change the parameters of a game to adjust its difficulty. For example, we can change the number of players and the number of allowed raises in Limit Texas Hold'em in the ``__init__`` function in [rlcard/games/limitholdem/game.py](../rlcard/games/limitholdem/game.py#L11). 15 | -------------------------------------------------------------------------------- /docs/developping-algorithms.md: -------------------------------------------------------------------------------- 1 | # Developing Algorithms 2 | Although users are free to design and try their algorithms however they like, we recommend wrapping a new algorithm as an `Agent` class, as in the [example agent](../rlcard/agents/random_agent.py). To be compatible with the basic interfaces, the agent should have the following functions and attribute: 3 | * `step`: Given the current state, predict the next action. 4 | * `eval_step`: Similar to `step`, but for evaluation purposes. Reinforcement learning algorithms usually add some noise for better exploration in training. In evaluation, no noise is added when making predictions. 5 | * `use_raw`: A boolean attribute. `True` if the agent uses raw states to do reasoning; `False` if the agent uses numerical values to play (such as neural networks). 6 | -------------------------------------------------------------------------------- /docs/high-level-design.md: -------------------------------------------------------------------------------- 1 | # RLCard High-level Design 2 | This document introduces the high-level design for the environments, the games, and the agents (algorithms). 3 | 4 | ## Environments 5 | We wrap each game with an `Env` class. The responsibility of `Env` is to help you generate trajectories of the games. For developing Reinforcement Learning (RL) algorithms, we recommend using the following interfaces: 6 | 7 | * `set_agents`: This function tells the `Env` what agents will be used to perform actions in the game. Different games may have a different number of agents. The input of the function is a list of `Agent` objects.
For example, `env.set_agents([RandomAgent(num_actions=env.num_actions) for _ in range(2)])` indicates that two random agents will be used to generate the trajectories. 8 | * `run`: After setting the agents, this interface will run a complete trajectory of the game, calculate the reward for each transition, and reorganize the data so that it can be directly fed into an RL algorithm. 9 | 10 | For advanced access to the environment, such as traversal of the game tree, we provide the following interfaces: 11 | 12 | * `step`: Given the current state, the environment takes one step forward, and returns the next state and the next player. 13 | * `step_back`: Takes one step backward. The environment will restore to the last state. `step_back` is turned off by default since it requires expensive recording of previous states. To turn it on, set `allow_step_back = True` when you `make` the environment. 14 | * `get_payoffs`: At the end of the game, this function can be called to obtain the payoffs for each player. 15 | 16 | ## Games 17 | Card games usually have similar structures. We abstract some concepts in card games and follow the same design pattern. In this way, users/developers can easily dig into the code and change the rules for research purposes. Specifically, the following classes are used in all the games: 18 | 19 | * `Game`: A game is defined as a complete sequence starting from one of the non-terminal states to a terminal state. 20 | * `Round`: A round is a part of the sequence of a game. Most card games can be naturally divided into multiple rounds. 21 | * `Dealer`: A dealer is responsible for shuffling and allocating a deck of cards. 22 | * `Judger`: A judger is responsible for making major decisions at the end of a round or a game. 23 | * `Player`: A player is a role who plays cards following a strategy. 24 | 25 | To summarize, in one `Game`, a `Dealer` deals the cards for each `Player`. In each `Round` of the game, a `Judger` makes major decisions about the next round and, at the end of the game, the payoffs. 26 | 27 | ## Agents 28 | We provide examples of several representative algorithms and wrap them as `Agent` classes to show how a learning algorithm can be connected to the toolkit. The first example is DQN, a representative of the Reinforcement Learning (RL) algorithms category. The second example is NFSP, a representative of RL with self-play. We also provide CFR (chance sampling) and DeepCFR, which belong to the Counterfactual Regret Minimization (CFR) category. Other algorithms from these three categories can be connected in similar ways, as sketched below.
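Tying the pieces together, the following is a minimal sketch of a custom agent that satisfies the `step`, `eval_step`, and `use_raw` interfaces described in [Developing Algorithms](developping-algorithms.md). It is modeled on `rlcard/agents/random_agent.py`; the uniform-random policy is only a placeholder for your own algorithm:

```python
import numpy as np

class MyAgent(object):
    ''' A minimal agent satisfying the basic RLCard interfaces. '''

    use_raw = False  # this agent reasons over the numerical state encoding

    def __init__(self, num_actions):
        self.num_actions = num_actions

    def step(self, state):
        # Training-time policy: pick uniformly among the legal action ids.
        # A real algorithm would put its exploratory policy here.
        return np.random.choice(list(state['legal_actions'].keys()))

    def eval_step(self, state):
        # Evaluation-time policy without exploration noise. The second
        # return value can carry extra info such as action probabilities.
        return self.step(state), {}
```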
29 | -------------------------------------------------------------------------------- /examples/evaluate.py: -------------------------------------------------------------------------------- 1 | ''' An example of evaluating the trained models in RLCard 2 | ''' 3 | import os 4 | import argparse 5 | 6 | import rlcard 7 | from rlcard.agents import ( 8 | DQNAgent, 9 | RandomAgent, 10 | ) 11 | from rlcard.utils import ( 12 | get_device, 13 | set_seed, 14 | tournament, 15 | ) 16 | 17 | def load_model(model_path, env=None, position=None, device=None): 18 | if os.path.isfile(model_path): # Torch model 19 | import torch 20 | agent = torch.load(model_path, map_location=device) 21 | agent.set_device(device) 22 | elif os.path.isdir(model_path): # CFR model 23 | from rlcard.agents import CFRAgent 24 | agent = CFRAgent(env, model_path) 25 | agent.load() 26 | elif model_path == 'random': # Random model 27 | from rlcard.agents import RandomAgent 28 | agent = RandomAgent(num_actions=env.num_actions) 29 | else: # A model in the model zoo 30 | from rlcard import models 31 | agent = models.load(model_path).agents[position] 32 | 33 | return agent 34 | 35 | def evaluate(args): 36 | 37 | # Check whether GPU is available 38 | device = get_device() 39 | 40 | # Seed numpy, torch, random 41 | set_seed(args.seed) 42 | 43 | # Make the environment with seed 44 | env = rlcard.make(args.env, config={'seed': args.seed}) 45 | 46 | # Load models 47 | agents = [] 48 | for position, model_path in enumerate(args.models): 49 | agents.append(load_model(model_path, env, position, device)) 50 | env.set_agents(agents) 51 | 52 | # Evaluate 53 | rewards = tournament(env, args.num_games) 54 | for position, reward in enumerate(rewards): 55 | print(position, args.models[position], reward) 56 | 57 | if __name__ == '__main__': 58 | parser = argparse.ArgumentParser("Evaluation example in RLCard") 59 | parser.add_argument( 60 | '--env', 61 | type=str, 62 | default='leduc-holdem', 63 | choices=[ 64 | 'blackjack', 65 | 'leduc-holdem', 66 | 'limit-holdem', 67 | 'doudizhu', 68 | 'mahjong', 69 | 'no-limit-holdem', 70 | 'uno', 71 | 'gin-rummy', 72 | ], 73 | ) 74 | parser.add_argument( 75 | '--models', 76 | nargs='*', 77 | default=[ 78 | 'experiments/leduc_holdem_dqn_result/model.pth', 79 | 'random', 80 | ], 81 | ) 82 | parser.add_argument( 83 | '--cuda', 84 | type=str, 85 | default='', 86 | ) 87 | parser.add_argument( 88 | '--seed', 89 | type=int, 90 | default=42, 91 | ) 92 | parser.add_argument( 93 | '--num_games', 94 | type=int, 95 | default=10000, 96 | ) 97 | 98 | args = parser.parse_args() 99 | 100 | os.environ["CUDA_VISIBLE_DEVICES"] = args.cuda 101 | evaluate(args) 102 | 103 | -------------------------------------------------------------------------------- /examples/human/blackjack_human.py: -------------------------------------------------------------------------------- 1 | ''' A toy example of playing Blackjack against a random agent 2 | ''' 3 | 4 | import rlcard 5 | from rlcard.agents import RandomAgent 6 | from rlcard.agents import BlackjackHumanAgent as HumanAgent 7 | from rlcard.utils.utils import print_card 8 | 9 | # Make environment 10 | num_players = 2 11 | env = rlcard.make( 12 | 'blackjack', 13 | config={ 14 | 'game_num_players': num_players, 15 | }, 16 | ) 17 | human_agent = HumanAgent(env.num_actions) 18 | random_agent = RandomAgent(env.num_actions) 19 | env.set_agents([ 20 | human_agent, 21 | random_agent, 22 | ]) 23 | 24 | print(">> Blackjack human agent") 25 | 26 | while (True): 27 | print(">> Start a new game") 28 | 29 | 
trajectories, payoffs = env.run(is_training=False) 30 | # If the human does not take the final action, we need to 31 | # print other players' actions 32 | 33 | if len(trajectories[0]) != 0: 34 | final_state = [] 35 | action_record = [] 36 | state = [] 37 | _action_list = [] 38 | 39 | for i in range(num_players): 40 | final_state.append(trajectories[i][-1]) 41 | state.append(final_state[i]['raw_obs']) 42 | 43 | action_record.append(final_state[i]['action_record']) 44 | for i in range(1, len(action_record) + 1): 45 | _action_list.insert(0, action_record[-i]) 46 | 47 | for pair in _action_list[0]: 48 | print('>> Player', pair[0], 'chooses', pair[1]) 49 | 50 | # Let's take a look at the dealer's hand 51 | print('=============== Dealer Hand ===============') 52 | print_card(state[0]['state'][1]) 53 | 54 | for i in range(num_players): 55 | print('=============== Player {} Hand ==============='.format(i)) 56 | print_card(state[i]['state'][0]) 57 | 58 | print('=============== Result ===============') 59 | for i in range(num_players): 60 | if payoffs[i] == 1: 61 | print('Player {} wins {} chip!'.format(i, payoffs[i])) 62 | elif payoffs[i] == 0: 63 | print('Player {} ties'.format(i)) 64 | else: 65 | print('Player {} loses {} chip!'.format(i, -payoffs[i])) 66 | print('') 67 | 68 | input("Press any key to continue...") 69 | -------------------------------------------------------------------------------- /examples/human/gin_rummy_human.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Project: Gui Gin Rummy 3 | File name: gin_rummy_human.py 4 | Author: William Hale 5 | Date created: 3/14/2020 6 | ''' 7 | 8 | # You need to install tkinter if it is not already installed. 9 | # Tkinter is Python's de facto standard GUI (Graphical User Interface) package. 10 | # It is a thin object-oriented layer on top of Tcl/Tk. 11 | # Note that the name of the module is 'tkinter'. 12 | # 13 | # If you are using anaconda: 14 | # -- I have version 8.6.11 working with version 3.6 of Python. 15 | # -- In the "Installed" window for your environment, search for "tk". 16 | # -- If it is found, make sure you have at least version 8.6.11. 17 | # -- Otherwise, go to the "Not installed" window, search for "tk", select it, and apply it. 18 | # 19 | # If you are using Ubuntu: 20 | # -- You can install it with apt-get install python-tk. 21 | # 22 | # For other cases, you can search on Google to see how to install tkinter. 
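# A quick way to check that tkinter is usable (a hedged sketch; TkVersion is
# part of the standard tkinter module):
# >>> import tkinter
# >>> print(tkinter.TkVersion)  # should print 8.6 or later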
23 | 24 | # from __future__ import annotations 25 | from typing import TYPE_CHECKING 26 | if TYPE_CHECKING: 27 | from rlcard.envs.gin_rummy import GinRummyEnv 28 | 29 | import rlcard 30 | 31 | from rlcard.agents import RandomAgent 32 | from rlcard.models.gin_rummy_rule_models import GinRummyNoviceRuleAgent 33 | from rlcard.agents.human_agents.gin_rummy_human_agent.gin_rummy_human_agent import HumanAgent 34 | 35 | from rlcard.agents.human_agents.gin_rummy_human_agent.gui_gin_rummy.game_app import GameApp 36 | 37 | from rlcard.games.gin_rummy.utils import scorers 38 | 39 | 40 | def make_gin_rummy_env() -> 'GinRummyEnv': 41 | gin_rummy_env = rlcard.make('gin-rummy') 42 | # north_agent = RandomAgent(num_actions=gin_rummy_env.num_actions) 43 | north_agent = GinRummyNoviceRuleAgent() 44 | south_agent = HumanAgent(gin_rummy_env.num_actions) 45 | gin_rummy_env.set_agents([ 46 | north_agent, 47 | south_agent 48 | ]) 49 | gin_rummy_env.game.judge.scorer = scorers.GinRummyScorer(get_payoff=scorers.get_payoff_gin_rummy_v0) 50 | return gin_rummy_env 51 | 52 | 53 | # Play game 54 | gin_rummy_app = GameApp(make_gin_rummy_env=make_gin_rummy_env) -------------------------------------------------------------------------------- /examples/human/leduc_holdem_human.py: -------------------------------------------------------------------------------- 1 | ''' A toy example of playing against a pretrained AI on Leduc Hold'em 2 | ''' 3 | 4 | import rlcard 5 | from rlcard import models 6 | from rlcard.agents import LeducholdemHumanAgent as HumanAgent 7 | from rlcard.utils import print_card 8 | 9 | # Make environment 10 | env = rlcard.make('leduc-holdem') 11 | human_agent = HumanAgent(env.num_actions) 12 | cfr_agent = models.load('leduc-holdem-cfr').agents[0] 13 | env.set_agents([ 14 | human_agent, 15 | cfr_agent, 16 | ]) 17 | 18 | print(">> Leduc Hold'em pre-trained model") 19 | 20 | while (True): 21 | print(">> Start a new game") 22 | 23 | trajectories, payoffs = env.run(is_training=False) 24 | # If the human does not take the final action, we need to 25 | # print other players' actions 26 | final_state = trajectories[0][-1] 27 | action_record = final_state['action_record'] 28 | state = final_state['raw_obs'] 29 | _action_list = [] 30 | for i in range(1, len(action_record)+1): 31 | if action_record[-i][0] == state['current_player']: 32 | break 33 | _action_list.insert(0, action_record[-i]) 34 | for pair in _action_list: 35 | print('>> Player', pair[0], 'chooses', pair[1]) 36 | 37 | # Let's take a look at the agent's card 38 | print('=============== CFR Agent ===============') 39 | print_card(env.get_perfect_information()['hand_cards'][1]) 40 | 41 | print('=============== Result ===============') 42 | if payoffs[0] > 0: 43 | print('You win {} chips!'.format(payoffs[0])) 44 | elif payoffs[0] == 0: 45 | print('It is a tie.') 46 | else: 47 | print('You lose {} chips!'.format(-payoffs[0])) 48 | print('') 49 | 50 | input("Press any key to continue...") 51 | -------------------------------------------------------------------------------- /examples/human/limit_holdem_human.py: -------------------------------------------------------------------------------- 1 | ''' A toy example of playing against a random agent on Limit Hold'em 2 | ''' 3 | 4 | import rlcard 5 | from rlcard.agents import LimitholdemHumanAgent as HumanAgent 6 | from rlcard.agents import RandomAgent 7 | from rlcard.utils.utils import print_card 8 | 9 | # Make environment 10 | env = rlcard.make('limit-holdem') 11 | human_agent = 
HumanAgent(env.num_actions) 12 | agent_0 = RandomAgent(num_actions=env.num_actions) 13 | env.set_agents([ 14 | human_agent, 15 | agent_0, 16 | ]) 17 | 18 | print(">> Limit Hold'em random agent") 19 | 20 | while (True): 21 | print(">> Start a new game") 22 | 23 | trajectories, payoffs = env.run(is_training=False) 24 | # If the human does not take the final action, we need to 25 | # print other players' actions 26 | if len(trajectories[0]) != 0: 27 | final_state = trajectories[0][-1] 28 | action_record = final_state['action_record'] 29 | state = final_state['raw_obs'] 30 | _action_list = [] 31 | for i in range(1, len(action_record)+1): 32 | """ 33 | if action_record[-i][0] == state['current_player']: 34 | break 35 | """ 36 | _action_list.insert(0, action_record[-i]) 37 | for pair in _action_list: 38 | print('>> Player', pair[0], 'chooses', pair[1]) 39 | 40 | # Let's take a look at the agent's card 41 | print('============= Random Agent ============') 42 | print_card(env.get_perfect_information()['hand_cards'][1]) 43 | 44 | print('=============== Result ===============') 45 | if payoffs[0] > 0: 46 | print('You win {} chips!'.format(payoffs[0])) 47 | elif payoffs[0] == 0: 48 | print('It is a tie.') 49 | else: 50 | print('You lose {} chips!'.format(-payoffs[0])) 51 | print('') 52 | 53 | input("Press any key to continue...") 54 | -------------------------------------------------------------------------------- /examples/human/nolimit_holdem_human.py: -------------------------------------------------------------------------------- 1 | ''' A toy example of playing No-Limit Hold'em with two human agents 2 | ''' 3 | from rlcard.agents import RandomAgent 4 | 5 | import rlcard 6 | from rlcard import models 7 | from rlcard.agents import NolimitholdemHumanAgent as HumanAgent 8 | from rlcard.utils import print_card 9 | 10 | # Make environment 11 | env = rlcard.make('no-limit-holdem') 12 | 13 | human_agent = HumanAgent(env.num_actions) 14 | human_agent2 = HumanAgent(env.num_actions) 15 | # random_agent = RandomAgent(num_actions=env.num_actions) 16 | 17 | env.set_agents([human_agent, human_agent2]) 18 | 19 | 20 | while (True): 21 | print(">> Start a new game") 22 | 23 | trajectories, payoffs = env.run(is_training=False) 24 | # If the human does not take the final action, we need to 25 | # print other players' actions 26 | final_state = trajectories[0][-1] 27 | action_record = final_state['action_record'] 28 | state = final_state['raw_obs'] 29 | _action_list = [] 30 | for i in range(1, len(action_record)+1): 31 | if action_record[-i][0] == state['current_player']: 32 | break 33 | _action_list.insert(0, action_record[-i]) 34 | for pair in _action_list: 35 | print('>> Player', pair[0], 'chooses', pair[1]) 36 | 37 | # Let's take a look at all players' cards 38 | print('=============== Cards of All Players ===============') 39 | for hands in env.get_perfect_information()['hand_cards']: 40 | print_card(hands) 41 | 42 | print('=============== Result ===============') 43 | if payoffs[0] > 0: 44 | print('You win {} chips!'.format(payoffs[0])) 45 | elif payoffs[0] == 0: 46 | print('It is a tie.') 47 | else: 48 | print('You lose {} chips!'.format(-payoffs[0])) 49 | print('') 50 | 51 | input("Press any key to continue...") 52 | -------------------------------------------------------------------------------- /examples/human/uno_human.py: -------------------------------------------------------------------------------- 1 | ''' A toy example of playing against a rule-based bot on UNO 2 | ''' 3 | 4 | import rlcard 5 | from 
rlcard import models 6 | from rlcard.agents.human_agents.uno_human_agent import HumanAgent, _print_action 7 | 8 | # Make environment 9 | env = rlcard.make('uno') 10 | human_agent = HumanAgent(env.num_actions) 11 | rule_agent = models.load('uno-rule-v1').agents[0] 12 | env.set_agents([ 13 | human_agent, 14 | rule_agent, 15 | ]) 16 | 17 | print(">> UNO rule model V1") 18 | 19 | while (True): 20 | print(">> Start a new game") 21 | 22 | trajectories, payoffs = env.run(is_training=False) 23 | # If the human does not take the final action, we need to 24 | # print other players' actions 25 | final_state = trajectories[0][-1] 26 | action_record = final_state['action_record'] 27 | state = final_state['raw_obs'] 28 | _action_list = [] 29 | for i in range(1, len(action_record)+1): 30 | if action_record[-i][0] == state['current_player']: 31 | break 32 | _action_list.insert(0, action_record[-i]) 33 | for pair in _action_list: 34 | print('>> Player', pair[0], 'chooses ', end='') 35 | _print_action(pair[1]) 36 | print('') 37 | 38 | print('=============== Result ===============') 39 | if payoffs[0] > 0: 40 | print('You win!') 41 | else: 42 | print('You lose!') 43 | print('') 44 | input("Press any key to continue...") 45 | -------------------------------------------------------------------------------- /examples/pettingzoo/README.md: -------------------------------------------------------------------------------- 1 | # Train agents on PettingZoo Environments 2 | 3 | RLCard environments are also wrapped by [PettingZoo](https://www.pettingzoo.ml/) which 4 | implements the Agent Environment Cycle (AEC) games model. PettingZoo is a library with 5 | diverse sets of multi-agent environments, developed with the goal of accelerating 6 | research in Multi-Agent Reinforcement Learning (MARL). 7 | 8 | ## Setup 9 | 10 | First, install PettingZoo with the classic games. 11 | 12 | ```bash 13 | pip3 install pettingzoo[classic] 14 | ``` 15 | 16 | PettingZoo has RLCard as a dependency, so if you already have RLCard installed in your 17 | Python environment, it may get replaced by the version required by PettingZoo; in that 18 | case, you may need to re-install RLCard. 19 | 20 | ## Train Agents 21 | 22 | Training scripts for DQN, NFSP, and DMC are provided. 
The following trains a DQN agent 23 | on the Leduc Hold'em environment: 24 | 25 | ```bash 26 | python run_rl.py 27 | ``` 28 | -------------------------------------------------------------------------------- /examples/pettingzoo/run_dmc.py: -------------------------------------------------------------------------------- 1 | ''' An example of training a Deep Monte-Carlo (DMC) Agent on PettingZoo environments 2 | wrapping RLCard 3 | ''' 4 | import os 5 | import argparse 6 | 7 | from pettingzoo.classic import ( 8 | leduc_holdem_v4, 9 | texas_holdem_v4, 10 | dou_dizhu_v4, 11 | mahjong_v4, 12 | texas_holdem_no_limit_v6, 13 | uno_v4, 14 | gin_rummy_v4, 15 | ) 16 | 17 | from rlcard.agents.dmc_agent import DMCTrainer 18 | 19 | 20 | env_name_to_env_func = { 21 | "leduc-holdem": leduc_holdem_v4, 22 | "limit-holdem": texas_holdem_v4, 23 | "doudizhu": dou_dizhu_v4, 24 | "mahjong": mahjong_v4, 25 | "no-limit-holdem": texas_holdem_no_limit_v6, 26 | "uno": uno_v4, 27 | "gin-rummy": gin_rummy_v4, 28 | } 29 | 30 | 31 | def train(args): 32 | # Make the environment 33 | env_func = env_name_to_env_func[args.env] 34 | env = env_func.env() 35 | env.reset() 36 | 37 | # Initialize the DMC trainer 38 | trainer = DMCTrainer( 39 | env, 40 | is_pettingzoo_env=True, 41 | load_model=args.load_model, 42 | xpid=args.xpid, 43 | savedir=args.savedir, 44 | save_interval=args.save_interval, 45 | num_actor_devices=args.num_actor_devices, 46 | num_actors=args.num_actors, 47 | training_device=args.training_device, 48 | total_frames=args.total_frames, 49 | ) 50 | 51 | # Train DMC Agents 52 | trainer.start() 53 | 54 | if __name__ == '__main__': 55 | parser = argparse.ArgumentParser("DMC example in RLCard") 56 | parser.add_argument( 57 | '--env', 58 | type=str, 59 | default='leduc-holdem', 60 | choices=[ 61 | 'blackjack', 62 | 'leduc-holdem', 63 | 'limit-holdem', 64 | 'doudizhu', 65 | 'mahjong', 66 | 'no-limit-holdem', 67 | 'uno', 68 | 'gin-rummy', 69 | ] 70 | ) 71 | parser.add_argument( 72 | '--cuda', 73 | type=str, 74 | default='', 75 | ) 76 | parser.add_argument( 77 | '--load_model', 78 | action='store_true', 79 | help='Load an existing model', 80 | ) 81 | parser.add_argument( 82 | '--xpid', 83 | default='leduc_holdem', 84 | help='Experiment id (default: leduc_holdem)', 85 | ) 86 | parser.add_argument( 87 | '--savedir', 88 | default='experiments/dmc_result', 89 | help='Root dir where experiment data will be saved', 90 | ) 91 | parser.add_argument( 92 | '--save_interval', 93 | default=30, 94 | type=int, 95 | help='Time interval (in minutes) at which to save the model', 96 | ) 97 | parser.add_argument( 98 | '--num_actor_devices', 99 | default=1, 100 | type=int, 101 | help='The number of devices used for simulation', 102 | ) 103 | parser.add_argument( 104 | '--num_actors', 105 | default=5, 106 | type=int, 107 | help='The number of actors for each simulation device', 108 | ) 109 | parser.add_argument( 110 | '--total_frames', 111 | default=1e11, 112 | type=int, 113 | help='The total number of frames to train for', 114 | ) 115 | parser.add_argument( 116 | '--training_device', 117 | default=0, 118 | type=int, 119 | help='The index of the GPU used for training models', 120 | ) 121 | 122 | args = parser.parse_args() 123 | 124 | os.environ["CUDA_VISIBLE_DEVICES"] = args.cuda 125 | train(args) 126 | 127 | -------------------------------------------------------------------------------- /examples/run_cfr.py: -------------------------------------------------------------------------------- 1 | ''' An example of solving Leduc Hold'em with CFR 
(chance sampling) 2 | ''' 3 | import os 4 | import argparse 5 | 6 | import rlcard 7 | from rlcard.agents import ( 8 | CFRAgent, 9 | RandomAgent, 10 | ) 11 | from rlcard.utils import ( 12 | set_seed, 13 | tournament, 14 | Logger, 15 | plot_curve, 16 | ) 17 | 18 | def train(args): 19 | # Make environments, CFR only supports Leduc Hold'em 20 | env = rlcard.make( 21 | 'leduc-holdem', 22 | config={ 23 | 'seed': 0, 24 | 'allow_step_back': True, 25 | } 26 | ) 27 | eval_env = rlcard.make( 28 | 'leduc-holdem', 29 | config={ 30 | 'seed': 0, 31 | } 32 | ) 33 | 34 | # Seed numpy, torch, random 35 | set_seed(args.seed) 36 | 37 | # Initialize CFR Agent 38 | agent = CFRAgent( 39 | env, 40 | os.path.join( 41 | args.log_dir, 42 | 'cfr_model', 43 | ), 44 | ) 45 | agent.load() # If we have a saved model, we first load the model 46 | 47 | # Evaluate CFR against random 48 | eval_env.set_agents([ 49 | agent, 50 | RandomAgent(num_actions=env.num_actions), 51 | ]) 52 | 53 | # Start training 54 | with Logger(args.log_dir) as logger: 55 | for episode in range(args.num_episodes): 56 | agent.train() 57 | print('\rIteration {}'.format(episode), end='') 58 | # Evaluate the performance. Play with Random agents. 59 | if episode % args.evaluate_every == 0: 60 | agent.save() # Save model 61 | logger.log_performance( 62 | episode, 63 | tournament( 64 | eval_env, 65 | args.num_eval_games 66 | )[0] 67 | ) 68 | 69 | # Get the paths 70 | csv_path, fig_path = logger.csv_path, logger.fig_path 71 | # Plot the learning curve 72 | plot_curve(csv_path, fig_path, 'cfr') 73 | 74 | if __name__ == '__main__': 75 | parser = argparse.ArgumentParser("CFR example in RLCard") 76 | parser.add_argument( 77 | '--seed', 78 | type=int, 79 | default=42, 80 | ) 81 | parser.add_argument( 82 | '--num_episodes', 83 | type=int, 84 | default=5000, 85 | ) 86 | parser.add_argument( 87 | '--num_eval_games', 88 | type=int, 89 | default=2000, 90 | ) 91 | parser.add_argument( 92 | '--evaluate_every', 93 | type=int, 94 | default=100, 95 | ) 96 | parser.add_argument( 97 | '--log_dir', 98 | type=str, 99 | default='experiments/leduc_holdem_cfr_result/', 100 | ) 101 | 102 | args = parser.parse_args() 103 | 104 | train(args) 105 | 106 | -------------------------------------------------------------------------------- /examples/run_dmc.py: -------------------------------------------------------------------------------- 1 | ''' An example of training a Deep Monte-Carlo (DMC) Agent on the environments in RLCard 2 | ''' 3 | import os 4 | import argparse 5 | 6 | import torch 7 | 8 | import rlcard 9 | from rlcard.agents.dmc_agent import DMCTrainer 10 | 11 | def train(args): 12 | 13 | # Make the environment 14 | env = rlcard.make(args.env) 15 | 16 | # Initialize the DMC trainer 17 | trainer = DMCTrainer( 18 | env, 19 | cuda=args.cuda, 20 | load_model=args.load_model, 21 | xpid=args.xpid, 22 | savedir=args.savedir, 23 | save_interval=args.save_interval, 24 | num_actor_devices=args.num_actor_devices, 25 | num_actors=args.num_actors, 26 | training_device=args.training_device, 27 | ) 28 | 29 | # Train DMC Agents 30 | trainer.start() 31 | 32 | if __name__ == '__main__': 33 | parser = argparse.ArgumentParser("DMC example in RLCard") 34 | parser.add_argument( 35 | '--env', 36 | type=str, 37 | default='leduc-holdem', 38 | choices=[ 39 | 'blackjack', 40 | 'leduc-holdem', 41 | 'limit-holdem', 42 | 'doudizhu', 43 | 'mahjong', 44 | 'no-limit-holdem', 45 | 'uno', 46 | 'gin-rummy' 47 | ], 48 | ) 49 | parser.add_argument( 50 | '--cuda', 51 | type=str, 52 | default='', 53 | ) 54 | 
parser.add_argument( 55 | '--load_model', 56 | action='store_true', 57 | help='Load an existing model', 58 | ) 59 | parser.add_argument( 60 | '--xpid', 61 | default='leduc_holdem', 62 | help='Experiment id (default: leduc_holdem)', 63 | ) 64 | parser.add_argument( 65 | '--savedir', 66 | default='experiments/dmc_result', 67 | help='Root dir where experiment data will be saved' 68 | ) 69 | parser.add_argument( 70 | '--save_interval', 71 | default=30, 72 | type=int, 73 | help='Time interval (in minutes) at which to save the model', 74 | ) 75 | parser.add_argument( 76 | '--num_actor_devices', 77 | default=1, 78 | type=int, 79 | help='The number of devices used for simulation', 80 | ) 81 | parser.add_argument( 82 | '--num_actors', 83 | default=5, 84 | type=int, 85 | help='The number of actors for each simulation device', 86 | ) 87 | parser.add_argument( 88 | '--training_device', 89 | default="0", 90 | type=str, 91 | help='The index of the GPU used for training models', 92 | ) 93 | 94 | args = parser.parse_args() 95 | 96 | os.environ["CUDA_VISIBLE_DEVICES"] = args.cuda 97 | train(args) 98 | 99 | -------------------------------------------------------------------------------- /examples/run_random.py: -------------------------------------------------------------------------------- 1 | ''' An example of playing randomly in RLCard 2 | ''' 3 | import argparse 4 | import pprint 5 | 6 | import rlcard 7 | from rlcard.agents import RandomAgent 8 | from rlcard.utils import set_seed 9 | 10 | def run(args): 11 | # Make environment 12 | env = rlcard.make( 13 | args.env, 14 | config={ 15 | 'seed': 42, 16 | } 17 | ) 18 | 19 | # Seed numpy, torch, random 20 | set_seed(42) 21 | 22 | # Set agents 23 | agent = RandomAgent(num_actions=env.num_actions) 24 | env.set_agents([agent for _ in range(env.num_players)]) 25 | 26 | # Generate data from the environment 27 | trajectories, player_wins = env.run(is_training=False) 28 | # Print out the trajectories 29 | print('\nTrajectories:') 30 | print(trajectories) 31 | print('\nSample raw observation:') 32 | pprint.pprint(trajectories[0][0]['raw_obs']) 33 | print('\nSample raw legal_actions:') 34 | pprint.pprint(trajectories[0][0]['raw_legal_actions']) 35 | 36 | if __name__ == '__main__': 37 | parser = argparse.ArgumentParser("Random example in RLCard") 38 | parser.add_argument( 39 | '--env', 40 | type=str, 41 | default='leduc-holdem', 42 | choices=[ 43 | 'blackjack', 44 | 'leduc-holdem', 45 | 'limit-holdem', 46 | 'doudizhu', 47 | 'mahjong', 48 | 'no-limit-holdem', 49 | 'uno', 50 | 'gin-rummy', 51 | 'bridge', 52 | ], 53 | ) 54 | 55 | args = parser.parse_args() 56 | 57 | run(args) 58 | 59 | -------------------------------------------------------------------------------- /examples/scripts/dmc_doudizhu_1_gpu.sh: -------------------------------------------------------------------------------- 1 | python3 examples/run_dmc.py --env doudizhu --xpid doudizhu --cuda 0 --num_actor_devices 1 --training_device 0 --num_actors 8 --save_interval 30 2 | -------------------------------------------------------------------------------- /examples/scripts/dmc_doudizhu_4_gpu.sh: -------------------------------------------------------------------------------- 1 | python3 examples/run_dmc.py --env doudizhu --xpid doudizhu --cuda 0,1,2,3 --num_actor_devices 3 --training_device 3 --num_actors 8 --save_interval 30 2 | -------------------------------------------------------------------------------- /rlcard/__init__.py: -------------------------------------------------------------------------------- 1 | 
name = "rlcard" 2 | __version__ = "1.2.0" 3 | 4 | from rlcard.envs import make 5 | -------------------------------------------------------------------------------- /rlcard/agents/__init__.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import sys 3 | from distutils.version import LooseVersion 4 | 5 | reqs = subprocess.check_output([sys.executable, '-m', 'pip', 'freeze']) 6 | installed_packages = [r.decode().split('==')[0] for r in reqs.split()] 7 | 8 | if 'torch' in installed_packages: 9 | from rlcard.agents.dqn_agent import DQNAgent as DQNAgent 10 | from rlcard.agents.nfsp_agent import NFSPAgent as NFSPAgent 11 | 12 | from rlcard.agents.cfr_agent import CFRAgent 13 | from rlcard.agents.human_agents.limit_holdem_human_agent import HumanAgent as LimitholdemHumanAgent 14 | from rlcard.agents.human_agents.nolimit_holdem_human_agent import HumanAgent as NolimitholdemHumanAgent 15 | from rlcard.agents.human_agents.leduc_holdem_human_agent import HumanAgent as LeducholdemHumanAgent 16 | from rlcard.agents.human_agents.blackjack_human_agent import HumanAgent as BlackjackHumanAgent 17 | from rlcard.agents.human_agents.uno_human_agent import HumanAgent as UnoHumanAgent 18 | from rlcard.agents.random_agent import RandomAgent 19 | -------------------------------------------------------------------------------- /rlcard/agents/dmc_agent/__init__.py: -------------------------------------------------------------------------------- 1 | from .trainer import DMCTrainer 2 | -------------------------------------------------------------------------------- /rlcard/agents/dmc_agent/pettingzoo_model.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | from .model import DMCAgent 4 | from rlcard.utils.pettingzoo_utils import wrap_state 5 | 6 | 7 | class DMCAgentPettingZoo(DMCAgent): 8 | def step(self, state): 9 | return super().step(wrap_state(state)) 10 | 11 | def eval_step(self, state): 12 | return super().eval_step(wrap_state(state)) 13 | 14 | def feed(self, ts): 15 | state, action, reward, next_state, done = tuple(ts) 16 | state = wrap_state(state) 17 | next_state = wrap_state(next_state) 18 | ts = (state, action, reward, next_state, done) 19 | return super().feed(ts) 20 | 21 | 22 | class DMCModelPettingZoo: 23 | def __init__( 24 | self, 25 | env, 26 | mlp_layers=[512,512,512,512,512], 27 | exp_epsilon=0.01, 28 | device="0" 29 | ): 30 | self.agents = OrderedDict() 31 | for agent_name in env.agents: 32 | agent = DMCAgentPettingZoo( 33 | env.observation_space(agent_name)["observation"].shape, 34 | (env.action_space(agent_name).n,), 35 | mlp_layers, 36 | exp_epsilon, 37 | device, 38 | ) 39 | self.agents[agent_name] = agent 40 | 41 | def share_memory(self): 42 | for agent in self.agents.values(): 43 | agent.share_memory() 44 | 45 | def eval(self): 46 | for agent in self.agents.values(): 47 | agent.eval() 48 | 49 | def parameters(self, index): 50 | return list(self.agents.values())[index].parameters() 51 | 52 | def get_agent(self, index): 53 | return list(self.agents.values())[index] 54 | 55 | def get_agents(self): 56 | return list(self.agents.values()) 57 | -------------------------------------------------------------------------------- /rlcard/agents/human_agents/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/datamllab/rlcard/d7d0a957baf4cc7225a50522adb0164bf130a9d0/rlcard/agents/human_agents/__init__.py -------------------------------------------------------------------------------- /rlcard/agents/human_agents/blackjack_human_agent.py: -------------------------------------------------------------------------------- 1 | from rlcard.utils.utils import print_card 2 | 3 | 4 | class HumanAgent(object): 5 | ''' A human agent for Blackjack. It can be used to play alone to understand how the Blackjack code runs. 6 | ''' 7 | 8 | def __init__(self, num_actions): 9 | ''' Initialize the human agent 10 | 11 | Args: 12 | num_actions (int): the size of the output action space 13 | ''' 14 | self.use_raw = True 15 | self.num_actions = num_actions 16 | 17 | @staticmethod 18 | def step(state): 19 | ''' Human agent will display the state and make decisions through interfaces 20 | 21 | Args: 22 | state (dict): A dictionary that represents the current state 23 | 24 | Returns: 25 | action (int): The action decided by the human 26 | ''' 27 | _print_state(state['raw_obs'], state['raw_legal_actions'], state['action_record']) 28 | action = int(input('>> You choose action (integer): ')) 29 | while action < 0 or action >= len(state['legal_actions']): 30 | print('Action illegal...') 31 | action = int(input('>> Re-choose action (integer): ')) 32 | return state['raw_legal_actions'][action] 33 | 34 | def eval_step(self, state): 35 | ''' Predict the action given the current state for evaluation. The same as step here. 36 | 37 | Args: 38 | state (numpy.array): a numpy array that represents the current state 39 | 40 | Returns: 41 | action (int): the action chosen by the human agent 42 | ''' 43 | return self.step(state), {} 44 | 45 | def _print_state(state, raw_legal_actions, action_record): 46 | ''' Print out the state 47 | 48 | Args: 49 | state (dict): A dictionary of the raw state 50 | action_record (list): A list of each player's historical actions 51 | ''' 52 | _action_list = [] 53 | for i in range(1, len(action_record)+1): 54 | _action_list.insert(0, action_record[-i]) 55 | for pair in _action_list: 56 | print('>> Player', pair[0], 'chooses', pair[1]) 57 | 58 | print('\n============= Dealer Hand ===============') 59 | print_card(state['dealer hand']) 60 | 61 | num_players = len(state) - 3 62 | 63 | for i in range(num_players): 64 | print('=============== Player {} Hand ==============='.format(i)) 65 | print_card(state['player' + str(i) + ' hand']) 66 | 67 | print('\n=========== Actions You Can Choose ===========') 68 | print(', '.join([str(index) + ': ' + action for index, action in enumerate(raw_legal_actions)])) 69 | print('') 70 | -------------------------------------------------------------------------------- /rlcard/agents/human_agents/gin_rummy_human_agent/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datamllab/rlcard/d7d0a957baf4cc7225a50522adb0164bf130a9d0/rlcard/agents/human_agents/gin_rummy_human_agent/__init__.py -------------------------------------------------------------------------------- /rlcard/agents/human_agents/gin_rummy_human_agent/gin_rummy_human_agent.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Project: Gui Gin Rummy 3 | File name: gin_rummy_human_agent.py 4 | Author: William Hale 5 | Date created: 3/14/2020 6 | ''' 7 | 8 | import time 9 | 10 | from rlcard.games.gin_rummy.utils.action_event import ActionEvent 11 | 
from rlcard.games.gin_rummy.utils.gin_rummy_error import GinRummyProgramError 12 | 13 | 14 | class HumanAgent(object): 15 | ''' A human agent for Gin Rummy. It can be used to play against trained models. 16 | ''' 17 | 18 | def __init__(self, num_actions): 19 | ''' Initialize the human agent 20 | 21 | Args: 22 | num_actions (int): the size of the output action space 23 | ''' 24 | self.use_raw = True 25 | self.num_actions = num_actions 26 | self.is_choosing_action_id = False 27 | self.chosen_action_id = None # type: int or None 28 | self.state = None 29 | 30 | def step(self, state): 31 | ''' Human agent will display the state and make decisions through interfaces 32 | 33 | Args: 34 | state (dict): A dictionary that represents the current state 35 | 36 | Returns: 37 | action (int): The action decided by the human 38 | ''' 39 | if self.is_choosing_action_id: 40 | raise GinRummyProgramError("self.is_choosing_action_id must be False.") 41 | if self.state is not None: 42 | raise GinRummyProgramError("self.state must be None.") 43 | if self.chosen_action_id is not None: 44 | raise GinRummyProgramError("self.chosen_action_id={} must be None.".format(self.chosen_action_id)) 45 | self.state = state 46 | self.is_choosing_action_id = True 47 | while not self.chosen_action_id: 48 | time.sleep(0.001) 49 | if self.chosen_action_id is None: 50 | raise GinRummyProgramError("self.chosen_action_id cannot be None.") 51 | chosen_action_event = ActionEvent.decode_action(action_id=self.chosen_action_id) 52 | self.state = None 53 | self.is_choosing_action_id = False 54 | self.chosen_action_id = None 55 | return chosen_action_event 56 | 57 | def eval_step(self, state): 58 | ''' Predict the action given the current state for evaluation. The same as step here. 59 | 60 | Args: 61 | state (numpy.array): a numpy array that represents the current state 62 | 63 | Returns: 64 | action (int): the action chosen by the human agent 65 | ''' 66 | return self.step(state), {} 67 | -------------------------------------------------------------------------------- /rlcard/agents/human_agents/gin_rummy_human_agent/gui_cards/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datamllab/rlcard/d7d0a957baf4cc7225a50522adb0164bf130a9d0/rlcard/agents/human_agents/gin_rummy_human_agent/gui_cards/__init__.py -------------------------------------------------------------------------------- /rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datamllab/rlcard/d7d0a957baf4cc7225a50522adb0164bf130a9d0/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/__init__.py -------------------------------------------------------------------------------- /rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/canvas_item.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Project: Gui Gin Rummy 3 | File name: canvas_item.py 4 | Author: William Hale 5 | Date created: 3/14/2020 6 | ''' 7 | 8 | # from __future__ import annotations 9 | from typing import TYPE_CHECKING 10 | if TYPE_CHECKING: 11 | from rlcard.agents.gin_rummy_human_agent.gui_gin_rummy.game_canvas import GameCanvas 12 | from rlcard.agents.gin_rummy_human_agent.gui_cards.card_image import CardImage 13 | 14 | 15 | class CanvasItem(object): 16 | 17 | def __init__(self, item_id: int, game_canvas: 
'GameCanvas'): 18 | self.item_id = item_id 19 | self.game_canvas = game_canvas 20 | 21 | def __eq__(self, other): 22 | if isinstance(other, int): # FIXME: temporary kludge to convert all item_id to CanvasItem 23 | return other == self.item_id 24 | return isinstance(other, CanvasItem) and self.item_id == other.item_id 25 | 26 | def __hash__(self): 27 | return hash(self.item_id) 28 | 29 | def get_tags(self): 30 | return self.game_canvas.gettags(self.item_id) 31 | 32 | 33 | class CardItem(CanvasItem): 34 | 35 | def __init__(self, item_id: int, card_id: int, card_image: 'CardImage', game_canvas: 'GameCanvas'): 36 | super().__init__(item_id=item_id, game_canvas=game_canvas) 37 | self.card_id = card_id 38 | self.card_image = card_image 39 | 40 | def is_face_up(self) -> bool: 41 | return self.card_image.face_up 42 | 43 | def set_card_id_face_up(self, face_up: bool): 44 | if self.card_image.face_up != face_up: 45 | target_image = self.card_image if face_up else self.game_canvas.card_back_image 46 | self.game_canvas.itemconfig(self.item_id, image=target_image) 47 | self.card_image.face_up = face_up 48 | 49 | def flip_over(self): 50 | self.card_image.face_up = not self.card_image.face_up 51 | target_image = self.card_image if self.card_image.face_up else self.game_canvas.card_back_image 52 | self.game_canvas.itemconfig(self.item_id, image=target_image) 53 | -------------------------------------------------------------------------------- /rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/configurations.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Project: Gui Gin Rummy 3 | File name: configurations.py 4 | Author: William Hale 5 | Date created: 3/14/2020 6 | ''' 7 | 8 | import os 9 | 10 | from configparser import ConfigParser 11 | 12 | # 13 | # Gin Rummy parameters 14 | # 15 | 16 | GOING_OUT_DEADWOOD_COUNT = 10 17 | 18 | # 19 | # RLCard Gin Rummy parameters 20 | # 21 | 22 | MAX_DRAWN_CARD_COUNT = 52 23 | 24 | DISCARD_PILE_TAG = "discard_pile" 25 | STOCK_PILE_TAG = "stock_pile" 26 | NORTH_HELD_PILE_TAG = "north_held_pile" 27 | SOUTH_HELD_PILE_TAG = "south_held_pile" 28 | PLAYER_HELD_PILE_TAGS = [NORTH_HELD_PILE_TAG, SOUTH_HELD_PILE_TAG] 29 | 30 | DRAWN_TAG = "drawn" 31 | JOGGED_TAG = "jogged" 32 | SELECTED_TAG = "selected" 33 | 34 | SCORE_PLAYER_0_ACTION_ID = 0 35 | SCORE_PLAYER_1_ACTION_ID = 1 36 | DRAW_CARD_ACTION_ID = 2 37 | PICK_UP_DISCARD_ACTION_ID = 3 38 | DECLARE_DEAD_HAND_ACTION_ID = 4 39 | GIN_ACTION_ID = 5 40 | DISCARD_ACTION_ID = 6 41 | KNOCK_ACTION_ID = DISCARD_ACTION_ID + 52 42 | 43 | # 44 | # Not User Modifiable Options 45 | # 46 | 47 | IS_KEEP_TURN_WHEN_DISCARDING_CARD_PICKED_UP = False # TODO: make True the default value 48 | 49 | # 50 | # User Modifiable Options 51 | # 52 | 53 | config_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'game_options.ini') # Note this 54 | 55 | config = ConfigParser() 56 | found = config.read(config_path) 57 | 58 | # settings section 59 | settings_section = "settings" 60 | show_status_messages_option = "show_status_messages" 61 | warning_as_option = 'warning_as' 62 | game_background_color_option = 'game_background_color' 63 | window_size_factor_option = 'window_size_factor' 64 | is_show_tips_option = "is_show_tips" 65 | is_debug_option = "is_debug" 66 | 67 | SHOW_STATUS_MESSAGES = config.get(section=settings_section, option=show_status_messages_option, fallback="verbose") 68 | WARNINGS_AS = config.get(section=settings_section, option=warning_as_option, 
fallback="alert_messages") 69 | GAME_BACKGROUND_COLOR = config.get(section=settings_section, option=game_background_color_option, fallback="#007F00") 70 | WINDOW_SIZE_FACTOR = config.getint(section=settings_section, option=window_size_factor_option, fallback=75) 71 | IS_SHOW_TIPS = config.getboolean(section=settings_section, option=is_show_tips_option, fallback=True) 72 | # Note: IS_DEBUG always starts off as False; must explicitly update via preference window 73 | # IS_DEBUG = config.getboolean(section=settings_section, option=is_debug_option, fallback=False) 74 | IS_DEBUG = False 75 | -------------------------------------------------------------------------------- /rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_app.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Project: Gui Gin Rummy 3 | File name: game_app.py 4 | Author: William Hale 5 | Date created: 3/14/2020 6 | ''' 7 | 8 | # from __future__ import annotations 9 | from typing import TYPE_CHECKING 10 | if TYPE_CHECKING: 11 | from rlcard.envs.gin_rummy import GinRummyEnv 12 | 13 | from typing import Callable 14 | 15 | import tkinter as tk 16 | 17 | import rlcard 18 | 19 | from rlcard.agents.random_agent import RandomAgent 20 | 21 | from ..gin_rummy_human_agent import HumanAgent 22 | 23 | from .game_frame import GameFrame 24 | from .menu_bar import MenuBar 25 | 26 | 27 | class GameApp(object): 28 | 29 | def __init__(self, make_gin_rummy_env: Callable[[], 'GinRummyEnv'] = None): 30 | self.make_gin_rummy_env = make_gin_rummy_env if make_gin_rummy_env else GameApp._make_gin_rummy_env 31 | root = tk.Tk() 32 | root.resizable(False, False) 33 | self.game_frame = GameFrame(root=root, game_app=self) 34 | self.menu_bar = MenuBar(root, game_frame=self.game_frame) 35 | root.mainloop() 36 | 37 | @staticmethod 38 | def _make_gin_rummy_env() -> 'GinRummyEnv': 39 | gin_rummy_env = rlcard.make('gin-rummy') 40 | north_agent = RandomAgent(num_actions=gin_rummy_env.num_actions) 41 | south_agent = HumanAgent(gin_rummy_env.num_actions) 42 | gin_rummy_env.set_agents([north_agent, south_agent]) 43 | return gin_rummy_env 44 | -------------------------------------------------------------------------------- /rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_canvas_debug.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Project: Gui Gin Rummy 3 | File name: game_canvas_debug.py 4 | Author: William Hale 5 | Date created: 3/14/2020 6 | ''' 7 | 8 | # from __future__ import annotations 9 | from typing import TYPE_CHECKING 10 | if TYPE_CHECKING: 11 | from .game_canvas import GameCanvas 12 | 13 | from . 
import configurations 14 | 15 | from rlcard.games.gin_rummy.player import GinRummyPlayer 16 | 17 | import rlcard.games.gin_rummy.utils.utils as gin_rummy_utils 18 | 19 | 20 | class GameCanvasDebug(object): 21 | 22 | def __init__(self, game_canvas: 'GameCanvas'): 23 | self.game_canvas = game_canvas 24 | 25 | def get_card_name(self, card_item_id: int) -> str: 26 | card_id = self.game_canvas.card_item_ids.index(card_item_id) 27 | card = gin_rummy_utils.card_from_card_id(card_id=card_id) 28 | return str(card) 29 | 30 | def description(self): 31 | game_canvas = self.game_canvas 32 | card_name = self.get_card_name 33 | dealer_id = game_canvas.dealer_id 34 | current_player_id = game_canvas.current_player_id 35 | stock_pile_item_ids = game_canvas.find_withtag(configurations.STOCK_PILE_TAG) 36 | discard_pile_items = game_canvas.find_withtag(configurations.DISCARD_PILE_TAG) 37 | north_held_pile_item_ids = game_canvas.getter.get_held_pile_item_ids(player_id=0) 38 | south_held_pile_item_ids = game_canvas.getter.get_held_pile_item_ids(player_id=1) 39 | lines = [] 40 | lines.append("dealer: {}".format(GinRummyPlayer.short_name_of(player_id=dealer_id))) 41 | lines.append("current_player: {}".format(GinRummyPlayer.short_name_of(player_id=current_player_id))) 42 | lines.append("north hand: {}".format([card_name(card_item_id) for card_item_id in north_held_pile_item_ids])) 43 | lines.append("stockpile: {}".format([card_name(card_item_id) for card_item_id in stock_pile_item_ids])) 44 | lines.append("discard pile: {}".format([card_name(card_item_id) for card_item_id in discard_pile_items])) 45 | lines.append("south hand: {}".format([card_name(card_item_id) for card_item_id in south_held_pile_item_ids])) 46 | return "\n".join(lines) 47 | -------------------------------------------------------------------------------- /rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_options.ini: -------------------------------------------------------------------------------- 1 | [settings] 2 | show_status_messages = verbose 3 | warning_as = alert messages 4 | game_background_color = #007f00 5 | window_size_factor = 75 6 | is_debug = True 7 | is_show_tips = True 8 | 9 | -------------------------------------------------------------------------------- /rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Project: Gui Gin Rummy 3 | File name: handling_tap.py 4 | Author: William Hale 5 | Date created: 3/14/2020 6 | ''' 7 | 8 | # from __future__ import annotations 9 | from typing import TYPE_CHECKING 10 | if TYPE_CHECKING: 11 | from .game_canvas import GameCanvas 12 | 13 | from . import configurations 14 | from . 
import starting_new_game 15 | 16 | from .canvas_item import CanvasItem 17 | from .handling_tap_stock_pile import handle_tap_stock_pile 18 | from .handling_tap_discard_pile import handle_tap_discard_pile 19 | from .handling_tap_held_pile import handle_tap_held_pile 20 | from .handling_tap_player_pane import handle_tap_player_pane 21 | 22 | from rlcard.games.gin_rummy.utils.gin_rummy_error import GinRummyProgramError 23 | 24 | 25 | def on_game_canvas_tap(event): 26 | widget = event.widget 27 | hit_item_ids = widget.find_withtag("current") 28 | if hit_item_ids: 29 | if not len(hit_item_ids) == 1: 30 | raise GinRummyProgramError("len(hit_item_ids)={} must be 1.".format(len(hit_item_ids))) 31 | hit_item_id = hit_item_ids[0] 32 | hit_item = None 33 | for canvas_item in widget.canvas_items: 34 | if canvas_item.item_id == hit_item_id: 35 | hit_item = canvas_item 36 | if hit_item: 37 | if not widget.query.is_game_over(): 38 | _handle_tap(hit_item=hit_item, event=event, game_canvas=widget) 39 | else: 40 | top_discard_pile_item_id = widget.getter.get_top_discard_pile_item_id() 41 | if hit_item_id == top_discard_pile_item_id: 42 | starting_new_game.start_new_game(game_canvas=widget) 43 | 44 | 45 | def _handle_tap(hit_item: CanvasItem, event, game_canvas: 'GameCanvas'): 46 | hit_item_tags = hit_item.get_tags() 47 | if configurations.STOCK_PILE_TAG in hit_item_tags: 48 | current_player_id = game_canvas.current_player_id 49 | current_player_is_human = game_canvas.query.is_human(player_id=current_player_id) 50 | if current_player_is_human: 51 | handle_tap_stock_pile(hit_item=hit_item, game_canvas=game_canvas) 52 | elif configurations.DISCARD_PILE_TAG in hit_item_tags or hit_item == game_canvas.discard_pile_box_item: 53 | current_player_id = game_canvas.current_player_id 54 | current_player_is_human = game_canvas.query.is_human(player_id=current_player_id) 55 | if current_player_is_human: 56 | handle_tap_discard_pile(hit_item=hit_item, game_canvas=game_canvas) 57 | elif game_canvas.held_pile_tags[0] in hit_item_tags: 58 | pass # north player is never human player 59 | elif game_canvas.held_pile_tags[1] in hit_item_tags: 60 | handle_tap_held_pile(hit_item=hit_item, game_canvas=game_canvas) 61 | elif hit_item == game_canvas.player_panes[0]: 62 | pass # north player is never human player 63 | elif hit_item == game_canvas.player_panes[1]: 64 | handle_tap_player_pane(hit_item=hit_item, event=event, game_canvas=game_canvas) 65 | -------------------------------------------------------------------------------- /rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap_held_pile.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Project: Gui Gin Rummy 3 | File name: handling_tap_held_pile.py 4 | Author: William Hale 5 | Date created: 3/14/2020 6 | ''' 7 | 8 | # from __future__ import annotations 9 | from typing import TYPE_CHECKING 10 | if TYPE_CHECKING: 11 | from .game_canvas import GameCanvas 12 | 13 | from rlcard.games.gin_rummy.utils.gin_rummy_error import GinRummyProgramError 14 | 15 | from .player_type import PlayerType 16 | from .canvas_item import CanvasItem 17 | 18 | from . import configurations 19 | from . import info_messaging 20 | from . 
import utils 21 | 22 | 23 | def handle_tap_held_pile(hit_item: CanvasItem, game_canvas: 'GameCanvas'): 24 | hit_item_tags = hit_item.get_tags() 25 | if game_canvas.held_pile_tags[0] in hit_item_tags: 26 | player_id = 0 27 | elif game_canvas.held_pile_tags[1] in hit_item_tags: 28 | player_id = 1 29 | else: 30 | raise GinRummyProgramError("handle_tap_held_pile: unknown held_pile.") 31 | player_is_human = game_canvas.player_types[player_id] is PlayerType.human_player 32 | can_draw_from_stock_pile = game_canvas.query.can_draw_from_stock_pile(player_id=player_id) 33 | can_draw_from_discard_pile = game_canvas.query.can_draw_from_discard_pile(player_id=player_id) 34 | is_game_over = game_canvas.query.is_game_over() 35 | if is_game_over: 36 | pass 37 | elif game_canvas.query.can_discard_card(player_id=player_id): # hit_item is source 38 | if player_is_human: 39 | utils.toggle_held_pile_item_selected(item=hit_item, game_canvas=game_canvas) 40 | elif can_draw_from_stock_pile or can_draw_from_discard_pile: # hit_item is target 41 | drawn_card_item_id = None 42 | drawn_card_item_tag = None 43 | if not drawn_card_item_id and can_draw_from_stock_pile: 44 | top_stock_pile_item_id = game_canvas.getter.get_top_stock_pile_item_id() 45 | top_stock_pile_item_tags = game_canvas.getter.get_tags(top_stock_pile_item_id) 46 | if configurations.DRAWN_TAG in top_stock_pile_item_tags: 47 | drawn_card_item_id = top_stock_pile_item_id 48 | drawn_card_item_tag = configurations.STOCK_PILE_TAG 49 | if not drawn_card_item_id and can_draw_from_discard_pile: 50 | top_discard_pile_item_id = game_canvas.getter.get_top_discard_pile_item_id() 51 | top_discard_pile_item_tags = game_canvas.getter.get_tags(top_discard_pile_item_id) 52 | if configurations.DRAWN_TAG in top_discard_pile_item_tags: 53 | drawn_card_item_id = top_discard_pile_item_id 54 | drawn_card_item_tag = configurations.DISCARD_PILE_TAG 55 | if drawn_card_item_id: 56 | if player_id == 1: # remove info_message if south player 57 | info_messaging.blank_info_message_label(game_canvas=game_canvas) 58 | game_canvas.post_doing_action.post_do_get_card_action(player_id=player_id, 59 | drawn_card_item_id=drawn_card_item_id, 60 | hit_item_id=hit_item.item_id, 61 | drawn_card_item_tag=drawn_card_item_tag) 62 | else: 63 | utils.toggle_held_pile_item_selected(item=hit_item, game_canvas=game_canvas) 64 | else: 65 | if player_is_human: 66 | utils.toggle_held_pile_item_selected(item=hit_item, game_canvas=game_canvas) # arranging hand 67 | -------------------------------------------------------------------------------- /rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap_player_pane.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Project: Gui Gin Rummy 3 | File name: handling_tap_player_pane.py 4 | Author: William Hale 5 | Date created: 3/14/2020 6 | ''' 7 | 8 | # from __future__ import annotations 9 | from typing import TYPE_CHECKING 10 | if TYPE_CHECKING: 11 | from .game_canvas import GameCanvas 12 | 13 | from . import configurations 14 | from . 
import utils 15 | 16 | from .canvas_item import CanvasItem 17 | 18 | 19 | def handle_tap_player_pane(hit_item: CanvasItem, event, game_canvas: 'GameCanvas'): 20 | # un-select and un-jog all held cards 21 | player_id = None 22 | if game_canvas.player_panes[0] == hit_item: 23 | player_id = 0 24 | elif game_canvas.player_panes[1] == hit_item: 25 | player_id = 1 26 | if player_id is not None and game_canvas.query.is_human(player_id): 27 | held_pile_item_ids = game_canvas.getter.get_held_pile_item_ids(player_id) 28 | for item_id in held_pile_item_ids: 29 | game_canvas.dtag(item_id, configurations.JOGGED_TAG) 30 | game_canvas.dtag(item_id, configurations.SELECTED_TAG) 31 | utils.fan_held_pile(player_id=player_id, game_canvas=game_canvas) 32 | -------------------------------------------------------------------------------- /rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap_stock_pile.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Project: Gui Gin Rummy 3 | File name: handling_tap_stock_pile.py 4 | Author: William Hale 5 | Date created: 3/14/2020 6 | ''' 7 | 8 | # from __future__ import annotations 9 | from typing import TYPE_CHECKING 10 | if TYPE_CHECKING: 11 | from .game_canvas import GameCanvas 12 | 13 | from .canvas_item import CanvasItem 14 | 15 | from . import configurations 16 | from . import info_messaging 17 | from . import utils 18 | 19 | 20 | def handle_tap_stock_pile(hit_item: CanvasItem, game_canvas: 'GameCanvas'): # hit_item is source 21 | # Normal case is can_draw_from_stock_pile. 22 | # hit_item must not be drawn. 23 | # hit_item must be top card of stock_pile. 24 | # reset top card of discard pile if drawn. 25 | # reset all selected cards in held_pile 26 | player_id = game_canvas.current_player_id 27 | if game_canvas.query.is_game_over(): 28 | pass 29 | elif game_canvas.query.can_discard_card(player_id=player_id): 30 | pass 31 | elif game_canvas.query.can_declare_dead_hand(player_id=player_id): 32 | pass 33 | elif game_canvas.query.can_draw_from_stock_pile(player_id=player_id): 34 | current_player_id = game_canvas.current_player_id 35 | hit_item_tags = hit_item.get_tags() 36 | if configurations.DRAWN_TAG not in hit_item_tags: 37 | top_stock_pile_item_id = game_canvas.getter.get_top_stock_pile_item_id() 38 | if hit_item == top_stock_pile_item_id: 39 | utils.toggle_stock_pile_item_selected(game_canvas) 40 | # reset drawn top card of discard_pile if needed 41 | top_discard_pile_item_id = game_canvas.getter.get_top_discard_pile_item_id() 42 | top_discard_pile_item_tags = game_canvas.getter.get_tags(top_discard_pile_item_id) 43 | if configurations.DRAWN_TAG in top_discard_pile_item_tags: 44 | utils.toggle_discard_pile_item_selected(game_canvas=game_canvas) 45 | # reset selected cards of held_pile of current_player 46 | held_pile_item_ids = game_canvas.getter.get_held_pile_item_ids(player_id=current_player_id) 47 | for held_pile_item_id in held_pile_item_ids: 48 | held_pile_item_tags = game_canvas.getter.get_tags(item_id=held_pile_item_id) 49 | if configurations.SELECTED_TAG in held_pile_item_tags: 50 | held_pile_item = game_canvas.canvas_item_by_item_id.get(held_pile_item_id, None) 51 | if held_pile_item: 52 | utils.toggle_held_pile_item_selected(item=held_pile_item, game_canvas=game_canvas) 53 | # remove info_message if south player 54 | if player_id == 1: 55 | info_messaging.blank_info_message_label(game_canvas=game_canvas) -------------------------------------------------------------------------------- 
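The three tap handlers above are all dispatched from on_game_canvas_tap in handling_tap.py; the actual event binding lives in game_canvas.py, which is not reproduced in this listing. For orientation only, the wiring plausibly looks like the sketch below (the binding target is an assumption; in rlcard it is the GameCanvas subclass, not a bare Canvas):

    # Sketch: attaching the tap dispatcher to a Tk canvas. In rlcard this is
    # done inside GameCanvas (not shown here), so treat this as illustrative.
    import tkinter as tk
    from rlcard.agents.human_agents.gin_rummy_human_agent.gui_gin_rummy.handling_tap import on_game_canvas_tap

    root = tk.Tk()
    canvas = tk.Canvas(root)
    canvas.bind('<Button-1>', on_game_canvas_tap)  # event.widget will be this canvas
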
/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/menu_bar.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Project: Gui Gin Rummy 3 | File name: menu_bar.py 4 | Author: William Hale 5 | Date created: 3/14/2020 6 | ''' 7 | 8 | # from __future__ import annotations 9 | from typing import TYPE_CHECKING 10 | if TYPE_CHECKING: 11 | from .game_frame import GameFrame 12 | 13 | import tkinter as tk 14 | from tkinter import messagebox 15 | 16 | from .preferences_window import PreferencesWindow 17 | 18 | 19 | class MenuBar(tk.Menu): 20 | 21 | def __init__(self, root: tk.Tk, game_frame: 'GameFrame'): 22 | super().__init__(root) 23 | self.game_frame = game_frame 24 | 25 | # create file menu 26 | file_menu = tk.Menu(self, tearoff=False) 27 | file_menu.add_command(label="New Game", command=self.on_new_game_menu_clicked) 28 | self.add_cascade(label="File", menu=file_menu) 29 | 30 | # create edit menu 31 | edit_menu = tk.Menu(self, tearoff=False) 32 | edit_menu.add_command(label="Preferences", command=self.on_preference_menu_clicked) 33 | self.add_cascade(label="Edit", menu=edit_menu) 34 | 35 | # create about menu 36 | help_menu = tk.Menu(self, tearoff=False) 37 | help_menu.add_command(label="About", command=self.on_about_menu_clicked) 38 | self.add_cascade(label="Help", menu=help_menu) 39 | 40 | # configure menuBar 41 | root.configure(menu=self) 42 | 43 | def on_new_game_menu_clicked(self): 44 | self.game_frame.start_new_game() 45 | 46 | def on_preference_menu_clicked(self): 47 | PreferencesWindow(self.game_frame) 48 | 49 | @staticmethod 50 | def on_about_menu_clicked(): 51 | messagebox.showinfo(title="Info", message="Gin Rummy\nVersion 1.0") 52 | -------------------------------------------------------------------------------- /rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/player_type.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Project: Gui Gin Rummy 3 | File name: player_type.py 4 | Author: William Hale 5 | Date created: 3/14/2020 6 | ''' 7 | 8 | import enum 9 | 10 | 11 | class PlayerType(int, enum.Enum): 12 | 13 | computer_player = 1 14 | human_player = 2 15 | demo_player = 3 16 | -------------------------------------------------------------------------------- /rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/utils_extra.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Project: Gui Gin Rummy 3 | File name: utils_extra.py 4 | Author: William Hale 5 | Date created: 3/14/2020 6 | ''' 7 | 8 | from PIL import Image, ImageDraw, ImageFilter 9 | 10 | 11 | def rounded_rectangle(self: ImageDraw, xy, corner_radius, fill=None, outline=None): # FIXME: not used 12 | upper_left_point = xy[0] 13 | bottom_right_point = xy[1] 14 | self.rectangle( 15 | [ 16 | (upper_left_point[0], upper_left_point[1] + corner_radius), 17 | (bottom_right_point[0], bottom_right_point[1] - corner_radius) 18 | ], 19 | fill=fill, 20 | outline=outline 21 | ) 22 | self.rectangle( 23 | [ 24 | (upper_left_point[0] + corner_radius, upper_left_point[1]), 25 | (bottom_right_point[0] - corner_radius, bottom_right_point[1]) 26 | ], 27 | fill=fill, 28 | outline=outline 29 | ) 30 | self.pieslice( 31 | [upper_left_point, (upper_left_point[0] + corner_radius * 2, upper_left_point[1] + corner_radius * 2)], 32 | 180, 33 | 270, 34 | fill=fill, 35 | outline=outline 36 | ) 37 | self.pieslice( 38 | [(bottom_right_point[0] - corner_radius * 2, bottom_right_point[1] - 
corner_radius * 2), bottom_right_point],
39 |         0,
40 |         90,
41 |         fill=fill,
42 |         outline=outline
43 |     )
44 |     self.pieslice([(upper_left_point[0], bottom_right_point[1] - corner_radius * 2),
45 |                    (upper_left_point[0] + corner_radius * 2, bottom_right_point[1])],
46 |                   90,
47 |                   180,
48 |                   fill=fill,
49 |                   outline=outline
50 |                   )
51 |     self.pieslice([(bottom_right_point[0] - corner_radius * 2, upper_left_point[1]),
52 |                    (bottom_right_point[0], upper_left_point[1] + corner_radius * 2)],
53 |                   270,
54 |                   360,
55 |                   fill=fill,
56 |                   outline=outline
57 |                   )
58 | 
59 | 
60 | ImageDraw.rounded_rectangle = rounded_rectangle  # FIXME: not used
61 | 
62 | 
63 | def mask_rounded_rectangle_transparent(pil_img, corner_radius=8):  # FIXME: not used
64 |     blur_radius = 0  # FIXME: what is this for ??? wch
65 |     mask = Image.new("L", pil_img.size, 0)
66 |     draw = ImageDraw.Draw(mask)
67 |     rounded_rectangle(draw, xy=((0, 0), (pil_img.size[0], pil_img.size[1])), corner_radius=corner_radius, fill=255)
68 | 
69 |     mask = mask.filter(ImageFilter.GaussianBlur(blur_radius))
70 |     result = pil_img.copy()
71 |     result.putalpha(mask)
72 |     return result
73 | 
--------------------------------------------------------------------------------
/rlcard/agents/human_agents/leduc_holdem_human_agent.py:
--------------------------------------------------------------------------------
1 | from rlcard.utils.utils import print_card
2 | 
3 | 
4 | class HumanAgent(object):
5 |     ''' A human agent for Leduc Holdem. It can be used to play against trained models
6 |     '''
7 | 
8 |     def __init__(self, num_actions):
9 |         ''' Initialize the human agent
10 | 
11 |         Args:
12 |             num_actions (int): the size of the output action space
13 |         '''
14 |         self.use_raw = True
15 |         self.num_actions = num_actions
16 | 
17 |     @staticmethod
18 |     def step(state):
19 |         ''' Human agent will display the state and make decisions through interfaces
20 | 
21 |         Args:
22 |             state (dict): A dictionary that represents the current state
23 | 
24 |         Returns:
25 |             action (int): The action decided by human
26 |         '''
27 |         _print_state(state['raw_obs'], state['action_record'])
28 |         action = int(input('>> You choose action (integer): '))
29 |         while action < 0 or action >= len(state['legal_actions']):
30 |             print('Action illegal...')
31 |             action = int(input('>> Re-choose action (integer): '))
32 |         return state['raw_legal_actions'][action]
33 | 
34 |     def eval_step(self, state):
35 |         ''' Predict the action given the current state for evaluation. The same as step here.
36 | 
37 |         Args:
38 |             state (numpy.array): a numpy array that represents the current state
39 | 
40 |         Returns:
41 |             action (int): the action decided by the human
42 |         '''
43 |         return self.step(state), {}
44 | 
45 | def _print_state(state, action_record):
46 |     ''' Print out the state
47 | 
48 |     Args:
49 |         state (dict): A dictionary of the raw state
50 |         action_record (list): A list of the historical actions
51 |     '''
52 |     _action_list = []
53 |     for i in range(1, len(action_record)+1):
54 |         if action_record[-i][0] == state['current_player']:
55 |             break
56 |         _action_list.insert(0, action_record[-i])
57 |     for pair in _action_list:
58 |         print('>> Player', pair[0], 'chooses', pair[1])
59 | 
60 |     print('\n=============== Community Card ===============')
61 |     print_card(state['public_card'])
62 |     print('=============== Your Hand ===============')
63 |     print_card(state['hand'])
64 |     print('=============== Chips ===============')
65 |     print('Yours: ', end='')
66 |     for _ in range(state['my_chips']):
67 |         print('+', end='')
68 |     print('')
69 |     for i in range(len(state['all_chips'])):
70 |         if i != state['current_player']:
71 |             print('Agent {}: '.format(i), end='')
72 |             for _ in range(state['all_chips'][i]):
73 |                 print('+', end='')
74 |     print('\n=========== Actions You Can Choose ===========')
75 |     print(', '.join([str(index) + ': ' + action for index, action in enumerate(state['legal_actions'])]))
76 |     print('')
77 | 
--------------------------------------------------------------------------------
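These console agents all follow the same pattern: print the raw state, read an action index from stdin, and return the matching raw action. A condensed sketch of a human-vs-CFR Leduc session (modeled on examples/human/leduc_holdem_human.py; the pretrained model id 'leduc-holdem-cfr' is assumed from rlcard/models/pretrained_models.py):

    import rlcard
    from rlcard import models
    from rlcard.agents.human_agents.leduc_holdem_human_agent import HumanAgent

    env = rlcard.make('leduc-holdem')
    human = HumanAgent(env.num_actions)
    cfr_agent = models.load('leduc-holdem-cfr').agents[0]  # pretrained CFR policy
    env.set_agents([human, cfr_agent])
    trajectories, payoffs = env.run(is_training=False)
    print('Your payoff:', payoffs[0])
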
/rlcard/agents/human_agents/limit_holdem_human_agent.py:
--------------------------------------------------------------------------------
1 | from rlcard.utils.utils import print_card
2 | 
3 | 
4 | class HumanAgent(object):
5 |     ''' A human agent for Limit Holdem. It can be used to play against trained models
6 |     '''
7 | 
8 |     def __init__(self, num_actions):
9 |         ''' Initialize the human agent
10 | 
11 |         Args:
12 |             num_actions (int): the size of the output action space
13 |         '''
14 |         self.use_raw = True
15 |         self.num_actions = num_actions
16 | 
17 |     @staticmethod
18 |     def step(state):
19 |         ''' Human agent will display the state and make decisions through interfaces
20 | 
21 |         Args:
22 |             state (dict): A dictionary that represents the current state
23 | 
24 |         Returns:
25 |             action (int): The action decided by human
26 |         '''
27 |         _print_state(state['raw_obs'], state['action_record'])
28 |         action = int(input('>> You choose action (integer): '))
29 |         while action < 0 or action >= len(state['legal_actions']):
30 |             print('Action illegal...')
31 |             action = int(input('>> Re-choose action (integer): '))
32 |         return state['raw_legal_actions'][action]
33 | 
34 |     def eval_step(self, state):
35 |         ''' Predict the action given the current state for evaluation. The same as step here.
36 | 
37 |         Args:
38 |             state (numpy.array): a numpy array that represents the current state
39 | 
40 |         Returns:
41 |             action (int): the action decided by the human
42 |         '''
43 |         return self.step(state), {}
44 | 
45 | def _print_state(state, action_record):
46 |     ''' Print out the state
47 | 
48 |     Args:
49 |         state (dict): A dictionary of the raw state
50 |         action_record (list): A list of each player's historical actions
51 |     '''
52 |     _action_list = []
53 |     for i in range(1, len(action_record)+1):
54 |         _action_list.insert(0, action_record[-i])
55 |     for pair in _action_list:
56 |         print('>> Player', pair[0], 'chooses', pair[1])
57 | 
58 |     print('\n=============== Community Card ===============')
59 |     print_card(state['public_cards'])
60 |     print('=============== Your Hand ===============')
61 |     print_card(state['hand'])
62 |     print('=============== Chips ===============')
63 |     print('Yours: ', end='')
64 |     for _ in range(state['my_chips']):
65 |         print('+', end='')
66 |     print('')
67 |     for i in range(len(state['all_chips'])):
68 |         for _ in range(state['all_chips'][i]):
69 |             print('+', end='')
70 |     print('\n=========== Actions You Can Choose ===========')
71 |     print(', '.join([str(index) + ': ' + action for index, action in enumerate(state['legal_actions'])]))
72 |     print('')
73 | 
--------------------------------------------------------------------------------
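One caveat shared by all of these console agents: int(input(...)) raises ValueError as soon as the user types anything non-numeric, which aborts the hand. A more forgiving prompt loop might look like this (illustrative sketch, not part of the source):

    def prompt_action_index(num_choices):
        # Keep asking until we get an integer in [0, num_choices).
        while True:
            raw = input('>> You choose action (integer): ')
            try:
                action = int(raw)
            except ValueError:
                print('Please enter an integer...')
                continue
            if 0 <= action < num_choices:
                return action
            print('Action illegal...')
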
/rlcard/agents/human_agents/nolimit_holdem_human_agent.py:
--------------------------------------------------------------------------------
1 | from rlcard.utils.utils import print_card
2 | 
3 | 
4 | class HumanAgent(object):
5 |     ''' A human agent for No Limit Holdem. It can be used to play against trained models
6 |     '''
7 | 
8 |     def __init__(self, num_actions):
9 |         ''' Initialize the human agent
10 | 
11 |         Args:
12 |             num_actions (int): the size of the output action space
13 |         '''
14 |         self.use_raw = True
15 |         self.num_actions = num_actions
16 | 
17 |     @staticmethod
18 |     def step(state):
19 |         ''' Human agent will display the state and make decisions through interfaces
20 | 
21 |         Args:
22 |             state (dict): A dictionary that represents the current state
23 | 
24 |         Returns:
25 |             action (int): The action decided by human
26 |         '''
27 |         _print_state(state['raw_obs'], state['action_record'])
28 |         action = int(input('>> You choose action (integer): '))
29 |         while action < 0 or action >= len(state['legal_actions']):
30 |             print('Action illegal...')
31 |             action = int(input('>> Re-choose action (integer): '))
32 |         return state['raw_legal_actions'][action]
33 | 
34 |     def eval_step(self, state):
35 |         ''' Predict the action given the current state for evaluation. The same as step here.
36 | 
37 |         Args:
38 |             state (numpy.array): a numpy array that represents the current state
39 | 
40 |         Returns:
41 |             action (int): the action decided by the human
42 |         '''
43 |         return self.step(state), {}
44 | 
45 | def _print_state(state, action_record):
46 |     ''' Print out the state
47 | 
48 |     Args:
49 |         state (dict): A dictionary of the raw state
50 |         action_record (list): A list of the historical actions
51 |     '''
52 |     _action_list = []
53 |     for i in range(1, len(action_record)+1):
54 |         if action_record[-i][0] == state['current_player']:
55 |             break
56 |         _action_list.insert(0, action_record[-i])
57 |     for pair in _action_list:
58 |         print('>> Player', pair[0], 'chooses', pair[1])
59 | 
60 |     print('\n=============== Community Card ===============')
61 |     print_card(state['public_cards'])
62 | 
63 |     print('============= Player', state["current_player"], '- Hand =============')
64 |     print_card(state['hand'])
65 | 
66 |     print('=============== Chips ===============')
67 |     print('In Pot:', state["pot"])
68 |     print('Remaining:', state["stakes"])
69 | 
70 |     print('\n=========== Actions You Can Choose ===========')
71 |     print(', '.join([str(index) + ': ' + str(action) for index, action in enumerate(state['legal_actions'])]))
72 |     print('')
73 |     print(state)  # also dumps the raw state dict
--------------------------------------------------------------------------------
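A sketch of seating this agent in a live no-limit game against a random bot (in the spirit of examples/human/nolimit_holdem_human.py; the exact options used there may differ):

    import rlcard
    from rlcard.agents import RandomAgent
    from rlcard.agents.human_agents.nolimit_holdem_human_agent import HumanAgent

    env = rlcard.make('no-limit-holdem')
    env.set_agents([HumanAgent(env.num_actions), RandomAgent(num_actions=env.num_actions)])
    trajectories, payoffs = env.run(is_training=False)
    print('Your payoff:', payoffs[0])
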
/rlcard/agents/human_agents/uno_human_agent.py:
--------------------------------------------------------------------------------
1 | from rlcard.games.uno.card import UnoCard
2 | 
3 | class HumanAgent(object):
4 |     ''' A human agent for UNO. It can be used to play against trained models
5 |     '''
6 | 
7 |     def __init__(self, num_actions):
8 |         ''' Initialize the human agent
9 | 
10 |         Args:
11 |             num_actions (int): the size of the output action space
12 |         '''
13 |         self.use_raw = True
14 |         self.num_actions = num_actions
15 | 
16 |     @staticmethod
17 |     def step(state):
18 |         ''' Human agent will display the state and make decisions through interfaces
19 | 
20 |         Args:
21 |             state (dict): A dictionary that represents the current state
22 | 
23 |         Returns:
24 |             action (int): The action decided by human
25 |         '''
26 |         print(state['raw_obs'])
27 |         _print_state(state['raw_obs'], state['action_record'])
28 |         action = int(input('>> You choose action (integer): '))
29 |         while action < 0 or action >= len(state['legal_actions']):
30 |             print('Action illegal...')
31 |             action = int(input('>> Re-choose action (integer): '))
32 |         return state['raw_legal_actions'][action]
33 | 
34 |     def eval_step(self, state):
35 |         ''' Predict the action given the current state for evaluation. The same as step here.
36 | 
37 |         Args:
38 |             state (numpy.array): a numpy array that represents the current state
39 | 
40 |         Returns:
41 |             action (int): the action decided by the human
42 |         '''
43 |         return self.step(state), {}
44 | 
45 | def _print_state(state, action_record):
46 |     ''' Print out the state
47 |     Args:
48 |         state (dict): A dictionary of the raw state
49 |         action_record (list): A list of each player's historical actions
50 |     '''
51 |     _action_list = []
52 |     for i in range(1, len(action_record)+1):
53 |         if action_record[-i][0] == state['current_player']:
54 |             break
55 |         _action_list.insert(0, action_record[-i])
56 |     for pair in _action_list:
57 |         print('>> Player', pair[0], 'chooses ', end='')
58 |         _print_action(pair[1])
59 |         print('')
60 | 
61 |     print('\n=============== Your Hand ===============')
62 |     UnoCard.print_cards(state['hand'])
63 |     print('')
64 |     print('=============== Last Card ===============')
65 |     UnoCard.print_cards(state['target'], wild_color=True)
66 |     print('')
67 |     print('========== Players Card Number ===========')
68 |     for i in range(state['num_players']):
69 |         if i != state['current_player']:
70 |             print('Player {} has {} cards.'.format(i, state['num_cards'][i]))
71 |     print('======== Actions You Can Choose =========')
72 |     for i, action in enumerate(state['legal_actions']):
73 |         print(str(i)+': ', end='')
74 |         UnoCard.print_cards(action, wild_color=True)
75 |         if i < len(state['legal_actions']) - 1:
76 |             print(', ', end='')
77 |     print('\n')
78 | 
79 | def _print_action(action):
80 |     ''' Print out an action in a nice form
81 | 
82 |     Args:
83 |         action (str): A string of an action
84 |     '''
85 |     UnoCard.print_cards(action, wild_color=True)
86 | 
--------------------------------------------------------------------------------
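UnoCard.print_cards accepts either a single card string or a list of them, as the calls above show, and wild_color=True colorizes wild cards. Card strings follow rlcard's UNO encoding (e.g. 'r-5' for a red 5; the exact trait names are best checked against rlcard/games/uno/jsondata/action_space.json):

    from rlcard.games.uno.card import UnoCard

    UnoCard.print_cards('r-5')                      # one card string
    UnoCard.print_cards(['r-5', 'g-skip'])          # or a list of them
    UnoCard.print_cards('r-wild', wild_color=True)  # render the wild card's chosen color
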
/rlcard/agents/pettingzoo_agents.py:
--------------------------------------------------------------------------------
1 | from rlcard.agents.nfsp_agent import NFSPAgent
2 | from rlcard.agents.dqn_agent import DQNAgent
3 | from rlcard.agents.random_agent import RandomAgent
4 | from rlcard.utils.pettingzoo_utils import wrap_state
5 | 
6 | 
7 | class NFSPAgentPettingZoo(NFSPAgent):
8 |     def step(self, state):
9 |         return super().step(wrap_state(state))
10 | 
11 |     def eval_step(self, state):
12 |         return super().eval_step(wrap_state(state))
13 | 
14 |     def feed(self, ts):
15 |         state, action, reward, next_state, done = tuple(ts)
16 |         state = wrap_state(state)
17 |         next_state = wrap_state(next_state)
18 |         ts = (state, action, reward, next_state, done)
19 |         return super().feed(ts)
20 | 
21 | 
22 | class DQNAgentPettingZoo(DQNAgent):
23 |     def step(self, state):
24 |         return super().step(wrap_state(state))
25 | 
26 |     def eval_step(self, state):
27 |         return super().eval_step(wrap_state(state))
28 | 
29 |     def feed(self, ts):
30 |         state, action, reward, next_state, done = tuple(ts)
31 |         state = wrap_state(state)
32 |         next_state = wrap_state(next_state)
33 |         ts = (state, action, reward, next_state, done)
34 |         return super().feed(ts)
35 | 
36 | 
37 | class RandomAgentPettingZoo(RandomAgent):
38 |     def step(self, state):
39 |         return super().step(wrap_state(state))
40 | 
41 |     def eval_step(self, state):
42 |         return super().eval_step(wrap_state(state))
43 | 
--------------------------------------------------------------------------------
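These wrappers exist so the standard agents can consume PettingZoo-style observations (wrap_state does the translation); examples/pettingzoo/run_rl.py and run_dmc.py in this repo drive them end to end. A heavily condensed sketch (assumes the pettingzoo package is installed; the env id and its version suffix are assumptions and vary by release):

    from pettingzoo.classic import leduc_holdem_v4  # version suffix is an assumption
    from rlcard.agents.pettingzoo_agents import RandomAgentPettingZoo

    env = leduc_holdem_v4.env()
    env.reset(seed=42)
    agent = RandomAgentPettingZoo(num_actions=env.action_space('player_0').n)
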
The random agent is for running toy examples on the card games
6 |     '''
7 | 
8 |     def __init__(self, num_actions):
9 |         ''' Initialize the random agent
10 | 
11 |         Args:
12 |             num_actions (int): The size of the output action space
13 |         '''
14 |         self.use_raw = False
15 |         self.num_actions = num_actions
16 | 
17 |     @staticmethod
18 |     def step(state):
19 |         ''' Predict the action given the current state when generating training data.
20 | 
21 |         Args:
22 |             state (dict): A dictionary that represents the current state
23 | 
24 |         Returns:
25 |             action (int): The action predicted (randomly chosen) by the random agent
26 |         '''
27 |         return np.random.choice(list(state['legal_actions'].keys()))
28 | 
29 |     def eval_step(self, state):
30 |         ''' Predict the action given the current state for evaluation.
31 |             Since the random agent is not trained, this function is equivalent to the step function.
32 | 
33 |         Args:
34 |             state (dict): A dictionary that represents the current state
35 | 
36 |         Returns:
37 |             action (int): The action predicted (randomly chosen) by the random agent
38 |             probs (list): The list of action probabilities
39 |         '''
40 |         probs = [0 for _ in range(self.num_actions)]
41 |         for i in state['legal_actions']:
42 |             probs[i] = 1/len(state['legal_actions'])
43 | 
44 |         info = {}
45 |         info['probs'] = {state['raw_legal_actions'][i]: probs[list(state['legal_actions'].keys())[i]] for i in range(len(state['legal_actions']))}
46 | 
47 |         return self.step(state), info
48 | 
--------------------------------------------------------------------------------
/rlcard/envs/__init__.py:
--------------------------------------------------------------------------------
1 | ''' Register new environments
2 | '''
3 | from rlcard.envs.env import Env
4 | from rlcard.envs.registration import register, make
5 | 
6 | register(
7 |     env_id='blackjack',
8 |     entry_point='rlcard.envs.blackjack:BlackjackEnv',
9 | )
10 | 
11 | register(
12 |     env_id='doudizhu',
13 |     entry_point='rlcard.envs.doudizhu:DoudizhuEnv',
14 | )
15 | 
16 | register(
17 |     env_id='limit-holdem',
18 |     entry_point='rlcard.envs.limitholdem:LimitholdemEnv',
19 | )
20 | 
21 | register(
22 |     env_id='no-limit-holdem',
23 |     entry_point='rlcard.envs.nolimitholdem:NolimitholdemEnv',
24 | )
25 | 
26 | register(
27 |     env_id='leduc-holdem',
28 |     entry_point='rlcard.envs.leducholdem:LeducholdemEnv'
29 | )
30 | 
31 | register(
32 |     env_id='uno',
33 |     entry_point='rlcard.envs.uno:UnoEnv',
34 | )
35 | 
36 | register(
37 |     env_id='mahjong',
38 |     entry_point='rlcard.envs.mahjong:MahjongEnv',
39 | )
40 | 
41 | register(
42 |     env_id='gin-rummy',
43 |     entry_point='rlcard.envs.gin_rummy:GinRummyEnv',
44 | )
45 | 
46 | register(
47 |     env_id='bridge',
48 |     entry_point='rlcard.envs.bridge:BridgeEnv',
49 | )
50 | 
--------------------------------------------------------------------------------
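Downstream projects register their own games the same way; the entry_point is a 'module:ClassName' string resolved with importlib, exactly as EnvSpec does below. A sketch with placeholder names:

    from rlcard.envs.registration import register

    # 'my_package.envs.my_game:MyGameEnv' is a hypothetical entry point.
    register(
        env_id='my-game',
        entry_point='my_package.envs.my_game:MyGameEnv',
    )

    # Afterwards the usual factory works: env = rlcard.make('my-game')
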
/rlcard/envs/registration.py:
--------------------------------------------------------------------------------
1 | import importlib
2 | 
3 | # Default Config
4 | DEFAULT_CONFIG = {
5 |     'allow_step_back': False,
6 |     'seed': None,
7 | }
8 | 
9 | class EnvSpec(object):
10 |     ''' A specification for a particular instance of the environment.
11 |     '''
12 | 
13 |     def __init__(self, env_id, entry_point=None):
14 |         ''' Initialize
15 | 
16 |         Args:
17 |             env_id (string): The name of the environment
18 |             entry_point (string): A string that indicates the location of the environment class
19 |         '''
20 |         self.env_id = env_id
21 |         mod_name, class_name = entry_point.split(':')
22 |         self._entry_point = getattr(importlib.import_module(mod_name), class_name)
23 | 
24 |     def make(self, config=DEFAULT_CONFIG):
25 |         ''' Instantiate an instance of the environment
26 |         Args:
27 |             config (dict): A dictionary of the environment settings
28 |         Returns:
29 |             env (Env): An instance of the environment
30 |         '''
31 |         env = self._entry_point(config)
32 |         return env
33 | 
34 | class EnvRegistry(object):
35 |     ''' Register an environment (game) by ID
36 |     '''
37 | 
38 |     def __init__(self):
39 |         ''' Initialize
40 |         '''
41 |         self.env_specs = {}
42 | 
43 |     def register(self, env_id, entry_point):
44 |         ''' Register an environment
45 | 
46 |         Args:
47 |             env_id (string): The name of the environment
48 |             entry_point (string): A string that indicates the location of the environment class
49 |         '''
50 |         if env_id in self.env_specs:
51 |             raise ValueError('Cannot re-register env_id: {}'.format(env_id))
52 |         self.env_specs[env_id] = EnvSpec(env_id, entry_point)
53 | 
54 |     def make(self, env_id, config=DEFAULT_CONFIG):
55 |         ''' Create an environment instance
56 | 
57 |         Args:
58 |             env_id (string): The name of the environment
59 |             config (dict): A dictionary of the environment settings
60 |         '''
61 |         if env_id not in self.env_specs:
62 |             raise ValueError('Cannot find env_id: {}'.format(env_id))
63 |         return self.env_specs[env_id].make(config)
64 | 
65 | # Have a global registry
66 | registry = EnvRegistry()
67 | 
68 | def register(env_id, entry_point):
69 |     ''' Register an environment
70 | 
71 |     Args:
72 |         env_id (string): The name of the environment
73 |         entry_point (string): A string that indicates the location of the environment class
74 |     '''
75 |     return registry.register(env_id, entry_point)
76 | 
77 | def make(env_id, config={}):
78 |     ''' Create an environment instance
79 | 
80 |     Args:
81 |         env_id (string): The name of the environment
82 |         config (dict): A dictionary of the environment settings
83 | 
84 |     '''
85 |     _config = DEFAULT_CONFIG.copy()
86 |     for key in config:
87 |         _config[key] = config[key]
88 | 
89 |     return registry.make(env_id, _config)
--------------------------------------------------------------------------------
/rlcard/envs/uno.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from collections import OrderedDict
3 | 
4 | from rlcard.envs import Env
5 | from rlcard.games.uno import Game
6 | from rlcard.games.uno.utils import encode_hand, encode_target
7 | from rlcard.games.uno.utils import ACTION_SPACE, ACTION_LIST
8 | from rlcard.games.uno.utils import cards2list
9 | 
10 | DEFAULT_GAME_CONFIG = {
11 |     'game_num_players': 2,
12 | }
13 | 
14 | class UnoEnv(Env):
15 | 
16 |     def __init__(self, config):
17 |         self.name = 'uno'
18 |         self.default_game_config = DEFAULT_GAME_CONFIG
19 |         self.game = Game()
20 |         super().__init__(config)
21 |         self.state_shape = [[4, 4, 15] for _ in range(self.num_players)]
22 |         self.action_shape = [None for _ in range(self.num_players)]
23 | 
24 |     def _extract_state(self, state):
25 |         obs = np.zeros((4, 4, 15), dtype=int)
26 |         encode_hand(obs[:3], state['hand'])
27 |         encode_target(obs[3], state['target'])
28 |         legal_action_id = self._get_legal_actions()
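        # Gloss (not in the source): obs is a 4 x 4 x 15 tensor. The first three
        # planes come from encode_hand and describe the player's hand over the
        # 4 colors x 15 traits grid; the last plane, from encode_target, marks
        # the current target card. See rlcard/games/uno/utils.py for the exact
        # plane semantics.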
extracted_state = {'obs': obs, 'legal_actions': legal_action_id} 30 | extracted_state['raw_obs'] = state 31 | extracted_state['raw_legal_actions'] = [a for a in state['legal_actions']] 32 | extracted_state['action_record'] = self.action_recorder 33 | return extracted_state 34 | 35 | def get_payoffs(self): 36 | 37 | return np.array(self.game.get_payoffs()) 38 | 39 | def _decode_action(self, action_id): 40 | legal_ids = self._get_legal_actions() 41 | if action_id in legal_ids: 42 | return ACTION_LIST[action_id] 43 | # if (len(self.game.dealer.deck) + len(self.game.round.played_cards)) > 17: 44 | # return ACTION_LIST[60] 45 | return ACTION_LIST[np.random.choice(legal_ids)] 46 | 47 | def _get_legal_actions(self): 48 | legal_actions = self.game.get_legal_actions() 49 | legal_ids = {ACTION_SPACE[action]: None for action in legal_actions} 50 | return OrderedDict(legal_ids) 51 | 52 | def get_perfect_information(self): 53 | ''' Get the perfect information of the current state 54 | 55 | Returns: 56 | (dict): A dictionary of all the perfect information of the current state 57 | ''' 58 | state = {} 59 | state['num_players'] = self.num_players 60 | state['hand_cards'] = [cards2list(player.hand) 61 | for player in self.game.players] 62 | state['played_cards'] = cards2list(self.game.round.played_cards) 63 | state['target'] = self.game.round.target.str 64 | state['current_player'] = self.game.round.current_player 65 | state['legal_actions'] = self.game.round.get_legal_actions( 66 | self.game.players, state['current_player']) 67 | return state 68 | -------------------------------------------------------------------------------- /rlcard/games/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datamllab/rlcard/d7d0a957baf4cc7225a50522adb0164bf130a9d0/rlcard/games/__init__.py -------------------------------------------------------------------------------- /rlcard/games/base.py: -------------------------------------------------------------------------------- 1 | ''' Game-related base classes 2 | ''' 3 | class Card: 4 | ''' 5 | Card stores the suit and rank of a single card 6 | 7 | Note: 8 | The suit variable in a standard card game should be one of [S, H, D, C, BJ, RJ] meaning [Spades, Hearts, Diamonds, Clubs, Black Joker, Red Joker] 9 | Similarly the rank variable should be one of [A, 2, 3, 4, 5, 6, 7, 8, 9, T, J, Q, K] 10 | ''' 11 | suit = None 12 | rank = None 13 | valid_suit = ['S', 'H', 'D', 'C', 'BJ', 'RJ'] 14 | valid_rank = ['A', '2', '3', '4', '5', '6', '7', '8', '9', 'T', 'J', 'Q', 'K'] 15 | 16 | def __init__(self, suit, rank): 17 | ''' Initialize the suit and rank of a card 18 | 19 | Args: 20 | suit: string, suit of the card, should be one of valid_suit 21 | rank: string, rank of the card, should be one of valid_rank 22 | ''' 23 | self.suit = suit 24 | self.rank = rank 25 | 26 | def __eq__(self, other): 27 | if isinstance(other, Card): 28 | return self.rank == other.rank and self.suit == other.suit 29 | else: 30 | # don't attempt to compare against unrelated types 31 | return NotImplemented 32 | 33 | def __hash__(self): 34 | suit_index = Card.valid_suit.index(self.suit) 35 | rank_index = Card.valid_rank.index(self.rank) 36 | return rank_index + 100 * suit_index 37 | 38 | def __str__(self): 39 | ''' Get string representation of a card. 40 | 41 | Returns: 42 | string: the combination of rank and suit of a card. Eg: AS, 5H, JD, 3C, ... 
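            For example, str(Card('S', 'A')) gives 'AS', while get_index() gives 'SA'.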
43 | ''' 44 | return self.rank + self.suit 45 | 46 | def get_index(self): 47 | ''' Get index of a card. 48 | 49 | Returns: 50 | string: the combination of suit and rank of a card. Eg: 1S, 2H, AD, BJ, RJ... 51 | ''' 52 | return self.suit+self.rank 53 | -------------------------------------------------------------------------------- /rlcard/games/blackjack/__init__.py: -------------------------------------------------------------------------------- 1 | from rlcard.games.blackjack.dealer import BlackjackDealer as Dealer 2 | from rlcard.games.blackjack.judger import BlackjackJudger as Judger 3 | from rlcard.games.blackjack.player import BlackjackPlayer as Player 4 | from rlcard.games.blackjack.game import BlackjackGame as Game 5 | 6 | -------------------------------------------------------------------------------- /rlcard/games/blackjack/dealer.py: -------------------------------------------------------------------------------- 1 | from rlcard.utils import init_standard_deck 2 | import numpy as np 3 | 4 | class BlackjackDealer: 5 | 6 | def __init__(self, np_random, num_decks=1): 7 | ''' Initialize a Blackjack dealer class 8 | ''' 9 | self.np_random = np_random 10 | self.num_decks = num_decks 11 | self.deck = init_standard_deck() 12 | if self.num_decks not in [0, 1]: # 0 indicates infinite decks of cards 13 | self.deck = self.deck * self.num_decks # copy m standard decks of cards 14 | self.shuffle() 15 | self.hand = [] 16 | self.status = 'alive' 17 | self.score = 0 18 | 19 | def shuffle(self): 20 | ''' Shuffle the deck 21 | ''' 22 | shuffle_deck = np.array(self.deck) 23 | self.np_random.shuffle(shuffle_deck) 24 | self.deck = list(shuffle_deck) 25 | 26 | def deal_card(self, player): 27 | ''' Distribute one card to the player 28 | 29 | Args: 30 | player_id (int): the target player's id 31 | ''' 32 | idx = self.np_random.choice(len(self.deck)) 33 | card = self.deck[idx] 34 | if self.num_decks != 0: # If infinite decks, do not pop card from deck 35 | self.deck.pop(idx) 36 | # card = self.deck.pop() 37 | player.hand.append(card) 38 | -------------------------------------------------------------------------------- /rlcard/games/blackjack/judger.py: -------------------------------------------------------------------------------- 1 | 2 | class BlackjackJudger: 3 | def __init__(self, np_random): 4 | ''' Initialize a BlackJack judger class 5 | ''' 6 | self.np_random = np_random 7 | self.rank2score = {"A":11, "2":2, "3":3, "4":4, "5":5, "6":6, "7":7, "8":8, "9":9, "T":10, "J":10, "Q":10, "K":10} 8 | 9 | def judge_round(self, player): 10 | ''' Judge the target player's status 11 | 12 | Args: 13 | player (int): target player's id 14 | 15 | Returns: 16 | status (str): the status of the target player 17 | score (int): the current score of the player 18 | ''' 19 | score = self.judge_score(player.hand) 20 | if score <= 21: 21 | return "alive", score 22 | else: 23 | return "bust", score 24 | 25 | def judge_game(self, game, game_pointer): 26 | ''' Judge the winner of the game 27 | 28 | Args: 29 | game (class): target game class 30 | ''' 31 | ''' 32 | game.winner['dealer'] doesn't need anymore if we change code like this 33 | 34 | player bust (whether dealer bust or not) => game.winner[playerX] = -1 35 | player and dealer tie => game.winner[playerX] = 1 36 | dealer bust and player not bust => game.winner[playerX] = 2 37 | player get higher score than dealer => game.winner[playerX] = 2 38 | dealer get higher score than player => game.winner[playerX] = -1 39 | game.winner[playerX] = 0 => the game is still ongoing 40 
| ''' 41 | 42 | if game.players[game_pointer].status == 'bust': 43 | game.winner['player' + str(game_pointer)] = -1 44 | elif game.dealer.status == 'bust': 45 | game.winner['player' + str(game_pointer)] = 2 46 | else: 47 | if game.players[game_pointer].score > game.dealer.score: 48 | game.winner['player' + str(game_pointer)] = 2 49 | elif game.players[game_pointer].score < game.dealer.score: 50 | game.winner['player' + str(game_pointer)] = -1 51 | else: 52 | game.winner['player' + str(game_pointer)] = 1 53 | 54 | def judge_score(self, cards): 55 | ''' Judge the score of a given cards set 56 | 57 | Args: 58 | cards (list): a list of cards 59 | 60 | Returns: 61 | score (int): the score of the given cards set 62 | ''' 63 | score = 0 64 | count_a = 0 65 | for card in cards: 66 | card_score = self.rank2score[card.rank] 67 | score += card_score 68 | if card.rank == 'A': 69 | count_a += 1 70 | while score > 21 and count_a > 0: 71 | count_a -= 1 72 | score -= 10 73 | return score 74 | -------------------------------------------------------------------------------- /rlcard/games/blackjack/player.py: -------------------------------------------------------------------------------- 1 | 2 | class BlackjackPlayer: 3 | 4 | def __init__(self, player_id, np_random): 5 | ''' Initialize a Blackjack player class 6 | 7 | Args: 8 | player_id (int): id for the player 9 | ''' 10 | self.np_random = np_random 11 | self.player_id = player_id 12 | self.hand = [] 13 | self.status = 'alive' 14 | self.score = 0 15 | 16 | def get_player_id(self): 17 | ''' Return player's id 18 | ''' 19 | return self.player_id 20 | -------------------------------------------------------------------------------- /rlcard/games/bridge/__init__.py: -------------------------------------------------------------------------------- 1 | from rlcard.games.bridge.game import BridgeGame as Game 2 | -------------------------------------------------------------------------------- /rlcard/games/bridge/dealer.py: -------------------------------------------------------------------------------- 1 | ''' 2 | File name: bridge/dealer.py 3 | Author: William Hale 4 | Date created: 11/25/2021 5 | ''' 6 | 7 | from typing import List 8 | 9 | from .player import BridgePlayer 10 | from .utils.bridge_card import BridgeCard 11 | 12 | 13 | class BridgeDealer: 14 | ''' Initialize a BridgeDealer dealer class 15 | ''' 16 | def __init__(self, np_random): 17 | ''' set shuffled_deck, set stock_pile 18 | ''' 19 | self.np_random = np_random 20 | self.shuffled_deck: List[BridgeCard] = BridgeCard.get_deck() # keep a copy of the shuffled cards at start of new hand 21 | self.np_random.shuffle(self.shuffled_deck) 22 | self.stock_pile: List[BridgeCard] = self.shuffled_deck.copy() 23 | 24 | def deal_cards(self, player: BridgePlayer, num: int): 25 | ''' Deal some cards from stock_pile to one player 26 | 27 | Args: 28 | player (BridgePlayer): The BridgePlayer object 29 | num (int): The number of cards to be dealt 30 | ''' 31 | for _ in range(num): 32 | player.hand.append(self.stock_pile.pop()) 33 | -------------------------------------------------------------------------------- /rlcard/games/bridge/game.py: -------------------------------------------------------------------------------- 1 | ''' 2 | File name: bridge/game.py 3 | Author: William Hale 4 | Date created: 11/25/2021 5 | ''' 6 | 7 | from typing import List 8 | 9 | import numpy as np 10 | 11 | from .judger import BridgeJudger 12 | from .round import BridgeRound 13 | from .utils.action_event import ActionEvent, CallActionEvent, 
PlayCardAction 14 | 15 | 16 | class BridgeGame: 17 | ''' Game class. This class will interact with outer environment. 18 | ''' 19 | 20 | def __init__(self, allow_step_back=False): 21 | '''Initialize the class BridgeGame 22 | ''' 23 | self.allow_step_back: bool = allow_step_back 24 | self.np_random = np.random.RandomState() 25 | self.judger: BridgeJudger = BridgeJudger(game=self) 26 | self.actions: [ActionEvent] = [] # must reset in init_game 27 | self.round: BridgeRound or None = None # must reset in init_game 28 | self.num_players: int = 4 29 | 30 | def init_game(self): 31 | ''' Initialize all characters in the game and start round 1 32 | ''' 33 | board_id = self.np_random.choice([1, 2, 3, 4]) 34 | self.actions: List[ActionEvent] = [] 35 | self.round = BridgeRound(num_players=self.num_players, board_id=board_id, np_random=self.np_random) 36 | for player_id in range(4): 37 | player = self.round.players[player_id] 38 | self.round.dealer.deal_cards(player=player, num=13) 39 | current_player_id = self.round.current_player_id 40 | state = self.get_state(player_id=current_player_id) 41 | return state, current_player_id 42 | 43 | def step(self, action: ActionEvent): 44 | ''' Perform game action and return next player number, and the state for next player 45 | ''' 46 | if isinstance(action, CallActionEvent): 47 | self.round.make_call(action=action) 48 | elif isinstance(action, PlayCardAction): 49 | self.round.play_card(action=action) 50 | else: 51 | raise Exception(f'Unknown step action={action}') 52 | self.actions.append(action) 53 | next_player_id = self.round.current_player_id 54 | next_state = self.get_state(player_id=next_player_id) 55 | return next_state, next_player_id 56 | 57 | def get_num_players(self) -> int: 58 | ''' Return the number of players in the game 59 | ''' 60 | return self.num_players 61 | 62 | @staticmethod 63 | def get_num_actions() -> int: 64 | ''' Return the number of possible actions in the game 65 | ''' 66 | return ActionEvent.get_num_actions() 67 | 68 | def get_player_id(self): 69 | ''' Return the current player that will take actions soon 70 | ''' 71 | return self.round.current_player_id 72 | 73 | def is_over(self) -> bool: 74 | ''' Return whether the current game is over 75 | ''' 76 | return self.round.is_over() 77 | 78 | def get_state(self, player_id: int): # wch: not really used 79 | ''' Get player's state 80 | 81 | Return: 82 | state (dict): The information of the state 83 | ''' 84 | state = {} 85 | if not self.is_over(): 86 | state['player_id'] = player_id 87 | state['current_player_id'] = self.round.current_player_id 88 | state['hand'] = self.round.players[player_id].hand 89 | else: 90 | state['player_id'] = player_id 91 | state['current_player_id'] = self.round.current_player_id 92 | state['hand'] = self.round.players[player_id].hand 93 | return state 94 | -------------------------------------------------------------------------------- /rlcard/games/bridge/judger.py: -------------------------------------------------------------------------------- 1 | ''' 2 | File name: bridge/judger.py 3 | Author: William Hale 4 | Date created: 11/25/2021 5 | ''' 6 | 7 | from typing import List 8 | 9 | from typing import TYPE_CHECKING 10 | if TYPE_CHECKING: 11 | from .game import BridgeGame 12 | 13 | from .utils.action_event import PlayCardAction 14 | from .utils.action_event import ActionEvent, BidAction, PassAction, DblAction, RdblAction 15 | from .utils.move import MakeBidMove, MakeDblMove, MakeRdblMove 16 | from .utils.bridge_card import BridgeCard 17 | 18 | 19 | class 
BridgeJudger: 20 | 21 | ''' 22 | Judger decides legal actions for current player 23 | ''' 24 | 25 | def __init__(self, game: 'BridgeGame'): 26 | ''' Initialize the class BridgeJudger 27 | :param game: BridgeGame 28 | ''' 29 | self.game: BridgeGame = game 30 | 31 | def get_legal_actions(self) -> List[ActionEvent]: 32 | """ 33 | :return: List[ActionEvent] of legal actions 34 | """ 35 | legal_actions: List[ActionEvent] = [] 36 | if not self.game.is_over(): 37 | current_player = self.game.round.get_current_player() 38 | if not self.game.round.is_bidding_over(): 39 | legal_actions.append(PassAction()) 40 | last_make_bid_move: MakeBidMove or None = None 41 | last_dbl_move: MakeDblMove or None = None 42 | last_rdbl_move: MakeRdblMove or None = None 43 | for move in reversed(self.game.round.move_sheet): 44 | if isinstance(move, MakeBidMove): 45 | last_make_bid_move = move 46 | break 47 | elif isinstance(move, MakeRdblMove): 48 | last_rdbl_move = move 49 | elif isinstance(move, MakeDblMove) and not last_rdbl_move: 50 | last_dbl_move = move 51 | first_bid_action_id = ActionEvent.first_bid_action_id 52 | next_bid_action_id = last_make_bid_move.action.action_id + 1 if last_make_bid_move else first_bid_action_id 53 | for bid_action_id in range(next_bid_action_id, first_bid_action_id + 35): 54 | action = BidAction.from_action_id(action_id=bid_action_id) 55 | legal_actions.append(action) 56 | if last_make_bid_move and last_make_bid_move.player.player_id % 2 != current_player.player_id % 2 and not last_dbl_move and not last_rdbl_move: 57 | legal_actions.append(DblAction()) 58 | if last_dbl_move and last_dbl_move.player.player_id % 2 != current_player.player_id % 2: 59 | legal_actions.append(RdblAction()) 60 | else: 61 | trick_moves = self.game.round.get_trick_moves() 62 | hand = self.game.round.players[current_player.player_id].hand 63 | legal_cards = hand 64 | if trick_moves and len(trick_moves) < 4: 65 | led_card: BridgeCard = trick_moves[0].card 66 | cards_of_led_suit = [card for card in hand if card.suit == led_card.suit] 67 | if cards_of_led_suit: 68 | legal_cards = cards_of_led_suit 69 | for card in legal_cards: 70 | action = PlayCardAction(card=card) 71 | legal_actions.append(action) 72 | return legal_actions 73 | -------------------------------------------------------------------------------- /rlcard/games/bridge/player.py: -------------------------------------------------------------------------------- 1 | ''' 2 | File name: bridge/player.py 3 | Author: William Hale 4 | Date created: 11/25/2021 5 | ''' 6 | 7 | from typing import List 8 | 9 | from .utils.bridge_card import BridgeCard 10 | 11 | 12 | class BridgePlayer: 13 | 14 | def __init__(self, player_id: int, np_random): 15 | ''' Initialize a BridgePlayer player class 16 | 17 | Args: 18 | player_id (int): id for the player 19 | ''' 20 | if player_id < 0 or player_id > 3: 21 | raise Exception(f'BridgePlayer has invalid player_id: {player_id}') 22 | self.np_random = np_random 23 | self.player_id: int = player_id 24 | self.hand: List[BridgeCard] = [] 25 | 26 | def remove_card_from_hand(self, card: BridgeCard): 27 | self.hand.remove(card) 28 | 29 | def __str__(self): 30 | return ['N', 'E', 'S', 'W'][self.player_id] 31 | -------------------------------------------------------------------------------- /rlcard/games/bridge/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datamllab/rlcard/d7d0a957baf4cc7225a50522adb0164bf130a9d0/rlcard/games/bridge/utils/__init__.py 
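To see the pieces above working together: a BridgeGame owns a BridgeJudger, and the judger can be asked for the legal calls at any point. A minimal usage sketch (hypothetical usage, with the printed count only illustrative):

```python
from rlcard.games.bridge.game import BridgeGame

game = BridgeGame()
state, current_player_id = game.init_game()
legal_actions = game.judger.get_legal_actions()
# On the opening call there is no prior bid, so the options are
# PassAction plus all 35 bids; Dbl and Rdbl are not yet available.
print(len(legal_actions))  # 36
```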
-------------------------------------------------------------------------------- /rlcard/games/bridge/utils/bridge_card.py: -------------------------------------------------------------------------------- 1 | ''' 2 | File name: bridge/utils/bridge_card.py 3 | Author: William Hale 4 | Date created: 11/25/2021 5 | ''' 6 | 7 | from rlcard.games.base import Card 8 | 9 | 10 | class BridgeCard(Card): 11 | 12 | suits = ['C', 'D', 'H', 'S'] 13 | ranks = ['2', '3', '4', '5', '6', '7', '8', '9', 'T', 'J', 'Q', 'K', 'A'] 14 | 15 | @staticmethod 16 | def card(card_id: int): 17 | return _deck[card_id] 18 | 19 | @staticmethod 20 | def get_deck() -> [Card]: 21 | return _deck.copy() 22 | 23 | def __init__(self, suit: str, rank: str): 24 | super().__init__(suit=suit, rank=rank) 25 | suit_index = BridgeCard.suits.index(self.suit) 26 | rank_index = BridgeCard.ranks.index(self.rank) 27 | self.card_id = 13 * suit_index + rank_index 28 | 29 | def __str__(self): 30 | return f'{self.rank}{self.suit}' 31 | 32 | def __repr__(self): 33 | return f'{self.rank}{self.suit}' 34 | 35 | 36 | # deck is always in order from 2C, ... KC, AC, 2D, ... KD, AD, 2H, ... KH, AH, 2S, ... KS, AS 37 | _deck = [BridgeCard(suit=suit, rank=rank) for suit in BridgeCard.suits for rank in BridgeCard.ranks] # want this to be read-only 38 | -------------------------------------------------------------------------------- /rlcard/games/bridge/utils/move.py: -------------------------------------------------------------------------------- 1 | ''' 2 | File name: bridge/utils/move.py 3 | Author: William Hale 4 | Date created: 11/25/2021 5 | ''' 6 | 7 | # 8 | # These classes are used to keep a move_sheet history of the moves in a round. 9 | # 10 | 11 | from .action_event import ActionEvent, BidAction, PassAction, DblAction, RdblAction, PlayCardAction 12 | from .bridge_card import BridgeCard 13 | 14 | from ..player import BridgePlayer 15 | 16 | 17 | class BridgeMove(object): # Interface 18 | pass 19 | 20 | 21 | class PlayerMove(BridgeMove): # Interface 22 | 23 | def __init__(self, player: BridgePlayer, action: ActionEvent): 24 | super().__init__() 25 | self.player = player 26 | self.action = action 27 | 28 | 29 | class CallMove(PlayerMove): # Interface 30 | 31 | def __init__(self, player: BridgePlayer, action: ActionEvent): 32 | super().__init__(player=player, action=action) 33 | 34 | 35 | class DealHandMove(BridgeMove): 36 | 37 | def __init__(self, dealer: BridgePlayer, shuffled_deck: [BridgeCard]): 38 | super().__init__() 39 | self.dealer = dealer 40 | self.shuffled_deck = shuffled_deck 41 | 42 | def __str__(self): 43 | shuffled_deck_text = " ".join([str(card) for card in self.shuffled_deck]) 44 | return f'{self.dealer} deal shuffled_deck=[{shuffled_deck_text}]' 45 | 46 | 47 | class MakePassMove(CallMove): 48 | 49 | def __init__(self, player: BridgePlayer): 50 | super().__init__(player=player, action=PassAction()) 51 | 52 | def __str__(self): 53 | return f'{self.player} {self.action}' 54 | 55 | 56 | class MakeDblMove(CallMove): 57 | 58 | def __init__(self, player: BridgePlayer): 59 | super().__init__(player=player, action=DblAction()) 60 | 61 | def __str__(self): 62 | return f'{self.player} {self.action}' 63 | 64 | 65 | class MakeRdblMove(CallMove): 66 | 67 | def __init__(self, player: BridgePlayer): 68 | super().__init__(player=player, action=RdblAction()) 69 | 70 | def __str__(self): 71 | return f'{self.player} {self.action}' 72 | 73 | 74 | class MakeBidMove(CallMove): 75 | 76 | def __init__(self, player: BridgePlayer, bid_action: BidAction): 77 | 
super().__init__(player=player, action=bid_action) 78 | self.action = bid_action # Note: keep type as BidAction rather than ActionEvent 79 | 80 | def __str__(self): 81 | return f'{self.player} bids {self.action}' 82 | 83 | 84 | class PlayCardMove(PlayerMove): 85 | 86 | def __init__(self, player: BridgePlayer, action: PlayCardAction): 87 | super().__init__(player=player, action=action) 88 | self.action = action # Note: keep type as PlayCardAction rather than ActionEvent 89 | 90 | @property 91 | def card(self): 92 | return self.action.card 93 | 94 | def __str__(self): 95 | return f'{self.player} plays {self.action}' 96 | -------------------------------------------------------------------------------- /rlcard/games/bridge/utils/tray.py: -------------------------------------------------------------------------------- 1 | ''' 2 | File name: bridge/utils/tray.py 3 | Author: William Hale 4 | Date created: 11/28/2021 5 | ''' 6 | 7 | 8 | class Tray(object): 9 | 10 | def __init__(self, board_id: int): 11 | if board_id <= 0: 12 | raise Exception(f'Tray: invalid board_id={board_id}') 13 | self.board_id = board_id 14 | 15 | @property 16 | def dealer_id(self): 17 | return (self.board_id - 1) % 4 18 | 19 | @property 20 | def vul(self): 21 | vul_none = [0, 0, 0, 0] 22 | vul_n_s = [1, 0, 1, 0] 23 | vul_e_w = [0, 1, 0, 1] 24 | vul_all = [1, 1, 1, 1] 25 | basic_vuls = [vul_none, vul_n_s, vul_e_w, vul_all] 26 | offset = (self.board_id - 1) // 4 27 | return basic_vuls[(self.board_id - 1 + offset) % 4] 28 | 29 | def __str__(self): 30 | return f'{self.board_id}: dealer_id={self.dealer_id} vul={self.vul}' 31 | -------------------------------------------------------------------------------- /rlcard/games/bridge/utils/utils.py: -------------------------------------------------------------------------------- 1 | ''' 2 | File name: bridge/utils/utils.py 3 | Author: William Hale 4 | Date created: 11/26/2021 5 | ''' 6 | 7 | from typing import List 8 | 9 | import numpy as np 10 | 11 | from .bridge_card import BridgeCard 12 | 13 | 14 | def encode_cards(cards: List[BridgeCard]) -> np.ndarray: # Note: not used ?? 15 | plane = np.zeros(52, dtype=int) 16 | for card in cards: 17 | plane[card.card_id] = 1 18 | return plane 19 | -------------------------------------------------------------------------------- /rlcard/games/doudizhu/__init__.py: -------------------------------------------------------------------------------- 1 | from rlcard.games.doudizhu.dealer import DoudizhuDealer as Dealer 2 | from rlcard.games.doudizhu.judger import DoudizhuJudger as Judger 3 | from rlcard.games.doudizhu.player import DoudizhuPlayer as Player 4 | from rlcard.games.doudizhu.round import DoudizhuRound as Round 5 | from rlcard.games.doudizhu.game import DoudizhuGame as Game 6 | 7 | -------------------------------------------------------------------------------- /rlcard/games/doudizhu/dealer.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | ''' Implement Doudizhu Dealer class 3 | ''' 4 | import functools 5 | 6 | from rlcard.utils import init_54_deck 7 | from rlcard.games.doudizhu.utils import cards2str, doudizhu_sort_card 8 | 9 | class DoudizhuDealer: 10 | ''' Dealer will shuffle, deal cards, and determine players' roles 11 | ''' 12 | def __init__(self, np_random): 13 | '''Give dealer the deck 14 | 15 | Notes: 16 | 1. 
deck with 54 cards including black joker and red joker 17 | ''' 18 | self.np_random = np_random 19 | self.deck = init_54_deck() 20 | self.deck.sort(key=functools.cmp_to_key(doudizhu_sort_card)) 21 | self.landlord = None 22 | 23 | def shuffle(self): 24 | ''' Randomly shuffle the deck 25 | ''' 26 | self.np_random.shuffle(self.deck) 27 | 28 | def deal_cards(self, players): 29 | ''' Deal cards to players 30 | 31 | Args: 32 | players (list): list of DoudizhuPlayer objects 33 | ''' 34 | hand_num = (len(self.deck) - 3) // len(players) 35 | for index, player in enumerate(players): 36 | current_hand = self.deck[index*hand_num:(index+1)*hand_num] 37 | current_hand.sort(key=functools.cmp_to_key(doudizhu_sort_card)) 38 | player.set_current_hand(current_hand) 39 | player.initial_hand = cards2str(player.current_hand) 40 | 41 | def determine_role(self, players): 42 | ''' Determine landlord and peasants according to players' hand 43 | 44 | Args: 45 | players (list): list of DoudizhuPlayer objects 46 | 47 | Returns: 48 | int: landlord's player_id 49 | ''' 50 | # deal cards 51 | self.shuffle() 52 | self.deal_cards(players) 53 | players[0].role = 'landlord' 54 | self.landlord = players[0] 55 | players[1].role = 'peasant' 56 | players[2].role = 'peasant' 57 | #players[0].role = 'peasant' 58 | #self.landlord = players[0] 59 | 60 | ## determine 'landlord' 61 | #max_score = get_landlord_score( 62 | # cards2str(self.landlord.current_hand)) 63 | #for player in players[1:]: 64 | # player.role = 'peasant' 65 | # score = get_landlord_score( 66 | # cards2str(player.current_hand)) 67 | # if score > max_score: 68 | # max_score = score 69 | # self.landlord = player 70 | #self.landlord.role = 'landlord' 71 | 72 | # give the 'landlord' the three cards 73 | self.landlord.current_hand.extend(self.deck[-3:]) 74 | self.landlord.current_hand.sort(key=functools.cmp_to_key(doudizhu_sort_card)) 75 | self.landlord.initial_hand = cards2str(self.landlord.current_hand) 76 | return self.landlord.player_id 77 | -------------------------------------------------------------------------------- /rlcard/games/doudizhu/jsondata.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datamllab/rlcard/d7d0a957baf4cc7225a50522adb0164bf130a9d0/rlcard/games/doudizhu/jsondata.zip -------------------------------------------------------------------------------- /rlcard/games/gin_rummy/__init__.py: -------------------------------------------------------------------------------- 1 | from rlcard.games.gin_rummy.game import GinRummyGame as Game 2 | -------------------------------------------------------------------------------- /rlcard/games/gin_rummy/dealer.py: -------------------------------------------------------------------------------- 1 | ''' 2 | File name: gin_rummy/dealer.py 3 | Author: William Hale 4 | Date created: 2/12/2020 5 | ''' 6 | 7 | from .player import GinRummyPlayer 8 | from .utils import utils as utils 9 | 10 | 11 | class GinRummyDealer: 12 | ''' Initialize a GinRummy dealer class 13 | ''' 14 | def __init__(self, np_random): 15 | ''' Empty discard_pile, set shuffled_deck, set stock_pile 16 | ''' 17 | self.np_random = np_random 18 | self.discard_pile = [] # type: List[Card] 19 | self.shuffled_deck = utils.get_deck() # keep a copy of the shuffled cards at start of new hand 20 | self.np_random.shuffle(self.shuffled_deck) 21 | self.stock_pile = self.shuffled_deck.copy() # type: List[Card] 22 | 23 | def deal_cards(self, player: GinRummyPlayer, num: int): 24 | ''' Deal some 
cards from stock_pile to one player 25 | 26 | Args: 27 | player (GinRummyPlayer): The GinRummyPlayer object 28 | num (int): The number of cards to be dealt 29 | ''' 30 | for _ in range(num): 31 | player.hand.append(self.stock_pile.pop()) 32 | player.did_populate_hand() 33 | -------------------------------------------------------------------------------- /rlcard/games/gin_rummy/utils/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rlcard/games/gin_rummy/utils/gin_rummy_error.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Project: Gin Rummy 3 | File name: gin_rummy/utils/gin_rummy_error.py 4 | Author: William Hale 5 | Date created: 4/29/2020 6 | ''' 7 | 8 | 9 | class GinRummyError(Exception): 10 | pass 11 | 12 | 13 | class GinRummyProgramError(GinRummyError): 14 | pass 15 | -------------------------------------------------------------------------------- /rlcard/games/gin_rummy/utils/scorers.py: -------------------------------------------------------------------------------- 1 | ''' 2 | File name: gin_rummy/scorers.py 3 | Author: William Hale 4 | Date created: 2/15/2020 5 | ''' 6 | 7 | from typing import TYPE_CHECKING 8 | if TYPE_CHECKING: 9 | from ..game import GinRummyGame 10 | 11 | from typing import Callable 12 | 13 | from .action_event import * 14 | from ..player import GinRummyPlayer 15 | from .move import ScoreNorthMove, ScoreSouthMove 16 | from .gin_rummy_error import GinRummyProgramError 17 | 18 | from rlcard.games.gin_rummy.utils import melding 19 | from rlcard.games.gin_rummy.utils import utils 20 | 21 | 22 | class GinRummyScorer: 23 | 24 | def __init__(self, name: str = None, get_payoff: Callable[[GinRummyPlayer, 'GinRummyGame'], int or float] = None): 25 | self.name = name if name is not None else "GinRummyScorer" 26 | self.get_payoff = get_payoff if get_payoff else get_payoff_gin_rummy_v1 27 | 28 | def get_payoffs(self, game: 'GinRummyGame'): 29 | payoffs = [0, 0] 30 | for i in range(2): 31 | player = game.round.players[i] 32 | payoff = self.get_payoff(player=player, game=game) 33 | payoffs[i] = payoff 34 | return payoffs 35 | 36 | 37 | def get_payoff_gin_rummy_v0(player: GinRummyPlayer, game: 'GinRummyGame') -> int: 38 | ''' Get the payoff of player: deadwood_count of player 39 | 40 | Returns: 41 | payoff (int or float): payoff for player (lower is better) 42 | ''' 43 | moves = game.round.move_sheet 44 | if player.player_id == 0: 45 | score_player_move = moves[-2] 46 | if not isinstance(score_player_move, ScoreNorthMove): 47 | raise GinRummyProgramError("score_player_move must be ScoreNorthMove.") 48 | else: 49 | score_player_move = moves[-1] 50 | if not isinstance(score_player_move, ScoreSouthMove): 51 | raise GinRummyProgramError("score_player_move must be ScoreSouthMove.") 52 | deadwood_count = score_player_move.deadwood_count 53 | return deadwood_count 54 | 55 | 56 | def get_payoff_gin_rummy_v1(player: GinRummyPlayer, game: 'GinRummyGame') -> float: 57 | ''' Get the payoff of player: 58 | a) 1.0 if player gins 59 | b) 0.2 if player knocks 60 | c) -deadwood_count / 100 otherwise 61 | 62 | Returns: 63 | payoff (int or float): payoff for player (higher is better) 64 | ''' 65 | # payoff is 1.0 if player gins 66 | # payoff is 0.2 if player knocks 67 | # payoff is -deadwood_count / 100 if otherwise 68 | # The goal is to have the agent learn how to knock and gin. 
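# For example, under this scheme a losing hand whose best meld cluster
# leaves deadwood_count == 35 receives payoff -0.35, while knocking would
# have earned 0.2 and gin 1.0, so shedding deadwood is always rewarded.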
69 | # The negative payoff when the agent fails to knock or gin should encourage the agent to form melds. 70 | # The payoff is scaled to lie between -1 and 1. 71 | going_out_action = game.round.going_out_action 72 | going_out_player_id = game.round.going_out_player_id 73 | if going_out_player_id == player.player_id and isinstance(going_out_action, KnockAction): 74 | payoff = 0.2 75 | elif going_out_player_id == player.player_id and isinstance(going_out_action, GinAction): 76 | payoff = 1 77 | else: 78 | hand = player.hand 79 | best_meld_clusters = melding.get_best_meld_clusters(hand=hand) 80 | best_meld_cluster = [] if not best_meld_clusters else best_meld_clusters[0] 81 | deadwood_count = utils.get_deadwood_count(hand, best_meld_cluster) 82 | payoff = -deadwood_count / 100 83 | return payoff 84 | -------------------------------------------------------------------------------- /rlcard/games/gin_rummy/utils/thinker.py: -------------------------------------------------------------------------------- 1 | ''' 2 | File name: gin_rummy/thinker.py 3 | Author: William Hale 4 | Date created: 3/28/2020 5 | ''' 6 | 7 | from typing import List 8 | 9 | from rlcard.games.base import Card 10 | 11 | from . import melding 12 | from . import utils 13 | 14 | 15 | class Thinker(object): 16 | 17 | def __init__(self, hand: List[Card]): 18 | self.hand = hand 19 | 20 | # simple thinking 21 | def get_meld_piles_with_discard_card(self, discard_card: Card) -> List[List[Card]]: 22 | next_hand = self.hand + [discard_card] 23 | meld_clusters = melding.get_meld_clusters(hand=next_hand) 24 | best_deadwood_count = 999 25 | best_deadwoods = [] # type: List[List[Card]] 26 | best_meld_clusters = [] # type: List[List[List[Card]]] 27 | for meld_cluster in meld_clusters: 28 | meld_cards = [card for meld_pile in meld_cluster for card in meld_pile] 29 | deadwood = [card for card in next_hand if card not in meld_cards] 30 | deadwood_count = self._get_deadwood_count(deadwood=deadwood) 31 | if deadwood_count < best_deadwood_count: 32 | best_deadwood_count = deadwood_count 33 | best_deadwoods = [deadwood] 34 | best_meld_clusters = [meld_cluster] 35 | elif deadwood_count == best_deadwood_count: 36 | best_deadwoods.append(deadwood) 37 | best_meld_clusters.append(meld_cluster) 38 | want_discard_card = False 39 | for deadwood in best_deadwoods: 40 | if discard_card in deadwood: 41 | want_discard_card = False 42 | break 43 | else: 44 | want_discard_card = True 45 | result = [] # type: List[List[Card]] 46 | if want_discard_card: 47 | for meld_cluster in best_meld_clusters: 48 | for meld_pile in meld_cluster: 49 | if discard_card in meld_pile: 50 | result.append(meld_pile) 51 | return result 52 | 53 | @staticmethod 54 | def _get_deadwood_count(deadwood: List[Card]) -> int: 55 | deadwood_values = [utils.get_deadwood_value(card) for card in deadwood] 56 | return sum(deadwood_values) 57 | -------------------------------------------------------------------------------- /rlcard/games/leducholdem/__init__.py: -------------------------------------------------------------------------------- 1 | from rlcard.games.leducholdem.dealer import LeducholdemDealer as Dealer 2 | from rlcard.games.leducholdem.judger import LeducholdemJudger as Judger 3 | from rlcard.games.leducholdem.player import LeducholdemPlayer as Player 4 | from rlcard.games.leducholdem.round import LeducholdemRound as Round 5 | from rlcard.games.leducholdem.game import LeducholdemGame as Game 6 | 7 | -------------------------------------------------------------------------------- 
/rlcard/games/leducholdem/card2index.json: -------------------------------------------------------------------------------- 1 | {"SJ": 0, "SQ": 1, "SK": 2, "HJ": 0, "HQ": 1, "HK": 2} 2 | -------------------------------------------------------------------------------- /rlcard/games/leducholdem/dealer.py: -------------------------------------------------------------------------------- 1 | from rlcard.games.base import Card 2 | from rlcard.games.limitholdem import Dealer 3 | 4 | class LeducholdemDealer(Dealer): 5 | 6 | def __init__(self, np_random): 7 | ''' Initialize a leducholdem dealer class 8 | ''' 9 | self.np_random = np_random 10 | self.deck = [Card('S', 'J'), Card('H', 'J'), Card('S', 'Q'), Card('H', 'Q'), Card('S', 'K'), Card('H', 'K')] 11 | self.shuffle() 12 | self.pot = 0 13 | -------------------------------------------------------------------------------- /rlcard/games/leducholdem/judger.py: -------------------------------------------------------------------------------- 1 | from rlcard.utils.utils import rank2int 2 | 3 | class LeducholdemJudger: 4 | ''' The Judger class for Leduc Hold'em 5 | ''' 6 | def __init__(self, np_random): 7 | ''' Initialize a judger class 8 | ''' 9 | self.np_random = np_random 10 | 11 | @staticmethod 12 | def judge_game(players, public_card): 13 | ''' Judge the winner of the game. 14 | 15 | Args: 16 | players (list): The list of players who play the game 17 | public_card (object): The public card that is seen by all the players 18 | 19 | Returns: 20 | (list): Each entry of the list is the payoff of the player at that position 21 | ''' 22 | # Judge who are the winners 23 | winners = [0] * len(players) 24 | fold_count = 0 25 | ranks = [] 26 | # If every player folds except one, the alive player is the winner 27 | for idx, player in enumerate(players): 28 | ranks.append(rank2int(player.hand.rank)) 29 | if player.status == 'folded': 30 | fold_count += 1 31 | elif player.status == 'alive': 32 | alive_idx = idx 33 | if fold_count == (len(players) - 1): 34 | winners[alive_idx] = 1 35 | 36 | # Otherwise, any player whose hand matches the public card wins 37 | if sum(winners) < 1: 38 | for idx, player in enumerate(players): 39 | if player.hand.rank == public_card.rank: 40 | winners[idx] = 1 41 | break 42 | 43 | # If none of the above conditions holds, the winners are the players with the highest card rank 44 | if sum(winners) < 1: 45 | max_rank = max(ranks) 46 | max_index = [i for i, j in enumerate(ranks) if j == max_rank] 47 | for idx in max_index: 48 | winners[idx] = 1 49 | 50 | # Compute the total chips 51 | total = 0 52 | for p in players: 53 | total += p.in_chips 54 | 55 | each_win = float(total) / sum(winners) 56 | 57 | payoffs = [] 58 | for i, _ in enumerate(players): 59 | if winners[i] == 1: 60 | payoffs.append(each_win - players[i].in_chips) 61 | else: 62 | payoffs.append(float(-players[i].in_chips)) 63 | 64 | return payoffs 65 | 
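The payoff computation above is zero-sum: the pot is split evenly among the winners, and each player's own contribution is subtracted back out. A worked sketch of the arithmetic with hypothetical numbers (not library code):

```python
in_chips = [4, 4]   # chips each player has committed
winners = [1, 0]    # player 0 wins, e.g. because player 1 folded
total = sum(in_chips)                    # 8 chips in the pot
each_win = float(total) / sum(winners)   # 8.0 to the lone winner
payoffs = [each_win - c if w else float(-c)
           for w, c in zip(winners, in_chips)]
print(payoffs)      # [4.0, -4.0]: gains and losses cancel
```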
-------------------------------------------------------------------------------- /rlcard/games/leducholdem/player.py: -------------------------------------------------------------------------------- 1 | class LeducholdemPlayer: 2 | 3 | def __init__(self, player_id, np_random): 4 | ''' Initialize a player. 5 | 6 | Args: 7 | player_id (int): The id of the player 8 | ''' 9 | self.np_random = np_random 10 | self.player_id = player_id 11 | self.status = 'alive' 12 | self.hand = None 13 | 14 | # The chips that this player has put in until now 15 | self.in_chips = 0 16 | 17 | def get_state(self, public_card, all_chips, legal_actions): 18 | ''' Encode the state for the player 19 | 20 | Args: 21 | public_card (object): The public card that is seen by all the players 22 | all_chips (int): The chips that all players have put in 23 | 24 | Returns: 25 | (dict): The state of the player 26 | ''' 27 | state = {} 28 | state['hand'] = self.hand.get_index() 29 | state['public_card'] = public_card.get_index() if public_card else None 30 | state['all_chips'] = all_chips 31 | state['my_chips'] = self.in_chips 32 | state['legal_actions'] = legal_actions 33 | return state 34 | 35 | def get_player_id(self): 36 | ''' Return the id of the player 37 | ''' 38 | return self.player_id 39 | -------------------------------------------------------------------------------- /rlcard/games/leducholdem/round.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | ''' Implement Leduc Hold'em Round class 3 | ''' 4 | 5 | from rlcard.games.limitholdem import Round 6 | 7 | class LeducholdemRound(Round): 8 | ''' Round can call other classes' functions to keep the game running 9 | ''' 10 | 11 | def __init__(self, raise_amount, allowed_raise_num, num_players, np_random): 12 | ''' Initialize the round class 13 | 14 | Args: 15 | raise_amount (int): the raise amount for each raise 16 | allowed_raise_num (int): The number of raises allowed 17 | num_players (int): The number of players 18 | ''' 19 | super(LeducholdemRound, self).__init__(raise_amount, allowed_raise_num, num_players, np_random=np_random) 20 | -------------------------------------------------------------------------------- /rlcard/games/limitholdem/__init__.py: -------------------------------------------------------------------------------- 1 | from rlcard.games.limitholdem.dealer import LimitHoldemDealer as Dealer 2 | from rlcard.games.limitholdem.judger import LimitHoldemJudger as Judger 3 | from rlcard.games.limitholdem.player import LimitHoldemPlayer as Player 4 | from rlcard.games.limitholdem.player import PlayerStatus 5 | from rlcard.games.limitholdem.round import LimitHoldemRound as Round 6 | from rlcard.games.limitholdem.game import LimitHoldemGame as Game 7 | 8 | -------------------------------------------------------------------------------- /rlcard/games/limitholdem/card2index.json: -------------------------------------------------------------------------------- 1 | {"SA": 0, "S2": 1, "S3": 2, "S4": 3, "S5": 4, "S6": 5, "S7": 6, "S8": 7, "S9": 8, "ST": 9, "SJ": 10, "SQ": 11, "SK": 12, "HA": 13, "H2": 14, "H3": 15, "H4": 16, "H5": 17, "H6": 18, "H7": 19, "H8": 20, "H9": 21, "HT": 22, "HJ": 23, "HQ": 24, "HK": 25, "DA": 26, "D2": 27, "D3": 28, "D4": 29, "D5": 30, "D6": 31, "D7": 32, "D8": 33, "D9": 34, "DT": 35, "DJ": 36, "DQ": 37, "DK": 38, "CA": 39, "C2": 40, "C3": 41, "C4": 42, "C5": 43, "C6": 44, "C7": 45, "C8": 46, "C9": 47, "CT": 48, "CJ": 49, "CQ": 50, "CK": 51} 2 | -------------------------------------------------------------------------------- /rlcard/games/limitholdem/dealer.py: -------------------------------------------------------------------------------- 1 | from rlcard.utils.utils import init_standard_deck 2 | 3 | 4 | class LimitHoldemDealer: 5 | def __init__(self, np_random): 6 | 
self.np_random = np_random 7 | self.deck = init_standard_deck() 8 | self.shuffle() 9 | self.pot = 0 10 | 11 | def shuffle(self): 12 | self.np_random.shuffle(self.deck) 13 | 14 | def deal_card(self): 15 | """ 16 | Deal one card from the deck 17 | 18 | Returns: 19 | (Card): The drawn card from the deck 20 | """ 21 | return self.deck.pop() 22 | -------------------------------------------------------------------------------- /rlcard/games/limitholdem/player.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class PlayerStatus(Enum): 5 | ALIVE = 0 6 | FOLDED = 1 7 | ALLIN = 2 8 | 9 | 10 | class LimitHoldemPlayer: 11 | 12 | def __init__(self, player_id, np_random): 13 | """ 14 | Initialize a player. 15 | 16 | Args: 17 | player_id (int): The id of the player 18 | """ 19 | self.np_random = np_random 20 | self.player_id = player_id 21 | self.hand = [] 22 | self.status = PlayerStatus.ALIVE 23 | 24 | # The chips that this player has put in until now 25 | self.in_chips = 0 26 | 27 | def get_state(self, public_cards, all_chips, legal_actions): 28 | """ 29 | Encode the state for the player 30 | 31 | Args: 32 | public_cards (list): A list of public cards that are seen by all the players 33 | all_chips (int): The chips that all players have put in 34 | 35 | Returns: 36 | (dict): The state of the player 37 | """ 38 | return { 39 | 'hand': [c.get_index() for c in self.hand], 40 | 'public_cards': [c.get_index() for c in public_cards], 41 | 'all_chips': all_chips, 42 | 'my_chips': self.in_chips, 43 | 'legal_actions': legal_actions 44 | } 45 | 46 | def get_player_id(self): 47 | return self.player_id 48 | 
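For reference, the dictionary built by get_state above has roughly the following shape (a hypothetical mid-game snapshot; the exact values of all_chips and legal_actions depend on what the round passes in):

```python
state = {
    'hand': ['SA', 'HK'],                # hole cards as index strings
    'public_cards': ['D2', 'C7', 'CT'],  # board cards as index strings
    'all_chips': [4, 4],                 # chips committed, as supplied by the round
    'my_chips': 4,
    'legal_actions': ['call', 'raise', 'fold'],
}
```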
-------------------------------------------------------------------------------- /rlcard/games/mahjong/__init__.py: -------------------------------------------------------------------------------- 1 | from rlcard.games.mahjong.dealer import MahjongDealer as Dealer 2 | from rlcard.games.mahjong.card import MahjongCard as Card 3 | from rlcard.games.mahjong.player import MahjongPlayer as Player 4 | from rlcard.games.mahjong.judger import MahjongJudger as Judger 5 | from rlcard.games.mahjong.round import MahjongRound as Round 6 | from rlcard.games.mahjong.game import MahjongGame as Game 7 | 8 | -------------------------------------------------------------------------------- /rlcard/games/mahjong/card.py: -------------------------------------------------------------------------------- 1 | 2 | class MahjongCard: 3 | 4 | info = {'type': ['dots', 'bamboo', 'characters', 'dragons', 'winds'], 5 | 'trait': ['1', '2', '3', '4', '5', '6', '7', '8', '9', 'green', 'red', 'white', 'east', 'west', 'north', 'south'] 6 | } 7 | 8 | def __init__(self, card_type, trait): 9 | ''' Initialize the class of MahjongCard 10 | 11 | Args: 12 | card_type (str): The type of card 13 | trait (str): The trait of card 14 | ''' 15 | self.type = card_type 16 | self.trait = trait 17 | self.index_num = 0 18 | 19 | def get_str(self): 20 | ''' Get the string representation of card 21 | 22 | Return: 23 | (str): The string of the card's type and trait 24 | ''' 25 | return self.type + '-' + self.trait 26 | 27 | def set_index_num(self, index_num): 28 | 29 | self.index_num = index_num 30 | 31 | 32 | -------------------------------------------------------------------------------- /rlcard/games/mahjong/dealer.py: -------------------------------------------------------------------------------- 1 | from rlcard.games.mahjong.utils import init_deck 2 | 3 | 4 | class MahjongDealer: 5 | ''' Initialize a mahjong dealer class 6 | ''' 7 | def __init__(self, np_random): 8 | self.np_random = np_random 9 | self.deck = init_deck() 10 | self.shuffle() 11 | self.table = [] 12 | 13 | def shuffle(self): 14 | ''' Shuffle the deck 15 | ''' 16 | self.np_random.shuffle(self.deck) 17 | 18 | def deal_cards(self, player, num): 19 | ''' Deal some cards from deck to one player 20 | 21 | Args: 22 | player (object): The object of MahjongPlayer 23 | num (int): The number of cards to be dealt 24 | ''' 25 | for _ in range(num): 26 | player.hand.append(self.deck.pop()) 27 | 28 | 29 | ## For test 30 | #if __name__ == '__main__': 31 | # dealer = MahjongDealer() 32 | # for card in dealer.deck: 33 | # print(card.get_str()) 34 | # print(len(dealer.deck)) 35 | -------------------------------------------------------------------------------- /rlcard/games/mahjong/player.py: -------------------------------------------------------------------------------- 1 | 2 | class MahjongPlayer: 3 | 4 | def __init__(self, player_id, np_random): 5 | ''' Initialize a player. 6 | 7 | Args: 8 | player_id (int): The id of the player 9 | ''' 10 | self.np_random = np_random 11 | self.player_id = player_id 12 | self.hand = [] 13 | self.pile = [] 14 | 15 | def get_player_id(self): 16 | ''' Return the id of the player 17 | ''' 18 | 19 | return self.player_id 20 | 21 | def print_hand(self): 22 | ''' Print the cards in hand as strings. 23 | ''' 24 | print([c.get_str() for c in self.hand]) 25 | 26 | def print_pile(self): 27 | ''' Print the cards in the player's pile as strings. 28 | ''' 29 | print([[c.get_str() for c in s] for s in self.pile]) 30 | 31 | def play_card(self, dealer, card): 32 | ''' Play one card 33 | Args: 34 | dealer (object): Dealer 35 | card (object): The card to be played. 36 | ''' 37 | card = self.hand.pop(self.hand.index(card)) 38 | dealer.table.append(card) 39 | 40 | def chow(self, dealer, cards): 41 | ''' Perform Chow 42 | Args: 43 | dealer (object): Dealer 44 | cards (list): The cards to be melded by the chow. 45 | ''' 46 | last_card = dealer.table.pop(-1) 47 | for card in cards: 48 | if card in self.hand and card != last_card: 49 | self.hand.pop(self.hand.index(card)) 50 | self.pile.append(cards) 51 | 52 | def gong(self, dealer, cards): 53 | ''' Perform Gong 54 | Args: 55 | dealer (object): Dealer 56 | cards (list): The cards to be melded by the gong. 57 | ''' 58 | for card in cards: 59 | if card in self.hand: 60 | self.hand.pop(self.hand.index(card)) 61 | self.pile.append(cards) 62 | 63 | def pong(self, dealer, cards): 64 | ''' Perform Pong 65 | Args: 66 | dealer (object): Dealer 67 | cards (list): The cards to be melded by the pong. 
68 | ''' 69 | for card in cards: 70 | if card in self.hand: 71 | self.hand.pop(self.hand.index(card)) 72 | self.pile.append(cards) 73 | -------------------------------------------------------------------------------- /rlcard/games/mahjong/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from rlcard.games.mahjong.card import MahjongCard as Card 3 | 4 | 5 | card_encoding_dict = {} 6 | num = 0 7 | for _type in ['bamboo', 'characters', 'dots']: 8 | for _trait in ['1', '2', '3', '4', '5', '6', '7', '8', '9']: 9 | card = _type+"-"+_trait 10 | card_encoding_dict[card] = num 11 | num += 1 12 | for _trait in ['green', 'red', 'white']: 13 | card = 'dragons-'+_trait 14 | card_encoding_dict[card] = num 15 | num += 1 16 | 17 | for _trait in ['east', 'west', 'north', 'south']: 18 | card = 'winds-'+_trait 19 | card_encoding_dict[card] = num 20 | num += 1 21 | card_encoding_dict['pong'] = num 22 | card_encoding_dict['chow'] = num + 1 23 | card_encoding_dict['gong'] = num + 2 24 | card_encoding_dict['stand'] = num + 3 25 | 26 | card_decoding_dict = {card_encoding_dict[key]: key for key in card_encoding_dict.keys()} 27 | 28 | def init_deck(): 29 | deck = [] 30 | info = Card.info 31 | for _type in info['type']: 32 | index_num = 0 33 | if _type != 'dragons' and _type != 'winds': 34 | for _trait in info['trait'][:9]: 35 | card = Card(_type, _trait) 36 | card.set_index_num(index_num) 37 | index_num = index_num + 1 38 | deck.append(card) 39 | elif _type == 'dragons': 40 | for _trait in info['trait'][9:12]: 41 | card = Card(_type, _trait) 42 | card.set_index_num(index_num) 43 | index_num = index_num + 1 44 | deck.append(card) 45 | else: 46 | for _trait in info['trait'][12:]: 47 | card = Card(_type, _trait) 48 | card.set_index_num(index_num) 49 | index_num = index_num + 1 50 | deck.append(card) 51 | deck = deck * 4 52 | return deck 53 | 54 | 55 | def pile2list(pile): 56 | cards_list = [] 57 | for each in pile: 58 | cards_list.extend(each) 59 | return cards_list 60 | 61 | def cards2list(cards): 62 | cards_list = [] 63 | for each in cards: 64 | cards_list.append(each.get_str()) 65 | return cards_list 66 | 67 | 68 | def encode_cards(cards): 69 | plane = np.zeros((34,4), dtype=int) 70 | cards = cards2list(cards) 71 | for card in list(set(cards)): 72 | index = card_encoding_dict[card] 73 | num = cards.count(card) 74 | plane[index][:num] = 1 75 | return plane 76 | -------------------------------------------------------------------------------- /rlcard/games/nolimitholdem/__init__.py: -------------------------------------------------------------------------------- 1 | from rlcard.games.nolimitholdem.dealer import NolimitholdemDealer as Dealer 2 | from rlcard.games.nolimitholdem.judger import NolimitholdemJudger as Judger 3 | from rlcard.games.nolimitholdem.player import NolimitholdemPlayer as Player 4 | from rlcard.games.nolimitholdem.round import Action 5 | from rlcard.games.nolimitholdem.round import NolimitholdemRound as Round 6 | from rlcard.games.nolimitholdem.game import NolimitholdemGame as Game 7 | 8 | -------------------------------------------------------------------------------- /rlcard/games/nolimitholdem/dealer.py: -------------------------------------------------------------------------------- 1 | from rlcard.games.limitholdem import Dealer 2 | 3 | 4 | class NolimitholdemDealer(Dealer): 5 | pass 6 | -------------------------------------------------------------------------------- /rlcard/games/nolimitholdem/judger.py: 
-------------------------------------------------------------------------------- 1 | from rlcard.games.limitholdem import Judger 2 | 3 | 4 | class NolimitholdemJudger(Judger): 5 | pass 6 | -------------------------------------------------------------------------------- /rlcard/games/nolimitholdem/player.py: -------------------------------------------------------------------------------- 1 | from rlcard.games.limitholdem import Player 2 | 3 | 4 | class NolimitholdemPlayer(Player): 5 | def __init__(self, player_id, init_chips, np_random): 6 | """ 7 | Initialize a player. 8 | 9 | Args: 10 | player_id (int): The id of the player 11 | init_chips (int): The number of chips the player has initially 12 | """ 13 | super().__init__(player_id, np_random) 14 | self.remained_chips = init_chips 15 | 16 | def bet(self, chips): 17 | quantity = chips if chips <= self.remained_chips else self.remained_chips 18 | self.in_chips += quantity 19 | self.remained_chips -= quantity 20 | -------------------------------------------------------------------------------- /rlcard/games/uno/__init__.py: -------------------------------------------------------------------------------- 1 | from rlcard.games.uno.dealer import UnoDealer as Dealer 2 | from rlcard.games.uno.judger import UnoJudger as Judger 3 | from rlcard.games.uno.player import UnoPlayer as Player 4 | from rlcard.games.uno.round import UnoRound as Round 5 | from rlcard.games.uno.game import UnoGame as Game 6 | 7 | -------------------------------------------------------------------------------- /rlcard/games/uno/card.py: -------------------------------------------------------------------------------- 1 | from termcolor import colored 2 | 3 | class UnoCard: 4 | 5 | info = {'type': ['number', 'action', 'wild'], 6 | 'color': ['r', 'g', 'b', 'y'], 7 | 'trait': ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 8 | 'skip', 'reverse', 'draw_2', 'wild', 'wild_draw_4'] 9 | } 10 | 11 | def __init__(self, card_type, color, trait): 12 | ''' Initialize the class of UnoCard 13 | 14 | Args: 15 | card_type (str): The type of card 16 | color (str): The color of card 17 | trait (str): The trait of card 18 | ''' 19 | self.type = card_type 20 | self.color = color 21 | self.trait = trait 22 | self.str = self.get_str() 23 | 24 | def get_str(self): 25 | ''' Get the string representation of card 26 | 27 | Return: 28 | (str): The string of card's color and trait 29 | ''' 30 | return self.color + '-' + self.trait 31 | 32 | 33 | @staticmethod 34 | def print_cards(cards, wild_color=False): 35 | ''' Print out cards in a nice form 36 | 37 | Args: 38 | cards (str or list): A single UNO card as a string, or a list of such strings 39 | wild_color (boolean): True if a color should be assigned to wild cards 40 | ''' 41 | if isinstance(cards, str): 42 | cards = [cards] 43 | for i, card in enumerate(cards): 44 | if card == 'draw': 45 | trait = 'Draw' 46 | else: 47 | color, trait = card.split('-') 48 | if trait == 'skip': 49 | trait = 'Skip' 50 | elif trait == 'reverse': 51 | trait = 'Reverse' 52 | elif trait == 'draw_2': 53 | trait = 'Draw-2' 54 | elif trait == 'wild': 55 | trait = 'Wild' 56 | elif trait == 'wild_draw_4': 57 | trait = 'Wild-Draw-4' 58 | 59 | if trait == 'Draw' or (trait[:4] == 'Wild' and not wild_color): 60 | print(trait, end='') 61 | elif color == 'r': 62 | print(colored(trait, 'red'), end='') 63 | elif color == 'g': 64 | print(colored(trait, 'green'), end='') 65 | elif color == 'b': 66 | print(colored(trait, 'blue'), end='') 67 | elif color == 'y': 68 | print(colored(trait, 'yellow'), end='') 69 | 70 | if i < len(cards) - 1: 71 | print(', ', end='') 72 | -------------------------------------------------------------------------------- /rlcard/games/uno/dealer.py: -------------------------------------------------------------------------------- 1 | 2 | from rlcard.games.uno.utils import init_deck 3 | 4 | 5 | class UnoDealer: 6 | ''' Initialize a uno dealer class 7 | ''' 8 | def __init__(self, np_random): 9 | self.np_random = np_random 10 | self.deck = init_deck() 11 | self.shuffle() 12 | 13 | def shuffle(self): 14 | ''' Shuffle the deck 15 | ''' 16 | self.np_random.shuffle(self.deck) 17 | 18 | def deal_cards(self, player, num): 19 | ''' Deal some cards from deck to one player 20 | 21 | Args: 22 | player (object): The object of UnoPlayer 23 | num (int): The number of cards to be dealt 24 | ''' 25 | for _ in range(num): 26 | player.hand.append(self.deck.pop()) 27 | 28 | def flip_top_card(self): 29 | ''' Flip top card when a new game starts 30 | 31 | Returns: 32 | (object): The object of UnoCard at the top of the deck 33 | ''' 34 | top_card = self.deck.pop() 35 | while top_card.trait == 'wild_draw_4': 36 | self.deck.append(top_card) 37 | self.shuffle() 38 | top_card = self.deck.pop() 39 | return top_card 40 | 
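Note that flip_top_card above keeps reshuffling until the starting card is not a Wild-Draw-4, matching the standard UNO rule for the first flip. A hypothetical usage sketch:

```python
import numpy as np
from rlcard.games.uno.dealer import UnoDealer

dealer = UnoDealer(np.random.RandomState(42))
top_card = dealer.flip_top_card()
assert top_card.trait != 'wild_draw_4'  # guaranteed by the loop above
print(top_card.str)                     # e.g. 'r-7' in color-trait form
```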
-------------------------------------------------------------------------------- /rlcard/games/uno/jsondata/action_space.json: -------------------------------------------------------------------------------- 1 | {"r-0": 0, "r-1": 1, "r-2": 2, "r-3": 3, "r-4": 4, "r-5": 5, "r-6": 6, "r-7": 7, "r-8": 8, "r-9": 9, "r-skip": 10, "r-reverse": 11, "r-draw_2": 12, "r-wild": 13, "r-wild_draw_4": 14, "g-0": 15, "g-1": 16, "g-2": 17, "g-3": 18, "g-4": 19, "g-5": 20, "g-6": 21, "g-7": 22, "g-8": 23, "g-9": 24, "g-skip": 25, "g-reverse": 26, "g-draw_2": 27, "g-wild": 28, "g-wild_draw_4": 29, "b-0": 30, "b-1": 31, "b-2": 32, "b-3": 33, "b-4": 34, "b-5": 35, "b-6": 36, "b-7": 37, "b-8": 38, "b-9": 39, "b-skip": 40, "b-reverse": 41, "b-draw_2": 42, "b-wild": 43, "b-wild_draw_4": 44, "y-0": 45, "y-1": 46, "y-2": 47, "y-3": 48, "y-4": 49, "y-5": 50, "y-6": 51, "y-7": 52, "y-8": 53, "y-9": 54, "y-skip": 55, "y-reverse": 56, "y-draw_2": 57, "y-wild": 58, "y-wild_draw_4": 59, "draw": 60} -------------------------------------------------------------------------------- /rlcard/games/uno/judger.py: -------------------------------------------------------------------------------- 1 | 2 | class UnoJudger: 3 | 4 | @staticmethod 5 | def judge_winner(players, np_random): 6 | ''' Judge the winner of the game 7 | 8 | Args: 9 | players (list): The list of players who play the game 10 | 11 | Returns: 12 | (list): The player id(s) of the winner(s) 13 | ''' 14 | # np_random is unused: judge_winner is a @staticmethod (there is no self) and the result is deterministic 15 | count_1 = len(players[0].hand) 16 | count_2 = len(players[1].hand) 17 | if count_1 == count_2: 18 | return [0, 1] 19 | if count_1 < count_2: 20 | return [0] 21 | return [1] 22 | 
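So the winner is simply whoever holds fewer cards when the game ends, with a tie returning both ids. A hypothetical sketch (the _Stub class is a stand-in for UnoPlayer, providing only the hand attribute the judger reads):

```python
from rlcard.games.uno.judger import UnoJudger

class _Stub:
    def __init__(self, num_cards):
        self.hand = [None] * num_cards

print(UnoJudger.judge_winner([_Stub(3), _Stub(5)], np_random=None))  # [0]
print(UnoJudger.judge_winner([_Stub(4), _Stub(4)], np_random=None))  # [0, 1]
```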
-------------------------------------------------------------------------------- /rlcard/games/uno/player.py: -------------------------------------------------------------------------------- 1 | 2 | class UnoPlayer: 3 | 4 | def __init__(self, player_id, np_random): 5 | ''' Initialize a player. 6 | 7 | Args: 8 | player_id (int): The id of the player 9 | ''' 10 | self.np_random = np_random 11 | self.player_id = player_id 12 | self.hand = [] 13 | self.stack = [] 14 | 15 | def get_player_id(self): 16 | ''' Return the id of the player 17 | ''' 18 | 19 | return self.player_id 20 | -------------------------------------------------------------------------------- /rlcard/models/__init__.py: -------------------------------------------------------------------------------- 1 | ''' Register rule-based models or pre-trained models 2 | ''' 3 | from rlcard.models.registration import register, load 4 | 5 | register( 6 | model_id = 'leduc-holdem-cfr', 7 | entry_point='rlcard.models.pretrained_models:LeducHoldemCFRModel') 8 | 9 | register( 10 | model_id = 'leduc-holdem-rule-v1', 11 | entry_point='rlcard.models.leducholdem_rule_models:LeducHoldemRuleModelV1') 12 | 13 | register( 14 | model_id = 'leduc-holdem-rule-v2', 15 | entry_point='rlcard.models.leducholdem_rule_models:LeducHoldemRuleModelV2') 16 | 17 | register( 18 | model_id = 'uno-rule-v1', 19 | entry_point='rlcard.models.uno_rule_models:UNORuleModelV1') 20 | 21 | register( 22 | model_id = 'limit-holdem-rule-v1', 23 | entry_point='rlcard.models.limitholdem_rule_models:LimitholdemRuleModelV1') 24 | 25 | register( 26 | model_id = 'doudizhu-rule-v1', 27 | entry_point='rlcard.models.doudizhu_rule_models:DouDizhuRuleModelV1') 28 | 29 | register( 30 | model_id='gin-rummy-novice-rule', 31 | entry_point='rlcard.models.gin_rummy_rule_models:GinRummyNoviceRuleModel') 32 | -------------------------------------------------------------------------------- /rlcard/models/bridge_rule_models.py: -------------------------------------------------------------------------------- 1 | ''' 2 | File name: models/bridge_rule_models.py 3 | Author: William Hale 4 | Date created: 11/27/2021 5 | 6 | Bridge rule models 7 | ''' 8 | 9 | import numpy as np 10 | 11 | from rlcard.games.bridge.utils.action_event import ActionEvent 12 | 13 | 14 | class BridgeDefenderNoviceRuleAgent(object): 15 | ''' 16 | Agent always passes during bidding 17 | ''' 18 | 19 | def __init__(self): 20 | self.use_raw = False 21 | 22 | @staticmethod 23 | def step(state) -> int: 24 | ''' Predict the action given the current state. 25 | Defender Novice strategy: 26 | Case during make call: 27 | Always choose PassAction. 28 | Case during play card: 29 | Choose a random action. 30 | 31 | Args: 32 | state (numpy.array): a numpy array that represents the current state 33 | 34 | Returns: 35 | action_id (int): the action_id predicted 36 | ''' 37 | legal_action_ids = state['raw_legal_actions'] 38 | if ActionEvent.pass_action_id in legal_action_ids: 39 | selected_action_id = ActionEvent.pass_action_id 40 | else: 41 | selected_action_id = np.random.choice(legal_action_ids) 42 | return selected_action_id 43 | 44 | def eval_step(self, state): 45 | ''' Predict the action given the current state for evaluation. 46 | Since the agent is not trained, this function is equivalent to the step function. 
47 | 48 | Args: 49 | state (numpy.array): a numpy array that represents the current state 50 | 51 | Returns: 52 | action_id (int): the action_id predicted by the agent 53 | probabilities (list): The list of action probabilities 54 | ''' 55 | probabilities = [] 56 | return self.step(state), probabilities 57 | -------------------------------------------------------------------------------- /rlcard/models/model.py: -------------------------------------------------------------------------------- 1 | 2 | class Model(object): 3 | ''' The base model class 4 | ''' 5 | 6 | def __init__(self): 7 | ''' Load the model here 8 | ''' 9 | pass 10 | 11 | @property 12 | def agents(self): 13 | ''' Get a list of agents for each position in the game 14 | 15 | Returns: 16 | agents (list): A list of agents 17 | 18 | Note: Each agent should be just like RL agent with step and eval_step 19 | functioning well. 20 | ''' 21 | raise NotImplementedError 22 | -------------------------------------------------------------------------------- /rlcard/models/pretrained/leduc_holdem_cfr/average_policy.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datamllab/rlcard/d7d0a957baf4cc7225a50522adb0164bf130a9d0/rlcard/models/pretrained/leduc_holdem_cfr/average_policy.pkl -------------------------------------------------------------------------------- /rlcard/models/pretrained/leduc_holdem_cfr/iteration.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datamllab/rlcard/d7d0a957baf4cc7225a50522adb0164bf130a9d0/rlcard/models/pretrained/leduc_holdem_cfr/iteration.pkl -------------------------------------------------------------------------------- /rlcard/models/pretrained/leduc_holdem_cfr/policy.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datamllab/rlcard/d7d0a957baf4cc7225a50522adb0164bf130a9d0/rlcard/models/pretrained/leduc_holdem_cfr/policy.pkl -------------------------------------------------------------------------------- /rlcard/models/pretrained/leduc_holdem_cfr/regrets.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datamllab/rlcard/d7d0a957baf4cc7225a50522adb0164bf130a9d0/rlcard/models/pretrained/leduc_holdem_cfr/regrets.pkl -------------------------------------------------------------------------------- /rlcard/models/pretrained_models.py: -------------------------------------------------------------------------------- 1 | ''' Wrappers of pretrained models. 2 | ''' 3 | import os 4 | 5 | import rlcard 6 | from rlcard.agents import CFRAgent 7 | from rlcard.models.model import Model 8 | 9 | # Root path of pretrained models 10 | ROOT_PATH = os.path.join(rlcard.__path__[0], 'models/pretrained') 11 | 12 | class LeducHoldemCFRModel(Model): 13 | ''' A pretrained model on Leduc Holdem with CFR (chance sampling) 14 | ''' 15 | def __init__(self): 16 | ''' Load pretrained model 17 | ''' 18 | env = rlcard.make('leduc-holdem') 19 | self.agent = CFRAgent(env, model_path=os.path.join(ROOT_PATH, 'leduc_holdem_cfr')) 20 | self.agent.load() 21 | 22 | @property 23 | def agents(self): 24 | ''' Get a list of agents for each position in the game 25 | 26 | Returns: 27 | agents (list): A list of agents 28 | 29 | Note: Each agent should be just like RL agent with step and eval_step 30 | functioning well. 31 | ''' 32 | return [self.agent, self.agent] 33 | 34 | 
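In practice this class is not constructed directly; it is fetched through the model registry under the id registered in rlcard/models/__init__.py. A usage sketch (hypothetical; assumes the pretrained files ship with the package, as the setup.py package_data suggests):

```python
import rlcard
from rlcard import models

model = models.load('leduc-holdem-cfr')  # instantiates LeducHoldemCFRModel
env = rlcard.make('leduc-holdem')
env.set_agents(model.agents)             # the same CFR agent for both seats
```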
-------------------------------------------------------------------------------- /rlcard/models/registration.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | 3 | class ModelSpec(object): 4 | ''' A specification for a particular Model. 5 | ''' 6 | def __init__(self, model_id, entry_point=None): 7 | ''' Initialize 8 | 9 | Args: 10 | model_id (string): the name of the model 11 | entry_point (string): a string that indicates the location of the model class 12 | ''' 13 | self.model_id = model_id 14 | mod_name, class_name = entry_point.split(':') 15 | self._entry_point = getattr(importlib.import_module(mod_name), class_name) 16 | 17 | def load(self): 18 | ''' Instantiates an instance of the model 19 | 20 | Returns: 21 | Model (Model): an instance of the Model 22 | ''' 23 | model = self._entry_point() 24 | return model 25 | 26 | 27 | class ModelRegistry(object): 28 | ''' Register a model by ID 29 | ''' 30 | 31 | def __init__(self): 32 | ''' Initialize 33 | ''' 34 | self.model_specs = {} 35 | 36 | def register(self, model_id, entry_point): 37 | ''' Register a model 38 | 39 | Args: 40 | model_id (string): the name of the model 41 | entry_point (string): a string that indicates the location of the model class 42 | ''' 43 | if model_id in self.model_specs: 44 | raise ValueError('Cannot re-register model_id: {}'.format(model_id)) 45 | self.model_specs[model_id] = ModelSpec(model_id, entry_point) 46 | 47 | def load(self, model_id): 48 | ''' Create a model instance 49 | 50 | Args: 51 | model_id (string): the name of the model 52 | ''' 53 | if model_id not in self.model_specs: 54 | raise ValueError('Cannot find model_id: {}'.format(model_id)) 55 | return self.model_specs[model_id].load() 56 | 57 | # Have a global registry 58 | model_registry = ModelRegistry() 59 | 60 | 61 | def register(model_id, entry_point): 62 | ''' Register a model 63 | 64 | Args: 65 | model_id (string): the name of the model 66 | entry_point (string): a string that indicates the location of the model class 67 | ''' 68 | return model_registry.register(model_id, entry_point) 69 | 70 | def load(model_id): 71 | ''' Create a model instance 72 | 73 | Args: 74 | model_id (string): the name of the model 75 | ''' 76 | return model_registry.load(model_id) 77 | 
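The same mechanism supports user-defined models: register an id with an entry point of the form '<module>:<class>', then load it by id. A hypothetical sketch (my_package.my_models:MyModel is a placeholder, not part of rlcard):

```python
from rlcard.models.registration import register, load

register(model_id='my-rule-model',
         entry_point='my_package.my_models:MyModel')
model = load('my-rule-model')  # imports the module and instantiates MyModel
```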
-------------------------------------------------------------------------------- /rlcard/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from rlcard.utils.logger import Logger 2 | from rlcard.utils import seeding 3 | from rlcard.utils.utils import * 4 | from rlcard.utils.pettingzoo_utils import * 5 | -------------------------------------------------------------------------------- /rlcard/utils/logger.py: -------------------------------------------------------------------------------- 1 | import os 2 | import csv 3 | 4 | class Logger(object): 5 | ''' Logger saves the running results and helps make plots from the results 6 | ''' 7 | 8 | def __init__(self, log_dir): 9 | ''' Initialize the labels, legend and paths of the plot and log file. 10 | 11 | Args: 12 | log_dir (str): The path to the directory where the log files are saved 13 | ''' 14 | self.log_dir = log_dir 15 | 16 | def __enter__(self): 17 | self.txt_path = os.path.join(self.log_dir, 'log.txt') 18 | self.csv_path = os.path.join(self.log_dir, 'performance.csv') 19 | self.fig_path = os.path.join(self.log_dir, 'fig.png') 20 | 21 | if not os.path.exists(self.log_dir): 22 | os.makedirs(self.log_dir) 23 | 24 | self.txt_file = open(self.txt_path, 'w') 25 | self.csv_file = open(self.csv_path, 'w') 26 | fieldnames = ['episode', 'reward'] 27 | self.writer = csv.DictWriter(self.csv_file, fieldnames=fieldnames) 28 | self.writer.writeheader() 29 | 30 | return self 31 | 32 | def log(self, text): 33 | ''' Write the text to log file then print it. 34 | Args: 35 | text(string): text to log 36 | ''' 37 | self.txt_file.write(text+'\n') 38 | self.txt_file.flush() 39 | print(text) 40 | 41 | def log_performance(self, episode, reward): 42 | ''' Log a point in the curve 43 | Args: 44 | episode (int): the episode of the current point 45 | reward (float): the reward of the current point 46 | ''' 47 | self.writer.writerow({'episode': episode, 'reward': reward}) 48 | print('') 49 | self.log('----------------------------------------') 50 | self.log(' episode | ' + str(episode)) 51 | self.log(' reward | ' + str(reward)) 52 | self.log('----------------------------------------') 53 | 54 | def __exit__(self, type, value, traceback): 55 | if self.txt_path is not None: 56 | self.txt_file.close() 57 | if self.csv_path is not None: 58 | self.csv_file.close() 59 | print('\nLogs saved in', self.log_dir) 60 | 
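Logger is meant to be used as a context manager: __enter__ opens log.txt and performance.csv under log_dir, and __exit__ closes them. A short usage sketch (the reward value is a placeholder):

```python
from rlcard.utils import Logger

with Logger('experiments/demo') as logger:
    for episode in range(0, 500, 100):
        reward = 0.0  # e.g. the result of a tournament evaluation
        logger.log_performance(episode, reward)
# on exit, the files are closed and their location is printed
```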
48 | 49 | ''' 50 | new_trajectories = defaultdict(list) 51 | for agent_name, trajectory in trajectories.items(): 52 | for i in range(0, len(trajectory)-2, 2): 53 | transition = [ 54 | trajectory[i][0], # obs 55 | trajectory[i+1], # action 56 | trajectory[i+2][1], # reward 57 | trajectory[i+2][0], # next_obs 58 | trajectory[i+2][2], # done 59 | ] 60 | new_trajectories[agent_name].append(transition) 61 | return new_trajectories 62 | 63 | 64 | def tournament_pettingzoo(env, agents, num_episodes): 65 | total_rewards = defaultdict(float) 66 | for _ in range(num_episodes): 67 | trajectories = run_game_pettingzoo(env, agents) 68 | trajectories = reorganize_pettingzoo(trajectories) 69 | for agent_name, trajectory in trajectories.items(): 70 | reward = sum([t[2] for t in trajectory]) 71 | total_rewards[agent_name] += reward 72 | return {k: v / num_episodes for (k, v) in total_rewards.items()} 73 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | with open("README.md", "r", encoding="utf8") as fh: 4 | long_description = fh.read() 5 | 6 | extras = { 7 | 'torch': ['torch', 'GitPython', 'gitdb2', 'matplotlib'], 8 | } 9 | 10 | def _get_version(): 11 | with open('rlcard/__init__.py') as f: 12 | for line in f: 13 | if line.startswith('__version__'): 14 | g = {} 15 | exec(line, g) 16 | return g['__version__'] 17 | raise ValueError('`__version__` not defined') 18 | 19 | VERSION = _get_version() 20 | 21 | setuptools.setup( 22 | name="rlcard", 23 | version=VERSION, 24 | author="Data Analytics at Texas A&M (DATA) Lab", 25 | author_email="daochen.zha@tamu.edu", 26 | description="A Toolkit for Reinforcement Learning in Card Games", 27 | long_description=long_description, 28 | long_description_content_type="text/markdown", 29 | url="https://github.com/datamllab/rlcard", 30 | keywords=["Reinforcement Learning", "game", "RL", "AI"], 31 | packages=setuptools.find_packages(exclude=('tests',)), 32 | package_data={ 33 | 'rlcard': ['models/pretrained/leduc_holdem_cfr/*', 34 | 'games/uno/jsondata/action_space.json', 35 | 'games/limitholdem/card2index.json', 36 | 'games/leducholdem/card2index.json', 37 | 'games/doudizhu/jsondata.zip', 38 | 'games/uno/jsondata/*', 39 | ]}, 40 | install_requires=[ 41 | 'numpy>=1.16.3', 42 | 'termcolor' 43 | ], 44 | extras_require=extras, 45 | python_requires='>=3.7', 46 | classifiers=[ 47 | "Programming Language :: Python :: 3.11", 48 | "Programming Language :: Python :: 3.10", 49 | "Programming Language :: Python :: 3.9", 50 | "Programming Language :: Python :: 3.8", 51 | "Programming Language :: Python :: 3.7", 52 | "License :: OSI Approved :: MIT License", 53 | "Operating System :: OS Independent", 54 | ], 55 | ) 56 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datamllab/rlcard/d7d0a957baf4cc7225a50522adb0164bf130a9d0/tests/__init__.py -------------------------------------------------------------------------------- /tests/agents/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datamllab/rlcard/d7d0a957baf4cc7225a50522adb0164bf130a9d0/tests/agents/__init__.py -------------------------------------------------------------------------------- /tests/agents/test_cfr.py:
-------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | 4 | import rlcard 5 | from rlcard.agents.cfr_agent import CFRAgent 6 | 7 | class TestCFR(unittest.TestCase): 8 | 9 | def test_train(self): 10 | 11 | env = rlcard.make('leduc-holdem', config={'allow_step_back':True}) 12 | agent = CFRAgent(env, model_path='experiments/cfr_model') 13 | 14 | for _ in range(100): 15 | agent.train() 16 | 17 | state = {'obs': np.array([1., 1., 0., 0., 0., 0.]), 'legal_actions': {0: None, 2: None}, 'raw_legal_actions': ['call', 'fold']} 18 | action, _ = agent.eval_step(state) 19 | 20 | self.assertIn(action, [0, 2]) 21 | 22 | def test_save_and_load(self): 23 | env = rlcard.make('leduc-holdem', config={'allow_step_back':True}) 24 | agent = CFRAgent(env, model_path='experiments/cfr_model') 25 | 26 | for _ in range(100): 27 | agent.train() 28 | 29 | agent.save() 30 | 31 | new_agent = CFRAgent(env, model_path='experiments/cfr_model') 32 | new_agent.load() 33 | self.assertEqual(len(agent.policy), len(new_agent.policy)) 34 | self.assertEqual(len(agent.average_policy), len(new_agent.average_policy)) 35 | self.assertEqual(len(agent.regrets), len(new_agent.regrets)) 36 | self.assertEqual(agent.iteration, new_agent.iteration) 37 | 38 | -------------------------------------------------------------------------------- /tests/agents/test_dqn.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import torch 3 | import numpy as np 4 | 5 | from rlcard.agents.dqn_agent import DQNAgent 6 | 7 | class TestDQN(unittest.TestCase): 8 | 9 | def test_init(self): 10 | 11 | agent = DQNAgent(replay_memory_size=0, 12 | replay_memory_init_size=0, 13 | update_target_estimator_every=0, 14 | discount_factor=0, 15 | epsilon_start=0, 16 | epsilon_end=0, 17 | epsilon_decay_steps=0, 18 | batch_size=0, 19 | num_actions=2, 20 | state_shape=[1], 21 | mlp_layers=[10,10], 22 | device=torch.device('cpu')) 23 | 24 | self.assertEqual(agent.replay_memory_init_size, 0) 25 | self.assertEqual(agent.update_target_estimator_every, 0) 26 | self.assertEqual(agent.discount_factor, 0) 27 | self.assertEqual(agent.epsilon_decay_steps, 0) 28 | self.assertEqual(agent.batch_size, 0) 29 | self.assertEqual(agent.num_actions, 2) 30 | 31 | def test_train(self): 32 | 33 | memory_init_size = 100 34 | num_steps = 500 35 | 36 | agent = DQNAgent(replay_memory_size=200, 37 | replay_memory_init_size=memory_init_size, 38 | update_target_estimator_every=100, 39 | state_shape=[2], 40 | mlp_layers=[10,10], 41 | device=torch.device('cpu')) 42 | 43 | predicted_action, _ = agent.eval_step({'obs': np.random.random_sample((2,)), 'legal_actions': {0: None, 1: None}, 'raw_legal_actions': ['call', 'raise']}) 44 | self.assertGreaterEqual(predicted_action, 0) 45 | self.assertLessEqual(predicted_action, 1) 46 | 47 | for _ in range(num_steps): 48 | ts = [{'obs': np.random.random_sample((2,)), 'legal_actions': {0: None, 1: None}}, np.random.randint(2), 0, {'obs': np.random.random_sample((2,)), 'legal_actions': {0: None, 1: None}, 'raw_legal_actions': ['call', 'raise']}, True] 49 | agent.feed(ts) 50 | 51 | predicted_action = agent.step({'obs': np.random.random_sample((2,)), 'legal_actions': {0: None, 1: None}}) 52 | self.assertGreaterEqual(predicted_action, 0) 53 | self.assertLessEqual(predicted_action, 1) 54 | -------------------------------------------------------------------------------- /tests/agents/test_leduc_human.py:
-------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from rlcard.agents.human_agents.leduc_holdem_human_agent import _print_state 4 | 5 | class TestLeducHuman(unittest.TestCase): 6 | 7 | def test_print_state(self): 8 | raw_state = {'my_chips': 1, 'current_player': 0, 'all_chips': [1, 1], 'public_card': None, 'hand': 'SQ', 'legal_actions': ['raise', 'fold', 'check']} 9 | action_record = [] 10 | _print_state(raw_state, action_record) 11 | 12 | -------------------------------------------------------------------------------- /tests/agents/test_nfsp.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import torch 3 | import numpy as np 4 | 5 | from rlcard.agents.nfsp_agent import NFSPAgent 6 | 7 | class TestNFSP(unittest.TestCase): 8 | 9 | def test_init(self): 10 | 11 | agent = NFSPAgent(num_actions=10, 12 | state_shape=[10], 13 | hidden_layers_sizes=[10,10], 14 | q_mlp_layers=[10,10], 15 | device=torch.device('cpu')) 16 | 17 | self.assertEqual(agent._num_actions, 10) 18 | 19 | def test_train(self): 20 | 21 | memory_init_size = 20 22 | num_steps = 1000 23 | 24 | agent = NFSPAgent(num_actions=2, 25 | state_shape=[2], 26 | hidden_layers_sizes=[10,10], 27 | reservoir_buffer_capacity=50, 28 | batch_size=4, 29 | min_buffer_size_to_learn=memory_init_size, 30 | q_replay_memory_size=50, 31 | q_replay_memory_init_size=memory_init_size, 32 | q_batch_size=4, 33 | q_mlp_layers=[10,10], 34 | device=torch.device('cpu')) 35 | 36 | predicted_action, _ = agent.eval_step({'obs': np.random.random_sample((2,)), 'legal_actions': {0: None, 1: None}, 'raw_legal_actions': ['call', 'raise']}) 37 | self.assertGreaterEqual(predicted_action, 0) 38 | self.assertLessEqual(predicted_action, 1) 39 | 40 | for _ in range(num_steps): 41 | agent.sample_episode_policy() 42 | predicted_action = agent.step({'obs': np.random.random_sample((2,)), 'legal_actions': {0: None, 1: None}}) 43 | self.assertGreaterEqual(predicted_action, 0) 44 | self.assertLessEqual(predicted_action, 1) 45 | 46 | ts = [{'obs': np.random.random_sample((2,)), 'legal_actions': {0: None, 1: None}}, np.random.randint(2), 0, {'obs': np.random.random_sample((2,)), 'legal_actions': {0: None, 1: None}, 'raw_legal_actions': ['call', 'raise']}, True] 47 | agent.feed(ts) 48 | -------------------------------------------------------------------------------- /tests/agents/test_uno_human.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from rlcard.agents.human_agents.uno_human_agent import _print_state, _print_action 4 | 5 | class TestUnoHuman(unittest.TestCase): 6 | 7 | def test_print_state(self): 8 | raw_state = {'target': 'r-reverse', 'current_player': 0, 'legal_actions': ['r-2', 'r-draw_2'], 'hand': ['y-skip', 'y-draw_2', 'r-2', 'b-3', 'b-6', 'g-wild_draw_4', 'r-draw_2'], 'played_cards': ['g-reverse', 'r-reverse'], 'num_players': 2, 'others_hand': ['y-4', 'g-6', 'b-reverse', 'b-5', 'b-reverse', 'r-9'], 'num_cards': [7, 6]} 9 | action_record = [] 10 | _print_state(raw_state, action_record) 11 | 12 | def test_print_action(self): 13 | _print_action('r-8') 14 | -------------------------------------------------------------------------------- /tests/envs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datamllab/rlcard/d7d0a957baf4cc7225a50522adb0164bf130a9d0/tests/envs/__init__.py
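The environment test modules that follow all exercise the same core loop: make an environment, reset it, draw actions from state['legal_actions'], and step until the game is over. A minimal sketch of that loop with the random agent (the 'leduc-holdem' id is only an arbitrary example; any registered environment works):

import rlcard
from rlcard.agents.random_agent import RandomAgent

# Play one episode, choosing uniformly among the legal actions each turn.
env = rlcard.make('leduc-holdem')
agent = RandomAgent(env.num_actions)
state, player_id = env.reset()
while not env.is_over():
    action, _ = agent.eval_step(state)
    state, player_id = env.step(action)
print(env.get_payoffs())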
-------------------------------------------------------------------------------- /tests/envs/determism_util.py: -------------------------------------------------------------------------------- 1 | import rlcard 2 | from rlcard.agents.random_agent import RandomAgent 3 | import random 4 | import numpy as np 5 | import warnings 6 | def hash_observation(obs): 7 | try: 8 | val = hash(obs.tobytes()) 9 | return val 10 | except AttributeError: 11 | try: 12 | return hash(obs) 13 | except TypeError: 14 | warnings.warn("Observation not an int or a NumPy array") 15 | return 0 16 | 17 | def rand_iter(n): 18 | for _ in range(n+1): 19 | random.randint(0, 1000) 20 | np.random.normal(size=100) 21 | 22 | def gather_observations(env, actions, num_rand_steps): 23 | rand_iter(num_rand_steps) 24 | state, player_id = env.reset() 25 | rand_iter(num_rand_steps) 26 | 27 | action_idx = 0 28 | observations = [] 29 | while not env.is_over() and action_idx < len(actions): 30 | # Agent plays 31 | rand_iter(num_rand_steps) 32 | legals = list(state['legal_actions'].keys()) 33 | action = legals[actions[action_idx]%len(legals)] 34 | # Environment steps 35 | next_state, next_player_id = env.step(action) 36 | # Set the state and player 37 | state = next_state 38 | player_id = next_player_id 39 | 40 | action_idx += 1 41 | # Save state. 42 | if not env.game.is_over(): 43 | observations.append(state) 44 | 45 | return observations 46 | 47 | def is_deterministic(env_name): 48 | env = rlcard.make(env_name) 49 | 50 | NUM_STEPS = 25 51 | 52 | actions = [random.randrange(env.game.get_num_actions()) for _ in range(NUM_STEPS)] 53 | base_seed = 12941 54 | hashes = [] 55 | for rand_iters in range(2): 56 | env = rlcard.make(env_name, config={'seed': base_seed}) 57 | 58 | hashes.append(hash(tuple([hash_observation(obs['obs']) for obs in gather_observations(env, actions, rand_iters)]))) 59 | 60 | return hashes[0] == hashes[1] 61 | -------------------------------------------------------------------------------- /tests/envs/test_blackjack_env.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | 4 | import rlcard 5 | from rlcard.agents.random_agent import RandomAgent 6 | from .determism_util import is_deterministic 7 | 8 | class TestBlackjackEnv(unittest.TestCase): 9 | 10 | def test_init_and_extract_state(self): 11 | env = rlcard.make('blackjack') 12 | state, _ = env.reset() 13 | for score in state['obs']: 14 | self.assertLessEqual(score, 30) 15 | 16 | def test_is_deterministic(self): 17 | self.assertTrue(is_deterministic('blackjack')) 18 | 19 | def test_decode_action(self): 20 | env = rlcard.make('blackjack') 21 | self.assertEqual(env._decode_action(0), 'hit') 22 | self.assertEqual(env._decode_action(1), 'stand') 23 | 24 | def test_get_legal_actions(self): 25 | env = rlcard.make('blackjack') 26 | actions = env._get_legal_actions() 27 | self.assertEqual(len(actions), 2) 28 | self.assertEqual(actions[0], 0) 29 | self.assertEqual(actions[1], 1) 30 | 31 | def test_get_payoffs(self): 32 | env = rlcard.make('blackjack') 33 | for _ in range(100): 34 | env.reset() 35 | while not env.is_over(): 36 | action = np.random.choice([0, 1]) 37 | env.step(action) 38 | payoffs = env.get_payoffs() 39 | for payoff in payoffs: 40 | self.assertIn(payoff, [-1, 1, 0]) 41 | 42 | def test_step_back(self): 43 | env = rlcard.make('blackjack', config={'allow_step_back':True}) 44 | _, player_id = env.reset() 45 | env.step(1) 46 | _, back_player_id = env.step_back() 47 | self.assertEqual(player_id, back_player_id) 48
| self.assertEqual(env.step_back(), False) 49 | 50 | env = rlcard.make('blackjack') 51 | with self.assertRaises(Exception): 52 | env.step_back() 53 | 54 | def test_multiplayers(self): 55 | env = rlcard.make('blackjack', config={'game_num_players':5}) 56 | num_players = env.game.get_num_players() 57 | self.assertEqual(num_players, 5) 58 | 59 | def test_run(self): 60 | env = rlcard.make('blackjack') 61 | env.set_agents([RandomAgent(env.num_actions)]) 62 | trajectories, _ = env.run(is_training=False) 63 | self.assertEqual(len(trajectories), 1) 64 | trajectories, _ = env.run(is_training=True) 65 | self.assertEqual(len(trajectories), 1) 66 | 67 | if __name__ == '__main__': 68 | unittest.main() 69 | -------------------------------------------------------------------------------- /tests/envs/test_doudizhu_env.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import rlcard 4 | from rlcard.agents.random_agent import RandomAgent 5 | from .determism_util import is_deterministic 6 | 7 | 8 | class TestDoudizhuEnv(unittest.TestCase): 9 | 10 | def test_reset_and_extract_state(self): 11 | env = rlcard.make('doudizhu') 12 | state, _ = env.reset() 13 | self.assertEqual(state['obs'].size, 790) 14 | 15 | def test_is_deterministic(self): 16 | self.assertTrue(is_deterministic('doudizhu')) 17 | 18 | def test_get_legal_actions(self): 19 | env = rlcard.make('doudizhu') 20 | env.set_agents([RandomAgent(env.num_actions) for _ in range(env.num_players)]) 21 | env.reset() 22 | legal_actions = env._get_legal_actions() 23 | for legal_action in legal_actions: 24 | self.assertLessEqual(legal_action, env.num_actions-1) 25 | 26 | def test_step(self): 27 | env = rlcard.make('doudizhu') 28 | _, player_id = env.reset() 29 | player = env.game.players[player_id] 30 | _, next_player_id = env.step(env.num_actions-2) 31 | self.assertEqual(next_player_id, (player.player_id+1)%len(env.game.players)) 32 | 33 | def test_step_back(self): 34 | env = rlcard.make('doudizhu', config={'allow_step_back':True}) 35 | _, player_id = env.reset() 36 | env.step(2) 37 | _, back_player_id = env.step_back() 38 | self.assertEqual(player_id, back_player_id) 39 | self.assertEqual(env.step_back(), False) 40 | 41 | env = rlcard.make('doudizhu') 42 | with self.assertRaises(Exception): 43 | env.step_back() 44 | 45 | def test_run(self): 46 | env = rlcard.make('doudizhu') 47 | env.set_agents([RandomAgent(env.num_actions) for _ in range(env.num_players)]) 48 | trajectories, payoffs = env.run(is_training=False) 49 | self.assertEqual(len(trajectories), 3) 50 | win = [] 51 | for player_id, payoff in enumerate(payoffs): 52 | if payoff == 1: 53 | win.append(player_id) 54 | if len(win) == 1: 55 | self.assertEqual(env.game.players[win[0]].role, 'landlord') 56 | if len(win) == 2: 57 | self.assertEqual(env.game.players[win[0]].role, 'peasant') 58 | self.assertEqual(env.game.players[win[1]].role, 'peasant') 59 | 60 | def test_decode_action(self): 61 | env = rlcard.make('doudizhu') 62 | env.reset() 63 | env.game.state['actions'] = ['33366', '33355'] 64 | env.game.judger.playable_cards[0] = ['5', '6', '55', '555', '33366', '33355'] 65 | decoded = env._decode_action(3) 66 | self.assertEqual(decoded, '6') 67 | env.game.state['actions'] = ['444', '44466', '44455'] 68 | decoded = env._decode_action(29) 69 | self.assertEqual(decoded, '444') 70 | 71 | def test_get_perfect_information(self): 72 | env = rlcard.make('doudizhu') 73 | _, player_id = env.reset() 74 | self.assertEqual(player_id,
env.get_perfect_information()['current_player']) 75 | if __name__ == '__main__': 76 | unittest.main() 77 | -------------------------------------------------------------------------------- /tests/envs/test_gin_rummy_env.py: -------------------------------------------------------------------------------- 1 | ''' 2 | File name: tests/envs/test_gin_rummy_env.py 3 | Author: William Hale 4 | Date created: 4/20/2020 5 | ''' 6 | 7 | import unittest 8 | import numpy as np 9 | 10 | import rlcard 11 | from rlcard.agents.random_agent import RandomAgent 12 | from .determism_util import is_deterministic 13 | 14 | 15 | class TestGinRummyEnv(unittest.TestCase): 16 | 17 | def test_reset_and_extract_state(self): 18 | env = rlcard.make('gin-rummy') 19 | state, _ = env.reset() 20 | self.assertEqual(state['obs'].size, 5 * 52) 21 | 22 | def test_is_deterministic(self): 23 | self.assertTrue(is_deterministic('gin-rummy')) 24 | 25 | def test_get_legal_actions(self): 26 | env = rlcard.make('gin-rummy') 27 | env.set_agents([RandomAgent(env.num_actions) for _ in range(env.num_players)]) 28 | env.reset() 29 | legal_actions = env._get_legal_actions() 30 | for legal_action in legal_actions: 31 | self.assertLessEqual(legal_action, env.num_actions-1) 32 | 33 | def test_step(self): 34 | env = rlcard.make('gin-rummy') 35 | state, _ = env.reset() 36 | action = np.random.choice(list(state['legal_actions'].keys())) 37 | _, player_id = env.step(action) 38 | current_player_id = env.game.round.get_current_player().player_id 39 | self.assertEqual(player_id, current_player_id) 40 | 41 | def test_run(self): 42 | env = rlcard.make('gin-rummy') 43 | env.set_agents([RandomAgent(env.num_actions) for _ in range(env.num_players)]) 44 | trajectories, payoffs = env.run(is_training=False) 45 | self.assertEqual(len(trajectories), 2) 46 | for payoff in payoffs: 47 | self.assertLessEqual(-1, payoff) 48 | self.assertLessEqual(payoff, 1) 49 | trajectories, payoffs = env.run(is_training=True) 50 | for payoff in payoffs: 51 | self.assertLessEqual(-1, payoff) 52 | self.assertLessEqual(payoff, 1) 53 | 54 | 55 | if __name__ == '__main__': 56 | unittest.main() 57 | -------------------------------------------------------------------------------- /tests/envs/test_leducholdem_env.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | 4 | import rlcard 5 | from rlcard.agents.random_agent import RandomAgent 6 | from .determism_util import is_deterministic 7 | 8 | 9 | class TestLeducholdemEnv(unittest.TestCase): 10 | 11 | def test_reset_and_extract_state(self): 12 | env = rlcard.make('leduc-holdem') 13 | state, _ = env.reset() 14 | self.assertEqual(state['obs'].size, 36) 15 | for action in state['legal_actions']: 16 | self.assertLess(action, env.num_actions) 17 | 18 | def test_is_deterministic(self): 19 | self.assertTrue(is_deterministic('leduc-holdem')) 20 | 21 | def test_get_legal_actions(self): 22 | env = rlcard.make('leduc-holdem') 23 | env.reset() 24 | legal_actions = env._get_legal_actions() 25 | for action in legal_actions: 26 | self.assertIn(action, env.actions) 27 | 28 | def test_decode_action(self): 29 | env = rlcard.make('leduc-holdem') 30 | state, _ = env.reset() 31 | for action in state['legal_actions']: 32 | decoded = env._decode_action(action) 33 | self.assertIn(decoded, env.actions) 34 | 35 | def test_step(self): 36 | env = rlcard.make('leduc-holdem') 37 | state, player_id = env.reset() 38 | self.assertEqual(player_id, env.get_player_id()) 39 | action = 
list(state['legal_actions'].keys())[0] 40 | _, player_id = env.step(action) 41 | self.assertEqual(player_id, env.get_player_id()) 42 | 43 | def test_step_back(self): 44 | env = rlcard.make('leduc-holdem', config={'allow_step_back':True}) 45 | _, player_id = env.reset() 46 | env.step(0) 47 | _, back_player_id = env.step_back() 48 | self.assertEqual(player_id, back_player_id) 49 | self.assertEqual(env.step_back(), False) 50 | 51 | env = rlcard.make('leduc-holdem') 52 | with self.assertRaises(Exception): 53 | env.step_back() 54 | 55 | def test_run(self): 56 | env = rlcard.make('leduc-holdem') 57 | agents = [RandomAgent(env.num_actions) for _ in range(env.num_players)] 58 | env.set_agents(agents) 59 | trajectories, payoffs = env.run(is_training=False) 60 | self.assertEqual(len(trajectories), 2) 61 | total = 0 62 | for payoff in payoffs: 63 | total += payoff 64 | self.assertEqual(total, 0) 65 | 66 | def test_get_perfect_information(self): 67 | env = rlcard.make('leduc-holdem') 68 | _, player_id = env.reset() 69 | self.assertEqual(player_id, env.get_perfect_information()['current_player']) 70 | 71 | 72 | if __name__ == '__main__': 73 | unittest.main() 74 | -------------------------------------------------------------------------------- /tests/envs/test_limitholdem_env.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import rlcard 4 | from rlcard.agents.random_agent import RandomAgent 5 | from .determism_util import is_deterministic 6 | 7 | 8 | class TestLimitholdemEnv(unittest.TestCase): 9 | 10 | def test_reset_and_extract_state(self): 11 | env = rlcard.make('limit-holdem') 12 | state, _ = env.reset() 13 | self.assertEqual(state['obs'].size, 72) 14 | for action in state['legal_actions']: 15 | self.assertLess(action, env.num_actions) 16 | 17 | def test_is_deterministic(self): 18 | self.assertTrue(is_deterministic('limit-holdem')) 19 | 20 | def test_get_legal_actions(self): 21 | env = rlcard.make('limit-holdem') 22 | env.reset() 23 | legal_actions = env._get_legal_actions() 24 | for action in legal_actions: 25 | self.assertIn(action, env.actions) 26 | 27 | def test_decode_action(self): 28 | env = rlcard.make('limit-holdem') 29 | state, _ = env.reset() 30 | for action in state['legal_actions']: 31 | decoded = env._decode_action(action) 32 | self.assertIn(decoded, env.actions) 33 | 34 | decoded = env._decode_action(3) 35 | self.assertEqual(decoded, 'fold') 36 | 37 | env.step(0) 38 | decoded = env._decode_action(0) 39 | self.assertEqual(decoded, 'check') 40 | 41 | def test_step(self): 42 | env = rlcard.make('limit-holdem') 43 | state, player_id = env.reset() 44 | self.assertEqual(player_id, env.get_player_id()) 45 | action = list(state['legal_actions'].keys())[0] 46 | _, player_id = env.step(action) 47 | self.assertEqual(player_id, env.get_player_id()) 48 | 49 | def test_step_back(self): 50 | env = rlcard.make('limit-holdem', config={'allow_step_back':True}) 51 | _, player_id = env.reset() 52 | env.step(0) 53 | _, back_player_id = env.step_back() 54 | self.assertEqual(player_id, back_player_id) 55 | self.assertEqual(env.step_back(), False) 56 | 57 | env = rlcard.make('limit-holdem') 58 | with self.assertRaises(Exception): 59 | env.step_back() 60 | 61 | def test_run(self): 62 | env = rlcard.make('limit-holdem') 63 | agents = [RandomAgent(env.num_actions) for _ in range(env.num_players)] 64 | env.set_agents(agents) 65 | trajectories, payoffs = env.run(is_training=False) 66 | self.assertEqual(len(trajectories), 2) 67 | total = 0 68 | for 
payoff in payoffs: 69 | total += payoff 70 | self.assertEqual(total, 0) 71 | 72 | def test_get_perfect_information(self): 73 | env = rlcard.make('limit-holdem') 74 | _, player_id = env.reset() 75 | self.assertEqual(player_id, env.get_perfect_information()['current_player']) 76 | 77 | def test_multiplayers(self): 78 | env = rlcard.make('limit-holdem', config={'game_num_players':5}) 79 | num_players = env.game.get_num_players() 80 | self.assertEqual(num_players, 5) 81 | 82 | if __name__ == '__main__': 83 | unittest.main() 84 | -------------------------------------------------------------------------------- /tests/envs/test_mahjong.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | 4 | import rlcard 5 | from rlcard.agents.random_agent import RandomAgent 6 | from .determism_util import is_deterministic 7 | 8 | class TestMahjongEnv(unittest.TestCase): 9 | 10 | def test_reset_and_extract_state(self): 11 | env = rlcard.make('mahjong') 12 | state, _ = env.reset() 13 | self.assertEqual(state['obs'].size, 816) 14 | 15 | def test_is_deterministic(self): 16 | self.assertTrue(is_deterministic('mahjong')) 17 | 18 | def test_get_legal_actions(self): 19 | env = rlcard.make('mahjong') 20 | env.set_agents([RandomAgent(env.num_actions) for _ in range(env.num_players)]) 21 | env.reset() 22 | legal_actions = env._get_legal_actions() 23 | for legal_action in legal_actions: 24 | self.assertLessEqual(legal_action, env.num_actions-1) 25 | 26 | def test_step(self): 27 | env = rlcard.make('mahjong') 28 | state, _ = env.reset() 29 | action = np.random.choice(list(state['legal_actions'].keys())) 30 | _, player_id = env.step(action) 31 | self.assertEqual(player_id, env.game.round.current_player) 32 | 33 | def test_step_back(self): 34 | env = rlcard.make('mahjong', config={'allow_step_back':True}) 35 | state, player_id = env.reset() 36 | action = np.random.choice(list(state['legal_actions'].keys())) 37 | env.step(action) 38 | env.step_back() 39 | self.assertEqual(env.game.round.current_player, player_id) 40 | 41 | env = rlcard.make('mahjong', config={'allow_step_back':False}) 42 | state, player_id = env.reset() 43 | action = np.random.choice(list(state['legal_actions'].keys())) 44 | env.step(action) 45 | # env.step_back() 46 | self.assertRaises(Exception, env.step_back) 47 | 48 | def test_run(self): 49 | env = rlcard.make('mahjong') 50 | env.set_agents([RandomAgent(env.num_actions) for _ in range(env.num_players)]) 51 | trajectories, payoffs = env.run(is_training=False) 52 | trajectories, payoffs = env.run(is_training=True) 53 | 54 | if __name__ == '__main__': 55 | unittest.main() 56 | -------------------------------------------------------------------------------- /tests/envs/test_nolimitholdem_env.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import rlcard 4 | from rlcard.agents.random_agent import RandomAgent 5 | from rlcard.games.nolimitholdem.round import Action 6 | from .determism_util import is_deterministic 7 | 8 | 9 | class TestNolimitholdemEnv(unittest.TestCase): 10 | 11 | def test_reset_and_extract_state(self): 12 | env = rlcard.make('no-limit-holdem') 13 | state, _ = env.reset() 14 | self.assertEqual(state['obs'].size, 54) 15 | 16 | def test_is_deterministic(self): 17 | self.assertTrue(is_deterministic('no-limit-holdem')) 18 | 19 | def test_get_legal_actions(self): 20 | env = rlcard.make('no-limit-holdem') 21 | env.reset() 22 | legal_actions = 
env._get_legal_actions() 23 | for action in legal_actions: 24 | self.assertIn(action, env.actions) 25 | 26 | def test_decode_action(self): 27 | env = rlcard.make('no-limit-holdem') 28 | state, _ = env.reset() 29 | for action in state['legal_actions']: 30 | decoded = env._decode_action(action) 31 | self.assertIn(decoded, env.actions) 32 | 33 | decoded = env._decode_action(Action.FOLD.value) 34 | self.assertEqual(decoded, Action.FOLD) 35 | 36 | env.step(0) 37 | decoded = env._decode_action(1) 38 | self.assertEqual(decoded, Action.CHECK_CALL) 39 | 40 | def test_step(self): 41 | env = rlcard.make('no-limit-holdem') 42 | state, player_id = env.reset() 43 | self.assertEqual(player_id, env.get_player_id()) 44 | action = list(state['legal_actions'].keys())[0] 45 | _, player_id = env.step(action) 46 | self.assertEqual(player_id, env.get_player_id()) 47 | 48 | def test_step_back(self): 49 | env = rlcard.make('no-limit-holdem', config={'allow_step_back':True}) 50 | _, player_id = env.reset() 51 | env.step(0) 52 | _, back_player_id = env.step_back() 53 | self.assertEqual(player_id, back_player_id) 54 | self.assertEqual(env.step_back(), False) 55 | 56 | env = rlcard.make('no-limit-holdem') 57 | with self.assertRaises(Exception): 58 | env.step_back() 59 | 60 | def test_run(self): 61 | env = rlcard.make('no-limit-holdem') 62 | agents = [RandomAgent(env.num_actions) for _ in range(env.num_players)] 63 | env.set_agents(agents) 64 | trajectories, payoffs = env.run(is_training=False) 65 | self.assertEqual(len(trajectories), 2) 66 | total = 0 67 | for payoff in payoffs: 68 | total += payoff 69 | self.assertEqual(total, 0) 70 | 71 | def test_get_perfect_information(self): 72 | env = rlcard.make('no-limit-holdem') 73 | _, player_id = env.reset() 74 | self.assertEqual(player_id, env.get_perfect_information()['current_player']) 75 | 76 | def test_multiplayers(self): 77 | env = rlcard.make('no-limit-holdem', config={'game_num_players':5}) 78 | num_players = env.game.get_num_players() 79 | self.assertEqual(num_players, 5) 80 | 81 | def test_config_chips(self): 82 | env = rlcard.make('no-limit-holdem', config={'game_num_players':5, 'chips_for_each':100}) 83 | env.game.init_game() 84 | players = env.game.players 85 | chips = [] 86 | for i in range(5): 87 | chips.append(players[i].remained_chips + players[i].in_chips) 88 | self.assertEqual(chips, [100, 100, 100, 100, 100]) 89 | 90 | if __name__ == '__main__': 91 | unittest.main() 92 | -------------------------------------------------------------------------------- /tests/envs/test_registration.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import rlcard 4 | from rlcard.envs.registration import register, make 5 | from .determism_util import is_deterministic 6 | 7 | 8 | class TestRegistration(unittest.TestCase): 9 | 10 | def test_register(self): 11 | register(env_id='test_reg', entry_point='rlcard.envs.blackjack:BlackjackEnv') 12 | with self.assertRaises(ValueError): 13 | register(env_id='test_reg', entry_point='rlcard.envs.blackjack:BlackjackEnv') 14 | 15 | def test_make(self): 16 | register(env_id='test_make', entry_point='rlcard.envs.blackjack:BlackjackEnv') 17 | env = rlcard.make('test_make') 18 | _, player = env.reset() 19 | self.assertEqual(player, 0) 20 | with self.assertRaises(ValueError): 21 | make('test_random_make') 22 | 23 | def test_make_modes(self): 24 | register(env_id='test_env', entry_point='rlcard.envs.blackjack:BlackjackEnv') 25 | 26 | if __name__ == '__main__': 27 | unittest.main() 28 
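The register/make round trip tested above is also the pattern for exposing a custom environment: register an id that points at an Env subclass, then create instances through rlcard.make. A minimal sketch (the id 'my-blackjack' is a hypothetical example, with BlackjackEnv standing in for a user-defined class):

import rlcard
from rlcard.envs.registration import register

# Register the id once; registering it a second time raises ValueError,
# exactly as test_register above checks.
register(env_id='my-blackjack', entry_point='rlcard.envs.blackjack:BlackjackEnv')
env = rlcard.make('my-blackjack')
state, player_id = env.reset()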
-------------------------------------------------------------------------------- /tests/envs/test_uno_env.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | 4 | import rlcard 5 | from rlcard.agents.random_agent import RandomAgent 6 | from rlcard.games.uno.utils import ACTION_LIST 7 | from .determism_util import is_deterministic 8 | 9 | 10 | class TestUnoEnv(unittest.TestCase): 11 | 12 | def test_reset_and_extract_state(self): 13 | env = rlcard.make('uno') 14 | state, _ = env.reset() 15 | self.assertEqual(state['obs'].size, 240) 16 | 17 | def test_is_deterministic(self): 18 | self.assertTrue(is_deterministic('uno')) 19 | 20 | def test_get_legal_actions(self): 21 | env = rlcard.make('uno') 22 | env.set_agents([RandomAgent(env.num_actions) for _ in range(env.num_players)]) 23 | env.reset() 24 | legal_actions = env._get_legal_actions() 25 | for legal_action in legal_actions: 26 | self.assertLessEqual(legal_action, 60) 27 | 28 | def test_step(self): 29 | env = rlcard.make('uno') 30 | state, _ = env.reset() 31 | action = np.random.choice(list(state['legal_actions'].keys())) 32 | _, player_id = env.step(action) 33 | self.assertEqual(player_id, env.game.round.current_player) 34 | 35 | def test_step_back(self): 36 | env = rlcard.make('uno', config={'allow_step_back':True}) 37 | state, player_id = env.reset() 38 | action = np.random.choice(list(state['legal_actions'].keys())) 39 | env.step(action) 40 | env.step_back() 41 | self.assertEqual(env.game.round.current_player, player_id) 42 | 43 | env = rlcard.make('uno', config={'allow_step_back':False}) 44 | state, player_id = env.reset() 45 | action = np.random.choice(list(state['legal_actions'].keys())) 46 | env.step(action) 47 | # env.step_back() 48 | self.assertRaises(Exception, env.step_back) 49 | 50 | def test_run(self): 51 | env = rlcard.make('uno') 52 | env.set_agents([RandomAgent(env.num_actions) for _ in range(env.num_players)]) 53 | trajectories, payoffs = env.run(is_training=False) 54 | self.assertEqual(len(trajectories), 2) 55 | total = 0 56 | for payoff in payoffs: 57 | total += payoff 58 | self.assertEqual(total, 0) 59 | trajectories, payoffs = env.run(is_training=True) 60 | total = 0 61 | for payoff in payoffs: 62 | total += payoff 63 | self.assertEqual(total, 0) 64 | 65 | def test_decode_action(self): 66 | env = rlcard.make('uno') 67 | env.reset() 68 | legal_actions = env._get_legal_actions() 69 | for legal_action in legal_actions: 70 | decoded = env._decode_action(legal_action) 71 | self.assertEqual(decoded, ACTION_LIST[legal_action]) 72 | 73 | def test_get_perfect_information(self): 74 | env = rlcard.make('uno') 75 | _, player_id = env.reset() 76 | self.assertEqual(player_id, env.get_perfect_information()['current_player']) 77 | if __name__ == '__main__': 78 | unittest.main() 79 | -------------------------------------------------------------------------------- /tests/games/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datamllab/rlcard/d7d0a957baf4cc7225a50522adb0164bf130a9d0/tests/games/__init__.py -------------------------------------------------------------------------------- /tests/games/test_blackjack_game.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | 4 | from rlcard.games.blackjack.game import BlackjackGame as Game 5 | from rlcard.envs.blackjack import DEFAULT_GAME_CONFIG 6 | 7 |
class TestBlackjackGame(unittest.TestCase): 8 | 9 | def test_get_num_players(self): 10 | game = Game() 11 | game.configure(DEFAULT_GAME_CONFIG) 12 | num_players = game.get_num_players() 13 | self.assertEqual(num_players, 1) 14 | 15 | def test_get_num_actions(self): 16 | game = Game() 17 | game.configure(DEFAULT_GAME_CONFIG) 18 | num_actions = game.get_num_actions() 19 | self.assertEqual(num_actions, 2) 20 | 21 | def test_init_game(self): 22 | game = Game() 23 | game.configure(DEFAULT_GAME_CONFIG) 24 | state, current_player = game.init_game() 25 | self.assertEqual(len(game.history), 0) 26 | self.assertEqual(current_player, 0) 27 | self.assertEqual(game.winner['dealer'], 0) 28 | self.assertEqual(len(state['state'][0]), len(state['state'][1])+1) 29 | 30 | def test_step(self): 31 | game = Game() 32 | game.configure(DEFAULT_GAME_CONFIG) 33 | game.init_game() 34 | next_state, next_player = game.step('hit') 35 | self.assertEqual(next_player, 0) 36 | if game.players[0].status != 'bust': 37 | self.assertEqual(len(game.dealer.hand), len(next_state['state'][1])+1) 38 | else: 39 | self.assertEqual(len(game.dealer.hand), len(next_state['state'][1])) 40 | next_state, _ = game.step('stand') 41 | self.assertEqual(len(next_state['state'][0]), len(game.players[0].hand)) 42 | 43 | def test_proceed_game(self): 44 | game = Game() 45 | game.configure(DEFAULT_GAME_CONFIG) 46 | game.init_game() 47 | while not game.is_over(): 48 | action = np.random.choice(['hit', 'stand']) 49 | state, _ = game.step(action) 50 | self.assertEqual(len(state['state'][1]), len(game.dealer.hand)) 51 | 52 | def test_step_back(self): 53 | game = Game(allow_step_back=True) 54 | game.configure(DEFAULT_GAME_CONFIG) 55 | state, _ = game.init_game() 56 | init_hand = state['state'][0] 57 | game.step('hit') 58 | game.step_back() 59 | test_hand = game.get_state(0)['state'][0] 60 | self.assertEqual(init_hand, test_hand) 61 | self.assertEqual(len(game.history), 0) 62 | success = game.step_back() 63 | self.assertEqual(success, False) 64 | 65 | def test_get_state(self): 66 | game = Game() 67 | game.configure(DEFAULT_GAME_CONFIG) 68 | game.init_game() 69 | self.assertEqual(len(game.get_state(0)['state'][1]), 1) 70 | game.step('stand') 71 | self.assertGreater(len(game.get_state(0)['state'][1]), 1) 72 | 73 | if __name__ == '__main__': 74 | unittest.main() 75 | -------------------------------------------------------------------------------- /tests/games/test_mahjong_game.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | 4 | from rlcard.games.mahjong.game import MahjongGame as Game 5 | from rlcard.games.mahjong.player import MahjongPlayer as Player 6 | 7 | class TestMahjongMethods(unittest.TestCase): 8 | 9 | def test_get_num_players(self): 10 | game = Game() 11 | num_players = game.get_num_players() 12 | self.assertEqual(num_players, 4) 13 | 14 | def test_get_num_actions(self): 15 | game = Game() 16 | num_actions = game.get_num_actions() 17 | self.assertEqual(num_actions, 38) 18 | 19 | def test_init_game(self): 20 | game = Game() 21 | state, _ = game.init_game() 22 | total_cards = list(state['current_hand']) 23 | self.assertGreaterEqual(len(total_cards), 14) 24 | 25 | def test_get_player_id(self): 26 | game = Game() 27 | _, player_id = game.init_game() 28 | current = game.get_player_id() 29 | self.assertEqual(player_id, current) 30 | 31 | 32 | def test_get_legal_actions(self): 33 | game = Game() 34 | state, _ = game.init_game() 35 | actions = game.get_legal_actions(state)
36 | for action in actions: 37 | self.assertIn(action, state['current_hand']) 38 | 39 | def test_step(self): 40 | game = Game() 41 | state, _ = game.init_game() 42 | action = np.random.choice(game.get_legal_actions(state)) 43 | state, next_player_id = game.step(action) 44 | current = game.round.current_player 45 | self.assertLessEqual(len(state['current_hand']), 14) 46 | self.assertEqual(next_player_id, current) 47 | 48 | def test_get_payoffs(self): 49 | game = Game() 50 | state, _ = game.init_game() 51 | while not game.is_over(): 52 | actions = game.get_legal_actions(state) 53 | action = np.random.choice(actions) 54 | state, _ = game.step(action) 55 | total_cards = len(state['current_hand']) 56 | self.assertLessEqual(total_cards, 14) 57 | win = game.is_over() 58 | self.assertEqual(win, True) 59 | 60 | def test_step_back(self): 61 | game = Game(allow_step_back=True) 62 | state, player_id = game.init_game() 63 | action = np.random.choice(game.get_legal_actions(state)) 64 | game.step(action) 65 | game.step_back() 66 | self.assertEqual(game.round.current_player, player_id) 67 | self.assertEqual(len(game.history), 0) 68 | success = game.step_back() 69 | self.assertEqual(success, False) 70 | 71 | def test_player_get_player_id(self): 72 | player = Player(0, np.random.RandomState()) 73 | self.assertEqual(0, player.get_player_id()) 74 | 75 | if __name__ == '__main__': 76 | unittest.main() 77 | -------------------------------------------------------------------------------- /tests/games/test_nolimitholdem_judger.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from rlcard.games.nolimitholdem.player import NolimitholdemPlayer as Player 4 | from rlcard.games.base import Card 5 | from rlcard.games.limitholdem.judger import LimitHoldemJudger as Judger 6 | from rlcard.games.limitholdem.utils import Hand 7 | 8 | 9 | rand_state = np.random.RandomState() 10 | 11 | class TestNolimitholdemGame(unittest.TestCase): 12 | 13 | def get_players(self, num_players=2): 14 | players = [] 15 | 16 | for i in range(num_players): 17 | players.append(Player(i, 100 + 100*i, rand_state)) 18 | players[i].bet(players[i].remained_chips) # All in 19 | 20 | return players 21 | 22 | def get_hands(self, player_hands, public_card): 23 | hands = [] 24 | for hand in player_hands: 25 | hands.append(hand + public_card) 26 | return hands 27 | 28 | def test_judge_with_4_players(self): 29 | 30 | ''' 31 | suit_list = ['S', 'H', 'D', 'C'] 32 | rank_list = ['A', '2', '3', '4', '5', '6', '7', '8', '9', 'T', 'J', 'Q', 'K'] 33 | ''' 34 | players = self.get_players(4) 35 | 36 | 37 | public_card = [Card('S', 'A'), Card('S', 'K'), Card('S', 'Q'), Card('S', '2'), Card('S', '3')] 38 | hands = [[Card('S', 'J'), Card('S', 'T')], 39 | [Card('S', '4'), Card('S', '5')], 40 | [Card('S', '9'), Card('C', 'T')], 41 | [Card('H', 'T'), Card('C', 'J')]] 42 | 43 | payoffs = Judger(rand_state).judge_game(players, self.get_hands(hands, public_card)) 44 | self.assertEqual(payoffs, [300, 100, -100, -300]) 45 | 46 | public_card = [Card('H', 'A'), Card('H', 'K'), Card('S', 'Q'), Card('S', 'T'), Card('S', '9')] 47 | 48 | hands = [[Card('S', 'A'), Card('H', '4')], 49 | [Card('D', 'A'), Card('H', '5')], 50 | [Card('D', 'K'), Card('H', '6')], 51 | [Card('S', 'K'), Card('H', '7')]] 52 | 53 | payoffs = Judger(rand_state).judge_game(players, self.get_hands(hands, public_card)) 54 | self.assertEqual(payoffs, [100, 300, -200, -200]) 55 | 56 | def test_judge_with_6_players(self): 57 | rand_state = 
np.random.RandomState() 58 | 59 | public_card = [Card('S', 'A'), Card('S', 'K'), Card('D', 'Q'), Card('D', 'T'), Card('C', '9')] 60 | players = self.get_players(6) 61 | 62 | hands = [[Card('C', 'A'), Card('H', '2')], 63 | [Card('D', 'A'), Card('H', '3')], 64 | [Card('C', 'K'), Card('C', '2')], 65 | [Card('D', 'K'), Card('C', '3')], 66 | [Card('C', 'Q'), Card('S', '2')], 67 | [Card('D', 'Q'), Card('S', '3')]] 68 | 69 | payoffs = Judger(rand_state).judge_game(players, self.get_hands(hands, public_card)) 70 | self.assertEqual(payoffs, [200, 600, -100, 100, -400, -400]) 71 | 72 | 73 | if __name__ == '__main__': 74 | unittest.main() 75 | -------------------------------------------------------------------------------- /tests/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datamllab/rlcard/d7d0a957baf4cc7225a50522adb0164bf130a9d0/tests/models/__init__.py -------------------------------------------------------------------------------- /tests/models/test_model_registeration.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from rlcard import models 4 | from rlcard.models.registration import register, load 5 | 6 | 7 | class TestRegistration(unittest.TestCase): 8 | 9 | def test_register(self): 10 | register(model_id='test_reg', entry_point='rlcard.models.pretrained_models:LeducHoldemCFRModel') 11 | with self.assertRaises(ValueError): 12 | register(model_id='test_reg', entry_point='rlcard.models.pretrained_models:LeducHoldemCFRModel') 13 | 14 | def test_load(self): 15 | register(model_id='test_load', entry_point='rlcard.models.pretrained_models:LeducHoldemCFRModel') 16 | models.load('test_load') 17 | with self.assertRaises(ValueError): 18 | load('test_random_make') 19 | 20 | if __name__ == '__main__': 21 | unittest.main() 22 | -------------------------------------------------------------------------------- /tests/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datamllab/rlcard/d7d0a957baf4cc7225a50522adb0164bf130a9d0/tests/utils/__init__.py -------------------------------------------------------------------------------- /tests/utils/test_logger.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import os 3 | import shutil 4 | 5 | from rlcard.utils.logger import Logger 6 | 7 | class TestLogger(unittest.TestCase): 8 | 9 | def test_log(self): 10 | log_dir = "experiments/newtest/test_log.txt" 11 | if os.path.exists(log_dir): 12 | shutil.rmtree(log_dir) 13 | with Logger(log_dir) as logger: 14 | logger.log("test text") 15 | logger.log_performance(1, 1) 16 | logger.log_performance(2, 2) 17 | logger.log_performance(3, 3) 18 | 19 | if __name__ == '__main__': 20 | unittest.main() 21 | -------------------------------------------------------------------------------- /tests/utils/test_utils.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from rlcard.utils.utils import init_54_deck, init_standard_deck, rank2int, print_card, elegent_form, reorganize, tournament 4 | import rlcard 5 | from rlcard.agents.random_agent import RandomAgent 6 | 7 | class TestUtils(unittest.TestCase): 8 | 9 | def test_init_standard_deck(self): 10 | self.assertEqual(len(init_standard_deck()), 52) 11 | 12 | def test_init_54_deck(self): 13 | 
self.assertEqual(len(init_54_deck()), 54) 14 | 15 | def test_rank2int(self): 16 | self.assertEqual(rank2int('A'), 14) 17 | self.assertEqual(rank2int(''), -1) 18 | self.assertEqual(rank2int('3'), 3) 19 | self.assertEqual(rank2int('T'), 10) 20 | self.assertEqual(rank2int('J'), 11) 21 | self.assertEqual(rank2int('Q'), 12) 22 | self.assertEqual(rank2int('1000'), None) 23 | self.assertEqual(rank2int('abc123'), None) 24 | self.assertEqual(rank2int('K'), 13) 25 | 26 | def test_print_cards(self): 27 | self.assertEqual(len(elegent_form('S9')), 2) 28 | self.assertEqual(len(elegent_form('ST')), 3) 29 | 30 | print_card(None) 31 | print_card('S9') 32 | print_card('ST') 33 | 34 | def test_reorganize(self): 35 | trajectories = reorganize([[[1,2],1,[4,5]]], [1]) 36 | self.assertEqual(len(trajectories), 1) 37 | self.assertEqual(len(trajectories[0]), 1) 38 | self.assertEqual(len(trajectories[0][0]), 5) 39 | 40 | def test_tournament(self): 41 | env = rlcard.make('leduc-holdem') 42 | env.set_agents([RandomAgent(env.num_actions), RandomAgent(env.num_actions)]) 43 | payoffs = tournament(env, 1000) 44 | self.assertEqual(len(payoffs), 2) 45 | 46 | if __name__ == '__main__': 47 | unittest.main() 48 | --------------------------------------------------------------------------------
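Taken together, these utilities form the evaluation loop used throughout the examples: attach agents to an environment, average payoffs over a tournament, and record the curve with Logger. A minimal sketch combining them (the log directory and the episode count are arbitrary choices):

import rlcard
from rlcard.agents.random_agent import RandomAgent
from rlcard.utils.logger import Logger
from rlcard.utils.utils import tournament

# Evaluate two random agents head to head and log the average payoff.
env = rlcard.make('leduc-holdem')
env.set_agents([RandomAgent(env.num_actions) for _ in range(env.num_players)])
with Logger('experiments/random_leduc') as logger:
    payoffs = tournament(env, 100)  # average payoff per player over 100 episodes
    logger.log_performance(episode=0, reward=payoffs[0])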