├── .codeclimate.yml ├── .dockerignore ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── PULL_REQUEST_TEMPLATE.md └── workflows │ └── ci.yml ├── .gitignore ├── CODE_OF_CONDUCT.md ├── Dockerfile ├── LICENSE ├── README.md ├── bin ├── plot_benchmark.py ├── plot_script.py ├── setup ├── setup_arch ├── setup_arch_extra ├── setup_macOS ├── setup_macOS_extra ├── setup_ubuntu └── setup_ubuntu_extra ├── environment-byo.yml ├── environment.yml ├── job ├── atari_benchmark_adv.json ├── atari_benchmark_base.json ├── experiments.json └── roboschool_benchmark.json ├── package.json ├── run_lab.py ├── setup.py ├── slm_lab ├── __init__.py ├── agent │ ├── __init__.py │ ├── algorithm │ │ ├── __init__.py │ │ ├── actor_critic.py │ │ ├── base.py │ │ ├── dqn.py │ │ ├── policy_util.py │ │ ├── ppo.py │ │ ├── random.py │ │ ├── reinforce.py │ │ ├── sac.py │ │ ├── sarsa.py │ │ └── sil.py │ ├── memory │ │ ├── __init__.py │ │ ├── base.py │ │ ├── onpolicy.py │ │ ├── prioritized.py │ │ └── replay.py │ └── net │ │ ├── __init__.py │ │ ├── base.py │ │ ├── conv.py │ │ ├── mlp.py │ │ ├── net_util.py │ │ ├── q_net.py │ │ └── recurrent.py ├── env │ ├── __init__.py │ ├── base.py │ ├── openai.py │ ├── registration.py │ ├── unity.py │ ├── vec_env.py │ ├── vizdoom │ │ ├── __init__.py │ │ ├── cfgs │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── basic.cfg │ │ │ ├── basic.wad │ │ │ ├── bots.cfg │ │ │ ├── cig.cfg │ │ │ ├── cig.wad │ │ │ ├── cig_with_unknown.wad │ │ │ ├── deadly_corridor.cfg │ │ │ ├── deadly_corridor.wad │ │ │ ├── deathmatch.cfg │ │ │ ├── deathmatch.wad │ │ │ ├── defend_the_center.cfg │ │ │ ├── defend_the_center.wad │ │ │ ├── defend_the_line.cfg │ │ │ ├── defend_the_line.wad │ │ │ ├── health_gathering.cfg │ │ │ ├── health_gathering.wad │ │ │ ├── health_gathering_supreme.cfg │ │ │ ├── health_gathering_supreme.wad │ │ │ ├── learning.cfg │ │ │ ├── multi.cfg │ │ │ ├── multi_deathmatch.wad │ │ │ ├── multi_duel.cfg │ │ │ ├── multi_duel.wad │ │ │ ├── my_way_home.cfg │ │ │ ├── my_way_home.wad │ │ │ ├── oblige.cfg │ │ │ ├── predict_position.cfg │ │ │ ├── predict_position.wad │ │ │ ├── rocket_basic.cfg │ │ │ ├── rocket_basic.wad │ │ │ ├── simpler_basic.cfg │ │ │ ├── simpler_basic.wad │ │ │ ├── take_cover.cfg │ │ │ └── take_cover.wad │ │ └── vizdoom_env.py │ └── wrapper.py ├── experiment │ ├── __init__.py │ ├── analysis.py │ ├── control.py │ ├── retro_analysis.py │ └── search.py ├── lib │ ├── __init__.py │ ├── decorator.py │ ├── distribution.py │ ├── logger.py │ ├── math_util.py │ ├── optimizer.py │ ├── util.py │ └── viz.py └── spec │ ├── __init__.py │ ├── _random_baseline.json │ ├── benchmark │ ├── a2c │ │ ├── a2c_gae_atari.json │ │ ├── a2c_gae_cartpole.json │ │ ├── a2c_gae_cont.json │ │ ├── a2c_gae_lunar.json │ │ ├── a2c_gae_pong.json │ │ ├── a2c_gae_qbert.json │ │ ├── a2c_gae_roboschool.json │ │ ├── a2c_gae_unity.json │ │ ├── a2c_nstep_atari.json │ │ ├── a2c_nstep_cont.json │ │ ├── a2c_nstep_lunar.json │ │ ├── a2c_nstep_pong.json │ │ ├── a2c_nstep_qbert.json │ │ ├── a2c_nstep_roboschool.json │ │ ├── a2c_nstep_unity.json │ │ └── a2c_videopinball.json │ ├── a3c │ │ ├── a3c_gae_atari.json │ │ ├── a3c_gae_pong.json │ │ ├── a3c_gae_qbert.json │ │ ├── a3c_nstep_atari.json │ │ ├── a3c_nstep_pong.json │ │ ├── a3c_nstep_qbert.json │ │ └── a3c_videopinball.json │ ├── async_sac │ │ ├── async_sac_atari.json │ │ ├── async_sac_halfcheetah.json │ │ ├── async_sac_halfcheetah_pybullet.json │ │ ├── async_sac_lunar.json │ │ ├── async_sac_pong.json │ │ ├── async_sac_qbert.json │ │ └── async_sac_roboschool.json │ ├── 
dppo │ │ ├── dppo_atari.json │ │ └── dppo_pong.json │ ├── dqn │ │ ├── ddqn_atari.json │ │ ├── ddqn_per_atari.json │ │ ├── ddqn_per_lunar.json │ │ ├── ddqn_per_pong.json │ │ ├── ddqn_per_qbert.json │ │ ├── ddqn_per_unity.json │ │ ├── ddqn_pong.json │ │ ├── ddqn_qbert.json │ │ ├── ddqn_videopinball.json │ │ ├── dqn_atari.json │ │ ├── dqn_cartpole.json │ │ ├── dqn_lunar.json │ │ ├── dqn_per_atari.json │ │ ├── dqn_per_pong.json │ │ ├── dqn_per_qbert.json │ │ ├── dqn_pong.json │ │ ├── dqn_qbert.json │ │ ├── dqn_unity.json │ │ ├── dqn_videopinball.json │ │ ├── dueling_ddqn_per_atari.json │ │ ├── dueling_ddqn_per_pong.json │ │ └── dueling_dqn_pong.json │ ├── ppo │ │ ├── ppo_atari.json │ │ ├── ppo_cartpole.json │ │ ├── ppo_cont.json │ │ ├── ppo_lunar.json │ │ ├── ppo_pong.json │ │ ├── ppo_qbert.json │ │ ├── ppo_roboschool.json │ │ ├── ppo_unity.json │ │ └── ppo_videopinball.json │ ├── reinforce │ │ ├── reinforce_cartpole.json │ │ └── reinforce_videopinball.json │ ├── sac │ │ ├── sac_halfcheetah.json │ │ ├── sac_lunar.json │ │ ├── sac_per_halfcheetah.json │ │ ├── sac_per_halfcheetah_pybullet.json │ │ ├── sac_per_roboschool.json │ │ ├── sac_pong.json │ │ ├── sac_roboschool.json │ │ └── sac_unity.json │ └── sarsa │ │ ├── sarsa_cartpole.json │ │ └── sarsa_videopinball.json │ ├── demo.json │ ├── experimental │ ├── a2c │ │ ├── a2c_cartpole.json │ │ ├── a2c_gae_lam_search.json │ │ ├── a2c_nstep_n_search.json │ │ └── a2c_pendulum.json │ ├── a3c │ │ ├── a3c_cartpole.json │ │ └── a3c_nstep_worker_search.json │ ├── dqn │ │ ├── ddqn_cartpole.json │ │ ├── ddqn_lunar.json │ │ ├── ddqn_lunar_search.json │ │ ├── dqn_cartpole.json │ │ ├── dqn_cartpole_search.json │ │ ├── dqn_lunar_search.json │ │ └── dueling_dqn_cartpole.json │ ├── misc │ │ ├── base.json │ │ ├── gridworld.json │ │ ├── hydra_dqn.json │ │ ├── lunar_pg.json │ │ ├── mountain_car.json │ │ ├── pendulum.json │ │ └── random.json │ ├── ppo │ │ ├── ppo_cartpole.json │ │ ├── ppo_eps_search.json │ │ ├── ppo_lam_search.json │ │ ├── ppo_pendulum.json │ │ └── ppo_unity_lam_search.json │ ├── reinforce │ │ ├── reinforce_cartpole.json │ │ └── reinforce_pendulum.json │ ├── sac │ │ └── sac_cartpole.json │ ├── sarsa │ │ └── sarsa_cartpole.json │ └── sil │ │ ├── ppo_sil_cartpole.json │ │ └── sil_cartpole.json │ ├── random_baseline.py │ └── spec_util.py └── test ├── __init__.py ├── agent ├── memory │ ├── test_onpolicy_memory.py │ ├── test_per_memory.py │ └── test_replay_memory.py └── net │ ├── test_conv.py │ ├── test_mlp.py │ └── test_recurrent.py ├── conftest.py ├── env ├── test_registration.py ├── test_vec_env.py └── test_wrapper.py ├── experiment ├── test_control.py └── test_monitor.py ├── fixture └── lib │ └── util │ ├── test_df.csv │ ├── test_dict.json │ ├── test_dict.yml │ └── test_str.txt ├── lib ├── test_distribution.py ├── test_logger.py ├── test_math_util.py └── test_util.py └── spec ├── test_dist_spec.py ├── test_spec.py └── test_spec_util.py /.codeclimate.yml: -------------------------------------------------------------------------------- 1 | --- 2 | version: 2 3 | plugins: 4 | duplication: 5 | enabled: true 6 | config: 7 | languages: 8 | python: 9 | python_version: 3 10 | mass_threshold: 40 11 | eslint: 12 | enabled: true 13 | fixme: 14 | enabled: true 15 | pep8: 16 | enabled: true 17 | checks: 18 | E501: 19 | enabled: false 20 | radon: 21 | enabled: true 22 | config: 23 | threshold: "C" 24 | checks: 25 | argument-count: 26 | config: 27 | threshold: 10 28 | method-complexity: 29 | config: 30 | threshold: 10 31 | file-lines: 32 | config: 33 | threshold: 800 
34 | exclude_patterns: 35 | - bin/ 36 | - config/ 37 | - node_modules/ 38 | - test/ 39 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | # System 2 | **/.DS_Store 3 | 4 | # IDEs 5 | **/.idea 6 | 7 | # Python 8 | **/__pycache__ 9 | **/*.py[cod] 10 | **/*$py.class 11 | **/*.egg* 12 | **/*.manifest 13 | **/.pytest* 14 | **/.cache 15 | **/dist/ 16 | **/src 17 | **/htmlcov 18 | **/coverage.xml 19 | **/.coverage* 20 | **/.env 21 | 22 | # NodeJS 23 | **/node_modules 24 | 25 | # VizDoom 26 | **/*.ini 27 | 28 | # Data files 29 | **/data 30 | **/model 31 | **/nb.py 32 | **/*.html 33 | **/*.log 34 | **/*.meta 35 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | 5 | --- 6 | 7 | **Describe the bug** 8 | A clear and concise description of what the bug is. 9 | 10 | **To Reproduce** 11 | 1. OS and environment: 12 | 2. SLM Lab git SHA (run `git rev-parse HEAD` to get it): 13 | 3. `spec` file used: 14 | 15 | **Additional context** 16 | Add any other context about the problem here. 17 | 18 | **Error logs** 19 | ```shell 20 | #REPLACE ME 21 | ``` 22 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | 5 | --- 6 | 7 | >Check the [Project Roadmaps](https://github.com/kengz/SLM-Lab/projects) to see if your feature request is already in there. 8 | 9 | **Are you requesting a feature or an implementation?** 10 | A clear and concise description of what the request is, and the reason. 11 | 12 | **If you have any suggested solutions** 13 | A clear and concise description of what you think could help. 14 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ## Title of code changes 2 | 3 | - describe the code changes and implications 4 | - add instructions to reproduce if relevant 5 | 6 | ```python 7 | some code snippets 8 | ``` 9 | 10 | 11 | *----------- USE ABOVE FOR FEATURES, BELOW FOR RESULTS -----------* 12 | 13 | 14 | # Experiment Title 15 | 16 | ## Abstract 17 | 18 | *Briefly describe the experiment and the contribution.* 19 | 20 | ## Methodology 21 | 22 | *Discuss the methods/algorithms used.* 23 | 24 | ### Reproduction 25 | 26 | 1. spec file location: 27 | 2. 
git SHA (find this inside the spec file): 28 | 29 | Run command: `python run_lab.py ` 30 | 31 | ## Result and Discussion 32 | 33 | *Provide data in graphs and tables; give explanations and conclusion.* 34 | 35 | Data zipfile url (We will send you a Dropbox file request): 36 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: [master] 6 | pull_request: 7 | branches: [master] 8 | 9 | jobs: 10 | lint: 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - name: Check out Git repository 15 | uses: actions/checkout@v2 16 | 17 | - name: Set up Python 18 | uses: actions/setup-python@v2 19 | with: 20 | python-version: 3.7 21 | 22 | - uses: liskin/gh-problem-matcher-wrap@v1 23 | with: 24 | action: add 25 | linters: flake8 26 | 27 | - name: Lint with flake8 28 | shell: bash -l {0} 29 | run: | 30 | pip install flake8 31 | # exit-zero treats all errors as warnings. 32 | flake8 . --ignore=E501 --exit-zero --statistics 33 | 34 | - uses: liskin/gh-problem-matcher-wrap@v1 35 | with: 36 | action: remove 37 | linters: flake8 38 | 39 | build: 40 | needs: lint 41 | runs-on: ubuntu-latest 42 | 43 | steps: 44 | - uses: actions/checkout@v2 45 | 46 | - name: Cache Conda 47 | uses: actions/cache@v2 48 | env: 49 | # change from default size of 32MB to prevent 503 err 50 | CACHE_UPLOAD_CHUNK_SIZE: 67108864 51 | with: 52 | path: /usr/share/miniconda/envs/lab 53 | key: ${{ runner.os }}-conda-${{ hashFiles('environment.yml') }} 54 | restore-keys: | 55 | ${{ runner.os }}-conda- 56 | 57 | - name: Setup Conda dependencies 58 | uses: conda-incubator/setup-miniconda@v2 59 | with: 60 | activate-environment: lab 61 | environment-file: environment.yml 62 | python-version: 3.7 63 | auto-activate-base: false 64 | 65 | - name: Conda info 66 | shell: bash -l {0} 67 | run: | 68 | conda info 69 | conda list 70 | 71 | - uses: liskin/gh-problem-matcher-wrap@v1 72 | with: 73 | action: add 74 | linters: pytest 75 | 76 | - name: Run tests 77 | shell: bash -l {0} 78 | run: | 79 | python setup.py test 80 | 81 | - uses: liskin/gh-problem-matcher-wrap@v1 82 | with: 83 | action: add 84 | linters: pytest 85 | 86 | - name: Test & publish code coverage 87 | uses: paambaati/codeclimate-action@v2.7.5 88 | env: 89 | CC_TEST_REPORTER_ID: ${{ secrets.CC_TEST_REPORTER_ID }} 90 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # System 2 | .DS_Store 3 | 4 | # IDEs 5 | .idea/ 6 | .vscode 7 | 8 | # Python 9 | __pycache__/ 10 | *.py[cod] 11 | *$py.class 12 | *.egg* 13 | *.manifest 14 | .pytest* 15 | .cache 16 | dist/ 17 | src/ 18 | htmlcov/ 19 | coverage.xml 20 | .coverage* 21 | .env 22 | .mypy_cache 23 | 24 | # NodeJS 25 | node_modules/ 26 | 27 | # VizDoom 28 | *.ini 29 | 30 | # Data files 31 | data/ 32 | model/ 33 | nb.py 34 | *.html 35 | *.log 36 | *.meta 37 | SLM-Env 38 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of 
age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 6 | 7 | ## Our Standards 8 | 9 | Examples of behavior that contributes to creating a positive environment include: 10 | 11 | * Using welcoming and inclusive language 12 | * Being respectful of differing viewpoints and experiences 13 | * Gracefully accepting constructive criticism 14 | * Focusing on what is best for the community 15 | * Showing empathy towards other community members 16 | 17 | Examples of unacceptable behavior by participants include: 18 | 19 | * The use of sexualized language or imagery and unwelcome sexual attention or advances 20 | * Trolling, insulting/derogatory comments, and personal or political attacks 21 | * Public or private harassment 22 | * Publishing others' private information, such as a physical or electronic address, without explicit permission 23 | * Other conduct which could reasonably be considered inappropriate in a professional setting 24 | 25 | ## Our Responsibilities 26 | 27 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 28 | 29 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 30 | 31 | ## Scope 32 | 33 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. 34 | 35 | ## Enforcement 36 | 37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at Wah Loon Keng (kengzwl@gmail.com) or Laura Graesser (lhgraesser@gmail.com). The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. 38 | 39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. 40 | 41 | ## Attribution 42 | 43 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] 44 | 45 | [homepage]: http://contributor-covenant.org 46 | [version]: http://contributor-covenant.org/version/1/4/ 47 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # run instructions: 2 | # build image: docker build -t kengz/slm_lab:latest -t kengz/slm_lab:v4.2.0 . 
3 | # start container: docker run --rm -it kengz/slm_lab:v4.2.0 4 | # list image: docker images -a 5 | # push image: docker push kengz/slm_lab 6 | # prune: docker system prune 7 | 8 | FROM ubuntu:16.04 9 | 10 | LABEL maintainer="kengzwl@gmail.com" 11 | LABEL website="https://github.com/kengz/SLM-Lab" 12 | 13 | SHELL ["/bin/bash", "-c"] 14 | 15 | RUN apt-get update && \ 16 | apt-get install -y build-essential \ 17 | curl nano git wget zip libstdc++6 \ 18 | python3-dev zlib1g-dev libjpeg-dev cmake swig python-pyglet python3-opengl libboost-all-dev libsdl2-dev libosmesa6-dev patchelf ffmpeg xvfb && \ 19 | rm -rf /var/lib/apt/lists/* 20 | 21 | RUN curl -O https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ 22 | bash Miniconda3-latest-Linux-x86_64.sh -b && \ 23 | rm Miniconda3-latest-Linux-x86_64.sh && \ 24 | echo '. ~/miniconda3/etc/profile.d/conda.sh' >> ~/.bashrc && \ 25 | . ~/miniconda3/etc/profile.d/conda.sh && \ 26 | conda --version 27 | 28 | # create and set the working directory 29 | RUN mkdir -p /root/SLM-Lab 30 | 31 | WORKDIR /root/SLM-Lab 32 | 33 | # install dependencies, only retrigger on dependency changes 34 | COPY environment.yml environment.yml 35 | 36 | # install Python and Conda dependencies 37 | RUN . ~/miniconda3/etc/profile.d/conda.sh && \ 38 | conda create -n lab python=3.7.3 -y && \ 39 | conda activate lab && \ 40 | conda env update -f environment.yml && \ 41 | conda clean -y --all && \ 42 | rm -rf ~/.cache/pip 43 | 44 | # copy file at last to not trigger changes above unnecessarily 45 | COPY . . 46 | 47 | RUN . ~/miniconda3/etc/profile.d/conda.sh && \ 48 | conda activate lab && \ 49 | python setup.py test 50 | # pytest --verbose --no-flaky-report test/spec/test_dist_spec.py && \ 51 | # yarn reset 52 | 53 | CMD ["/bin/bash"] 54 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Wah Loon Keng, Laura Graesser 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SLM Lab
![GitHub tag (latest SemVer)](https://img.shields.io/github/tag/kengz/slm-lab) ![CI](https://github.com/kengz/SLM-Lab/workflows/CI/badge.svg) [![Maintainability](https://api.codeclimate.com/v1/badges/20c6a124c468b4d3e967/maintainability)](https://codeclimate.com/github/kengz/SLM-Lab/maintainability) [![Test Coverage](https://api.codeclimate.com/v1/badges/20c6a124c468b4d3e967/test_coverage)](https://codeclimate.com/github/kengz/SLM-Lab/test_coverage) 2 | 3 | 4 | 5 | Modular Deep Reinforcement Learning framework in PyTorch. 6 | 7 | Documentation: 8 | https://slm-lab.gitbook.io/slm-lab/ 9 | 10 |
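A minimal sketch of a typical invocation, assuming the bundled demo spec at `slm_lab/spec/demo.json` and the usual `run_lab.py` argument order of spec file, spec name, then lab mode (the same `demo.json`/`dqn_cartpole`/`dev` combination that `job/experiments.json` uses); check the documentation above for the exact CLI of your version:

```bash
# activate the conda environment created by bin/setup
conda activate lab
# quick dev-mode run of the bundled demo spec
python run_lab.py slm_lab/spec/demo.json dqn_cartpole dev
# full training run of the same spec
python run_lab.py slm_lab/spec/demo.json dqn_cartpole train
```

The recognized lab modes (`dev`, `train`, `search`, `enjoy`, `eval`) are defined in `slm_lab/__init__.py`.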
11 | 12 | >NOTE: the `book` branch has been updated for issue fixes. For the original code in the book _Foundations of Deep Reinforcement Learning_, check out to git tag `v4.1.1` 13 | 14 | ||||| 15 | |:---:|:---:|:---:|:---:| 16 | | ![ppo beamrider](https://user-images.githubusercontent.com/8209263/63994698-689ecf00-caaa-11e9-991f-0a5e9c2f5804.gif) | ![ppo breakout](https://user-images.githubusercontent.com/8209263/63994695-650b4800-caaa-11e9-9982-2462738caa45.gif) | ![ppo kungfumaster](https://user-images.githubusercontent.com/8209263/63994690-60469400-caaa-11e9-9093-b1cd38cee5ae.gif) | ![ppo mspacman](https://user-images.githubusercontent.com/8209263/63994685-5cb30d00-caaa-11e9-8f35-78e29a7d60f5.gif) | 17 | | BeamRider | Breakout | KungFuMaster | MsPacman | 18 | | ![ppo pong](https://user-images.githubusercontent.com/8209263/63994680-59b81c80-caaa-11e9-9253-ed98370351cd.gif) | ![ppo qbert](https://user-images.githubusercontent.com/8209263/63994672-54f36880-caaa-11e9-9757-7780725b53af.gif) | ![ppo seaquest](https://user-images.githubusercontent.com/8209263/63994665-4dcc5a80-caaa-11e9-80bf-c21db818115b.gif) | ![ppo spaceinvaders](https://user-images.githubusercontent.com/8209263/63994624-15c51780-caaa-11e9-9c9a-854d3ce9066d.gif) | 19 | | Pong | Qbert | Seaquest | Sp.Invaders | 20 | | ![sac ant](https://user-images.githubusercontent.com/8209263/63994867-ff6b8b80-caaa-11e9-971e-2fac1cddcbac.gif) | ![sac halfcheetah](https://user-images.githubusercontent.com/8209263/63994869-01354f00-caab-11e9-8e11-3893d2c2419d.gif) | ![sac hopper](https://user-images.githubusercontent.com/8209263/63994871-0397a900-caab-11e9-9566-4ca23c54b2d4.gif) | ![sac humanoid](https://user-images.githubusercontent.com/8209263/63994883-0befe400-caab-11e9-9bcc-c30c885aad73.gif) | 21 | | Ant | HalfCheetah | Hopper | Humanoid | 22 | | ![sac doublependulum](https://user-images.githubusercontent.com/8209263/63994879-07c3c680-caab-11e9-974c-06cdd25bfd68.gif) | ![sac pendulum](https://user-images.githubusercontent.com/8209263/63994880-085c5d00-caab-11e9-850d-049401540e3b.gif) | ![sac reacher](https://user-images.githubusercontent.com/8209263/63994881-098d8a00-caab-11e9-8e19-a3b32d601b10.gif) | ![sac walker](https://user-images.githubusercontent.com/8209263/63994882-0abeb700-caab-11e9-9e19-b59dc5c43393.gif) | 23 | | Inv.DoublePendulum | InvertedPendulum | Reacher | Walker | 24 | 25 | -------------------------------------------------------------------------------- /bin/setup: -------------------------------------------------------------------------------- 1 | #!/bin/bash --login 2 | # This script runs the same sequence as the Docker build 3 | # Run this as: 4 | # bin/setup 5 | 6 | 7 | # Fail on the first error; killable by SIGINT 8 | set -e 9 | trap "exit" INT 10 | 11 | read -p " 12 | ------------------------------------------------ 13 | 14 | Welcome to the SLM Lab setup script; 15 | This will invoke sudo; alternatively, 16 | inspect bin/setup_ubuntu or bin/setup_macOS and run the lines manually. 
17 | 18 | Press enter to continue, Ctrl+c to quit: 19 | 20 | ------------------------------------------------ 21 | " 22 | 23 | # resolve the bin dir used to call the OS-specific setup scripts 24 | BIN_DIR=`pwd`/bin 25 | 26 | # Run setup by OS 27 | if [ $(uname) == "Darwin" ]; then 28 | $BIN_DIR/setup_macOS 29 | elif [ -f '/etc/arch-release' ]; then 30 | $BIN_DIR/setup_arch 31 | else 32 | $BIN_DIR/setup_ubuntu 33 | fi 34 | 35 | # Run extra setup 36 | if [ "$1" == "extra" ]; then 37 | echo "Running extra optional setup" 38 | if [ $(uname) == "Darwin" ]; then 39 | $BIN_DIR/setup_macOS_extra 40 | elif [ -f '/etc/arch-release' ]; then 41 | $BIN_DIR/setup_arch_extra 42 | else 43 | $BIN_DIR/setup_ubuntu_extra 44 | fi 45 | fi 46 | 47 | echo " 48 | ------------------------------------------------ 49 | 50 | Installation complete. 51 | 52 | ------------------------------------------------ 53 | " 54 | -------------------------------------------------------------------------------- /bin/setup_arch: -------------------------------------------------------------------------------- 1 | #!/bin/bash --login 2 | # This script sets up SLM Lab for Arch Linux 3 | 4 | # Fail on the first error; killable by SIGINT 5 | set -e 6 | trap "exit" INT 7 | 8 | echo "--- Installing system dependencies ---" 9 | pacman -Suy 10 | pacman -Sy --needed git cmake gcc 11 | pacman -Sy --needed zlib libjpeg-turbo xorg-server-xvfb gst-libav xorg-server-devel python-opengl boost sdl swig base-devel gcc-libs hdf5 openblas 12 | 13 | echo "--- Installing Conda ---" 14 | if which conda >/dev/null; then 15 | echo "Conda is already installed" 16 | else 17 | pacman -Sy --needed python-conda 18 | echo '. /etc/profile.d/conda.sh' >> ~/.bashrc 19 | source ~/.bashrc 20 | fi 21 | 22 | echo "--- Installing Conda environment ---" 23 | if conda env list | grep "^lab " >/dev/null; then 24 | echo "conda env lab is already installed" 25 | else 26 | conda create -n lab python=3.7.3 -y 27 | fi 28 | 29 | # remove for reset: 30 | # conda deactivate 31 | # conda env remove -n lab -y 32 | # conda env export > environment.yml 33 | echo "--- Updating Conda environment ---" 34 | conda env update -f environment.yml 35 | 36 | source ~/.bashrc 37 | echo "--- Lab setup complete ---" 38 | -------------------------------------------------------------------------------- /bin/setup_arch_extra: -------------------------------------------------------------------------------- 1 | #!/bin/bash --login 2 | # Sets up extra dependencies for Unity 3 | 4 | # Fail on the first error; killable by SIGINT 5 | set -e 6 | trap "exit" INT 7 | 8 | echo "--- Installing Unity ML agents ---" 9 | conda activate lab 10 | pip install gym_unity==0.4.5 11 | # clone to slm_lab/env/SLM-Env 12 | git clone https://github.com/kengz/SLM-Env.git ./slm_lab/env/SLM-Env 13 | 14 | echo "--- Installing VizDoom ---" 15 | pip install vizdoom==1.1.6 16 | -------------------------------------------------------------------------------- /bin/setup_macOS: -------------------------------------------------------------------------------- 1 | #!/bin/bash --login 2 | # This script sets up SLM Lab for macOS 3 | 4 | # Fail on the first error; killable by SIGINT 5 | set -e 6 | trap "exit" INT 7 | 8 | echo "--- Installing brew ---" 9 | if which brew >/dev/null; then 10 | echo "Brew is already installed" 11 | else 12 | ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)" 13 | fi 14 | 15 | echo "--- Installing brew system dependencies ---" 16 | hb_list=(cmake boost boost-python3 sdl2 swig) 17 | for item in 
"${hb_list[@]}"; do 18 | echo "Installing ${item}" 19 | brew info "${item}" | grep --quiet "Not installed" && brew install "${item}" 20 | done 21 | 22 | echo "--- Installing Conda ---" 23 | if which conda >/dev/null; then 24 | echo "Conda is already installed" 25 | else 26 | curl -O https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh 27 | bash Miniconda3-latest-MacOSX-x86_64.sh -b -p ~/miniconda3 28 | rm Miniconda3-latest-MacOSX-x86_64.sh 29 | echo '. ~/miniconda3/etc/profile.d/conda.sh' >> ~/.bash_profile 30 | source ~/.bash_profile 31 | fi 32 | 33 | echo "--- Installing Conda environment ---" 34 | if ! which conda >/dev/null; then 35 | # guard for when no Conda is found, e.g. in Colab 36 | export PATH=~/miniconda3/bin:$PATH 37 | fi 38 | if conda env list | grep "^lab " >/dev/null; then 39 | echo "conda env lab is already installed" 40 | else 41 | conda create -n lab python=3.7.3 -y 42 | fi 43 | 44 | # install kernel for Atom Hydrogen 45 | # conda install ipykernel 46 | # python -m ipykernel install --user --name lab 47 | 48 | # remove for reset: 49 | # conda deactivate 50 | # conda env remove -n lab -y 51 | # conda env export > environment.yml 52 | echo "--- Updating Conda environment ---" 53 | conda env update -f environment.yml 54 | 55 | source ~/.bash_profile 56 | echo "--- Lab setup complete ---" 57 | -------------------------------------------------------------------------------- /bin/setup_macOS_extra: -------------------------------------------------------------------------------- 1 | #!/bin/bash --login 2 | # Sets up extra dependencies for Unity 3 | 4 | # Fail on the first error; killable by SIGINT 5 | set -e 6 | trap "exit" INT 7 | 8 | echo "--- Installing Unity ML agents ---" 9 | conda activate lab 10 | pip install gym_unity==0.4.5 11 | # clone to slm_lab/env/SLM-Env 12 | git clone https://github.com/kengz/SLM-Env.git ./slm_lab/env/SLM-Env 13 | 14 | echo "--- Installing VizDoom ---" 15 | pip install vizdoom==1.1.6 16 | -------------------------------------------------------------------------------- /bin/setup_ubuntu: -------------------------------------------------------------------------------- 1 | #!/bin/bash --login 2 | # This script sets up SLM Lab for Linux Ubuntu 3 | 4 | # Fail on the first error; killable by SIGINT 5 | set -e 6 | trap "exit" INT 7 | 8 | echo "--- Installing system dependencies ---" 9 | apt-get update && \ 10 | apt-get install -y build-essential \ 11 | curl nano git wget zip libstdc++6 \ 12 | python3-dev zlib1g-dev libjpeg-dev cmake swig python-pyglet python3-opengl libboost-all-dev libsdl2-dev libosmesa6-dev patchelf ffmpeg xvfb && \ 13 | rm -rf /var/lib/apt/lists/* 14 | 15 | echo "--- Installing Conda ---" 16 | if which conda >/dev/null; then 17 | echo "Conda is already installed" 18 | else 19 | curl -O https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh 20 | bash Miniconda3-latest-Linux-x86_64.sh -b -p ~/miniconda3 21 | rm Miniconda3-latest-Linux-x86_64.sh 22 | echo '. ~/miniconda3/etc/profile.d/conda.sh' >> ~/.bashrc 23 | source ~/.bashrc 24 | fi 25 | 26 | echo "--- Installing Conda environment ---" 27 | if ! which conda >/dev/null; then 28 | # guard for when no Conda is found, e.g. 
in Colab 29 | export PATH=~/miniconda3/bin:$PATH 30 | fi 31 | if conda env list | grep "^lab " >/dev/null; then 32 | echo "conda env lab is already installed" 33 | else 34 | conda create -n lab python=3.7.3 -y 35 | fi 36 | 37 | # remove for reset: 38 | # conda deactivate 39 | # conda env remove -n lab -y 40 | # conda env export > environment.yml 41 | echo "--- Updating Conda environment ---" 42 | conda env update -f environment.yml 43 | 44 | source ~/.bashrc 45 | echo "--- Lab setup complete ---" 46 | -------------------------------------------------------------------------------- /bin/setup_ubuntu_extra: -------------------------------------------------------------------------------- 1 | #!/bin/bash --login 2 | # Sets up extra dependencies for Unity 3 | 4 | # Fail on the first error; killable by SIGINT 5 | set -e 6 | trap "exit" INT 7 | 8 | echo "--- Installing Unity ML agents ---" 9 | conda activate lab 10 | pip install gym_unity==0.4.5 11 | # clone to slm_lab/env/SLM-Env 12 | git clone https://github.com/kengz/SLM-Env.git ./slm_lab/env/SLM-Env 13 | 14 | echo "--- Installing VizDoom ---" 15 | pip install vizdoom==1.1.6 16 | -------------------------------------------------------------------------------- /environment-byo.yml: -------------------------------------------------------------------------------- 1 | name: lab 2 | channels: 3 | - plotly 4 | - conda-forge 5 | - defaults 6 | dependencies: 7 | - autopep8=1.4.4 8 | - colorlog=4.0.2 9 | - coverage=4.5.3 10 | - flaky=3.5.3 11 | - libgcc 12 | - numpy=1.16.3 13 | - openpyxl=2.6.1 14 | - pandas=0.24.2 15 | - pillow=6.2.0 16 | - pip=19.1.1 17 | - plotly=4.9.0 18 | - psutil=5.6.2 19 | - pycodestyle=2.5.0 20 | - pydash=4.2.1 21 | - pytest-cov=2.7.1 22 | - pytest-timeout=1.3.3 23 | - pytest=4.5.0 24 | - python=3.7.3 25 | - pyyaml=5.1 26 | - regex=2019.05.25 27 | - scipy=1.3.0 28 | - ujson=1.35 29 | - xlrd=1.2.0 30 | - pip: 31 | - box2d-py==2.3.8 32 | - cloudpickle==0.5.2 33 | - colorlover==0.3.0 34 | - future==0.18.2 35 | - kaleido==0.2.1 36 | - opencv-python==4.1.0.25 37 | - pyopengl==3.1.0 38 | - ray==0.7.0 39 | - redis==2.10.6 40 | - tensorboard==2.1.1 41 | - xvfbwrapper==0.2.9 42 | - gym==0.12.1 43 | - gym[atari] 44 | - gym[box2d] 45 | - gym[classic_control] 46 | - pybullet==2.8.4 47 | - roboschool==1.0.46 48 | - atari-py==0.2.6 49 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: lab 2 | channels: 3 | - plotly 4 | - pytorch 5 | - conda-forge 6 | - defaults 7 | dependencies: 8 | - autopep8=1.4.4 9 | - colorlog=4.0.2 10 | - coverage=4.5.3 11 | - flaky=3.5.3 12 | - libgcc 13 | - numpy=1.16.3 14 | - openpyxl=2.6.1 15 | - pandas=0.24.2 16 | - pillow=6.2.0 17 | - pip=19.1.1 18 | - plotly=4.9.0 19 | - psutil=5.6.2 20 | - pycodestyle=2.5.0 21 | - pydash=4.2.1 22 | - pytest-cov=2.7.1 23 | - pytest-timeout=1.3.3 24 | - pytest=4.5.0 25 | - python=3.7.3 26 | - pytorch=1.3.1 27 | - pyyaml=5.1 28 | - regex=2019.05.25 29 | - scipy=1.3.0 30 | - ujson=1.35 31 | - xlrd=1.2.0 32 | - more-itertools==9.1.0 33 | - pip: 34 | - box2d-py==2.3.8 35 | - cloudpickle==0.5.2 36 | - colorlover==0.3.0 37 | - future==0.18.2 38 | - kaleido==0.2.1 39 | - opencv-python==4.1.0.25 40 | - pyopengl==3.1.0 41 | - ray==0.7.0 42 | - redis==2.10.6 43 | - tensorboard==2.1.1 44 | - xvfbwrapper==0.2.9 45 | - gym==0.12.1 46 | - gym[atari] 47 | - gym[box2d] 48 | - gym[classic_control] 49 | - pybullet==2.8.4 50 | - roboschool==1.0.46 51 | - 
atari-py==0.2.6 52 | - pyglet==1.5.29 53 | - grpcio==1.32.0 54 | -------------------------------------------------------------------------------- /job/atari_benchmark_adv.json: -------------------------------------------------------------------------------- 1 | { 2 | "benchmark/ppo/ppo_atari.json": { 3 | "ppo_atari": "train" 4 | }, 5 | "benchmark/dqn/ddqn_per_atari.json": { 6 | "ddqn_per_atari": "train" 7 | }, 8 | } 9 | -------------------------------------------------------------------------------- /job/atari_benchmark_base.json: -------------------------------------------------------------------------------- 1 | { 2 | "benchmark/a2c/a2c_nstep_atari.json": { 3 | "a2c_nstep_atari": "train" 4 | }, 5 | "benchmark/a2c/a2c_gae_atari.json": { 6 | "a2c_gae_atari": "train" 7 | }, 8 | "benchmark/dqn/dqn_atari.json": { 9 | "dqn_atari": "train" 10 | }, 11 | } 12 | -------------------------------------------------------------------------------- /job/experiments.json: -------------------------------------------------------------------------------- 1 | { 2 | "demo.json": { 3 | "dqn_cartpole": "dev" 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /job/roboschool_benchmark.json: -------------------------------------------------------------------------------- 1 | { 2 | "slm_lab/spec/benchmark/a2c/a2c_nstep_roboschool.json": { 3 | "a2c_nstep_roboschool": "train", 4 | }, 5 | "slm_lab/spec/benchmark/a2c/a2c_gae_roboschool.json": { 6 | "a2c_gae_roboschool": "train", 7 | }, 8 | "slm_lab/spec/benchmark/ppo/ppo_roboschool.json": { 9 | "ppo_roboschool": "train", 10 | }, 11 | "slm_lab/spec/benchmark/sac/sac_roboschool.json": { 12 | "sac_roboschool": "train", 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "slm_lab", 3 | "version": "4.0.0", 4 | "description": "Modular Deep Reinforcement Learning framework in PyTorch.", 5 | "main": "index.js", 6 | "scripts": { 7 | "start": "python run_lab.py", 8 | "debug": "LOG_LEVEL=DEBUG python run_lab.py", 9 | "retro_analyze": "python -c 'import sys; from slm_lab.experiment import retro_analysis; retro_analysis.retro_analyze(sys.argv[1])'", 10 | "retro_eval": "python -c 'import sys; from slm_lab.experiment import retro_analysis; retro_analysis.retro_eval(sys.argv[1])'", 11 | "reset": "rm -rf data/* .cache __pycache__ */__pycache__ *egg-info .pytest* htmlcov .coverage* *.xml", 12 | "kill": "pkill -f run_lab; pkill -f slm-env; pkill -f ipykernel; pkill -f ray; pkill -f Xvfb; ps aux | grep -i Unity | awk '{print $2}' | xargs sudo kill -9", 13 | "update": "conda env update -f environment.yml; yarn install;", 14 | "export-env": "conda env export > environment.yml", 15 | "build": "docker build -t kengz/slm_lab:latest -t kengz/slm_lab:v$v .", 16 | "test": "python setup.py test" 17 | }, 18 | "repository": "https://github.com/kengz/SLM-Lab.git", 19 | "author": "kengz , lgraesser", 20 | "license": "MIT", 21 | "private": false, 22 | "dependencies": {} 23 | } 24 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from setuptools import setup 4 | from setuptools.command.test import test as TestCommand 5 | 6 | test_args = [ 7 | '--verbose', 8 | '--capture=sys', 9 | '--log-level=INFO', 10 | '--log-cli-level=INFO', 11 | 
'--log-file-level=INFO', 12 | '--no-flaky-report', 13 | '--timeout=300', 14 | '--cov-report=html', 15 | '--cov-report=term', 16 | '--cov-report=xml', 17 | '--cov=slm_lab', 18 | '--ignore=test/spec/test_dist_spec.py', 19 | 'test', 20 | ] 21 | 22 | 23 | class PyTest(TestCommand): 24 | user_options = [('pytest-args=', 'a', 'Arguments to pass to py.test')] 25 | 26 | def initialize_options(self): 27 | os.environ['PY_ENV'] = 'test' 28 | TestCommand.initialize_options(self) 29 | self.pytest_args = test_args 30 | 31 | def run_tests(self): 32 | import pytest 33 | errno = pytest.main(self.pytest_args) 34 | sys.exit(errno) 35 | 36 | 37 | setup( 38 | name='slm_lab', 39 | version='4.2.4', 40 | description='Modular Deep Reinforcement Learning framework in PyTorch.', 41 | long_description='https://github.com/kengz/slm_lab', 42 | keywords='SLM Lab', 43 | url='https://github.com/kengz/slm_lab', 44 | author='kengz,lgraesser', 45 | author_email='kengzwl@gmail.com', 46 | license='MIT', 47 | packages=['slm_lab'], 48 | # NOTE: use the optimized conda dependencies 49 | install_requires=[], 50 | zip_safe=False, 51 | include_package_data=True, 52 | dependency_links=[], 53 | extras_require={ 54 | 'dev': [], 55 | 'docs': [], 56 | 'testing': [] 57 | }, 58 | classifiers=[], 59 | test_suite='test', 60 | cmdclass={'test': PyTest}, 61 | ) 62 | -------------------------------------------------------------------------------- /slm_lab/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | os.environ['PY_ENV'] = os.environ.get('PY_ENV') or 'development' 4 | ROOT_DIR = os.path.normpath(os.path.join(os.path.dirname(__file__), '..')) 5 | 6 | # valid lab_mode in SLM Lab 7 | EVAL_MODES = ('enjoy', 'eval') 8 | TRAIN_MODES = ('search', 'train', 'dev') 9 | -------------------------------------------------------------------------------- /slm_lab/agent/algorithm/__init__.py: -------------------------------------------------------------------------------- 1 | # The algorithm module 2 | # Contains implementations of reinforcement learning algorithms. 
3 | # Uses the nets module to build neural networks as the relevant function approximators 4 | from .actor_critic import * 5 | from .dqn import * 6 | from .ppo import * 7 | from .random import * 8 | from .reinforce import * 9 | from .sac import * 10 | from .sarsa import * 11 | from .sil import * 12 | -------------------------------------------------------------------------------- /slm_lab/agent/algorithm/random.py: -------------------------------------------------------------------------------- 1 | # The random agent algorithm 2 | # For basic dev purpose 3 | from slm_lab.agent.algorithm.base import Algorithm 4 | from slm_lab.lib import logger 5 | from slm_lab.lib.decorator import lab_api 6 | import numpy as np 7 | 8 | logger = logger.get_logger(__name__) 9 | 10 | 11 | class Random(Algorithm): 12 | ''' 13 | Example Random agent that works in both discrete and continuous envs 14 | ''' 15 | 16 | @lab_api 17 | def init_algorithm_params(self): 18 | '''Initialize other algorithm parameters''' 19 | self.to_train = 0 20 | self.training_frequency = 1 21 | self.training_start_step = 0 22 | 23 | @lab_api 24 | def init_nets(self, global_nets=None): 25 | '''Initialize the neural network from the spec''' 26 | self.net_names = [] 27 | 28 | @lab_api 29 | def act(self, state): 30 | '''Random action''' 31 | body = self.body 32 | if body.env.is_venv: 33 | action = np.array([body.action_space.sample() for _ in range(body.env.num_envs)]) 34 | else: 35 | action = body.action_space.sample() 36 | return action 37 | 38 | @lab_api 39 | def sample(self): 40 | self.body.memory.sample() 41 | batch = np.nan 42 | return batch 43 | 44 | @lab_api 45 | def train(self): 46 | self.sample() 47 | self.body.env.clock.tick('opt_step') # to simulate metrics calc 48 | loss = np.nan 49 | return loss 50 | 51 | @lab_api 52 | def update(self): 53 | self.body.explore_var = np.nan 54 | return self.body.explore_var 55 | -------------------------------------------------------------------------------- /slm_lab/agent/memory/__init__.py: -------------------------------------------------------------------------------- 1 | # The memory module 2 | # Implements various methods for memory storage 3 | from .replay import * 4 | from .onpolicy import * 5 | from .prioritized import * 6 | -------------------------------------------------------------------------------- /slm_lab/agent/memory/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from collections import deque 3 | from slm_lab.lib import logger, util 4 | import numpy as np 5 | import pydash as ps 6 | 7 | logger = logger.get_logger(__name__) 8 | 9 | 10 | class Memory(ABC): 11 | '''Abstract Memory class to define the API methods''' 12 | 13 | def __init__(self, memory_spec, body): 14 | ''' 15 | @param {*} body is the unit that stores its experience in this memory. Each body has a distinct memory. 16 | ''' 17 | self.memory_spec = memory_spec 18 | self.body = body 19 | # declare what data keys to store 20 | self.data_keys = ['states', 'actions', 'rewards', 'next_states', 'dones', 'priorities'] 21 | 22 | @abstractmethod 23 | def reset(self): 24 | '''Method to fully reset the memory storage and related variables''' 25 | raise NotImplementedError 26 | 27 | @abstractmethod 28 | def update(self, state, action, reward, next_state, done): 29 | '''Implement memory update given the full info from the latest timestep. 
NOTE: guard for np.nan reward and done when individual env resets.''' 30 | raise NotImplementedError 31 | 32 | @abstractmethod 33 | def sample(self): 34 | '''Implement memory sampling mechanism''' 35 | raise NotImplementedError 36 | -------------------------------------------------------------------------------- /slm_lab/agent/net/__init__.py: -------------------------------------------------------------------------------- 1 | # The nets module 2 | # Implements differents types of neural network 3 | from slm_lab.agent.net.conv import * 4 | from slm_lab.agent.net.mlp import * 5 | from slm_lab.agent.net.recurrent import * 6 | from slm_lab.agent.net.q_net import * 7 | -------------------------------------------------------------------------------- /slm_lab/agent/net/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from slm_lab.agent.net import net_util 3 | import pydash as ps 4 | import torch 5 | import torch.nn as nn 6 | 7 | 8 | class Net(ABC): 9 | '''Abstract Net class to define the API methods''' 10 | 11 | def __init__(self, net_spec, in_dim, out_dim): 12 | ''' 13 | @param {dict} net_spec is the spec for the net 14 | @param {int|list} in_dim is the input dimension(s) for the network. Usually use in_dim=body.state_dim 15 | @param {int|list} out_dim is the output dimension(s) for the network. Usually use out_dim=body.action_dim 16 | ''' 17 | self.net_spec = net_spec 18 | self.in_dim = in_dim 19 | self.out_dim = out_dim 20 | self.grad_norms = None # for debugging 21 | if self.net_spec.get('gpu'): 22 | if torch.cuda.device_count(): 23 | self.device = f'cuda:{net_spec.get("cuda_id", 0)}' 24 | else: 25 | self.device = 'cpu' 26 | else: 27 | self.device = 'cpu' 28 | 29 | @abstractmethod 30 | def forward(self): 31 | '''The forward step for a specific network architecture''' 32 | raise NotImplementedError 33 | 34 | @net_util.dev_check_train_step 35 | def train_step(self, loss, optim, lr_scheduler=None, clock=None, global_net=None): 36 | if lr_scheduler is not None: 37 | lr_scheduler.step(epoch=ps.get(clock, 'frame')) 38 | optim.zero_grad() 39 | loss.backward() 40 | if self.clip_grad_val is not None: 41 | nn.utils.clip_grad_norm_(self.parameters(), self.clip_grad_val) 42 | if global_net is not None: 43 | net_util.push_global_grads(self, global_net) 44 | optim.step() 45 | if global_net is not None: 46 | net_util.copy(global_net, self) 47 | if clock is not None: 48 | clock.tick('opt_step') 49 | return loss 50 | 51 | def store_grad_norms(self): 52 | '''Stores the gradient norms for debugging.''' 53 | norms = [param.grad.norm().item() for param in self.parameters()] 54 | self.grad_norms = norms 55 | -------------------------------------------------------------------------------- /slm_lab/env/__init__.py: -------------------------------------------------------------------------------- 1 | # the environment module 2 | 3 | 4 | def make_env(spec): 5 | from slm_lab.env.openai import OpenAIEnv 6 | env = OpenAIEnv(spec) 7 | return env 8 | -------------------------------------------------------------------------------- /slm_lab/env/openai.py: -------------------------------------------------------------------------------- 1 | from slm_lab.env.base import BaseEnv 2 | from slm_lab.env.wrapper import make_gym_env 3 | from slm_lab.env.vec_env import make_gym_venv 4 | from slm_lab.env.registration import try_register_env 5 | from slm_lab.lib import logger, util 6 | from slm_lab.lib.decorator import lab_api 7 | import gym 8 | import 
numpy as np 9 | import pydash as ps 10 | import roboschool 11 | import pybullet_envs 12 | 13 | 14 | logger = logger.get_logger(__name__) 15 | 16 | 17 | class OpenAIEnv(BaseEnv): 18 | ''' 19 | Wrapper for OpenAI Gym env to work with the Lab. 20 | 21 | e.g. env_spec 22 | "env": [{ 23 | "name": "PongNoFrameskip-v4", 24 | "frame_op": "concat", 25 | "frame_op_len": 4, 26 | "normalize_state": false, 27 | "reward_scale": "sign", 28 | "num_envs": 8, 29 | "max_t": null, 30 | "max_frame": 1e7 31 | }], 32 | ''' 33 | 34 | def __init__(self, spec): 35 | super().__init__(spec) 36 | try_register_env(spec) # register if it's a custom gym env 37 | seed = ps.get(spec, 'meta.random_seed') 38 | episode_life = util.in_train_lab_mode() 39 | if self.is_venv: # make vector environment 40 | self.u_env = make_gym_venv(name=self.name, num_envs=self.num_envs, seed=seed, frame_op=self.frame_op, frame_op_len=self.frame_op_len, image_downsize=self.image_downsize, reward_scale=self.reward_scale, normalize_state=self.normalize_state, episode_life=episode_life) 41 | else: 42 | self.u_env = make_gym_env(name=self.name, seed=seed, frame_op=self.frame_op, frame_op_len=self.frame_op_len, image_downsize=self.image_downsize, reward_scale=self.reward_scale, normalize_state=self.normalize_state, episode_life=episode_life) 43 | if self.name.startswith('Unity'): 44 | # Unity is always initialized as singleton gym env, but the Unity runtime can be vec_env 45 | self.num_envs = self.u_env.num_envs 46 | # update variables dependent on num_envs 47 | self._infer_venv_attr() 48 | self._set_clock() 49 | self._set_attr_from_u_env(self.u_env) 50 | self.max_t = self.max_t or self.u_env.spec.max_episode_steps 51 | assert self.max_t is not None 52 | logger.info(util.self_desc(self)) 53 | 54 | def seed(self, seed): 55 | self.u_env.seed(seed) 56 | 57 | @lab_api 58 | def reset(self): 59 | self.done = False 60 | state = self.u_env.reset() 61 | if self.to_render: 62 | self.u_env.render() 63 | return state 64 | 65 | @lab_api 66 | def step(self, action): 67 | if not self.is_discrete and self.action_dim == 1: # guard for continuous with action_dim 1, make array 68 | action = np.expand_dims(action, axis=-1) 69 | state, reward, done, info = self.u_env.step(action) 70 | self._update_total_reward(info) 71 | if self.to_render: 72 | self.u_env.render() 73 | if not self.is_venv and self.clock.t > self.max_t: 74 | done = True 75 | self.done = done 76 | return state, reward, done, info 77 | 78 | @lab_api 79 | def close(self): 80 | self.u_env.close() 81 | -------------------------------------------------------------------------------- /slm_lab/env/registration.py: -------------------------------------------------------------------------------- 1 | # module to register and mange multiple environment offerings 2 | from gym.envs.registration import register 3 | from slm_lab.lib import logger, util 4 | import gym 5 | import os 6 | 7 | 8 | def get_env_path(env_name): 9 | '''Get the path to Unity env binaries distributed via npm''' 10 | env_path = util.smart_path(f'slm_lab/env/SLM-Env/build/{env_name}') 11 | env_dir = os.path.dirname(env_path) 12 | assert os.path.exists(env_dir), f'Missing {env_path}. See README to install from yarn.' 13 | return env_path 14 | 15 | 16 | def try_register_env(spec): 17 | '''Try to additional environments for OpenAI gym.''' 18 | try: 19 | env_name = spec['env'][0]['name'] 20 | if env_name == 'vizdoom-v0': 21 | assert 'cfg_name' in spec['env'][0].keys(), 'Environment config name must be defined for vizdoom.' 
22 | cfg_name = spec['env'][0]['cfg_name'] 23 | register( 24 | id=env_name, 25 | entry_point='slm_lab.env.vizdoom.vizdoom_env:VizDoomEnv', 26 | kwargs={'cfg_name': cfg_name}) 27 | elif env_name.startswith('Unity'): 28 | # NOTE: do not specify max_episode_steps, will cause shape inconsistency in done 29 | register( 30 | id=env_name, 31 | entry_point='slm_lab.env.unity:GymUnityEnv', 32 | kwargs={'name': env_name}) 33 | except Exception as e: 34 | logger.exception(e) 35 | -------------------------------------------------------------------------------- /slm_lab/env/unity.py: -------------------------------------------------------------------------------- 1 | from gym_unity.envs import UnityEnv 2 | from slm_lab.env.registration import get_env_path 3 | from slm_lab.lib import util 4 | import numpy as np 5 | import os 6 | import pydash as ps 7 | 8 | # NOTE: stack-frames used in ml-agents: 9 | # 3DBallHard 9 10 | # Hallways 3 11 | # PushBlock 3 12 | # Walker 5 13 | 14 | 15 | class GymUnityEnv(UnityEnv): 16 | '''Wrapper to make UnityEnv register-able under gym''' 17 | spec = None 18 | 19 | def __init__(self, name): 20 | worker_id = int(f'{os.getpid()}{int(ps.unique_id())}'[-4:]) 21 | super().__init__(get_env_path(name), worker_id, no_graphics=not util.to_render(), multiagent=True) 22 | self.num_envs = self.number_agents 23 | 24 | def reset(self): 25 | state = super().reset() 26 | # Unity returns list, we need array 27 | return np.array(state) 28 | 29 | def step(self, action): 30 | # Unity wants list instead of numpy 31 | action = list(action) 32 | state, reward, done, info = super().step(action) 33 | # Unity returns list, we need array 34 | state = np.array(state) 35 | reward = np.array(reward) 36 | done = np.array(done) 37 | return state, reward, done, info 38 | 39 | def close(self): 40 | try: # guard repeated call to close() 41 | super().close() 42 | except Exception as e: 43 | pass 44 | -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/__init__.py: -------------------------------------------------------------------------------- 1 | from .vizdoom_env import VizDoomEnv 2 | -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kengz/SLM-Lab/cae945a294cb111ee8e568bb4465ee74c501478b/slm_lab/env/vizdoom/cfgs/__init__.py -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/basic.cfg: -------------------------------------------------------------------------------- 1 | # Lines starting with # are treated as comments (or with whitespaces+#). 2 | # It doesn't matter if you use capital letters or not. 3 | # It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout. 
4 | 5 | doom_scenario_path = basic.wad 6 | doom_map = map01 7 | 8 | # Rewards 9 | living_reward = -1 10 | 11 | # Rendering options 12 | screen_resolution = RES_160X120 13 | screen_format = CRCGCB 14 | render_hud = True 15 | render_crosshair = false 16 | render_weapon = true 17 | render_decals = false 18 | render_particles = false 19 | window_visible = false 20 | 21 | # make episodes start after 20 tics (after unholstering the gun) 22 | episode_start_time = 14 23 | 24 | # make episodes finish after 300 actions (tics) 25 | episode_timeout = 300 26 | 27 | # Available buttons 28 | available_buttons = 29 | { 30 | MOVE_LEFT 31 | MOVE_RIGHT 32 | ATTACK 33 | } 34 | 35 | # Game variables that will be in the state 36 | available_game_variables = { AMMO2} 37 | 38 | mode = PLAYER 39 | doom_skill = 5 40 | -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/basic.wad: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kengz/SLM-Lab/cae945a294cb111ee8e568bb4465ee74c501478b/slm_lab/env/vizdoom/cfgs/basic.wad -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/bots.cfg: -------------------------------------------------------------------------------- 1 | { 2 | name Rambo 3 | aiming 67 4 | perfection 50 5 | reaction 70 6 | isp 50 7 | color "40 cf 00" 8 | skin base 9 | //weaponpref 012385678 10 | } 11 | 12 | { 13 | name McClane 14 | aiming 34 15 | perfection 75 16 | reaction 15 17 | isp 90 18 | color "b0 b0 b0" 19 | skin base 20 | //weaponpref 012345678 21 | } 22 | 23 | { 24 | name MacGyver 25 | aiming 80 26 | perfection 67 27 | reaction 72 28 | isp 87 29 | color "50 50 60" 30 | skin base 31 | //weaponpref 012345678 32 | } 33 | 34 | { 35 | name Plissken 36 | aiming 15 37 | perfection 50 38 | reaction 50 39 | isp 50 40 | color "8f 00 00" 41 | skin base 42 | //weaponpref 082345678 43 | } 44 | 45 | { 46 | name Machete 47 | aiming 50 48 | perfection 13 49 | reaction 20 50 | isp 100 51 | color "ff ff ff" 52 | skin base 53 | //weaponpref 012345678 54 | } 55 | 56 | { 57 | name Anderson 58 | aiming 45 59 | perfection 30 60 | reaction 70 61 | isp 60 62 | color "ff af 3f" 63 | skin base 64 | //weaponpref 012345678 65 | } 66 | 67 | { 68 | name Leone 69 | aiming 56 70 | perfection 34 71 | reaction 78 72 | isp 50 73 | color "bf 00 00" 74 | skin base 75 | //weaponpref 012345678 76 | } 77 | 78 | { 79 | name Predator 80 | aiming 25 81 | perfection 55 82 | reaction 32 83 | isp 70 84 | color "00 00 ff" 85 | skin base 86 | //weaponpref 012345678 87 | } 88 | 89 | { 90 | name Ripley 91 | aiming 61 92 | perfection 50 93 | reaction 23 94 | isp 32 95 | color "00 00 7f" 96 | skin base 97 | //weaponpref 012345678 98 | } 99 | 100 | { 101 | name T800 102 | aiming 90 103 | perfection 85 104 | reaction 10 105 | isp 30 106 | color "ff ff 00" 107 | skin base 108 | //weaponpref 012345678 109 | } 110 | 111 | { 112 | name Dredd 113 | aiming 12 114 | perfection 35 115 | reaction 56 116 | isp 37 117 | color "40 cf 00" 118 | skin base 119 | //weaponpref 012345678 120 | } 121 | 122 | { 123 | name Conan 124 | aiming 10 125 | perfection 35 126 | reaction 10 127 | isp 100 128 | color "b0 b0 b0" 129 | skin base 130 | //weaponpref 012345678 131 | } 132 | 133 | { 134 | name Bond 135 | aiming 67 136 | perfection 15 137 | reaction 76 138 | isp 37 139 | color "50 50 60" 140 | skin base 141 | //weaponpref 012345678 142 | } 143 | 144 | { 145 | name Jones 146 | aiming 52 147 | perfection 
35 148 | reaction 50 149 | isp 37 150 | color "8f 00 00" 151 | skin base 152 | //weaponpref 012345678 153 | } 154 | 155 | { 156 | name Blazkowicz 157 | aiming 80 158 | perfection 80 159 | reaction 80 160 | isp 100 161 | color "00 00 00" 162 | skin base 163 | //weaponpref 012345678 164 | } 165 | 166 | -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/cig.cfg: -------------------------------------------------------------------------------- 1 | # Lines starting with # are treated as comments (or with whitespaces+#). 2 | # It doesn't matter if you use capital letters or not. 3 | # It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout. 4 | 5 | doom_scenario_path = cig.wad 6 | 7 | #12 minutes 8 | episode_timeout = 25200 9 | 10 | # Rendering options 11 | screen_resolution = RES_640X480 12 | screen_format = CRCGCB 13 | render_hud = true 14 | render_crosshair = true 15 | render_weapon = true 16 | render_decals = false 17 | render_particles = false 18 | 19 | window_visible = true 20 | 21 | # Available buttons 22 | available_buttons = 23 | { 24 | ATTACK 25 | USE 26 | 27 | TURN_LEFT 28 | TURN_RIGHT 29 | MOVE_RIGHT 30 | MOVE_LEFT 31 | MOVE_FORWARD 32 | MOVE_BACKWARD 33 | 34 | TURN_LEFT_RIGHT_DELTA 35 | LOOK_UP_DOWN_DELTA 36 | } 37 | 38 | mode = ASYNC_PLAYER 39 | 40 | -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/cig.wad: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kengz/SLM-Lab/cae945a294cb111ee8e568bb4465ee74c501478b/slm_lab/env/vizdoom/cfgs/cig.wad -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/cig_with_unknown.wad: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kengz/SLM-Lab/cae945a294cb111ee8e568bb4465ee74c501478b/slm_lab/env/vizdoom/cfgs/cig_with_unknown.wad -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/deadly_corridor.cfg: -------------------------------------------------------------------------------- 1 | # Lines starting with # are treated as comments (or with whitespaces+#). 2 | # It doesn't matter if you use capital letters or not. 3 | # It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout. 4 | 5 | doom_scenario_path = deadly_corridor.wad 6 | 7 | # Skill 5 is reccomanded for the scenario to be a challenge. 
8 | doom_skill = 5 9 | 10 | # Rewards 11 | death_penalty = 100 12 | #living_reward = 0 13 | 14 | # Rendering options 15 | screen_resolution = RES_320X240 16 | screen_format = CRCGCB 17 | render_hud = true 18 | render_crosshair = false 19 | render_weapon = true 20 | render_decals = false 21 | render_particles = false 22 | window_visible = true 23 | 24 | episode_timeout = 2100 25 | 26 | # Available buttons 27 | available_buttons = 28 | { 29 | MOVE_LEFT 30 | MOVE_RIGHT 31 | ATTACK 32 | MOVE_FORWARD 33 | MOVE_BACKWARD 34 | TURN_LEFT 35 | TURN_RIGHT 36 | } 37 | 38 | # Game variables that will be in the state 39 | available_game_variables = { HEALTH } 40 | 41 | mode = PLAYER 42 | 43 | 44 | -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/deadly_corridor.wad: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kengz/SLM-Lab/cae945a294cb111ee8e568bb4465ee74c501478b/slm_lab/env/vizdoom/cfgs/deadly_corridor.wad -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/deathmatch.cfg: -------------------------------------------------------------------------------- 1 | # Lines starting with # are treated as comments (or with whitespaces+#). 2 | # It doesn't matter if you use capital letters or not. 3 | # It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout. 4 | 5 | doom_scenario_path = deathmatch.wad 6 | 7 | # Rendering options 8 | screen_resolution = RES_320X240 9 | screen_format = CRCGCB 10 | render_hud = true 11 | render_crosshair = false 12 | render_weapon = true 13 | render_decals = false 14 | render_particles = false 15 | window_visible = true 16 | 17 | # make episodes finish after 4200 actions (tics) 18 | episode_timeout = 4200 19 | 20 | # Available buttons 21 | available_buttons = 22 | { 23 | ATTACK 24 | SPEED 25 | STRAFE 26 | 27 | MOVE_RIGHT 28 | MOVE_LEFT 29 | MOVE_BACKWARD 30 | MOVE_FORWARD 31 | TURN_RIGHT 32 | TURN_LEFT 33 | 34 | SELECT_WEAPON1 35 | SELECT_WEAPON2 36 | SELECT_WEAPON3 37 | SELECT_WEAPON4 38 | SELECT_WEAPON5 39 | SELECT_WEAPON6 40 | 41 | SELECT_NEXT_WEAPON 42 | SELECT_PREV_WEAPON 43 | 44 | LOOK_UP_DOWN_DELTA 45 | TURN_LEFT_RIGHT_DELTA 46 | MOVE_LEFT_RIGHT_DELTA 47 | 48 | } 49 | 50 | # Game variables that will be in the state 51 | available_game_variables = 52 | { 53 | KILLCOUNT 54 | HEALTH 55 | ARMOR 56 | SELECTED_WEAPON 57 | SELECTED_WEAPON_AMMO 58 | } 59 | mode = PLAYER 60 | -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/deathmatch.wad: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kengz/SLM-Lab/cae945a294cb111ee8e568bb4465ee74c501478b/slm_lab/env/vizdoom/cfgs/deathmatch.wad -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/defend_the_center.cfg: -------------------------------------------------------------------------------- 1 | # Lines starting with # are treated as comments (or with whitespaces+#). 2 | # It doesn't matter if you use capital letters or not. 3 | # It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout. 
4 | 5 | doom_scenario_path = defend_the_center.wad 6 | 7 | # Rewards 8 | death_penalty = 1 9 | 10 | # Rendering options 11 | screen_resolution = RES_640X480 12 | screen_format = CRCGCB 13 | render_hud = True 14 | render_crosshair = false 15 | render_weapon = true 16 | render_decals = false 17 | render_particles = false 18 | window_visible = true 19 | 20 | # make episodes start after 10 tics (after unholstering the gun) 21 | episode_start_time = 10 22 | 23 | # make episodes finish after 2100 actions (tics) 24 | episode_timeout = 2100 25 | 26 | # Available buttons 27 | available_buttons = 28 | { 29 | TURN_LEFT 30 | TURN_RIGHT 31 | ATTACK 32 | } 33 | 34 | # Game variables that will be in the state 35 | available_game_variables = { AMMO2 HEALTH } 36 | 37 | mode = PLAYER 38 | doom_skill = 3 39 | -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/defend_the_center.wad: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kengz/SLM-Lab/cae945a294cb111ee8e568bb4465ee74c501478b/slm_lab/env/vizdoom/cfgs/defend_the_center.wad -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/defend_the_line.cfg: -------------------------------------------------------------------------------- 1 | # Lines starting with # are treated as comments (or with whitespaces+#). 2 | # It doesn't matter if you use capital letters or not. 3 | # It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout. 4 | 5 | doom_scenario_path = defend_the_line.wad 6 | 7 | # Rewards 8 | death_penalty = 1 9 | 10 | # Rendering options 11 | screen_resolution = RES_320X240 12 | screen_format = CRCGCB 13 | render_hud = True 14 | render_crosshair = false 15 | render_weapon = true 16 | render_decals = false 17 | render_particles = false 18 | window_visible = true 19 | 20 | # make episodes start after 10 tics (after unholstering the gun) 21 | episode_start_time = 10 22 | 23 | 24 | # Available buttons 25 | available_buttons = 26 | { 27 | TURN_lEFT 28 | TURN_RIGHT 29 | ATTACK 30 | } 31 | 32 | # Game variables that will be in the state 33 | available_game_variables = { AMMO2 HEALTH} 34 | 35 | mode = PLAYER 36 | doom_skill = 3 37 | -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/defend_the_line.wad: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kengz/SLM-Lab/cae945a294cb111ee8e568bb4465ee74c501478b/slm_lab/env/vizdoom/cfgs/defend_the_line.wad -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/health_gathering.cfg: -------------------------------------------------------------------------------- 1 | # Lines starting with # are treated as comments (or with whitespaces+#). 2 | # It doesn't matter if you use capital letters or not. 3 | # It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout. 4 | 5 | doom_scenario_path = health_gathering.wad 6 | 7 | # Each step is good for you! 8 | living_reward = 1 9 | # And death is not! 
10 | death_penalty = 100 11 | 12 | # Rendering options 13 | screen_resolution = RES_160X120 14 | screen_format = CRCGCB 15 | render_hud = false 16 | render_crosshair = false 17 | render_weapon = false 18 | render_decals = false 19 | render_particles = false 20 | window_visible = false 21 | 22 | # make episodes finish after 2100 actions (tics) 23 | episode_timeout = 2100 24 | 25 | # Available buttons 26 | available_buttons = 27 | { 28 | TURN_LEFT 29 | TURN_RIGHT 30 | MOVE_FORWARD 31 | } 32 | 33 | # Game variables that will be in the state 34 | available_game_variables = { HEALTH } 35 | 36 | mode = PLAYER 37 | -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/health_gathering.wad: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kengz/SLM-Lab/cae945a294cb111ee8e568bb4465ee74c501478b/slm_lab/env/vizdoom/cfgs/health_gathering.wad -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/health_gathering_supreme.cfg: -------------------------------------------------------------------------------- 1 | # Lines starting with # are treated as comments (or with whitespaces+#). 2 | # It doesn't matter if you use capital letters or not. 3 | # It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout. 4 | 5 | doom_scenario_path = health_gathering_supreme.wad 6 | 7 | # Each step is good for you! 8 | living_reward = 1 9 | # And death is not! 10 | death_penalty = 100 11 | 12 | # Rendering options 13 | screen_resolution = RES_320X240 14 | screen_format = CRCGCB 15 | render_hud = false 16 | render_crosshair = false 17 | render_weapon = false 18 | render_decals = false 19 | render_particles = false 20 | window_visible = true 21 | 22 | # make episodes finish after 2100 actions (tics) 23 | episode_timeout = 2100 24 | 25 | # Available buttons 26 | available_buttons = 27 | { 28 | TURN_LEFT 29 | TURN_RIGHT 30 | MOVE_FORWARD 31 | } 32 | 33 | # Game variables that will be in the state 34 | available_game_variables = { HEALTH } 35 | 36 | mode = PLAYER -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/health_gathering_supreme.wad: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kengz/SLM-Lab/cae945a294cb111ee8e568bb4465ee74c501478b/slm_lab/env/vizdoom/cfgs/health_gathering_supreme.wad -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/learning.cfg: -------------------------------------------------------------------------------- 1 | doom_scenario_path = basic.wad 2 | 3 | # Rewards 4 | living_reward = -1 5 | 6 | # Rendering options 7 | screen_resolution = RES_640X480 8 | screen_format = GRAY8 9 | render_hud = false 10 | render_crosshair = false 11 | render_weapon = true 12 | render_decals = false 13 | render_particles = false 14 | window_visible = false 15 | 16 | # make episodes start after 20 tics (after unholstering the gun) 17 | episode_start_time = 14 18 | 19 | # make episodes finish after 300 actions (tics) 20 | episode_timeout = 300 21 | 22 | # Available buttons 23 | available_buttons = 24 | { 25 | MOVE_LEFT 26 | MOVE_RIGHT 27 | ATTACK 28 | } 29 | 30 | mode = PLAYER 31 | 32 | 33 | -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/multi.cfg: 
-------------------------------------------------------------------------------- 1 | # Lines starting with # are treated as comments (or with whitespaces+#). 2 | # It doesn't matter if you use capital letters or not. 3 | # It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout. 4 | 5 | doom_scenario_path = multi_deathmatch.wad 6 | 7 | # Rewards 8 | death_penalty = 1 9 | 10 | # Rendering options 11 | screen_resolution = RES_640X480 12 | screen_format = CRCGCB 13 | render_hud = true 14 | render_crosshair = true 15 | render_weapon = true 16 | render_decals = false 17 | render_particles = false 18 | 19 | window_visible = true 20 | 21 | 22 | # Available buttons 23 | available_buttons = 24 | { 25 | TURN_LEFT 26 | TURN_RIGHT 27 | ATTACK 28 | 29 | MOVE_RIGHT 30 | MOVE_LEFT 31 | 32 | MOVE_FORWARD 33 | MOVE_BACKWARD 34 | TURN_LEFT_RIGHT_DELTA 35 | LOOK_UP_DOWN_DELTA 36 | 37 | } 38 | 39 | available_game_variables = 40 | { 41 | HEALTH 42 | AMMO3 43 | } 44 | mode = ASYNC_PLAYER 45 | 46 | -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/multi_deathmatch.wad: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kengz/SLM-Lab/cae945a294cb111ee8e568bb4465ee74c501478b/slm_lab/env/vizdoom/cfgs/multi_deathmatch.wad -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/multi_duel.cfg: -------------------------------------------------------------------------------- 1 | doom_scenario_path = multi_duel.wad 2 | 3 | screen_resolution = RES_640X480 4 | screen_format = CRCGCB 5 | render_hud = true 6 | render_crosshair = false 7 | render_weapon = true 8 | render_decals = true 9 | render_particles = true 10 | window_visible = true 11 | 12 | available_buttons = 13 | { 14 | MOVE_LEFT 15 | MOVE_RIGHT 16 | ATTACK 17 | } 18 | 19 | mode = PLAYER 20 | doom_skill = 5 21 | 22 | -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/multi_duel.wad: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kengz/SLM-Lab/cae945a294cb111ee8e568bb4465ee74c501478b/slm_lab/env/vizdoom/cfgs/multi_duel.wad -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/my_way_home.cfg: -------------------------------------------------------------------------------- 1 | # Lines starting with # are treated as comments (or with whitespaces+#). 2 | # It doesn't matter if you use capital letters or not. 3 | # It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout. 
4 | 5 | doom_scenario_path = my_way_home.wad 6 | 7 | # Rewards 8 | living_reward = -0.0001 9 | 10 | # Rendering options 11 | screen_resolution = RES_640X480 12 | screen_format = CRCGCB 13 | render_hud = false 14 | render_crosshair = false 15 | render_weapon = true 16 | render_decals = false 17 | render_particles = false 18 | window_visible = true 19 | 20 | # make episodes start after 10 tics (after unholstering the gun) 21 | episode_start_time = 10 22 | 23 | # make episodes finish after 2100 actions (tics) 24 | episode_timeout = 2100 25 | 26 | # Available buttons 27 | available_buttons = 28 | { 29 | TURN_LEFT 30 | TURN_RIGHT 31 | MOVE_FORWARD 32 | MOVE_LEFT 33 | MOVE_RIGHT 34 | } 35 | 36 | # Game variables that will be in the state 37 | available_game_variables = { AMMO0 } 38 | 39 | mode = PLAYER 40 | doom_skill = 5 41 | -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/my_way_home.wad: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kengz/SLM-Lab/cae945a294cb111ee8e568bb4465ee74c501478b/slm_lab/env/vizdoom/cfgs/my_way_home.wad -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/oblige.cfg: -------------------------------------------------------------------------------- 1 | # Lines starting with # are treated as comments (or with whitespaces+#). 2 | # It doesn't matter if you use capital letters or not. 3 | # It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout. 4 | 5 | # Rendering options 6 | screen_resolution = RES_320X240 7 | screen_format = CRCGCB 8 | render_hud = true 9 | render_crosshair = false 10 | render_weapon = true 11 | render_decals = false 12 | render_particles = false 13 | window_visible = true 14 | 15 | # make episodes finish after 4200 actions (tics) 16 | episode_timeout = 4200 17 | 18 | # Available buttons 19 | available_buttons = 20 | { 21 | ATTACK 22 | USE 23 | SPEED 24 | STRAFE 25 | 26 | MOVE_RIGHT 27 | MOVE_LEFT 28 | MOVE_BACKWARD 29 | MOVE_FORWARD 30 | TURN_RIGHT 31 | TURN_LEFT 32 | 33 | SELECT_WEAPON1 34 | SELECT_WEAPON2 35 | SELECT_WEAPON3 36 | SELECT_WEAPON4 37 | SELECT_WEAPON5 38 | SELECT_WEAPON6 39 | 40 | SELECT_NEXT_WEAPON 41 | SELECT_PREV_WEAPON 42 | 43 | LOOK_UP_DOWN_DELTA 44 | TURN_LEFT_RIGHT_DELTA 45 | MOVE_LEFT_RIGHT_DELTA 46 | 47 | } 48 | 49 | # Game variables that will be in the state 50 | available_game_variables = 51 | { 52 | KILLCOUNT 53 | HEALTH 54 | ARMOR 55 | SELECTED_WEAPON 56 | SELECTED_WEAPON_AMMO 57 | } 58 | mode = PLAYER 59 | -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/predict_position.cfg: -------------------------------------------------------------------------------- 1 | # Lines starting with # are treated as comments (or with whitespaces+#). 2 | # It doesn't matter if you use capital letters or not. 3 | # It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout. 
4 | 5 | doom_scenario_path = predict_position.wad 6 | 7 | # Rewards 8 | living_reward = -0.001 9 | 10 | # Rendering options 11 | screen_resolution = RES_800X450 12 | screen_format = CRCGCB 13 | render_hud = false 14 | render_crosshair = false 15 | render_weapon = true 16 | render_decals = false 17 | render_particles = false 18 | window_visible = true 19 | 20 | # make episodes start after 16 tics (after producing the rocket launcher) 21 | episode_start_time = 16 22 | 23 | # make episodes finish after 300 actions (tics) 24 | episode_timeout = 300 25 | 26 | # Available buttons 27 | available_buttons = 28 | { 29 | TURN_LEFT 30 | TURN_RIGHT 31 | ATTACK 32 | } 33 | 34 | # Empty list is allowed, in case you are lazy. 35 | available_game_variables = { } 36 | 37 | game_args += +sv_noautoaim 1 38 | 39 | mode = PLAYER 40 | doom_skill = 1 41 | -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/predict_position.wad: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kengz/SLM-Lab/cae945a294cb111ee8e568bb4465ee74c501478b/slm_lab/env/vizdoom/cfgs/predict_position.wad -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/rocket_basic.cfg: -------------------------------------------------------------------------------- 1 | doom_scenario_path = rocket_basic.wad 2 | 3 | # Rewards 4 | living_reward = -1 5 | 6 | # Rendering options 7 | screen_resolution = RES_640X480 8 | screen_format = GRAY8 9 | render_hud = true 10 | render_crosshair = false 11 | render_weapon = true 12 | render_decals = false 13 | render_particles = false 14 | 15 | # make episodes start after 14 tics (after unholstering the gun) 16 | episode_start_time = 14 17 | 18 | # make episodes finish after 300 actions (tics) 19 | episode_timeout = 300 20 | 21 | # Available buttons 22 | available_buttons = 23 | { 24 | MOVE_LEFT 25 | MOVE_RIGHT 26 | ATTACK 27 | } 28 | 29 | game_args += +sv_noautoaim 1 30 | -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/rocket_basic.wad: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kengz/SLM-Lab/cae945a294cb111ee8e568bb4465ee74c501478b/slm_lab/env/vizdoom/cfgs/rocket_basic.wad -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/simpler_basic.cfg: -------------------------------------------------------------------------------- 1 | doom_scenario_path = simpler_basic.wad 2 | 3 | # Rewards 4 | living_reward = -1 5 | 6 | # Rendering options 7 | screen_resolution = RES_640X480 8 | screen_format = GRAY8 9 | 10 | render_hud = true 11 | render_crosshair = false 12 | render_weapon = true 13 | render_decals = false 14 | render_particles = false 15 | 16 | # make episodes start after 20 tics (after unholstering the gun) 17 | episode_start_time = 14 18 | 19 | # make episodes finish after 300 actions (tics) 20 | episode_timeout = 300 21 | 22 | # Available buttons 23 | available_buttons = 24 | { 25 | MOVE_LEFT 26 | MOVE_RIGHT 27 | ATTACK 28 | } 29 | -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/simpler_basic.wad: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kengz/SLM-Lab/cae945a294cb111ee8e568bb4465ee74c501478b/slm_lab/env/vizdoom/cfgs/simpler_basic.wad 
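The scenario definitions above are standard ViZDoom .cfg files, consumed through the ViZDoom Python API. A minimal sketch of driving one of them directly with the `vizdoom` package follows; the basic.cfg path and the random-action loop are illustrative assumptions rather than code from this repository, and within the lab these same files are presumably exposed through slm_lab/env/vizdoom/vizdoom_env.py and the env wrappers instead of being driven by hand like this:

import itertools
import random

import vizdoom as vzd

game = vzd.DoomGame()
# parses the keys shown above: doom_scenario_path, rewards, rendering options, buttons, ...
game.load_config('slm_lab/env/vizdoom/cfgs/basic.cfg')
game.set_window_visible(False)  # matches window_visible = false for headless training
game.init()

# one binary flag per entry in available_buttons (MOVE_LEFT, MOVE_RIGHT, ATTACK)
actions = [list(combo) for combo in itertools.product([0, 1], repeat=game.get_available_buttons_size())]

game.new_episode()
while not game.is_episode_finished():
    state = game.get_state()  # screen buffer plus the game variables listed in the cfg (e.g. AMMO2)
    reward = game.make_action(random.choice(actions))  # advances one tic; returns living_reward plus any scenario reward
print('episode return:', game.get_total_reward())
game.close()

The other cfgs in this directory can be driven the same way; only the button set, the exposed game variables, and the reward shaping differ from scenario to scenario.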
-------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/take_cover.cfg: -------------------------------------------------------------------------------- 1 | # Lines starting with # are treated as comments (or with whitespaces+#). 2 | # It doesn't matter if you use capital letters or not. 3 | # It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout. 4 | 5 | doom_scenario_path = take_cover.wad 6 | doom_map = map01 7 | 8 | # Rewards 9 | living_reward = 0.1 10 | 11 | # Rendering options 12 | screen_resolution = RES_160X120 13 | screen_format = CRCGCB 14 | render_hud = false 15 | render_crosshair = false 16 | render_weapon = false 17 | render_decals = false 18 | render_particles = false 19 | window_visible = false 20 | 21 | # Available buttons 22 | available_buttons = 23 | { 24 | MOVE_LEFT 25 | MOVE_RIGHT 26 | } 27 | 28 | # Game variables that will be in the state 29 | available_game_variables = { HEALTH } 30 | 31 | # Change it if you wish. 32 | doom_skill = 4 33 | -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/take_cover.wad: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kengz/SLM-Lab/cae945a294cb111ee8e568bb4465ee74c501478b/slm_lab/env/vizdoom/cfgs/take_cover.wad -------------------------------------------------------------------------------- /slm_lab/experiment/__init__.py: -------------------------------------------------------------------------------- 1 | # The experiment module 2 | # Handles experimentation logic: control, analysis 3 | -------------------------------------------------------------------------------- /slm_lab/experiment/retro_analysis.py: -------------------------------------------------------------------------------- 1 | # The retro analysis module 2 | # Runs analysis post-hoc using existing data files 3 | # example: yarn retro_analyze data/reinforce_cartpole_2018_01_22_211751/ 4 | from glob import glob 5 | from slm_lab.experiment import analysis 6 | from slm_lab.lib import logger, util 7 | import os 8 | import pydash as ps 9 | 10 | logger = logger.get_logger(__name__) 11 | 12 | 13 | def retro_analyze_sessions(predir): 14 | '''Retro analyze all sessions''' 15 | logger.info('Running retro_analyze_sessions') 16 | session_spec_paths = glob(f'{predir}/*_s*_spec.json') 17 | for p in session_spec_paths: 18 | _retro_analyze_session(p) 19 | 20 | 21 | def _retro_analyze_session(session_spec_path): 22 | '''Method to retro analyze a single session given only a path to its spec''' 23 | session_spec = util.read(session_spec_path) 24 | info_prepath = session_spec['meta']['info_prepath'] 25 | for df_mode in ('eval', 'train'): 26 | session_df = util.read(f'{info_prepath}_session_df_{df_mode}.csv') 27 | analysis.analyze_session(session_spec, session_df, df_mode) 28 | 29 | 30 | def retro_analyze_trials(predir): 31 | '''Retro analyze all trials''' 32 | logger.info('Running retro_analyze_trials') 33 | session_spec_paths = glob(f'{predir}/*_s*_spec.json') 34 | # remove session spec paths 35 | trial_spec_paths = ps.difference(glob(f'{predir}/*_t*_spec.json'), session_spec_paths) 36 | for p in trial_spec_paths: 37 | _retro_analyze_trial(p) 38 | 39 | 40 | def _retro_analyze_trial(trial_spec_path): 41 | '''Method to retro analyze a single trial given only a path to its spec''' 42 | trial_spec = util.read(trial_spec_path) 43 | meta_spec = trial_spec['meta'] 
44 | info_prepath = meta_spec['info_prepath'] 45 | session_metrics_list = [util.read(f'{info_prepath}_s{s}_session_metrics_eval.pkl') for s in range(meta_spec['max_session'])] 46 | analysis.analyze_trial(trial_spec, session_metrics_list) 47 | 48 | 49 | def retro_analyze_experiment(predir): 50 | '''Retro analyze an experiment''' 51 | logger.info('Running retro_analyze_experiment') 52 | if ps.is_empty(glob(f'{predir}/info/*_trial_data_dict.json')): 53 | logger.info('Skipping retro_analyze_experiment since no experiment was ran.') 54 | return # only run analysis if experiment had been ran 55 | trial_spec_paths = glob(f'{predir}/*_t*_spec.json') 56 | # remove trial and session spec paths 57 | experiment_spec_paths = ps.difference(glob(f'{predir}/*_spec.json'), trial_spec_paths) 58 | experiment_spec_path = experiment_spec_paths[0] 59 | spec = util.read(experiment_spec_path) 60 | info_prepath = spec['meta'].get('info_prepath') 61 | trial_data_dict = util.read(f'{info_prepath}_trial_data_dict.json') 62 | analysis.analyze_experiment(spec, trial_data_dict) 63 | 64 | 65 | def retro_analyze(predir): 66 | ''' 67 | Method to analyze experiment/trial from files after it ran. 68 | @example 69 | 70 | yarn retro_analyze data/reinforce_cartpole_2018_01_22_211751/ 71 | ''' 72 | predir = predir.strip('/') # sanitary 73 | os.environ['LOG_PREPATH'] = f'{predir}/log/retro_analyze' # to prevent overwriting log file 74 | logger.info(f'Running retro-analysis on {predir}') 75 | retro_analyze_sessions(predir) 76 | retro_analyze_trials(predir) 77 | retro_analyze_experiment(predir) 78 | logger.info('Finished retro-analysis') 79 | -------------------------------------------------------------------------------- /slm_lab/lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kengz/SLM-Lab/cae945a294cb111ee8e568bb4465ee74c501478b/slm_lab/lib/__init__.py -------------------------------------------------------------------------------- /slm_lab/lib/decorator.py: -------------------------------------------------------------------------------- 1 | from functools import wraps 2 | from slm_lab.lib import logger 3 | import time 4 | 5 | logger = logger.get_logger(__name__) 6 | 7 | 8 | def lab_api(fn): 9 | ''' 10 | Function decorator to label and check Lab API methods 11 | @example 12 | 13 | from slm_lab.lib.decorator import lab_api 14 | @lab_api 15 | def foo(): 16 | print('foo') 17 | ''' 18 | return fn 19 | 20 | 21 | def timeit(fn): 22 | ''' 23 | Function decorator to measure execution time 24 | @example 25 | 26 | from slm_lab.lib.decorator import timeit 27 | @timeit 28 | def foo(sec): 29 | time.sleep(sec) 30 | print('foo') 31 | 32 | foo(1) 33 | # => foo 34 | # => Timed: foo 1000.9971ms 35 | ''' 36 | @wraps(fn) 37 | def time_fn(*args, **kwargs): 38 | start = time.time() 39 | output = fn(*args, **kwargs) 40 | end = time.time() 41 | logger.debug(f'Timed: {fn.__name__} {round((end - start) * 1000, 4)}ms') 42 | return output 43 | return time_fn 44 | -------------------------------------------------------------------------------- /slm_lab/lib/logger.py: -------------------------------------------------------------------------------- 1 | from slm_lab.lib import util 2 | import colorlog 3 | import logging 4 | import os 5 | import pandas as pd 6 | import sys 7 | import warnings 8 | 9 | 10 | class FixedList(list): 11 | '''fixed-list to restrict addition to root logger handler''' 12 | 13 | def append(self, e): 14 | pass 15 | 16 | 17 | LOG_FORMAT = '[%(asctime)s 
PID:%(process)d %(levelname)s %(filename)s %(funcName)s] %(message)s' 18 | color_formatter = colorlog.ColoredFormatter('%(log_color)s[%(asctime)s PID:%(process)d %(levelname)s %(filename)s %(funcName)s]%(reset)s %(message)s') 19 | sh = logging.StreamHandler(sys.stdout) 20 | sh.setFormatter(color_formatter) 21 | lab_logger = logging.getLogger() 22 | lab_logger.handlers = FixedList([sh]) 23 | logging.getLogger('ray').propagate = False # hack to mute poorly designed ray TF warning log 24 | 25 | # this will trigger from Experiment init on reload(logger) 26 | if os.environ.get('LOG_PREPATH') is not None: 27 | warnings.filterwarnings('ignore', category=pd.io.pytables.PerformanceWarning) 28 | 29 | log_filepath = os.environ['LOG_PREPATH'] + '.log' 30 | os.makedirs(os.path.dirname(log_filepath), exist_ok=True) 31 | # create file handler 32 | formatter = logging.Formatter(LOG_FORMAT) 33 | fh = logging.FileHandler(log_filepath) 34 | fh.setFormatter(formatter) 35 | # add stream and file handler 36 | lab_logger.handlers = FixedList([sh, fh]) 37 | 38 | if os.environ.get('LOG_LEVEL'): 39 | lab_logger.setLevel(os.environ['LOG_LEVEL']) 40 | else: 41 | lab_logger.setLevel('INFO') 42 | 43 | 44 | def set_level(lvl): 45 | lab_logger.setLevel(lvl) 46 | os.environ['LOG_LEVEL'] = lvl 47 | 48 | 49 | def critical(msg, *args, **kwargs): 50 | return lab_logger.critical(msg, *args, **kwargs) 51 | 52 | 53 | def debug(msg, *args, **kwargs): 54 | return lab_logger.debug(msg, *args, **kwargs) 55 | 56 | 57 | def error(msg, *args, **kwargs): 58 | return lab_logger.error(msg, *args, **kwargs) 59 | 60 | 61 | def exception(msg, *args, **kwargs): 62 | return lab_logger.exception(msg, *args, **kwargs) 63 | 64 | 65 | def info(msg, *args, **kwargs): 66 | return lab_logger.info(msg, *args, **kwargs) 67 | 68 | 69 | def warning(msg, *args, **kwargs): 70 | return lab_logger.warning(msg, *args, **kwargs) 71 | 72 | 73 | def get_logger(__name__): 74 | '''Create a child logger specific to a module''' 75 | return logging.getLogger(__name__) 76 | 77 | 78 | def toggle_debug(modules, level='DEBUG'): 79 | '''Turn on module-specific debugging using their names, e.g. 
algorithm, actor_critic, at the desired debug level.''' 80 | logger_names = list(logging.Logger.manager.loggerDict.keys()) 81 | for module in modules: 82 | name = module.strip() 83 | for logger_name in logger_names: 84 | if name in logger_name.split('.'): 85 | module_logger = logging.getLogger(logger_name) 86 | module_logger.setLevel(getattr(logging, level)) 87 | -------------------------------------------------------------------------------- /slm_lab/spec/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kengz/SLM-Lab/cae945a294cb111ee8e568bb4465ee74c501478b/slm_lab/spec/__init__.py -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/a2c/a2c_gae_cartpole.json: -------------------------------------------------------------------------------- 1 | { 2 | "a2c_gae_cartpole": { 3 | "agent": [{ 4 | "name": "A2C", 5 | "algorithm": { 6 | "name": "ActorCritic", 7 | "action_pdtype": "default", 8 | "action_policy": "default", 9 | "explore_var_spec": null, 10 | "gamma": 0.99, 11 | "lam": 0.95, 12 | "num_step_returns": null, 13 | "entropy_coef_spec": { 14 | "name": "no_decay", 15 | "start_val": 0.001, 16 | "end_val": 0.001, 17 | "start_step": 0, 18 | "end_step": 0 19 | }, 20 | "val_loss_coef": 0.5, 21 | "training_frequency": 32 22 | }, 23 | "memory": { 24 | "name": "OnPolicyBatchReplay", 25 | }, 26 | "net": { 27 | "type": "MLPNet", 28 | "shared": false, 29 | "hid_layers": [64], 30 | "hid_layers_activation": "selu", 31 | "init_fn": null, 32 | "normalize": true, 33 | "batch_norm": false, 34 | "clip_grad_val": 0.5, 35 | "use_same_optim": true, 36 | "loss_spec": { 37 | "name": "MSELoss" 38 | }, 39 | "actor_optim_spec": { 40 | "name": "RMSprop", 41 | "lr": 5e-3, 42 | "alpha": 0.99, 43 | "eps": 1e-5 44 | }, 45 | "critic_optim_spec": { 46 | "name": "RMSprop", 47 | "lr": 5e-3, 48 | "alpha": 0.99, 49 | "eps": 1e-5 50 | }, 51 | "lr_scheduler_spec": null, 52 | "gpu": false 53 | } 54 | }], 55 | "env": [{ 56 | "name": "CartPole-v0", 57 | "num_envs": 4, 58 | "max_t": null, 59 | "max_frame": 30000, 60 | }], 61 | "body": { 62 | "product": "outer", 63 | "num": 1 64 | }, 65 | "meta": { 66 | "distributed": false, 67 | "eval_frequency": 500, 68 | "log_frequency": 500, 69 | "max_session": 4, 70 | "max_trial": 1 71 | }, 72 | "search": { 73 | "agent": [{ 74 | "algorithm": { 75 | "lam__grid_search": [0.7, 0.8, 0.9, 0.95], 76 | "training_frequency__grid_search": [16, 32] 77 | }, 78 | "net": { 79 | "actor_optim_spec": { 80 | "lr__grid_search": [5e-2, 1e-2, 5e-3, 1e-3] 81 | } 82 | } 83 | }] 84 | }, 85 | }, 86 | } 87 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/a2c/a2c_gae_lunar.json: -------------------------------------------------------------------------------- 1 | { 2 | "a2c_gae_lunar": { 3 | "agent": [{ 4 | "name": "A2C", 5 | "algorithm": { 6 | "name": "ActorCritic", 7 | "action_pdtype": "default", 8 | "action_policy": "default", 9 | "explore_var_spec": null, 10 | "gamma": 0.99, 11 | "lam": 0.95, 12 | "num_step_returns": null, 13 | "entropy_coef_spec": { 14 | "name": "no_decay", 15 | "start_val": 0.01, 16 | "end_val": 0.01, 17 | "start_step": 0, 18 | "end_step": 0 19 | }, 20 | "val_loss_coef": 1.0, 21 | "training_frequency": 128 22 | }, 23 | "memory": { 24 | "name": "OnPolicyBatchReplay", 25 | }, 26 | "net": { 27 | "type": "MLPNet", 28 | "shared": false, 29 | "hid_layers": [64, 64, 32], 30 | "hid_layers_activation": "relu", 31 | 
"init_fn": "orthogonal_", 32 | "batch_norm": false, 33 | "clip_grad_val": 0.5, 34 | "use_same_optim": false, 35 | "loss_spec": { 36 | "name": "MSELoss" 37 | }, 38 | "actor_optim_spec": { 39 | "name": "Adam", 40 | "lr": 2e-3, 41 | }, 42 | "critic_optim_spec": { 43 | "name": "Adam", 44 | "lr": 2e-3, 45 | }, 46 | "lr_scheduler_spec": null, 47 | "gpu": false 48 | } 49 | }], 50 | "env": [{ 51 | "name": "LunarLander-v2", 52 | "frame_op": "concat", 53 | "frame_op_len": 4, 54 | "max_t": null, 55 | "max_frame": 300000, 56 | "num_envs": 8, 57 | "normalize_state": false 58 | }], 59 | "body": { 60 | "product": "outer", 61 | "num": 1 62 | }, 63 | "meta": { 64 | "distributed": false, 65 | "log_frequency": 1000, 66 | "eval_frequency": 1000, 67 | "max_session": 4, 68 | "max_trial": 1 69 | } 70 | }, 71 | } 72 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/a2c/a2c_gae_pong.json: -------------------------------------------------------------------------------- 1 | { 2 | "a2c_gae_pong": { 3 | "agent": [{ 4 | "name": "A2C", 5 | "algorithm": { 6 | "name": "ActorCritic", 7 | "action_pdtype": "default", 8 | "action_policy": "default", 9 | "explore_var_spec": null, 10 | "gamma": 0.99, 11 | "lam": 0.95, 12 | "num_step_returns": null, 13 | "entropy_coef_spec": { 14 | "name": "no_decay", 15 | "start_val": 0.01, 16 | "end_val": 0.01, 17 | "start_step": 0, 18 | "end_step": 0 19 | }, 20 | "val_loss_coef": 0.5, 21 | "training_frequency": 32 22 | }, 23 | "memory": { 24 | "name": "OnPolicyBatchReplay", 25 | }, 26 | "net": { 27 | "type": "ConvNet", 28 | "shared": true, 29 | "conv_hid_layers": [ 30 | [32, 8, 4, 0, 1], 31 | [64, 4, 2, 0, 1], 32 | [32, 3, 1, 0, 1] 33 | ], 34 | "fc_hid_layers": [512], 35 | "hid_layers_activation": "relu", 36 | "init_fn": "orthogonal_", 37 | "normalize": true, 38 | "batch_norm": false, 39 | "clip_grad_val": 0.5, 40 | "use_same_optim": false, 41 | "loss_spec": { 42 | "name": "MSELoss" 43 | }, 44 | "actor_optim_spec": { 45 | "name": "RMSprop", 46 | "lr": 7e-4, 47 | "alpha": 0.99, 48 | "eps": 1e-5 49 | }, 50 | "critic_optim_spec": { 51 | "name": "RMSprop", 52 | "lr": 7e-4, 53 | "alpha": 0.99, 54 | "eps": 1e-5 55 | }, 56 | "lr_scheduler_spec": null, 57 | "gpu": true 58 | } 59 | }], 60 | "env": [{ 61 | "name": "PongNoFrameskip-v4", 62 | "frame_op": "concat", 63 | "frame_op_len": 4, 64 | "reward_scale": "sign", 65 | "num_envs": 16, 66 | "max_t": null, 67 | "max_frame": 1e7 68 | }], 69 | "body": { 70 | "product": "outer", 71 | "num": 1 72 | }, 73 | "meta": { 74 | "distributed": false, 75 | "log_frequency": 10000, 76 | "eval_frequency": 10000, 77 | "max_session": 4, 78 | "max_trial": 1, 79 | } 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/a2c/a2c_gae_qbert.json: -------------------------------------------------------------------------------- 1 | { 2 | "a2c_gae_qbert": { 3 | "agent": [{ 4 | "name": "A2C", 5 | "algorithm": { 6 | "name": "ActorCritic", 7 | "action_pdtype": "default", 8 | "action_policy": "default", 9 | "explore_var_spec": null, 10 | "gamma": 0.99, 11 | "lam": 0.95, 12 | "num_step_returns": null, 13 | "entropy_coef_spec": { 14 | "name": "no_decay", 15 | "start_val": 0.01, 16 | "end_val": 0.01, 17 | "start_step": 0, 18 | "end_step": 0 19 | }, 20 | "val_loss_coef": 0.5, 21 | "training_frequency": 32 22 | }, 23 | "memory": { 24 | "name": "OnPolicyBatchReplay", 25 | }, 26 | "net": { 27 | "type": "ConvNet", 28 | "shared": true, 29 | "conv_hid_layers": [ 30 | 
[32, 8, 4, 0, 1], 31 | [64, 4, 2, 0, 1], 32 | [32, 3, 1, 0, 1] 33 | ], 34 | "fc_hid_layers": [512], 35 | "hid_layers_activation": "relu", 36 | "init_fn": "orthogonal_", 37 | "normalize": true, 38 | "batch_norm": false, 39 | "clip_grad_val": 0.5, 40 | "use_same_optim": false, 41 | "loss_spec": { 42 | "name": "MSELoss" 43 | }, 44 | "actor_optim_spec": { 45 | "name": "RMSprop", 46 | "lr": 7e-4, 47 | "alpha": 0.99, 48 | "eps": 1e-5 49 | }, 50 | "critic_optim_spec": { 51 | "name": "RMSprop", 52 | "lr": 7e-4, 53 | "alpha": 0.99, 54 | "eps": 1e-5 55 | }, 56 | "lr_scheduler_spec": null, 57 | "gpu": true 58 | } 59 | }], 60 | "env": [{ 61 | "name": "QbertNoFrameskip-v4", 62 | "frame_op": "concat", 63 | "frame_op_len": 4, 64 | "reward_scale": "sign", 65 | "num_envs": 16, 66 | "max_t": null, 67 | "max_frame": 1e7 68 | }], 69 | "body": { 70 | "product": "outer", 71 | "num": 1 72 | }, 73 | "meta": { 74 | "distributed": false, 75 | "log_frequency": 10000, 76 | "eval_frequency": 10000, 77 | "rigorous_eval": 0, 78 | "max_session": 4, 79 | "max_trial": 1, 80 | } 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/a2c/a2c_nstep_cont.json: -------------------------------------------------------------------------------- 1 | { 2 | "a2c_nstep_bipedalwalker": { 3 | "agent": [{ 4 | "name": "A2C", 5 | "algorithm": { 6 | "name": "ActorCritic", 7 | "action_pdtype": "default", 8 | "action_policy": "default", 9 | "explore_var_spec": null, 10 | "gamma": 0.99, 11 | "lam": null, 12 | "num_step_returns": 5, 13 | "entropy_coef_spec": { 14 | "name": "no_decay", 15 | "start_val": 0.01, 16 | "end_val": 0.01, 17 | "start_step": 0, 18 | "end_step": 0 19 | }, 20 | "val_loss_coef": 0.5, 21 | "training_frequency": 256 22 | }, 23 | "memory": { 24 | "name": "OnPolicyBatchReplay", 25 | }, 26 | "net": { 27 | "type": "MLPNet", 28 | "shared": false, 29 | "hid_layers": [256, 128], 30 | "hid_layers_activation": "relu", 31 | "init_fn": "orthogonal_", 32 | "normalize": true, 33 | "batch_norm": false, 34 | "clip_grad_val": 0.5, 35 | "use_same_optim": false, 36 | "loss_spec": { 37 | "name": "MSELoss" 38 | }, 39 | "actor_optim_spec": { 40 | "name": "Adam", 41 | "lr": 3e-4, 42 | }, 43 | "critic_optim_spec": { 44 | "name": "Adam", 45 | "lr": 3e-4, 46 | }, 47 | "lr_scheduler_spec": null, 48 | "gpu": false 49 | } 50 | }], 51 | "env": [{ 52 | "name": "BipedalWalker-v2", 53 | "num_envs": 32, 54 | "max_t": null, 55 | "max_frame": 4e6 56 | }], 57 | "body": { 58 | "product": "outer", 59 | "num": 1 60 | }, 61 | "meta": { 62 | "distributed": false, 63 | "log_frequency": 10000, 64 | "eval_frequency": 10000, 65 | "max_session": 4, 66 | "max_trial": 1 67 | } 68 | }, 69 | "a2c_nstep_pendulum": { 70 | "agent": [{ 71 | "name": "A2C", 72 | "algorithm": { 73 | "name": "ActorCritic", 74 | "action_pdtype": "default", 75 | "action_policy": "default", 76 | "explore_var_spec": null, 77 | "gamma": 0.99, 78 | "lam": null, 79 | "num_step_returns": 5, 80 | "entropy_coef_spec": { 81 | "name": "no_decay", 82 | "start_val": 0.0, 83 | "end_val": 0.0, 84 | "start_step": 0, 85 | "end_step": 0 86 | }, 87 | "val_loss_coef": 0.5, 88 | "training_frequency": 2048 89 | }, 90 | "memory": { 91 | "name": "OnPolicyBatchReplay", 92 | }, 93 | "net": { 94 | "type": "MLPNet", 95 | "shared": false, 96 | "hid_layers": [64, 64], 97 | "hid_layers_activation": "tanh", 98 | "init_fn": "orthogonal_", 99 | "normalize": false, 100 | "batch_norm": false, 101 | "clip_grad_val": 0.5, 102 | "use_same_optim": false, 103 | "loss_spec": 
{ 104 | "name": "MSELoss" 105 | }, 106 | "actor_optim_spec": { 107 | "name": "Adam", 108 | "lr": 3e-4, 109 | }, 110 | "critic_optim_spec": { 111 | "name": "Adam", 112 | "lr": 3e-4, 113 | }, 114 | "lr_scheduler_spec": { 115 | "name": "LinearToZero", 116 | "frame": 1e6 117 | }, 118 | "gpu": true 119 | } 120 | }], 121 | "env": [{ 122 | "name": "Pendulum-v0", 123 | "num_envs": 8, 124 | "max_t": null, 125 | "max_frame": 1e6 126 | }], 127 | "body": { 128 | "product": "outer", 129 | "num": 1 130 | }, 131 | "meta": { 132 | "distributed": false, 133 | "log_frequency": 20000, 134 | "eval_frequency": 20000, 135 | "max_session": 4, 136 | "max_trial": 1 137 | } 138 | } 139 | } 140 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/a2c/a2c_nstep_lunar.json: -------------------------------------------------------------------------------- 1 | { 2 | "a2c_nstep_lunar": { 3 | "agent": [{ 4 | "name": "A2C", 5 | "algorithm": { 6 | "name": "ActorCritic", 7 | "action_pdtype": "default", 8 | "action_policy": "default", 9 | "explore_var_spec": null, 10 | "gamma": 0.99, 11 | "lam": null, 12 | "num_step_returns": 5, 13 | "entropy_coef_spec": { 14 | "name": "no_decay", 15 | "start_val": 0.01, 16 | "end_val": 0.01, 17 | "start_step": 0, 18 | "end_step": 0 19 | }, 20 | "val_loss_coef": 1.0, 21 | "training_frequency": 64 22 | }, 23 | "memory": { 24 | "name": "OnPolicyBatchReplay", 25 | }, 26 | "net": { 27 | "type": "MLPNet", 28 | "shared": false, 29 | "hid_layers": [64, 64, 32], 30 | "hid_layers_activation": "relu", 31 | "init_fn": "orthogonal_", 32 | "batch_norm": false, 33 | "clip_grad_val": 0.5, 34 | "use_same_optim": false, 35 | "loss_spec": { 36 | "name": "MSELoss" 37 | }, 38 | "actor_optim_spec": { 39 | "name": "Adam", 40 | "lr": 2e-3, 41 | }, 42 | "critic_optim_spec": { 43 | "name": "Adam", 44 | "lr": 2e-3, 45 | }, 46 | "lr_scheduler_spec": null, 47 | "gpu": false 48 | } 49 | }], 50 | "env": [{ 51 | "name": "LunarLander-v2", 52 | "frame_op": "concat", 53 | "frame_op_len": 4, 54 | "max_t": null, 55 | "max_frame": 300000, 56 | "num_envs": 8, 57 | "normalize_state": false 58 | }], 59 | "body": { 60 | "product": "outer", 61 | "num": 1 62 | }, 63 | "meta": { 64 | "distributed": false, 65 | "log_frequency": 1000, 66 | "eval_frequency": 1000, 67 | "max_session": 4, 68 | "max_trial": 1 69 | } 70 | }, 71 | } 72 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/a2c/a2c_nstep_pong.json: -------------------------------------------------------------------------------- 1 | { 2 | "a2c_nstep_pong": { 3 | "agent": [{ 4 | "name": "A2C", 5 | "algorithm": { 6 | "name": "ActorCritic", 7 | "action_pdtype": "default", 8 | "action_policy": "default", 9 | "explore_var_spec": null, 10 | "gamma": 0.99, 11 | "lam": null, 12 | "num_step_returns": 11, 13 | "entropy_coef_spec": { 14 | "name": "no_decay", 15 | "start_val": 0.01, 16 | "end_val": 0.01, 17 | "start_step": 0, 18 | "end_step": 0 19 | }, 20 | "val_loss_coef": 0.5, 21 | "training_frequency": 5 22 | }, 23 | "memory": { 24 | "name": "OnPolicyBatchReplay" 25 | }, 26 | "net": { 27 | "type": "ConvNet", 28 | "shared": true, 29 | "conv_hid_layers": [ 30 | [32, 8, 4, 0, 1], 31 | [64, 4, 2, 0, 1], 32 | [32, 3, 1, 0, 1] 33 | ], 34 | "fc_hid_layers": [512], 35 | "hid_layers_activation": "relu", 36 | "init_fn": "orthogonal_", 37 | "normalize": true, 38 | "batch_norm": false, 39 | "clip_grad_val": 0.5, 40 | "use_same_optim": false, 41 | "loss_spec": { 42 | "name": "MSELoss" 43 | }, 44 | 
"actor_optim_spec": { 45 | "name": "RMSprop", 46 | "lr": 7e-4, 47 | "alpha": 0.99, 48 | "eps": 1e-5 49 | }, 50 | "critic_optim_spec": { 51 | "name": "RMSprop", 52 | "lr": 7e-4, 53 | "alpha": 0.99, 54 | "eps": 1e-5 55 | }, 56 | "lr_scheduler_spec": null, 57 | "gpu": true 58 | } 59 | }], 60 | "env": [{ 61 | "name": "PongNoFrameskip-v4", 62 | "frame_op": "concat", 63 | "frame_op_len": 4, 64 | "reward_scale": "sign", 65 | "num_envs": 16, 66 | "max_t": null, 67 | "max_frame": 1e7 68 | }], 69 | "body": { 70 | "product": "outer", 71 | "num": 1, 72 | }, 73 | "meta": { 74 | "distributed": false, 75 | "log_frequency": 10000, 76 | "eval_frequency": 10000, 77 | "max_session": 4, 78 | "max_trial": 1 79 | } 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/a2c/a2c_nstep_qbert.json: -------------------------------------------------------------------------------- 1 | { 2 | "a2c_nstep_qbert": { 3 | "agent": [{ 4 | "name": "A2C", 5 | "algorithm": { 6 | "name": "ActorCritic", 7 | "action_pdtype": "default", 8 | "action_policy": "default", 9 | "explore_var_spec": null, 10 | "gamma": 0.99, 11 | "lam": null, 12 | "num_step_returns": 11, 13 | "entropy_coef_spec": { 14 | "name": "no_decay", 15 | "start_val": 0.01, 16 | "end_val": 0.01, 17 | "start_step": 0, 18 | "end_step": 0 19 | }, 20 | "val_loss_coef": 0.5, 21 | "training_frequency": 5 22 | }, 23 | "memory": { 24 | "name": "OnPolicyBatchReplay" 25 | }, 26 | "net": { 27 | "type": "ConvNet", 28 | "shared": true, 29 | "conv_hid_layers": [ 30 | [32, 8, 4, 0, 1], 31 | [64, 4, 2, 0, 1], 32 | [32, 3, 1, 0, 1] 33 | ], 34 | "fc_hid_layers": [512], 35 | "hid_layers_activation": "relu", 36 | "init_fn": "orthogonal_", 37 | "normalize": true, 38 | "batch_norm": false, 39 | "clip_grad_val": 0.5, 40 | "use_same_optim": false, 41 | "loss_spec": { 42 | "name": "MSELoss" 43 | }, 44 | "actor_optim_spec": { 45 | "name": "RMSprop", 46 | "lr": 7e-4, 47 | "alpha": 0.99, 48 | "eps": 1e-5 49 | }, 50 | "critic_optim_spec": { 51 | "name": "RMSprop", 52 | "lr": 7e-4, 53 | "alpha": 0.99, 54 | "eps": 1e-5 55 | }, 56 | "lr_scheduler_spec": null, 57 | "gpu": true 58 | } 59 | }], 60 | "env": [{ 61 | "name": "QbertNoFrameskip-v4", 62 | "frame_op": "concat", 63 | "frame_op_len": 4, 64 | "reward_scale": "sign", 65 | "num_envs": 16, 66 | "max_t": null, 67 | "max_frame": 1e7 68 | }], 69 | "body": { 70 | "product": "outer", 71 | "num": 1, 72 | }, 73 | "meta": { 74 | "distributed": false, 75 | "log_frequency": 10000, 76 | "eval_frequency": 10000, 77 | "rigorous_eval": 0, 78 | "max_session": 4, 79 | "max_trial": 1 80 | } 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/async_sac/async_sac_halfcheetah.json: -------------------------------------------------------------------------------- 1 | { 2 | "async_sac_halfcheetah": { 3 | "agent": [{ 4 | "name": "SoftActorCritic", 5 | "algorithm": { 6 | "name": "SoftActorCritic", 7 | "action_pdtype": "default", 8 | "action_policy": "default", 9 | "gamma": 0.99, 10 | "training_frequency": 1 11 | }, 12 | "memory": { 13 | "name": "Replay", 14 | "batch_size": 256, 15 | "max_size": 200000, 16 | "use_cer": true 17 | }, 18 | "net": { 19 | "type": "MLPNet", 20 | "hid_layers": [256, 256], 21 | "hid_layers_activation": "relu", 22 | "init_fn": "orthogonal_", 23 | "clip_grad_val": 1.0, 24 | "loss_spec": { 25 | "name": "MSELoss" 26 | }, 27 | "optim_spec": { 28 | "name": "GlobalAdam", 29 | "lr": 1e-4, 30 | }, 31 | 
"lr_scheduler_spec": null, 32 | "update_type": "polyak", 33 | "update_frequency": 1, 34 | "polyak_coef": 0.005, 35 | "gpu": false 36 | } 37 | }], 38 | "env": [{ 39 | "name": "RoboschoolHalfCheetah-v1", 40 | "num_envs": 8, 41 | "max_t": null, 42 | "max_frame": 4e6 43 | }], 44 | "body": { 45 | "product": "outer", 46 | "num": 1 47 | }, 48 | "meta": { 49 | "distributed": "shared", 50 | "log_frequency": 1000, 51 | "eval_frequency": 1000, 52 | "rigorous_eval": 0, 53 | "max_session": 6, 54 | "max_trial": 1, 55 | }, 56 | "search": { 57 | "env": [{ 58 | "num_envs__grid_search": [4, 6, 8, 10] 59 | }], 60 | "meta": { 61 | "max_session__grid_search": [4, 6, 8, 10] 62 | } 63 | } 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/async_sac/async_sac_halfcheetah_pybullet.json: -------------------------------------------------------------------------------- 1 | { 2 | "async_sac_halfcheetah": { 3 | "agent": [{ 4 | "name": "SoftActorCritic", 5 | "algorithm": { 6 | "name": "SoftActorCritic", 7 | "action_pdtype": "default", 8 | "action_policy": "default", 9 | "gamma": 0.99, 10 | "training_frequency": 1 11 | }, 12 | "memory": { 13 | "name": "Replay", 14 | "batch_size": 256, 15 | "max_size": 200000, 16 | "use_cer": true 17 | }, 18 | "net": { 19 | "type": "MLPNet", 20 | "hid_layers": [256, 256], 21 | "hid_layers_activation": "relu", 22 | "init_fn": "orthogonal_", 23 | "clip_grad_val": 1.0, 24 | "loss_spec": { 25 | "name": "MSELoss" 26 | }, 27 | "optim_spec": { 28 | "name": "GlobalAdam", 29 | "lr": 1e-4 30 | }, 31 | "lr_scheduler_spec": null, 32 | "update_type": "polyak", 33 | "update_frequency": 1, 34 | "polyak_coef": 0.005, 35 | "gpu": true 36 | } 37 | }], 38 | "env": [{ 39 | "name": "HalfCheetahBulletEnv-v0", 40 | "num_envs": 8, 41 | "max_t": null, 42 | "max_frame": 4e6 43 | }], 44 | "body": { 45 | "product": "outer", 46 | "num": 1 47 | }, 48 | "meta": { 49 | "distributed": "shared", 50 | "log_frequency": 1000, 51 | "eval_frequency": 1000, 52 | "rigorous_eval": 0, 53 | "max_session": 6, 54 | "max_trial": 1 55 | }, 56 | "search": { 57 | "env": [{ 58 | "num_envs__grid_search": [4, 6, 8, 10] 59 | }], 60 | "meta": { 61 | "max_session__grid_search": [4, 6, 8, 10] 62 | } 63 | } 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/async_sac/async_sac_lunar.json: -------------------------------------------------------------------------------- 1 | { 2 | "async_sac_lunar": { 3 | "agent": [{ 4 | "name": "SoftActorCritic", 5 | "algorithm": { 6 | "name": "SoftActorCritic", 7 | "action_pdtype": "GumbelSoftmax", 8 | "action_policy": "default", 9 | "gamma": 0.99, 10 | "training_frequency": 1 11 | }, 12 | "memory": { 13 | "name": "Replay", 14 | "batch_size": 256, 15 | "max_size": 200000, 16 | "use_cer": true 17 | }, 18 | "net": { 19 | "type": "MLPNet", 20 | "hid_layers": [64, 64, 32], 21 | "hid_layers_activation": "relu", 22 | "init_fn": "orthogonal_", 23 | "clip_grad_val": 0.5, 24 | "loss_spec": { 25 | "name": "MSELoss" 26 | }, 27 | "optim_spec": { 28 | "name": "GlobalAdam", 29 | "lr": 1e-3, 30 | }, 31 | "lr_scheduler_spec": null, 32 | "update_type": "polyak", 33 | "update_frequency": 1, 34 | "polyak_coef": 0.005, 35 | "gpu": false 36 | } 37 | }], 38 | "env": [{ 39 | "name": "LunarLander-v2", 40 | "frame_op": "concat", 41 | "frame_op_len": 4, 42 | "max_t": null, 43 | "max_frame": 8e5, 44 | "num_envs": 8, 45 | "normalize_state": false 46 | }], 47 | "body": { 48 | "product": "outer", 49 
| "num": 1 50 | }, 51 | "meta": { 52 | "distributed": "shared", 53 | "log_frequency": 500, 54 | "eval_frequency": 500, 55 | "rigorous_eval": 0, 56 | "max_session": 6, 57 | "max_trial": 1, 58 | }, 59 | "search": { 60 | "env": [{ 61 | "num_envs__grid_search": [4, 6, 8, 10] 62 | }], 63 | "meta": { 64 | "max_session__grid_search": [4, 6, 8, 10] 65 | } 66 | } 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/async_sac/async_sac_pong.json: -------------------------------------------------------------------------------- 1 | { 2 | "async_sac_pong": { 3 | "agent": [{ 4 | "name": "SoftActorCritic", 5 | "algorithm": { 6 | "name": "SoftActorCritic", 7 | "action_pdtype": "GumbelSoftmax", 8 | "action_policy": "default", 9 | "gamma": 0.99, 10 | "training_start_step": 1000, 11 | "training_frequency": 4 12 | }, 13 | "memory": { 14 | "name": "Replay", 15 | "batch_size": 256, 16 | "max_size": 200000, 17 | "use_cer": false 18 | }, 19 | "net": { 20 | "type": "ConvNet", 21 | "shared": false, 22 | "conv_hid_layers": [ 23 | [32, 8, 4, 0, 1], 24 | [64, 4, 2, 0, 1], 25 | [32, 3, 1, 0, 1] 26 | ], 27 | "fc_hid_layers": [256, 256], 28 | "hid_layers_activation": "leakyrelu", 29 | "init_fn": "orthogonal_", 30 | "normalize": true, 31 | "batch_norm": false, 32 | "clip_grad_val": 0.5, 33 | "use_same_optim": false, 34 | "loss_spec": { 35 | "name": "SmoothL1Loss" 36 | }, 37 | "optim_spec": { 38 | "name": "GlobalAdam", 39 | "lr": 1e-4, 40 | }, 41 | "lr_scheduler_spec": null, 42 | "update_type": "polyak", 43 | "update_frequency": 1, 44 | "polyak_coef": 0.005, 45 | "gpu": true 46 | } 47 | }], 48 | "env": [{ 49 | "name": "PongNoFrameskip-v4", 50 | "frame_op": "concat", 51 | "frame_op_len": 4, 52 | "image_downsize": [64, 64], 53 | "reward_scale": "sign", 54 | "num_envs": 4, 55 | "max_t": null, 56 | "max_frame": 5e6 57 | }], 58 | "body": { 59 | "product": "outer", 60 | "num": 1 61 | }, 62 | "meta": { 63 | "distributed": "shared", 64 | "log_frequency": 1000, 65 | "eval_frequency": 1000, 66 | "rigorous_eval": 0, 67 | "max_session": 6, 68 | "max_trial": 1, 69 | } 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/dppo/dppo_atari.json: -------------------------------------------------------------------------------- 1 | { 2 | "dppo_atari": { 3 | "agent": [{ 4 | "name": "PPO", 5 | "algorithm": { 6 | "name": "PPO", 7 | "action_pdtype": "default", 8 | "action_policy": "default", 9 | "explore_var_spec": null, 10 | "gamma": 0.99, 11 | "lam": 0.95, 12 | "clip_eps_spec": { 13 | "name": "no_decay", 14 | "start_val": 0.10, 15 | "end_val": 0.10, 16 | "start_step": 0, 17 | "end_step": 0 18 | }, 19 | "entropy_coef_spec": { 20 | "name": "no_decay", 21 | "start_val": 0.01, 22 | "end_val": 0.01, 23 | "start_step": 0, 24 | "end_step": 0 25 | }, 26 | "val_loss_coef": 0.5, 27 | "time_horizon": 128, 28 | "minibatch_size": 256, 29 | "training_epoch": 4 30 | }, 31 | "memory": { 32 | "name": "OnPolicyBatchReplay", 33 | }, 34 | "net": { 35 | "type": "ConvNet", 36 | "shared": true, 37 | "conv_hid_layers": [ 38 | [32, 8, 4, 0, 1], 39 | [64, 4, 2, 0, 1], 40 | [32, 3, 1, 0, 1] 41 | ], 42 | "fc_hid_layers": [512], 43 | "hid_layers_activation": "relu", 44 | "init_fn": "orthogonal_", 45 | "normalize": true, 46 | "batch_norm": false, 47 | "clip_grad_val": 0.5, 48 | "use_same_optim": false, 49 | "loss_spec": { 50 | "name": "MSELoss" 51 | }, 52 | "actor_optim_spec": { 53 | "name": "GlobalAdam", 54 | "lr": 1e-4 55 | }, 56 | 
"critic_optim_spec": { 57 | "name": "GlobalAdam", 58 | "lr": 1e-4 59 | }, 60 | "lr_scheduler_spec": null, 61 | "gpu": false 62 | } 63 | }], 64 | "env": [{ 65 | "name": "${env}", 66 | "frame_op": "concat", 67 | "frame_op_len": 4, 68 | "reward_scale": "sign", 69 | "num_envs": 8, 70 | "max_t": null, 71 | "max_frame": 1e7 72 | }], 73 | "body": { 74 | "product": "outer", 75 | "num": 1 76 | }, 77 | "meta": { 78 | "distributed": "synced", 79 | "log_frequency": 10000, 80 | "eval_frequency": 10000, 81 | "max_session": 16, 82 | "max_trial": 1, 83 | }, 84 | "spec_params": { 85 | "env": [ 86 | "BreakoutNoFrameskip-v4", "PongNoFrameskip-v4", "QbertNoFrameskip-v4", "SeaquestNoFrameskip-v4" 87 | ] 88 | } 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/dppo/dppo_pong.json: -------------------------------------------------------------------------------- 1 | { 2 | "dppo_pong": { 3 | "agent": [{ 4 | "name": "PPO", 5 | "algorithm": { 6 | "name": "PPO", 7 | "action_pdtype": "default", 8 | "action_policy": "default", 9 | "explore_var_spec": null, 10 | "gamma": 0.99, 11 | "lam": 0.95, 12 | "clip_eps_spec": { 13 | "name": "no_decay", 14 | "start_val": 0.10, 15 | "end_val": 0.10, 16 | "start_step": 0, 17 | "end_step": 0 18 | }, 19 | "entropy_coef_spec": { 20 | "name": "no_decay", 21 | "start_val": 0.01, 22 | "end_val": 0.01, 23 | "start_step": 0, 24 | "end_step": 0 25 | }, 26 | "val_loss_coef": 0.5, 27 | "time_horizon": 128, 28 | "minibatch_size": 256, 29 | "training_epoch": 4 30 | }, 31 | "memory": { 32 | "name": "OnPolicyBatchReplay", 33 | }, 34 | "net": { 35 | "type": "ConvNet", 36 | "shared": true, 37 | "conv_hid_layers": [ 38 | [32, 8, 4, 0, 1], 39 | [64, 4, 2, 0, 1], 40 | [32, 3, 1, 0, 1] 41 | ], 42 | "fc_hid_layers": [512], 43 | "hid_layers_activation": "relu", 44 | "init_fn": "orthogonal_", 45 | "normalize": true, 46 | "batch_norm": false, 47 | "clip_grad_val": 0.5, 48 | "use_same_optim": false, 49 | "loss_spec": { 50 | "name": "MSELoss" 51 | }, 52 | "actor_optim_spec": { 53 | "name": "GlobalAdam", 54 | "lr": 7e-4, 55 | }, 56 | "critic_optim_spec": { 57 | "name": "GlobalAdam", 58 | "lr": 7e-4, 59 | }, 60 | "lr_scheduler_spec": null, 61 | "gpu": false 62 | } 63 | }], 64 | "env": [{ 65 | "name": "PongNoFrameskip-v4", 66 | "frame_op": "concat", 67 | "frame_op_len": 4, 68 | "reward_scale": "sign", 69 | "num_envs": 8, 70 | "max_t": null, 71 | "max_frame": 1e7 72 | }], 73 | "body": { 74 | "product": "outer", 75 | "num": 1 76 | }, 77 | "meta": { 78 | "distributed": "synced", 79 | "log_frequency": 10000, 80 | "eval_frequency": 10000, 81 | "max_session": 16, 82 | "max_trial": 1, 83 | } 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/dqn/ddqn_atari.json: -------------------------------------------------------------------------------- 1 | { 2 | "ddqn_atari": { 3 | "agent": [{ 4 | "name": "DoubleDQN", 5 | "algorithm": { 6 | "name": "DoubleDQN", 7 | "action_pdtype": "Argmax", 8 | "action_policy": "epsilon_greedy", 9 | "explore_var_spec": { 10 | "name": "linear_decay", 11 | "start_val": 1.0, 12 | "end_val": 0.01, 13 | "start_step": 10000, 14 | "end_step": 1000000 15 | }, 16 | "gamma": 0.99, 17 | "training_batch_iter": 1, 18 | "training_iter": 4, 19 | "training_frequency": 4, 20 | "training_start_step": 10000 21 | }, 22 | "memory": { 23 | "name": "Replay", 24 | "batch_size": 32, 25 | "max_size": 200000, 26 | "use_cer": false, 27 | }, 28 | "net": { 29 | "type": "ConvNet", 
30 | "conv_hid_layers": [ 31 | [32, 8, 4, 0, 1], 32 | [64, 4, 2, 0, 1], 33 | [64, 3, 1, 0, 1] 34 | ], 35 | "fc_hid_layers": [256], 36 | "hid_layers_activation": "relu", 37 | "init_fn": null, 38 | "batch_norm": false, 39 | "clip_grad_val": 10.0, 40 | "loss_spec": { 41 | "name": "SmoothL1Loss" 42 | }, 43 | "optim_spec": { 44 | "name": "Adam", 45 | "lr": 1e-4, 46 | }, 47 | "lr_scheduler_spec": null, 48 | "update_type": "replace", 49 | "update_frequency": 1000, 50 | "gpu": true 51 | } 52 | }], 53 | "env": [{ 54 | "name": "${env}", 55 | "frame_op": "concat", 56 | "frame_op_len": 4, 57 | "reward_scale": "sign", 58 | "num_envs": 16, 59 | "max_t": null, 60 | "max_frame": 1e7 61 | }], 62 | "body": { 63 | "product": "outer", 64 | "num": 1 65 | }, 66 | "meta": { 67 | "distributed": false, 68 | "eval_frequency": 10000, 69 | "log_frequency": 10000, 70 | "rigorous_eval": 0, 71 | "max_session": 4, 72 | "max_trial": 1 73 | }, 74 | "spec_params": { 75 | "env": [ 76 | "BreakoutNoFrameskip-v4", "PongNoFrameskip-v4", "QbertNoFrameskip-v4", "SeaquestNoFrameskip-v4" 77 | ] 78 | } 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/dqn/ddqn_per_lunar.json: -------------------------------------------------------------------------------- 1 | { 2 | "ddqn_per_concat_lunar": { 3 | "agent": [{ 4 | "name": "DoubleDQN", 5 | "algorithm": { 6 | "name": "DoubleDQN", 7 | "action_pdtype": "Argmax", 8 | "action_policy": "epsilon_greedy", 9 | "explore_var_spec": { 10 | "name": "linear_decay", 11 | "start_val": 1.0, 12 | "end_val": 0.01, 13 | "start_step": 0, 14 | "end_step": 50000 15 | }, 16 | "gamma": 0.99, 17 | "training_batch_iter": 1, 18 | "training_iter": 1, 19 | "training_frequency": 1, 20 | "training_start_step": 32 21 | }, 22 | "memory": { 23 | "name": "PrioritizedReplay", 24 | "alpha": 0.6, 25 | "epsilon": 0.0001, 26 | "batch_size": 32, 27 | "max_size": 50000, 28 | "use_cer": false, 29 | }, 30 | "net": { 31 | "type": "MLPNet", 32 | "hid_layers": [256, 128], 33 | "hid_layers_activation": "relu", 34 | "clip_grad_val": 10.0, 35 | "loss_spec": { 36 | "name": "SmoothL1Loss" 37 | }, 38 | "optim_spec": { 39 | "name": "Adam", 40 | "lr": 2.5e-4 41 | }, 42 | "lr_scheduler_spec": null, 43 | "update_type": "replace", 44 | "update_frequency": 100, 45 | "gpu": false 46 | } 47 | }], 48 | "env": [{ 49 | "name": "LunarLander-v2", 50 | "frame_op": "concat", 51 | "frame_op_len": 4, 52 | "max_t": null, 53 | "max_frame": 300000, 54 | "normalize_state": false 55 | }], 56 | "body": { 57 | "product": "outer", 58 | "num": 1 59 | }, 60 | "meta": { 61 | "distributed": false, 62 | "log_frequency": 1000, 63 | "eval_frequency": 1000, 64 | "max_session": 4, 65 | "max_trial": 1, 66 | "search": "RandomSearch", 67 | }, 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/dqn/ddqn_per_pong.json: -------------------------------------------------------------------------------- 1 | { 2 | "ddqn_per_pong": { 3 | "agent": [{ 4 | "name": "DoubleDQN", 5 | "algorithm": { 6 | "name": "DoubleDQN", 7 | "action_pdtype": "Argmax", 8 | "action_policy": "epsilon_greedy", 9 | "explore_var_spec": { 10 | "name": "linear_decay", 11 | "start_val": 1.0, 12 | "end_val": 0.01, 13 | "start_step": 10000, 14 | "end_step": 1000000 15 | }, 16 | "gamma": 0.99, 17 | "training_batch_iter": 1, 18 | "training_iter": 4, 19 | "training_frequency": 4, 20 | "training_start_step": 10000 21 | }, 22 | "memory": { 23 | "name": "PrioritizedReplay", 24 | 
"alpha": 0.6, 25 | "epsilon": 0.0001, 26 | "batch_size": 32, 27 | "max_size": 200000, 28 | "use_cer": false, 29 | }, 30 | "net": { 31 | "type": "ConvNet", 32 | "conv_hid_layers": [ 33 | [32, 8, 4, 0, 1], 34 | [64, 4, 2, 0, 1], 35 | [64, 3, 1, 0, 1] 36 | ], 37 | "fc_hid_layers": [256], 38 | "hid_layers_activation": "relu", 39 | "init_fn": null, 40 | "batch_norm": false, 41 | "clip_grad_val": 10.0, 42 | "loss_spec": { 43 | "name": "SmoothL1Loss" 44 | }, 45 | "optim_spec": { 46 | "name": "Adam", 47 | "lr": 2.5e-5, 48 | }, 49 | "lr_scheduler_spec": null, 50 | "update_type": "replace", 51 | "update_frequency": 1000, 52 | "gpu": true 53 | } 54 | }], 55 | "env": [{ 56 | "name": "PongNoFrameskip-v4", 57 | "frame_op": "concat", 58 | "frame_op_len": 4, 59 | "reward_scale": "sign", 60 | "num_envs": 16, 61 | "max_t": null, 62 | "max_frame": 4e6 63 | }], 64 | "body": { 65 | "product": "outer", 66 | "num": 1 67 | }, 68 | "meta": { 69 | "distributed": false, 70 | "eval_frequency": 10000, 71 | "log_frequency": 10000, 72 | "max_session": 4, 73 | "max_trial": 1 74 | } 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/dqn/ddqn_per_qbert.json: -------------------------------------------------------------------------------- 1 | { 2 | "ddqn_per_qbert": { 3 | "agent": [{ 4 | "name": "DoubleDQN", 5 | "algorithm": { 6 | "name": "DoubleDQN", 7 | "action_pdtype": "Argmax", 8 | "action_policy": "epsilon_greedy", 9 | "explore_var_spec": { 10 | "name": "linear_decay", 11 | "start_val": 1.0, 12 | "end_val": 0.01, 13 | "start_step": 10000, 14 | "end_step": 1000000 15 | }, 16 | "gamma": 0.99, 17 | "training_batch_iter": 1, 18 | "training_iter": 4, 19 | "training_frequency": 4, 20 | "training_start_step": 10000 21 | }, 22 | "memory": { 23 | "name": "PrioritizedReplay", 24 | "alpha": 0.6, 25 | "epsilon": 0.0001, 26 | "batch_size": 32, 27 | "max_size": 200000, 28 | "use_cer": false, 29 | }, 30 | "net": { 31 | "type": "ConvNet", 32 | "conv_hid_layers": [ 33 | [32, 8, 4, 0, 1], 34 | [64, 4, 2, 0, 1], 35 | [64, 3, 1, 0, 1] 36 | ], 37 | "fc_hid_layers": [256], 38 | "hid_layers_activation": "relu", 39 | "init_fn": null, 40 | "batch_norm": false, 41 | "clip_grad_val": 10.0, 42 | "loss_spec": { 43 | "name": "SmoothL1Loss" 44 | }, 45 | "optim_spec": { 46 | "name": "Adam", 47 | "lr": 2.5e-5, 48 | }, 49 | "lr_scheduler_spec": null, 50 | "update_type": "replace", 51 | "update_frequency": 1000, 52 | "gpu": true 53 | } 54 | }], 55 | "env": [{ 56 | "name": "QbertNoFrameskip-v4", 57 | "frame_op": "concat", 58 | "frame_op_len": 4, 59 | "reward_scale": "sign", 60 | "num_envs": 16, 61 | "max_t": null, 62 | "max_frame": 4e6 63 | }], 64 | "body": { 65 | "product": "outer", 66 | "num": 1 67 | }, 68 | "meta": { 69 | "distributed": false, 70 | "eval_frequency": 10000, 71 | "log_frequency": 10000, 72 | "rigorous_eval": 0, 73 | "max_session": 4, 74 | "max_trial": 1 75 | } 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/dqn/ddqn_pong.json: -------------------------------------------------------------------------------- 1 | { 2 | "ddqn_pong": { 3 | "agent": [{ 4 | "name": "DoubleDQN", 5 | "algorithm": { 6 | "name": "DoubleDQN", 7 | "action_pdtype": "Argmax", 8 | "action_policy": "epsilon_greedy", 9 | "explore_var_spec": { 10 | "name": "linear_decay", 11 | "start_val": 1.0, 12 | "end_val": 0.01, 13 | "start_step": 10000, 14 | "end_step": 1000000 15 | }, 16 | "gamma": 0.99, 17 | "training_batch_iter": 1, 
18 | "training_iter": 4, 19 | "training_frequency": 4, 20 | "training_start_step": 10000 21 | }, 22 | "memory": { 23 | "name": "Replay", 24 | "batch_size": 32, 25 | "max_size": 200000, 26 | "use_cer": false, 27 | }, 28 | "net": { 29 | "type": "ConvNet", 30 | "conv_hid_layers": [ 31 | [32, 8, 4, 0, 1], 32 | [64, 4, 2, 0, 1], 33 | [64, 3, 1, 0, 1] 34 | ], 35 | "fc_hid_layers": [256], 36 | "hid_layers_activation": "relu", 37 | "init_fn": null, 38 | "batch_norm": false, 39 | "clip_grad_val": 10.0, 40 | "loss_spec": { 41 | "name": "SmoothL1Loss" 42 | }, 43 | "optim_spec": { 44 | "name": "Adam", 45 | "lr": 1e-4, 46 | }, 47 | "lr_scheduler_spec": null, 48 | "update_type": "replace", 49 | "update_frequency": 1000, 50 | "gpu": true 51 | } 52 | }], 53 | "env": [{ 54 | "name": "PongNoFrameskip-v4", 55 | "frame_op": "concat", 56 | "frame_op_len": 4, 57 | "reward_scale": "sign", 58 | "num_envs": 16, 59 | "max_t": null, 60 | "max_frame": 4e6 61 | }], 62 | "body": { 63 | "product": "outer", 64 | "num": 1 65 | }, 66 | "meta": { 67 | "distributed": false, 68 | "eval_frequency": 10000, 69 | "log_frequency": 10000, 70 | "max_session": 4, 71 | "max_trial": 1 72 | } 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/dqn/ddqn_qbert.json: -------------------------------------------------------------------------------- 1 | { 2 | "ddqn_qbert": { 3 | "agent": [{ 4 | "name": "DoubleDQN", 5 | "algorithm": { 6 | "name": "DoubleDQN", 7 | "action_pdtype": "Argmax", 8 | "action_policy": "epsilon_greedy", 9 | "explore_var_spec": { 10 | "name": "linear_decay", 11 | "start_val": 1.0, 12 | "end_val": 0.01, 13 | "start_step": 10000, 14 | "end_step": 1000000 15 | }, 16 | "gamma": 0.99, 17 | "training_batch_iter": 1, 18 | "training_iter": 4, 19 | "training_frequency": 4, 20 | "training_start_step": 10000 21 | }, 22 | "memory": { 23 | "name": "Replay", 24 | "batch_size": 32, 25 | "max_size": 200000, 26 | "use_cer": false, 27 | }, 28 | "net": { 29 | "type": "ConvNet", 30 | "conv_hid_layers": [ 31 | [32, 8, 4, 0, 1], 32 | [64, 4, 2, 0, 1], 33 | [64, 3, 1, 0, 1] 34 | ], 35 | "fc_hid_layers": [256], 36 | "hid_layers_activation": "relu", 37 | "init_fn": null, 38 | "batch_norm": false, 39 | "clip_grad_val": 10.0, 40 | "loss_spec": { 41 | "name": "SmoothL1Loss" 42 | }, 43 | "optim_spec": { 44 | "name": "Adam", 45 | "lr": 1e-4, 46 | }, 47 | "lr_scheduler_spec": null, 48 | "update_type": "replace", 49 | "update_frequency": 1000, 50 | "gpu": true 51 | } 52 | }], 53 | "env": [{ 54 | "name": "QbertNoFrameskip-v4", 55 | "frame_op": "concat", 56 | "frame_op_len": 4, 57 | "reward_scale": "sign", 58 | "num_envs": 16, 59 | "max_t": null, 60 | "max_frame": 4e6 61 | }], 62 | "body": { 63 | "product": "outer", 64 | "num": 1 65 | }, 66 | "meta": { 67 | "distributed": false, 68 | "eval_frequency": 10000, 69 | "log_frequency": 10000, 70 | "rigorous_eval": 0, 71 | "max_session": 4, 72 | "max_trial": 1 73 | } 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/dqn/ddqn_videopinball.json: -------------------------------------------------------------------------------- 1 | { 2 | "ddqn_videopinball": { 3 | "agent": [{ 4 | "name": "DoubleDQN", 5 | "algorithm": { 6 | "name": "DoubleDQN", 7 | "action_pdtype": "Argmax", 8 | "action_policy": "epsilon_greedy", 9 | "explore_var_spec": { 10 | "name": "linear_decay", 11 | "start_val": 1.0, 12 | "end_val": 0.01, 13 | "start_step": 1000, 14 | "end_step": 100000 15 | }, 16 | 
"gamma": 0.99, 17 | "training_batch_iter": 1, 18 | "training_iter": 4, 19 | "training_frequency": 4, 20 | "training_start_step": 100 21 | }, 22 | "memory": { 23 | "name": "PrioritizedReplay", 24 | "alpha": 0.6, 25 | "epsilon": 0.0001, 26 | "batch_size": 64, 27 | "max_size": 200000, 28 | "use_cer": false 29 | }, 30 | "net": { 31 | "type": "ConvNet", 32 | "conv_hid_layers": [ 33 | [32, 8, 4, 0, 1], 34 | [64, 4, 2, 0, 1], 35 | [32, 3, 1, 0, 1] 36 | ], 37 | "fc_hid_layers": [512], 38 | "hid_layers_activation": "relu", 39 | "init_fn": null, 40 | "shared": true, 41 | "batch_norm": false, 42 | "clip_grad_val": 10.0, 43 | "loss_spec": { 44 | "name": "SmoothL1Loss" 45 | }, 46 | "optim_spec": { 47 | "name": "Adam", 48 | "lr": 2.5e-5 49 | }, 50 | "lr_scheduler_spec": null, 51 | "update_frequency": 100, 52 | "gpu": true 53 | } 54 | }], 55 | "env": [{ 56 | "name": "VideoPinball-v0", 57 | "frame_op": "concat", 58 | "frame_op_len": 4, 59 | "reward_scale": "sign", 60 | "num_envs": 16, 61 | "max_t": null, 62 | "max_frame": 850000 63 | }], 64 | "body": { 65 | "product": "outer", 66 | "num": 1 67 | }, 68 | "meta": { 69 | "distributed": false, 70 | "eval_frequency": 10000, 71 | "log_frequency": 10000, 72 | "rigorous_eval": 0, 73 | "max_trial": 1, 74 | "max_session": 1 75 | } 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/dqn/dqn_lunar.json: -------------------------------------------------------------------------------- 1 | { 2 | "dqn_concat_lunar": { 3 | "agent": [{ 4 | "name": "DQN", 5 | "algorithm": { 6 | "name": "DQN", 7 | "action_pdtype": "Argmax", 8 | "action_policy": "epsilon_greedy", 9 | "explore_var_spec": { 10 | "name": "linear_decay", 11 | "start_val": 1.0, 12 | "end_val": 0.01, 13 | "start_step": 0, 14 | "end_step": 50000 15 | }, 16 | "gamma": 0.99, 17 | "training_batch_iter": 1, 18 | "training_iter": 1, 19 | "training_frequency": 1, 20 | "training_start_step": 32 21 | }, 22 | "memory": { 23 | "name": "Replay", 24 | "batch_size": 32, 25 | "max_size": 50000, 26 | "use_cer": false 27 | }, 28 | "net": { 29 | "type": "MLPNet", 30 | "hid_layers": [256, 128], 31 | "hid_layers_activation": "relu", 32 | "clip_grad_val": 10.0, 33 | "loss_spec": { 34 | "name": "SmoothL1Loss" 35 | }, 36 | "optim_spec": { 37 | "name": "Adam", 38 | "lr": 1e-3 39 | }, 40 | "lr_scheduler_spec": null, 41 | "update_type": "replace", 42 | "update_frequency": 100, 43 | "gpu": false 44 | } 45 | }], 46 | "env": [{ 47 | "name": "LunarLander-v2", 48 | "frame_op": "concat", 49 | "frame_op_len": 4, 50 | "max_t": null, 51 | "max_frame": 300000, 52 | "normalize_state": false 53 | }], 54 | "body": { 55 | "product": "outer", 56 | "num": 1 57 | }, 58 | "meta": { 59 | "distributed": false, 60 | "log_frequency": 1000, 61 | "eval_frequency": 1000, 62 | "max_session": 4, 63 | "max_trial": 1, 64 | "search": "RandomSearch", 65 | }, 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/dqn/dqn_per_atari.json: -------------------------------------------------------------------------------- 1 | { 2 | "dqn_per_atari": { 3 | "agent": [{ 4 | "name": "DQN", 5 | "algorithm": { 6 | "name": "DQN", 7 | "action_pdtype": "Argmax", 8 | "action_policy": "epsilon_greedy", 9 | "explore_var_spec": { 10 | "name": "linear_decay", 11 | "start_val": 1.0, 12 | "end_val": 0.01, 13 | "start_step": 10000, 14 | "end_step": 1000000 15 | }, 16 | "gamma": 0.99, 17 | "training_batch_iter": 1, 18 | "training_iter": 4, 19 | 
"training_frequency": 4, 20 | "training_start_step": 10000 21 | }, 22 | "memory": { 23 | "name": "PrioritizedReplay", 24 | "alpha": 0.6, 25 | "epsilon": 0.0001, 26 | "batch_size": 32, 27 | "max_size": 200000, 28 | "use_cer": false 29 | }, 30 | "net": { 31 | "type": "ConvNet", 32 | "conv_hid_layers": [ 33 | [32, 8, 4, 0, 1], 34 | [64, 4, 2, 0, 1], 35 | [64, 3, 1, 0, 1] 36 | ], 37 | "fc_hid_layers": [256], 38 | "hid_layers_activation": "relu", 39 | "init_fn": null, 40 | "batch_norm": false, 41 | "clip_grad_val": 10.0, 42 | "loss_spec": { 43 | "name": "SmoothL1Loss" 44 | }, 45 | "optim_spec": { 46 | "name": "Adam", 47 | "lr": 2.5e-5, 48 | }, 49 | "lr_scheduler_spec": null, 50 | "update_type": "replace", 51 | "update_frequency": 1000, 52 | "gpu": true 53 | } 54 | }], 55 | "env": [{ 56 | "name": "${env}", 57 | "frame_op": "concat", 58 | "frame_op_len": 4, 59 | "reward_scale": "sign", 60 | "num_envs": 16, 61 | "max_t": null, 62 | "max_frame": 1e7 63 | }], 64 | "body": { 65 | "product": "outer", 66 | "num": 1 67 | }, 68 | "meta": { 69 | "distributed": false, 70 | "eval_frequency": 10000, 71 | "log_frequency": 10000, 72 | "rigorous_eval": 0, 73 | "max_session": 4, 74 | "max_trial": 1 75 | }, 76 | "spec_params": { 77 | "env": [ 78 | "BreakoutNoFrameskip-v4", "PongNoFrameskip-v4", "QbertNoFrameskip-v4", "SeaquestNoFrameskip-v4" 79 | ] 80 | } 81 | }, 82 | } 83 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/dqn/dqn_per_pong.json: -------------------------------------------------------------------------------- 1 | { 2 | "dqn_per_pong": { 3 | "agent": [{ 4 | "name": "DQN", 5 | "algorithm": { 6 | "name": "DQN", 7 | "action_pdtype": "Argmax", 8 | "action_policy": "epsilon_greedy", 9 | "explore_var_spec": { 10 | "name": "linear_decay", 11 | "start_val": 1.0, 12 | "end_val": 0.01, 13 | "start_step": 10000, 14 | "end_step": 1000000 15 | }, 16 | "gamma": 0.99, 17 | "training_batch_iter": 1, 18 | "training_iter": 4, 19 | "training_frequency": 4, 20 | "training_start_step": 10000 21 | }, 22 | "memory": { 23 | "name": "PrioritizedReplay", 24 | "alpha": 0.6, 25 | "epsilon": 0.0001, 26 | "batch_size": 32, 27 | "max_size": 200000, 28 | "use_cer": false 29 | }, 30 | "net": { 31 | "type": "ConvNet", 32 | "conv_hid_layers": [ 33 | [32, 8, 4, 0, 1], 34 | [64, 4, 2, 0, 1], 35 | [64, 3, 1, 0, 1] 36 | ], 37 | "fc_hid_layers": [256], 38 | "hid_layers_activation": "relu", 39 | "init_fn": null, 40 | "batch_norm": false, 41 | "clip_grad_val": 10.0, 42 | "loss_spec": { 43 | "name": "SmoothL1Loss" 44 | }, 45 | "optim_spec": { 46 | "name": "Adam", 47 | "lr": 2.5e-5, 48 | }, 49 | "lr_scheduler_spec": null, 50 | "update_type": "replace", 51 | "update_frequency": 1000, 52 | "gpu": true 53 | } 54 | }], 55 | "env": [{ 56 | "name": "PongNoFrameskip-v4", 57 | "frame_op": "concat", 58 | "frame_op_len": 4, 59 | "reward_scale": "sign", 60 | "num_envs": 16, 61 | "max_t": null, 62 | "max_frame": 4e6 63 | }], 64 | "body": { 65 | "product": "outer", 66 | "num": 1 67 | }, 68 | "meta": { 69 | "distributed": false, 70 | "eval_frequency": 10000, 71 | "log_frequency": 10000, 72 | "max_session": 4, 73 | "max_trial": 1 74 | } 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/dqn/dqn_per_qbert.json: -------------------------------------------------------------------------------- 1 | { 2 | "dqn_per_qbert": { 3 | "agent": [{ 4 | "name": "DQN", 5 | "algorithm": { 6 | "name": "DQN", 7 | "action_pdtype": "Argmax", 8 | 
"action_policy": "epsilon_greedy", 9 | "explore_var_spec": { 10 | "name": "linear_decay", 11 | "start_val": 1.0, 12 | "end_val": 0.01, 13 | "start_step": 10000, 14 | "end_step": 1000000 15 | }, 16 | "gamma": 0.99, 17 | "training_batch_iter": 1, 18 | "training_iter": 4, 19 | "training_frequency": 4, 20 | "training_start_step": 10000 21 | }, 22 | "memory": { 23 | "name": "PrioritizedReplay", 24 | "alpha": 0.6, 25 | "epsilon": 0.0001, 26 | "batch_size": 32, 27 | "max_size": 200000, 28 | "use_cer": false 29 | }, 30 | "net": { 31 | "type": "ConvNet", 32 | "conv_hid_layers": [ 33 | [32, 8, 4, 0, 1], 34 | [64, 4, 2, 0, 1], 35 | [64, 3, 1, 0, 1] 36 | ], 37 | "fc_hid_layers": [256], 38 | "hid_layers_activation": "relu", 39 | "init_fn": null, 40 | "batch_norm": false, 41 | "clip_grad_val": 10.0, 42 | "loss_spec": { 43 | "name": "SmoothL1Loss" 44 | }, 45 | "optim_spec": { 46 | "name": "Adam", 47 | "lr": 2.5e-5, 48 | }, 49 | "lr_scheduler_spec": null, 50 | "update_type": "replace", 51 | "update_frequency": 1000, 52 | "gpu": true 53 | } 54 | }], 55 | "env": [{ 56 | "name": "QbertNoFrameskip-v4", 57 | "frame_op": "concat", 58 | "frame_op_len": 4, 59 | "reward_scale": "sign", 60 | "num_envs": 16, 61 | "max_t": null, 62 | "max_frame": 4e6 63 | }], 64 | "body": { 65 | "product": "outer", 66 | "num": 1 67 | }, 68 | "meta": { 69 | "distributed": false, 70 | "eval_frequency": 10000, 71 | "log_frequency": 10000, 72 | "rigorous_eval": 0, 73 | "max_session": 4, 74 | "max_trial": 1 75 | } 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/dqn/dqn_pong.json: -------------------------------------------------------------------------------- 1 | { 2 | "dqn_pong": { 3 | "agent": [{ 4 | "name": "DQN", 5 | "algorithm": { 6 | "name": "DQN", 7 | "action_pdtype": "Argmax", 8 | "action_policy": "epsilon_greedy", 9 | "explore_var_spec": { 10 | "name": "linear_decay", 11 | "start_val": 1.0, 12 | "end_val": 0.01, 13 | "start_step": 10000, 14 | "end_step": 1000000 15 | }, 16 | "gamma": 0.99, 17 | "training_batch_iter": 1, 18 | "training_iter": 4, 19 | "training_frequency": 4, 20 | "training_start_step": 10000 21 | }, 22 | "memory": { 23 | "name": "Replay", 24 | "batch_size": 32, 25 | "max_size": 200000, 26 | "use_cer": false 27 | }, 28 | "net": { 29 | "type": "ConvNet", 30 | "conv_hid_layers": [ 31 | [32, 8, 4, 0, 1], 32 | [64, 4, 2, 0, 1], 33 | [64, 3, 1, 0, 1] 34 | ], 35 | "fc_hid_layers": [256], 36 | "hid_layers_activation": "relu", 37 | "init_fn": null, 38 | "batch_norm": false, 39 | "clip_grad_val": 10.0, 40 | "loss_spec": { 41 | "name": "SmoothL1Loss" 42 | }, 43 | "optim_spec": { 44 | "name": "Adam", 45 | "lr": 1e-4, 46 | }, 47 | "lr_scheduler_spec": null, 48 | "update_type": "replace", 49 | "update_frequency": 1000, 50 | "gpu": true 51 | } 52 | }], 53 | "env": [{ 54 | "name": "PongNoFrameskip-v4", 55 | "frame_op": "concat", 56 | "frame_op_len": 4, 57 | "reward_scale": "sign", 58 | "num_envs": 16, 59 | "max_t": null, 60 | "max_frame": 4e6 61 | }], 62 | "body": { 63 | "product": "outer", 64 | "num": 1 65 | }, 66 | "meta": { 67 | "distributed": false, 68 | "eval_frequency": 10000, 69 | "log_frequency": 10000, 70 | "max_session": 4, 71 | "max_trial": 1, 72 | } 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/dqn/dqn_qbert.json: -------------------------------------------------------------------------------- 1 | { 2 | "dqn_qbert": { 3 | "agent": [{ 4 | "name": "DQN", 5 | 
"algorithm": { 6 | "name": "DQN", 7 | "action_pdtype": "Argmax", 8 | "action_policy": "epsilon_greedy", 9 | "explore_var_spec": { 10 | "name": "linear_decay", 11 | "start_val": 1.0, 12 | "end_val": 0.01, 13 | "start_step": 10000, 14 | "end_step": 1000000 15 | }, 16 | "gamma": 0.99, 17 | "training_batch_iter": 1, 18 | "training_iter": 4, 19 | "training_frequency": 4, 20 | "training_start_step": 10000 21 | }, 22 | "memory": { 23 | "name": "Replay", 24 | "batch_size": 32, 25 | "max_size": 200000, 26 | "use_cer": false 27 | }, 28 | "net": { 29 | "type": "ConvNet", 30 | "conv_hid_layers": [ 31 | [32, 8, 4, 0, 1], 32 | [64, 4, 2, 0, 1], 33 | [64, 3, 1, 0, 1] 34 | ], 35 | "fc_hid_layers": [256], 36 | "hid_layers_activation": "relu", 37 | "init_fn": null, 38 | "batch_norm": false, 39 | "clip_grad_val": 10.0, 40 | "loss_spec": { 41 | "name": "SmoothL1Loss" 42 | }, 43 | "optim_spec": { 44 | "name": "Adam", 45 | "lr": 1e-4, 46 | }, 47 | "lr_scheduler_spec": null, 48 | "update_type": "replace", 49 | "update_frequency": 1000, 50 | "gpu": true 51 | } 52 | }], 53 | "env": [{ 54 | "name": "QbertNoFrameskip-v4", 55 | "frame_op": "concat", 56 | "frame_op_len": 4, 57 | "reward_scale": "sign", 58 | "num_envs": 16, 59 | "max_t": null, 60 | "max_frame": 4e6 61 | }], 62 | "body": { 63 | "product": "outer", 64 | "num": 1 65 | }, 66 | "meta": { 67 | "distributed": false, 68 | "eval_frequency": 10000, 69 | "log_frequency": 10000, 70 | "rigorous_eval": 0, 71 | "max_session": 4, 72 | "max_trial": 1, 73 | } 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/dqn/dqn_videopinball.json: -------------------------------------------------------------------------------- 1 | { 2 | "dqn_videopinball": { 3 | "agent": [{ 4 | "name": "VanillaDQN", 5 | "algorithm": { 6 | "name": "VanillaDQN", 7 | "action_pdtype": "Categorical", 8 | "action_policy": "boltzmann", 9 | "explore_var_spec": { 10 | "name": "linear_decay", 11 | "start_val": 5.0, 12 | "end_val": 0.5, 13 | "start_step": 1000, 14 | "end_step": 100000 15 | }, 16 | "gamma": 0.99, 17 | "training_batch_iter": 8, 18 | "training_iter": 4, 19 | "training_frequency": 4, 20 | "training_start_step": 32 21 | }, 22 | "memory": { 23 | "name": "Replay", 24 | "batch_size": 64, 25 | "max_size": 10000, 26 | "use_cer": false 27 | }, 28 | "net": { 29 | "type": "ConvNet", 30 | "conv_hid_layers": [ 31 | [32, 8, 4, 0, 1], 32 | [64, 4, 2, 0, 1], 33 | [32, 3, 1, 0, 1] 34 | ], 35 | "fc_hid_layers": [512], 36 | "hid_layers_activation": "relu", 37 | "init_fn": "orthogonal_", 38 | "shared": true, 39 | "normalize": true, 40 | "batch_norm": false, 41 | "clip_grad_val": 0.5, 42 | "loss_spec": { 43 | "name": "MSELoss" 44 | }, 45 | "optim_spec": { 46 | "name": "Adam", 47 | "lr": 0.01 48 | }, 49 | "lr_scheduler_spec": { 50 | "name": "LinearToZero", 51 | "frame": 10000 52 | }, 53 | "gpu": true 54 | } 55 | }], 56 | "env": [{ 57 | "name": "VideoPinball-v0", 58 | "frame_op": "concat", 59 | "frame_op_len": 4, 60 | "reward_scale": "sign", 61 | "num_envs": 16, 62 | "max_t": null, 63 | "max_frame": 850000 64 | }], 65 | "body": { 66 | "product": "outer", 67 | "num": 1 68 | }, 69 | "meta": { 70 | "distributed": false, 71 | "eval_frequency": 500, 72 | "max_trial": 1, 73 | "max_session": 1 74 | } 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/dqn/dueling_ddqn_per_pong.json: -------------------------------------------------------------------------------- 1 | { 2 | 
"dueling_ddqn_per_pong": { 3 | "agent": [{ 4 | "name": "DuelingDoubleDQN", 5 | "algorithm": { 6 | "name": "DoubleDQN", 7 | "action_pdtype": "Argmax", 8 | "action_policy": "epsilon_greedy", 9 | "explore_var_spec": { 10 | "name": "linear_decay", 11 | "start_val": 1.0, 12 | "end_val": 0.01, 13 | "start_step": 10000, 14 | "end_step": 1000000 15 | }, 16 | "gamma": 0.99, 17 | "training_batch_iter": 1, 18 | "training_iter": 4, 19 | "training_frequency": 4, 20 | "training_start_step": 10000 21 | }, 22 | "memory": { 23 | "name": "PrioritizedReplay", 24 | "alpha": 0.6, 25 | "epsilon": 0.0001, 26 | "batch_size": 32, 27 | "max_size": 200000, 28 | "use_cer": false, 29 | }, 30 | "net": { 31 | "type": "DuelingConvNet", 32 | "conv_hid_layers": [ 33 | [32, 8, 4, 0, 1], 34 | [64, 4, 2, 0, 1], 35 | [64, 3, 1, 0, 1] 36 | ], 37 | "fc_hid_layers": [256], 38 | "hid_layers_activation": "relu", 39 | "init_fn": null, 40 | "batch_norm": false, 41 | "clip_grad_val": 10.0, 42 | "loss_spec": { 43 | "name": "SmoothL1Loss" 44 | }, 45 | "optim_spec": { 46 | "name": "Adam", 47 | "lr": 2.5e-5, 48 | }, 49 | "lr_scheduler_spec": null, 50 | "update_type": "replace", 51 | "update_frequency": 1000, 52 | "gpu": true 53 | } 54 | }], 55 | "env": [{ 56 | "name": "PongNoFrameskip-v4", 57 | "frame_op": "concat", 58 | "frame_op_len": 4, 59 | "reward_scale": "sign", 60 | "num_envs": 16, 61 | "max_t": null, 62 | "max_frame": 4e6 63 | }], 64 | "body": { 65 | "product": "outer", 66 | "num": 1 67 | }, 68 | "meta": { 69 | "distributed": false, 70 | "eval_frequency": 10000, 71 | "log_frequency": 10000, 72 | "max_session": 4, 73 | "max_trial": 1 74 | } 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/dqn/dueling_dqn_pong.json: -------------------------------------------------------------------------------- 1 | { 2 | "dueling_dqn_pong": { 3 | "agent": [{ 4 | "name": "DuelingDQN", 5 | "algorithm": { 6 | "name": "DQN", 7 | "action_pdtype": "Argmax", 8 | "action_policy": "epsilon_greedy", 9 | "explore_var_spec": { 10 | "name": "linear_decay", 11 | "start_val": 1.0, 12 | "end_val": 0.01, 13 | "start_step": 10000, 14 | "end_step": 1000000 15 | }, 16 | "gamma": 0.99, 17 | "training_batch_iter": 1, 18 | "training_iter": 4, 19 | "training_frequency": 4, 20 | "training_start_step": 10000 21 | }, 22 | "memory": { 23 | "name": "Replay", 24 | "batch_size": 32, 25 | "max_size": 200000, 26 | "use_cer": false 27 | }, 28 | "net": { 29 | "type": "DuelingConvNet", 30 | "conv_hid_layers": [ 31 | [32, 8, 4, 0, 1], 32 | [64, 4, 2, 0, 1], 33 | [64, 3, 1, 0, 1] 34 | ], 35 | "fc_hid_layers": [256], 36 | "hid_layers_activation": "relu", 37 | "init_fn": null, 38 | "batch_norm": false, 39 | "clip_grad_val": 10.0, 40 | "loss_spec": { 41 | "name": "SmoothL1Loss" 42 | }, 43 | "optim_spec": { 44 | "name": "Adam", 45 | "lr": 1e-4, 46 | }, 47 | "lr_scheduler_spec": null, 48 | "update_type": "replace", 49 | "update_frequency": 1000, 50 | "gpu": true 51 | } 52 | }], 53 | "env": [{ 54 | "name": "PongNoFrameskip-v4", 55 | "frame_op": "concat", 56 | "frame_op_len": 4, 57 | "reward_scale": "sign", 58 | "num_envs": 16, 59 | "max_t": null, 60 | "max_frame": 4e6 61 | }], 62 | "body": { 63 | "product": "outer", 64 | "num": 1 65 | }, 66 | "meta": { 67 | "distributed": false, 68 | "eval_frequency": 10000, 69 | "log_frequency": 10000, 70 | "max_session": 4, 71 | "max_trial": 1, 72 | } 73 | } 74 | } 75 | -------------------------------------------------------------------------------- 
/slm_lab/spec/benchmark/ppo/ppo_cartpole.json: -------------------------------------------------------------------------------- 1 | { 2 | "ppo_shared_cartpole": { 3 | "agent": [{ 4 | "name": "PPO", 5 | "algorithm": { 6 | "name": "PPO", 7 | "action_pdtype": "default", 8 | "action_policy": "default", 9 | "explore_var_spec": null, 10 | "gamma": 0.99, 11 | "lam": 0.95, 12 | "clip_eps_spec": { 13 | "name": "no_decay", 14 | "start_val": 0.2, 15 | "end_val": 0.05, 16 | "start_step": 0, 17 | "end_step": 10000, 18 | }, 19 | "entropy_coef_spec": { 20 | "name": "linear_decay", 21 | "start_val": 0.01, 22 | "end_val": 0.001, 23 | "start_step": 1000, 24 | "end_step": 5000, 25 | }, 26 | "val_loss_coef": 0.1, 27 | "time_horizon": 32, 28 | "minibatch_size": 16, 29 | "training_epoch": 4 30 | }, 31 | "memory": { 32 | "name": "OnPolicyBatchReplay", 33 | }, 34 | "net": { 35 | "type": "MLPNet", 36 | "shared": true, 37 | "hid_layers": [64], 38 | "hid_layers_activation": "relu", 39 | "clip_grad_val": 0.5, 40 | "use_same_optim": false, 41 | "loss_spec": { 42 | "name": "MSELoss" 43 | }, 44 | "actor_optim_spec": { 45 | "name": "Adam", 46 | "lr": 0.02 47 | }, 48 | "critic_optim_spec": { 49 | "name": "Adam", 50 | "lr": 0.02 51 | }, 52 | "lr_scheduler_spec": null, 53 | "gpu": false 54 | } 55 | }], 56 | "env": [{ 57 | "name": "CartPole-v0", 58 | "max_t": null, 59 | "max_frame": 30000, 60 | }], 61 | "body": { 62 | "product": "outer", 63 | "num": 1 64 | }, 65 | "meta": { 66 | "distributed": false, 67 | "eval_frequency": 100, 68 | "max_session": 4, 69 | "max_trial": 1 70 | }, 71 | "search": { 72 | "agent": [{ 73 | "algorithm": { 74 | "lam__grid_search": [0.7, 0.8, 0.9, 0.95], 75 | "training_frequency__grid_search": [16, 32] 76 | }, 77 | "net": { 78 | "actor_optim_spec": { 79 | "lr__grid_search": [5e-2, 1e-2, 5e-3, 1e-3] 80 | } 81 | } 82 | }] 83 | }, 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/ppo/ppo_lunar.json: -------------------------------------------------------------------------------- 1 | { 2 | "ppo_lunar": { 3 | "agent": [{ 4 | "name": "PPO", 5 | "algorithm": { 6 | "name": "PPO", 7 | "action_pdtype": "default", 8 | "action_policy": "default", 9 | "explore_var_spec": null, 10 | "gamma": 0.99, 11 | "lam": 0.95, 12 | "clip_eps_spec": { 13 | "name": "no_decay", 14 | "start_val": 0.20, 15 | "end_val": 0.0, 16 | "start_step": 10000, 17 | "end_step": 300000 18 | }, 19 | "entropy_coef_spec": { 20 | "name": "no_decay", 21 | "start_val": 0.01, 22 | "end_val": 0.01, 23 | "start_step": 0, 24 | "end_step": 0 25 | }, 26 | "val_loss_coef": 1.0, 27 | "time_horizon": 128, 28 | "minibatch_size": 256, 29 | "training_epoch": 10 30 | }, 31 | "memory": { 32 | "name": "OnPolicyBatchReplay", 33 | }, 34 | "net": { 35 | "type": "MLPNet", 36 | "shared": false, 37 | "hid_layers": [64, 64, 32], 38 | "hid_layers_activation": "relu", 39 | "init_fn": "orthogonal_", 40 | "batch_norm": false, 41 | "clip_grad_val": 0.5, 42 | "use_same_optim": true, 43 | "loss_spec": { 44 | "name": "MSELoss" 45 | }, 46 | "actor_optim_spec": { 47 | "name": "Adam", 48 | "lr": 5e-4, 49 | }, 50 | "critic_optim_spec": { 51 | "name": "Adam", 52 | "lr": 5e-4, 53 | }, 54 | "lr_scheduler_spec": null, 55 | "gpu": false 56 | } 57 | }], 58 | "env": [{ 59 | "name": "LunarLander-v2", 60 | "frame_op": "concat", 61 | "frame_op_len": 4, 62 | "max_t": null, 63 | "max_frame": 300000, 64 | "num_envs": 8, 65 | "normalize_state": false 66 | }], 67 | "body": { 68 | "product": "outer", 69 | "num": 1 70 | }, 71 | 
"meta": { 72 | "distributed": false, 73 | "log_frequency": 1000, 74 | "eval_frequency": 1000, 75 | "max_session": 4, 76 | "max_trial": 1 77 | } 78 | }, 79 | } 80 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/ppo/ppo_pong.json: -------------------------------------------------------------------------------- 1 | { 2 | "ppo_pong": { 3 | "agent": [{ 4 | "name": "PPO", 5 | "algorithm": { 6 | "name": "PPO", 7 | "action_pdtype": "default", 8 | "action_policy": "default", 9 | "explore_var_spec": null, 10 | "gamma": 0.99, 11 | "lam": 0.70, 12 | "clip_eps_spec": { 13 | "name": "no_decay", 14 | "start_val": 0.10, 15 | "end_val": 0.10, 16 | "start_step": 0, 17 | "end_step": 0 18 | }, 19 | "entropy_coef_spec": { 20 | "name": "no_decay", 21 | "start_val": 0.01, 22 | "end_val": 0.01, 23 | "start_step": 0, 24 | "end_step": 0 25 | }, 26 | "val_loss_coef": 0.5, 27 | "time_horizon": 128, 28 | "minibatch_size": 256, 29 | "training_epoch": 4 30 | }, 31 | "memory": { 32 | "name": "OnPolicyBatchReplay", 33 | }, 34 | "net": { 35 | "type": "ConvNet", 36 | "shared": true, 37 | "conv_hid_layers": [ 38 | [32, 8, 4, 0, 1], 39 | [64, 4, 2, 0, 1], 40 | [32, 3, 1, 0, 1] 41 | ], 42 | "fc_hid_layers": [512], 43 | "hid_layers_activation": "relu", 44 | "init_fn": "orthogonal_", 45 | "normalize": true, 46 | "batch_norm": false, 47 | "clip_grad_val": 0.5, 48 | "use_same_optim": false, 49 | "loss_spec": { 50 | "name": "MSELoss" 51 | }, 52 | "actor_optim_spec": { 53 | "name": "Adam", 54 | "lr": 2.5e-4, 55 | }, 56 | "critic_optim_spec": { 57 | "name": "Adam", 58 | "lr": 2.5e-4, 59 | }, 60 | "lr_scheduler_spec": { 61 | "name": "LinearToZero", 62 | "frame": 1e7 63 | }, 64 | "gpu": true 65 | } 66 | }], 67 | "env": [{ 68 | "name": "PongNoFrameskip-v4", 69 | "frame_op": "concat", 70 | "frame_op_len": 4, 71 | "reward_scale": "sign", 72 | "num_envs": 16, 73 | "max_t": null, 74 | "max_frame": 1e7 75 | }], 76 | "body": { 77 | "product": "outer", 78 | "num": 1 79 | }, 80 | "meta": { 81 | "distributed": false, 82 | "log_frequency": 10000, 83 | "eval_frequency": 10000, 84 | "max_session": 4, 85 | "max_trial": 1, 86 | } 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/ppo/ppo_qbert.json: -------------------------------------------------------------------------------- 1 | { 2 | "ppo_qbert": { 3 | "agent": [{ 4 | "name": "PPO", 5 | "algorithm": { 6 | "name": "PPO", 7 | "action_pdtype": "default", 8 | "action_policy": "default", 9 | "explore_var_spec": null, 10 | "gamma": 0.99, 11 | "lam": 0.70, 12 | "clip_eps_spec": { 13 | "name": "no_decay", 14 | "start_val": 0.10, 15 | "end_val": 0.10, 16 | "start_step": 0, 17 | "end_step": 0 18 | }, 19 | "entropy_coef_spec": { 20 | "name": "no_decay", 21 | "start_val": 0.01, 22 | "end_val": 0.01, 23 | "start_step": 0, 24 | "end_step": 0 25 | }, 26 | "val_loss_coef": 0.5, 27 | "time_horizon": 128, 28 | "minibatch_size": 256, 29 | "training_epoch": 4 30 | }, 31 | "memory": { 32 | "name": "OnPolicyBatchReplay", 33 | }, 34 | "net": { 35 | "type": "ConvNet", 36 | "shared": true, 37 | "conv_hid_layers": [ 38 | [32, 8, 4, 0, 1], 39 | [64, 4, 2, 0, 1], 40 | [32, 3, 1, 0, 1] 41 | ], 42 | "fc_hid_layers": [512], 43 | "hid_layers_activation": "relu", 44 | "init_fn": "orthogonal_", 45 | "normalize": true, 46 | "batch_norm": false, 47 | "clip_grad_val": 0.5, 48 | "use_same_optim": false, 49 | "loss_spec": { 50 | "name": "MSELoss" 51 | }, 52 | "actor_optim_spec": { 53 | "name": "Adam", 54 
| "lr": 2.5e-4, 55 | }, 56 | "critic_optim_spec": { 57 | "name": "Adam", 58 | "lr": 2.5e-4, 59 | }, 60 | "lr_scheduler_spec": { 61 | "name": "LinearToZero", 62 | "frame": 1e7 63 | }, 64 | "gpu": true 65 | } 66 | }], 67 | "env": [{ 68 | "name": "QbertNoFrameskip-v4", 69 | "frame_op": "concat", 70 | "frame_op_len": 4, 71 | "reward_scale": "sign", 72 | "num_envs": 16, 73 | "max_t": null, 74 | "max_frame": 1e7 75 | }], 76 | "body": { 77 | "product": "outer", 78 | "num": 1 79 | }, 80 | "meta": { 81 | "distributed": false, 82 | "log_frequency": 10000, 83 | "eval_frequency": 10000, 84 | "rigorous_eval": 0, 85 | "max_session": 4, 86 | "max_trial": 1, 87 | } 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/ppo/ppo_videopinball.json: -------------------------------------------------------------------------------- 1 | { 2 | "ppo_videopinball": { 3 | "agent": [{ 4 | "name": "PPO", 5 | "algorithm": { 6 | "name": "PPO", 7 | "action_pdtype": "default", 8 | "action_policy": "default", 9 | "explore_var_spec": null, 10 | "gamma": 0.99, 11 | "lam": 0.70, 12 | "entropy_coef_spec": { 13 | "name": "no_decay", 14 | "start_val": 0.01, 15 | "end_val": 0.01, 16 | "start_step": 0, 17 | "end_step": 0 18 | }, 19 | "clip_eps_spec": { 20 | "name": "no_decay", 21 | "start_val": 0.1, 22 | "end_val": 0.1, 23 | "start_step": 0, 24 | "end_step": 0 25 | }, 26 | "val_loss_coef": 0.5, 27 | "time_horizon": 128, 28 | "minibatch_size": 256, 29 | "training_epoch": 4 30 | }, 31 | "memory": { 32 | "name": "OnPolicyBatchReplay" 33 | }, 34 | "net": { 35 | "type": "ConvNet", 36 | "conv_hid_layers": [ 37 | [32, 8, 4, 0, 1], 38 | [64, 4, 2, 0, 1], 39 | [32, 3, 1, 0, 1] 40 | ], 41 | "fc_hid_layers": [512], 42 | "hid_layers_activation": "relu", 43 | "init_fn": "orthogonal_", 44 | "shared": true, 45 | "batch_norm": false, 46 | "normalize": true, 47 | "clip_grad_val": 0.5, 48 | "use_same_optim": false, 49 | "loss_spec": { 50 | "name": "MSELoss" 51 | }, 52 | "actor_optim_spec": { 53 | "name": "Adam", 54 | "lr": 2.5e-4 55 | }, 56 | "critic_optim_spec": { 57 | "name": "Adam", 58 | "lr": 2.5e-4 59 | }, 60 | "lr_scheduler_spec": { 61 | "name": "LinearToZero", 62 | "frame": 1e7 63 | }, 64 | "gpu": true 65 | } 66 | }], 67 | "env": [{ 68 | "name": "VideoPinball-v0", 69 | "frame_op": "concat", 70 | "frame_op_len": 4, 71 | "reward_scale": "sign", 72 | "num_envs": 16, 73 | "max_t": null, 74 | "max_frame": 850000 75 | }], 76 | "body": { 77 | "product": "outer", 78 | "num": 1 79 | }, 80 | "meta": { 81 | "distributed": false, 82 | "eval_frequency": 10000, 83 | "log_frequency": 10000, 84 | "rigorous_eval": 0, 85 | "max_trial": 1, 86 | "max_session": 1 87 | } 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/reinforce/reinforce_cartpole.json: -------------------------------------------------------------------------------- 1 | { 2 | "reinforce_cartpole": { 3 | "agent": [{ 4 | "name": "Reinforce", 5 | "algorithm": { 6 | "name": "Reinforce", 7 | "action_pdtype": "default", 8 | "action_policy": "default", 9 | "center_return": true, 10 | "explore_var_spec": null, 11 | "gamma": 0.99, 12 | "entropy_coef_spec": { 13 | "name": "linear_decay", 14 | "start_val": 0.01, 15 | "end_val": 0.001, 16 | "start_step": 0, 17 | "end_step": 20000, 18 | }, 19 | "training_frequency": 1 20 | }, 21 | "memory": { 22 | "name": "OnPolicyReplay" 23 | }, 24 | "net": { 25 | "type": "MLPNet", 26 | "hid_layers": [64], 27 | 
"hid_layers_activation": "selu", 28 | "clip_grad_val": null, 29 | "loss_spec": { 30 | "name": "MSELoss" 31 | }, 32 | "optim_spec": { 33 | "name": "Adam", 34 | "lr": 0.002 35 | }, 36 | "lr_scheduler_spec": null 37 | } 38 | }], 39 | "env": [{ 40 | "name": "CartPole-v0", 41 | "max_t": null, 42 | "max_frame": 100000, 43 | }], 44 | "body": { 45 | "product": "outer", 46 | "num": 1 47 | }, 48 | "meta": { 49 | "distributed": false, 50 | "eval_frequency": 2000, 51 | "max_session": 4, 52 | "max_trial": 1, 53 | }, 54 | "search": { 55 | "agent": [{ 56 | "algorithm": { 57 | "gamma__grid_search": [0.1, 0.5, 0.7, 0.8, 0.90, 0.99, 0.999] 58 | } 59 | }] 60 | } 61 | }, 62 | "reinforce_baseline_cartpole": { 63 | "agent": [{ 64 | "name": "Reinforce", 65 | "algorithm": { 66 | "name": "Reinforce", 67 | "action_pdtype": "default", 68 | "action_policy": "default", 69 | "center_return": true, 70 | "explore_var_spec": null, 71 | "gamma": 0.99, 72 | "entropy_coef_spec": { 73 | "name": "linear_decay", 74 | "start_val": 0.01, 75 | "end_val": 0.001, 76 | "start_step": 0, 77 | "end_step": 20000, 78 | }, 79 | "training_frequency": 1 80 | }, 81 | "memory": { 82 | "name": "OnPolicyReplay" 83 | }, 84 | "net": { 85 | "type": "MLPNet", 86 | "hid_layers": [64], 87 | "hid_layers_activation": "selu", 88 | "clip_grad_val": null, 89 | "loss_spec": { 90 | "name": "MSELoss" 91 | }, 92 | "optim_spec": { 93 | "name": "Adam", 94 | "lr": 0.002 95 | }, 96 | "lr_scheduler_spec": null 97 | } 98 | }], 99 | "env": [{ 100 | "name": "CartPole-v0", 101 | "max_t": null, 102 | "max_frame": 100000, 103 | }], 104 | "body": { 105 | "product": "outer", 106 | "num": 1 107 | }, 108 | "meta": { 109 | "distributed": false, 110 | "eval_frequency": 2000, 111 | "max_session": 4, 112 | "max_trial": 1, 113 | }, 114 | "search": { 115 | "agent": [{ 116 | "algorithm": { 117 | "center_return__grid_search": [true, false] 118 | } 119 | }] 120 | } 121 | } 122 | } 123 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/reinforce/reinforce_videopinball.json: -------------------------------------------------------------------------------- 1 | { 2 | "reinforce_videopinball": { 3 | "agent": [{ 4 | "name": "Reinforce", 5 | "algorithm": { 6 | "name": "Reinforce", 7 | "action_pdtype": "default", 8 | "action_policy": "default", 9 | "center_return": true, 10 | "explore_var_spec": null, 11 | "gamma": 0.99, 12 | "entropy_coef_spec": { 13 | "name": "linear_decay", 14 | "start_val": 0.01, 15 | "end_val": 0.001, 16 | "start_step": 1000, 17 | "end_step": 100000 18 | }, 19 | "training_frequency": 1 20 | }, 21 | "memory": { 22 | "name": "OnPolicyReplay", 23 | "batch_size": 64 24 | }, 25 | "net": { 26 | "type": "ConvNet", 27 | "shared": true, 28 | "conv_hid_layers": [ 29 | [32, 8, 4, 0, 1], 30 | [64, 4, 2, 0, 1], 31 | [32, 3, 1, 0, 1] 32 | ], 33 | "fc_hid_layers": [512], 34 | "hid_layers_activation": "relu", 35 | "init_fn": "orthogonal_", 36 | "normalize": true, 37 | "batch_norm": false, 38 | "clip_grad_val": 0.5, 39 | "loss_spec": { 40 | "name": "MSELoss" 41 | }, 42 | "optim_spec": { 43 | "name": "Adam", 44 | "lr": 0.002 45 | }, 46 | "lr_scheduler_spec": null, 47 | "gpu": true 48 | } 49 | }], 50 | "env": [{ 51 | "name": "VideoPinball-v0", 52 | "frame_op": "concat", 53 | "frame_op_len": 4, 54 | "reward_scale": "sign", 55 | "num_envs": 16, 56 | "max_t": null, 57 | "max_frame": 850000 58 | }], 59 | "body": { 60 | "product": "outer", 61 | "num": 1 62 | }, 63 | "meta": { 64 | "distributed": false, 65 | "eval_frequency": 2000, 66 | 
"max_session": 1, 67 | "max_trial": 1 68 | } 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/sac/sac_halfcheetah.json: -------------------------------------------------------------------------------- 1 | { 2 | "sac_halfcheetah": { 3 | "agent": [{ 4 | "name": "SoftActorCritic", 5 | "algorithm": { 6 | "name": "SoftActorCritic", 7 | "action_pdtype": "default", 8 | "action_policy": "default", 9 | "gamma": 0.99, 10 | "training_frequency": 1 11 | }, 12 | "memory": { 13 | "name": "Replay", 14 | "batch_size": 256, 15 | "max_size": 1000000, 16 | "use_cer": true 17 | }, 18 | "net": { 19 | "type": "MLPNet", 20 | "hid_layers": [256, 256], 21 | "hid_layers_activation": "relu", 22 | "init_fn": "orthogonal_", 23 | "clip_grad_val": 0.5, 24 | "loss_spec": { 25 | "name": "MSELoss" 26 | }, 27 | "optim_spec": { 28 | "name": "Adam", 29 | "lr": 3e-4, 30 | }, 31 | "lr_scheduler_spec": null, 32 | "update_type": "polyak", 33 | "update_frequency": 1, 34 | "polyak_coef": 0.005, 35 | "gpu": false 36 | } 37 | }], 38 | "env": [{ 39 | "name": "RoboschoolHalfCheetah-v1", 40 | "num_envs": 8, 41 | "max_t": null, 42 | "max_frame": 2e6 43 | }], 44 | "body": { 45 | "product": "outer", 46 | "num": 1 47 | }, 48 | "meta": { 49 | "distributed": false, 50 | "log_frequency": 1000, 51 | "eval_frequency": 1000, 52 | "rigorous_eval": 0, 53 | "max_session": 4, 54 | "max_trial": 1, 55 | } 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/sac/sac_lunar.json: -------------------------------------------------------------------------------- 1 | { 2 | "sac_lunar": { 3 | "agent": [{ 4 | "name": "SoftActorCritic", 5 | "algorithm": { 6 | "name": "SoftActorCritic", 7 | "action_pdtype": "GumbelSoftmax", 8 | "action_policy": "default", 9 | "gamma": 0.99, 10 | "training_frequency": 1 11 | }, 12 | "memory": { 13 | "name": "Replay", 14 | "batch_size": 256, 15 | "max_size": 100000, 16 | "use_cer": true 17 | }, 18 | "net": { 19 | "type": "MLPNet", 20 | "hid_layers": [64, 64, 32], 21 | "hid_layers_activation": "relu", 22 | "init_fn": "orthogonal_", 23 | "clip_grad_val": 0.5, 24 | "loss_spec": { 25 | "name": "MSELoss" 26 | }, 27 | "optim_spec": { 28 | "name": "Adam", 29 | "lr": 0.005 30 | }, 31 | "lr_scheduler_spec": null, 32 | "update_type": "polyak", 33 | "update_frequency": 1, 34 | "polyak_coef": 0.005, 35 | "gpu": false 36 | } 37 | }], 38 | "env": [{ 39 | "name": "LunarLander-v2", 40 | "frame_op": "concat", 41 | "frame_op_len": 4, 42 | "max_t": null, 43 | "max_frame": 300000, 44 | "num_envs": 8, 45 | "normalize_state": false 46 | }], 47 | "body": { 48 | "product": "outer", 49 | "num": 1 50 | }, 51 | "meta": { 52 | "distributed": false, 53 | "log_frequency": 500, 54 | "eval_frequency": 500, 55 | "rigorous_eval": 0, 56 | "max_session": 4, 57 | "max_trial": 1, 58 | } 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/sac/sac_per_halfcheetah.json: -------------------------------------------------------------------------------- 1 | { 2 | "sac_per_halfcheetah": { 3 | "agent": [{ 4 | "name": "SoftActorCritic", 5 | "algorithm": { 6 | "name": "SoftActorCritic", 7 | "action_pdtype": "default", 8 | "action_policy": "default", 9 | "gamma": 0.99, 10 | "training_frequency": 1 11 | }, 12 | "memory": { 13 | "name": "PrioritizedReplay", 14 | "alpha": 0.6, 15 | "epsilon": 0.0001, 16 | "batch_size": 256, 17 | "max_size": 200000, 18 | "use_cer": true 
19 | }, 20 | "net": { 21 | "type": "MLPNet", 22 | "hid_layers": [256, 256], 23 | "hid_layers_activation": "relu", 24 | "init_fn": "orthogonal_", 25 | "clip_grad_val": 0.5, 26 | "loss_spec": { 27 | "name": "MSELoss" 28 | }, 29 | "optim_spec": { 30 | "name": "Adam", 31 | "lr": 7e-4 32 | }, 33 | "lr_scheduler_spec": null, 34 | "update_type": "polyak", 35 | "update_frequency": 1, 36 | "polyak_coef": 0.005, 37 | "gpu": false 38 | } 39 | }], 40 | "env": [{ 41 | "name": "RoboschoolHalfCheetah-v1", 42 | "num_envs": 8, 43 | "max_t": null, 44 | "max_frame": 2e6 45 | }], 46 | "body": { 47 | "product": "outer", 48 | "num": 1 49 | }, 50 | "meta": { 51 | "distributed": false, 52 | "log_frequency": 1000, 53 | "eval_frequency": 1000, 54 | "rigorous_eval": 0, 55 | "max_session": 4, 56 | "max_trial": 1 57 | } 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/sac/sac_per_halfcheetah_pybullet.json: -------------------------------------------------------------------------------- 1 | { 2 | "sac_per_halfcheetah": { 3 | "agent": [{ 4 | "name": "SoftActorCritic", 5 | "algorithm": { 6 | "name": "SoftActorCritic", 7 | "action_pdtype": "default", 8 | "action_policy": "default", 9 | "gamma": 0.99, 10 | "training_frequency": 1 11 | }, 12 | "memory": { 13 | "name": "PrioritizedReplay", 14 | "alpha": 0.6, 15 | "epsilon": 0.0001, 16 | "batch_size": 256, 17 | "max_size": 200000, 18 | "use_cer": true 19 | }, 20 | "net": { 21 | "type": "MLPNet", 22 | "hid_layers": [256, 256], 23 | "hid_layers_activation": "relu", 24 | "init_fn": "orthogonal_", 25 | "clip_grad_val": 0.5, 26 | "loss_spec": { 27 | "name": "MSELoss" 28 | }, 29 | "optim_spec": { 30 | "name": "Adam", 31 | "lr": 7e-4 32 | }, 33 | "lr_scheduler_spec": null, 34 | "update_type": "polyak", 35 | "update_frequency": 1, 36 | "polyak_coef": 0.005, 37 | "gpu": true 38 | } 39 | }], 40 | "env": [{ 41 | "name": "HalfCheetahBulletEnv-v0", 42 | "num_envs": 8, 43 | "max_t": null, 44 | "max_frame": 2e6 45 | }], 46 | "body": { 47 | "product": "outer", 48 | "num": 1 49 | }, 50 | "meta": { 51 | "distributed": false, 52 | "log_frequency": 1000, 53 | "eval_frequency": 1000, 54 | "rigorous_eval": 0, 55 | "max_session": 4, 56 | "max_trial": 1 57 | } 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/sac/sac_per_roboschool.json: -------------------------------------------------------------------------------- 1 | { 2 | "sac_per_roboschool": { 3 | "agent": [{ 4 | "name": "SoftActorCritic", 5 | "algorithm": { 6 | "name": "SoftActorCritic", 7 | "action_pdtype": "default", 8 | "action_policy": "default", 9 | "gamma": 0.99, 10 | "training_frequency": 1 11 | }, 12 | "memory": { 13 | "name": "PrioritizedReplay", 14 | "alpha": 0.6, 15 | "epsilon": 0.0001, 16 | "batch_size": 256, 17 | "max_size": 1000000, 18 | "use_cer": true 19 | }, 20 | "net": { 21 | "type": "MLPNet", 22 | "hid_layers": [256, 256], 23 | "hid_layers_activation": "relu", 24 | "init_fn": "orthogonal_", 25 | "clip_grad_val": 1.0, 26 | "loss_spec": { 27 | "name": "MSELoss" 28 | }, 29 | "optim_spec": { 30 | "name": "Adam", 31 | "lr": 1e-3, 32 | }, 33 | "lr_scheduler_spec": null, 34 | "update_type": "polyak", 35 | "update_frequency": 1, 36 | "polyak_coef": 0.005, 37 | "gpu": false 38 | } 39 | }], 40 | "env": [{ 41 | "name": "${env}", 42 | "num_envs": 8, 43 | "max_t": null, 44 | "max_frame": 2e6 45 | }], 46 | "body": { 47 | "product": "outer", 48 | "num": 1 49 | }, 50 | "meta": { 51 | 
"distributed": false, 52 | "log_frequency": 1000, 53 | "eval_frequency": 1000, 54 | "rigorous_eval": 0, 55 | "max_session": 4, 56 | "max_trial": 1 57 | }, 58 | "spec_params": { 59 | "env": [ 60 | "RoboschoolAnt-v1", "RoboschoolAtlasForwardWalk-v1", "RoboschoolHalfCheetah-v1", "RoboschoolHopper-v1", "RoboschoolInvertedDoublePendulum-v1", "RoboschoolInvertedPendulum-v1", "RoboschoolInvertedPendulumSwingup-v1", "RoboschoolReacher-v1", "RoboschoolWalker2d-v1" 61 | ] 62 | } 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/sac/sac_pong.json: -------------------------------------------------------------------------------- 1 | { 2 | "sac_pong": { 3 | "agent": [{ 4 | "name": "SoftActorCritic", 5 | "algorithm": { 6 | "name": "SoftActorCritic", 7 | "action_pdtype": "GumbelSoftmax", 8 | "action_policy": "default", 9 | "gamma": 0.99, 10 | "training_start_step": 1000, 11 | "training_frequency": 1 12 | }, 13 | "memory": { 14 | "name": "Replay", 15 | "batch_size": 512, 16 | "max_size": 1000000, 17 | "use_cer": false 18 | }, 19 | "net": { 20 | "type": "ConvNet", 21 | "shared": false, 22 | "conv_hid_layers": [ 23 | [32, 8, 4, 0, 1], 24 | [64, 4, 2, 0, 1], 25 | [32, 3, 1, 0, 1] 26 | ], 27 | "fc_hid_layers": [256, 256], 28 | "hid_layers_activation": "leakyrelu", 29 | "init_fn": "orthogonal_", 30 | "normalize": true, 31 | "batch_norm": true, 32 | "clip_grad_val": 0.5, 33 | "use_same_optim": false, 34 | "loss_spec": { 35 | "name": "SmoothL1Loss" 36 | }, 37 | "optim_spec": { 38 | "name": "Lookahead", 39 | "optimizer": "RAdam", 40 | "lr": 2.5e-4, 41 | }, 42 | "lr_scheduler_spec": null, 43 | "update_type": "polyak", 44 | "update_frequency": 1, 45 | "polyak_coef": 0.005, 46 | "gpu": true 47 | } 48 | }], 49 | "env": [{ 50 | "name": "PongNoFrameskip-v4", 51 | "frame_op": "concat", 52 | "frame_op_len": 4, 53 | "reward_scale": "sign", 54 | "num_envs": 4, 55 | "max_t": null, 56 | "max_frame": 1e6 57 | }], 58 | "body": { 59 | "product": "outer", 60 | "num": 1 61 | }, 62 | "meta": { 63 | "distributed": false, 64 | "log_frequency": 100, 65 | "eval_frequency": 100, 66 | "rigorous_eval": 0, 67 | "max_session": 4, 68 | "max_trial": 1, 69 | } 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/sarsa/sarsa_cartpole.json: -------------------------------------------------------------------------------- 1 | { 2 | "sarsa_epsilon_greedy_cartpole": { 3 | "agent": [{ 4 | "name": "SARSA", 5 | "algorithm": { 6 | "name": "SARSA", 7 | "action_pdtype": "Argmax", 8 | "action_policy": "epsilon_greedy", 9 | "explore_var_spec": { 10 | "name": "linear_decay", 11 | "start_val": 1.0, 12 | "end_val": 0.05, 13 | "start_step": 0, 14 | "end_step": 10000 15 | }, 16 | "gamma": 0.99, 17 | "training_frequency": 32 18 | }, 19 | "memory": { 20 | "name": "OnPolicyBatchReplay" 21 | }, 22 | "net": { 23 | "type": "MLPNet", 24 | "hid_layers": [64], 25 | "hid_layers_activation": "selu", 26 | "clip_grad_val": 0.5, 27 | "loss_spec": { 28 | "name": "MSELoss" 29 | }, 30 | "optim_spec": { 31 | "name": "RMSprop", 32 | "lr": 0.01 33 | }, 34 | "lr_scheduler_spec": null 35 | } 36 | }], 37 | "env": [{ 38 | "name": "CartPole-v0", 39 | "max_t": null, 40 | "max_frame": 100000 41 | }], 42 | "body": { 43 | "product": "outer", 44 | "num": 1 45 | }, 46 | "meta": { 47 | "distributed": false, 48 | "eval_frequency": 2000, 49 | "max_trial": 1, 50 | "max_session": 4 51 | }, 52 | "search": { 53 | "agent": [{ 54 | "net": { 55 | "optim_spec": 
{ 56 | "lr__grid_search": [0.0005, 0.001, 0.001, 0.005, 0.01, 0.05, 0.1] 57 | } 58 | } 59 | }] 60 | } 61 | }, 62 | "sarsa_boltzmann_cartpole": { 63 | "agent": [{ 64 | "name": "SARSA", 65 | "algorithm": { 66 | "name": "SARSA", 67 | "action_pdtype": "Argmax", 68 | "action_policy": "boltzmann", 69 | "explore_var_spec": { 70 | "name": "linear_decay", 71 | "start_val": 3.0, 72 | "end_val": 1.0, 73 | "start_step": 0, 74 | "end_step": 10000 75 | }, 76 | "gamma": 0.99, 77 | "training_frequency": 32 78 | }, 79 | "memory": { 80 | "name": "OnPolicyBatchReplay" 81 | }, 82 | "net": { 83 | "type": "MLPNet", 84 | "hid_layers": [64], 85 | "hid_layers_activation": "selu", 86 | "clip_grad_val": 0.5, 87 | "loss_spec": { 88 | "name": "MSELoss" 89 | }, 90 | "optim_spec": { 91 | "name": "RMSprop", 92 | "lr": 0.01 93 | }, 94 | "lr_scheduler_spec": null 95 | } 96 | }], 97 | "env": [{ 98 | "name": "CartPole-v0", 99 | "max_t": null, 100 | "max_frame": 100000 101 | }], 102 | "body": { 103 | "product": "outer", 104 | "num": 1 105 | }, 106 | "meta": { 107 | "distributed": false, 108 | "eval_frequency": 2000, 109 | "max_trial": 1, 110 | "max_session": 1 111 | } 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/sarsa/sarsa_videopinball.json: -------------------------------------------------------------------------------- 1 | { 2 | "sarsa_epsilon_greedy_videopinball": { 3 | "agent": [{ 4 | "name": "SARSA", 5 | "algorithm": { 6 | "name": "SARSA", 7 | "action_pdtype": "Argmax", 8 | "action_policy": "epsilon_greedy", 9 | "explore_var_spec": { 10 | "name": "linear_decay", 11 | "start_val": 1.0, 12 | "end_val": 0.05, 13 | "start_step": 1000, 14 | "end_step": 100000 15 | }, 16 | "gamma": 0.99, 17 | "training_frequency": 32 18 | }, 19 | "memory": { 20 | "name": "OnPolicyBatchReplay", 21 | "batch_size": 64 22 | }, 23 | "net": { 24 | "type": "ConvNet", 25 | "conv_hid_layers": [ 26 | [32, 8, 4, 0, 1], 27 | [64, 4, 2, 0, 1], 28 | [32, 3, 1, 0, 1] 29 | ], 30 | "fc_hid_layers": [512], 31 | "hid_layers_activation": "relu", 32 | "init_fn": "orthogonal_", 33 | "normalize": true, 34 | "batch_norm": false, 35 | "use_same_optim": false, 36 | "clip_grad_val": 0.5, 37 | "loss_spec": { 38 | "name": "MSELoss" 39 | }, 40 | "optim_spec": { 41 | "name": "RMSprop", 42 | "lr": 0.01 43 | }, 44 | "lr_scheduler_spec": null, 45 | "gpu": true 46 | } 47 | }], 48 | "env": [{ 49 | "name": "VideoPinball-v0", 50 | "frame_op": "concat", 51 | "frame_op_len": 4, 52 | "reward_scale": "sign", 53 | "num_envs": 16, 54 | "max_t": null, 55 | "max_frame": 850000 56 | }], 57 | "body": { 58 | "product": "outer", 59 | "num": 1 60 | }, 61 | "meta": { 62 | "distributed": false, 63 | "eval_frequency": 2000, 64 | "max_trial": 1, 65 | "max_session": 1 66 | } 67 | }, 68 | "sarsa_boltzmann_videopinball": { 69 | "agent": [{ 70 | "name": "SARSA", 71 | "algorithm": { 72 | "name": "SARSA", 73 | "action_pdtype": "Argmax", 74 | "action_policy": "boltzmann", 75 | "explore_var_spec": { 76 | "name": "linear_decay", 77 | "start_val": 1.0, 78 | "end_val": 0.05, 79 | "start_step": 1000, 80 | "end_step": 100000 81 | }, 82 | "gamma": 0.99, 83 | "training_frequency": 32 84 | }, 85 | "memory": { 86 | "name": "OnPolicyBatchReplay", 87 | "batch_size": 64 88 | }, 89 | "net": { 90 | "type": "ConvNet", 91 | "conv_hid_layers": [ 92 | [32, 8, 4, 0, 1], 93 | [64, 4, 2, 0, 1], 94 | [32, 3, 1, 0, 1] 95 | ], 96 | "fc_hid_layers": [512], 97 | "hid_layers_activation": "relu", 98 | "init_fn": "orthogonal_", 99 | "normalize": true, 
100 | "batch_norm": false, 101 | "use_same_optim": false, 102 | "clip_grad_val": 0.5, 103 | "loss_spec": { 104 | "name": "MSELoss" 105 | }, 106 | "optim_spec": { 107 | "name": "RMSprop", 108 | "lr": 0.01 109 | }, 110 | "lr_scheduler_spec": null, 111 | "gpu": true 112 | } 113 | }], 114 | "env": [{ 115 | "name": "VideoPinball-v0", 116 | "frame_op": "concat", 117 | "frame_op_len": 4, 118 | "reward_scale": "sign", 119 | "num_envs": 16, 120 | "max_t": null, 121 | "max_frame": 850000 122 | }], 123 | "body": { 124 | "product": "outer", 125 | "num": 1 126 | }, 127 | "meta": { 128 | "distributed": false, 129 | "eval_frequency": 2000, 130 | "max_trial": 1, 131 | "max_session": 1 132 | } 133 | } 134 | } 135 | -------------------------------------------------------------------------------- /slm_lab/spec/demo.json: -------------------------------------------------------------------------------- 1 | { 2 | "dqn_cartpole": { 3 | "agent": [{ 4 | "name": "DQN", 5 | "algorithm": { 6 | "name": "DQN", 7 | "action_pdtype": "Argmax", 8 | "action_policy": "epsilon_greedy", 9 | "explore_var_spec": { 10 | "name": "linear_decay", 11 | "start_val": 1.0, 12 | "end_val": 0.1, 13 | "start_step": 0, 14 | "end_step": 1000 15 | }, 16 | "gamma": 0.99, 17 | "training_batch_iter": 8, 18 | "training_iter": 4, 19 | "training_frequency": 4, 20 | "training_start_step": 32 21 | }, 22 | "memory": { 23 | "name": "Replay", 24 | "batch_size": 32, 25 | "max_size": 10000, 26 | "use_cer": true 27 | }, 28 | "net": { 29 | "type": "MLPNet", 30 | "hid_layers": [64], 31 | "hid_layers_activation": "selu", 32 | "clip_grad_val": 0.5, 33 | "loss_spec": { 34 | "name": "MSELoss" 35 | }, 36 | "optim_spec": { 37 | "name": "Adam", 38 | "lr": 0.02 39 | }, 40 | "lr_scheduler_spec": { 41 | "name": "StepLR", 42 | "step_size": 1000, 43 | "gamma": 0.9 44 | }, 45 | "update_type": "polyak", 46 | "update_frequency": 32, 47 | "polyak_coef": 0.1, 48 | "gpu": false 49 | } 50 | }], 51 | "env": [{ 52 | "name": "CartPole-v0", 53 | "max_t": null, 54 | "max_frame": 10000 55 | }], 56 | "body": { 57 | "product": "outer", 58 | "num": 1 59 | }, 60 | "meta": { 61 | "distributed": false, 62 | "eval_frequency": 500, 63 | "log_frequency": 500, 64 | "max_session": 2, 65 | "max_trial": 1 66 | }, 67 | "search": { 68 | "agent": [{ 69 | "algorithm": { 70 | "gamma__grid_search": [0.5, 0.7, 0.90, 0.95, 0.99] 71 | } 72 | }] 73 | } 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /slm_lab/spec/experimental/a3c/a3c_nstep_worker_search.json: -------------------------------------------------------------------------------- 1 | { 2 | "a3c_nstep_pong": { 3 | "agent": [{ 4 | "name": "A3C", 5 | "algorithm": { 6 | "name": "ActorCritic", 7 | "action_pdtype": "default", 8 | "action_policy": "default", 9 | "explore_var_spec": null, 10 | "gamma": 0.99, 11 | "lam": null, 12 | "num_step_returns": 5, 13 | "entropy_coef_spec": { 14 | "name": "no_decay", 15 | "start_val": 0.01, 16 | "end_val": 0.01, 17 | "start_step": 0, 18 | "end_step": 0 19 | }, 20 | "val_loss_coef": 0.5, 21 | "training_frequency": 5 22 | }, 23 | "memory": { 24 | "name": "OnPolicyBatchReplay", 25 | }, 26 | "net": { 27 | "type": "ConvNet", 28 | "shared": true, 29 | "conv_hid_layers": [ 30 | [32, 8, 4, 0, 1], 31 | [64, 4, 2, 0, 1], 32 | [32, 3, 1, 0, 1] 33 | ], 34 | "fc_hid_layers": [512], 35 | "hid_layers_activation": "relu", 36 | "init_fn": "orthogonal_", 37 | "normalize": true, 38 | "batch_norm": false, 39 | "clip_grad_val": 0.5, 40 | "use_same_optim": false, 41 | "loss_spec": { 42 
| "name": "MSELoss" 43 | }, 44 | "actor_optim_spec": { 45 | "name": "GlobalAdam", 46 | "lr": 1e-4 47 | }, 48 | "critic_optim_spec": { 49 | "name": "GlobalAdam", 50 | "lr": 1e-4 51 | }, 52 | "lr_scheduler_spec": null, 53 | "gpu": false 54 | } 55 | }], 56 | "env": [{ 57 | "name": "PongNoFrameskip-v4", 58 | "frame_op": "concat", 59 | "frame_op_len": 4, 60 | "reward_scale": "sign", 61 | "num_envs": 8, 62 | "max_t": null, 63 | "max_frame": 1e7 64 | }], 65 | "body": { 66 | "product": "outer", 67 | "num": 1 68 | }, 69 | "meta": { 70 | "distributed": "synced", 71 | "log_frequency": 50000, 72 | "eval_frequency": 50000, 73 | "max_session": 16, 74 | "max_trial": 1, 75 | }, 76 | "search": { 77 | "meta": { 78 | "max_session__grid_search": [2, 4, 8, 16, 32] 79 | } 80 | } 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /slm_lab/spec/experimental/dqn/ddqn_lunar.json: -------------------------------------------------------------------------------- 1 | { 2 | "ddqn_concat_lunar": { 3 | "agent": [{ 4 | "name": "DoubleDQN", 5 | "algorithm": { 6 | "name": "DoubleDQN", 7 | "action_pdtype": "Argmax", 8 | "action_policy": "epsilon_greedy", 9 | "explore_var_spec": { 10 | "name": "linear_decay", 11 | "start_val": 1.0, 12 | "end_val": 0.08, 13 | "start_step": 0, 14 | "end_step": 14000 15 | }, 16 | "gamma": 0.99, 17 | "training_batch_iter": 3, 18 | "training_iter": 4, 19 | "training_frequency": 4, 20 | "training_start_step": 32 21 | }, 22 | "memory": { 23 | "name": "Replay", 24 | "batch_size": 32, 25 | "max_size": 100000, 26 | "use_cer": false 27 | }, 28 | "net": { 29 | "type": "MLPNet", 30 | "hid_layers": [ 31 | 400, 32 | 200 33 | ], 34 | "hid_layers_activation": "relu", 35 | "clip_grad_val": null, 36 | "loss_spec": { 37 | "name": "MSELoss" 38 | }, 39 | "optim_spec": { 40 | "name": "Adam", 41 | "lr": 0.002 42 | }, 43 | "lr_scheduler_spec": { 44 | "name": "MultiStepLR", 45 | "milestones": [ 46 | 80000, 47 | 100000 48 | ], 49 | "gamma": 0.5 50 | }, 51 | "update_type": "polyak", 52 | "polyak_coef": 0.9, 53 | "gpu": false 54 | } 55 | }], 56 | "env": [{ 57 | "name": "LunarLander-v2", 58 | "frame_op": "concat", 59 | "frame_op_len": 4, 60 | "max_t": null, 61 | "max_frame": 250000 62 | }], 63 | "body": { 64 | "product": "outer", 65 | "num": 1 66 | }, 67 | "meta": { 68 | "distributed": false, 69 | "eval_frequency": 1000, 70 | "max_session": 4, 71 | "max_trial": 62, 72 | "search": "RandomSearch", 73 | }, 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /slm_lab/spec/experimental/dqn/ddqn_lunar_search.json: -------------------------------------------------------------------------------- 1 | { 2 | "ddqn_concat_lunar": { 3 | "agent": [{ 4 | "name": "DoubleDQN", 5 | "algorithm": { 6 | "name": "DoubleDQN", 7 | "action_pdtype": "Argmax", 8 | "action_policy": "epsilon_greedy", 9 | "explore_var_spec": { 10 | "name": "linear_decay", 11 | "start_val": 1.0, 12 | "end_val": 0.08, 13 | "start_step": 0, 14 | "end_step": 100000 15 | }, 16 | "gamma": 0.99, 17 | "training_batch_iter": 2, 18 | "training_iter": 4, 19 | "training_frequency": 4, 20 | "training_start_step": 32 21 | }, 22 | "memory": { 23 | "name": "Replay", 24 | "batch_size": 32, 25 | "max_size": 100000, 26 | "use_cer": false 27 | }, 28 | "net": { 29 | "type": "MLPNet", 30 | "hid_layers": [ 31 | 400, 32 | 200 33 | ], 34 | "hid_layers_activation": "relu", 35 | "clip_grad_val": 2.0, 36 | "loss_spec": { 37 | "name": "MSELoss" 38 | }, 39 | "optim_spec": { 40 | "name": "Adam", 41 | 
"lr": 0.001 42 | }, 43 | "lr_scheduler_spec": null, 44 | "update_type": "polyak", 45 | "update_frequency": 0.9, 46 | "gpu": false 47 | } 48 | }], 49 | "env": [{ 50 | "name": "LunarLander-v2", 51 | "frame_op": "concat", 52 | "frame_op_len": 4, 53 | "max_t": null, 54 | "max_frame": 300000, 55 | "normalize_state": false 56 | }], 57 | "body": { 58 | "product": "outer", 59 | "num": 1 60 | }, 61 | "meta": { 62 | "distributed": false, 63 | "log_frequency": 1000, 64 | "eval_frequency": 1000, 65 | "max_session": 4, 66 | "max_trial": 32, 67 | "search": "RandomSearch", 68 | }, 69 | "search": { 70 | "agent": [{ 71 | "agent": { 72 | "training_batch_iter__randint": [1, 5] 73 | }, 74 | "net": { 75 | "update_frequency__choice": [0.8, 0.9, 0.925, 0.95], 76 | "optim_spec": { 77 | "lr__choice": [0.001, 0.003, 0.005, 0.007, 0.01] 78 | } 79 | }, 80 | }] 81 | } 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /slm_lab/spec/experimental/dqn/dqn_cartpole_search.json: -------------------------------------------------------------------------------- 1 | { 2 | "dqn_cartpole": { 3 | "agent": [{ 4 | "name": "DQN", 5 | "algorithm": { 6 | "name": "DQN", 7 | "action_pdtype": "Argmax", 8 | "action_policy": "epsilon_greedy", 9 | "explore_var_spec": { 10 | "name": "linear_decay", 11 | "start_val": 1.0, 12 | "end_val": 0.1, 13 | "start_step": 0, 14 | "end_step": 1000, 15 | }, 16 | "gamma": 0.99, 17 | "training_batch_iter": 8, 18 | "training_iter": 4, 19 | "training_frequency": 4, 20 | "training_start_step": 32 21 | }, 22 | "memory": { 23 | "name": "Replay", 24 | "batch_size": 32, 25 | "max_size": 10000, 26 | "use_cer": false 27 | }, 28 | "net": { 29 | "type": "MLPNet", 30 | "hid_layers": [64], 31 | "hid_layers_activation": "selu", 32 | "clip_grad_val": 0.5, 33 | "loss_spec": { 34 | "name": "MSELoss" 35 | }, 36 | "optim_spec": { 37 | "name": "Adam", 38 | "lr": 0.01 39 | }, 40 | "lr_scheduler_spec": null, 41 | "update_type": "polyak", 42 | "update_frequency": 32, 43 | "polyak_coef": 0.1, 44 | "gpu": false 45 | } 46 | }], 47 | "env": [{ 48 | "name": "CartPole-v0", 49 | "max_t": null, 50 | "max_frame": 50000 51 | }], 52 | "body": { 53 | "product": "outer", 54 | "num": 1 55 | }, 56 | "meta": { 57 | "distributed": false, 58 | "eval_frequency": 1000, 59 | "max_session": 4, 60 | "max_trial": 32 61 | }, 62 | "search": { 63 | "agent": [{ 64 | "algorithm": { 65 | "gamma__uniform": [0.50, 1.0], 66 | "training_iter__randint": [1, 10] 67 | }, 68 | "net": { 69 | "optim_spec": { 70 | "lr__choice": [0.0001, 0.001, 0.01, 0.1] 71 | } 72 | } 73 | }] 74 | } 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /slm_lab/spec/experimental/misc/random.json: -------------------------------------------------------------------------------- 1 | { 2 | "random_cartpole": { 3 | "agent": [{ 4 | "name": "Random", 5 | "algorithm": { 6 | "name": "Random" 7 | }, 8 | "memory": { 9 | "name": "OnPolicyReplay" 10 | }, 11 | "net": {} 12 | }], 13 | "env": [{ 14 | "name": "CartPole-v0", 15 | "max_t": null, 16 | "max_frame": 100 17 | }], 18 | "body": { 19 | "product": "outer", 20 | "num": 1 21 | }, 22 | "meta": { 23 | "distributed": false, 24 | "eval_frequency": 1000, 25 | "max_session": 5, 26 | "max_trial": 1, 27 | } 28 | }, 29 | "random_pendulum": { 30 | "agent": [{ 31 | "name": "Random", 32 | "algorithm": { 33 | "name": "Random" 34 | }, 35 | "memory": { 36 | "name": "OnPolicyReplay" 37 | }, 38 | "net": {} 39 | }], 40 | "env": [{ 41 | "name": "Pendulum-v0", 42 | "max_t": 
null, 43 | "max_frame": 100 44 | }], 45 | "body": { 46 | "product": "outer", 47 | "num": 1 48 | }, 49 | "meta": { 50 | "distributed": false, 51 | "eval_frequency": 1000, 52 | "max_session": 5, 53 | "max_trial": 1, 54 | } 55 | }, 56 | "random_2dball": { 57 | "agent": [{ 58 | "name": "Random", 59 | "algorithm": { 60 | "name": "Random" 61 | }, 62 | "memory": { 63 | "name": "OnPolicyReplay" 64 | }, 65 | "net": {} 66 | }], 67 | "env": [{ 68 | "name": "2DBall", 69 | "max_t": 1000, 70 | "max_frame": 100 71 | }], 72 | "body": { 73 | "product": "outer", 74 | "num": 1 75 | }, 76 | "meta": { 77 | "distributed": false, 78 | "eval_frequency": 1000, 79 | "max_session": 5, 80 | "max_trial": 1, 81 | } 82 | }, 83 | "random_breakout": { 84 | "agent": [{ 85 | "name": "Random", 86 | "algorithm": { 87 | "name": "Random" 88 | }, 89 | "memory": { 90 | "name": "OnPolicyReplay" 91 | }, 92 | "net": {} 93 | }], 94 | "env": [{ 95 | "name": "BreakoutNoFrameskip-v4", 96 | "frame_op": "concat", 97 | "frame_op_len": 4, 98 | "reward_scale": "sign", 99 | "num_envs": 8, 100 | "max_t": null, 101 | "max_frame": 1e7 102 | }], 103 | "body": { 104 | "product": "outer", 105 | "num": 1 106 | }, 107 | "meta": { 108 | "distributed": false, 109 | "log_frequency": 10000, 110 | "eval_frequency": 10000, 111 | "rigorous_eval": 0, 112 | "max_session": 1, 113 | "max_trial": 1, 114 | } 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /slm_lab/spec/experimental/reinforce/reinforce_cartpole.json: -------------------------------------------------------------------------------- 1 | { 2 | "reinforce_cartpole": { 3 | "agent": [{ 4 | "name": "Reinforce", 5 | "algorithm": { 6 | "name": "Reinforce", 7 | "action_pdtype": "default", 8 | "action_policy": "default", 9 | "explore_var_spec": null, 10 | "gamma": 0.99, 11 | "entropy_coef_spec": { 12 | "name": "linear_decay", 13 | "start_val": 0.01, 14 | "end_val": 0.001, 15 | "start_step": 0, 16 | "end_step": 20000, 17 | }, 18 | "training_frequency": 1 19 | }, 20 | "memory": { 21 | "name": "OnPolicyReplay" 22 | }, 23 | "net": { 24 | "type": "MLPNet", 25 | "hid_layers": [64], 26 | "hid_layers_activation": "selu", 27 | "clip_grad_val": null, 28 | "loss_spec": { 29 | "name": "MSELoss" 30 | }, 31 | "optim_spec": { 32 | "name": "Adam", 33 | "lr": 0.002 34 | }, 35 | "lr_scheduler_spec": null 36 | } 37 | }], 38 | "env": [{ 39 | "name": "CartPole-v0", 40 | "max_t": null, 41 | "max_frame": 80000, 42 | }], 43 | "body": { 44 | "product": "outer", 45 | "num": 1 46 | }, 47 | "meta": { 48 | "distributed": false, 49 | "eval_frequency": 2000, 50 | "max_session": 1, 51 | "max_trial": 1, 52 | } 53 | }, 54 | "reinforce_cross_entropy_cartpole": { 55 | "agent": [{ 56 | "name": "Reinforce", 57 | "algorithm": { 58 | "name": "Reinforce", 59 | "action_pdtype": "Categorical", 60 | "action_policy": "default", 61 | "center_return": true, 62 | "explore_var_spec": null, 63 | "gamma": 0.99, 64 | "entropy_coef_spec": { 65 | "name": "linear_decay", 66 | "start_val": 0.01, 67 | "end_val": 0.001, 68 | "start_step": 0, 69 | "end_step": 20000, 70 | }, 71 | "training_frequency": 16 72 | }, 73 | "memory": { 74 | "name": "OnPolicyCrossEntropy", 75 | "cross_entropy": 0.5 76 | }, 77 | "net": { 78 | "type": "MLPNet", 79 | "hid_layers": [128], 80 | "hid_layers_activation": "selu", 81 | "clip_grad_val": null, 82 | "loss_spec": { 83 | "name": "CrossEntropyLoss" 84 | }, 85 | "optim_spec": { 86 | "name": "Adam", 87 | "lr": 0.02 88 | }, 89 | "lr_scheduler_spec": null 90 | } 91 | }], 92 | "env": [{ 93 | 
"name": "CartPole-v0", 94 | "max_t": null, 95 | "max_frame": 100000, 96 | }], 97 | "body": { 98 | "product": "outer", 99 | "num": 1 100 | }, 101 | "meta": { 102 | "distributed": false, 103 | "eval_frequency": 2000, 104 | "max_session": 4, 105 | "max_trial": 1, 106 | }, 107 | "search": { 108 | "agent": [{ 109 | "algorithm": { 110 | "gamma__grid_search": [0.1, 0.5, 0.7, 0.8, 0.90, 0.99, 0.999] 111 | } 112 | }] 113 | } 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /slm_lab/spec/experimental/reinforce/reinforce_pendulum.json: -------------------------------------------------------------------------------- 1 | { 2 | "reinforce_pendulum": { 3 | "agent": [{ 4 | "name": "Reinforce", 5 | "algorithm": { 6 | "name": "Reinforce", 7 | "action_pdtype": "default", 8 | "action_policy": "default", 9 | "explore_var_spec": null, 10 | "gamma": 0.99, 11 | "entropy_coef_spec": { 12 | "name": "linear_decay", 13 | "start_val": 0.01, 14 | "end_val": 0.001, 15 | "start_step": 0, 16 | "end_step": 20000, 17 | }, 18 | "training_frequency": 1 19 | }, 20 | "memory": { 21 | "name": "OnPolicyReplay" 22 | }, 23 | "net": { 24 | "type": "MLPNet", 25 | "hid_layers": [64], 26 | "hid_layers_activation": "selu", 27 | "clip_grad_val": null, 28 | "loss_spec": { 29 | "name": "MSELoss" 30 | }, 31 | "optim_spec": { 32 | "name": "Adam", 33 | "lr": 0.002 34 | }, 35 | "lr_scheduler_spec": null 36 | } 37 | }], 38 | "env": [{ 39 | "name": "Pendulum-v0", 40 | "max_t": null, 41 | "max_frame": 80000, 42 | }], 43 | "body": { 44 | "product": "outer", 45 | "num": 1 46 | }, 47 | "meta": { 48 | "distributed": false, 49 | "eval_frequency": 2000, 50 | "max_session": 1, 51 | "max_trial": 1, 52 | } 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /slm_lab/spec/experimental/sac/sac_cartpole.json: -------------------------------------------------------------------------------- 1 | { 2 | "sac_cartpole": { 3 | "agent": [{ 4 | "name": "SoftActorCritic", 5 | "algorithm": { 6 | "name": "SoftActorCritic", 7 | "action_pdtype": "GumbelSoftmax", 8 | "action_policy": "default", 9 | "gamma": 0.99, 10 | "training_frequency": 1 11 | }, 12 | "memory": { 13 | "name": "Replay", 14 | "batch_size": 256, 15 | "max_size": 100000, 16 | "use_cer": true 17 | }, 18 | "net": { 19 | "type": "MLPNet", 20 | "hid_layers": [64], 21 | "hid_layers_activation": "relu", 22 | "init_fn": "orthogonal_", 23 | "clip_grad_val": 0.5, 24 | "loss_spec": { 25 | "name": "MSELoss" 26 | }, 27 | "optim_spec": { 28 | "name": "Adam", 29 | "lr": 0.005 30 | }, 31 | "lr_scheduler_spec": null, 32 | "update_type": "polyak", 33 | "update_frequency": 1, 34 | "polyak_coef": 0.005, 35 | "gpu": false 36 | } 37 | }], 38 | "env": [{ 39 | "name": "CartPole-v0", 40 | "max_t": null, 41 | "max_frame": 200000, 42 | "num_envs": 8, 43 | "normalize_state": false 44 | }], 45 | "body": { 46 | "product": "outer", 47 | "num": 1 48 | }, 49 | "meta": { 50 | "distributed": false, 51 | "log_frequency": 500, 52 | "eval_frequency": 500, 53 | "rigorous_eval": 0, 54 | "max_session": 1, 55 | "max_trial": 1, 56 | } 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /slm_lab/spec/experimental/sarsa/sarsa_cartpole.json: -------------------------------------------------------------------------------- 1 | { 2 | "sarsa_epsilon_greedy_cartpole": { 3 | "agent": [{ 4 | "name": "SARSA", 5 | "algorithm": { 6 | "name": "SARSA", 7 | "action_pdtype": "Argmax", 8 | "action_policy": 
"epsilon_greedy", 9 | "explore_var_spec": { 10 | "name": "linear_decay", 11 | "start_val": 1.0, 12 | "end_val": 0.05, 13 | "start_step": 0, 14 | "end_step": 10000 15 | }, 16 | "gamma": 0.99, 17 | "training_frequency": 32 18 | }, 19 | "memory": { 20 | "name": "OnPolicyBatchReplay" 21 | }, 22 | "net": { 23 | "type": "MLPNet", 24 | "hid_layers": [64], 25 | "hid_layers_activation": "selu", 26 | "clip_grad_val": 0.5, 27 | "loss_spec": { 28 | "name": "MSELoss" 29 | }, 30 | "optim_spec": { 31 | "name": "RMSprop", 32 | "lr": 0.01 33 | }, 34 | "lr_scheduler_spec": null 35 | } 36 | }], 37 | "env": [{ 38 | "name": "CartPole-v0", 39 | "max_t": null, 40 | "max_frame": 80000 41 | }], 42 | "body": { 43 | "product": "outer", 44 | "num": 1 45 | }, 46 | "meta": { 47 | "distributed": false, 48 | "eval_frequency": 2000, 49 | "max_trial": 1, 50 | "max_session": 1 51 | } 52 | }, 53 | "sarsa_boltzmann_cartpole": { 54 | "agent": [{ 55 | "name": "SARSA", 56 | "algorithm": { 57 | "name": "SARSA", 58 | "action_pdtype": "Argmax", 59 | "action_policy": "boltzmann", 60 | "explore_var_spec": { 61 | "name": "linear_decay", 62 | "start_val": 3.0, 63 | "end_val": 1.0, 64 | "start_step": 0, 65 | "end_step": 10000 66 | }, 67 | "gamma": 0.99, 68 | "training_frequency": 32 69 | }, 70 | "memory": { 71 | "name": "OnPolicyBatchReplay" 72 | }, 73 | "net": { 74 | "type": "MLPNet", 75 | "hid_layers": [64], 76 | "hid_layers_activation": "selu", 77 | "clip_grad_val": 0.5, 78 | "loss_spec": { 79 | "name": "MSELoss" 80 | }, 81 | "optim_spec": { 82 | "name": "RMSprop", 83 | "lr": 0.01 84 | }, 85 | "lr_scheduler_spec": null 86 | } 87 | }], 88 | "env": [{ 89 | "name": "CartPole-v0", 90 | "max_t": null, 91 | "max_frame": 80000 92 | }], 93 | "body": { 94 | "product": "outer", 95 | "num": 1 96 | }, 97 | "meta": { 98 | "distributed": false, 99 | "eval_frequency": 2000, 100 | "max_trial": 1, 101 | "max_session": 1 102 | } 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kengz/SLM-Lab/cae945a294cb111ee8e568bb4465ee74c501478b/test/__init__.py -------------------------------------------------------------------------------- /test/agent/net/test_conv.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | from slm_lab.env.base import Clock 3 | from slm_lab.agent.net import net_util 4 | from slm_lab.agent.net.conv import ConvNet 5 | import torch 6 | import torch.nn as nn 7 | 8 | net_spec = { 9 | "type": "ConvNet", 10 | "shared": True, 11 | "conv_hid_layers": [ 12 | [32, 8, 4, 0, 1], 13 | [64, 4, 2, 0, 1], 14 | [64, 3, 1, 0, 1] 15 | ], 16 | "fc_hid_layers": [512], 17 | "hid_layers_activation": "relu", 18 | "init_fn": "xavier_uniform_", 19 | "batch_norm": False, 20 | "clip_grad_val": 1.0, 21 | "loss_spec": { 22 | "name": "SmoothL1Loss" 23 | }, 24 | "optim_spec": { 25 | "name": "Adam", 26 | "lr": 0.02 27 | }, 28 | "lr_scheduler_spec": { 29 | "name": "StepLR", 30 | "step_size": 30, 31 | "gamma": 0.1 32 | }, 33 | "gpu": True 34 | } 35 | in_dim = (4, 84, 84) 36 | out_dim = 3 37 | batch_size = 16 38 | net = ConvNet(net_spec, in_dim, out_dim) 39 | # init net optimizer and its lr scheduler 40 | optim = net_util.get_optim(net, net.optim_spec) 41 | lr_scheduler = net_util.get_lr_scheduler(optim, net.lr_scheduler_spec) 42 | x = torch.rand((batch_size,) + in_dim) 43 | 44 | 45 | def test_init(): 46 | net = 
ConvNet(net_spec, in_dim, out_dim) 47 | assert isinstance(net, nn.Module) 48 | assert hasattr(net, 'conv_model') 49 | assert hasattr(net, 'fc_model') 50 | assert hasattr(net, 'model_tail') 51 | assert not hasattr(net, 'model_tails') 52 | 53 | 54 | def test_forward(): 55 | y = net.forward(x) 56 | assert y.shape == (batch_size, out_dim) 57 | 58 | 59 | def test_train_step(): 60 | y = torch.rand((batch_size, out_dim)) 61 | clock = Clock(100, 1) 62 | loss = net.loss_fn(net.forward(x), y) 63 | net.train_step(loss, optim, lr_scheduler, clock=clock) 64 | assert loss != 0.0 65 | 66 | 67 | def test_no_fc(): 68 | no_fc_net_spec = deepcopy(net_spec) 69 | no_fc_net_spec['fc_hid_layers'] = [] 70 | net = ConvNet(no_fc_net_spec, in_dim, out_dim) 71 | assert isinstance(net, nn.Module) 72 | assert hasattr(net, 'conv_model') 73 | assert not hasattr(net, 'fc_model') 74 | assert hasattr(net, 'model_tail') 75 | assert not hasattr(net, 'model_tails') 76 | 77 | y = net.forward(x) 78 | assert y.shape == (batch_size, out_dim) 79 | 80 | 81 | def test_multitails(): 82 | net = ConvNet(net_spec, in_dim, [3, 4]) 83 | assert isinstance(net, nn.Module) 84 | assert hasattr(net, 'conv_model') 85 | assert hasattr(net, 'fc_model') 86 | assert not hasattr(net, 'model_tail') 87 | assert hasattr(net, 'model_tails') 88 | assert len(net.model_tails) == 2 89 | 90 | y = net.forward(x) 91 | assert len(y) == 2 92 | assert y[0].shape == (batch_size, 3) 93 | assert y[1].shape == (batch_size, 4) 94 | -------------------------------------------------------------------------------- /test/agent/net/test_mlp.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | from slm_lab.env.base import Clock 3 | from slm_lab.agent.net import net_util 4 | from slm_lab.agent.net.mlp import MLPNet 5 | import torch 6 | import torch.nn as nn 7 | 8 | net_spec = { 9 | "type": "MLPNet", 10 | "shared": True, 11 | "hid_layers": [32], 12 | "hid_layers_activation": "relu", 13 | "init_fn": "xavier_uniform_", 14 | "clip_grad_val": 1.0, 15 | "loss_spec": { 16 | "name": "MSELoss" 17 | }, 18 | "optim_spec": { 19 | "name": "Adam", 20 | "lr": 0.02 21 | }, 22 | "lr_scheduler_spec": { 23 | "name": "StepLR", 24 | "step_size": 30, 25 | "gamma": 0.1 26 | }, 27 | "update_type": "replace", 28 | "update_frequency": 1, 29 | "polyak_coef": 0.9, 30 | "gpu": True 31 | } 32 | in_dim = 10 33 | out_dim = 3 34 | batch_size = 16 35 | net = MLPNet(net_spec, in_dim, out_dim) 36 | # init net optimizer and its lr scheduler 37 | optim = net_util.get_optim(net, net.optim_spec) 38 | lr_scheduler = net_util.get_lr_scheduler(optim, net.lr_scheduler_spec) 39 | x = torch.rand((batch_size, in_dim)) 40 | 41 | 42 | def test_init(): 43 | net = MLPNet(net_spec, in_dim, out_dim) 44 | assert isinstance(net, nn.Module) 45 | assert hasattr(net, 'model') 46 | assert hasattr(net, 'model_tail') 47 | assert not hasattr(net, 'model_tails') 48 | 49 | 50 | def test_forward(): 51 | y = net.forward(x) 52 | assert y.shape == (batch_size, out_dim) 53 | 54 | 55 | def test_train_step(): 56 | y = torch.rand((batch_size, out_dim)) 57 | clock = Clock(100, 1) 58 | loss = net.loss_fn(net.forward(x), y) 59 | net.train_step(loss, optim, lr_scheduler, clock=clock) 60 | assert loss != 0.0 61 | 62 | 63 | def test_no_lr_scheduler(): 64 | nopo_lrs_net_spec = deepcopy(net_spec) 65 | nopo_lrs_net_spec['lr_scheduler_spec'] = None 66 | net = MLPNet(nopo_lrs_net_spec, in_dim, out_dim) 67 | assert isinstance(net, nn.Module) 68 | assert hasattr(net, 'model') 69 | assert 
hasattr(net, 'model_tail') 70 | assert not hasattr(net, 'model_tails') 71 | 72 | y = net.forward(x) 73 | assert y.shape == (batch_size, out_dim) 74 | 75 | 76 | def test_multitails(): 77 | net = MLPNet(net_spec, in_dim, [3, 4]) 78 | assert isinstance(net, nn.Module) 79 | assert hasattr(net, 'model') 80 | assert not hasattr(net, 'model_tail') 81 | assert hasattr(net, 'model_tails') 82 | assert len(net.model_tails) == 2 83 | 84 | y = net.forward(x) 85 | assert len(y) == 2 86 | assert y[0].shape == (batch_size, 3) 87 | assert y[1].shape == (batch_size, 4) 88 | -------------------------------------------------------------------------------- /test/agent/net/test_recurrent.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | from slm_lab.env.base import Clock 3 | from slm_lab.agent.net import net_util 4 | from slm_lab.agent.net.recurrent import RecurrentNet 5 | import pytest 6 | import torch 7 | import torch.nn as nn 8 | 9 | net_spec = { 10 | "type": "RecurrentNet", 11 | "shared": True, 12 | "cell_type": "GRU", 13 | "fc_hid_layers": [10], 14 | "hid_layers_activation": "relu", 15 | "rnn_hidden_size": 64, 16 | "rnn_num_layers": 2, 17 | "bidirectional": False, 18 | "seq_len": 4, 19 | "init_fn": "xavier_uniform_", 20 | "clip_grad_val": 1.0, 21 | "loss_spec": { 22 | "name": "SmoothL1Loss" 23 | }, 24 | "optim_spec": { 25 | "name": "Adam", 26 | "lr": 0.02 27 | }, 28 | "lr_scheduler_spec": { 29 | "name": "StepLR", 30 | "step_size": 30, 31 | "gamma": 0.1 32 | }, 33 | "gpu": True 34 | } 35 | state_dim = 10 36 | out_dim = 3 37 | batch_size = 16 38 | seq_len = net_spec['seq_len'] 39 | in_dim = (seq_len, state_dim) 40 | net = RecurrentNet(net_spec, in_dim, out_dim) 41 | # init net optimizer and its lr scheduler 42 | optim = net_util.get_optim(net, net.optim_spec) 43 | lr_scheduler = net_util.get_lr_scheduler(optim, net.lr_scheduler_spec) 44 | x = torch.rand((batch_size, seq_len, state_dim)) 45 | 46 | 47 | def test_init(): 48 | net = RecurrentNet(net_spec, in_dim, out_dim) 49 | assert isinstance(net, nn.Module) 50 | assert hasattr(net, 'fc_model') 51 | assert hasattr(net, 'rnn_model') 52 | assert hasattr(net, 'model_tail') 53 | assert not hasattr(net, 'model_tails') 54 | assert net.rnn_model.bidirectional == False 55 | 56 | 57 | def test_forward(): 58 | y = net.forward(x) 59 | assert y.shape == (batch_size, out_dim) 60 | 61 | 62 | def test_train_step(): 63 | y = torch.rand((batch_size, out_dim)) 64 | clock = Clock(100, 1) 65 | loss = net.loss_fn(net.forward(x), y) 66 | net.train_step(loss, optim, lr_scheduler, clock=clock) 67 | assert loss != 0.0 68 | 69 | 70 | @pytest.mark.parametrize('bidirectional', (False, True)) 71 | @pytest.mark.parametrize('cell_type', ('RNN', 'LSTM', 'GRU')) 72 | def test_variant(bidirectional, cell_type): 73 | var_net_spec = deepcopy(net_spec) 74 | var_net_spec['bidirectional'] = bidirectional 75 | var_net_spec['cell_type'] = cell_type 76 | net = RecurrentNet(var_net_spec, in_dim, out_dim) 77 | assert isinstance(net, nn.Module) 78 | assert hasattr(net, 'fc_model') 79 | assert hasattr(net, 'rnn_model') 80 | assert hasattr(net, 'model_tail') 81 | assert not hasattr(net, 'model_tails') 82 | assert net.rnn_model.bidirectional == bidirectional 83 | 84 | y = net.forward(x) 85 | assert y.shape == (batch_size, out_dim) 86 | 87 | 88 | def test_no_fc(): 89 | no_fc_net_spec = deepcopy(net_spec) 90 | no_fc_net_spec['fc_hid_layers'] = [] 91 | net = RecurrentNet(no_fc_net_spec, in_dim, out_dim) 92 | assert isinstance(net, nn.Module) 
93 | assert not hasattr(net, 'fc_model') 94 | assert hasattr(net, 'rnn_model') 95 | assert hasattr(net, 'model_tail') 96 | assert not hasattr(net, 'model_tails') 97 | 98 | y = net.forward(x) 99 | assert y.shape == (batch_size, out_dim) 100 | 101 | 102 | def test_multitails(): 103 | net = RecurrentNet(net_spec, in_dim, [3, 4]) 104 | assert isinstance(net, nn.Module) 105 | assert hasattr(net, 'fc_model') 106 | assert hasattr(net, 'rnn_model') 107 | assert not hasattr(net, 'model_tail') 108 | assert hasattr(net, 'model_tails') 109 | assert len(net.model_tails) == 2 110 | 111 | y = net.forward(x) 112 | assert len(y) == 2 113 | assert y[0].shape == (batch_size, 3) 114 | assert y[1].shape == (batch_size, 4) 115 | -------------------------------------------------------------------------------- /test/env/test_registration.py: -------------------------------------------------------------------------------- 1 | from slm_lab.env.registration import get_env_path 2 | import pytest 3 | 4 | 5 | @pytest.mark.skip(reason="Not implemented yet") 6 | def test_get_env_path(): 7 | assert 'node_modules/slm-env-3dball/build/3dball' in get_env_path( 8 | '3dball') 9 | -------------------------------------------------------------------------------- /test/experiment/test_control.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | from flaky import flaky 3 | from slm_lab.experiment import analysis 4 | from slm_lab.experiment.control import Session, Trial, Experiment 5 | from slm_lab.spec import spec_util 6 | import pandas as pd 7 | import pytest 8 | 9 | 10 | def test_session(test_spec): 11 | spec_util.tick(test_spec, 'trial') 12 | spec_util.tick(test_spec, 'session') 13 | spec_util.save(test_spec, unit='trial') 14 | session = Session(test_spec) 15 | session_metrics = session.run() 16 | assert isinstance(session_metrics, dict) 17 | 18 | 19 | def test_trial(test_spec): 20 | spec_util.tick(test_spec, 'trial') 21 | spec_util.save(test_spec, unit='trial') 22 | trial = Trial(test_spec) 23 | trial_metrics = trial.run() 24 | assert isinstance(trial_metrics, dict) 25 | 26 | 27 | def test_trial_demo(): 28 | spec = spec_util.get('demo.json', 'dqn_cartpole') 29 | spec_util.save(spec, unit='experiment') 30 | spec = spec_util.override_spec(spec, 'test') 31 | spec_util.tick(spec, 'trial') 32 | trial_metrics = Trial(spec).run() 33 | assert isinstance(trial_metrics, dict) 34 | 35 | 36 | @pytest.mark.skip(reason="Unstable") 37 | @flaky 38 | def test_demo_performance(): 39 | spec = spec_util.get('demo.json', 'dqn_cartpole') 40 | spec_util.save(spec, unit='experiment') 41 | for env_spec in spec['env']: 42 | env_spec['max_frame'] = 2000 43 | spec_util.tick(spec, 'trial') 44 | trial = Trial(spec) 45 | spec_util.tick(spec, 'session') 46 | session = Session(spec) 47 | session.run() 48 | last_reward = session.agent.body.train_df.iloc[-1]['total_reward'] 49 | assert last_reward > 50, f'last_reward is too low: {last_reward}' 50 | 51 | 52 | @pytest.mark.skip(reason="Cant run on CI") 53 | def test_experiment(): 54 | spec = spec_util.get('demo.json', 'dqn_cartpole') 55 | spec_util.save(spec, unit='experiment') 56 | spec = spec_util.override_spec(spec, 'test') 57 | spec_util.tick(spec, 'experiment') 58 | experiment_df = Experiment(spec).run() 59 | assert isinstance(experiment_df, pd.DataFrame) 60 | -------------------------------------------------------------------------------- /test/experiment/test_monitor.py: 
-------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | # TODO add these tests 4 | 5 | 6 | def test_clock(): 7 | return 8 | 9 | 10 | def test_body(): 11 | return 12 | -------------------------------------------------------------------------------- /test/fixture/lib/util/test_df.csv: -------------------------------------------------------------------------------- 1 | integer,letter,square 2 | 1,a,1 3 | 2,b,4 4 | 3,c,9 5 | -------------------------------------------------------------------------------- /test/fixture/lib/util/test_dict.json: -------------------------------------------------------------------------------- 1 | { 2 | "a": 1, 3 | "b": 2, 4 | "c": 3 5 | } -------------------------------------------------------------------------------- /test/fixture/lib/util/test_dict.yml: -------------------------------------------------------------------------------- 1 | {a: 1, b: 2, c: 3} 2 | -------------------------------------------------------------------------------- /test/fixture/lib/util/test_str.txt: -------------------------------------------------------------------------------- 1 | lorem ipsum dolor -------------------------------------------------------------------------------- /test/lib/test_distribution.py: -------------------------------------------------------------------------------- 1 | from flaky import flaky 2 | from slm_lab.lib import distribution 3 | import pytest 4 | import torch 5 | 6 | 7 | @pytest.mark.parametrize('pdparam_type', [ 8 | 'probs', 'logits' 9 | ]) 10 | def test_argmax(pdparam_type): 11 | pdparam = torch.tensor([1.1, 10.0, 2.1]) 12 | # test both probs or logits 13 | pd = distribution.Argmax(**{pdparam_type: pdparam}) 14 | for _ in range(10): 15 | assert pd.sample().item() == 1 16 | assert torch.equal(pd.probs, torch.tensor([0., 1., 0.])) 17 | 18 | 19 | @flaky 20 | @pytest.mark.parametrize('pdparam_type', [ 21 | 'probs', 'logits' 22 | ]) 23 | def test_gumbel_categorical(pdparam_type): 24 | pdparam = torch.tensor([1.1, 10.0, 2.1]) 25 | pd = distribution.GumbelSoftmax(**{pdparam_type: pdparam, 'temperature': torch.tensor(1.0)}) 26 | for _ in range(10): 27 | assert torch.is_tensor(pd.sample()) 28 | 29 | 30 | @pytest.mark.parametrize('pdparam_type', [ 31 | 'probs', 'logits' 32 | ]) 33 | def test_multicategorical(pdparam_type): 34 | pdparam0 = torch.tensor([10.0, 0.0, 0.0]) 35 | pdparam1 = torch.tensor([0.0, 10.0, 0.0]) 36 | pdparam2 = torch.tensor([0.0, 0.0, 10.0]) 37 | pdparams = [pdparam0, pdparam1, pdparam2] 38 | # use a probs 39 | pd = distribution.MultiCategorical(**{pdparam_type: pdparams}) 40 | assert isinstance(pd.probs, list) 41 | # test probs only since if init from logits, probs will be close but not precise 42 | if pdparam_type == 'probs': 43 | assert torch.equal(pd.probs[0], torch.tensor([1., 0., 0.])) 44 | assert torch.equal(pd.probs[1], torch.tensor([0., 1., 0.])) 45 | assert torch.equal(pd.probs[2], torch.tensor([0., 0., 1.])) 46 | for _ in range(10): 47 | assert torch.equal(pd.sample(), torch.tensor([0, 1, 2])) 48 | -------------------------------------------------------------------------------- /test/lib/test_logger.py: -------------------------------------------------------------------------------- 1 | from slm_lab.lib import logger 2 | 3 | 4 | def test_logger(test_str): 5 | logger.critical(test_str) 6 | logger.debug(test_str) 7 | logger.error(test_str) 8 | logger.exception(test_str) 9 | logger.info(test_str) 10 | logger.warning(test_str) 11 | 
-------------------------------------------------------------------------------- /test/lib/test_math_util.py: -------------------------------------------------------------------------------- 1 | from slm_lab.lib import math_util 2 | import numpy as np 3 | import pytest 4 | import torch 5 | 6 | 7 | @pytest.mark.parametrize('base_shape', [ 8 | [], # scalar 9 | [2], # vector 10 | [4, 84, 84], # image 11 | ]) 12 | def test_venv_pack(base_shape): 13 | batch_size = 5 14 | num_envs = 4 15 | batch_arr = torch.zeros([batch_size, num_envs] + base_shape) 16 | unpacked_arr = math_util.venv_unpack(batch_arr) 17 | packed_arr = math_util.venv_pack(unpacked_arr, num_envs) 18 | assert list(packed_arr.shape) == [batch_size, num_envs] + base_shape 19 | 20 | 21 | @pytest.mark.parametrize('base_shape', [ 22 | [], # scalar 23 | [2], # vector 24 | [4, 84, 84], # image 25 | ]) 26 | def test_venv_unpack(base_shape): 27 | batch_size = 5 28 | num_envs = 4 29 | batch_arr = torch.zeros([batch_size, num_envs] + base_shape) 30 | unpacked_arr = math_util.venv_unpack(batch_arr) 31 | assert list(unpacked_arr.shape) == [batch_size * num_envs] + base_shape 32 | 33 | 34 | def test_calc_gaes(): 35 | rewards = torch.tensor([1., 0., 1., 1., 0., 1., 1., 1.]) 36 | dones = torch.tensor([0., 0., 1., 1., 0., 0., 0., 0.]) 37 | v_preds = torch.tensor([1.1, 0.1, 1.1, 1.1, 0.1, 1.1, 1.1, 1.1, 1.1]) 38 | assert len(v_preds) == len(rewards) + 1 # includes last state 39 | gamma = 0.99 40 | lam = 0.95 41 | gaes = math_util.calc_gaes(rewards, dones, v_preds, gamma, lam) 42 | res = torch.tensor([0.84070045, 0.89495, -0.1, -0.1, 3.616724, 2.7939649, 1.9191545, 0.989]) 43 | # use allclose instead of equal to account for atol 44 | assert torch.allclose(gaes, res) 45 | 46 | 47 | @pytest.mark.parametrize('start_val, end_val, start_step, end_step, step, correct', [ 48 | (0.1, 0.0, 0, 100, 0, 0.1), 49 | (0.1, 0.0, 0, 100, 50, 0.05), 50 | (0.1, 0.0, 0, 100, 100, 0.0), 51 | (0.1, 0.0, 0, 100, 150, 0.0), 52 | (0.1, 0.0, 100, 200, 50, 0.1), 53 | (0.1, 0.0, 100, 200, 100, 0.1), 54 | (0.1, 0.0, 100, 200, 150, 0.05), 55 | (0.1, 0.0, 100, 200, 200, 0.0), 56 | (0.1, 0.0, 100, 200, 250, 0.0), 57 | ]) 58 | def test_linear_decay(start_val, end_val, start_step, end_step, step, correct): 59 | assert math_util.linear_decay(start_val, end_val, start_step, end_step, step) == correct 60 | 61 | 62 | @pytest.mark.parametrize('start_val, end_val, start_step, end_step, step, correct', [ 63 | (1.0, 0.0, 0, 100, 0, 1.0), 64 | (1.0, 0.0, 0, 100, 5, 0.9), 65 | (1.0, 0.0, 0, 100, 10, 0.81), 66 | (1.0, 0.0, 0, 100, 25, 0.59049), 67 | (1.0, 0.0, 0, 100, 50, 0.3486784401), 68 | (1.0, 0.0, 0, 100, 100, 0.0), 69 | (1.0, 0.0, 0, 100, 150, 0.0), 70 | (1.0, 0.0, 100, 200, 0, 1.0), 71 | (1.0, 0.0, 100, 200, 50, 1.0), 72 | (1.0, 0.0, 100, 200, 100, 1.0), 73 | (1.0, 0.0, 100, 200, 105, 0.9), 74 | (1.0, 0.0, 100, 200, 125, 0.59049), 75 | (1.0, 0.0, 100, 200, 200, 0.0), 76 | (1.0, 0.0, 100, 200, 250, 0.0), 77 | ]) 78 | def test_rate_decay(start_val, end_val, start_step, end_step, step, correct): 79 | np.testing.assert_almost_equal(math_util.rate_decay(start_val, end_val, start_step, end_step, step), correct) 80 | 81 | def test_calc_q_value_logits(): 82 | state_value = torch.tensor([[1.], [2.], [3.]]) 83 | advantages = torch.tensor([ 84 | [0., 1.], 85 | [1., 1.], 86 | [1., 0.]]) 87 | result = torch.tensor([ 88 | [0.5, 1.5], 89 | [2.0, 2.0], 90 | [3.5, 2.5]]) 91 | out = math_util.calc_q_value_logits(state_value, advantages) 92 | assert torch.allclose(out, result) 93 | 
-------------------------------------------------------------------------------- /test/spec/test_spec_util.py: -------------------------------------------------------------------------------- 1 | from slm_lab.spec import spec_util 2 | import numpy as np 3 | import pytest 4 | 5 | 6 | def test_check(): 7 | spec = spec_util.get('experimental/misc/base.json', 'base_case_openai') 8 | assert spec_util.check(spec) 9 | 10 | 11 | def test_check_all(): 12 | assert spec_util.check_all() 13 | 14 | 15 | def test_get(): 16 | spec = spec_util.get('experimental/misc/base.json', 'base_case_openai') 17 | assert spec is not None 18 | --------------------------------------------------------------------------------
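Editorial note (not part of the repository): taken together, the spec files and the tests above outline the intended workflow — a spec is fetched by file name and spec name via spec_util.get, saved and ticked, then handed to a Session/Trial/Experiment from slm_lab.experiment.control. The sketch below mirrors test_trial_demo from test_control.py and is only an illustration, assuming the lab's dependencies are installed; it is not a prescribed entry point.

from slm_lab.experiment.control import Trial
from slm_lab.spec import spec_util

# load the dqn_cartpole spec from slm_lab/spec/demo.json shown above
spec = spec_util.get('demo.json', 'dqn_cartpole')
spec_util.save(spec, unit='experiment')
# test_trial_demo additionally shortens the run with spec_util.override_spec(spec, 'test')
spec_util.tick(spec, 'trial')  # assign a trial index before constructing the Trial
trial_metrics = Trial(spec).run()  # returns a dict of trial metrics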