├── .codeclimate.yml ├── .dockerignore ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── PULL_REQUEST_TEMPLATE.md └── workflows │ └── ci.yml ├── .gitignore ├── CODE_OF_CONDUCT.md ├── Dockerfile ├── LICENSE ├── README.md ├── bin ├── plot_benchmark.py ├── plot_script.py ├── setup ├── setup_arch ├── setup_arch_extra ├── setup_macOS ├── setup_macOS_extra ├── setup_ubuntu └── setup_ubuntu_extra ├── environment-byo.yml ├── environment.yml ├── job ├── atari_benchmark_adv.json ├── atari_benchmark_base.json ├── experiments.json └── roboschool_benchmark.json ├── package.json ├── run_lab.py ├── setup.py ├── slm_lab ├── __init__.py ├── agent │ ├── __init__.py │ ├── algorithm │ │ ├── __init__.py │ │ ├── actor_critic.py │ │ ├── base.py │ │ ├── dqn.py │ │ ├── policy_util.py │ │ ├── ppo.py │ │ ├── random.py │ │ ├── reinforce.py │ │ ├── sac.py │ │ ├── sarsa.py │ │ └── sil.py │ ├── memory │ │ ├── __init__.py │ │ ├── base.py │ │ ├── onpolicy.py │ │ ├── prioritized.py │ │ └── replay.py │ └── net │ │ ├── __init__.py │ │ ├── base.py │ │ ├── conv.py │ │ ├── mlp.py │ │ ├── net_util.py │ │ ├── q_net.py │ │ └── recurrent.py ├── env │ ├── __init__.py │ ├── base.py │ ├── openai.py │ ├── registration.py │ ├── unity.py │ ├── vec_env.py │ ├── vizdoom │ │ ├── __init__.py │ │ ├── cfgs │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── basic.cfg │ │ │ ├── basic.wad │ │ │ ├── bots.cfg │ │ │ ├── cig.cfg │ │ │ ├── cig.wad │ │ │ ├── cig_with_unknown.wad │ │ │ ├── deadly_corridor.cfg │ │ │ ├── deadly_corridor.wad │ │ │ ├── deathmatch.cfg │ │ │ ├── deathmatch.wad │ │ │ ├── defend_the_center.cfg │ │ │ ├── defend_the_center.wad │ │ │ ├── defend_the_line.cfg │ │ │ ├── defend_the_line.wad │ │ │ ├── health_gathering.cfg │ │ │ ├── health_gathering.wad │ │ │ ├── health_gathering_supreme.cfg │ │ │ ├── health_gathering_supreme.wad │ │ │ ├── learning.cfg │ │ │ ├── multi.cfg │ │ │ ├── multi_deathmatch.wad │ │ │ ├── multi_duel.cfg │ │ │ ├── multi_duel.wad │ │ │ ├── my_way_home.cfg │ │ │ ├── my_way_home.wad │ │ │ ├── oblige.cfg │ │ │ ├── predict_position.cfg │ │ │ ├── predict_position.wad │ │ │ ├── rocket_basic.cfg │ │ │ ├── rocket_basic.wad │ │ │ ├── simpler_basic.cfg │ │ │ ├── simpler_basic.wad │ │ │ ├── take_cover.cfg │ │ │ └── take_cover.wad │ │ └── vizdoom_env.py │ └── wrapper.py ├── experiment │ ├── __init__.py │ ├── analysis.py │ ├── control.py │ ├── retro_analysis.py │ └── search.py ├── lib │ ├── __init__.py │ ├── decorator.py │ ├── distribution.py │ ├── logger.py │ ├── math_util.py │ ├── optimizer.py │ ├── util.py │ └── viz.py └── spec │ ├── __init__.py │ ├── _random_baseline.json │ ├── benchmark │ ├── a2c │ │ ├── a2c_gae_atari.json │ │ ├── a2c_gae_cartpole.json │ │ ├── a2c_gae_cont.json │ │ ├── a2c_gae_lunar.json │ │ ├── a2c_gae_pong.json │ │ ├── a2c_gae_qbert.json │ │ ├── a2c_gae_roboschool.json │ │ ├── a2c_gae_unity.json │ │ ├── a2c_nstep_atari.json │ │ ├── a2c_nstep_cont.json │ │ ├── a2c_nstep_lunar.json │ │ ├── a2c_nstep_pong.json │ │ ├── a2c_nstep_qbert.json │ │ ├── a2c_nstep_roboschool.json │ │ ├── a2c_nstep_unity.json │ │ └── a2c_videopinball.json │ ├── a3c │ │ ├── a3c_gae_atari.json │ │ ├── a3c_gae_pong.json │ │ ├── a3c_gae_qbert.json │ │ ├── a3c_nstep_atari.json │ │ ├── a3c_nstep_pong.json │ │ ├── a3c_nstep_qbert.json │ │ └── a3c_videopinball.json │ ├── async_sac │ │ ├── async_sac_atari.json │ │ ├── async_sac_halfcheetah.json │ │ ├── async_sac_halfcheetah_pybullet.json │ │ ├── async_sac_lunar.json │ │ ├── async_sac_pong.json │ │ ├── async_sac_qbert.json │ │ └── async_sac_roboschool.json │ ├── 
dppo │ │ ├── dppo_atari.json │ │ └── dppo_pong.json │ ├── dqn │ │ ├── ddqn_atari.json │ │ ├── ddqn_per_atari.json │ │ ├── ddqn_per_lunar.json │ │ ├── ddqn_per_pong.json │ │ ├── ddqn_per_qbert.json │ │ ├── ddqn_per_unity.json │ │ ├── ddqn_pong.json │ │ ├── ddqn_qbert.json │ │ ├── ddqn_videopinball.json │ │ ├── dqn_atari.json │ │ ├── dqn_cartpole.json │ │ ├── dqn_lunar.json │ │ ├── dqn_per_atari.json │ │ ├── dqn_per_pong.json │ │ ├── dqn_per_qbert.json │ │ ├── dqn_pong.json │ │ ├── dqn_qbert.json │ │ ├── dqn_unity.json │ │ ├── dqn_videopinball.json │ │ ├── dueling_ddqn_per_atari.json │ │ ├── dueling_ddqn_per_pong.json │ │ └── dueling_dqn_pong.json │ ├── ppo │ │ ├── ppo_atari.json │ │ ├── ppo_cartpole.json │ │ ├── ppo_cont.json │ │ ├── ppo_lunar.json │ │ ├── ppo_pong.json │ │ ├── ppo_qbert.json │ │ ├── ppo_roboschool.json │ │ ├── ppo_unity.json │ │ └── ppo_videopinball.json │ ├── reinforce │ │ ├── reinforce_cartpole.json │ │ └── reinforce_videopinball.json │ ├── sac │ │ ├── sac_halfcheetah.json │ │ ├── sac_lunar.json │ │ ├── sac_per_halfcheetah.json │ │ ├── sac_per_halfcheetah_pybullet.json │ │ ├── sac_per_roboschool.json │ │ ├── sac_pong.json │ │ ├── sac_roboschool.json │ │ └── sac_unity.json │ └── sarsa │ │ ├── sarsa_cartpole.json │ │ └── sarsa_videopinball.json │ ├── demo.json │ ├── experimental │ ├── a2c │ │ ├── a2c_cartpole.json │ │ ├── a2c_gae_lam_search.json │ │ ├── a2c_nstep_n_search.json │ │ └── a2c_pendulum.json │ ├── a3c │ │ ├── a3c_cartpole.json │ │ └── a3c_nstep_worker_search.json │ ├── dqn │ │ ├── ddqn_cartpole.json │ │ ├── ddqn_lunar.json │ │ ├── ddqn_lunar_search.json │ │ ├── dqn_cartpole.json │ │ ├── dqn_cartpole_search.json │ │ ├── dqn_lunar_search.json │ │ └── dueling_dqn_cartpole.json │ ├── misc │ │ ├── base.json │ │ ├── gridworld.json │ │ ├── hydra_dqn.json │ │ ├── lunar_pg.json │ │ ├── mountain_car.json │ │ ├── pendulum.json │ │ └── random.json │ ├── ppo │ │ ├── ppo_cartpole.json │ │ ├── ppo_eps_search.json │ │ ├── ppo_lam_search.json │ │ ├── ppo_pendulum.json │ │ └── ppo_unity_lam_search.json │ ├── reinforce │ │ ├── reinforce_cartpole.json │ │ └── reinforce_pendulum.json │ ├── sac │ │ └── sac_cartpole.json │ ├── sarsa │ │ └── sarsa_cartpole.json │ └── sil │ │ ├── ppo_sil_cartpole.json │ │ └── sil_cartpole.json │ ├── random_baseline.py │ └── spec_util.py └── test ├── __init__.py ├── agent ├── memory │ ├── test_onpolicy_memory.py │ ├── test_per_memory.py │ └── test_replay_memory.py └── net │ ├── test_conv.py │ ├── test_mlp.py │ └── test_recurrent.py ├── conftest.py ├── env ├── test_registration.py ├── test_vec_env.py └── test_wrapper.py ├── experiment ├── test_control.py └── test_monitor.py ├── fixture └── lib │ └── util │ ├── test_df.csv │ ├── test_dict.json │ ├── test_dict.yml │ └── test_str.txt ├── lib ├── test_distribution.py ├── test_logger.py ├── test_math_util.py └── test_util.py └── spec ├── test_dist_spec.py ├── test_spec.py └── test_spec_util.py /.codeclimate.yml: -------------------------------------------------------------------------------- 1 | --- 2 | version: 2 3 | plugins: 4 | duplication: 5 | enabled: true 6 | config: 7 | languages: 8 | python: 9 | python_version: 3 10 | mass_threshold: 40 11 | eslint: 12 | enabled: true 13 | fixme: 14 | enabled: true 15 | pep8: 16 | enabled: true 17 | checks: 18 | E501: 19 | enabled: false 20 | radon: 21 | enabled: true 22 | config: 23 | threshold: "C" 24 | checks: 25 | argument-count: 26 | config: 27 | threshold: 10 28 | method-complexity: 29 | config: 30 | threshold: 10 31 | file-lines: 32 | config: 33 | threshold: 800 
34 | exclude_patterns: 35 | - bin/ 36 | - config/ 37 | - node_modules/ 38 | - test/ 39 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | # System 2 | **/.DS_Store 3 | 4 | # IDEs 5 | **/.idea 6 | 7 | # Python 8 | **/__pycache__ 9 | **/*.py[cod] 10 | **/*$py.class 11 | **/*.egg* 12 | **/*.manifest 13 | **/.pytest* 14 | **/.cache 15 | **/dist/ 16 | **/src 17 | **/htmlcov 18 | **/coverage.xml 19 | **/.coverage* 20 | **/.env 21 | 22 | # NodeJS 23 | **/node_modules 24 | 25 | # VizDoom 26 | **/*.ini 27 | 28 | # Data files 29 | **/data 30 | **/model 31 | **/nb.py 32 | **/*.html 33 | **/*.log 34 | **/*.meta 35 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | 5 | --- 6 | 7 | **Describe the bug** 8 | A clear and concise description of what the bug is. 9 | 10 | **To Reproduce** 11 | 1. OS and environment: 12 | 2. SLM Lab git SHA (run `git rev-parse HEAD` to get it): 13 | 3. `spec` file used: 14 | 15 | **Additional context** 16 | Add any other context about the problem here. 17 | 18 | **Error logs** 19 | ```shell 20 | #REPLACE ME 21 | ``` 22 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | 5 | --- 6 | 7 | >Check the [Project Roadmaps](https://github.com/kengz/SLM-Lab/projects) to see if your feature request is already in there. 8 | 9 | **Are you requesting a feature or an implementation?** 10 | A clear and concise description of what the request is, and the reason. 11 | 12 | **If you have any suggested solutions** 13 | A clear and concise description of what you think could help. 14 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ## Title of code changes 2 | 3 | - describe the code changes and implications 4 | - add instructions to reproduce if relevant 5 | 6 | ```python 7 | some code snippets 8 | ``` 9 | 10 | 11 | *----------- USE ABOVE FOR FEATURES, BELOW FOR RESULTS -----------* 12 | 13 | 14 | # Experiment Title 15 | 16 | ## Abstract 17 | 18 | *Briefly describe the experiment and the contribution.* 19 | 20 | ## Methodology 21 | 22 | *Discuss the methods/algorithms used.* 23 | 24 | ### Reproduction 25 | 26 | 1. spec file location: 27 | 2. 
git SHA (find this inside the spec file): 28 | 29 | Run command: `python run_lab.py ` 30 | 31 | ## Result and Discussion 32 | 33 | *Provide data in graphs and tables; give explanations and conclusion.* 34 | 35 | Data zipfile url (We will send you a Dropbox file request): 36 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: [master] 6 | pull_request: 7 | branches: [master] 8 | 9 | jobs: 10 | lint: 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - name: Check out Git repository 15 | uses: actions/checkout@v2 16 | 17 | - name: Set up Python 18 | uses: actions/setup-python@v2 19 | with: 20 | python-version: 3.7 21 | 22 | - uses: liskin/gh-problem-matcher-wrap@v1 23 | with: 24 | action: add 25 | linters: flake8 26 | 27 | - name: Lint with flake8 28 | shell: bash -l {0} 29 | run: | 30 | pip install flake8 31 | # exit-zero treats all errors as warnings. 32 | flake8 . --ignore=E501 --exit-zero --statistics 33 | 34 | - uses: liskin/gh-problem-matcher-wrap@v1 35 | with: 36 | action: remove 37 | linters: flake8 38 | 39 | build: 40 | needs: lint 41 | runs-on: ubuntu-latest 42 | 43 | steps: 44 | - uses: actions/checkout@v2 45 | 46 | - name: Cache Conda 47 | uses: actions/cache@v2 48 | env: 49 | # change from default size of 32MB to prevent 503 err 50 | CACHE_UPLOAD_CHUNK_SIZE: 67108864 51 | with: 52 | path: /usr/share/miniconda/envs/lab 53 | key: ${{ runner.os }}-conda-${{ hashFiles('environment.yml') }} 54 | restore-keys: | 55 | ${{ runner.os }}-conda- 56 | 57 | - name: Setup Conda dependencies 58 | uses: conda-incubator/setup-miniconda@v2 59 | with: 60 | activate-environment: lab 61 | environment-file: environment.yml 62 | python-version: 3.7 63 | auto-activate-base: false 64 | 65 | - name: Conda info 66 | shell: bash -l {0} 67 | run: | 68 | conda info 69 | conda list 70 | 71 | - uses: liskin/gh-problem-matcher-wrap@v1 72 | with: 73 | action: add 74 | linters: pytest 75 | 76 | - name: Run tests 77 | shell: bash -l {0} 78 | run: | 79 | python setup.py test 80 | 81 | - uses: liskin/gh-problem-matcher-wrap@v1 82 | with: 83 | action: add 84 | linters: pytest 85 | 86 | - name: Test & publish code coverage 87 | uses: paambaati/codeclimate-action@v2.7.5 88 | env: 89 | CC_TEST_REPORTER_ID: ${{ secrets.CC_TEST_REPORTER_ID }} 90 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # System 2 | .DS_Store 3 | 4 | # IDEs 5 | .idea/ 6 | .vscode 7 | 8 | # Python 9 | __pycache__/ 10 | *.py[cod] 11 | *$py.class 12 | *.egg* 13 | *.manifest 14 | .pytest* 15 | .cache 16 | dist/ 17 | src/ 18 | htmlcov/ 19 | coverage.xml 20 | .coverage* 21 | .env 22 | .mypy_cache 23 | 24 | # NodeJS 25 | node_modules/ 26 | 27 | # VizDoom 28 | *.ini 29 | 30 | # Data files 31 | data/ 32 | model/ 33 | nb.py 34 | *.html 35 | *.log 36 | *.meta 37 | SLM-Env 38 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of 
age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 6 | 7 | ## Our Standards 8 | 9 | Examples of behavior that contributes to creating a positive environment include: 10 | 11 | * Using welcoming and inclusive language 12 | * Being respectful of differing viewpoints and experiences 13 | * Gracefully accepting constructive criticism 14 | * Focusing on what is best for the community 15 | * Showing empathy towards other community members 16 | 17 | Examples of unacceptable behavior by participants include: 18 | 19 | * The use of sexualized language or imagery and unwelcome sexual attention or advances 20 | * Trolling, insulting/derogatory comments, and personal or political attacks 21 | * Public or private harassment 22 | * Publishing others' private information, such as a physical or electronic address, without explicit permission 23 | * Other conduct which could reasonably be considered inappropriate in a professional setting 24 | 25 | ## Our Responsibilities 26 | 27 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 28 | 29 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 30 | 31 | ## Scope 32 | 33 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. 34 | 35 | ## Enforcement 36 | 37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at Wah Loon Keng (kengzwl@gmail.com) or Laura Graesser (lhgraesser@gmail.com). The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. 38 | 39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. 40 | 41 | ## Attribution 42 | 43 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] 44 | 45 | [homepage]: http://contributor-covenant.org 46 | [version]: http://contributor-covenant.org/version/1/4/ 47 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # run instructions: 2 | # build image: docker build -t kengz/slm_lab:latest -t kengz/slm_lab:v4.2.0 . 
3 | # start container: docker run --rm -it kengz/slm_lab:v4.2.0 4 | # list image: docker images -a 5 | # push image: docker push kengz/slm_lab 6 | # prune: docker system prune 7 | 8 | FROM ubuntu:16.04 9 | 10 | LABEL maintainer="kengzwl@gmail.com" 11 | LABEL website="https://github.com/kengz/SLM-Lab" 12 | 13 | SHELL ["/bin/bash", "-c"] 14 | 15 | RUN apt-get update && \ 16 | apt-get install -y build-essential \ 17 | curl nano git wget zip libstdc++6 \ 18 | python3-dev zlib1g-dev libjpeg-dev cmake swig python-pyglet python3-opengl libboost-all-dev libsdl2-dev libosmesa6-dev patchelf ffmpeg xvfb && \ 19 | rm -rf /var/lib/apt/lists/* 20 | 21 | RUN curl -O https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ 22 | bash Miniconda3-latest-Linux-x86_64.sh -b && \ 23 | rm Miniconda3-latest-Linux-x86_64.sh && \ 24 | echo '. ~/miniconda3/etc/profile.d/conda.sh' >> ~/.bashrc && \ 25 | . ~/miniconda3/etc/profile.d/conda.sh && \ 26 | conda --version 27 | 28 | # create and set the working directory 29 | RUN mkdir -p /root/SLM-Lab 30 | 31 | WORKDIR /root/SLM-Lab 32 | 33 | # install dependencies, only retrigger on dependency changes 34 | COPY environment.yml environment.yml 35 | 36 | # install Python and Conda dependencies 37 | RUN . ~/miniconda3/etc/profile.d/conda.sh && \ 38 | conda create -n lab python=3.7.3 -y && \ 39 | conda activate lab && \ 40 | conda env update -f environment.yml && \ 41 | conda clean -y --all && \ 42 | rm -rf ~/.cache/pip 43 | 44 | # copy file at last to not trigger changes above unnecessarily 45 | COPY . . 46 | 47 | RUN . ~/miniconda3/etc/profile.d/conda.sh && \ 48 | conda activate lab && \ 49 | python setup.py test 50 | # pytest --verbose --no-flaky-report test/spec/test_dist_spec.py && \ 51 | # yarn reset 52 | 53 | CMD ["/bin/bash"] 54 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Wah Loon Keng, Laura Graesser 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SLM Lab
![GitHub tag (latest SemVer)](https://img.shields.io/github/tag/kengz/slm-lab) ![CI](https://github.com/kengz/SLM-Lab/workflows/CI/badge.svg) [![Maintainability](https://api.codeclimate.com/v1/badges/20c6a124c468b4d3e967/maintainability)](https://codeclimate.com/github/kengz/SLM-Lab/maintainability) [![Test Coverage](https://api.codeclimate.com/v1/badges/20c6a124c468b4d3e967/test_coverage)](https://codeclimate.com/github/kengz/SLM-Lab/test_coverage) 2 | 3 | 4 | 5 | Modular Deep Reinforcement Learning framework in PyTorch. 6 | 7 | Documentation: 8 | https://slm-lab.gitbook.io/slm-lab/ 9 | 10 |
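A minimal sketch of a typical invocation, assuming the bundled demo spec at `slm_lab/spec/demo.json` and the usual `run_lab.py` argument order of spec file, spec name, then lab mode (the same `demo.json`/`dqn_cartpole`/`dev` combination that `job/experiments.json` uses); check the documentation above for the exact CLI of your version:

```bash
# activate the conda environment created by bin/setup
conda activate lab
# quick dev-mode run of the bundled demo spec
python run_lab.py slm_lab/spec/demo.json dqn_cartpole dev
# full training run of the same spec
python run_lab.py slm_lab/spec/demo.json dqn_cartpole train
```

The recognized lab modes (`dev`, `train`, `search`, `enjoy`, `eval`) are defined in `slm_lab/__init__.py`.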
11 | 12 | >NOTE: the `book` branch has been updated for issue fixes. For the original code in the book _Foundations of Deep Reinforcement Learning_, check out to git tag `v4.1.1` 13 | 14 | ||||| 15 | |:---:|:---:|:---:|:---:| 16 | | ![ppo beamrider](https://user-images.githubusercontent.com/8209263/63994698-689ecf00-caaa-11e9-991f-0a5e9c2f5804.gif) | ![ppo breakout](https://user-images.githubusercontent.com/8209263/63994695-650b4800-caaa-11e9-9982-2462738caa45.gif) | ![ppo kungfumaster](https://user-images.githubusercontent.com/8209263/63994690-60469400-caaa-11e9-9093-b1cd38cee5ae.gif) | ![ppo mspacman](https://user-images.githubusercontent.com/8209263/63994685-5cb30d00-caaa-11e9-8f35-78e29a7d60f5.gif) | 17 | | BeamRider | Breakout | KungFuMaster | MsPacman | 18 | | ![ppo pong](https://user-images.githubusercontent.com/8209263/63994680-59b81c80-caaa-11e9-9253-ed98370351cd.gif) | ![ppo qbert](https://user-images.githubusercontent.com/8209263/63994672-54f36880-caaa-11e9-9757-7780725b53af.gif) | ![ppo seaquest](https://user-images.githubusercontent.com/8209263/63994665-4dcc5a80-caaa-11e9-80bf-c21db818115b.gif) | ![ppo spaceinvaders](https://user-images.githubusercontent.com/8209263/63994624-15c51780-caaa-11e9-9c9a-854d3ce9066d.gif) | 19 | | Pong | Qbert | Seaquest | Sp.Invaders | 20 | | ![sac ant](https://user-images.githubusercontent.com/8209263/63994867-ff6b8b80-caaa-11e9-971e-2fac1cddcbac.gif) | ![sac halfcheetah](https://user-images.githubusercontent.com/8209263/63994869-01354f00-caab-11e9-8e11-3893d2c2419d.gif) | ![sac hopper](https://user-images.githubusercontent.com/8209263/63994871-0397a900-caab-11e9-9566-4ca23c54b2d4.gif) | ![sac humanoid](https://user-images.githubusercontent.com/8209263/63994883-0befe400-caab-11e9-9bcc-c30c885aad73.gif) | 21 | | Ant | HalfCheetah | Hopper | Humanoid | 22 | | ![sac doublependulum](https://user-images.githubusercontent.com/8209263/63994879-07c3c680-caab-11e9-974c-06cdd25bfd68.gif) | ![sac pendulum](https://user-images.githubusercontent.com/8209263/63994880-085c5d00-caab-11e9-850d-049401540e3b.gif) | ![sac reacher](https://user-images.githubusercontent.com/8209263/63994881-098d8a00-caab-11e9-8e19-a3b32d601b10.gif) | ![sac walker](https://user-images.githubusercontent.com/8209263/63994882-0abeb700-caab-11e9-9e19-b59dc5c43393.gif) | 23 | | Inv.DoublePendulum | InvertedPendulum | Reacher | Walker | 24 | 25 | -------------------------------------------------------------------------------- /bin/setup: -------------------------------------------------------------------------------- 1 | #!/bin/bash --login 2 | # This script runs the same sequence as the Docker build 3 | # Run this as: 4 | # bin/setup 5 | 6 | 7 | # Fail on the first error; killable by SIGINT 8 | set -e 9 | trap "exit" INT 10 | 11 | read -p " 12 | ------------------------------------------------ 13 | 14 | Welcome to the SLM Lab setup script; 15 | This will invoke sudo; alternatively, 16 | inspect bin/setup_ubuntu or bin/setup_macOS and run the lines manually. 
17 | 18 | Press enter to continue, Ctrl+c to quit: 19 | 20 | ------------------------------------------------ 21 | " 22 | 23 | # resolve the bin dir used to call the OS-specific setup scripts 24 | BIN_DIR=`pwd`/bin 25 | 26 | # Run setup by OS 27 | if [ $(uname) == "Darwin" ]; then 28 | $BIN_DIR/setup_macOS 29 | elif [ -f '/etc/arch-release' ]; then 30 | $BIN_DIR/setup_arch 31 | else 32 | $BIN_DIR/setup_ubuntu 33 | fi 34 | 35 | # Run extra setup 36 | if [ "$1" == "extra" ]; then 37 | echo "Running extra optional setup" 38 | if [ $(uname) == "Darwin" ]; then 39 | $BIN_DIR/setup_macOS_extra 40 | elif [ -f '/etc/arch-release' ]; then 41 | $BIN_DIR/setup_arch_extra 42 | else 43 | $BIN_DIR/setup_ubuntu_extra 44 | fi 45 | fi 46 | 47 | echo " 48 | ------------------------------------------------ 49 | 50 | Installation complete. 51 | 52 | ------------------------------------------------ 53 | " 54 | -------------------------------------------------------------------------------- /bin/setup_arch: -------------------------------------------------------------------------------- 1 | #!/bin/bash --login 2 | # This script sets up SLM Lab for Arch Linux 3 | 4 | # Fail on the first error; killable by SIGINT 5 | set -e 6 | trap "exit" INT 7 | 8 | echo "--- Installing system dependencies ---" 9 | pacman -Suy 10 | pacman -Sy --needed git cmake gcc 11 | pacman -Sy --needed zlib libjpeg-turbo xorg-server-xvfb gst-libav xorg-server-devel python-opengl boost sdl swig base-devel gcc-libs hdf5 openblas 12 | 13 | echo "--- Installing Conda ---" 14 | if which conda >/dev/null; then 15 | echo "Conda is already installed" 16 | else 17 | pacman -Sy --needed python-conda 18 | echo '. /etc/profile.d/conda.sh' >> ~/.bashrc 19 | source ~/.bashrc 20 | fi 21 | 22 | echo "--- Installing Conda environment ---" 23 | if conda env list | grep "^lab " >/dev/null; then 24 | echo "conda env lab is already installed" 25 | else 26 | conda create -n lab python=3.7.3 -y 27 | fi 28 | 29 | # remove for reset: 30 | # conda deactivate 31 | # conda env remove -n lab -y 32 | # conda env export > environment.yml 33 | echo "--- Updating Conda environment ---" 34 | conda env update -f environment.yml 35 | 36 | source ~/.bashrc 37 | echo "--- Lab setup complete ---" 38 | -------------------------------------------------------------------------------- /bin/setup_arch_extra: -------------------------------------------------------------------------------- 1 | #!/bin/bash --login 2 | # Sets up extra dependencies for Unity 3 | 4 | # Fail on the first error; killable by SIGINT 5 | set -e 6 | trap "exit" INT 7 | 8 | echo "--- Installing Unity ML agents ---" 9 | conda activate lab 10 | pip install gym_unity==0.4.5 11 | # clone to slm_lab/env/SLM-Env 12 | git clone https://github.com/kengz/SLM-Env.git ./slm_lab/env/SLM-Env 13 | 14 | echo "--- Installing VizDoom ---" 15 | pip install vizdoom==1.1.6 16 | -------------------------------------------------------------------------------- /bin/setup_macOS: -------------------------------------------------------------------------------- 1 | #!/bin/bash --login 2 | # This script sets up SLM Lab for macOS 3 | 4 | # Fail on the first error; killable by SIGINT 5 | set -e 6 | trap "exit" INT 7 | 8 | echo "--- Installing brew ---" 9 | if which brew >/dev/null; then 10 | echo "Brew is already installed" 11 | else 12 | ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)" 13 | fi 14 | 15 | echo "--- Installing brew system dependencies ---" 16 | hb_list=(cmake boost boost-python3 sdl2 swig) 17 | for item in 
"${hb_list[@]}"; do 18 | echo "Installing ${item}" 19 | brew info "${item}" | grep --quiet "Not installed" && brew install "${item}" 20 | done 21 | 22 | echo "--- Installing Conda ---" 23 | if which conda >/dev/null; then 24 | echo "Conda is already installed" 25 | else 26 | curl -O https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh 27 | bash Miniconda3-latest-MacOSX-x86_64.sh -b -p ~/miniconda3 28 | rm Miniconda3-latest-MacOSX-x86_64.sh 29 | echo '. ~/miniconda3/etc/profile.d/conda.sh' >> ~/.bash_profile 30 | source ~/.bash_profile 31 | fi 32 | 33 | echo "--- Installing Conda environment ---" 34 | if ! which conda >/dev/null; then 35 | # guard for when no Conda is found, e.g. in Colab 36 | export PATH=~/miniconda3/bin:$PATH 37 | fi 38 | if conda env list | grep "^lab " >/dev/null; then 39 | echo "conda env lab is already installed" 40 | else 41 | conda create -n lab python=3.7.3 -y 42 | fi 43 | 44 | # install kernel for Atom Hydrogen 45 | # conda install ipykernel 46 | # python -m ipykernel install --user --name lab 47 | 48 | # remove for reset: 49 | # conda deactivate 50 | # conda env remove -n lab -y 51 | # conda env export > environment.yml 52 | echo "--- Updating Conda environment ---" 53 | conda env update -f environment.yml 54 | 55 | source ~/.bash_profile 56 | echo "--- Lab setup complete ---" 57 | -------------------------------------------------------------------------------- /bin/setup_macOS_extra: -------------------------------------------------------------------------------- 1 | #!/bin/bash --login 2 | # Sets up extra dependencies for Unity 3 | 4 | # Fail on the first error; killable by SIGINT 5 | set -e 6 | trap "exit" INT 7 | 8 | echo "--- Installing Unity ML agents ---" 9 | conda activate lab 10 | pip install gym_unity==0.4.5 11 | # clone to slm_lab/env/SLM-Env 12 | git clone https://github.com/kengz/SLM-Env.git ./slm_lab/env/SLM-Env 13 | 14 | echo "--- Installing VizDoom ---" 15 | pip install vizdoom==1.1.6 16 | -------------------------------------------------------------------------------- /bin/setup_ubuntu: -------------------------------------------------------------------------------- 1 | #!/bin/bash --login 2 | # This script sets up SLM Lab for Linux Ubuntu 3 | 4 | # Fail on the first error; killable by SIGINT 5 | set -e 6 | trap "exit" INT 7 | 8 | echo "--- Installing system dependencies ---" 9 | apt-get update && \ 10 | apt-get install -y build-essential \ 11 | curl nano git wget zip libstdc++6 \ 12 | python3-dev zlib1g-dev libjpeg-dev cmake swig python-pyglet python3-opengl libboost-all-dev libsdl2-dev libosmesa6-dev patchelf ffmpeg xvfb && \ 13 | rm -rf /var/lib/apt/lists/* 14 | 15 | echo "--- Installing Conda ---" 16 | if which conda >/dev/null; then 17 | echo "Conda is already installed" 18 | else 19 | curl -O https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh 20 | bash Miniconda3-latest-Linux-x86_64.sh -b -p ~/miniconda3 21 | rm Miniconda3-latest-Linux-x86_64.sh 22 | echo '. ~/miniconda3/etc/profile.d/conda.sh' >> ~/.bashrc 23 | source ~/.bashrc 24 | fi 25 | 26 | echo "--- Installing Conda environment ---" 27 | if ! which conda >/dev/null; then 28 | # guard for when no Conda is found, e.g. 
in Colab 29 | export PATH=~/miniconda3/bin:$PATH 30 | fi 31 | if conda env list | grep "^lab " >/dev/null; then 32 | echo "conda env lab is already installed" 33 | else 34 | conda create -n lab python=3.7.3 -y 35 | fi 36 | 37 | # remove for reset: 38 | # conda deactivate 39 | # conda env remove -n lab -y 40 | # conda env export > environment.yml 41 | echo "--- Updating Conda environment ---" 42 | conda env update -f environment.yml 43 | 44 | source ~/.bashrc 45 | echo "--- Lab setup complete ---" 46 | -------------------------------------------------------------------------------- /bin/setup_ubuntu_extra: -------------------------------------------------------------------------------- 1 | #!/bin/bash --login 2 | # Sets up extra dependencies for Unity 3 | 4 | # Fail on the first error; killable by SIGINT 5 | set -e 6 | trap "exit" INT 7 | 8 | echo "--- Installing Unity ML agents ---" 9 | conda activate lab 10 | pip install gym_unity==0.4.5 11 | # clone to slm_lab/env/SLM-Env 12 | git clone https://github.com/kengz/SLM-Env.git ./slm_lab/env/SLM-Env 13 | 14 | echo "--- Installing VizDoom ---" 15 | pip install vizdoom==1.1.6 16 | -------------------------------------------------------------------------------- /environment-byo.yml: -------------------------------------------------------------------------------- 1 | name: lab 2 | channels: 3 | - plotly 4 | - conda-forge 5 | - defaults 6 | dependencies: 7 | - autopep8=1.4.4 8 | - colorlog=4.0.2 9 | - coverage=4.5.3 10 | - flaky=3.5.3 11 | - libgcc 12 | - numpy=1.16.3 13 | - openpyxl=2.6.1 14 | - pandas=0.24.2 15 | - pillow=6.2.0 16 | - pip=19.1.1 17 | - plotly=4.9.0 18 | - psutil=5.6.2 19 | - pycodestyle=2.5.0 20 | - pydash=4.2.1 21 | - pytest-cov=2.7.1 22 | - pytest-timeout=1.3.3 23 | - pytest=4.5.0 24 | - python=3.7.3 25 | - pyyaml=5.1 26 | - regex=2019.05.25 27 | - scipy=1.3.0 28 | - ujson=1.35 29 | - xlrd=1.2.0 30 | - pip: 31 | - box2d-py==2.3.8 32 | - cloudpickle==0.5.2 33 | - colorlover==0.3.0 34 | - future==0.18.2 35 | - kaleido==0.2.1 36 | - opencv-python==4.1.0.25 37 | - pyopengl==3.1.0 38 | - ray==0.7.0 39 | - redis==2.10.6 40 | - tensorboard==2.1.1 41 | - xvfbwrapper==0.2.9 42 | - gym==0.12.1 43 | - gym[atari] 44 | - gym[box2d] 45 | - gym[classic_control] 46 | - pybullet==2.8.4 47 | - roboschool==1.0.46 48 | - atari-py==0.2.6 49 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: lab 2 | channels: 3 | - plotly 4 | - pytorch 5 | - conda-forge 6 | - defaults 7 | dependencies: 8 | - autopep8=1.4.4 9 | - colorlog=4.0.2 10 | - coverage=4.5.3 11 | - flaky=3.5.3 12 | - libgcc 13 | - numpy=1.16.3 14 | - openpyxl=2.6.1 15 | - pandas=0.24.2 16 | - pillow=6.2.0 17 | - pip=19.1.1 18 | - plotly=4.9.0 19 | - psutil=5.6.2 20 | - pycodestyle=2.5.0 21 | - pydash=4.2.1 22 | - pytest-cov=2.7.1 23 | - pytest-timeout=1.3.3 24 | - pytest=4.5.0 25 | - python=3.7.3 26 | - pytorch=1.3.1 27 | - pyyaml=5.1 28 | - regex=2019.05.25 29 | - scipy=1.3.0 30 | - ujson=1.35 31 | - xlrd=1.2.0 32 | - more-itertools==9.1.0 33 | - pip: 34 | - box2d-py==2.3.8 35 | - cloudpickle==0.5.2 36 | - colorlover==0.3.0 37 | - future==0.18.2 38 | - kaleido==0.2.1 39 | - opencv-python==4.1.0.25 40 | - pyopengl==3.1.0 41 | - ray==0.7.0 42 | - redis==2.10.6 43 | - tensorboard==2.1.1 44 | - xvfbwrapper==0.2.9 45 | - gym==0.12.1 46 | - gym[atari] 47 | - gym[box2d] 48 | - gym[classic_control] 49 | - pybullet==2.8.4 50 | - roboschool==1.0.46 51 | - 
atari-py==0.2.6 52 | - pyglet==1.5.29 53 | - grpcio==1.32.0 54 | -------------------------------------------------------------------------------- /job/atari_benchmark_adv.json: -------------------------------------------------------------------------------- 1 | { 2 | "benchmark/ppo/ppo_atari.json": { 3 | "ppo_atari": "train" 4 | }, 5 | "benchmark/dqn/ddqn_per_atari.json": { 6 | "ddqn_per_atari": "train" 7 | }, 8 | } 9 | -------------------------------------------------------------------------------- /job/atari_benchmark_base.json: -------------------------------------------------------------------------------- 1 | { 2 | "benchmark/a2c/a2c_nstep_atari.json": { 3 | "a2c_nstep_atari": "train" 4 | }, 5 | "benchmark/a2c/a2c_gae_atari.json": { 6 | "a2c_gae_atari": "train" 7 | }, 8 | "benchmark/dqn/dqn_atari.json": { 9 | "dqn_atari": "train" 10 | }, 11 | } 12 | -------------------------------------------------------------------------------- /job/experiments.json: -------------------------------------------------------------------------------- 1 | { 2 | "demo.json": { 3 | "dqn_cartpole": "dev" 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /job/roboschool_benchmark.json: -------------------------------------------------------------------------------- 1 | { 2 | "slm_lab/spec/benchmark/a2c/a2c_nstep_roboschool.json": { 3 | "a2c_nstep_roboschool": "train", 4 | }, 5 | "slm_lab/spec/benchmark/a2c/a2c_gae_roboschool.json": { 6 | "a2c_gae_roboschool": "train", 7 | }, 8 | "slm_lab/spec/benchmark/ppo/ppo_roboschool.json": { 9 | "ppo_roboschool": "train", 10 | }, 11 | "slm_lab/spec/benchmark/sac/sac_roboschool.json": { 12 | "sac_roboschool": "train", 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "slm_lab", 3 | "version": "4.0.0", 4 | "description": "Modular Deep Reinforcement Learning framework in PyTorch.", 5 | "main": "index.js", 6 | "scripts": { 7 | "start": "python run_lab.py", 8 | "debug": "LOG_LEVEL=DEBUG python run_lab.py", 9 | "retro_analyze": "python -c 'import sys; from slm_lab.experiment import retro_analysis; retro_analysis.retro_analyze(sys.argv[1])'", 10 | "retro_eval": "python -c 'import sys; from slm_lab.experiment import retro_analysis; retro_analysis.retro_eval(sys.argv[1])'", 11 | "reset": "rm -rf data/* .cache __pycache__ */__pycache__ *egg-info .pytest* htmlcov .coverage* *.xml", 12 | "kill": "pkill -f run_lab; pkill -f slm-env; pkill -f ipykernel; pkill -f ray; pkill -f Xvfb; ps aux | grep -i Unity | awk '{print $2}' | xargs sudo kill -9", 13 | "update": "conda env update -f environment.yml; yarn install;", 14 | "export-env": "conda env export > environment.yml", 15 | "build": "docker build -t kengz/slm_lab:latest -t kengz/slm_lab:v$v .", 16 | "test": "python setup.py test" 17 | }, 18 | "repository": "https://github.com/kengz/SLM-Lab.git", 19 | "author": "kengz , lgraesser", 20 | "license": "MIT", 21 | "private": false, 22 | "dependencies": {} 23 | } 24 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from setuptools import setup 4 | from setuptools.command.test import test as TestCommand 5 | 6 | test_args = [ 7 | '--verbose', 8 | '--capture=sys', 9 | '--log-level=INFO', 10 | '--log-cli-level=INFO', 11 | 
'--log-file-level=INFO', 12 | '--no-flaky-report', 13 | '--timeout=300', 14 | '--cov-report=html', 15 | '--cov-report=term', 16 | '--cov-report=xml', 17 | '--cov=slm_lab', 18 | '--ignore=test/spec/test_dist_spec.py', 19 | 'test', 20 | ] 21 | 22 | 23 | class PyTest(TestCommand): 24 | user_options = [('pytest-args=', 'a', 'Arguments to pass to py.test')] 25 | 26 | def initialize_options(self): 27 | os.environ['PY_ENV'] = 'test' 28 | TestCommand.initialize_options(self) 29 | self.pytest_args = test_args 30 | 31 | def run_tests(self): 32 | import pytest 33 | errno = pytest.main(self.pytest_args) 34 | sys.exit(errno) 35 | 36 | 37 | setup( 38 | name='slm_lab', 39 | version='4.2.4', 40 | description='Modular Deep Reinforcement Learning framework in PyTorch.', 41 | long_description='https://github.com/kengz/slm_lab', 42 | keywords='SLM Lab', 43 | url='https://github.com/kengz/slm_lab', 44 | author='kengz,lgraesser', 45 | author_email='kengzwl@gmail.com', 46 | license='MIT', 47 | packages=['slm_lab'], 48 | # NOTE: use the optimized conda dependencies 49 | install_requires=[], 50 | zip_safe=False, 51 | include_package_data=True, 52 | dependency_links=[], 53 | extras_require={ 54 | 'dev': [], 55 | 'docs': [], 56 | 'testing': [] 57 | }, 58 | classifiers=[], 59 | test_suite='test', 60 | cmdclass={'test': PyTest}, 61 | ) 62 | -------------------------------------------------------------------------------- /slm_lab/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | os.environ['PY_ENV'] = os.environ.get('PY_ENV') or 'development' 4 | ROOT_DIR = os.path.normpath(os.path.join(os.path.dirname(__file__), '..')) 5 | 6 | # valid lab_mode in SLM Lab 7 | EVAL_MODES = ('enjoy', 'eval') 8 | TRAIN_MODES = ('search', 'train', 'dev') 9 | -------------------------------------------------------------------------------- /slm_lab/agent/algorithm/__init__.py: -------------------------------------------------------------------------------- 1 | # The algorithm module 2 | # Contains implementations of reinforcement learning algorithms. 
3 | # Uses the nets module to build neural networks as the relevant function approximators 4 | from .actor_critic import * 5 | from .dqn import * 6 | from .ppo import * 7 | from .random import * 8 | from .reinforce import * 9 | from .sac import * 10 | from .sarsa import * 11 | from .sil import * 12 | -------------------------------------------------------------------------------- /slm_lab/agent/algorithm/random.py: -------------------------------------------------------------------------------- 1 | # The random agent algorithm 2 | # For basic dev purpose 3 | from slm_lab.agent.algorithm.base import Algorithm 4 | from slm_lab.lib import logger 5 | from slm_lab.lib.decorator import lab_api 6 | import numpy as np 7 | 8 | logger = logger.get_logger(__name__) 9 | 10 | 11 | class Random(Algorithm): 12 | ''' 13 | Example Random agent that works in both discrete and continuous envs 14 | ''' 15 | 16 | @lab_api 17 | def init_algorithm_params(self): 18 | '''Initialize other algorithm parameters''' 19 | self.to_train = 0 20 | self.training_frequency = 1 21 | self.training_start_step = 0 22 | 23 | @lab_api 24 | def init_nets(self, global_nets=None): 25 | '''Initialize the neural network from the spec''' 26 | self.net_names = [] 27 | 28 | @lab_api 29 | def act(self, state): 30 | '''Random action''' 31 | body = self.body 32 | if body.env.is_venv: 33 | action = np.array([body.action_space.sample() for _ in range(body.env.num_envs)]) 34 | else: 35 | action = body.action_space.sample() 36 | return action 37 | 38 | @lab_api 39 | def sample(self): 40 | self.body.memory.sample() 41 | batch = np.nan 42 | return batch 43 | 44 | @lab_api 45 | def train(self): 46 | self.sample() 47 | self.body.env.clock.tick('opt_step') # to simulate metrics calc 48 | loss = np.nan 49 | return loss 50 | 51 | @lab_api 52 | def update(self): 53 | self.body.explore_var = np.nan 54 | return self.body.explore_var 55 | -------------------------------------------------------------------------------- /slm_lab/agent/memory/__init__.py: -------------------------------------------------------------------------------- 1 | # The memory module 2 | # Implements various methods for memory storage 3 | from .replay import * 4 | from .onpolicy import * 5 | from .prioritized import * 6 | -------------------------------------------------------------------------------- /slm_lab/agent/memory/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from collections import deque 3 | from slm_lab.lib import logger, util 4 | import numpy as np 5 | import pydash as ps 6 | 7 | logger = logger.get_logger(__name__) 8 | 9 | 10 | class Memory(ABC): 11 | '''Abstract Memory class to define the API methods''' 12 | 13 | def __init__(self, memory_spec, body): 14 | ''' 15 | @param {*} body is the unit that stores its experience in this memory. Each body has a distinct memory. 16 | ''' 17 | self.memory_spec = memory_spec 18 | self.body = body 19 | # declare what data keys to store 20 | self.data_keys = ['states', 'actions', 'rewards', 'next_states', 'dones', 'priorities'] 21 | 22 | @abstractmethod 23 | def reset(self): 24 | '''Method to fully reset the memory storage and related variables''' 25 | raise NotImplementedError 26 | 27 | @abstractmethod 28 | def update(self, state, action, reward, next_state, done): 29 | '''Implement memory update given the full info from the latest timestep. 
NOTE: guard for np.nan reward and done when individual env resets.''' 30 | raise NotImplementedError 31 | 32 | @abstractmethod 33 | def sample(self): 34 | '''Implement memory sampling mechanism''' 35 | raise NotImplementedError 36 | -------------------------------------------------------------------------------- /slm_lab/agent/net/__init__.py: -------------------------------------------------------------------------------- 1 | # The nets module 2 | # Implements differents types of neural network 3 | from slm_lab.agent.net.conv import * 4 | from slm_lab.agent.net.mlp import * 5 | from slm_lab.agent.net.recurrent import * 6 | from slm_lab.agent.net.q_net import * 7 | -------------------------------------------------------------------------------- /slm_lab/agent/net/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from slm_lab.agent.net import net_util 3 | import pydash as ps 4 | import torch 5 | import torch.nn as nn 6 | 7 | 8 | class Net(ABC): 9 | '''Abstract Net class to define the API methods''' 10 | 11 | def __init__(self, net_spec, in_dim, out_dim): 12 | ''' 13 | @param {dict} net_spec is the spec for the net 14 | @param {int|list} in_dim is the input dimension(s) for the network. Usually use in_dim=body.state_dim 15 | @param {int|list} out_dim is the output dimension(s) for the network. Usually use out_dim=body.action_dim 16 | ''' 17 | self.net_spec = net_spec 18 | self.in_dim = in_dim 19 | self.out_dim = out_dim 20 | self.grad_norms = None # for debugging 21 | if self.net_spec.get('gpu'): 22 | if torch.cuda.device_count(): 23 | self.device = f'cuda:{net_spec.get("cuda_id", 0)}' 24 | else: 25 | self.device = 'cpu' 26 | else: 27 | self.device = 'cpu' 28 | 29 | @abstractmethod 30 | def forward(self): 31 | '''The forward step for a specific network architecture''' 32 | raise NotImplementedError 33 | 34 | @net_util.dev_check_train_step 35 | def train_step(self, loss, optim, lr_scheduler=None, clock=None, global_net=None): 36 | if lr_scheduler is not None: 37 | lr_scheduler.step(epoch=ps.get(clock, 'frame')) 38 | optim.zero_grad() 39 | loss.backward() 40 | if self.clip_grad_val is not None: 41 | nn.utils.clip_grad_norm_(self.parameters(), self.clip_grad_val) 42 | if global_net is not None: 43 | net_util.push_global_grads(self, global_net) 44 | optim.step() 45 | if global_net is not None: 46 | net_util.copy(global_net, self) 47 | if clock is not None: 48 | clock.tick('opt_step') 49 | return loss 50 | 51 | def store_grad_norms(self): 52 | '''Stores the gradient norms for debugging.''' 53 | norms = [param.grad.norm().item() for param in self.parameters()] 54 | self.grad_norms = norms 55 | -------------------------------------------------------------------------------- /slm_lab/env/__init__.py: -------------------------------------------------------------------------------- 1 | # the environment module 2 | 3 | 4 | def make_env(spec): 5 | from slm_lab.env.openai import OpenAIEnv 6 | env = OpenAIEnv(spec) 7 | return env 8 | -------------------------------------------------------------------------------- /slm_lab/env/openai.py: -------------------------------------------------------------------------------- 1 | from slm_lab.env.base import BaseEnv 2 | from slm_lab.env.wrapper import make_gym_env 3 | from slm_lab.env.vec_env import make_gym_venv 4 | from slm_lab.env.registration import try_register_env 5 | from slm_lab.lib import logger, util 6 | from slm_lab.lib.decorator import lab_api 7 | import gym 8 | import 
numpy as np 9 | import pydash as ps 10 | import roboschool 11 | import pybullet_envs 12 | 13 | 14 | logger = logger.get_logger(__name__) 15 | 16 | 17 | class OpenAIEnv(BaseEnv): 18 | ''' 19 | Wrapper for OpenAI Gym env to work with the Lab. 20 | 21 | e.g. env_spec 22 | "env": [{ 23 | "name": "PongNoFrameskip-v4", 24 | "frame_op": "concat", 25 | "frame_op_len": 4, 26 | "normalize_state": false, 27 | "reward_scale": "sign", 28 | "num_envs": 8, 29 | "max_t": null, 30 | "max_frame": 1e7 31 | }], 32 | ''' 33 | 34 | def __init__(self, spec): 35 | super().__init__(spec) 36 | try_register_env(spec) # register if it's a custom gym env 37 | seed = ps.get(spec, 'meta.random_seed') 38 | episode_life = util.in_train_lab_mode() 39 | if self.is_venv: # make vector environment 40 | self.u_env = make_gym_venv(name=self.name, num_envs=self.num_envs, seed=seed, frame_op=self.frame_op, frame_op_len=self.frame_op_len, image_downsize=self.image_downsize, reward_scale=self.reward_scale, normalize_state=self.normalize_state, episode_life=episode_life) 41 | else: 42 | self.u_env = make_gym_env(name=self.name, seed=seed, frame_op=self.frame_op, frame_op_len=self.frame_op_len, image_downsize=self.image_downsize, reward_scale=self.reward_scale, normalize_state=self.normalize_state, episode_life=episode_life) 43 | if self.name.startswith('Unity'): 44 | # Unity is always initialized as singleton gym env, but the Unity runtime can be vec_env 45 | self.num_envs = self.u_env.num_envs 46 | # update variables dependent on num_envs 47 | self._infer_venv_attr() 48 | self._set_clock() 49 | self._set_attr_from_u_env(self.u_env) 50 | self.max_t = self.max_t or self.u_env.spec.max_episode_steps 51 | assert self.max_t is not None 52 | logger.info(util.self_desc(self)) 53 | 54 | def seed(self, seed): 55 | self.u_env.seed(seed) 56 | 57 | @lab_api 58 | def reset(self): 59 | self.done = False 60 | state = self.u_env.reset() 61 | if self.to_render: 62 | self.u_env.render() 63 | return state 64 | 65 | @lab_api 66 | def step(self, action): 67 | if not self.is_discrete and self.action_dim == 1: # guard for continuous with action_dim 1, make array 68 | action = np.expand_dims(action, axis=-1) 69 | state, reward, done, info = self.u_env.step(action) 70 | self._update_total_reward(info) 71 | if self.to_render: 72 | self.u_env.render() 73 | if not self.is_venv and self.clock.t > self.max_t: 74 | done = True 75 | self.done = done 76 | return state, reward, done, info 77 | 78 | @lab_api 79 | def close(self): 80 | self.u_env.close() 81 | -------------------------------------------------------------------------------- /slm_lab/env/registration.py: -------------------------------------------------------------------------------- 1 | # module to register and mange multiple environment offerings 2 | from gym.envs.registration import register 3 | from slm_lab.lib import logger, util 4 | import gym 5 | import os 6 | 7 | 8 | def get_env_path(env_name): 9 | '''Get the path to Unity env binaries distributed via npm''' 10 | env_path = util.smart_path(f'slm_lab/env/SLM-Env/build/{env_name}') 11 | env_dir = os.path.dirname(env_path) 12 | assert os.path.exists(env_dir), f'Missing {env_path}. See README to install from yarn.' 13 | return env_path 14 | 15 | 16 | def try_register_env(spec): 17 | '''Try to additional environments for OpenAI gym.''' 18 | try: 19 | env_name = spec['env'][0]['name'] 20 | if env_name == 'vizdoom-v0': 21 | assert 'cfg_name' in spec['env'][0].keys(), 'Environment config name must be defined for vizdoom.' 
22 | cfg_name = spec['env'][0]['cfg_name'] 23 | register( 24 | id=env_name, 25 | entry_point='slm_lab.env.vizdoom.vizdoom_env:VizDoomEnv', 26 | kwargs={'cfg_name': cfg_name}) 27 | elif env_name.startswith('Unity'): 28 | # NOTE: do not specify max_episode_steps, will cause shape inconsistency in done 29 | register( 30 | id=env_name, 31 | entry_point='slm_lab.env.unity:GymUnityEnv', 32 | kwargs={'name': env_name}) 33 | except Exception as e: 34 | logger.exception(e) 35 | -------------------------------------------------------------------------------- /slm_lab/env/unity.py: -------------------------------------------------------------------------------- 1 | from gym_unity.envs import UnityEnv 2 | from slm_lab.env.registration import get_env_path 3 | from slm_lab.lib import util 4 | import numpy as np 5 | import os 6 | import pydash as ps 7 | 8 | # NOTE: stack-frames used in ml-agents: 9 | # 3DBallHard 9 10 | # Hallways 3 11 | # PushBlock 3 12 | # Walker 5 13 | 14 | 15 | class GymUnityEnv(UnityEnv): 16 | '''Wrapper to make UnityEnv register-able under gym''' 17 | spec = None 18 | 19 | def __init__(self, name): 20 | worker_id = int(f'{os.getpid()}{int(ps.unique_id())}'[-4:]) 21 | super().__init__(get_env_path(name), worker_id, no_graphics=not util.to_render(), multiagent=True) 22 | self.num_envs = self.number_agents 23 | 24 | def reset(self): 25 | state = super().reset() 26 | # Unity returns list, we need array 27 | return np.array(state) 28 | 29 | def step(self, action): 30 | # Unity wants list instead of numpy 31 | action = list(action) 32 | state, reward, done, info = super().step(action) 33 | # Unity returns list, we need array 34 | state = np.array(state) 35 | reward = np.array(reward) 36 | done = np.array(done) 37 | return state, reward, done, info 38 | 39 | def close(self): 40 | try: # guard repeated call to close() 41 | super().close() 42 | except Exception as e: 43 | pass 44 | -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/__init__.py: -------------------------------------------------------------------------------- 1 | from .vizdoom_env import VizDoomEnv 2 | -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kengz/SLM-Lab/cae945a294cb111ee8e568bb4465ee74c501478b/slm_lab/env/vizdoom/cfgs/__init__.py -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/basic.cfg: -------------------------------------------------------------------------------- 1 | # Lines starting with # are treated as comments (or with whitespaces+#). 2 | # It doesn't matter if you use capital letters or not. 3 | # It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout. 
4 | 5 | doom_scenario_path = basic.wad 6 | doom_map = map01 7 | 8 | # Rewards 9 | living_reward = -1 10 | 11 | # Rendering options 12 | screen_resolution = RES_160X120 13 | screen_format = CRCGCB 14 | render_hud = True 15 | render_crosshair = false 16 | render_weapon = true 17 | render_decals = false 18 | render_particles = false 19 | window_visible = false 20 | 21 | # make episodes start after 20 tics (after unholstering the gun) 22 | episode_start_time = 14 23 | 24 | # make episodes finish after 300 actions (tics) 25 | episode_timeout = 300 26 | 27 | # Available buttons 28 | available_buttons = 29 | { 30 | MOVE_LEFT 31 | MOVE_RIGHT 32 | ATTACK 33 | } 34 | 35 | # Game variables that will be in the state 36 | available_game_variables = { AMMO2} 37 | 38 | mode = PLAYER 39 | doom_skill = 5 40 | -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/basic.wad: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kengz/SLM-Lab/cae945a294cb111ee8e568bb4465ee74c501478b/slm_lab/env/vizdoom/cfgs/basic.wad -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/bots.cfg: -------------------------------------------------------------------------------- 1 | { 2 | name Rambo 3 | aiming 67 4 | perfection 50 5 | reaction 70 6 | isp 50 7 | color "40 cf 00" 8 | skin base 9 | //weaponpref 012385678 10 | } 11 | 12 | { 13 | name McClane 14 | aiming 34 15 | perfection 75 16 | reaction 15 17 | isp 90 18 | color "b0 b0 b0" 19 | skin base 20 | //weaponpref 012345678 21 | } 22 | 23 | { 24 | name MacGyver 25 | aiming 80 26 | perfection 67 27 | reaction 72 28 | isp 87 29 | color "50 50 60" 30 | skin base 31 | //weaponpref 012345678 32 | } 33 | 34 | { 35 | name Plissken 36 | aiming 15 37 | perfection 50 38 | reaction 50 39 | isp 50 40 | color "8f 00 00" 41 | skin base 42 | //weaponpref 082345678 43 | } 44 | 45 | { 46 | name Machete 47 | aiming 50 48 | perfection 13 49 | reaction 20 50 | isp 100 51 | color "ff ff ff" 52 | skin base 53 | //weaponpref 012345678 54 | } 55 | 56 | { 57 | name Anderson 58 | aiming 45 59 | perfection 30 60 | reaction 70 61 | isp 60 62 | color "ff af 3f" 63 | skin base 64 | //weaponpref 012345678 65 | } 66 | 67 | { 68 | name Leone 69 | aiming 56 70 | perfection 34 71 | reaction 78 72 | isp 50 73 | color "bf 00 00" 74 | skin base 75 | //weaponpref 012345678 76 | } 77 | 78 | { 79 | name Predator 80 | aiming 25 81 | perfection 55 82 | reaction 32 83 | isp 70 84 | color "00 00 ff" 85 | skin base 86 | //weaponpref 012345678 87 | } 88 | 89 | { 90 | name Ripley 91 | aiming 61 92 | perfection 50 93 | reaction 23 94 | isp 32 95 | color "00 00 7f" 96 | skin base 97 | //weaponpref 012345678 98 | } 99 | 100 | { 101 | name T800 102 | aiming 90 103 | perfection 85 104 | reaction 10 105 | isp 30 106 | color "ff ff 00" 107 | skin base 108 | //weaponpref 012345678 109 | } 110 | 111 | { 112 | name Dredd 113 | aiming 12 114 | perfection 35 115 | reaction 56 116 | isp 37 117 | color "40 cf 00" 118 | skin base 119 | //weaponpref 012345678 120 | } 121 | 122 | { 123 | name Conan 124 | aiming 10 125 | perfection 35 126 | reaction 10 127 | isp 100 128 | color "b0 b0 b0" 129 | skin base 130 | //weaponpref 012345678 131 | } 132 | 133 | { 134 | name Bond 135 | aiming 67 136 | perfection 15 137 | reaction 76 138 | isp 37 139 | color "50 50 60" 140 | skin base 141 | //weaponpref 012345678 142 | } 143 | 144 | { 145 | name Jones 146 | aiming 52 147 | perfection 
35 148 | reaction 50 149 | isp 37 150 | color "8f 00 00" 151 | skin base 152 | //weaponpref 012345678 153 | } 154 | 155 | { 156 | name Blazkowicz 157 | aiming 80 158 | perfection 80 159 | reaction 80 160 | isp 100 161 | color "00 00 00" 162 | skin base 163 | //weaponpref 012345678 164 | } 165 | 166 | -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/cig.cfg: -------------------------------------------------------------------------------- 1 | # Lines starting with # are treated as comments (or with whitespaces+#). 2 | # It doesn't matter if you use capital letters or not. 3 | # It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout. 4 | 5 | doom_scenario_path = cig.wad 6 | 7 | #12 minutes 8 | episode_timeout = 25200 9 | 10 | # Rendering options 11 | screen_resolution = RES_640X480 12 | screen_format = CRCGCB 13 | render_hud = true 14 | render_crosshair = true 15 | render_weapon = true 16 | render_decals = false 17 | render_particles = false 18 | 19 | window_visible = true 20 | 21 | # Available buttons 22 | available_buttons = 23 | { 24 | ATTACK 25 | USE 26 | 27 | TURN_LEFT 28 | TURN_RIGHT 29 | MOVE_RIGHT 30 | MOVE_LEFT 31 | MOVE_FORWARD 32 | MOVE_BACKWARD 33 | 34 | TURN_LEFT_RIGHT_DELTA 35 | LOOK_UP_DOWN_DELTA 36 | } 37 | 38 | mode = ASYNC_PLAYER 39 | 40 | -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/cig.wad: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kengz/SLM-Lab/cae945a294cb111ee8e568bb4465ee74c501478b/slm_lab/env/vizdoom/cfgs/cig.wad -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/cig_with_unknown.wad: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kengz/SLM-Lab/cae945a294cb111ee8e568bb4465ee74c501478b/slm_lab/env/vizdoom/cfgs/cig_with_unknown.wad -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/deadly_corridor.cfg: -------------------------------------------------------------------------------- 1 | # Lines starting with # are treated as comments (or with whitespaces+#). 2 | # It doesn't matter if you use capital letters or not. 3 | # It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout. 4 | 5 | doom_scenario_path = deadly_corridor.wad 6 | 7 | # Skill 5 is reccomanded for the scenario to be a challenge. 
8 | doom_skill = 5 9 | 10 | # Rewards 11 | death_penalty = 100 12 | #living_reward = 0 13 | 14 | # Rendering options 15 | screen_resolution = RES_320X240 16 | screen_format = CRCGCB 17 | render_hud = true 18 | render_crosshair = false 19 | render_weapon = true 20 | render_decals = false 21 | render_particles = false 22 | window_visible = true 23 | 24 | episode_timeout = 2100 25 | 26 | # Available buttons 27 | available_buttons = 28 | { 29 | MOVE_LEFT 30 | MOVE_RIGHT 31 | ATTACK 32 | MOVE_FORWARD 33 | MOVE_BACKWARD 34 | TURN_LEFT 35 | TURN_RIGHT 36 | } 37 | 38 | # Game variables that will be in the state 39 | available_game_variables = { HEALTH } 40 | 41 | mode = PLAYER 42 | 43 | 44 | -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/deadly_corridor.wad: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kengz/SLM-Lab/cae945a294cb111ee8e568bb4465ee74c501478b/slm_lab/env/vizdoom/cfgs/deadly_corridor.wad -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/deathmatch.cfg: -------------------------------------------------------------------------------- 1 | # Lines starting with # are treated as comments (or with whitespaces+#). 2 | # It doesn't matter if you use capital letters or not. 3 | # It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout. 4 | 5 | doom_scenario_path = deathmatch.wad 6 | 7 | # Rendering options 8 | screen_resolution = RES_320X240 9 | screen_format = CRCGCB 10 | render_hud = true 11 | render_crosshair = false 12 | render_weapon = true 13 | render_decals = false 14 | render_particles = false 15 | window_visible = true 16 | 17 | # make episodes finish after 4200 actions (tics) 18 | episode_timeout = 4200 19 | 20 | # Available buttons 21 | available_buttons = 22 | { 23 | ATTACK 24 | SPEED 25 | STRAFE 26 | 27 | MOVE_RIGHT 28 | MOVE_LEFT 29 | MOVE_BACKWARD 30 | MOVE_FORWARD 31 | TURN_RIGHT 32 | TURN_LEFT 33 | 34 | SELECT_WEAPON1 35 | SELECT_WEAPON2 36 | SELECT_WEAPON3 37 | SELECT_WEAPON4 38 | SELECT_WEAPON5 39 | SELECT_WEAPON6 40 | 41 | SELECT_NEXT_WEAPON 42 | SELECT_PREV_WEAPON 43 | 44 | LOOK_UP_DOWN_DELTA 45 | TURN_LEFT_RIGHT_DELTA 46 | MOVE_LEFT_RIGHT_DELTA 47 | 48 | } 49 | 50 | # Game variables that will be in the state 51 | available_game_variables = 52 | { 53 | KILLCOUNT 54 | HEALTH 55 | ARMOR 56 | SELECTED_WEAPON 57 | SELECTED_WEAPON_AMMO 58 | } 59 | mode = PLAYER 60 | -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/deathmatch.wad: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kengz/SLM-Lab/cae945a294cb111ee8e568bb4465ee74c501478b/slm_lab/env/vizdoom/cfgs/deathmatch.wad -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/defend_the_center.cfg: -------------------------------------------------------------------------------- 1 | # Lines starting with # are treated as comments (or with whitespaces+#). 2 | # It doesn't matter if you use capital letters or not. 3 | # It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout. 
4 | 5 | doom_scenario_path = defend_the_center.wad 6 | 7 | # Rewards 8 | death_penalty = 1 9 | 10 | # Rendering options 11 | screen_resolution = RES_640X480 12 | screen_format = CRCGCB 13 | render_hud = True 14 | render_crosshair = false 15 | render_weapon = true 16 | render_decals = false 17 | render_particles = false 18 | window_visible = true 19 | 20 | # make episodes start after 10 tics (after unholstering the gun) 21 | episode_start_time = 10 22 | 23 | # make episodes finish after 2100 actions (tics) 24 | episode_timeout = 2100 25 | 26 | # Available buttons 27 | available_buttons = 28 | { 29 | TURN_LEFT 30 | TURN_RIGHT 31 | ATTACK 32 | } 33 | 34 | # Game variables that will be in the state 35 | available_game_variables = { AMMO2 HEALTH } 36 | 37 | mode = PLAYER 38 | doom_skill = 3 39 | -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/defend_the_center.wad: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kengz/SLM-Lab/cae945a294cb111ee8e568bb4465ee74c501478b/slm_lab/env/vizdoom/cfgs/defend_the_center.wad -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/defend_the_line.cfg: -------------------------------------------------------------------------------- 1 | # Lines starting with # are treated as comments (or with whitespaces+#). 2 | # It doesn't matter if you use capital letters or not. 3 | # It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout. 4 | 5 | doom_scenario_path = defend_the_line.wad 6 | 7 | # Rewards 8 | death_penalty = 1 9 | 10 | # Rendering options 11 | screen_resolution = RES_320X240 12 | screen_format = CRCGCB 13 | render_hud = True 14 | render_crosshair = false 15 | render_weapon = true 16 | render_decals = false 17 | render_particles = false 18 | window_visible = true 19 | 20 | # make episodes start after 10 tics (after unholstering the gun) 21 | episode_start_time = 10 22 | 23 | 24 | # Available buttons 25 | available_buttons = 26 | { 27 | TURN_lEFT 28 | TURN_RIGHT 29 | ATTACK 30 | } 31 | 32 | # Game variables that will be in the state 33 | available_game_variables = { AMMO2 HEALTH} 34 | 35 | mode = PLAYER 36 | doom_skill = 3 37 | -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/defend_the_line.wad: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kengz/SLM-Lab/cae945a294cb111ee8e568bb4465ee74c501478b/slm_lab/env/vizdoom/cfgs/defend_the_line.wad -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/health_gathering.cfg: -------------------------------------------------------------------------------- 1 | # Lines starting with # are treated as comments (or with whitespaces+#). 2 | # It doesn't matter if you use capital letters or not. 3 | # It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout. 4 | 5 | doom_scenario_path = health_gathering.wad 6 | 7 | # Each step is good for you! 8 | living_reward = 1 9 | # And death is not! 
10 | death_penalty = 100 11 | 12 | # Rendering options 13 | screen_resolution = RES_160X120 14 | screen_format = CRCGCB 15 | render_hud = false 16 | render_crosshair = false 17 | render_weapon = false 18 | render_decals = false 19 | render_particles = false 20 | window_visible = false 21 | 22 | # make episodes finish after 2100 actions (tics) 23 | episode_timeout = 2100 24 | 25 | # Available buttons 26 | available_buttons = 27 | { 28 | TURN_LEFT 29 | TURN_RIGHT 30 | MOVE_FORWARD 31 | } 32 | 33 | # Game variables that will be in the state 34 | available_game_variables = { HEALTH } 35 | 36 | mode = PLAYER 37 | -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/health_gathering.wad: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kengz/SLM-Lab/cae945a294cb111ee8e568bb4465ee74c501478b/slm_lab/env/vizdoom/cfgs/health_gathering.wad -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/health_gathering_supreme.cfg: -------------------------------------------------------------------------------- 1 | # Lines starting with # are treated as comments (or with whitespaces+#). 2 | # It doesn't matter if you use capital letters or not. 3 | # It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout. 4 | 5 | doom_scenario_path = health_gathering_supreme.wad 6 | 7 | # Each step is good for you! 8 | living_reward = 1 9 | # And death is not! 10 | death_penalty = 100 11 | 12 | # Rendering options 13 | screen_resolution = RES_320X240 14 | screen_format = CRCGCB 15 | render_hud = false 16 | render_crosshair = false 17 | render_weapon = false 18 | render_decals = false 19 | render_particles = false 20 | window_visible = true 21 | 22 | # make episodes finish after 2100 actions (tics) 23 | episode_timeout = 2100 24 | 25 | # Available buttons 26 | available_buttons = 27 | { 28 | TURN_LEFT 29 | TURN_RIGHT 30 | MOVE_FORWARD 31 | } 32 | 33 | # Game variables that will be in the state 34 | available_game_variables = { HEALTH } 35 | 36 | mode = PLAYER -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/health_gathering_supreme.wad: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kengz/SLM-Lab/cae945a294cb111ee8e568bb4465ee74c501478b/slm_lab/env/vizdoom/cfgs/health_gathering_supreme.wad -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/learning.cfg: -------------------------------------------------------------------------------- 1 | doom_scenario_path = basic.wad 2 | 3 | # Rewards 4 | living_reward = -1 5 | 6 | # Rendering options 7 | screen_resolution = RES_640X480 8 | screen_format = GRAY8 9 | render_hud = false 10 | render_crosshair = false 11 | render_weapon = true 12 | render_decals = false 13 | render_particles = false 14 | window_visible = false 15 | 16 | # make episodes start after 20 tics (after unholstering the gun) 17 | episode_start_time = 14 18 | 19 | # make episodes finish after 300 actions (tics) 20 | episode_timeout = 300 21 | 22 | # Available buttons 23 | available_buttons = 24 | { 25 | MOVE_LEFT 26 | MOVE_RIGHT 27 | ATTACK 28 | } 29 | 30 | mode = PLAYER 31 | 32 | 33 | -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/multi.cfg: 
-------------------------------------------------------------------------------- 1 | # Lines starting with # are treated as comments (or with whitespaces+#). 2 | # It doesn't matter if you use capital letters or not. 3 | # It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout. 4 | 5 | doom_scenario_path = multi_deathmatch.wad 6 | 7 | # Rewards 8 | death_penalty = 1 9 | 10 | # Rendering options 11 | screen_resolution = RES_640X480 12 | screen_format = CRCGCB 13 | render_hud = true 14 | render_crosshair = true 15 | render_weapon = true 16 | render_decals = false 17 | render_particles = false 18 | 19 | window_visible = true 20 | 21 | 22 | # Available buttons 23 | available_buttons = 24 | { 25 | TURN_LEFT 26 | TURN_RIGHT 27 | ATTACK 28 | 29 | MOVE_RIGHT 30 | MOVE_LEFT 31 | 32 | MOVE_FORWARD 33 | MOVE_BACKWARD 34 | TURN_LEFT_RIGHT_DELTA 35 | LOOK_UP_DOWN_DELTA 36 | 37 | } 38 | 39 | available_game_variables = 40 | { 41 | HEALTH 42 | AMMO3 43 | } 44 | mode = ASYNC_PLAYER 45 | 46 | -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/multi_deathmatch.wad: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kengz/SLM-Lab/cae945a294cb111ee8e568bb4465ee74c501478b/slm_lab/env/vizdoom/cfgs/multi_deathmatch.wad -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/multi_duel.cfg: -------------------------------------------------------------------------------- 1 | doom_scenario_path = multi_duel.wad 2 | 3 | screen_resolution = RES_640X480 4 | screen_format = CRCGCB 5 | render_hud = true 6 | render_crosshair = false 7 | render_weapon = true 8 | render_decals = true 9 | render_particles = true 10 | window_visible = true 11 | 12 | available_buttons = 13 | { 14 | MOVE_LEFT 15 | MOVE_RIGHT 16 | ATTACK 17 | } 18 | 19 | mode = PLAYER 20 | doom_skill = 5 21 | 22 | -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/multi_duel.wad: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kengz/SLM-Lab/cae945a294cb111ee8e568bb4465ee74c501478b/slm_lab/env/vizdoom/cfgs/multi_duel.wad -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/my_way_home.cfg: -------------------------------------------------------------------------------- 1 | # Lines starting with # are treated as comments (or with whitespaces+#). 2 | # It doesn't matter if you use capital letters or not. 3 | # It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout. 
4 | 5 | doom_scenario_path = my_way_home.wad 6 | 7 | # Rewards 8 | living_reward = -0.0001 9 | 10 | # Rendering options 11 | screen_resolution = RES_640X480 12 | screen_format = CRCGCB 13 | render_hud = false 14 | render_crosshair = false 15 | render_weapon = true 16 | render_decals = false 17 | render_particles = false 18 | window_visible = true 19 | 20 | # make episodes start after 10 tics (after unholstering the gun) 21 | episode_start_time = 10 22 | 23 | # make episodes finish after 2100 actions (tics) 24 | episode_timeout = 2100 25 | 26 | # Available buttons 27 | available_buttons = 28 | { 29 | TURN_LEFT 30 | TURN_RIGHT 31 | MOVE_FORWARD 32 | MOVE_LEFT 33 | MOVE_RIGHT 34 | } 35 | 36 | # Game variables that will be in the state 37 | available_game_variables = { AMMO0 } 38 | 39 | mode = PLAYER 40 | doom_skill = 5 41 | -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/my_way_home.wad: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kengz/SLM-Lab/cae945a294cb111ee8e568bb4465ee74c501478b/slm_lab/env/vizdoom/cfgs/my_way_home.wad -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/oblige.cfg: -------------------------------------------------------------------------------- 1 | # Lines starting with # are treated as comments (or with whitespaces+#). 2 | # It doesn't matter if you use capital letters or not. 3 | # It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout. 4 | 5 | # Rendering options 6 | screen_resolution = RES_320X240 7 | screen_format = CRCGCB 8 | render_hud = true 9 | render_crosshair = false 10 | render_weapon = true 11 | render_decals = false 12 | render_particles = false 13 | window_visible = true 14 | 15 | # make episodes finish after 4200 actions (tics) 16 | episode_timeout = 4200 17 | 18 | # Available buttons 19 | available_buttons = 20 | { 21 | ATTACK 22 | USE 23 | SPEED 24 | STRAFE 25 | 26 | MOVE_RIGHT 27 | MOVE_LEFT 28 | MOVE_BACKWARD 29 | MOVE_FORWARD 30 | TURN_RIGHT 31 | TURN_LEFT 32 | 33 | SELECT_WEAPON1 34 | SELECT_WEAPON2 35 | SELECT_WEAPON3 36 | SELECT_WEAPON4 37 | SELECT_WEAPON5 38 | SELECT_WEAPON6 39 | 40 | SELECT_NEXT_WEAPON 41 | SELECT_PREV_WEAPON 42 | 43 | LOOK_UP_DOWN_DELTA 44 | TURN_LEFT_RIGHT_DELTA 45 | MOVE_LEFT_RIGHT_DELTA 46 | 47 | } 48 | 49 | # Game variables that will be in the state 50 | available_game_variables = 51 | { 52 | KILLCOUNT 53 | HEALTH 54 | ARMOR 55 | SELECTED_WEAPON 56 | SELECTED_WEAPON_AMMO 57 | } 58 | mode = PLAYER 59 | -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/predict_position.cfg: -------------------------------------------------------------------------------- 1 | # Lines starting with # are treated as comments (or with whitespaces+#). 2 | # It doesn't matter if you use capital letters or not. 3 | # It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout. 
4 | 5 | doom_scenario_path = predict_position.wad 6 | 7 | # Rewards 8 | living_reward = -0.001 9 | 10 | # Rendering options 11 | screen_resolution = RES_800X450 12 | screen_format = CRCGCB 13 | render_hud = false 14 | render_crosshair = false 15 | render_weapon = true 16 | render_decals = false 17 | render_particles = false 18 | window_visible = true 19 | 20 | # make episodes start after 16 tics (after producing the rocket launcher) 21 | episode_start_time = 16 22 | 23 | # make episodes finish after 300 actions (tics) 24 | episode_timeout = 300 25 | 26 | # Available buttons 27 | available_buttons = 28 | { 29 | TURN_LEFT 30 | TURN_RIGHT 31 | ATTACK 32 | } 33 | 34 | # Empty list is allowed, in case you are lazy. 35 | available_game_variables = { } 36 | 37 | game_args += +sv_noautoaim 1 38 | 39 | mode = PLAYER 40 | doom_skill = 1 41 | -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/predict_position.wad: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kengz/SLM-Lab/cae945a294cb111ee8e568bb4465ee74c501478b/slm_lab/env/vizdoom/cfgs/predict_position.wad -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/rocket_basic.cfg: -------------------------------------------------------------------------------- 1 | doom_scenario_path = rocket_basic.wad 2 | 3 | # Rewards 4 | living_reward = -1 5 | 6 | # Rendering options 7 | screen_resolution = RES_640X480 8 | screen_format = GRAY8 9 | render_hud = true 10 | render_crosshair = false 11 | render_weapon = true 12 | render_decals = false 13 | render_particles = false 14 | 15 | # make episodes start after 14 tics (after unholstering the gun) 16 | episode_start_time = 14 17 | 18 | # make episodes finish after 300 actions (tics) 19 | episode_timeout = 300 20 | 21 | # Available buttons 22 | available_buttons = 23 | { 24 | MOVE_LEFT 25 | MOVE_RIGHT 26 | ATTACK 27 | } 28 | 29 | game_args += +sv_noautoaim 1 30 | -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/rocket_basic.wad: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kengz/SLM-Lab/cae945a294cb111ee8e568bb4465ee74c501478b/slm_lab/env/vizdoom/cfgs/rocket_basic.wad -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/simpler_basic.cfg: -------------------------------------------------------------------------------- 1 | doom_scenario_path = simpler_basic.wad 2 | 3 | # Rewards 4 | living_reward = -1 5 | 6 | # Rendering options 7 | screen_resolution = RES_640X480 8 | screen_format = GRAY8 9 | 10 | render_hud = true 11 | render_crosshair = false 12 | render_weapon = true 13 | render_decals = false 14 | render_particles = false 15 | 16 | # make episodes start after 20 tics (after unholstering the gun) 17 | episode_start_time = 14 18 | 19 | # make episodes finish after 300 actions (tics) 20 | episode_timeout = 300 21 | 22 | # Available buttons 23 | available_buttons = 24 | { 25 | MOVE_LEFT 26 | MOVE_RIGHT 27 | ATTACK 28 | } 29 | -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/simpler_basic.wad: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kengz/SLM-Lab/cae945a294cb111ee8e568bb4465ee74c501478b/slm_lab/env/vizdoom/cfgs/simpler_basic.wad 
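The scenario definitions above are standard ViZDoom .cfg files, consumed through the ViZDoom Python API. A minimal sketch of driving one of them directly with the `vizdoom` package follows; the basic.cfg path and the random-action loop are illustrative assumptions rather than code from this repository, and within the lab these same files are presumably exposed through slm_lab/env/vizdoom/vizdoom_env.py and the env wrappers instead of being driven by hand like this:

import itertools
import random

import vizdoom as vzd

game = vzd.DoomGame()
# parses the keys shown above: doom_scenario_path, rewards, rendering options, buttons, ...
game.load_config('slm_lab/env/vizdoom/cfgs/basic.cfg')
game.set_window_visible(False)  # matches window_visible = false for headless training
game.init()

# one binary flag per entry in available_buttons (MOVE_LEFT, MOVE_RIGHT, ATTACK)
actions = [list(combo) for combo in itertools.product([0, 1], repeat=game.get_available_buttons_size())]

game.new_episode()
while not game.is_episode_finished():
    state = game.get_state()  # screen buffer plus the game variables listed in the cfg (e.g. AMMO2)
    reward = game.make_action(random.choice(actions))  # advances one tic; returns living_reward plus any scenario reward
print('episode return:', game.get_total_reward())
game.close()

The other cfgs in this directory can be driven the same way; only the button set, the exposed game variables, and the reward shaping differ from scenario to scenario.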
-------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/take_cover.cfg: -------------------------------------------------------------------------------- 1 | # Lines starting with # are treated as comments (or with whitespaces+#). 2 | # It doesn't matter if you use capital letters or not. 3 | # It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout. 4 | 5 | doom_scenario_path = take_cover.wad 6 | doom_map = map01 7 | 8 | # Rewards 9 | living_reward = 0.1 10 | 11 | # Rendering options 12 | screen_resolution = RES_160X120 13 | screen_format = CRCGCB 14 | render_hud = false 15 | render_crosshair = false 16 | render_weapon = false 17 | render_decals = false 18 | render_particles = false 19 | window_visible = false 20 | 21 | # Available buttons 22 | available_buttons = 23 | { 24 | MOVE_LEFT 25 | MOVE_RIGHT 26 | } 27 | 28 | # Game variables that will be in the state 29 | available_game_variables = { HEALTH } 30 | 31 | # Change it if you wish. 32 | doom_skill = 4 33 | -------------------------------------------------------------------------------- /slm_lab/env/vizdoom/cfgs/take_cover.wad: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kengz/SLM-Lab/cae945a294cb111ee8e568bb4465ee74c501478b/slm_lab/env/vizdoom/cfgs/take_cover.wad -------------------------------------------------------------------------------- /slm_lab/experiment/__init__.py: -------------------------------------------------------------------------------- 1 | # The experiment module 2 | # Handles experimentation logic: control, analysis 3 | -------------------------------------------------------------------------------- /slm_lab/experiment/retro_analysis.py: -------------------------------------------------------------------------------- 1 | # The retro analysis module 2 | # Runs analysis post-hoc using existing data files 3 | # example: yarn retro_analyze data/reinforce_cartpole_2018_01_22_211751/ 4 | from glob import glob 5 | from slm_lab.experiment import analysis 6 | from slm_lab.lib import logger, util 7 | import os 8 | import pydash as ps 9 | 10 | logger = logger.get_logger(__name__) 11 | 12 | 13 | def retro_analyze_sessions(predir): 14 | '''Retro analyze all sessions''' 15 | logger.info('Running retro_analyze_sessions') 16 | session_spec_paths = glob(f'{predir}/*_s*_spec.json') 17 | for p in session_spec_paths: 18 | _retro_analyze_session(p) 19 | 20 | 21 | def _retro_analyze_session(session_spec_path): 22 | '''Method to retro analyze a single session given only a path to its spec''' 23 | session_spec = util.read(session_spec_path) 24 | info_prepath = session_spec['meta']['info_prepath'] 25 | for df_mode in ('eval', 'train'): 26 | session_df = util.read(f'{info_prepath}_session_df_{df_mode}.csv') 27 | analysis.analyze_session(session_spec, session_df, df_mode) 28 | 29 | 30 | def retro_analyze_trials(predir): 31 | '''Retro analyze all trials''' 32 | logger.info('Running retro_analyze_trials') 33 | session_spec_paths = glob(f'{predir}/*_s*_spec.json') 34 | # remove session spec paths 35 | trial_spec_paths = ps.difference(glob(f'{predir}/*_t*_spec.json'), session_spec_paths) 36 | for p in trial_spec_paths: 37 | _retro_analyze_trial(p) 38 | 39 | 40 | def _retro_analyze_trial(trial_spec_path): 41 | '''Method to retro analyze a single trial given only a path to its spec''' 42 | trial_spec = util.read(trial_spec_path) 43 | meta_spec = trial_spec['meta'] 
44 | info_prepath = meta_spec['info_prepath'] 45 | session_metrics_list = [util.read(f'{info_prepath}_s{s}_session_metrics_eval.pkl') for s in range(meta_spec['max_session'])] 46 | analysis.analyze_trial(trial_spec, session_metrics_list) 47 | 48 | 49 | def retro_analyze_experiment(predir): 50 | '''Retro analyze an experiment''' 51 | logger.info('Running retro_analyze_experiment') 52 | if ps.is_empty(glob(f'{predir}/info/*_trial_data_dict.json')): 53 | logger.info('Skipping retro_analyze_experiment since no experiment was ran.') 54 | return # only run analysis if experiment had been ran 55 | trial_spec_paths = glob(f'{predir}/*_t*_spec.json') 56 | # remove trial and session spec paths 57 | experiment_spec_paths = ps.difference(glob(f'{predir}/*_spec.json'), trial_spec_paths) 58 | experiment_spec_path = experiment_spec_paths[0] 59 | spec = util.read(experiment_spec_path) 60 | info_prepath = spec['meta'].get('info_prepath') 61 | trial_data_dict = util.read(f'{info_prepath}_trial_data_dict.json') 62 | analysis.analyze_experiment(spec, trial_data_dict) 63 | 64 | 65 | def retro_analyze(predir): 66 | ''' 67 | Method to analyze experiment/trial from files after it ran. 68 | @example 69 | 70 | yarn retro_analyze data/reinforce_cartpole_2018_01_22_211751/ 71 | ''' 72 | predir = predir.strip('/') # sanitary 73 | os.environ['LOG_PREPATH'] = f'{predir}/log/retro_analyze' # to prevent overwriting log file 74 | logger.info(f'Running retro-analysis on {predir}') 75 | retro_analyze_sessions(predir) 76 | retro_analyze_trials(predir) 77 | retro_analyze_experiment(predir) 78 | logger.info('Finished retro-analysis') 79 | -------------------------------------------------------------------------------- /slm_lab/lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kengz/SLM-Lab/cae945a294cb111ee8e568bb4465ee74c501478b/slm_lab/lib/__init__.py -------------------------------------------------------------------------------- /slm_lab/lib/decorator.py: -------------------------------------------------------------------------------- 1 | from functools import wraps 2 | from slm_lab.lib import logger 3 | import time 4 | 5 | logger = logger.get_logger(__name__) 6 | 7 | 8 | def lab_api(fn): 9 | ''' 10 | Function decorator to label and check Lab API methods 11 | @example 12 | 13 | from slm_lab.lib.decorator import lab_api 14 | @lab_api 15 | def foo(): 16 | print('foo') 17 | ''' 18 | return fn 19 | 20 | 21 | def timeit(fn): 22 | ''' 23 | Function decorator to measure execution time 24 | @example 25 | 26 | from slm_lab.lib.decorator import timeit 27 | @timeit 28 | def foo(sec): 29 | time.sleep(sec) 30 | print('foo') 31 | 32 | foo(1) 33 | # => foo 34 | # => Timed: foo 1000.9971ms 35 | ''' 36 | @wraps(fn) 37 | def time_fn(*args, **kwargs): 38 | start = time.time() 39 | output = fn(*args, **kwargs) 40 | end = time.time() 41 | logger.debug(f'Timed: {fn.__name__} {round((end - start) * 1000, 4)}ms') 42 | return output 43 | return time_fn 44 | -------------------------------------------------------------------------------- /slm_lab/lib/logger.py: -------------------------------------------------------------------------------- 1 | from slm_lab.lib import util 2 | import colorlog 3 | import logging 4 | import os 5 | import pandas as pd 6 | import sys 7 | import warnings 8 | 9 | 10 | class FixedList(list): 11 | '''fixed-list to restrict addition to root logger handler''' 12 | 13 | def append(self, e): 14 | pass 15 | 16 | 17 | LOG_FORMAT = '[%(asctime)s 
PID:%(process)d %(levelname)s %(filename)s %(funcName)s] %(message)s' 18 | color_formatter = colorlog.ColoredFormatter('%(log_color)s[%(asctime)s PID:%(process)d %(levelname)s %(filename)s %(funcName)s]%(reset)s %(message)s') 19 | sh = logging.StreamHandler(sys.stdout) 20 | sh.setFormatter(color_formatter) 21 | lab_logger = logging.getLogger() 22 | lab_logger.handlers = FixedList([sh]) 23 | logging.getLogger('ray').propagate = False # hack to mute poorly designed ray TF warning log 24 | 25 | # this will trigger from Experiment init on reload(logger) 26 | if os.environ.get('LOG_PREPATH') is not None: 27 | warnings.filterwarnings('ignore', category=pd.io.pytables.PerformanceWarning) 28 | 29 | log_filepath = os.environ['LOG_PREPATH'] + '.log' 30 | os.makedirs(os.path.dirname(log_filepath), exist_ok=True) 31 | # create file handler 32 | formatter = logging.Formatter(LOG_FORMAT) 33 | fh = logging.FileHandler(log_filepath) 34 | fh.setFormatter(formatter) 35 | # add stream and file handler 36 | lab_logger.handlers = FixedList([sh, fh]) 37 | 38 | if os.environ.get('LOG_LEVEL'): 39 | lab_logger.setLevel(os.environ['LOG_LEVEL']) 40 | else: 41 | lab_logger.setLevel('INFO') 42 | 43 | 44 | def set_level(lvl): 45 | lab_logger.setLevel(lvl) 46 | os.environ['LOG_LEVEL'] = lvl 47 | 48 | 49 | def critical(msg, *args, **kwargs): 50 | return lab_logger.critical(msg, *args, **kwargs) 51 | 52 | 53 | def debug(msg, *args, **kwargs): 54 | return lab_logger.debug(msg, *args, **kwargs) 55 | 56 | 57 | def error(msg, *args, **kwargs): 58 | return lab_logger.error(msg, *args, **kwargs) 59 | 60 | 61 | def exception(msg, *args, **kwargs): 62 | return lab_logger.exception(msg, *args, **kwargs) 63 | 64 | 65 | def info(msg, *args, **kwargs): 66 | return lab_logger.info(msg, *args, **kwargs) 67 | 68 | 69 | def warning(msg, *args, **kwargs): 70 | return lab_logger.warning(msg, *args, **kwargs) 71 | 72 | 73 | def get_logger(__name__): 74 | '''Create a child logger specific to a module''' 75 | return logging.getLogger(__name__) 76 | 77 | 78 | def toggle_debug(modules, level='DEBUG'): 79 | '''Turn on module-specific debugging using their names, e.g. 
algorithm, actor_critic, at the desired debug level.''' 80 | logger_names = list(logging.Logger.manager.loggerDict.keys()) 81 | for module in modules: 82 | name = module.strip() 83 | for logger_name in logger_names: 84 | if name in logger_name.split('.'): 85 | module_logger = logging.getLogger(logger_name) 86 | module_logger.setLevel(getattr(logging, level)) 87 | -------------------------------------------------------------------------------- /slm_lab/spec/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kengz/SLM-Lab/cae945a294cb111ee8e568bb4465ee74c501478b/slm_lab/spec/__init__.py -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/a2c/a2c_gae_cartpole.json: -------------------------------------------------------------------------------- 1 | { 2 | "a2c_gae_cartpole": { 3 | "agent": [{ 4 | "name": "A2C", 5 | "algorithm": { 6 | "name": "ActorCritic", 7 | "action_pdtype": "default", 8 | "action_policy": "default", 9 | "explore_var_spec": null, 10 | "gamma": 0.99, 11 | "lam": 0.95, 12 | "num_step_returns": null, 13 | "entropy_coef_spec": { 14 | "name": "no_decay", 15 | "start_val": 0.001, 16 | "end_val": 0.001, 17 | "start_step": 0, 18 | "end_step": 0 19 | }, 20 | "val_loss_coef": 0.5, 21 | "training_frequency": 32 22 | }, 23 | "memory": { 24 | "name": "OnPolicyBatchReplay", 25 | }, 26 | "net": { 27 | "type": "MLPNet", 28 | "shared": false, 29 | "hid_layers": [64], 30 | "hid_layers_activation": "selu", 31 | "init_fn": null, 32 | "normalize": true, 33 | "batch_norm": false, 34 | "clip_grad_val": 0.5, 35 | "use_same_optim": true, 36 | "loss_spec": { 37 | "name": "MSELoss" 38 | }, 39 | "actor_optim_spec": { 40 | "name": "RMSprop", 41 | "lr": 5e-3, 42 | "alpha": 0.99, 43 | "eps": 1e-5 44 | }, 45 | "critic_optim_spec": { 46 | "name": "RMSprop", 47 | "lr": 5e-3, 48 | "alpha": 0.99, 49 | "eps": 1e-5 50 | }, 51 | "lr_scheduler_spec": null, 52 | "gpu": false 53 | } 54 | }], 55 | "env": [{ 56 | "name": "CartPole-v0", 57 | "num_envs": 4, 58 | "max_t": null, 59 | "max_frame": 30000, 60 | }], 61 | "body": { 62 | "product": "outer", 63 | "num": 1 64 | }, 65 | "meta": { 66 | "distributed": false, 67 | "eval_frequency": 500, 68 | "log_frequency": 500, 69 | "max_session": 4, 70 | "max_trial": 1 71 | }, 72 | "search": { 73 | "agent": [{ 74 | "algorithm": { 75 | "lam__grid_search": [0.7, 0.8, 0.9, 0.95], 76 | "training_frequency__grid_search": [16, 32] 77 | }, 78 | "net": { 79 | "actor_optim_spec": { 80 | "lr__grid_search": [5e-2, 1e-2, 5e-3, 1e-3] 81 | } 82 | } 83 | }] 84 | }, 85 | }, 86 | } 87 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/a2c/a2c_gae_lunar.json: -------------------------------------------------------------------------------- 1 | { 2 | "a2c_gae_lunar": { 3 | "agent": [{ 4 | "name": "A2C", 5 | "algorithm": { 6 | "name": "ActorCritic", 7 | "action_pdtype": "default", 8 | "action_policy": "default", 9 | "explore_var_spec": null, 10 | "gamma": 0.99, 11 | "lam": 0.95, 12 | "num_step_returns": null, 13 | "entropy_coef_spec": { 14 | "name": "no_decay", 15 | "start_val": 0.01, 16 | "end_val": 0.01, 17 | "start_step": 0, 18 | "end_step": 0 19 | }, 20 | "val_loss_coef": 1.0, 21 | "training_frequency": 128 22 | }, 23 | "memory": { 24 | "name": "OnPolicyBatchReplay", 25 | }, 26 | "net": { 27 | "type": "MLPNet", 28 | "shared": false, 29 | "hid_layers": [64, 64, 32], 30 | "hid_layers_activation": "relu", 31 | 
"init_fn": "orthogonal_", 32 | "batch_norm": false, 33 | "clip_grad_val": 0.5, 34 | "use_same_optim": false, 35 | "loss_spec": { 36 | "name": "MSELoss" 37 | }, 38 | "actor_optim_spec": { 39 | "name": "Adam", 40 | "lr": 2e-3, 41 | }, 42 | "critic_optim_spec": { 43 | "name": "Adam", 44 | "lr": 2e-3, 45 | }, 46 | "lr_scheduler_spec": null, 47 | "gpu": false 48 | } 49 | }], 50 | "env": [{ 51 | "name": "LunarLander-v2", 52 | "frame_op": "concat", 53 | "frame_op_len": 4, 54 | "max_t": null, 55 | "max_frame": 300000, 56 | "num_envs": 8, 57 | "normalize_state": false 58 | }], 59 | "body": { 60 | "product": "outer", 61 | "num": 1 62 | }, 63 | "meta": { 64 | "distributed": false, 65 | "log_frequency": 1000, 66 | "eval_frequency": 1000, 67 | "max_session": 4, 68 | "max_trial": 1 69 | } 70 | }, 71 | } 72 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/a2c/a2c_gae_pong.json: -------------------------------------------------------------------------------- 1 | { 2 | "a2c_gae_pong": { 3 | "agent": [{ 4 | "name": "A2C", 5 | "algorithm": { 6 | "name": "ActorCritic", 7 | "action_pdtype": "default", 8 | "action_policy": "default", 9 | "explore_var_spec": null, 10 | "gamma": 0.99, 11 | "lam": 0.95, 12 | "num_step_returns": null, 13 | "entropy_coef_spec": { 14 | "name": "no_decay", 15 | "start_val": 0.01, 16 | "end_val": 0.01, 17 | "start_step": 0, 18 | "end_step": 0 19 | }, 20 | "val_loss_coef": 0.5, 21 | "training_frequency": 32 22 | }, 23 | "memory": { 24 | "name": "OnPolicyBatchReplay", 25 | }, 26 | "net": { 27 | "type": "ConvNet", 28 | "shared": true, 29 | "conv_hid_layers": [ 30 | [32, 8, 4, 0, 1], 31 | [64, 4, 2, 0, 1], 32 | [32, 3, 1, 0, 1] 33 | ], 34 | "fc_hid_layers": [512], 35 | "hid_layers_activation": "relu", 36 | "init_fn": "orthogonal_", 37 | "normalize": true, 38 | "batch_norm": false, 39 | "clip_grad_val": 0.5, 40 | "use_same_optim": false, 41 | "loss_spec": { 42 | "name": "MSELoss" 43 | }, 44 | "actor_optim_spec": { 45 | "name": "RMSprop", 46 | "lr": 7e-4, 47 | "alpha": 0.99, 48 | "eps": 1e-5 49 | }, 50 | "critic_optim_spec": { 51 | "name": "RMSprop", 52 | "lr": 7e-4, 53 | "alpha": 0.99, 54 | "eps": 1e-5 55 | }, 56 | "lr_scheduler_spec": null, 57 | "gpu": true 58 | } 59 | }], 60 | "env": [{ 61 | "name": "PongNoFrameskip-v4", 62 | "frame_op": "concat", 63 | "frame_op_len": 4, 64 | "reward_scale": "sign", 65 | "num_envs": 16, 66 | "max_t": null, 67 | "max_frame": 1e7 68 | }], 69 | "body": { 70 | "product": "outer", 71 | "num": 1 72 | }, 73 | "meta": { 74 | "distributed": false, 75 | "log_frequency": 10000, 76 | "eval_frequency": 10000, 77 | "max_session": 4, 78 | "max_trial": 1, 79 | } 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/a2c/a2c_gae_qbert.json: -------------------------------------------------------------------------------- 1 | { 2 | "a2c_gae_qbert": { 3 | "agent": [{ 4 | "name": "A2C", 5 | "algorithm": { 6 | "name": "ActorCritic", 7 | "action_pdtype": "default", 8 | "action_policy": "default", 9 | "explore_var_spec": null, 10 | "gamma": 0.99, 11 | "lam": 0.95, 12 | "num_step_returns": null, 13 | "entropy_coef_spec": { 14 | "name": "no_decay", 15 | "start_val": 0.01, 16 | "end_val": 0.01, 17 | "start_step": 0, 18 | "end_step": 0 19 | }, 20 | "val_loss_coef": 0.5, 21 | "training_frequency": 32 22 | }, 23 | "memory": { 24 | "name": "OnPolicyBatchReplay", 25 | }, 26 | "net": { 27 | "type": "ConvNet", 28 | "shared": true, 29 | "conv_hid_layers": [ 30 | 
[32, 8, 4, 0, 1], 31 | [64, 4, 2, 0, 1], 32 | [32, 3, 1, 0, 1] 33 | ], 34 | "fc_hid_layers": [512], 35 | "hid_layers_activation": "relu", 36 | "init_fn": "orthogonal_", 37 | "normalize": true, 38 | "batch_norm": false, 39 | "clip_grad_val": 0.5, 40 | "use_same_optim": false, 41 | "loss_spec": { 42 | "name": "MSELoss" 43 | }, 44 | "actor_optim_spec": { 45 | "name": "RMSprop", 46 | "lr": 7e-4, 47 | "alpha": 0.99, 48 | "eps": 1e-5 49 | }, 50 | "critic_optim_spec": { 51 | "name": "RMSprop", 52 | "lr": 7e-4, 53 | "alpha": 0.99, 54 | "eps": 1e-5 55 | }, 56 | "lr_scheduler_spec": null, 57 | "gpu": true 58 | } 59 | }], 60 | "env": [{ 61 | "name": "QbertNoFrameskip-v4", 62 | "frame_op": "concat", 63 | "frame_op_len": 4, 64 | "reward_scale": "sign", 65 | "num_envs": 16, 66 | "max_t": null, 67 | "max_frame": 1e7 68 | }], 69 | "body": { 70 | "product": "outer", 71 | "num": 1 72 | }, 73 | "meta": { 74 | "distributed": false, 75 | "log_frequency": 10000, 76 | "eval_frequency": 10000, 77 | "rigorous_eval": 0, 78 | "max_session": 4, 79 | "max_trial": 1, 80 | } 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/a2c/a2c_nstep_cont.json: -------------------------------------------------------------------------------- 1 | { 2 | "a2c_nstep_bipedalwalker": { 3 | "agent": [{ 4 | "name": "A2C", 5 | "algorithm": { 6 | "name": "ActorCritic", 7 | "action_pdtype": "default", 8 | "action_policy": "default", 9 | "explore_var_spec": null, 10 | "gamma": 0.99, 11 | "lam": null, 12 | "num_step_returns": 5, 13 | "entropy_coef_spec": { 14 | "name": "no_decay", 15 | "start_val": 0.01, 16 | "end_val": 0.01, 17 | "start_step": 0, 18 | "end_step": 0 19 | }, 20 | "val_loss_coef": 0.5, 21 | "training_frequency": 256 22 | }, 23 | "memory": { 24 | "name": "OnPolicyBatchReplay", 25 | }, 26 | "net": { 27 | "type": "MLPNet", 28 | "shared": false, 29 | "hid_layers": [256, 128], 30 | "hid_layers_activation": "relu", 31 | "init_fn": "orthogonal_", 32 | "normalize": true, 33 | "batch_norm": false, 34 | "clip_grad_val": 0.5, 35 | "use_same_optim": false, 36 | "loss_spec": { 37 | "name": "MSELoss" 38 | }, 39 | "actor_optim_spec": { 40 | "name": "Adam", 41 | "lr": 3e-4, 42 | }, 43 | "critic_optim_spec": { 44 | "name": "Adam", 45 | "lr": 3e-4, 46 | }, 47 | "lr_scheduler_spec": null, 48 | "gpu": false 49 | } 50 | }], 51 | "env": [{ 52 | "name": "BipedalWalker-v2", 53 | "num_envs": 32, 54 | "max_t": null, 55 | "max_frame": 4e6 56 | }], 57 | "body": { 58 | "product": "outer", 59 | "num": 1 60 | }, 61 | "meta": { 62 | "distributed": false, 63 | "log_frequency": 10000, 64 | "eval_frequency": 10000, 65 | "max_session": 4, 66 | "max_trial": 1 67 | } 68 | }, 69 | "a2c_nstep_pendulum": { 70 | "agent": [{ 71 | "name": "A2C", 72 | "algorithm": { 73 | "name": "ActorCritic", 74 | "action_pdtype": "default", 75 | "action_policy": "default", 76 | "explore_var_spec": null, 77 | "gamma": 0.99, 78 | "lam": null, 79 | "num_step_returns": 5, 80 | "entropy_coef_spec": { 81 | "name": "no_decay", 82 | "start_val": 0.0, 83 | "end_val": 0.0, 84 | "start_step": 0, 85 | "end_step": 0 86 | }, 87 | "val_loss_coef": 0.5, 88 | "training_frequency": 2048 89 | }, 90 | "memory": { 91 | "name": "OnPolicyBatchReplay", 92 | }, 93 | "net": { 94 | "type": "MLPNet", 95 | "shared": false, 96 | "hid_layers": [64, 64], 97 | "hid_layers_activation": "tanh", 98 | "init_fn": "orthogonal_", 99 | "normalize": false, 100 | "batch_norm": false, 101 | "clip_grad_val": 0.5, 102 | "use_same_optim": false, 103 | "loss_spec": 
{ 104 | "name": "MSELoss" 105 | }, 106 | "actor_optim_spec": { 107 | "name": "Adam", 108 | "lr": 3e-4, 109 | }, 110 | "critic_optim_spec": { 111 | "name": "Adam", 112 | "lr": 3e-4, 113 | }, 114 | "lr_scheduler_spec": { 115 | "name": "LinearToZero", 116 | "frame": 1e6 117 | }, 118 | "gpu": true 119 | } 120 | }], 121 | "env": [{ 122 | "name": "Pendulum-v0", 123 | "num_envs": 8, 124 | "max_t": null, 125 | "max_frame": 1e6 126 | }], 127 | "body": { 128 | "product": "outer", 129 | "num": 1 130 | }, 131 | "meta": { 132 | "distributed": false, 133 | "log_frequency": 20000, 134 | "eval_frequency": 20000, 135 | "max_session": 4, 136 | "max_trial": 1 137 | } 138 | } 139 | } 140 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/a2c/a2c_nstep_lunar.json: -------------------------------------------------------------------------------- 1 | { 2 | "a2c_nstep_lunar": { 3 | "agent": [{ 4 | "name": "A2C", 5 | "algorithm": { 6 | "name": "ActorCritic", 7 | "action_pdtype": "default", 8 | "action_policy": "default", 9 | "explore_var_spec": null, 10 | "gamma": 0.99, 11 | "lam": null, 12 | "num_step_returns": 5, 13 | "entropy_coef_spec": { 14 | "name": "no_decay", 15 | "start_val": 0.01, 16 | "end_val": 0.01, 17 | "start_step": 0, 18 | "end_step": 0 19 | }, 20 | "val_loss_coef": 1.0, 21 | "training_frequency": 64 22 | }, 23 | "memory": { 24 | "name": "OnPolicyBatchReplay", 25 | }, 26 | "net": { 27 | "type": "MLPNet", 28 | "shared": false, 29 | "hid_layers": [64, 64, 32], 30 | "hid_layers_activation": "relu", 31 | "init_fn": "orthogonal_", 32 | "batch_norm": false, 33 | "clip_grad_val": 0.5, 34 | "use_same_optim": false, 35 | "loss_spec": { 36 | "name": "MSELoss" 37 | }, 38 | "actor_optim_spec": { 39 | "name": "Adam", 40 | "lr": 2e-3, 41 | }, 42 | "critic_optim_spec": { 43 | "name": "Adam", 44 | "lr": 2e-3, 45 | }, 46 | "lr_scheduler_spec": null, 47 | "gpu": false 48 | } 49 | }], 50 | "env": [{ 51 | "name": "LunarLander-v2", 52 | "frame_op": "concat", 53 | "frame_op_len": 4, 54 | "max_t": null, 55 | "max_frame": 300000, 56 | "num_envs": 8, 57 | "normalize_state": false 58 | }], 59 | "body": { 60 | "product": "outer", 61 | "num": 1 62 | }, 63 | "meta": { 64 | "distributed": false, 65 | "log_frequency": 1000, 66 | "eval_frequency": 1000, 67 | "max_session": 4, 68 | "max_trial": 1 69 | } 70 | }, 71 | } 72 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/a2c/a2c_nstep_pong.json: -------------------------------------------------------------------------------- 1 | { 2 | "a2c_nstep_pong": { 3 | "agent": [{ 4 | "name": "A2C", 5 | "algorithm": { 6 | "name": "ActorCritic", 7 | "action_pdtype": "default", 8 | "action_policy": "default", 9 | "explore_var_spec": null, 10 | "gamma": 0.99, 11 | "lam": null, 12 | "num_step_returns": 11, 13 | "entropy_coef_spec": { 14 | "name": "no_decay", 15 | "start_val": 0.01, 16 | "end_val": 0.01, 17 | "start_step": 0, 18 | "end_step": 0 19 | }, 20 | "val_loss_coef": 0.5, 21 | "training_frequency": 5 22 | }, 23 | "memory": { 24 | "name": "OnPolicyBatchReplay" 25 | }, 26 | "net": { 27 | "type": "ConvNet", 28 | "shared": true, 29 | "conv_hid_layers": [ 30 | [32, 8, 4, 0, 1], 31 | [64, 4, 2, 0, 1], 32 | [32, 3, 1, 0, 1] 33 | ], 34 | "fc_hid_layers": [512], 35 | "hid_layers_activation": "relu", 36 | "init_fn": "orthogonal_", 37 | "normalize": true, 38 | "batch_norm": false, 39 | "clip_grad_val": 0.5, 40 | "use_same_optim": false, 41 | "loss_spec": { 42 | "name": "MSELoss" 43 | }, 44 | 
"actor_optim_spec": { 45 | "name": "RMSprop", 46 | "lr": 7e-4, 47 | "alpha": 0.99, 48 | "eps": 1e-5 49 | }, 50 | "critic_optim_spec": { 51 | "name": "RMSprop", 52 | "lr": 7e-4, 53 | "alpha": 0.99, 54 | "eps": 1e-5 55 | }, 56 | "lr_scheduler_spec": null, 57 | "gpu": true 58 | } 59 | }], 60 | "env": [{ 61 | "name": "PongNoFrameskip-v4", 62 | "frame_op": "concat", 63 | "frame_op_len": 4, 64 | "reward_scale": "sign", 65 | "num_envs": 16, 66 | "max_t": null, 67 | "max_frame": 1e7 68 | }], 69 | "body": { 70 | "product": "outer", 71 | "num": 1, 72 | }, 73 | "meta": { 74 | "distributed": false, 75 | "log_frequency": 10000, 76 | "eval_frequency": 10000, 77 | "max_session": 4, 78 | "max_trial": 1 79 | } 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/a2c/a2c_nstep_qbert.json: -------------------------------------------------------------------------------- 1 | { 2 | "a2c_nstep_qbert": { 3 | "agent": [{ 4 | "name": "A2C", 5 | "algorithm": { 6 | "name": "ActorCritic", 7 | "action_pdtype": "default", 8 | "action_policy": "default", 9 | "explore_var_spec": null, 10 | "gamma": 0.99, 11 | "lam": null, 12 | "num_step_returns": 11, 13 | "entropy_coef_spec": { 14 | "name": "no_decay", 15 | "start_val": 0.01, 16 | "end_val": 0.01, 17 | "start_step": 0, 18 | "end_step": 0 19 | }, 20 | "val_loss_coef": 0.5, 21 | "training_frequency": 5 22 | }, 23 | "memory": { 24 | "name": "OnPolicyBatchReplay" 25 | }, 26 | "net": { 27 | "type": "ConvNet", 28 | "shared": true, 29 | "conv_hid_layers": [ 30 | [32, 8, 4, 0, 1], 31 | [64, 4, 2, 0, 1], 32 | [32, 3, 1, 0, 1] 33 | ], 34 | "fc_hid_layers": [512], 35 | "hid_layers_activation": "relu", 36 | "init_fn": "orthogonal_", 37 | "normalize": true, 38 | "batch_norm": false, 39 | "clip_grad_val": 0.5, 40 | "use_same_optim": false, 41 | "loss_spec": { 42 | "name": "MSELoss" 43 | }, 44 | "actor_optim_spec": { 45 | "name": "RMSprop", 46 | "lr": 7e-4, 47 | "alpha": 0.99, 48 | "eps": 1e-5 49 | }, 50 | "critic_optim_spec": { 51 | "name": "RMSprop", 52 | "lr": 7e-4, 53 | "alpha": 0.99, 54 | "eps": 1e-5 55 | }, 56 | "lr_scheduler_spec": null, 57 | "gpu": true 58 | } 59 | }], 60 | "env": [{ 61 | "name": "QbertNoFrameskip-v4", 62 | "frame_op": "concat", 63 | "frame_op_len": 4, 64 | "reward_scale": "sign", 65 | "num_envs": 16, 66 | "max_t": null, 67 | "max_frame": 1e7 68 | }], 69 | "body": { 70 | "product": "outer", 71 | "num": 1, 72 | }, 73 | "meta": { 74 | "distributed": false, 75 | "log_frequency": 10000, 76 | "eval_frequency": 10000, 77 | "rigorous_eval": 0, 78 | "max_session": 4, 79 | "max_trial": 1 80 | } 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/async_sac/async_sac_halfcheetah.json: -------------------------------------------------------------------------------- 1 | { 2 | "async_sac_halfcheetah": { 3 | "agent": [{ 4 | "name": "SoftActorCritic", 5 | "algorithm": { 6 | "name": "SoftActorCritic", 7 | "action_pdtype": "default", 8 | "action_policy": "default", 9 | "gamma": 0.99, 10 | "training_frequency": 1 11 | }, 12 | "memory": { 13 | "name": "Replay", 14 | "batch_size": 256, 15 | "max_size": 200000, 16 | "use_cer": true 17 | }, 18 | "net": { 19 | "type": "MLPNet", 20 | "hid_layers": [256, 256], 21 | "hid_layers_activation": "relu", 22 | "init_fn": "orthogonal_", 23 | "clip_grad_val": 1.0, 24 | "loss_spec": { 25 | "name": "MSELoss" 26 | }, 27 | "optim_spec": { 28 | "name": "GlobalAdam", 29 | "lr": 1e-4, 30 | }, 31 | 
"lr_scheduler_spec": null, 32 | "update_type": "polyak", 33 | "update_frequency": 1, 34 | "polyak_coef": 0.005, 35 | "gpu": false 36 | } 37 | }], 38 | "env": [{ 39 | "name": "RoboschoolHalfCheetah-v1", 40 | "num_envs": 8, 41 | "max_t": null, 42 | "max_frame": 4e6 43 | }], 44 | "body": { 45 | "product": "outer", 46 | "num": 1 47 | }, 48 | "meta": { 49 | "distributed": "shared", 50 | "log_frequency": 1000, 51 | "eval_frequency": 1000, 52 | "rigorous_eval": 0, 53 | "max_session": 6, 54 | "max_trial": 1, 55 | }, 56 | "search": { 57 | "env": [{ 58 | "num_envs__grid_search": [4, 6, 8, 10] 59 | }], 60 | "meta": { 61 | "max_session__grid_search": [4, 6, 8, 10] 62 | } 63 | } 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/async_sac/async_sac_halfcheetah_pybullet.json: -------------------------------------------------------------------------------- 1 | { 2 | "async_sac_halfcheetah": { 3 | "agent": [{ 4 | "name": "SoftActorCritic", 5 | "algorithm": { 6 | "name": "SoftActorCritic", 7 | "action_pdtype": "default", 8 | "action_policy": "default", 9 | "gamma": 0.99, 10 | "training_frequency": 1 11 | }, 12 | "memory": { 13 | "name": "Replay", 14 | "batch_size": 256, 15 | "max_size": 200000, 16 | "use_cer": true 17 | }, 18 | "net": { 19 | "type": "MLPNet", 20 | "hid_layers": [256, 256], 21 | "hid_layers_activation": "relu", 22 | "init_fn": "orthogonal_", 23 | "clip_grad_val": 1.0, 24 | "loss_spec": { 25 | "name": "MSELoss" 26 | }, 27 | "optim_spec": { 28 | "name": "GlobalAdam", 29 | "lr": 1e-4 30 | }, 31 | "lr_scheduler_spec": null, 32 | "update_type": "polyak", 33 | "update_frequency": 1, 34 | "polyak_coef": 0.005, 35 | "gpu": true 36 | } 37 | }], 38 | "env": [{ 39 | "name": "HalfCheetahBulletEnv-v0", 40 | "num_envs": 8, 41 | "max_t": null, 42 | "max_frame": 4e6 43 | }], 44 | "body": { 45 | "product": "outer", 46 | "num": 1 47 | }, 48 | "meta": { 49 | "distributed": "shared", 50 | "log_frequency": 1000, 51 | "eval_frequency": 1000, 52 | "rigorous_eval": 0, 53 | "max_session": 6, 54 | "max_trial": 1 55 | }, 56 | "search": { 57 | "env": [{ 58 | "num_envs__grid_search": [4, 6, 8, 10] 59 | }], 60 | "meta": { 61 | "max_session__grid_search": [4, 6, 8, 10] 62 | } 63 | } 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/async_sac/async_sac_lunar.json: -------------------------------------------------------------------------------- 1 | { 2 | "async_sac_lunar": { 3 | "agent": [{ 4 | "name": "SoftActorCritic", 5 | "algorithm": { 6 | "name": "SoftActorCritic", 7 | "action_pdtype": "GumbelSoftmax", 8 | "action_policy": "default", 9 | "gamma": 0.99, 10 | "training_frequency": 1 11 | }, 12 | "memory": { 13 | "name": "Replay", 14 | "batch_size": 256, 15 | "max_size": 200000, 16 | "use_cer": true 17 | }, 18 | "net": { 19 | "type": "MLPNet", 20 | "hid_layers": [64, 64, 32], 21 | "hid_layers_activation": "relu", 22 | "init_fn": "orthogonal_", 23 | "clip_grad_val": 0.5, 24 | "loss_spec": { 25 | "name": "MSELoss" 26 | }, 27 | "optim_spec": { 28 | "name": "GlobalAdam", 29 | "lr": 1e-3, 30 | }, 31 | "lr_scheduler_spec": null, 32 | "update_type": "polyak", 33 | "update_frequency": 1, 34 | "polyak_coef": 0.005, 35 | "gpu": false 36 | } 37 | }], 38 | "env": [{ 39 | "name": "LunarLander-v2", 40 | "frame_op": "concat", 41 | "frame_op_len": 4, 42 | "max_t": null, 43 | "max_frame": 8e5, 44 | "num_envs": 8, 45 | "normalize_state": false 46 | }], 47 | "body": { 48 | "product": "outer", 49 
| "num": 1 50 | }, 51 | "meta": { 52 | "distributed": "shared", 53 | "log_frequency": 500, 54 | "eval_frequency": 500, 55 | "rigorous_eval": 0, 56 | "max_session": 6, 57 | "max_trial": 1, 58 | }, 59 | "search": { 60 | "env": [{ 61 | "num_envs__grid_search": [4, 6, 8, 10] 62 | }], 63 | "meta": { 64 | "max_session__grid_search": [4, 6, 8, 10] 65 | } 66 | } 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/async_sac/async_sac_pong.json: -------------------------------------------------------------------------------- 1 | { 2 | "async_sac_pong": { 3 | "agent": [{ 4 | "name": "SoftActorCritic", 5 | "algorithm": { 6 | "name": "SoftActorCritic", 7 | "action_pdtype": "GumbelSoftmax", 8 | "action_policy": "default", 9 | "gamma": 0.99, 10 | "training_start_step": 1000, 11 | "training_frequency": 4 12 | }, 13 | "memory": { 14 | "name": "Replay", 15 | "batch_size": 256, 16 | "max_size": 200000, 17 | "use_cer": false 18 | }, 19 | "net": { 20 | "type": "ConvNet", 21 | "shared": false, 22 | "conv_hid_layers": [ 23 | [32, 8, 4, 0, 1], 24 | [64, 4, 2, 0, 1], 25 | [32, 3, 1, 0, 1] 26 | ], 27 | "fc_hid_layers": [256, 256], 28 | "hid_layers_activation": "leakyrelu", 29 | "init_fn": "orthogonal_", 30 | "normalize": true, 31 | "batch_norm": false, 32 | "clip_grad_val": 0.5, 33 | "use_same_optim": false, 34 | "loss_spec": { 35 | "name": "SmoothL1Loss" 36 | }, 37 | "optim_spec": { 38 | "name": "GlobalAdam", 39 | "lr": 1e-4, 40 | }, 41 | "lr_scheduler_spec": null, 42 | "update_type": "polyak", 43 | "update_frequency": 1, 44 | "polyak_coef": 0.005, 45 | "gpu": true 46 | } 47 | }], 48 | "env": [{ 49 | "name": "PongNoFrameskip-v4", 50 | "frame_op": "concat", 51 | "frame_op_len": 4, 52 | "image_downsize": [64, 64], 53 | "reward_scale": "sign", 54 | "num_envs": 4, 55 | "max_t": null, 56 | "max_frame": 5e6 57 | }], 58 | "body": { 59 | "product": "outer", 60 | "num": 1 61 | }, 62 | "meta": { 63 | "distributed": "shared", 64 | "log_frequency": 1000, 65 | "eval_frequency": 1000, 66 | "rigorous_eval": 0, 67 | "max_session": 6, 68 | "max_trial": 1, 69 | } 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/dppo/dppo_atari.json: -------------------------------------------------------------------------------- 1 | { 2 | "dppo_atari": { 3 | "agent": [{ 4 | "name": "PPO", 5 | "algorithm": { 6 | "name": "PPO", 7 | "action_pdtype": "default", 8 | "action_policy": "default", 9 | "explore_var_spec": null, 10 | "gamma": 0.99, 11 | "lam": 0.95, 12 | "clip_eps_spec": { 13 | "name": "no_decay", 14 | "start_val": 0.10, 15 | "end_val": 0.10, 16 | "start_step": 0, 17 | "end_step": 0 18 | }, 19 | "entropy_coef_spec": { 20 | "name": "no_decay", 21 | "start_val": 0.01, 22 | "end_val": 0.01, 23 | "start_step": 0, 24 | "end_step": 0 25 | }, 26 | "val_loss_coef": 0.5, 27 | "time_horizon": 128, 28 | "minibatch_size": 256, 29 | "training_epoch": 4 30 | }, 31 | "memory": { 32 | "name": "OnPolicyBatchReplay", 33 | }, 34 | "net": { 35 | "type": "ConvNet", 36 | "shared": true, 37 | "conv_hid_layers": [ 38 | [32, 8, 4, 0, 1], 39 | [64, 4, 2, 0, 1], 40 | [32, 3, 1, 0, 1] 41 | ], 42 | "fc_hid_layers": [512], 43 | "hid_layers_activation": "relu", 44 | "init_fn": "orthogonal_", 45 | "normalize": true, 46 | "batch_norm": false, 47 | "clip_grad_val": 0.5, 48 | "use_same_optim": false, 49 | "loss_spec": { 50 | "name": "MSELoss" 51 | }, 52 | "actor_optim_spec": { 53 | "name": "GlobalAdam", 54 | "lr": 1e-4 55 | }, 56 | 
"critic_optim_spec": { 57 | "name": "GlobalAdam", 58 | "lr": 1e-4 59 | }, 60 | "lr_scheduler_spec": null, 61 | "gpu": false 62 | } 63 | }], 64 | "env": [{ 65 | "name": "${env}", 66 | "frame_op": "concat", 67 | "frame_op_len": 4, 68 | "reward_scale": "sign", 69 | "num_envs": 8, 70 | "max_t": null, 71 | "max_frame": 1e7 72 | }], 73 | "body": { 74 | "product": "outer", 75 | "num": 1 76 | }, 77 | "meta": { 78 | "distributed": "synced", 79 | "log_frequency": 10000, 80 | "eval_frequency": 10000, 81 | "max_session": 16, 82 | "max_trial": 1, 83 | }, 84 | "spec_params": { 85 | "env": [ 86 | "BreakoutNoFrameskip-v4", "PongNoFrameskip-v4", "QbertNoFrameskip-v4", "SeaquestNoFrameskip-v4" 87 | ] 88 | } 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/dppo/dppo_pong.json: -------------------------------------------------------------------------------- 1 | { 2 | "dppo_pong": { 3 | "agent": [{ 4 | "name": "PPO", 5 | "algorithm": { 6 | "name": "PPO", 7 | "action_pdtype": "default", 8 | "action_policy": "default", 9 | "explore_var_spec": null, 10 | "gamma": 0.99, 11 | "lam": 0.95, 12 | "clip_eps_spec": { 13 | "name": "no_decay", 14 | "start_val": 0.10, 15 | "end_val": 0.10, 16 | "start_step": 0, 17 | "end_step": 0 18 | }, 19 | "entropy_coef_spec": { 20 | "name": "no_decay", 21 | "start_val": 0.01, 22 | "end_val": 0.01, 23 | "start_step": 0, 24 | "end_step": 0 25 | }, 26 | "val_loss_coef": 0.5, 27 | "time_horizon": 128, 28 | "minibatch_size": 256, 29 | "training_epoch": 4 30 | }, 31 | "memory": { 32 | "name": "OnPolicyBatchReplay", 33 | }, 34 | "net": { 35 | "type": "ConvNet", 36 | "shared": true, 37 | "conv_hid_layers": [ 38 | [32, 8, 4, 0, 1], 39 | [64, 4, 2, 0, 1], 40 | [32, 3, 1, 0, 1] 41 | ], 42 | "fc_hid_layers": [512], 43 | "hid_layers_activation": "relu", 44 | "init_fn": "orthogonal_", 45 | "normalize": true, 46 | "batch_norm": false, 47 | "clip_grad_val": 0.5, 48 | "use_same_optim": false, 49 | "loss_spec": { 50 | "name": "MSELoss" 51 | }, 52 | "actor_optim_spec": { 53 | "name": "GlobalAdam", 54 | "lr": 7e-4, 55 | }, 56 | "critic_optim_spec": { 57 | "name": "GlobalAdam", 58 | "lr": 7e-4, 59 | }, 60 | "lr_scheduler_spec": null, 61 | "gpu": false 62 | } 63 | }], 64 | "env": [{ 65 | "name": "PongNoFrameskip-v4", 66 | "frame_op": "concat", 67 | "frame_op_len": 4, 68 | "reward_scale": "sign", 69 | "num_envs": 8, 70 | "max_t": null, 71 | "max_frame": 1e7 72 | }], 73 | "body": { 74 | "product": "outer", 75 | "num": 1 76 | }, 77 | "meta": { 78 | "distributed": "synced", 79 | "log_frequency": 10000, 80 | "eval_frequency": 10000, 81 | "max_session": 16, 82 | "max_trial": 1, 83 | } 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/dqn/ddqn_atari.json: -------------------------------------------------------------------------------- 1 | { 2 | "ddqn_atari": { 3 | "agent": [{ 4 | "name": "DoubleDQN", 5 | "algorithm": { 6 | "name": "DoubleDQN", 7 | "action_pdtype": "Argmax", 8 | "action_policy": "epsilon_greedy", 9 | "explore_var_spec": { 10 | "name": "linear_decay", 11 | "start_val": 1.0, 12 | "end_val": 0.01, 13 | "start_step": 10000, 14 | "end_step": 1000000 15 | }, 16 | "gamma": 0.99, 17 | "training_batch_iter": 1, 18 | "training_iter": 4, 19 | "training_frequency": 4, 20 | "training_start_step": 10000 21 | }, 22 | "memory": { 23 | "name": "Replay", 24 | "batch_size": 32, 25 | "max_size": 200000, 26 | "use_cer": false, 27 | }, 28 | "net": { 29 | "type": "ConvNet", 
30 | "conv_hid_layers": [ 31 | [32, 8, 4, 0, 1], 32 | [64, 4, 2, 0, 1], 33 | [64, 3, 1, 0, 1] 34 | ], 35 | "fc_hid_layers": [256], 36 | "hid_layers_activation": "relu", 37 | "init_fn": null, 38 | "batch_norm": false, 39 | "clip_grad_val": 10.0, 40 | "loss_spec": { 41 | "name": "SmoothL1Loss" 42 | }, 43 | "optim_spec": { 44 | "name": "Adam", 45 | "lr": 1e-4, 46 | }, 47 | "lr_scheduler_spec": null, 48 | "update_type": "replace", 49 | "update_frequency": 1000, 50 | "gpu": true 51 | } 52 | }], 53 | "env": [{ 54 | "name": "${env}", 55 | "frame_op": "concat", 56 | "frame_op_len": 4, 57 | "reward_scale": "sign", 58 | "num_envs": 16, 59 | "max_t": null, 60 | "max_frame": 1e7 61 | }], 62 | "body": { 63 | "product": "outer", 64 | "num": 1 65 | }, 66 | "meta": { 67 | "distributed": false, 68 | "eval_frequency": 10000, 69 | "log_frequency": 10000, 70 | "rigorous_eval": 0, 71 | "max_session": 4, 72 | "max_trial": 1 73 | }, 74 | "spec_params": { 75 | "env": [ 76 | "BreakoutNoFrameskip-v4", "PongNoFrameskip-v4", "QbertNoFrameskip-v4", "SeaquestNoFrameskip-v4" 77 | ] 78 | } 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/dqn/ddqn_per_lunar.json: -------------------------------------------------------------------------------- 1 | { 2 | "ddqn_per_concat_lunar": { 3 | "agent": [{ 4 | "name": "DoubleDQN", 5 | "algorithm": { 6 | "name": "DoubleDQN", 7 | "action_pdtype": "Argmax", 8 | "action_policy": "epsilon_greedy", 9 | "explore_var_spec": { 10 | "name": "linear_decay", 11 | "start_val": 1.0, 12 | "end_val": 0.01, 13 | "start_step": 0, 14 | "end_step": 50000 15 | }, 16 | "gamma": 0.99, 17 | "training_batch_iter": 1, 18 | "training_iter": 1, 19 | "training_frequency": 1, 20 | "training_start_step": 32 21 | }, 22 | "memory": { 23 | "name": "PrioritizedReplay", 24 | "alpha": 0.6, 25 | "epsilon": 0.0001, 26 | "batch_size": 32, 27 | "max_size": 50000, 28 | "use_cer": false, 29 | }, 30 | "net": { 31 | "type": "MLPNet", 32 | "hid_layers": [256, 128], 33 | "hid_layers_activation": "relu", 34 | "clip_grad_val": 10.0, 35 | "loss_spec": { 36 | "name": "SmoothL1Loss" 37 | }, 38 | "optim_spec": { 39 | "name": "Adam", 40 | "lr": 2.5e-4 41 | }, 42 | "lr_scheduler_spec": null, 43 | "update_type": "replace", 44 | "update_frequency": 100, 45 | "gpu": false 46 | } 47 | }], 48 | "env": [{ 49 | "name": "LunarLander-v2", 50 | "frame_op": "concat", 51 | "frame_op_len": 4, 52 | "max_t": null, 53 | "max_frame": 300000, 54 | "normalize_state": false 55 | }], 56 | "body": { 57 | "product": "outer", 58 | "num": 1 59 | }, 60 | "meta": { 61 | "distributed": false, 62 | "log_frequency": 1000, 63 | "eval_frequency": 1000, 64 | "max_session": 4, 65 | "max_trial": 1, 66 | "search": "RandomSearch", 67 | }, 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/dqn/ddqn_per_pong.json: -------------------------------------------------------------------------------- 1 | { 2 | "ddqn_per_pong": { 3 | "agent": [{ 4 | "name": "DoubleDQN", 5 | "algorithm": { 6 | "name": "DoubleDQN", 7 | "action_pdtype": "Argmax", 8 | "action_policy": "epsilon_greedy", 9 | "explore_var_spec": { 10 | "name": "linear_decay", 11 | "start_val": 1.0, 12 | "end_val": 0.01, 13 | "start_step": 10000, 14 | "end_step": 1000000 15 | }, 16 | "gamma": 0.99, 17 | "training_batch_iter": 1, 18 | "training_iter": 4, 19 | "training_frequency": 4, 20 | "training_start_step": 10000 21 | }, 22 | "memory": { 23 | "name": "PrioritizedReplay", 24 | 
"alpha": 0.6, 25 | "epsilon": 0.0001, 26 | "batch_size": 32, 27 | "max_size": 200000, 28 | "use_cer": false, 29 | }, 30 | "net": { 31 | "type": "ConvNet", 32 | "conv_hid_layers": [ 33 | [32, 8, 4, 0, 1], 34 | [64, 4, 2, 0, 1], 35 | [64, 3, 1, 0, 1] 36 | ], 37 | "fc_hid_layers": [256], 38 | "hid_layers_activation": "relu", 39 | "init_fn": null, 40 | "batch_norm": false, 41 | "clip_grad_val": 10.0, 42 | "loss_spec": { 43 | "name": "SmoothL1Loss" 44 | }, 45 | "optim_spec": { 46 | "name": "Adam", 47 | "lr": 2.5e-5, 48 | }, 49 | "lr_scheduler_spec": null, 50 | "update_type": "replace", 51 | "update_frequency": 1000, 52 | "gpu": true 53 | } 54 | }], 55 | "env": [{ 56 | "name": "PongNoFrameskip-v4", 57 | "frame_op": "concat", 58 | "frame_op_len": 4, 59 | "reward_scale": "sign", 60 | "num_envs": 16, 61 | "max_t": null, 62 | "max_frame": 4e6 63 | }], 64 | "body": { 65 | "product": "outer", 66 | "num": 1 67 | }, 68 | "meta": { 69 | "distributed": false, 70 | "eval_frequency": 10000, 71 | "log_frequency": 10000, 72 | "max_session": 4, 73 | "max_trial": 1 74 | } 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/dqn/ddqn_per_qbert.json: -------------------------------------------------------------------------------- 1 | { 2 | "ddqn_per_qbert": { 3 | "agent": [{ 4 | "name": "DoubleDQN", 5 | "algorithm": { 6 | "name": "DoubleDQN", 7 | "action_pdtype": "Argmax", 8 | "action_policy": "epsilon_greedy", 9 | "explore_var_spec": { 10 | "name": "linear_decay", 11 | "start_val": 1.0, 12 | "end_val": 0.01, 13 | "start_step": 10000, 14 | "end_step": 1000000 15 | }, 16 | "gamma": 0.99, 17 | "training_batch_iter": 1, 18 | "training_iter": 4, 19 | "training_frequency": 4, 20 | "training_start_step": 10000 21 | }, 22 | "memory": { 23 | "name": "PrioritizedReplay", 24 | "alpha": 0.6, 25 | "epsilon": 0.0001, 26 | "batch_size": 32, 27 | "max_size": 200000, 28 | "use_cer": false, 29 | }, 30 | "net": { 31 | "type": "ConvNet", 32 | "conv_hid_layers": [ 33 | [32, 8, 4, 0, 1], 34 | [64, 4, 2, 0, 1], 35 | [64, 3, 1, 0, 1] 36 | ], 37 | "fc_hid_layers": [256], 38 | "hid_layers_activation": "relu", 39 | "init_fn": null, 40 | "batch_norm": false, 41 | "clip_grad_val": 10.0, 42 | "loss_spec": { 43 | "name": "SmoothL1Loss" 44 | }, 45 | "optim_spec": { 46 | "name": "Adam", 47 | "lr": 2.5e-5, 48 | }, 49 | "lr_scheduler_spec": null, 50 | "update_type": "replace", 51 | "update_frequency": 1000, 52 | "gpu": true 53 | } 54 | }], 55 | "env": [{ 56 | "name": "QbertNoFrameskip-v4", 57 | "frame_op": "concat", 58 | "frame_op_len": 4, 59 | "reward_scale": "sign", 60 | "num_envs": 16, 61 | "max_t": null, 62 | "max_frame": 4e6 63 | }], 64 | "body": { 65 | "product": "outer", 66 | "num": 1 67 | }, 68 | "meta": { 69 | "distributed": false, 70 | "eval_frequency": 10000, 71 | "log_frequency": 10000, 72 | "rigorous_eval": 0, 73 | "max_session": 4, 74 | "max_trial": 1 75 | } 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/dqn/ddqn_pong.json: -------------------------------------------------------------------------------- 1 | { 2 | "ddqn_pong": { 3 | "agent": [{ 4 | "name": "DoubleDQN", 5 | "algorithm": { 6 | "name": "DoubleDQN", 7 | "action_pdtype": "Argmax", 8 | "action_policy": "epsilon_greedy", 9 | "explore_var_spec": { 10 | "name": "linear_decay", 11 | "start_val": 1.0, 12 | "end_val": 0.01, 13 | "start_step": 10000, 14 | "end_step": 1000000 15 | }, 16 | "gamma": 0.99, 17 | "training_batch_iter": 1, 
18 | "training_iter": 4, 19 | "training_frequency": 4, 20 | "training_start_step": 10000 21 | }, 22 | "memory": { 23 | "name": "Replay", 24 | "batch_size": 32, 25 | "max_size": 200000, 26 | "use_cer": false, 27 | }, 28 | "net": { 29 | "type": "ConvNet", 30 | "conv_hid_layers": [ 31 | [32, 8, 4, 0, 1], 32 | [64, 4, 2, 0, 1], 33 | [64, 3, 1, 0, 1] 34 | ], 35 | "fc_hid_layers": [256], 36 | "hid_layers_activation": "relu", 37 | "init_fn": null, 38 | "batch_norm": false, 39 | "clip_grad_val": 10.0, 40 | "loss_spec": { 41 | "name": "SmoothL1Loss" 42 | }, 43 | "optim_spec": { 44 | "name": "Adam", 45 | "lr": 1e-4, 46 | }, 47 | "lr_scheduler_spec": null, 48 | "update_type": "replace", 49 | "update_frequency": 1000, 50 | "gpu": true 51 | } 52 | }], 53 | "env": [{ 54 | "name": "PongNoFrameskip-v4", 55 | "frame_op": "concat", 56 | "frame_op_len": 4, 57 | "reward_scale": "sign", 58 | "num_envs": 16, 59 | "max_t": null, 60 | "max_frame": 4e6 61 | }], 62 | "body": { 63 | "product": "outer", 64 | "num": 1 65 | }, 66 | "meta": { 67 | "distributed": false, 68 | "eval_frequency": 10000, 69 | "log_frequency": 10000, 70 | "max_session": 4, 71 | "max_trial": 1 72 | } 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/dqn/ddqn_qbert.json: -------------------------------------------------------------------------------- 1 | { 2 | "ddqn_qbert": { 3 | "agent": [{ 4 | "name": "DoubleDQN", 5 | "algorithm": { 6 | "name": "DoubleDQN", 7 | "action_pdtype": "Argmax", 8 | "action_policy": "epsilon_greedy", 9 | "explore_var_spec": { 10 | "name": "linear_decay", 11 | "start_val": 1.0, 12 | "end_val": 0.01, 13 | "start_step": 10000, 14 | "end_step": 1000000 15 | }, 16 | "gamma": 0.99, 17 | "training_batch_iter": 1, 18 | "training_iter": 4, 19 | "training_frequency": 4, 20 | "training_start_step": 10000 21 | }, 22 | "memory": { 23 | "name": "Replay", 24 | "batch_size": 32, 25 | "max_size": 200000, 26 | "use_cer": false, 27 | }, 28 | "net": { 29 | "type": "ConvNet", 30 | "conv_hid_layers": [ 31 | [32, 8, 4, 0, 1], 32 | [64, 4, 2, 0, 1], 33 | [64, 3, 1, 0, 1] 34 | ], 35 | "fc_hid_layers": [256], 36 | "hid_layers_activation": "relu", 37 | "init_fn": null, 38 | "batch_norm": false, 39 | "clip_grad_val": 10.0, 40 | "loss_spec": { 41 | "name": "SmoothL1Loss" 42 | }, 43 | "optim_spec": { 44 | "name": "Adam", 45 | "lr": 1e-4, 46 | }, 47 | "lr_scheduler_spec": null, 48 | "update_type": "replace", 49 | "update_frequency": 1000, 50 | "gpu": true 51 | } 52 | }], 53 | "env": [{ 54 | "name": "QbertNoFrameskip-v4", 55 | "frame_op": "concat", 56 | "frame_op_len": 4, 57 | "reward_scale": "sign", 58 | "num_envs": 16, 59 | "max_t": null, 60 | "max_frame": 4e6 61 | }], 62 | "body": { 63 | "product": "outer", 64 | "num": 1 65 | }, 66 | "meta": { 67 | "distributed": false, 68 | "eval_frequency": 10000, 69 | "log_frequency": 10000, 70 | "rigorous_eval": 0, 71 | "max_session": 4, 72 | "max_trial": 1 73 | } 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/dqn/ddqn_videopinball.json: -------------------------------------------------------------------------------- 1 | { 2 | "ddqn_videopinball": { 3 | "agent": [{ 4 | "name": "DoubleDQN", 5 | "algorithm": { 6 | "name": "DoubleDQN", 7 | "action_pdtype": "Argmax", 8 | "action_policy": "epsilon_greedy", 9 | "explore_var_spec": { 10 | "name": "linear_decay", 11 | "start_val": 1.0, 12 | "end_val": 0.01, 13 | "start_step": 1000, 14 | "end_step": 100000 15 | }, 16 | 
"gamma": 0.99, 17 | "training_batch_iter": 1, 18 | "training_iter": 4, 19 | "training_frequency": 4, 20 | "training_start_step": 100 21 | }, 22 | "memory": { 23 | "name": "PrioritizedReplay", 24 | "alpha": 0.6, 25 | "epsilon": 0.0001, 26 | "batch_size": 64, 27 | "max_size": 200000, 28 | "use_cer": false 29 | }, 30 | "net": { 31 | "type": "ConvNet", 32 | "conv_hid_layers": [ 33 | [32, 8, 4, 0, 1], 34 | [64, 4, 2, 0, 1], 35 | [32, 3, 1, 0, 1] 36 | ], 37 | "fc_hid_layers": [512], 38 | "hid_layers_activation": "relu", 39 | "init_fn": null, 40 | "shared": true, 41 | "batch_norm": false, 42 | "clip_grad_val": 10.0, 43 | "loss_spec": { 44 | "name": "SmoothL1Loss" 45 | }, 46 | "optim_spec": { 47 | "name": "Adam", 48 | "lr": 2.5e-5 49 | }, 50 | "lr_scheduler_spec": null, 51 | "update_frequency": 100, 52 | "gpu": true 53 | } 54 | }], 55 | "env": [{ 56 | "name": "VideoPinball-v0", 57 | "frame_op": "concat", 58 | "frame_op_len": 4, 59 | "reward_scale": "sign", 60 | "num_envs": 16, 61 | "max_t": null, 62 | "max_frame": 850000 63 | }], 64 | "body": { 65 | "product": "outer", 66 | "num": 1 67 | }, 68 | "meta": { 69 | "distributed": false, 70 | "eval_frequency": 10000, 71 | "log_frequency": 10000, 72 | "rigorous_eval": 0, 73 | "max_trial": 1, 74 | "max_session": 1 75 | } 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/dqn/dqn_lunar.json: -------------------------------------------------------------------------------- 1 | { 2 | "dqn_concat_lunar": { 3 | "agent": [{ 4 | "name": "DQN", 5 | "algorithm": { 6 | "name": "DQN", 7 | "action_pdtype": "Argmax", 8 | "action_policy": "epsilon_greedy", 9 | "explore_var_spec": { 10 | "name": "linear_decay", 11 | "start_val": 1.0, 12 | "end_val": 0.01, 13 | "start_step": 0, 14 | "end_step": 50000 15 | }, 16 | "gamma": 0.99, 17 | "training_batch_iter": 1, 18 | "training_iter": 1, 19 | "training_frequency": 1, 20 | "training_start_step": 32 21 | }, 22 | "memory": { 23 | "name": "Replay", 24 | "batch_size": 32, 25 | "max_size": 50000, 26 | "use_cer": false 27 | }, 28 | "net": { 29 | "type": "MLPNet", 30 | "hid_layers": [256, 128], 31 | "hid_layers_activation": "relu", 32 | "clip_grad_val": 10.0, 33 | "loss_spec": { 34 | "name": "SmoothL1Loss" 35 | }, 36 | "optim_spec": { 37 | "name": "Adam", 38 | "lr": 1e-3 39 | }, 40 | "lr_scheduler_spec": null, 41 | "update_type": "replace", 42 | "update_frequency": 100, 43 | "gpu": false 44 | } 45 | }], 46 | "env": [{ 47 | "name": "LunarLander-v2", 48 | "frame_op": "concat", 49 | "frame_op_len": 4, 50 | "max_t": null, 51 | "max_frame": 300000, 52 | "normalize_state": false 53 | }], 54 | "body": { 55 | "product": "outer", 56 | "num": 1 57 | }, 58 | "meta": { 59 | "distributed": false, 60 | "log_frequency": 1000, 61 | "eval_frequency": 1000, 62 | "max_session": 4, 63 | "max_trial": 1, 64 | "search": "RandomSearch", 65 | }, 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/dqn/dqn_per_atari.json: -------------------------------------------------------------------------------- 1 | { 2 | "dqn_per_atari": { 3 | "agent": [{ 4 | "name": "DQN", 5 | "algorithm": { 6 | "name": "DQN", 7 | "action_pdtype": "Argmax", 8 | "action_policy": "epsilon_greedy", 9 | "explore_var_spec": { 10 | "name": "linear_decay", 11 | "start_val": 1.0, 12 | "end_val": 0.01, 13 | "start_step": 10000, 14 | "end_step": 1000000 15 | }, 16 | "gamma": 0.99, 17 | "training_batch_iter": 1, 18 | "training_iter": 4, 19 | 
"training_frequency": 4, 20 | "training_start_step": 10000 21 | }, 22 | "memory": { 23 | "name": "PrioritizedReplay", 24 | "alpha": 0.6, 25 | "epsilon": 0.0001, 26 | "batch_size": 32, 27 | "max_size": 200000, 28 | "use_cer": false 29 | }, 30 | "net": { 31 | "type": "ConvNet", 32 | "conv_hid_layers": [ 33 | [32, 8, 4, 0, 1], 34 | [64, 4, 2, 0, 1], 35 | [64, 3, 1, 0, 1] 36 | ], 37 | "fc_hid_layers": [256], 38 | "hid_layers_activation": "relu", 39 | "init_fn": null, 40 | "batch_norm": false, 41 | "clip_grad_val": 10.0, 42 | "loss_spec": { 43 | "name": "SmoothL1Loss" 44 | }, 45 | "optim_spec": { 46 | "name": "Adam", 47 | "lr": 2.5e-5, 48 | }, 49 | "lr_scheduler_spec": null, 50 | "update_type": "replace", 51 | "update_frequency": 1000, 52 | "gpu": true 53 | } 54 | }], 55 | "env": [{ 56 | "name": "${env}", 57 | "frame_op": "concat", 58 | "frame_op_len": 4, 59 | "reward_scale": "sign", 60 | "num_envs": 16, 61 | "max_t": null, 62 | "max_frame": 1e7 63 | }], 64 | "body": { 65 | "product": "outer", 66 | "num": 1 67 | }, 68 | "meta": { 69 | "distributed": false, 70 | "eval_frequency": 10000, 71 | "log_frequency": 10000, 72 | "rigorous_eval": 0, 73 | "max_session": 4, 74 | "max_trial": 1 75 | }, 76 | "spec_params": { 77 | "env": [ 78 | "BreakoutNoFrameskip-v4", "PongNoFrameskip-v4", "QbertNoFrameskip-v4", "SeaquestNoFrameskip-v4" 79 | ] 80 | } 81 | }, 82 | } 83 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/dqn/dqn_per_pong.json: -------------------------------------------------------------------------------- 1 | { 2 | "dqn_per_pong": { 3 | "agent": [{ 4 | "name": "DQN", 5 | "algorithm": { 6 | "name": "DQN", 7 | "action_pdtype": "Argmax", 8 | "action_policy": "epsilon_greedy", 9 | "explore_var_spec": { 10 | "name": "linear_decay", 11 | "start_val": 1.0, 12 | "end_val": 0.01, 13 | "start_step": 10000, 14 | "end_step": 1000000 15 | }, 16 | "gamma": 0.99, 17 | "training_batch_iter": 1, 18 | "training_iter": 4, 19 | "training_frequency": 4, 20 | "training_start_step": 10000 21 | }, 22 | "memory": { 23 | "name": "PrioritizedReplay", 24 | "alpha": 0.6, 25 | "epsilon": 0.0001, 26 | "batch_size": 32, 27 | "max_size": 200000, 28 | "use_cer": false 29 | }, 30 | "net": { 31 | "type": "ConvNet", 32 | "conv_hid_layers": [ 33 | [32, 8, 4, 0, 1], 34 | [64, 4, 2, 0, 1], 35 | [64, 3, 1, 0, 1] 36 | ], 37 | "fc_hid_layers": [256], 38 | "hid_layers_activation": "relu", 39 | "init_fn": null, 40 | "batch_norm": false, 41 | "clip_grad_val": 10.0, 42 | "loss_spec": { 43 | "name": "SmoothL1Loss" 44 | }, 45 | "optim_spec": { 46 | "name": "Adam", 47 | "lr": 2.5e-5, 48 | }, 49 | "lr_scheduler_spec": null, 50 | "update_type": "replace", 51 | "update_frequency": 1000, 52 | "gpu": true 53 | } 54 | }], 55 | "env": [{ 56 | "name": "PongNoFrameskip-v4", 57 | "frame_op": "concat", 58 | "frame_op_len": 4, 59 | "reward_scale": "sign", 60 | "num_envs": 16, 61 | "max_t": null, 62 | "max_frame": 4e6 63 | }], 64 | "body": { 65 | "product": "outer", 66 | "num": 1 67 | }, 68 | "meta": { 69 | "distributed": false, 70 | "eval_frequency": 10000, 71 | "log_frequency": 10000, 72 | "max_session": 4, 73 | "max_trial": 1 74 | } 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/dqn/dqn_per_qbert.json: -------------------------------------------------------------------------------- 1 | { 2 | "dqn_per_qbert": { 3 | "agent": [{ 4 | "name": "DQN", 5 | "algorithm": { 6 | "name": "DQN", 7 | "action_pdtype": "Argmax", 8 | 
"action_policy": "epsilon_greedy", 9 | "explore_var_spec": { 10 | "name": "linear_decay", 11 | "start_val": 1.0, 12 | "end_val": 0.01, 13 | "start_step": 10000, 14 | "end_step": 1000000 15 | }, 16 | "gamma": 0.99, 17 | "training_batch_iter": 1, 18 | "training_iter": 4, 19 | "training_frequency": 4, 20 | "training_start_step": 10000 21 | }, 22 | "memory": { 23 | "name": "PrioritizedReplay", 24 | "alpha": 0.6, 25 | "epsilon": 0.0001, 26 | "batch_size": 32, 27 | "max_size": 200000, 28 | "use_cer": false 29 | }, 30 | "net": { 31 | "type": "ConvNet", 32 | "conv_hid_layers": [ 33 | [32, 8, 4, 0, 1], 34 | [64, 4, 2, 0, 1], 35 | [64, 3, 1, 0, 1] 36 | ], 37 | "fc_hid_layers": [256], 38 | "hid_layers_activation": "relu", 39 | "init_fn": null, 40 | "batch_norm": false, 41 | "clip_grad_val": 10.0, 42 | "loss_spec": { 43 | "name": "SmoothL1Loss" 44 | }, 45 | "optim_spec": { 46 | "name": "Adam", 47 | "lr": 2.5e-5, 48 | }, 49 | "lr_scheduler_spec": null, 50 | "update_type": "replace", 51 | "update_frequency": 1000, 52 | "gpu": true 53 | } 54 | }], 55 | "env": [{ 56 | "name": "QbertNoFrameskip-v4", 57 | "frame_op": "concat", 58 | "frame_op_len": 4, 59 | "reward_scale": "sign", 60 | "num_envs": 16, 61 | "max_t": null, 62 | "max_frame": 4e6 63 | }], 64 | "body": { 65 | "product": "outer", 66 | "num": 1 67 | }, 68 | "meta": { 69 | "distributed": false, 70 | "eval_frequency": 10000, 71 | "log_frequency": 10000, 72 | "rigorous_eval": 0, 73 | "max_session": 4, 74 | "max_trial": 1 75 | } 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/dqn/dqn_pong.json: -------------------------------------------------------------------------------- 1 | { 2 | "dqn_pong": { 3 | "agent": [{ 4 | "name": "DQN", 5 | "algorithm": { 6 | "name": "DQN", 7 | "action_pdtype": "Argmax", 8 | "action_policy": "epsilon_greedy", 9 | "explore_var_spec": { 10 | "name": "linear_decay", 11 | "start_val": 1.0, 12 | "end_val": 0.01, 13 | "start_step": 10000, 14 | "end_step": 1000000 15 | }, 16 | "gamma": 0.99, 17 | "training_batch_iter": 1, 18 | "training_iter": 4, 19 | "training_frequency": 4, 20 | "training_start_step": 10000 21 | }, 22 | "memory": { 23 | "name": "Replay", 24 | "batch_size": 32, 25 | "max_size": 200000, 26 | "use_cer": false 27 | }, 28 | "net": { 29 | "type": "ConvNet", 30 | "conv_hid_layers": [ 31 | [32, 8, 4, 0, 1], 32 | [64, 4, 2, 0, 1], 33 | [64, 3, 1, 0, 1] 34 | ], 35 | "fc_hid_layers": [256], 36 | "hid_layers_activation": "relu", 37 | "init_fn": null, 38 | "batch_norm": false, 39 | "clip_grad_val": 10.0, 40 | "loss_spec": { 41 | "name": "SmoothL1Loss" 42 | }, 43 | "optim_spec": { 44 | "name": "Adam", 45 | "lr": 1e-4, 46 | }, 47 | "lr_scheduler_spec": null, 48 | "update_type": "replace", 49 | "update_frequency": 1000, 50 | "gpu": true 51 | } 52 | }], 53 | "env": [{ 54 | "name": "PongNoFrameskip-v4", 55 | "frame_op": "concat", 56 | "frame_op_len": 4, 57 | "reward_scale": "sign", 58 | "num_envs": 16, 59 | "max_t": null, 60 | "max_frame": 4e6 61 | }], 62 | "body": { 63 | "product": "outer", 64 | "num": 1 65 | }, 66 | "meta": { 67 | "distributed": false, 68 | "eval_frequency": 10000, 69 | "log_frequency": 10000, 70 | "max_session": 4, 71 | "max_trial": 1, 72 | } 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/dqn/dqn_qbert.json: -------------------------------------------------------------------------------- 1 | { 2 | "dqn_qbert": { 3 | "agent": [{ 4 | "name": "DQN", 5 | 
"algorithm": { 6 | "name": "DQN", 7 | "action_pdtype": "Argmax", 8 | "action_policy": "epsilon_greedy", 9 | "explore_var_spec": { 10 | "name": "linear_decay", 11 | "start_val": 1.0, 12 | "end_val": 0.01, 13 | "start_step": 10000, 14 | "end_step": 1000000 15 | }, 16 | "gamma": 0.99, 17 | "training_batch_iter": 1, 18 | "training_iter": 4, 19 | "training_frequency": 4, 20 | "training_start_step": 10000 21 | }, 22 | "memory": { 23 | "name": "Replay", 24 | "batch_size": 32, 25 | "max_size": 200000, 26 | "use_cer": false 27 | }, 28 | "net": { 29 | "type": "ConvNet", 30 | "conv_hid_layers": [ 31 | [32, 8, 4, 0, 1], 32 | [64, 4, 2, 0, 1], 33 | [64, 3, 1, 0, 1] 34 | ], 35 | "fc_hid_layers": [256], 36 | "hid_layers_activation": "relu", 37 | "init_fn": null, 38 | "batch_norm": false, 39 | "clip_grad_val": 10.0, 40 | "loss_spec": { 41 | "name": "SmoothL1Loss" 42 | }, 43 | "optim_spec": { 44 | "name": "Adam", 45 | "lr": 1e-4, 46 | }, 47 | "lr_scheduler_spec": null, 48 | "update_type": "replace", 49 | "update_frequency": 1000, 50 | "gpu": true 51 | } 52 | }], 53 | "env": [{ 54 | "name": "QbertNoFrameskip-v4", 55 | "frame_op": "concat", 56 | "frame_op_len": 4, 57 | "reward_scale": "sign", 58 | "num_envs": 16, 59 | "max_t": null, 60 | "max_frame": 4e6 61 | }], 62 | "body": { 63 | "product": "outer", 64 | "num": 1 65 | }, 66 | "meta": { 67 | "distributed": false, 68 | "eval_frequency": 10000, 69 | "log_frequency": 10000, 70 | "rigorous_eval": 0, 71 | "max_session": 4, 72 | "max_trial": 1, 73 | } 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/dqn/dqn_videopinball.json: -------------------------------------------------------------------------------- 1 | { 2 | "dqn_videopinball": { 3 | "agent": [{ 4 | "name": "VanillaDQN", 5 | "algorithm": { 6 | "name": "VanillaDQN", 7 | "action_pdtype": "Categorical", 8 | "action_policy": "boltzmann", 9 | "explore_var_spec": { 10 | "name": "linear_decay", 11 | "start_val": 5.0, 12 | "end_val": 0.5, 13 | "start_step": 1000, 14 | "end_step": 100000 15 | }, 16 | "gamma": 0.99, 17 | "training_batch_iter": 8, 18 | "training_iter": 4, 19 | "training_frequency": 4, 20 | "training_start_step": 32 21 | }, 22 | "memory": { 23 | "name": "Replay", 24 | "batch_size": 64, 25 | "max_size": 10000, 26 | "use_cer": false 27 | }, 28 | "net": { 29 | "type": "ConvNet", 30 | "conv_hid_layers": [ 31 | [32, 8, 4, 0, 1], 32 | [64, 4, 2, 0, 1], 33 | [32, 3, 1, 0, 1] 34 | ], 35 | "fc_hid_layers": [512], 36 | "hid_layers_activation": "relu", 37 | "init_fn": "orthogonal_", 38 | "shared": true, 39 | "normalize": true, 40 | "batch_norm": false, 41 | "clip_grad_val": 0.5, 42 | "loss_spec": { 43 | "name": "MSELoss" 44 | }, 45 | "optim_spec": { 46 | "name": "Adam", 47 | "lr": 0.01 48 | }, 49 | "lr_scheduler_spec": { 50 | "name": "LinearToZero", 51 | "frame": 10000 52 | }, 53 | "gpu": true 54 | } 55 | }], 56 | "env": [{ 57 | "name": "VideoPinball-v0", 58 | "frame_op": "concat", 59 | "frame_op_len": 4, 60 | "reward_scale": "sign", 61 | "num_envs": 16, 62 | "max_t": null, 63 | "max_frame": 850000 64 | }], 65 | "body": { 66 | "product": "outer", 67 | "num": 1 68 | }, 69 | "meta": { 70 | "distributed": false, 71 | "eval_frequency": 500, 72 | "max_trial": 1, 73 | "max_session": 1 74 | } 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/dqn/dueling_ddqn_per_pong.json: -------------------------------------------------------------------------------- 1 | { 2 | 
"dueling_ddqn_per_pong": { 3 | "agent": [{ 4 | "name": "DuelingDoubleDQN", 5 | "algorithm": { 6 | "name": "DoubleDQN", 7 | "action_pdtype": "Argmax", 8 | "action_policy": "epsilon_greedy", 9 | "explore_var_spec": { 10 | "name": "linear_decay", 11 | "start_val": 1.0, 12 | "end_val": 0.01, 13 | "start_step": 10000, 14 | "end_step": 1000000 15 | }, 16 | "gamma": 0.99, 17 | "training_batch_iter": 1, 18 | "training_iter": 4, 19 | "training_frequency": 4, 20 | "training_start_step": 10000 21 | }, 22 | "memory": { 23 | "name": "PrioritizedReplay", 24 | "alpha": 0.6, 25 | "epsilon": 0.0001, 26 | "batch_size": 32, 27 | "max_size": 200000, 28 | "use_cer": false, 29 | }, 30 | "net": { 31 | "type": "DuelingConvNet", 32 | "conv_hid_layers": [ 33 | [32, 8, 4, 0, 1], 34 | [64, 4, 2, 0, 1], 35 | [64, 3, 1, 0, 1] 36 | ], 37 | "fc_hid_layers": [256], 38 | "hid_layers_activation": "relu", 39 | "init_fn": null, 40 | "batch_norm": false, 41 | "clip_grad_val": 10.0, 42 | "loss_spec": { 43 | "name": "SmoothL1Loss" 44 | }, 45 | "optim_spec": { 46 | "name": "Adam", 47 | "lr": 2.5e-5, 48 | }, 49 | "lr_scheduler_spec": null, 50 | "update_type": "replace", 51 | "update_frequency": 1000, 52 | "gpu": true 53 | } 54 | }], 55 | "env": [{ 56 | "name": "PongNoFrameskip-v4", 57 | "frame_op": "concat", 58 | "frame_op_len": 4, 59 | "reward_scale": "sign", 60 | "num_envs": 16, 61 | "max_t": null, 62 | "max_frame": 4e6 63 | }], 64 | "body": { 65 | "product": "outer", 66 | "num": 1 67 | }, 68 | "meta": { 69 | "distributed": false, 70 | "eval_frequency": 10000, 71 | "log_frequency": 10000, 72 | "max_session": 4, 73 | "max_trial": 1 74 | } 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/dqn/dueling_dqn_pong.json: -------------------------------------------------------------------------------- 1 | { 2 | "dueling_dqn_pong": { 3 | "agent": [{ 4 | "name": "DuelingDQN", 5 | "algorithm": { 6 | "name": "DQN", 7 | "action_pdtype": "Argmax", 8 | "action_policy": "epsilon_greedy", 9 | "explore_var_spec": { 10 | "name": "linear_decay", 11 | "start_val": 1.0, 12 | "end_val": 0.01, 13 | "start_step": 10000, 14 | "end_step": 1000000 15 | }, 16 | "gamma": 0.99, 17 | "training_batch_iter": 1, 18 | "training_iter": 4, 19 | "training_frequency": 4, 20 | "training_start_step": 10000 21 | }, 22 | "memory": { 23 | "name": "Replay", 24 | "batch_size": 32, 25 | "max_size": 200000, 26 | "use_cer": false 27 | }, 28 | "net": { 29 | "type": "DuelingConvNet", 30 | "conv_hid_layers": [ 31 | [32, 8, 4, 0, 1], 32 | [64, 4, 2, 0, 1], 33 | [64, 3, 1, 0, 1] 34 | ], 35 | "fc_hid_layers": [256], 36 | "hid_layers_activation": "relu", 37 | "init_fn": null, 38 | "batch_norm": false, 39 | "clip_grad_val": 10.0, 40 | "loss_spec": { 41 | "name": "SmoothL1Loss" 42 | }, 43 | "optim_spec": { 44 | "name": "Adam", 45 | "lr": 1e-4, 46 | }, 47 | "lr_scheduler_spec": null, 48 | "update_type": "replace", 49 | "update_frequency": 1000, 50 | "gpu": true 51 | } 52 | }], 53 | "env": [{ 54 | "name": "PongNoFrameskip-v4", 55 | "frame_op": "concat", 56 | "frame_op_len": 4, 57 | "reward_scale": "sign", 58 | "num_envs": 16, 59 | "max_t": null, 60 | "max_frame": 4e6 61 | }], 62 | "body": { 63 | "product": "outer", 64 | "num": 1 65 | }, 66 | "meta": { 67 | "distributed": false, 68 | "eval_frequency": 10000, 69 | "log_frequency": 10000, 70 | "max_session": 4, 71 | "max_trial": 1, 72 | } 73 | } 74 | } 75 | -------------------------------------------------------------------------------- 
/slm_lab/spec/benchmark/ppo/ppo_cartpole.json: -------------------------------------------------------------------------------- 1 | { 2 | "ppo_shared_cartpole": { 3 | "agent": [{ 4 | "name": "PPO", 5 | "algorithm": { 6 | "name": "PPO", 7 | "action_pdtype": "default", 8 | "action_policy": "default", 9 | "explore_var_spec": null, 10 | "gamma": 0.99, 11 | "lam": 0.95, 12 | "clip_eps_spec": { 13 | "name": "no_decay", 14 | "start_val": 0.2, 15 | "end_val": 0.05, 16 | "start_step": 0, 17 | "end_step": 10000, 18 | }, 19 | "entropy_coef_spec": { 20 | "name": "linear_decay", 21 | "start_val": 0.01, 22 | "end_val": 0.001, 23 | "start_step": 1000, 24 | "end_step": 5000, 25 | }, 26 | "val_loss_coef": 0.1, 27 | "time_horizon": 32, 28 | "minibatch_size": 16, 29 | "training_epoch": 4 30 | }, 31 | "memory": { 32 | "name": "OnPolicyBatchReplay", 33 | }, 34 | "net": { 35 | "type": "MLPNet", 36 | "shared": true, 37 | "hid_layers": [64], 38 | "hid_layers_activation": "relu", 39 | "clip_grad_val": 0.5, 40 | "use_same_optim": false, 41 | "loss_spec": { 42 | "name": "MSELoss" 43 | }, 44 | "actor_optim_spec": { 45 | "name": "Adam", 46 | "lr": 0.02 47 | }, 48 | "critic_optim_spec": { 49 | "name": "Adam", 50 | "lr": 0.02 51 | }, 52 | "lr_scheduler_spec": null, 53 | "gpu": false 54 | } 55 | }], 56 | "env": [{ 57 | "name": "CartPole-v0", 58 | "max_t": null, 59 | "max_frame": 30000, 60 | }], 61 | "body": { 62 | "product": "outer", 63 | "num": 1 64 | }, 65 | "meta": { 66 | "distributed": false, 67 | "eval_frequency": 100, 68 | "max_session": 4, 69 | "max_trial": 1 70 | }, 71 | "search": { 72 | "agent": [{ 73 | "algorithm": { 74 | "lam__grid_search": [0.7, 0.8, 0.9, 0.95], 75 | "training_frequency__grid_search": [16, 32] 76 | }, 77 | "net": { 78 | "actor_optim_spec": { 79 | "lr__grid_search": [5e-2, 1e-2, 5e-3, 1e-3] 80 | } 81 | } 82 | }] 83 | }, 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/ppo/ppo_lunar.json: -------------------------------------------------------------------------------- 1 | { 2 | "ppo_lunar": { 3 | "agent": [{ 4 | "name": "PPO", 5 | "algorithm": { 6 | "name": "PPO", 7 | "action_pdtype": "default", 8 | "action_policy": "default", 9 | "explore_var_spec": null, 10 | "gamma": 0.99, 11 | "lam": 0.95, 12 | "clip_eps_spec": { 13 | "name": "no_decay", 14 | "start_val": 0.20, 15 | "end_val": 0.0, 16 | "start_step": 10000, 17 | "end_step": 300000 18 | }, 19 | "entropy_coef_spec": { 20 | "name": "no_decay", 21 | "start_val": 0.01, 22 | "end_val": 0.01, 23 | "start_step": 0, 24 | "end_step": 0 25 | }, 26 | "val_loss_coef": 1.0, 27 | "time_horizon": 128, 28 | "minibatch_size": 256, 29 | "training_epoch": 10 30 | }, 31 | "memory": { 32 | "name": "OnPolicyBatchReplay", 33 | }, 34 | "net": { 35 | "type": "MLPNet", 36 | "shared": false, 37 | "hid_layers": [64, 64, 32], 38 | "hid_layers_activation": "relu", 39 | "init_fn": "orthogonal_", 40 | "batch_norm": false, 41 | "clip_grad_val": 0.5, 42 | "use_same_optim": true, 43 | "loss_spec": { 44 | "name": "MSELoss" 45 | }, 46 | "actor_optim_spec": { 47 | "name": "Adam", 48 | "lr": 5e-4, 49 | }, 50 | "critic_optim_spec": { 51 | "name": "Adam", 52 | "lr": 5e-4, 53 | }, 54 | "lr_scheduler_spec": null, 55 | "gpu": false 56 | } 57 | }], 58 | "env": [{ 59 | "name": "LunarLander-v2", 60 | "frame_op": "concat", 61 | "frame_op_len": 4, 62 | "max_t": null, 63 | "max_frame": 300000, 64 | "num_envs": 8, 65 | "normalize_state": false 66 | }], 67 | "body": { 68 | "product": "outer", 69 | "num": 1 70 | }, 71 | 
"meta": { 72 | "distributed": false, 73 | "log_frequency": 1000, 74 | "eval_frequency": 1000, 75 | "max_session": 4, 76 | "max_trial": 1 77 | } 78 | }, 79 | } 80 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/ppo/ppo_pong.json: -------------------------------------------------------------------------------- 1 | { 2 | "ppo_pong": { 3 | "agent": [{ 4 | "name": "PPO", 5 | "algorithm": { 6 | "name": "PPO", 7 | "action_pdtype": "default", 8 | "action_policy": "default", 9 | "explore_var_spec": null, 10 | "gamma": 0.99, 11 | "lam": 0.70, 12 | "clip_eps_spec": { 13 | "name": "no_decay", 14 | "start_val": 0.10, 15 | "end_val": 0.10, 16 | "start_step": 0, 17 | "end_step": 0 18 | }, 19 | "entropy_coef_spec": { 20 | "name": "no_decay", 21 | "start_val": 0.01, 22 | "end_val": 0.01, 23 | "start_step": 0, 24 | "end_step": 0 25 | }, 26 | "val_loss_coef": 0.5, 27 | "time_horizon": 128, 28 | "minibatch_size": 256, 29 | "training_epoch": 4 30 | }, 31 | "memory": { 32 | "name": "OnPolicyBatchReplay", 33 | }, 34 | "net": { 35 | "type": "ConvNet", 36 | "shared": true, 37 | "conv_hid_layers": [ 38 | [32, 8, 4, 0, 1], 39 | [64, 4, 2, 0, 1], 40 | [32, 3, 1, 0, 1] 41 | ], 42 | "fc_hid_layers": [512], 43 | "hid_layers_activation": "relu", 44 | "init_fn": "orthogonal_", 45 | "normalize": true, 46 | "batch_norm": false, 47 | "clip_grad_val": 0.5, 48 | "use_same_optim": false, 49 | "loss_spec": { 50 | "name": "MSELoss" 51 | }, 52 | "actor_optim_spec": { 53 | "name": "Adam", 54 | "lr": 2.5e-4, 55 | }, 56 | "critic_optim_spec": { 57 | "name": "Adam", 58 | "lr": 2.5e-4, 59 | }, 60 | "lr_scheduler_spec": { 61 | "name": "LinearToZero", 62 | "frame": 1e7 63 | }, 64 | "gpu": true 65 | } 66 | }], 67 | "env": [{ 68 | "name": "PongNoFrameskip-v4", 69 | "frame_op": "concat", 70 | "frame_op_len": 4, 71 | "reward_scale": "sign", 72 | "num_envs": 16, 73 | "max_t": null, 74 | "max_frame": 1e7 75 | }], 76 | "body": { 77 | "product": "outer", 78 | "num": 1 79 | }, 80 | "meta": { 81 | "distributed": false, 82 | "log_frequency": 10000, 83 | "eval_frequency": 10000, 84 | "max_session": 4, 85 | "max_trial": 1, 86 | } 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/ppo/ppo_qbert.json: -------------------------------------------------------------------------------- 1 | { 2 | "ppo_qbert": { 3 | "agent": [{ 4 | "name": "PPO", 5 | "algorithm": { 6 | "name": "PPO", 7 | "action_pdtype": "default", 8 | "action_policy": "default", 9 | "explore_var_spec": null, 10 | "gamma": 0.99, 11 | "lam": 0.70, 12 | "clip_eps_spec": { 13 | "name": "no_decay", 14 | "start_val": 0.10, 15 | "end_val": 0.10, 16 | "start_step": 0, 17 | "end_step": 0 18 | }, 19 | "entropy_coef_spec": { 20 | "name": "no_decay", 21 | "start_val": 0.01, 22 | "end_val": 0.01, 23 | "start_step": 0, 24 | "end_step": 0 25 | }, 26 | "val_loss_coef": 0.5, 27 | "time_horizon": 128, 28 | "minibatch_size": 256, 29 | "training_epoch": 4 30 | }, 31 | "memory": { 32 | "name": "OnPolicyBatchReplay", 33 | }, 34 | "net": { 35 | "type": "ConvNet", 36 | "shared": true, 37 | "conv_hid_layers": [ 38 | [32, 8, 4, 0, 1], 39 | [64, 4, 2, 0, 1], 40 | [32, 3, 1, 0, 1] 41 | ], 42 | "fc_hid_layers": [512], 43 | "hid_layers_activation": "relu", 44 | "init_fn": "orthogonal_", 45 | "normalize": true, 46 | "batch_norm": false, 47 | "clip_grad_val": 0.5, 48 | "use_same_optim": false, 49 | "loss_spec": { 50 | "name": "MSELoss" 51 | }, 52 | "actor_optim_spec": { 53 | "name": "Adam", 54 
| "lr": 2.5e-4, 55 | }, 56 | "critic_optim_spec": { 57 | "name": "Adam", 58 | "lr": 2.5e-4, 59 | }, 60 | "lr_scheduler_spec": { 61 | "name": "LinearToZero", 62 | "frame": 1e7 63 | }, 64 | "gpu": true 65 | } 66 | }], 67 | "env": [{ 68 | "name": "QbertNoFrameskip-v4", 69 | "frame_op": "concat", 70 | "frame_op_len": 4, 71 | "reward_scale": "sign", 72 | "num_envs": 16, 73 | "max_t": null, 74 | "max_frame": 1e7 75 | }], 76 | "body": { 77 | "product": "outer", 78 | "num": 1 79 | }, 80 | "meta": { 81 | "distributed": false, 82 | "log_frequency": 10000, 83 | "eval_frequency": 10000, 84 | "rigorous_eval": 0, 85 | "max_session": 4, 86 | "max_trial": 1, 87 | } 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/ppo/ppo_videopinball.json: -------------------------------------------------------------------------------- 1 | { 2 | "ppo_videopinball": { 3 | "agent": [{ 4 | "name": "PPO", 5 | "algorithm": { 6 | "name": "PPO", 7 | "action_pdtype": "default", 8 | "action_policy": "default", 9 | "explore_var_spec": null, 10 | "gamma": 0.99, 11 | "lam": 0.70, 12 | "entropy_coef_spec": { 13 | "name": "no_decay", 14 | "start_val": 0.01, 15 | "end_val": 0.01, 16 | "start_step": 0, 17 | "end_step": 0 18 | }, 19 | "clip_eps_spec": { 20 | "name": "no_decay", 21 | "start_val": 0.1, 22 | "end_val": 0.1, 23 | "start_step": 0, 24 | "end_step": 0 25 | }, 26 | "val_loss_coef": 0.5, 27 | "time_horizon": 128, 28 | "minibatch_size": 256, 29 | "training_epoch": 4 30 | }, 31 | "memory": { 32 | "name": "OnPolicyBatchReplay" 33 | }, 34 | "net": { 35 | "type": "ConvNet", 36 | "conv_hid_layers": [ 37 | [32, 8, 4, 0, 1], 38 | [64, 4, 2, 0, 1], 39 | [32, 3, 1, 0, 1] 40 | ], 41 | "fc_hid_layers": [512], 42 | "hid_layers_activation": "relu", 43 | "init_fn": "orthogonal_", 44 | "shared": true, 45 | "batch_norm": false, 46 | "normalize": true, 47 | "clip_grad_val": 0.5, 48 | "use_same_optim": false, 49 | "loss_spec": { 50 | "name": "MSELoss" 51 | }, 52 | "actor_optim_spec": { 53 | "name": "Adam", 54 | "lr": 2.5e-4 55 | }, 56 | "critic_optim_spec": { 57 | "name": "Adam", 58 | "lr": 2.5e-4 59 | }, 60 | "lr_scheduler_spec": { 61 | "name": "LinearToZero", 62 | "frame": 1e7 63 | }, 64 | "gpu": true 65 | } 66 | }], 67 | "env": [{ 68 | "name": "VideoPinball-v0", 69 | "frame_op": "concat", 70 | "frame_op_len": 4, 71 | "reward_scale": "sign", 72 | "num_envs": 16, 73 | "max_t": null, 74 | "max_frame": 850000 75 | }], 76 | "body": { 77 | "product": "outer", 78 | "num": 1 79 | }, 80 | "meta": { 81 | "distributed": false, 82 | "eval_frequency": 10000, 83 | "log_frequency": 10000, 84 | "rigorous_eval": 0, 85 | "max_trial": 1, 86 | "max_session": 1 87 | } 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/reinforce/reinforce_cartpole.json: -------------------------------------------------------------------------------- 1 | { 2 | "reinforce_cartpole": { 3 | "agent": [{ 4 | "name": "Reinforce", 5 | "algorithm": { 6 | "name": "Reinforce", 7 | "action_pdtype": "default", 8 | "action_policy": "default", 9 | "center_return": true, 10 | "explore_var_spec": null, 11 | "gamma": 0.99, 12 | "entropy_coef_spec": { 13 | "name": "linear_decay", 14 | "start_val": 0.01, 15 | "end_val": 0.001, 16 | "start_step": 0, 17 | "end_step": 20000, 18 | }, 19 | "training_frequency": 1 20 | }, 21 | "memory": { 22 | "name": "OnPolicyReplay" 23 | }, 24 | "net": { 25 | "type": "MLPNet", 26 | "hid_layers": [64], 27 | 
"hid_layers_activation": "selu", 28 | "clip_grad_val": null, 29 | "loss_spec": { 30 | "name": "MSELoss" 31 | }, 32 | "optim_spec": { 33 | "name": "Adam", 34 | "lr": 0.002 35 | }, 36 | "lr_scheduler_spec": null 37 | } 38 | }], 39 | "env": [{ 40 | "name": "CartPole-v0", 41 | "max_t": null, 42 | "max_frame": 100000, 43 | }], 44 | "body": { 45 | "product": "outer", 46 | "num": 1 47 | }, 48 | "meta": { 49 | "distributed": false, 50 | "eval_frequency": 2000, 51 | "max_session": 4, 52 | "max_trial": 1, 53 | }, 54 | "search": { 55 | "agent": [{ 56 | "algorithm": { 57 | "gamma__grid_search": [0.1, 0.5, 0.7, 0.8, 0.90, 0.99, 0.999] 58 | } 59 | }] 60 | } 61 | }, 62 | "reinforce_baseline_cartpole": { 63 | "agent": [{ 64 | "name": "Reinforce", 65 | "algorithm": { 66 | "name": "Reinforce", 67 | "action_pdtype": "default", 68 | "action_policy": "default", 69 | "center_return": true, 70 | "explore_var_spec": null, 71 | "gamma": 0.99, 72 | "entropy_coef_spec": { 73 | "name": "linear_decay", 74 | "start_val": 0.01, 75 | "end_val": 0.001, 76 | "start_step": 0, 77 | "end_step": 20000, 78 | }, 79 | "training_frequency": 1 80 | }, 81 | "memory": { 82 | "name": "OnPolicyReplay" 83 | }, 84 | "net": { 85 | "type": "MLPNet", 86 | "hid_layers": [64], 87 | "hid_layers_activation": "selu", 88 | "clip_grad_val": null, 89 | "loss_spec": { 90 | "name": "MSELoss" 91 | }, 92 | "optim_spec": { 93 | "name": "Adam", 94 | "lr": 0.002 95 | }, 96 | "lr_scheduler_spec": null 97 | } 98 | }], 99 | "env": [{ 100 | "name": "CartPole-v0", 101 | "max_t": null, 102 | "max_frame": 100000, 103 | }], 104 | "body": { 105 | "product": "outer", 106 | "num": 1 107 | }, 108 | "meta": { 109 | "distributed": false, 110 | "eval_frequency": 2000, 111 | "max_session": 4, 112 | "max_trial": 1, 113 | }, 114 | "search": { 115 | "agent": [{ 116 | "algorithm": { 117 | "center_return__grid_search": [true, false] 118 | } 119 | }] 120 | } 121 | } 122 | } 123 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/reinforce/reinforce_videopinball.json: -------------------------------------------------------------------------------- 1 | { 2 | "reinforce_videopinball": { 3 | "agent": [{ 4 | "name": "Reinforce", 5 | "algorithm": { 6 | "name": "Reinforce", 7 | "action_pdtype": "default", 8 | "action_policy": "default", 9 | "center_return": true, 10 | "explore_var_spec": null, 11 | "gamma": 0.99, 12 | "entropy_coef_spec": { 13 | "name": "linear_decay", 14 | "start_val": 0.01, 15 | "end_val": 0.001, 16 | "start_step": 1000, 17 | "end_step": 100000 18 | }, 19 | "training_frequency": 1 20 | }, 21 | "memory": { 22 | "name": "OnPolicyReplay", 23 | "batch_size": 64 24 | }, 25 | "net": { 26 | "type": "ConvNet", 27 | "shared": true, 28 | "conv_hid_layers": [ 29 | [32, 8, 4, 0, 1], 30 | [64, 4, 2, 0, 1], 31 | [32, 3, 1, 0, 1] 32 | ], 33 | "fc_hid_layers": [512], 34 | "hid_layers_activation": "relu", 35 | "init_fn": "orthogonal_", 36 | "normalize": true, 37 | "batch_norm": false, 38 | "clip_grad_val": 0.5, 39 | "loss_spec": { 40 | "name": "MSELoss" 41 | }, 42 | "optim_spec": { 43 | "name": "Adam", 44 | "lr": 0.002 45 | }, 46 | "lr_scheduler_spec": null, 47 | "gpu": true 48 | } 49 | }], 50 | "env": [{ 51 | "name": "VideoPinball-v0", 52 | "frame_op": "concat", 53 | "frame_op_len": 4, 54 | "reward_scale": "sign", 55 | "num_envs": 16, 56 | "max_t": null, 57 | "max_frame": 850000 58 | }], 59 | "body": { 60 | "product": "outer", 61 | "num": 1 62 | }, 63 | "meta": { 64 | "distributed": false, 65 | "eval_frequency": 2000, 66 | 
"max_session": 1, 67 | "max_trial": 1 68 | } 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/sac/sac_halfcheetah.json: -------------------------------------------------------------------------------- 1 | { 2 | "sac_halfcheetah": { 3 | "agent": [{ 4 | "name": "SoftActorCritic", 5 | "algorithm": { 6 | "name": "SoftActorCritic", 7 | "action_pdtype": "default", 8 | "action_policy": "default", 9 | "gamma": 0.99, 10 | "training_frequency": 1 11 | }, 12 | "memory": { 13 | "name": "Replay", 14 | "batch_size": 256, 15 | "max_size": 1000000, 16 | "use_cer": true 17 | }, 18 | "net": { 19 | "type": "MLPNet", 20 | "hid_layers": [256, 256], 21 | "hid_layers_activation": "relu", 22 | "init_fn": "orthogonal_", 23 | "clip_grad_val": 0.5, 24 | "loss_spec": { 25 | "name": "MSELoss" 26 | }, 27 | "optim_spec": { 28 | "name": "Adam", 29 | "lr": 3e-4, 30 | }, 31 | "lr_scheduler_spec": null, 32 | "update_type": "polyak", 33 | "update_frequency": 1, 34 | "polyak_coef": 0.005, 35 | "gpu": false 36 | } 37 | }], 38 | "env": [{ 39 | "name": "RoboschoolHalfCheetah-v1", 40 | "num_envs": 8, 41 | "max_t": null, 42 | "max_frame": 2e6 43 | }], 44 | "body": { 45 | "product": "outer", 46 | "num": 1 47 | }, 48 | "meta": { 49 | "distributed": false, 50 | "log_frequency": 1000, 51 | "eval_frequency": 1000, 52 | "rigorous_eval": 0, 53 | "max_session": 4, 54 | "max_trial": 1, 55 | } 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/sac/sac_lunar.json: -------------------------------------------------------------------------------- 1 | { 2 | "sac_lunar": { 3 | "agent": [{ 4 | "name": "SoftActorCritic", 5 | "algorithm": { 6 | "name": "SoftActorCritic", 7 | "action_pdtype": "GumbelSoftmax", 8 | "action_policy": "default", 9 | "gamma": 0.99, 10 | "training_frequency": 1 11 | }, 12 | "memory": { 13 | "name": "Replay", 14 | "batch_size": 256, 15 | "max_size": 100000, 16 | "use_cer": true 17 | }, 18 | "net": { 19 | "type": "MLPNet", 20 | "hid_layers": [64, 64, 32], 21 | "hid_layers_activation": "relu", 22 | "init_fn": "orthogonal_", 23 | "clip_grad_val": 0.5, 24 | "loss_spec": { 25 | "name": "MSELoss" 26 | }, 27 | "optim_spec": { 28 | "name": "Adam", 29 | "lr": 0.005 30 | }, 31 | "lr_scheduler_spec": null, 32 | "update_type": "polyak", 33 | "update_frequency": 1, 34 | "polyak_coef": 0.005, 35 | "gpu": false 36 | } 37 | }], 38 | "env": [{ 39 | "name": "LunarLander-v2", 40 | "frame_op": "concat", 41 | "frame_op_len": 4, 42 | "max_t": null, 43 | "max_frame": 300000, 44 | "num_envs": 8, 45 | "normalize_state": false 46 | }], 47 | "body": { 48 | "product": "outer", 49 | "num": 1 50 | }, 51 | "meta": { 52 | "distributed": false, 53 | "log_frequency": 500, 54 | "eval_frequency": 500, 55 | "rigorous_eval": 0, 56 | "max_session": 4, 57 | "max_trial": 1, 58 | } 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/sac/sac_per_halfcheetah.json: -------------------------------------------------------------------------------- 1 | { 2 | "sac_per_halfcheetah": { 3 | "agent": [{ 4 | "name": "SoftActorCritic", 5 | "algorithm": { 6 | "name": "SoftActorCritic", 7 | "action_pdtype": "default", 8 | "action_policy": "default", 9 | "gamma": 0.99, 10 | "training_frequency": 1 11 | }, 12 | "memory": { 13 | "name": "PrioritizedReplay", 14 | "alpha": 0.6, 15 | "epsilon": 0.0001, 16 | "batch_size": 256, 17 | "max_size": 200000, 18 | "use_cer": true 
19 | }, 20 | "net": { 21 | "type": "MLPNet", 22 | "hid_layers": [256, 256], 23 | "hid_layers_activation": "relu", 24 | "init_fn": "orthogonal_", 25 | "clip_grad_val": 0.5, 26 | "loss_spec": { 27 | "name": "MSELoss" 28 | }, 29 | "optim_spec": { 30 | "name": "Adam", 31 | "lr": 7e-4 32 | }, 33 | "lr_scheduler_spec": null, 34 | "update_type": "polyak", 35 | "update_frequency": 1, 36 | "polyak_coef": 0.005, 37 | "gpu": false 38 | } 39 | }], 40 | "env": [{ 41 | "name": "RoboschoolHalfCheetah-v1", 42 | "num_envs": 8, 43 | "max_t": null, 44 | "max_frame": 2e6 45 | }], 46 | "body": { 47 | "product": "outer", 48 | "num": 1 49 | }, 50 | "meta": { 51 | "distributed": false, 52 | "log_frequency": 1000, 53 | "eval_frequency": 1000, 54 | "rigorous_eval": 0, 55 | "max_session": 4, 56 | "max_trial": 1 57 | } 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/sac/sac_per_halfcheetah_pybullet.json: -------------------------------------------------------------------------------- 1 | { 2 | "sac_per_halfcheetah": { 3 | "agent": [{ 4 | "name": "SoftActorCritic", 5 | "algorithm": { 6 | "name": "SoftActorCritic", 7 | "action_pdtype": "default", 8 | "action_policy": "default", 9 | "gamma": 0.99, 10 | "training_frequency": 1 11 | }, 12 | "memory": { 13 | "name": "PrioritizedReplay", 14 | "alpha": 0.6, 15 | "epsilon": 0.0001, 16 | "batch_size": 256, 17 | "max_size": 200000, 18 | "use_cer": true 19 | }, 20 | "net": { 21 | "type": "MLPNet", 22 | "hid_layers": [256, 256], 23 | "hid_layers_activation": "relu", 24 | "init_fn": "orthogonal_", 25 | "clip_grad_val": 0.5, 26 | "loss_spec": { 27 | "name": "MSELoss" 28 | }, 29 | "optim_spec": { 30 | "name": "Adam", 31 | "lr": 7e-4 32 | }, 33 | "lr_scheduler_spec": null, 34 | "update_type": "polyak", 35 | "update_frequency": 1, 36 | "polyak_coef": 0.005, 37 | "gpu": true 38 | } 39 | }], 40 | "env": [{ 41 | "name": "HalfCheetahBulletEnv-v0", 42 | "num_envs": 8, 43 | "max_t": null, 44 | "max_frame": 2e6 45 | }], 46 | "body": { 47 | "product": "outer", 48 | "num": 1 49 | }, 50 | "meta": { 51 | "distributed": false, 52 | "log_frequency": 1000, 53 | "eval_frequency": 1000, 54 | "rigorous_eval": 0, 55 | "max_session": 4, 56 | "max_trial": 1 57 | } 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/sac/sac_per_roboschool.json: -------------------------------------------------------------------------------- 1 | { 2 | "sac_per_roboschool": { 3 | "agent": [{ 4 | "name": "SoftActorCritic", 5 | "algorithm": { 6 | "name": "SoftActorCritic", 7 | "action_pdtype": "default", 8 | "action_policy": "default", 9 | "gamma": 0.99, 10 | "training_frequency": 1 11 | }, 12 | "memory": { 13 | "name": "PrioritizedReplay", 14 | "alpha": 0.6, 15 | "epsilon": 0.0001, 16 | "batch_size": 256, 17 | "max_size": 1000000, 18 | "use_cer": true 19 | }, 20 | "net": { 21 | "type": "MLPNet", 22 | "hid_layers": [256, 256], 23 | "hid_layers_activation": "relu", 24 | "init_fn": "orthogonal_", 25 | "clip_grad_val": 1.0, 26 | "loss_spec": { 27 | "name": "MSELoss" 28 | }, 29 | "optim_spec": { 30 | "name": "Adam", 31 | "lr": 1e-3, 32 | }, 33 | "lr_scheduler_spec": null, 34 | "update_type": "polyak", 35 | "update_frequency": 1, 36 | "polyak_coef": 0.005, 37 | "gpu": false 38 | } 39 | }], 40 | "env": [{ 41 | "name": "${env}", 42 | "num_envs": 8, 43 | "max_t": null, 44 | "max_frame": 2e6 45 | }], 46 | "body": { 47 | "product": "outer", 48 | "num": 1 49 | }, 50 | "meta": { 51 | 
"distributed": false, 52 | "log_frequency": 1000, 53 | "eval_frequency": 1000, 54 | "rigorous_eval": 0, 55 | "max_session": 4, 56 | "max_trial": 1 57 | }, 58 | "spec_params": { 59 | "env": [ 60 | "RoboschoolAnt-v1", "RoboschoolAtlasForwardWalk-v1", "RoboschoolHalfCheetah-v1", "RoboschoolHopper-v1", "RoboschoolInvertedDoublePendulum-v1", "RoboschoolInvertedPendulum-v1", "RoboschoolInvertedPendulumSwingup-v1", "RoboschoolReacher-v1", "RoboschoolWalker2d-v1" 61 | ] 62 | } 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/sac/sac_pong.json: -------------------------------------------------------------------------------- 1 | { 2 | "sac_pong": { 3 | "agent": [{ 4 | "name": "SoftActorCritic", 5 | "algorithm": { 6 | "name": "SoftActorCritic", 7 | "action_pdtype": "GumbelSoftmax", 8 | "action_policy": "default", 9 | "gamma": 0.99, 10 | "training_start_step": 1000, 11 | "training_frequency": 1 12 | }, 13 | "memory": { 14 | "name": "Replay", 15 | "batch_size": 512, 16 | "max_size": 1000000, 17 | "use_cer": false 18 | }, 19 | "net": { 20 | "type": "ConvNet", 21 | "shared": false, 22 | "conv_hid_layers": [ 23 | [32, 8, 4, 0, 1], 24 | [64, 4, 2, 0, 1], 25 | [32, 3, 1, 0, 1] 26 | ], 27 | "fc_hid_layers": [256, 256], 28 | "hid_layers_activation": "leakyrelu", 29 | "init_fn": "orthogonal_", 30 | "normalize": true, 31 | "batch_norm": true, 32 | "clip_grad_val": 0.5, 33 | "use_same_optim": false, 34 | "loss_spec": { 35 | "name": "SmoothL1Loss" 36 | }, 37 | "optim_spec": { 38 | "name": "Lookahead", 39 | "optimizer": "RAdam", 40 | "lr": 2.5e-4, 41 | }, 42 | "lr_scheduler_spec": null, 43 | "update_type": "polyak", 44 | "update_frequency": 1, 45 | "polyak_coef": 0.005, 46 | "gpu": true 47 | } 48 | }], 49 | "env": [{ 50 | "name": "PongNoFrameskip-v4", 51 | "frame_op": "concat", 52 | "frame_op_len": 4, 53 | "reward_scale": "sign", 54 | "num_envs": 4, 55 | "max_t": null, 56 | "max_frame": 1e6 57 | }], 58 | "body": { 59 | "product": "outer", 60 | "num": 1 61 | }, 62 | "meta": { 63 | "distributed": false, 64 | "log_frequency": 100, 65 | "eval_frequency": 100, 66 | "rigorous_eval": 0, 67 | "max_session": 4, 68 | "max_trial": 1, 69 | } 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/sarsa/sarsa_cartpole.json: -------------------------------------------------------------------------------- 1 | { 2 | "sarsa_epsilon_greedy_cartpole": { 3 | "agent": [{ 4 | "name": "SARSA", 5 | "algorithm": { 6 | "name": "SARSA", 7 | "action_pdtype": "Argmax", 8 | "action_policy": "epsilon_greedy", 9 | "explore_var_spec": { 10 | "name": "linear_decay", 11 | "start_val": 1.0, 12 | "end_val": 0.05, 13 | "start_step": 0, 14 | "end_step": 10000 15 | }, 16 | "gamma": 0.99, 17 | "training_frequency": 32 18 | }, 19 | "memory": { 20 | "name": "OnPolicyBatchReplay" 21 | }, 22 | "net": { 23 | "type": "MLPNet", 24 | "hid_layers": [64], 25 | "hid_layers_activation": "selu", 26 | "clip_grad_val": 0.5, 27 | "loss_spec": { 28 | "name": "MSELoss" 29 | }, 30 | "optim_spec": { 31 | "name": "RMSprop", 32 | "lr": 0.01 33 | }, 34 | "lr_scheduler_spec": null 35 | } 36 | }], 37 | "env": [{ 38 | "name": "CartPole-v0", 39 | "max_t": null, 40 | "max_frame": 100000 41 | }], 42 | "body": { 43 | "product": "outer", 44 | "num": 1 45 | }, 46 | "meta": { 47 | "distributed": false, 48 | "eval_frequency": 2000, 49 | "max_trial": 1, 50 | "max_session": 4 51 | }, 52 | "search": { 53 | "agent": [{ 54 | "net": { 55 | "optim_spec": 
{ 56 | "lr__grid_search": [0.0005, 0.001, 0.001, 0.005, 0.01, 0.05, 0.1] 57 | } 58 | } 59 | }] 60 | } 61 | }, 62 | "sarsa_boltzmann_cartpole": { 63 | "agent": [{ 64 | "name": "SARSA", 65 | "algorithm": { 66 | "name": "SARSA", 67 | "action_pdtype": "Argmax", 68 | "action_policy": "boltzmann", 69 | "explore_var_spec": { 70 | "name": "linear_decay", 71 | "start_val": 3.0, 72 | "end_val": 1.0, 73 | "start_step": 0, 74 | "end_step": 10000 75 | }, 76 | "gamma": 0.99, 77 | "training_frequency": 32 78 | }, 79 | "memory": { 80 | "name": "OnPolicyBatchReplay" 81 | }, 82 | "net": { 83 | "type": "MLPNet", 84 | "hid_layers": [64], 85 | "hid_layers_activation": "selu", 86 | "clip_grad_val": 0.5, 87 | "loss_spec": { 88 | "name": "MSELoss" 89 | }, 90 | "optim_spec": { 91 | "name": "RMSprop", 92 | "lr": 0.01 93 | }, 94 | "lr_scheduler_spec": null 95 | } 96 | }], 97 | "env": [{ 98 | "name": "CartPole-v0", 99 | "max_t": null, 100 | "max_frame": 100000 101 | }], 102 | "body": { 103 | "product": "outer", 104 | "num": 1 105 | }, 106 | "meta": { 107 | "distributed": false, 108 | "eval_frequency": 2000, 109 | "max_trial": 1, 110 | "max_session": 1 111 | } 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /slm_lab/spec/benchmark/sarsa/sarsa_videopinball.json: -------------------------------------------------------------------------------- 1 | { 2 | "sarsa_epsilon_greedy_videopinball": { 3 | "agent": [{ 4 | "name": "SARSA", 5 | "algorithm": { 6 | "name": "SARSA", 7 | "action_pdtype": "Argmax", 8 | "action_policy": "epsilon_greedy", 9 | "explore_var_spec": { 10 | "name": "linear_decay", 11 | "start_val": 1.0, 12 | "end_val": 0.05, 13 | "start_step": 1000, 14 | "end_step": 100000 15 | }, 16 | "gamma": 0.99, 17 | "training_frequency": 32 18 | }, 19 | "memory": { 20 | "name": "OnPolicyBatchReplay", 21 | "batch_size": 64 22 | }, 23 | "net": { 24 | "type": "ConvNet", 25 | "conv_hid_layers": [ 26 | [32, 8, 4, 0, 1], 27 | [64, 4, 2, 0, 1], 28 | [32, 3, 1, 0, 1] 29 | ], 30 | "fc_hid_layers": [512], 31 | "hid_layers_activation": "relu", 32 | "init_fn": "orthogonal_", 33 | "normalize": true, 34 | "batch_norm": false, 35 | "use_same_optim": false, 36 | "clip_grad_val": 0.5, 37 | "loss_spec": { 38 | "name": "MSELoss" 39 | }, 40 | "optim_spec": { 41 | "name": "RMSprop", 42 | "lr": 0.01 43 | }, 44 | "lr_scheduler_spec": null, 45 | "gpu": true 46 | } 47 | }], 48 | "env": [{ 49 | "name": "VideoPinball-v0", 50 | "frame_op": "concat", 51 | "frame_op_len": 4, 52 | "reward_scale": "sign", 53 | "num_envs": 16, 54 | "max_t": null, 55 | "max_frame": 850000 56 | }], 57 | "body": { 58 | "product": "outer", 59 | "num": 1 60 | }, 61 | "meta": { 62 | "distributed": false, 63 | "eval_frequency": 2000, 64 | "max_trial": 1, 65 | "max_session": 1 66 | } 67 | }, 68 | "sarsa_boltzmann_videopinball": { 69 | "agent": [{ 70 | "name": "SARSA", 71 | "algorithm": { 72 | "name": "SARSA", 73 | "action_pdtype": "Argmax", 74 | "action_policy": "boltzmann", 75 | "explore_var_spec": { 76 | "name": "linear_decay", 77 | "start_val": 1.0, 78 | "end_val": 0.05, 79 | "start_step": 1000, 80 | "end_step": 100000 81 | }, 82 | "gamma": 0.99, 83 | "training_frequency": 32 84 | }, 85 | "memory": { 86 | "name": "OnPolicyBatchReplay", 87 | "batch_size": 64 88 | }, 89 | "net": { 90 | "type": "ConvNet", 91 | "conv_hid_layers": [ 92 | [32, 8, 4, 0, 1], 93 | [64, 4, 2, 0, 1], 94 | [32, 3, 1, 0, 1] 95 | ], 96 | "fc_hid_layers": [512], 97 | "hid_layers_activation": "relu", 98 | "init_fn": "orthogonal_", 99 | "normalize": true, 
100 | "batch_norm": false, 101 | "use_same_optim": false, 102 | "clip_grad_val": 0.5, 103 | "loss_spec": { 104 | "name": "MSELoss" 105 | }, 106 | "optim_spec": { 107 | "name": "RMSprop", 108 | "lr": 0.01 109 | }, 110 | "lr_scheduler_spec": null, 111 | "gpu": true 112 | } 113 | }], 114 | "env": [{ 115 | "name": "VideoPinball-v0", 116 | "frame_op": "concat", 117 | "frame_op_len": 4, 118 | "reward_scale": "sign", 119 | "num_envs": 16, 120 | "max_t": null, 121 | "max_frame": 850000 122 | }], 123 | "body": { 124 | "product": "outer", 125 | "num": 1 126 | }, 127 | "meta": { 128 | "distributed": false, 129 | "eval_frequency": 2000, 130 | "max_trial": 1, 131 | "max_session": 1 132 | } 133 | } 134 | } 135 | -------------------------------------------------------------------------------- /slm_lab/spec/demo.json: -------------------------------------------------------------------------------- 1 | { 2 | "dqn_cartpole": { 3 | "agent": [{ 4 | "name": "DQN", 5 | "algorithm": { 6 | "name": "DQN", 7 | "action_pdtype": "Argmax", 8 | "action_policy": "epsilon_greedy", 9 | "explore_var_spec": { 10 | "name": "linear_decay", 11 | "start_val": 1.0, 12 | "end_val": 0.1, 13 | "start_step": 0, 14 | "end_step": 1000 15 | }, 16 | "gamma": 0.99, 17 | "training_batch_iter": 8, 18 | "training_iter": 4, 19 | "training_frequency": 4, 20 | "training_start_step": 32 21 | }, 22 | "memory": { 23 | "name": "Replay", 24 | "batch_size": 32, 25 | "max_size": 10000, 26 | "use_cer": true 27 | }, 28 | "net": { 29 | "type": "MLPNet", 30 | "hid_layers": [64], 31 | "hid_layers_activation": "selu", 32 | "clip_grad_val": 0.5, 33 | "loss_spec": { 34 | "name": "MSELoss" 35 | }, 36 | "optim_spec": { 37 | "name": "Adam", 38 | "lr": 0.02 39 | }, 40 | "lr_scheduler_spec": { 41 | "name": "StepLR", 42 | "step_size": 1000, 43 | "gamma": 0.9 44 | }, 45 | "update_type": "polyak", 46 | "update_frequency": 32, 47 | "polyak_coef": 0.1, 48 | "gpu": false 49 | } 50 | }], 51 | "env": [{ 52 | "name": "CartPole-v0", 53 | "max_t": null, 54 | "max_frame": 10000 55 | }], 56 | "body": { 57 | "product": "outer", 58 | "num": 1 59 | }, 60 | "meta": { 61 | "distributed": false, 62 | "eval_frequency": 500, 63 | "log_frequency": 500, 64 | "max_session": 2, 65 | "max_trial": 1 66 | }, 67 | "search": { 68 | "agent": [{ 69 | "algorithm": { 70 | "gamma__grid_search": [0.5, 0.7, 0.90, 0.95, 0.99] 71 | } 72 | }] 73 | } 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /slm_lab/spec/experimental/a3c/a3c_nstep_worker_search.json: -------------------------------------------------------------------------------- 1 | { 2 | "a3c_nstep_pong": { 3 | "agent": [{ 4 | "name": "A3C", 5 | "algorithm": { 6 | "name": "ActorCritic", 7 | "action_pdtype": "default", 8 | "action_policy": "default", 9 | "explore_var_spec": null, 10 | "gamma": 0.99, 11 | "lam": null, 12 | "num_step_returns": 5, 13 | "entropy_coef_spec": { 14 | "name": "no_decay", 15 | "start_val": 0.01, 16 | "end_val": 0.01, 17 | "start_step": 0, 18 | "end_step": 0 19 | }, 20 | "val_loss_coef": 0.5, 21 | "training_frequency": 5 22 | }, 23 | "memory": { 24 | "name": "OnPolicyBatchReplay", 25 | }, 26 | "net": { 27 | "type": "ConvNet", 28 | "shared": true, 29 | "conv_hid_layers": [ 30 | [32, 8, 4, 0, 1], 31 | [64, 4, 2, 0, 1], 32 | [32, 3, 1, 0, 1] 33 | ], 34 | "fc_hid_layers": [512], 35 | "hid_layers_activation": "relu", 36 | "init_fn": "orthogonal_", 37 | "normalize": true, 38 | "batch_norm": false, 39 | "clip_grad_val": 0.5, 40 | "use_same_optim": false, 41 | "loss_spec": { 42 
| "name": "MSELoss" 43 | }, 44 | "actor_optim_spec": { 45 | "name": "GlobalAdam", 46 | "lr": 1e-4 47 | }, 48 | "critic_optim_spec": { 49 | "name": "GlobalAdam", 50 | "lr": 1e-4 51 | }, 52 | "lr_scheduler_spec": null, 53 | "gpu": false 54 | } 55 | }], 56 | "env": [{ 57 | "name": "PongNoFrameskip-v4", 58 | "frame_op": "concat", 59 | "frame_op_len": 4, 60 | "reward_scale": "sign", 61 | "num_envs": 8, 62 | "max_t": null, 63 | "max_frame": 1e7 64 | }], 65 | "body": { 66 | "product": "outer", 67 | "num": 1 68 | }, 69 | "meta": { 70 | "distributed": "synced", 71 | "log_frequency": 50000, 72 | "eval_frequency": 50000, 73 | "max_session": 16, 74 | "max_trial": 1, 75 | }, 76 | "search": { 77 | "meta": { 78 | "max_session__grid_search": [2, 4, 8, 16, 32] 79 | } 80 | } 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /slm_lab/spec/experimental/dqn/ddqn_lunar.json: -------------------------------------------------------------------------------- 1 | { 2 | "ddqn_concat_lunar": { 3 | "agent": [{ 4 | "name": "DoubleDQN", 5 | "algorithm": { 6 | "name": "DoubleDQN", 7 | "action_pdtype": "Argmax", 8 | "action_policy": "epsilon_greedy", 9 | "explore_var_spec": { 10 | "name": "linear_decay", 11 | "start_val": 1.0, 12 | "end_val": 0.08, 13 | "start_step": 0, 14 | "end_step": 14000 15 | }, 16 | "gamma": 0.99, 17 | "training_batch_iter": 3, 18 | "training_iter": 4, 19 | "training_frequency": 4, 20 | "training_start_step": 32 21 | }, 22 | "memory": { 23 | "name": "Replay", 24 | "batch_size": 32, 25 | "max_size": 100000, 26 | "use_cer": false 27 | }, 28 | "net": { 29 | "type": "MLPNet", 30 | "hid_layers": [ 31 | 400, 32 | 200 33 | ], 34 | "hid_layers_activation": "relu", 35 | "clip_grad_val": null, 36 | "loss_spec": { 37 | "name": "MSELoss" 38 | }, 39 | "optim_spec": { 40 | "name": "Adam", 41 | "lr": 0.002 42 | }, 43 | "lr_scheduler_spec": { 44 | "name": "MultiStepLR", 45 | "milestones": [ 46 | 80000, 47 | 100000 48 | ], 49 | "gamma": 0.5 50 | }, 51 | "update_type": "polyak", 52 | "polyak_coef": 0.9, 53 | "gpu": false 54 | } 55 | }], 56 | "env": [{ 57 | "name": "LunarLander-v2", 58 | "frame_op": "concat", 59 | "frame_op_len": 4, 60 | "max_t": null, 61 | "max_frame": 250000 62 | }], 63 | "body": { 64 | "product": "outer", 65 | "num": 1 66 | }, 67 | "meta": { 68 | "distributed": false, 69 | "eval_frequency": 1000, 70 | "max_session": 4, 71 | "max_trial": 62, 72 | "search": "RandomSearch", 73 | }, 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /slm_lab/spec/experimental/dqn/ddqn_lunar_search.json: -------------------------------------------------------------------------------- 1 | { 2 | "ddqn_concat_lunar": { 3 | "agent": [{ 4 | "name": "DoubleDQN", 5 | "algorithm": { 6 | "name": "DoubleDQN", 7 | "action_pdtype": "Argmax", 8 | "action_policy": "epsilon_greedy", 9 | "explore_var_spec": { 10 | "name": "linear_decay", 11 | "start_val": 1.0, 12 | "end_val": 0.08, 13 | "start_step": 0, 14 | "end_step": 100000 15 | }, 16 | "gamma": 0.99, 17 | "training_batch_iter": 2, 18 | "training_iter": 4, 19 | "training_frequency": 4, 20 | "training_start_step": 32 21 | }, 22 | "memory": { 23 | "name": "Replay", 24 | "batch_size": 32, 25 | "max_size": 100000, 26 | "use_cer": false 27 | }, 28 | "net": { 29 | "type": "MLPNet", 30 | "hid_layers": [ 31 | 400, 32 | 200 33 | ], 34 | "hid_layers_activation": "relu", 35 | "clip_grad_val": 2.0, 36 | "loss_spec": { 37 | "name": "MSELoss" 38 | }, 39 | "optim_spec": { 40 | "name": "Adam", 41 | 
"lr": 0.001 42 | }, 43 | "lr_scheduler_spec": null, 44 | "update_type": "polyak", 45 | "update_frequency": 0.9, 46 | "gpu": false 47 | } 48 | }], 49 | "env": [{ 50 | "name": "LunarLander-v2", 51 | "frame_op": "concat", 52 | "frame_op_len": 4, 53 | "max_t": null, 54 | "max_frame": 300000, 55 | "normalize_state": false 56 | }], 57 | "body": { 58 | "product": "outer", 59 | "num": 1 60 | }, 61 | "meta": { 62 | "distributed": false, 63 | "log_frequency": 1000, 64 | "eval_frequency": 1000, 65 | "max_session": 4, 66 | "max_trial": 32, 67 | "search": "RandomSearch", 68 | }, 69 | "search": { 70 | "agent": [{ 71 | "agent": { 72 | "training_batch_iter__randint": [1, 5] 73 | }, 74 | "net": { 75 | "update_frequency__choice": [0.8, 0.9, 0.925, 0.95], 76 | "optim_spec": { 77 | "lr__choice": [0.001, 0.003, 0.005, 0.007, 0.01] 78 | } 79 | }, 80 | }] 81 | } 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /slm_lab/spec/experimental/dqn/dqn_cartpole_search.json: -------------------------------------------------------------------------------- 1 | { 2 | "dqn_cartpole": { 3 | "agent": [{ 4 | "name": "DQN", 5 | "algorithm": { 6 | "name": "DQN", 7 | "action_pdtype": "Argmax", 8 | "action_policy": "epsilon_greedy", 9 | "explore_var_spec": { 10 | "name": "linear_decay", 11 | "start_val": 1.0, 12 | "end_val": 0.1, 13 | "start_step": 0, 14 | "end_step": 1000, 15 | }, 16 | "gamma": 0.99, 17 | "training_batch_iter": 8, 18 | "training_iter": 4, 19 | "training_frequency": 4, 20 | "training_start_step": 32 21 | }, 22 | "memory": { 23 | "name": "Replay", 24 | "batch_size": 32, 25 | "max_size": 10000, 26 | "use_cer": false 27 | }, 28 | "net": { 29 | "type": "MLPNet", 30 | "hid_layers": [64], 31 | "hid_layers_activation": "selu", 32 | "clip_grad_val": 0.5, 33 | "loss_spec": { 34 | "name": "MSELoss" 35 | }, 36 | "optim_spec": { 37 | "name": "Adam", 38 | "lr": 0.01 39 | }, 40 | "lr_scheduler_spec": null, 41 | "update_type": "polyak", 42 | "update_frequency": 32, 43 | "polyak_coef": 0.1, 44 | "gpu": false 45 | } 46 | }], 47 | "env": [{ 48 | "name": "CartPole-v0", 49 | "max_t": null, 50 | "max_frame": 50000 51 | }], 52 | "body": { 53 | "product": "outer", 54 | "num": 1 55 | }, 56 | "meta": { 57 | "distributed": false, 58 | "eval_frequency": 1000, 59 | "max_session": 4, 60 | "max_trial": 32 61 | }, 62 | "search": { 63 | "agent": [{ 64 | "algorithm": { 65 | "gamma__uniform": [0.50, 1.0], 66 | "training_iter__randint": [1, 10] 67 | }, 68 | "net": { 69 | "optim_spec": { 70 | "lr__choice": [0.0001, 0.001, 0.01, 0.1] 71 | } 72 | } 73 | }] 74 | } 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /slm_lab/spec/experimental/misc/random.json: -------------------------------------------------------------------------------- 1 | { 2 | "random_cartpole": { 3 | "agent": [{ 4 | "name": "Random", 5 | "algorithm": { 6 | "name": "Random" 7 | }, 8 | "memory": { 9 | "name": "OnPolicyReplay" 10 | }, 11 | "net": {} 12 | }], 13 | "env": [{ 14 | "name": "CartPole-v0", 15 | "max_t": null, 16 | "max_frame": 100 17 | }], 18 | "body": { 19 | "product": "outer", 20 | "num": 1 21 | }, 22 | "meta": { 23 | "distributed": false, 24 | "eval_frequency": 1000, 25 | "max_session": 5, 26 | "max_trial": 1, 27 | } 28 | }, 29 | "random_pendulum": { 30 | "agent": [{ 31 | "name": "Random", 32 | "algorithm": { 33 | "name": "Random" 34 | }, 35 | "memory": { 36 | "name": "OnPolicyReplay" 37 | }, 38 | "net": {} 39 | }], 40 | "env": [{ 41 | "name": "Pendulum-v0", 42 | "max_t": 
null, 43 | "max_frame": 100 44 | }], 45 | "body": { 46 | "product": "outer", 47 | "num": 1 48 | }, 49 | "meta": { 50 | "distributed": false, 51 | "eval_frequency": 1000, 52 | "max_session": 5, 53 | "max_trial": 1, 54 | } 55 | }, 56 | "random_2dball": { 57 | "agent": [{ 58 | "name": "Random", 59 | "algorithm": { 60 | "name": "Random" 61 | }, 62 | "memory": { 63 | "name": "OnPolicyReplay" 64 | }, 65 | "net": {} 66 | }], 67 | "env": [{ 68 | "name": "2DBall", 69 | "max_t": 1000, 70 | "max_frame": 100 71 | }], 72 | "body": { 73 | "product": "outer", 74 | "num": 1 75 | }, 76 | "meta": { 77 | "distributed": false, 78 | "eval_frequency": 1000, 79 | "max_session": 5, 80 | "max_trial": 1, 81 | } 82 | }, 83 | "random_breakout": { 84 | "agent": [{ 85 | "name": "Random", 86 | "algorithm": { 87 | "name": "Random" 88 | }, 89 | "memory": { 90 | "name": "OnPolicyReplay" 91 | }, 92 | "net": {} 93 | }], 94 | "env": [{ 95 | "name": "BreakoutNoFrameskip-v4", 96 | "frame_op": "concat", 97 | "frame_op_len": 4, 98 | "reward_scale": "sign", 99 | "num_envs": 8, 100 | "max_t": null, 101 | "max_frame": 1e7 102 | }], 103 | "body": { 104 | "product": "outer", 105 | "num": 1 106 | }, 107 | "meta": { 108 | "distributed": false, 109 | "log_frequency": 10000, 110 | "eval_frequency": 10000, 111 | "rigorous_eval": 0, 112 | "max_session": 1, 113 | "max_trial": 1, 114 | } 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /slm_lab/spec/experimental/reinforce/reinforce_cartpole.json: -------------------------------------------------------------------------------- 1 | { 2 | "reinforce_cartpole": { 3 | "agent": [{ 4 | "name": "Reinforce", 5 | "algorithm": { 6 | "name": "Reinforce", 7 | "action_pdtype": "default", 8 | "action_policy": "default", 9 | "explore_var_spec": null, 10 | "gamma": 0.99, 11 | "entropy_coef_spec": { 12 | "name": "linear_decay", 13 | "start_val": 0.01, 14 | "end_val": 0.001, 15 | "start_step": 0, 16 | "end_step": 20000, 17 | }, 18 | "training_frequency": 1 19 | }, 20 | "memory": { 21 | "name": "OnPolicyReplay" 22 | }, 23 | "net": { 24 | "type": "MLPNet", 25 | "hid_layers": [64], 26 | "hid_layers_activation": "selu", 27 | "clip_grad_val": null, 28 | "loss_spec": { 29 | "name": "MSELoss" 30 | }, 31 | "optim_spec": { 32 | "name": "Adam", 33 | "lr": 0.002 34 | }, 35 | "lr_scheduler_spec": null 36 | } 37 | }], 38 | "env": [{ 39 | "name": "CartPole-v0", 40 | "max_t": null, 41 | "max_frame": 80000, 42 | }], 43 | "body": { 44 | "product": "outer", 45 | "num": 1 46 | }, 47 | "meta": { 48 | "distributed": false, 49 | "eval_frequency": 2000, 50 | "max_session": 1, 51 | "max_trial": 1, 52 | } 53 | }, 54 | "reinforce_cross_entropy_cartpole": { 55 | "agent": [{ 56 | "name": "Reinforce", 57 | "algorithm": { 58 | "name": "Reinforce", 59 | "action_pdtype": "Categorical", 60 | "action_policy": "default", 61 | "center_return": true, 62 | "explore_var_spec": null, 63 | "gamma": 0.99, 64 | "entropy_coef_spec": { 65 | "name": "linear_decay", 66 | "start_val": 0.01, 67 | "end_val": 0.001, 68 | "start_step": 0, 69 | "end_step": 20000, 70 | }, 71 | "training_frequency": 16 72 | }, 73 | "memory": { 74 | "name": "OnPolicyCrossEntropy", 75 | "cross_entropy": 0.5 76 | }, 77 | "net": { 78 | "type": "MLPNet", 79 | "hid_layers": [128], 80 | "hid_layers_activation": "selu", 81 | "clip_grad_val": null, 82 | "loss_spec": { 83 | "name": "CrossEntropyLoss" 84 | }, 85 | "optim_spec": { 86 | "name": "Adam", 87 | "lr": 0.02 88 | }, 89 | "lr_scheduler_spec": null 90 | } 91 | }], 92 | "env": [{ 93 | 
"name": "CartPole-v0", 94 | "max_t": null, 95 | "max_frame": 100000, 96 | }], 97 | "body": { 98 | "product": "outer", 99 | "num": 1 100 | }, 101 | "meta": { 102 | "distributed": false, 103 | "eval_frequency": 2000, 104 | "max_session": 4, 105 | "max_trial": 1, 106 | }, 107 | "search": { 108 | "agent": [{ 109 | "algorithm": { 110 | "gamma__grid_search": [0.1, 0.5, 0.7, 0.8, 0.90, 0.99, 0.999] 111 | } 112 | }] 113 | } 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /slm_lab/spec/experimental/reinforce/reinforce_pendulum.json: -------------------------------------------------------------------------------- 1 | { 2 | "reinforce_pendulum": { 3 | "agent": [{ 4 | "name": "Reinforce", 5 | "algorithm": { 6 | "name": "Reinforce", 7 | "action_pdtype": "default", 8 | "action_policy": "default", 9 | "explore_var_spec": null, 10 | "gamma": 0.99, 11 | "entropy_coef_spec": { 12 | "name": "linear_decay", 13 | "start_val": 0.01, 14 | "end_val": 0.001, 15 | "start_step": 0, 16 | "end_step": 20000, 17 | }, 18 | "training_frequency": 1 19 | }, 20 | "memory": { 21 | "name": "OnPolicyReplay" 22 | }, 23 | "net": { 24 | "type": "MLPNet", 25 | "hid_layers": [64], 26 | "hid_layers_activation": "selu", 27 | "clip_grad_val": null, 28 | "loss_spec": { 29 | "name": "MSELoss" 30 | }, 31 | "optim_spec": { 32 | "name": "Adam", 33 | "lr": 0.002 34 | }, 35 | "lr_scheduler_spec": null 36 | } 37 | }], 38 | "env": [{ 39 | "name": "Pendulum-v0", 40 | "max_t": null, 41 | "max_frame": 80000, 42 | }], 43 | "body": { 44 | "product": "outer", 45 | "num": 1 46 | }, 47 | "meta": { 48 | "distributed": false, 49 | "eval_frequency": 2000, 50 | "max_session": 1, 51 | "max_trial": 1, 52 | } 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /slm_lab/spec/experimental/sac/sac_cartpole.json: -------------------------------------------------------------------------------- 1 | { 2 | "sac_cartpole": { 3 | "agent": [{ 4 | "name": "SoftActorCritic", 5 | "algorithm": { 6 | "name": "SoftActorCritic", 7 | "action_pdtype": "GumbelSoftmax", 8 | "action_policy": "default", 9 | "gamma": 0.99, 10 | "training_frequency": 1 11 | }, 12 | "memory": { 13 | "name": "Replay", 14 | "batch_size": 256, 15 | "max_size": 100000, 16 | "use_cer": true 17 | }, 18 | "net": { 19 | "type": "MLPNet", 20 | "hid_layers": [64], 21 | "hid_layers_activation": "relu", 22 | "init_fn": "orthogonal_", 23 | "clip_grad_val": 0.5, 24 | "loss_spec": { 25 | "name": "MSELoss" 26 | }, 27 | "optim_spec": { 28 | "name": "Adam", 29 | "lr": 0.005 30 | }, 31 | "lr_scheduler_spec": null, 32 | "update_type": "polyak", 33 | "update_frequency": 1, 34 | "polyak_coef": 0.005, 35 | "gpu": false 36 | } 37 | }], 38 | "env": [{ 39 | "name": "CartPole-v0", 40 | "max_t": null, 41 | "max_frame": 200000, 42 | "num_envs": 8, 43 | "normalize_state": false 44 | }], 45 | "body": { 46 | "product": "outer", 47 | "num": 1 48 | }, 49 | "meta": { 50 | "distributed": false, 51 | "log_frequency": 500, 52 | "eval_frequency": 500, 53 | "rigorous_eval": 0, 54 | "max_session": 1, 55 | "max_trial": 1, 56 | } 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /slm_lab/spec/experimental/sarsa/sarsa_cartpole.json: -------------------------------------------------------------------------------- 1 | { 2 | "sarsa_epsilon_greedy_cartpole": { 3 | "agent": [{ 4 | "name": "SARSA", 5 | "algorithm": { 6 | "name": "SARSA", 7 | "action_pdtype": "Argmax", 8 | "action_policy": 
"epsilon_greedy", 9 | "explore_var_spec": { 10 | "name": "linear_decay", 11 | "start_val": 1.0, 12 | "end_val": 0.05, 13 | "start_step": 0, 14 | "end_step": 10000 15 | }, 16 | "gamma": 0.99, 17 | "training_frequency": 32 18 | }, 19 | "memory": { 20 | "name": "OnPolicyBatchReplay" 21 | }, 22 | "net": { 23 | "type": "MLPNet", 24 | "hid_layers": [64], 25 | "hid_layers_activation": "selu", 26 | "clip_grad_val": 0.5, 27 | "loss_spec": { 28 | "name": "MSELoss" 29 | }, 30 | "optim_spec": { 31 | "name": "RMSprop", 32 | "lr": 0.01 33 | }, 34 | "lr_scheduler_spec": null 35 | } 36 | }], 37 | "env": [{ 38 | "name": "CartPole-v0", 39 | "max_t": null, 40 | "max_frame": 80000 41 | }], 42 | "body": { 43 | "product": "outer", 44 | "num": 1 45 | }, 46 | "meta": { 47 | "distributed": false, 48 | "eval_frequency": 2000, 49 | "max_trial": 1, 50 | "max_session": 1 51 | } 52 | }, 53 | "sarsa_boltzmann_cartpole": { 54 | "agent": [{ 55 | "name": "SARSA", 56 | "algorithm": { 57 | "name": "SARSA", 58 | "action_pdtype": "Argmax", 59 | "action_policy": "boltzmann", 60 | "explore_var_spec": { 61 | "name": "linear_decay", 62 | "start_val": 3.0, 63 | "end_val": 1.0, 64 | "start_step": 0, 65 | "end_step": 10000 66 | }, 67 | "gamma": 0.99, 68 | "training_frequency": 32 69 | }, 70 | "memory": { 71 | "name": "OnPolicyBatchReplay" 72 | }, 73 | "net": { 74 | "type": "MLPNet", 75 | "hid_layers": [64], 76 | "hid_layers_activation": "selu", 77 | "clip_grad_val": 0.5, 78 | "loss_spec": { 79 | "name": "MSELoss" 80 | }, 81 | "optim_spec": { 82 | "name": "RMSprop", 83 | "lr": 0.01 84 | }, 85 | "lr_scheduler_spec": null 86 | } 87 | }], 88 | "env": [{ 89 | "name": "CartPole-v0", 90 | "max_t": null, 91 | "max_frame": 80000 92 | }], 93 | "body": { 94 | "product": "outer", 95 | "num": 1 96 | }, 97 | "meta": { 98 | "distributed": false, 99 | "eval_frequency": 2000, 100 | "max_trial": 1, 101 | "max_session": 1 102 | } 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kengz/SLM-Lab/cae945a294cb111ee8e568bb4465ee74c501478b/test/__init__.py -------------------------------------------------------------------------------- /test/agent/net/test_conv.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | from slm_lab.env.base import Clock 3 | from slm_lab.agent.net import net_util 4 | from slm_lab.agent.net.conv import ConvNet 5 | import torch 6 | import torch.nn as nn 7 | 8 | net_spec = { 9 | "type": "ConvNet", 10 | "shared": True, 11 | "conv_hid_layers": [ 12 | [32, 8, 4, 0, 1], 13 | [64, 4, 2, 0, 1], 14 | [64, 3, 1, 0, 1] 15 | ], 16 | "fc_hid_layers": [512], 17 | "hid_layers_activation": "relu", 18 | "init_fn": "xavier_uniform_", 19 | "batch_norm": False, 20 | "clip_grad_val": 1.0, 21 | "loss_spec": { 22 | "name": "SmoothL1Loss" 23 | }, 24 | "optim_spec": { 25 | "name": "Adam", 26 | "lr": 0.02 27 | }, 28 | "lr_scheduler_spec": { 29 | "name": "StepLR", 30 | "step_size": 30, 31 | "gamma": 0.1 32 | }, 33 | "gpu": True 34 | } 35 | in_dim = (4, 84, 84) 36 | out_dim = 3 37 | batch_size = 16 38 | net = ConvNet(net_spec, in_dim, out_dim) 39 | # init net optimizer and its lr scheduler 40 | optim = net_util.get_optim(net, net.optim_spec) 41 | lr_scheduler = net_util.get_lr_scheduler(optim, net.lr_scheduler_spec) 42 | x = torch.rand((batch_size,) + in_dim) 43 | 44 | 45 | def test_init(): 46 | net = 
ConvNet(net_spec, in_dim, out_dim) 47 | assert isinstance(net, nn.Module) 48 | assert hasattr(net, 'conv_model') 49 | assert hasattr(net, 'fc_model') 50 | assert hasattr(net, 'model_tail') 51 | assert not hasattr(net, 'model_tails') 52 | 53 | 54 | def test_forward(): 55 | y = net.forward(x) 56 | assert y.shape == (batch_size, out_dim) 57 | 58 | 59 | def test_train_step(): 60 | y = torch.rand((batch_size, out_dim)) 61 | clock = Clock(100, 1) 62 | loss = net.loss_fn(net.forward(x), y) 63 | net.train_step(loss, optim, lr_scheduler, clock=clock) 64 | assert loss != 0.0 65 | 66 | 67 | def test_no_fc(): 68 | no_fc_net_spec = deepcopy(net_spec) 69 | no_fc_net_spec['fc_hid_layers'] = [] 70 | net = ConvNet(no_fc_net_spec, in_dim, out_dim) 71 | assert isinstance(net, nn.Module) 72 | assert hasattr(net, 'conv_model') 73 | assert not hasattr(net, 'fc_model') 74 | assert hasattr(net, 'model_tail') 75 | assert not hasattr(net, 'model_tails') 76 | 77 | y = net.forward(x) 78 | assert y.shape == (batch_size, out_dim) 79 | 80 | 81 | def test_multitails(): 82 | net = ConvNet(net_spec, in_dim, [3, 4]) 83 | assert isinstance(net, nn.Module) 84 | assert hasattr(net, 'conv_model') 85 | assert hasattr(net, 'fc_model') 86 | assert not hasattr(net, 'model_tail') 87 | assert hasattr(net, 'model_tails') 88 | assert len(net.model_tails) == 2 89 | 90 | y = net.forward(x) 91 | assert len(y) == 2 92 | assert y[0].shape == (batch_size, 3) 93 | assert y[1].shape == (batch_size, 4) 94 | -------------------------------------------------------------------------------- /test/agent/net/test_mlp.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | from slm_lab.env.base import Clock 3 | from slm_lab.agent.net import net_util 4 | from slm_lab.agent.net.mlp import MLPNet 5 | import torch 6 | import torch.nn as nn 7 | 8 | net_spec = { 9 | "type": "MLPNet", 10 | "shared": True, 11 | "hid_layers": [32], 12 | "hid_layers_activation": "relu", 13 | "init_fn": "xavier_uniform_", 14 | "clip_grad_val": 1.0, 15 | "loss_spec": { 16 | "name": "MSELoss" 17 | }, 18 | "optim_spec": { 19 | "name": "Adam", 20 | "lr": 0.02 21 | }, 22 | "lr_scheduler_spec": { 23 | "name": "StepLR", 24 | "step_size": 30, 25 | "gamma": 0.1 26 | }, 27 | "update_type": "replace", 28 | "update_frequency": 1, 29 | "polyak_coef": 0.9, 30 | "gpu": True 31 | } 32 | in_dim = 10 33 | out_dim = 3 34 | batch_size = 16 35 | net = MLPNet(net_spec, in_dim, out_dim) 36 | # init net optimizer and its lr scheduler 37 | optim = net_util.get_optim(net, net.optim_spec) 38 | lr_scheduler = net_util.get_lr_scheduler(optim, net.lr_scheduler_spec) 39 | x = torch.rand((batch_size, in_dim)) 40 | 41 | 42 | def test_init(): 43 | net = MLPNet(net_spec, in_dim, out_dim) 44 | assert isinstance(net, nn.Module) 45 | assert hasattr(net, 'model') 46 | assert hasattr(net, 'model_tail') 47 | assert not hasattr(net, 'model_tails') 48 | 49 | 50 | def test_forward(): 51 | y = net.forward(x) 52 | assert y.shape == (batch_size, out_dim) 53 | 54 | 55 | def test_train_step(): 56 | y = torch.rand((batch_size, out_dim)) 57 | clock = Clock(100, 1) 58 | loss = net.loss_fn(net.forward(x), y) 59 | net.train_step(loss, optim, lr_scheduler, clock=clock) 60 | assert loss != 0.0 61 | 62 | 63 | def test_no_lr_scheduler(): 64 | nopo_lrs_net_spec = deepcopy(net_spec) 65 | nopo_lrs_net_spec['lr_scheduler_spec'] = None 66 | net = MLPNet(nopo_lrs_net_spec, in_dim, out_dim) 67 | assert isinstance(net, nn.Module) 68 | assert hasattr(net, 'model') 69 | assert 
hasattr(net, 'model_tail') 70 | assert not hasattr(net, 'model_tails') 71 | 72 | y = net.forward(x) 73 | assert y.shape == (batch_size, out_dim) 74 | 75 | 76 | def test_multitails(): 77 | net = MLPNet(net_spec, in_dim, [3, 4]) 78 | assert isinstance(net, nn.Module) 79 | assert hasattr(net, 'model') 80 | assert not hasattr(net, 'model_tail') 81 | assert hasattr(net, 'model_tails') 82 | assert len(net.model_tails) == 2 83 | 84 | y = net.forward(x) 85 | assert len(y) == 2 86 | assert y[0].shape == (batch_size, 3) 87 | assert y[1].shape == (batch_size, 4) 88 | -------------------------------------------------------------------------------- /test/agent/net/test_recurrent.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | from slm_lab.env.base import Clock 3 | from slm_lab.agent.net import net_util 4 | from slm_lab.agent.net.recurrent import RecurrentNet 5 | import pytest 6 | import torch 7 | import torch.nn as nn 8 | 9 | net_spec = { 10 | "type": "RecurrentNet", 11 | "shared": True, 12 | "cell_type": "GRU", 13 | "fc_hid_layers": [10], 14 | "hid_layers_activation": "relu", 15 | "rnn_hidden_size": 64, 16 | "rnn_num_layers": 2, 17 | "bidirectional": False, 18 | "seq_len": 4, 19 | "init_fn": "xavier_uniform_", 20 | "clip_grad_val": 1.0, 21 | "loss_spec": { 22 | "name": "SmoothL1Loss" 23 | }, 24 | "optim_spec": { 25 | "name": "Adam", 26 | "lr": 0.02 27 | }, 28 | "lr_scheduler_spec": { 29 | "name": "StepLR", 30 | "step_size": 30, 31 | "gamma": 0.1 32 | }, 33 | "gpu": True 34 | } 35 | state_dim = 10 36 | out_dim = 3 37 | batch_size = 16 38 | seq_len = net_spec['seq_len'] 39 | in_dim = (seq_len, state_dim) 40 | net = RecurrentNet(net_spec, in_dim, out_dim) 41 | # init net optimizer and its lr scheduler 42 | optim = net_util.get_optim(net, net.optim_spec) 43 | lr_scheduler = net_util.get_lr_scheduler(optim, net.lr_scheduler_spec) 44 | x = torch.rand((batch_size, seq_len, state_dim)) 45 | 46 | 47 | def test_init(): 48 | net = RecurrentNet(net_spec, in_dim, out_dim) 49 | assert isinstance(net, nn.Module) 50 | assert hasattr(net, 'fc_model') 51 | assert hasattr(net, 'rnn_model') 52 | assert hasattr(net, 'model_tail') 53 | assert not hasattr(net, 'model_tails') 54 | assert net.rnn_model.bidirectional == False 55 | 56 | 57 | def test_forward(): 58 | y = net.forward(x) 59 | assert y.shape == (batch_size, out_dim) 60 | 61 | 62 | def test_train_step(): 63 | y = torch.rand((batch_size, out_dim)) 64 | clock = Clock(100, 1) 65 | loss = net.loss_fn(net.forward(x), y) 66 | net.train_step(loss, optim, lr_scheduler, clock=clock) 67 | assert loss != 0.0 68 | 69 | 70 | @pytest.mark.parametrize('bidirectional', (False, True)) 71 | @pytest.mark.parametrize('cell_type', ('RNN', 'LSTM', 'GRU')) 72 | def test_variant(bidirectional, cell_type): 73 | var_net_spec = deepcopy(net_spec) 74 | var_net_spec['bidirectional'] = bidirectional 75 | var_net_spec['cell_type'] = cell_type 76 | net = RecurrentNet(var_net_spec, in_dim, out_dim) 77 | assert isinstance(net, nn.Module) 78 | assert hasattr(net, 'fc_model') 79 | assert hasattr(net, 'rnn_model') 80 | assert hasattr(net, 'model_tail') 81 | assert not hasattr(net, 'model_tails') 82 | assert net.rnn_model.bidirectional == bidirectional 83 | 84 | y = net.forward(x) 85 | assert y.shape == (batch_size, out_dim) 86 | 87 | 88 | def test_no_fc(): 89 | no_fc_net_spec = deepcopy(net_spec) 90 | no_fc_net_spec['fc_hid_layers'] = [] 91 | net = RecurrentNet(no_fc_net_spec, in_dim, out_dim) 92 | assert isinstance(net, nn.Module) 
93 | assert not hasattr(net, 'fc_model') 94 | assert hasattr(net, 'rnn_model') 95 | assert hasattr(net, 'model_tail') 96 | assert not hasattr(net, 'model_tails') 97 | 98 | y = net.forward(x) 99 | assert y.shape == (batch_size, out_dim) 100 | 101 | 102 | def test_multitails(): 103 | net = RecurrentNet(net_spec, in_dim, [3, 4]) 104 | assert isinstance(net, nn.Module) 105 | assert hasattr(net, 'fc_model') 106 | assert hasattr(net, 'rnn_model') 107 | assert not hasattr(net, 'model_tail') 108 | assert hasattr(net, 'model_tails') 109 | assert len(net.model_tails) == 2 110 | 111 | y = net.forward(x) 112 | assert len(y) == 2 113 | assert y[0].shape == (batch_size, 3) 114 | assert y[1].shape == (batch_size, 4) 115 | -------------------------------------------------------------------------------- /test/env/test_registration.py: -------------------------------------------------------------------------------- 1 | from slm_lab.env.registration import get_env_path 2 | import pytest 3 | 4 | 5 | @pytest.mark.skip(reason="Not implemented yet") 6 | def test_get_env_path(): 7 | assert 'node_modules/slm-env-3dball/build/3dball' in get_env_path( 8 | '3dball') 9 | -------------------------------------------------------------------------------- /test/experiment/test_control.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | from flaky import flaky 3 | from slm_lab.experiment import analysis 4 | from slm_lab.experiment.control import Session, Trial, Experiment 5 | from slm_lab.spec import spec_util 6 | import pandas as pd 7 | import pytest 8 | 9 | 10 | def test_session(test_spec): 11 | spec_util.tick(test_spec, 'trial') 12 | spec_util.tick(test_spec, 'session') 13 | spec_util.save(test_spec, unit='trial') 14 | session = Session(test_spec) 15 | session_metrics = session.run() 16 | assert isinstance(session_metrics, dict) 17 | 18 | 19 | def test_trial(test_spec): 20 | spec_util.tick(test_spec, 'trial') 21 | spec_util.save(test_spec, unit='trial') 22 | trial = Trial(test_spec) 23 | trial_metrics = trial.run() 24 | assert isinstance(trial_metrics, dict) 25 | 26 | 27 | def test_trial_demo(): 28 | spec = spec_util.get('demo.json', 'dqn_cartpole') 29 | spec_util.save(spec, unit='experiment') 30 | spec = spec_util.override_spec(spec, 'test') 31 | spec_util.tick(spec, 'trial') 32 | trial_metrics = Trial(spec).run() 33 | assert isinstance(trial_metrics, dict) 34 | 35 | 36 | @pytest.mark.skip(reason="Unstable") 37 | @flaky 38 | def test_demo_performance(): 39 | spec = spec_util.get('demo.json', 'dqn_cartpole') 40 | spec_util.save(spec, unit='experiment') 41 | for env_spec in spec['env']: 42 | env_spec['max_frame'] = 2000 43 | spec_util.tick(spec, 'trial') 44 | trial = Trial(spec) 45 | spec_util.tick(spec, 'session') 46 | session = Session(spec) 47 | session.run() 48 | last_reward = session.agent.body.train_df.iloc[-1]['total_reward'] 49 | assert last_reward > 50, f'last_reward is too low: {last_reward}' 50 | 51 | 52 | @pytest.mark.skip(reason="Cant run on CI") 53 | def test_experiment(): 54 | spec = spec_util.get('demo.json', 'dqn_cartpole') 55 | spec_util.save(spec, unit='experiment') 56 | spec = spec_util.override_spec(spec, 'test') 57 | spec_util.tick(spec, 'experiment') 58 | experiment_df = Experiment(spec).run() 59 | assert isinstance(experiment_df, pd.DataFrame) 60 | -------------------------------------------------------------------------------- /test/experiment/test_monitor.py: 
-------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | # TODO add these tests 4 | 5 | 6 | def test_clock(): 7 | return 8 | 9 | 10 | def test_body(): 11 | return 12 | -------------------------------------------------------------------------------- /test/fixture/lib/util/test_df.csv: -------------------------------------------------------------------------------- 1 | integer,letter,square 2 | 1,a,1 3 | 2,b,4 4 | 3,c,9 5 | -------------------------------------------------------------------------------- /test/fixture/lib/util/test_dict.json: -------------------------------------------------------------------------------- 1 | { 2 | "a": 1, 3 | "b": 2, 4 | "c": 3 5 | } -------------------------------------------------------------------------------- /test/fixture/lib/util/test_dict.yml: -------------------------------------------------------------------------------- 1 | {a: 1, b: 2, c: 3} 2 | -------------------------------------------------------------------------------- /test/fixture/lib/util/test_str.txt: -------------------------------------------------------------------------------- 1 | lorem ipsum dolor -------------------------------------------------------------------------------- /test/lib/test_distribution.py: -------------------------------------------------------------------------------- 1 | from flaky import flaky 2 | from slm_lab.lib import distribution 3 | import pytest 4 | import torch 5 | 6 | 7 | @pytest.mark.parametrize('pdparam_type', [ 8 | 'probs', 'logits' 9 | ]) 10 | def test_argmax(pdparam_type): 11 | pdparam = torch.tensor([1.1, 10.0, 2.1]) 12 | # test both probs or logits 13 | pd = distribution.Argmax(**{pdparam_type: pdparam}) 14 | for _ in range(10): 15 | assert pd.sample().item() == 1 16 | assert torch.equal(pd.probs, torch.tensor([0., 1., 0.])) 17 | 18 | 19 | @flaky 20 | @pytest.mark.parametrize('pdparam_type', [ 21 | 'probs', 'logits' 22 | ]) 23 | def test_gumbel_categorical(pdparam_type): 24 | pdparam = torch.tensor([1.1, 10.0, 2.1]) 25 | pd = distribution.GumbelSoftmax(**{pdparam_type: pdparam, 'temperature': torch.tensor(1.0)}) 26 | for _ in range(10): 27 | assert torch.is_tensor(pd.sample()) 28 | 29 | 30 | @pytest.mark.parametrize('pdparam_type', [ 31 | 'probs', 'logits' 32 | ]) 33 | def test_multicategorical(pdparam_type): 34 | pdparam0 = torch.tensor([10.0, 0.0, 0.0]) 35 | pdparam1 = torch.tensor([0.0, 10.0, 0.0]) 36 | pdparam2 = torch.tensor([0.0, 0.0, 10.0]) 37 | pdparams = [pdparam0, pdparam1, pdparam2] 38 | # use a probs 39 | pd = distribution.MultiCategorical(**{pdparam_type: pdparams}) 40 | assert isinstance(pd.probs, list) 41 | # test probs only since if init from logits, probs will be close but not precise 42 | if pdparam_type == 'probs': 43 | assert torch.equal(pd.probs[0], torch.tensor([1., 0., 0.])) 44 | assert torch.equal(pd.probs[1], torch.tensor([0., 1., 0.])) 45 | assert torch.equal(pd.probs[2], torch.tensor([0., 0., 1.])) 46 | for _ in range(10): 47 | assert torch.equal(pd.sample(), torch.tensor([0, 1, 2])) 48 | -------------------------------------------------------------------------------- /test/lib/test_logger.py: -------------------------------------------------------------------------------- 1 | from slm_lab.lib import logger 2 | 3 | 4 | def test_logger(test_str): 5 | logger.critical(test_str) 6 | logger.debug(test_str) 7 | logger.error(test_str) 8 | logger.exception(test_str) 9 | logger.info(test_str) 10 | logger.warning(test_str) 11 | 
-------------------------------------------------------------------------------- /test/lib/test_math_util.py: -------------------------------------------------------------------------------- 1 | from slm_lab.lib import math_util 2 | import numpy as np 3 | import pytest 4 | import torch 5 | 6 | 7 | @pytest.mark.parametrize('base_shape', [ 8 | [], # scalar 9 | [2], # vector 10 | [4, 84, 84], # image 11 | ]) 12 | def test_venv_pack(base_shape): 13 | batch_size = 5 14 | num_envs = 4 15 | batch_arr = torch.zeros([batch_size, num_envs] + base_shape) 16 | unpacked_arr = math_util.venv_unpack(batch_arr) 17 | packed_arr = math_util.venv_pack(unpacked_arr, num_envs) 18 | assert list(packed_arr.shape) == [batch_size, num_envs] + base_shape 19 | 20 | 21 | @pytest.mark.parametrize('base_shape', [ 22 | [], # scalar 23 | [2], # vector 24 | [4, 84, 84], # image 25 | ]) 26 | def test_venv_unpack(base_shape): 27 | batch_size = 5 28 | num_envs = 4 29 | batch_arr = torch.zeros([batch_size, num_envs] + base_shape) 30 | unpacked_arr = math_util.venv_unpack(batch_arr) 31 | assert list(unpacked_arr.shape) == [batch_size * num_envs] + base_shape 32 | 33 | 34 | def test_calc_gaes(): 35 | rewards = torch.tensor([1., 0., 1., 1., 0., 1., 1., 1.]) 36 | dones = torch.tensor([0., 0., 1., 1., 0., 0., 0., 0.]) 37 | v_preds = torch.tensor([1.1, 0.1, 1.1, 1.1, 0.1, 1.1, 1.1, 1.1, 1.1]) 38 | assert len(v_preds) == len(rewards) + 1 # includes last state 39 | gamma = 0.99 40 | lam = 0.95 41 | gaes = math_util.calc_gaes(rewards, dones, v_preds, gamma, lam) 42 | res = torch.tensor([0.84070045, 0.89495, -0.1, -0.1, 3.616724, 2.7939649, 1.9191545, 0.989]) 43 | # use allclose instead of equal to account for atol 44 | assert torch.allclose(gaes, res) 45 | 46 | 47 | @pytest.mark.parametrize('start_val, end_val, start_step, end_step, step, correct', [ 48 | (0.1, 0.0, 0, 100, 0, 0.1), 49 | (0.1, 0.0, 0, 100, 50, 0.05), 50 | (0.1, 0.0, 0, 100, 100, 0.0), 51 | (0.1, 0.0, 0, 100, 150, 0.0), 52 | (0.1, 0.0, 100, 200, 50, 0.1), 53 | (0.1, 0.0, 100, 200, 100, 0.1), 54 | (0.1, 0.0, 100, 200, 150, 0.05), 55 | (0.1, 0.0, 100, 200, 200, 0.0), 56 | (0.1, 0.0, 100, 200, 250, 0.0), 57 | ]) 58 | def test_linear_decay(start_val, end_val, start_step, end_step, step, correct): 59 | assert math_util.linear_decay(start_val, end_val, start_step, end_step, step) == correct 60 | 61 | 62 | @pytest.mark.parametrize('start_val, end_val, start_step, end_step, step, correct', [ 63 | (1.0, 0.0, 0, 100, 0, 1.0), 64 | (1.0, 0.0, 0, 100, 5, 0.9), 65 | (1.0, 0.0, 0, 100, 10, 0.81), 66 | (1.0, 0.0, 0, 100, 25, 0.59049), 67 | (1.0, 0.0, 0, 100, 50, 0.3486784401), 68 | (1.0, 0.0, 0, 100, 100, 0.0), 69 | (1.0, 0.0, 0, 100, 150, 0.0), 70 | (1.0, 0.0, 100, 200, 0, 1.0), 71 | (1.0, 0.0, 100, 200, 50, 1.0), 72 | (1.0, 0.0, 100, 200, 100, 1.0), 73 | (1.0, 0.0, 100, 200, 105, 0.9), 74 | (1.0, 0.0, 100, 200, 125, 0.59049), 75 | (1.0, 0.0, 100, 200, 200, 0.0), 76 | (1.0, 0.0, 100, 200, 250, 0.0), 77 | ]) 78 | def test_rate_decay(start_val, end_val, start_step, end_step, step, correct): 79 | np.testing.assert_almost_equal(math_util.rate_decay(start_val, end_val, start_step, end_step, step), correct) 80 | 81 | def test_calc_q_value_logits(): 82 | state_value = torch.tensor([[1.], [2.], [3.]]) 83 | advantages = torch.tensor([ 84 | [0., 1.], 85 | [1., 1.], 86 | [1., 0.]]) 87 | result = torch.tensor([ 88 | [0.5, 1.5], 89 | [2.0, 2.0], 90 | [3.5, 2.5]]) 91 | out = math_util.calc_q_value_logits(state_value, advantages) 92 | assert torch.allclose(out, result) 93 | 
-------------------------------------------------------------------------------- /test/spec/test_spec_util.py: -------------------------------------------------------------------------------- 1 | from slm_lab.spec import spec_util 2 | import numpy as np 3 | import pytest 4 | 5 | 6 | def test_check(): 7 | spec = spec_util.get('experimental/misc/base.json', 'base_case_openai') 8 | assert spec_util.check(spec) 9 | 10 | 11 | def test_check_all(): 12 | assert spec_util.check_all() 13 | 14 | 15 | def test_get(): 16 | spec = spec_util.get('experimental/misc/base.json', 'base_case_openai') 17 | assert spec is not None 18 | --------------------------------------------------------------------------------
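Editorial note (not part of the repository): taken together, the spec files and the tests above outline the intended workflow — a spec is fetched by file name and spec name via spec_util.get, saved and ticked, then handed to a Session/Trial/Experiment from slm_lab.experiment.control. The sketch below mirrors test_trial_demo from test_control.py and is only an illustration, assuming the lab's dependencies are installed; it is not a prescribed entry point.

from slm_lab.experiment.control import Trial
from slm_lab.spec import spec_util

# load the dqn_cartpole spec from slm_lab/spec/demo.json shown above
spec = spec_util.get('demo.json', 'dqn_cartpole')
spec_util.save(spec, unit='experiment')
# test_trial_demo additionally shortens the run with spec_util.override_spec(spec, 'test')
spec_util.tick(spec, 'trial')  # assign a trial index before constructing the Trial
trial_metrics = Trial(spec).run()  # returns a dict of trial metrics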