├── .gitignore
├── LICENSE
├── Neurips2022_rebuttal.ipynb
├── README.md
├── docker
├── README.md
├── atari
│ └── Dockerfile
├── core
│ └── Dockerfile
└── mujoco
│ └── Dockerfile
├── docs
├── README.md
├── agents.md
├── api_docs
│ └── python
│ │ ├── _toc.yaml
│ │ ├── dopamine.md
│ │ ├── dopamine
│ │ ├── _api_cache.json
│ │ ├── agents.md
│ │ ├── agents
│ │ │ ├── dqn.md
│ │ │ ├── dqn
│ │ │ │ ├── dqn_agent.md
│ │ │ │ └── dqn_agent
│ │ │ │ │ └── DQNAgent.md
│ │ │ ├── implicit_quantile.md
│ │ │ ├── implicit_quantile
│ │ │ │ ├── implicit_quantile_agent.md
│ │ │ │ └── implicit_quantile_agent
│ │ │ │ │ └── ImplicitQuantileAgent.md
│ │ │ ├── rainbow.md
│ │ │ └── rainbow
│ │ │ │ ├── rainbow_agent.md
│ │ │ │ └── rainbow_agent
│ │ │ │ ├── RainbowAgent.md
│ │ │ │ └── project_distribution.md
│ │ ├── colab.md
│ │ ├── colab
│ │ │ ├── utils.md
│ │ │ └── utils
│ │ │ │ ├── get_latest_file.md
│ │ │ │ ├── get_latest_iteration.md
│ │ │ │ ├── load_baselines.md
│ │ │ │ ├── load_statistics.md
│ │ │ │ ├── read_experiment.md
│ │ │ │ └── summarize_data.md
│ │ ├── discrete_domains.md
│ │ ├── discrete_domains
│ │ │ ├── atari_lib.md
│ │ │ ├── atari_lib
│ │ │ │ ├── AtariPreprocessing.md
│ │ │ │ └── create_atari_environment.md
│ │ │ ├── checkpointer.md
│ │ │ ├── checkpointer
│ │ │ │ └── Checkpointer.md
│ │ │ ├── gym_lib.md
│ │ │ ├── gym_lib
│ │ │ │ ├── GymPreprocessing.md
│ │ │ │ └── create_gym_environment.md
│ │ │ ├── iteration_statistics.md
│ │ │ ├── iteration_statistics
│ │ │ │ └── IterationStatistics.md
│ │ │ ├── logger.md
│ │ │ ├── logger
│ │ │ │ └── Logger.md
│ │ │ ├── run_experiment.md
│ │ │ ├── run_experiment
│ │ │ │ ├── Runner.md
│ │ │ │ ├── TrainRunner.md
│ │ │ │ ├── create_agent.md
│ │ │ │ └── create_runner.md
│ │ │ └── train.md
│ │ ├── jax.md
│ │ ├── jax
│ │ │ ├── agents.md
│ │ │ ├── agents
│ │ │ │ ├── dqn.md
│ │ │ │ ├── dqn
│ │ │ │ │ └── dqn_agent.md
│ │ │ │ ├── implicit_quantile.md
│ │ │ │ ├── implicit_quantile
│ │ │ │ │ └── implicit_quantile_agent.md
│ │ │ │ ├── quantile.md
│ │ │ │ ├── quantile
│ │ │ │ │ └── quantile_agent.md
│ │ │ │ ├── rainbow.md
│ │ │ │ └── rainbow
│ │ │ │ │ ├── rainbow_agent.md
│ │ │ │ │ └── rainbow_agent
│ │ │ │ │ └── project_distribution.md
│ │ │ └── networks.md
│ │ ├── replay_memory.md
│ │ └── replay_memory
│ │ │ ├── circular_replay_buffer.md
│ │ │ ├── circular_replay_buffer
│ │ │ │ ├── OutOfGraphReplayBuffer.md
│ │ │ │ └── WrappedReplayBuffer.md
│ │ │ ├── prioritized_replay_buffer.md
│ │ │ ├── prioritized_replay_buffer
│ │ │ │ ├── OutOfGraphPrioritizedReplayBuffer.md
│ │ │ │ └── WrappedPrioritizedReplayBuffer.md
│ │ │ └── sum_tree.md
│ │ └── index.md
└── changelist.md
├── dopamine
├── __init__.py
├── agents
│ ├── __init__.py
│ ├── dqn
│ │ ├── __init__.py
│ │ ├── configs
│ │ │ ├── dqn.gin
│ │ │ ├── dqn_acrobot.gin
│ │ │ ├── dqn_cartpole.gin
│ │ │ ├── dqn_icml.gin
│ │ │ ├── dqn_lunarlander.gin
│ │ │ ├── dqn_mountaincar.gin
│ │ │ ├── dqn_nature.gin
│ │ │ └── dqn_profiling.gin
│ │ └── dqn_agent.py
│ ├── implicit_quantile
│ │ ├── __init__.py
│ │ ├── configs
│ │ │ ├── implicit_quantile.gin
│ │ │ ├── implicit_quantile_icml.gin
│ │ │ └── implicit_quantile_profiling.gin
│ │ └── implicit_quantile_agent.py
│ └── rainbow
│ │ ├── __init__.py
│ │ ├── configs
│ │ │ ├── c51.gin
│ │ │ ├── c51_acrobot.gin
│ │ │ ├── c51_cartpole.gin
│ │ │ ├── c51_icml.gin
│ │ │ ├── c51_profiling.gin
│ │ │ ├── rainbow_aaai.gin
│ │ │ ├── rainbow_acrobot.gin
│ │ │ ├── rainbow_cartpole.gin
│ │ │ ├── rainbow_dqnpro.gin
│ │ │ ├── rainbow_original.gin
│ │ │ ├── rainbow_our_first_paper.gin
│ │ │ ├── rainbow_our_second_paper.gin
│ │ │ └── rainbow_profiling.gin
│ │ └── rainbow_agent.py
├── colab
│ ├── README.md
│ ├── __init__.py
│ ├── agent_visualizer.ipynb
│ ├── agents.ipynb
│ ├── cartpole.ipynb
│ ├── jax_agent_visualizer.ipynb
│ ├── load_statistics.ipynb
│ └── utils.py
├── continuous_domains
│ ├── __init__.py
│ ├── run_experiment.py
│ └── train.py
├── discrete_domains
│ ├── __init__.py
│ ├── atari_lib.py
│ ├── checkpointer.py
│ ├── gym_lib.py
│ ├── iteration_statistics.py
│ ├── legacy_networks.py
│ ├── logger.py
│ ├── run_experiment.py
│ └── train.py
├── jax
│ ├── README.md
│ ├── __init__.py
│ ├── agents
│ │ ├── __init__.py
│ │ ├── dqn
│ │ │ ├── __init__.py
│ │ │ ├── configs
│ │ │ │ ├── dqn.gin
│ │ │ │ ├── dqn_acrobot.gin
│ │ │ │ ├── dqn_cartpole.gin
│ │ │ │ ├── dqn_lunarlander.gin
│ │ │ │ ├── dqn_mountaincar.gin
│ │ │ │ └── dqn_profiling.gin
│ │ │ └── dqn_agent.py
│ │ ├── full_rainbow
│ │ │ ├── __init__.py
│ │ │ ├── configs
│ │ │ │ ├── full_rainbow.gin
│ │ │ │ └── full_rainbow_profiling.gin
│ │ │ └── full_rainbow_agent.py
│ │ ├── implicit_quantile
│ │ │ ├── __init__.py
│ │ │ ├── configs
│ │ │ │ ├── implicit_quantile.gin
│ │ │ │ └── implicit_quantile_profiling.gin
│ │ │ └── implicit_quantile_agent.py
│ │ ├── quantile
│ │ │ ├── __init__.py
│ │ │ ├── configs
│ │ │ │ ├── quantile.gin
│ │ │ │ └── quantile_profiling.gin
│ │ │ └── quantile_agent.py
│ │ ├── rainbow
│ │ │ ├── __init__.py
│ │ │ ├── configs
│ │ │ │ ├── c51.gin
│ │ │ │ ├── c51_acrobot.gin
│ │ │ │ ├── c51_cartpole.gin
│ │ │ │ ├── c51_profiling.gin
│ │ │ │ ├── rainbow.gin
│ │ │ │ ├── rainbow_acrobot.gin
│ │ │ │ ├── rainbow_cartpole.gin
│ │ │ │ └── rainbow_profiling.gin
│ │ │ └── rainbow_agent.py
│ │ └── sac
│ │ │ ├── __init__.py
│ │ │ ├── configs
│ │ │ │ └── sac.gin
│ │ │ └── sac_agent.py
│ ├── continuous_networks.py
│ ├── losses.py
│ └── networks.py
├── labs
│ ├── __init__.py
│ ├── atari_100k
│ │ ├── README.md
│ │ ├── __init__.py
│ │ ├── atari_100k_rainbow_agent.py
│ │ ├── configs
│ │ │ ├── DER.gin
│ │ │ ├── DrQ.gin
│ │ │ ├── DrQ_eps.gin
│ │ │ └── OTRainbow.gin
│ │ ├── eval_run_experiment.py
│ │ └── train.py
│ ├── environments
│ │ ├── __init__.py
│ │ └── minatar
│ │ │ ├── __init__.py
│ │ │ ├── dqn_asterix.gin
│ │ │ ├── dqn_breakout.gin
│ │ │ ├── dqn_freeway.gin
│ │ │ ├── dqn_seaquest.gin
│ │ │ ├── dqn_space_invaders.gin
│ │ │ ├── minatar_env.py
│ │ │ ├── quantile_asterix.gin
│ │ │ ├── quantile_breakout.gin
│ │ │ ├── quantile_freeway.gin
│ │ │ ├── quantile_seaquest.gin
│ │ │ ├── quantile_space_invaders.gin
│ │ │ ├── rainbow_asterix.gin
│ │ │ ├── rainbow_breakout.gin
│ │ │ ├── rainbow_freeway.gin
│ │ │ ├── rainbow_seaquest.gin
│ │ │ └── rainbow_space_invaders.gin
│ ├── sac_from_pixels
│ │ ├── continuous_networks.py
│ │ ├── deepmind_control_lib.py
│ │ └── sac_pixels.gin
│ └── tandem_dqn
│ │ ├── README.md
│ │ ├── requirements.txt
│ │ ├── run.sh
│ │ ├── run_experiment.py
│ │ ├── tandem_dqn_agent.py
│ │ └── train.py
├── replay_memory
│ ├── __init__.py
│ ├── circular_replay_buffer.py
│ ├── prioritized_replay_buffer.py
│ └── sum_tree.py
└── utils
│ ├── __init__.py
│ ├── agent_visualizer.py
│ ├── atari_plotter.py
│ ├── bar_plotter.py
│ ├── example_viz.py
│ ├── example_viz_lib.py
│ ├── line_plotter.py
│ ├── plotter.py
│ └── test_utils.py
├── extract_reward.py
├── plot_learning_curves.ipynb
├── requirements.txt
├── run_agents.ipynb
├── setup.py
└── tests
└── dopamine
├── agents
├── dqn
│ └── dqn_agent_test.py
├── implicit_quantile
│ └── implicit_quantile_agent_test.py
└── rainbow
│ └── rainbow_agent_test.py
├── atari_init_test.py
├── continuous_domains
└── run_experiment_test.py
├── discrete_domains
├── atari_lib_test.py
├── checkpointer_test.py
├── gym_lib_test.py
├── iteration_statistics_test.py
├── logger_test.py
└── run_experiment_test.py
├── jax
├── agents
│ ├── dqn
│ │ └── dqn_agent_test.py
│ ├── full_rainbow
│ │ └── full_rainbow_agent_test.py
│ ├── implicit_quantile
│ │ └── implicit_quantile_agent_test.py
│ ├── quantile
│ │ └── quantile_agent_test.py
│ ├── rainbow
│ │ └── rainbow_agent_test.py
│ └── sac
│ │ └── sac_agent_test.py
├── continuous_networks_test.py
├── losses_test.py
└── networks_test.py
├── labs
├── atari_100k
│ └── train_test.py
└── sac_from_pixels
│ ├── continuous_networks_test.py
│ └── deepmind_control_lib_test.py
├── replay_memory
├── circular_replay_buffer_test.py
├── prioritized_replay_buffer_test.py
└── sum_tree_test.py
├── tests
├── gin_config_test.py
├── integration_test.py
└── train_runner_integration_test.py
└── utils
└── agent_visualizer_test.py
/.gitignore:
--------------------------------------------------------------------------------
1 | tmp
2 | *results*
3 | *DS_*
4 | *images/*
5 | *.ipynb_checkpoints*
6 |
--------------------------------------------------------------------------------
/docker/atari/Dockerfile:
--------------------------------------------------------------------------------
1 | # Note: this Dockerfile expects that Atari ROMs have been retrieved following
2 | # the instructions from atari-py: https://github.com/openai/atari-py#roms.
3 | # Those instructions produce a directory (e.g. ~/roms) containing Roms.rar;
4 | # this Dockerfile should be built from that ROM directory.
5 |
6 | ARG base_image=dopamine/core
7 | FROM ${base_image}
8 |
9 | # Copy ROMs into the image.
10 | RUN mkdir /root/roms
11 | COPY ./Roms.rar /root/roms/
12 |
13 | RUN apt-get install rar unzip -y
14 | RUN rar x /root/roms/Roms.rar /root/roms/
15 |
16 | # Install ROMs with ale-py.
17 | RUN pip install atari_py ale-py
18 | RUN unzip /root/roms/ROMS.zip -d /root/roms
19 | RUN python -m atari_py.import_roms /root/roms
20 | RUN ale-import-roms /root/roms/ROMS
21 |
--------------------------------------------------------------------------------
/docker/core/Dockerfile:
--------------------------------------------------------------------------------
1 | # If you want to use a different version of CUDA, view the available
2 | # images here: https://hub.docker.com/r/nvidia/cuda
3 | # Note:
4 | # - Jax currently supports CUDA versions up to 11.3.
5 | # - Tensorflow requires CUDA versions after 11.2.
6 | ARG cuda_docker_tag="11.2.2-cudnn8-devel-ubuntu20.04"
7 | FROM nvidia/cuda:${cuda_docker_tag}
8 |
9 | COPY . /root/dopamine/
10 |
11 | RUN apt-get update
12 | # tzdata is required below. To avoid hanging, install it first.
13 | RUN DEBIAN_FRONTEND="noninteractive" apt-get install tzdata -y
14 | RUN apt-get install git wget libgl1-mesa-glx -y
15 |
16 | # Install python3.8.
17 | RUN apt-get install software-properties-common -y
18 | RUN add-apt-repository ppa:deadsnakes/ppa -y
19 | RUN apt-get install python3.8 -y
20 |
21 | # Make python3.8 the default python.
22 | RUN rm /usr/bin/python3
23 | RUN ln -s /usr/bin/python3.8 /usr/bin/python3
24 | RUN ln -s /usr/bin/python3.8 /usr/bin/python
25 | RUN apt-get install python3-distutils -y
26 |
27 | # Install pip.
28 | RUN wget https://bootstrap.pypa.io/get-pip.py
29 | RUN python get-pip.py
30 | RUN rm get-pip.py
31 |
32 | # Install Dopamine dependencies.
33 | RUN pip install -r /root/dopamine/requirements.txt
34 |
35 | # Install JAX for GPU, overriding requirements.txt.
36 | RUN pip install --upgrade "jax[cuda111]" -f https://storage.googleapis.com/jax-releases/jax_releases.html
37 |
38 | WORKDIR /root/dopamine
39 |
--------------------------------------------------------------------------------
/docker/mujoco/Dockerfile:
--------------------------------------------------------------------------------
1 | ARG base_image=dopamine/core
2 | FROM ${base_image}
3 |
4 | # Create Mujoco subdir.
5 | RUN mkdir /root/.mujoco
6 | COPY mjkey.txt /root/.mujoco/mjkey.txt
7 |
8 | # Prerequisites
9 | RUN apt-get install \
10 | libosmesa6-dev \
11 | libgl1-mesa-glx \
12 | libglfw3 \
13 | libglew-dev \
14 | patchelf \
15 | gcc \
16 | python3.8-dev \
17 | unzip -y
18 |
19 | # Download and install mujoco.
20 | RUN wget https://www.roboti.us/download/mujoco200_linux.zip
21 | RUN unzip mujoco200_linux.zip
22 | RUN rm mujoco200_linux.zip
23 | RUN mv mujoco200_linux /root/.mujoco/mujoco200
24 |
25 | # Add LD_LIBRARY_PATH environment variable.
26 | ENV LD_LIBRARY_PATH "/root/.mujoco/mujoco200/bin:${LD_LIBRARY_PATH}"
27 | RUN echo 'export LD_LIBRARY_PATH=/root/.mujoco/mujoco200/bin:${LD_LIBRARY_PATH}' >> /etc/bash.bashrc
28 |
29 | # Finally, install mujoco_py.
30 | RUN pip install mujoco_py
31 |
--------------------------------------------------------------------------------
/docs/agents.md:
--------------------------------------------------------------------------------
1 | # DQN And Rainbow
2 |
3 |
4 | In the spirit of these principles, this first version focuses on supporting the
5 | state-of-the-art, single-GPU *Rainbow* agent ([Hessel et al., 2018][rainbow])
6 | applied to Atari 2600 game-playing ([Bellemare et al., 2013][ale]).
7 | Specifically, our Rainbow agent implements the three components identified as
8 | most important by [Hessel et al.][rainbow]:
9 |
10 | * n-step Bellman updates (see e.g. [Mnih et al., 2016][a3c])
11 | * Prioritized experience replay ([Schaul et al., 2015][prioritized_replay])
12 | * Distributional reinforcement learning ([C51; Bellemare et al., 2017][c51])
13 |
14 | For completeness, we also provide an implementation of DQN ([Mnih et al.,
15 | 2015][dqn]).
16 |
--------------------------------------------------------------------------------
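A minimal way to exercise the Rainbow agent described above is to load one of the
bundled gin configs and hand control to Dopamine's experiment runner. The sketch
below is illustrative, not canonical: it assumes a local checkout, picks
`rainbow_aaai.gin` from this repository's config directory, and writes results
under a throwaway `base_dir`.

```python
# Hedged sketch: train the Rainbow agent from a bundled gin config.
from dopamine.discrete_domains import run_experiment

base_dir = '/tmp/dopamine_rainbow'  # illustrative output directory
gin_files = ['dopamine/agents/rainbow/configs/rainbow_aaai.gin']

run_experiment.load_gin_configs(gin_files, [])  # no extra gin bindings
runner = run_experiment.create_runner(base_dir)
runner.run_experiment()
```
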
/docs/api_docs/python/dopamine.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | # Module: dopamine
7 |
8 |
9 |
10 |
18 |
19 | ## Modules
20 |
21 | [`agents`](./dopamine/agents.md) module
22 |
23 | [`colab`](./dopamine/colab.md) module
24 |
25 | [`discrete_domains`](./dopamine/discrete_domains.md) module
26 |
27 | [`jax`](./dopamine/jax.md) module
28 |
29 | [`replay_memory`](./dopamine/replay_memory.md) module
30 |
--------------------------------------------------------------------------------
/docs/api_docs/python/dopamine/agents.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | # Module: dopamine.agents
7 |
8 |
9 |
10 |
18 |
19 | ## Modules
20 |
21 | [`dqn`](../dopamine/agents/dqn.md) module
22 |
23 | [`implicit_quantile`](../dopamine/agents/implicit_quantile.md) module
24 |
25 | [`rainbow`](../dopamine/agents/rainbow.md) module
26 |
--------------------------------------------------------------------------------
/docs/api_docs/python/dopamine/agents/dqn.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | # Module: dopamine.agents.dqn
7 |
8 |
9 |
10 |
18 |
19 | ## Modules
20 |
21 | [`dqn_agent`](../../dopamine/agents/dqn/dqn_agent.md) module: Compact
22 | implementation of a DQN agent.
23 |
--------------------------------------------------------------------------------
/docs/api_docs/python/dopamine/agents/dqn/dqn_agent.md:
--------------------------------------------------------------------------------
1 | description: Compact implementation of a DQN agent.
2 |
3 |
4 |
5 |
6 |
7 |
8 | # Module: dopamine.agents.dqn.dqn_agent
9 |
10 |
11 |
12 |
20 |
21 | Compact implementation of a DQN agent.
22 |
23 | ## Classes
24 |
25 | [`class DQNAgent`](../../../dopamine/agents/dqn/dqn_agent/DQNAgent.md): An
26 | implementation of the DQN agent.
27 |
--------------------------------------------------------------------------------
/docs/api_docs/python/dopamine/agents/dqn/dqn_agent/DQNAgent.md:
--------------------------------------------------------------------------------
1 | description: An implementation of the DQN agent.
2 |
3 |
4 |
5 |
6 |
7 |
8 | # dopamine.agents.dqn.dqn_agent.DQNAgent
9 |
10 |
11 |
12 |
20 |
21 | An implementation of the DQN agent.
22 |
23 |
24 |
--------------------------------------------------------------------------------
/docs/api_docs/python/dopamine/agents/implicit_quantile.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | # Module: dopamine.agents.implicit_quantile
7 |
8 |
9 |
10 |
18 |
19 | ## Modules
20 |
21 | [`implicit_quantile_agent`](../../dopamine/agents/implicit_quantile/implicit_quantile_agent.md)
22 | module: The implicit quantile networks (IQN) agent.
23 |
--------------------------------------------------------------------------------
/docs/api_docs/python/dopamine/agents/implicit_quantile/implicit_quantile_agent.md:
--------------------------------------------------------------------------------
1 | description: The implicit quantile networks (IQN) agent.
2 |
3 |
4 |
5 |
6 |
7 |
8 | # Module: dopamine.agents.implicit_quantile.implicit_quantile_agent
9 |
10 |
11 |
12 |
20 |
21 | The implicit quantile networks (IQN) agent.
22 |
23 | The agent follows the description given in "Implicit Quantile Networks for
24 | Distributional RL" (Dabney et al., 2018).
25 |
26 | ## Classes
27 |
28 | [`class ImplicitQuantileAgent`](../../../dopamine/agents/implicit_quantile/implicit_quantile_agent/ImplicitQuantileAgent.md):
29 | An extension of Rainbow to perform implicit quantile regression.
30 |
--------------------------------------------------------------------------------
/docs/api_docs/python/dopamine/agents/implicit_quantile/implicit_quantile_agent/ImplicitQuantileAgent.md:
--------------------------------------------------------------------------------
1 | description: An extension of Rainbow to perform implicit quantile regression.
2 |
3 |
4 |
5 |
6 |
7 |
8 | # dopamine.agents.implicit_quantile.implicit_quantile_agent.ImplicitQuantileAgent
9 |
10 |
11 |
12 |
20 |
21 | An extension of Rainbow to perform implicit quantile regression.
22 |
23 | Inherits From:
24 | [`RainbowAgent`](../../../../dopamine/agents/rainbow/rainbow_agent/RainbowAgent.md)
25 |
26 |
27 |
--------------------------------------------------------------------------------
/docs/api_docs/python/dopamine/agents/rainbow.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | # Module: dopamine.agents.rainbow
7 |
8 |
9 |
10 |
18 |
19 | ## Modules
20 |
21 | [`rainbow_agent`](../../dopamine/agents/rainbow/rainbow_agent.md) module:
22 | Compact implementation of a simplified Rainbow agent.
23 |
--------------------------------------------------------------------------------
/docs/api_docs/python/dopamine/agents/rainbow/rainbow_agent.md:
--------------------------------------------------------------------------------
1 | description: Compact implementation of a simplified Rainbow agent.
2 |
3 |
4 |
5 |
6 |
7 |
8 | # Module: dopamine.agents.rainbow.rainbow_agent
9 |
10 |
11 |
12 |
20 |
21 | Compact implementation of a simplified Rainbow agent.
22 |
23 | Specifically, we implement the following components from Rainbow:
24 |
25 | * n-step updates;
26 | * prioritized replay; and
27 | * distributional RL.
28 |
29 | These three components were found to significantly impact the performance of the
30 | Atari game-playing agent.
31 |
32 | Furthermore, our implementation does away with some minor hyperparameter
33 | choices. Specifically, we
34 |
35 | * keep the beta exponent fixed at beta=0.5, rather than increase it linearly;
36 | * remove the alpha parameter, which was set to alpha=0.5 throughout the paper.
37 |
38 | Details in "Rainbow: Combining Improvements in Deep Reinforcement Learning" by
39 | Hessel et al. (2018).
40 |
41 | ## Classes
42 |
43 | [`class RainbowAgent`](../../../dopamine/agents/rainbow/rainbow_agent/RainbowAgent.md):
44 | A compact implementation of a simplified Rainbow agent.
45 |
46 | ## Functions
47 |
48 | [`project_distribution(...)`](../../../dopamine/agents/rainbow/rainbow_agent/project_distribution.md):
49 | Projects a batch of (support, weights) onto target_support.
50 |
--------------------------------------------------------------------------------
/docs/api_docs/python/dopamine/agents/rainbow/rainbow_agent/RainbowAgent.md:
--------------------------------------------------------------------------------
1 | description: A compact implementation of a simplified Rainbow agent.
2 |
3 |
4 |
5 |
6 |
7 |
8 | # dopamine.agents.rainbow.rainbow_agent.RainbowAgent
9 |
10 |
11 |
12 |
20 |
21 | A compact implementation of a simplified Rainbow agent.
22 |
23 | Inherits From:
24 | [`DQNAgent`](../../../../dopamine/agents/dqn/dqn_agent/DQNAgent.md)
25 |
26 |
27 |
--------------------------------------------------------------------------------
/docs/api_docs/python/dopamine/colab.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | # Module: dopamine.colab
7 |
8 |
9 |
10 |
18 |
19 | ## Modules
20 |
21 | [`utils`](../dopamine/colab/utils.md) module: This provides utilities for
22 | dealing with Dopamine data.
23 |
--------------------------------------------------------------------------------
/docs/api_docs/python/dopamine/colab/utils.md:
--------------------------------------------------------------------------------
1 | description: This provides utilities for dealing with Dopamine data.
2 |
3 |
4 |
5 |
6 |
7 |
8 | # Module: dopamine.colab.utils
9 |
10 |
11 |
12 |
20 |
21 | This provides utilities for dealing with Dopamine data.
22 |
23 | See: dopamine/common/logger.py.
24 |
25 | ## Functions
26 |
27 | [`get_latest_file(...)`](../../dopamine/colab/utils/get_latest_file.md): Return
28 | the file named 'path_[0-9]*' with the largest such number.
29 |
30 | [`get_latest_iteration(...)`](../../dopamine/colab/utils/get_latest_iteration.md):
31 | Return the largest iteration number corresponding to the given path.
32 |
33 | [`load_baselines(...)`](../../dopamine/colab/utils/load_baselines.md): Reads in
34 | the baseline experimental data from a specified base directory.
35 |
36 | [`load_statistics(...)`](../../dopamine/colab/utils/load_statistics.md): Reads
37 | in a statistics object from log_path.
38 |
39 | [`read_experiment(...)`](../../dopamine/colab/utils/read_experiment.md): Reads
40 | in a set of experimental results from log_path.
41 |
42 | [`summarize_data(...)`](../../dopamine/colab/utils/summarize_data.md): Processes
43 | log data into a per-iteration summary.
44 |
--------------------------------------------------------------------------------
/docs/api_docs/python/dopamine/colab/utils/get_latest_file.md:
--------------------------------------------------------------------------------
1 | description: Return the file named 'path_[0-9]*' with the largest such number.
2 |
3 |
4 |
5 |
6 |
7 |
8 | # dopamine.colab.utils.get_latest_file
9 |
10 |
11 |
12 |
20 |
21 | Return the file named 'path_[0-9]*' with the largest such number.
22 |
23 |
24 | dopamine.colab.utils.get_latest_file(
25 | path
26 | )
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 | #### Args
35 |
36 | *   `path`: The base path (including directory and base name) to search.
37 |
38 | #### Returns
39 |
40 | The latest file (in terms of given numbers).
41 |
--------------------------------------------------------------------------------
/docs/api_docs/python/dopamine/colab/utils/get_latest_iteration.md:
--------------------------------------------------------------------------------
1 | description: Return the largest iteration number corresponding to the given
2 | path.
3 |
4 |
5 |
6 |
7 |
8 |
9 | # dopamine.colab.utils.get_latest_iteration
10 |
11 |
12 |
13 |
21 |
22 | Return the largest iteration number corresponding to the given path.
23 |
24 |
25 | dopamine.colab.utils.get_latest_iteration(
26 | path
27 | )
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 | #### Args
36 |
37 | *   `path`: The base path (including directory and base name) to search.
38 |
39 | #### Returns
40 |
41 | The latest iteration number.
42 |
43 | #### Raises
44 |
45 | *   `ValueError`: If there is no available log data at the given path.
46 |
--------------------------------------------------------------------------------
/docs/api_docs/python/dopamine/colab/utils/load_baselines.md:
--------------------------------------------------------------------------------
1 | description: Reads in the baseline experimental data from a specified base
2 | directory.
3 |
4 |
5 |
6 |
7 |
8 |
9 | # dopamine.colab.utils.load_baselines
10 |
11 |
12 |
13 |
21 |
22 | Reads in the baseline experimental data from a specified base directory.
23 |
24 |
25 | dopamine.colab.utils.load_baselines(
26 | base_dir, verbose=False
27 | )
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 | #### Args
36 |
37 | *   `base_dir`: string, base directory where to read data from.
38 | *   `verbose`: bool, whether to print warning messages.
39 |
40 | #### Returns
41 |
42 | A dict containing pandas DataFrames for all available agents and games.
43 |
--------------------------------------------------------------------------------
/docs/api_docs/python/dopamine/colab/utils/summarize_data.md:
--------------------------------------------------------------------------------
1 | description: Processes log data into a per-iteration summary.
2 |
3 |
4 |
5 |
6 |
7 |
8 | # dopamine.colab.utils.summarize_data
9 |
10 |
11 |
12 |
20 |
21 | Processes log data into a per-iteration summary.
22 |
23 |
24 | dopamine.colab.utils.summarize_data(
25 | data, summary_keys
26 | )
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 | #### Args
35 |
36 | *   `data`: Dictionary loaded by load_statistics describing the data. This
37 |     dictionary has keys iteration_0, iteration_1, ... describing per-iteration
38 |     data.
39 | *   `summary_keys`: List of per-iteration data to be summarized.
40 |
41 | #### Example:
42 |
43 |     data = load_statistics(...)
44 |     summarize_data(data, ['train_episode_returns', 'eval_episode_returns'])
45 |
46 | #### Returns
47 |
48 | A dictionary mapping each key in summary_keys to a per-iteration summary.
49 |
--------------------------------------------------------------------------------
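The colab utilities documented above compose into a short read-then-summarize
workflow. The sketch below is a rough illustration: the log path is made up, and
it assumes `load_statistics` returns the statistics dict together with the
iteration number, as in the standard Dopamine colab utilities.

```python
# Hedged sketch of the load -> summarize flow described above.
from dopamine.colab import utils as colab_utils

log_path = '/tmp/dopamine_dqn/logs'  # illustrative; written by an earlier run
raw_data, _ = colab_utils.load_statistics(log_path, verbose=True)
summary = colab_utils.summarize_data(
    raw_data, ['train_episode_returns', 'eval_episode_returns'])
print(summary['train_episode_returns'][-1])  # summary for the last iteration
```
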
/docs/api_docs/python/dopamine/discrete_domains.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | # Module: dopamine.discrete_domains
7 |
8 |
9 |
10 |
18 |
19 | ## Modules
20 |
21 | [`atari_lib`](../dopamine/discrete_domains/atari_lib.md) module: Atari-specific
22 | utilities including Atari-specific network architectures.
23 |
24 | [`checkpointer`](../dopamine/discrete_domains/checkpointer.md) module: A
25 | checkpointing mechanism for Dopamine agents.
26 |
27 | [`gym_lib`](../dopamine/discrete_domains/gym_lib.md) module: Gym-specific
28 | (non-Atari) utilities.
29 |
30 | [`iteration_statistics`](../dopamine/discrete_domains/iteration_statistics.md)
31 | module: A class for storing iteration-specific metrics.
32 |
33 | [`logger`](../dopamine/discrete_domains/logger.md) module: A lightweight logging
34 | mechanism for dopamine agents.
35 |
36 | [`run_experiment`](../dopamine/discrete_domains/run_experiment.md) module:
37 | Module defining classes and helper methods for general agents.
38 |
39 | [`train`](../dopamine/discrete_domains/train.md) module: The entry point for
40 | running a Dopamine agent.
41 |
--------------------------------------------------------------------------------
/docs/api_docs/python/dopamine/discrete_domains/atari_lib/AtariPreprocessing.md:
--------------------------------------------------------------------------------
1 | description: A class implementing image preprocessing for Atari 2600 agents.
2 |
3 |
4 |
5 |
6 |
7 |
8 | # dopamine.discrete_domains.atari_lib.AtariPreprocessing
9 |
10 |
11 |
12 |
20 |
21 | A class implementing image preprocessing for Atari 2600 agents.
22 |
23 |
24 |
25 | Specifically, this provides the following subset from the JAIR paper (Bellemare
26 | et al., 2013) and Nature DQN paper (Mnih et al., 2015):
27 |
28 | * Frame skipping (defaults to 4).
29 | * Terminal signal when a life is lost (off by default).
30 | * Grayscale and max-pooling of the last two frames.
31 | * Downsample the screen to a square image (defaults to 84x84).
32 |
33 | More generally, this class follows the preprocessing guidelines set down in
34 | Machado et al. (2018), "Revisiting the Arcade Learning Environment: Evaluation
35 | Protocols and Open Problems for General Agents".
36 |
--------------------------------------------------------------------------------
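In practice the preprocessing above is obtained through
`create_atari_environment`, which wraps the underlying Atari 2600 environment in
`AtariPreprocessing`. A small sketch (the game name is illustrative; sticky
actions follow the Machado et al. evaluation protocol):

```python
# Hedged sketch: build a preprocessed Atari environment and take one step.
from dopamine.discrete_domains import atari_lib

env = atari_lib.create_atari_environment(game_name='Pong', sticky_actions=True)
observation = env.reset()                          # preprocessed 84x84 frame
observation, reward, is_terminal, _ = env.step(0)  # no-op action
```
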
/docs/api_docs/python/dopamine/discrete_domains/checkpointer.md:
--------------------------------------------------------------------------------
1 | description: A checkpointing mechanism for Dopamine agents.
2 |
3 |
4 |
5 |
6 |
7 |
8 | # Module: dopamine.discrete_domains.checkpointer
9 |
10 |
11 |
12 |
20 |
21 | A checkpointing mechanism for Dopamine agents.
22 |
23 | This Checkpointer expects a base directory where checkpoints for different
24 | iterations are stored. Specifically, Checkpointer.save_checkpoint() takes in as
25 | input a dictionary 'data' to be pickled to disk. At each iteration, we write a
26 | file called 'cpkt.#', where # is the iteration number. The Checkpointer also
27 | cleans up old files, maintaining up to the CHECKPOINT_DURATION most recent
28 | iterations.
29 |
30 | The Checkpointer writes a sentinel file to indicate that checkpointing was
31 | globally successful. This means that all other checkpointing activities (saving
32 | the Tensorflow graph, the replay buffer) should be performed *prior* to calling
33 | Checkpointer.save_checkpoint(). This allows the Checkpointer to detect
34 | incomplete checkpoints.
35 |
36 | #### Example
37 |
38 | After running 10 iterations (numbered 0...9) with base_directory='/checkpoint',
39 | the following files will exist:
40 |
41 | *   `/checkpoint/cpkt.6`
42 | *   `/checkpoint/cpkt.7`
43 | *   `/checkpoint/cpkt.8`
44 | *   `/checkpoint/cpkt.9`
45 | *   `/checkpoint/sentinel_checkpoint_complete.6`
46 | *   `/checkpoint/sentinel_checkpoint_complete.7`
47 | *   `/checkpoint/sentinel_checkpoint_complete.8`
48 | *   `/checkpoint/sentinel_checkpoint_complete.9`
49 |
50 | ## Classes
51 |
52 | [`class Checkpointer`](../../dopamine/discrete_domains/checkpointer/Checkpointer.md):
53 | Class for managing checkpoints for Dopamine agents.
54 |
--------------------------------------------------------------------------------
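The checkpointing workflow described above amounts to pickling one dictionary per
iteration and reading the newest complete one back. A minimal sketch, assuming
the standard `Checkpointer` constructor plus its `save_checkpoint` and
`load_checkpoint` methods; the directory is illustrative:

```python
# Hedged sketch of the checkpointing cycle described above.
from dopamine.discrete_domains import checkpointer

ckpt = checkpointer.Checkpointer('/tmp/dopamine_checkpoints')
for iteration in range(10):
  data = {'iteration': iteration}        # anything picklable, e.g. agent state
  ckpt.save_checkpoint(iteration, data)  # writes the file plus its sentinel

restored = ckpt.load_checkpoint(9)       # read back the latest iteration
assert restored['iteration'] == 9
```
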
/docs/api_docs/python/dopamine/discrete_domains/checkpointer/Checkpointer.md:
--------------------------------------------------------------------------------
1 | description: Class for managing checkpoints for Dopamine agents.
2 |
3 |
4 |
5 |
6 |
7 |
8 | # dopamine.discrete_domains.checkpointer.Checkpointer
9 |
10 |
11 |
12 |
20 |
21 | Class for managing checkpoints for Dopamine agents.
22 |
23 |
24 |
--------------------------------------------------------------------------------
/docs/api_docs/python/dopamine/discrete_domains/gym_lib.md:
--------------------------------------------------------------------------------
1 | description: Gym-specific (non-Atari) utilities.
2 |
3 |
4 |
5 |
6 |
7 |
8 | # Module: dopamine.discrete_domains.gym_lib
9 |
10 |
11 |
12 |
20 |
21 | Gym-specific (non-Atari) utilities.
22 |
23 | Some network specifications specific to certain Gym environments are provided
24 | here.
25 |
26 | Includes a wrapper class around Gym environments. This class makes general Gym
27 | environments conformant with the API Dopamine is expecting.
28 |
29 | ## Classes
30 |
31 | [`class GymPreprocessing`](../../dopamine/discrete_domains/gym_lib/GymPreprocessing.md):
32 | A Wrapper class around Gym environments.
33 |
34 | ## Functions
35 |
36 | [`create_gym_environment(...)`](../../dopamine/discrete_domains/gym_lib/create_gym_environment.md):
37 | Wraps a Gym environment with some basic preprocessing.
38 |
--------------------------------------------------------------------------------
/docs/api_docs/python/dopamine/discrete_domains/gym_lib/GymPreprocessing.md:
--------------------------------------------------------------------------------
1 | description: A Wrapper class around Gym environments.
2 |
3 |
4 |
5 |
6 |
7 |
8 | # dopamine.discrete_domains.gym_lib.GymPreprocessing
9 |
10 |
11 |
12 |
20 |
21 | A Wrapper class around Gym environments.
22 |
23 |
24 |
--------------------------------------------------------------------------------
/docs/api_docs/python/dopamine/discrete_domains/gym_lib/create_gym_environment.md:
--------------------------------------------------------------------------------
1 | description: Wraps a Gym environment with some basic preprocessing.
2 |
3 |
4 |
5 |
6 |
7 |
8 | # dopamine.discrete_domains.gym_lib.create_gym_environment
9 |
10 |
11 |
12 |
20 |
21 | Wraps a Gym environment with some basic preprocessing.
22 |
23 |
24 | dopamine.discrete_domains.gym_lib.create_gym_environment(
25 | environment_name=None, version='v0'
26 | )
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 | #### Args
35 |
36 | *   `environment_name`: str, the name of the environment to run.
37 | *   `version`: str, version of the environment to run.
38 |
39 | #### Returns
40 |
41 | A Gym environment with some standard preprocessing.
42 |
--------------------------------------------------------------------------------
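As with the Atari helper, `create_gym_environment` is the usual way to obtain a
`GymPreprocessing`-wrapped environment. A short sketch, with the environment name
and version mirroring the Acrobot gin configs later in this repository:

```python
# Hedged sketch: wrap a classic-control Gym task for use with Dopamine agents.
from dopamine.discrete_domains import gym_lib

env = gym_lib.create_gym_environment(environment_name='Acrobot', version='v1')
observation = env.reset()
print(env.action_space, env.observation_space)
```
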
/docs/api_docs/python/dopamine/discrete_domains/iteration_statistics.md:
--------------------------------------------------------------------------------
1 | description: A class for storing iteration-specific metrics.
2 |
3 |
4 |
5 |
6 |
7 |
8 | # Module: dopamine.discrete_domains.iteration_statistics
9 |
10 |
11 |
12 |
20 |
21 | A class for storing iteration-specific metrics.
22 |
23 | ## Classes
24 |
25 | [`class IterationStatistics`](../../dopamine/discrete_domains/iteration_statistics/IterationStatistics.md):
26 | A class for storing iteration-specific metrics.
27 |
--------------------------------------------------------------------------------
/docs/api_docs/python/dopamine/discrete_domains/iteration_statistics/IterationStatistics.md:
--------------------------------------------------------------------------------
1 | description: A class for storing iteration-specific metrics.
2 |
3 |
4 |
5 |
6 |
7 |
8 | # dopamine.discrete_domains.iteration_statistics.IterationStatistics
9 |
10 |
11 |
12 |
20 |
21 | A class for storing iteration-specific metrics.
22 |
23 |
24 |
25 | The internal format is as follows: we maintain a mapping from keys to lists.
26 | Each list contains all the values corresponding to the given key.
27 |
28 | For example, self.data_lists['train_episode_returns'] might contain the
29 | per-episode returns achieved during this iteration.
30 |
31 |
32 |
33 |
34 |
35 | #### Attributes
36 |
37 | *   `data_lists`: dict mapping each metric_name (str) to a list of said metric
38 |     across episodes.
39 |
--------------------------------------------------------------------------------
/docs/api_docs/python/dopamine/discrete_domains/logger.md:
--------------------------------------------------------------------------------
1 | description: A lightweight logging mechanism for dopamine agents.
2 |
3 |
4 |
5 |
6 |
7 |
8 | # Module: dopamine.discrete_domains.logger
9 |
10 |
11 |
12 |
20 |
21 | A lightweight logging mechanism for dopamine agents.
22 |
23 | ## Classes
24 |
25 | [`class Logger`](../../dopamine/discrete_domains/logger/Logger.md): Class for
26 | maintaining a dictionary of data to log.
27 |
--------------------------------------------------------------------------------
/docs/api_docs/python/dopamine/discrete_domains/logger/Logger.md:
--------------------------------------------------------------------------------
1 | description: Class for maintaining a dictionary of data to log.
2 |
3 |
4 |
5 |
6 |
7 |
8 | # dopamine.discrete_domains.logger.Logger
9 |
10 |
11 |
12 |
20 |
21 | Class for maintaining a dictionary of data to log.
22 |
23 |
24 |
--------------------------------------------------------------------------------
/docs/api_docs/python/dopamine/discrete_domains/run_experiment.md:
--------------------------------------------------------------------------------
1 | description: Module defining classes and helper methods for general agents.
2 |
3 |
4 |
5 |
6 |
7 |
8 | # Module: dopamine.discrete_domains.run_experiment
9 |
10 |
11 |
12 |
20 |
21 | Module defining classes and helper methods for general agents.
22 |
23 | ## Classes
24 |
25 | [`class Runner`](../../dopamine/discrete_domains/run_experiment/Runner.md):
26 | Object that handles running Dopamine experiments.
27 |
28 | [`class TrainRunner`](../../dopamine/discrete_domains/run_experiment/TrainRunner.md):
29 | Object that handles running experiments.
30 |
31 | ## Functions
32 |
33 | [`create_agent(...)`](../../dopamine/discrete_domains/run_experiment/create_agent.md):
34 | Creates an agent.
35 |
36 | [`create_runner(...)`](../../dopamine/discrete_domains/run_experiment/create_runner.md):
37 | Creates an experiment Runner.
38 |
--------------------------------------------------------------------------------
/docs/api_docs/python/dopamine/discrete_domains/run_experiment/Runner.md:
--------------------------------------------------------------------------------
1 | description: Object that handles running Dopamine experiments.
2 |
3 |
4 |
5 |
6 |
7 |
8 | # dopamine.discrete_domains.run_experiment.Runner
9 |
10 |
11 |
12 |
20 |
21 | Object that handles running Dopamine experiments.
22 |
23 |
24 |
25 | Here we use the term 'experiment' to mean simulating interactions between the
26 | agent and the environment and reporting some statistics pertaining to these
27 | interactions.
28 |
29 | A simple scenario to train a DQN agent is as follows:
30 |
31 | ```python
32 | from dopamine.agents.dqn import dqn_agent
33 | from dopamine.discrete_domains import atari_lib, run_experiment
34 | base_dir = '/tmp/simple_example'
35 | def create_agent(sess, environment, summary_writer=None):
36 |   return dqn_agent.DQNAgent(sess, num_actions=environment.action_space.n)
37 | runner = run_experiment.Runner(base_dir, create_agent,
38 |                                atari_lib.create_atari_environment)
39 | runner.run_experiment()
40 | ```
41 |
--------------------------------------------------------------------------------
/docs/api_docs/python/dopamine/discrete_domains/run_experiment/TrainRunner.md:
--------------------------------------------------------------------------------
1 | description: Object that handles running experiments.
2 |
3 |
4 |
5 |
6 |
7 |
8 | # dopamine.discrete_domains.run_experiment.TrainRunner
9 |
10 |
11 |
12 |
20 |
21 | Object that handles running experiments.
22 |
23 | Inherits From:
24 | [`Runner`](../../../dopamine/discrete_domains/run_experiment/Runner.md)
25 |
26 |
27 |
28 | The `TrainRunner` differs from the base `Runner` class in that it does not run
29 | the evaluation phase. Checkpointing and logging for the train phase are
30 | preserved as before.
31 |
--------------------------------------------------------------------------------
/docs/api_docs/python/dopamine/discrete_domains/run_experiment/create_runner.md:
--------------------------------------------------------------------------------
1 | description: Creates an experiment Runner.
2 |
3 |
4 |
5 |
6 |
7 |
8 | # dopamine.discrete_domains.run_experiment.create_runner
9 |
10 |
11 |
12 |
20 |
21 | Creates an experiment Runner.
22 |
23 |
24 | dopamine.discrete_domains.run_experiment.create_runner(
25 | base_dir, schedule='continuous_train_and_eval'
26 | )
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 | #### Args
35 |
36 | *   `base_dir`: str, base directory for hosting all subdirectories.
37 | *   `schedule`: string, which type of Runner to use.
38 |
39 | #### Returns
40 |
41 | *   `runner`: A `Runner`-like object.
42 |
43 | #### Raises
44 |
45 | *   `ValueError`: When an unknown schedule is encountered.
46 |
--------------------------------------------------------------------------------
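The `schedule` string is what selects between the two runner classes documented
above. A small sketch, assuming the two schedules Dopamine ships
('continuous_train_and_eval', the default, and 'continuous_train') and that a gin
config has already been loaded as in the earlier examples:

```python
# Hedged sketch: create_runner picks the Runner class from the schedule string.
from dopamine.discrete_domains import run_experiment

# Default schedule: alternate training and evaluation phases (Runner).
runner = run_experiment.create_runner('/tmp/exp_train_and_eval')

# Train-only schedule: skips the evaluation phase (TrainRunner).
train_runner = run_experiment.create_runner(
    '/tmp/exp_train_only', schedule='continuous_train')
```
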
/docs/api_docs/python/dopamine/discrete_domains/train.md:
--------------------------------------------------------------------------------
1 | description: The entry point for running a Dopamine agent.
2 |
3 |
4 |
5 |
6 |
7 |
8 | # Module: dopamine.discrete_domains.train
9 |
10 |
11 |
12 |
20 |
21 | The entry point for running a Dopamine agent.
22 |
23 |
--------------------------------------------------------------------------------
/docs/api_docs/python/dopamine/jax.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | # Module: dopamine.jax
7 |
8 |
9 |
10 |
18 |
19 | ## Modules
20 |
21 | [`agents`](../dopamine/jax/agents.md) module
22 |
23 | [`networks`](../dopamine/jax/networks.md) module: Various networks for Jax
24 | Dopamine agents.
25 |
--------------------------------------------------------------------------------
/docs/api_docs/python/dopamine/jax/agents.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | # Module: dopamine.jax.agents
7 |
8 |
9 |
10 |
18 |
19 | ## Modules
20 |
21 | [`dqn`](../../dopamine/jax/agents/dqn.md) module
22 |
23 | [`implicit_quantile`](../../dopamine/jax/agents/implicit_quantile.md) module
24 |
25 | [`quantile`](../../dopamine/jax/agents/quantile.md) module
26 |
27 | [`rainbow`](../../dopamine/jax/agents/rainbow.md) module
28 |
--------------------------------------------------------------------------------
/docs/api_docs/python/dopamine/jax/agents/dqn.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | # Module: dopamine.jax.agents.dqn
7 |
8 |
9 |
10 |
18 |
19 | ## Modules
20 |
21 | [`dqn_agent`](../../../dopamine/jax/agents/dqn/dqn_agent.md) module: Compact
22 | implementation of a DQN agent in JAX.
23 |
--------------------------------------------------------------------------------
/docs/api_docs/python/dopamine/jax/agents/dqn/dqn_agent.md:
--------------------------------------------------------------------------------
1 | description: Compact implementation of a DQN agent in JAX.
2 |
3 |
4 |
5 |
6 |
7 |
8 | # Module: dopamine.jax.agents.dqn.dqn_agent
9 |
10 |
11 |
12 |
20 |
21 | Compact implementation of a DQN agent in JAX.
22 |
--------------------------------------------------------------------------------
/docs/api_docs/python/dopamine/jax/agents/implicit_quantile.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | # Module: dopamine.jax.agents.implicit_quantile
7 |
8 |
9 |
10 |
18 |
19 | ## Modules
20 |
21 | [`implicit_quantile_agent`](../../../dopamine/jax/agents/implicit_quantile/implicit_quantile_agent.md)
22 | module: The implicit quantile networks (IQN) agent.
23 |
--------------------------------------------------------------------------------
/docs/api_docs/python/dopamine/jax/agents/implicit_quantile/implicit_quantile_agent.md:
--------------------------------------------------------------------------------
1 | description: The implicit quantile networks (IQN) agent.
2 |
3 |
4 |
5 |
6 |
7 |
8 | # Module: dopamine.jax.agents.implicit_quantile.implicit_quantile_agent
9 |
10 |
11 |
12 |
20 |
21 | The implicit quantile networks (IQN) agent.
22 |
23 | The agent follows the description given in "Implicit Quantile Networks for
24 | Distributional RL" (Dabney et al., 2018).
25 |
--------------------------------------------------------------------------------
/docs/api_docs/python/dopamine/jax/agents/quantile.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | # Module: dopamine.jax.agents.quantile
7 |
8 |
9 |
10 |
18 |
19 | ## Modules
20 |
21 | [`quantile_agent`](../../../dopamine/jax/agents/quantile/quantile_agent.md)
22 | module: An extension of Rainbow to perform quantile regression.
23 |
--------------------------------------------------------------------------------
/docs/api_docs/python/dopamine/jax/agents/quantile/quantile_agent.md:
--------------------------------------------------------------------------------
1 | description: An extension of Rainbow to perform quantile regression.
2 |
3 |
4 |
5 |
6 |
7 |
8 | # Module: dopamine.jax.agents.quantile.quantile_agent
9 |
10 |
11 |
12 |
20 |
21 | An extension of Rainbow to perform quantile regression.
22 |
23 | This loss is computed as in "Distributional Reinforcement Learning with Quantile
24 | Regression" (Dabney et al., 2017).
25 |
--------------------------------------------------------------------------------
/docs/api_docs/python/dopamine/jax/agents/rainbow.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | # Module: dopamine.jax.agents.rainbow
7 |
8 |
9 |
10 |
18 |
19 | ## Modules
20 |
21 | [`rainbow_agent`](../../../dopamine/jax/agents/rainbow/rainbow_agent.md) module:
22 | Compact implementation of a simplified Rainbow agent in Jax.
23 |
--------------------------------------------------------------------------------
/docs/api_docs/python/dopamine/jax/agents/rainbow/rainbow_agent.md:
--------------------------------------------------------------------------------
1 | description: Compact implementation of a simplified Rainbow agent in Jax.
2 |
3 |
4 |
5 |
6 |
7 |
8 | # Module: dopamine.jax.agents.rainbow.rainbow_agent
9 |
10 |
11 |
12 |
20 |
21 | Compact implementation of a simplified Rainbow agent in Jax.
22 |
23 | Specifically, we implement the following components from Rainbow:
24 |
25 | * n-step updates;
26 | * prioritized replay; and
27 | * distributional RL.
28 |
29 | These three components were found to significantly impact the performance of the
30 | Atari game-playing agent.
31 |
32 | Furthermore, our implementation does away with some minor hyperparameter
33 | choices. Specifically, we
34 |
35 | * keep the beta exponent fixed at beta=0.5, rather than increase it linearly;
36 | * remove the alpha parameter, which was set to alpha=0.5 throughout the paper.
37 |
38 | Details in "Rainbow: Combining Improvements in Deep Reinforcement Learning" by
39 | Hessel et al. (2018).
40 |
41 | ## Functions
42 |
43 | [`project_distribution(...)`](../../../../dopamine/jax/agents/rainbow/rainbow_agent/project_distribution.md):
44 | Projects a batch of (support, weights) onto target_support.
45 |
--------------------------------------------------------------------------------
/docs/api_docs/python/dopamine/jax/networks.md:
--------------------------------------------------------------------------------
1 | description: Various networks for Jax Dopamine agents.
2 |
3 |
4 |
5 |
6 |
7 |
8 | # Module: dopamine.jax.networks
9 |
10 |
11 |
12 |
20 |
21 | Various networks for Jax Dopamine agents.
22 |
--------------------------------------------------------------------------------
/docs/api_docs/python/dopamine/replay_memory.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | # Module: dopamine.replay_memory
7 |
8 |
9 |
10 |
18 |
19 | ## Modules
20 |
21 | [`circular_replay_buffer`](../dopamine/replay_memory/circular_replay_buffer.md)
22 | module: The standard DQN replay memory.
23 |
24 | [`prioritized_replay_buffer`](../dopamine/replay_memory/prioritized_replay_buffer.md)
25 | module: An implementation of Prioritized Experience Replay (PER).
26 |
27 | [`sum_tree`](../dopamine/replay_memory/sum_tree.md) module: A sum tree data
28 | structure.
29 |
--------------------------------------------------------------------------------
/docs/api_docs/python/dopamine/replay_memory/circular_replay_buffer.md:
--------------------------------------------------------------------------------
1 | description: The standard DQN replay memory.
2 |
3 |
4 |
5 |
6 |
7 |
8 | # Module: dopamine.replay_memory.circular_replay_buffer
9 |
10 |
11 |
12 |
20 |
21 | The standard DQN replay memory.
22 |
23 | This implementation is an out-of-graph replay memory + in-graph wrapper. It
24 | supports vanilla n-step updates of the form typically found in the literature,
25 | i.e. where rewards are accumulated for n steps and the intermediate trajectory
26 | is not exposed to the agent. This does not allow, for example, performing
27 | off-policy corrections.
28 |
29 | ## Classes
30 |
31 | [`class OutOfGraphReplayBuffer`](../../dopamine/replay_memory/circular_replay_buffer/OutOfGraphReplayBuffer.md):
32 | A simple out-of-graph Replay Buffer.
33 |
34 | [`class WrappedReplayBuffer`](../../dopamine/replay_memory/circular_replay_buffer/WrappedReplayBuffer.md):
35 | Wrapper of OutOfGraphReplayBuffer with an in graph sampling mechanism.
36 |
--------------------------------------------------------------------------------
/docs/api_docs/python/dopamine/replay_memory/circular_replay_buffer/OutOfGraphReplayBuffer.md:
--------------------------------------------------------------------------------
1 | description: A simple out-of-graph Replay Buffer.
2 |
3 |
4 |
5 |
6 |
7 |
8 | # dopamine.replay_memory.circular_replay_buffer.OutOfGraphReplayBuffer
9 |
10 |
11 |
12 |
20 |
21 | A simple out-of-graph Replay Buffer.
22 |
23 |
24 |
25 | Stores transitions, state, action, reward, next_state, terminal (and any extra
26 | contents specified) in a circular buffer and provides a uniform transition
27 | sampling function.
28 |
29 | When the states consist of stacks of observations, storing the full states is
30 | inefficient. This class writes only the individual observations and constructs
31 | the stacked states at sample time.
32 |
33 |
34 |
35 |
36 |
37 | #### Attributes
38 |
39 | *   `add_count`: int, counter of how many transitions have been added (including
40 |     the blank ones at the beginning of an episode).
41 | *   `invalid_range`: np.array, an array with the indices of cursor-related
42 |     invalid transitions.
43 |
--------------------------------------------------------------------------------
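The write-observations, stack-at-sample-time behaviour described above can be
seen with a few direct calls. A sketch, assuming the standard constructor
arguments and the `add` / `sample_transition_batch` methods; all shapes and
counts are illustrative:

```python
# Hedged sketch: fill an out-of-graph replay buffer and draw a uniform batch.
import numpy as np
from dopamine.replay_memory import circular_replay_buffer

buf = circular_replay_buffer.OutOfGraphReplayBuffer(
    observation_shape=(84, 84),   # single frames are stored, not stacked states
    stack_size=4,
    replay_capacity=1000,
    batch_size=32)

frame = np.zeros((84, 84), dtype=np.uint8)
for _ in range(200):
  buf.add(frame, 0, 0.0, 0)       # observation, action, reward, terminal

batch = buf.sample_transition_batch()
states = batch[0]                 # stacking happens here, at sample time
print(states.shape)               # expected (32, 84, 84, 4)
```
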
/docs/api_docs/python/dopamine/replay_memory/circular_replay_buffer/WrappedReplayBuffer.md:
--------------------------------------------------------------------------------
1 | description: Wrapper of OutOfGraphReplayBuffer with an in graph sampling
2 | mechanism.
3 |
4 |
5 |
6 |
7 |
8 |
9 | # dopamine.replay_memory.circular_replay_buffer.WrappedReplayBuffer
10 |
11 |
12 |
13 |
21 |
22 | Wrapper of OutOfGraphReplayBuffer with an in graph sampling mechanism.
23 |
24 |
25 |
26 | #### Usage:
27 |
28 | To add a transition: call the add function.
29 |
30 | To sample a batch: Construct operations that depend on any of the tensors in the
31 | transition dictionary. Every sess.run that requires any of these tensors will
32 | sample a new transition.
33 |
--------------------------------------------------------------------------------
/docs/api_docs/python/dopamine/replay_memory/prioritized_replay_buffer.md:
--------------------------------------------------------------------------------
1 | description: An implementation of Prioritized Experience Replay (PER).
2 |
3 |
4 |
5 |
6 |
7 |
8 | # Module: dopamine.replay_memory.prioritized_replay_buffer
9 |
10 |
11 |
12 |
20 |
21 | An implementation of Prioritized Experience Replay (PER).
22 |
23 | This implementation is based on the paper "Prioritized Experience Replay" by Tom
24 | Schaul et al. (2015). Many thanks to Tom Schaul, John Quan, and Matteo Hessel
25 | for providing useful pointers on the algorithm and its implementation.
26 |
27 | ## Classes
28 |
29 | [`class OutOfGraphPrioritizedReplayBuffer`](../../dopamine/replay_memory/prioritized_replay_buffer/OutOfGraphPrioritizedReplayBuffer.md):
30 | An out-of-graph Replay Buffer for Prioritized Experience Replay.
31 |
32 | [`class WrappedPrioritizedReplayBuffer`](../../dopamine/replay_memory/prioritized_replay_buffer/WrappedPrioritizedReplayBuffer.md):
33 | Wrapper of OutOfGraphPrioritizedReplayBuffer with in-graph sampling.
34 |
--------------------------------------------------------------------------------
/docs/api_docs/python/dopamine/replay_memory/prioritized_replay_buffer/OutOfGraphPrioritizedReplayBuffer.md:
--------------------------------------------------------------------------------
1 | description: An out-of-graph Replay Buffer for Prioritized Experience Replay.
2 |
3 |
4 |
5 |
6 |
7 |
8 | # dopamine.replay_memory.prioritized_replay_buffer.OutOfGraphPrioritizedReplayBuffer
9 |
10 |
11 |
12 |
20 |
21 | An out-of-graph Replay Buffer for Prioritized Experience Replay.
22 |
23 | Inherits From:
24 | [`OutOfGraphReplayBuffer`](../../../dopamine/replay_memory/circular_replay_buffer/OutOfGraphReplayBuffer.md)
25 |
26 |
27 |
28 | See circular_replay_buffer.py for details.
29 |
--------------------------------------------------------------------------------
/docs/api_docs/python/dopamine/replay_memory/prioritized_replay_buffer/WrappedPrioritizedReplayBuffer.md:
--------------------------------------------------------------------------------
1 | description: Wrapper of OutOfGraphPrioritizedReplayBuffer with in-graph
2 | sampling.
3 |
4 |
5 |
6 |
7 |
8 |
9 | # dopamine.replay_memory.prioritized_replay_buffer.WrappedPrioritizedReplayBuffer
10 |
11 |
12 |
13 |
21 |
22 | Wrapper of OutOfGraphPrioritizedReplayBuffer with in-graph sampling.
23 |
24 | Inherits From:
25 | [`WrappedReplayBuffer`](../../../dopamine/replay_memory/circular_replay_buffer/WrappedReplayBuffer.md)
26 |
27 |
28 |
29 | #### Usage:
30 |
31 | * To add a transition: Call the add function.
32 |
33 | * To sample a batch: Query any of the tensors in the transition dictionary.
34 | Every sess.run that requires any of these tensors will sample a new
35 | transition.
36 |
--------------------------------------------------------------------------------
/docs/api_docs/python/dopamine/replay_memory/sum_tree.md:
--------------------------------------------------------------------------------
1 | description: A sum tree data structure.
2 |
3 |
4 |
5 |
6 |
7 |
8 | # Module: dopamine.replay_memory.sum_tree
9 |
10 |
11 |
12 |
20 |
21 | A sum tree data structure.
22 |
23 | Used for prioritized experience replay. See prioritized_replay_buffer.py and
24 | Schaul et al. (2015).
25 |
--------------------------------------------------------------------------------
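For intuition, the sum tree exists to make "sample an index with probability
proportional to its priority" cheap, with O(log n) updates and queries. The toy
sketch below is plain NumPy rather than the dopamine.replay_memory.sum_tree API;
it only shows the sampling rule that the tree accelerates.

```python
# Self-contained illustration of proportional sampling: a sum tree produces the
# same distribution as this O(n) cumulative-sum lookup, but in O(log n).
import numpy as np

priorities = np.array([0.1, 0.4, 0.2, 0.3])
cumulative = np.cumsum(priorities)          # [0.1, 0.5, 0.7, 1.0]

def sample_index(rng):
  query = rng.uniform(0.0, cumulative[-1])  # uniform over total priority mass
  return int(np.searchsorted(cumulative, query))

rng = np.random.default_rng(0)
counts = np.bincount([sample_index(rng) for _ in range(10000)], minlength=4)
print(counts / counts.sum())                # approx. [0.1, 0.4, 0.2, 0.3]
```
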
/dopamine/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2018 The Dopamine Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | name = 'dopamine'
16 |
--------------------------------------------------------------------------------
/dopamine/agents/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2018 The Dopamine Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
--------------------------------------------------------------------------------
/dopamine/agents/dqn/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2018 The Dopamine Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
--------------------------------------------------------------------------------
/dopamine/agents/dqn/configs/dqn.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters follow the classic Nature DQN, but we modify as necessary to
2 | # match those used in Rainbow (Hessel et al., 2018), to ensure apples-to-apples
3 | # comparison.
4 | import dopamine.discrete_domains.atari_lib
5 | import dopamine.discrete_domains.run_experiment
6 | import dopamine.agents.dqn.dqn_agent
7 | import dopamine.replay_memory.circular_replay_buffer
8 | import gin.tf.external_configurables
9 |
10 | DQNAgent.gamma = 0.99
11 | DQNAgent.update_horizon = 1
12 | DQNAgent.min_replay_history = 20000 # agent steps
13 | DQNAgent.update_period = 4
14 | DQNAgent.target_update_period = 8000 # agent steps
15 | DQNAgent.epsilon_train = 0.01
16 | DQNAgent.epsilon_eval = 0.001
17 | DQNAgent.epsilon_decay_period = 250000 # agent steps
18 | DQNAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version
19 | DQNAgent.optimizer = @tf.train.RMSPropOptimizer()
20 |
21 | tf.train.RMSPropOptimizer.learning_rate = 0.00025
22 | tf.train.RMSPropOptimizer.decay = 0.95
23 | tf.train.RMSPropOptimizer.momentum = 0.0
24 | tf.train.RMSPropOptimizer.epsilon = 0.00001
25 | tf.train.RMSPropOptimizer.centered = True
26 |
27 | atari_lib.create_atari_environment.game_name = 'Pong'
28 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017).
29 | atari_lib.create_atari_environment.sticky_actions = True
30 | create_agent.agent_name = 'dqn'
31 | Runner.num_iterations = 200
32 | Runner.training_steps = 250000 # agent steps
33 | Runner.evaluation_steps = 125000 # agent steps
34 | Runner.max_steps_per_episode = 27000 # agent steps
35 |
36 | WrappedReplayBuffer.replay_capacity = 1000000
37 | WrappedReplayBuffer.batch_size = 32
38 |
--------------------------------------------------------------------------------
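One convenient way to sanity-check a config such as dqn.gin before launching a run is to parse it with gin and read a few bindings back. The sketch below is not part of the repository: it assumes a working Dopamine/TensorFlow installation compatible with these configs (the gin file's own import lines pull the modules in) and that the path points at your checkout.

import gin

# Hypothetical check: parse the config above and print some of its bindings.
gin.parse_config_file('dopamine/agents/dqn/configs/dqn.gin')

print(gin.query_parameter('DQNAgent.gamma'))                  # 0.99
print(gin.query_parameter('DQNAgent.min_replay_history'))     # 20000
print(gin.query_parameter('WrappedReplayBuffer.batch_size'))  # 32

The same bindings can also be overridden at launch time via the gin_bindings flag exposed by the training entry points (e.g. "DQNAgent.gamma = 0.95").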
/dopamine/agents/dqn/configs/dqn_acrobot.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters for a simple DQN-style Acrobot agent. The hyperparameters
2 | # chosen achieve reasonable performance.
3 | import dopamine.discrete_domains.gym_lib
4 | import dopamine.discrete_domains.run_experiment
5 | import dopamine.agents.dqn.dqn_agent
6 | import dopamine.replay_memory.circular_replay_buffer
7 | import gin.tf.external_configurables
8 |
9 | DQNAgent.observation_shape = %gym_lib.ACROBOT_OBSERVATION_SHAPE
10 | DQNAgent.observation_dtype = %gym_lib.ACROBOT_OBSERVATION_DTYPE
11 | DQNAgent.stack_size = %gym_lib.ACROBOT_STACK_SIZE
12 | DQNAgent.network = @gym_lib.AcrobotDQNNetwork
13 | DQNAgent.gamma = 0.99
14 | DQNAgent.update_horizon = 1
15 | DQNAgent.min_replay_history = 500
16 | DQNAgent.update_period = 4
17 | DQNAgent.target_update_period = 100
18 | DQNAgent.epsilon_fn = @dqn_agent.identity_epsilon
19 | DQNAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version
20 | DQNAgent.optimizer = @tf.train.AdamOptimizer()
21 |
22 | tf.train.AdamOptimizer.learning_rate = 0.001
23 | tf.train.AdamOptimizer.epsilon = 0.0003125
24 |
25 | create_gym_environment.environment_name = 'Acrobot'
26 | create_gym_environment.version = 'v1'
27 | create_agent.agent_name = 'dqn'
28 | Runner.create_environment_fn = @gym_lib.create_gym_environment
29 | Runner.num_iterations = 500
30 | Runner.training_steps = 1000
31 | Runner.evaluation_steps = 1000
32 | Runner.max_steps_per_episode = 500
33 |
34 | WrappedReplayBuffer.replay_capacity = 50000
35 | WrappedReplayBuffer.batch_size = 128
36 |
--------------------------------------------------------------------------------
/dopamine/agents/dqn/configs/dqn_cartpole.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters for a simple DQN-style Cartpole agent. The hyperparameters
2 | # chosen achieve reasonable performance.
3 | import dopamine.discrete_domains.gym_lib
4 | import dopamine.discrete_domains.run_experiment
5 | import dopamine.agents.dqn.dqn_agent
6 | import dopamine.replay_memory.circular_replay_buffer
7 | import gin.tf.external_configurables
8 |
9 | DQNAgent.observation_shape = %gym_lib.CARTPOLE_OBSERVATION_SHAPE
10 | DQNAgent.observation_dtype = %gym_lib.CARTPOLE_OBSERVATION_DTYPE
11 | DQNAgent.stack_size = %gym_lib.CARTPOLE_STACK_SIZE
12 | DQNAgent.network = @gym_lib.CartpoleDQNNetwork
13 | DQNAgent.gamma = 0.99
14 | DQNAgent.update_horizon = 1
15 | DQNAgent.min_replay_history = 500
16 | DQNAgent.update_period = 4
17 | DQNAgent.target_update_period = 100
18 | DQNAgent.epsilon_fn = @dqn_agent.identity_epsilon
19 | DQNAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version
20 | DQNAgent.optimizer = @tf.train.AdamOptimizer()
21 |
22 | tf.train.AdamOptimizer.learning_rate = 0.001
23 | tf.train.AdamOptimizer.epsilon = 0.0003125
24 |
25 | create_gym_environment.environment_name = 'CartPole'
26 | create_gym_environment.version = 'v0'
27 | create_agent.agent_name = 'dqn'
28 | Runner.create_environment_fn = @gym_lib.create_gym_environment
29 | Runner.num_iterations = 500
30 | Runner.training_steps = 1000
31 | Runner.evaluation_steps = 1000
32 | Runner.max_steps_per_episode = 200 # Default max episode length.
33 |
34 | WrappedReplayBuffer.replay_capacity = 50000
35 | WrappedReplayBuffer.batch_size = 128
36 |
--------------------------------------------------------------------------------
/dopamine/agents/dqn/configs/dqn_icml.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters used for reporting DQN results in Bellemare et al. (2017).
2 | import dopamine.discrete_domains.atari_lib
3 | import dopamine.discrete_domains.run_experiment
4 | import dopamine.agents.dqn.dqn_agent
5 | import dopamine.replay_memory.circular_replay_buffer
6 | import gin.tf.external_configurables
7 |
8 | DQNAgent.gamma = 0.99
9 | DQNAgent.update_horizon = 1
10 | DQNAgent.min_replay_history = 50000 # agent steps
11 | DQNAgent.update_period = 4
12 | DQNAgent.target_update_period = 10000 # agent steps
13 | DQNAgent.epsilon_train = 0.01
14 | DQNAgent.epsilon_eval = 0.001
15 | DQNAgent.epsilon_decay_period = 1000000 # agent steps
16 | DQNAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version
17 | DQNAgent.optimizer = @tf.train.RMSPropOptimizer()
18 |
19 | tf.train.RMSPropOptimizer.learning_rate = 0.00025
20 | tf.train.RMSPropOptimizer.decay = 0.95
21 | tf.train.RMSPropOptimizer.momentum = 0.0
22 | tf.train.RMSPropOptimizer.epsilon = 0.00001
23 | tf.train.RMSPropOptimizer.centered = True
24 |
25 | atari_lib.create_atari_environment.game_name = 'Pong'
26 | # Deterministic ALE version used in the DQN Nature paper (Mnih et al., 2015).
27 | atari_lib.create_atari_environment.sticky_actions = False
28 | create_agent.agent_name = 'dqn'
29 | Runner.num_iterations = 200
30 | Runner.training_steps = 250000 # agent steps
31 | Runner.evaluation_steps = 125000 # agent steps
32 | Runner.max_steps_per_episode = 27000 # agent steps
33 |
34 | AtariPreprocessing.terminal_on_life_loss = True
35 |
36 | WrappedReplayBuffer.replay_capacity = 1000000
37 | WrappedReplayBuffer.batch_size = 32
38 |
--------------------------------------------------------------------------------
/dopamine/agents/dqn/configs/dqn_lunarlander.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters for a simple DQN-style LunarLander agent. The hyperparameters
2 | # chosen achieve reasonable performance.
3 | import dopamine.discrete_domains.gym_lib
4 | import dopamine.discrete_domains.run_experiment
5 | import dopamine.agents.dqn.dqn_agent
6 | import dopamine.replay_memory.circular_replay_buffer
7 | import gin.tf.external_configurables
8 |
9 | DQNAgent.observation_shape = %gym_lib.LUNAR_OBSERVATION_SHAPE
10 | DQNAgent.observation_dtype = %gym_lib.LUNAR_OBSERVATION_DTYPE
11 | DQNAgent.stack_size = %gym_lib.LUNAR_STACK_SIZE
12 | DQNAgent.network = @gym_lib.LunarLanderDQNNetwork
13 | DQNAgent.gamma = 0.99
14 | DQNAgent.update_horizon = 1
15 | DQNAgent.min_replay_history = 500
16 | DQNAgent.update_period = 4
17 | DQNAgent.target_update_period = 100
18 | DQNAgent.epsilon_fn = @dqn_agent.identity_epsilon
19 | DQNAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version
20 | DQNAgent.optimizer = @tf.train.AdamOptimizer()
21 |
22 | tf.train.AdamOptimizer.learning_rate = 0.001
23 | tf.train.AdamOptimizer.epsilon = 0.0003125
24 |
25 | create_gym_environment.environment_name = 'LunarLander'
26 | create_gym_environment.version = 'v2'
27 | create_agent.agent_name = 'dqn'
28 | Runner.create_environment_fn = @gym_lib.create_gym_environment
29 | Runner.num_iterations = 30
30 | Runner.training_steps = 4000
31 | Runner.evaluation_steps = 1000
32 | Runner.max_steps_per_episode = 1000
33 |
34 | WrappedReplayBuffer.replay_capacity = 50000
35 | WrappedReplayBuffer.batch_size = 128
36 |
--------------------------------------------------------------------------------
/dopamine/agents/dqn/configs/dqn_mountaincar.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters for a simple DQN-style MountainCar agent. The hyperparameters
2 | # chosen achieve reasonable performance.
3 | import dopamine.discrete_domains.gym_lib
4 | import dopamine.discrete_domains.run_experiment
5 | import dopamine.agents.dqn.dqn_agent
6 | import dopamine.replay_memory.circular_replay_buffer
7 | import gin.tf.external_configurables
8 |
9 | DQNAgent.observation_shape = %gym_lib.MOUNTAINCAR_OBSERVATION_SHAPE
10 | DQNAgent.observation_dtype = %gym_lib.MOUNTAINCAR_OBSERVATION_DTYPE
11 | DQNAgent.stack_size = %gym_lib.MOUNTAINCAR_STACK_SIZE
12 | DQNAgent.network = @gym_lib.MountainCarDQNNetwork
13 | DQNAgent.gamma = 0.99
14 | DQNAgent.update_horizon = 1
15 | DQNAgent.min_replay_history = 500
16 | DQNAgent.update_period = 4
17 | DQNAgent.target_update_period = 100
18 | DQNAgent.epsilon_fn = @dqn_agent.identity_epsilon
19 | DQNAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version
20 | DQNAgent.optimizer = @tf.train.AdamOptimizer()
21 |
22 | tf.train.AdamOptimizer.learning_rate = 0.001
23 | tf.train.AdamOptimizer.epsilon = 0.0003125
24 |
25 | create_gym_environment.environment_name = 'MountainCar'
26 | create_gym_environment.version = 'v0'
27 | create_agent.agent_name = 'dqn'
28 | Runner.create_environment_fn = @gym_lib.create_gym_environment
29 | Runner.num_iterations = 30
30 | Runner.training_steps = 1000
31 | Runner.evaluation_steps = 1000
32 | Runner.max_steps_per_episode = 600 # Default max episode length.
33 |
34 | WrappedReplayBuffer.replay_capacity = 50000
35 | WrappedReplayBuffer.batch_size = 128
36 |
--------------------------------------------------------------------------------
/dopamine/agents/dqn/configs/dqn_nature.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters used in Mnih et al. (2015).
2 | import dopamine.discrete_domains.atari_lib
3 | import dopamine.discrete_domains.run_experiment
4 | import dopamine.agents.dqn.dqn_agent
5 | import dopamine.replay_memory.circular_replay_buffer
6 | import gin.tf.external_configurables
7 |
8 | DQNAgent.gamma = 0.99
9 | DQNAgent.update_horizon = 1
10 | DQNAgent.min_replay_history = 50000 # agent steps
11 | DQNAgent.update_period = 4
12 | DQNAgent.target_update_period = 10000 # agent steps
13 | DQNAgent.epsilon_train = 0.1
14 | DQNAgent.epsilon_eval = 0.05
15 | DQNAgent.epsilon_decay_period = 1000000 # agent steps
16 | DQNAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version
17 | DQNAgent.optimizer = @tf.train.RMSPropOptimizer()
18 |
19 | tf.train.RMSPropOptimizer.learning_rate = 0.00025
20 | tf.train.RMSPropOptimizer.decay = 0.95
21 | tf.train.RMSPropOptimizer.momentum = 0.0
22 | tf.train.RMSPropOptimizer.epsilon = 0.00001
23 | tf.train.RMSPropOptimizer.centered = True
24 |
25 | atari_lib.create_atari_environment.game_name = 'Pong'
26 | # Deterministic ALE version used in the DQN Nature paper (Mnih et al., 2015).
27 | atari_lib.create_atari_environment.sticky_actions = False
28 | create_agent.agent_name = 'dqn'
29 | Runner.num_iterations = 200
30 | Runner.training_steps = 250000 # agent steps
31 | Runner.evaluation_steps = 125000 # agent steps
32 | Runner.max_steps_per_episode = 27000 # agent steps
33 |
34 | AtariPreprocessing.terminal_on_life_loss = True
35 |
36 | WrappedReplayBuffer.replay_capacity = 1000000
37 | WrappedReplayBuffer.batch_size = 32
38 |
--------------------------------------------------------------------------------
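The exploration bindings in dqn_nature.gin (min_replay_history = 50000, epsilon_decay_period = 1000000, epsilon_train = 0.1) describe a schedule in which epsilon stays at 1.0 while the replay buffer warms up and then decays linearly to epsilon_train. The helper below re-derives that schedule for illustration only; the canonical version lives in dopamine/agents/dqn/dqn_agent.py.

def linear_epsilon(step, warmup_steps=50000, decay_period=1000000,
                   final_epsilon=0.1):
  """Epsilon is 1.0 during warmup, then decays linearly to final_epsilon."""
  steps_left = warmup_steps + decay_period - step
  bonus = (1.0 - final_epsilon) * steps_left / decay_period
  bonus = min(max(bonus, 0.0), 1.0 - final_epsilon)  # Clip to [0, 1 - eps].
  return final_epsilon + bonus


print(linear_epsilon(0))        # 1.0  (still warming up)
print(linear_epsilon(550000))   # 0.55 (halfway through the decay window)
print(linear_epsilon(2000000))  # 0.1  (fully decayed)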
/dopamine/agents/dqn/configs/dqn_profiling.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters follow the classic Nature DQN, but we modify as necessary to
2 | # match those used in Rainbow (Hessel et al., 2018), to ensure apples-to-apples
3 | # comparison.
4 | import dopamine.discrete_domains.atari_lib
5 | import dopamine.discrete_domains.run_experiment
6 | import dopamine.agents.dqn.dqn_agent
7 | import dopamine.replay_memory.circular_replay_buffer
8 | import gin.tf.external_configurables
9 |
10 | DQNAgent.gamma = 0.99
11 | DQNAgent.update_horizon = 1
12 | DQNAgent.min_replay_history = 100 # agent steps
13 | DQNAgent.update_period = 4
14 | DQNAgent.target_update_period = 8000 # agent steps
15 | DQNAgent.epsilon_fn = @dqn_agent.identity_epsilon
16 | DQNAgent.epsilon_train = 0.0
17 | DQNAgent.epsilon_eval = 0.0
18 | DQNAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version
19 | DQNAgent.optimizer = @tf.train.RMSPropOptimizer()
20 |
21 | tf.train.RMSPropOptimizer.learning_rate = 0.00025
22 | tf.train.RMSPropOptimizer.decay = 0.95
23 | tf.train.RMSPropOptimizer.momentum = 0.0
24 | tf.train.RMSPropOptimizer.epsilon = 0.00001
25 | tf.train.RMSPropOptimizer.centered = True
26 |
27 | atari_lib.create_atari_environment.game_name = 'Pong'
28 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017).
29 | atari_lib.create_atari_environment.sticky_actions = True
30 | create_runner.schedule = 'continuous_train'
31 | create_agent.agent_name = 'dqn'
32 | Runner.num_iterations = 200
33 | Runner.training_steps = 250000 # agent steps
34 | Runner.evaluation_steps = 125000 # agent steps
35 | Runner.max_steps_per_episode = 27000 # agent steps
36 |
37 | WrappedReplayBuffer.replay_capacity = 1000000
38 | WrappedReplayBuffer.batch_size = 32
39 |
--------------------------------------------------------------------------------
/dopamine/agents/implicit_quantile/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2018 The Dopamine Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
--------------------------------------------------------------------------------
/dopamine/agents/implicit_quantile/configs/implicit_quantile.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters follow Dabney et al. (2018), but we modify as necessary to
2 | # match those used in Rainbow (Hessel et al., 2018), to ensure apples-to-apples
3 | # comparison.
4 | import dopamine.agents.implicit_quantile.implicit_quantile_agent
5 | import dopamine.agents.rainbow.rainbow_agent
6 | import dopamine.discrete_domains.atari_lib
7 | import dopamine.discrete_domains.run_experiment
8 | import dopamine.replay_memory.prioritized_replay_buffer
9 | import gin.tf.external_configurables
10 |
11 | ImplicitQuantileAgent.kappa = 1.0
12 | ImplicitQuantileAgent.num_tau_samples = 64
13 | ImplicitQuantileAgent.num_tau_prime_samples = 64
14 | ImplicitQuantileAgent.num_quantile_samples = 32
15 | RainbowAgent.gamma = 0.99
16 | RainbowAgent.update_horizon = 3
17 | RainbowAgent.min_replay_history = 20000 # agent steps
18 | RainbowAgent.update_period = 4
19 | RainbowAgent.target_update_period = 8000 # agent steps
20 | RainbowAgent.epsilon_train = 0.01
21 | RainbowAgent.epsilon_eval = 0.001
22 | RainbowAgent.epsilon_decay_period = 250000 # agent steps
23 | # IQN currently does not support prioritized replay.
24 | RainbowAgent.replay_scheme = 'uniform'
25 | RainbowAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version
26 | RainbowAgent.optimizer = @tf.train.AdamOptimizer()
27 |
28 | tf.train.AdamOptimizer.learning_rate = 0.00005
29 | tf.train.AdamOptimizer.epsilon = 0.0003125
30 |
31 | atari_lib.create_atari_environment.game_name = 'Pong'
32 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017).
33 | atari_lib.create_atari_environment.sticky_actions = True
34 | create_agent.agent_name = 'implicit_quantile'
35 | Runner.num_iterations = 200
36 | Runner.training_steps = 250000
37 | Runner.evaluation_steps = 125000
38 | Runner.max_steps_per_episode = 27000
39 |
40 | WrappedPrioritizedReplayBuffer.replay_capacity = 1000000
41 | WrappedPrioritizedReplayBuffer.batch_size = 32
42 |
--------------------------------------------------------------------------------
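The kappa and tau-sample counts bound above parameterize the quantile regression loss of IQN (Dabney et al., 2018): each sampled quantile fraction tau applies an asymmetric weight to a Huber loss with threshold kappa on the TD residual. The NumPy sketch below is illustrative only and is not the code in implicit_quantile_agent.py.

import numpy as np


def quantile_huber_loss(td_errors, taus, kappa=1.0):
  """Mean quantile Huber loss for residuals `td_errors` at fractions `taus`."""
  abs_err = np.abs(td_errors)
  # Huber loss with threshold kappa.
  huber = np.where(abs_err <= kappa,
                   0.5 * td_errors ** 2,
                   kappa * (abs_err - 0.5 * kappa))
  # Asymmetric quantile weight: under- and over-estimation are penalized
  # differently depending on tau.
  weight = np.abs(taus - (td_errors < 0.0).astype(np.float64))
  return float(np.mean(weight * huber / kappa))


# Two sampled quantile fractions and their TD residuals.
print(quantile_huber_loss(np.array([0.4, -2.0]), np.array([0.25, 0.75])))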
/dopamine/agents/implicit_quantile/configs/implicit_quantile_icml.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters follow Dabney et al. (2018).
2 | import dopamine.agents.implicit_quantile.implicit_quantile_agent
3 | import dopamine.agents.rainbow.rainbow_agent
4 | import dopamine.discrete_domains.atari_lib
5 | import dopamine.discrete_domains.run_experiment
6 | import dopamine.replay_memory.prioritized_replay_buffer
7 | import gin.tf.external_configurables
8 |
9 | ImplicitQuantileAgent.kappa = 1.0
10 | ImplicitQuantileAgent.num_tau_samples = 64
11 | ImplicitQuantileAgent.num_tau_prime_samples = 64
12 | ImplicitQuantileAgent.num_quantile_samples = 32
13 | RainbowAgent.gamma = 0.99
14 | RainbowAgent.update_horizon = 1
15 | RainbowAgent.min_replay_history = 50000 # agent steps
16 | RainbowAgent.update_period = 4
17 | RainbowAgent.target_update_period = 10000 # agent steps
18 | RainbowAgent.epsilon_train = 0.01
19 | RainbowAgent.epsilon_eval = 0.001
20 | RainbowAgent.epsilon_decay_period = 1000000 # agent steps
21 | RainbowAgent.replay_scheme = 'uniform'
22 | RainbowAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version
23 | RainbowAgent.optimizer = @tf.train.AdamOptimizer()
24 |
25 | tf.train.AdamOptimizer.learning_rate = 0.00005
26 | tf.train.AdamOptimizer.epsilon = 0.0003125
27 |
28 | atari_lib.create_atari_environment.game_name = 'Pong'
29 | atari_lib.create_atari_environment.sticky_actions = False
30 | create_agent.agent_name = 'implicit_quantile'
31 | Runner.num_iterations = 200
32 | Runner.training_steps = 250000
33 | Runner.evaluation_steps = 125000
34 | Runner.max_steps_per_episode = 27000
35 |
36 | AtariPreprocessing.terminal_on_life_loss = True
37 |
38 | WrappedPrioritizedReplayBuffer.replay_capacity = 1000000
39 | WrappedPrioritizedReplayBuffer.batch_size = 32
40 |
--------------------------------------------------------------------------------
/dopamine/agents/implicit_quantile/configs/implicit_quantile_profiling.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters follow Dabney et al. (2018), but we modify as necessary to
2 | # match those used in Rainbow (Hessel et al., 2018), to ensure apples-to-apples
3 | # comparison.
4 | import dopamine.agents.implicit_quantile.implicit_quantile_agent
5 | import dopamine.agents.rainbow.rainbow_agent
6 | import dopamine.discrete_domains.atari_lib
7 | import dopamine.discrete_domains.run_experiment
8 | import dopamine.replay_memory.prioritized_replay_buffer
9 | import gin.tf.external_configurables
10 |
11 | ImplicitQuantileAgent.kappa = 1.0
12 | ImplicitQuantileAgent.num_tau_samples = 64
13 | ImplicitQuantileAgent.num_tau_prime_samples = 64
14 | ImplicitQuantileAgent.num_quantile_samples = 32
15 | RainbowAgent.gamma = 0.99
16 | RainbowAgent.update_horizon = 3
17 | RainbowAgent.min_replay_history = 100 # agent steps
18 | RainbowAgent.update_period = 4
19 | RainbowAgent.target_update_period = 8000 # agent steps
20 | RainbowAgent.epsilon_fn = @dqn_agent.identity_epsilon
21 | RainbowAgent.epsilon_train = 0.0
22 | RainbowAgent.epsilon_eval = 0.0
23 | # IQN currently does not support prioritized replay.
24 | RainbowAgent.replay_scheme = 'uniform'
25 | RainbowAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version
26 | RainbowAgent.optimizer = @tf.train.AdamOptimizer()
27 |
28 | tf.train.AdamOptimizer.learning_rate = 0.00005
29 | tf.train.AdamOptimizer.epsilon = 0.0003125
30 |
31 | atari_lib.create_atari_environment.game_name = 'Pong'
32 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017).
33 | atari_lib.create_atari_environment.sticky_actions = True
34 | create_runner.schedule = 'continuous_train'
35 | create_agent.agent_name = 'implicit_quantile'
36 | Runner.num_iterations = 200
37 | Runner.training_steps = 250000
38 | Runner.evaluation_steps = 125000
39 | Runner.max_steps_per_episode = 27000
40 |
41 | WrappedPrioritizedReplayBuffer.replay_capacity = 1000000
42 | WrappedPrioritizedReplayBuffer.batch_size = 32
43 |
--------------------------------------------------------------------------------
/dopamine/agents/rainbow/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2018 The Dopamine Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
--------------------------------------------------------------------------------
/dopamine/agents/rainbow/configs/c51.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters follow the settings from Bellemare et al. (2017), but we
2 | # modify as necessary to match those used in Rainbow (Hessel et al., 2018), to
3 | # ensure apples-to-apples comparison.
4 | import dopamine.agents.rainbow.rainbow_agent
5 | import dopamine.discrete_domains.atari_lib
6 | import dopamine.discrete_domains.run_experiment
7 | import dopamine.replay_memory.prioritized_replay_buffer
8 | import gin.tf.external_configurables
9 |
10 | RainbowAgent.num_atoms = 51
11 | RainbowAgent.vmax = 10.
12 | RainbowAgent.gamma = 0.99
13 | RainbowAgent.update_horizon = 1
14 | RainbowAgent.min_replay_history = 20000 # agent steps
15 | RainbowAgent.update_period = 4
16 | RainbowAgent.target_update_period = 8000 # agent steps
17 | RainbowAgent.epsilon_train = 0.01
18 | RainbowAgent.epsilon_eval = 0.001
19 | RainbowAgent.epsilon_decay_period = 250000 # agent steps
20 | RainbowAgent.replay_scheme = 'uniform'
21 | RainbowAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version
22 | RainbowAgent.optimizer = @tf.train.AdamOptimizer()
23 |
24 | tf.train.AdamOptimizer.learning_rate = 0.00025
25 | tf.train.AdamOptimizer.epsilon = 0.0003125
26 |
27 | atari_lib.create_atari_environment.game_name = 'Pong'
28 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017).
29 | atari_lib.create_atari_environment.sticky_actions = True
30 | create_agent.agent_name = 'rainbow'
31 | Runner.num_iterations = 200
32 | Runner.training_steps = 250000 # agent steps
33 | Runner.evaluation_steps = 125000 # agent steps
34 | Runner.max_steps_per_episode = 27000 # agent steps
35 |
36 | WrappedPrioritizedReplayBuffer.replay_capacity = 1000000
37 | WrappedPrioritizedReplayBuffer.batch_size = 32
38 |
--------------------------------------------------------------------------------
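num_atoms and vmax above determine C51's categorical support: num_atoms evenly spaced return values, here assumed to span the symmetric interval [-vmax, vmax] as in the C51/Rainbow agents. The snippet below only makes that support explicit; the projection of the Bellman target onto it is implemented in rainbow_agent.py (project_distribution).

import numpy as np

num_atoms = 51
vmax = 10.0
# Assumed symmetric support [-vmax, vmax], matching the bindings above.
support = np.linspace(-vmax, vmax, num_atoms)
delta_z = support[1] - support[0]

print(delta_z)      # 0.4: spacing between adjacent atoms.
print(support[:3])  # [-10. -9.6 -9.2]
# The Q-value is the expectation of the predicted distribution:
#   q(s, a) = sum_i p_i(s, a) * support[i]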
/dopamine/agents/rainbow/configs/c51_acrobot.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters for a simple C51-style Acrobot agent. The hyperparameters
2 | # chosen achieve reasonable performance.
3 | import dopamine.agents.dqn.dqn_agent
4 | import dopamine.agents.rainbow.rainbow_agent
5 | import dopamine.discrete_domains.gym_lib
6 | import dopamine.discrete_domains.run_experiment
7 | import dopamine.replay_memory.prioritized_replay_buffer
8 | import gin.tf.external_configurables
9 |
10 | RainbowAgent.observation_shape = %gym_lib.ACROBOT_OBSERVATION_SHAPE
11 | RainbowAgent.observation_dtype = %gym_lib.ACROBOT_OBSERVATION_DTYPE
12 | RainbowAgent.stack_size = %gym_lib.ACROBOT_STACK_SIZE
13 | RainbowAgent.network = @gym_lib.AcrobotRainbowNetwork
14 | RainbowAgent.num_atoms = 51
15 | RainbowAgent.vmax = 10.
16 | RainbowAgent.gamma = 0.99
17 | RainbowAgent.update_horizon = 1
18 | RainbowAgent.min_replay_history = 500
19 | RainbowAgent.update_period = 4
20 | RainbowAgent.target_update_period = 100
21 | RainbowAgent.epsilon_fn = @dqn_agent.identity_epsilon
22 | RainbowAgent.replay_scheme = 'uniform'
23 | RainbowAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version
24 | RainbowAgent.optimizer = @tf.train.AdamOptimizer()
25 |
26 | tf.train.AdamOptimizer.learning_rate = 0.1
27 | tf.train.AdamOptimizer.epsilon = 0.0003125
28 |
29 | create_gym_environment.environment_name = 'Acrobot'
30 | create_gym_environment.version = 'v1'
31 | create_agent.agent_name = 'rainbow'
32 | Runner.create_environment_fn = @gym_lib.create_gym_environment
33 | Runner.num_iterations = 500
34 | Runner.training_steps = 1000
35 | Runner.evaluation_steps = 1000
36 | Runner.max_steps_per_episode = 500
37 |
38 | WrappedPrioritizedReplayBuffer.replay_capacity = 50000
39 | WrappedPrioritizedReplayBuffer.batch_size = 128
40 |
--------------------------------------------------------------------------------
/dopamine/agents/rainbow/configs/c51_cartpole.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters for a simple C51-style Cartpole agent. The hyperparameters
2 | # chosen achieve reasonable performance.
3 | import dopamine.agents.dqn.dqn_agent
4 | import dopamine.agents.rainbow.rainbow_agent
5 | import dopamine.discrete_domains.gym_lib
6 | import dopamine.discrete_domains.run_experiment
7 | import dopamine.replay_memory.prioritized_replay_buffer
8 | import gin.tf.external_configurables
9 |
10 | RainbowAgent.observation_shape = %gym_lib.CARTPOLE_OBSERVATION_SHAPE
11 | RainbowAgent.observation_dtype = %gym_lib.CARTPOLE_OBSERVATION_DTYPE
12 | RainbowAgent.stack_size = %gym_lib.CARTPOLE_STACK_SIZE
13 | RainbowAgent.network = @gym_lib.CartpoleRainbowNetwork
14 | RainbowAgent.num_atoms = 201
15 | RainbowAgent.vmax = 100.
16 | RainbowAgent.gamma = 0.99
17 | RainbowAgent.epsilon_eval = 0.
18 | RainbowAgent.epsilon_train = 0.01
19 | RainbowAgent.update_horizon = 1
20 | RainbowAgent.min_replay_history = 500
21 | RainbowAgent.update_period = 1
22 | RainbowAgent.target_update_period = 1
23 | RainbowAgent.epsilon_fn = @dqn_agent.identity_epsilon
24 | RainbowAgent.replay_scheme = 'uniform'
25 | RainbowAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version
26 | RainbowAgent.optimizer = @tf.train.AdamOptimizer()
27 |
28 | tf.train.AdamOptimizer.learning_rate = 0.00001
29 | tf.train.AdamOptimizer.epsilon = 0.00000390625
30 |
31 | create_gym_environment.environment_name = 'CartPole'
32 | create_gym_environment.version = 'v0'
33 | create_agent.agent_name = 'rainbow'
34 | Runner.create_environment_fn = @gym_lib.create_gym_environment
35 | Runner.num_iterations = 400
36 | Runner.training_steps = 1000
37 | Runner.evaluation_steps = 1000
38 | Runner.max_steps_per_episode = 200 # Default max episode length.
39 |
40 | WrappedPrioritizedReplayBuffer.replay_capacity = 50000
41 | WrappedPrioritizedReplayBuffer.batch_size = 128
42 |
--------------------------------------------------------------------------------
/dopamine/agents/rainbow/configs/c51_icml.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters used in Bellemare et al. (2017).
2 | import dopamine.agents.rainbow.rainbow_agent
3 | import dopamine.discrete_domains.atari_lib
4 | import dopamine.discrete_domains.run_experiment
5 | import dopamine.replay_memory.prioritized_replay_buffer
6 | import gin.tf.external_configurables
7 |
8 | RainbowAgent.num_atoms = 51
9 | RainbowAgent.vmax = 10.
10 | RainbowAgent.gamma = 0.99
11 | RainbowAgent.update_horizon = 1
12 | RainbowAgent.min_replay_history = 50000 # agent steps
13 | RainbowAgent.update_period = 4
14 | RainbowAgent.target_update_period = 10000 # agent steps
15 | RainbowAgent.epsilon_train = 0.01
16 | RainbowAgent.epsilon_eval = 0.001
17 | RainbowAgent.epsilon_decay_period = 1000000 # agent steps
18 | RainbowAgent.replay_scheme = 'uniform'
19 | RainbowAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version
20 | RainbowAgent.optimizer = @tf.train.AdamOptimizer()
21 |
22 | tf.train.AdamOptimizer.learning_rate = 0.00025
23 | tf.train.AdamOptimizer.epsilon = 0.0003125
24 |
25 | atari_lib.create_atari_environment.game_name = 'Pong'
26 | # Deterministic ALE version used in the DQN Nature paper (Mnih et al., 2015).
27 | atari_lib.create_atari_environment.sticky_actions = False
28 | create_agent.agent_name = 'rainbow'
29 | Runner.num_iterations = 200
30 | Runner.training_steps = 250000 # agent steps
31 | Runner.evaluation_steps = 125000 # agent steps
32 | Runner.max_steps_per_episode = 27000 # agent steps
33 |
34 | AtariPreprocessing.terminal_on_life_loss = True
35 |
36 | WrappedPrioritizedReplayBuffer.replay_capacity = 1000000
37 | WrappedPrioritizedReplayBuffer.batch_size = 32
38 |
--------------------------------------------------------------------------------
/dopamine/agents/rainbow/configs/c51_profiling.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters follow the settings from Bellemare et al. (2017), but we
2 | # modify as necessary to match those used in Rainbow (Hessel et al., 2018), to
3 | # ensure apples-to-apples comparison.
4 | import dopamine.agents.rainbow.rainbow_agent
5 | import dopamine.discrete_domains.atari_lib
6 | import dopamine.discrete_domains.run_experiment
7 | import dopamine.replay_memory.prioritized_replay_buffer
8 | import gin.tf.external_configurables
9 |
10 | RainbowAgent.num_atoms = 51
11 | RainbowAgent.vmax = 10.
12 | RainbowAgent.gamma = 0.99
13 | RainbowAgent.update_horizon = 1
14 | RainbowAgent.min_replay_history = 100 # agent steps
15 | RainbowAgent.update_period = 4
16 | RainbowAgent.target_update_period = 8000 # agent steps
17 | RainbowAgent.epsilon_fn = @dqn_agent.identity_epsilon
18 | RainbowAgent.epsilon_train = 0.0
19 | RainbowAgent.epsilon_eval = 0.0
20 | RainbowAgent.replay_scheme = 'uniform'
21 | RainbowAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version
22 | RainbowAgent.optimizer = @tf.train.AdamOptimizer()
23 |
24 | tf.train.AdamOptimizer.learning_rate = 0.00025
25 | tf.train.AdamOptimizer.epsilon = 0.0003125
26 |
27 | atari_lib.create_atari_environment.game_name = 'Pong'
28 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017).
29 | atari_lib.create_atari_environment.sticky_actions = True
30 | create_runner.schedule = 'continuous_train'
31 | create_agent.agent_name = 'rainbow'
32 | Runner.num_iterations = 200
33 | Runner.training_steps = 250000 # agent steps
34 | Runner.evaluation_steps = 125000 # agent steps
35 | Runner.max_steps_per_episode = 27000 # agent steps
36 |
37 | WrappedPrioritizedReplayBuffer.replay_capacity = 1000000
38 | WrappedPrioritizedReplayBuffer.batch_size = 32
39 |
--------------------------------------------------------------------------------
/dopamine/agents/rainbow/configs/rainbow_aaai.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters follow Hessel et al. (2018).
2 | import dopamine.agents.rainbow.rainbow_agent
3 | import dopamine.discrete_domains.atari_lib
4 | import dopamine.discrete_domains.run_experiment
5 | import dopamine.replay_memory.prioritized_replay_buffer
6 | import gin.tf.external_configurables
7 |
8 | RainbowAgent.num_atoms = 51
9 | RainbowAgent.vmax = 10.
10 | RainbowAgent.gamma = 0.99
11 | RainbowAgent.update_horizon = 3
12 | RainbowAgent.min_replay_history = 20000 # agent steps
13 | RainbowAgent.update_period = 4
14 | RainbowAgent.target_update_period = 8000 # agent steps
15 | RainbowAgent.epsilon_train = 0.01
16 | RainbowAgent.epsilon_eval = 0.001
17 | RainbowAgent.epsilon_decay_period = 250000 # agent steps
18 | RainbowAgent.replay_scheme = 'prioritized'
19 | RainbowAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version
20 | RainbowAgent.optimizer = @tf.train.AdamOptimizer()
21 |
22 | # Note these parameters are different from C51's.
23 | tf.train.AdamOptimizer.learning_rate = 0.0000625
24 | tf.train.AdamOptimizer.epsilon = 0.00015
25 |
26 | atari_lib.create_atari_environment.game_name = 'Pong'
27 | # Deterministic ALE version used in the AAAI paper.
28 | atari_lib.create_atari_environment.sticky_actions = False
29 | create_agent.agent_name = 'rainbow'
30 | Runner.num_iterations = 200
31 | Runner.training_steps = 250000 # agent steps
32 | Runner.evaluation_steps = 125000 # agent steps
33 | Runner.max_steps_per_episode = 27000 # agent steps
34 |
35 | AtariPreprocessing.terminal_on_life_loss = True
36 |
37 | WrappedPrioritizedReplayBuffer.replay_capacity = 1000000
38 | WrappedPrioritizedReplayBuffer.batch_size = 32
39 |
--------------------------------------------------------------------------------
/dopamine/agents/rainbow/configs/rainbow_acrobot.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters for a simple Rainbow-style Acrobot agent. The hyperparameters
2 | # chosen achieve reasonable performance.
3 | import dopamine.agents.rainbow.rainbow_agent
4 | import dopamine.discrete_domains.gym_lib
5 | import dopamine.discrete_domains.run_experiment
6 | import dopamine.replay_memory.prioritized_replay_buffer
7 | import gin.tf.external_configurables
8 |
9 | RainbowAgent.observation_shape = %gym_lib.ACROBOT_OBSERVATION_SHAPE
10 | RainbowAgent.observation_dtype = %gym_lib.ACROBOT_OBSERVATION_DTYPE
11 | RainbowAgent.stack_size = %gym_lib.ACROBOT_STACK_SIZE
12 | RainbowAgent.network = @gym_lib.AcrobotRainbowNetwork
13 | RainbowAgent.num_atoms = 51
14 | RainbowAgent.vmax = 10.
15 | RainbowAgent.gamma = 0.99
16 | RainbowAgent.update_horizon = 3
17 | RainbowAgent.min_replay_history = 500
18 | RainbowAgent.update_period = 4
19 | RainbowAgent.target_update_period = 100
20 | RainbowAgent.epsilon_fn = @dqn_agent.identity_epsilon
21 | RainbowAgent.replay_scheme = 'prioritized'
22 | RainbowAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version
23 | RainbowAgent.optimizer = @tf.train.AdamOptimizer()
24 |
25 | tf.train.AdamOptimizer.learning_rate = 0.09
26 | tf.train.AdamOptimizer.epsilon = 0.0003125
27 |
28 | create_gym_environment.environment_name = 'Acrobot'
29 | create_gym_environment.version = 'v1'
30 | create_agent.agent_name = 'rainbow'
31 | Runner.create_environment_fn = @gym_lib.create_gym_environment
32 | Runner.num_iterations = 500
33 | Runner.training_steps = 1000
34 | Runner.evaluation_steps = 1000
35 | Runner.max_steps_per_episode = 500
36 |
37 | WrappedPrioritizedReplayBuffer.replay_capacity = 50000
38 | WrappedPrioritizedReplayBuffer.batch_size = 128
39 |
--------------------------------------------------------------------------------
/dopamine/agents/rainbow/configs/rainbow_cartpole.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters for a simple Rainbow-style Cartpole agent. The
2 | # hyperparameters chosen achieve reasonable performance.
3 | import dopamine.agents.dqn.dqn_agent
4 | import dopamine.agents.rainbow.rainbow_agent
5 | import dopamine.discrete_domains.gym_lib
6 | import dopamine.discrete_domains.run_experiment
7 | import dopamine.replay_memory.prioritized_replay_buffer
8 | import gin.tf.external_configurables
9 |
10 | RainbowAgent.observation_shape = %gym_lib.CARTPOLE_OBSERVATION_SHAPE
11 | RainbowAgent.observation_dtype = %gym_lib.CARTPOLE_OBSERVATION_DTYPE
12 | RainbowAgent.stack_size = %gym_lib.CARTPOLE_STACK_SIZE
13 | RainbowAgent.network = @gym_lib.CartpoleRainbowNetwork
14 | RainbowAgent.num_atoms = 51
15 | RainbowAgent.vmax = 10.
16 | RainbowAgent.gamma = 0.99
17 | RainbowAgent.update_horizon = 3
18 | RainbowAgent.min_replay_history = 500
19 | RainbowAgent.update_period = 4
20 | RainbowAgent.target_update_period = 100
21 | RainbowAgent.epsilon_fn = @dqn_agent.identity_epsilon
22 | RainbowAgent.replay_scheme = 'prioritized'
23 | RainbowAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version
24 | RainbowAgent.optimizer = @tf.train.AdamOptimizer()
25 |
26 | tf.train.AdamOptimizer.learning_rate = 0.09
27 | tf.train.AdamOptimizer.epsilon = 0.0003125
28 |
29 | create_gym_environment.environment_name = 'CartPole'
30 | create_gym_environment.version = 'v0'
31 | create_agent.agent_name = 'rainbow'
32 | Runner.create_environment_fn = @gym_lib.create_gym_environment
33 | Runner.num_iterations = 500
34 | Runner.training_steps = 1000
35 | Runner.evaluation_steps = 1000
36 | Runner.max_steps_per_episode = 200 # Default max episode length.
37 |
38 | WrappedPrioritizedReplayBuffer.replay_capacity = 50000
39 | WrappedPrioritizedReplayBuffer.batch_size = 128
40 |
--------------------------------------------------------------------------------
/dopamine/agents/rainbow/configs/rainbow_dqnpro.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters follow Hessel et al. (2018), with sticky_actions set to
2 | # False (the deterministic ALE), as in the original paper.
3 | import dopamine.agents.rainbow.rainbow_agent
4 | import dopamine.discrete_domains.atari_lib
5 | import dopamine.discrete_domains.run_experiment
6 | import dopamine.replay_memory.prioritized_replay_buffer
7 | import gin.tf.external_configurables
8 |
9 | RainbowAgent.num_atoms = 51
10 | RainbowAgent.vmax = 10.
11 | RainbowAgent.gamma = 0.99
12 | RainbowAgent.mu = 0.0
13 | RainbowAgent.nu = 0.0
14 | RainbowAgent.update_horizon = 3
15 | RainbowAgent.min_replay_history = 20000 # agent steps
16 | RainbowAgent.update_period = 4
17 | RainbowAgent.target_update_period = 8000 # agent steps
18 | RainbowAgent.epsilon_train = 0.01
19 | RainbowAgent.epsilon_eval = 0.001
20 | RainbowAgent.epsilon_decay_period = 250000 # agent steps
21 | RainbowAgent.replay_scheme = 'prioritized'
22 | RainbowAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version
23 | RainbowAgent.optimizer = @tf.train.AdamOptimizer()
24 |
25 | # Note these parameters are different from C51's.
26 | tf.train.AdamOptimizer.learning_rate = 0.0000625
27 | tf.train.AdamOptimizer.epsilon = 0.00015
28 |
29 | atari_lib.create_atari_environment.game_name = 'Pong'
30 | # Deterministic ALE version used in the AAAI paper.
31 | atari_lib.create_atari_environment.sticky_actions = False
32 | create_agent.agent_name = 'rainbow'
33 | Runner.num_iterations = 200
34 | Runner.training_steps = 250000 # agent steps
35 | Runner.evaluation_steps = 125000 # agent steps
36 | Runner.max_steps_per_episode = 27000 # agent steps
37 |
38 | AtariPreprocessing.terminal_on_life_loss = True
39 |
40 | WrappedPrioritizedReplayBuffer.replay_capacity = 1000000
41 | WrappedPrioritizedReplayBuffer.batch_size = 32
--------------------------------------------------------------------------------
/dopamine/agents/rainbow/configs/rainbow_original.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters follow Hessel et al. (2018), except for sticky_actions,
2 | # which was False (not using sticky actions) in the original paper.
3 | import dopamine.agents.rainbow.rainbow_agent
4 | import dopamine.discrete_domains.atari_lib
5 | import dopamine.discrete_domains.run_experiment
6 | import dopamine.replay_memory.prioritized_replay_buffer
7 | import gin.tf.external_configurables
8 |
9 | RainbowAgent.num_atoms = 51
10 | RainbowAgent.vmax = 10.
11 | RainbowAgent.gamma = 0.99
12 | RainbowAgent.update_horizon = 3
13 | RainbowAgent.min_replay_history = 20000 # agent steps
14 | RainbowAgent.update_period = 4
15 | RainbowAgent.target_update_period = 8000 # agent steps
16 | RainbowAgent.epsilon_train = 0.01
17 | RainbowAgent.epsilon_eval = 0.001
18 | RainbowAgent.epsilon_decay_period = 250000 # agent steps
19 | RainbowAgent.replay_scheme = 'prioritized'
20 | RainbowAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version
21 | RainbowAgent.optimizer = @tf.train.AdamOptimizer()
22 |
23 | # Note these parameters are different from C51's.
24 | tf.train.AdamOptimizer.learning_rate = 0.0000625
25 | tf.train.AdamOptimizer.epsilon = 0.00015
26 |
27 | atari_lib.create_atari_environment.game_name = 'Pong'
28 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017).
29 | atari_lib.create_atari_environment.sticky_actions = True
30 | create_agent.agent_name = 'rainbow'
31 | Runner.num_iterations = 200
32 | Runner.training_steps = 250000 # agent steps
33 | Runner.evaluation_steps = 125000 # agent steps
34 | Runner.max_steps_per_episode = 27000 # agent steps
35 |
36 | WrappedPrioritizedReplayBuffer.replay_capacity = 1000000
37 | WrappedPrioritizedReplayBuffer.batch_size = 32
38 |
--------------------------------------------------------------------------------
/dopamine/agents/rainbow/configs/rainbow_our_first_paper.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters follow Hessel et al. (2018).
2 | import dopamine.agents.rainbow.rainbow_agent
3 | import dopamine.discrete_domains.atari_lib
4 | import dopamine.discrete_domains.run_experiment
5 | import dopamine.replay_memory.prioritized_replay_buffer
6 | import gin.tf.external_configurables
7 |
8 | RainbowAgent.num_atoms = 51
9 | RainbowAgent.vmax = 10.
10 | RainbowAgent.gamma = 0.99
11 | RainbowAgent.mu = 0.0
12 | RainbowAgent.update_horizon = 3
13 | RainbowAgent.min_replay_history = 20000 # agent steps
14 | RainbowAgent.update_period = 4
15 | RainbowAgent.target_update_period = 8000 # agent steps
16 | RainbowAgent.epsilon_train = 0.01
17 | RainbowAgent.epsilon_eval = 0.001
18 | RainbowAgent.epsilon_decay_period = 250000 # agent steps
19 | RainbowAgent.replay_scheme = 'prioritized'
20 | RainbowAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version
21 | RainbowAgent.optimizer = @tf.train.AdamOptimizer()
22 |
23 | # Note these parameters are different from C51's.
24 | tf.train.AdamOptimizer.learning_rate = 0.0000625
25 | tf.train.AdamOptimizer.epsilon = 0.00015
26 |
27 | atari_lib.create_atari_environment.game_name = 'Pong'
28 | # Deterministic ALE version used in the AAAI paper.
29 | atari_lib.create_atari_environment.sticky_actions = False
30 | create_agent.agent_name = 'rainbow'
31 | Runner.num_iterations = 120
32 | Runner.training_steps = 250000 # agent steps
33 | Runner.evaluation_steps = 125000 # agent steps
34 | Runner.max_steps_per_episode = 27000 # agent steps
35 |
36 | AtariPreprocessing.terminal_on_life_loss = True
37 |
38 | WrappedPrioritizedReplayBuffer.replay_capacity = 1000000
39 | WrappedPrioritizedReplayBuffer.batch_size = 64
40 |
--------------------------------------------------------------------------------
/dopamine/agents/rainbow/configs/rainbow_our_second_paper.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters follow Hessel et al. (2018).
2 | import dopamine.agents.rainbow.rainbow_agent
3 | import dopamine.discrete_domains.atari_lib
4 | import dopamine.discrete_domains.run_experiment
5 | import dopamine.replay_memory.prioritized_replay_buffer
6 | import gin.tf.external_configurables
7 |
8 | RainbowAgent.num_atoms = 51
9 | RainbowAgent.vmax = 10.
10 | RainbowAgent.gamma = 0.99
11 | RainbowAgent.mu = 0.0
12 | RainbowAgent.nu = 0.0
13 | RainbowAgent.update_horizon = 3
14 | RainbowAgent.min_replay_history = 20000 # agent steps
15 | RainbowAgent.update_period = 4
16 | RainbowAgent.target_update_period = 8000 # agent steps
17 | RainbowAgent.epsilon_train = 0.01
18 | RainbowAgent.epsilon_eval = 0.001
19 | RainbowAgent.epsilon_decay_period = 250000 # agent steps
20 | RainbowAgent.replay_scheme = 'prioritized'
21 | RainbowAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version
22 | RainbowAgent.optimizer = @tf.train.AdamOptimizer()
23 |
24 | # Note these parameters are different from C51's.
25 | tf.train.AdamOptimizer.learning_rate = 0.0000625
26 | tf.train.AdamOptimizer.epsilon = 0.00015
27 |
28 | atari_lib.create_atari_environment.game_name = 'Pong'
29 | # Deterministic ALE version used in the AAAI paper.
30 | atari_lib.create_atari_environment.sticky_actions = False
31 | create_agent.agent_name = 'rainbow'
32 | Runner.num_iterations = 250
33 | Runner.training_steps = 250000 # agent steps
34 | Runner.evaluation_steps = 125000 # agent steps
35 | Runner.max_steps_per_episode = 27000 # agent steps
36 |
37 | AtariPreprocessing.terminal_on_life_loss = True
38 |
39 | WrappedPrioritizedReplayBuffer.replay_capacity = 1000000
40 | WrappedPrioritizedReplayBuffer.batch_size = 64
41 |
--------------------------------------------------------------------------------
/dopamine/agents/rainbow/configs/rainbow_profiling.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters follow Hessel et al. (2018), except for sticky_actions,
2 | # which was False (not using sticky actions) in the original paper.
3 | import dopamine.agents.rainbow.rainbow_agent
4 | import dopamine.discrete_domains.atari_lib
5 | import dopamine.discrete_domains.run_experiment
6 | import dopamine.replay_memory.prioritized_replay_buffer
7 | import gin.tf.external_configurables
8 |
9 | RainbowAgent.num_atoms = 51
10 | RainbowAgent.vmax = 10.
11 | RainbowAgent.gamma = 0.99
12 | RainbowAgent.update_horizon = 3
13 | RainbowAgent.min_replay_history = 100 # agent steps
14 | RainbowAgent.update_period = 4
15 | RainbowAgent.target_update_period = 8000 # agent steps
16 | RainbowAgent.epsilon_fn = @dqn_agent.identity_epsilon
17 | RainbowAgent.epsilon_train = 0.0
18 | RainbowAgent.epsilon_eval = 0.0
19 | RainbowAgent.epsilon_decay_period = 250000 # agent steps
20 | RainbowAgent.replay_scheme = 'prioritized'
21 | RainbowAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version
22 | RainbowAgent.optimizer = @tf.train.AdamOptimizer()
23 |
24 | # Note these parameters are different from C51's.
25 | tf.train.AdamOptimizer.learning_rate = 0.0000625
26 | tf.train.AdamOptimizer.epsilon = 0.00015
27 |
28 | atari_lib.create_atari_environment.game_name = 'Pong'
29 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017).
30 | atari_lib.create_atari_environment.sticky_actions = True
31 | create_runner.schedule = 'continuous_train'
32 | create_agent.agent_name = 'rainbow'
33 | Runner.num_iterations = 200
34 | Runner.training_steps = 250000 # agent steps
35 | Runner.evaluation_steps = 125000 # agent steps
36 | Runner.max_steps_per_episode = 27000 # agent steps
37 |
38 | WrappedPrioritizedReplayBuffer.replay_capacity = 1000000
39 | WrappedPrioritizedReplayBuffer.batch_size = 32
40 |
--------------------------------------------------------------------------------
/dopamine/colab/README.md:
--------------------------------------------------------------------------------
1 | # Colabs
2 |
3 | This directory contains
4 | [`utils.py`](https://github.com/google/dopamine/blob/master/dopamine/colab/utils.py),
5 | which provides a number of useful utilities for loading experiment statistics.
6 |
7 | We also provide a set of colabs to help illustrate how you can use Dopamine.
8 |
9 | ## Agents
10 |
11 | In this
12 | [colab](https://colab.research.google.com/github/google/dopamine/blob/master/dopamine/colab/agents.ipynb)
13 | we illustrate how to create a new agent by either subclassing
14 | [`DQN`](https://github.com/google/dopamine/blob/master/dopamine/agents/dqn/dqn_agent.py)
15 | or by creating a new agent from scratch.
16 |
17 | ## Loading statistics
18 |
19 | In this
20 | [colab](https://colab.research.google.com/github/google/dopamine/blob/master/dopamine/colab/load_statistics.ipynb)
21 | we illustrate how to load and visualize the logs data produced by Dopamine.
22 |
23 | ## Visualizing trained agents
24 | In this
25 | [colab](https://colab.research.google.com/github/google/dopamine/blob/master/dopamine/colab/agent_visualizer.ipynb)
26 | we illustrate how to visualize a trained agent using the visualization utilities
27 | provided with Dopamine.
28 |
29 | In [this colab](https://colab.research.google.com/github/google/dopamine/blob/master/dopamine/colab/jax_agent_visualizer.ipynb)
30 | we illustrate how to visualize the performance of agents trained with the
31 | [JAX implementations](https://github.com/google/dopamine/tree/master/dopamine/jax).
32 |
33 | ## Visualizing with TensorBoard
34 | In this
35 | [colab](https://colab.research.google.com/github/google/dopamine/blob/master/dopamine/colab/tensorboard.ipynb)
36 | we illustrate how to download and visualize different agents with TensorBoard.
37 |
38 | ## Training on Cartpole
39 | In this
40 | [colab](https://colab.research.google.com/github/google/dopamine/blob/master/dopamine/colab/cartpole.ipynb)
41 | we illustrate how to train DQN and C51 on the Cartpole environment.
42 |
--------------------------------------------------------------------------------
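The statistics utilities mentioned above can also be used outside a colab to inspect the logs produced by a run. The sketch below is hedged: the log path is hypothetical, and the exact signatures of load_statistics and summarize_data may differ across Dopamine versions.

from dopamine.colab import utils as colab_utils

LOG_PATH = '/tmp/dopamine_run/logs'  # Hypothetical base_dir/logs directory.

# Load the per-iteration statistics written during training and summarize
# the training returns across iterations.
raw_data, _ = colab_utils.load_statistics(LOG_PATH, verbose=True)
summary = colab_utils.summarize_data(
    raw_data, summary_keys=['train_episode_returns'])
print(summary['train_episode_returns'][:5])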
/dopamine/colab/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2018 The Dopamine Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
--------------------------------------------------------------------------------
/dopamine/continuous_domains/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | """Copyright 2021 The Dopamine Authors.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | """
16 |
--------------------------------------------------------------------------------
/dopamine/continuous_domains/train.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2021 The Dopamine Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | r"""The entry point for running a Dopamine agent on continuous control envs.
16 |
17 | """
18 |
19 | from absl import app
20 | from absl import flags
21 | from absl import logging
22 |
23 | from dopamine.continuous_domains import run_experiment
24 |
25 | flags.DEFINE_string('base_dir', None,
26 | 'Base directory to host all required sub-directories.')
27 | flags.DEFINE_multi_string(
28 | 'gin_files', [], 'List of paths to gin configuration files (e.g.'
29 | '"dopamine/jax/agents/sac/configs/sac.gin").')
30 | flags.DEFINE_multi_string(
31 | 'gin_bindings', [],
32 | 'Gin bindings to override the values set in the config files.')
33 |
34 | FLAGS = flags.FLAGS
35 |
36 |
37 | def main(unused_argv):
38 | """Main method.
39 |
40 | Args:
41 | unused_argv: Arguments (unused).
42 | """
43 | logging.set_verbosity(logging.INFO)
44 | base_dir = FLAGS.base_dir
45 | gin_files = FLAGS.gin_files
46 | gin_bindings = FLAGS.gin_bindings
47 |
48 | run_experiment.load_gin_configs(gin_files, gin_bindings)
49 | runner = run_experiment.create_continuous_runner(base_dir)
50 | runner.run_experiment()
51 |
52 |
53 | if __name__ == '__main__':
54 | flags.mark_flag_as_required('base_dir')
55 | app.run(main)
56 |
--------------------------------------------------------------------------------
/dopamine/discrete_domains/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | """Copyright 2018 The Dopamine Authors.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | """
16 |
--------------------------------------------------------------------------------
/dopamine/discrete_domains/iteration_statistics.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2018 The Dopamine Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | """A class for storing iteration-specific metrics.
16 | """
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 |
22 |
23 | class IterationStatistics(object):
24 | """A class for storing iteration-specific metrics.
25 |
26 | The internal format is as follows: we maintain a mapping from keys to lists.
27 | Each list contains all the values corresponding to the given key.
28 |
29 | For example, self.data_lists['train_episode_returns'] might contain the
30 | per-episode returns achieved during this iteration.
31 |
32 | Attributes:
33 | data_lists: dict mapping each metric_name (str) to a list of said metric
34 | across episodes.
35 | """
36 |
37 | def __init__(self):
38 | self.data_lists = {}
39 |
40 | def append(self, data_pairs):
41 | """Add the given values to their corresponding key-indexed lists.
42 |
43 | Args:
44 | data_pairs: A dictionary of key-value pairs to be recorded.
45 | """
46 | for key, value in data_pairs.items():
47 | if key not in self.data_lists:
48 | self.data_lists[key] = []
49 | self.data_lists[key].append(value)
50 |
--------------------------------------------------------------------------------
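A short usage sketch for IterationStatistics as defined above: each call to append records one value per key, and data_lists accumulates those values across calls.

stats = IterationStatistics()
stats.append({'train_episode_returns': 12.0, 'train_episode_lengths': 200})
stats.append({'train_episode_returns': 15.5, 'train_episode_lengths': 180})

print(stats.data_lists['train_episode_returns'])  # [12.0, 15.5]
print(stats.data_lists['train_episode_lengths'])  # [200, 180]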
/dopamine/discrete_domains/train.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Lint as: python3
3 | # Copyright 2018 The Dopamine Authors.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | r"""The entry point for running a Dopamine agent.
17 |
18 | """
19 |
20 | from absl import app
21 | from absl import flags
22 | from absl import logging
23 |
24 | from dopamine.discrete_domains import run_experiment
25 | import tensorflow as tf
26 |
27 |
28 | flags.DEFINE_string('base_dir', None,
29 | 'Base directory to host all required sub-directories.')
30 | flags.DEFINE_multi_string(
31 |     'gin_files', [], 'List of paths to gin configuration files (e.g. '
32 |     '"dopamine/agents/dqn/dqn.gin").')
33 | flags.DEFINE_multi_string(
34 | 'gin_bindings', [],
35 | 'Gin bindings to override the values set in the config files '
36 |     '(e.g. "DQNAgent.epsilon_train=0.1", '
37 |     '"create_environment.game_name=\'Pong\'").')
38 |
39 |
40 | FLAGS = flags.FLAGS
41 |
42 |
43 |
44 |
45 | def main(unused_argv):
46 | """Main method.
47 |
48 | Args:
49 | unused_argv: Arguments (unused).
50 | """
51 | logging.set_verbosity(logging.INFO)
52 | tf.compat.v1.disable_v2_behavior()
53 |
54 | base_dir = FLAGS.base_dir
55 | gin_files = FLAGS.gin_files
56 | gin_bindings = FLAGS.gin_bindings
57 | run_experiment.load_gin_configs(gin_files, gin_bindings)
58 | runner = run_experiment.create_runner(base_dir)
59 | runner.run_experiment()
60 |
61 |
62 | if __name__ == '__main__':
63 | flags.mark_flag_as_required('base_dir')
64 | app.run(main)
65 |
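For reference, the flag-driven flow above can also be driven programmatically. The sketch below mirrors main(); the base_dir path, gin file, and binding string are illustrative values, not defaults taken from this repository.

    from dopamine.discrete_domains import run_experiment
    import tensorflow as tf

    tf.compat.v1.disable_v2_behavior()

    # Illustrative stand-ins for the --base_dir, --gin_files and
    # --gin_bindings flags defined in train.py.
    base_dir = '/tmp/dopamine_runs/dqn_pong'
    gin_files = ['dopamine/jax/agents/dqn/configs/dqn.gin']
    gin_bindings = ['JaxDQNAgent.epsilon_train = 0.1']

    run_experiment.load_gin_configs(gin_files, gin_bindings)
    runner = run_experiment.create_runner(base_dir)
    runner.run_experiment()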
--------------------------------------------------------------------------------
/dopamine/jax/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2018 The Dopamine Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
--------------------------------------------------------------------------------
/dopamine/jax/agents/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2018 The Dopamine Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
--------------------------------------------------------------------------------
/dopamine/jax/agents/dqn/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2018 The Dopamine Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
--------------------------------------------------------------------------------
/dopamine/jax/agents/dqn/configs/dqn.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters follow the classic Nature DQN, but we modify as necessary to
2 | # match those used in Rainbow (Hessel et al., 2018), to ensure apples-to-apples
3 | # comparison.
4 | import dopamine.discrete_domains.atari_lib
5 | import dopamine.discrete_domains.run_experiment
6 | import dopamine.jax.agents.dqn.dqn_agent
7 | import dopamine.replay_memory.circular_replay_buffer
8 |
9 | JaxDQNAgent.gamma = 0.99
10 | JaxDQNAgent.update_horizon = 1
11 | JaxDQNAgent.min_replay_history = 20000 # agent steps
12 | JaxDQNAgent.update_period = 4
13 | JaxDQNAgent.target_update_period = 8000 # agent steps
14 | JaxDQNAgent.epsilon_train = 0.01
15 | JaxDQNAgent.epsilon_eval = 0.001
16 | JaxDQNAgent.epsilon_decay_period = 250000 # agent steps
17 | # Note: We are using the Adam optimizer by default for JaxDQN, which differs
18 | # from the original NatureDQN and the dopamine TensorFlow version. In
19 | # the experiments we have run, we have found that using Adam yields
20 | # improved training performance.
21 | JaxDQNAgent.optimizer = 'adam'
22 | create_optimizer.learning_rate = 6.25e-5
23 | create_optimizer.eps = 1.5e-4
24 |
25 | atari_lib.create_atari_environment.game_name = 'Pong'
26 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017).
27 | atari_lib.create_atari_environment.sticky_actions = True
28 | create_runner.schedule = 'continuous_train'
29 | create_agent.agent_name = 'jax_dqn'
30 | create_agent.debug_mode = True
31 | Runner.num_iterations = 200
32 | Runner.training_steps = 250000 # agent steps
33 | Runner.evaluation_steps = 125000 # agent steps
34 | Runner.max_steps_per_episode = 27000 # agent steps
35 |
36 | OutOfGraphReplayBuffer.replay_capacity = 1000000
37 | OutOfGraphReplayBuffer.batch_size = 32
38 |
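The Adam settings above (learning_rate = 6.25e-5, eps = 1.5e-4) are consumed by create_optimizer in the JAX DQN agent. As a rough sketch of what they amount to, assuming create_optimizer ultimately builds an optax optimizer (as recent Dopamine JAX code does), the configured update rule is approximately:

    import optax

    # Approximate stand-in for the gin bindings above; this is an
    # illustrative sketch, not the exact create_optimizer implementation.
    optimizer = optax.adam(learning_rate=6.25e-5, eps=1.5e-4)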
--------------------------------------------------------------------------------
/dopamine/jax/agents/dqn/configs/dqn_acrobot.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters for a simple DQN-style Acrobot agent. The hyperparameters
2 | # chosen achieve reasonable performance.
3 | import dopamine.discrete_domains.gym_lib
4 | import dopamine.discrete_domains.run_experiment
5 | import dopamine.jax.agents.dqn.dqn_agent
6 | import dopamine.jax.networks
7 | import dopamine.replay_memory.circular_replay_buffer
8 | import flax
9 |
10 | JaxDQNAgent.observation_shape = %gym_lib.ACROBOT_OBSERVATION_SHAPE
11 | JaxDQNAgent.observation_dtype = %jax_networks.ACROBOT_OBSERVATION_DTYPE
12 | JaxDQNAgent.stack_size = %gym_lib.ACROBOT_STACK_SIZE
13 | JaxDQNAgent.network = @networks.ClassicControlDQNNetwork
14 | JaxDQNAgent.gamma = 0.99
15 | JaxDQNAgent.update_horizon = 1
16 | JaxDQNAgent.min_replay_history = 500
17 | JaxDQNAgent.update_period = 4
18 | JaxDQNAgent.target_update_period = 100
19 | JaxDQNAgent.epsilon_fn = @dqn_agent.identity_epsilon
20 | JaxDQNAgent.optimizer = 'adam'
21 | create_optimizer.learning_rate = 0.001
22 | create_optimizer.eps = 3.125e-4
23 |
24 | ClassicControlDQNNetwork.min_vals = %jax_networks.ACROBOT_MIN_VALS
25 | ClassicControlDQNNetwork.max_vals = %jax_networks.ACROBOT_MAX_VALS
26 |
27 | create_gym_environment.environment_name = 'Acrobot'
28 | create_gym_environment.version = 'v1'
29 | create_runner.schedule = 'continuous_train'
30 | create_agent.agent_name = 'jax_dqn'
31 | create_agent.debug_mode = True
32 | TrainRunner.create_environment_fn = @gym_lib.create_gym_environment
33 | Runner.num_iterations = 500
34 | Runner.training_steps = 1000
35 | Runner.evaluation_steps = 1000
36 | Runner.max_steps_per_episode = 500
37 |
38 | OutOfGraphReplayBuffer.replay_capacity = 50000
39 | OutOfGraphReplayBuffer.batch_size = 128
40 |
--------------------------------------------------------------------------------
/dopamine/jax/agents/dqn/configs/dqn_cartpole.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters for a simple DQN-style Cartpole agent. The hyperparameters
2 | # chosen achieve reasonable performance.
3 | import dopamine.discrete_domains.gym_lib
4 | import dopamine.discrete_domains.run_experiment
5 | import dopamine.jax.agents.dqn.dqn_agent
6 | import dopamine.jax.networks
7 | import dopamine.replay_memory.circular_replay_buffer
8 | import flax
9 |
10 | JaxDQNAgent.observation_shape = %gym_lib.CARTPOLE_OBSERVATION_SHAPE
11 | JaxDQNAgent.observation_dtype = %jax_networks.CARTPOLE_OBSERVATION_DTYPE
12 | JaxDQNAgent.stack_size = %gym_lib.CARTPOLE_STACK_SIZE
13 | JaxDQNAgent.network = @networks.ClassicControlDQNNetwork
14 | JaxDQNAgent.gamma = 0.99
15 | JaxDQNAgent.update_horizon = 1
16 | JaxDQNAgent.min_replay_history = 500
17 | JaxDQNAgent.update_period = 4
18 | JaxDQNAgent.target_update_period = 100
19 | JaxDQNAgent.epsilon_fn = @dqn_agent.identity_epsilon
20 | JaxDQNAgent.optimizer = 'adam'
21 | create_optimizer.learning_rate = 0.001
22 | create_optimizer.eps = 3.125e-4
23 |
24 | ClassicControlDQNNetwork.min_vals = %jax_networks.CARTPOLE_MIN_VALS
25 | ClassicControlDQNNetwork.max_vals = %jax_networks.CARTPOLE_MAX_VALS
26 |
27 | create_gym_environment.environment_name = 'CartPole'
28 | create_gym_environment.version = 'v0'
29 | create_runner.schedule = 'continuous_train'
30 | create_agent.agent_name = 'jax_dqn'
31 | create_agent.debug_mode = True
32 | TrainRunner.create_environment_fn = @gym_lib.create_gym_environment
33 | Runner.num_iterations = 500
34 | Runner.training_steps = 1000
35 | Runner.evaluation_steps = 1000
36 | Runner.max_steps_per_episode = 200 # Default max episode length.
37 |
38 | OutOfGraphReplayBuffer.replay_capacity = 50000
39 | OutOfGraphReplayBuffer.batch_size = 128
40 |
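These classic-control configs are intended to be adjusted through gin bindings rather than by editing the files in place. A hedged sketch using gin-config directly (the override values below are illustrative):

    import gin

    # Load the CartPole config, then override two hyperparameters
    # without touching the .gin file itself.
    gin.parse_config_files_and_bindings(
        ['dopamine/jax/agents/dqn/configs/dqn_cartpole.gin'],
        bindings=['JaxDQNAgent.min_replay_history = 1000',
                  'create_optimizer.learning_rate = 5e-4'])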
--------------------------------------------------------------------------------
/dopamine/jax/agents/dqn/configs/dqn_lunarlander.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters for a simple DQN-style LunarLander agent. The hyperparameters
2 | # chosen achieve reasonable performance.
3 | import dopamine.discrete_domains.gym_lib
4 | import dopamine.discrete_domains.run_experiment
5 | import dopamine.jax.agents.dqn.dqn_agent
6 | import dopamine.jax.networks
7 | import dopamine.replay_memory.circular_replay_buffer
8 | import flax
9 |
10 | JaxDQNAgent.observation_shape = %gym_lib.LUNAR_OBSERVATION_SHAPE
11 | JaxDQNAgent.observation_dtype = %jax_networks.LUNAR_OBSERVATION_DTYPE
12 | JaxDQNAgent.stack_size = %gym_lib.LUNAR_STACK_SIZE
13 | JaxDQNAgent.network = @networks.ClassicControlDQNNetwork
14 | JaxDQNAgent.gamma = 0.99
15 | JaxDQNAgent.update_horizon = 1
16 | JaxDQNAgent.min_replay_history = 500
17 | JaxDQNAgent.update_period = 4
18 | JaxDQNAgent.target_update_period = 100
19 | JaxDQNAgent.epsilon_fn = @dqn_agent.identity_epsilon
20 | JaxDQNAgent.optimizer = 'adam'
21 | create_optimizer.learning_rate = 0.001
22 | create_optimizer.eps = 3.125e-4
23 |
24 | create_gym_environment.environment_name = 'LunarLander'
25 | create_gym_environment.version = 'v2'
26 | create_runner.schedule = 'continuous_train'
27 | create_agent.agent_name = 'jax_dqn'
28 | create_agent.debug_mode = True
29 | TrainRunner.create_environment_fn = @gym_lib.create_gym_environment
30 | Runner.num_iterations = 125
31 | Runner.training_steps = 4000
32 | Runner.evaluation_steps = 1000
33 | Runner.max_steps_per_episode = 1000
34 |
35 | OutOfGraphReplayBuffer.replay_capacity = 50000
36 | OutOfGraphReplayBuffer.batch_size = 128
37 |
--------------------------------------------------------------------------------
/dopamine/jax/agents/dqn/configs/dqn_mountaincar.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters for a simple DQN-style MountainCar agent. The hyperparameters
2 | # chosen achieve reasonable performance.
3 | import dopamine.discrete_domains.gym_lib
4 | import dopamine.discrete_domains.run_experiment
5 | import dopamine.jax.agents.dqn.dqn_agent
6 | import dopamine.jax.networks
7 | import dopamine.replay_memory.circular_replay_buffer
8 | import flax
9 |
10 | JaxDQNAgent.observation_shape = %gym_lib.MOUNTAINCAR_OBSERVATION_SHAPE
11 | JaxDQNAgent.observation_dtype = %jax_networks.MOUNTAINCAR_OBSERVATION_DTYPE
12 | JaxDQNAgent.stack_size = %gym_lib.MOUNTAINCAR_STACK_SIZE
13 | JaxDQNAgent.network = @networks.ClassicControlDQNNetwork
14 | JaxDQNAgent.gamma = 0.99
15 | JaxDQNAgent.update_horizon = 1
16 | JaxDQNAgent.min_replay_history = 500
17 | JaxDQNAgent.update_period = 4
18 | JaxDQNAgent.target_update_period = 100
19 | JaxDQNAgent.epsilon_fn = @dqn_agent.identity_epsilon
20 | JaxDQNAgent.optimizer = 'adam'
21 | create_optimizer.learning_rate = 0.001
22 | create_optimizer.eps = 3.125e-4
23 |
24 | ClassicControlDQNNetwork.min_vals = %jax_networks.MOUNTAINCAR_MIN_VALS
25 | ClassicControlDQNNetwork.max_vals = %jax_networks.MOUNTAINCAR_MAX_VALS
26 |
27 | create_gym_environment.environment_name = 'MountainCar'
28 | create_gym_environment.version = 'v0'
29 | create_runner.schedule = 'continuous_train'
30 | create_agent.agent_name = 'jax_dqn'
31 | create_agent.debug_mode = True
32 | TrainRunner.create_environment_fn = @gym_lib.create_gym_environment
33 | Runner.num_iterations = 500
34 | Runner.training_steps = 1000
35 | Runner.max_steps_per_episode = 600 # Default max episode length.
36 |
37 | OutOfGraphReplayBuffer.replay_capacity = 50000
38 | OutOfGraphReplayBuffer.batch_size = 128
39 |
--------------------------------------------------------------------------------
/dopamine/jax/agents/dqn/configs/dqn_profiling.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters follow the classic Nature DQN, but we modify as necessary to
2 | # match those used in Rainbow (Hessel et al., 2018), to ensure apples-to-apples
3 | # comparison.
4 | import dopamine.discrete_domains.atari_lib
5 | import dopamine.discrete_domains.run_experiment
6 | import dopamine.jax.agents.dqn.dqn_agent
7 | import dopamine.replay_memory.circular_replay_buffer
8 |
9 | JaxDQNAgent.gamma = 0.99
10 | JaxDQNAgent.update_horizon = 1
11 | JaxDQNAgent.min_replay_history = 100 # agent steps
12 | JaxDQNAgent.update_period = 4
13 | JaxDQNAgent.target_update_period = 8000 # agent steps
14 | JaxDQNAgent.epsilon_fn = @dqn_agent.identity_epsilon
15 | JaxDQNAgent.epsilon_train = 0.0
16 | JaxDQNAgent.epsilon_eval = 0.0
17 |
18 | atari_lib.create_atari_environment.game_name = 'Pong'
19 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017).
20 | atari_lib.create_atari_environment.sticky_actions = True
21 | create_runner.schedule = 'continuous_train'
22 | create_agent.agent_name = 'jax_dqn'
23 | create_agent.debug_mode = True
24 | Runner.num_iterations = 200
25 | Runner.training_steps = 250000 # agent steps
26 | Runner.evaluation_steps = 125000 # agent steps
27 | Runner.max_steps_per_episode = 27000 # agent steps
28 |
29 | OutOfGraphReplayBuffer.replay_capacity = 1000000
30 | OutOfGraphReplayBuffer.batch_size = 32
31 |
--------------------------------------------------------------------------------
/dopamine/jax/agents/full_rainbow/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2018 The Dopamine Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
--------------------------------------------------------------------------------
/dopamine/jax/agents/full_rainbow/configs/full_rainbow.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters follow Hessel et al. (2018), except for sticky_actions,
2 | # which was False (not using sticky actions) in the original paper.
3 | import dopamine.jax.agents.full_rainbow.full_rainbow_agent
4 | import dopamine.jax.agents.dqn.dqn_agent
5 | import dopamine.jax.networks
6 | import dopamine.discrete_domains.atari_lib
7 | import dopamine.discrete_domains.run_experiment
8 | import dopamine.replay_memory.prioritized_replay_buffer
9 |
10 | JaxDQNAgent.gamma = 0.99
11 | JaxDQNAgent.update_horizon = 3
12 | JaxDQNAgent.min_replay_history = 20000 # agent steps
13 | JaxDQNAgent.update_period = 4
14 | JaxDQNAgent.target_update_period = 8000 # agent steps
15 | JaxDQNAgent.epsilon_train = 0.01
16 | JaxDQNAgent.epsilon_eval = 0.001
17 | JaxDQNAgent.epsilon_decay_period = 250000 # agent steps
18 | JaxDQNAgent.optimizer = 'adam'
19 |
20 | JaxFullRainbowAgent.noisy = True
21 | JaxFullRainbowAgent.dueling = True
22 | JaxFullRainbowAgent.double_dqn = True
23 | JaxFullRainbowAgent.num_atoms = 51
24 | JaxFullRainbowAgent.vmax = 10.
25 | JaxFullRainbowAgent.replay_scheme = 'prioritized'
26 |
27 | # Note these parameters are different from C51's.
28 | create_optimizer.learning_rate = 0.0000625
29 | create_optimizer.eps = 0.00015
30 |
31 | atari_lib.create_atari_environment.game_name = 'Pong'
32 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017).
33 | atari_lib.create_atari_environment.sticky_actions = True
34 | create_runner.schedule = 'continuous_train'
35 | create_agent.agent_name = 'full_rainbow'
36 | create_agent.debug_mode = True
37 | Runner.num_iterations = 200
38 | Runner.training_steps = 250000 # agent steps
39 | Runner.evaluation_steps = 125000 # agent steps
40 | Runner.max_steps_per_episode = 27000 # agent steps
41 |
42 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 1000000
43 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32
44 |
--------------------------------------------------------------------------------
/dopamine/jax/agents/full_rainbow/configs/full_rainbow_profiling.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters follow Hessel et al. (2018), except for sticky_actions,
2 | # which was False (not using sticky actions) in the original paper.
3 | import dopamine.jax.agents.full_rainbow.full_rainbow_agent
4 | import dopamine.jax.agents.dqn.dqn_agent
5 | import dopamine.jax.networks
6 | import dopamine.discrete_domains.atari_lib
7 | import dopamine.discrete_domains.run_experiment
8 | import dopamine.replay_memory.prioritized_replay_buffer
9 |
10 | JaxDQNAgent.gamma = 0.99
11 | JaxDQNAgent.update_horizon = 3
12 | JaxDQNAgent.min_replay_history = 100 # agent steps
13 | JaxDQNAgent.update_period = 4
14 | JaxDQNAgent.target_update_period = 8000 # agent steps
15 | JaxDQNAgent.epsilon_train = 0.0
16 | JaxDQNAgent.epsilon_eval = 0.0
17 | JaxDQNAgent.epsilon_decay_period = 250000 # agent steps
18 | JaxDQNAgent.optimizer = 'adam'
19 |
20 | JaxFullRainbowAgent.num_atoms = 51
21 | JaxFullRainbowAgent.vmax = 10.
22 | JaxFullRainbowAgent.replay_scheme = 'prioritized'
23 | JaxFullRainbowAgent.noisy = True
24 | JaxFullRainbowAgent.dueling = True
25 | JaxFullRainbowAgent.double_dqn = True
26 | JaxFullRainbowAgent.epsilon_fn = @dqn_agent.identity_epsilon
27 |
28 | # Note these parameters are different from C51's.
29 | create_optimizer.learning_rate = 0.0000625
30 | create_optimizer.eps = 0.00015
31 |
32 | atari_lib.create_atari_environment.game_name = 'Pong'
33 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017).
34 | atari_lib.create_atari_environment.sticky_actions = True
35 | create_runner.schedule = 'continuous_train'
36 | create_agent.agent_name = 'jax_rainbow'
37 | create_agent.debug_mode = True
38 | Runner.num_iterations = 200
39 | Runner.training_steps = 250000 # agent steps
40 | Runner.evaluation_steps = 125000 # agent steps
41 | Runner.max_steps_per_episode = 27000 # agent steps
42 |
43 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 1000000
44 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32
45 |
--------------------------------------------------------------------------------
/dopamine/jax/agents/implicit_quantile/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2018 The Dopamine Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
--------------------------------------------------------------------------------
/dopamine/jax/agents/implicit_quantile/configs/implicit_quantile.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters follow Dabney et al. (2018), but we modify as necessary to
2 | # match those used in Rainbow (Hessel et al., 2018), to ensure apples-to-apples
3 | # comparison.
4 | import dopamine.jax.agents.implicit_quantile.implicit_quantile_agent
5 | import dopamine.discrete_domains.atari_lib
6 | import dopamine.discrete_domains.run_experiment
7 | import dopamine.replay_memory.circular_replay_buffer
8 |
9 | JaxImplicitQuantileAgent.kappa = 1.0
10 | JaxImplicitQuantileAgent.num_tau_samples = 64
11 | JaxImplicitQuantileAgent.num_tau_prime_samples = 64
12 | JaxImplicitQuantileAgent.num_quantile_samples = 32
13 | JaxImplicitQuantileAgent.gamma = 0.99
14 | JaxImplicitQuantileAgent.update_horizon = 3
15 | JaxImplicitQuantileAgent.min_replay_history = 20000 # agent steps
16 | JaxImplicitQuantileAgent.update_period = 4
17 | JaxImplicitQuantileAgent.target_update_period = 8000 # agent steps
18 | JaxImplicitQuantileAgent.epsilon_train = 0.01
19 | JaxImplicitQuantileAgent.epsilon_eval = 0.001
20 | JaxImplicitQuantileAgent.epsilon_decay_period = 250000 # agent steps
21 | JaxImplicitQuantileAgent.optimizer = 'adam'
22 | create_optimizer.learning_rate = 0.00005
23 | create_optimizer.eps = 0.0003125
24 |
25 | atari_lib.create_atari_environment.game_name = 'Pong'
26 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017).
27 | atari_lib.create_atari_environment.sticky_actions = True
28 | create_runner.schedule = 'continuous_train'
29 | create_agent.agent_name = 'jax_implicit_quantile'
30 | create_agent.debug_mode = True
31 | Runner.num_iterations = 200
32 | Runner.training_steps = 250000
33 | Runner.evaluation_steps = 125000
34 | Runner.max_steps_per_episode = 27000
35 |
36 | OutOfGraphReplayBuffer.replay_capacity = 1000000
37 | OutOfGraphReplayBuffer.batch_size = 32
38 |
--------------------------------------------------------------------------------
/dopamine/jax/agents/implicit_quantile/configs/implicit_quantile_profiling.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters follow Dabney et al. (2018), but we modify as necessary to
2 | # match those used in Rainbow (Hessel et al., 2018), to ensure apples-to-apples
3 | # comparison.
4 | import dopamine.jax.agents.implicit_quantile.implicit_quantile_agent
5 | import dopamine.discrete_domains.atari_lib
6 | import dopamine.discrete_domains.run_experiment
7 | import dopamine.jax.agents.dqn.dqn_agent
8 | import dopamine.replay_memory.circular_replay_buffer
9 |
10 | JaxImplicitQuantileAgent.kappa = 1.0
11 | JaxImplicitQuantileAgent.num_tau_samples = 64
12 | JaxImplicitQuantileAgent.num_tau_prime_samples = 64
13 | JaxImplicitQuantileAgent.num_quantile_samples = 32
14 | JaxImplicitQuantileAgent.gamma = 0.99
15 | JaxImplicitQuantileAgent.update_horizon = 3
16 | JaxImplicitQuantileAgent.min_replay_history = 100 # agent steps
17 | JaxImplicitQuantileAgent.update_period = 4
18 | JaxImplicitQuantileAgent.target_update_period = 8000 # agent steps
19 | JaxImplicitQuantileAgent.epsilon_fn = @dqn_agent.identity_epsilon
20 | JaxImplicitQuantileAgent.epsilon_train = 0.0
21 | JaxImplicitQuantileAgent.epsilon_eval = 0.0
22 | JaxImplicitQuantileAgent.optimizer = 'adam'
23 | create_optimizer.learning_rate = 0.00005
24 | create_optimizer.eps = 0.0003125
25 |
26 | atari_lib.create_atari_environment.game_name = 'Pong'
27 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017).
28 | atari_lib.create_atari_environment.sticky_actions = True
29 | create_runner.schedule = 'continuous_train'
30 | create_agent.agent_name = 'jax_implicit_quantile'
31 | create_agent.debug_mode = True
32 | Runner.num_iterations = 200
33 | Runner.training_steps = 250000
34 | Runner.evaluation_steps = 125000
35 | Runner.max_steps_per_episode = 27000
36 |
37 | OutOfGraphReplayBuffer.replay_capacity = 1000000
38 | OutOfGraphReplayBuffer.batch_size = 32
39 |
--------------------------------------------------------------------------------
/dopamine/jax/agents/quantile/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2018 The Dopamine Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
--------------------------------------------------------------------------------
/dopamine/jax/agents/quantile/configs/quantile.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters follow Dabney et al. (2017) but we modify as necessary to
2 | # match those used in Rainbow (Hessel et al., 2018), to ensure apples-to-apples
3 | # comparison.
4 | import dopamine.jax.agents.quantile.quantile_agent
5 | import dopamine.discrete_domains.atari_lib
6 | import dopamine.discrete_domains.run_experiment
7 | import dopamine.replay_memory.prioritized_replay_buffer
8 |
9 | JaxQuantileAgent.kappa = 1.0
10 | JaxQuantileAgent.num_atoms = 200
11 | JaxQuantileAgent.gamma = 0.99
12 | JaxQuantileAgent.update_horizon = 3
13 | JaxQuantileAgent.min_replay_history = 20000 # agent steps
14 | JaxQuantileAgent.update_period = 4
15 | JaxQuantileAgent.target_update_period = 8000 # agent steps
16 | JaxQuantileAgent.epsilon_train = 0.01
17 | JaxQuantileAgent.epsilon_eval = 0.001
18 | JaxQuantileAgent.epsilon_decay_period = 250000 # agent steps
19 | JaxQuantileAgent.replay_scheme = 'prioritized'
20 | JaxQuantileAgent.optimizer = 'adam'
21 |
22 | create_optimizer.learning_rate = 0.00005
23 | create_optimizer.eps = 0.0003125
24 |
25 | atari_lib.create_atari_environment.game_name = 'Pong'
26 | atari_lib.create_atari_environment.sticky_actions = True
27 | create_runner.schedule = 'continuous_train'
28 | create_agent.agent_name = 'jax_quantile'
29 | create_agent.debug_mode = True
30 | Runner.num_iterations = 200
31 | Runner.training_steps = 250000
32 | Runner.evaluation_steps = 125000
33 | Runner.max_steps_per_episode = 27000
34 |
35 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 1000000
36 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32
37 |
--------------------------------------------------------------------------------
/dopamine/jax/agents/quantile/configs/quantile_profiling.gin:
--------------------------------------------------------------------------------
1 | import dopamine.jax.agents.quantile.quantile_agent
2 | import dopamine.discrete_domains.atari_lib
3 | import dopamine.discrete_domains.run_experiment
4 | import dopamine.jax.agents.dqn.dqn_agent
5 | import dopamine.replay_memory.prioritized_replay_buffer
6 |
7 | JaxQuantileAgent.kappa = 1.0
8 | JaxQuantileAgent.num_atoms = 200
9 | JaxQuantileAgent.gamma = 0.99
10 | JaxQuantileAgent.update_horizon = 3
11 | JaxQuantileAgent.min_replay_history = 100 # agent steps
12 | JaxQuantileAgent.update_period = 4
13 | JaxQuantileAgent.target_update_period = 8000 # agent steps
14 | JaxQuantileAgent.epsilon_fn = @dqn_agent.identity_epsilon
15 | JaxQuantileAgent.epsilon_train = 0.0
16 | JaxQuantileAgent.epsilon_eval = 0.0
17 | JaxQuantileAgent.replay_scheme = 'prioritized'
18 | JaxQuantileAgent.optimizer = 'adam'
19 |
20 | create_optimizer.learning_rate = 0.00005
21 | create_optimizer.eps = 0.0003125
22 |
23 | atari_lib.create_atari_environment.game_name = 'Pong'
24 | atari_lib.create_atari_environment.sticky_actions = True
25 | create_runner.schedule = 'continuous_train'
26 | create_agent.agent_name = 'jax_quantile'
27 | create_agent.debug_mode = True
28 | Runner.num_iterations = 200
29 | Runner.training_steps = 250000
30 | Runner.evaluation_steps = 125000
31 | Runner.max_steps_per_episode = 27000
32 |
33 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 1000000
34 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32
35 |
--------------------------------------------------------------------------------
/dopamine/jax/agents/rainbow/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2018 The Dopamine Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
--------------------------------------------------------------------------------
/dopamine/jax/agents/rainbow/configs/c51.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters follow the settings from Bellemare et al. (2017), but we
2 | # modify as necessary to match those used in Rainbow (Hessel et al., 2018), to
3 | # ensure apples-to-apples comparison.
4 | import dopamine.jax.agents.rainbow.rainbow_agent
5 | import dopamine.discrete_domains.atari_lib
6 | import dopamine.discrete_domains.run_experiment
7 | import dopamine.replay_memory.prioritized_replay_buffer
8 |
9 | JaxRainbowAgent.num_atoms = 51
10 | JaxRainbowAgent.vmax = 10.
11 | JaxRainbowAgent.gamma = 0.99
12 | JaxRainbowAgent.update_horizon = 1
13 | JaxRainbowAgent.min_replay_history = 20000 # agent steps
14 | JaxRainbowAgent.update_period = 4
15 | JaxRainbowAgent.target_update_period = 8000 # agent steps
16 | JaxRainbowAgent.epsilon_train = 0.01
17 | JaxRainbowAgent.epsilon_eval = 0.001
18 | JaxRainbowAgent.epsilon_decay_period = 250000 # agent steps
19 | JaxRainbowAgent.replay_scheme = 'uniform'
20 |
21 | atari_lib.create_atari_environment.game_name = 'Pong'
22 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017).
23 | atari_lib.create_atari_environment.sticky_actions = True
24 | create_runner.schedule = 'continuous_train'
25 | create_agent.agent_name = 'jax_rainbow'
26 | create_agent.debug_mode = True
27 | Runner.num_iterations = 200
28 | Runner.training_steps = 250000 # agent steps
29 | Runner.evaluation_steps = 125000 # agent steps
30 | Runner.max_steps_per_episode = 27000 # agent steps
31 |
32 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 1000000
33 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32
34 |
--------------------------------------------------------------------------------
/dopamine/jax/agents/rainbow/configs/c51_acrobot.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters for a simple C51-style Acrobot agent. The hyperparameters
2 | # chosen achieve reasonable performance.
3 | import dopamine.jax.agents.rainbow.rainbow_agent
4 | import dopamine.jax.networks
5 | import dopamine.discrete_domains.gym_lib
6 | import dopamine.discrete_domains.run_experiment
7 | import dopamine.replay_memory.prioritized_replay_buffer
8 |
9 | JaxRainbowAgent.observation_shape = %gym_lib.ACROBOT_OBSERVATION_SHAPE
10 | JaxRainbowAgent.observation_dtype = %jax_networks.ACROBOT_OBSERVATION_DTYPE
11 | JaxRainbowAgent.stack_size = %gym_lib.ACROBOT_STACK_SIZE
12 | JaxRainbowAgent.network = @networks.ClassicControlRainbowNetwork
13 | JaxRainbowAgent.num_atoms = 51
14 | JaxRainbowAgent.vmax = 10.
15 | JaxRainbowAgent.gamma = 0.99
16 | JaxRainbowAgent.update_horizon = 1
17 | JaxRainbowAgent.min_replay_history = 500
18 | JaxRainbowAgent.update_period = 4
19 | JaxRainbowAgent.target_update_period = 100
20 | JaxRainbowAgent.epsilon_fn = @dqn_agent.identity_epsilon
21 | JaxRainbowAgent.replay_scheme = 'uniform'
22 | create_optimizer.learning_rate = 0.1
23 | create_optimizer.eps = 0.0003125
24 |
25 | ClassicControlRainbowNetwork.min_vals = %jax_networks.ACROBOT_MIN_VALS
26 | ClassicControlRainbowNetwork.max_vals = %jax_networks.ACROBOT_MAX_VALS
27 |
28 | create_gym_environment.environment_name = 'Acrobot'
29 | create_gym_environment.version = 'v1'
30 | create_runner.schedule = 'continuous_train'
31 | create_agent.agent_name = 'jax_rainbow'
32 | TrainRunner.create_environment_fn = @gym_lib.create_gym_environment
33 | Runner.num_iterations = 500
34 | Runner.training_steps = 1000
35 | Runner.evaluation_steps = 1000
36 | Runner.max_steps_per_episode = 500
37 |
38 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 50000
39 | OutOfGraphPrioritizedReplayBuffer.batch_size = 128
40 |
--------------------------------------------------------------------------------
/dopamine/jax/agents/rainbow/configs/c51_cartpole.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters for a simple C51-style Cartpole agent. The hyperparameters
2 | # chosen achieve reasonable performance.
3 | import dopamine.jax.agents.rainbow.rainbow_agent
4 | import dopamine.jax.networks
5 | import dopamine.discrete_domains.gym_lib
6 | import dopamine.discrete_domains.run_experiment
7 | import dopamine.replay_memory.prioritized_replay_buffer
8 |
9 | JaxRainbowAgent.observation_shape = %gym_lib.CARTPOLE_OBSERVATION_SHAPE
10 | JaxRainbowAgent.observation_dtype = %jax_networks.CARTPOLE_OBSERVATION_DTYPE
11 | JaxRainbowAgent.stack_size = %gym_lib.CARTPOLE_STACK_SIZE
12 | JaxRainbowAgent.network = @networks.ClassicControlRainbowNetwork
13 | JaxRainbowAgent.num_atoms = 201
14 | JaxRainbowAgent.vmax = 100.
15 | JaxRainbowAgent.gamma = 0.99
16 | JaxRainbowAgent.epsilon_eval = 0.
17 | JaxRainbowAgent.epsilon_train = 0.01
18 | JaxRainbowAgent.update_horizon = 1
19 | JaxRainbowAgent.min_replay_history = 500
20 | JaxRainbowAgent.update_period = 1
21 | JaxRainbowAgent.target_update_period = 1
22 | JaxRainbowAgent.epsilon_fn = @dqn_agent.identity_epsilon
23 | JaxRainbowAgent.replay_scheme = 'uniform'
24 | create_optimizer.learning_rate = 0.00001
25 | create_optimizer.eps = 0.00000390625
26 |
27 | ClassicControlRainbowNetwork.min_vals = %jax_networks.CARTPOLE_MIN_VALS
28 | ClassicControlRainbowNetwork.max_vals = %jax_networks.CARTPOLE_MAX_VALS
29 |
30 | create_gym_environment.environment_name = 'CartPole'
31 | create_gym_environment.version = 'v0'
32 | create_runner.schedule = 'continuous_train'
33 | create_agent.agent_name = 'jax_rainbow'
34 | TrainRunner.create_environment_fn = @gym_lib.create_gym_environment
35 | Runner.num_iterations = 400
36 | Runner.training_steps = 1000
37 | Runner.evaluation_steps = 1000
38 | Runner.max_steps_per_episode = 200 # Default max episode length.
39 |
40 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 50000
41 | OutOfGraphPrioritizedReplayBuffer.batch_size = 128
42 |
--------------------------------------------------------------------------------
/dopamine/jax/agents/rainbow/configs/c51_profiling.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters follow the settings from Bellemare et al. (2017), but we
2 | # modify as necessary to match those used in Rainbow (Hessel et al., 2018), to
3 | # ensure apples-to-apples comparison.
4 | import dopamine.jax.agents.rainbow.rainbow_agent
5 | import dopamine.discrete_domains.atari_lib
6 | import dopamine.discrete_domains.run_experiment
7 | import dopamine.jax.agents.dqn.dqn_agent
8 | import dopamine.replay_memory.prioritized_replay_buffer
9 |
10 | JaxRainbowAgent.num_atoms = 51
11 | JaxRainbowAgent.vmax = 10.
12 | JaxRainbowAgent.gamma = 0.99
13 | JaxRainbowAgent.update_horizon = 1
14 | JaxRainbowAgent.min_replay_history = 100 # agent steps
15 | JaxRainbowAgent.update_period = 4
16 | JaxRainbowAgent.target_update_period = 8000 # agent steps
17 | JaxRainbowAgent.epsilon_fn = @dqn_agent.identity_epsilon
18 | JaxRainbowAgent.epsilon_train = 0.0
19 | JaxRainbowAgent.epsilon_eval = 0.0
20 | JaxRainbowAgent.replay_scheme = 'uniform'
21 |
22 | atari_lib.create_atari_environment.game_name = 'Pong'
23 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017).
24 | atari_lib.create_atari_environment.sticky_actions = True
25 | create_runner.schedule = 'continuous_train'
26 | create_agent.agent_name = 'jax_rainbow'
27 | create_agent.debug_mode = True
28 | Runner.num_iterations = 200
29 | Runner.training_steps = 250000 # agent steps
30 | Runner.evaluation_steps = 125000 # agent steps
31 | Runner.max_steps_per_episode = 27000 # agent steps
32 |
33 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 1000000
34 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32
35 |
--------------------------------------------------------------------------------
/dopamine/jax/agents/rainbow/configs/rainbow.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters follow Hessel et al. (2018), except for sticky_actions,
2 | # which was False (not using sticky actions) in the original paper.
3 | import dopamine.jax.agents.rainbow.rainbow_agent
4 | import dopamine.discrete_domains.atari_lib
5 | import dopamine.discrete_domains.run_experiment
6 | import dopamine.replay_memory.prioritized_replay_buffer
7 |
8 | JaxRainbowAgent.num_atoms = 51
9 | JaxRainbowAgent.vmax = 10.
10 | JaxRainbowAgent.gamma = 0.99
11 | JaxRainbowAgent.update_horizon = 3
12 | JaxRainbowAgent.min_replay_history = 20000 # agent steps
13 | JaxRainbowAgent.update_period = 4
14 | JaxRainbowAgent.target_update_period = 8000 # agent steps
15 | JaxRainbowAgent.epsilon_train = 0.01
16 | JaxRainbowAgent.epsilon_eval = 0.001
17 | JaxRainbowAgent.epsilon_decay_period = 250000 # agent steps
18 | JaxRainbowAgent.replay_scheme = 'prioritized'
19 |
20 | # Note these parameters are different from C51's.
21 | create_optimizer.learning_rate = 0.0000625
22 | create_optimizer.eps = 0.00015
23 |
24 | atari_lib.create_atari_environment.game_name = 'Pong'
25 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017).
26 | atari_lib.create_atari_environment.sticky_actions = True
27 | create_runner.schedule = 'continuous_train'
28 | create_agent.agent_name = 'jax_rainbow'
29 | create_agent.debug_mode = True
30 | Runner.num_iterations = 200
31 | Runner.training_steps = 250000 # agent steps
32 | Runner.evaluation_steps = 125000 # agent steps
33 | Runner.max_steps_per_episode = 27000 # agent steps
34 |
35 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 1000000
36 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32
37 |
--------------------------------------------------------------------------------
/dopamine/jax/agents/rainbow/configs/rainbow_acrobot.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters for a simple Rainbow-style Acrobot agent. The hyperparameters
2 | # chosen achieve reasonable performance.
3 | import dopamine.jax.agents.rainbow.rainbow_agent
4 | import dopamine.jax.networks
5 | import dopamine.discrete_domains.gym_lib
6 | import dopamine.discrete_domains.run_experiment
7 | import dopamine.replay_memory.prioritized_replay_buffer
8 |
9 | JaxRainbowAgent.observation_shape = %gym_lib.ACROBOT_OBSERVATION_SHAPE
10 | JaxRainbowAgent.observation_dtype = %jax_networks.ACROBOT_OBSERVATION_DTYPE
11 | JaxRainbowAgent.stack_size = %gym_lib.ACROBOT_STACK_SIZE
12 | JaxRainbowAgent.network = @networks.ClassicControlRainbowNetwork
13 | JaxRainbowAgent.num_atoms = 51
14 | JaxRainbowAgent.vmax = 10.
15 | JaxRainbowAgent.gamma = 0.99
16 | JaxRainbowAgent.update_horizon = 3
17 | JaxRainbowAgent.min_replay_history = 500
18 | JaxRainbowAgent.update_period = 4
19 | JaxRainbowAgent.target_update_period = 100
20 | JaxRainbowAgent.epsilon_fn = @dqn_agent.identity_epsilon
21 | JaxRainbowAgent.replay_scheme = 'prioritized'
22 | create_optimizer.learning_rate = 0.09
23 | create_optimizer.eps = 0.0003125
24 |
25 | ClassicControlRainbowNetwork.min_vals = %jax_networks.ACROBOT_MIN_VALS
26 | ClassicControlRainbowNetwork.max_vals = %jax_networks.ACROBOT_MAX_VALS
27 |
28 | create_gym_environment.environment_name = 'Acrobot'
29 | create_gym_environment.version = 'v1'
30 | create_runner.schedule = 'continuous_train'
31 | create_agent.agent_name = 'jax_rainbow'
32 | TrainRunner.create_environment_fn = @gym_lib.create_gym_environment
33 | Runner.num_iterations = 500
34 | Runner.training_steps = 1000
35 | Runner.evaluation_steps = 1000
36 | Runner.max_steps_per_episode = 500
37 |
38 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 50000
39 | OutOfGraphPrioritizedReplayBuffer.batch_size = 128
40 |
--------------------------------------------------------------------------------
/dopamine/jax/agents/rainbow/configs/rainbow_cartpole.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters for a simple Rainbow-style Cartpole agent. The
2 | # hyperparameters chosen achieve reasonable performance.
3 | import dopamine.jax.agents.rainbow.rainbow_agent
4 | import dopamine.jax.networks
5 | import dopamine.discrete_domains.gym_lib
6 | import dopamine.discrete_domains.run_experiment
7 | import dopamine.replay_memory.prioritized_replay_buffer
8 |
9 | JaxRainbowAgent.observation_shape = %gym_lib.CARTPOLE_OBSERVATION_SHAPE
10 | JaxRainbowAgent.observation_dtype = %jax_networks.CARTPOLE_OBSERVATION_DTYPE
11 | JaxRainbowAgent.stack_size = %gym_lib.CARTPOLE_STACK_SIZE
12 | JaxRainbowAgent.network = @networks.ClassicControlRainbowNetwork
13 | JaxRainbowAgent.num_atoms = 51
14 | JaxRainbowAgent.vmax = 10.
15 | JaxRainbowAgent.gamma = 0.99
16 | JaxRainbowAgent.update_horizon = 3
17 | JaxRainbowAgent.min_replay_history = 500
18 | JaxRainbowAgent.update_period = 4
19 | JaxRainbowAgent.target_update_period = 100
20 | JaxRainbowAgent.epsilon_fn = @dqn_agent.identity_epsilon
21 | JaxRainbowAgent.replay_scheme = 'prioritized'
22 | create_optimizer.learning_rate = 0.09
23 | create_optimizer.eps = 0.0003125
24 |
25 | ClassicControlRainbowNetwork.min_vals = %jax_networks.CARTPOLE_MIN_VALS
26 | ClassicControlRainbowNetwork.max_vals = %jax_networks.CARTPOLE_MAX_VALS
27 |
28 | create_gym_environment.environment_name = 'CartPole'
29 | create_gym_environment.version = 'v0'
30 | create_runner.schedule = 'continuous_train'
31 | create_agent.agent_name = 'jax_rainbow'
32 | TrainRunner.create_environment_fn = @gym_lib.create_gym_environment
33 | Runner.num_iterations = 500
34 | Runner.training_steps = 1000
35 | Runner.evaluation_steps = 1000
36 | Runner.max_steps_per_episode = 200 # Default max episode length.
37 |
38 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 50000
39 | OutOfGraphPrioritizedReplayBuffer.batch_size = 128
40 |
--------------------------------------------------------------------------------
/dopamine/jax/agents/rainbow/configs/rainbow_profiling.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters follow Hessel et al. (2018), except for sticky_actions,
2 | # which was False (not using sticky actions) in the original paper.
3 | import dopamine.jax.agents.rainbow.rainbow_agent
4 | import dopamine.discrete_domains.atari_lib
5 | import dopamine.discrete_domains.run_experiment
6 | import dopamine.jax.agents.dqn.dqn_agent
7 | import dopamine.replay_memory.prioritized_replay_buffer
8 |
9 | JaxRainbowAgent.num_atoms = 51
10 | JaxRainbowAgent.vmax = 10.
11 | JaxRainbowAgent.gamma = 0.99
12 | JaxRainbowAgent.update_horizon = 3
13 | JaxRainbowAgent.min_replay_history = 100 # agent steps
14 | JaxRainbowAgent.update_period = 4
15 | JaxRainbowAgent.target_update_period = 8000 # agent steps
16 | JaxRainbowAgent.epsilon_fn = @dqn_agent.identity_epsilon
17 | JaxRainbowAgent.epsilon_train = 0.0
18 | JaxRainbowAgent.epsilon_eval = 0.0
19 | JaxRainbowAgent.replay_scheme = 'prioritized'
20 |
21 | # Note these parameters are different from C51's.
22 | create_optimizer.learning_rate = 0.0000625
23 | create_optimizer.eps = 0.00015
24 |
25 | atari_lib.create_atari_environment.game_name = 'Pong'
26 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017).
27 | atari_lib.create_atari_environment.sticky_actions = True
28 | create_runner.schedule = 'continuous_train'
29 | create_agent.agent_name = 'jax_rainbow'
30 | create_agent.debug_mode = True
31 | Runner.num_iterations = 200
32 | Runner.training_steps = 250000 # agent steps
33 | Runner.evaluation_steps = 125000 # agent steps
34 | Runner.max_steps_per_episode = 27000 # agent steps
35 |
36 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 1000000
37 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32
38 |
--------------------------------------------------------------------------------
/dopamine/jax/agents/sac/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2021 The Dopamine Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
--------------------------------------------------------------------------------
/dopamine/jax/agents/sac/configs/sac.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters follow those specified in Table 1 of Appendix D in:
2 | # "Soft Actor-Critic Algorithms and Applications"
3 | # by Tuomas Haarnoja et al.
4 | # https://arxiv.org/abs/1812.05905
5 | import dopamine.continuous_domains.run_experiment
6 | import dopamine.discrete_domains.gym_lib
7 | import dopamine.jax.agents.sac.sac_agent
8 | import dopamine.jax.agents.dqn.dqn_agent
9 | import dopamine.jax.continuous_networks
10 | import dopamine.replay_memory.circular_replay_buffer
11 |
12 | SACAgent.reward_scale_factor = 0.1
13 | SACAgent.network = @continuous_networks.SACNetwork
14 | SACAgent.num_layers = 2
15 | SACAgent.hidden_units = 256
16 | SACAgent.gamma = 0.99
17 | SACAgent.update_horizon = 1
18 | SACAgent.min_replay_history = 10000 # agent steps
19 | SACAgent.update_period = 1
20 | SACAgent.target_update_type = 'soft'
21 | SACAgent.target_smoothing_coefficient = 0.005
22 | SACAgent.target_entropy = None # Defaults to -num_action_dims/2
23 | SACAgent.optimizer = 'adam'
24 | SACAgent.seed = None # Seed with the current time
25 | SACAgent.observation_dtype = %sac_agent.STATE_DTYPE
26 | create_optimizer.learning_rate = 3.0e-4
27 | create_optimizer.beta1 = 0.9
28 | create_optimizer.beta2 = 0.999
29 | create_optimizer.eps = 1.0e-8
30 |
31 | create_gym_environment.environment_name = 'HalfCheetah'
32 | create_gym_environment.version = 'v2'
33 | create_continuous_runner.schedule = 'continuous_train_and_eval'
34 | create_continuous_agent.agent_name = 'sac'
35 | ContinuousTrainRunner.create_environment_fn = @gym_lib.create_gym_environment
36 | ContinuousRunner.num_iterations = 3200
37 | ContinuousRunner.training_steps = 1000
38 | ContinuousRunner.evaluation_steps = 10000 # agent steps
39 | ContinuousRunner.max_steps_per_episode = 1000
40 | ContinuousRunner.clip_rewards = False
41 |
42 | circular_replay_buffer.OutOfGraphReplayBuffer.replay_capacity = 1000000
43 | circular_replay_buffer.OutOfGraphReplayBuffer.batch_size = 256
44 |
45 |
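With target_update_type = 'soft' and target_smoothing_coefficient = 0.005, the target network is nudged toward the online network after every update (a Polyak average) rather than copied periodically. A minimal JAX sketch of that rule, assuming pytree parameters; this is illustrative, not the agent's actual update code:

    import jax

    def soft_update(target_params, online_params, tau=0.005):
      # Polyak-average each target leaf toward its online counterpart.
      return jax.tree_util.tree_map(
          lambda t, o: (1.0 - tau) * t + tau * o, target_params, online_params)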
--------------------------------------------------------------------------------
/dopamine/jax/losses.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2021 The Dopamine Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | """Various losses used by the Dopamine JAX agents."""
16 | from flax import linen as nn
17 | import jax.numpy as jnp
18 |
19 |
20 | def huber_loss(targets: jnp.ndarray,
21 |                predictions: jnp.ndarray,
22 |                delta: float = 1.0) -> jnp.ndarray:
23 | """Implementation of the Huber loss with threshold delta.
24 |
25 | Let `x = |targets - predictions|`, the Huber loss is defined as:
26 | `0.5 * x^2` if `x <= delta`
27 | `0.5 * delta^2 + delta * (x - delta)` otherwise.
28 |
29 | Args:
30 | targets: Target values.
31 | predictions: Prediction values.
32 | delta: Threshold.
33 |
34 | Returns:
35 | Huber loss.
36 | """
37 | x = jnp.abs(targets - predictions)
38 | return jnp.where(x <= delta,
39 | 0.5 * x**2,
40 | 0.5 * delta**2 + delta * (x - delta))
41 |
42 |
43 | def mse_loss(targets: jnp.ndarray, predictions: jnp.ndarray) -> jnp.ndarray:
44 | """Implementation of the mean squared error loss."""
45 | return jnp.power((targets - predictions), 2)
46 |
47 |
48 | def softmax_cross_entropy_loss_with_logits(labels: jnp.ndarray,
49 |                                            logits: jnp.ndarray) -> jnp.ndarray:
50 | """Implementation of the softmax cross entropy loss."""
51 | return -jnp.sum(labels * nn.log_softmax(logits))
52 |
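A small numeric check of the losses above (values chosen for illustration): with delta = 1, an absolute error of 0.5 falls on the quadratic branch and an error of 2.0 on the linear branch.

    import jax.numpy as jnp
    from dopamine.jax import losses

    targets = jnp.array([0.0, 0.0])
    predictions = jnp.array([0.5, 2.0])

    # Huber: 0.5 * 0.5**2 = 0.125 and 0.5 * 1**2 + 1 * (2 - 1) = 1.5
    print(losses.huber_loss(targets, predictions))  # ~[0.125, 1.5]
    # mse_loss returns elementwise squared error: [0.25, 4.0]
    print(losses.mse_loss(targets, predictions))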
--------------------------------------------------------------------------------
/dopamine/labs/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2021 The Dopamine Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
--------------------------------------------------------------------------------
/dopamine/labs/atari_100k/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2021 The Dopamine Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
--------------------------------------------------------------------------------
/dopamine/labs/atari_100k/configs/DER.gin:
--------------------------------------------------------------------------------
1 | # Data Efficient Rainbow (DER) params
2 | import dopamine.jax.agents.dqn.dqn_agent
3 | import dopamine.jax.agents.full_rainbow.full_rainbow_agent
4 | import dopamine.jax.networks
5 | import dopamine.discrete_domains.gym_lib
6 | import dopamine.discrete_domains.run_experiment
7 | import dopamine.replay_memory.prioritized_replay_buffer
8 | import dopamine.labs.atari_100k.atari_100k_rainbow_agent
9 |
10 | JaxDQNAgent.gamma = 0.99
11 | # Use an update horizon of 10, as in the SPR paper, instead of DER's original 20.
12 | JaxDQNAgent.update_horizon = 10 # DER (instead of 3)
13 | JaxDQNAgent.min_replay_history = 1600 # DER (instead of 20000)
14 | JaxDQNAgent.update_period = 1 # DER: Update every 1 step (rather than 4)
15 | JaxDQNAgent.target_update_period = 2000 # DER: Target every 2000 updates
16 | JaxDQNAgent.epsilon_train = 0.01
17 | JaxDQNAgent.epsilon_eval = 0.001
18 | JaxDQNAgent.epsilon_decay_period = 2000 # agent steps
19 | JaxDQNAgent.optimizer = 'adam'
20 |
21 | JaxFullRainbowAgent.noisy = True
22 | JaxFullRainbowAgent.dueling = True
23 | JaxFullRainbowAgent.double_dqn = True
24 | JaxFullRainbowAgent.num_atoms = 51
25 | JaxFullRainbowAgent.vmax = 10.
26 | JaxFullRainbowAgent.replay_scheme = 'prioritized'
27 | JaxFullRainbowAgent.num_updates_per_train_step = 1
28 | JaxFullRainbowAgent.epsilon_fn = @jax.agents.dqn.dqn_agent.linearly_decaying_epsilon
29 | Atari100kRainbowAgent.data_augmentation = False
30 |
31 | # Note these parameters are from DER (van Hasselt et al., 2019)
32 | create_optimizer.learning_rate = 0.0001
33 | create_optimizer.eps = 0.00015
34 |
35 | atari_lib.create_atari_environment.game_name = 'Pong'
36 | # Atari 100K benchmark doesn't use sticky actions.
37 | atari_lib.create_atari_environment.sticky_actions = False
38 | AtariPreprocessing.terminal_on_life_loss = True
39 | Runner.num_iterations = 10
40 | Runner.training_steps = 10000 # agent steps
41 | MaxEpisodeEvalRunner.num_eval_episodes = 100 # agent episodes
42 | Runner.max_steps_per_episode = 27000 # agent steps
43 |
44 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 1000000 # 1M as a proxy for unbounded memory
45 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32
46 |
--------------------------------------------------------------------------------
/dopamine/labs/atari_100k/configs/DrQ.gin:
--------------------------------------------------------------------------------
1 | # Data-Regularized Q (DrQ) from Kostrikov et al. (2020)
2 | import dopamine.jax.agents.dqn.dqn_agent
3 | import dopamine.jax.agents.full_rainbow.full_rainbow_agent
4 | import dopamine.jax.networks
5 | import dopamine.discrete_domains.gym_lib
6 | import dopamine.discrete_domains.run_experiment
7 | import dopamine.replay_memory.prioritized_replay_buffer
8 | import dopamine.labs.atari_100k.atari_100k_rainbow_agent
9 |
10 | # Parameters specific to DrQ are highlighted by comments
11 | JaxDQNAgent.gamma = 0.99
12 | JaxDQNAgent.update_horizon = 10 # DrQ (instead of 3)
13 | JaxDQNAgent.min_replay_history = 1600 # DrQ (instead of 20000)
14 | JaxDQNAgent.update_period = 1 # DrQ (rather than 4)
15 | JaxDQNAgent.target_update_period = 1 # DrQ (rather than 8000)
16 | JaxDQNAgent.epsilon_train = 0.1 # DrQ (rather than 0.01)
17 | JaxDQNAgent.epsilon_eval = 0.05 # DrQ (rather than 0.001)
18 | JaxDQNAgent.epsilon_decay_period = 5000 # DrQ
19 | JaxDQNAgent.optimizer = 'adam'
20 |
21 | JaxFullRainbowAgent.noisy = False # DrQ (Efficient DQN)
22 | JaxFullRainbowAgent.dueling = True
23 | JaxFullRainbowAgent.double_dqn = True
24 | JaxFullRainbowAgent.distributional = False # DrQ (Efficient DQN)
25 | JaxFullRainbowAgent.num_atoms = 1 # Since DrQ uses DQN, rather than C51
26 | JaxFullRainbowAgent.num_updates_per_train_step = 1
27 | JaxFullRainbowAgent.replay_scheme = 'uniform'
28 | JaxFullRainbowAgent.epsilon_fn = @jax.agents.dqn.dqn_agent.linearly_decaying_epsilon
29 | Atari100kRainbowAgent.data_augmentation = True
30 |
31 | # Note these parameters are from DER (van Hasselt et al., 2019)
32 | create_optimizer.learning_rate = 0.0001
33 | create_optimizer.eps = 0.00015
34 |
35 | atari_lib.create_atari_environment.game_name = 'Pong'
36 | # Atari 100K benchmark doesn't use sticky actions.
37 | atari_lib.create_atari_environment.sticky_actions = False
38 | AtariPreprocessing.terminal_on_life_loss = True
39 | Runner.num_iterations = 1
40 | Runner.training_steps = 100000 # agent steps
41 | MaxEpisodeEvalRunner.num_eval_episodes = 100 # agent episodes
42 | Runner.max_steps_per_episode = 27000 # agent steps
43 |
44 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 1000000 # 1M as a proxy for unbounded memory
45 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32
46 |
--------------------------------------------------------------------------------
/dopamine/labs/atari_100k/configs/OTRainbow.gin:
--------------------------------------------------------------------------------
1 | # Overtrained Rainbow (OTRainbow) from Kielak et al. (2019)
2 | import dopamine.jax.agents.dqn.dqn_agent
3 | import dopamine.jax.agents.full_rainbow.full_rainbow_agent
4 | import dopamine.jax.networks
5 | import dopamine.discrete_domains.gym_lib
6 | import dopamine.discrete_domains.run_experiment
7 | import dopamine.replay_memory.prioritized_replay_buffer
8 | import dopamine.labs.atari_100k.atari_100k_rainbow_agent
9 |
10 | # Parameters specific to OTRainbow are highlighted by comments
11 | JaxDQNAgent.gamma = 0.99
12 | JaxDQNAgent.update_horizon = 3
13 | JaxDQNAgent.min_replay_history = 20000
14 | JaxDQNAgent.update_period = 1 # OTRainbow: Update every 1 step (rather than 4)
15 | JaxDQNAgent.target_update_period = 500 # OTRainbow (instead of 8000)
16 | JaxDQNAgent.epsilon_train = 0.01
17 | JaxDQNAgent.epsilon_eval = 0.001
18 | JaxDQNAgent.epsilon_decay_period = 50000 # OTRainbow (instead of 250000)
19 | JaxDQNAgent.optimizer = 'adam'
20 |
21 | # Don't use noisy networks, dueling DQN, and double DQN.
22 | JaxFullRainbowAgent.noisy = False
23 | JaxFullRainbowAgent.dueling = False
24 | JaxFullRainbowAgent.double_dqn = False
25 | JaxFullRainbowAgent.num_atoms = 51
26 | JaxFullRainbowAgent.num_updates_per_train_step = 8 # OTRainbow (instead of 1)
27 | JaxFullRainbowAgent.vmax = 10.
28 | JaxFullRainbowAgent.replay_scheme = 'prioritized'
29 | JaxFullRainbowAgent.epsilon_fn = @jax.agents.dqn.dqn_agent.linearly_decaying_epsilon
30 | Atari100kRainbowAgent.data_augmentation = False
31 |
32 | # Note these parameters are from the original Rainbow.
33 | create_optimizer.learning_rate = 0.0000625
34 | create_optimizer.eps = 0.00015
35 |
36 | atari_lib.create_atari_environment.game_name = 'Pong'
37 | # Atari 100K benchmark doesn't use sticky actions.
38 | atari_lib.create_atari_environment.sticky_actions = False
39 | AtariPreprocessing.terminal_on_life_loss = True
40 | Runner.num_iterations = 1
41 | Runner.training_steps = 100000 # agent steps
42 | MaxEpisodeEvalRunner.num_eval_episodes = 100 # agent episodes
43 | Runner.max_steps_per_episode = 27000 # agent steps
44 |
45 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 1000000 # 1M as a proxy for unbounded memory
46 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32
47 |
--------------------------------------------------------------------------------
/dopamine/labs/environments/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2021 The Dopamine Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
--------------------------------------------------------------------------------
/dopamine/labs/environments/minatar/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2021 The Dopamine Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
--------------------------------------------------------------------------------
/dopamine/labs/environments/minatar/dqn_asterix.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters for a simple DQN-style Minatar agent. The hyperparameters
2 | # chosen achieve reasonable performance.
3 | import dopamine.discrete_domains.gym_lib
4 | import dopamine.discrete_domains.run_experiment
5 | import dopamine.jax.agents.dqn.dqn_agent
6 | import dopamine.jax.networks
7 | import dopamine.labs.environments.minatar.minatar_env
8 | import dopamine.replay_memory.circular_replay_buffer
9 | import flax
10 |
11 | JaxDQNAgent.observation_shape = %minatar_env.ASTERIX_SHAPE
12 | JaxDQNAgent.observation_dtype = %minatar_env.DTYPE
13 | JaxDQNAgent.stack_size = 1
14 | JaxDQNAgent.network = @minatar_env.MinatarDQNNetwork
15 | JaxDQNAgent.gamma = 0.99
16 | JaxDQNAgent.update_horizon = 1
17 | JaxDQNAgent.min_replay_history = 1000
18 | JaxDQNAgent.update_period = 4
19 | JaxDQNAgent.target_update_period = 1000
20 | JaxDQNAgent.optimizer = 'adam'
21 | create_optimizer.learning_rate = 0.00025
22 | create_optimizer.eps = 3.125e-4
23 |
24 | create_minatar_env.game_name = 'asterix'
25 | TrainRunner.create_environment_fn = @minatar_env.create_minatar_env
26 | create_runner.schedule = 'continuous_train'
27 | create_agent.agent_name = 'jax_dqn'
28 | create_agent.debug_mode = True
29 | Runner.num_iterations = 10
30 | Runner.training_steps = 1000000
31 | Runner.max_steps_per_episode = 100000000
32 |
33 | OutOfGraphReplayBuffer.replay_capacity = 100000
34 | OutOfGraphReplayBuffer.batch_size = 32
35 |
--------------------------------------------------------------------------------
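For a quick sanity check of a MinAtar config like the one above, the environment can be constructed directly. This is only a rough sketch: it assumes the `minatar` package is installed and that `create_minatar_env` exposes the Gym-style `reset`/`step` interface the `TrainRunner` relies on.

```python
# Rough smoke-test sketch (assumed API): build the MinAtar Asterix environment
# that create_minatar_env.game_name = 'asterix' configures above.
from dopamine.labs.environments.minatar import minatar_env

env = minatar_env.create_minatar_env(game_name='asterix')
observation = env.reset()
observation, reward, terminal, _ = env.step(0)  # action index 0, for illustration
print(observation.shape)  # expected to match minatar_env.ASTERIX_SHAPE
```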
/dopamine/labs/environments/minatar/dqn_breakout.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters for a simple DQN-style Minatar agent. The hyperparameters
2 | # chosen achieve reasonable performance.
3 | import dopamine.discrete_domains.gym_lib
4 | import dopamine.discrete_domains.run_experiment
5 | import dopamine.jax.agents.dqn.dqn_agent
6 | import dopamine.jax.networks
7 | import dopamine.labs.environments.minatar.minatar_env
8 | import dopamine.replay_memory.circular_replay_buffer
9 | import flax
10 |
11 | JaxDQNAgent.observation_shape = %minatar_env.BREAKOUT_SHAPE
12 | JaxDQNAgent.observation_dtype = %minatar_env.DTYPE
13 | JaxDQNAgent.stack_size = 1
14 | JaxDQNAgent.network = @minatar_env.MinatarDQNNetwork
15 | JaxDQNAgent.gamma = 0.99
16 | JaxDQNAgent.update_horizon = 1
17 | JaxDQNAgent.min_replay_history = 1000
18 | JaxDQNAgent.update_period = 4
19 | JaxDQNAgent.target_update_period = 1000
20 | JaxDQNAgent.optimizer = 'adam'
21 | create_optimizer.learning_rate = 0.00025
22 | create_optimizer.eps = 3.125e-4
23 |
24 | create_minatar_env.game_name = 'breakout'
25 | TrainRunner.create_environment_fn = @minatar_env.create_minatar_env
26 | create_runner.schedule = 'continuous_train'
27 | create_agent.agent_name = 'jax_dqn'
28 | create_agent.debug_mode = True
29 | Runner.num_iterations = 10
30 | Runner.training_steps = 1000000
31 | Runner.max_steps_per_episode = 100000000
32 |
33 | OutOfGraphReplayBuffer.replay_capacity = 100000
34 | OutOfGraphReplayBuffer.batch_size = 32
35 |
--------------------------------------------------------------------------------
/dopamine/labs/environments/minatar/dqn_freeway.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters for a simple DQN-style Minatar agent. The hyperparameters
2 | # chosen achieve reasonable performance.
3 | import dopamine.discrete_domains.gym_lib
4 | import dopamine.discrete_domains.run_experiment
5 | import dopamine.jax.agents.dqn.dqn_agent
6 | import dopamine.jax.networks
7 | import dopamine.labs.environments.minatar.minatar_env
8 | import dopamine.replay_memory.circular_replay_buffer
9 | import flax
10 |
11 | JaxDQNAgent.observation_shape = %minatar_env.FREEWAY_SHAPE
12 | JaxDQNAgent.observation_dtype = %minatar_env.DTYPE
13 | JaxDQNAgent.stack_size = 1
14 | JaxDQNAgent.network = @minatar_env.MinatarDQNNetwork
15 | JaxDQNAgent.gamma = 0.99
16 | JaxDQNAgent.update_horizon = 1
17 | JaxDQNAgent.min_replay_history = 1000
18 | JaxDQNAgent.update_period = 4
19 | JaxDQNAgent.target_update_period = 1000
20 | JaxDQNAgent.optimizer = 'adam'
21 | create_optimizer.learning_rate = 0.00025
22 | create_optimizer.eps = 3.125e-4
23 |
24 | create_minatar_env.game_name = 'freeway'
25 | TrainRunner.create_environment_fn = @minatar_env.create_minatar_env
26 | create_runner.schedule = 'continuous_train'
27 | create_agent.agent_name = 'jax_dqn'
28 | create_agent.debug_mode = True
29 | Runner.num_iterations = 10
30 | Runner.training_steps = 1000000
31 | Runner.max_steps_per_episode = 100000000
32 |
33 | OutOfGraphReplayBuffer.replay_capacity = 100000
34 | OutOfGraphReplayBuffer.batch_size = 32
35 |
--------------------------------------------------------------------------------
/dopamine/labs/environments/minatar/dqn_seaquest.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters for a simple DQN-style Minatar agent. The hyperparameters
2 | # chosen achieve reasonable performance.
3 | import dopamine.discrete_domains.gym_lib
4 | import dopamine.discrete_domains.run_experiment
5 | import dopamine.jax.agents.dqn.dqn_agent
6 | import dopamine.jax.networks
7 | import dopamine.labs.environments.minatar.minatar_env
8 | import dopamine.replay_memory.circular_replay_buffer
9 | import flax
10 |
11 | JaxDQNAgent.observation_shape = %minatar_env.SEAQUEST_SHAPE
12 | JaxDQNAgent.observation_dtype = %minatar_env.DTYPE
13 | JaxDQNAgent.stack_size = 1
14 | JaxDQNAgent.network = @minatar_env.MinatarDQNNetwork
15 | JaxDQNAgent.gamma = 0.99
16 | JaxDQNAgent.update_horizon = 1
17 | JaxDQNAgent.min_replay_history = 1000
18 | JaxDQNAgent.update_period = 4
19 | JaxDQNAgent.target_update_period = 1000
20 | JaxDQNAgent.optimizer = 'adam'
21 | create_optimizer.learning_rate = 0.00025
22 | create_optimizer.eps = 3.125e-4
23 |
24 | create_minatar_env.game_name = 'seaquest'
25 | TrainRunner.create_environment_fn = @minatar_env.create_minatar_env
26 | create_runner.schedule = 'continuous_train'
27 | create_agent.agent_name = 'jax_dqn'
28 | create_agent.debug_mode = True
29 | Runner.num_iterations = 10
30 | Runner.training_steps = 1000000
31 | Runner.max_steps_per_episode = 100000000
32 |
33 | OutOfGraphReplayBuffer.replay_capacity = 100000
34 | OutOfGraphReplayBuffer.batch_size = 32
35 |
--------------------------------------------------------------------------------
/dopamine/labs/environments/minatar/dqn_space_invaders.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters for a simple DQN-style Minatar agent. The hyperparameters
2 | # chosen achieve reasonable performance.
3 | import dopamine.discrete_domains.gym_lib
4 | import dopamine.discrete_domains.run_experiment
5 | import dopamine.jax.agents.dqn.dqn_agent
6 | import dopamine.jax.networks
7 | import dopamine.labs.environments.minatar.minatar_env
8 | import dopamine.replay_memory.circular_replay_buffer
9 | import flax
10 |
11 | JaxDQNAgent.observation_shape = %minatar_env.SPACE_INVADERS_SHAPE
12 | JaxDQNAgent.observation_dtype = %minatar_env.DTYPE
13 | JaxDQNAgent.stack_size = 1
14 | JaxDQNAgent.network = @minatar_env.MinatarDQNNetwork
15 | JaxDQNAgent.gamma = 0.99
16 | JaxDQNAgent.update_horizon = 1
17 | JaxDQNAgent.min_replay_history = 1000
18 | JaxDQNAgent.update_period = 4
19 | JaxDQNAgent.target_update_period = 1000
20 | JaxDQNAgent.optimizer = 'adam'
21 | create_optimizer.learning_rate = 0.00025
22 | create_optimizer.eps = 3.125e-4
23 |
24 | create_minatar_env.game_name = 'space_invaders'
25 | TrainRunner.create_environment_fn = @minatar_env.create_minatar_env
26 | create_runner.schedule = 'continuous_train'
27 | create_agent.agent_name = 'jax_dqn'
28 | create_agent.debug_mode = True
29 | Runner.num_iterations = 10
30 | Runner.training_steps = 1000000
31 | Runner.max_steps_per_episode = 100000000
32 |
33 | OutOfGraphReplayBuffer.replay_capacity = 100000
34 | OutOfGraphReplayBuffer.batch_size = 32
35 |
--------------------------------------------------------------------------------
/dopamine/labs/environments/minatar/quantile_asterix.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters follow Dabney et al. (2017) but we modify as necessary to
2 | # match those used in Rainbow (Hessel et al., 2018), to ensure apples-to-apples
3 | # comparison.
4 | import dopamine.discrete_domains.atari_lib
5 | import dopamine.discrete_domains.run_experiment
6 | import dopamine.jax.agents.quantile.quantile_agent
7 | import dopamine.labs.environments.minatar.minatar_env
8 | import dopamine.replay_memory.prioritized_replay_buffer
9 |
10 | JaxQuantileAgent.observation_shape = %minatar_env.ASTERIX_SHAPE
11 | JaxQuantileAgent.observation_dtype = %minatar_env.DTYPE
12 | JaxQuantileAgent.stack_size = 1
13 | JaxQuantileAgent.network = @minatar_env.MinatarQuantileNetwork
14 | JaxQuantileAgent.kappa = 1.0
15 | JaxQuantileAgent.num_atoms = 200
16 | JaxQuantileAgent.gamma = 0.99
17 | JaxQuantileAgent.update_horizon = 3
18 | JaxQuantileAgent.min_replay_history = 1000 # agent steps
19 | JaxQuantileAgent.update_period = 4
20 | JaxQuantileAgent.target_update_period = 1000 # agent steps
21 | JaxQuantileAgent.epsilon_train = 0.01
22 | JaxQuantileAgent.epsilon_eval = 0.001
23 | JaxQuantileAgent.replay_scheme = 'prioritized'
24 | JaxQuantileAgent.optimizer = 'adam'
25 | create_optimizer.learning_rate = 0.00025
26 | create_optimizer.eps = 3.125e-4
27 |
28 | create_minatar_env.game_name = 'asterix'
29 | TrainRunner.create_environment_fn = @minatar_env.create_minatar_env
30 | create_runner.schedule = 'continuous_train'
31 | create_agent.agent_name = 'jax_quantile'
32 | create_agent.debug_mode = True
33 | Runner.num_iterations = 10
34 | Runner.training_steps = 1000000
35 | Runner.max_steps_per_episode = 100000000
36 |
37 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 100000
38 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32
39 |
--------------------------------------------------------------------------------
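`JaxQuantileAgent.kappa = 1.0` and `num_atoms = 200` above parameterize the quantile Huber loss of Dabney et al. (2017). The NumPy sketch below is a rough reference, not the repo's JAX implementation, and it simplifies to one TD error per quantile; the agent actually averages over pairwise differences against sampled target quantiles.

```python
# Rough NumPy reference for the quantile Huber loss configured above
# (kappa = 1.0, 200 quantiles); simplified to one TD error per quantile.
import numpy as np

def quantile_huber_loss(td_errors, kappa=1.0):
    """td_errors: array of shape [num_atoms], u = target - prediction per quantile."""
    num_atoms = td_errors.shape[0]
    taus = (np.arange(num_atoms) + 0.5) / num_atoms  # quantile midpoints in (0, 1)
    abs_u = np.abs(td_errors)
    # Huber loss with threshold kappa.
    huber = np.where(abs_u <= kappa,
                     0.5 * td_errors ** 2,
                     kappa * (abs_u - 0.5 * kappa))
    # Asymmetric quantile weighting: under- and over-estimates are penalized
    # according to each quantile level tau.
    weight = np.abs(taus - (td_errors < 0).astype(td_errors.dtype))
    return np.mean(weight * huber / kappa)

print(quantile_huber_loss(np.random.randn(200)))
```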
/dopamine/labs/environments/minatar/quantile_breakout.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters follow Dabney et al. (2017) but we modify as necessary to
2 | # match those used in Rainbow (Hessel et al., 2018), to ensure apples-to-apples
3 | # comparison.
4 | import dopamine.discrete_domains.atari_lib
5 | import dopamine.discrete_domains.run_experiment
6 | import dopamine.jax.agents.quantile.quantile_agent
7 | import dopamine.labs.environments.minatar.minatar_env
8 | import dopamine.replay_memory.prioritized_replay_buffer
9 |
10 | JaxQuantileAgent.observation_shape = %minatar_env.BREAKOUT_SHAPE
11 | JaxQuantileAgent.observation_dtype = %minatar_env.DTYPE
12 | JaxQuantileAgent.stack_size = 1
13 | JaxQuantileAgent.network = @minatar_env.MinatarQuantileNetwork
14 | JaxQuantileAgent.kappa = 1.0
15 | JaxQuantileAgent.num_atoms = 200
16 | JaxQuantileAgent.gamma = 0.99
17 | JaxQuantileAgent.update_horizon = 3
18 | JaxQuantileAgent.min_replay_history = 1000 # agent steps
19 | JaxQuantileAgent.update_period = 4
20 | JaxQuantileAgent.target_update_period = 1000 # agent steps
21 | JaxQuantileAgent.epsilon_train = 0.01
22 | JaxQuantileAgent.epsilon_eval = 0.001
23 | JaxQuantileAgent.replay_scheme = 'prioritized'
24 | JaxQuantileAgent.optimizer = 'adam'
25 | create_optimizer.learning_rate = 0.00025
26 | create_optimizer.eps = 3.125e-4
27 |
28 | create_minatar_env.game_name = 'breakout'
29 | TrainRunner.create_environment_fn = @minatar_env.create_minatar_env
30 | create_runner.schedule = 'continuous_train'
31 | create_agent.agent_name = 'jax_quantile'
32 | create_agent.debug_mode = True
33 | Runner.num_iterations = 10
34 | Runner.training_steps = 1000000
35 | Runner.max_steps_per_episode = 100000000
36 |
37 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 100000
38 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32
39 |
--------------------------------------------------------------------------------
/dopamine/labs/environments/minatar/quantile_freeway.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters follow Dabney et al. (2017) but we modify as necessary to
2 | # match those used in Rainbow (Hessel et al., 2018), to ensure apples-to-apples
3 | # comparison.
4 | import dopamine.discrete_domains.atari_lib
5 | import dopamine.discrete_domains.run_experiment
6 | import dopamine.jax.agents.quantile.quantile_agent
7 | import dopamine.labs.environments.minatar.minatar_env
8 | import dopamine.replay_memory.prioritized_replay_buffer
9 |
10 | JaxQuantileAgent.observation_shape = %minatar_env.FREEWAY_SHAPE
11 | JaxQuantileAgent.observation_dtype = %minatar_env.DTYPE
12 | JaxQuantileAgent.stack_size = 1
13 | JaxQuantileAgent.network = @minatar_env.MinatarQuantileNetwork
14 | JaxQuantileAgent.kappa = 1.0
15 | JaxQuantileAgent.num_atoms = 200
16 | JaxQuantileAgent.gamma = 0.99
17 | JaxQuantileAgent.update_horizon = 3
18 | JaxQuantileAgent.min_replay_history = 1000 # agent steps
19 | JaxQuantileAgent.update_period = 4
20 | JaxQuantileAgent.target_update_period = 1000 # agent steps
21 | JaxQuantileAgent.epsilon_train = 0.01
22 | JaxQuantileAgent.epsilon_eval = 0.001
23 | JaxQuantileAgent.replay_scheme = 'prioritized'
24 | JaxQuantileAgent.optimizer = 'adam'
25 | create_optimizer.learning_rate = 0.00025
26 | create_optimizer.eps = 3.125e-4
27 |
28 | create_minatar_env.game_name = 'freeway'
29 | TrainRunner.create_environment_fn = @minatar_env.create_minatar_env
30 | create_runner.schedule = 'continuous_train'
31 | create_agent.agent_name = 'jax_quantile'
32 | create_agent.debug_mode = True
33 | Runner.num_iterations = 10
34 | Runner.training_steps = 1000000
35 | Runner.max_steps_per_episode = 100000000
36 |
37 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 100000
38 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32
39 |
--------------------------------------------------------------------------------
/dopamine/labs/environments/minatar/quantile_seaquest.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters follow Dabney et al. (2017) but we modify as necessary to
2 | # match those used in Rainbow (Hessel et al., 2018), to ensure apples-to-apples
3 | # comparison.
4 | import dopamine.discrete_domains.atari_lib
5 | import dopamine.discrete_domains.run_experiment
6 | import dopamine.labs.environments.minatar.minatar_env
7 | import dopamine.jax.agents.quantile.quantile_agent
8 | import dopamine.replay_memory.prioritized_replay_buffer
9 |
10 | JaxQuantileAgent.observation_shape = %minatar_env.SEAQUEST_SHAPE
11 | JaxQuantileAgent.observation_dtype = %minatar_env.DTYPE
12 | JaxQuantileAgent.stack_size = 1
13 | JaxQuantileAgent.network = @minatar_env.MinatarQuantileNetwork
14 | JaxQuantileAgent.kappa = 1.0
15 | JaxQuantileAgent.num_atoms = 200
16 | JaxQuantileAgent.gamma = 0.99
17 | JaxQuantileAgent.update_horizon = 3
18 | JaxQuantileAgent.min_replay_history = 1000 # agent steps
19 | JaxQuantileAgent.update_period = 4
20 | JaxQuantileAgent.target_update_period = 1000 # agent steps
21 | JaxQuantileAgent.epsilon_train = 0.01
22 | JaxQuantileAgent.epsilon_eval = 0.001
23 | JaxQuantileAgent.replay_scheme = 'prioritized'
24 | JaxQuantileAgent.optimizer = 'adam'
25 | create_optimizer.learning_rate = 0.00025
26 | create_optimizer.eps = 3.125e-4
27 |
28 | create_minatar_env.game_name = 'seaquest'
29 | TrainRunner.create_environment_fn = @minatar_env.create_minatar_env
30 | create_runner.schedule = 'continuous_train'
31 | create_agent.agent_name = 'jax_quantile'
32 | create_agent.debug_mode = True
33 | Runner.num_iterations = 10
34 | Runner.training_steps = 1000000
35 | Runner.max_steps_per_episode = 100000000
36 |
37 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 100000
38 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32
39 |
--------------------------------------------------------------------------------
/dopamine/labs/environments/minatar/quantile_space_invaders.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters follow Dabney et al. (2017) but we modify as necessary to
2 | # match those used in Rainbow (Hessel et al., 2018), to ensure apples-to-apples
3 | # comparison.
4 | import dopamine.discrete_domains.atari_lib
5 | import dopamine.discrete_domains.run_experiment
6 | import dopamine.jax.agents.quantile.quantile_agent
7 | import dopamine.labs.environments.minatar.minatar_env
8 | import dopamine.replay_memory.prioritized_replay_buffer
9 |
10 | JaxQuantileAgent.observation_shape = %minatar_env.SPACE_INVADERS_SHAPE
11 | JaxQuantileAgent.observation_dtype = %minatar_env.DTYPE
12 | JaxQuantileAgent.stack_size = 1
13 | JaxQuantileAgent.network = @minatar_env.MinatarQuantileNetwork
14 | JaxQuantileAgent.kappa = 1.0
15 | JaxQuantileAgent.num_atoms = 200
16 | JaxQuantileAgent.gamma = 0.99
17 | JaxQuantileAgent.update_horizon = 3
18 | JaxQuantileAgent.min_replay_history = 1000 # agent steps
19 | JaxQuantileAgent.update_period = 4
20 | JaxQuantileAgent.target_update_period = 1000 # agent steps
21 | JaxQuantileAgent.epsilon_train = 0.01
22 | JaxQuantileAgent.epsilon_eval = 0.001
23 | JaxQuantileAgent.replay_scheme = 'prioritized'
24 | JaxQuantileAgent.optimizer = 'adam'
25 | create_optimizer.learning_rate = 0.00025
26 | create_optimizer.eps = 3.125e-4
27 |
28 | create_minatar_env.game_name = 'space_invaders'
29 | TrainRunner.create_environment_fn = @minatar_env.create_minatar_env
30 | create_runner.schedule = 'continuous_train'
31 | create_agent.agent_name = 'jax_quantile'
32 | create_agent.debug_mode = True
33 | Runner.num_iterations = 10
34 | Runner.training_steps = 1000000
35 | Runner.max_steps_per_episode = 100000000
36 |
37 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 100000
38 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32
39 |
--------------------------------------------------------------------------------
/dopamine/labs/environments/minatar/rainbow_asterix.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters for a simple Rainbow-style Minatar agent. The
2 | # hyperparameters chosen achieve reasonable performance.
3 | import dopamine.discrete_domains.gym_lib
4 | import dopamine.discrete_domains.run_experiment
5 | import dopamine.jax.agents.rainbow.rainbow_agent
6 | import dopamine.jax.networks
7 | import dopamine.labs.environments.minatar.minatar_env
8 | import dopamine.replay_memory.prioritized_replay_buffer
9 |
10 | JaxRainbowAgent.observation_shape = %minatar_env.ASTERIX_SHAPE
11 | JaxRainbowAgent.observation_dtype = %minatar_env.DTYPE
12 | JaxRainbowAgent.stack_size = 1
13 | JaxRainbowAgent.network = @minatar_env.MinatarRainbowNetwork
14 | JaxRainbowAgent.num_atoms = 51
15 | JaxRainbowAgent.vmax = 100.
16 | JaxRainbowAgent.gamma = 0.99
17 | JaxRainbowAgent.update_horizon = 3
18 | JaxRainbowAgent.min_replay_history = 1000
19 | JaxRainbowAgent.update_period = 4
20 | JaxRainbowAgent.target_update_period = 1000
21 | JaxRainbowAgent.replay_scheme = 'prioritized'
22 | create_optimizer.learning_rate = 0.00025
23 | create_optimizer.eps = 3.125e-4
24 |
25 | create_minatar_env.game_name = 'asterix'
26 | TrainRunner.create_environment_fn = @minatar_env.create_minatar_env
27 | create_runner.schedule = 'continuous_train'
28 | create_agent.agent_name = 'jax_rainbow'
29 | create_agent.debug_mode = True
30 | Runner.num_iterations = 10
31 | Runner.training_steps = 1000000
32 | Runner.max_steps_per_episode = 100000000
33 |
34 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 100000
35 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32
36 |
--------------------------------------------------------------------------------
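In these Rainbow MinAtar configs, `num_atoms = 51` and `vmax = 100.` fix the support of the categorical value distribution (C51). A small illustration, under the standard C51 convention of a symmetric, evenly spaced support on `[-vmax, vmax]`:

```python
# Illustration of the categorical support implied by num_atoms = 51, vmax = 100
# (standard C51 convention: evenly spaced atoms on [-vmax, vmax]).
import numpy as np

num_atoms, vmax = 51, 100.0
support = np.linspace(-vmax, vmax, num_atoms)
delta_z = support[1] - support[0]
print(support[:3], delta_z)  # [-100. -96. -92.] with atom spacing 4.0
```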
/dopamine/labs/environments/minatar/rainbow_breakout.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters for a simple Rainbow-style Minatar agent. The
2 | # hyperparameters chosen achieve reasonable performance.
3 | import dopamine.discrete_domains.gym_lib
4 | import dopamine.discrete_domains.run_experiment
5 | import dopamine.jax.agents.rainbow.rainbow_agent
6 | import dopamine.jax.networks
7 | import dopamine.labs.environments.minatar.minatar_env
8 | import dopamine.replay_memory.prioritized_replay_buffer
9 |
10 | JaxRainbowAgent.observation_shape = %minatar_env.BREAKOUT_SHAPE
11 | JaxRainbowAgent.observation_dtype = %minatar_env.DTYPE
12 | JaxRainbowAgent.stack_size = 1
13 | JaxRainbowAgent.network = @minatar_env.MinatarRainbowNetwork
14 | JaxRainbowAgent.num_atoms = 51
15 | JaxRainbowAgent.vmax = 100.
16 | JaxRainbowAgent.gamma = 0.99
17 | JaxRainbowAgent.update_horizon = 3
18 | JaxRainbowAgent.min_replay_history = 1000
19 | JaxRainbowAgent.update_period = 4
20 | JaxRainbowAgent.target_update_period = 1000
21 | JaxRainbowAgent.replay_scheme = 'prioritized'
22 | create_optimizer.learning_rate = 0.00025
23 | create_optimizer.eps = 3.125e-4
24 |
25 | create_minatar_env.game_name = 'breakout'
26 | TrainRunner.create_environment_fn = @minatar_env.create_minatar_env
27 | create_runner.schedule = 'continuous_train'
28 | create_agent.agent_name = 'jax_rainbow'
29 | create_agent.debug_mode = True
30 | Runner.num_iterations = 10
31 | Runner.training_steps = 1000000
32 | Runner.max_steps_per_episode = 100000000
33 |
34 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 100000
35 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32
36 |
--------------------------------------------------------------------------------
/dopamine/labs/environments/minatar/rainbow_freeway.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters for a simple Rainbow-style Minatar agent. The
2 | # hyperparameters chosen achieve reasonable performance.
3 | import dopamine.discrete_domains.gym_lib
4 | import dopamine.discrete_domains.run_experiment
5 | import dopamine.jax.agents.rainbow.rainbow_agent
6 | import dopamine.jax.networks
7 | import dopamine.labs.environments.minatar.minatar_env
8 | import dopamine.replay_memory.prioritized_replay_buffer
9 |
10 | JaxRainbowAgent.observation_shape = %minatar_env.FREEWAY_SHAPE
11 | JaxRainbowAgent.observation_dtype = %minatar_env.DTYPE
12 | JaxRainbowAgent.stack_size = 1
13 | JaxRainbowAgent.network = @minatar_env.MinatarRainbowNetwork
14 | JaxRainbowAgent.num_atoms = 51
15 | JaxRainbowAgent.vmax = 100.
16 | JaxRainbowAgent.gamma = 0.99
17 | JaxRainbowAgent.update_horizon = 3
18 | JaxRainbowAgent.min_replay_history = 1000
19 | JaxRainbowAgent.update_period = 4
20 | JaxRainbowAgent.target_update_period = 1000
21 | JaxRainbowAgent.replay_scheme = 'prioritized'
22 | create_optimizer.learning_rate = 0.00025
23 | create_optimizer.eps = 3.125e-4
24 |
25 | create_minatar_env.game_name = 'freeway'
26 | TrainRunner.create_environment_fn = @minatar_env.create_minatar_env
27 | create_runner.schedule = 'continuous_train'
28 | create_agent.agent_name = 'jax_rainbow'
29 | create_agent.debug_mode = True
30 | Runner.num_iterations = 10
31 | Runner.training_steps = 1000000
32 | Runner.max_steps_per_episode = 100000000
33 |
34 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 100000
35 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32
36 |
--------------------------------------------------------------------------------
/dopamine/labs/environments/minatar/rainbow_seaquest.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters for a simple Rainbow-style Minatar agent. The
2 | # hyperparameters chosen achieve reasonable performance.
3 | import dopamine.discrete_domains.gym_lib
4 | import dopamine.discrete_domains.run_experiment
5 | import dopamine.jax.agents.rainbow.rainbow_agent
6 | import dopamine.jax.networks
7 | import dopamine.labs.environments.minatar.minatar_env
8 | import dopamine.replay_memory.prioritized_replay_buffer
9 |
10 | JaxRainbowAgent.observation_shape = %minatar_env.SEAQUEST_SHAPE
11 | JaxRainbowAgent.observation_dtype = %minatar_env.DTYPE
12 | JaxRainbowAgent.stack_size = 1
13 | JaxRainbowAgent.network = @minatar_env.MinatarRainbowNetwork
14 | JaxRainbowAgent.num_atoms = 51
15 | JaxRainbowAgent.vmax = 100.
16 | JaxRainbowAgent.gamma = 0.99
17 | JaxRainbowAgent.update_horizon = 3
18 | JaxRainbowAgent.min_replay_history = 1000
19 | JaxRainbowAgent.update_period = 4
20 | JaxRainbowAgent.target_update_period = 1000
21 | JaxRainbowAgent.replay_scheme = 'prioritized'
22 | create_optimizer.learning_rate = 0.00025
23 | create_optimizer.eps = 3.125e-4
24 |
25 | create_minatar_env.game_name = 'seaquest'
26 | TrainRunner.create_environment_fn = @minatar_env.create_minatar_env
27 | create_runner.schedule = 'continuous_train'
28 | create_agent.agent_name = 'jax_rainbow'
29 | create_agent.debug_mode = True
30 | Runner.num_iterations = 10
31 | Runner.training_steps = 1000000
32 | Runner.max_steps_per_episode = 100000000
33 |
34 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 100000
35 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32
36 |
--------------------------------------------------------------------------------
/dopamine/labs/environments/minatar/rainbow_space_invaders.gin:
--------------------------------------------------------------------------------
1 | # Hyperparameters for a simple Rainbow-style Minatar agent. The
2 | # hyperparameters chosen achieve reasonable performance.
3 | import dopamine.discrete_domains.gym_lib
4 | import dopamine.discrete_domains.run_experiment
5 | import dopamine.jax.agents.rainbow.rainbow_agent
6 | import dopamine.jax.networks
7 | import dopamine.labs.environments.minatar.minatar_env
8 | import dopamine.replay_memory.prioritized_replay_buffer
9 |
10 | JaxRainbowAgent.observation_shape = %minatar_env.SPACE_INVADERS_SHAPE
11 | JaxRainbowAgent.observation_dtype = %minatar_env.DTYPE
12 | JaxRainbowAgent.stack_size = 1
13 | JaxRainbowAgent.network = @minatar_env.MinatarRainbowNetwork
14 | JaxRainbowAgent.num_atoms = 51
15 | JaxRainbowAgent.vmax = 100.
16 | JaxRainbowAgent.gamma = 0.99
17 | JaxRainbowAgent.update_horizon = 3
18 | JaxRainbowAgent.min_replay_history = 1000
19 | JaxRainbowAgent.update_period = 4
20 | JaxRainbowAgent.target_update_period = 1000
21 | JaxRainbowAgent.replay_scheme = 'prioritized'
22 | create_optimizer.learning_rate = 0.00025
23 | create_optimizer.eps = 3.125e-4
24 |
25 | create_minatar_env.game_name = 'space_invaders'
26 | TrainRunner.create_environment_fn = @minatar_env.create_minatar_env
27 | create_runner.schedule = 'continuous_train'
28 | create_agent.agent_name = 'jax_rainbow'
29 | create_agent.debug_mode = True
30 | Runner.num_iterations = 10
31 | Runner.training_steps = 1000000
32 | Runner.max_steps_per_episode = 100000000
33 |
34 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 100000
35 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32
36 |
--------------------------------------------------------------------------------
/dopamine/labs/tandem_dqn/README.md:
--------------------------------------------------------------------------------
1 | # The Difficulty of Passive Learning in Deep Reinforcement Learning
2 |
3 | This is the Dopamine-based code accompanying the paper listed above.
4 | Although this code supports running classic control, MinAtar, and ALE
5 | environments, it was only used to run the classic control and MinAtar
6 | environments in the paper.
7 |
8 | See `run.sh` for an example of how to run it.
9 |
--------------------------------------------------------------------------------
/dopamine/labs/tandem_dqn/requirements.txt:
--------------------------------------------------------------------------------
1 | absl-py
2 | atari-py
3 | dopamine-rl
4 | gin-config
5 | gym
6 | numpy
7 | tensorflow
8 |
--------------------------------------------------------------------------------
/dopamine/labs/tandem_dqn/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Note that to run this on the classic control and ALE environments you need to
3 | # obtain the gin files for Dopamine JAX agents:
4 | # github.com/google/dopamine/tree/master/dopamine/jax/agents/dqn/configs
5 | set -e
6 | set -x
7 |
8 | virtualenv -p python3 .
9 | source ./bin/activate
10 |
11 | cd ..
12 | pip install -r tandem_dqn/requirements.txt
13 | python3 -m tandem_dqn.train \
14 | --base_dir=/tmp/tandem_dqn
15 |
--------------------------------------------------------------------------------
/dopamine/labs/tandem_dqn/train.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2021 The Tandem DQN authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | """Binary entry-point for Tandem RL experiments."""
16 |
17 | from absl import app
18 | from absl import flags
19 | from absl import logging
20 |
21 | from dopamine.discrete_domains import run_experiment as base_run_experiment
22 | from dopamine.labs.tandem_dqn import run_experiment
23 | import tensorflow as tf
24 |
25 |
26 |
27 | flags.DEFINE_string('base_dir', None,
28 | 'Base directory to host all required sub-directories.')
29 | flags.DEFINE_multi_string(
30 | 'gin_files', [], 'List of paths to gin configuration files (e.g.'
31 | '"dopamine/agents/dqn/dqn.gin").')
32 | flags.DEFINE_multi_string(
33 | 'gin_bindings', [],
34 | 'Gin bindings to override the values set in the config files '
35 | '(e.g. "DQNAgent.epsilon_train=0.1",'
36 | ' "create_environment.game_name="Pong"").')
37 |
38 | FLAGS = flags.FLAGS
39 |
40 |
41 | def main(unused_argv):
42 | """Main method.
43 |
44 | Args:
45 | unused_argv: Arguments (unused).
46 | """
47 | logging.set_verbosity(logging.INFO)
48 | tf.compat.v1.disable_v2_behavior()
49 |
50 | base_dir = FLAGS.base_dir
51 | gin_files = FLAGS.gin_files
52 | gin_bindings = FLAGS.gin_bindings
53 | base_run_experiment.load_gin_configs(gin_files, gin_bindings)
54 | runner = run_experiment.TandemRunner(
55 | base_dir, run_experiment.create_tandem_agents_and_checkpoints)
56 | runner.run_experiment()
57 |
58 |
59 | if __name__ == '__main__':
60 | flags.mark_flag_as_required('base_dir')
61 | app.run(main)
62 |
--------------------------------------------------------------------------------
/dopamine/replay_memory/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2018 The Dopamine Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
--------------------------------------------------------------------------------
/dopamine/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2018 The Dopamine Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
--------------------------------------------------------------------------------
/dopamine/utils/test_utils.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2018 The Dopamine Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | """Common testing utilities shared across agents."""
16 |
17 | from __future__ import absolute_import
18 | from __future__ import division
19 | from __future__ import print_function
20 |
21 |
22 |
23 | import mock
24 | import tensorflow as tf
25 |
26 |
27 | class MockReplayBuffer(object):
28 | """Mock ReplayBuffer to verify the way the agent interacts with it."""
29 |
30 | def __init__(self, is_jax=False):
31 | if is_jax:
32 | self.add = mock.Mock()
33 | self.add_count = 0
34 | self.sum_tree = mock.Mock()
35 | else:
36 | with tf.compat.v1.variable_scope(
37 | 'MockReplayBuffer', reuse=tf.compat.v1.AUTO_REUSE):
38 | self.add = mock.Mock()
39 | self.memory = mock.Mock()
40 | self.memory.add_count = 0
41 |
--------------------------------------------------------------------------------
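A hypothetical minimal test built on the mock above; it only exercises the mock's own interface (`add`, `add_count`), not a real agent, and the import path simply mirrors the module location shown.

```python
# Hypothetical minimal test around MockReplayBuffer; exercises only the mock's
# interface, not a real agent.
import tensorflow as tf
from dopamine.utils.test_utils import MockReplayBuffer


class MockReplayBufferTest(tf.test.TestCase):

  def test_jax_mock_records_adds(self):
    buffer = MockReplayBuffer(is_jax=True)
    buffer.add(observation=None, action=0, reward=1.0, terminal=False)
    buffer.add.assert_called_once()
    self.assertEqual(0, buffer.add_count)


if __name__ == '__main__':
  tf.test.main()
```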
/requirements.txt:
--------------------------------------------------------------------------------
1 | absl-py>=0.9.0
2 | astunparse>=1.6.3
3 | atari-py>=0.2.6
4 | cachetools>=4.1.1
5 | certifi>=2020.6.20
6 | chardet>=3.0.4
7 | cloudpickle>=1.3.0
8 | cycler>=0.10.0
9 | flax>=0.3.3
10 | future>=0.18.2
11 | gast>=0.3.3
12 | gin-config>=0.3.0
13 | google-auth>=1.19.2
14 | google-auth-oauthlib>=0.4.1
15 | google-pasta>=0.2.0
16 | grpcio>=1.30.0
17 | gym>=0.17.2
18 | h5py>=2.10.0
19 | idna>=2.10
20 | jax>=0.2.12
21 | jaxlib>=0.1.65
22 | Keras-Preprocessing>=1.1.2
23 | kiwisolver>=1.2.0
24 | Markdown>=3.2.2
25 | matplotlib>=3.3.0
26 | msgpack>=1.0.0
27 | numpy>=1.18.5
28 | oauthlib>=3.1.0
29 | opencv-python>=4.3.0.36
30 | opt-einsum>=3.3.0
31 | pandas>=1.0.5
32 | Pillow>=7.2.0
33 | protobuf>=3.12.2
34 | pyasn1>=0.4.8
35 | pyasn1-modules>=0.2.8
36 | pygame>=1.9.6
37 | pyglet>=1.5.0
38 | pyparsing>=2.4.7
39 | python-dateutil>=2.8.1
40 | pytz>=2020.1
41 | requests>=2.24.0
42 | requests-oauthlib>=1.3.0
43 | rsa>=4.6
44 | scipy>=1.4.1
45 | six>=1.15.0
46 | setuptools>=49.2.01
47 | tensorboard
48 | tensorboard-plugin-wit
49 | tensorflow
50 | tensorflow-estimator
51 | tensorflow-probability>=0.13.0
52 | termcolor>=1.1.0
53 | tf-slim>=1.1.0
54 | urllib3>=1.25.10
55 | Werkzeug>=1.0.1
56 | wrapt>=1.12.1
57 |
--------------------------------------------------------------------------------
/tests/dopamine/atari_init_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2018 The Dopamine Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | """A simple test for validating that the Atari env initializes."""
16 |
17 | import datetime
18 | import os
19 | import shutil
20 |
21 |
22 |
23 | from absl import flags
24 | from dopamine.discrete_domains import train
25 | import tensorflow as tf
26 |
27 |
28 | FLAGS = flags.FLAGS
29 |
30 |
31 | class AtariInitTest(tf.test.TestCase):
32 |
33 | def setUp(self):
34 | super(AtariInitTest, self).setUp()
35 | FLAGS.base_dir = os.path.join(
36 | '/tmp/dopamine_tests',
37 | datetime.datetime.utcnow().strftime('run_%Y_%m_%d_%H_%M_%S'))
38 | FLAGS.gin_files = ['dopamine/agents/dqn/configs/dqn.gin']
39 | # `num_iterations` set to zero to prevent runner execution.
40 | FLAGS.gin_bindings = [
41 | 'Runner.num_iterations=0',
42 | 'WrappedReplayBuffer.replay_capacity = 100' # To prevent OOM.
43 | ]
44 | FLAGS.alsologtostderr = True
45 |
46 | def test_atari_init(self):
47 | """Tests that a DQN agent is initialized."""
48 | train.main([])
49 | shutil.rmtree(FLAGS.base_dir)
50 |
51 |
52 | if __name__ == '__main__':
53 | tf.compat.v1.disable_v2_behavior()
54 | tf.test.main()
55 |
--------------------------------------------------------------------------------
/tests/dopamine/discrete_domains/gym_lib_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2018 The Dopamine Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | """Tests for dopamine.discrete_domains.gym_lib."""
16 |
17 | from __future__ import absolute_import
18 | from __future__ import division
19 | from __future__ import print_function
20 |
21 |
22 |
23 | from dopamine.discrete_domains import gym_lib
24 | import tensorflow as tf
25 |
26 |
27 | class MockGymEnvironment(object):
28 | """Mock environment for testing."""
29 |
30 | def __init__(self):
31 | self.observation_space = 'observation_space'
32 | self.action_space = 'action_space'
33 | self.reward_range = 'reward_range'
34 | self.metadata = 'metadata'
35 |
36 | def reset(self):
37 | return 'reset'
38 |
39 | def step(self, unused_action):
40 | return 'obs', 'rew', False, {}
41 |
42 |
43 | class GymPreprocessingTest(tf.test.TestCase):
44 |
45 | def testAll(self):
46 | env = gym_lib.GymPreprocessing(MockGymEnvironment())
47 | self.assertEqual('observation_space', env.observation_space)
48 | self.assertEqual('action_space', env.action_space)
49 | self.assertEqual('reward_range', env.reward_range)
50 | self.assertEqual('metadata', env.metadata)
51 | self.assertEqual('reset', env.reset())
52 | self.assertAllEqual(['obs', 'rew', False, {}], env.step(0))
53 |
54 |
55 | if __name__ == '__main__':
56 | tf.compat.v1.disable_v2_behavior()
57 | tf.test.main()
58 |
--------------------------------------------------------------------------------
/tests/dopamine/utils/agent_visualizer_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2019 The Dopamine Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | """Tests for dopamine.utils.agent_visualizer."""
16 | from __future__ import absolute_import
17 | from __future__ import division
18 | from __future__ import print_function
19 |
20 | import os
21 | import shutil
22 |
23 |
24 |
25 | from absl import flags
26 | from dopamine.utils.agent_visualizer import AgentVisualizer
27 | from dopamine.utils.line_plotter import LinePlotter
28 | import numpy as np
29 | from PIL import Image
30 | import tensorflow as tf
31 |
32 |
33 | FLAGS = flags.FLAGS
34 |
35 |
36 | class AgentVisualizerTest(tf.test.TestCase):
37 |
38 | def setUp(self):
39 | super(AgentVisualizerTest, self).setUp()
40 | self._test_subdir = os.path.join('/tmp/dopamine_tests', 'agent_visualizer')
41 | shutil.rmtree(self._test_subdir, ignore_errors=True)
42 | os.makedirs(self._test_subdir)
43 |
44 | def test_agent_visualizer_save_frame(self):
45 | parameter_dict = LinePlotter._defaults.copy()
46 | parameter_dict['get_line_data_fn'] = lambda: [[1, 2, 3]]
47 | plotter = LinePlotter(parameter_dict=parameter_dict)
48 |
49 | agent_visualizer = AgentVisualizer(self._test_subdir, [plotter])
50 | agent_visualizer.save_frame()
51 |
52 | frame_filename = os.path.join(self._test_subdir, 'frame_000000.png')
53 | self.assertTrue(tf.io.gfile.exists(frame_filename))
54 |
55 | im = Image.open(frame_filename)
56 | im_arr = np.array(im)
57 | self.assertTrue(np.array_equal(im_arr, agent_visualizer.record_frame))
58 |
59 | if __name__ == '__main__':
60 | tf.compat.v1.disable_v2_behavior()
61 | tf.test.main()
62 |
--------------------------------------------------------------------------------