├── .gitignore ├── LICENSE ├── Neurips2022_rebuttal.ipynb ├── README.md ├── docker ├── README.md ├── atari │ └── Dockerfile ├── core │ └── Dockerfile └── mujoco │ └── Dockerfile ├── docs ├── README.md ├── agents.md ├── api_docs │ └── python │ │ ├── _toc.yaml │ │ ├── dopamine.md │ │ ├── dopamine │ │ ├── _api_cache.json │ │ ├── agents.md │ │ ├── agents │ │ │ ├── dqn.md │ │ │ ├── dqn │ │ │ │ ├── dqn_agent.md │ │ │ │ └── dqn_agent │ │ │ │ │ └── DQNAgent.md │ │ │ ├── implicit_quantile.md │ │ │ ├── implicit_quantile │ │ │ │ ├── implicit_quantile_agent.md │ │ │ │ └── implicit_quantile_agent │ │ │ │ │ └── ImplicitQuantileAgent.md │ │ │ ├── rainbow.md │ │ │ └── rainbow │ │ │ │ ├── rainbow_agent.md │ │ │ │ └── rainbow_agent │ │ │ │ ├── RainbowAgent.md │ │ │ │ └── project_distribution.md │ │ ├── colab.md │ │ ├── colab │ │ │ ├── utils.md │ │ │ └── utils │ │ │ │ ├── get_latest_file.md │ │ │ │ ├── get_latest_iteration.md │ │ │ │ ├── load_baselines.md │ │ │ │ ├── load_statistics.md │ │ │ │ ├── read_experiment.md │ │ │ │ └── summarize_data.md │ │ ├── discrete_domains.md │ │ ├── discrete_domains │ │ │ ├── atari_lib.md │ │ │ ├── atari_lib │ │ │ │ ├── AtariPreprocessing.md │ │ │ │ └── create_atari_environment.md │ │ │ ├── checkpointer.md │ │ │ ├── checkpointer │ │ │ │ └── Checkpointer.md │ │ │ ├── gym_lib.md │ │ │ ├── gym_lib │ │ │ │ ├── GymPreprocessing.md │ │ │ │ └── create_gym_environment.md │ │ │ ├── iteration_statistics.md │ │ │ ├── iteration_statistics │ │ │ │ └── IterationStatistics.md │ │ │ ├── logger.md │ │ │ ├── logger │ │ │ │ └── Logger.md │ │ │ ├── run_experiment.md │ │ │ ├── run_experiment │ │ │ │ ├── Runner.md │ │ │ │ ├── TrainRunner.md │ │ │ │ ├── create_agent.md │ │ │ │ └── create_runner.md │ │ │ └── train.md │ │ ├── jax.md │ │ ├── jax │ │ │ ├── agents.md │ │ │ ├── agents │ │ │ │ ├── dqn.md │ │ │ │ ├── dqn │ │ │ │ │ └── dqn_agent.md │ │ │ │ ├── implicit_quantile.md │ │ │ │ ├── implicit_quantile │ │ │ │ │ └── implicit_quantile_agent.md │ │ │ │ ├── quantile.md │ │ │ │ ├── quantile │ │ │ │ │ └── quantile_agent.md │ │ │ │ ├── rainbow.md │ │ │ │ └── rainbow │ │ │ │ │ ├── rainbow_agent.md │ │ │ │ │ └── rainbow_agent │ │ │ │ │ └── project_distribution.md │ │ │ └── networks.md │ │ ├── replay_memory.md │ │ └── replay_memory │ │ │ ├── circular_replay_buffer.md │ │ │ ├── circular_replay_buffer │ │ │ ├── OutOfGraphReplayBuffer.md │ │ │ └── WrappedReplayBuffer.md │ │ │ ├── prioritized_replay_buffer.md │ │ │ ├── prioritized_replay_buffer │ │ │ ├── OutOfGraphPrioritizedReplayBuffer.md │ │ │ └── WrappedPrioritizedReplayBuffer.md │ │ │ └── sum_tree.md │ │ └── index.md └── changelist.md ├── dopamine ├── __init__.py ├── agents │ ├── __init__.py │ ├── dqn │ │ ├── __init__.py │ │ ├── configs │ │ │ ├── dqn.gin │ │ │ ├── dqn_acrobot.gin │ │ │ ├── dqn_cartpole.gin │ │ │ ├── dqn_icml.gin │ │ │ ├── dqn_lunarlander.gin │ │ │ ├── dqn_mountaincar.gin │ │ │ ├── dqn_nature.gin │ │ │ └── dqn_profiling.gin │ │ └── dqn_agent.py │ ├── implicit_quantile │ │ ├── __init__.py │ │ ├── configs │ │ │ ├── implicit_quantile.gin │ │ │ ├── implicit_quantile_icml.gin │ │ │ └── implicit_quantile_profiling.gin │ │ └── implicit_quantile_agent.py │ └── rainbow │ │ ├── __init__.py │ │ ├── configs │ │ ├── c51.gin │ │ ├── c51_acrobot.gin │ │ ├── c51_cartpole.gin │ │ ├── c51_icml.gin │ │ ├── c51_profiling.gin │ │ ├── rainbow_aaai.gin │ │ ├── rainbow_acrobot.gin │ │ ├── rainbow_cartpole.gin │ │ ├── rainbow_dqnpro.gin │ │ ├── rainbow_original.gin │ │ ├── rainbow_our_first_paper.gin │ │ ├── rainbow_our_second_paper.gin │ │ └── rainbow_profiling.gin 
│ │ └── rainbow_agent.py ├── colab │ ├── README.md │ ├── __init__.py │ ├── agent_visualizer.ipynb │ ├── agents.ipynb │ ├── cartpole.ipynb │ ├── jax_agent_visualizer.ipynb │ ├── load_statistics.ipynb │ └── utils.py ├── continuous_domains │ ├── __init__.py │ ├── run_experiment.py │ └── train.py ├── discrete_domains │ ├── __init__.py │ ├── atari_lib.py │ ├── checkpointer.py │ ├── gym_lib.py │ ├── iteration_statistics.py │ ├── legacy_networks.py │ ├── logger.py │ ├── run_experiment.py │ └── train.py ├── jax │ ├── README.md │ ├── __init__.py │ ├── agents │ │ ├── __init__.py │ │ ├── dqn │ │ │ ├── __init__.py │ │ │ ├── configs │ │ │ │ ├── dqn.gin │ │ │ │ ├── dqn_acrobot.gin │ │ │ │ ├── dqn_cartpole.gin │ │ │ │ ├── dqn_lunarlander.gin │ │ │ │ ├── dqn_mountaincar.gin │ │ │ │ └── dqn_profiling.gin │ │ │ └── dqn_agent.py │ │ ├── full_rainbow │ │ │ ├── __init__.py │ │ │ ├── configs │ │ │ │ ├── full_rainbow.gin │ │ │ │ └── full_rainbow_profiling.gin │ │ │ └── full_rainbow_agent.py │ │ ├── implicit_quantile │ │ │ ├── __init__.py │ │ │ ├── configs │ │ │ │ ├── implicit_quantile.gin │ │ │ │ └── implicit_quantile_profiling.gin │ │ │ └── implicit_quantile_agent.py │ │ ├── quantile │ │ │ ├── __init__.py │ │ │ ├── configs │ │ │ │ ├── quantile.gin │ │ │ │ └── quantile_profiling.gin │ │ │ └── quantile_agent.py │ │ ├── rainbow │ │ │ ├── __init__.py │ │ │ ├── configs │ │ │ │ ├── c51.gin │ │ │ │ ├── c51_acrobot.gin │ │ │ │ ├── c51_cartpole.gin │ │ │ │ ├── c51_profiling.gin │ │ │ │ ├── rainbow.gin │ │ │ │ ├── rainbow_acrobot.gin │ │ │ │ ├── rainbow_cartpole.gin │ │ │ │ └── rainbow_profiling.gin │ │ │ └── rainbow_agent.py │ │ └── sac │ │ │ ├── __init__.py │ │ │ ├── configs │ │ │ └── sac.gin │ │ │ └── sac_agent.py │ ├── continuous_networks.py │ ├── losses.py │ └── networks.py ├── labs │ ├── __init__.py │ ├── atari_100k │ │ ├── README.md │ │ ├── __init__.py │ │ ├── atari_100k_rainbow_agent.py │ │ ├── configs │ │ │ ├── DER.gin │ │ │ ├── DrQ.gin │ │ │ ├── DrQ_eps.gin │ │ │ └── OTRainbow.gin │ │ ├── eval_run_experiment.py │ │ └── train.py │ ├── environments │ │ ├── __init__.py │ │ └── minatar │ │ │ ├── __init__.py │ │ │ ├── dqn_asterix.gin │ │ │ ├── dqn_breakout.gin │ │ │ ├── dqn_freeway.gin │ │ │ ├── dqn_seaquest.gin │ │ │ ├── dqn_space_invaders.gin │ │ │ ├── minatar_env.py │ │ │ ├── quantile_asterix.gin │ │ │ ├── quantile_breakout.gin │ │ │ ├── quantile_freeway.gin │ │ │ ├── quantile_seaquest.gin │ │ │ ├── quantile_space_invaders.gin │ │ │ ├── rainbow_asterix.gin │ │ │ ├── rainbow_breakout.gin │ │ │ ├── rainbow_freeway.gin │ │ │ ├── rainbow_seaquest.gin │ │ │ └── rainbow_space_invaders.gin │ ├── sac_from_pixels │ │ ├── continuous_networks.py │ │ ├── deepmind_control_lib.py │ │ └── sac_pixels.gin │ └── tandem_dqn │ │ ├── README.md │ │ ├── requirements.txt │ │ ├── run.sh │ │ ├── run_experiment.py │ │ ├── tandem_dqn_agent.py │ │ └── train.py ├── replay_memory │ ├── __init__.py │ ├── circular_replay_buffer.py │ ├── prioritized_replay_buffer.py │ └── sum_tree.py └── utils │ ├── __init__.py │ ├── agent_visualizer.py │ ├── atari_plotter.py │ ├── bar_plotter.py │ ├── example_viz.py │ ├── example_viz_lib.py │ ├── line_plotter.py │ ├── plotter.py │ └── test_utils.py ├── extract_reward.py ├── plot_learning_curves.ipynb ├── requirements.txt ├── run_agents.ipynb ├── setup.py └── tests └── dopamine ├── agents ├── dqn │ └── dqn_agent_test.py ├── implicit_quantile │ └── implicit_quantile_agent_test.py └── rainbow │ └── rainbow_agent_test.py ├── atari_init_test.py ├── continuous_domains └── run_experiment_test.py ├── discrete_domains 
├── atari_lib_test.py ├── checkpointer_test.py ├── gym_lib_test.py ├── iteration_statistics_test.py ├── logger_test.py └── run_experiment_test.py ├── jax ├── agents │ ├── dqn │ │ └── dqn_agent_test.py │ ├── full_rainbow │ │ └── full_rainbow_agent_test.py │ ├── implicit_quantile │ │ └── implicit_quantile_agent_test.py │ ├── quantile │ │ └── quantile_agent_test.py │ ├── rainbow │ │ └── rainbow_agent_test.py │ └── sac │ │ └── sac_agent_test.py ├── continuous_networks_test.py ├── losses_test.py └── networks_test.py ├── labs ├── atari_100k │ └── train_test.py └── sac_from_pixels │ ├── continuous_networks_test.py │ └── deepmind_control_lib_test.py ├── replay_memory ├── circular_replay_buffer_test.py ├── prioritized_replay_buffer_test.py └── sum_tree_test.py ├── tests ├── gin_config_test.py ├── integration_test.py └── train_runner_integration_test.py └── utils └── agent_visualizer_test.py /.gitignore: -------------------------------------------------------------------------------- 1 | tmp 2 | *results* 3 | *DS_* 4 | *images/* 5 | *.ipynb_checkpoints* 6 | -------------------------------------------------------------------------------- /docker/atari/Dockerfile: -------------------------------------------------------------------------------- 1 | # Note: this Dockerfile expects that Atari ROMs retrieved following the 2 | # instructions from atari-py: https://github.com/openai/atari-py#roms. 3 | # It should specify a directory (e.g. ~/roms) that contains ROMS.rar. 4 | # It should be run from the rom directory. 5 | 6 | ARG base_image=dopamine/core 7 | FROM ${base_image} 8 | 9 | # Copy ROMs into the image. 10 | RUN mkdir /root/roms 11 | COPY ./Roms.rar /root/roms/ 12 | 13 | RUN apt-get install rar unzip -y 14 | RUN rar x /root/roms/Roms.rar /root/roms/ 15 | 16 | # Install ROMs with ale-py. 17 | RUN pip install atari_py ale-py 18 | RUN unzip /root/roms/ROMS.zip -d /root/roms 19 | RUN python -m atari_py.import_roms /root/roms 20 | RUN ale-import-roms /root/roms/ROMS 21 | -------------------------------------------------------------------------------- /docker/core/Dockerfile: -------------------------------------------------------------------------------- 1 | # If you want to use a different version of CUDA, view the available 2 | # images here: https://hub.docker.com/r/nvidia/cuda 3 | # Note: 4 | # - Jax currently supports CUDA versions up to 11.3. 5 | # - Tensorflow required CUDA versions after 11.2. 6 | ARG cuda_docker_tag="11.2.2-cudnn8-devel-ubuntu20.04" 7 | FROM nvidia/cuda:${cuda_docker_tag} 8 | 9 | COPY . /root/dopamine/ 10 | 11 | RUN apt-get update 12 | # tzdata is required below. To avoid hanging, install it first. 13 | RUN DEBIAN_FRONTEND="noninteractive" apt-get install tzdata -y 14 | RUN apt-get install git wget libgl1-mesa-glx -y 15 | 16 | # Install python3.8. 17 | RUN apt-get install software-properties-common -y 18 | RUN add-apt-repository ppa:deadsnakes/ppa -y 19 | RUN apt-get install python3.8 -y 20 | 21 | # Make python3.8 the default python. 22 | RUN rm /usr/bin/python3 23 | RUN ln -s /usr/bin/python3.8 /usr/bin/python3 24 | RUN ln -s /usr/bin/python3.8 /usr/bin/python 25 | RUN apt-get install python3-distutils -y 26 | 27 | # Install pip. 28 | RUN wget https://bootstrap.pypa.io/get-pip.py 29 | RUN python get-pip.py 30 | RUN rm get-pip.py 31 | 32 | # Install Dopamine dependencies. 33 | RUN pip install -r /root/dopamine/requirements.txt 34 | 35 | # Install JAX for GPU, overriding requirements.txt. 
36 | RUN pip install --upgrade "jax[cuda111]" -f https://storage.googleapis.com/jax-releases/jax_releases.html 37 | 38 | WORKDIR /root/dopamine 39 | -------------------------------------------------------------------------------- /docker/mujoco/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG base_image=dopamine/core 2 | FROM ${base_image} 3 | 4 | # Create Mujoco subdir. 5 | RUN mkdir /root/.mujoco 6 | COPY mjkey.txt /root/.mujoco/mjkey.txt 7 | 8 | # Prerequisites 9 | RUN apt-get install \ 10 | libosmesa6-dev \ 11 | libgl1-mesa-glx \ 12 | libglfw3 \ 13 | libglew-dev \ 14 | patchelf \ 15 | gcc \ 16 | python3.8-dev \ 17 | unzip -y 18 | 19 | # Download and install mujoco. 20 | RUN wget https://www.roboti.us/download/mujoco200_linux.zip 21 | RUN unzip mujoco200_linux.zip 22 | RUN rm mujoco200_linux.zip 23 | RUN mv mujoco200_linux /root/.mujoco/mujoco200 24 | 25 | # Add LD_LIBRARY_PATH environment variable. 26 | ENV LD_LIBRARY_PATH "/root/.mujoco/mujoco200/bin:${LD_LIBRARY_PATH}" 27 | RUN echo 'export LD_LIBRARY_PATH=/root/.mujoco/mujoco200/bin:${LD_LIBRARY_PATH}' >> /etc/bash.bashrc 28 | 29 | # Finally, install mujoco_py. 30 | RUN pip install mujoco_py 31 | -------------------------------------------------------------------------------- /docs/agents.md: -------------------------------------------------------------------------------- 1 | # DQN And Rainbow 2 | 3 | 4 | In the spirit of these principles, this first version focuses on supporting the 5 | state-of-the-art, single-GPU *Rainbow* agent ([Hessel et al., 2018][rainbow]) 6 | applied to Atari 2600 game-playing ([Bellemare et al., 2013][ale]). 7 | Specifically, our Rainbow agent implements the three components identified as 8 | most important by [Hessel et al.][rainbow]: 9 | 10 | * n-step Bellman updates (see e.g. [Mnih et al., 2016][a3c]) 11 | * Prioritized experience replay ([Schaul et al., 2015][prioritized_replay]) 12 | * Distributional reinforcement learning ([C51; Bellemare et al., 2017][c51]) 13 | 14 | For completeness, we also provide an implementation of DQN ([Mnih et al., 15 | 2015][dqn]). 16 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine.md: -------------------------------------------------------------------------------- 1 |
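Before the generated API pages that follow, here is a minimal sketch of how the DQN and Rainbow agents described in `docs/agents.md` above can be constructed programmatically. It is illustrative only: the game name, the TF1 session, and the set of valid `agent_name` strings are assumptions to verify against `run_experiment.py`.

```python
# Hedged sketch: constructing one of the agents described above.
import tensorflow.compat.v1 as tf

from dopamine.discrete_domains import atari_lib, run_experiment

# Sticky actions follow Machado et al. (2018); 'Pong' is an arbitrary example
# and assumes Atari ROMs have been installed (see docker/atari/Dockerfile).
environment = atari_lib.create_atari_environment(
    game_name='Pong', sticky_actions=True)

with tf.Session() as sess:
  # 'dqn', 'rainbow', and 'implicit_quantile' are the agent names suggested by
  # the modules listed in this repository (assumption).
  agent = run_experiment.create_agent(sess, environment, agent_name='rainbow')
  sess.run(tf.global_variables_initializer())
```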
2 | 3 | 4 |
5 | 6 | # Module: dopamine 7 | 8 | 9 | 10 | 11 | 17 | 18 | 19 | ## Modules 20 | 21 | [`agents`](./dopamine/agents.md) module 22 | 23 | [`colab`](./dopamine/colab.md) module 24 | 25 | [`discrete_domains`](./dopamine/discrete_domains.md) module 26 | 27 | [`jax`](./dopamine/jax.md) module 28 | 29 | [`replay_memory`](./dopamine/replay_memory.md) module 30 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/agents.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 |
5 | 6 | # Module: dopamine.agents 7 | 8 | 9 | 10 | 11 | 17 | 18 | 19 | ## Modules 20 | 21 | [`dqn`](../dopamine/agents/dqn.md) module 22 | 23 | [`implicit_quantile`](../dopamine/agents/implicit_quantile.md) module 24 | 25 | [`rainbow`](../dopamine/agents/rainbow.md) module 26 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/agents/dqn.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 |
5 | 6 | # Module: dopamine.agents.dqn 7 | 8 | 9 | 10 | 11 | 17 | 18 | 19 | ## Modules 20 | 21 | [`dqn_agent`](../../dopamine/agents/dqn/dqn_agent.md) module: Compact 22 | implementation of a DQN agent. 23 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/agents/dqn/dqn_agent.md: -------------------------------------------------------------------------------- 1 | description: Compact implementation of a DQN agent. 2 | 3 |
4 | 5 | 6 |
7 | 8 | # Module: dopamine.agents.dqn.dqn_agent 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | Compact implementation of a DQN agent. 22 | 23 | ## Classes 24 | 25 | [`class DQNAgent`](../../../dopamine/agents/dqn/dqn_agent/DQNAgent.md): An 26 | implementation of the DQN agent. 27 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/agents/dqn/dqn_agent/DQNAgent.md: -------------------------------------------------------------------------------- 1 | description: An implementation of the DQN agent. 2 | 3 |
4 | 5 | 6 |
7 | 8 | # dopamine.agents.dqn.dqn_agent.DQNAgent 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | An implementation of the DQN agent. 22 | 23 | 24 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/agents/implicit_quantile.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 |
5 | 6 | # Module: dopamine.agents.implicit_quantile 7 | 8 | 9 | 10 | 11 | 17 | 18 | 19 | ## Modules 20 | 21 | [`implicit_quantile_agent`](../../dopamine/agents/implicit_quantile/implicit_quantile_agent.md) 22 | module: The implicit quantile networks (IQN) agent. 23 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/agents/implicit_quantile/implicit_quantile_agent.md: -------------------------------------------------------------------------------- 1 | description: The implicit quantile networks (IQN) agent. 2 | 3 |
4 | 5 | 6 |
7 | 8 | # Module: dopamine.agents.implicit_quantile.implicit_quantile_agent 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | The implicit quantile networks (IQN) agent. 22 | 23 | The agent follows the description given in "Implicit Quantile Networks for 24 | Distributional RL" (Dabney et al., 2018). 25 | 26 | ## Classes 27 | 28 | [`class ImplicitQuantileAgent`](../../../dopamine/agents/implicit_quantile/implicit_quantile_agent/ImplicitQuantileAgent.md): 29 | An extension of Rainbow to perform implicit quantile regression. 30 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/agents/implicit_quantile/implicit_quantile_agent/ImplicitQuantileAgent.md: -------------------------------------------------------------------------------- 1 | description: An extension of Rainbow to perform implicit quantile regression. 2 | 3 |
4 | 5 | 6 |
7 | 8 | # dopamine.agents.implicit_quantile.implicit_quantile_agent.ImplicitQuantileAgent 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | An extension of Rainbow to perform implicit quantile regression. 22 | 23 | Inherits From: 24 | [`RainbowAgent`](../../../../dopamine/agents/rainbow/rainbow_agent/RainbowAgent.md) 25 | 26 | 27 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/agents/rainbow.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 |
5 | 6 | # Module: dopamine.agents.rainbow 7 | 8 | 9 | 10 | 11 | 17 | 18 | 19 | ## Modules 20 | 21 | [`rainbow_agent`](../../dopamine/agents/rainbow/rainbow_agent.md) module: 22 | Compact implementation of a simplified Rainbow agent. 23 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/agents/rainbow/rainbow_agent.md: -------------------------------------------------------------------------------- 1 | description: Compact implementation of a simplified Rainbow agent. 2 | 3 |
4 | 5 | 6 |
7 | 8 | # Module: dopamine.agents.rainbow.rainbow_agent 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | Compact implementation of a simplified Rainbow agent. 22 | 23 | Specifically, we implement the following components from Rainbow: 24 | 25 | * n-step updates; 26 | * prioritized replay; and 27 | * distributional RL. 28 | 29 | These three components were found to significantly impact the performance of the 30 | Atari game-playing agent. 31 | 32 | Furthermore, our implementation does away with some minor hyperparameter 33 | choices. Specifically, we 34 | 35 | * keep the beta exponent fixed at beta=0.5, rather than increase it linearly; 36 | * remove the alpha parameter, which was set to alpha=0.5 throughout the paper. 37 | 38 | Details in "Rainbow: Combining Improvements in Deep Reinforcement Learning" by 39 | Hessel et al. (2018). 40 | 41 | ## Classes 42 | 43 | [`class RainbowAgent`](../../../dopamine/agents/rainbow/rainbow_agent/RainbowAgent.md): 44 | A compact implementation of a simplified Rainbow agent. 45 | 46 | ## Functions 47 | 48 | [`project_distribution(...)`](../../../dopamine/agents/rainbow/rainbow_agent/project_distribution.md): 49 | Projects a batch of (support, weights) onto target_support. 50 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/agents/rainbow/rainbow_agent/RainbowAgent.md: -------------------------------------------------------------------------------- 1 | description: A compact implementation of a simplified Rainbow agent. 2 | 3 |
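The `project_distribution` function listed above is the categorical (C51) projection step. As a rough illustration of the underlying computation, here is a NumPy sketch written for this page (it is not the repository's TensorFlow implementation and assumes a uniformly spaced `target_support`): each source atom's probability mass is split linearly between its two nearest target atoms, so total mass is preserved.

```python
import numpy as np


def project_distribution(supports, weights, target_support):
  """Sketch of projecting (supports, weights) onto a uniform target_support.

  Shapes: supports [batch, n], weights [batch, n], target_support [m].
  """
  v_min, v_max = target_support[0], target_support[-1]
  delta_z = target_support[1] - target_support[0]
  clipped = np.clip(supports, v_min, v_max)                      # [batch, n]
  # Pairwise distances between every source atom and every target atom.
  distances = np.abs(clipped[:, None, :] - target_support[None, :, None])
  # Linear interpolation: each source atom contributes to its two neighbours.
  contributions = np.clip(1.0 - distances / delta_z, 0.0, 1.0)   # [batch, m, n]
  return np.einsum('bmn,bn->bm', contributions, weights)
```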
4 | 5 | 6 |
7 | 8 | # dopamine.agents.rainbow.rainbow_agent.RainbowAgent 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | A compact implementation of a simplified Rainbow agent. 22 | 23 | Inherits From: 24 | [`DQNAgent`](../../../../dopamine/agents/dqn/dqn_agent/DQNAgent.md) 25 | 26 | 27 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/colab.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 |
5 | 6 | # Module: dopamine.colab 7 | 8 | 9 | 10 | 11 | 17 | 18 | 19 | ## Modules 20 | 21 | [`utils`](../dopamine/colab/utils.md) module: This provides utilities for 22 | dealing with Dopamine data. 23 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/colab/utils.md: -------------------------------------------------------------------------------- 1 | description: This provides utilities for dealing with Dopamine data. 2 | 3 |
4 | 5 | 6 |
7 | 8 | # Module: dopamine.colab.utils 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | This provides utilities for dealing with Dopamine data. 22 | 23 | See: dopamine/common/logger.py . 24 | 25 | ## Functions 26 | 27 | [`get_latest_file(...)`](../../dopamine/colab/utils/get_latest_file.md): Return 28 | the file named 'path_[0-9]*' with the largest such number. 29 | 30 | [`get_latest_iteration(...)`](../../dopamine/colab/utils/get_latest_iteration.md): 31 | Return the largest iteration number corresponding to the given path. 32 | 33 | [`load_baselines(...)`](../../dopamine/colab/utils/load_baselines.md): Reads in 34 | the baseline experimental data from a specified base directory. 35 | 36 | [`load_statistics(...)`](../../dopamine/colab/utils/load_statistics.md): Reads 37 | in a statistics object from log_path. 38 | 39 | [`read_experiment(...)`](../../dopamine/colab/utils/read_experiment.md): Reads 40 | in a set of experimental results from log_path. 41 | 42 | [`summarize_data(...)`](../../dopamine/colab/utils/summarize_data.md): Processes 43 | log data into a per-iteration summary. 44 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/colab/utils/get_latest_file.md: -------------------------------------------------------------------------------- 1 | description: Return the file named 'path_[0-9]*' with the largest such number. 2 | 3 |
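As a usage illustration, the utilities above are typically chained to turn raw experiment logs into per-iteration summaries. The log path below is hypothetical, and the exact return signature of `load_statistics` is an assumption to verify against `dopamine/colab/utils.py`.

```python
from dopamine.colab import utils as colab_utils

LOG_PATH = '/tmp/dopamine_run/logs'  # hypothetical experiment log directory

# Assumption: load_statistics returns the raw per-iteration data together with
# the iteration number it was read from.
raw_data, _ = colab_utils.load_statistics(LOG_PATH, verbose=True)

# Reduce the per-episode lists to one summary value per iteration.
summary = colab_utils.summarize_data(
    raw_data, ['train_episode_returns', 'eval_episode_returns'])
print(summary['train_episode_returns'])  # one averaged value per iteration
```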
4 | 5 | 6 |
7 | 8 | # dopamine.colab.utils.get_latest_file 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | Return the file named 'path_[0-9]*' with the largest such number. 22 | 23 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 40 | 43 | 44 |
38 | `path` 39 | 41 | The base path (including directory and base name) to search. 42 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 55 | 56 | 57 |
53 | The latest file (in terms of given numbers). 54 |
58 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/colab/utils/get_latest_iteration.md: -------------------------------------------------------------------------------- 1 | description: Return the largest iteration number corresponding to the given 2 | path. 3 | 4 |
5 | 6 | 7 |
8 | 9 | # dopamine.colab.utils.get_latest_iteration 10 | 11 | 12 | 13 | 14 | 20 | 21 | 22 | Return the largest iteration number corresponding to the given path. 23 | 24 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 41 | 44 | 45 |
39 | `path` 40 | 42 | The base path (including directory and base name) to search. 43 |
46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 56 | 57 | 58 |
54 | The latest iteration number. 55 |
59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 70 | 73 | 74 |
68 | `ValueError` 69 | 71 | if there is no available log data at the given path. 72 |
75 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/colab/utils/load_baselines.md: -------------------------------------------------------------------------------- 1 | description: Reads in the baseline experimental data from a specified base 2 | directory. 3 | 4 |
5 | 6 | 7 |
8 | 9 | # dopamine.colab.utils.load_baselines 10 | 11 | 12 | 13 | 14 | 20 | 21 | 22 | Reads in the baseline experimental data from a specified base directory. 23 | 24 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 41 | 44 | 45 | 48 | 51 | 52 |
39 | `base_dir` 40 | 42 | string, base directory where to read data from. 43 |
46 | `verbose` 47 | 49 | bool, whether to print warning messages. 50 |
53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 63 | 64 | 65 |
61 | A dict containing pandas DataFrames for all available agents and games. 62 |
66 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/colab/utils/summarize_data.md: -------------------------------------------------------------------------------- 1 | description: Processes log data into a per-iteration summary. 2 | 3 |
4 | 5 | 6 |
7 | 8 | # dopamine.colab.utils.summarize_data 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | Processes log data into a per-iteration summary. 22 | 23 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 40 | 45 | 46 | 49 | 52 | 53 |
38 | `data` 39 | 41 | Dictionary loaded by load_statistics describing the data. This 42 | dictionary has keys iteration_0, iteration_1, ... describing per-iteration 43 | data. 44 |
47 | `summary_keys` 48 | 50 | List of per-iteration data to be summarized. 51 |
54 | 55 | #### Example: 56 | 57 | data = load_statistics(...) summarize_data(data, ['train_episode_returns', 58 | 'eval_episode_returns']) 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 69 | 70 | 71 |
67 | A dictionary mapping each key in summary_keys to a per-iteration summary. 68 |
72 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/discrete_domains.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 |
5 | 6 | # Module: dopamine.discrete_domains 7 | 8 | 9 | 10 | 11 | 17 | 18 | 19 | ## Modules 20 | 21 | [`atari_lib`](../dopamine/discrete_domains/atari_lib.md) module: Atari-specific 22 | utilities including Atari-specific network architectures. 23 | 24 | [`checkpointer`](../dopamine/discrete_domains/checkpointer.md) module: A 25 | checkpointing mechanism for Dopamine agents. 26 | 27 | [`gym_lib`](../dopamine/discrete_domains/gym_lib.md) module: Gym-specific 28 | (non-Atari) utilities. 29 | 30 | [`iteration_statistics`](../dopamine/discrete_domains/iteration_statistics.md) 31 | module: A class for storing iteration-specific metrics. 32 | 33 | [`logger`](../dopamine/discrete_domains/logger.md) module: A lightweight logging 34 | mechanism for dopamine agents. 35 | 36 | [`run_experiment`](../dopamine/discrete_domains/run_experiment.md) module: 37 | Module defining classes and helper methods for general agents. 38 | 39 | [`train`](../dopamine/discrete_domains/train.md) module: The entry point for 40 | running a Dopamine agent. 41 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/discrete_domains/atari_lib/AtariPreprocessing.md: -------------------------------------------------------------------------------- 1 | description: A class implementing image preprocessing for Atari 2600 agents. 2 | 3 |
4 | 5 | 6 |
7 | 8 | # dopamine.discrete_domains.atari_lib.AtariPreprocessing 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | A class implementing image preprocessing for Atari 2600 agents. 22 | 23 | 24 | 25 | Specifically, this provides the following subset from the JAIR paper (Bellemare 26 | et al., 2013) and Nature DQN paper (Mnih et al., 2015): 27 | 28 | * Frame skipping (defaults to 4). 29 | * Terminal signal when a life is lost (off by default). 30 | * Grayscale and max-pooling of the last two frames. 31 | * Downsample the screen to a square image (defaults to 84x84). 32 | 33 | More generally, this class follows the preprocessing guidelines set down in 34 | Machado et al. (2018), "Revisiting the Arcade Learning Environment: Evaluation 35 | Protocols and Open Problems for General Agents". 36 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/discrete_domains/checkpointer.md: -------------------------------------------------------------------------------- 1 | description: A checkpointing mechanism for Dopamine agents. 2 | 3 |
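For reference, a small usage sketch of the Atari preprocessing described above; the argument names mirror the documentation here and should be treated as assumptions to verify against `atari_lib.py`.

```python
from dopamine.discrete_domains import atari_lib

# create_atari_environment wraps the raw ALE environment in AtariPreprocessing;
# sticky actions follow Machado et al. (2018).
env = atari_lib.create_atari_environment(game_name='Pong', sticky_actions=True)

observation = env.reset()                           # preprocessed grayscale frame
observation, reward, is_terminal, _ = env.step(0)   # one frame-skipped step
```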
4 | 5 | 6 |
7 | 8 | # Module: dopamine.discrete_domains.checkpointer 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | A checkpointing mechanism for Dopamine agents. 22 | 23 | This Checkpointer expects a base directory where checkpoints for different 24 | iterations are stored. Specifically, Checkpointer.save_checkpoint() takes in as 25 | input a dictionary 'data' to be pickled to disk. At each iteration, we write a 26 | file called 'ckpt.#', where # is the iteration number. The Checkpointer also 27 | cleans up old files, maintaining up to the CHECKPOINT_DURATION most recent 28 | iterations. 29 | 30 | The Checkpointer writes a sentinel file to indicate that checkpointing was 31 | globally successful. This means that all other checkpointing activities (saving 32 | the TensorFlow graph, the replay buffer) should be performed *prior* to calling 33 | Checkpointer.save_checkpoint(). This allows the Checkpointer to detect 34 | incomplete checkpoints. 35 | 36 | #### Example 37 | 38 | After running 10 iterations (numbered 0...9) with base_directory='/checkpoint', 39 | the following files will exist: `/checkpoint/ckpt.6 /checkpoint/ckpt.7 40 | /checkpoint/ckpt.8 /checkpoint/ckpt.9 /checkpoint/sentinel_checkpoint_complete.6 41 | /checkpoint/sentinel_checkpoint_complete.7 42 | /checkpoint/sentinel_checkpoint_complete.8 43 | /checkpoint/sentinel_checkpoint_complete.9` 44 | 45 | ## Classes 46 | 47 | [`class Checkpointer`](../../dopamine/discrete_domains/checkpointer/Checkpointer.md): 48 | Class for managing checkpoints for Dopamine agents. 49 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/discrete_domains/checkpointer/Checkpointer.md: -------------------------------------------------------------------------------- 1 | description: Class for managing checkpoints for Dopamine agents. 2 | 3 |
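A usage sketch consistent with the checkpointer description above. The directory and iteration number are made up, and the helper for locating the newest complete checkpoint is assumed to be `checkpointer.get_latest_checkpoint_number`; verify both against the source.

```python
from dopamine.discrete_domains import checkpointer

base_dir = '/tmp/my_experiment/checkpoints'  # hypothetical
ckpt = checkpointer.Checkpointer(base_dir)

# Save the TensorFlow graph and replay buffer *before* this call, so that the
# sentinel file written here really marks a complete checkpoint.
ckpt.save_checkpoint(7, {'current_iteration': 7})

# Resume from the most recent complete checkpoint, if one exists.
latest = checkpointer.get_latest_checkpoint_number(base_dir)
if latest >= 0:
  experiment_data = ckpt.load_checkpoint(latest)
```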
4 | 5 | 6 |
7 | 8 | # dopamine.discrete_domains.checkpointer.Checkpointer 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | Class for managing checkpoints for Dopamine agents. 22 | 23 | 24 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/discrete_domains/gym_lib.md: -------------------------------------------------------------------------------- 1 | description: Gym-specific (non-Atari) utilities. 2 | 3 |
4 | 5 | 6 |
7 | 8 | # Module: dopamine.discrete_domains.gym_lib 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | Gym-specific (non-Atari) utilities. 22 | 23 | Some network specifications specific to certain Gym environments are provided 24 | here. 25 | 26 | Includes a wrapper class around Gym environments. This class makes general Gym 27 | environments conformant with the API Dopamine is expecting. 28 | 29 | ## Classes 30 | 31 | [`class GymPreprocessing`](../../dopamine/discrete_domains/gym_lib/GymPreprocessing.md): 32 | A Wrapper class around Gym environments. 33 | 34 | ## Functions 35 | 36 | [`create_gym_environment(...)`](../../dopamine/discrete_domains/gym_lib/create_gym_environment.md): 37 | Wraps a Gym environment with some basic preprocessing. 38 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/discrete_domains/gym_lib/GymPreprocessing.md: -------------------------------------------------------------------------------- 1 | description: A Wrapper class around Gym environments. 2 | 3 |
4 | 5 | 6 |
7 | 8 | # dopamine.discrete_domains.gym_lib.GymPreprocessing 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | A Wrapper class around Gym environments. 22 | 23 | 24 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/discrete_domains/gym_lib/create_gym_environment.md: -------------------------------------------------------------------------------- 1 | description: Wraps a Gym environment with some basic preprocessing. 2 | 3 |
4 | 5 | 6 |
7 | 8 | # dopamine.discrete_domains.gym_lib.create_gym_environment 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | Wraps a Gym environment with some basic preprocessing. 22 | 23 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 40 | 43 | 44 | 47 | 50 | 51 |
38 | `environment_name` 39 | 41 | str, the name of the environment to run. 42 |
45 | `version` 46 | 48 | str, version of the environment to run. 49 |
52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 62 | 63 | 64 |
60 | A Gym environment with some standard preprocessing. 61 |
65 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/discrete_domains/iteration_statistics.md: -------------------------------------------------------------------------------- 1 | description: A class for storing iteration-specific metrics. 2 | 3 |
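A short usage sketch of `create_gym_environment` as documented above; the environment name and version are examples only.

```python
from dopamine.discrete_domains import gym_lib

# Returns a GymPreprocessing-wrapped environment conforming to Dopamine's API.
env = gym_lib.create_gym_environment(environment_name='CartPole', version='v0')
observation = env.reset()
```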
4 | 5 | 6 |
7 | 8 | # Module: dopamine.discrete_domains.iteration_statistics 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | A class for storing iteration-specific metrics. 22 | 23 | ## Classes 24 | 25 | [`class IterationStatistics`](../../dopamine/discrete_domains/iteration_statistics/IterationStatistics.md): 26 | A class for storing iteration-specific metrics. 27 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/discrete_domains/iteration_statistics/IterationStatistics.md: -------------------------------------------------------------------------------- 1 | description: A class for storing iteration-specific metrics. 2 | 3 |
4 | 5 | 6 |
7 | 8 | # dopamine.discrete_domains.iteration_statistics.IterationStatistics 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | A class for storing iteration-specific metrics. 22 | 23 | 24 | 25 | The internal format is as follows: we maintain a mapping from keys to lists. 26 | Each list contains all the values corresponding to the given key. 27 | 28 | For example, self.data_lists['train_episode_returns'] might contain the 29 | per-episode returns achieved during this iteration. 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 41 | 45 | 46 |
39 | `data_lists` 40 | 42 | dict mapping each metric_name (str) to a list of said metric 43 | across episodes. 44 |
47 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/discrete_domains/logger.md: -------------------------------------------------------------------------------- 1 | description: A lightweight logging mechanism for dopamine agents. 2 | 3 |
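Based on the `data_lists` description above, usage looks roughly like the sketch below; the `append` method name is an assumption to verify against `iteration_statistics.py`.

```python
from dopamine.discrete_domains import iteration_statistics

stats = iteration_statistics.IterationStatistics()

# Assumption: each call appends the given values to the matching lists.
stats.append({'train_episode_returns': 12.0, 'train_episode_lengths': 200})
stats.append({'train_episode_returns': 15.5, 'train_episode_lengths': 180})

print(stats.data_lists['train_episode_returns'])  # [12.0, 15.5]
```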
4 | 5 | 6 |
7 | 8 | # Module: dopamine.discrete_domains.logger 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | A lightweight logging mechanism for dopamine agents. 22 | 23 | ## Classes 24 | 25 | [`class Logger`](../../dopamine/discrete_domains/logger/Logger.md): Class for 26 | maintaining a dictionary of data to log. 27 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/discrete_domains/logger/Logger.md: -------------------------------------------------------------------------------- 1 | description: Class for maintaining a dictionary of data to log. 2 | 3 |
4 | 5 | 6 |
7 | 8 | # dopamine.discrete_domains.logger.Logger 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | Class for maintaining a dictionary of data to log. 22 | 23 | 24 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/discrete_domains/run_experiment.md: -------------------------------------------------------------------------------- 1 | description: Module defining classes and helper methods for general agents. 2 | 3 |
4 | 5 | 6 |
7 | 8 | # Module: dopamine.discrete_domains.run_experiment 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | Module defining classes and helper methods for general agents. 22 | 23 | ## Classes 24 | 25 | [`class Runner`](../../dopamine/discrete_domains/run_experiment/Runner.md): 26 | Object that handles running Dopamine experiments. 27 | 28 | [`class TrainRunner`](../../dopamine/discrete_domains/run_experiment/TrainRunner.md): 29 | Object that handles running experiments. 30 | 31 | ## Functions 32 | 33 | [`create_agent(...)`](../../dopamine/discrete_domains/run_experiment/create_agent.md): 34 | Creates an agent. 35 | 36 | [`create_runner(...)`](../../dopamine/discrete_domains/run_experiment/create_runner.md): 37 | Creates an experiment Runner. 38 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/discrete_domains/run_experiment/Runner.md: -------------------------------------------------------------------------------- 1 | description: Object that handles running Dopamine experiments. 2 | 3 |
4 | 5 | 6 |
7 | 8 | # dopamine.discrete_domains.run_experiment.Runner 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | Object that handles running Dopamine experiments. 22 | 23 | 24 | 25 | Here we use the term 'experiment' to mean simulating interactions between the 26 | agent and the environment and reporting some statistics pertaining to these 27 | interactions. 28 | 29 | A simple scenario to train a DQN agent is as follows: 30 | 31 | ```python 32 | from dopamine.agents.dqn import dqn_agent 33 | from dopamine.discrete_domains import atari_lib, run_experiment 34 | base_dir = '/tmp/simple_example' 35 | def create_agent(sess, environment): 36 |   return dqn_agent.DQNAgent(sess, num_actions=environment.action_space.n) 37 | runner = run_experiment.Runner(base_dir, create_agent, atari_lib.create_atari_environment) 38 | runner.run_experiment() 39 | ``` 40 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/discrete_domains/run_experiment/TrainRunner.md: -------------------------------------------------------------------------------- 1 | description: Object that handles running experiments. 2 | 3 |
4 | 5 | 6 |
7 | 8 | # dopamine.discrete_domains.run_experiment.TrainRunner 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | Object that handles running experiments. 22 | 23 | Inherits From: 24 | [`Runner`](../../../dopamine/discrete_domains/run_experiment/Runner.md) 25 | 26 | 27 | 28 | The `TrainRunner` differs from the base `Runner` class in that it does not run the 29 | evaluation phase. Checkpointing and logging for the train phase are preserved as 30 | before. 31 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/discrete_domains/run_experiment/create_runner.md: -------------------------------------------------------------------------------- 1 | description: Creates an experiment Runner. 2 | 3 |
4 | 5 | 6 |
7 | 8 | # dopamine.discrete_domains.run_experiment.create_runner 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | Creates an experiment Runner. 22 | 23 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 40 | 43 | 44 | 47 | 50 | 51 |
38 | `base_dir` 39 | 41 | str, base directory for hosting all subdirectories. 42 |
45 | `schedule` 46 | 48 | string, which type of Runner to use. 49 |
52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 63 | 66 | 67 |
61 | `runner` 62 | 64 | A `Runner`-like object. 65 |
68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 79 | 82 | 83 |
77 | `ValueError` 78 | 80 | When an unknown schedule is encountered. 81 |
84 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/discrete_domains/train.md: -------------------------------------------------------------------------------- 1 | description: The entry point for running a Dopamine agent. 2 | 3 |
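Putting `create_runner` and the train entry point together, a typical programmatic run looks like the sketch below. The paths are placeholders, and the schedule strings (mirroring the `Runner`/`TrainRunner` split described above) are assumptions to verify against `run_experiment.py`.

```python
from dopamine.discrete_domains import run_experiment

base_dir = '/tmp/dopamine_dqn'                       # hypothetical output dir
gin_files = ['dopamine/agents/dqn/configs/dqn.gin']  # config from this repo

# Parse the gin configuration, build a Runner, and run the experiment loop.
run_experiment.load_gin_configs(gin_files, gin_bindings=[])
runner = run_experiment.create_runner(
    base_dir, schedule='continuous_train_and_eval')
runner.run_experiment()
```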
4 | 5 | 6 |
7 | 8 | # Module: dopamine.discrete_domains.train 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | The entry point for running a Dopamine agent. 22 | 23 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/jax.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 |
5 | 6 | # Module: dopamine.jax 7 | 8 | 9 | 10 | 11 | 17 | 18 | 19 | ## Modules 20 | 21 | [`agents`](../dopamine/jax/agents.md) module 22 | 23 | [`networks`](../dopamine/jax/networks.md) module: Various networks for Jax 24 | Dopamine agents. 25 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/jax/agents.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 |
5 | 6 | # Module: dopamine.jax.agents 7 | 8 | 9 | 10 | 11 | 17 | 18 | 19 | ## Modules 20 | 21 | [`dqn`](../../dopamine/jax/agents/dqn.md) module 22 | 23 | [`implicit_quantile`](../../dopamine/jax/agents/implicit_quantile.md) module 24 | 25 | [`quantile`](../../dopamine/jax/agents/quantile.md) module 26 | 27 | [`rainbow`](../../dopamine/jax/agents/rainbow.md) module 28 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/jax/agents/dqn.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 |
5 | 6 | # Module: dopamine.jax.agents.dqn 7 | 8 | 9 | 10 | 11 | 17 | 18 | 19 | ## Modules 20 | 21 | [`dqn_agent`](../../../dopamine/jax/agents/dqn/dqn_agent.md) module: Compact 22 | implementation of a DQN agent in JAX. 23 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/jax/agents/dqn/dqn_agent.md: -------------------------------------------------------------------------------- 1 | description: Compact implementation of a DQN agent in JAX. 2 | 3 |
4 | 5 | 6 |
7 | 8 | # Module: dopamine.jax.agents.dqn.dqn_agent 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | Compact implementation of a DQN agent in JAX. 22 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/jax/agents/implicit_quantile.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 |
5 | 6 | # Module: dopamine.jax.agents.implicit_quantile 7 | 8 | 9 | 10 | 11 | 17 | 18 | 19 | ## Modules 20 | 21 | [`implicit_quantile_agent`](../../../dopamine/jax/agents/implicit_quantile/implicit_quantile_agent.md) 22 | module: The implicit quantile networks (IQN) agent. 23 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/jax/agents/implicit_quantile/implicit_quantile_agent.md: -------------------------------------------------------------------------------- 1 | description: The implicit quantile networks (IQN) agent. 2 | 3 |
4 | 5 | 6 |
7 | 8 | # Module: dopamine.jax.agents.implicit_quantile.implicit_quantile_agent 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | The implicit quantile networks (IQN) agent. 22 | 23 | The agent follows the description given in "Implicit Quantile Networks for 24 | Distributional RL" (Dabney et al., 2018). 25 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/jax/agents/quantile.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 |
5 | 6 | # Module: dopamine.jax.agents.quantile 7 | 8 | 9 | 10 | 11 | 17 | 18 | 19 | ## Modules 20 | 21 | [`quantile_agent`](../../../dopamine/jax/agents/quantile/quantile_agent.md) 22 | module: An extension of Rainbow to perform quantile regression. 23 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/jax/agents/quantile/quantile_agent.md: -------------------------------------------------------------------------------- 1 | description: An extension of Rainbow to perform quantile regression. 2 | 3 |
4 | 5 | 6 |
7 | 8 | # Module: dopamine.jax.agents.quantile.quantile_agent 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | An extension of Rainbow to perform quantile regression. 22 | 23 | This loss is computed as in "Distributional Reinforcement Learning with Quantile 24 | Regression" (Dabney et al., 2017). 25 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/jax/agents/rainbow.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 |
5 | 6 | # Module: dopamine.jax.agents.rainbow 7 | 8 | 9 | 10 | 11 | 17 | 18 | 19 | ## Modules 20 | 21 | [`rainbow_agent`](../../../dopamine/jax/agents/rainbow/rainbow_agent.md) module: 22 | Compact implementation of a simplified Rainbow agent in Jax. 23 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/jax/agents/rainbow/rainbow_agent.md: -------------------------------------------------------------------------------- 1 | description: Compact implementation of a simplified Rainbow agent in Jax. 2 | 3 |
4 | 5 | 6 |
7 | 8 | # Module: dopamine.jax.agents.rainbow.rainbow_agent 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | Compact implementation of a simplified Rainbow agent in Jax. 22 | 23 | Specifically, we implement the following components from Rainbow: 24 | 25 | * n-step updates; 26 | * prioritized replay; and 27 | * distributional RL. 28 | 29 | These three components were found to significantly impact the performance of the 30 | Atari game-playing agent. 31 | 32 | Furthermore, our implementation does away with some minor hyperparameter 33 | choices. Specifically, we 34 | 35 | * keep the beta exponent fixed at beta=0.5, rather than increase it linearly; 36 | * remove the alpha parameter, which was set to alpha=0.5 throughout the paper. 37 | 38 | Details in "Rainbow: Combining Improvements in Deep Reinforcement Learning" by 39 | Hessel et al. (2018). 40 | 41 | ## Functions 42 | 43 | [`project_distribution(...)`](../../../../dopamine/jax/agents/rainbow/rainbow_agent/project_distribution.md): 44 | Projects a batch of (support, weights) onto target_support. 45 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/jax/networks.md: -------------------------------------------------------------------------------- 1 | description: Various networks for Jax Dopamine agents. 2 | 3 |
4 | 5 | 6 |
7 | 8 | # Module: dopamine.jax.networks 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | Various networks for Jax Dopamine agents. 22 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/replay_memory.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 |
5 | 6 | # Module: dopamine.replay_memory 7 | 8 | 9 | 10 | 11 | 17 | 18 | 19 | ## Modules 20 | 21 | [`circular_replay_buffer`](../dopamine/replay_memory/circular_replay_buffer.md) 22 | module: The standard DQN replay memory. 23 | 24 | [`prioritized_replay_buffer`](../dopamine/replay_memory/prioritized_replay_buffer.md) 25 | module: An implementation of Prioritized Experience Replay (PER). 26 | 27 | [`sum_tree`](../dopamine/replay_memory/sum_tree.md) module: A sum tree data 28 | structure. 29 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/replay_memory/circular_replay_buffer.md: -------------------------------------------------------------------------------- 1 | description: The standard DQN replay memory. 2 | 3 |
4 | 5 | 6 |
7 | 8 | # Module: dopamine.replay_memory.circular_replay_buffer 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | The standard DQN replay memory. 22 | 23 | This implementation is an out-of-graph replay memory + in-graph wrapper. It 24 | supports vanilla n-step updates of the form typically found in the literature, 25 | i.e. where rewards are accumulated for n steps and the intermediate trajectory 26 | is not exposed to the agent. This does not allow, for example, performing 27 | off-policy corrections. 28 | 29 | ## Classes 30 | 31 | [`class OutOfGraphReplayBuffer`](../../dopamine/replay_memory/circular_replay_buffer/OutOfGraphReplayBuffer.md): 32 | A simple out-of-graph Replay Buffer. 33 | 34 | [`class WrappedReplayBuffer`](../../dopamine/replay_memory/circular_replay_buffer/WrappedReplayBuffer.md): 35 | Wrapper of OutOfGraphReplayBuffer with an in graph sampling mechanism. 36 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/replay_memory/circular_replay_buffer/OutOfGraphReplayBuffer.md: -------------------------------------------------------------------------------- 1 | description: A simple out-of-graph Replay Buffer. 2 | 3 |
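A minimal usage sketch of the out-of-graph replay buffer described above, with constructor arguments modeled on the Atari defaults; treat the exact keyword names as assumptions to verify against `circular_replay_buffer.py`.

```python
import numpy as np

from dopamine.replay_memory import circular_replay_buffer

memory = circular_replay_buffer.OutOfGraphReplayBuffer(
    observation_shape=(84, 84),
    stack_size=4,
    replay_capacity=100000,
    batch_size=32,
    update_horizon=1,
    gamma=0.99)

# Only individual frames are written; stacked states are assembled at sample
# time, which is what keeps storage compact.
for _ in range(1000):
  frame = np.zeros((84, 84), dtype=np.uint8)
  memory.add(frame, 0, 0.0, False)  # observation, action, reward, terminal

batch = memory.sample_transition_batch()  # tuple of numpy arrays
```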
4 | 5 | 6 |
7 | 8 | # dopamine.replay_memory.circular_replay_buffer.OutOfGraphReplayBuffer 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | A simple out-of-graph Replay Buffer. 22 | 23 | 24 | 25 | Stores transitions, state, action, reward, next_state, terminal (and any extra 26 | contents specified) in a circular buffer and provides a uniform transition 27 | sampling function. 28 | 29 | When the states consist of stacks of observations, storing the states is 30 | inefficient. This class writes observations and constructs the stacked states at 31 | sample time. 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 43 | 47 | 48 | 51 | 55 | 56 |
41 | `add_count` 42 | 44 | int, counter of how many transitions have been added (including 45 | the blank ones at the beginning of an episode). 46 |
49 | `invalid_range` 50 | 52 | np.array, an array with the indices of cursor-related invalid 53 | transitions. 54 |
57 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/replay_memory/circular_replay_buffer/WrappedReplayBuffer.md: -------------------------------------------------------------------------------- 1 | description: Wrapper of OutOfGraphReplayBuffer with an in graph sampling 2 | mechanism. 3 | 4 |
5 | 6 | 7 |
8 | 9 | # dopamine.replay_memory.circular_replay_buffer.WrappedReplayBuffer 10 | 11 | 12 | 13 | 14 | 20 | 21 | 22 | Wrapper of OutOfGraphReplayBuffer with an in-graph sampling mechanism. 23 | 24 | 25 | 26 | #### Usage: 27 | 28 | To add a transition: call the add function. 29 | 30 | To sample a batch: Construct operations that depend on any of the tensors in the 31 | transition dictionary. Every sess.run that requires any of these tensors will 32 | sample a new transition. 33 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/replay_memory/prioritized_replay_buffer.md: -------------------------------------------------------------------------------- 1 | description: An implementation of Prioritized Experience Replay (PER). 2 | 3 |
4 | 5 | 6 |
7 | 8 | # Module: dopamine.replay_memory.prioritized_replay_buffer 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | An implementation of Prioritized Experience Replay (PER). 22 | 23 | This implementation is based on the paper "Prioritized Experience Replay" by Tom 24 | Schaul et al. (2015). Many thanks to Tom Schaul, John Quan, and Matteo Hessel 25 | for providing useful pointers on the algorithm and its implementation. 26 | 27 | ## Classes 28 | 29 | [`class OutOfGraphPrioritizedReplayBuffer`](../../dopamine/replay_memory/prioritized_replay_buffer/OutOfGraphPrioritizedReplayBuffer.md): 30 | An out-of-graph Replay Buffer for Prioritized Experience Replay. 31 | 32 | [`class WrappedPrioritizedReplayBuffer`](../../dopamine/replay_memory/prioritized_replay_buffer/WrappedPrioritizedReplayBuffer.md): 33 | Wrapper of OutOfGraphPrioritizedReplayBuffer with in-graph sampling. 34 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/replay_memory/prioritized_replay_buffer/OutOfGraphPrioritizedReplayBuffer.md: -------------------------------------------------------------------------------- 1 | description: An out-of-graph Replay Buffer for Prioritized Experience Replay. 2 | 3 |
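For intuition, here is a short, library-independent sketch of the proportional prioritization scheme from Schaul et al. (2015) that this module implements. It is written for this page: the repository uses a sum tree for O(log n) sampling rather than the O(n) normalization below, and the fixed exponents follow the simplification noted in the Rainbow module docs (alpha = beta = 0.5).

```python
import numpy as np


def sample_prioritized(priorities, batch_size, alpha=0.5, beta=0.5):
  """Proportional prioritized sampling (illustrative only)."""
  scaled = np.asarray(priorities, dtype=np.float64) ** alpha
  probs = scaled / scaled.sum()
  indices = np.random.choice(len(probs), size=batch_size, p=probs)
  # Importance-sampling weights correct for the non-uniform sampling.
  weights = (len(probs) * probs[indices]) ** (-beta)
  weights /= weights.max()
  return indices, weights
```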
4 | 5 | 6 |
7 | 8 | # dopamine.replay_memory.prioritized_replay_buffer.OutOfGraphPrioritizedReplayBuffer 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | An out-of-graph Replay Buffer for Prioritized Experience Replay. 22 | 23 | Inherits From: 24 | [`OutOfGraphReplayBuffer`](../../../dopamine/replay_memory/circular_replay_buffer/OutOfGraphReplayBuffer.md) 25 | 26 | 27 | 28 | See circular_replay_buffer.py for details. 29 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/replay_memory/prioritized_replay_buffer/WrappedPrioritizedReplayBuffer.md: -------------------------------------------------------------------------------- 1 | description: Wrapper of OutOfGraphPrioritizedReplayBuffer with in-graph 2 | sampling. 3 | 4 |
5 | 6 | 7 |
8 | 9 | # dopamine.replay_memory.prioritized_replay_buffer.WrappedPrioritizedReplayBuffer 10 | 11 | 12 | 13 | 14 | 20 | 21 | 22 | Wrapper of OutOfGraphPrioritizedReplayBuffer with in-graph sampling. 23 | 24 | Inherits From: 25 | [`WrappedReplayBuffer`](../../../dopamine/replay_memory/circular_replay_buffer/WrappedReplayBuffer.md) 26 | 27 | 28 | 29 | #### Usage: 30 | 31 | * To add a transition: Call the add function. 32 | 33 | * To sample a batch: Query any of the tensors in the transition dictionary. 34 | Every sess.run that requires any of these tensors will sample a new 35 | transition. 36 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/replay_memory/sum_tree.md: -------------------------------------------------------------------------------- 1 | description: A sum tree data structure. 2 | 3 |
4 | 5 | 6 |
7 | 8 | # Module: dopamine.replay_memory.sum_tree 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | A sum tree data structure. 22 | 23 | Used for prioritized experience replay. See prioritized_replay_buffer.py and 24 | Schaul et al. (2015). 25 | -------------------------------------------------------------------------------- /dopamine/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Dopamine Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | name = 'dopamine' 16 | -------------------------------------------------------------------------------- /dopamine/agents/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Dopamine Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /dopamine/agents/dqn/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Dopamine Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /dopamine/agents/dqn/configs/dqn.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow the classic Nature DQN, but we modify as necessary to 2 | # match those used in Rainbow (Hessel et al., 2018), to ensure apples-to-apples 3 | # comparison. 
4 | import dopamine.discrete_domains.atari_lib 5 | import dopamine.discrete_domains.run_experiment 6 | import dopamine.agents.dqn.dqn_agent 7 | import dopamine.replay_memory.circular_replay_buffer 8 | import gin.tf.external_configurables 9 | 10 | DQNAgent.gamma = 0.99 11 | DQNAgent.update_horizon = 1 12 | DQNAgent.min_replay_history = 20000 # agent steps 13 | DQNAgent.update_period = 4 14 | DQNAgent.target_update_period = 8000 # agent steps 15 | DQNAgent.epsilon_train = 0.01 16 | DQNAgent.epsilon_eval = 0.001 17 | DQNAgent.epsilon_decay_period = 250000 # agent steps 18 | DQNAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version 19 | DQNAgent.optimizer = @tf.train.RMSPropOptimizer() 20 | 21 | tf.train.RMSPropOptimizer.learning_rate = 0.00025 22 | tf.train.RMSPropOptimizer.decay = 0.95 23 | tf.train.RMSPropOptimizer.momentum = 0.0 24 | tf.train.RMSPropOptimizer.epsilon = 0.00001 25 | tf.train.RMSPropOptimizer.centered = True 26 | 27 | atari_lib.create_atari_environment.game_name = 'Pong' 28 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017). 29 | atari_lib.create_atari_environment.sticky_actions = True 30 | create_agent.agent_name = 'dqn' 31 | Runner.num_iterations = 200 32 | Runner.training_steps = 250000 # agent steps 33 | Runner.evaluation_steps = 125000 # agent steps 34 | Runner.max_steps_per_episode = 27000 # agent steps 35 | 36 | WrappedReplayBuffer.replay_capacity = 1000000 37 | WrappedReplayBuffer.batch_size = 32 38 | -------------------------------------------------------------------------------- /dopamine/agents/dqn/configs/dqn_acrobot.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters for a simple DQN-style Acrobot agent. The hyperparameters 2 | # chosen achieve reasonable performance. 3 | import dopamine.discrete_domains.gym_lib 4 | import dopamine.discrete_domains.run_experiment 5 | import dopamine.agents.dqn.dqn_agent 6 | import dopamine.replay_memory.circular_replay_buffer 7 | import gin.tf.external_configurables 8 | 9 | DQNAgent.observation_shape = %gym_lib.ACROBOT_OBSERVATION_SHAPE 10 | DQNAgent.observation_dtype = %gym_lib.ACROBOT_OBSERVATION_DTYPE 11 | DQNAgent.stack_size = %gym_lib.ACROBOT_STACK_SIZE 12 | DQNAgent.network = @gym_lib.AcrobotDQNNetwork 13 | DQNAgent.gamma = 0.99 14 | DQNAgent.update_horizon = 1 15 | DQNAgent.min_replay_history = 500 16 | DQNAgent.update_period = 4 17 | DQNAgent.target_update_period = 100 18 | DQNAgent.epsilon_fn = @dqn_agent.identity_epsilon 19 | DQNAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version 20 | DQNAgent.optimizer = @tf.train.AdamOptimizer() 21 | 22 | tf.train.AdamOptimizer.learning_rate = 0.001 23 | tf.train.AdamOptimizer.epsilon = 0.0003125 24 | 25 | create_gym_environment.environment_name = 'Acrobot' 26 | create_gym_environment.version = 'v1' 27 | create_agent.agent_name = 'dqn' 28 | Runner.create_environment_fn = @gym_lib.create_gym_environment 29 | Runner.num_iterations = 500 30 | Runner.training_steps = 1000 31 | Runner.evaluation_steps = 1000 32 | Runner.max_steps_per_episode = 500 33 | 34 | WrappedReplayBuffer.replay_capacity = 50000 35 | WrappedReplayBuffer.batch_size = 128 36 | -------------------------------------------------------------------------------- /dopamine/agents/dqn/configs/dqn_cartpole.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters for a simple DQN-style Cartpole agent. The hyperparameters 2 | # chosen achieve reasonable performance. 
3 | import dopamine.discrete_domains.gym_lib 4 | import dopamine.discrete_domains.run_experiment 5 | import dopamine.agents.dqn.dqn_agent 6 | import dopamine.replay_memory.circular_replay_buffer 7 | import gin.tf.external_configurables 8 | 9 | DQNAgent.observation_shape = %gym_lib.CARTPOLE_OBSERVATION_SHAPE 10 | DQNAgent.observation_dtype = %gym_lib.CARTPOLE_OBSERVATION_DTYPE 11 | DQNAgent.stack_size = %gym_lib.CARTPOLE_STACK_SIZE 12 | DQNAgent.network = @gym_lib.CartpoleDQNNetwork 13 | DQNAgent.gamma = 0.99 14 | DQNAgent.update_horizon = 1 15 | DQNAgent.min_replay_history = 500 16 | DQNAgent.update_period = 4 17 | DQNAgent.target_update_period = 100 18 | DQNAgent.epsilon_fn = @dqn_agent.identity_epsilon 19 | DQNAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version 20 | DQNAgent.optimizer = @tf.train.AdamOptimizer() 21 | 22 | tf.train.AdamOptimizer.learning_rate = 0.001 23 | tf.train.AdamOptimizer.epsilon = 0.0003125 24 | 25 | create_gym_environment.environment_name = 'CartPole' 26 | create_gym_environment.version = 'v0' 27 | create_agent.agent_name = 'dqn' 28 | Runner.create_environment_fn = @gym_lib.create_gym_environment 29 | Runner.num_iterations = 500 30 | Runner.training_steps = 1000 31 | Runner.evaluation_steps = 1000 32 | Runner.max_steps_per_episode = 200 # Default max episode length. 33 | 34 | WrappedReplayBuffer.replay_capacity = 50000 35 | WrappedReplayBuffer.batch_size = 128 36 | -------------------------------------------------------------------------------- /dopamine/agents/dqn/configs/dqn_icml.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters used for reporting DQN results in Bellemare et al. (2017). 2 | import dopamine.discrete_domains.atari_lib 3 | import dopamine.discrete_domains.run_experiment 4 | import dopamine.agents.dqn.dqn_agent 5 | import dopamine.replay_memory.circular_replay_buffer 6 | import gin.tf.external_configurables 7 | 8 | DQNAgent.gamma = 0.99 9 | DQNAgent.update_horizon = 1 10 | DQNAgent.min_replay_history = 50000 # agent steps 11 | DQNAgent.update_period = 4 12 | DQNAgent.target_update_period = 10000 # agent steps 13 | DQNAgent.epsilon_train = 0.01 14 | DQNAgent.epsilon_eval = 0.001 15 | DQNAgent.epsilon_decay_period = 1000000 # agent steps 16 | DQNAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version 17 | DQNAgent.optimizer = @tf.train.RMSPropOptimizer() 18 | 19 | tf.train.RMSPropOptimizer.learning_rate = 0.00025 20 | tf.train.RMSPropOptimizer.decay = 0.95 21 | tf.train.RMSPropOptimizer.momentum = 0.0 22 | tf.train.RMSPropOptimizer.epsilon = 0.00001 23 | tf.train.RMSPropOptimizer.centered = True 24 | 25 | atari_lib.create_atari_environment.game_name = 'Pong' 26 | # Deterministic ALE version used in the DQN Nature paper (Mnih et al., 2015). 27 | atari_lib.create_atari_environment.sticky_actions = False 28 | create_agent.agent_name = 'dqn' 29 | Runner.num_iterations = 200 30 | Runner.training_steps = 250000 # agent steps 31 | Runner.evaluation_steps = 125000 # agent steps 32 | Runner.max_steps_per_episode = 27000 # agent steps 33 | 34 | AtariPreprocessing.terminal_on_life_loss = True 35 | 36 | WrappedReplayBuffer.replay_capacity = 1000000 37 | WrappedReplayBuffer.batch_size = 32 38 | -------------------------------------------------------------------------------- /dopamine/agents/dqn/configs/dqn_lunarlander.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters for a simple DQN-style LunarLander agent. 
The hyperparameters 2 | # chosen achieve reasonable performance. 3 | import dopamine.discrete_domains.gym_lib 4 | import dopamine.discrete_domains.run_experiment 5 | import dopamine.agents.dqn.dqn_agent 6 | import dopamine.replay_memory.circular_replay_buffer 7 | import gin.tf.external_configurables 8 | 9 | DQNAgent.observation_shape = %gym_lib.LUNAR_OBSERVATION_SHAPE 10 | DQNAgent.observation_dtype = %gym_lib.LUNAR_OBSERVATION_DTYPE 11 | DQNAgent.stack_size = %gym_lib.LUNAR_STACK_SIZE 12 | DQNAgent.network = @gym_lib.LunarLanderDQNNetwork 13 | DQNAgent.gamma = 0.99 14 | DQNAgent.update_horizon = 1 15 | DQNAgent.min_replay_history = 500 16 | DQNAgent.update_period = 4 17 | DQNAgent.target_update_period = 100 18 | DQNAgent.epsilon_fn = @dqn_agent.identity_epsilon 19 | DQNAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version 20 | DQNAgent.optimizer = @tf.train.AdamOptimizer() 21 | 22 | tf.train.AdamOptimizer.learning_rate = 0.001 23 | tf.train.AdamOptimizer.epsilon = 0.0003125 24 | 25 | create_gym_environment.environment_name = 'LunarLander' 26 | create_gym_environment.version = 'v2' 27 | create_agent.agent_name = 'dqn' 28 | Runner.create_environment_fn = @gym_lib.create_gym_environment 29 | Runner.num_iterations = 30 30 | Runner.training_steps = 4000 31 | Runner.evaluation_steps = 1000 32 | Runner.max_steps_per_episode = 1000 33 | 34 | WrappedReplayBuffer.replay_capacity = 50000 35 | WrappedReplayBuffer.batch_size = 128 36 | -------------------------------------------------------------------------------- /dopamine/agents/dqn/configs/dqn_mountaincar.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters for a simple DQN-style MountainCar agent. The hyperparameters 2 | # chosen achieve reasonable performance. 3 | import dopamine.discrete_domains.gym_lib 4 | import dopamine.discrete_domains.run_experiment 5 | import dopamine.agents.dqn.dqn_agent 6 | import dopamine.replay_memory.circular_replay_buffer 7 | import gin.tf.external_configurables 8 | 9 | DQNAgent.observation_shape = %gym_lib.MOUNTAINCAR_OBSERVATION_SHAPE 10 | DQNAgent.observation_dtype = %gym_lib.MOUNTAINCAR_OBSERVATION_DTYPE 11 | DQNAgent.stack_size = %gym_lib.MOUNTAINCAR_STACK_SIZE 12 | DQNAgent.network = @gym_lib.MountainCarDQNNetwork 13 | DQNAgent.gamma = 0.99 14 | DQNAgent.update_horizon = 1 15 | DQNAgent.min_replay_history = 500 16 | DQNAgent.update_period = 4 17 | DQNAgent.target_update_period = 100 18 | DQNAgent.epsilon_fn = @dqn_agent.identity_epsilon 19 | DQNAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version 20 | DQNAgent.optimizer = @tf.train.AdamOptimizer() 21 | 22 | tf.train.AdamOptimizer.learning_rate = 0.001 23 | tf.train.AdamOptimizer.epsilon = 0.0003125 24 | 25 | create_gym_environment.environment_name = 'MountainCar' 26 | create_gym_environment.version = 'v0' 27 | create_agent.agent_name = 'dqn' 28 | Runner.create_environment_fn = @gym_lib.create_gym_environment 29 | Runner.num_iterations = 30 30 | Runner.training_steps = 1000 31 | Runner.evaluation_steps = 1000 32 | Runner.max_steps_per_episode = 600 # Default max episode length. 33 | 34 | WrappedReplayBuffer.replay_capacity = 50000 35 | WrappedReplayBuffer.batch_size = 128 36 | -------------------------------------------------------------------------------- /dopamine/agents/dqn/configs/dqn_nature.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters used in Mnih et al. (2015). 
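# Note (assuming the default Atari frame skip of 4): the "agent steps" counts
# below are post-frame-skip environment steps, so 250000 training agent steps
# per iteration correspond to roughly one million ALE frames.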
2 | import dopamine.discrete_domains.atari_lib 3 | import dopamine.discrete_domains.run_experiment 4 | import dopamine.agents.dqn.dqn_agent 5 | import dopamine.replay_memory.circular_replay_buffer 6 | import gin.tf.external_configurables 7 | 8 | DQNAgent.gamma = 0.99 9 | DQNAgent.update_horizon = 1 10 | DQNAgent.min_replay_history = 50000 # agent steps 11 | DQNAgent.update_period = 4 12 | DQNAgent.target_update_period = 10000 # agent steps 13 | DQNAgent.epsilon_train = 0.1 14 | DQNAgent.epsilon_eval = 0.05 15 | DQNAgent.epsilon_decay_period = 1000000 # agent steps 16 | DQNAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version 17 | DQNAgent.optimizer = @tf.train.RMSPropOptimizer() 18 | 19 | tf.train.RMSPropOptimizer.learning_rate = 0.00025 20 | tf.train.RMSPropOptimizer.decay = 0.95 21 | tf.train.RMSPropOptimizer.momentum = 0.0 22 | tf.train.RMSPropOptimizer.epsilon = 0.00001 23 | tf.train.RMSPropOptimizer.centered = True 24 | 25 | atari_lib.create_atari_environment.game_name = 'Pong' 26 | # Deterministic ALE version used in the DQN Nature paper (Mnih et al., 2015). 27 | atari_lib.create_atari_environment.sticky_actions = False 28 | create_agent.agent_name = 'dqn' 29 | Runner.num_iterations = 200 30 | Runner.training_steps = 250000 # agent steps 31 | Runner.evaluation_steps = 125000 # agent steps 32 | Runner.max_steps_per_episode = 27000 # agent steps 33 | 34 | AtariPreprocessing.terminal_on_life_loss = True 35 | 36 | WrappedReplayBuffer.replay_capacity = 1000000 37 | WrappedReplayBuffer.batch_size = 32 38 | -------------------------------------------------------------------------------- /dopamine/agents/dqn/configs/dqn_profiling.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow the classic Nature DQN, but we modify as necessary to 2 | # match those used in Rainbow (Hessel et al., 2018), to ensure apples-to-apples 3 | # comparison. 4 | import dopamine.discrete_domains.atari_lib 5 | import dopamine.discrete_domains.run_experiment 6 | import dopamine.agents.dqn.dqn_agent 7 | import dopamine.replay_memory.circular_replay_buffer 8 | import gin.tf.external_configurables 9 | 10 | DQNAgent.gamma = 0.99 11 | DQNAgent.update_horizon = 1 12 | DQNAgent.min_replay_history = 100 # agent steps 13 | DQNAgent.update_period = 4 14 | DQNAgent.target_update_period = 8000 # agent steps 15 | DQNAgent.epsilon_fn = @dqn_agent.identity_epsilon 16 | DQNAgent.epsilon_train = 0.0 17 | DQNAgent.epsilon_eval = 0.0 18 | DQNAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version 19 | DQNAgent.optimizer = @tf.train.RMSPropOptimizer() 20 | 21 | tf.train.RMSPropOptimizer.learning_rate = 0.00025 22 | tf.train.RMSPropOptimizer.decay = 0.95 23 | tf.train.RMSPropOptimizer.momentum = 0.0 24 | tf.train.RMSPropOptimizer.epsilon = 0.00001 25 | tf.train.RMSPropOptimizer.centered = True 26 | 27 | atari_lib.create_atari_environment.game_name = 'Pong' 28 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017). 
29 | atari_lib.create_atari_environment.sticky_actions = True 30 | create_runner.schedule = 'continuous_train' 31 | create_agent.agent_name = 'dqn' 32 | Runner.num_iterations = 200 33 | Runner.training_steps = 250000 # agent steps 34 | Runner.evaluation_steps = 125000 # agent steps 35 | Runner.max_steps_per_episode = 27000 # agent steps 36 | 37 | WrappedReplayBuffer.replay_capacity = 1000000 38 | WrappedReplayBuffer.batch_size = 32 39 | -------------------------------------------------------------------------------- /dopamine/agents/implicit_quantile/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Dopamine Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /dopamine/agents/implicit_quantile/configs/implicit_quantile.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow Dabney et al. (2018), but we modify as necessary to 2 | # match those used in Rainbow (Hessel et al., 2018), to ensure apples-to-apples 3 | # comparison. 4 | import dopamine.agents.implicit_quantile.implicit_quantile_agent 5 | import dopamine.agents.rainbow.rainbow_agent 6 | import dopamine.discrete_domains.atari_lib 7 | import dopamine.discrete_domains.run_experiment 8 | import dopamine.replay_memory.prioritized_replay_buffer 9 | import gin.tf.external_configurables 10 | 11 | ImplicitQuantileAgent.kappa = 1.0 12 | ImplicitQuantileAgent.num_tau_samples = 64 13 | ImplicitQuantileAgent.num_tau_prime_samples = 64 14 | ImplicitQuantileAgent.num_quantile_samples = 32 15 | RainbowAgent.gamma = 0.99 16 | RainbowAgent.update_horizon = 3 17 | RainbowAgent.min_replay_history = 20000 # agent steps 18 | RainbowAgent.update_period = 4 19 | RainbowAgent.target_update_period = 8000 # agent steps 20 | RainbowAgent.epsilon_train = 0.01 21 | RainbowAgent.epsilon_eval = 0.001 22 | RainbowAgent.epsilon_decay_period = 250000 # agent steps 23 | # IQN currently does not support prioritized replay. 24 | RainbowAgent.replay_scheme = 'uniform' 25 | RainbowAgent.tf_device = '/gpu:0' # '/cpu:*' use for non-GPU version 26 | RainbowAgent.optimizer = @tf.train.AdamOptimizer() 27 | 28 | tf.train.AdamOptimizer.learning_rate = 0.00005 29 | tf.train.AdamOptimizer.epsilon = 0.0003125 30 | 31 | atari_lib.create_atari_environment.game_name = 'Pong' 32 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017). 
33 | atari_lib.create_atari_environment.sticky_actions = True 34 | create_agent.agent_name = 'implicit_quantile' 35 | Runner.num_iterations = 200 36 | Runner.training_steps = 250000 37 | Runner.evaluation_steps = 125000 38 | Runner.max_steps_per_episode = 27000 39 | 40 | WrappedPrioritizedReplayBuffer.replay_capacity = 1000000 41 | WrappedPrioritizedReplayBuffer.batch_size = 32 42 | -------------------------------------------------------------------------------- /dopamine/agents/implicit_quantile/configs/implicit_quantile_icml.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow Dabney et al. (2018). 2 | import dopamine.agents.implicit_quantile.implicit_quantile_agent 3 | import dopamine.agents.rainbow.rainbow_agent 4 | import dopamine.discrete_domains.atari_lib 5 | import dopamine.discrete_domains.run_experiment 6 | import dopamine.replay_memory.prioritized_replay_buffer 7 | import gin.tf.external_configurables 8 | 9 | ImplicitQuantileAgent.kappa = 1.0 10 | ImplicitQuantileAgent.num_tau_samples = 64 11 | ImplicitQuantileAgent.num_tau_prime_samples = 64 12 | ImplicitQuantileAgent.num_quantile_samples = 32 13 | RainbowAgent.gamma = 0.99 14 | RainbowAgent.update_horizon = 1 15 | RainbowAgent.min_replay_history = 50000 # agent steps 16 | RainbowAgent.update_period = 4 17 | RainbowAgent.target_update_period = 10000 # agent steps 18 | RainbowAgent.epsilon_train = 0.01 19 | RainbowAgent.epsilon_eval = 0.001 20 | RainbowAgent.epsilon_decay_period = 1000000 # agent steps 21 | RainbowAgent.replay_scheme = 'uniform' 22 | RainbowAgent.tf_device = '/gpu:0' # '/cpu:*' use for non-GPU version 23 | RainbowAgent.optimizer = @tf.train.AdamOptimizer() 24 | 25 | tf.train.AdamOptimizer.learning_rate = 0.00005 26 | tf.train.AdamOptimizer.epsilon = 0.0003125 27 | 28 | atari_lib.create_atari_environment.game_name = 'Pong' 29 | atari_lib.create_atari_environment.sticky_actions = False 30 | create_agent.agent_name = 'implicit_quantile' 31 | Runner.num_iterations = 200 32 | Runner.training_steps = 250000 33 | Runner.evaluation_steps = 125000 34 | Runner.max_steps_per_episode = 27000 35 | 36 | AtariPreprocessing.terminal_on_life_loss = True 37 | 38 | WrappedPrioritizedReplayBuffer.replay_capacity = 1000000 39 | WrappedPrioritizedReplayBuffer.batch_size = 32 40 | -------------------------------------------------------------------------------- /dopamine/agents/implicit_quantile/configs/implicit_quantile_profiling.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow Dabney et al. (2018), but we modify as necessary to 2 | # match those used in Rainbow (Hessel et al., 2018), to ensure apples-to-apples 3 | # comparison. 
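# Relative to implicit_quantile.gin above, this profiling variant shrinks
# min_replay_history to 100 steps, pins epsilon to 0 via identity_epsilon, and
# uses the 'continuous_train' schedule (training only, no separate evaluation
# phase), so raw training throughput can be measured without warm-up or
# evaluation overhead.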
4 | import dopamine.agents.implicit_quantile.implicit_quantile_agent 5 | import dopamine.agents.rainbow.rainbow_agent 6 | import dopamine.discrete_domains.atari_lib 7 | import dopamine.discrete_domains.run_experiment 8 | import dopamine.replay_memory.prioritized_replay_buffer 9 | import gin.tf.external_configurables 10 | 11 | ImplicitQuantileAgent.kappa = 1.0 12 | ImplicitQuantileAgent.num_tau_samples = 64 13 | ImplicitQuantileAgent.num_tau_prime_samples = 64 14 | ImplicitQuantileAgent.num_quantile_samples = 32 15 | RainbowAgent.gamma = 0.99 16 | RainbowAgent.update_horizon = 3 17 | RainbowAgent.min_replay_history = 100 # agent steps 18 | RainbowAgent.update_period = 4 19 | RainbowAgent.target_update_period = 8000 # agent steps 20 | RainbowAgent.epsilon_fn = @dqn_agent.identity_epsilon 21 | RainbowAgent.epsilon_train = 0.0 22 | RainbowAgent.epsilon_eval = 0.0 23 | # IQN currently does not support prioritized replay. 24 | RainbowAgent.replay_scheme = 'uniform' 25 | RainbowAgent.tf_device = '/gpu:0' # '/cpu:*' use for non-GPU version 26 | RainbowAgent.optimizer = @tf.train.AdamOptimizer() 27 | 28 | tf.train.AdamOptimizer.learning_rate = 0.00005 29 | tf.train.AdamOptimizer.epsilon = 0.0003125 30 | 31 | atari_lib.create_atari_environment.game_name = 'Pong' 32 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017). 33 | atari_lib.create_atari_environment.sticky_actions = True 34 | create_runner.schedule = 'continuous_train' 35 | create_agent.agent_name = 'implicit_quantile' 36 | Runner.num_iterations = 200 37 | Runner.training_steps = 250000 38 | Runner.evaluation_steps = 125000 39 | Runner.max_steps_per_episode = 27000 40 | 41 | WrappedPrioritizedReplayBuffer.replay_capacity = 1000000 42 | WrappedPrioritizedReplayBuffer.batch_size = 32 43 | -------------------------------------------------------------------------------- /dopamine/agents/rainbow/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Dopamine Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /dopamine/agents/rainbow/configs/c51.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow the settings from Bellemare et al. (2017), but we 2 | # modify as necessary to match those used in Rainbow (Hessel et al., 2018), to 3 | # ensure apples-to-apples comparison. 4 | import dopamine.agents.rainbow.rainbow_agent 5 | import dopamine.discrete_domains.atari_lib 6 | import dopamine.discrete_domains.run_experiment 7 | import dopamine.replay_memory.prioritized_replay_buffer 8 | import gin.tf.external_configurables 9 | 10 | RainbowAgent.num_atoms = 51 11 | RainbowAgent.vmax = 10. 
12 | RainbowAgent.gamma = 0.99 13 | RainbowAgent.update_horizon = 1 14 | RainbowAgent.min_replay_history = 20000 # agent steps 15 | RainbowAgent.update_period = 4 16 | RainbowAgent.target_update_period = 8000 # agent steps 17 | RainbowAgent.epsilon_train = 0.01 18 | RainbowAgent.epsilon_eval = 0.001 19 | RainbowAgent.epsilon_decay_period = 250000 # agent steps 20 | RainbowAgent.replay_scheme = 'uniform' 21 | RainbowAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version 22 | RainbowAgent.optimizer = @tf.train.AdamOptimizer() 23 | 24 | tf.train.AdamOptimizer.learning_rate = 0.00025 25 | tf.train.AdamOptimizer.epsilon = 0.0003125 26 | 27 | atari_lib.create_atari_environment.game_name = 'Pong' 28 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017). 29 | atari_lib.create_atari_environment.sticky_actions = True 30 | create_agent.agent_name = 'rainbow' 31 | Runner.num_iterations = 200 32 | Runner.training_steps = 250000 # agent steps 33 | Runner.evaluation_steps = 125000 # agent steps 34 | Runner.max_steps_per_episode = 27000 # agent steps 35 | 36 | WrappedPrioritizedReplayBuffer.replay_capacity = 1000000 37 | WrappedPrioritizedReplayBuffer.batch_size = 32 38 | -------------------------------------------------------------------------------- /dopamine/agents/rainbow/configs/c51_acrobot.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters for a simple C51-style Acrobot agent. The hyperparameters 2 | # chosen achieve reasonable performance. 3 | import dopamine.agents.dqn.dqn_agent 4 | import dopamine.agents.rainbow.rainbow_agent 5 | import dopamine.discrete_domains.gym_lib 6 | import dopamine.discrete_domains.run_experiment 7 | import dopamine.replay_memory.prioritized_replay_buffer 8 | import gin.tf.external_configurables 9 | 10 | RainbowAgent.observation_shape = %gym_lib.ACROBOT_OBSERVATION_SHAPE 11 | RainbowAgent.observation_dtype = %gym_lib.ACROBOT_OBSERVATION_DTYPE 12 | RainbowAgent.stack_size = %gym_lib.ACROBOT_STACK_SIZE 13 | RainbowAgent.network = @gym_lib.AcrobotRainbowNetwork 14 | RainbowAgent.num_atoms = 51 15 | RainbowAgent.vmax = 10. 16 | RainbowAgent.gamma = 0.99 17 | RainbowAgent.update_horizon = 1 18 | RainbowAgent.min_replay_history = 500 19 | RainbowAgent.update_period = 4 20 | RainbowAgent.target_update_period = 100 21 | RainbowAgent.epsilon_fn = @dqn_agent.identity_epsilon 22 | RainbowAgent.replay_scheme = 'uniform' 23 | RainbowAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version 24 | RainbowAgent.optimizer = @tf.train.AdamOptimizer() 25 | 26 | tf.train.AdamOptimizer.learning_rate = 0.1 27 | tf.train.AdamOptimizer.epsilon = 0.0003125 28 | 29 | create_gym_environment.environment_name = 'Acrobot' 30 | create_gym_environment.version = 'v1' 31 | create_agent.agent_name = 'rainbow' 32 | Runner.create_environment_fn = @gym_lib.create_gym_environment 33 | Runner.num_iterations = 500 34 | Runner.training_steps = 1000 35 | Runner.evaluation_steps = 1000 36 | Runner.max_steps_per_episode = 500 37 | 38 | WrappedPrioritizedReplayBuffer.replay_capacity = 50000 39 | WrappedPrioritizedReplayBuffer.batch_size = 128 40 | -------------------------------------------------------------------------------- /dopamine/agents/rainbow/configs/c51_cartpole.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters for a simple C51-style Cartpole agent. The hyperparameters 2 | # chosen achieve reasonable performance. 
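# Under the standard C51 parameterization (stated here for clarity; not part of
# the original file), the return distribution is supported on num_atoms evenly
# spaced atoms over [-vmax, vmax]. The values below give 201 atoms on
# [-100, 100], i.e. an atom spacing of
#   delta_z = 2 * vmax / (num_atoms - 1) = 200 / 200 = 1.0,
# a much wider support than the 51 atoms on [-10, 10] used in the Atari
# configs.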
3 | import dopamine.agents.dqn.dqn_agent 4 | import dopamine.agents.rainbow.rainbow_agent 5 | import dopamine.discrete_domains.gym_lib 6 | import dopamine.discrete_domains.run_experiment 7 | import dopamine.replay_memory.prioritized_replay_buffer 8 | import gin.tf.external_configurables 9 | 10 | RainbowAgent.observation_shape = %gym_lib.CARTPOLE_OBSERVATION_SHAPE 11 | RainbowAgent.observation_dtype = %gym_lib.CARTPOLE_OBSERVATION_DTYPE 12 | RainbowAgent.stack_size = %gym_lib.CARTPOLE_STACK_SIZE 13 | RainbowAgent.network = @gym_lib.CartpoleRainbowNetwork 14 | RainbowAgent.num_atoms = 201 15 | RainbowAgent.vmax = 100. 16 | RainbowAgent.gamma = 0.99 17 | RainbowAgent.epsilon_eval = 0. 18 | RainbowAgent.epsilon_train = 0.01 19 | RainbowAgent.update_horizon = 1 20 | RainbowAgent.min_replay_history = 500 21 | RainbowAgent.update_period = 1 22 | RainbowAgent.target_update_period = 1 23 | RainbowAgent.epsilon_fn = @dqn_agent.identity_epsilon 24 | RainbowAgent.replay_scheme = 'uniform' 25 | RainbowAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version 26 | RainbowAgent.optimizer = @tf.train.AdamOptimizer() 27 | 28 | tf.train.AdamOptimizer.learning_rate = 0.00001 29 | tf.train.AdamOptimizer.epsilon = 0.00000390625 30 | 31 | create_gym_environment.environment_name = 'CartPole' 32 | create_gym_environment.version = 'v0' 33 | create_agent.agent_name = 'rainbow' 34 | Runner.create_environment_fn = @gym_lib.create_gym_environment 35 | Runner.num_iterations = 400 36 | Runner.training_steps = 1000 37 | Runner.evaluation_steps = 1000 38 | Runner.max_steps_per_episode = 200 # Default max episode length. 39 | 40 | WrappedPrioritizedReplayBuffer.replay_capacity = 50000 41 | WrappedPrioritizedReplayBuffer.batch_size = 128 42 | -------------------------------------------------------------------------------- /dopamine/agents/rainbow/configs/c51_icml.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters used in Bellemare et al. (2017). 2 | import dopamine.agents.rainbow.rainbow_agent 3 | import dopamine.discrete_domains.atari_lib 4 | import dopamine.discrete_domains.run_experiment 5 | import dopamine.replay_memory.prioritized_replay_buffer 6 | import gin.tf.external_configurables 7 | 8 | RainbowAgent.num_atoms = 51 9 | RainbowAgent.vmax = 10. 10 | RainbowAgent.gamma = 0.99 11 | RainbowAgent.update_horizon = 1 12 | RainbowAgent.min_replay_history = 50000 # agent steps 13 | RainbowAgent.update_period = 4 14 | RainbowAgent.target_update_period = 10000 # agent steps 15 | RainbowAgent.epsilon_train = 0.01 16 | RainbowAgent.epsilon_eval = 0.001 17 | RainbowAgent.epsilon_decay_period = 1000000 # agent steps 18 | RainbowAgent.replay_scheme = 'uniform' 19 | RainbowAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version 20 | RainbowAgent.optimizer = @tf.train.AdamOptimizer() 21 | 22 | tf.train.AdamOptimizer.learning_rate = 0.00025 23 | tf.train.AdamOptimizer.epsilon = 0.0003125 24 | 25 | atari_lib.create_atari_environment.game_name = 'Pong' 26 | # Deterministic ALE version used in the DQN Nature paper (Mnih et al., 2015). 
27 | atari_lib.create_atari_environment.sticky_actions = False 28 | create_agent.agent_name = 'rainbow' 29 | Runner.num_iterations = 200 30 | Runner.training_steps = 250000 # agent steps 31 | Runner.evaluation_steps = 125000 # agent steps 32 | Runner.max_steps_per_episode = 27000 # agent steps 33 | 34 | AtariPreprocessing.terminal_on_life_loss = True 35 | 36 | WrappedPrioritizedReplayBuffer.replay_capacity = 1000000 37 | WrappedPrioritizedReplayBuffer.batch_size = 32 38 | -------------------------------------------------------------------------------- /dopamine/agents/rainbow/configs/c51_profiling.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow the settings from Bellemare et al. (2017), but we 2 | # modify as necessary to match those used in Rainbow (Hessel et al., 2018), to 3 | # ensure apples-to-apples comparison. 4 | import dopamine.agents.rainbow.rainbow_agent 5 | import dopamine.discrete_domains.atari_lib 6 | import dopamine.discrete_domains.run_experiment 7 | import dopamine.replay_memory.prioritized_replay_buffer 8 | import gin.tf.external_configurables 9 | 10 | RainbowAgent.num_atoms = 51 11 | RainbowAgent.vmax = 10. 12 | RainbowAgent.gamma = 0.99 13 | RainbowAgent.update_horizon = 1 14 | RainbowAgent.min_replay_history = 100 # agent steps 15 | RainbowAgent.update_period = 4 16 | RainbowAgent.target_update_period = 8000 # agent steps 17 | RainbowAgent.epsilon_fn = @dqn_agent.identity_epsilon 18 | RainbowAgent.epsilon_train = 0.0 19 | RainbowAgent.epsilon_eval = 0.0 20 | RainbowAgent.replay_scheme = 'uniform' 21 | RainbowAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version 22 | RainbowAgent.optimizer = @tf.train.AdamOptimizer() 23 | 24 | tf.train.AdamOptimizer.learning_rate = 0.00025 25 | tf.train.AdamOptimizer.epsilon = 0.0003125 26 | 27 | atari_lib.create_atari_environment.game_name = 'Pong' 28 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017). 29 | atari_lib.create_atari_environment.sticky_actions = True 30 | create_runner.schedule = 'continuous_train' 31 | create_agent.agent_name = 'rainbow' 32 | Runner.num_iterations = 200 33 | Runner.training_steps = 250000 # agent steps 34 | Runner.evaluation_steps = 125000 # agent steps 35 | Runner.max_steps_per_episode = 27000 # agent steps 36 | 37 | WrappedPrioritizedReplayBuffer.replay_capacity = 1000000 38 | WrappedPrioritizedReplayBuffer.batch_size = 32 39 | -------------------------------------------------------------------------------- /dopamine/agents/rainbow/configs/rainbow_aaai.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow Hessel et al. (2018). 2 | import dopamine.agents.rainbow.rainbow_agent 3 | import dopamine.discrete_domains.atari_lib 4 | import dopamine.discrete_domains.run_experiment 5 | import dopamine.replay_memory.prioritized_replay_buffer 6 | import gin.tf.external_configurables 7 | 8 | RainbowAgent.num_atoms = 51 9 | RainbowAgent.vmax = 10. 
10 | RainbowAgent.gamma = 0.99 11 | RainbowAgent.update_horizon = 3 12 | RainbowAgent.min_replay_history = 20000 # agent steps 13 | RainbowAgent.update_period = 4 14 | RainbowAgent.target_update_period = 8000 # agent steps 15 | RainbowAgent.epsilon_train = 0.01 16 | RainbowAgent.epsilon_eval = 0.001 17 | RainbowAgent.epsilon_decay_period = 250000 # agent steps 18 | RainbowAgent.replay_scheme = 'prioritized' 19 | RainbowAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version 20 | RainbowAgent.optimizer = @tf.train.AdamOptimizer() 21 | 22 | # Note these parameters are different from C51's. 23 | tf.train.AdamOptimizer.learning_rate = 0.0000625 24 | tf.train.AdamOptimizer.epsilon = 0.00015 25 | 26 | atari_lib.create_atari_environment.game_name = 'Pong' 27 | # Deterministic ALE version used in the AAAI paper. 28 | atari_lib.create_atari_environment.sticky_actions = False 29 | create_agent.agent_name = 'rainbow' 30 | Runner.num_iterations = 200 31 | Runner.training_steps = 250000 # agent steps 32 | Runner.evaluation_steps = 125000 # agent steps 33 | Runner.max_steps_per_episode = 27000 # agent steps 34 | 35 | AtariPreprocessing.terminal_on_life_loss = True 36 | 37 | WrappedPrioritizedReplayBuffer.replay_capacity = 1000000 38 | WrappedPrioritizedReplayBuffer.batch_size = 32 39 | -------------------------------------------------------------------------------- /dopamine/agents/rainbow/configs/rainbow_acrobot.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters for a simple Rainbow-style Acrobot agent. The hyperparameters 2 | # chosen achieve reasonable performance. 3 | import dopamine.agents.rainbow.rainbow_agent 4 | import dopamine.discrete_domains.gym_lib 5 | import dopamine.discrete_domains.run_experiment 6 | import dopamine.replay_memory.prioritized_replay_buffer 7 | import gin.tf.external_configurables 8 | 9 | RainbowAgent.observation_shape = %gym_lib.ACROBOT_OBSERVATION_SHAPE 10 | RainbowAgent.observation_dtype = %gym_lib.ACROBOT_OBSERVATION_DTYPE 11 | RainbowAgent.stack_size = %gym_lib.ACROBOT_STACK_SIZE 12 | RainbowAgent.network = @gym_lib.AcrobotRainbowNetwork 13 | RainbowAgent.num_atoms = 51 14 | RainbowAgent.vmax = 10. 15 | RainbowAgent.gamma = 0.99 16 | RainbowAgent.update_horizon = 3 17 | RainbowAgent.min_replay_history = 500 18 | RainbowAgent.update_period = 4 19 | RainbowAgent.target_update_period = 100 20 | RainbowAgent.epsilon_fn = @dqn_agent.identity_epsilon 21 | RainbowAgent.replay_scheme = 'prioritized' 22 | RainbowAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version 23 | RainbowAgent.optimizer = @tf.train.AdamOptimizer() 24 | 25 | tf.train.AdamOptimizer.learning_rate = 0.09 26 | tf.train.AdamOptimizer.epsilon = 0.0003125 27 | 28 | create_gym_environment.environment_name = 'Acrobot' 29 | create_gym_environment.version = 'v1' 30 | create_agent.agent_name = 'rainbow' 31 | Runner.create_environment_fn = @gym_lib.create_gym_environment 32 | Runner.num_iterations = 500 33 | Runner.training_steps = 1000 34 | Runner.evaluation_steps = 1000 35 | Runner.max_steps_per_episode = 500 36 | 37 | WrappedPrioritizedReplayBuffer.replay_capacity = 50000 38 | WrappedPrioritizedReplayBuffer.batch_size = 128 39 | -------------------------------------------------------------------------------- /dopamine/agents/rainbow/configs/rainbow_cartpole.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters for a simple Rainbow-style Cartpole agent. 
The 2 | # hyperparameters chosen achieve reasonable performance. 3 | import dopamine.agents.dqn.dqn_agent 4 | import dopamine.agents.rainbow.rainbow_agent 5 | import dopamine.discrete_domains.gym_lib 6 | import dopamine.discrete_domains.run_experiment 7 | import dopamine.replay_memory.prioritized_replay_buffer 8 | import gin.tf.external_configurables 9 | 10 | RainbowAgent.observation_shape = %gym_lib.CARTPOLE_OBSERVATION_SHAPE 11 | RainbowAgent.observation_dtype = %gym_lib.CARTPOLE_OBSERVATION_DTYPE 12 | RainbowAgent.stack_size = %gym_lib.CARTPOLE_STACK_SIZE 13 | RainbowAgent.network = @gym_lib.CartpoleRainbowNetwork 14 | RainbowAgent.num_atoms = 51 15 | RainbowAgent.vmax = 10. 16 | RainbowAgent.gamma = 0.99 17 | RainbowAgent.update_horizon = 3 18 | RainbowAgent.min_replay_history = 500 19 | RainbowAgent.update_period = 4 20 | RainbowAgent.target_update_period = 100 21 | RainbowAgent.epsilon_fn = @dqn_agent.identity_epsilon 22 | RainbowAgent.replay_scheme = 'prioritized' 23 | RainbowAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version 24 | RainbowAgent.optimizer = @tf.train.AdamOptimizer() 25 | 26 | tf.train.AdamOptimizer.learning_rate = 0.09 27 | tf.train.AdamOptimizer.epsilon = 0.0003125 28 | 29 | create_gym_environment.environment_name = 'CartPole' 30 | create_gym_environment.version = 'v0' 31 | create_agent.agent_name = 'rainbow' 32 | Runner.create_environment_fn = @gym_lib.create_gym_environment 33 | Runner.num_iterations = 500 34 | Runner.training_steps = 1000 35 | Runner.evaluation_steps = 1000 36 | Runner.max_steps_per_episode = 200 # Default max episode length. 37 | 38 | WrappedPrioritizedReplayBuffer.replay_capacity = 50000 39 | WrappedPrioritizedReplayBuffer.batch_size = 128 40 | -------------------------------------------------------------------------------- /dopamine/agents/rainbow/configs/rainbow_dqnpro.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow Hessel et al. (2018). Unlike rainbow_original.gin, 2 | # this config keeps sticky_actions disabled, matching the original paper. 3 | import dopamine.agents.rainbow.rainbow_agent 4 | import dopamine.discrete_domains.atari_lib 5 | import dopamine.discrete_domains.run_experiment 6 | import dopamine.replay_memory.prioritized_replay_buffer 7 | import gin.tf.external_configurables 8 | 9 | RainbowAgent.num_atoms = 51 10 | RainbowAgent.vmax = 10. 11 | RainbowAgent.gamma = 0.99 12 | RainbowAgent.mu = 0.0 13 | RainbowAgent.nu = 0.0 14 | RainbowAgent.update_horizon = 3 15 | RainbowAgent.min_replay_history = 20000 # agent steps 16 | RainbowAgent.update_period = 4 17 | RainbowAgent.target_update_period = 8000 # agent steps 18 | RainbowAgent.epsilon_train = 0.01 19 | RainbowAgent.epsilon_eval = 0.001 20 | RainbowAgent.epsilon_decay_period = 250000 # agent steps 21 | RainbowAgent.replay_scheme = 'prioritized' 22 | RainbowAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version 23 | RainbowAgent.optimizer = @tf.train.AdamOptimizer() 24 | 25 | # Note these parameters are different from C51's. 26 | tf.train.AdamOptimizer.learning_rate = 0.0000625 27 | tf.train.AdamOptimizer.epsilon = 0.00015 28 | 29 | atari_lib.create_atari_environment.game_name = 'Pong' 30 | # Deterministic ALE version used in the AAAI paper.
31 | atari_lib.create_atari_environment.sticky_actions = False 32 | create_agent.agent_name = 'rainbow' 33 | Runner.num_iterations = 200 34 | Runner.training_steps = 250000 # agent steps 35 | Runner.evaluation_steps = 125000 # agent steps 36 | Runner.max_steps_per_episode = 27000 # agent steps 37 | 38 | AtariPreprocessing.terminal_on_life_loss = True 39 | 40 | WrappedPrioritizedReplayBuffer.replay_capacity = 1000000 41 | WrappedPrioritizedReplayBuffer.batch_size = 32 -------------------------------------------------------------------------------- /dopamine/agents/rainbow/configs/rainbow_original.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow Hessel et al. (2018), except for sticky_actions, 2 | # which was False (not using sticky actions) in the original paper. 3 | import dopamine.agents.rainbow.rainbow_agent 4 | import dopamine.discrete_domains.atari_lib 5 | import dopamine.discrete_domains.run_experiment 6 | import dopamine.replay_memory.prioritized_replay_buffer 7 | import gin.tf.external_configurables 8 | 9 | RainbowAgent.num_atoms = 51 10 | RainbowAgent.vmax = 10. 11 | RainbowAgent.gamma = 0.99 12 | RainbowAgent.update_horizon = 3 13 | RainbowAgent.min_replay_history = 20000 # agent steps 14 | RainbowAgent.update_period = 4 15 | RainbowAgent.target_update_period = 8000 # agent steps 16 | RainbowAgent.epsilon_train = 0.01 17 | RainbowAgent.epsilon_eval = 0.001 18 | RainbowAgent.epsilon_decay_period = 250000 # agent steps 19 | RainbowAgent.replay_scheme = 'prioritized' 20 | RainbowAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version 21 | RainbowAgent.optimizer = @tf.train.AdamOptimizer() 22 | 23 | # Note these parameters are different from C51's. 24 | tf.train.AdamOptimizer.learning_rate = 0.0000625 25 | tf.train.AdamOptimizer.epsilon = 0.00015 26 | 27 | atari_lib.create_atari_environment.game_name = 'Pong' 28 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017). 29 | atari_lib.create_atari_environment.sticky_actions = True 30 | create_agent.agent_name = 'rainbow' 31 | Runner.num_iterations = 200 32 | Runner.training_steps = 250000 # agent steps 33 | Runner.evaluation_steps = 125000 # agent steps 34 | Runner.max_steps_per_episode = 27000 # agent steps 35 | 36 | WrappedPrioritizedReplayBuffer.replay_capacity = 1000000 37 | WrappedPrioritizedReplayBuffer.batch_size = 32 38 | -------------------------------------------------------------------------------- /dopamine/agents/rainbow/configs/rainbow_our_first_paper.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow Hessel et al. (2018). 2 | import dopamine.agents.rainbow.rainbow_agent 3 | import dopamine.discrete_domains.atari_lib 4 | import dopamine.discrete_domains.run_experiment 5 | import dopamine.replay_memory.prioritized_replay_buffer 6 | import gin.tf.external_configurables 7 | 8 | RainbowAgent.num_atoms = 51 9 | RainbowAgent.vmax = 10. 
10 | RainbowAgent.gamma = 0.99 11 | RainbowAgent.mu = 0.0 12 | RainbowAgent.update_horizon = 3 13 | RainbowAgent.min_replay_history = 20000 # agent steps 14 | RainbowAgent.update_period = 4 15 | RainbowAgent.target_update_period = 8000 # agent steps 16 | RainbowAgent.epsilon_train = 0.01 17 | RainbowAgent.epsilon_eval = 0.001 18 | RainbowAgent.epsilon_decay_period = 250000 # agent steps 19 | RainbowAgent.replay_scheme = 'prioritized' 20 | RainbowAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version 21 | RainbowAgent.optimizer = @tf.train.AdamOptimizer() 22 | 23 | # Note these parameters are different from C51's. 24 | tf.train.AdamOptimizer.learning_rate = 0.0000625 25 | tf.train.AdamOptimizer.epsilon = 0.00015 26 | 27 | atari_lib.create_atari_environment.game_name = 'Pong' 28 | # Deterministic ALE version used in the AAAI paper. 29 | atari_lib.create_atari_environment.sticky_actions = False 30 | create_agent.agent_name = 'rainbow' 31 | Runner.num_iterations = 120 32 | Runner.training_steps = 250000 # agent steps 33 | Runner.evaluation_steps = 125000 # agent steps 34 | Runner.max_steps_per_episode = 27000 # agent steps 35 | 36 | AtariPreprocessing.terminal_on_life_loss = True 37 | 38 | WrappedPrioritizedReplayBuffer.replay_capacity = 1000000 39 | WrappedPrioritizedReplayBuffer.batch_size = 64 40 | -------------------------------------------------------------------------------- /dopamine/agents/rainbow/configs/rainbow_our_second_paper.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow Hessel et al. (2018). 2 | import dopamine.agents.rainbow.rainbow_agent 3 | import dopamine.discrete_domains.atari_lib 4 | import dopamine.discrete_domains.run_experiment 5 | import dopamine.replay_memory.prioritized_replay_buffer 6 | import gin.tf.external_configurables 7 | 8 | RainbowAgent.num_atoms = 51 9 | RainbowAgent.vmax = 10. 10 | RainbowAgent.gamma = 0.99 11 | RainbowAgent.mu = 0.0 12 | RainbowAgent.nu = 0.0 13 | RainbowAgent.update_horizon = 3 14 | RainbowAgent.min_replay_history = 20000 # agent steps 15 | RainbowAgent.update_period = 4 16 | RainbowAgent.target_update_period = 8000 # agent steps 17 | RainbowAgent.epsilon_train = 0.01 18 | RainbowAgent.epsilon_eval = 0.001 19 | RainbowAgent.epsilon_decay_period = 250000 # agent steps 20 | RainbowAgent.replay_scheme = 'prioritized' 21 | RainbowAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version 22 | RainbowAgent.optimizer = @tf.train.AdamOptimizer() 23 | 24 | # Note these parameters are different from C51's. 25 | tf.train.AdamOptimizer.learning_rate = 0.0000625 26 | tf.train.AdamOptimizer.epsilon = 0.00015 27 | 28 | atari_lib.create_atari_environment.game_name = 'Pong' 29 | # Deterministic ALE version used in the AAAI paper. 30 | atari_lib.create_atari_environment.sticky_actions = False 31 | create_agent.agent_name = 'rainbow' 32 | Runner.num_iterations = 250 33 | Runner.training_steps = 250000 # agent steps 34 | Runner.evaluation_steps = 125000 # agent steps 35 | Runner.max_steps_per_episode = 27000 # agent steps 36 | 37 | AtariPreprocessing.terminal_on_life_loss = True 38 | 39 | WrappedPrioritizedReplayBuffer.replay_capacity = 1000000 40 | WrappedPrioritizedReplayBuffer.batch_size = 64 41 | -------------------------------------------------------------------------------- /dopamine/agents/rainbow/configs/rainbow_profiling.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow Hessel et al. 
(2018), except for sticky_actions, 2 | # which was False (not using sticky actions) in the original paper. 3 | import dopamine.agents.rainbow.rainbow_agent 4 | import dopamine.discrete_domains.atari_lib 5 | import dopamine.discrete_domains.run_experiment 6 | import dopamine.replay_memory.prioritized_replay_buffer 7 | import gin.tf.external_configurables 8 | 9 | RainbowAgent.num_atoms = 51 10 | RainbowAgent.vmax = 10. 11 | RainbowAgent.gamma = 0.99 12 | RainbowAgent.update_horizon = 3 13 | RainbowAgent.min_replay_history = 100 # agent steps 14 | RainbowAgent.update_period = 4 15 | RainbowAgent.target_update_period = 8000 # agent steps 16 | RainbowAgent.epsilon_fn = @dqn_agent.identity_epsilon 17 | RainbowAgent.epsilon_train = 0.0 18 | RainbowAgent.epsilon_eval = 0.0 19 | RainbowAgent.epsilon_decay_period = 250000 # agent steps 20 | RainbowAgent.replay_scheme = 'prioritized' 21 | RainbowAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version 22 | RainbowAgent.optimizer = @tf.train.AdamOptimizer() 23 | 24 | # Note these parameters are different from C51's. 25 | tf.train.AdamOptimizer.learning_rate = 0.0000625 26 | tf.train.AdamOptimizer.epsilon = 0.00015 27 | 28 | atari_lib.create_atari_environment.game_name = 'Pong' 29 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017). 30 | atari_lib.create_atari_environment.sticky_actions = True 31 | create_runner.schedule = 'continuous_train' 32 | create_agent.agent_name = 'rainbow' 33 | Runner.num_iterations = 200 34 | Runner.training_steps = 250000 # agent steps 35 | Runner.evaluation_steps = 125000 # agent steps 36 | Runner.max_steps_per_episode = 27000 # agent steps 37 | 38 | WrappedPrioritizedReplayBuffer.replay_capacity = 1000000 39 | WrappedPrioritizedReplayBuffer.batch_size = 32 40 | -------------------------------------------------------------------------------- /dopamine/colab/README.md: -------------------------------------------------------------------------------- 1 | # Colabs 2 | 3 | This directory contains 4 | [`utils.py`](https://github.com/google/dopamine/blob/master/dopamine/colab/utils.py), 5 | which provides a number of useful utilities for loading experiment statistics. 6 | 7 | We also provide a set of colabs to help illustrate how you can use Dopamine. 8 | 9 | ## Agents 10 | 11 | In this 12 | [colab](https://colab.research.google.com/github/google/dopamine/blob/master/dopamine/colab/agents.ipynb) 13 | we illustrate how to create a new agent by either subclassing 14 | [`DQN`](https://github.com/google/dopamine/blob/master/dopamine/agents/dqn/dqn_agent.py) 15 | or by creating a new agent from scratch. 16 | 17 | ## Loading statistics 18 | 19 | In this 20 | [colab](https://colab.research.google.com/github/google/dopamine/blob/master/dopamine/colab/load_statistics.ipynb) 21 | we illustrate how to load and visualize the logs data produced by Dopamine. 22 | 23 | ## Visualizing trained agents 24 | In this 25 | [colab](https://colab.research.google.com/github/google/dopamine/blob/master/dopamine/colab/agent_visualizer.ipynb) 26 | we illustrate how to visualize a trained agent using the visualization utilities 27 | provided with Dopamine. 28 | 29 | In [this colab](https://colab.research.google.com/github/google/dopamine/blob/master/dopamine/colab/jax_agent_visualizer.ipynb) 30 | we can visualize trained agents' performance with the agents trained with the 31 | [JAX implementations](https://github.com/google/dopamine/tree/master/dopamine/jax). 
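All of these colabs read their data from the `base_dir` of a completed run. The
same statistics can also be loaded outside of colab with the helpers in
`utils.py`; the snippet below is a rough sketch (the log path and summary key
are illustrative, and the exact arguments should be checked against
`utils.py`):

```python
# Rough sketch: load per-iteration training returns from a local run.
from dopamine.colab import utils as colab_utils

LOG_PATH = '/tmp/dopamine_dqn/logs'  # illustrative: <base_dir>/logs of a run

# read_experiment aggregates the pickled iteration logs into a pandas
# DataFrame, keyed by iteration, with the requested summary statistics.
experiment_data = colab_utils.read_experiment(
    LOG_PATH, verbose=True, summary_keys=['train_episode_returns'])
print(experiment_data.head())
```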
32 | 33 | ## Visualizing with Tensorboard 34 | In this 35 | [colab](https://colab.research.google.com/github/google/dopamine/blob/master/dopamine/colab/tensorboard.ipynb) 36 | we illustrate how to download and visualize different agents with Tensorboard. 37 | 38 | ## Training on Cartpole 39 | In this 40 | [colab](https://colab.research.google.com/github/google/dopamine/blob/master/dopamine/colab/cartpole.ipynb) 41 | we illustrate how to train DQN and C51 on the Cartpole environment. 42 | -------------------------------------------------------------------------------- /dopamine/colab/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Dopamine Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /dopamine/continuous_domains/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """Copyright 2021 The Dopamine Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | -------------------------------------------------------------------------------- /dopamine/continuous_domains/train.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2021 The Dopamine Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | r"""The entry point for running a Dopamine agent on continuous control envs. 16 | 17 | """ 18 | 19 | from absl import app 20 | from absl import flags 21 | from absl import logging 22 | 23 | from dopamine.continuous_domains import run_experiment 24 | 25 | flags.DEFINE_string('base_dir', None, 26 | 'Base directory to host all required sub-directories.') 27 | flags.DEFINE_multi_string( 28 | 'gin_files', [], 'List of paths to gin configuration files (e.g.' 
29 | '"dopamine/jax/agents/sac/configs/sac.gin").') 30 | flags.DEFINE_multi_string( 31 | 'gin_bindings', [], 32 | 'Gin bindings to override the values set in the config files.') 33 | 34 | FLAGS = flags.FLAGS 35 | 36 | 37 | def main(unused_argv): 38 | """Main method. 39 | 40 | Args: 41 | unused_argv: Arguments (unused). 42 | """ 43 | logging.set_verbosity(logging.INFO) 44 | base_dir = FLAGS.base_dir 45 | gin_files = FLAGS.gin_files 46 | gin_bindings = FLAGS.gin_bindings 47 | 48 | run_experiment.load_gin_configs(gin_files, gin_bindings) 49 | runner = run_experiment.create_continuous_runner(base_dir) 50 | runner.run_experiment() 51 | 52 | 53 | if __name__ == '__main__': 54 | flags.mark_flag_as_required('base_dir') 55 | app.run(main) 56 | -------------------------------------------------------------------------------- /dopamine/discrete_domains/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """Copyright 2018 The Dopamine Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | -------------------------------------------------------------------------------- /dopamine/discrete_domains/iteration_statistics.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Dopamine Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """A class for storing iteration-specific metrics. 16 | """ 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | 23 | class IterationStatistics(object): 24 | """A class for storing iteration-specific metrics. 25 | 26 | The internal format is as follows: we maintain a mapping from keys to lists. 27 | Each list contains all the values corresponding to the given key. 28 | 29 | For example, self.data_lists['train_episode_returns'] might contain the 30 | per-episode returns achieved during this iteration. 31 | 32 | Attributes: 33 | data_lists: dict mapping each metric_name (str) to a list of said metric 34 | across episodes. 35 | """ 36 | 37 | def __init__(self): 38 | self.data_lists = {} 39 | 40 | def append(self, data_pairs): 41 | """Add the given values to their corresponding key-indexed lists. 42 | 43 | Args: 44 | data_pairs: A dictionary of key-value pairs to be recorded. 
45 | """ 46 | for key, value in data_pairs.items(): 47 | if key not in self.data_lists: 48 | self.data_lists[key] = [] 49 | self.data_lists[key].append(value) 50 | -------------------------------------------------------------------------------- /dopamine/discrete_domains/train.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Lint as: python3 3 | # Copyright 2018 The Dopamine Authors. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | r"""The entry point for running a Dopamine agent. 17 | 18 | """ 19 | 20 | from absl import app 21 | from absl import flags 22 | from absl import logging 23 | 24 | from dopamine.discrete_domains import run_experiment 25 | import tensorflow as tf 26 | 27 | 28 | flags.DEFINE_string('base_dir', None, 29 | 'Base directory to host all required sub-directories.') 30 | flags.DEFINE_multi_string( 31 | 'gin_files', [], 'List of paths to gin configuration files (e.g.' 32 | '"dopamine/agents/dqn/dqn.gin").') 33 | flags.DEFINE_multi_string( 34 | 'gin_bindings', [], 35 | 'Gin bindings to override the values set in the config files ' 36 | '(e.g. "DQNAgent.epsilon_train=0.1",' 37 | ' "create_environment.game_name="Pong"").') 38 | 39 | 40 | FLAGS = flags.FLAGS 41 | 42 | 43 | 44 | 45 | def main(unused_argv): 46 | """Main method. 47 | 48 | Args: 49 | unused_argv: Arguments (unused). 50 | """ 51 | logging.set_verbosity(logging.INFO) 52 | tf.compat.v1.disable_v2_behavior() 53 | 54 | base_dir = FLAGS.base_dir 55 | gin_files = FLAGS.gin_files 56 | gin_bindings = FLAGS.gin_bindings 57 | run_experiment.load_gin_configs(gin_files, gin_bindings) 58 | runner = run_experiment.create_runner(base_dir) 59 | runner.run_experiment() 60 | 61 | 62 | if __name__ == '__main__': 63 | flags.mark_flag_as_required('base_dir') 64 | app.run(main) 65 | -------------------------------------------------------------------------------- /dopamine/jax/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Dopamine Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /dopamine/jax/agents/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Dopamine Authors. 
3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /dopamine/jax/agents/dqn/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Dopamine Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /dopamine/jax/agents/dqn/configs/dqn.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow the classic Nature DQN, but we modify as necessary to 2 | # match those used in Rainbow (Hessel et al., 2018), to ensure apples-to-apples 3 | # comparison. 4 | import dopamine.discrete_domains.atari_lib 5 | import dopamine.discrete_domains.run_experiment 6 | import dopamine.jax.agents.dqn.dqn_agent 7 | import dopamine.replay_memory.circular_replay_buffer 8 | 9 | JaxDQNAgent.gamma = 0.99 10 | JaxDQNAgent.update_horizon = 1 11 | JaxDQNAgent.min_replay_history = 20000 # agent steps 12 | JaxDQNAgent.update_period = 4 13 | JaxDQNAgent.target_update_period = 8000 # agent steps 14 | JaxDQNAgent.epsilon_train = 0.01 15 | JaxDQNAgent.epsilon_eval = 0.001 16 | JaxDQNAgent.epsilon_decay_period = 250000 # agent steps 17 | # Note: We are using the Adam optimizer by default for JaxDQN, which differs 18 | # from the original NatureDQN and the dopamine TensorFlow version. In 19 | # the experiments we have ran, we have found that using Adam yields 20 | # improved training performance. 21 | JaxDQNAgent.optimizer = 'adam' 22 | create_optimizer.learning_rate = 6.25e-5 23 | create_optimizer.eps = 1.5e-4 24 | 25 | atari_lib.create_atari_environment.game_name = 'Pong' 26 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017). 
27 | atari_lib.create_atari_environment.sticky_actions = True 28 | create_runner.schedule = 'continuous_train' 29 | create_agent.agent_name = 'jax_dqn' 30 | create_agent.debug_mode = True 31 | Runner.num_iterations = 200 32 | Runner.training_steps = 250000 # agent steps 33 | Runner.evaluation_steps = 125000 # agent steps 34 | Runner.max_steps_per_episode = 27000 # agent steps 35 | 36 | OutOfGraphReplayBuffer.replay_capacity = 1000000 37 | OutOfGraphReplayBuffer.batch_size = 32 38 | -------------------------------------------------------------------------------- /dopamine/jax/agents/dqn/configs/dqn_acrobot.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters for a simple DQN-style Acrobot agent. The hyperparameters 2 | # chosen achieve reasonable performance. 3 | import dopamine.discrete_domains.gym_lib 4 | import dopamine.discrete_domains.run_experiment 5 | import dopamine.jax.agents.dqn.dqn_agent 6 | import dopamine.jax.networks 7 | import dopamine.replay_memory.circular_replay_buffer 8 | import flax 9 | 10 | JaxDQNAgent.observation_shape = %gym_lib.ACROBOT_OBSERVATION_SHAPE 11 | JaxDQNAgent.observation_dtype = %jax_networks.ACROBOT_OBSERVATION_DTYPE 12 | JaxDQNAgent.stack_size = %gym_lib.ACROBOT_STACK_SIZE 13 | JaxDQNAgent.network = @networks.ClassicControlDQNNetwork 14 | JaxDQNAgent.gamma = 0.99 15 | JaxDQNAgent.update_horizon = 1 16 | JaxDQNAgent.min_replay_history = 500 17 | JaxDQNAgent.update_period = 4 18 | JaxDQNAgent.target_update_period = 100 19 | JaxDQNAgent.epsilon_fn = @dqn_agent.identity_epsilon 20 | JaxDQNAgent.optimizer = 'adam' 21 | create_optimizer.learning_rate = 0.001 22 | create_optimizer.eps = 3.125e-4 23 | 24 | ClassicControlDQNNetwork.min_vals = %jax_networks.ACROBOT_MIN_VALS 25 | ClassicControlDQNNetwork.max_vals = %jax_networks.ACROBOT_MAX_VALS 26 | 27 | create_gym_environment.environment_name = 'Acrobot' 28 | create_gym_environment.version = 'v1' 29 | create_runner.schedule = 'continuous_train' 30 | create_agent.agent_name = 'jax_dqn' 31 | create_agent.debug_mode = True 32 | TrainRunner.create_environment_fn = @gym_lib.create_gym_environment 33 | Runner.num_iterations = 500 34 | Runner.training_steps = 1000 35 | Runner.evaluation_steps = 1000 36 | Runner.max_steps_per_episode = 500 37 | 38 | OutOfGraphReplayBuffer.replay_capacity = 50000 39 | OutOfGraphReplayBuffer.batch_size = 128 40 | -------------------------------------------------------------------------------- /dopamine/jax/agents/dqn/configs/dqn_cartpole.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters for a simple DQN-style Cartpole agent. The hyperparameters 2 | # chosen achieve reasonable performance. 
3 | import dopamine.discrete_domains.gym_lib 4 | import dopamine.discrete_domains.run_experiment 5 | import dopamine.jax.agents.dqn.dqn_agent 6 | import dopamine.jax.networks 7 | import dopamine.replay_memory.circular_replay_buffer 8 | import flax 9 | 10 | JaxDQNAgent.observation_shape = %gym_lib.CARTPOLE_OBSERVATION_SHAPE 11 | JaxDQNAgent.observation_dtype = %jax_networks.CARTPOLE_OBSERVATION_DTYPE 12 | JaxDQNAgent.stack_size = %gym_lib.CARTPOLE_STACK_SIZE 13 | JaxDQNAgent.network = @networks.ClassicControlDQNNetwork 14 | JaxDQNAgent.gamma = 0.99 15 | JaxDQNAgent.update_horizon = 1 16 | JaxDQNAgent.min_replay_history = 500 17 | JaxDQNAgent.update_period = 4 18 | JaxDQNAgent.target_update_period = 100 19 | JaxDQNAgent.epsilon_fn = @dqn_agent.identity_epsilon 20 | JaxDQNAgent.optimizer = 'adam' 21 | create_optimizer.learning_rate = 0.001 22 | create_optimizer.eps = 3.125e-4 23 | 24 | ClassicControlDQNNetwork.min_vals = %jax_networks.CARTPOLE_MIN_VALS 25 | ClassicControlDQNNetwork.max_vals = %jax_networks.CARTPOLE_MAX_VALS 26 | 27 | create_gym_environment.environment_name = 'CartPole' 28 | create_gym_environment.version = 'v0' 29 | create_runner.schedule = 'continuous_train' 30 | create_agent.agent_name = 'jax_dqn' 31 | create_agent.debug_mode = True 32 | TrainRunner.create_environment_fn = @gym_lib.create_gym_environment 33 | Runner.num_iterations = 500 34 | Runner.training_steps = 1000 35 | Runner.evaluation_steps = 1000 36 | Runner.max_steps_per_episode = 200 # Default max episode length. 37 | 38 | OutOfGraphReplayBuffer.replay_capacity = 50000 39 | OutOfGraphReplayBuffer.batch_size = 128 40 | -------------------------------------------------------------------------------- /dopamine/jax/agents/dqn/configs/dqn_lunarlander.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters for a simple DQN-style LunarLander agent. The hyperparameters 2 | # chosen achieve reasonable performance. 
3 | import dopamine.discrete_domains.gym_lib 4 | import dopamine.discrete_domains.run_experiment 5 | import dopamine.jax.agents.dqn.dqn_agent 6 | import dopamine.jax.networks 7 | import dopamine.replay_memory.circular_replay_buffer 8 | import flax 9 | 10 | JaxDQNAgent.observation_shape = %gym_lib.LUNAR_OBSERVATION_SHAPE 11 | JaxDQNAgent.observation_dtype = %jax_networks.LUNAR_OBSERVATION_DTYPE 12 | JaxDQNAgent.stack_size = %gym_lib.LUNAR_STACK_SIZE 13 | JaxDQNAgent.network = @networks.ClassicControlDQNNetwork 14 | JaxDQNAgent.gamma = 0.99 15 | JaxDQNAgent.update_horizon = 1 16 | JaxDQNAgent.min_replay_history = 500 17 | JaxDQNAgent.update_period = 4 18 | JaxDQNAgent.target_update_period = 100 19 | JaxDQNAgent.epsilon_fn = @dqn_agent.identity_epsilon 20 | JaxDQNAgent.optimizer = 'adam' 21 | create_optimizer.learning_rate = 0.001 22 | create_optimizer.eps = 3.125e-4 23 | 24 | create_gym_environment.environment_name = 'LunarLander' 25 | create_gym_environment.version = 'v2' 26 | create_runner.schedule = 'continuous_train' 27 | create_agent.agent_name = 'jax_dqn' 28 | create_agent.debug_mode = True 29 | TrainRunner.create_environment_fn = @gym_lib.create_gym_environment 30 | Runner.num_iterations = 125 31 | Runner.training_steps = 4000 32 | Runner.evaluation_steps = 1000 33 | Runner.max_steps_per_episode = 1000 34 | 35 | OutOfGraphReplayBuffer.replay_capacity = 50000 36 | OutOfGraphReplayBuffer.batch_size = 128 37 | -------------------------------------------------------------------------------- /dopamine/jax/agents/dqn/configs/dqn_mountaincar.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters for a simple DQN-style MountainCar agent. The hyperparameters 2 | # chosen achieve reasonable performance. 3 | import dopamine.discrete_domains.gym_lib 4 | import dopamine.discrete_domains.run_experiment 5 | import dopamine.jax.agents.dqn.dqn_agent 6 | import dopamine.jax.networks 7 | import dopamine.replay_memory.circular_replay_buffer 8 | import flax 9 | 10 | JaxDQNAgent.observation_shape = %gym_lib.MOUNTAINCAR_OBSERVATION_SHAPE 11 | JaxDQNAgent.observation_dtype = %jax_networks.MOUNTAINCAR_OBSERVATION_DTYPE 12 | JaxDQNAgent.stack_size = %gym_lib.MOUNTAINCAR_STACK_SIZE 13 | JaxDQNAgent.network = @networks.ClassicControlDQNNetwork 14 | JaxDQNAgent.gamma = 0.99 15 | JaxDQNAgent.update_horizon = 1 16 | JaxDQNAgent.min_replay_history = 500 17 | JaxDQNAgent.update_period = 4 18 | JaxDQNAgent.target_update_period = 100 19 | JaxDQNAgent.epsilon_fn = @dqn_agent.identity_epsilon 20 | JaxDQNAgent.optimizer = 'adam' 21 | create_optimizer.learning_rate = 0.001 22 | create_optimizer.eps = 3.125e-4 23 | 24 | ClassicControlDQNNetwork.min_vals = %jax_networks.MOUNTAINCAR_MIN_VALS 25 | ClassicControlDQNNetwork.max_vals = %jax_networks.MOUNTAINCAR_MAX_VALS 26 | 27 | create_gym_environment.environment_name = 'MountainCar' 28 | create_gym_environment.version = 'v0' 29 | create_runner.schedule = 'continuous_train' 30 | create_agent.agent_name = 'jax_dqn' 31 | create_agent.debug_mode = True 32 | TrainRunner.create_environment_fn = @gym_lib.create_gym_environment 33 | Runner.num_iterations = 500 34 | Runner.training_steps = 1000 35 | Runner.max_steps_per_episode = 600 # Default max episode length. 
36 | 37 | OutOfGraphReplayBuffer.replay_capacity = 50000 38 | OutOfGraphReplayBuffer.batch_size = 128 39 | -------------------------------------------------------------------------------- /dopamine/jax/agents/dqn/configs/dqn_profiling.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow the classic Nature DQN, but we modify as necessary to 2 | # match those used in Rainbow (Hessel et al., 2018), to ensure apples-to-apples 3 | # comparison. 4 | import dopamine.discrete_domains.atari_lib 5 | import dopamine.discrete_domains.run_experiment 6 | import dopamine.jax.agents.dqn.dqn_agent 7 | import dopamine.replay_memory.circular_replay_buffer 8 | 9 | JaxDQNAgent.gamma = 0.99 10 | JaxDQNAgent.update_horizon = 1 11 | JaxDQNAgent.min_replay_history = 100 # agent steps 12 | JaxDQNAgent.update_period = 4 13 | JaxDQNAgent.target_update_period = 8000 # agent steps 14 | JaxDQNAgent.epsilon_fn = @dqn_agent.identity_epsilon 15 | JaxDQNAgent.epsilon_train = 0.0 16 | JaxDQNAgent.epsilon_eval = 0.0 17 | 18 | atari_lib.create_atari_environment.game_name = 'Pong' 19 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017). 20 | atari_lib.create_atari_environment.sticky_actions = True 21 | create_runner.schedule = 'continuous_train' 22 | create_agent.agent_name = 'jax_dqn' 23 | create_agent.debug_mode = True 24 | Runner.num_iterations = 200 25 | Runner.training_steps = 250000 # agent steps 26 | Runner.evaluation_steps = 125000 # agent steps 27 | Runner.max_steps_per_episode = 27000 # agent steps 28 | 29 | OutOfGraphReplayBuffer.replay_capacity = 1000000 30 | OutOfGraphReplayBuffer.batch_size = 32 31 | -------------------------------------------------------------------------------- /dopamine/jax/agents/full_rainbow/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Dopamine Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /dopamine/jax/agents/full_rainbow/configs/full_rainbow.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow Hessel et al. (2018), except for sticky_actions, 2 | # which was False (not using sticky actions) in the original paper. 
3 | import dopamine.jax.agents.full_rainbow.full_rainbow_agent 4 | import dopamine.jax.agents.dqn.dqn_agent 5 | import dopamine.jax.networks 6 | import dopamine.discrete_domains.atari_lib 7 | import dopamine.discrete_domains.run_experiment 8 | import dopamine.replay_memory.prioritized_replay_buffer 9 | 10 | JaxDQNAgent.gamma = 0.99 11 | JaxDQNAgent.update_horizon = 3 12 | JaxDQNAgent.min_replay_history = 20000 # agent steps 13 | JaxDQNAgent.update_period = 4 14 | JaxDQNAgent.target_update_period = 8000 # agent steps 15 | JaxDQNAgent.epsilon_train = 0.01 16 | JaxDQNAgent.epsilon_eval = 0.001 17 | JaxDQNAgent.epsilon_decay_period = 250000 # agent steps 18 | JaxDQNAgent.optimizer = 'adam' 19 | 20 | JaxFullRainbowAgent.noisy = True 21 | JaxFullRainbowAgent.dueling = True 22 | JaxFullRainbowAgent.double_dqn = True 23 | JaxFullRainbowAgent.num_atoms = 51 24 | JaxFullRainbowAgent.vmax = 10. 25 | JaxFullRainbowAgent.replay_scheme = 'prioritized' 26 | 27 | # Note these parameters are different from C51's. 28 | create_optimizer.learning_rate = 0.0000625 29 | create_optimizer.eps = 0.00015 30 | 31 | atari_lib.create_atari_environment.game_name = 'Pong' 32 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017). 33 | atari_lib.create_atari_environment.sticky_actions = True 34 | create_runner.schedule = 'continuous_train' 35 | create_agent.agent_name = 'full_rainbow' 36 | create_agent.debug_mode = True 37 | Runner.num_iterations = 200 38 | Runner.training_steps = 250000 # agent steps 39 | Runner.evaluation_steps = 125000 # agent steps 40 | Runner.max_steps_per_episode = 27000 # agent steps 41 | 42 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 1000000 43 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32 44 | -------------------------------------------------------------------------------- /dopamine/jax/agents/full_rainbow/configs/full_rainbow_profiling.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow Hessel et al. (2018), except for sticky_actions, 2 | # which was False (not using sticky actions) in the original paper. 3 | import dopamine.jax.agents.full_rainbow.full_rainbow_agent 4 | import dopamine.jax.agents.dqn.dqn_agent 5 | import dopamine.jax.networks 6 | import dopamine.discrete_domains.atari_lib 7 | import dopamine.discrete_domains.run_experiment 8 | import dopamine.replay_memory.prioritized_replay_buffer 9 | 10 | JaxDQNAgent.gamma = 0.99 11 | JaxDQNAgent.update_horizon = 3 12 | JaxDQNAgent.min_replay_history = 100 # agent steps 13 | JaxDQNAgent.update_period = 4 14 | JaxDQNAgent.target_update_period = 8000 # agent steps 15 | JaxDQNAgent.epsilon_train = 0.0 16 | JaxDQNAgent.epsilon_eval = 0.0 17 | JaxDQNAgent.epsilon_decay_period = 250000 # agent steps 18 | JaxDQNAgent.optimizer = 'adam' 19 | 20 | JaxFullRainbowAgent.num_atoms = 51 21 | JaxFullRainbowAgent.vmax = 10. 22 | JaxFullRainbowAgent.replay_scheme = 'prioritized' 23 | JaxFullRainbowAgent.noisy = True 24 | JaxFullRainbowAgent.dueling = True 25 | JaxFullRainbowAgent.double_dqn = True 26 | JaxFullRainbowAgent.epsilon_fn = @dqn_agent.identity_epsilon 27 | 28 | # Note these parameters are different from C51's. 29 | create_optimizer.learning_rate = 0.0000625 30 | create_optimizer.eps = 0.00015 31 | 32 | atari_lib.create_atari_environment.game_name = 'Pong' 33 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017). 
34 | atari_lib.create_atari_environment.sticky_actions = True 35 | create_runner.schedule = 'continuous_train' 36 | create_agent.agent_name = 'jax_rainbow' 37 | create_agent.debug_mode = True 38 | Runner.num_iterations = 200 39 | Runner.training_steps = 250000 # agent steps 40 | Runner.evaluation_steps = 125000 # agent steps 41 | Runner.max_steps_per_episode = 27000 # agent steps 42 | 43 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 1000000 44 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32 45 | -------------------------------------------------------------------------------- /dopamine/jax/agents/implicit_quantile/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Dopamine Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /dopamine/jax/agents/implicit_quantile/configs/implicit_quantile.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow Dabney et al. (2018), but we modify as necessary to 2 | # match those used in Rainbow (Hessel et al., 2018), to ensure apples-to-apples 3 | # comparison. 4 | import dopamine.jax.agents.implicit_quantile.implicit_quantile_agent 5 | import dopamine.discrete_domains.atari_lib 6 | import dopamine.discrete_domains.run_experiment 7 | import dopamine.replay_memory.circular_replay_buffer 8 | 9 | JaxImplicitQuantileAgent.kappa = 1.0 10 | JaxImplicitQuantileAgent.num_tau_samples = 64 11 | JaxImplicitQuantileAgent.num_tau_prime_samples = 64 12 | JaxImplicitQuantileAgent.num_quantile_samples = 32 13 | JaxImplicitQuantileAgent.gamma = 0.99 14 | JaxImplicitQuantileAgent.update_horizon = 3 15 | JaxImplicitQuantileAgent.min_replay_history = 20000 # agent steps 16 | JaxImplicitQuantileAgent.update_period = 4 17 | JaxImplicitQuantileAgent.target_update_period = 8000 # agent steps 18 | JaxImplicitQuantileAgent.epsilon_train = 0.01 19 | JaxImplicitQuantileAgent.epsilon_eval = 0.001 20 | JaxImplicitQuantileAgent.epsilon_decay_period = 250000 # agent steps 21 | JaxImplicitQuantileAgent.optimizer = 'adam' 22 | create_optimizer.learning_rate = 0.00005 23 | create_optimizer.eps = 0.0003125 24 | 25 | atari_lib.create_atari_environment.game_name = 'Pong' 26 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017). 
27 | atari_lib.create_atari_environment.sticky_actions = True 28 | create_runner.schedule = 'continuous_train' 29 | create_agent.agent_name = 'jax_implicit_quantile' 30 | create_agent.debug_mode = True 31 | Runner.num_iterations = 200 32 | Runner.training_steps = 250000 33 | Runner.evaluation_steps = 125000 34 | Runner.max_steps_per_episode = 27000 35 | 36 | OutOfGraphReplayBuffer.replay_capacity = 1000000 37 | OutOfGraphReplayBuffer.batch_size = 32 38 | -------------------------------------------------------------------------------- /dopamine/jax/agents/implicit_quantile/configs/implicit_quantile_profiling.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow Dabney et al. (2018), but we modify as necessary to 2 | # match those used in Rainbow (Hessel et al., 2018), to ensure apples-to-apples 3 | # comparison. 4 | import dopamine.jax.agents.implicit_quantile.implicit_quantile_agent 5 | import dopamine.discrete_domains.atari_lib 6 | import dopamine.discrete_domains.run_experiment 7 | import dopamine.jax.agents.dqn.dqn_agent 8 | import dopamine.replay_memory.circular_replay_buffer 9 | 10 | JaxImplicitQuantileAgent.kappa = 1.0 11 | JaxImplicitQuantileAgent.num_tau_samples = 64 12 | JaxImplicitQuantileAgent.num_tau_prime_samples = 64 13 | JaxImplicitQuantileAgent.num_quantile_samples = 32 14 | JaxImplicitQuantileAgent.gamma = 0.99 15 | JaxImplicitQuantileAgent.update_horizon = 3 16 | JaxImplicitQuantileAgent.min_replay_history = 100 # agent steps 17 | JaxImplicitQuantileAgent.update_period = 4 18 | JaxImplicitQuantileAgent.target_update_period = 8000 # agent steps 19 | JaxImplicitQuantileAgent.epsilon_fn = @dqn_agent.identity_epsilon 20 | JaxImplicitQuantileAgent.epsilon_train = 0.0 21 | JaxImplicitQuantileAgent.epsilon_eval = 0.0 22 | JaxImplicitQuantileAgent.optimizer = 'adam' 23 | create_optimizer.learning_rate = 0.00005 24 | create_optimizer.eps = 0.0003125 25 | 26 | atari_lib.create_atari_environment.game_name = 'Pong' 27 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017). 28 | atari_lib.create_atari_environment.sticky_actions = True 29 | create_runner.schedule = 'continuous_train' 30 | create_agent.agent_name = 'jax_implicit_quantile' 31 | create_agent.debug_mode = True 32 | Runner.num_iterations = 200 33 | Runner.training_steps = 250000 34 | Runner.evaluation_steps = 125000 35 | Runner.max_steps_per_episode = 27000 36 | 37 | OutOfGraphReplayBuffer.replay_capacity = 1000000 38 | OutOfGraphReplayBuffer.batch_size = 32 39 | -------------------------------------------------------------------------------- /dopamine/jax/agents/quantile/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Dopamine Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | -------------------------------------------------------------------------------- /dopamine/jax/agents/quantile/configs/quantile.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow Dabney et al. (2017) but we modify as necessary to 2 | # match those used in Rainbow (Hessel et al., 2018), to ensure apples-to-apples 3 | # comparison. 4 | import dopamine.jax.agents.quantile.quantile_agent 5 | import dopamine.discrete_domains.atari_lib 6 | import dopamine.discrete_domains.run_experiment 7 | import dopamine.replay_memory.prioritized_replay_buffer 8 | 9 | JaxQuantileAgent.kappa = 1.0 10 | JaxQuantileAgent.num_atoms = 200 11 | JaxQuantileAgent.gamma = 0.99 12 | JaxQuantileAgent.update_horizon = 3 13 | JaxQuantileAgent.min_replay_history = 20000 # agent steps 14 | JaxQuantileAgent.update_period = 4 15 | JaxQuantileAgent.target_update_period = 8000 # agent steps 16 | JaxQuantileAgent.epsilon_train = 0.01 17 | JaxQuantileAgent.epsilon_eval = 0.001 18 | JaxQuantileAgent.epsilon_decay_period = 250000 # agent steps 19 | JaxQuantileAgent.replay_scheme = 'prioritized' 20 | JaxQuantileAgent.optimizer = 'adam' 21 | 22 | create_optimizer.learning_rate = 0.00005 23 | create_optimizer.eps = 0.0003125 24 | 25 | atari_lib.create_atari_environment.game_name = 'Pong' 26 | atari_lib.create_atari_environment.sticky_actions = True 27 | create_runner.schedule = 'continuous_train' 28 | create_agent.agent_name = 'jax_quantile' 29 | create_agent.debug_mode = True 30 | Runner.num_iterations = 200 31 | Runner.training_steps = 250000 32 | Runner.evaluation_steps = 125000 33 | Runner.max_steps_per_episode = 27000 34 | 35 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 1000000 36 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32 37 | -------------------------------------------------------------------------------- /dopamine/jax/agents/quantile/configs/quantile_profiling.gin: -------------------------------------------------------------------------------- 1 | import dopamine.jax.agents.quantile.quantile_agent 2 | import dopamine.discrete_domains.atari_lib 3 | import dopamine.discrete_domains.run_experiment 4 | import dopamine.jax.agents.dqn.dqn_agent 5 | import dopamine.replay_memory.prioritized_replay_buffer 6 | 7 | JaxQuantileAgent.kappa = 1.0 8 | JaxQuantileAgent.num_atoms = 200 9 | JaxQuantileAgent.gamma = 0.99 10 | JaxQuantileAgent.update_horizon = 3 11 | JaxQuantileAgent.min_replay_history = 100 # agent steps 12 | JaxQuantileAgent.update_period = 4 13 | JaxQuantileAgent.target_update_period = 8000 # agent steps 14 | JaxQuantileAgent.epsilon_fn = @dqn_agent.identity_epsilon 15 | JaxQuantileAgent.epsilon_train = 0.0 16 | JaxQuantileAgent.epsilon_eval = 0.0 17 | JaxQuantileAgent.replay_scheme = 'prioritized' 18 | JaxQuantileAgent.optimizer = 'adam' 19 | 20 | create_optimizer.learning_rate = 0.00005 21 | create_optimizer.eps = 0.0003125 22 | 23 | atari_lib.create_atari_environment.game_name = 'Pong' 24 | atari_lib.create_atari_environment.sticky_actions = True 25 | create_runner.schedule = 'continuous_train' 26 | create_agent.agent_name = 'jax_quantile' 27 | create_agent.debug_mode = True 28 | Runner.num_iterations = 200 29 | Runner.training_steps = 250000 30 | Runner.evaluation_steps = 125000 31 | Runner.max_steps_per_episode = 27000 32 | 33 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 1000000 34 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32 35 | -------------------------------------------------------------------------------- 
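The gin files in this tree are not run on their own; they are consumed by the entry point in dopamine/discrete_domains/train.py shown earlier, which reads the --base_dir, --gin_files, and --gin_bindings flags. Below is a minimal sketch of the same flow driven from Python, using the calls from main() in train.py; the output directory, the choice of quantile.gin, and the override binding are illustrative placeholders, not values taken from the repository.

# Minimal sketch (assumed paths and values): load a gin config, build the
# runner, and run the experiment, following the calls made by main() in
# dopamine/discrete_domains/train.py.
from dopamine.discrete_domains import run_experiment

base_dir = '/tmp/dopamine_runs/jax_quantile'  # hypothetical output directory
gin_files = ['dopamine/jax/agents/quantile/configs/quantile.gin']
gin_bindings = ['JaxQuantileAgent.min_replay_history = 1000']  # optional override

run_experiment.load_gin_configs(gin_files, gin_bindings)
runner = run_experiment.create_runner(base_dir)  # schedule comes from the gin file
runner.run_experiment()

The command-line entry point does the same thing, with the gin file paths and bindings passed through the flags defined in train.py.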
/dopamine/jax/agents/rainbow/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Dopamine Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /dopamine/jax/agents/rainbow/configs/c51.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow the settings from Bellemare et al. (2017), but we 2 | # modify as necessary to match those used in Rainbow (Hessel et al., 2018), to 3 | # ensure apples-to-apples comparison. 4 | import dopamine.jax.agents.rainbow.rainbow_agent 5 | import dopamine.discrete_domains.atari_lib 6 | import dopamine.discrete_domains.run_experiment 7 | import dopamine.replay_memory.prioritized_replay_buffer 8 | 9 | JaxRainbowAgent.num_atoms = 51 10 | JaxRainbowAgent.vmax = 10. 11 | JaxRainbowAgent.gamma = 0.99 12 | JaxRainbowAgent.update_horizon = 1 13 | JaxRainbowAgent.min_replay_history = 20000 # agent steps 14 | JaxRainbowAgent.update_period = 4 15 | JaxRainbowAgent.target_update_period = 8000 # agent steps 16 | JaxRainbowAgent.epsilon_train = 0.01 17 | JaxRainbowAgent.epsilon_eval = 0.001 18 | JaxRainbowAgent.epsilon_decay_period = 250000 # agent steps 19 | JaxRainbowAgent.replay_scheme = 'uniform' 20 | 21 | atari_lib.create_atari_environment.game_name = 'Pong' 22 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017). 23 | atari_lib.create_atari_environment.sticky_actions = True 24 | create_runner.schedule = 'continuous_train' 25 | create_agent.agent_name = 'jax_rainbow' 26 | create_agent.debug_mode = True 27 | Runner.num_iterations = 200 28 | Runner.training_steps = 250000 # agent steps 29 | Runner.evaluation_steps = 125000 # agent steps 30 | Runner.max_steps_per_episode = 27000 # agent steps 31 | 32 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 1000000 33 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32 34 | -------------------------------------------------------------------------------- /dopamine/jax/agents/rainbow/configs/c51_acrobot.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters for a simple C51-style Acrobot agent. The hyperparameters 2 | # chosen achieve reasonable performance. 3 | import dopamine.jax.agents.rainbow.rainbow_agent 4 | import dopamine.jax.networks 5 | import dopamine.discrete_domains.gym_lib 6 | import dopamine.discrete_domains.run_experiment 7 | import dopamine.replay_memory.prioritized_replay_buffer 8 | 9 | JaxRainbowAgent.observation_shape = %gym_lib.ACROBOT_OBSERVATION_SHAPE 10 | JaxRainbowAgent.observation_dtype = %jax_networks.ACROBOT_OBSERVATION_DTYPE 11 | JaxRainbowAgent.stack_size = %gym_lib.ACROBOT_STACK_SIZE 12 | JaxRainbowAgent.network = @networks.ClassicControlRainbowNetwork 13 | JaxRainbowAgent.num_atoms = 51 14 | JaxRainbowAgent.vmax = 10. 
15 | JaxRainbowAgent.gamma = 0.99 16 | JaxRainbowAgent.update_horizon = 1 17 | JaxRainbowAgent.min_replay_history = 500 18 | JaxRainbowAgent.update_period = 4 19 | JaxRainbowAgent.target_update_period = 100 20 | JaxRainbowAgent.epsilon_fn = @dqn_agent.identity_epsilon 21 | JaxRainbowAgent.replay_scheme = 'uniform' 22 | create_optimizer.learning_rate = 0.1 23 | create_optimizer.eps = 0.0003125 24 | 25 | ClassicControlRainbowNetwork.min_vals = %jax_networks.ACROBOT_MIN_VALS 26 | ClassicControlRainbowNetwork.max_vals = %jax_networks.ACROBOT_MAX_VALS 27 | 28 | create_gym_environment.environment_name = 'Acrobot' 29 | create_gym_environment.version = 'v1' 30 | create_runner.schedule = 'continuous_train' 31 | create_agent.agent_name = 'jax_rainbow' 32 | TrainRunner.create_environment_fn = @gym_lib.create_gym_environment 33 | Runner.num_iterations = 500 34 | Runner.training_steps = 1000 35 | Runner.evaluation_steps = 1000 36 | Runner.max_steps_per_episode = 500 37 | 38 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 50000 39 | OutOfGraphPrioritizedReplayBuffer.batch_size = 128 40 | -------------------------------------------------------------------------------- /dopamine/jax/agents/rainbow/configs/c51_cartpole.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters for a simple C51-style Cartpole agent. The hyperparameters 2 | # chosen achieve reasonable performance. 3 | import dopamine.jax.agents.rainbow.rainbow_agent 4 | import dopamine.jax.networks 5 | import dopamine.discrete_domains.gym_lib 6 | import dopamine.discrete_domains.run_experiment 7 | import dopamine.replay_memory.prioritized_replay_buffer 8 | 9 | JaxRainbowAgent.observation_shape = %gym_lib.CARTPOLE_OBSERVATION_SHAPE 10 | JaxRainbowAgent.observation_dtype = %jax_networks.CARTPOLE_OBSERVATION_DTYPE 11 | JaxRainbowAgent.stack_size = %gym_lib.CARTPOLE_STACK_SIZE 12 | JaxRainbowAgent.network = @networks.ClassicControlRainbowNetwork 13 | JaxRainbowAgent.num_atoms = 201 14 | JaxRainbowAgent.vmax = 100. 15 | JaxRainbowAgent.gamma = 0.99 16 | JaxRainbowAgent.epsilon_eval = 0. 17 | JaxRainbowAgent.epsilon_train = 0.01 18 | JaxRainbowAgent.update_horizon = 1 19 | JaxRainbowAgent.min_replay_history = 500 20 | JaxRainbowAgent.update_period = 1 21 | JaxRainbowAgent.target_update_period = 1 22 | JaxRainbowAgent.epsilon_fn = @dqn_agent.identity_epsilon 23 | JaxRainbowAgent.replay_scheme = 'uniform' 24 | create_optimizer.learning_rate = 0.00001 25 | create_optimizer.eps = 0.00000390625 26 | 27 | ClassicControlRainbowNetwork.min_vals = %jax_networks.CARTPOLE_MIN_VALS 28 | ClassicControlRainbowNetwork.max_vals = %jax_networks.CARTPOLE_MAX_VALS 29 | 30 | create_gym_environment.environment_name = 'CartPole' 31 | create_gym_environment.version = 'v0' 32 | create_runner.schedule = 'continuous_train' 33 | create_agent.agent_name = 'jax_rainbow' 34 | TrainRunner.create_environment_fn = @gym_lib.create_gym_environment 35 | Runner.num_iterations = 400 36 | Runner.training_steps = 1000 37 | Runner.evaluation_steps = 1000 38 | Runner.max_steps_per_episode = 200 # Default max episode length. 39 | 40 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 50000 41 | OutOfGraphPrioritizedReplayBuffer.batch_size = 128 42 | -------------------------------------------------------------------------------- /dopamine/jax/agents/rainbow/configs/c51_profiling.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow the settings from Bellemare et al. 
(2017), but we 2 | # modify as necessary to match those used in Rainbow (Hessel et al., 2018), to 3 | # ensure apples-to-apples comparison. 4 | import dopamine.jax.agents.rainbow.rainbow_agent 5 | import dopamine.discrete_domains.atari_lib 6 | import dopamine.discrete_domains.run_experiment 7 | import dopamine.jax.agents.dqn.dqn_agent 8 | import dopamine.replay_memory.prioritized_replay_buffer 9 | 10 | JaxRainbowAgent.num_atoms = 51 11 | JaxRainbowAgent.vmax = 10. 12 | JaxRainbowAgent.gamma = 0.99 13 | JaxRainbowAgent.update_horizon = 1 14 | JaxRainbowAgent.min_replay_history = 100 # agent steps 15 | JaxRainbowAgent.update_period = 4 16 | JaxRainbowAgent.target_update_period = 8000 # agent steps 17 | JaxRainbowAgent.epsilon_fn = @dqn_agent.identity_epsilon 18 | JaxRainbowAgent.epsilon_train = 0.0 19 | JaxRainbowAgent.epsilon_eval = 0.0 20 | JaxRainbowAgent.replay_scheme = 'uniform' 21 | 22 | atari_lib.create_atari_environment.game_name = 'Pong' 23 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017). 24 | atari_lib.create_atari_environment.sticky_actions = True 25 | create_runner.schedule = 'continuous_train' 26 | create_agent.agent_name = 'jax_rainbow' 27 | create_agent.debug_mode = True 28 | Runner.num_iterations = 200 29 | Runner.training_steps = 250000 # agent steps 30 | Runner.evaluation_steps = 125000 # agent steps 31 | Runner.max_steps_per_episode = 27000 # agent steps 32 | 33 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 1000000 34 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32 35 | -------------------------------------------------------------------------------- /dopamine/jax/agents/rainbow/configs/rainbow.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow Hessel et al. (2018), except for sticky_actions, 2 | # which was False (not using sticky actions) in the original paper. 3 | import dopamine.jax.agents.rainbow.rainbow_agent 4 | import dopamine.discrete_domains.atari_lib 5 | import dopamine.discrete_domains.run_experiment 6 | import dopamine.replay_memory.prioritized_replay_buffer 7 | 8 | JaxRainbowAgent.num_atoms = 51 9 | JaxRainbowAgent.vmax = 10. 10 | JaxRainbowAgent.gamma = 0.99 11 | JaxRainbowAgent.update_horizon = 3 12 | JaxRainbowAgent.min_replay_history = 20000 # agent steps 13 | JaxRainbowAgent.update_period = 4 14 | JaxRainbowAgent.target_update_period = 8000 # agent steps 15 | JaxRainbowAgent.epsilon_train = 0.01 16 | JaxRainbowAgent.epsilon_eval = 0.001 17 | JaxRainbowAgent.epsilon_decay_period = 250000 # agent steps 18 | JaxRainbowAgent.replay_scheme = 'prioritized' 19 | 20 | # Note these parameters are different from C51's. 21 | create_optimizer.learning_rate = 0.0000625 22 | create_optimizer.eps = 0.00015 23 | 24 | atari_lib.create_atari_environment.game_name = 'Pong' 25 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017). 
26 | atari_lib.create_atari_environment.sticky_actions = True 27 | create_runner.schedule = 'continuous_train' 28 | create_agent.agent_name = 'jax_rainbow' 29 | create_agent.debug_mode = True 30 | Runner.num_iterations = 200 31 | Runner.training_steps = 250000 # agent steps 32 | Runner.evaluation_steps = 125000 # agent steps 33 | Runner.max_steps_per_episode = 27000 # agent steps 34 | 35 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 1000000 36 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32 37 | -------------------------------------------------------------------------------- /dopamine/jax/agents/rainbow/configs/rainbow_acrobot.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters for a simple Rainbow-style Acrobot agent. The hyperparameters 2 | # chosen achieve reasonable performance. 3 | import dopamine.jax.agents.rainbow.rainbow_agent 4 | import dopamine.jax.networks 5 | import dopamine.discrete_domains.gym_lib 6 | import dopamine.discrete_domains.run_experiment 7 | import dopamine.replay_memory.prioritized_replay_buffer 8 | 9 | JaxRainbowAgent.observation_shape = %gym_lib.ACROBOT_OBSERVATION_SHAPE 10 | JaxRainbowAgent.observation_dtype = %jax_networks.ACROBOT_OBSERVATION_DTYPE 11 | JaxRainbowAgent.stack_size = %gym_lib.ACROBOT_STACK_SIZE 12 | JaxRainbowAgent.network = @networks.ClassicControlRainbowNetwork 13 | JaxRainbowAgent.num_atoms = 51 14 | JaxRainbowAgent.vmax = 10. 15 | JaxRainbowAgent.gamma = 0.99 16 | JaxRainbowAgent.update_horizon = 3 17 | JaxRainbowAgent.min_replay_history = 500 18 | JaxRainbowAgent.update_period = 4 19 | JaxRainbowAgent.target_update_period = 100 20 | JaxRainbowAgent.epsilon_fn = @dqn_agent.identity_epsilon 21 | JaxRainbowAgent.replay_scheme = 'prioritized' 22 | create_optimizer.learning_rate = 0.09 23 | create_optimizer.eps = 0.0003125 24 | 25 | ClassicControlRainbowNetwork.min_vals = %jax_networks.ACROBOT_MIN_VALS 26 | ClassicControlRainbowNetwork.max_vals = %jax_networks.ACROBOT_MAX_VALS 27 | 28 | create_gym_environment.environment_name = 'Acrobot' 29 | create_gym_environment.version = 'v1' 30 | create_runner.schedule = 'continuous_train' 31 | create_agent.agent_name = 'jax_rainbow' 32 | TrainRunner.create_environment_fn = @gym_lib.create_gym_environment 33 | Runner.num_iterations = 500 34 | Runner.training_steps = 1000 35 | Runner.evaluation_steps = 1000 36 | Runner.max_steps_per_episode = 500 37 | 38 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 50000 39 | OutOfGraphPrioritizedReplayBuffer.batch_size = 128 40 | -------------------------------------------------------------------------------- /dopamine/jax/agents/rainbow/configs/rainbow_cartpole.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters for a simple Rainbow-style Cartpole agent. The 2 | # hyperparameters chosen achieve reasonable performance. 3 | import dopamine.jax.agents.rainbow.rainbow_agent 4 | import dopamine.jax.networks 5 | import dopamine.discrete_domains.gym_lib 6 | import dopamine.discrete_domains.run_experiment 7 | import dopamine.replay_memory.prioritized_replay_buffer 8 | 9 | JaxRainbowAgent.observation_shape = %gym_lib.CARTPOLE_OBSERVATION_SHAPE 10 | JaxRainbowAgent.observation_dtype = %jax_networks.CARTPOLE_OBSERVATION_DTYPE 11 | JaxRainbowAgent.stack_size = %gym_lib.CARTPOLE_STACK_SIZE 12 | JaxRainbowAgent.network = @networks.ClassicControlRainbowNetwork 13 | JaxRainbowAgent.num_atoms = 51 14 | JaxRainbowAgent.vmax = 10. 
15 | JaxRainbowAgent.gamma = 0.99 16 | JaxRainbowAgent.update_horizon = 3 17 | JaxRainbowAgent.min_replay_history = 500 18 | JaxRainbowAgent.update_period = 4 19 | JaxRainbowAgent.target_update_period = 100 20 | JaxRainbowAgent.epsilon_fn = @dqn_agent.identity_epsilon 21 | JaxRainbowAgent.replay_scheme = 'prioritized' 22 | create_optimizer.learning_rate = 0.09 23 | create_optimizer.eps = 0.0003125 24 | 25 | ClassicControlRainbowNetwork.min_vals = %jax_networks.CARTPOLE_MIN_VALS 26 | ClassicControlRainbowNetwork.max_vals = %jax_networks.CARTPOLE_MAX_VALS 27 | 28 | create_gym_environment.environment_name = 'CartPole' 29 | create_gym_environment.version = 'v0' 30 | create_runner.schedule = 'continuous_train' 31 | create_agent.agent_name = 'jax_rainbow' 32 | TrainRunner.create_environment_fn = @gym_lib.create_gym_environment 33 | Runner.num_iterations = 500 34 | Runner.training_steps = 1000 35 | Runner.evaluation_steps = 1000 36 | Runner.max_steps_per_episode = 200 # Default max episode length. 37 | 38 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 50000 39 | OutOfGraphPrioritizedReplayBuffer.batch_size = 128 40 | -------------------------------------------------------------------------------- /dopamine/jax/agents/rainbow/configs/rainbow_profiling.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow Hessel et al. (2018), except for sticky_actions, 2 | # which was False (not using sticky actions) in the original paper. 3 | import dopamine.jax.agents.rainbow.rainbow_agent 4 | import dopamine.discrete_domains.atari_lib 5 | import dopamine.discrete_domains.run_experiment 6 | import dopamine.jax.agents.dqn.dqn_agent 7 | import dopamine.replay_memory.prioritized_replay_buffer 8 | 9 | JaxRainbowAgent.num_atoms = 51 10 | JaxRainbowAgent.vmax = 10. 11 | JaxRainbowAgent.gamma = 0.99 12 | JaxRainbowAgent.update_horizon = 3 13 | JaxRainbowAgent.min_replay_history = 100 # agent steps 14 | JaxRainbowAgent.update_period = 4 15 | JaxRainbowAgent.target_update_period = 8000 # agent steps 16 | JaxRainbowAgent.epsilon_fn = @dqn_agent.identity_epsilon 17 | JaxRainbowAgent.epsilon_train = 0.0 18 | JaxRainbowAgent.epsilon_eval = 0.0 19 | JaxRainbowAgent.replay_scheme = 'prioritized' 20 | 21 | # Note these parameters are different from C51's. 22 | create_optimizer.learning_rate = 0.0000625 23 | create_optimizer.eps = 0.00015 24 | 25 | atari_lib.create_atari_environment.game_name = 'Pong' 26 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017). 27 | atari_lib.create_atari_environment.sticky_actions = True 28 | create_runner.schedule = 'continuous_train' 29 | create_agent.agent_name = 'jax_rainbow' 30 | create_agent.debug_mode = True 31 | Runner.num_iterations = 200 32 | Runner.training_steps = 250000 # agent steps 33 | Runner.evaluation_steps = 125000 # agent steps 34 | Runner.max_steps_per_episode = 27000 # agent steps 35 | 36 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 1000000 37 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32 38 | -------------------------------------------------------------------------------- /dopamine/jax/agents/sac/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2021 The Dopamine Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /dopamine/jax/agents/sac/configs/sac.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow those specified in Table 1 of Appendix D in: 2 | # "Soft Actor-Critic Algorithms and Applications" 3 | # by Tuomas Haarnoja et al. 4 | # https://arxiv.org/abs/1812.05905 5 | import dopamine.continuous_domains.run_experiment 6 | import dopamine.discrete_domains.gym_lib 7 | import dopamine.jax.agents.sac.sac_agent 8 | import dopamine.jax.agents.dqn.dqn_agent 9 | import dopamine.jax.continuous_networks 10 | import dopamine.replay_memory.circular_replay_buffer 11 | 12 | SACAgent.reward_scale_factor = 0.1 13 | SACAgent.network = @continuous_networks.SACNetwork 14 | SACAgent.num_layers = 2 15 | SACAgent.hidden_units = 256 16 | SACAgent.gamma = 0.99 17 | SACAgent.update_horizon = 1 18 | SACAgent.min_replay_history = 10000 # agent steps 19 | SACAgent.update_period = 1 20 | SACAgent.target_update_type = 'soft' 21 | SACAgent.target_smoothing_coefficient = 0.005 22 | SACAgent.target_entropy = None # Defaults to -num_action_dims/2 23 | SACAgent.optimizer = 'adam' 24 | SACAgent.seed = None # Seed with the current time 25 | SACAgent.observation_dtype = %sac_agent.STATE_DTYPE 26 | create_optimizer.learning_rate = 3.0e-4 27 | create_optimizer.beta1 = 0.9 28 | create_optimizer.beta2 = 0.999 29 | create_optimizer.eps = 1.0e-8 30 | 31 | create_gym_environment.environment_name = 'HalfCheetah' 32 | create_gym_environment.version = 'v2' 33 | create_continuous_runner.schedule = 'continuous_train_and_eval' 34 | create_continuous_agent.agent_name = 'sac' 35 | ContinuousTrainRunner.create_environment_fn = @gym_lib.create_gym_environment 36 | ContinuousRunner.num_iterations = 3200 37 | ContinuousRunner.training_steps = 1000 38 | ContinuousRunner.evaluation_steps = 10000 # agent steps 39 | ContinuousRunner.max_steps_per_episode = 1000 40 | ContinuousRunner.clip_rewards = False 41 | 42 | circular_replay_buffer.OutOfGraphReplayBuffer.replay_capacity = 1000000 43 | circular_replay_buffer.OutOfGraphReplayBuffer.batch_size = 256 44 | 45 | -------------------------------------------------------------------------------- /dopamine/jax/losses.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2021 The Dopamine Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | """Various losses used by the Dopamine JAX agents.""" 16 | from flax import linen as nn 17 | import jax.numpy as jnp 18 | 19 | 20 | def huber_loss(targets: jnp.array, 21 | predictions: jnp.array, 22 | delta: float = 1.0) -> jnp.ndarray: 23 | """Implementation of the Huber loss with threshold delta. 24 | 25 | Let `x = |targets - predictions|`, the Huber loss is defined as: 26 | `0.5 * x^2` if `x <= delta` 27 | `0.5 * delta^2 + delta * (x - delta)` otherwise. 28 | 29 | Args: 30 | targets: Target values. 31 | predictions: Prediction values. 32 | delta: Threshold. 33 | 34 | Returns: 35 | Huber loss. 36 | """ 37 | x = jnp.abs(targets - predictions) 38 | return jnp.where(x <= delta, 39 | 0.5 * x**2, 40 | 0.5 * delta**2 + delta * (x - delta)) 41 | 42 | 43 | def mse_loss(targets: jnp.array, predictions: jnp.array) -> jnp.ndarray: 44 | """Implementation of the mean squared error loss.""" 45 | return jnp.power((targets - predictions), 2) 46 | 47 | 48 | def softmax_cross_entropy_loss_with_logits(labels: jnp.array, 49 | logits: jnp.array) -> jnp.ndarray: 50 | """Implementation of the softmax cross entropy loss.""" 51 | return -jnp.sum(labels * nn.log_softmax(logits)) 52 | -------------------------------------------------------------------------------- /dopamine/labs/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2021 The Dopamine Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /dopamine/labs/atari_100k/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2021 The Dopamine Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | -------------------------------------------------------------------------------- /dopamine/labs/atari_100k/configs/DER.gin: -------------------------------------------------------------------------------- 1 | # Data Efficient Rainbow (DER) params 2 | import dopamine.jax.agents.dqn.dqn_agent 3 | import dopamine.jax.agents.full_rainbow.full_rainbow_agent 4 | import dopamine.jax.networks 5 | import dopamine.discrete_domains.gym_lib 6 | import dopamine.discrete_domains.run_experiment 7 | import dopamine.replay_memory.prioritized_replay_buffer 8 | import dopamine.labs.atari_100k.atari_100k_rainbow_agent 9 | 10 | JaxDQNAgent.gamma = 0.99 11 | # Use 10 instead of 20, as done by the SPR paper 12 | JaxDQNAgent.update_horizon = 10 # DER (instead of 3) 13 | JaxDQNAgent.min_replay_history = 1600 # DER (instead of 20000) 14 | JaxDQNAgent.update_period = 1 # DER: Update every 1 step (rather than 4) 15 | JaxDQNAgent.target_update_period = 2000 # DER: Target every 2000 updates 16 | JaxDQNAgent.epsilon_train = 0.01 17 | JaxDQNAgent.epsilon_eval = 0.001 18 | JaxDQNAgent.epsilon_decay_period = 2000 # agent steps 19 | JaxDQNAgent.optimizer = 'adam' 20 | 21 | JaxFullRainbowAgent.noisy = True 22 | JaxFullRainbowAgent.dueling = True 23 | JaxFullRainbowAgent.double_dqn = True 24 | JaxFullRainbowAgent.num_atoms = 51 25 | JaxFullRainbowAgent.vmax = 10. 26 | JaxFullRainbowAgent.replay_scheme = 'prioritized' 27 | JaxFullRainbowAgent.num_updates_per_train_step = 1 28 | JaxFullRainbowAgent.epsilon_fn = @jax.agents.dqn.dqn_agent.linearly_decaying_epsilon 29 | Atari100kRainbowAgent.data_augmentation = False 30 | 31 | # Note these parameters are from DER (van Hasselt et al., 2019) 32 | create_optimizer.learning_rate = 0.0001 33 | create_optimizer.eps = 0.00015 34 | 35 | atari_lib.create_atari_environment.game_name = 'Pong' 36 | # Atari 100K benchmark doesn't use sticky actions. 37 | atari_lib.create_atari_environment.sticky_actions = False 38 | AtariPreprocessing.terminal_on_life_loss = True 39 | Runner.num_iterations = 10 40 | Runner.training_steps = 10000 # agent steps 41 | MaxEpisodeEvalRunner.num_eval_episodes = 100 # agent episodes 42 | Runner.max_steps_per_episode = 27000 # agent steps 43 | 44 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 1000000 # 1M as a proxy for unbounded memory 45 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32 46 | -------------------------------------------------------------------------------- /dopamine/labs/atari_100k/configs/DrQ.gin: -------------------------------------------------------------------------------- 1 | # Data Regularized-Q (DrQ) from Kostrikov et al.
(2020) 2 | import dopamine.jax.agents.dqn.dqn_agent 3 | import dopamine.jax.agents.full_rainbow.full_rainbow_agent 4 | import dopamine.jax.networks 5 | import dopamine.discrete_domains.gym_lib 6 | import dopamine.discrete_domains.run_experiment 7 | import dopamine.replay_memory.prioritized_replay_buffer 8 | import dopamine.labs.atari_100k.atari_100k_rainbow_agent 9 | 10 | # Parameters specific to DrQ are highlighted by comments 11 | JaxDQNAgent.gamma = 0.99 12 | JaxDQNAgent.update_horizon = 10 # DrQ (instead of 3) 13 | JaxDQNAgent.min_replay_history = 1600 # DrQ (instead of 20000) 14 | JaxDQNAgent.update_period = 1 # DrQ (rather than 4) 15 | JaxDQNAgent.target_update_period = 1 # DrQ (rather than 8000) 16 | JaxDQNAgent.epsilon_train = 0.1 # DrQ (rather than 0.01) 17 | JaxDQNAgent.epsilon_eval = 0.05 # DrQ (rather than 0.001) 18 | JaxDQNAgent.epsilon_decay_period = 5000 # DrQ 19 | JaxDQNAgent.optimizer = 'adam' 20 | 21 | JaxFullRainbowAgent.noisy = False # DrQ (Efficient DQN) 22 | JaxFullRainbowAgent.dueling = True 23 | JaxFullRainbowAgent.double_dqn = True 24 | JaxFullRainbowAgent.distributional = False # DrQ (Efficient DQN) 25 | JaxFullRainbowAgent.num_atoms = 1 # Since DrQ uses DQN, rather than C51 26 | JaxFullRainbowAgent.num_updates_per_train_step = 1 27 | JaxFullRainbowAgent.replay_scheme = 'uniform' 28 | JaxFullRainbowAgent.epsilon_fn = @jax.agents.dqn.dqn_agent.linearly_decaying_epsilon 29 | Atari100kRainbowAgent.data_augmentation = True 30 | 31 | # Note these parameters are from DER (van Hasselt et al., 2019) 32 | create_optimizer.learning_rate = 0.0001 33 | create_optimizer.eps = 0.00015 34 | 35 | atari_lib.create_atari_environment.game_name = 'Pong' 36 | # Atari 100K benchmark doesn't use sticky actions. 37 | atari_lib.create_atari_environment.sticky_actions = False 38 | AtariPreprocessing.terminal_on_life_loss = True 39 | Runner.num_iterations = 1 40 | Runner.training_steps = 100000 # agent steps 41 | MaxEpisodeEvalRunner.num_eval_episodes = 100 # agent episodes 42 | Runner.max_steps_per_episode = 27000 # agent steps 43 | 44 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 1000000 # 1M as a proxy for unbounded memory 45 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32 46 | -------------------------------------------------------------------------------- /dopamine/labs/atari_100k/configs/OTRainbow.gin: -------------------------------------------------------------------------------- 1 | # Overtrained Rainbow (OTRainbow) from Kielak et al. (2019) 2 | import dopamine.jax.agents.dqn.dqn_agent 3 | import dopamine.jax.agents.full_rainbow.full_rainbow_agent 4 | import dopamine.jax.networks 5 | import dopamine.discrete_domains.gym_lib 6 | import dopamine.discrete_domains.run_experiment 7 | import dopamine.replay_memory.prioritized_replay_buffer 8 | import dopamine.labs.atari_100k.atari_100k_rainbow_agent 9 | 10 | # Parameters specific to OTRainbow are highlighted by comments 11 | JaxDQNAgent.gamma = 0.99 12 | JaxDQNAgent.update_horizon = 3 13 | JaxDQNAgent.min_replay_history = 20000 14 | JaxDQNAgent.update_period = 1 # OTRainbow: Update every 1 step (rather than 4) 15 | JaxDQNAgent.target_update_period = 500 # OTRainbow (instead of 8000) 16 | JaxDQNAgent.epsilon_train = 0.01 17 | JaxDQNAgent.epsilon_eval = 0.001 18 | JaxDQNAgent.epsilon_decay_period = 50000 # OTRainbow (instead of 250000) 19 | JaxDQNAgent.optimizer = 'adam' 20 | 21 | # Don't use noisy networks, dueling DQN, or double DQN.
22 | JaxFullRainbowAgent.noisy = False 23 | JaxFullRainbowAgent.dueling = False 24 | JaxFullRainbowAgent.double_dqn = False 25 | JaxFullRainbowAgent.num_atoms = 51 26 | JaxFullRainbowAgent.num_updates_per_train_step = 8 # OTRainbow (instead of 1) 27 | JaxFullRainbowAgent.vmax = 10. 28 | JaxFullRainbowAgent.replay_scheme = 'prioritized' 29 | JaxFullRainbowAgent.epsilon_fn = @jax.agents.dqn.dqn_agent.linearly_decaying_epsilon 30 | Atari100kRainbowAgent.data_augmentation = False 31 | 32 | # Note these parameters are original Rainbow. 33 | create_optimizer.learning_rate = 0.0000625 34 | create_optimizer.eps = 0.00015 35 | 36 | atari_lib.create_atari_environment.game_name = 'Pong' 37 | # Atari 100K benchmark doesn't use sticky actions. 38 | atari_lib.create_atari_environment.sticky_actions = False 39 | AtariPreprocessing.terminal_on_life_loss = True 40 | Runner.num_iterations = 1 41 | Runner.training_steps = 100000 # agent steps 42 | MaxEpisodeEvalRunner.num_eval_episodes = 100 # agent episodes 43 | Runner.max_steps_per_episode = 27000 # agent steps 44 | 45 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 1000000 # 1M as a proxy for unbounded memory 46 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32 47 | -------------------------------------------------------------------------------- /dopamine/labs/environments/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2021 The Dopamine Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /dopamine/labs/environments/minatar/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2021 The Dopamine Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /dopamine/labs/environments/minatar/dqn_asterix.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters for a simple DQN-style Minatar agent. The hyperparameters 2 | # chosen achieve reasonable performance. 
3 | import dopamine.discrete_domains.gym_lib 4 | import dopamine.discrete_domains.run_experiment 5 | import dopamine.jax.agents.dqn.dqn_agent 6 | import dopamine.jax.networks 7 | import dopamine.labs.environments.minatar.minatar_env 8 | import dopamine.replay_memory.circular_replay_buffer 9 | import flax 10 | 11 | JaxDQNAgent.observation_shape = %minatar_env.ASTERIX_SHAPE 12 | JaxDQNAgent.observation_dtype = %minatar_env.DTYPE 13 | JaxDQNAgent.stack_size = 1 14 | JaxDQNAgent.network = @minatar_env.MinatarDQNNetwork 15 | JaxDQNAgent.gamma = 0.99 16 | JaxDQNAgent.update_horizon = 1 17 | JaxDQNAgent.min_replay_history = 1000 18 | JaxDQNAgent.update_period = 4 19 | JaxDQNAgent.target_update_period = 1000 20 | JaxDQNAgent.optimizer = 'adam' 21 | create_optimizer.learning_rate = 0.00025 22 | create_optimizer.eps = 3.125e-4 23 | 24 | create_minatar_env.game_name = 'asterix' 25 | TrainRunner.create_environment_fn = @minatar_env.create_minatar_env 26 | create_runner.schedule = 'continuous_train' 27 | create_agent.agent_name = 'jax_dqn' 28 | create_agent.debug_mode = True 29 | Runner.num_iterations = 10 30 | Runner.training_steps = 1000000 31 | Runner.max_steps_per_episode = 100000000 32 | 33 | OutOfGraphReplayBuffer.replay_capacity = 100000 34 | OutOfGraphReplayBuffer.batch_size = 32 35 | -------------------------------------------------------------------------------- /dopamine/labs/environments/minatar/dqn_breakout.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters for a simple DQN-style Minatar agent. The hyperparameters 2 | # chosen achieve reasonable performance. 3 | import dopamine.discrete_domains.gym_lib 4 | import dopamine.discrete_domains.run_experiment 5 | import dopamine.jax.agents.dqn.dqn_agent 6 | import dopamine.jax.networks 7 | import dopamine.labs.environments.minatar.minatar_env 8 | import dopamine.replay_memory.circular_replay_buffer 9 | import flax 10 | 11 | JaxDQNAgent.observation_shape = %minatar_env.BREAKOUT_SHAPE 12 | JaxDQNAgent.observation_dtype = %minatar_env.DTYPE 13 | JaxDQNAgent.stack_size = 1 14 | JaxDQNAgent.network = @minatar_env.MinatarDQNNetwork 15 | JaxDQNAgent.gamma = 0.99 16 | JaxDQNAgent.update_horizon = 1 17 | JaxDQNAgent.min_replay_history = 1000 18 | JaxDQNAgent.update_period = 4 19 | JaxDQNAgent.target_update_period = 1000 20 | JaxDQNAgent.optimizer = 'adam' 21 | create_optimizer.learning_rate = 0.00025 22 | create_optimizer.eps = 3.125e-4 23 | 24 | create_minatar_env.game_name = 'breakout' 25 | TrainRunner.create_environment_fn = @minatar_env.create_minatar_env 26 | create_runner.schedule = 'continuous_train' 27 | create_agent.agent_name = 'jax_dqn' 28 | create_agent.debug_mode = True 29 | Runner.num_iterations = 10 30 | Runner.training_steps = 1000000 31 | Runner.max_steps_per_episode = 100000000 32 | 33 | OutOfGraphReplayBuffer.replay_capacity = 100000 34 | OutOfGraphReplayBuffer.batch_size = 32 35 | -------------------------------------------------------------------------------- /dopamine/labs/environments/minatar/dqn_freeway.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters for a simple DQN-style Minatar agent. The hyperparameters 2 | # chosen achieve reasonable performance. 
3 | import dopamine.discrete_domains.gym_lib 4 | import dopamine.discrete_domains.run_experiment 5 | import dopamine.jax.agents.dqn.dqn_agent 6 | import dopamine.jax.networks 7 | import dopamine.labs.environments.minatar.minatar_env 8 | import dopamine.replay_memory.circular_replay_buffer 9 | import flax 10 | 11 | JaxDQNAgent.observation_shape = %minatar_env.FREEWAY_SHAPE 12 | JaxDQNAgent.observation_dtype = %minatar_env.DTYPE 13 | JaxDQNAgent.stack_size = 1 14 | JaxDQNAgent.network = @minatar_env.MinatarDQNNetwork 15 | JaxDQNAgent.gamma = 0.99 16 | JaxDQNAgent.update_horizon = 1 17 | JaxDQNAgent.min_replay_history = 1000 18 | JaxDQNAgent.update_period = 4 19 | JaxDQNAgent.target_update_period = 1000 20 | JaxDQNAgent.optimizer = 'adam' 21 | create_optimizer.learning_rate = 0.00025 22 | create_optimizer.eps = 3.125e-4 23 | 24 | create_minatar_env.game_name = 'freeway' 25 | TrainRunner.create_environment_fn = @minatar_env.create_minatar_env 26 | create_runner.schedule = 'continuous_train' 27 | create_agent.agent_name = 'jax_dqn' 28 | create_agent.debug_mode = True 29 | Runner.num_iterations = 10 30 | Runner.training_steps = 1000000 31 | Runner.max_steps_per_episode = 100000000 32 | 33 | OutOfGraphReplayBuffer.replay_capacity = 100000 34 | OutOfGraphReplayBuffer.batch_size = 32 35 | -------------------------------------------------------------------------------- /dopamine/labs/environments/minatar/dqn_seaquest.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters for a simple DQN-style Minatar agent. The hyperparameters 2 | # chosen achieve reasonable performance. 3 | import dopamine.discrete_domains.gym_lib 4 | import dopamine.discrete_domains.run_experiment 5 | import dopamine.jax.agents.dqn.dqn_agent 6 | import dopamine.jax.networks 7 | import dopamine.labs.environments.minatar.minatar_env 8 | import dopamine.replay_memory.circular_replay_buffer 9 | import flax 10 | 11 | JaxDQNAgent.observation_shape = %minatar_env.SEAQUEST_SHAPE 12 | JaxDQNAgent.observation_dtype = %minatar_env.DTYPE 13 | JaxDQNAgent.stack_size = 1 14 | JaxDQNAgent.network = @minatar_env.MinatarDQNNetwork 15 | JaxDQNAgent.gamma = 0.99 16 | JaxDQNAgent.update_horizon = 1 17 | JaxDQNAgent.min_replay_history = 1000 18 | JaxDQNAgent.update_period = 4 19 | JaxDQNAgent.target_update_period = 1000 20 | JaxDQNAgent.optimizer = 'adam' 21 | create_optimizer.learning_rate = 0.00025 22 | create_optimizer.eps = 3.125e-4 23 | 24 | create_minatar_env.game_name = 'seaquest' 25 | TrainRunner.create_environment_fn = @minatar_env.create_minatar_env 26 | create_runner.schedule = 'continuous_train' 27 | create_agent.agent_name = 'jax_dqn' 28 | create_agent.debug_mode = True 29 | Runner.num_iterations = 10 30 | Runner.training_steps = 1000000 31 | Runner.max_steps_per_episode = 100000000 32 | 33 | OutOfGraphReplayBuffer.replay_capacity = 100000 34 | OutOfGraphReplayBuffer.batch_size = 32 35 | -------------------------------------------------------------------------------- /dopamine/labs/environments/minatar/dqn_space_invaders.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters for a simple DQN-style Minatar agent. The hyperparameters 2 | # chosen achieve reasonable performance. 
3 | import dopamine.discrete_domains.gym_lib 4 | import dopamine.discrete_domains.run_experiment 5 | import dopamine.jax.agents.dqn.dqn_agent 6 | import dopamine.jax.networks 7 | import dopamine.labs.environments.minatar.minatar_env 8 | import dopamine.replay_memory.circular_replay_buffer 9 | import flax 10 | 11 | JaxDQNAgent.observation_shape = %minatar_env.SPACE_INVADERS_SHAPE 12 | JaxDQNAgent.observation_dtype = %minatar_env.DTYPE 13 | JaxDQNAgent.stack_size = 1 14 | JaxDQNAgent.network = @minatar_env.MinatarDQNNetwork 15 | JaxDQNAgent.gamma = 0.99 16 | JaxDQNAgent.update_horizon = 1 17 | JaxDQNAgent.min_replay_history = 1000 18 | JaxDQNAgent.update_period = 4 19 | JaxDQNAgent.target_update_period = 1000 20 | JaxDQNAgent.optimizer = 'adam' 21 | create_optimizer.learning_rate = 0.00025 22 | create_optimizer.eps = 3.125e-4 23 | 24 | create_minatar_env.game_name = 'space_invaders' 25 | TrainRunner.create_environment_fn = @minatar_env.create_minatar_env 26 | create_runner.schedule = 'continuous_train' 27 | create_agent.agent_name = 'jax_dqn' 28 | create_agent.debug_mode = True 29 | Runner.num_iterations = 10 30 | Runner.training_steps = 1000000 31 | Runner.max_steps_per_episode = 100000000 32 | 33 | OutOfGraphReplayBuffer.replay_capacity = 100000 34 | OutOfGraphReplayBuffer.batch_size = 32 35 | -------------------------------------------------------------------------------- /dopamine/labs/environments/minatar/quantile_asterix.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow Dabney et al. (2017) but we modify as necessary to 2 | # match those used in Rainbow (Hessel et al., 2018), to ensure apples-to-apples 3 | # comparison. 4 | import dopamine.discrete_domains.atari_lib 5 | import dopamine.discrete_domains.run_experiment 6 | import dopamine.jax.agents.quantile.quantile_agent 7 | import dopamine.labs.environments.minatar.minatar_env 8 | import dopamine.replay_memory.prioritized_replay_buffer 9 | 10 | JaxQuantileAgent.observation_shape = %minatar_env.ASTERIX_SHAPE 11 | JaxQuantileAgent.observation_dtype = %minatar_env.DTYPE 12 | JaxQuantileAgent.stack_size = 1 13 | JaxQuantileAgent.network = @minatar_env.MinatarQuantileNetwork 14 | JaxQuantileAgent.kappa = 1.0 15 | JaxQuantileAgent.num_atoms = 200 16 | JaxQuantileAgent.gamma = 0.99 17 | JaxQuantileAgent.update_horizon = 3 18 | JaxQuantileAgent.min_replay_history = 1000 # agent steps 19 | JaxQuantileAgent.update_period = 4 20 | JaxQuantileAgent.target_update_period = 1000 # agent steps 21 | JaxQuantileAgent.epsilon_train = 0.01 22 | JaxQuantileAgent.epsilon_eval = 0.001 23 | JaxQuantileAgent.replay_scheme = 'prioritized' 24 | JaxQuantileAgent.optimizer = 'adam' 25 | create_optimizer.learning_rate = 0.00025 26 | create_optimizer.eps = 3.125e-4 27 | 28 | create_minatar_env.game_name = 'asterix' 29 | TrainRunner.create_environment_fn = @minatar_env.create_minatar_env 30 | create_runner.schedule = 'continuous_train' 31 | create_agent.agent_name = 'jax_quantile' 32 | create_agent.debug_mode = True 33 | Runner.num_iterations = 10 34 | Runner.training_steps = 1000000 35 | Runner.max_steps_per_episode = 100000000 36 | 37 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 100000 38 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32 39 | -------------------------------------------------------------------------------- /dopamine/labs/environments/minatar/quantile_breakout.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow 
Dabney et al. (2017) but we modify as necessary to 2 | # match those used in Rainbow (Hessel et al., 2018), to ensure apples-to-apples 3 | # comparison. 4 | import dopamine.discrete_domains.atari_lib 5 | import dopamine.discrete_domains.run_experiment 6 | import dopamine.jax.agents.quantile.quantile_agent 7 | import dopamine.labs.environments.minatar.minatar_env 8 | import dopamine.replay_memory.prioritized_replay_buffer 9 | 10 | JaxQuantileAgent.observation_shape = %minatar_env.BREAKOUT_SHAPE 11 | JaxQuantileAgent.observation_dtype = %minatar_env.DTYPE 12 | JaxQuantileAgent.stack_size = 1 13 | JaxQuantileAgent.network = @minatar_env.MinatarQuantileNetwork 14 | JaxQuantileAgent.kappa = 1.0 15 | JaxQuantileAgent.num_atoms = 200 16 | JaxQuantileAgent.gamma = 0.99 17 | JaxQuantileAgent.update_horizon = 3 18 | JaxQuantileAgent.min_replay_history = 1000 # agent steps 19 | JaxQuantileAgent.update_period = 4 20 | JaxQuantileAgent.target_update_period = 1000 # agent steps 21 | JaxQuantileAgent.epsilon_train = 0.01 22 | JaxQuantileAgent.epsilon_eval = 0.001 23 | JaxQuantileAgent.replay_scheme = 'prioritized' 24 | JaxQuantileAgent.optimizer = 'adam' 25 | create_optimizer.learning_rate = 0.00025 26 | create_optimizer.eps = 3.125e-4 27 | 28 | create_minatar_env.game_name = 'breakout' 29 | TrainRunner.create_environment_fn = @minatar_env.create_minatar_env 30 | create_runner.schedule = 'continuous_train' 31 | create_agent.agent_name = 'jax_quantile' 32 | create_agent.debug_mode = True 33 | Runner.num_iterations = 10 34 | Runner.training_steps = 1000000 35 | Runner.max_steps_per_episode = 100000000 36 | 37 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 100000 38 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32 39 | -------------------------------------------------------------------------------- /dopamine/labs/environments/minatar/quantile_freeway.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow Dabney et al. (2017) but we modify as necessary to 2 | # match those used in Rainbow (Hessel et al., 2018), to ensure apples-to-apples 3 | # comparison. 
4 | import dopamine.discrete_domains.atari_lib 5 | import dopamine.discrete_domains.run_experiment 6 | import dopamine.jax.agents.quantile.quantile_agent 7 | import dopamine.labs.environments.minatar.minatar_env 8 | import dopamine.replay_memory.prioritized_replay_buffer 9 | 10 | JaxQuantileAgent.observation_shape = %minatar_env.FREEWAY_SHAPE 11 | JaxQuantileAgent.observation_dtype = %minatar_env.DTYPE 12 | JaxQuantileAgent.stack_size = 1 13 | JaxQuantileAgent.network = @minatar_env.MinatarQuantileNetwork 14 | JaxQuantileAgent.kappa = 1.0 15 | JaxQuantileAgent.num_atoms = 200 16 | JaxQuantileAgent.gamma = 0.99 17 | JaxQuantileAgent.update_horizon = 3 18 | JaxQuantileAgent.min_replay_history = 1000 # agent steps 19 | JaxQuantileAgent.update_period = 4 20 | JaxQuantileAgent.target_update_period = 1000 # agent steps 21 | JaxQuantileAgent.epsilon_train = 0.01 22 | JaxQuantileAgent.epsilon_eval = 0.001 23 | JaxQuantileAgent.replay_scheme = 'prioritized' 24 | JaxQuantileAgent.optimizer = 'adam' 25 | create_optimizer.learning_rate = 0.00025 26 | create_optimizer.eps = 3.125e-4 27 | 28 | create_minatar_env.game_name = 'freeway' 29 | TrainRunner.create_environment_fn = @minatar_env.create_minatar_env 30 | create_runner.schedule = 'continuous_train' 31 | create_agent.agent_name = 'jax_quantile' 32 | create_agent.debug_mode = True 33 | Runner.num_iterations = 10 34 | Runner.training_steps = 1000000 35 | Runner.max_steps_per_episode = 100000000 36 | 37 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 100000 38 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32 39 | -------------------------------------------------------------------------------- /dopamine/labs/environments/minatar/quantile_seaquest.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow Dabney et al. (2017) but we modify as necessary to 2 | # match those used in Rainbow (Hessel et al., 2018), to ensure apples-to-apples 3 | # comparison. 
4 | import dopamine.discrete_domains.atari_lib 5 | import dopamine.discrete_domains.run_experiment 6 | import dopamine.labs.environments.minatar.minatar_env 7 | import dopamine.jax.agents.quantile.quantile_agent 8 | import dopamine.replay_memory.prioritized_replay_buffer 9 | 10 | JaxQuantileAgent.observation_shape = %minatar_env.SEAQUEST_SHAPE 11 | JaxQuantileAgent.observation_dtype = %minatar_env.DTYPE 12 | JaxQuantileAgent.stack_size = 1 13 | JaxQuantileAgent.network = @minatar_env.MinatarQuantileNetwork 14 | JaxQuantileAgent.kappa = 1.0 15 | JaxQuantileAgent.num_atoms = 200 16 | JaxQuantileAgent.gamma = 0.99 17 | JaxQuantileAgent.update_horizon = 3 18 | JaxQuantileAgent.min_replay_history = 1000 # agent steps 19 | JaxQuantileAgent.update_period = 4 20 | JaxQuantileAgent.target_update_period = 1000 # agent steps 21 | JaxQuantileAgent.epsilon_train = 0.01 22 | JaxQuantileAgent.epsilon_eval = 0.001 23 | JaxQuantileAgent.replay_scheme = 'prioritized' 24 | JaxQuantileAgent.optimizer = 'adam' 25 | create_optimizer.learning_rate = 0.00025 26 | create_optimizer.eps = 3.125e-4 27 | 28 | create_minatar_env.game_name = 'seaquest' 29 | TrainRunner.create_environment_fn = @minatar_env.create_minatar_env 30 | create_runner.schedule = 'continuous_train' 31 | create_agent.agent_name = 'jax_quantile' 32 | create_agent.debug_mode = True 33 | Runner.num_iterations = 10 34 | Runner.training_steps = 1000000 35 | Runner.max_steps_per_episode = 100000000 36 | 37 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 100000 38 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32 39 | -------------------------------------------------------------------------------- /dopamine/labs/environments/minatar/quantile_space_invaders.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow Dabney et al. (2017) but we modify as necessary to 2 | # match those used in Rainbow (Hessel et al., 2018), to ensure apples-to-apples 3 | # comparison. 
4 | import dopamine.discrete_domains.atari_lib 5 | import dopamine.discrete_domains.run_experiment 6 | import dopamine.jax.agents.quantile.quantile_agent 7 | import dopamine.labs.environments.minatar.minatar_env 8 | import dopamine.replay_memory.prioritized_replay_buffer 9 | 10 | JaxQuantileAgent.observation_shape = %minatar_env.SPACE_INVADERS_SHAPE 11 | JaxQuantileAgent.observation_dtype = %minatar_env.DTYPE 12 | JaxQuantileAgent.stack_size = 1 13 | JaxQuantileAgent.network = @minatar_env.MinatarQuantileNetwork 14 | JaxQuantileAgent.kappa = 1.0 15 | JaxQuantileAgent.num_atoms = 200 16 | JaxQuantileAgent.gamma = 0.99 17 | JaxQuantileAgent.update_horizon = 3 18 | JaxQuantileAgent.min_replay_history = 1000 # agent steps 19 | JaxQuantileAgent.update_period = 4 20 | JaxQuantileAgent.target_update_period = 1000 # agent steps 21 | JaxQuantileAgent.epsilon_train = 0.01 22 | JaxQuantileAgent.epsilon_eval = 0.001 23 | JaxQuantileAgent.replay_scheme = 'prioritized' 24 | JaxQuantileAgent.optimizer = 'adam' 25 | create_optimizer.learning_rate = 0.00025 26 | create_optimizer.eps = 3.125e-4 27 | 28 | create_minatar_env.game_name = 'space_invaders' 29 | TrainRunner.create_environment_fn = @minatar_env.create_minatar_env 30 | create_runner.schedule = 'continuous_train' 31 | create_agent.agent_name = 'jax_quantile' 32 | create_agent.debug_mode = True 33 | Runner.num_iterations = 10 34 | Runner.training_steps = 1000000 35 | Runner.max_steps_per_episode = 100000000 36 | 37 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 100000 38 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32 39 | -------------------------------------------------------------------------------- /dopamine/labs/environments/minatar/rainbow_asterix.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters for a simple Rainbow-style Minatar agent. The 2 | # hyperparameters chosen achieve reasonable performance. 3 | import dopamine.discrete_domains.gym_lib 4 | import dopamine.discrete_domains.run_experiment 5 | import dopamine.jax.agents.rainbow.rainbow_agent 6 | import dopamine.jax.networks 7 | import dopamine.labs.environments.minatar.minatar_env 8 | import dopamine.replay_memory.prioritized_replay_buffer 9 | 10 | JaxRainbowAgent.observation_shape = %minatar_env.ASTERIX_SHAPE 11 | JaxRainbowAgent.observation_dtype = %minatar_env.DTYPE 12 | JaxRainbowAgent.stack_size = 1 13 | JaxRainbowAgent.network = @minatar_env.MinatarRainbowNetwork 14 | JaxRainbowAgent.num_atoms = 51 15 | JaxRainbowAgent.vmax = 100.
16 | JaxRainbowAgent.gamma = 0.99 17 | JaxRainbowAgent.update_horizon = 3 18 | JaxRainbowAgent.min_replay_history = 1000 19 | JaxRainbowAgent.update_period = 4 20 | JaxRainbowAgent.target_update_period = 1000 21 | JaxRainbowAgent.replay_scheme = 'prioritized' 22 | create_optimizer.learning_rate = 0.00025 23 | create_optimizer.eps = 3.125e-4 24 | 25 | create_minatar_env.game_name = 'asterix' 26 | TrainRunner.create_environment_fn = @minatar_env.create_minatar_env 27 | create_runner.schedule = 'continuous_train' 28 | create_agent.agent_name = 'jax_rainbow' 29 | create_agent.debug_mode = True 30 | Runner.num_iterations = 10 31 | Runner.training_steps = 1000000 32 | Runner.max_steps_per_episode = 100000000 33 | 34 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 100000 35 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32 36 | -------------------------------------------------------------------------------- /dopamine/labs/environments/minatar/rainbow_breakout.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters for a simple Rainbow-style Minatar agent. The 2 | # hyperparameters chosen achieve reasonable performance. 3 | import dopamine.discrete_domains.gym_lib 4 | import dopamine.discrete_domains.run_experiment 5 | import dopamine.jax.agents.rainbow.rainbow_agent 6 | import dopamine.jax.networks 7 | import dopamine.labs.environments.minatar.minatar_env 8 | import dopamine.replay_memory.prioritized_replay_buffer 9 | 10 | JaxRainbowAgent.observation_shape = %minatar_env.BREAKOUT_SHAPE 11 | JaxRainbowAgent.observation_dtype = %minatar_env.DTYPE 12 | JaxRainbowAgent.stack_size = 1 13 | JaxRainbowAgent.network = @minatar_env.MinatarRainbowNetwork 14 | JaxRainbowAgent.num_atoms = 51 15 | JaxRainbowAgent.vmax = 100. 16 | JaxRainbowAgent.gamma = 0.99 17 | JaxRainbowAgent.update_horizon = 3 18 | JaxRainbowAgent.min_replay_history = 1000 19 | JaxRainbowAgent.update_period = 4 20 | JaxRainbowAgent.target_update_period = 1000 21 | JaxRainbowAgent.replay_scheme = 'prioritized' 22 | create_optimizer.learning_rate = 0.00025 23 | create_optimizer.eps = 3.125e-4 24 | 25 | create_minatar_env.game_name = 'breakout' 26 | TrainRunner.create_environment_fn = @minatar_env.create_minatar_env 27 | create_runner.schedule = 'continuous_train' 28 | create_agent.agent_name = 'jax_rainbow' 29 | create_agent.debug_mode = True 30 | Runner.num_iterations = 10 31 | Runner.training_steps = 1000000 32 | Runner.max_steps_per_episode = 100000000 33 | 34 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 100000 35 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32 36 | -------------------------------------------------------------------------------- /dopamine/labs/environments/minatar/rainbow_freeway.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters for a simple Rainbow-style Minatar agent. The 2 | # hyperparameters chosen achieve reasonable performance.
3 | import dopamine.discrete_domains.gym_lib 4 | import dopamine.discrete_domains.run_experiment 5 | import dopamine.jax.agents.rainbow.rainbow_agent 6 | import dopamine.jax.networks 7 | import dopamine.labs.environments.minatar.minatar_env 8 | import dopamine.replay_memory.prioritized_replay_buffer 9 | 10 | JaxRainbowAgent.observation_shape = %minatar_env.FREEWAY_SHAPE 11 | JaxRainbowAgent.observation_dtype = %minatar_env.DTYPE 12 | JaxRainbowAgent.stack_size = 1 13 | JaxRainbowAgent.network = @minatar_env.MinatarRainbowNetwork 14 | JaxRainbowAgent.num_atoms = 51 15 | JaxRainbowAgent.vmax = 100. 16 | JaxRainbowAgent.gamma = 0.99 17 | JaxRainbowAgent.update_horizon = 3 18 | JaxRainbowAgent.min_replay_history = 1000 19 | JaxRainbowAgent.update_period = 4 20 | JaxRainbowAgent.target_update_period = 1000 21 | JaxRainbowAgent.replay_scheme = 'prioritized' 22 | create_optimizer.learning_rate = 0.00025 23 | create_optimizer.eps = 3.125e-4 24 | 25 | create_minatar_env.game_name = 'freeway' 26 | TrainRunner.create_environment_fn = @minatar_env.create_minatar_env 27 | create_runner.schedule = 'continuous_train' 28 | create_agent.agent_name = 'jax_rainbow' 29 | create_agent.debug_mode = True 30 | Runner.num_iterations = 10 31 | Runner.training_steps = 1000000 32 | Runner.max_steps_per_episode = 100000000 33 | 34 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 100000 35 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32 36 | -------------------------------------------------------------------------------- /dopamine/labs/environments/minatar/rainbow_seaquest.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters for a simple Rainbow-style Minatar agent. The 2 | # hyperparameters chosen achieve reasonable performance. 3 | import dopamine.discrete_domains.gym_lib 4 | import dopamine.discrete_domains.run_experiment 5 | import dopamine.jax.agents.rainbow.rainbow_agent 6 | import dopamine.jax.networks 7 | import dopamine.labs.environments.minatar.minatar_env 8 | import dopamine.replay_memory.prioritized_replay_buffer 9 | 10 | JaxRainbowAgent.observation_shape = %minatar_env.SEAQUEST_SHAPE 11 | JaxRainbowAgent.observation_dtype = %minatar_env.DTYPE 12 | JaxRainbowAgent.stack_size = 1 13 | JaxRainbowAgent.network = @minatar_env.MinatarRainbowNetwork 14 | JaxRainbowAgent.num_atoms = 51 15 | JaxRainbowAgent.vmax = 100. 16 | JaxRainbowAgent.gamma = 0.99 17 | JaxRainbowAgent.update_horizon = 3 18 | JaxRainbowAgent.min_replay_history = 1000 19 | JaxRainbowAgent.update_period = 4 20 | JaxRainbowAgent.target_update_period = 1000 21 | JaxRainbowAgent.replay_scheme = 'prioritized' 22 | create_optimizer.learning_rate = 0.00025 23 | create_optimizer.eps = 3.125e-4 24 | 25 | create_minatar_env.game_name = 'seaquest' 26 | TrainRunner.create_environment_fn = @minatar_env.create_minatar_env 27 | create_runner.schedule = 'continuous_train' 28 | create_agent.agent_name = 'jax_rainbow' 29 | create_agent.debug_mode = True 30 | Runner.num_iterations = 10 31 | Runner.training_steps = 1000000 32 | Runner.max_steps_per_episode = 100000000 33 | 34 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 100000 35 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32 36 | -------------------------------------------------------------------------------- /dopamine/labs/environments/minatar/rainbow_space_invaders.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters for a simple Rainbow-style Minatar agent.
The 2 | # hyperparameters chosen achieve reasonable performance. 3 | import dopamine.discrete_domains.gym_lib 4 | import dopamine.discrete_domains.run_experiment 5 | import dopamine.jax.agents.rainbow.rainbow_agent 6 | import dopamine.jax.networks 7 | import dopamine.labs.environments.minatar.minatar_env 8 | import dopamine.replay_memory.prioritized_replay_buffer 9 | 10 | JaxRainbowAgent.observation_shape = %minatar_env.SPACE_INVADERS_SHAPE 11 | JaxRainbowAgent.observation_dtype = %minatar_env.DTYPE 12 | JaxRainbowAgent.stack_size = 1 13 | JaxRainbowAgent.network = @minatar_env.MinatarRainbowNetwork 14 | JaxRainbowAgent.num_atoms = 51 15 | JaxRainbowAgent.vmax = 100. 16 | JaxRainbowAgent.gamma = 0.99 17 | JaxRainbowAgent.update_horizon = 3 18 | JaxRainbowAgent.min_replay_history = 1000 19 | JaxRainbowAgent.update_period = 4 20 | JaxRainbowAgent.target_update_period = 1000 21 | JaxRainbowAgent.replay_scheme = 'prioritized' 22 | create_optimizer.learning_rate = 0.00025 23 | create_optimizer.eps = 3.125e-4 24 | 25 | create_minatar_env.game_name = 'space_invaders' 26 | TrainRunner.create_environment_fn = @minatar_env.create_minatar_env 27 | create_runner.schedule = 'continuous_train' 28 | create_agent.agent_name = 'jax_rainbow' 29 | create_agent.debug_mode = True 30 | Runner.num_iterations = 10 31 | Runner.training_steps = 1000000 32 | Runner.max_steps_per_episode = 100000000 33 | 34 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 100000 35 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32 36 | -------------------------------------------------------------------------------- /dopamine/labs/tandem_dqn/README.md: -------------------------------------------------------------------------------- 1 | # The Difficulty of Passive Learning in Deep Reinforcement Learning 2 | 3 | This is the Dopamine-based code accompanying the paper listed above. 4 | Although this code supports running classic control, MinAtar, and ALE 5 | environments, it was only used to run the classic control and MinAtar 6 | environments in the paper. 7 | 8 | See `run.sh` for an example of how to run it. 9 | -------------------------------------------------------------------------------- /dopamine/labs/tandem_dqn/requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py 2 | atari-py 3 | dopamine-rl 4 | gin-config 5 | gym 6 | numpy 7 | tensorflow 8 | -------------------------------------------------------------------------------- /dopamine/labs/tandem_dqn/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Note that to run this on the classic control and ALE environments you need to 3 | # obtain the gin files for Dopamine JAX agents: 4 | # github.com/google/dopamine/tree/master/dopamine/jax/agents/dqn/configs 5 | set -e 6 | set -x 7 | 8 | virtualenv -p python3 . 9 | source ./bin/activate 10 | 11 | cd .. 12 | pip install -r tandem_dqn/requirements.txt 13 | python3 -m tandem_dqn.train \ 14 | --base_dir=/tmp/tandem_dqn 15 | -------------------------------------------------------------------------------- /dopamine/labs/tandem_dqn/train.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2021 The Tandem DQN authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Binary entry-point for Tandem RL experiments.""" 16 | 17 | from absl import app 18 | from absl import flags 19 | from absl import logging 20 | 21 | from dopamine.discrete_domains import run_experiment as base_run_experiment 22 | from dopamine.labs.tandem_dqn import run_experiment 23 | import tensorflow as tf 24 | 25 | 26 | 27 | flags.DEFINE_string('base_dir', None, 28 | 'Base directory to host all required sub-directories.') 29 | flags.DEFINE_multi_string( 30 | 'gin_files', [], 'List of paths to gin configuration files (e.g.' 31 | '"dopamine/agents/dqn/dqn.gin").') 32 | flags.DEFINE_multi_string( 33 | 'gin_bindings', [], 34 | 'Gin bindings to override the values set in the config files ' 35 | '(e.g. "DQNAgent.epsilon_train=0.1",' 36 | ' "create_environment.game_name="Pong"").') 37 | 38 | FLAGS = flags.FLAGS 39 | 40 | 41 | def main(unused_argv): 42 | """Main method. 43 | 44 | Args: 45 | unused_argv: Arguments (unused). 46 | """ 47 | logging.set_verbosity(logging.INFO) 48 | tf.compat.v1.disable_v2_behavior() 49 | 50 | base_dir = FLAGS.base_dir 51 | gin_files = FLAGS.gin_files 52 | gin_bindings = FLAGS.gin_bindings 53 | base_run_experiment.load_gin_configs(gin_files, gin_bindings) 54 | runner = run_experiment.TandemRunner( 55 | base_dir, run_experiment.create_tandem_agents_and_checkpoints) 56 | runner.run_experiment() 57 | 58 | 59 | if __name__ == '__main__': 60 | flags.mark_flag_as_required('base_dir') 61 | app.run(main) 62 | -------------------------------------------------------------------------------- /dopamine/replay_memory/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Dopamine Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /dopamine/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Dopamine Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /dopamine/utils/test_utils.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Dopamine Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Common testing utilities shared across agents.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | 22 | 23 | import mock 24 | import tensorflow as tf 25 | 26 | 27 | class MockReplayBuffer(object): 28 | """Mock ReplayBuffer to verify the way the agent interacts with it.""" 29 | 30 | def __init__(self, is_jax=False): 31 | if is_jax: 32 | self.add = mock.Mock() 33 | self.add_count = 0 34 | self.sum_tree = mock.Mock() 35 | else: 36 | with tf.compat.v1.variable_scope( 37 | 'MockReplayBuffer', reuse=tf.compat.v1.AUTO_REUSE): 38 | self.add = mock.Mock() 39 | self.memory = mock.Mock() 40 | self.memory.add_count = 0 41 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py>=0.9.0 2 | astunparse>=1.6.3 3 | atari-py>=0.2.6 4 | cachetools>=4.1.1 5 | certifi>=2020.6.20 6 | chardet>=3.0.4 7 | cloudpickle>=1.3.0 8 | cycler>=0.10.0 9 | flax>=0.3.3 10 | future>=0.18.2 11 | gast>=0.3.3 12 | gin-config>=0.3.0 13 | google-auth>=1.19.2 14 | google-auth-oauthlib>=0.4.1 15 | google-pasta>=0.2.0 16 | grpcio>=1.30.0 17 | gym>=0.17.2 18 | h5py>=2.10.0 19 | idna>=2.10 20 | jax>=0.2.12 21 | jaxlib>=0.1.65 22 | Keras-Preprocessing>=1.1.2 23 | kiwisolver>=1.2.0 24 | Markdown>=3.2.2 25 | matplotlib>=3.3.0 26 | msgpack>=1.0.0 27 | numpy>=1.18.5 28 | oauthlib>=3.1.0 29 | opencv-python>=4.3.0.36 30 | opt-einsum>=3.3.0 31 | pandas>=1.0.5 32 | Pillow>=7.2.0 33 | protobuf>=3.12.2 34 | pyasn1>=0.4.8 35 | pyasn1-modules>=0.2.8 36 | pygame>=1.9.6 37 | pyglet>=1.5.0 38 | pyparsing>=2.4.7 39 | python-dateutil>=2.8.1 40 | pytz>=2020.1 41 | requests>=2.24.0 42 | requests-oauthlib>=1.3.0 43 | rsa>=4.6 44 | scipy>=1.4.1 45 | six>=1.15.0 46 | setuptools>=49.2.01 47 | tensorboard 48 | tensorboard-plugin-wit 49 | tensorflow 50 | tensorflow-estimator 51 | tensorflow-probability>=0.13.0 52 | termcolor>=1.1.0 53 | tf-slim>=1.1.0 54 | urllib3>=1.25.10 55 | Werkzeug>=1.0.1 56 | wrapt>=1.12.1 57 | -------------------------------------------------------------------------------- /tests/dopamine/atari_init_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Dopamine Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """A simple test for validating that the Atari env initializes.""" 16 | 17 | import datetime 18 | import os 19 | import shutil 20 | 21 | 22 | 23 | from absl import flags 24 | from dopamine.discrete_domains import train 25 | import tensorflow as tf 26 | 27 | 28 | FLAGS = flags.FLAGS 29 | 30 | 31 | class AtariInitTest(tf.test.TestCase): 32 | 33 | def setUp(self): 34 | super(AtariInitTest, self).setUp() 35 | FLAGS.base_dir = os.path.join( 36 | '/tmp/dopamine_tests', 37 | datetime.datetime.utcnow().strftime('run_%Y_%m_%d_%H_%M_%S')) 38 | FLAGS.gin_files = ['dopamine/agents/dqn/configs/dqn.gin'] 39 | # `num_iterations` set to zero to prevent runner execution. 40 | FLAGS.gin_bindings = [ 41 | 'Runner.num_iterations=0', 42 | 'WrappedReplayBuffer.replay_capacity = 100' # To prevent OOM. 43 | ] 44 | FLAGS.alsologtostderr = True 45 | 46 | def test_atari_init(self): 47 | """Tests that a DQN agent is initialized.""" 48 | train.main([]) 49 | shutil.rmtree(FLAGS.base_dir) 50 | 51 | 52 | if __name__ == '__main__': 53 | tf.compat.v1.disable_v2_behavior() 54 | tf.test.main() 55 | -------------------------------------------------------------------------------- /tests/dopamine/discrete_domains/gym_lib_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Dopamine Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | """Tests for dopamine.discrete_domains.gym_lib.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | 22 | 23 | from dopamine.discrete_domains import gym_lib 24 | import tensorflow as tf 25 | 26 | 27 | class MockGymEnvironment(object): 28 | """Mock environment for testing.""" 29 | 30 | def __init__(self): 31 | self.observation_space = 'observation_space' 32 | self.action_space = 'action_space' 33 | self.reward_range = 'reward_range' 34 | self.metadata = 'metadata' 35 | 36 | def reset(self): 37 | return 'reset' 38 | 39 | def step(self, unused_action): 40 | return 'obs', 'rew', False, {} 41 | 42 | 43 | class GymPreprocessingTest(tf.test.TestCase): 44 | 45 | def testAll(self): 46 | env = gym_lib.GymPreprocessing(MockGymEnvironment()) 47 | self.assertEqual('observation_space', env.observation_space) 48 | self.assertEqual('action_space', env.action_space) 49 | self.assertEqual('reward_range', env.reward_range) 50 | self.assertEqual('metadata', env.metadata) 51 | self.assertEqual('reset', env.reset()) 52 | self.assertAllEqual(['obs', 'rew', False, {}], env.step(0)) 53 | 54 | 55 | if __name__ == '__main__': 56 | tf.compat.v1.disable_v2_behavior() 57 | tf.test.main() 58 | -------------------------------------------------------------------------------- /tests/dopamine/utils/agent_visualizer_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2019 The Dopamine Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | """Tests for dopamine.utils.agent_visualizer.""" 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | import os 21 | import shutil 22 | 23 | 24 | 25 | from absl import flags 26 | from dopamine.utils.agent_visualizer import AgentVisualizer 27 | from dopamine.utils.line_plotter import LinePlotter 28 | import numpy as np 29 | from PIL import Image 30 | import tensorflow as tf 31 | 32 | 33 | FLAGS = flags.FLAGS 34 | 35 | 36 | class AgentVisualizerTest(tf.test.TestCase): 37 | 38 | def setUp(self): 39 | super(AgentVisualizerTest, self).setUp() 40 | self._test_subdir = os.path.join('/tmp/dopamine_tests', 'agent_visualizer') 41 | shutil.rmtree(self._test_subdir, ignore_errors=True) 42 | os.makedirs(self._test_subdir) 43 | 44 | def test_agent_visualizer_save_frame(self): 45 | parameter_dict = LinePlotter._defaults.copy() 46 | parameter_dict['get_line_data_fn'] = lambda: [[1, 2, 3]] 47 | plotter = LinePlotter(parameter_dict=parameter_dict) 48 | 49 | agent_visualizer = AgentVisualizer(self._test_subdir, [plotter]) 50 | agent_visualizer.save_frame() 51 | 52 | frame_filename = os.path.join(self._test_subdir, 'frame_000000.png') 53 | self.assertTrue(tf.io.gfile.exists(frame_filename)) 54 | 55 | im = Image.open(frame_filename) 56 | im_arr = np.array(im) 57 | self.assertTrue(np.array_equal(im_arr, agent_visualizer.record_frame)) 58 | 59 | if __name__ == '__main__': 60 | tf.compat.v1.disable_v2_behavior() 61 | tf.test.main() 62 | --------------------------------------------------------------------------------