├── .gitignore ├── LICENSE ├── Neurips2022_rebuttal.ipynb ├── README.md ├── docker ├── README.md ├── atari │ └── Dockerfile ├── core │ └── Dockerfile └── mujoco │ └── Dockerfile ├── docs ├── README.md ├── agents.md ├── api_docs │ └── python │ │ ├── _toc.yaml │ │ ├── dopamine.md │ │ ├── dopamine │ │ ├── _api_cache.json │ │ ├── agents.md │ │ ├── agents │ │ │ ├── dqn.md │ │ │ ├── dqn │ │ │ │ ├── dqn_agent.md │ │ │ │ └── dqn_agent │ │ │ │ │ └── DQNAgent.md │ │ │ ├── implicit_quantile.md │ │ │ ├── implicit_quantile │ │ │ │ ├── implicit_quantile_agent.md │ │ │ │ └── implicit_quantile_agent │ │ │ │ │ └── ImplicitQuantileAgent.md │ │ │ ├── rainbow.md │ │ │ └── rainbow │ │ │ │ ├── rainbow_agent.md │ │ │ │ └── rainbow_agent │ │ │ │ ├── RainbowAgent.md │ │ │ │ └── project_distribution.md │ │ ├── colab.md │ │ ├── colab │ │ │ ├── utils.md │ │ │ └── utils │ │ │ │ ├── get_latest_file.md │ │ │ │ ├── get_latest_iteration.md │ │ │ │ ├── load_baselines.md │ │ │ │ ├── load_statistics.md │ │ │ │ ├── read_experiment.md │ │ │ │ └── summarize_data.md │ │ ├── discrete_domains.md │ │ ├── discrete_domains │ │ │ ├── atari_lib.md │ │ │ ├── atari_lib │ │ │ │ ├── AtariPreprocessing.md │ │ │ │ └── create_atari_environment.md │ │ │ ├── checkpointer.md │ │ │ ├── checkpointer │ │ │ │ └── Checkpointer.md │ │ │ ├── gym_lib.md │ │ │ ├── gym_lib │ │ │ │ ├── GymPreprocessing.md │ │ │ │ └── create_gym_environment.md │ │ │ ├── iteration_statistics.md │ │ │ ├── iteration_statistics │ │ │ │ └── IterationStatistics.md │ │ │ ├── logger.md │ │ │ ├── logger │ │ │ │ └── Logger.md │ │ │ ├── run_experiment.md │ │ │ ├── run_experiment │ │ │ │ ├── Runner.md │ │ │ │ ├── TrainRunner.md │ │ │ │ ├── create_agent.md │ │ │ │ └── create_runner.md │ │ │ └── train.md │ │ ├── jax.md │ │ ├── jax │ │ │ ├── agents.md │ │ │ ├── agents │ │ │ │ ├── dqn.md │ │ │ │ ├── dqn │ │ │ │ │ └── dqn_agent.md │ │ │ │ ├── implicit_quantile.md │ │ │ │ ├── implicit_quantile │ │ │ │ │ └── implicit_quantile_agent.md │ │ │ │ ├── quantile.md │ │ │ │ ├── quantile │ │ │ │ │ └── quantile_agent.md │ │ │ │ ├── rainbow.md │ │ │ │ └── rainbow │ │ │ │ │ ├── rainbow_agent.md │ │ │ │ │ └── rainbow_agent │ │ │ │ │ └── project_distribution.md │ │ │ └── networks.md │ │ ├── replay_memory.md │ │ └── replay_memory │ │ │ ├── circular_replay_buffer.md │ │ │ ├── circular_replay_buffer │ │ │ ├── OutOfGraphReplayBuffer.md │ │ │ └── WrappedReplayBuffer.md │ │ │ ├── prioritized_replay_buffer.md │ │ │ ├── prioritized_replay_buffer │ │ │ ├── OutOfGraphPrioritizedReplayBuffer.md │ │ │ └── WrappedPrioritizedReplayBuffer.md │ │ │ └── sum_tree.md │ │ └── index.md └── changelist.md ├── dopamine ├── __init__.py ├── agents │ ├── __init__.py │ ├── dqn │ │ ├── __init__.py │ │ ├── configs │ │ │ ├── dqn.gin │ │ │ ├── dqn_acrobot.gin │ │ │ ├── dqn_cartpole.gin │ │ │ ├── dqn_icml.gin │ │ │ ├── dqn_lunarlander.gin │ │ │ ├── dqn_mountaincar.gin │ │ │ ├── dqn_nature.gin │ │ │ └── dqn_profiling.gin │ │ └── dqn_agent.py │ ├── implicit_quantile │ │ ├── __init__.py │ │ ├── configs │ │ │ ├── implicit_quantile.gin │ │ │ ├── implicit_quantile_icml.gin │ │ │ └── implicit_quantile_profiling.gin │ │ └── implicit_quantile_agent.py │ └── rainbow │ │ ├── __init__.py │ │ ├── configs │ │ ├── c51.gin │ │ ├── c51_acrobot.gin │ │ ├── c51_cartpole.gin │ │ ├── c51_icml.gin │ │ ├── c51_profiling.gin │ │ ├── rainbow_aaai.gin │ │ ├── rainbow_acrobot.gin │ │ ├── rainbow_cartpole.gin │ │ ├── rainbow_dqnpro.gin │ │ ├── rainbow_original.gin │ │ ├── rainbow_our_first_paper.gin │ │ ├── rainbow_our_second_paper.gin │ │ └── rainbow_profiling.gin 
│ │ └── rainbow_agent.py ├── colab │ ├── README.md │ ├── __init__.py │ ├── agent_visualizer.ipynb │ ├── agents.ipynb │ ├── cartpole.ipynb │ ├── jax_agent_visualizer.ipynb │ ├── load_statistics.ipynb │ └── utils.py ├── continuous_domains │ ├── __init__.py │ ├── run_experiment.py │ └── train.py ├── discrete_domains │ ├── __init__.py │ ├── atari_lib.py │ ├── checkpointer.py │ ├── gym_lib.py │ ├── iteration_statistics.py │ ├── legacy_networks.py │ ├── logger.py │ ├── run_experiment.py │ └── train.py ├── jax │ ├── README.md │ ├── __init__.py │ ├── agents │ │ ├── __init__.py │ │ ├── dqn │ │ │ ├── __init__.py │ │ │ ├── configs │ │ │ │ ├── dqn.gin │ │ │ │ ├── dqn_acrobot.gin │ │ │ │ ├── dqn_cartpole.gin │ │ │ │ ├── dqn_lunarlander.gin │ │ │ │ ├── dqn_mountaincar.gin │ │ │ │ └── dqn_profiling.gin │ │ │ └── dqn_agent.py │ │ ├── full_rainbow │ │ │ ├── __init__.py │ │ │ ├── configs │ │ │ │ ├── full_rainbow.gin │ │ │ │ └── full_rainbow_profiling.gin │ │ │ └── full_rainbow_agent.py │ │ ├── implicit_quantile │ │ │ ├── __init__.py │ │ │ ├── configs │ │ │ │ ├── implicit_quantile.gin │ │ │ │ └── implicit_quantile_profiling.gin │ │ │ └── implicit_quantile_agent.py │ │ ├── quantile │ │ │ ├── __init__.py │ │ │ ├── configs │ │ │ │ ├── quantile.gin │ │ │ │ └── quantile_profiling.gin │ │ │ └── quantile_agent.py │ │ ├── rainbow │ │ │ ├── __init__.py │ │ │ ├── configs │ │ │ │ ├── c51.gin │ │ │ │ ├── c51_acrobot.gin │ │ │ │ ├── c51_cartpole.gin │ │ │ │ ├── c51_profiling.gin │ │ │ │ ├── rainbow.gin │ │ │ │ ├── rainbow_acrobot.gin │ │ │ │ ├── rainbow_cartpole.gin │ │ │ │ └── rainbow_profiling.gin │ │ │ └── rainbow_agent.py │ │ └── sac │ │ │ ├── __init__.py │ │ │ ├── configs │ │ │ └── sac.gin │ │ │ └── sac_agent.py │ ├── continuous_networks.py │ ├── losses.py │ └── networks.py ├── labs │ ├── __init__.py │ ├── atari_100k │ │ ├── README.md │ │ ├── __init__.py │ │ ├── atari_100k_rainbow_agent.py │ │ ├── configs │ │ │ ├── DER.gin │ │ │ ├── DrQ.gin │ │ │ ├── DrQ_eps.gin │ │ │ └── OTRainbow.gin │ │ ├── eval_run_experiment.py │ │ └── train.py │ ├── environments │ │ ├── __init__.py │ │ └── minatar │ │ │ ├── __init__.py │ │ │ ├── dqn_asterix.gin │ │ │ ├── dqn_breakout.gin │ │ │ ├── dqn_freeway.gin │ │ │ ├── dqn_seaquest.gin │ │ │ ├── dqn_space_invaders.gin │ │ │ ├── minatar_env.py │ │ │ ├── quantile_asterix.gin │ │ │ ├── quantile_breakout.gin │ │ │ ├── quantile_freeway.gin │ │ │ ├── quantile_seaquest.gin │ │ │ ├── quantile_space_invaders.gin │ │ │ ├── rainbow_asterix.gin │ │ │ ├── rainbow_breakout.gin │ │ │ ├── rainbow_freeway.gin │ │ │ ├── rainbow_seaquest.gin │ │ │ └── rainbow_space_invaders.gin │ ├── sac_from_pixels │ │ ├── continuous_networks.py │ │ ├── deepmind_control_lib.py │ │ └── sac_pixels.gin │ └── tandem_dqn │ │ ├── README.md │ │ ├── requirements.txt │ │ ├── run.sh │ │ ├── run_experiment.py │ │ ├── tandem_dqn_agent.py │ │ └── train.py ├── replay_memory │ ├── __init__.py │ ├── circular_replay_buffer.py │ ├── prioritized_replay_buffer.py │ └── sum_tree.py └── utils │ ├── __init__.py │ ├── agent_visualizer.py │ ├── atari_plotter.py │ ├── bar_plotter.py │ ├── example_viz.py │ ├── example_viz_lib.py │ ├── line_plotter.py │ ├── plotter.py │ └── test_utils.py ├── extract_reward.py ├── plot_learning_curves.ipynb ├── requirements.txt ├── run_agents.ipynb ├── setup.py └── tests └── dopamine ├── agents ├── dqn │ └── dqn_agent_test.py ├── implicit_quantile │ └── implicit_quantile_agent_test.py └── rainbow │ └── rainbow_agent_test.py ├── atari_init_test.py ├── continuous_domains └── run_experiment_test.py ├── discrete_domains 
├── atari_lib_test.py ├── checkpointer_test.py ├── gym_lib_test.py ├── iteration_statistics_test.py ├── logger_test.py └── run_experiment_test.py ├── jax ├── agents │ ├── dqn │ │ └── dqn_agent_test.py │ ├── full_rainbow │ │ └── full_rainbow_agent_test.py │ ├── implicit_quantile │ │ └── implicit_quantile_agent_test.py │ ├── quantile │ │ └── quantile_agent_test.py │ ├── rainbow │ │ └── rainbow_agent_test.py │ └── sac │ │ └── sac_agent_test.py ├── continuous_networks_test.py ├── losses_test.py └── networks_test.py ├── labs ├── atari_100k │ └── train_test.py └── sac_from_pixels │ ├── continuous_networks_test.py │ └── deepmind_control_lib_test.py ├── replay_memory ├── circular_replay_buffer_test.py ├── prioritized_replay_buffer_test.py └── sum_tree_test.py ├── tests ├── gin_config_test.py ├── integration_test.py └── train_runner_integration_test.py └── utils └── agent_visualizer_test.py /.gitignore: -------------------------------------------------------------------------------- 1 | tmp 2 | *results* 3 | *DS_* 4 | *images/* 5 | *.ipynb_checkpoints* 6 | -------------------------------------------------------------------------------- /docker/atari/Dockerfile: -------------------------------------------------------------------------------- 1 | # Note: this Dockerfile expects that Atari ROMs retrieved following the 2 | # instructions from atari-py: https://github.com/openai/atari-py#roms. 3 | # It should specify a directory (e.g. ~/roms) that contains ROMS.rar. 4 | # It should be run from the rom directory. 5 | 6 | ARG base_image=dopamine/core 7 | FROM ${base_image} 8 | 9 | # Copy ROMs into the image. 10 | RUN mkdir /root/roms 11 | COPY ./Roms.rar /root/roms/ 12 | 13 | RUN apt-get install rar unzip -y 14 | RUN rar x /root/roms/Roms.rar /root/roms/ 15 | 16 | # Install ROMs with ale-py. 17 | RUN pip install atari_py ale-py 18 | RUN unzip /root/roms/ROMS.zip -d /root/roms 19 | RUN python -m atari_py.import_roms /root/roms 20 | RUN ale-import-roms /root/roms/ROMS 21 | -------------------------------------------------------------------------------- /docker/core/Dockerfile: -------------------------------------------------------------------------------- 1 | # If you want to use a different version of CUDA, view the available 2 | # images here: https://hub.docker.com/r/nvidia/cuda 3 | # Note: 4 | # - Jax currently supports CUDA versions up to 11.3. 5 | # - Tensorflow required CUDA versions after 11.2. 6 | ARG cuda_docker_tag="11.2.2-cudnn8-devel-ubuntu20.04" 7 | FROM nvidia/cuda:${cuda_docker_tag} 8 | 9 | COPY . /root/dopamine/ 10 | 11 | RUN apt-get update 12 | # tzdata is required below. To avoid hanging, install it first. 13 | RUN DEBIAN_FRONTEND="noninteractive" apt-get install tzdata -y 14 | RUN apt-get install git wget libgl1-mesa-glx -y 15 | 16 | # Install python3.8. 17 | RUN apt-get install software-properties-common -y 18 | RUN add-apt-repository ppa:deadsnakes/ppa -y 19 | RUN apt-get install python3.8 -y 20 | 21 | # Make python3.8 the default python. 22 | RUN rm /usr/bin/python3 23 | RUN ln -s /usr/bin/python3.8 /usr/bin/python3 24 | RUN ln -s /usr/bin/python3.8 /usr/bin/python 25 | RUN apt-get install python3-distutils -y 26 | 27 | # Install pip. 28 | RUN wget https://bootstrap.pypa.io/get-pip.py 29 | RUN python get-pip.py 30 | RUN rm get-pip.py 31 | 32 | # Install Dopamine dependencies. 33 | RUN pip install -r /root/dopamine/requirements.txt 34 | 35 | # Install JAX for GPU, overriding requirements.txt. 
36 | RUN pip install --upgrade "jax[cuda111]" -f https://storage.googleapis.com/jax-releases/jax_releases.html 37 | 38 | WORKDIR /root/dopamine 39 | -------------------------------------------------------------------------------- /docker/mujoco/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG base_image=dopamine/core 2 | FROM ${base_image} 3 | 4 | # Create Mujoco subdir. 5 | RUN mkdir /root/.mujoco 6 | COPY mjkey.txt /root/.mujoco/mjkey.txt 7 | 8 | # Prerequisites 9 | RUN apt-get install \ 10 | libosmesa6-dev \ 11 | libgl1-mesa-glx \ 12 | libglfw3 \ 13 | libglew-dev \ 14 | patchelf \ 15 | gcc \ 16 | python3.8-dev \ 17 | unzip -y 18 | 19 | # Download and install mujoco. 20 | RUN wget https://www.roboti.us/download/mujoco200_linux.zip 21 | RUN unzip mujoco200_linux.zip 22 | RUN rm mujoco200_linux.zip 23 | RUN mv mujoco200_linux /root/.mujoco/mujoco200 24 | 25 | # Add LD_LIBRARY_PATH environment variable. 26 | ENV LD_LIBRARY_PATH "/root/.mujoco/mujoco200/bin:${LD_LIBRARY_PATH}" 27 | RUN echo 'export LD_LIBRARY_PATH=/root/.mujoco/mujoco200/bin:${LD_LIBRARY_PATH}' >> /etc/bash.bashrc 28 | 29 | # Finally, install mujoco_py. 30 | RUN pip install mujoco_py 31 | -------------------------------------------------------------------------------- /docs/agents.md: -------------------------------------------------------------------------------- 1 | # DQN And Rainbow 2 | 3 | 4 | In the spirit of these principles, this first version focuses on supporting the 5 | state-of-the-art, single-GPU *Rainbow* agent ([Hessel et al., 2018][rainbow]) 6 | applied to Atari 2600 game-playing ([Bellemare et al., 2013][ale]). 7 | Specifically, our Rainbow agent implements the three components identified as 8 | most important by [Hessel et al.][rainbow]: 9 | 10 | * n-step Bellman updates (see e.g. [Mnih et al., 2016][a3c]) 11 | * Prioritized experience replay ([Schaul et al., 2015][prioritized_replay]) 12 | * Distributional reinforcement learning ([C51; Bellemare et al., 2017][c51]) 13 | 14 | For completeness, we also provide an implementation of DQN ([Mnih et al., 15 | 2015][dqn]). 16 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine.md: -------------------------------------------------------------------------------- 1 |
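Before the generated API pages that follow, here is a minimal sketch of how the DQN and Rainbow agents described in `docs/agents.md` above can be constructed programmatically. It is illustrative only: the game name, the TF1 session, and the set of valid `agent_name` strings are assumptions to verify against `run_experiment.py`.

```python
# Hedged sketch: constructing one of the agents described above.
import tensorflow.compat.v1 as tf

from dopamine.discrete_domains import atari_lib, run_experiment

# Sticky actions follow Machado et al. (2018); 'Pong' is an arbitrary example
# and assumes Atari ROMs have been installed (see docker/atari/Dockerfile).
environment = atari_lib.create_atari_environment(
    game_name='Pong', sticky_actions=True)

with tf.Session() as sess:
  # 'dqn', 'rainbow', and 'implicit_quantile' are the agent names suggested by
  # the modules listed in this repository (assumption).
  agent = run_experiment.create_agent(sess, environment, agent_name='rainbow')
  sess.run(tf.global_variables_initializer())
```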
2 | 3 | 4 |
5 | 6 | # Module: dopamine 7 | 8 | 9 | 10 | 11 | 17 | 18 | 19 | ## Modules 20 | 21 | [`agents`](./dopamine/agents.md) module 22 | 23 | [`colab`](./dopamine/colab.md) module 24 | 25 | [`discrete_domains`](./dopamine/discrete_domains.md) module 26 | 27 | [`jax`](./dopamine/jax.md) module 28 | 29 | [`replay_memory`](./dopamine/replay_memory.md) module 30 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/agents.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 |
5 | 6 | # Module: dopamine.agents 7 | 8 | 9 | 10 | 11 | 17 | 18 | 19 | ## Modules 20 | 21 | [`dqn`](../dopamine/agents/dqn.md) module 22 | 23 | [`implicit_quantile`](../dopamine/agents/implicit_quantile.md) module 24 | 25 | [`rainbow`](../dopamine/agents/rainbow.md) module 26 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/agents/dqn.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 |
5 | 6 | # Module: dopamine.agents.dqn 7 | 8 | 9 | 10 | 11 | 17 | 18 | 19 | ## Modules 20 | 21 | [`dqn_agent`](../../dopamine/agents/dqn/dqn_agent.md) module: Compact 22 | implementation of a DQN agent. 23 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/agents/dqn/dqn_agent.md: -------------------------------------------------------------------------------- 1 | description: Compact implementation of a DQN agent. 2 | 3 |
4 | 5 | 6 |
7 | 8 | # Module: dopamine.agents.dqn.dqn_agent 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | Compact implementation of a DQN agent. 22 | 23 | ## Classes 24 | 25 | [`class DQNAgent`](../../../dopamine/agents/dqn/dqn_agent/DQNAgent.md): An 26 | implementation of the DQN agent. 27 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/agents/dqn/dqn_agent/DQNAgent.md: -------------------------------------------------------------------------------- 1 | description: An implementation of the DQN agent. 2 | 3 |
4 | 5 | 6 |
7 | 8 | # dopamine.agents.dqn.dqn_agent.DQNAgent 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | An implementation of the DQN agent. 22 | 23 | 24 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/agents/implicit_quantile.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 |
5 | 6 | # Module: dopamine.agents.implicit_quantile 7 | 8 | 9 | 10 | 11 | 17 | 18 | 19 | ## Modules 20 | 21 | [`implicit_quantile_agent`](../../dopamine/agents/implicit_quantile/implicit_quantile_agent.md) 22 | module: The implicit quantile networks (IQN) agent. 23 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/agents/implicit_quantile/implicit_quantile_agent.md: -------------------------------------------------------------------------------- 1 | description: The implicit quantile networks (IQN) agent. 2 | 3 |
4 | 5 | 6 |
7 | 8 | # Module: dopamine.agents.implicit_quantile.implicit_quantile_agent 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | The implicit quantile networks (IQN) agent. 22 | 23 | The agent follows the description given in "Implicit Quantile Networks for 24 | Distributional RL" (Dabney et al., 2018). 25 | 26 | ## Classes 27 | 28 | [`class ImplicitQuantileAgent`](../../../dopamine/agents/implicit_quantile/implicit_quantile_agent/ImplicitQuantileAgent.md): 29 | An extension of Rainbow to perform implicit quantile regression. 30 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/agents/implicit_quantile/implicit_quantile_agent/ImplicitQuantileAgent.md: -------------------------------------------------------------------------------- 1 | description: An extension of Rainbow to perform implicit quantile regression. 2 | 3 |
4 | 5 | 6 |
7 | 8 | # dopamine.agents.implicit_quantile.implicit_quantile_agent.ImplicitQuantileAgent 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | An extension of Rainbow to perform implicit quantile regression. 22 | 23 | Inherits From: 24 | [`RainbowAgent`](../../../../dopamine/agents/rainbow/rainbow_agent/RainbowAgent.md) 25 | 26 | 27 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/agents/rainbow.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 |
5 | 6 | # Module: dopamine.agents.rainbow 7 | 8 | 9 | 10 | 11 | 17 | 18 | 19 | ## Modules 20 | 21 | [`rainbow_agent`](../../dopamine/agents/rainbow/rainbow_agent.md) module: 22 | Compact implementation of a simplified Rainbow agent. 23 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/agents/rainbow/rainbow_agent.md: -------------------------------------------------------------------------------- 1 | description: Compact implementation of a simplified Rainbow agent. 2 | 3 |
4 | 5 | 6 |
7 | 8 | # Module: dopamine.agents.rainbow.rainbow_agent 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | Compact implementation of a simplified Rainbow agent. 22 | 23 | Specifically, we implement the following components from Rainbow: 24 | 25 | * n-step updates; 26 | * prioritized replay; and 27 | * distributional RL. 28 | 29 | These three components were found to significantly impact the performance of the 30 | Atari game-playing agent. 31 | 32 | Furthermore, our implementation does away with some minor hyperparameter 33 | choices. Specifically, we 34 | 35 | * keep the beta exponent fixed at beta=0.5, rather than increase it linearly; 36 | * remove the alpha parameter, which was set to alpha=0.5 throughout the paper. 37 | 38 | Details in "Rainbow: Combining Improvements in Deep Reinforcement Learning" by 39 | Hessel et al. (2018). 40 | 41 | ## Classes 42 | 43 | [`class RainbowAgent`](../../../dopamine/agents/rainbow/rainbow_agent/RainbowAgent.md): 44 | A compact implementation of a simplified Rainbow agent. 45 | 46 | ## Functions 47 | 48 | [`project_distribution(...)`](../../../dopamine/agents/rainbow/rainbow_agent/project_distribution.md): 49 | Projects a batch of (support, weights) onto target_support. 50 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/agents/rainbow/rainbow_agent/RainbowAgent.md: -------------------------------------------------------------------------------- 1 | description: A compact implementation of a simplified Rainbow agent. 2 | 3 |
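The `project_distribution` function listed above is the categorical (C51) projection step. As a rough illustration of the underlying computation, here is a NumPy sketch written for this page (it is not the repository's TensorFlow implementation and assumes a uniformly spaced `target_support`): each source atom's probability mass is split linearly between its two nearest target atoms, so total mass is preserved.

```python
import numpy as np


def project_distribution(supports, weights, target_support):
  """Sketch of projecting (supports, weights) onto a uniform target_support.

  Shapes: supports [batch, n], weights [batch, n], target_support [m].
  """
  v_min, v_max = target_support[0], target_support[-1]
  delta_z = target_support[1] - target_support[0]
  clipped = np.clip(supports, v_min, v_max)                      # [batch, n]
  # Pairwise distances between every source atom and every target atom.
  distances = np.abs(clipped[:, None, :] - target_support[None, :, None])
  # Linear interpolation: each source atom contributes to its two neighbours.
  contributions = np.clip(1.0 - distances / delta_z, 0.0, 1.0)   # [batch, m, n]
  return np.einsum('bmn,bn->bm', contributions, weights)
```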
4 | 5 | 6 |
7 | 8 | # dopamine.agents.rainbow.rainbow_agent.RainbowAgent 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | A compact implementation of a simplified Rainbow agent. 22 | 23 | Inherits From: 24 | [`DQNAgent`](../../../../dopamine/agents/dqn/dqn_agent/DQNAgent.md) 25 | 26 | 27 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/colab.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 |
5 | 6 | # Module: dopamine.colab 7 | 8 | 9 | 10 | 11 | 17 | 18 | 19 | ## Modules 20 | 21 | [`utils`](../dopamine/colab/utils.md) module: This provides utilities for 22 | dealing with Dopamine data. 23 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/colab/utils.md: -------------------------------------------------------------------------------- 1 | description: This provides utilities for dealing with Dopamine data. 2 | 3 |
4 | 5 | 6 |
7 | 8 | # Module: dopamine.colab.utils 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | This provides utilities for dealing with Dopamine data. 22 | 23 | See: dopamine/common/logger.py . 24 | 25 | ## Functions 26 | 27 | [`get_latest_file(...)`](../../dopamine/colab/utils/get_latest_file.md): Return 28 | the file named 'path_[0-9]*' with the largest such number. 29 | 30 | [`get_latest_iteration(...)`](../../dopamine/colab/utils/get_latest_iteration.md): 31 | Return the largest iteration number corresponding to the given path. 32 | 33 | [`load_baselines(...)`](../../dopamine/colab/utils/load_baselines.md): Reads in 34 | the baseline experimental data from a specified base directory. 35 | 36 | [`load_statistics(...)`](../../dopamine/colab/utils/load_statistics.md): Reads 37 | in a statistics object from log_path. 38 | 39 | [`read_experiment(...)`](../../dopamine/colab/utils/read_experiment.md): Reads 40 | in a set of experimental results from log_path. 41 | 42 | [`summarize_data(...)`](../../dopamine/colab/utils/summarize_data.md): Processes 43 | log data into a per-iteration summary. 44 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/colab/utils/get_latest_file.md: -------------------------------------------------------------------------------- 1 | description: Return the file named 'path_[0-9]*' with the largest such number. 2 | 3 |
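As a usage illustration, the utilities above are typically chained to turn raw experiment logs into per-iteration summaries. The log path below is hypothetical, and the exact return signature of `load_statistics` is an assumption to verify against `dopamine/colab/utils.py`.

```python
from dopamine.colab import utils as colab_utils

LOG_PATH = '/tmp/dopamine_run/logs'  # hypothetical experiment log directory

# Assumption: load_statistics returns the raw per-iteration data together with
# the iteration number it was read from.
raw_data, _ = colab_utils.load_statistics(LOG_PATH, verbose=True)

# Reduce the per-episode lists to one summary value per iteration.
summary = colab_utils.summarize_data(
    raw_data, ['train_episode_returns', 'eval_episode_returns'])
print(summary['train_episode_returns'])  # one averaged value per iteration
```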
4 | 5 | 6 |
7 | 8 | # dopamine.colab.utils.get_latest_file 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | Return the file named 'path_[0-9]*' with the largest such number. 22 | 23 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 40 | 43 | 44 |
38 | `path` 39 | 41 | The base path (including directory and base name) to search. 42 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 55 | 56 | 57 |
53 | The latest file (in terms of given numbers). 54 |
58 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/colab/utils/get_latest_iteration.md: -------------------------------------------------------------------------------- 1 | description: Return the largest iteration number corresponding to the given 2 | path. 3 | 4 |
5 | 6 | 7 |
8 | 9 | # dopamine.colab.utils.get_latest_iteration 10 | 11 | 12 | 13 | 14 | 20 | 21 | 22 | Return the largest iteration number corresponding to the given path. 23 | 24 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 41 | 44 | 45 |
39 | `path` 40 | 42 | The base path (including directory and base name) to search. 43 |
46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 56 | 57 | 58 |
54 | The latest iteration number. 55 |
59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 70 | 73 | 74 |
68 | `ValueError` 69 | 71 | if there is no available log data at the given path. 72 |
75 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/colab/utils/load_baselines.md: -------------------------------------------------------------------------------- 1 | description: Reads in the baseline experimental data from a specified base 2 | directory. 3 | 4 |
5 | 6 | 7 |
8 | 9 | # dopamine.colab.utils.load_baselines 10 | 11 | 12 | 13 | 14 | 20 | 21 | 22 | Reads in the baseline experimental data from a specified base directory. 23 | 24 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 41 | 44 | 45 | 48 | 51 | 52 |
39 | `base_dir` 40 | 42 | string, base directory where to read data from. 43 |
46 | `verbose` 47 | 49 | bool, whether to print warning messages. 50 |
53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 63 | 64 | 65 |
61 | A dict containing pandas DataFrames for all available agents and games. 62 |
66 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/colab/utils/summarize_data.md: -------------------------------------------------------------------------------- 1 | description: Processes log data into a per-iteration summary. 2 | 3 |
4 | 5 | 6 |
7 | 8 | # dopamine.colab.utils.summarize_data 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | Processes log data into a per-iteration summary. 22 | 23 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 40 | 45 | 46 | 49 | 52 | 53 |
38 | `data` 39 | 41 | Dictionary loaded by load_statistics describing the data. This 42 | dictionary has keys iteration_0, iteration_1, ... describing per-iteration 43 | data. 44 |
47 | `summary_keys` 48 | 50 | List of per-iteration data to be summarized. 51 |
54 | 55 | #### Example: 56 | 57 | data = load_statistics(...) summarize_data(data, ['train_episode_returns', 58 | 'eval_episode_returns']) 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 69 | 70 | 71 |
67 | A dictionary mapping each key in summary_keys to a per-iteration summary. 68 |
72 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/discrete_domains.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 |
5 | 6 | # Module: dopamine.discrete_domains 7 | 8 | 9 | 10 | 11 | 17 | 18 | 19 | ## Modules 20 | 21 | [`atari_lib`](../dopamine/discrete_domains/atari_lib.md) module: Atari-specific 22 | utilities including Atari-specific network architectures. 23 | 24 | [`checkpointer`](../dopamine/discrete_domains/checkpointer.md) module: A 25 | checkpointing mechanism for Dopamine agents. 26 | 27 | [`gym_lib`](../dopamine/discrete_domains/gym_lib.md) module: Gym-specific 28 | (non-Atari) utilities. 29 | 30 | [`iteration_statistics`](../dopamine/discrete_domains/iteration_statistics.md) 31 | module: A class for storing iteration-specific metrics. 32 | 33 | [`logger`](../dopamine/discrete_domains/logger.md) module: A lightweight logging 34 | mechanism for dopamine agents. 35 | 36 | [`run_experiment`](../dopamine/discrete_domains/run_experiment.md) module: 37 | Module defining classes and helper methods for general agents. 38 | 39 | [`train`](../dopamine/discrete_domains/train.md) module: The entry point for 40 | running a Dopamine agent. 41 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/discrete_domains/atari_lib/AtariPreprocessing.md: -------------------------------------------------------------------------------- 1 | description: A class implementing image preprocessing for Atari 2600 agents. 2 | 3 |
4 | 5 | 6 |
7 | 8 | # dopamine.discrete_domains.atari_lib.AtariPreprocessing 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | A class implementing image preprocessing for Atari 2600 agents. 22 | 23 | 24 | 25 | Specifically, this provides the following subset from the JAIR paper (Bellemare 26 | et al., 2013) and Nature DQN paper (Mnih et al., 2015): 27 | 28 | * Frame skipping (defaults to 4). 29 | * Terminal signal when a life is lost (off by default). 30 | * Grayscale and max-pooling of the last two frames. 31 | * Downsample the screen to a square image (defaults to 84x84). 32 | 33 | More generally, this class follows the preprocessing guidelines set down in 34 | Machado et al. (2018), "Revisiting the Arcade Learning Environment: Evaluation 35 | Protocols and Open Problems for General Agents". 36 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/discrete_domains/checkpointer.md: -------------------------------------------------------------------------------- 1 | description: A checkpointing mechanism for Dopamine agents. 2 | 3 |
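For reference, a small usage sketch of the Atari preprocessing described above; the argument names mirror the documentation here and should be treated as assumptions to verify against `atari_lib.py`.

```python
from dopamine.discrete_domains import atari_lib

# create_atari_environment wraps the raw ALE environment in AtariPreprocessing;
# sticky actions follow Machado et al. (2018).
env = atari_lib.create_atari_environment(game_name='Pong', sticky_actions=True)

observation = env.reset()                           # preprocessed grayscale frame
observation, reward, is_terminal, _ = env.step(0)   # one frame-skipped step
```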
4 | 5 | 6 |
7 | 8 | # Module: dopamine.discrete_domains.checkpointer 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | A checkpointing mechanism for Dopamine agents. 22 | 23 | This Checkpointer expects a base directory where checkpoints for different 24 | iterations are stored. Specifically, Checkpointer.save_checkpoint() takes in as 25 | input a dictionary 'data' to be pickled to disk. At each iteration, we write a 26 | file called 'ckpt.#', where # is the iteration number. The Checkpointer also 27 | cleans up old files, maintaining up to the CHECKPOINT_DURATION most recent 28 | iterations. 29 | 30 | The Checkpointer writes a sentinel file to indicate that checkpointing was 31 | globally successful. This means that all other checkpointing activities (saving 32 | the TensorFlow graph, the replay buffer) should be performed *prior* to calling 33 | Checkpointer.save_checkpoint(). This allows the Checkpointer to detect 34 | incomplete checkpoints. 35 | 36 | #### Example 37 | 38 | After running 10 iterations (numbered 0...9) with base_directory='/checkpoint', 39 | the following files will exist: `/checkpoint/ckpt.6 /checkpoint/ckpt.7 40 | /checkpoint/ckpt.8 /checkpoint/ckpt.9 /checkpoint/sentinel_checkpoint_complete.6 41 | /checkpoint/sentinel_checkpoint_complete.7 42 | /checkpoint/sentinel_checkpoint_complete.8 43 | /checkpoint/sentinel_checkpoint_complete.9` 44 | 45 | ## Classes 46 | 47 | [`class Checkpointer`](../../dopamine/discrete_domains/checkpointer/Checkpointer.md): 48 | Class for managing checkpoints for Dopamine agents. 49 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/discrete_domains/checkpointer/Checkpointer.md: -------------------------------------------------------------------------------- 1 | description: Class for managing checkpoints for Dopamine agents. 2 | 3 |
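A usage sketch consistent with the checkpointer description above. The directory and iteration number are made up, and the helper for locating the newest complete checkpoint is assumed to be `checkpointer.get_latest_checkpoint_number`; verify both against the source.

```python
from dopamine.discrete_domains import checkpointer

base_dir = '/tmp/my_experiment/checkpoints'  # hypothetical
ckpt = checkpointer.Checkpointer(base_dir)

# Save the TensorFlow graph and replay buffer *before* this call, so that the
# sentinel file written here really marks a complete checkpoint.
ckpt.save_checkpoint(7, {'current_iteration': 7})

# Resume from the most recent complete checkpoint, if one exists.
latest = checkpointer.get_latest_checkpoint_number(base_dir)
if latest >= 0:
  experiment_data = ckpt.load_checkpoint(latest)
```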
4 | 5 | 6 |
7 | 8 | # dopamine.discrete_domains.checkpointer.Checkpointer 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | Class for managing checkpoints for Dopamine agents. 22 | 23 | 24 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/discrete_domains/gym_lib.md: -------------------------------------------------------------------------------- 1 | description: Gym-specific (non-Atari) utilities. 2 | 3 |
4 | 5 | 6 |
7 | 8 | # Module: dopamine.discrete_domains.gym_lib 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | Gym-specific (non-Atari) utilities. 22 | 23 | Some network specifications specific to certain Gym environments are provided 24 | here. 25 | 26 | Includes a wrapper class around Gym environments. This class makes general Gym 27 | environments conformant with the API Dopamine is expecting. 28 | 29 | ## Classes 30 | 31 | [`class GymPreprocessing`](../../dopamine/discrete_domains/gym_lib/GymPreprocessing.md): 32 | A Wrapper class around Gym environments. 33 | 34 | ## Functions 35 | 36 | [`create_gym_environment(...)`](../../dopamine/discrete_domains/gym_lib/create_gym_environment.md): 37 | Wraps a Gym environment with some basic preprocessing. 38 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/discrete_domains/gym_lib/GymPreprocessing.md: -------------------------------------------------------------------------------- 1 | description: A Wrapper class around Gym environments. 2 | 3 |
4 | 5 | 6 |
7 | 8 | # dopamine.discrete_domains.gym_lib.GymPreprocessing 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | A Wrapper class around Gym environments. 22 | 23 | 24 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/discrete_domains/gym_lib/create_gym_environment.md: -------------------------------------------------------------------------------- 1 | description: Wraps a Gym environment with some basic preprocessing. 2 | 3 |
4 | 5 | 6 |
7 | 8 | # dopamine.discrete_domains.gym_lib.create_gym_environment 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | Wraps a Gym environment with some basic preprocessing. 22 | 23 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 40 | 43 | 44 | 47 | 50 | 51 |
38 | `environment_name` 39 | 41 | str, the name of the environment to run. 42 |
45 | `version` 46 | 48 | str, version of the environment to run. 49 |
52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 62 | 63 | 64 |
60 | A Gym environment with some standard preprocessing. 61 |
65 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/discrete_domains/iteration_statistics.md: -------------------------------------------------------------------------------- 1 | description: A class for storing iteration-specific metrics. 2 | 3 |
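A short usage sketch of `create_gym_environment` as documented above; the environment name and version are examples only.

```python
from dopamine.discrete_domains import gym_lib

# Returns a GymPreprocessing-wrapped environment conforming to Dopamine's API.
env = gym_lib.create_gym_environment(environment_name='CartPole', version='v0')
observation = env.reset()
```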
4 | 5 | 6 |
7 | 8 | # Module: dopamine.discrete_domains.iteration_statistics 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | A class for storing iteration-specific metrics. 22 | 23 | ## Classes 24 | 25 | [`class IterationStatistics`](../../dopamine/discrete_domains/iteration_statistics/IterationStatistics.md): 26 | A class for storing iteration-specific metrics. 27 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/discrete_domains/iteration_statistics/IterationStatistics.md: -------------------------------------------------------------------------------- 1 | description: A class for storing iteration-specific metrics. 2 | 3 |
4 | 5 | 6 |
7 | 8 | # dopamine.discrete_domains.iteration_statistics.IterationStatistics 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | A class for storing iteration-specific metrics. 22 | 23 | 24 | 25 | The internal format is as follows: we maintain a mapping from keys to lists. 26 | Each list contains all the values corresponding to the given key. 27 | 28 | For example, self.data_lists['train_episode_returns'] might contain the 29 | per-episode returns achieved during this iteration. 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 41 | 45 | 46 |
39 | `data_lists` 40 | 42 | dict mapping each metric_name (str) to a list of said metric 43 | across episodes. 44 |
47 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/discrete_domains/logger.md: -------------------------------------------------------------------------------- 1 | description: A lightweight logging mechanism for dopamine agents. 2 | 3 |
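Based on the `data_lists` description above, usage looks roughly like the sketch below; the `append` method name is an assumption to verify against `iteration_statistics.py`.

```python
from dopamine.discrete_domains import iteration_statistics

stats = iteration_statistics.IterationStatistics()

# Assumption: each call appends the given values to the matching lists.
stats.append({'train_episode_returns': 12.0, 'train_episode_lengths': 200})
stats.append({'train_episode_returns': 15.5, 'train_episode_lengths': 180})

print(stats.data_lists['train_episode_returns'])  # [12.0, 15.5]
```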
4 | 5 | 6 |
7 | 8 | # Module: dopamine.discrete_domains.logger 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | A lightweight logging mechanism for dopamine agents. 22 | 23 | ## Classes 24 | 25 | [`class Logger`](../../dopamine/discrete_domains/logger/Logger.md): Class for 26 | maintaining a dictionary of data to log. 27 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/discrete_domains/logger/Logger.md: -------------------------------------------------------------------------------- 1 | description: Class for maintaining a dictionary of data to log. 2 | 3 |
4 | 5 | 6 |
7 | 8 | # dopamine.discrete_domains.logger.Logger 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | Class for maintaining a dictionary of data to log. 22 | 23 | 24 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/discrete_domains/run_experiment.md: -------------------------------------------------------------------------------- 1 | description: Module defining classes and helper methods for general agents. 2 | 3 |
4 | 5 | 6 |
7 | 8 | # Module: dopamine.discrete_domains.run_experiment 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | Module defining classes and helper methods for general agents. 22 | 23 | ## Classes 24 | 25 | [`class Runner`](../../dopamine/discrete_domains/run_experiment/Runner.md): 26 | Object that handles running Dopamine experiments. 27 | 28 | [`class TrainRunner`](../../dopamine/discrete_domains/run_experiment/TrainRunner.md): 29 | Object that handles running experiments. 30 | 31 | ## Functions 32 | 33 | [`create_agent(...)`](../../dopamine/discrete_domains/run_experiment/create_agent.md): 34 | Creates an agent. 35 | 36 | [`create_runner(...)`](../../dopamine/discrete_domains/run_experiment/create_runner.md): 37 | Creates an experiment Runner. 38 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/discrete_domains/run_experiment/Runner.md: -------------------------------------------------------------------------------- 1 | description: Object that handles running Dopamine experiments. 2 | 3 |
4 | 5 | 6 |
7 | 8 | # dopamine.discrete_domains.run_experiment.Runner 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | Object that handles running Dopamine experiments. 22 | 23 | 24 | 25 | Here we use the term 'experiment' to mean simulating interactions between the 26 | agent and the environment and reporting some statistics pertaining to these 27 | interactions. 28 | 29 | A simple scenario to train a DQN agent is as follows: 30 | 31 | ```python 32 | from dopamine.agents.dqn import dqn_agent 33 | from dopamine.discrete_domains import atari_lib, run_experiment 34 | base_dir = '/tmp/simple_example' 35 | def create_agent(sess, environment): 36 |   return dqn_agent.DQNAgent(sess, num_actions=environment.action_space.n) 37 | runner = run_experiment.Runner(base_dir, create_agent, atari_lib.create_atari_environment) 38 | runner.run_experiment() 39 | ``` 40 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/discrete_domains/run_experiment/TrainRunner.md: -------------------------------------------------------------------------------- 1 | description: Object that handles running experiments. 2 | 3 |
4 | 5 | 6 |
7 | 8 | # dopamine.discrete_domains.run_experiment.TrainRunner 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | Object that handles running experiments. 22 | 23 | Inherits From: 24 | [`Runner`](../../../dopamine/discrete_domains/run_experiment/Runner.md) 25 | 26 | 27 | 28 | The `TrainRunner` differs from the base `Runner` class in that it does not run the 29 | evaluation phase. Checkpointing and logging for the train phase are preserved as 30 | before. 31 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/discrete_domains/run_experiment/create_runner.md: -------------------------------------------------------------------------------- 1 | description: Creates an experiment Runner. 2 | 3 |
4 | 5 | 6 |
7 | 8 | # dopamine.discrete_domains.run_experiment.create_runner 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | Creates an experiment Runner. 22 | 23 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 40 | 43 | 44 | 47 | 50 | 51 |
38 | `base_dir` 39 | 41 | str, base directory for hosting all subdirectories. 42 |
45 | `schedule` 46 | 48 | string, which type of Runner to use. 49 |
52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 63 | 66 | 67 |
61 | `runner` 62 | 64 | A `Runner`-like object. 65 |
68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 79 | 82 | 83 |
77 | `ValueError` 78 | 80 | When an unknown schedule is encountered. 81 |
84 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/discrete_domains/train.md: -------------------------------------------------------------------------------- 1 | description: The entry point for running a Dopamine agent. 2 | 3 |
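Putting `create_runner` and the train entry point together, a typical programmatic run looks like the sketch below. The paths are placeholders, and the schedule strings (mirroring the `Runner`/`TrainRunner` split described above) are assumptions to verify against `run_experiment.py`.

```python
from dopamine.discrete_domains import run_experiment

base_dir = '/tmp/dopamine_dqn'                       # hypothetical output dir
gin_files = ['dopamine/agents/dqn/configs/dqn.gin']  # config from this repo

# Parse the gin configuration, build a Runner, and run the experiment loop.
run_experiment.load_gin_configs(gin_files, gin_bindings=[])
runner = run_experiment.create_runner(
    base_dir, schedule='continuous_train_and_eval')
runner.run_experiment()
```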
4 | 5 | 6 |
7 | 8 | # Module: dopamine.discrete_domains.train 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | The entry point for running a Dopamine agent. 22 | 23 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/jax.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 |
5 | 6 | # Module: dopamine.jax 7 | 8 | 9 | 10 | 11 | 17 | 18 | 19 | ## Modules 20 | 21 | [`agents`](../dopamine/jax/agents.md) module 22 | 23 | [`networks`](../dopamine/jax/networks.md) module: Various networks for Jax 24 | Dopamine agents. 25 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/jax/agents.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 |
5 | 6 | # Module: dopamine.jax.agents 7 | 8 | 9 | 10 | 11 | 17 | 18 | 19 | ## Modules 20 | 21 | [`dqn`](../../dopamine/jax/agents/dqn.md) module 22 | 23 | [`implicit_quantile`](../../dopamine/jax/agents/implicit_quantile.md) module 24 | 25 | [`quantile`](../../dopamine/jax/agents/quantile.md) module 26 | 27 | [`rainbow`](../../dopamine/jax/agents/rainbow.md) module 28 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/jax/agents/dqn.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 |
5 | 6 | # Module: dopamine.jax.agents.dqn 7 | 8 | 9 | 10 | 11 | 17 | 18 | 19 | ## Modules 20 | 21 | [`dqn_agent`](../../../dopamine/jax/agents/dqn/dqn_agent.md) module: Compact 22 | implementation of a DQN agent in JAX. 23 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/jax/agents/dqn/dqn_agent.md: -------------------------------------------------------------------------------- 1 | description: Compact implementation of a DQN agent in JAX. 2 | 3 |
4 | 5 | 6 |
7 | 8 | # Module: dopamine.jax.agents.dqn.dqn_agent 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | Compact implementation of a DQN agent in JAX. 22 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/jax/agents/implicit_quantile.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 |
5 | 6 | # Module: dopamine.jax.agents.implicit_quantile 7 | 8 | 9 | 10 | 11 | 17 | 18 | 19 | ## Modules 20 | 21 | [`implicit_quantile_agent`](../../../dopamine/jax/agents/implicit_quantile/implicit_quantile_agent.md) 22 | module: The implicit quantile networks (IQN) agent. 23 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/jax/agents/implicit_quantile/implicit_quantile_agent.md: -------------------------------------------------------------------------------- 1 | description: The implicit quantile networks (IQN) agent. 2 | 3 |
4 | 5 | 6 |
7 | 8 | # Module: dopamine.jax.agents.implicit_quantile.implicit_quantile_agent 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | The implicit quantile networks (IQN) agent. 22 | 23 | The agent follows the description given in "Implicit Quantile Networks for 24 | Distributional RL" (Dabney et al., 2018). 25 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/jax/agents/quantile.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 |
5 | 6 | # Module: dopamine.jax.agents.quantile 7 | 8 | 9 | 10 | 11 | 17 | 18 | 19 | ## Modules 20 | 21 | [`quantile_agent`](../../../dopamine/jax/agents/quantile/quantile_agent.md) 22 | module: An extension of Rainbow to perform quantile regression. 23 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/jax/agents/quantile/quantile_agent.md: -------------------------------------------------------------------------------- 1 | description: An extension of Rainbow to perform quantile regression. 2 | 3 |
4 | 5 | 6 |
7 | 8 | # Module: dopamine.jax.agents.quantile.quantile_agent 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | An extension of Rainbow to perform quantile regression. 22 | 23 | This loss is computed as in "Distributional Reinforcement Learning with Quantile 24 | Regression" (Dabney et al., 2017). 25 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/jax/agents/rainbow.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 |
5 | 6 | # Module: dopamine.jax.agents.rainbow 7 | 8 | 9 | 10 | 11 | 17 | 18 | 19 | ## Modules 20 | 21 | [`rainbow_agent`](../../../dopamine/jax/agents/rainbow/rainbow_agent.md) module: 22 | Compact implementation of a simplified Rainbow agent in Jax. 23 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/jax/agents/rainbow/rainbow_agent.md: -------------------------------------------------------------------------------- 1 | description: Compact implementation of a simplified Rainbow agent in Jax. 2 | 3 |
4 | 5 | 6 |
7 | 8 | # Module: dopamine.jax.agents.rainbow.rainbow_agent 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | Compact implementation of a simplified Rainbow agent in Jax. 22 | 23 | Specifically, we implement the following components from Rainbow: 24 | 25 | * n-step updates; 26 | * prioritized replay; and 27 | * distributional RL. 28 | 29 | These three components were found to significantly impact the performance of the 30 | Atari game-playing agent. 31 | 32 | Furthermore, our implementation does away with some minor hyperparameter 33 | choices. Specifically, we 34 | 35 | * keep the beta exponent fixed at beta=0.5, rather than increase it linearly; 36 | * remove the alpha parameter, which was set to alpha=0.5 throughout the paper. 37 | 38 | Details in "Rainbow: Combining Improvements in Deep Reinforcement Learning" by 39 | Hessel et al. (2018). 40 | 41 | ## Functions 42 | 43 | [`project_distribution(...)`](../../../../dopamine/jax/agents/rainbow/rainbow_agent/project_distribution.md): 44 | Projects a batch of (support, weights) onto target_support. 45 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/jax/networks.md: -------------------------------------------------------------------------------- 1 | description: Various networks for Jax Dopamine agents. 2 | 3 |
4 | 5 | 6 |
7 | 8 | # Module: dopamine.jax.networks 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | Various networks for Jax Dopamine agents. 22 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/replay_memory.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 |
5 | 6 | # Module: dopamine.replay_memory 7 | 8 | 9 | 10 | 11 | 17 | 18 | 19 | ## Modules 20 | 21 | [`circular_replay_buffer`](../dopamine/replay_memory/circular_replay_buffer.md) 22 | module: The standard DQN replay memory. 23 | 24 | [`prioritized_replay_buffer`](../dopamine/replay_memory/prioritized_replay_buffer.md) 25 | module: An implementation of Prioritized Experience Replay (PER). 26 | 27 | [`sum_tree`](../dopamine/replay_memory/sum_tree.md) module: A sum tree data 28 | structure. 29 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/replay_memory/circular_replay_buffer.md: -------------------------------------------------------------------------------- 1 | description: The standard DQN replay memory. 2 | 3 |
4 | 5 | 6 |
7 | 8 | # Module: dopamine.replay_memory.circular_replay_buffer 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | The standard DQN replay memory. 22 | 23 | This implementation is an out-of-graph replay memory + in-graph wrapper. It 24 | supports vanilla n-step updates of the form typically found in the literature, 25 | i.e. where rewards are accumulated for n steps and the intermediate trajectory 26 | is not exposed to the agent. This does not allow, for example, performing 27 | off-policy corrections. 28 | 29 | ## Classes 30 | 31 | [`class OutOfGraphReplayBuffer`](../../dopamine/replay_memory/circular_replay_buffer/OutOfGraphReplayBuffer.md): 32 | A simple out-of-graph Replay Buffer. 33 | 34 | [`class WrappedReplayBuffer`](../../dopamine/replay_memory/circular_replay_buffer/WrappedReplayBuffer.md): 35 | Wrapper of OutOfGraphReplayBuffer with an in graph sampling mechanism. 36 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/replay_memory/circular_replay_buffer/OutOfGraphReplayBuffer.md: -------------------------------------------------------------------------------- 1 | description: A simple out-of-graph Replay Buffer. 2 | 3 |
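A minimal usage sketch of the out-of-graph replay buffer described above, with constructor arguments modeled on the Atari defaults; treat the exact keyword names as assumptions to verify against `circular_replay_buffer.py`.

```python
import numpy as np

from dopamine.replay_memory import circular_replay_buffer

memory = circular_replay_buffer.OutOfGraphReplayBuffer(
    observation_shape=(84, 84),
    stack_size=4,
    replay_capacity=100000,
    batch_size=32,
    update_horizon=1,
    gamma=0.99)

# Only individual frames are written; stacked states are assembled at sample
# time, which is what keeps storage compact.
for _ in range(1000):
  frame = np.zeros((84, 84), dtype=np.uint8)
  memory.add(frame, 0, 0.0, False)  # observation, action, reward, terminal

batch = memory.sample_transition_batch()  # tuple of numpy arrays
```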
4 | 5 | 6 |
7 | 8 | # dopamine.replay_memory.circular_replay_buffer.OutOfGraphReplayBuffer 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | A simple out-of-graph Replay Buffer. 22 | 23 | 24 | 25 | Stores transitions, state, action, reward, next_state, terminal (and any extra 26 | contents specified) in a circular buffer and provides a uniform transition 27 | sampling function. 28 | 29 | When the states consist of stacks of observations, storing the states is 30 | inefficient. This class writes observations and constructs the stacked states at 31 | sample time. 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 43 | 47 | 48 | 51 | 55 | 56 |
41 | `add_count` 42 | 44 | int, counter of how many transitions have been added (including 45 | the blank ones at the beginning of an episode). 46 |
49 | `invalid_range` 50 | 52 | np.array, an array with the indices of cursor-related invalid 53 | transitions. 54 |
57 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/replay_memory/circular_replay_buffer/WrappedReplayBuffer.md: -------------------------------------------------------------------------------- 1 | description: Wrapper of OutOfGraphReplayBuffer with an in graph sampling 2 | mechanism. 3 | 4 |
5 | 6 | 7 |
8 | 9 | # dopamine.replay_memory.circular_replay_buffer.WrappedReplayBuffer 10 | 11 | 12 | 13 | 14 | 20 | 21 | 22 | Wrapper of OutOfGraphReplayBuffer with an in-graph sampling mechanism. 23 | 24 | 25 | 26 | #### Usage: 27 | 28 | To add a transition: call the add function. 29 | 30 | To sample a batch: Construct operations that depend on any of the tensors in the 31 | transition dictionary. Every sess.run that requires any of these tensors will 32 | sample a new transition. 33 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/replay_memory/prioritized_replay_buffer.md: -------------------------------------------------------------------------------- 1 | description: An implementation of Prioritized Experience Replay (PER). 2 | 3 |
4 | 5 | 6 |
7 | 8 | # Module: dopamine.replay_memory.prioritized_replay_buffer 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | An implementation of Prioritized Experience Replay (PER). 22 | 23 | This implementation is based on the paper "Prioritized Experience Replay" by Tom 24 | Schaul et al. (2015). Many thanks to Tom Schaul, John Quan, and Matteo Hessel 25 | for providing useful pointers on the algorithm and its implementation. 26 | 27 | ## Classes 28 | 29 | [`class OutOfGraphPrioritizedReplayBuffer`](../../dopamine/replay_memory/prioritized_replay_buffer/OutOfGraphPrioritizedReplayBuffer.md): 30 | An out-of-graph Replay Buffer for Prioritized Experience Replay. 31 | 32 | [`class WrappedPrioritizedReplayBuffer`](../../dopamine/replay_memory/prioritized_replay_buffer/WrappedPrioritizedReplayBuffer.md): 33 | Wrapper of OutOfGraphPrioritizedReplayBuffer with in-graph sampling. 34 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/replay_memory/prioritized_replay_buffer/OutOfGraphPrioritizedReplayBuffer.md: -------------------------------------------------------------------------------- 1 | description: An out-of-graph Replay Buffer for Prioritized Experience Replay. 2 | 3 |
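For intuition, here is a short, library-independent sketch of the proportional prioritization scheme from Schaul et al. (2015) that this module implements. It is written for this page: the repository uses a sum tree for O(log n) sampling rather than the O(n) normalization below, and the fixed exponents follow the simplification noted in the Rainbow module docs (alpha = beta = 0.5).

```python
import numpy as np


def sample_prioritized(priorities, batch_size, alpha=0.5, beta=0.5):
  """Proportional prioritized sampling (illustrative only)."""
  scaled = np.asarray(priorities, dtype=np.float64) ** alpha
  probs = scaled / scaled.sum()
  indices = np.random.choice(len(probs), size=batch_size, p=probs)
  # Importance-sampling weights correct for the non-uniform sampling.
  weights = (len(probs) * probs[indices]) ** (-beta)
  weights /= weights.max()
  return indices, weights
```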
4 | 5 | 6 |
7 | 8 | # dopamine.replay_memory.prioritized_replay_buffer.OutOfGraphPrioritizedReplayBuffer 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | An out-of-graph Replay Buffer for Prioritized Experience Replay. 22 | 23 | Inherits From: 24 | [`OutOfGraphReplayBuffer`](../../../dopamine/replay_memory/circular_replay_buffer/OutOfGraphReplayBuffer.md) 25 | 26 | 27 | 28 | See circular_replay_buffer.py for details. 29 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/replay_memory/prioritized_replay_buffer/WrappedPrioritizedReplayBuffer.md: -------------------------------------------------------------------------------- 1 | description: Wrapper of OutOfGraphPrioritizedReplayBuffer with in-graph 2 | sampling. 3 | 4 |
5 | 6 | 7 |
8 | 9 | # dopamine.replay_memory.prioritized_replay_buffer.WrappedPrioritizedReplayBuffer 10 | 11 | 12 | 13 | 14 | 20 | 21 | 22 | Wrapper of OutOfGraphPrioritizedReplayBuffer with in-graph sampling. 23 | 24 | Inherits From: 25 | [`WrappedReplayBuffer`](../../../dopamine/replay_memory/circular_replay_buffer/WrappedReplayBuffer.md) 26 | 27 | 28 | 29 | #### Usage: 30 | 31 | * To add a transition: Call the add function. 32 | 33 | * To sample a batch: Query any of the tensors in the transition dictionary. 34 | Every sess.run that requires any of these tensors will sample a new 35 | transition. 36 | -------------------------------------------------------------------------------- /docs/api_docs/python/dopamine/replay_memory/sum_tree.md: -------------------------------------------------------------------------------- 1 | description: A sum tree data structure. 2 | 3 |
4 | 5 | 6 |
7 | 8 | # Module: dopamine.replay_memory.sum_tree 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | A sum tree data structure. 22 | 23 | Used for prioritized experience replay. See prioritized_replay_buffer.py and 24 | Schaul et al. (2015). 25 | -------------------------------------------------------------------------------- /dopamine/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Dopamine Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | name = 'dopamine' 16 | -------------------------------------------------------------------------------- /dopamine/agents/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Dopamine Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /dopamine/agents/dqn/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Dopamine Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /dopamine/agents/dqn/configs/dqn.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow the classic Nature DQN, but we modify as necessary to 2 | # match those used in Rainbow (Hessel et al., 2018), to ensure apples-to-apples 3 | # comparison. 
4 | import dopamine.discrete_domains.atari_lib 5 | import dopamine.discrete_domains.run_experiment 6 | import dopamine.agents.dqn.dqn_agent 7 | import dopamine.replay_memory.circular_replay_buffer 8 | import gin.tf.external_configurables 9 | 10 | DQNAgent.gamma = 0.99 11 | DQNAgent.update_horizon = 1 12 | DQNAgent.min_replay_history = 20000 # agent steps 13 | DQNAgent.update_period = 4 14 | DQNAgent.target_update_period = 8000 # agent steps 15 | DQNAgent.epsilon_train = 0.01 16 | DQNAgent.epsilon_eval = 0.001 17 | DQNAgent.epsilon_decay_period = 250000 # agent steps 18 | DQNAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version 19 | DQNAgent.optimizer = @tf.train.RMSPropOptimizer() 20 | 21 | tf.train.RMSPropOptimizer.learning_rate = 0.00025 22 | tf.train.RMSPropOptimizer.decay = 0.95 23 | tf.train.RMSPropOptimizer.momentum = 0.0 24 | tf.train.RMSPropOptimizer.epsilon = 0.00001 25 | tf.train.RMSPropOptimizer.centered = True 26 | 27 | atari_lib.create_atari_environment.game_name = 'Pong' 28 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017). 29 | atari_lib.create_atari_environment.sticky_actions = True 30 | create_agent.agent_name = 'dqn' 31 | Runner.num_iterations = 200 32 | Runner.training_steps = 250000 # agent steps 33 | Runner.evaluation_steps = 125000 # agent steps 34 | Runner.max_steps_per_episode = 27000 # agent steps 35 | 36 | WrappedReplayBuffer.replay_capacity = 1000000 37 | WrappedReplayBuffer.batch_size = 32 38 | -------------------------------------------------------------------------------- /dopamine/agents/dqn/configs/dqn_acrobot.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters for a simple DQN-style Acrobot agent. The hyperparameters 2 | # chosen achieve reasonable performance. 3 | import dopamine.discrete_domains.gym_lib 4 | import dopamine.discrete_domains.run_experiment 5 | import dopamine.agents.dqn.dqn_agent 6 | import dopamine.replay_memory.circular_replay_buffer 7 | import gin.tf.external_configurables 8 | 9 | DQNAgent.observation_shape = %gym_lib.ACROBOT_OBSERVATION_SHAPE 10 | DQNAgent.observation_dtype = %gym_lib.ACROBOT_OBSERVATION_DTYPE 11 | DQNAgent.stack_size = %gym_lib.ACROBOT_STACK_SIZE 12 | DQNAgent.network = @gym_lib.AcrobotDQNNetwork 13 | DQNAgent.gamma = 0.99 14 | DQNAgent.update_horizon = 1 15 | DQNAgent.min_replay_history = 500 16 | DQNAgent.update_period = 4 17 | DQNAgent.target_update_period = 100 18 | DQNAgent.epsilon_fn = @dqn_agent.identity_epsilon 19 | DQNAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version 20 | DQNAgent.optimizer = @tf.train.AdamOptimizer() 21 | 22 | tf.train.AdamOptimizer.learning_rate = 0.001 23 | tf.train.AdamOptimizer.epsilon = 0.0003125 24 | 25 | create_gym_environment.environment_name = 'Acrobot' 26 | create_gym_environment.version = 'v1' 27 | create_agent.agent_name = 'dqn' 28 | Runner.create_environment_fn = @gym_lib.create_gym_environment 29 | Runner.num_iterations = 500 30 | Runner.training_steps = 1000 31 | Runner.evaluation_steps = 1000 32 | Runner.max_steps_per_episode = 500 33 | 34 | WrappedReplayBuffer.replay_capacity = 50000 35 | WrappedReplayBuffer.batch_size = 128 36 | -------------------------------------------------------------------------------- /dopamine/agents/dqn/configs/dqn_cartpole.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters for a simple DQN-style Cartpole agent. The hyperparameters 2 | # chosen achieve reasonable performance. 
3 | import dopamine.discrete_domains.gym_lib 4 | import dopamine.discrete_domains.run_experiment 5 | import dopamine.agents.dqn.dqn_agent 6 | import dopamine.replay_memory.circular_replay_buffer 7 | import gin.tf.external_configurables 8 | 9 | DQNAgent.observation_shape = %gym_lib.CARTPOLE_OBSERVATION_SHAPE 10 | DQNAgent.observation_dtype = %gym_lib.CARTPOLE_OBSERVATION_DTYPE 11 | DQNAgent.stack_size = %gym_lib.CARTPOLE_STACK_SIZE 12 | DQNAgent.network = @gym_lib.CartpoleDQNNetwork 13 | DQNAgent.gamma = 0.99 14 | DQNAgent.update_horizon = 1 15 | DQNAgent.min_replay_history = 500 16 | DQNAgent.update_period = 4 17 | DQNAgent.target_update_period = 100 18 | DQNAgent.epsilon_fn = @dqn_agent.identity_epsilon 19 | DQNAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version 20 | DQNAgent.optimizer = @tf.train.AdamOptimizer() 21 | 22 | tf.train.AdamOptimizer.learning_rate = 0.001 23 | tf.train.AdamOptimizer.epsilon = 0.0003125 24 | 25 | create_gym_environment.environment_name = 'CartPole' 26 | create_gym_environment.version = 'v0' 27 | create_agent.agent_name = 'dqn' 28 | Runner.create_environment_fn = @gym_lib.create_gym_environment 29 | Runner.num_iterations = 500 30 | Runner.training_steps = 1000 31 | Runner.evaluation_steps = 1000 32 | Runner.max_steps_per_episode = 200 # Default max episode length. 33 | 34 | WrappedReplayBuffer.replay_capacity = 50000 35 | WrappedReplayBuffer.batch_size = 128 36 | -------------------------------------------------------------------------------- /dopamine/agents/dqn/configs/dqn_icml.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters used for reporting DQN results in Bellemare et al. (2017). 2 | import dopamine.discrete_domains.atari_lib 3 | import dopamine.discrete_domains.run_experiment 4 | import dopamine.agents.dqn.dqn_agent 5 | import dopamine.replay_memory.circular_replay_buffer 6 | import gin.tf.external_configurables 7 | 8 | DQNAgent.gamma = 0.99 9 | DQNAgent.update_horizon = 1 10 | DQNAgent.min_replay_history = 50000 # agent steps 11 | DQNAgent.update_period = 4 12 | DQNAgent.target_update_period = 10000 # agent steps 13 | DQNAgent.epsilon_train = 0.01 14 | DQNAgent.epsilon_eval = 0.001 15 | DQNAgent.epsilon_decay_period = 1000000 # agent steps 16 | DQNAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version 17 | DQNAgent.optimizer = @tf.train.RMSPropOptimizer() 18 | 19 | tf.train.RMSPropOptimizer.learning_rate = 0.00025 20 | tf.train.RMSPropOptimizer.decay = 0.95 21 | tf.train.RMSPropOptimizer.momentum = 0.0 22 | tf.train.RMSPropOptimizer.epsilon = 0.00001 23 | tf.train.RMSPropOptimizer.centered = True 24 | 25 | atari_lib.create_atari_environment.game_name = 'Pong' 26 | # Deterministic ALE version used in the DQN Nature paper (Mnih et al., 2015). 27 | atari_lib.create_atari_environment.sticky_actions = False 28 | create_agent.agent_name = 'dqn' 29 | Runner.num_iterations = 200 30 | Runner.training_steps = 250000 # agent steps 31 | Runner.evaluation_steps = 125000 # agent steps 32 | Runner.max_steps_per_episode = 27000 # agent steps 33 | 34 | AtariPreprocessing.terminal_on_life_loss = True 35 | 36 | WrappedReplayBuffer.replay_capacity = 1000000 37 | WrappedReplayBuffer.batch_size = 32 38 | -------------------------------------------------------------------------------- /dopamine/agents/dqn/configs/dqn_lunarlander.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters for a simple DQN-style LunarLander agent. 
The hyperparameters 2 | # chosen achieve reasonable performance. 3 | import dopamine.discrete_domains.gym_lib 4 | import dopamine.discrete_domains.run_experiment 5 | import dopamine.agents.dqn.dqn_agent 6 | import dopamine.replay_memory.circular_replay_buffer 7 | import gin.tf.external_configurables 8 | 9 | DQNAgent.observation_shape = %gym_lib.LUNAR_OBSERVATION_SHAPE 10 | DQNAgent.observation_dtype = %gym_lib.LUNAR_OBSERVATION_DTYPE 11 | DQNAgent.stack_size = %gym_lib.LUNAR_STACK_SIZE 12 | DQNAgent.network = @gym_lib.LunarLanderDQNNetwork 13 | DQNAgent.gamma = 0.99 14 | DQNAgent.update_horizon = 1 15 | DQNAgent.min_replay_history = 500 16 | DQNAgent.update_period = 4 17 | DQNAgent.target_update_period = 100 18 | DQNAgent.epsilon_fn = @dqn_agent.identity_epsilon 19 | DQNAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version 20 | DQNAgent.optimizer = @tf.train.AdamOptimizer() 21 | 22 | tf.train.AdamOptimizer.learning_rate = 0.001 23 | tf.train.AdamOptimizer.epsilon = 0.0003125 24 | 25 | create_gym_environment.environment_name = 'LunarLander' 26 | create_gym_environment.version = 'v2' 27 | create_agent.agent_name = 'dqn' 28 | Runner.create_environment_fn = @gym_lib.create_gym_environment 29 | Runner.num_iterations = 30 30 | Runner.training_steps = 4000 31 | Runner.evaluation_steps = 1000 32 | Runner.max_steps_per_episode = 1000 33 | 34 | WrappedReplayBuffer.replay_capacity = 50000 35 | WrappedReplayBuffer.batch_size = 128 36 | -------------------------------------------------------------------------------- /dopamine/agents/dqn/configs/dqn_mountaincar.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters for a simple DQN-style MountainCar agent. The hyperparameters 2 | # chosen achieve reasonable performance. 3 | import dopamine.discrete_domains.gym_lib 4 | import dopamine.discrete_domains.run_experiment 5 | import dopamine.agents.dqn.dqn_agent 6 | import dopamine.replay_memory.circular_replay_buffer 7 | import gin.tf.external_configurables 8 | 9 | DQNAgent.observation_shape = %gym_lib.MOUNTAINCAR_OBSERVATION_SHAPE 10 | DQNAgent.observation_dtype = %gym_lib.MOUNTAINCAR_OBSERVATION_DTYPE 11 | DQNAgent.stack_size = %gym_lib.MOUNTAINCAR_STACK_SIZE 12 | DQNAgent.network = @gym_lib.MountainCarDQNNetwork 13 | DQNAgent.gamma = 0.99 14 | DQNAgent.update_horizon = 1 15 | DQNAgent.min_replay_history = 500 16 | DQNAgent.update_period = 4 17 | DQNAgent.target_update_period = 100 18 | DQNAgent.epsilon_fn = @dqn_agent.identity_epsilon 19 | DQNAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version 20 | DQNAgent.optimizer = @tf.train.AdamOptimizer() 21 | 22 | tf.train.AdamOptimizer.learning_rate = 0.001 23 | tf.train.AdamOptimizer.epsilon = 0.0003125 24 | 25 | create_gym_environment.environment_name = 'MountainCar' 26 | create_gym_environment.version = 'v0' 27 | create_agent.agent_name = 'dqn' 28 | Runner.create_environment_fn = @gym_lib.create_gym_environment 29 | Runner.num_iterations = 30 30 | Runner.training_steps = 1000 31 | Runner.evaluation_steps = 1000 32 | Runner.max_steps_per_episode = 600 # Default max episode length. 33 | 34 | WrappedReplayBuffer.replay_capacity = 50000 35 | WrappedReplayBuffer.batch_size = 128 36 | -------------------------------------------------------------------------------- /dopamine/agents/dqn/configs/dqn_nature.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters used in Mnih et al. (2015). 
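# Note (assuming the default Atari frame skip of 4): the "agent steps" counts
# below are post-frame-skip environment steps, so 250000 training agent steps
# per iteration correspond to roughly one million ALE frames.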
2 | import dopamine.discrete_domains.atari_lib 3 | import dopamine.discrete_domains.run_experiment 4 | import dopamine.agents.dqn.dqn_agent 5 | import dopamine.replay_memory.circular_replay_buffer 6 | import gin.tf.external_configurables 7 | 8 | DQNAgent.gamma = 0.99 9 | DQNAgent.update_horizon = 1 10 | DQNAgent.min_replay_history = 50000 # agent steps 11 | DQNAgent.update_period = 4 12 | DQNAgent.target_update_period = 10000 # agent steps 13 | DQNAgent.epsilon_train = 0.1 14 | DQNAgent.epsilon_eval = 0.05 15 | DQNAgent.epsilon_decay_period = 1000000 # agent steps 16 | DQNAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version 17 | DQNAgent.optimizer = @tf.train.RMSPropOptimizer() 18 | 19 | tf.train.RMSPropOptimizer.learning_rate = 0.00025 20 | tf.train.RMSPropOptimizer.decay = 0.95 21 | tf.train.RMSPropOptimizer.momentum = 0.0 22 | tf.train.RMSPropOptimizer.epsilon = 0.00001 23 | tf.train.RMSPropOptimizer.centered = True 24 | 25 | atari_lib.create_atari_environment.game_name = 'Pong' 26 | # Deterministic ALE version used in the DQN Nature paper (Mnih et al., 2015). 27 | atari_lib.create_atari_environment.sticky_actions = False 28 | create_agent.agent_name = 'dqn' 29 | Runner.num_iterations = 200 30 | Runner.training_steps = 250000 # agent steps 31 | Runner.evaluation_steps = 125000 # agent steps 32 | Runner.max_steps_per_episode = 27000 # agent steps 33 | 34 | AtariPreprocessing.terminal_on_life_loss = True 35 | 36 | WrappedReplayBuffer.replay_capacity = 1000000 37 | WrappedReplayBuffer.batch_size = 32 38 | -------------------------------------------------------------------------------- /dopamine/agents/dqn/configs/dqn_profiling.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow the classic Nature DQN, but we modify as necessary to 2 | # match those used in Rainbow (Hessel et al., 2018), to ensure apples-to-apples 3 | # comparison. 4 | import dopamine.discrete_domains.atari_lib 5 | import dopamine.discrete_domains.run_experiment 6 | import dopamine.agents.dqn.dqn_agent 7 | import dopamine.replay_memory.circular_replay_buffer 8 | import gin.tf.external_configurables 9 | 10 | DQNAgent.gamma = 0.99 11 | DQNAgent.update_horizon = 1 12 | DQNAgent.min_replay_history = 100 # agent steps 13 | DQNAgent.update_period = 4 14 | DQNAgent.target_update_period = 8000 # agent steps 15 | DQNAgent.epsilon_fn = @dqn_agent.identity_epsilon 16 | DQNAgent.epsilon_train = 0.0 17 | DQNAgent.epsilon_eval = 0.0 18 | DQNAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version 19 | DQNAgent.optimizer = @tf.train.RMSPropOptimizer() 20 | 21 | tf.train.RMSPropOptimizer.learning_rate = 0.00025 22 | tf.train.RMSPropOptimizer.decay = 0.95 23 | tf.train.RMSPropOptimizer.momentum = 0.0 24 | tf.train.RMSPropOptimizer.epsilon = 0.00001 25 | tf.train.RMSPropOptimizer.centered = True 26 | 27 | atari_lib.create_atari_environment.game_name = 'Pong' 28 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017). 
29 | atari_lib.create_atari_environment.sticky_actions = True 30 | create_runner.schedule = 'continuous_train' 31 | create_agent.agent_name = 'dqn' 32 | Runner.num_iterations = 200 33 | Runner.training_steps = 250000 # agent steps 34 | Runner.evaluation_steps = 125000 # agent steps 35 | Runner.max_steps_per_episode = 27000 # agent steps 36 | 37 | WrappedReplayBuffer.replay_capacity = 1000000 38 | WrappedReplayBuffer.batch_size = 32 39 | -------------------------------------------------------------------------------- /dopamine/agents/implicit_quantile/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Dopamine Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /dopamine/agents/implicit_quantile/configs/implicit_quantile.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow Dabney et al. (2018), but we modify as necessary to 2 | # match those used in Rainbow (Hessel et al., 2018), to ensure apples-to-apples 3 | # comparison. 4 | import dopamine.agents.implicit_quantile.implicit_quantile_agent 5 | import dopamine.agents.rainbow.rainbow_agent 6 | import dopamine.discrete_domains.atari_lib 7 | import dopamine.discrete_domains.run_experiment 8 | import dopamine.replay_memory.prioritized_replay_buffer 9 | import gin.tf.external_configurables 10 | 11 | ImplicitQuantileAgent.kappa = 1.0 12 | ImplicitQuantileAgent.num_tau_samples = 64 13 | ImplicitQuantileAgent.num_tau_prime_samples = 64 14 | ImplicitQuantileAgent.num_quantile_samples = 32 15 | RainbowAgent.gamma = 0.99 16 | RainbowAgent.update_horizon = 3 17 | RainbowAgent.min_replay_history = 20000 # agent steps 18 | RainbowAgent.update_period = 4 19 | RainbowAgent.target_update_period = 8000 # agent steps 20 | RainbowAgent.epsilon_train = 0.01 21 | RainbowAgent.epsilon_eval = 0.001 22 | RainbowAgent.epsilon_decay_period = 250000 # agent steps 23 | # IQN currently does not support prioritized replay. 24 | RainbowAgent.replay_scheme = 'uniform' 25 | RainbowAgent.tf_device = '/gpu:0' # '/cpu:*' use for non-GPU version 26 | RainbowAgent.optimizer = @tf.train.AdamOptimizer() 27 | 28 | tf.train.AdamOptimizer.learning_rate = 0.00005 29 | tf.train.AdamOptimizer.epsilon = 0.0003125 30 | 31 | atari_lib.create_atari_environment.game_name = 'Pong' 32 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017). 
33 | atari_lib.create_atari_environment.sticky_actions = True 34 | create_agent.agent_name = 'implicit_quantile' 35 | Runner.num_iterations = 200 36 | Runner.training_steps = 250000 37 | Runner.evaluation_steps = 125000 38 | Runner.max_steps_per_episode = 27000 39 | 40 | WrappedPrioritizedReplayBuffer.replay_capacity = 1000000 41 | WrappedPrioritizedReplayBuffer.batch_size = 32 42 | -------------------------------------------------------------------------------- /dopamine/agents/implicit_quantile/configs/implicit_quantile_icml.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow Dabney et al. (2018). 2 | import dopamine.agents.implicit_quantile.implicit_quantile_agent 3 | import dopamine.agents.rainbow.rainbow_agent 4 | import dopamine.discrete_domains.atari_lib 5 | import dopamine.discrete_domains.run_experiment 6 | import dopamine.replay_memory.prioritized_replay_buffer 7 | import gin.tf.external_configurables 8 | 9 | ImplicitQuantileAgent.kappa = 1.0 10 | ImplicitQuantileAgent.num_tau_samples = 64 11 | ImplicitQuantileAgent.num_tau_prime_samples = 64 12 | ImplicitQuantileAgent.num_quantile_samples = 32 13 | RainbowAgent.gamma = 0.99 14 | RainbowAgent.update_horizon = 1 15 | RainbowAgent.min_replay_history = 50000 # agent steps 16 | RainbowAgent.update_period = 4 17 | RainbowAgent.target_update_period = 10000 # agent steps 18 | RainbowAgent.epsilon_train = 0.01 19 | RainbowAgent.epsilon_eval = 0.001 20 | RainbowAgent.epsilon_decay_period = 1000000 # agent steps 21 | RainbowAgent.replay_scheme = 'uniform' 22 | RainbowAgent.tf_device = '/gpu:0' # '/cpu:*' use for non-GPU version 23 | RainbowAgent.optimizer = @tf.train.AdamOptimizer() 24 | 25 | tf.train.AdamOptimizer.learning_rate = 0.00005 26 | tf.train.AdamOptimizer.epsilon = 0.0003125 27 | 28 | atari_lib.create_atari_environment.game_name = 'Pong' 29 | atari_lib.create_atari_environment.sticky_actions = False 30 | create_agent.agent_name = 'implicit_quantile' 31 | Runner.num_iterations = 200 32 | Runner.training_steps = 250000 33 | Runner.evaluation_steps = 125000 34 | Runner.max_steps_per_episode = 27000 35 | 36 | AtariPreprocessing.terminal_on_life_loss = True 37 | 38 | WrappedPrioritizedReplayBuffer.replay_capacity = 1000000 39 | WrappedPrioritizedReplayBuffer.batch_size = 32 40 | -------------------------------------------------------------------------------- /dopamine/agents/implicit_quantile/configs/implicit_quantile_profiling.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow Dabney et al. (2018), but we modify as necessary to 2 | # match those used in Rainbow (Hessel et al., 2018), to ensure apples-to-apples 3 | # comparison. 
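# Relative to implicit_quantile.gin above, this profiling variant shrinks
# min_replay_history to 100 steps, pins epsilon to 0 via identity_epsilon, and
# uses the 'continuous_train' schedule (training only, no separate evaluation
# phase), so raw training throughput can be measured without warm-up or
# evaluation overhead.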
4 | import dopamine.agents.implicit_quantile.implicit_quantile_agent 5 | import dopamine.agents.rainbow.rainbow_agent 6 | import dopamine.discrete_domains.atari_lib 7 | import dopamine.discrete_domains.run_experiment 8 | import dopamine.replay_memory.prioritized_replay_buffer 9 | import gin.tf.external_configurables 10 | 11 | ImplicitQuantileAgent.kappa = 1.0 12 | ImplicitQuantileAgent.num_tau_samples = 64 13 | ImplicitQuantileAgent.num_tau_prime_samples = 64 14 | ImplicitQuantileAgent.num_quantile_samples = 32 15 | RainbowAgent.gamma = 0.99 16 | RainbowAgent.update_horizon = 3 17 | RainbowAgent.min_replay_history = 100 # agent steps 18 | RainbowAgent.update_period = 4 19 | RainbowAgent.target_update_period = 8000 # agent steps 20 | RainbowAgent.epsilon_fn = @dqn_agent.identity_epsilon 21 | RainbowAgent.epsilon_train = 0.0 22 | RainbowAgent.epsilon_eval = 0.0 23 | # IQN currently does not support prioritized replay. 24 | RainbowAgent.replay_scheme = 'uniform' 25 | RainbowAgent.tf_device = '/gpu:0' # '/cpu:*' use for non-GPU version 26 | RainbowAgent.optimizer = @tf.train.AdamOptimizer() 27 | 28 | tf.train.AdamOptimizer.learning_rate = 0.00005 29 | tf.train.AdamOptimizer.epsilon = 0.0003125 30 | 31 | atari_lib.create_atari_environment.game_name = 'Pong' 32 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017). 33 | atari_lib.create_atari_environment.sticky_actions = True 34 | create_runner.schedule = 'continuous_train' 35 | create_agent.agent_name = 'implicit_quantile' 36 | Runner.num_iterations = 200 37 | Runner.training_steps = 250000 38 | Runner.evaluation_steps = 125000 39 | Runner.max_steps_per_episode = 27000 40 | 41 | WrappedPrioritizedReplayBuffer.replay_capacity = 1000000 42 | WrappedPrioritizedReplayBuffer.batch_size = 32 43 | -------------------------------------------------------------------------------- /dopamine/agents/rainbow/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Dopamine Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /dopamine/agents/rainbow/configs/c51.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow the settings from Bellemare et al. (2017), but we 2 | # modify as necessary to match those used in Rainbow (Hessel et al., 2018), to 3 | # ensure apples-to-apples comparison. 4 | import dopamine.agents.rainbow.rainbow_agent 5 | import dopamine.discrete_domains.atari_lib 6 | import dopamine.discrete_domains.run_experiment 7 | import dopamine.replay_memory.prioritized_replay_buffer 8 | import gin.tf.external_configurables 9 | 10 | RainbowAgent.num_atoms = 51 11 | RainbowAgent.vmax = 10. 
12 | RainbowAgent.gamma = 0.99 13 | RainbowAgent.update_horizon = 1 14 | RainbowAgent.min_replay_history = 20000 # agent steps 15 | RainbowAgent.update_period = 4 16 | RainbowAgent.target_update_period = 8000 # agent steps 17 | RainbowAgent.epsilon_train = 0.01 18 | RainbowAgent.epsilon_eval = 0.001 19 | RainbowAgent.epsilon_decay_period = 250000 # agent steps 20 | RainbowAgent.replay_scheme = 'uniform' 21 | RainbowAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version 22 | RainbowAgent.optimizer = @tf.train.AdamOptimizer() 23 | 24 | tf.train.AdamOptimizer.learning_rate = 0.00025 25 | tf.train.AdamOptimizer.epsilon = 0.0003125 26 | 27 | atari_lib.create_atari_environment.game_name = 'Pong' 28 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017). 29 | atari_lib.create_atari_environment.sticky_actions = True 30 | create_agent.agent_name = 'rainbow' 31 | Runner.num_iterations = 200 32 | Runner.training_steps = 250000 # agent steps 33 | Runner.evaluation_steps = 125000 # agent steps 34 | Runner.max_steps_per_episode = 27000 # agent steps 35 | 36 | WrappedPrioritizedReplayBuffer.replay_capacity = 1000000 37 | WrappedPrioritizedReplayBuffer.batch_size = 32 38 | -------------------------------------------------------------------------------- /dopamine/agents/rainbow/configs/c51_acrobot.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters for a simple C51-style Acrobot agent. The hyperparameters 2 | # chosen achieve reasonable performance. 3 | import dopamine.agents.dqn.dqn_agent 4 | import dopamine.agents.rainbow.rainbow_agent 5 | import dopamine.discrete_domains.gym_lib 6 | import dopamine.discrete_domains.run_experiment 7 | import dopamine.replay_memory.prioritized_replay_buffer 8 | import gin.tf.external_configurables 9 | 10 | RainbowAgent.observation_shape = %gym_lib.ACROBOT_OBSERVATION_SHAPE 11 | RainbowAgent.observation_dtype = %gym_lib.ACROBOT_OBSERVATION_DTYPE 12 | RainbowAgent.stack_size = %gym_lib.ACROBOT_STACK_SIZE 13 | RainbowAgent.network = @gym_lib.AcrobotRainbowNetwork 14 | RainbowAgent.num_atoms = 51 15 | RainbowAgent.vmax = 10. 16 | RainbowAgent.gamma = 0.99 17 | RainbowAgent.update_horizon = 1 18 | RainbowAgent.min_replay_history = 500 19 | RainbowAgent.update_period = 4 20 | RainbowAgent.target_update_period = 100 21 | RainbowAgent.epsilon_fn = @dqn_agent.identity_epsilon 22 | RainbowAgent.replay_scheme = 'uniform' 23 | RainbowAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version 24 | RainbowAgent.optimizer = @tf.train.AdamOptimizer() 25 | 26 | tf.train.AdamOptimizer.learning_rate = 0.1 27 | tf.train.AdamOptimizer.epsilon = 0.0003125 28 | 29 | create_gym_environment.environment_name = 'Acrobot' 30 | create_gym_environment.version = 'v1' 31 | create_agent.agent_name = 'rainbow' 32 | Runner.create_environment_fn = @gym_lib.create_gym_environment 33 | Runner.num_iterations = 500 34 | Runner.training_steps = 1000 35 | Runner.evaluation_steps = 1000 36 | Runner.max_steps_per_episode = 500 37 | 38 | WrappedPrioritizedReplayBuffer.replay_capacity = 50000 39 | WrappedPrioritizedReplayBuffer.batch_size = 128 40 | -------------------------------------------------------------------------------- /dopamine/agents/rainbow/configs/c51_cartpole.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters for a simple C51-style Cartpole agent. The hyperparameters 2 | # chosen achieve reasonable performance. 
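# Under the standard C51 parameterization (stated here for clarity; not part of
# the original file), the return distribution is supported on num_atoms evenly
# spaced atoms over [-vmax, vmax]. The values below give 201 atoms on
# [-100, 100], i.e. an atom spacing of
#   delta_z = 2 * vmax / (num_atoms - 1) = 200 / 200 = 1.0,
# a much wider support than the 51 atoms on [-10, 10] used in the Atari
# configs.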
3 | import dopamine.agents.dqn.dqn_agent 4 | import dopamine.agents.rainbow.rainbow_agent 5 | import dopamine.discrete_domains.gym_lib 6 | import dopamine.discrete_domains.run_experiment 7 | import dopamine.replay_memory.prioritized_replay_buffer 8 | import gin.tf.external_configurables 9 | 10 | RainbowAgent.observation_shape = %gym_lib.CARTPOLE_OBSERVATION_SHAPE 11 | RainbowAgent.observation_dtype = %gym_lib.CARTPOLE_OBSERVATION_DTYPE 12 | RainbowAgent.stack_size = %gym_lib.CARTPOLE_STACK_SIZE 13 | RainbowAgent.network = @gym_lib.CartpoleRainbowNetwork 14 | RainbowAgent.num_atoms = 201 15 | RainbowAgent.vmax = 100. 16 | RainbowAgent.gamma = 0.99 17 | RainbowAgent.epsilon_eval = 0. 18 | RainbowAgent.epsilon_train = 0.01 19 | RainbowAgent.update_horizon = 1 20 | RainbowAgent.min_replay_history = 500 21 | RainbowAgent.update_period = 1 22 | RainbowAgent.target_update_period = 1 23 | RainbowAgent.epsilon_fn = @dqn_agent.identity_epsilon 24 | RainbowAgent.replay_scheme = 'uniform' 25 | RainbowAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version 26 | RainbowAgent.optimizer = @tf.train.AdamOptimizer() 27 | 28 | tf.train.AdamOptimizer.learning_rate = 0.00001 29 | tf.train.AdamOptimizer.epsilon = 0.00000390625 30 | 31 | create_gym_environment.environment_name = 'CartPole' 32 | create_gym_environment.version = 'v0' 33 | create_agent.agent_name = 'rainbow' 34 | Runner.create_environment_fn = @gym_lib.create_gym_environment 35 | Runner.num_iterations = 400 36 | Runner.training_steps = 1000 37 | Runner.evaluation_steps = 1000 38 | Runner.max_steps_per_episode = 200 # Default max episode length. 39 | 40 | WrappedPrioritizedReplayBuffer.replay_capacity = 50000 41 | WrappedPrioritizedReplayBuffer.batch_size = 128 42 | -------------------------------------------------------------------------------- /dopamine/agents/rainbow/configs/c51_icml.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters used in Bellemare et al. (2017). 2 | import dopamine.agents.rainbow.rainbow_agent 3 | import dopamine.discrete_domains.atari_lib 4 | import dopamine.discrete_domains.run_experiment 5 | import dopamine.replay_memory.prioritized_replay_buffer 6 | import gin.tf.external_configurables 7 | 8 | RainbowAgent.num_atoms = 51 9 | RainbowAgent.vmax = 10. 10 | RainbowAgent.gamma = 0.99 11 | RainbowAgent.update_horizon = 1 12 | RainbowAgent.min_replay_history = 50000 # agent steps 13 | RainbowAgent.update_period = 4 14 | RainbowAgent.target_update_period = 10000 # agent steps 15 | RainbowAgent.epsilon_train = 0.01 16 | RainbowAgent.epsilon_eval = 0.001 17 | RainbowAgent.epsilon_decay_period = 1000000 # agent steps 18 | RainbowAgent.replay_scheme = 'uniform' 19 | RainbowAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version 20 | RainbowAgent.optimizer = @tf.train.AdamOptimizer() 21 | 22 | tf.train.AdamOptimizer.learning_rate = 0.00025 23 | tf.train.AdamOptimizer.epsilon = 0.0003125 24 | 25 | atari_lib.create_atari_environment.game_name = 'Pong' 26 | # Deterministic ALE version used in the DQN Nature paper (Mnih et al., 2015). 
27 | atari_lib.create_atari_environment.sticky_actions = False 28 | create_agent.agent_name = 'rainbow' 29 | Runner.num_iterations = 200 30 | Runner.training_steps = 250000 # agent steps 31 | Runner.evaluation_steps = 125000 # agent steps 32 | Runner.max_steps_per_episode = 27000 # agent steps 33 | 34 | AtariPreprocessing.terminal_on_life_loss = True 35 | 36 | WrappedPrioritizedReplayBuffer.replay_capacity = 1000000 37 | WrappedPrioritizedReplayBuffer.batch_size = 32 38 | -------------------------------------------------------------------------------- /dopamine/agents/rainbow/configs/c51_profiling.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow the settings from Bellemare et al. (2017), but we 2 | # modify as necessary to match those used in Rainbow (Hessel et al., 2018), to 3 | # ensure apples-to-apples comparison. 4 | import dopamine.agents.rainbow.rainbow_agent 5 | import dopamine.discrete_domains.atari_lib 6 | import dopamine.discrete_domains.run_experiment 7 | import dopamine.replay_memory.prioritized_replay_buffer 8 | import gin.tf.external_configurables 9 | 10 | RainbowAgent.num_atoms = 51 11 | RainbowAgent.vmax = 10. 12 | RainbowAgent.gamma = 0.99 13 | RainbowAgent.update_horizon = 1 14 | RainbowAgent.min_replay_history = 100 # agent steps 15 | RainbowAgent.update_period = 4 16 | RainbowAgent.target_update_period = 8000 # agent steps 17 | RainbowAgent.epsilon_fn = @dqn_agent.identity_epsilon 18 | RainbowAgent.epsilon_train = 0.0 19 | RainbowAgent.epsilon_eval = 0.0 20 | RainbowAgent.replay_scheme = 'uniform' 21 | RainbowAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version 22 | RainbowAgent.optimizer = @tf.train.AdamOptimizer() 23 | 24 | tf.train.AdamOptimizer.learning_rate = 0.00025 25 | tf.train.AdamOptimizer.epsilon = 0.0003125 26 | 27 | atari_lib.create_atari_environment.game_name = 'Pong' 28 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017). 29 | atari_lib.create_atari_environment.sticky_actions = True 30 | create_runner.schedule = 'continuous_train' 31 | create_agent.agent_name = 'rainbow' 32 | Runner.num_iterations = 200 33 | Runner.training_steps = 250000 # agent steps 34 | Runner.evaluation_steps = 125000 # agent steps 35 | Runner.max_steps_per_episode = 27000 # agent steps 36 | 37 | WrappedPrioritizedReplayBuffer.replay_capacity = 1000000 38 | WrappedPrioritizedReplayBuffer.batch_size = 32 39 | -------------------------------------------------------------------------------- /dopamine/agents/rainbow/configs/rainbow_aaai.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow Hessel et al. (2018). 2 | import dopamine.agents.rainbow.rainbow_agent 3 | import dopamine.discrete_domains.atari_lib 4 | import dopamine.discrete_domains.run_experiment 5 | import dopamine.replay_memory.prioritized_replay_buffer 6 | import gin.tf.external_configurables 7 | 8 | RainbowAgent.num_atoms = 51 9 | RainbowAgent.vmax = 10. 
10 | RainbowAgent.gamma = 0.99 11 | RainbowAgent.update_horizon = 3 12 | RainbowAgent.min_replay_history = 20000 # agent steps 13 | RainbowAgent.update_period = 4 14 | RainbowAgent.target_update_period = 8000 # agent steps 15 | RainbowAgent.epsilon_train = 0.01 16 | RainbowAgent.epsilon_eval = 0.001 17 | RainbowAgent.epsilon_decay_period = 250000 # agent steps 18 | RainbowAgent.replay_scheme = 'prioritized' 19 | RainbowAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version 20 | RainbowAgent.optimizer = @tf.train.AdamOptimizer() 21 | 22 | # Note these parameters are different from C51's. 23 | tf.train.AdamOptimizer.learning_rate = 0.0000625 24 | tf.train.AdamOptimizer.epsilon = 0.00015 25 | 26 | atari_lib.create_atari_environment.game_name = 'Pong' 27 | # Deterministic ALE version used in the AAAI paper. 28 | atari_lib.create_atari_environment.sticky_actions = False 29 | create_agent.agent_name = 'rainbow' 30 | Runner.num_iterations = 200 31 | Runner.training_steps = 250000 # agent steps 32 | Runner.evaluation_steps = 125000 # agent steps 33 | Runner.max_steps_per_episode = 27000 # agent steps 34 | 35 | AtariPreprocessing.terminal_on_life_loss = True 36 | 37 | WrappedPrioritizedReplayBuffer.replay_capacity = 1000000 38 | WrappedPrioritizedReplayBuffer.batch_size = 32 39 | -------------------------------------------------------------------------------- /dopamine/agents/rainbow/configs/rainbow_acrobot.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters for a simple Rainbow-style Acrobot agent. The hyperparameters 2 | # chosen achieve reasonable performance. 3 | import dopamine.agents.rainbow.rainbow_agent 4 | import dopamine.discrete_domains.gym_lib 5 | import dopamine.discrete_domains.run_experiment 6 | import dopamine.replay_memory.prioritized_replay_buffer 7 | import gin.tf.external_configurables 8 | 9 | RainbowAgent.observation_shape = %gym_lib.ACROBOT_OBSERVATION_SHAPE 10 | RainbowAgent.observation_dtype = %gym_lib.ACROBOT_OBSERVATION_DTYPE 11 | RainbowAgent.stack_size = %gym_lib.ACROBOT_STACK_SIZE 12 | RainbowAgent.network = @gym_lib.AcrobotRainbowNetwork 13 | RainbowAgent.num_atoms = 51 14 | RainbowAgent.vmax = 10. 15 | RainbowAgent.gamma = 0.99 16 | RainbowAgent.update_horizon = 3 17 | RainbowAgent.min_replay_history = 500 18 | RainbowAgent.update_period = 4 19 | RainbowAgent.target_update_period = 100 20 | RainbowAgent.epsilon_fn = @dqn_agent.identity_epsilon 21 | RainbowAgent.replay_scheme = 'prioritized' 22 | RainbowAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version 23 | RainbowAgent.optimizer = @tf.train.AdamOptimizer() 24 | 25 | tf.train.AdamOptimizer.learning_rate = 0.09 26 | tf.train.AdamOptimizer.epsilon = 0.0003125 27 | 28 | create_gym_environment.environment_name = 'Acrobot' 29 | create_gym_environment.version = 'v1' 30 | create_agent.agent_name = 'rainbow' 31 | Runner.create_environment_fn = @gym_lib.create_gym_environment 32 | Runner.num_iterations = 500 33 | Runner.training_steps = 1000 34 | Runner.evaluation_steps = 1000 35 | Runner.max_steps_per_episode = 500 36 | 37 | WrappedPrioritizedReplayBuffer.replay_capacity = 50000 38 | WrappedPrioritizedReplayBuffer.batch_size = 128 39 | -------------------------------------------------------------------------------- /dopamine/agents/rainbow/configs/rainbow_cartpole.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters for a simple Rainbow-style Cartpole agent. 
The 2 | # hyperparameters chosen achieve reasonable performance. 3 | import dopamine.agents.dqn.dqn_agent 4 | import dopamine.agents.rainbow.rainbow_agent 5 | import dopamine.discrete_domains.gym_lib 6 | import dopamine.discrete_domains.run_experiment 7 | import dopamine.replay_memory.prioritized_replay_buffer 8 | import gin.tf.external_configurables 9 | 10 | RainbowAgent.observation_shape = %gym_lib.CARTPOLE_OBSERVATION_SHAPE 11 | RainbowAgent.observation_dtype = %gym_lib.CARTPOLE_OBSERVATION_DTYPE 12 | RainbowAgent.stack_size = %gym_lib.CARTPOLE_STACK_SIZE 13 | RainbowAgent.network = @gym_lib.CartpoleRainbowNetwork 14 | RainbowAgent.num_atoms = 51 15 | RainbowAgent.vmax = 10. 16 | RainbowAgent.gamma = 0.99 17 | RainbowAgent.update_horizon = 3 18 | RainbowAgent.min_replay_history = 500 19 | RainbowAgent.update_period = 4 20 | RainbowAgent.target_update_period = 100 21 | RainbowAgent.epsilon_fn = @dqn_agent.identity_epsilon 22 | RainbowAgent.replay_scheme = 'prioritized' 23 | RainbowAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version 24 | RainbowAgent.optimizer = @tf.train.AdamOptimizer() 25 | 26 | tf.train.AdamOptimizer.learning_rate = 0.09 27 | tf.train.AdamOptimizer.epsilon = 0.0003125 28 | 29 | create_gym_environment.environment_name = 'CartPole' 30 | create_gym_environment.version = 'v0' 31 | create_agent.agent_name = 'rainbow' 32 | Runner.create_environment_fn = @gym_lib.create_gym_environment 33 | Runner.num_iterations = 500 34 | Runner.training_steps = 1000 35 | Runner.evaluation_steps = 1000 36 | Runner.max_steps_per_episode = 200 # Default max episode length. 37 | 38 | WrappedPrioritizedReplayBuffer.replay_capacity = 50000 39 | WrappedPrioritizedReplayBuffer.batch_size = 128 40 | -------------------------------------------------------------------------------- /dopamine/agents/rainbow/configs/rainbow_dqnpro.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow Hessel et al. (2018). Unlike rainbow_original.gin, 2 | # this config keeps sticky_actions disabled, matching the original paper. 3 | import dopamine.agents.rainbow.rainbow_agent 4 | import dopamine.discrete_domains.atari_lib 5 | import dopamine.discrete_domains.run_experiment 6 | import dopamine.replay_memory.prioritized_replay_buffer 7 | import gin.tf.external_configurables 8 | 9 | RainbowAgent.num_atoms = 51 10 | RainbowAgent.vmax = 10. 11 | RainbowAgent.gamma = 0.99 12 | RainbowAgent.mu = 0.0 13 | RainbowAgent.nu = 0.0 14 | RainbowAgent.update_horizon = 3 15 | RainbowAgent.min_replay_history = 20000 # agent steps 16 | RainbowAgent.update_period = 4 17 | RainbowAgent.target_update_period = 8000 # agent steps 18 | RainbowAgent.epsilon_train = 0.01 19 | RainbowAgent.epsilon_eval = 0.001 20 | RainbowAgent.epsilon_decay_period = 250000 # agent steps 21 | RainbowAgent.replay_scheme = 'prioritized' 22 | RainbowAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version 23 | RainbowAgent.optimizer = @tf.train.AdamOptimizer() 24 | 25 | # Note these parameters are different from C51's. 26 | tf.train.AdamOptimizer.learning_rate = 0.0000625 27 | tf.train.AdamOptimizer.epsilon = 0.00015 28 | 29 | atari_lib.create_atari_environment.game_name = 'Pong' 30 | # Deterministic ALE version used in the AAAI paper.
31 | atari_lib.create_atari_environment.sticky_actions = False 32 | create_agent.agent_name = 'rainbow' 33 | Runner.num_iterations = 200 34 | Runner.training_steps = 250000 # agent steps 35 | Runner.evaluation_steps = 125000 # agent steps 36 | Runner.max_steps_per_episode = 27000 # agent steps 37 | 38 | AtariPreprocessing.terminal_on_life_loss = True 39 | 40 | WrappedPrioritizedReplayBuffer.replay_capacity = 1000000 41 | WrappedPrioritizedReplayBuffer.batch_size = 32 -------------------------------------------------------------------------------- /dopamine/agents/rainbow/configs/rainbow_original.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow Hessel et al. (2018), except for sticky_actions, 2 | # which was False (not using sticky actions) in the original paper. 3 | import dopamine.agents.rainbow.rainbow_agent 4 | import dopamine.discrete_domains.atari_lib 5 | import dopamine.discrete_domains.run_experiment 6 | import dopamine.replay_memory.prioritized_replay_buffer 7 | import gin.tf.external_configurables 8 | 9 | RainbowAgent.num_atoms = 51 10 | RainbowAgent.vmax = 10. 11 | RainbowAgent.gamma = 0.99 12 | RainbowAgent.update_horizon = 3 13 | RainbowAgent.min_replay_history = 20000 # agent steps 14 | RainbowAgent.update_period = 4 15 | RainbowAgent.target_update_period = 8000 # agent steps 16 | RainbowAgent.epsilon_train = 0.01 17 | RainbowAgent.epsilon_eval = 0.001 18 | RainbowAgent.epsilon_decay_period = 250000 # agent steps 19 | RainbowAgent.replay_scheme = 'prioritized' 20 | RainbowAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version 21 | RainbowAgent.optimizer = @tf.train.AdamOptimizer() 22 | 23 | # Note these parameters are different from C51's. 24 | tf.train.AdamOptimizer.learning_rate = 0.0000625 25 | tf.train.AdamOptimizer.epsilon = 0.00015 26 | 27 | atari_lib.create_atari_environment.game_name = 'Pong' 28 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017). 29 | atari_lib.create_atari_environment.sticky_actions = True 30 | create_agent.agent_name = 'rainbow' 31 | Runner.num_iterations = 200 32 | Runner.training_steps = 250000 # agent steps 33 | Runner.evaluation_steps = 125000 # agent steps 34 | Runner.max_steps_per_episode = 27000 # agent steps 35 | 36 | WrappedPrioritizedReplayBuffer.replay_capacity = 1000000 37 | WrappedPrioritizedReplayBuffer.batch_size = 32 38 | -------------------------------------------------------------------------------- /dopamine/agents/rainbow/configs/rainbow_our_first_paper.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow Hessel et al. (2018). 2 | import dopamine.agents.rainbow.rainbow_agent 3 | import dopamine.discrete_domains.atari_lib 4 | import dopamine.discrete_domains.run_experiment 5 | import dopamine.replay_memory.prioritized_replay_buffer 6 | import gin.tf.external_configurables 7 | 8 | RainbowAgent.num_atoms = 51 9 | RainbowAgent.vmax = 10. 
10 | RainbowAgent.gamma = 0.99 11 | RainbowAgent.mu = 0.0 12 | RainbowAgent.update_horizon = 3 13 | RainbowAgent.min_replay_history = 20000 # agent steps 14 | RainbowAgent.update_period = 4 15 | RainbowAgent.target_update_period = 8000 # agent steps 16 | RainbowAgent.epsilon_train = 0.01 17 | RainbowAgent.epsilon_eval = 0.001 18 | RainbowAgent.epsilon_decay_period = 250000 # agent steps 19 | RainbowAgent.replay_scheme = 'prioritized' 20 | RainbowAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version 21 | RainbowAgent.optimizer = @tf.train.AdamOptimizer() 22 | 23 | # Note these parameters are different from C51's. 24 | tf.train.AdamOptimizer.learning_rate = 0.0000625 25 | tf.train.AdamOptimizer.epsilon = 0.00015 26 | 27 | atari_lib.create_atari_environment.game_name = 'Pong' 28 | # Deterministic ALE version used in the AAAI paper. 29 | atari_lib.create_atari_environment.sticky_actions = False 30 | create_agent.agent_name = 'rainbow' 31 | Runner.num_iterations = 120 32 | Runner.training_steps = 250000 # agent steps 33 | Runner.evaluation_steps = 125000 # agent steps 34 | Runner.max_steps_per_episode = 27000 # agent steps 35 | 36 | AtariPreprocessing.terminal_on_life_loss = True 37 | 38 | WrappedPrioritizedReplayBuffer.replay_capacity = 1000000 39 | WrappedPrioritizedReplayBuffer.batch_size = 64 40 | -------------------------------------------------------------------------------- /dopamine/agents/rainbow/configs/rainbow_our_second_paper.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow Hessel et al. (2018). 2 | import dopamine.agents.rainbow.rainbow_agent 3 | import dopamine.discrete_domains.atari_lib 4 | import dopamine.discrete_domains.run_experiment 5 | import dopamine.replay_memory.prioritized_replay_buffer 6 | import gin.tf.external_configurables 7 | 8 | RainbowAgent.num_atoms = 51 9 | RainbowAgent.vmax = 10. 10 | RainbowAgent.gamma = 0.99 11 | RainbowAgent.mu = 0.0 12 | RainbowAgent.nu = 0.0 13 | RainbowAgent.update_horizon = 3 14 | RainbowAgent.min_replay_history = 20000 # agent steps 15 | RainbowAgent.update_period = 4 16 | RainbowAgent.target_update_period = 8000 # agent steps 17 | RainbowAgent.epsilon_train = 0.01 18 | RainbowAgent.epsilon_eval = 0.001 19 | RainbowAgent.epsilon_decay_period = 250000 # agent steps 20 | RainbowAgent.replay_scheme = 'prioritized' 21 | RainbowAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version 22 | RainbowAgent.optimizer = @tf.train.AdamOptimizer() 23 | 24 | # Note these parameters are different from C51's. 25 | tf.train.AdamOptimizer.learning_rate = 0.0000625 26 | tf.train.AdamOptimizer.epsilon = 0.00015 27 | 28 | atari_lib.create_atari_environment.game_name = 'Pong' 29 | # Deterministic ALE version used in the AAAI paper. 30 | atari_lib.create_atari_environment.sticky_actions = False 31 | create_agent.agent_name = 'rainbow' 32 | Runner.num_iterations = 250 33 | Runner.training_steps = 250000 # agent steps 34 | Runner.evaluation_steps = 125000 # agent steps 35 | Runner.max_steps_per_episode = 27000 # agent steps 36 | 37 | AtariPreprocessing.terminal_on_life_loss = True 38 | 39 | WrappedPrioritizedReplayBuffer.replay_capacity = 1000000 40 | WrappedPrioritizedReplayBuffer.batch_size = 64 41 | -------------------------------------------------------------------------------- /dopamine/agents/rainbow/configs/rainbow_profiling.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow Hessel et al. 
(2018), except for sticky_actions, 2 | # which was False (not using sticky actions) in the original paper. 3 | import dopamine.agents.rainbow.rainbow_agent 4 | import dopamine.discrete_domains.atari_lib 5 | import dopamine.discrete_domains.run_experiment 6 | import dopamine.replay_memory.prioritized_replay_buffer 7 | import gin.tf.external_configurables 8 | 9 | RainbowAgent.num_atoms = 51 10 | RainbowAgent.vmax = 10. 11 | RainbowAgent.gamma = 0.99 12 | RainbowAgent.update_horizon = 3 13 | RainbowAgent.min_replay_history = 100 # agent steps 14 | RainbowAgent.update_period = 4 15 | RainbowAgent.target_update_period = 8000 # agent steps 16 | RainbowAgent.epsilon_fn = @dqn_agent.identity_epsilon 17 | RainbowAgent.epsilon_train = 0.0 18 | RainbowAgent.epsilon_eval = 0.0 19 | RainbowAgent.epsilon_decay_period = 250000 # agent steps 20 | RainbowAgent.replay_scheme = 'prioritized' 21 | RainbowAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version 22 | RainbowAgent.optimizer = @tf.train.AdamOptimizer() 23 | 24 | # Note these parameters are different from C51's. 25 | tf.train.AdamOptimizer.learning_rate = 0.0000625 26 | tf.train.AdamOptimizer.epsilon = 0.00015 27 | 28 | atari_lib.create_atari_environment.game_name = 'Pong' 29 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017). 30 | atari_lib.create_atari_environment.sticky_actions = True 31 | create_runner.schedule = 'continuous_train' 32 | create_agent.agent_name = 'rainbow' 33 | Runner.num_iterations = 200 34 | Runner.training_steps = 250000 # agent steps 35 | Runner.evaluation_steps = 125000 # agent steps 36 | Runner.max_steps_per_episode = 27000 # agent steps 37 | 38 | WrappedPrioritizedReplayBuffer.replay_capacity = 1000000 39 | WrappedPrioritizedReplayBuffer.batch_size = 32 40 | -------------------------------------------------------------------------------- /dopamine/colab/README.md: -------------------------------------------------------------------------------- 1 | # Colabs 2 | 3 | This directory contains 4 | [`utils.py`](https://github.com/google/dopamine/blob/master/dopamine/colab/utils.py), 5 | which provides a number of useful utilities for loading experiment statistics. 6 | 7 | We also provide a set of colabs to help illustrate how you can use Dopamine. 8 | 9 | ## Agents 10 | 11 | In this 12 | [colab](https://colab.research.google.com/github/google/dopamine/blob/master/dopamine/colab/agents.ipynb) 13 | we illustrate how to create a new agent by either subclassing 14 | [`DQN`](https://github.com/google/dopamine/blob/master/dopamine/agents/dqn/dqn_agent.py) 15 | or by creating a new agent from scratch. 16 | 17 | ## Loading statistics 18 | 19 | In this 20 | [colab](https://colab.research.google.com/github/google/dopamine/blob/master/dopamine/colab/load_statistics.ipynb) 21 | we illustrate how to load and visualize the logs data produced by Dopamine. 22 | 23 | ## Visualizing trained agents 24 | In this 25 | [colab](https://colab.research.google.com/github/google/dopamine/blob/master/dopamine/colab/agent_visualizer.ipynb) 26 | we illustrate how to visualize a trained agent using the visualization utilities 27 | provided with Dopamine. 28 | 29 | In [this colab](https://colab.research.google.com/github/google/dopamine/blob/master/dopamine/colab/jax_agent_visualizer.ipynb) 30 | we can visualize trained agents' performance with the agents trained with the 31 | [JAX implementations](https://github.com/google/dopamine/tree/master/dopamine/jax). 
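All of these colabs read their data from the `base_dir` of a completed run. The
same statistics can also be loaded outside of colab with the helpers in
`utils.py`; the snippet below is a rough sketch (the log path and summary key
are illustrative, and the exact arguments should be checked against
`utils.py`):

```python
# Rough sketch: load per-iteration training returns from a local run.
from dopamine.colab import utils as colab_utils

LOG_PATH = '/tmp/dopamine_dqn/logs'  # illustrative: <base_dir>/logs of a run

# read_experiment aggregates the pickled iteration logs into a pandas
# DataFrame, keyed by iteration, with the requested summary statistics.
experiment_data = colab_utils.read_experiment(
    LOG_PATH, verbose=True, summary_keys=['train_episode_returns'])
print(experiment_data.head())
```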
32 | 33 | ## Visualizing with Tensorboard 34 | In this 35 | [colab](https://colab.research.google.com/github/google/dopamine/blob/master/dopamine/colab/tensorboard.ipynb) 36 | we illustrate how to download and visualize different agents with Tensorboard. 37 | 38 | ## Training on Cartpole 39 | In this 40 | [colab](https://colab.research.google.com/github/google/dopamine/blob/master/dopamine/colab/cartpole.ipynb) 41 | we illustrate how to train DQN and C51 on the Cartpole environment. 42 | -------------------------------------------------------------------------------- /dopamine/colab/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Dopamine Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /dopamine/continuous_domains/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """Copyright 2021 The Dopamine Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | -------------------------------------------------------------------------------- /dopamine/continuous_domains/train.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2021 The Dopamine Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | r"""The entry point for running a Dopamine agent on continuous control envs. 16 | 17 | """ 18 | 19 | from absl import app 20 | from absl import flags 21 | from absl import logging 22 | 23 | from dopamine.continuous_domains import run_experiment 24 | 25 | flags.DEFINE_string('base_dir', None, 26 | 'Base directory to host all required sub-directories.') 27 | flags.DEFINE_multi_string( 28 | 'gin_files', [], 'List of paths to gin configuration files (e.g.' 
29 | '"dopamine/jax/agents/sac/configs/sac.gin").') 30 | flags.DEFINE_multi_string( 31 | 'gin_bindings', [], 32 | 'Gin bindings to override the values set in the config files.') 33 | 34 | FLAGS = flags.FLAGS 35 | 36 | 37 | def main(unused_argv): 38 | """Main method. 39 | 40 | Args: 41 | unused_argv: Arguments (unused). 42 | """ 43 | logging.set_verbosity(logging.INFO) 44 | base_dir = FLAGS.base_dir 45 | gin_files = FLAGS.gin_files 46 | gin_bindings = FLAGS.gin_bindings 47 | 48 | run_experiment.load_gin_configs(gin_files, gin_bindings) 49 | runner = run_experiment.create_continuous_runner(base_dir) 50 | runner.run_experiment() 51 | 52 | 53 | if __name__ == '__main__': 54 | flags.mark_flag_as_required('base_dir') 55 | app.run(main) 56 | -------------------------------------------------------------------------------- /dopamine/discrete_domains/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """Copyright 2018 The Dopamine Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | -------------------------------------------------------------------------------- /dopamine/discrete_domains/iteration_statistics.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Dopamine Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """A class for storing iteration-specific metrics. 16 | """ 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | 23 | class IterationStatistics(object): 24 | """A class for storing iteration-specific metrics. 25 | 26 | The internal format is as follows: we maintain a mapping from keys to lists. 27 | Each list contains all the values corresponding to the given key. 28 | 29 | For example, self.data_lists['train_episode_returns'] might contain the 30 | per-episode returns achieved during this iteration. 31 | 32 | Attributes: 33 | data_lists: dict mapping each metric_name (str) to a list of said metric 34 | across episodes. 35 | """ 36 | 37 | def __init__(self): 38 | self.data_lists = {} 39 | 40 | def append(self, data_pairs): 41 | """Add the given values to their corresponding key-indexed lists. 42 | 43 | Args: 44 | data_pairs: A dictionary of key-value pairs to be recorded. 
45 | """ 46 | for key, value in data_pairs.items(): 47 | if key not in self.data_lists: 48 | self.data_lists[key] = [] 49 | self.data_lists[key].append(value) 50 | -------------------------------------------------------------------------------- /dopamine/discrete_domains/train.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Lint as: python3 3 | # Copyright 2018 The Dopamine Authors. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | r"""The entry point for running a Dopamine agent. 17 | 18 | """ 19 | 20 | from absl import app 21 | from absl import flags 22 | from absl import logging 23 | 24 | from dopamine.discrete_domains import run_experiment 25 | import tensorflow as tf 26 | 27 | 28 | flags.DEFINE_string('base_dir', None, 29 | 'Base directory to host all required sub-directories.') 30 | flags.DEFINE_multi_string( 31 | 'gin_files', [], 'List of paths to gin configuration files (e.g.' 32 | '"dopamine/agents/dqn/dqn.gin").') 33 | flags.DEFINE_multi_string( 34 | 'gin_bindings', [], 35 | 'Gin bindings to override the values set in the config files ' 36 | '(e.g. "DQNAgent.epsilon_train=0.1",' 37 | ' "create_environment.game_name="Pong"").') 38 | 39 | 40 | FLAGS = flags.FLAGS 41 | 42 | 43 | 44 | 45 | def main(unused_argv): 46 | """Main method. 47 | 48 | Args: 49 | unused_argv: Arguments (unused). 50 | """ 51 | logging.set_verbosity(logging.INFO) 52 | tf.compat.v1.disable_v2_behavior() 53 | 54 | base_dir = FLAGS.base_dir 55 | gin_files = FLAGS.gin_files 56 | gin_bindings = FLAGS.gin_bindings 57 | run_experiment.load_gin_configs(gin_files, gin_bindings) 58 | runner = run_experiment.create_runner(base_dir) 59 | runner.run_experiment() 60 | 61 | 62 | if __name__ == '__main__': 63 | flags.mark_flag_as_required('base_dir') 64 | app.run(main) 65 | -------------------------------------------------------------------------------- /dopamine/jax/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Dopamine Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /dopamine/jax/agents/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Dopamine Authors. 
3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /dopamine/jax/agents/dqn/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Dopamine Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /dopamine/jax/agents/dqn/configs/dqn.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow the classic Nature DQN, but we modify as necessary to 2 | # match those used in Rainbow (Hessel et al., 2018), to ensure apples-to-apples 3 | # comparison. 4 | import dopamine.discrete_domains.atari_lib 5 | import dopamine.discrete_domains.run_experiment 6 | import dopamine.jax.agents.dqn.dqn_agent 7 | import dopamine.replay_memory.circular_replay_buffer 8 | 9 | JaxDQNAgent.gamma = 0.99 10 | JaxDQNAgent.update_horizon = 1 11 | JaxDQNAgent.min_replay_history = 20000 # agent steps 12 | JaxDQNAgent.update_period = 4 13 | JaxDQNAgent.target_update_period = 8000 # agent steps 14 | JaxDQNAgent.epsilon_train = 0.01 15 | JaxDQNAgent.epsilon_eval = 0.001 16 | JaxDQNAgent.epsilon_decay_period = 250000 # agent steps 17 | # Note: We are using the Adam optimizer by default for JaxDQN, which differs 18 | # from the original NatureDQN and the dopamine TensorFlow version. In 19 | # the experiments we have ran, we have found that using Adam yields 20 | # improved training performance. 21 | JaxDQNAgent.optimizer = 'adam' 22 | create_optimizer.learning_rate = 6.25e-5 23 | create_optimizer.eps = 1.5e-4 24 | 25 | atari_lib.create_atari_environment.game_name = 'Pong' 26 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017). 
27 | atari_lib.create_atari_environment.sticky_actions = True 28 | create_runner.schedule = 'continuous_train' 29 | create_agent.agent_name = 'jax_dqn' 30 | create_agent.debug_mode = True 31 | Runner.num_iterations = 200 32 | Runner.training_steps = 250000 # agent steps 33 | Runner.evaluation_steps = 125000 # agent steps 34 | Runner.max_steps_per_episode = 27000 # agent steps 35 | 36 | OutOfGraphReplayBuffer.replay_capacity = 1000000 37 | OutOfGraphReplayBuffer.batch_size = 32 38 | -------------------------------------------------------------------------------- /dopamine/jax/agents/dqn/configs/dqn_acrobot.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters for a simple DQN-style Acrobot agent. The hyperparameters 2 | # chosen achieve reasonable performance. 3 | import dopamine.discrete_domains.gym_lib 4 | import dopamine.discrete_domains.run_experiment 5 | import dopamine.jax.agents.dqn.dqn_agent 6 | import dopamine.jax.networks 7 | import dopamine.replay_memory.circular_replay_buffer 8 | import flax 9 | 10 | JaxDQNAgent.observation_shape = %gym_lib.ACROBOT_OBSERVATION_SHAPE 11 | JaxDQNAgent.observation_dtype = %jax_networks.ACROBOT_OBSERVATION_DTYPE 12 | JaxDQNAgent.stack_size = %gym_lib.ACROBOT_STACK_SIZE 13 | JaxDQNAgent.network = @networks.ClassicControlDQNNetwork 14 | JaxDQNAgent.gamma = 0.99 15 | JaxDQNAgent.update_horizon = 1 16 | JaxDQNAgent.min_replay_history = 500 17 | JaxDQNAgent.update_period = 4 18 | JaxDQNAgent.target_update_period = 100 19 | JaxDQNAgent.epsilon_fn = @dqn_agent.identity_epsilon 20 | JaxDQNAgent.optimizer = 'adam' 21 | create_optimizer.learning_rate = 0.001 22 | create_optimizer.eps = 3.125e-4 23 | 24 | ClassicControlDQNNetwork.min_vals = %jax_networks.ACROBOT_MIN_VALS 25 | ClassicControlDQNNetwork.max_vals = %jax_networks.ACROBOT_MAX_VALS 26 | 27 | create_gym_environment.environment_name = 'Acrobot' 28 | create_gym_environment.version = 'v1' 29 | create_runner.schedule = 'continuous_train' 30 | create_agent.agent_name = 'jax_dqn' 31 | create_agent.debug_mode = True 32 | TrainRunner.create_environment_fn = @gym_lib.create_gym_environment 33 | Runner.num_iterations = 500 34 | Runner.training_steps = 1000 35 | Runner.evaluation_steps = 1000 36 | Runner.max_steps_per_episode = 500 37 | 38 | OutOfGraphReplayBuffer.replay_capacity = 50000 39 | OutOfGraphReplayBuffer.batch_size = 128 40 | -------------------------------------------------------------------------------- /dopamine/jax/agents/dqn/configs/dqn_cartpole.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters for a simple DQN-style Cartpole agent. The hyperparameters 2 | # chosen achieve reasonable performance. 
3 | import dopamine.discrete_domains.gym_lib 4 | import dopamine.discrete_domains.run_experiment 5 | import dopamine.jax.agents.dqn.dqn_agent 6 | import dopamine.jax.networks 7 | import dopamine.replay_memory.circular_replay_buffer 8 | import flax 9 | 10 | JaxDQNAgent.observation_shape = %gym_lib.CARTPOLE_OBSERVATION_SHAPE 11 | JaxDQNAgent.observation_dtype = %jax_networks.CARTPOLE_OBSERVATION_DTYPE 12 | JaxDQNAgent.stack_size = %gym_lib.CARTPOLE_STACK_SIZE 13 | JaxDQNAgent.network = @networks.ClassicControlDQNNetwork 14 | JaxDQNAgent.gamma = 0.99 15 | JaxDQNAgent.update_horizon = 1 16 | JaxDQNAgent.min_replay_history = 500 17 | JaxDQNAgent.update_period = 4 18 | JaxDQNAgent.target_update_period = 100 19 | JaxDQNAgent.epsilon_fn = @dqn_agent.identity_epsilon 20 | JaxDQNAgent.optimizer = 'adam' 21 | create_optimizer.learning_rate = 0.001 22 | create_optimizer.eps = 3.125e-4 23 | 24 | ClassicControlDQNNetwork.min_vals = %jax_networks.CARTPOLE_MIN_VALS 25 | ClassicControlDQNNetwork.max_vals = %jax_networks.CARTPOLE_MAX_VALS 26 | 27 | create_gym_environment.environment_name = 'CartPole' 28 | create_gym_environment.version = 'v0' 29 | create_runner.schedule = 'continuous_train' 30 | create_agent.agent_name = 'jax_dqn' 31 | create_agent.debug_mode = True 32 | TrainRunner.create_environment_fn = @gym_lib.create_gym_environment 33 | Runner.num_iterations = 500 34 | Runner.training_steps = 1000 35 | Runner.evaluation_steps = 1000 36 | Runner.max_steps_per_episode = 200 # Default max episode length. 37 | 38 | OutOfGraphReplayBuffer.replay_capacity = 50000 39 | OutOfGraphReplayBuffer.batch_size = 128 40 | -------------------------------------------------------------------------------- /dopamine/jax/agents/dqn/configs/dqn_lunarlander.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters for a simple DQN-style LunarLander agent. The hyperparameters 2 | # chosen achieve reasonable performance. 
3 | import dopamine.discrete_domains.gym_lib 4 | import dopamine.discrete_domains.run_experiment 5 | import dopamine.jax.agents.dqn.dqn_agent 6 | import dopamine.jax.networks 7 | import dopamine.replay_memory.circular_replay_buffer 8 | import flax 9 | 10 | JaxDQNAgent.observation_shape = %gym_lib.LUNAR_OBSERVATION_SHAPE 11 | JaxDQNAgent.observation_dtype = %jax_networks.LUNAR_OBSERVATION_DTYPE 12 | JaxDQNAgent.stack_size = %gym_lib.LUNAR_STACK_SIZE 13 | JaxDQNAgent.network = @networks.ClassicControlDQNNetwork 14 | JaxDQNAgent.gamma = 0.99 15 | JaxDQNAgent.update_horizon = 1 16 | JaxDQNAgent.min_replay_history = 500 17 | JaxDQNAgent.update_period = 4 18 | JaxDQNAgent.target_update_period = 100 19 | JaxDQNAgent.epsilon_fn = @dqn_agent.identity_epsilon 20 | JaxDQNAgent.optimizer = 'adam' 21 | create_optimizer.learning_rate = 0.001 22 | create_optimizer.eps = 3.125e-4 23 | 24 | create_gym_environment.environment_name = 'LunarLander' 25 | create_gym_environment.version = 'v2' 26 | create_runner.schedule = 'continuous_train' 27 | create_agent.agent_name = 'jax_dqn' 28 | create_agent.debug_mode = True 29 | TrainRunner.create_environment_fn = @gym_lib.create_gym_environment 30 | Runner.num_iterations = 125 31 | Runner.training_steps = 4000 32 | Runner.evaluation_steps = 1000 33 | Runner.max_steps_per_episode = 1000 34 | 35 | OutOfGraphReplayBuffer.replay_capacity = 50000 36 | OutOfGraphReplayBuffer.batch_size = 128 37 | -------------------------------------------------------------------------------- /dopamine/jax/agents/dqn/configs/dqn_mountaincar.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters for a simple DQN-style MountainCar agent. The hyperparameters 2 | # chosen achieve reasonable performance. 3 | import dopamine.discrete_domains.gym_lib 4 | import dopamine.discrete_domains.run_experiment 5 | import dopamine.jax.agents.dqn.dqn_agent 6 | import dopamine.jax.networks 7 | import dopamine.replay_memory.circular_replay_buffer 8 | import flax 9 | 10 | JaxDQNAgent.observation_shape = %gym_lib.MOUNTAINCAR_OBSERVATION_SHAPE 11 | JaxDQNAgent.observation_dtype = %jax_networks.MOUNTAINCAR_OBSERVATION_DTYPE 12 | JaxDQNAgent.stack_size = %gym_lib.MOUNTAINCAR_STACK_SIZE 13 | JaxDQNAgent.network = @networks.ClassicControlDQNNetwork 14 | JaxDQNAgent.gamma = 0.99 15 | JaxDQNAgent.update_horizon = 1 16 | JaxDQNAgent.min_replay_history = 500 17 | JaxDQNAgent.update_period = 4 18 | JaxDQNAgent.target_update_period = 100 19 | JaxDQNAgent.epsilon_fn = @dqn_agent.identity_epsilon 20 | JaxDQNAgent.optimizer = 'adam' 21 | create_optimizer.learning_rate = 0.001 22 | create_optimizer.eps = 3.125e-4 23 | 24 | ClassicControlDQNNetwork.min_vals = %jax_networks.MOUNTAINCAR_MIN_VALS 25 | ClassicControlDQNNetwork.max_vals = %jax_networks.MOUNTAINCAR_MAX_VALS 26 | 27 | create_gym_environment.environment_name = 'MountainCar' 28 | create_gym_environment.version = 'v0' 29 | create_runner.schedule = 'continuous_train' 30 | create_agent.agent_name = 'jax_dqn' 31 | create_agent.debug_mode = True 32 | TrainRunner.create_environment_fn = @gym_lib.create_gym_environment 33 | Runner.num_iterations = 500 34 | Runner.training_steps = 1000 35 | Runner.max_steps_per_episode = 600 # Default max episode length. 
36 | 37 | OutOfGraphReplayBuffer.replay_capacity = 50000 38 | OutOfGraphReplayBuffer.batch_size = 128 39 | -------------------------------------------------------------------------------- /dopamine/jax/agents/dqn/configs/dqn_profiling.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow the classic Nature DQN, but we modify as necessary to 2 | # match those used in Rainbow (Hessel et al., 2018), to ensure apples-to-apples 3 | # comparison. 4 | import dopamine.discrete_domains.atari_lib 5 | import dopamine.discrete_domains.run_experiment 6 | import dopamine.jax.agents.dqn.dqn_agent 7 | import dopamine.replay_memory.circular_replay_buffer 8 | 9 | JaxDQNAgent.gamma = 0.99 10 | JaxDQNAgent.update_horizon = 1 11 | JaxDQNAgent.min_replay_history = 100 # agent steps 12 | JaxDQNAgent.update_period = 4 13 | JaxDQNAgent.target_update_period = 8000 # agent steps 14 | JaxDQNAgent.epsilon_fn = @dqn_agent.identity_epsilon 15 | JaxDQNAgent.epsilon_train = 0.0 16 | JaxDQNAgent.epsilon_eval = 0.0 17 | 18 | atari_lib.create_atari_environment.game_name = 'Pong' 19 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017). 20 | atari_lib.create_atari_environment.sticky_actions = True 21 | create_runner.schedule = 'continuous_train' 22 | create_agent.agent_name = 'jax_dqn' 23 | create_agent.debug_mode = True 24 | Runner.num_iterations = 200 25 | Runner.training_steps = 250000 # agent steps 26 | Runner.evaluation_steps = 125000 # agent steps 27 | Runner.max_steps_per_episode = 27000 # agent steps 28 | 29 | OutOfGraphReplayBuffer.replay_capacity = 1000000 30 | OutOfGraphReplayBuffer.batch_size = 32 31 | -------------------------------------------------------------------------------- /dopamine/jax/agents/full_rainbow/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Dopamine Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /dopamine/jax/agents/full_rainbow/configs/full_rainbow.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow Hessel et al. (2018), except for sticky_actions, 2 | # which was False (not using sticky actions) in the original paper. 
3 | import dopamine.jax.agents.full_rainbow.full_rainbow_agent 4 | import dopamine.jax.agents.dqn.dqn_agent 5 | import dopamine.jax.networks 6 | import dopamine.discrete_domains.atari_lib 7 | import dopamine.discrete_domains.run_experiment 8 | import dopamine.replay_memory.prioritized_replay_buffer 9 | 10 | JaxDQNAgent.gamma = 0.99 11 | JaxDQNAgent.update_horizon = 3 12 | JaxDQNAgent.min_replay_history = 20000 # agent steps 13 | JaxDQNAgent.update_period = 4 14 | JaxDQNAgent.target_update_period = 8000 # agent steps 15 | JaxDQNAgent.epsilon_train = 0.01 16 | JaxDQNAgent.epsilon_eval = 0.001 17 | JaxDQNAgent.epsilon_decay_period = 250000 # agent steps 18 | JaxDQNAgent.optimizer = 'adam' 19 | 20 | JaxFullRainbowAgent.noisy = True 21 | JaxFullRainbowAgent.dueling = True 22 | JaxFullRainbowAgent.double_dqn = True 23 | JaxFullRainbowAgent.num_atoms = 51 24 | JaxFullRainbowAgent.vmax = 10. 25 | JaxFullRainbowAgent.replay_scheme = 'prioritized' 26 | 27 | # Note these parameters are different from C51's. 28 | create_optimizer.learning_rate = 0.0000625 29 | create_optimizer.eps = 0.00015 30 | 31 | atari_lib.create_atari_environment.game_name = 'Pong' 32 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017). 33 | atari_lib.create_atari_environment.sticky_actions = True 34 | create_runner.schedule = 'continuous_train' 35 | create_agent.agent_name = 'full_rainbow' 36 | create_agent.debug_mode = True 37 | Runner.num_iterations = 200 38 | Runner.training_steps = 250000 # agent steps 39 | Runner.evaluation_steps = 125000 # agent steps 40 | Runner.max_steps_per_episode = 27000 # agent steps 41 | 42 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 1000000 43 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32 44 | -------------------------------------------------------------------------------- /dopamine/jax/agents/full_rainbow/configs/full_rainbow_profiling.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow Hessel et al. (2018), except for sticky_actions, 2 | # which was False (not using sticky actions) in the original paper. 3 | import dopamine.jax.agents.full_rainbow.full_rainbow_agent 4 | import dopamine.jax.agents.dqn.dqn_agent 5 | import dopamine.jax.networks 6 | import dopamine.discrete_domains.atari_lib 7 | import dopamine.discrete_domains.run_experiment 8 | import dopamine.replay_memory.prioritized_replay_buffer 9 | 10 | JaxDQNAgent.gamma = 0.99 11 | JaxDQNAgent.update_horizon = 3 12 | JaxDQNAgent.min_replay_history = 100 # agent steps 13 | JaxDQNAgent.update_period = 4 14 | JaxDQNAgent.target_update_period = 8000 # agent steps 15 | JaxDQNAgent.epsilon_train = 0.0 16 | JaxDQNAgent.epsilon_eval = 0.0 17 | JaxDQNAgent.epsilon_decay_period = 250000 # agent steps 18 | JaxDQNAgent.optimizer = 'adam' 19 | 20 | JaxFullRainbowAgent.num_atoms = 51 21 | JaxFullRainbowAgent.vmax = 10. 22 | JaxFullRainbowAgent.replay_scheme = 'prioritized' 23 | JaxFullRainbowAgent.noisy = True 24 | JaxFullRainbowAgent.dueling = True 25 | JaxFullRainbowAgent.double_dqn = True 26 | JaxFullRainbowAgent.epsilon_fn = @dqn_agent.identity_epsilon 27 | 28 | # Note these parameters are different from C51's. 29 | create_optimizer.learning_rate = 0.0000625 30 | create_optimizer.eps = 0.00015 31 | 32 | atari_lib.create_atari_environment.game_name = 'Pong' 33 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017). 
34 | atari_lib.create_atari_environment.sticky_actions = True 35 | create_runner.schedule = 'continuous_train' 36 | create_agent.agent_name = 'jax_rainbow' 37 | create_agent.debug_mode = True 38 | Runner.num_iterations = 200 39 | Runner.training_steps = 250000 # agent steps 40 | Runner.evaluation_steps = 125000 # agent steps 41 | Runner.max_steps_per_episode = 27000 # agent steps 42 | 43 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 1000000 44 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32 45 | -------------------------------------------------------------------------------- /dopamine/jax/agents/implicit_quantile/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Dopamine Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /dopamine/jax/agents/implicit_quantile/configs/implicit_quantile.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow Dabney et al. (2018), but we modify as necessary to 2 | # match those used in Rainbow (Hessel et al., 2018), to ensure apples-to-apples 3 | # comparison. 4 | import dopamine.jax.agents.implicit_quantile.implicit_quantile_agent 5 | import dopamine.discrete_domains.atari_lib 6 | import dopamine.discrete_domains.run_experiment 7 | import dopamine.replay_memory.circular_replay_buffer 8 | 9 | JaxImplicitQuantileAgent.kappa = 1.0 10 | JaxImplicitQuantileAgent.num_tau_samples = 64 11 | JaxImplicitQuantileAgent.num_tau_prime_samples = 64 12 | JaxImplicitQuantileAgent.num_quantile_samples = 32 13 | JaxImplicitQuantileAgent.gamma = 0.99 14 | JaxImplicitQuantileAgent.update_horizon = 3 15 | JaxImplicitQuantileAgent.min_replay_history = 20000 # agent steps 16 | JaxImplicitQuantileAgent.update_period = 4 17 | JaxImplicitQuantileAgent.target_update_period = 8000 # agent steps 18 | JaxImplicitQuantileAgent.epsilon_train = 0.01 19 | JaxImplicitQuantileAgent.epsilon_eval = 0.001 20 | JaxImplicitQuantileAgent.epsilon_decay_period = 250000 # agent steps 21 | JaxImplicitQuantileAgent.optimizer = 'adam' 22 | create_optimizer.learning_rate = 0.00005 23 | create_optimizer.eps = 0.0003125 24 | 25 | atari_lib.create_atari_environment.game_name = 'Pong' 26 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017). 
27 | atari_lib.create_atari_environment.sticky_actions = True 28 | create_runner.schedule = 'continuous_train' 29 | create_agent.agent_name = 'jax_implicit_quantile' 30 | create_agent.debug_mode = True 31 | Runner.num_iterations = 200 32 | Runner.training_steps = 250000 33 | Runner.evaluation_steps = 125000 34 | Runner.max_steps_per_episode = 27000 35 | 36 | OutOfGraphReplayBuffer.replay_capacity = 1000000 37 | OutOfGraphReplayBuffer.batch_size = 32 38 | -------------------------------------------------------------------------------- /dopamine/jax/agents/implicit_quantile/configs/implicit_quantile_profiling.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow Dabney et al. (2018), but we modify as necessary to 2 | # match those used in Rainbow (Hessel et al., 2018), to ensure apples-to-apples 3 | # comparison. 4 | import dopamine.jax.agents.implicit_quantile.implicit_quantile_agent 5 | import dopamine.discrete_domains.atari_lib 6 | import dopamine.discrete_domains.run_experiment 7 | import dopamine.jax.agents.dqn.dqn_agent 8 | import dopamine.replay_memory.circular_replay_buffer 9 | 10 | JaxImplicitQuantileAgent.kappa = 1.0 11 | JaxImplicitQuantileAgent.num_tau_samples = 64 12 | JaxImplicitQuantileAgent.num_tau_prime_samples = 64 13 | JaxImplicitQuantileAgent.num_quantile_samples = 32 14 | JaxImplicitQuantileAgent.gamma = 0.99 15 | JaxImplicitQuantileAgent.update_horizon = 3 16 | JaxImplicitQuantileAgent.min_replay_history = 100 # agent steps 17 | JaxImplicitQuantileAgent.update_period = 4 18 | JaxImplicitQuantileAgent.target_update_period = 8000 # agent steps 19 | JaxImplicitQuantileAgent.epsilon_fn = @dqn_agent.identity_epsilon 20 | JaxImplicitQuantileAgent.epsilon_train = 0.0 21 | JaxImplicitQuantileAgent.epsilon_eval = 0.0 22 | JaxImplicitQuantileAgent.optimizer = 'adam' 23 | create_optimizer.learning_rate = 0.00005 24 | create_optimizer.eps = 0.0003125 25 | 26 | atari_lib.create_atari_environment.game_name = 'Pong' 27 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017). 28 | atari_lib.create_atari_environment.sticky_actions = True 29 | create_runner.schedule = 'continuous_train' 30 | create_agent.agent_name = 'jax_implicit_quantile' 31 | create_agent.debug_mode = True 32 | Runner.num_iterations = 200 33 | Runner.training_steps = 250000 34 | Runner.evaluation_steps = 125000 35 | Runner.max_steps_per_episode = 27000 36 | 37 | OutOfGraphReplayBuffer.replay_capacity = 1000000 38 | OutOfGraphReplayBuffer.batch_size = 32 39 | -------------------------------------------------------------------------------- /dopamine/jax/agents/quantile/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Dopamine Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | -------------------------------------------------------------------------------- /dopamine/jax/agents/quantile/configs/quantile.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow Dabney et al. (2017) but we modify as necessary to 2 | # match those used in Rainbow (Hessel et al., 2018), to ensure apples-to-apples 3 | # comparison. 4 | import dopamine.jax.agents.quantile.quantile_agent 5 | import dopamine.discrete_domains.atari_lib 6 | import dopamine.discrete_domains.run_experiment 7 | import dopamine.replay_memory.prioritized_replay_buffer 8 | 9 | JaxQuantileAgent.kappa = 1.0 10 | JaxQuantileAgent.num_atoms = 200 11 | JaxQuantileAgent.gamma = 0.99 12 | JaxQuantileAgent.update_horizon = 3 13 | JaxQuantileAgent.min_replay_history = 20000 # agent steps 14 | JaxQuantileAgent.update_period = 4 15 | JaxQuantileAgent.target_update_period = 8000 # agent steps 16 | JaxQuantileAgent.epsilon_train = 0.01 17 | JaxQuantileAgent.epsilon_eval = 0.001 18 | JaxQuantileAgent.epsilon_decay_period = 250000 # agent steps 19 | JaxQuantileAgent.replay_scheme = 'prioritized' 20 | JaxQuantileAgent.optimizer = 'adam' 21 | 22 | create_optimizer.learning_rate = 0.00005 23 | create_optimizer.eps = 0.0003125 24 | 25 | atari_lib.create_atari_environment.game_name = 'Pong' 26 | atari_lib.create_atari_environment.sticky_actions = True 27 | create_runner.schedule = 'continuous_train' 28 | create_agent.agent_name = 'jax_quantile' 29 | create_agent.debug_mode = True 30 | Runner.num_iterations = 200 31 | Runner.training_steps = 250000 32 | Runner.evaluation_steps = 125000 33 | Runner.max_steps_per_episode = 27000 34 | 35 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 1000000 36 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32 37 | -------------------------------------------------------------------------------- /dopamine/jax/agents/quantile/configs/quantile_profiling.gin: -------------------------------------------------------------------------------- 1 | import dopamine.jax.agents.quantile.quantile_agent 2 | import dopamine.discrete_domains.atari_lib 3 | import dopamine.discrete_domains.run_experiment 4 | import dopamine.jax.agents.dqn.dqn_agent 5 | import dopamine.replay_memory.prioritized_replay_buffer 6 | 7 | JaxQuantileAgent.kappa = 1.0 8 | JaxQuantileAgent.num_atoms = 200 9 | JaxQuantileAgent.gamma = 0.99 10 | JaxQuantileAgent.update_horizon = 3 11 | JaxQuantileAgent.min_replay_history = 100 # agent steps 12 | JaxQuantileAgent.update_period = 4 13 | JaxQuantileAgent.target_update_period = 8000 # agent steps 14 | JaxQuantileAgent.epsilon_fn = @dqn_agent.identity_epsilon 15 | JaxQuantileAgent.epsilon_train = 0.0 16 | JaxQuantileAgent.epsilon_eval = 0.0 17 | JaxQuantileAgent.replay_scheme = 'prioritized' 18 | JaxQuantileAgent.optimizer = 'adam' 19 | 20 | create_optimizer.learning_rate = 0.00005 21 | create_optimizer.eps = 0.0003125 22 | 23 | atari_lib.create_atari_environment.game_name = 'Pong' 24 | atari_lib.create_atari_environment.sticky_actions = True 25 | create_runner.schedule = 'continuous_train' 26 | create_agent.agent_name = 'jax_quantile' 27 | create_agent.debug_mode = True 28 | Runner.num_iterations = 200 29 | Runner.training_steps = 250000 30 | Runner.evaluation_steps = 125000 31 | Runner.max_steps_per_episode = 27000 32 | 33 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 1000000 34 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32 35 | -------------------------------------------------------------------------------- 
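The gin files in this tree are not run on their own; they are consumed by the entry point in dopamine/discrete_domains/train.py shown earlier, which reads the --base_dir, --gin_files, and --gin_bindings flags. Below is a minimal sketch of the same flow driven from Python, using the calls from main() in train.py; the output directory, the choice of quantile.gin, and the override binding are illustrative placeholders, not values taken from the repository.

# Minimal sketch (assumed paths and values): load a gin config, build the
# runner, and run the experiment, following the calls made by main() in
# dopamine/discrete_domains/train.py.
from dopamine.discrete_domains import run_experiment

base_dir = '/tmp/dopamine_runs/jax_quantile'  # hypothetical output directory
gin_files = ['dopamine/jax/agents/quantile/configs/quantile.gin']
gin_bindings = ['JaxQuantileAgent.min_replay_history = 1000']  # optional override

run_experiment.load_gin_configs(gin_files, gin_bindings)
runner = run_experiment.create_runner(base_dir)  # schedule comes from the gin file
runner.run_experiment()

The command-line entry point does the same thing, with the gin file paths and bindings passed through the flags defined in train.py.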
/dopamine/jax/agents/rainbow/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Dopamine Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /dopamine/jax/agents/rainbow/configs/c51.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow the settings from Bellemare et al. (2017), but we 2 | # modify as necessary to match those used in Rainbow (Hessel et al., 2018), to 3 | # ensure apples-to-apples comparison. 4 | import dopamine.jax.agents.rainbow.rainbow_agent 5 | import dopamine.discrete_domains.atari_lib 6 | import dopamine.discrete_domains.run_experiment 7 | import dopamine.replay_memory.prioritized_replay_buffer 8 | 9 | JaxRainbowAgent.num_atoms = 51 10 | JaxRainbowAgent.vmax = 10. 11 | JaxRainbowAgent.gamma = 0.99 12 | JaxRainbowAgent.update_horizon = 1 13 | JaxRainbowAgent.min_replay_history = 20000 # agent steps 14 | JaxRainbowAgent.update_period = 4 15 | JaxRainbowAgent.target_update_period = 8000 # agent steps 16 | JaxRainbowAgent.epsilon_train = 0.01 17 | JaxRainbowAgent.epsilon_eval = 0.001 18 | JaxRainbowAgent.epsilon_decay_period = 250000 # agent steps 19 | JaxRainbowAgent.replay_scheme = 'uniform' 20 | 21 | atari_lib.create_atari_environment.game_name = 'Pong' 22 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017). 23 | atari_lib.create_atari_environment.sticky_actions = True 24 | create_runner.schedule = 'continuous_train' 25 | create_agent.agent_name = 'jax_rainbow' 26 | create_agent.debug_mode = True 27 | Runner.num_iterations = 200 28 | Runner.training_steps = 250000 # agent steps 29 | Runner.evaluation_steps = 125000 # agent steps 30 | Runner.max_steps_per_episode = 27000 # agent steps 31 | 32 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 1000000 33 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32 34 | -------------------------------------------------------------------------------- /dopamine/jax/agents/rainbow/configs/c51_acrobot.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters for a simple C51-style Acrobot agent. The hyperparameters 2 | # chosen achieve reasonable performance. 3 | import dopamine.jax.agents.rainbow.rainbow_agent 4 | import dopamine.jax.networks 5 | import dopamine.discrete_domains.gym_lib 6 | import dopamine.discrete_domains.run_experiment 7 | import dopamine.replay_memory.prioritized_replay_buffer 8 | 9 | JaxRainbowAgent.observation_shape = %gym_lib.ACROBOT_OBSERVATION_SHAPE 10 | JaxRainbowAgent.observation_dtype = %jax_networks.ACROBOT_OBSERVATION_DTYPE 11 | JaxRainbowAgent.stack_size = %gym_lib.ACROBOT_STACK_SIZE 12 | JaxRainbowAgent.network = @networks.ClassicControlRainbowNetwork 13 | JaxRainbowAgent.num_atoms = 51 14 | JaxRainbowAgent.vmax = 10. 
15 | JaxRainbowAgent.gamma = 0.99 16 | JaxRainbowAgent.update_horizon = 1 17 | JaxRainbowAgent.min_replay_history = 500 18 | JaxRainbowAgent.update_period = 4 19 | JaxRainbowAgent.target_update_period = 100 20 | JaxRainbowAgent.epsilon_fn = @dqn_agent.identity_epsilon 21 | JaxRainbowAgent.replay_scheme = 'uniform' 22 | create_optimizer.learning_rate = 0.1 23 | create_optimizer.eps = 0.0003125 24 | 25 | ClassicControlRainbowNetwork.min_vals = %jax_networks.ACROBOT_MIN_VALS 26 | ClassicControlRainbowNetwork.max_vals = %jax_networks.ACROBOT_MAX_VALS 27 | 28 | create_gym_environment.environment_name = 'Acrobot' 29 | create_gym_environment.version = 'v1' 30 | create_runner.schedule = 'continuous_train' 31 | create_agent.agent_name = 'jax_rainbow' 32 | TrainRunner.create_environment_fn = @gym_lib.create_gym_environment 33 | Runner.num_iterations = 500 34 | Runner.training_steps = 1000 35 | Runner.evaluation_steps = 1000 36 | Runner.max_steps_per_episode = 500 37 | 38 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 50000 39 | OutOfGraphPrioritizedReplayBuffer.batch_size = 128 40 | -------------------------------------------------------------------------------- /dopamine/jax/agents/rainbow/configs/c51_cartpole.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters for a simple C51-style Cartpole agent. The hyperparameters 2 | # chosen achieve reasonable performance. 3 | import dopamine.jax.agents.rainbow.rainbow_agent 4 | import dopamine.jax.networks 5 | import dopamine.discrete_domains.gym_lib 6 | import dopamine.discrete_domains.run_experiment 7 | import dopamine.replay_memory.prioritized_replay_buffer 8 | 9 | JaxRainbowAgent.observation_shape = %gym_lib.CARTPOLE_OBSERVATION_SHAPE 10 | JaxRainbowAgent.observation_dtype = %jax_networks.CARTPOLE_OBSERVATION_DTYPE 11 | JaxRainbowAgent.stack_size = %gym_lib.CARTPOLE_STACK_SIZE 12 | JaxRainbowAgent.network = @networks.ClassicControlRainbowNetwork 13 | JaxRainbowAgent.num_atoms = 201 14 | JaxRainbowAgent.vmax = 100. 15 | JaxRainbowAgent.gamma = 0.99 16 | JaxRainbowAgent.epsilon_eval = 0. 17 | JaxRainbowAgent.epsilon_train = 0.01 18 | JaxRainbowAgent.update_horizon = 1 19 | JaxRainbowAgent.min_replay_history = 500 20 | JaxRainbowAgent.update_period = 1 21 | JaxRainbowAgent.target_update_period = 1 22 | JaxRainbowAgent.epsilon_fn = @dqn_agent.identity_epsilon 23 | JaxRainbowAgent.replay_scheme = 'uniform' 24 | create_optimizer.learning_rate = 0.00001 25 | create_optimizer.eps = 0.00000390625 26 | 27 | ClassicControlRainbowNetwork.min_vals = %jax_networks.CARTPOLE_MIN_VALS 28 | ClassicControlRainbowNetwork.max_vals = %jax_networks.CARTPOLE_MAX_VALS 29 | 30 | create_gym_environment.environment_name = 'CartPole' 31 | create_gym_environment.version = 'v0' 32 | create_runner.schedule = 'continuous_train' 33 | create_agent.agent_name = 'jax_rainbow' 34 | TrainRunner.create_environment_fn = @gym_lib.create_gym_environment 35 | Runner.num_iterations = 400 36 | Runner.training_steps = 1000 37 | Runner.evaluation_steps = 1000 38 | Runner.max_steps_per_episode = 200 # Default max episode length. 39 | 40 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 50000 41 | OutOfGraphPrioritizedReplayBuffer.batch_size = 128 42 | -------------------------------------------------------------------------------- /dopamine/jax/agents/rainbow/configs/c51_profiling.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow the settings from Bellemare et al. 
(2017), but we 2 | # modify as necessary to match those used in Rainbow (Hessel et al., 2018), to 3 | # ensure apples-to-apples comparison. 4 | import dopamine.jax.agents.rainbow.rainbow_agent 5 | import dopamine.discrete_domains.atari_lib 6 | import dopamine.discrete_domains.run_experiment 7 | import dopamine.jax.agents.dqn.dqn_agent 8 | import dopamine.replay_memory.prioritized_replay_buffer 9 | 10 | JaxRainbowAgent.num_atoms = 51 11 | JaxRainbowAgent.vmax = 10. 12 | JaxRainbowAgent.gamma = 0.99 13 | JaxRainbowAgent.update_horizon = 1 14 | JaxRainbowAgent.min_replay_history = 100 # agent steps 15 | JaxRainbowAgent.update_period = 4 16 | JaxRainbowAgent.target_update_period = 8000 # agent steps 17 | JaxRainbowAgent.epsilon_fn = @dqn_agent.identity_epsilon 18 | JaxRainbowAgent.epsilon_train = 0.0 19 | JaxRainbowAgent.epsilon_eval = 0.0 20 | JaxRainbowAgent.replay_scheme = 'uniform' 21 | 22 | atari_lib.create_atari_environment.game_name = 'Pong' 23 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017). 24 | atari_lib.create_atari_environment.sticky_actions = True 25 | create_runner.schedule = 'continuous_train' 26 | create_agent.agent_name = 'jax_rainbow' 27 | create_agent.debug_mode = True 28 | Runner.num_iterations = 200 29 | Runner.training_steps = 250000 # agent steps 30 | Runner.evaluation_steps = 125000 # agent steps 31 | Runner.max_steps_per_episode = 27000 # agent steps 32 | 33 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 1000000 34 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32 35 | -------------------------------------------------------------------------------- /dopamine/jax/agents/rainbow/configs/rainbow.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow Hessel et al. (2018), except for sticky_actions, 2 | # which was False (not using sticky actions) in the original paper. 3 | import dopamine.jax.agents.rainbow.rainbow_agent 4 | import dopamine.discrete_domains.atari_lib 5 | import dopamine.discrete_domains.run_experiment 6 | import dopamine.replay_memory.prioritized_replay_buffer 7 | 8 | JaxRainbowAgent.num_atoms = 51 9 | JaxRainbowAgent.vmax = 10. 10 | JaxRainbowAgent.gamma = 0.99 11 | JaxRainbowAgent.update_horizon = 3 12 | JaxRainbowAgent.min_replay_history = 20000 # agent steps 13 | JaxRainbowAgent.update_period = 4 14 | JaxRainbowAgent.target_update_period = 8000 # agent steps 15 | JaxRainbowAgent.epsilon_train = 0.01 16 | JaxRainbowAgent.epsilon_eval = 0.001 17 | JaxRainbowAgent.epsilon_decay_period = 250000 # agent steps 18 | JaxRainbowAgent.replay_scheme = 'prioritized' 19 | 20 | # Note these parameters are different from C51's. 21 | create_optimizer.learning_rate = 0.0000625 22 | create_optimizer.eps = 0.00015 23 | 24 | atari_lib.create_atari_environment.game_name = 'Pong' 25 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017). 
26 | atari_lib.create_atari_environment.sticky_actions = True 27 | create_runner.schedule = 'continuous_train' 28 | create_agent.agent_name = 'jax_rainbow' 29 | create_agent.debug_mode = True 30 | Runner.num_iterations = 200 31 | Runner.training_steps = 250000 # agent steps 32 | Runner.evaluation_steps = 125000 # agent steps 33 | Runner.max_steps_per_episode = 27000 # agent steps 34 | 35 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 1000000 36 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32 37 | -------------------------------------------------------------------------------- /dopamine/jax/agents/rainbow/configs/rainbow_acrobot.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters for a simple Rainbow-style Acrobot agent. The hyperparameters 2 | # chosen achieve reasonable performance. 3 | import dopamine.jax.agents.rainbow.rainbow_agent 4 | import dopamine.jax.networks 5 | import dopamine.discrete_domains.gym_lib 6 | import dopamine.discrete_domains.run_experiment 7 | import dopamine.replay_memory.prioritized_replay_buffer 8 | 9 | JaxRainbowAgent.observation_shape = %gym_lib.ACROBOT_OBSERVATION_SHAPE 10 | JaxRainbowAgent.observation_dtype = %jax_networks.ACROBOT_OBSERVATION_DTYPE 11 | JaxRainbowAgent.stack_size = %gym_lib.ACROBOT_STACK_SIZE 12 | JaxRainbowAgent.network = @networks.ClassicControlRainbowNetwork 13 | JaxRainbowAgent.num_atoms = 51 14 | JaxRainbowAgent.vmax = 10. 15 | JaxRainbowAgent.gamma = 0.99 16 | JaxRainbowAgent.update_horizon = 3 17 | JaxRainbowAgent.min_replay_history = 500 18 | JaxRainbowAgent.update_period = 4 19 | JaxRainbowAgent.target_update_period = 100 20 | JaxRainbowAgent.epsilon_fn = @dqn_agent.identity_epsilon 21 | JaxRainbowAgent.replay_scheme = 'prioritized' 22 | create_optimizer.learning_rate = 0.09 23 | create_optimizer.eps = 0.0003125 24 | 25 | ClassicControlRainbowNetwork.min_vals = %jax_networks.ACROBOT_MIN_VALS 26 | ClassicControlRainbowNetwork.max_vals = %jax_networks.ACROBOT_MAX_VALS 27 | 28 | create_gym_environment.environment_name = 'Acrobot' 29 | create_gym_environment.version = 'v1' 30 | create_runner.schedule = 'continuous_train' 31 | create_agent.agent_name = 'jax_rainbow' 32 | TrainRunner.create_environment_fn = @gym_lib.create_gym_environment 33 | Runner.num_iterations = 500 34 | Runner.training_steps = 1000 35 | Runner.evaluation_steps = 1000 36 | Runner.max_steps_per_episode = 500 37 | 38 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 50000 39 | OutOfGraphPrioritizedReplayBuffer.batch_size = 128 40 | -------------------------------------------------------------------------------- /dopamine/jax/agents/rainbow/configs/rainbow_cartpole.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters for a simple Rainbow-style Cartpole agent. The 2 | # hyperparameters chosen achieve reasonable performance. 3 | import dopamine.jax.agents.rainbow.rainbow_agent 4 | import dopamine.jax.networks 5 | import dopamine.discrete_domains.gym_lib 6 | import dopamine.discrete_domains.run_experiment 7 | import dopamine.replay_memory.prioritized_replay_buffer 8 | 9 | JaxRainbowAgent.observation_shape = %gym_lib.CARTPOLE_OBSERVATION_SHAPE 10 | JaxRainbowAgent.observation_dtype = %jax_networks.CARTPOLE_OBSERVATION_DTYPE 11 | JaxRainbowAgent.stack_size = %gym_lib.CARTPOLE_STACK_SIZE 12 | JaxRainbowAgent.network = @networks.ClassicControlRainbowNetwork 13 | JaxRainbowAgent.num_atoms = 51 14 | JaxRainbowAgent.vmax = 10. 
15 | JaxRainbowAgent.gamma = 0.99 16 | JaxRainbowAgent.update_horizon = 3 17 | JaxRainbowAgent.min_replay_history = 500 18 | JaxRainbowAgent.update_period = 4 19 | JaxRainbowAgent.target_update_period = 100 20 | JaxRainbowAgent.epsilon_fn = @dqn_agent.identity_epsilon 21 | JaxRainbowAgent.replay_scheme = 'prioritized' 22 | create_optimizer.learning_rate = 0.09 23 | create_optimizer.eps = 0.0003125 24 | 25 | ClassicControlRainbowNetwork.min_vals = %jax_networks.CARTPOLE_MIN_VALS 26 | ClassicControlRainbowNetwork.max_vals = %jax_networks.CARTPOLE_MAX_VALS 27 | 28 | create_gym_environment.environment_name = 'CartPole' 29 | create_gym_environment.version = 'v0' 30 | create_runner.schedule = 'continuous_train' 31 | create_agent.agent_name = 'jax_rainbow' 32 | TrainRunner.create_environment_fn = @gym_lib.create_gym_environment 33 | Runner.num_iterations = 500 34 | Runner.training_steps = 1000 35 | Runner.evaluation_steps = 1000 36 | Runner.max_steps_per_episode = 200 # Default max episode length. 37 | 38 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 50000 39 | OutOfGraphPrioritizedReplayBuffer.batch_size = 128 40 | -------------------------------------------------------------------------------- /dopamine/jax/agents/rainbow/configs/rainbow_profiling.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow Hessel et al. (2018), except for sticky_actions, 2 | # which was False (not using sticky actions) in the original paper. 3 | import dopamine.jax.agents.rainbow.rainbow_agent 4 | import dopamine.discrete_domains.atari_lib 5 | import dopamine.discrete_domains.run_experiment 6 | import dopamine.jax.agents.dqn.dqn_agent 7 | import dopamine.replay_memory.prioritized_replay_buffer 8 | 9 | JaxRainbowAgent.num_atoms = 51 10 | JaxRainbowAgent.vmax = 10. 11 | JaxRainbowAgent.gamma = 0.99 12 | JaxRainbowAgent.update_horizon = 3 13 | JaxRainbowAgent.min_replay_history = 100 # agent steps 14 | JaxRainbowAgent.update_period = 4 15 | JaxRainbowAgent.target_update_period = 8000 # agent steps 16 | JaxRainbowAgent.epsilon_fn = @dqn_agent.identity_epsilon 17 | JaxRainbowAgent.epsilon_train = 0.0 18 | JaxRainbowAgent.epsilon_eval = 0.0 19 | JaxRainbowAgent.replay_scheme = 'prioritized' 20 | 21 | # Note these parameters are different from C51's. 22 | create_optimizer.learning_rate = 0.0000625 23 | create_optimizer.eps = 0.00015 24 | 25 | atari_lib.create_atari_environment.game_name = 'Pong' 26 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017). 27 | atari_lib.create_atari_environment.sticky_actions = True 28 | create_runner.schedule = 'continuous_train' 29 | create_agent.agent_name = 'jax_rainbow' 30 | create_agent.debug_mode = True 31 | Runner.num_iterations = 200 32 | Runner.training_steps = 250000 # agent steps 33 | Runner.evaluation_steps = 125000 # agent steps 34 | Runner.max_steps_per_episode = 27000 # agent steps 35 | 36 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 1000000 37 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32 38 | -------------------------------------------------------------------------------- /dopamine/jax/agents/sac/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2021 The Dopamine Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /dopamine/jax/agents/sac/configs/sac.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow those specified in Table 1 of Appendix D in: 2 | # "Soft Actor-Critic Algorithms and Applications" 3 | # by Tuomas Haarnoja et al. 4 | # https://arxiv.org/abs/1812.05905 5 | import dopamine.continuous_domains.run_experiment 6 | import dopamine.discrete_domains.gym_lib 7 | import dopamine.jax.agents.sac.sac_agent 8 | import dopamine.jax.agents.dqn.dqn_agent 9 | import dopamine.jax.continuous_networks 10 | import dopamine.replay_memory.circular_replay_buffer 11 | 12 | SACAgent.reward_scale_factor = 0.1 13 | SACAgent.network = @continuous_networks.SACNetwork 14 | SACAgent.num_layers = 2 15 | SACAgent.hidden_units = 256 16 | SACAgent.gamma = 0.99 17 | SACAgent.update_horizon = 1 18 | SACAgent.min_replay_history = 10000 # agent steps 19 | SACAgent.update_period = 1 20 | SACAgent.target_update_type = 'soft' 21 | SACAgent.target_smoothing_coefficient = 0.005 22 | SACAgent.target_entropy = None # Defaults to -num_action_dims/2 23 | SACAgent.optimizer = 'adam' 24 | SACAgent.seed = None # Seed with the current time 25 | SACAgent.observation_dtype = %sac_agent.STATE_DTYPE 26 | create_optimizer.learning_rate = 3.0e-4 27 | create_optimizer.beta1 = 0.9 28 | create_optimizer.beta2 = 0.999 29 | create_optimizer.eps = 1.0e-8 30 | 31 | create_gym_environment.environment_name = 'HalfCheetah' 32 | create_gym_environment.version = 'v2' 33 | create_continuous_runner.schedule = 'continuous_train_and_eval' 34 | create_continuous_agent.agent_name = 'sac' 35 | ContinuousTrainRunner.create_environment_fn = @gym_lib.create_gym_environment 36 | ContinuousRunner.num_iterations = 3200 37 | ContinuousRunner.training_steps = 1000 38 | ContinuousRunner.evaluation_steps = 10000 # agent steps 39 | ContinuousRunner.max_steps_per_episode = 1000 40 | ContinuousRunner.clip_rewards = False 41 | 42 | circular_replay_buffer.OutOfGraphReplayBuffer.replay_capacity = 1000000 43 | circular_replay_buffer.OutOfGraphReplayBuffer.batch_size = 256 44 | 45 | -------------------------------------------------------------------------------- /dopamine/jax/losses.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2021 The Dopamine Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | """Various losses used by the Dopamine JAX agents.""" 16 | from flax import linen as nn 17 | import jax.numpy as jnp 18 | 19 | 20 | def huber_loss(targets: jnp.array, 21 | predictions: jnp.array, 22 | delta: float = 1.0) -> jnp.ndarray: 23 | """Implementation of the Huber loss with threshold delta. 24 | 25 | Let `x = |targets - predictions|`, the Huber loss is defined as: 26 | `0.5 * x^2` if `x <= delta` 27 | `0.5 * delta^2 + delta * (x - delta)` otherwise. 28 | 29 | Args: 30 | targets: Target values. 31 | predictions: Prediction values. 32 | delta: Threshold. 33 | 34 | Returns: 35 | Huber loss. 36 | """ 37 | x = jnp.abs(targets - predictions) 38 | return jnp.where(x <= delta, 39 | 0.5 * x**2, 40 | 0.5 * delta**2 + delta * (x - delta)) 41 | 42 | 43 | def mse_loss(targets: jnp.array, predictions: jnp.array) -> jnp.ndarray: 44 | """Implementation of the mean squared error loss.""" 45 | return jnp.power((targets - predictions), 2) 46 | 47 | 48 | def softmax_cross_entropy_loss_with_logits(labels: jnp.array, 49 | logits: jnp.array) -> jnp.ndarray: 50 | """Implementation of the softmax cross entropy loss.""" 51 | return -jnp.sum(labels * nn.log_softmax(logits)) 52 | -------------------------------------------------------------------------------- /dopamine/labs/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2021 The Dopamine Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /dopamine/labs/atari_100k/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2021 The Dopamine Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | -------------------------------------------------------------------------------- /dopamine/labs/atari_100k/configs/DER.gin: -------------------------------------------------------------------------------- 1 | # Data Efficient Rainbow (DER) params 2 | import dopamine.jax.agents.dqn.dqn_agent 3 | import dopamine.jax.agents.full_rainbow.full_rainbow_agent 4 | import dopamine.jax.networks 5 | import dopamine.discrete_domains.gym_lib 6 | import dopamine.discrete_domains.run_experiment 7 | import dopamine.replay_memory.prioritized_replay_buffer 8 | import dopamine.labs.atari_100k.atari_100k_rainbow_agent 9 | 10 | JaxDQNAgent.gamma = 0.99 11 | # Use 10 instead of 20, as done by the SPR paper 12 | JaxDQNAgent.update_horizon = 10 # DER (instead of 3) 13 | JaxDQNAgent.min_replay_history = 1600 # DER (instead of 20000) 14 | JaxDQNAgent.update_period = 1 # DER: Update every 1 step (rather than 4) 15 | JaxDQNAgent.target_update_period = 2000 # DER: Target every 2000 updates 16 | JaxDQNAgent.epsilon_train = 0.01 17 | JaxDQNAgent.epsilon_eval = 0.001 18 | JaxDQNAgent.epsilon_decay_period = 2000 # agent steps 19 | JaxDQNAgent.optimizer = 'adam' 20 | 21 | JaxFullRainbowAgent.noisy = True 22 | JaxFullRainbowAgent.dueling = True 23 | JaxFullRainbowAgent.double_dqn = True 24 | JaxFullRainbowAgent.num_atoms = 51 25 | JaxFullRainbowAgent.vmax = 10. 26 | JaxFullRainbowAgent.replay_scheme = 'prioritized' 27 | JaxFullRainbowAgent.num_updates_per_train_step = 1 28 | JaxFullRainbowAgent.epsilon_fn = @jax.agents.dqn.dqn_agent.linearly_decaying_epsilon 29 | Atari100kRainbowAgent.data_augmentation = False 30 | 31 | # Note these parameters are from DER (van Hasselt et al., 2019) 32 | create_optimizer.learning_rate = 0.0001 33 | create_optimizer.eps = 0.00015 34 | 35 | atari_lib.create_atari_environment.game_name = 'Pong' 36 | # Atari 100K benchmark doesn't use sticky actions. 37 | atari_lib.create_atari_environment.sticky_actions = False 38 | AtariPreprocessing.terminal_on_life_loss = True 39 | Runner.num_iterations = 10 40 | Runner.training_steps = 10000 # agent steps 41 | MaxEpisodeEvalRunner.num_eval_episodes = 100 # agent episodes 42 | Runner.max_steps_per_episode = 27000 # agent steps 43 | 44 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 1000000 # 1M as a proxy for unbounded memory 45 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32 46 | -------------------------------------------------------------------------------- /dopamine/labs/atari_100k/configs/DrQ.gin: -------------------------------------------------------------------------------- 1 | # Data Regularized-Q (DrQ) from Kostrikov et al.
(2020) 2 | import dopamine.jax.agents.dqn.dqn_agent 3 | import dopamine.jax.agents.full_rainbow.full_rainbow_agent 4 | import dopamine.jax.networks 5 | import dopamine.discrete_domains.gym_lib 6 | import dopamine.discrete_domains.run_experiment 7 | import dopamine.replay_memory.prioritized_replay_buffer 8 | import dopamine.labs.atari_100k.atari_100k_rainbow_agent 9 | 10 | # Parameters specific to DrQ are highlighted by comments 11 | JaxDQNAgent.gamma = 0.99 12 | JaxDQNAgent.update_horizon = 10 # DrQ (instead of 3) 13 | JaxDQNAgent.min_replay_history = 1600 # DrQ (instead of 20000) 14 | JaxDQNAgent.update_period = 1 # DrQ (rather than 4) 15 | JaxDQNAgent.target_update_period = 1 # DrQ (rather than 8000) 16 | JaxDQNAgent.epsilon_train = 0.1 # DrQ (rather than 0.01) 17 | JaxDQNAgent.epsilon_eval = 0.05 # DrQ (rather than 0.001) 18 | JaxDQNAgent.epsilon_decay_period = 5000 # DrQ 19 | JaxDQNAgent.optimizer = 'adam' 20 | 21 | JaxFullRainbowAgent.noisy = False # DrQ (Efficient DQN) 22 | JaxFullRainbowAgent.dueling = True 23 | JaxFullRainbowAgent.double_dqn = True 24 | JaxFullRainbowAgent.distributional = False # DrQ (Efficient DQN) 25 | JaxFullRainbowAgent.num_atoms = 1 # Since DrQ uses DQN, rather than C51 26 | JaxFullRainbowAgent.num_updates_per_train_step = 1 27 | JaxFullRainbowAgent.replay_scheme = 'uniform' 28 | JaxFullRainbowAgent.epsilon_fn = @jax.agents.dqn.dqn_agent.linearly_decaying_epsilon 29 | Atari100kRainbowAgent.data_augmentation = True 30 | 31 | # Note these parameters are from DER (van Hasselt et al., 2019) 32 | create_optimizer.learning_rate = 0.0001 33 | create_optimizer.eps = 0.00015 34 | 35 | atari_lib.create_atari_environment.game_name = 'Pong' 36 | # Atari 100K benchmark doesn't use sticky actions. 37 | atari_lib.create_atari_environment.sticky_actions = False 38 | AtariPreprocessing.terminal_on_life_loss = True 39 | Runner.num_iterations = 1 40 | Runner.training_steps = 100000 # agent steps 41 | MaxEpisodeEvalRunner.num_eval_episodes = 100 # agent episodes 42 | Runner.max_steps_per_episode = 27000 # agent steps 43 | 44 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 1000000 # 1M as a proxy for unbounded memory 45 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32 46 | -------------------------------------------------------------------------------- /dopamine/labs/atari_100k/configs/OTRainbow.gin: -------------------------------------------------------------------------------- 1 | # Overtrained Rainbow (OTRainbow) from Kielak et al. (2019) 2 | import dopamine.jax.agents.dqn.dqn_agent 3 | import dopamine.jax.agents.full_rainbow.full_rainbow_agent 4 | import dopamine.jax.networks 5 | import dopamine.discrete_domains.gym_lib 6 | import dopamine.discrete_domains.run_experiment 7 | import dopamine.replay_memory.prioritized_replay_buffer 8 | import dopamine.labs.atari_100k.atari_100k_rainbow_agent 9 | 10 | # Parameters specific to OTRainbow are highlighted by comments 11 | JaxDQNAgent.gamma = 0.99 12 | JaxDQNAgent.update_horizon = 3 13 | JaxDQNAgent.min_replay_history = 20000 14 | JaxDQNAgent.update_period = 1 # OTRainbow: Update every 1 step (rather than 4) 15 | JaxDQNAgent.target_update_period = 500 # OTRainbow (instead of 8000) 16 | JaxDQNAgent.epsilon_train = 0.01 17 | JaxDQNAgent.epsilon_eval = 0.001 18 | JaxDQNAgent.epsilon_decay_period = 50000 # OTRainbow (instead of 250000) 19 | JaxDQNAgent.optimizer = 'adam' 20 | 21 | # Don't use noisy networks, dueling DQN, or double DQN.
22 | JaxFullRainbowAgent.noisy = False 23 | JaxFullRainbowAgent.dueling = False 24 | JaxFullRainbowAgent.double_dqn = False 25 | JaxFullRainbowAgent.num_atoms = 51 26 | JaxFullRainbowAgent.num_updates_per_train_step = 8 # OTRainbow (instead of 1) 27 | JaxFullRainbowAgent.vmax = 10. 28 | JaxFullRainbowAgent.replay_scheme = 'prioritized' 29 | JaxFullRainbowAgent.epsilon_fn = @jax.agents.dqn.dqn_agent.linearly_decaying_epsilon 30 | Atari100kRainbowAgent.data_augmentation = False 31 | 32 | # Note these parameters are original Rainbow. 33 | create_optimizer.learning_rate = 0.0000625 34 | create_optimizer.eps = 0.00015 35 | 36 | atari_lib.create_atari_environment.game_name = 'Pong' 37 | # Atari 100K benchmark doesn't use sticky actions. 38 | atari_lib.create_atari_environment.sticky_actions = False 39 | AtariPreprocessing.terminal_on_life_loss = True 40 | Runner.num_iterations = 1 41 | Runner.training_steps = 100000 # agent steps 42 | MaxEpisodeEvalRunner.num_eval_episodes = 100 # agent episodes 43 | Runner.max_steps_per_episode = 27000 # agent steps 44 | 45 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 1000000 # 1M as a proxy for unbounded memory 46 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32 47 | -------------------------------------------------------------------------------- /dopamine/labs/environments/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2021 The Dopamine Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /dopamine/labs/environments/minatar/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2021 The Dopamine Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /dopamine/labs/environments/minatar/dqn_asterix.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters for a simple DQN-style Minatar agent. The hyperparameters 2 | # chosen achieve reasonable performance. 
3 | import dopamine.discrete_domains.gym_lib 4 | import dopamine.discrete_domains.run_experiment 5 | import dopamine.jax.agents.dqn.dqn_agent 6 | import dopamine.jax.networks 7 | import dopamine.labs.environments.minatar.minatar_env 8 | import dopamine.replay_memory.circular_replay_buffer 9 | import flax 10 | 11 | JaxDQNAgent.observation_shape = %minatar_env.ASTERIX_SHAPE 12 | JaxDQNAgent.observation_dtype = %minatar_env.DTYPE 13 | JaxDQNAgent.stack_size = 1 14 | JaxDQNAgent.network = @minatar_env.MinatarDQNNetwork 15 | JaxDQNAgent.gamma = 0.99 16 | JaxDQNAgent.update_horizon = 1 17 | JaxDQNAgent.min_replay_history = 1000 18 | JaxDQNAgent.update_period = 4 19 | JaxDQNAgent.target_update_period = 1000 20 | JaxDQNAgent.optimizer = 'adam' 21 | create_optimizer.learning_rate = 0.00025 22 | create_optimizer.eps = 3.125e-4 23 | 24 | create_minatar_env.game_name = 'asterix' 25 | TrainRunner.create_environment_fn = @minatar_env.create_minatar_env 26 | create_runner.schedule = 'continuous_train' 27 | create_agent.agent_name = 'jax_dqn' 28 | create_agent.debug_mode = True 29 | Runner.num_iterations = 10 30 | Runner.training_steps = 1000000 31 | Runner.max_steps_per_episode = 100000000 32 | 33 | OutOfGraphReplayBuffer.replay_capacity = 100000 34 | OutOfGraphReplayBuffer.batch_size = 32 35 | -------------------------------------------------------------------------------- /dopamine/labs/environments/minatar/dqn_breakout.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters for a simple DQN-style Minatar agent. The hyperparameters 2 | # chosen achieve reasonable performance. 3 | import dopamine.discrete_domains.gym_lib 4 | import dopamine.discrete_domains.run_experiment 5 | import dopamine.jax.agents.dqn.dqn_agent 6 | import dopamine.jax.networks 7 | import dopamine.labs.environments.minatar.minatar_env 8 | import dopamine.replay_memory.circular_replay_buffer 9 | import flax 10 | 11 | JaxDQNAgent.observation_shape = %minatar_env.BREAKOUT_SHAPE 12 | JaxDQNAgent.observation_dtype = %minatar_env.DTYPE 13 | JaxDQNAgent.stack_size = 1 14 | JaxDQNAgent.network = @minatar_env.MinatarDQNNetwork 15 | JaxDQNAgent.gamma = 0.99 16 | JaxDQNAgent.update_horizon = 1 17 | JaxDQNAgent.min_replay_history = 1000 18 | JaxDQNAgent.update_period = 4 19 | JaxDQNAgent.target_update_period = 1000 20 | JaxDQNAgent.optimizer = 'adam' 21 | create_optimizer.learning_rate = 0.00025 22 | create_optimizer.eps = 3.125e-4 23 | 24 | create_minatar_env.game_name = 'breakout' 25 | TrainRunner.create_environment_fn = @minatar_env.create_minatar_env 26 | create_runner.schedule = 'continuous_train' 27 | create_agent.agent_name = 'jax_dqn' 28 | create_agent.debug_mode = True 29 | Runner.num_iterations = 10 30 | Runner.training_steps = 1000000 31 | Runner.max_steps_per_episode = 100000000 32 | 33 | OutOfGraphReplayBuffer.replay_capacity = 100000 34 | OutOfGraphReplayBuffer.batch_size = 32 35 | -------------------------------------------------------------------------------- /dopamine/labs/environments/minatar/dqn_freeway.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters for a simple DQN-style Minatar agent. The hyperparameters 2 | # chosen achieve reasonable performance. 
3 | import dopamine.discrete_domains.gym_lib 4 | import dopamine.discrete_domains.run_experiment 5 | import dopamine.jax.agents.dqn.dqn_agent 6 | import dopamine.jax.networks 7 | import dopamine.labs.environments.minatar.minatar_env 8 | import dopamine.replay_memory.circular_replay_buffer 9 | import flax 10 | 11 | JaxDQNAgent.observation_shape = %minatar_env.FREEWAY_SHAPE 12 | JaxDQNAgent.observation_dtype = %minatar_env.DTYPE 13 | JaxDQNAgent.stack_size = 1 14 | JaxDQNAgent.network = @minatar_env.MinatarDQNNetwork 15 | JaxDQNAgent.gamma = 0.99 16 | JaxDQNAgent.update_horizon = 1 17 | JaxDQNAgent.min_replay_history = 1000 18 | JaxDQNAgent.update_period = 4 19 | JaxDQNAgent.target_update_period = 1000 20 | JaxDQNAgent.optimizer = 'adam' 21 | create_optimizer.learning_rate = 0.00025 22 | create_optimizer.eps = 3.125e-4 23 | 24 | create_minatar_env.game_name = 'freeway' 25 | TrainRunner.create_environment_fn = @minatar_env.create_minatar_env 26 | create_runner.schedule = 'continuous_train' 27 | create_agent.agent_name = 'jax_dqn' 28 | create_agent.debug_mode = True 29 | Runner.num_iterations = 10 30 | Runner.training_steps = 1000000 31 | Runner.max_steps_per_episode = 100000000 32 | 33 | OutOfGraphReplayBuffer.replay_capacity = 100000 34 | OutOfGraphReplayBuffer.batch_size = 32 35 | -------------------------------------------------------------------------------- /dopamine/labs/environments/minatar/dqn_seaquest.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters for a simple DQN-style Minatar agent. The hyperparameters 2 | # chosen achieve reasonable performance. 3 | import dopamine.discrete_domains.gym_lib 4 | import dopamine.discrete_domains.run_experiment 5 | import dopamine.jax.agents.dqn.dqn_agent 6 | import dopamine.jax.networks 7 | import dopamine.labs.environments.minatar.minatar_env 8 | import dopamine.replay_memory.circular_replay_buffer 9 | import flax 10 | 11 | JaxDQNAgent.observation_shape = %minatar_env.SEAQUEST_SHAPE 12 | JaxDQNAgent.observation_dtype = %minatar_env.DTYPE 13 | JaxDQNAgent.stack_size = 1 14 | JaxDQNAgent.network = @minatar_env.MinatarDQNNetwork 15 | JaxDQNAgent.gamma = 0.99 16 | JaxDQNAgent.update_horizon = 1 17 | JaxDQNAgent.min_replay_history = 1000 18 | JaxDQNAgent.update_period = 4 19 | JaxDQNAgent.target_update_period = 1000 20 | JaxDQNAgent.optimizer = 'adam' 21 | create_optimizer.learning_rate = 0.00025 22 | create_optimizer.eps = 3.125e-4 23 | 24 | create_minatar_env.game_name = 'seaquest' 25 | TrainRunner.create_environment_fn = @minatar_env.create_minatar_env 26 | create_runner.schedule = 'continuous_train' 27 | create_agent.agent_name = 'jax_dqn' 28 | create_agent.debug_mode = True 29 | Runner.num_iterations = 10 30 | Runner.training_steps = 1000000 31 | Runner.max_steps_per_episode = 100000000 32 | 33 | OutOfGraphReplayBuffer.replay_capacity = 100000 34 | OutOfGraphReplayBuffer.batch_size = 32 35 | -------------------------------------------------------------------------------- /dopamine/labs/environments/minatar/dqn_space_invaders.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters for a simple DQN-style Minatar agent. The hyperparameters 2 | # chosen achieve reasonable performance. 
3 | import dopamine.discrete_domains.gym_lib 4 | import dopamine.discrete_domains.run_experiment 5 | import dopamine.jax.agents.dqn.dqn_agent 6 | import dopamine.jax.networks 7 | import dopamine.labs.environments.minatar.minatar_env 8 | import dopamine.replay_memory.circular_replay_buffer 9 | import flax 10 | 11 | JaxDQNAgent.observation_shape = %minatar_env.SPACE_INVADERS_SHAPE 12 | JaxDQNAgent.observation_dtype = %minatar_env.DTYPE 13 | JaxDQNAgent.stack_size = 1 14 | JaxDQNAgent.network = @minatar_env.MinatarDQNNetwork 15 | JaxDQNAgent.gamma = 0.99 16 | JaxDQNAgent.update_horizon = 1 17 | JaxDQNAgent.min_replay_history = 1000 18 | JaxDQNAgent.update_period = 4 19 | JaxDQNAgent.target_update_period = 1000 20 | JaxDQNAgent.optimizer = 'adam' 21 | create_optimizer.learning_rate = 0.00025 22 | create_optimizer.eps = 3.125e-4 23 | 24 | create_minatar_env.game_name = 'space_invaders' 25 | TrainRunner.create_environment_fn = @minatar_env.create_minatar_env 26 | create_runner.schedule = 'continuous_train' 27 | create_agent.agent_name = 'jax_dqn' 28 | create_agent.debug_mode = True 29 | Runner.num_iterations = 10 30 | Runner.training_steps = 1000000 31 | Runner.max_steps_per_episode = 100000000 32 | 33 | OutOfGraphReplayBuffer.replay_capacity = 100000 34 | OutOfGraphReplayBuffer.batch_size = 32 35 | -------------------------------------------------------------------------------- /dopamine/labs/environments/minatar/quantile_asterix.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow Dabney et al. (2017) but we modify as necessary to 2 | # match those used in Rainbow (Hessel et al., 2018), to ensure apples-to-apples 3 | # comparison. 4 | import dopamine.discrete_domains.atari_lib 5 | import dopamine.discrete_domains.run_experiment 6 | import dopamine.jax.agents.quantile.quantile_agent 7 | import dopamine.labs.environments.minatar.minatar_env 8 | import dopamine.replay_memory.prioritized_replay_buffer 9 | 10 | JaxQuantileAgent.observation_shape = %minatar_env.ASTERIX_SHAPE 11 | JaxQuantileAgent.observation_dtype = %minatar_env.DTYPE 12 | JaxQuantileAgent.stack_size = 1 13 | JaxQuantileAgent.network = @minatar_env.MinatarQuantileNetwork 14 | JaxQuantileAgent.kappa = 1.0 15 | JaxQuantileAgent.num_atoms = 200 16 | JaxQuantileAgent.gamma = 0.99 17 | JaxQuantileAgent.update_horizon = 3 18 | JaxQuantileAgent.min_replay_history = 1000 # agent steps 19 | JaxQuantileAgent.update_period = 4 20 | JaxQuantileAgent.target_update_period = 1000 # agent steps 21 | JaxQuantileAgent.epsilon_train = 0.01 22 | JaxQuantileAgent.epsilon_eval = 0.001 23 | JaxQuantileAgent.replay_scheme = 'prioritized' 24 | JaxQuantileAgent.optimizer = 'adam' 25 | create_optimizer.learning_rate = 0.00025 26 | create_optimizer.eps = 3.125e-4 27 | 28 | create_minatar_env.game_name = 'asterix' 29 | TrainRunner.create_environment_fn = @minatar_env.create_minatar_env 30 | create_runner.schedule = 'continuous_train' 31 | create_agent.agent_name = 'jax_quantile' 32 | create_agent.debug_mode = True 33 | Runner.num_iterations = 10 34 | Runner.training_steps = 1000000 35 | Runner.max_steps_per_episode = 100000000 36 | 37 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 100000 38 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32 39 | -------------------------------------------------------------------------------- /dopamine/labs/environments/minatar/quantile_breakout.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow 
Dabney et al. (2017) but we modify as necessary to 2 | # match those used in Rainbow (Hessel et al., 2018), to ensure apples-to-apples 3 | # comparison. 4 | import dopamine.discrete_domains.atari_lib 5 | import dopamine.discrete_domains.run_experiment 6 | import dopamine.jax.agents.quantile.quantile_agent 7 | import dopamine.labs.environments.minatar.minatar_env 8 | import dopamine.replay_memory.prioritized_replay_buffer 9 | 10 | JaxQuantileAgent.observation_shape = %minatar_env.BREAKOUT_SHAPE 11 | JaxQuantileAgent.observation_dtype = %minatar_env.DTYPE 12 | JaxQuantileAgent.stack_size = 1 13 | JaxQuantileAgent.network = @minatar_env.MinatarQuantileNetwork 14 | JaxQuantileAgent.kappa = 1.0 15 | JaxQuantileAgent.num_atoms = 200 16 | JaxQuantileAgent.gamma = 0.99 17 | JaxQuantileAgent.update_horizon = 3 18 | JaxQuantileAgent.min_replay_history = 1000 # agent steps 19 | JaxQuantileAgent.update_period = 4 20 | JaxQuantileAgent.target_update_period = 1000 # agent steps 21 | JaxQuantileAgent.epsilon_train = 0.01 22 | JaxQuantileAgent.epsilon_eval = 0.001 23 | JaxQuantileAgent.replay_scheme = 'prioritized' 24 | JaxQuantileAgent.optimizer = 'adam' 25 | create_optimizer.learning_rate = 0.00025 26 | create_optimizer.eps = 3.125e-4 27 | 28 | create_minatar_env.game_name = 'breakout' 29 | TrainRunner.create_environment_fn = @minatar_env.create_minatar_env 30 | create_runner.schedule = 'continuous_train' 31 | create_agent.agent_name = 'jax_quantile' 32 | create_agent.debug_mode = True 33 | Runner.num_iterations = 10 34 | Runner.training_steps = 1000000 35 | Runner.max_steps_per_episode = 100000000 36 | 37 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 100000 38 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32 39 | -------------------------------------------------------------------------------- /dopamine/labs/environments/minatar/quantile_freeway.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow Dabney et al. (2017) but we modify as necessary to 2 | # match those used in Rainbow (Hessel et al., 2018), to ensure apples-to-apples 3 | # comparison. 
4 | import dopamine.discrete_domains.atari_lib 5 | import dopamine.discrete_domains.run_experiment 6 | import dopamine.jax.agents.quantile.quantile_agent 7 | import dopamine.labs.environments.minatar.minatar_env 8 | import dopamine.replay_memory.prioritized_replay_buffer 9 | 10 | JaxQuantileAgent.observation_shape = %minatar_env.FREEWAY_SHAPE 11 | JaxQuantileAgent.observation_dtype = %minatar_env.DTYPE 12 | JaxQuantileAgent.stack_size = 1 13 | JaxQuantileAgent.network = @minatar_env.MinatarQuantileNetwork 14 | JaxQuantileAgent.kappa = 1.0 15 | JaxQuantileAgent.num_atoms = 200 16 | JaxQuantileAgent.gamma = 0.99 17 | JaxQuantileAgent.update_horizon = 3 18 | JaxQuantileAgent.min_replay_history = 1000 # agent steps 19 | JaxQuantileAgent.update_period = 4 20 | JaxQuantileAgent.target_update_period = 1000 # agent steps 21 | JaxQuantileAgent.epsilon_train = 0.01 22 | JaxQuantileAgent.epsilon_eval = 0.001 23 | JaxQuantileAgent.replay_scheme = 'prioritized' 24 | JaxQuantileAgent.optimizer = 'adam' 25 | create_optimizer.learning_rate = 0.00025 26 | create_optimizer.eps = 3.125e-4 27 | 28 | create_minatar_env.game_name = 'freeway' 29 | TrainRunner.create_environment_fn = @minatar_env.create_minatar_env 30 | create_runner.schedule = 'continuous_train' 31 | create_agent.agent_name = 'jax_quantile' 32 | create_agent.debug_mode = True 33 | Runner.num_iterations = 10 34 | Runner.training_steps = 1000000 35 | Runner.max_steps_per_episode = 100000000 36 | 37 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 100000 38 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32 39 | -------------------------------------------------------------------------------- /dopamine/labs/environments/minatar/quantile_seaquest.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow Dabney et al. (2017) but we modify as necessary to 2 | # match those used in Rainbow (Hessel et al., 2018), to ensure apples-to-apples 3 | # comparison. 
4 | import dopamine.discrete_domains.atari_lib 5 | import dopamine.discrete_domains.run_experiment 6 | import dopamine.labs.environments.minatar.minatar_env 7 | import dopamine.jax.agents.quantile.quantile_agent 8 | import dopamine.replay_memory.prioritized_replay_buffer 9 | 10 | JaxQuantileAgent.observation_shape = %minatar_env.SEAQUEST_SHAPE 11 | JaxQuantileAgent.observation_dtype = %minatar_env.DTYPE 12 | JaxQuantileAgent.stack_size = 1 13 | JaxQuantileAgent.network = @minatar_env.MinatarQuantileNetwork 14 | JaxQuantileAgent.kappa = 1.0 15 | JaxQuantileAgent.num_atoms = 200 16 | JaxQuantileAgent.gamma = 0.99 17 | JaxQuantileAgent.update_horizon = 3 18 | JaxQuantileAgent.min_replay_history = 1000 # agent steps 19 | JaxQuantileAgent.update_period = 4 20 | JaxQuantileAgent.target_update_period = 1000 # agent steps 21 | JaxQuantileAgent.epsilon_train = 0.01 22 | JaxQuantileAgent.epsilon_eval = 0.001 23 | JaxQuantileAgent.replay_scheme = 'prioritized' 24 | JaxQuantileAgent.optimizer = 'adam' 25 | create_optimizer.learning_rate = 0.00025 26 | create_optimizer.eps = 3.125e-4 27 | 28 | create_minatar_env.game_name = 'seaquest' 29 | TrainRunner.create_environment_fn = @minatar_env.create_minatar_env 30 | create_runner.schedule = 'continuous_train' 31 | create_agent.agent_name = 'jax_quantile' 32 | create_agent.debug_mode = True 33 | Runner.num_iterations = 10 34 | Runner.training_steps = 1000000 35 | Runner.max_steps_per_episode = 100000000 36 | 37 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 100000 38 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32 39 | -------------------------------------------------------------------------------- /dopamine/labs/environments/minatar/quantile_space_invaders.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow Dabney et al. (2017) but we modify as necessary to 2 | # match those used in Rainbow (Hessel et al., 2018), to ensure apples-to-apples 3 | # comparison. 
4 | import dopamine.discrete_domains.atari_lib 5 | import dopamine.discrete_domains.run_experiment 6 | import dopamine.jax.agents.quantile.quantile_agent 7 | import dopamine.labs.environments.minatar.minatar_env 8 | import dopamine.replay_memory.prioritized_replay_buffer 9 | 10 | JaxQuantileAgent.observation_shape = %minatar_env.SPACE_INVADERS_SHAPE 11 | JaxQuantileAgent.observation_dtype = %minatar_env.DTYPE 12 | JaxQuantileAgent.stack_size = 1 13 | JaxQuantileAgent.network = @minatar_env.MinatarQuantileNetwork 14 | JaxQuantileAgent.kappa = 1.0 15 | JaxQuantileAgent.num_atoms = 200 16 | JaxQuantileAgent.gamma = 0.99 17 | JaxQuantileAgent.update_horizon = 3 18 | JaxQuantileAgent.min_replay_history = 1000 # agent steps 19 | JaxQuantileAgent.update_period = 4 20 | JaxQuantileAgent.target_update_period = 1000 # agent steps 21 | JaxQuantileAgent.epsilon_train = 0.01 22 | JaxQuantileAgent.epsilon_eval = 0.001 23 | JaxQuantileAgent.replay_scheme = 'prioritized' 24 | JaxQuantileAgent.optimizer = 'adam' 25 | create_optimizer.learning_rate = 0.00025 26 | create_optimizer.eps = 3.125e-4 27 | 28 | create_minatar_env.game_name = 'space_invaders' 29 | TrainRunner.create_environment_fn = @minatar_env.create_minatar_env 30 | create_runner.schedule = 'continuous_train' 31 | create_agent.agent_name = 'jax_quantile' 32 | create_agent.debug_mode = True 33 | Runner.num_iterations = 10 34 | Runner.training_steps = 1000000 35 | Runner.max_steps_per_episode = 100000000 36 | 37 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 100000 38 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32 39 | -------------------------------------------------------------------------------- /dopamine/labs/environments/minatar/rainbow_asterix.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters for a simple Rainbow-style Minatar agent. The 2 | # hyperparameters chosen achieve reasonable performance. 3 | import dopamine.discrete_domains.gym_lib 4 | import dopamine.discrete_domains.run_experiment 5 | import dopamine.jax.agents.rainbow.rainbow_agent 6 | import dopamine.jax.networks 7 | import dopamine.labs.environments.minatar.minatar_env 8 | import dopamine.replay_memory.prioritized_replay_buffer 9 | 10 | JaxRainbowAgent.observation_shape = %minatar_env.ASTERIX_SHAPE 11 | JaxRainbowAgent.observation_dtype = %minatar_env.DTYPE 12 | JaxRainbowAgent.stack_size = 1 13 | JaxRainbowAgent.network = @minatar_env.MinatarRainbowNetwork 14 | JaxRainbowAgent.num_atoms = 51 15 | JaxRainbowAgent.vmax = 100.
16 | JaxRainbowAgent.gamma = 0.99 17 | JaxRainbowAgent.update_horizon = 3 18 | JaxRainbowAgent.min_replay_history = 1000 19 | JaxRainbowAgent.update_period = 4 20 | JaxRainbowAgent.target_update_period = 1000 21 | JaxRainbowAgent.replay_scheme = 'prioritized' 22 | create_optimizer.learning_rate = 0.00025 23 | create_optimizer.eps = 3.125e-4 24 | 25 | create_minatar_env.game_name = 'asterix' 26 | TrainRunner.create_environment_fn = @minatar_env.create_minatar_env 27 | create_runner.schedule = 'continuous_train' 28 | create_agent.agent_name = 'jax_rainbow' 29 | create_agent.debug_mode = True 30 | Runner.num_iterations = 10 31 | Runner.training_steps = 1000000 32 | Runner.max_steps_per_episode = 100000000 33 | 34 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 100000 35 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32 36 | -------------------------------------------------------------------------------- /dopamine/labs/environments/minatar/rainbow_breakout.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters for a simple Rainbow-style Minatar agent. The 2 | # hyperparameters chosen achieve reasonable performance. 3 | import dopamine.discrete_domains.gym_lib 4 | import dopamine.discrete_domains.run_experiment 5 | import dopamine.jax.agents.rainbow.rainbow_agent 6 | import dopamine.jax.networks 7 | import dopamine.labs.environments.minatar.minatar_env 8 | import dopamine.replay_memory.prioritized_replay_buffer 9 | 10 | JaxRainbowAgent.observation_shape = %minatar_env.BREAKOUT_SHAPE 11 | JaxRainbowAgent.observation_dtype = %minatar_env.DTYPE 12 | JaxRainbowAgent.stack_size = 1 13 | JaxRainbowAgent.network = @minatar_env.MinatarRainbowNetwork 14 | JaxRainbowAgent.num_atoms = 51 15 | JaxRainbowAgent.vmax = 100. 16 | JaxRainbowAgent.gamma = 0.99 17 | JaxRainbowAgent.update_horizon = 3 18 | JaxRainbowAgent.min_replay_history = 1000 19 | JaxRainbowAgent.update_period = 4 20 | JaxRainbowAgent.target_update_period = 1000 21 | JaxRainbowAgent.replay_scheme = 'prioritized' 22 | create_optimizer.learning_rate = 0.00025 23 | create_optimizer.eps = 3.125e-4 24 | 25 | create_minatar_env.game_name = 'breakout' 26 | TrainRunner.create_environment_fn = @minatar_env.create_minatar_env 27 | create_runner.schedule = 'continuous_train' 28 | create_agent.agent_name = 'jax_rainbow' 29 | create_agent.debug_mode = True 30 | Runner.num_iterations = 10 31 | Runner.training_steps = 1000000 32 | Runner.max_steps_per_episode = 100000000 33 | 34 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 100000 35 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32 36 | -------------------------------------------------------------------------------- /dopamine/labs/environments/minatar/rainbow_freeway.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters for a simple Rainbow-style Minatar agent. The 2 | # hyperparameters chosen achieve reasonable performance.
3 | import dopamine.discrete_domains.gym_lib 4 | import dopamine.discrete_domains.run_experiment 5 | import dopamine.jax.agents.rainbow.rainbow_agent 6 | import dopamine.jax.networks 7 | import dopamine.labs.environments.minatar.minatar_env 8 | import dopamine.replay_memory.prioritized_replay_buffer 9 | 10 | JaxRainbowAgent.observation_shape = %minatar_env.FREEWAY_SHAPE 11 | JaxRainbowAgent.observation_dtype = %minatar_env.DTYPE 12 | JaxRainbowAgent.stack_size = 1 13 | JaxRainbowAgent.network = @minatar_env.MinatarRainbowNetwork 14 | JaxRainbowAgent.num_atoms = 51 15 | JaxRainbowAgent.vmax = 100. 16 | JaxRainbowAgent.gamma = 0.99 17 | JaxRainbowAgent.update_horizon = 3 18 | JaxRainbowAgent.min_replay_history = 1000 19 | JaxRainbowAgent.update_period = 4 20 | JaxRainbowAgent.target_update_period = 1000 21 | JaxRainbowAgent.replay_scheme = 'prioritized' 22 | create_optimizer.learning_rate = 0.00025 23 | create_optimizer.eps = 3.125e-4 24 | 25 | create_minatar_env.game_name = 'freeway' 26 | TrainRunner.create_environment_fn = @minatar_env.create_minatar_env 27 | create_runner.schedule = 'continuous_train' 28 | create_agent.agent_name = 'jax_rainbow' 29 | create_agent.debug_mode = True 30 | Runner.num_iterations = 10 31 | Runner.training_steps = 1000000 32 | Runner.max_steps_per_episode = 100000000 33 | 34 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 100000 35 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32 36 | -------------------------------------------------------------------------------- /dopamine/labs/environments/minatar/rainbow_seaquest.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters for a simple Rainbow-style Minatar agent. The 2 | # hyperparameters chosen achieve reasonable performance. 3 | import dopamine.discrete_domains.gym_lib 4 | import dopamine.discrete_domains.run_experiment 5 | import dopamine.jax.agents.rainbow.rainbow_agent 6 | import dopamine.jax.networks 7 | import dopamine.labs.environments.minatar.minatar_env 8 | import dopamine.replay_memory.prioritized_replay_buffer 9 | 10 | JaxRainbowAgent.observation_shape = %minatar_env.SEAQUEST_SHAPE 11 | JaxRainbowAgent.observation_dtype = %minatar_env.DTYPE 12 | JaxRainbowAgent.stack_size = 1 13 | JaxRainbowAgent.network = @minatar_env.MinatarRainbowNetwork 14 | JaxRainbowAgent.num_atoms = 51 15 | JaxRainbowAgent.vmax = 100. 16 | JaxRainbowAgent.gamma = 0.99 17 | JaxRainbowAgent.update_horizon = 3 18 | JaxRainbowAgent.min_replay_history = 1000 19 | JaxRainbowAgent.update_period = 4 20 | JaxRainbowAgent.target_update_period = 1000 21 | JaxRainbowAgent.replay_scheme = 'prioritized' 22 | create_optimizer.learning_rate = 0.00025 23 | create_optimizer.eps = 3.125e-4 24 | 25 | create_minatar_env.game_name = 'seaquest' 26 | TrainRunner.create_environment_fn = @minatar_env.create_minatar_env 27 | create_runner.schedule = 'continuous_train' 28 | create_agent.agent_name = 'jax_rainbow' 29 | create_agent.debug_mode = True 30 | Runner.num_iterations = 10 31 | Runner.training_steps = 1000000 32 | Runner.max_steps_per_episode = 100000000 33 | 34 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 100000 35 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32 36 | -------------------------------------------------------------------------------- /dopamine/labs/environments/minatar/rainbow_space_invaders.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters for a simple Rainbow-style Minatar agent.
The 2 | # hyperparameters chosen achieve reasonable performance. 3 | import dopamine.discrete_domains.gym_lib 4 | import dopamine.discrete_domains.run_experiment 5 | import dopamine.jax.agents.rainbow.rainbow_agent 6 | import dopamine.jax.networks 7 | import dopamine.labs.environments.minatar.minatar_env 8 | import dopamine.replay_memory.prioritized_replay_buffer 9 | 10 | JaxRainbowAgent.observation_shape = %minatar_env.SPACE_INVADERS_SHAPE 11 | JaxRainbowAgent.observation_dtype = %minatar_env.DTYPE 12 | JaxRainbowAgent.stack_size = 1 13 | JaxRainbowAgent.network = @minatar_env.MinatarRainbowNetwork 14 | JaxRainbowAgent.num_atoms = 51 15 | JaxRainbowAgent.vmax = 100. 16 | JaxRainbowAgent.gamma = 0.99 17 | JaxRainbowAgent.update_horizon = 3 18 | JaxRainbowAgent.min_replay_history = 1000 19 | JaxRainbowAgent.update_period = 4 20 | JaxRainbowAgent.target_update_period = 1000 21 | JaxRainbowAgent.replay_scheme = 'prioritized' 22 | create_optimizer.learning_rate = 0.00025 23 | create_optimizer.eps = 3.125e-4 24 | 25 | create_minatar_env.game_name = 'space_invaders' 26 | TrainRunner.create_environment_fn = @minatar_env.create_minatar_env 27 | create_runner.schedule = 'continuous_train' 28 | create_agent.agent_name = 'jax_rainbow' 29 | create_agent.debug_mode = True 30 | Runner.num_iterations = 10 31 | Runner.training_steps = 1000000 32 | Runner.max_steps_per_episode = 100000000 33 | 34 | OutOfGraphPrioritizedReplayBuffer.replay_capacity = 100000 35 | OutOfGraphPrioritizedReplayBuffer.batch_size = 32 36 | -------------------------------------------------------------------------------- /dopamine/labs/tandem_dqn/README.md: -------------------------------------------------------------------------------- 1 | # The Difficulty of Passive Learning in Deep Reinforcement Learning 2 | 3 | This is the Dopamine-based code accompanying the paper listed above. 4 | Although this code supports running classic control, MinAtar, and ALE 5 | environments, it was only used to run the classic control and MinAtar 6 | environments in the paper. 7 | 8 | See `run.sh` for an example of how to run it. 9 | -------------------------------------------------------------------------------- /dopamine/labs/tandem_dqn/requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py 2 | atari-py 3 | dopamine-rl 4 | gin-config 5 | gym 6 | numpy 7 | tensorflow 8 | -------------------------------------------------------------------------------- /dopamine/labs/tandem_dqn/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Note that to run this on the classic control and ALE environments you need to 3 | # obtain the gin files for Dopamine JAX agents: 4 | # github.com/google/dopamine/tree/master/dopamine/jax/agents/dqn/configs 5 | set -e 6 | set -x 7 | 8 | virtualenv -p python3 . 9 | source ./bin/activate 10 | 11 | cd .. 12 | pip install -r tandem_dqn/requirements.txt 13 | python3 -m tandem_dqn.train \ 14 | --base_dir=/tmp/tandem_dqn 15 | -------------------------------------------------------------------------------- /dopamine/labs/tandem_dqn/train.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2021 The Tandem DQN authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Binary entry-point for Tandem RL experiments.""" 16 | 17 | from absl import app 18 | from absl import flags 19 | from absl import logging 20 | 21 | from dopamine.discrete_domains import run_experiment as base_run_experiment 22 | from dopamine.labs.tandem_dqn import run_experiment 23 | import tensorflow as tf 24 | 25 | 26 | 27 | flags.DEFINE_string('base_dir', None, 28 | 'Base directory to host all required sub-directories.') 29 | flags.DEFINE_multi_string( 30 | 'gin_files', [], 'List of paths to gin configuration files (e.g.' 31 | '"dopamine/agents/dqn/dqn.gin").') 32 | flags.DEFINE_multi_string( 33 | 'gin_bindings', [], 34 | 'Gin bindings to override the values set in the config files ' 35 | '(e.g. "DQNAgent.epsilon_train=0.1",' 36 | ' "create_environment.game_name="Pong"").') 37 | 38 | FLAGS = flags.FLAGS 39 | 40 | 41 | def main(unused_argv): 42 | """Main method. 43 | 44 | Args: 45 | unused_argv: Arguments (unused). 46 | """ 47 | logging.set_verbosity(logging.INFO) 48 | tf.compat.v1.disable_v2_behavior() 49 | 50 | base_dir = FLAGS.base_dir 51 | gin_files = FLAGS.gin_files 52 | gin_bindings = FLAGS.gin_bindings 53 | base_run_experiment.load_gin_configs(gin_files, gin_bindings) 54 | runner = run_experiment.TandemRunner( 55 | base_dir, run_experiment.create_tandem_agents_and_checkpoints) 56 | runner.run_experiment() 57 | 58 | 59 | if __name__ == '__main__': 60 | flags.mark_flag_as_required('base_dir') 61 | app.run(main) 62 | -------------------------------------------------------------------------------- /dopamine/replay_memory/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Dopamine Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /dopamine/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Dopamine Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /dopamine/utils/test_utils.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Dopamine Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Common testing utilities shared across agents.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | 22 | 23 | import mock 24 | import tensorflow as tf 25 | 26 | 27 | class MockReplayBuffer(object): 28 | """Mock ReplayBuffer to verify the way the agent interacts with it.""" 29 | 30 | def __init__(self, is_jax=False): 31 | if is_jax: 32 | self.add = mock.Mock() 33 | self.add_count = 0 34 | self.sum_tree = mock.Mock() 35 | else: 36 | with tf.compat.v1.variable_scope( 37 | 'MockReplayBuffer', reuse=tf.compat.v1.AUTO_REUSE): 38 | self.add = mock.Mock() 39 | self.memory = mock.Mock() 40 | self.memory.add_count = 0 41 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py>=0.9.0 2 | astunparse>=1.6.3 3 | atari-py>=0.2.6 4 | cachetools>=4.1.1 5 | certifi>=2020.6.20 6 | chardet>=3.0.4 7 | cloudpickle>=1.3.0 8 | cycler>=0.10.0 9 | flax>=0.3.3 10 | future>=0.18.2 11 | gast>=0.3.3 12 | gin-config>=0.3.0 13 | google-auth>=1.19.2 14 | google-auth-oauthlib>=0.4.1 15 | google-pasta>=0.2.0 16 | grpcio>=1.30.0 17 | gym>=0.17.2 18 | h5py>=2.10.0 19 | idna>=2.10 20 | jax>=0.2.12 21 | jaxlib>=0.1.65 22 | Keras-Preprocessing>=1.1.2 23 | kiwisolver>=1.2.0 24 | Markdown>=3.2.2 25 | matplotlib>=3.3.0 26 | msgpack>=1.0.0 27 | numpy>=1.18.5 28 | oauthlib>=3.1.0 29 | opencv-python>=4.3.0.36 30 | opt-einsum>=3.3.0 31 | pandas>=1.0.5 32 | Pillow>=7.2.0 33 | protobuf>=3.12.2 34 | pyasn1>=0.4.8 35 | pyasn1-modules>=0.2.8 36 | pygame>=1.9.6 37 | pyglet>=1.5.0 38 | pyparsing>=2.4.7 39 | python-dateutil>=2.8.1 40 | pytz>=2020.1 41 | requests>=2.24.0 42 | requests-oauthlib>=1.3.0 43 | rsa>=4.6 44 | scipy>=1.4.1 45 | six>=1.15.0 46 | setuptools>=49.2.01 47 | tensorboard 48 | tensorboard-plugin-wit 49 | tensorflow 50 | tensorflow-estimator 51 | tensorflow-probability>=0.13.0 52 | termcolor>=1.1.0 53 | tf-slim>=1.1.0 54 | urllib3>=1.25.10 55 | Werkzeug>=1.0.1 56 | wrapt>=1.12.1 57 | -------------------------------------------------------------------------------- /tests/dopamine/atari_init_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Dopamine Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """A simple test for validating that the Atari env initializes.""" 16 | 17 | import datetime 18 | import os 19 | import shutil 20 | 21 | 22 | 23 | from absl import flags 24 | from dopamine.discrete_domains import train 25 | import tensorflow as tf 26 | 27 | 28 | FLAGS = flags.FLAGS 29 | 30 | 31 | class AtariInitTest(tf.test.TestCase): 32 | 33 | def setUp(self): 34 | super(AtariInitTest, self).setUp() 35 | FLAGS.base_dir = os.path.join( 36 | '/tmp/dopamine_tests', 37 | datetime.datetime.utcnow().strftime('run_%Y_%m_%d_%H_%M_%S')) 38 | FLAGS.gin_files = ['dopamine/agents/dqn/configs/dqn.gin'] 39 | # `num_iterations` set to zero to prevent runner execution. 40 | FLAGS.gin_bindings = [ 41 | 'Runner.num_iterations=0', 42 | 'WrappedReplayBuffer.replay_capacity = 100' # To prevent OOM. 43 | ] 44 | FLAGS.alsologtostderr = True 45 | 46 | def test_atari_init(self): 47 | """Tests that a DQN agent is initialized.""" 48 | train.main([]) 49 | shutil.rmtree(FLAGS.base_dir) 50 | 51 | 52 | if __name__ == '__main__': 53 | tf.compat.v1.disable_v2_behavior() 54 | tf.test.main() 55 | -------------------------------------------------------------------------------- /tests/dopamine/discrete_domains/gym_lib_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Dopamine Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | """Tests for dopamine.discrete_domains.gym_lib.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | 22 | 23 | from dopamine.discrete_domains import gym_lib 24 | import tensorflow as tf 25 | 26 | 27 | class MockGymEnvironment(object): 28 | """Mock environment for testing.""" 29 | 30 | def __init__(self): 31 | self.observation_space = 'observation_space' 32 | self.action_space = 'action_space' 33 | self.reward_range = 'reward_range' 34 | self.metadata = 'metadata' 35 | 36 | def reset(self): 37 | return 'reset' 38 | 39 | def step(self, unused_action): 40 | return 'obs', 'rew', False, {} 41 | 42 | 43 | class GymPreprocessingTest(tf.test.TestCase): 44 | 45 | def testAll(self): 46 | env = gym_lib.GymPreprocessing(MockGymEnvironment()) 47 | self.assertEqual('observation_space', env.observation_space) 48 | self.assertEqual('action_space', env.action_space) 49 | self.assertEqual('reward_range', env.reward_range) 50 | self.assertEqual('metadata', env.metadata) 51 | self.assertEqual('reset', env.reset()) 52 | self.assertAllEqual(['obs', 'rew', False, {}], env.step(0)) 53 | 54 | 55 | if __name__ == '__main__': 56 | tf.compat.v1.disable_v2_behavior() 57 | tf.test.main() 58 | -------------------------------------------------------------------------------- /tests/dopamine/utils/agent_visualizer_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2019 The Dopamine Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | """Tests for dopamine.utils.agent_visualizer.""" 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | import os 21 | import shutil 22 | 23 | 24 | 25 | from absl import flags 26 | from dopamine.utils.agent_visualizer import AgentVisualizer 27 | from dopamine.utils.line_plotter import LinePlotter 28 | import numpy as np 29 | from PIL import Image 30 | import tensorflow as tf 31 | 32 | 33 | FLAGS = flags.FLAGS 34 | 35 | 36 | class AgentVisualizerTest(tf.test.TestCase): 37 | 38 | def setUp(self): 39 | super(AgentVisualizerTest, self).setUp() 40 | self._test_subdir = os.path.join('/tmp/dopamine_tests', 'agent_visualizer') 41 | shutil.rmtree(self._test_subdir, ignore_errors=True) 42 | os.makedirs(self._test_subdir) 43 | 44 | def test_agent_visualizer_save_frame(self): 45 | parameter_dict = LinePlotter._defaults.copy() 46 | parameter_dict['get_line_data_fn'] = lambda: [[1, 2, 3]] 47 | plotter = LinePlotter(parameter_dict=parameter_dict) 48 | 49 | agent_visualizer = AgentVisualizer(self._test_subdir, [plotter]) 50 | agent_visualizer.save_frame() 51 | 52 | frame_filename = os.path.join(self._test_subdir, 'frame_000000.png') 53 | self.assertTrue(tf.io.gfile.exists(frame_filename)) 54 | 55 | im = Image.open(frame_filename) 56 | im_arr = np.array(im) 57 | self.assertTrue(np.array_equal(im_arr, agent_visualizer.record_frame)) 58 | 59 | if __name__ == '__main__': 60 | tf.compat.v1.disable_v2_behavior() 61 | tf.test.main() 62 | --------------------------------------------------------------------------------