├── .gitignore ├── images ├── all_asterix_tb.png ├── c51_asterix_tb.png └── dopamine_logo.png ├── AUTHORS ├── docs └── api_docs │ └── python │ ├── dqn_agent.md │ ├── logger.md │ ├── iteration_statistics.md │ ├── run_experiment.md │ ├── train.md │ ├── utils │ ├── get_latest_file.md │ ├── get_latest_iteration.md │ ├── load_baselines.md │ ├── summarize_data.md │ ├── load_statistics.md │ └── read_experiment.md │ ├── implicit_quantile_agent.md │ ├── train │ ├── create_agent.md │ ├── launch_experiment.md │ └── create_runner.md │ ├── prioritized_replay_buffer.md │ ├── circular_replay_buffer.md │ ├── utils.md │ ├── rainbow_agent.md │ ├── iteration_statistics │ └── IterationStatistics.md │ ├── run_experiment │ ├── TrainRunner.md │ └── Runner.md │ ├── checkpointer.md │ ├── logger │ └── Logger.md │ ├── checkpointer │ └── Checkpointer.md │ ├── rainbow_agent │ └── project_distribution.md │ ├── index.md │ ├── _toc.yaml │ ├── implicit_quantile_agent │ └── ImplicitQuantileAgent.md │ ├── circular_replay_buffer │ └── WrappedReplayBuffer.md │ └── dqn_agent │ └── DQNAgent.md ├── dopamine ├── common │ ├── __init__.py │ ├── iteration_statistics.py │ └── logger.py ├── agents │ ├── __init__.py │ ├── dqn │ │ ├── __init__.py │ │ └── configs │ │ │ ├── dqn_icml.gin │ │ │ ├── dqn_nature.gin │ │ │ └── dqn.gin │ ├── rainbow │ │ ├── __init__.py │ │ └── configs │ │ │ ├── c51_icml.gin │ │ │ ├── rainbow_aaai.gin │ │ │ ├── c51.gin │ │ │ └── rainbow.gin │ └── implicit_quantile │ │ ├── __init__.py │ │ └── configs │ │ ├── implicit_quantile_icml.gin │ │ └── implicit_quantile.gin ├── atari │ └── __init__.py ├── colab │ ├── __init__.py │ ├── README.md │ └── tensorboard.ipynb ├── utils │ ├── __init__.py │ └── test_utils.py ├── replay_memory │ └── __init__.py └── __init__.py ├── CONTRIBUTING.md ├── baselines ├── data │ ├── alien.vg.json │ ├── hero.vg.json │ ├── krull.vg.json │ ├── pong.vg.json │ ├── qbert.vg.json │ ├── airraid.vg.json │ ├── amidar.vg.json │ ├── assault.vg.json │ ├── asterix.vg.json │ ├── atlantis.vg.json │ ├── berzerk.vg.json │ ├── bowling.vg.json │ ├── boxing.vg.json │ ├── breakout.vg.json │ ├── carnival.vg.json │ ├── enduro.vg.json │ ├── freeway.vg.json │ ├── gopher.vg.json │ ├── gravitar.vg.json │ ├── kangaroo.vg.json │ ├── mspacman.vg.json │ ├── phoenix.vg.json │ ├── pitfall.vg.json │ ├── pooyan.vg.json │ ├── robotank.vg.json │ ├── seaquest.vg.json │ ├── skiing.vg.json │ ├── solaris.vg.json │ ├── tennis.vg.json │ ├── upndown.vg.json │ ├── venture.vg.json │ ├── zaxxon.vg.json │ ├── asteroids.vg.json │ ├── bankheist.vg.json │ ├── battlezone.vg.json │ ├── beamrider.vg.json │ ├── centipede.vg.json │ ├── doubledunk.vg.json │ ├── frostbite.vg.json │ ├── icehockey.vg.json │ ├── jamesbond.vg.json │ ├── privateeye.vg.json │ ├── riverraid.vg.json │ ├── roadrunner.vg.json │ ├── stargunner.vg.json │ ├── timepilot.vg.json │ ├── tutankham.vg.json │ ├── crazyclimber.vg.json │ ├── demonattack.vg.json │ ├── fishingderby.vg.json │ ├── journeyescape.vg.json │ ├── kungfumaster.vg.json │ ├── namethisgame.vg.json │ ├── spaceinvaders.vg.json │ ├── videopinball.vg.json │ ├── wizardofwor.vg.json │ ├── yarsrevenge.vg.json │ ├── choppercommand.vg.json │ ├── elevatoraction.vg.json │ └── montezumarevenge.vg.json ├── README.md └── plots.html ├── tests ├── atari_init_test.py ├── common │ ├── iteration_statistics_test.py │ └── logger_test.py ├── train_runner_integration_test.py ├── atari │ ├── preprocessing_test.py │ └── train_test.py └── integration_test.py └── setup.py /.gitignore: 
-------------------------------------------------------------------------------- 1 | *.swp 2 | *.pyc 3 | *.pkl 4 | *.py~ 5 | .pytest_cache 6 | __pycache__ 7 | .cache 8 | -------------------------------------------------------------------------------- /images/all_asterix_tb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/dopamine/master/images/all_asterix_tb.png -------------------------------------------------------------------------------- /images/c51_asterix_tb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/dopamine/master/images/c51_asterix_tb.png -------------------------------------------------------------------------------- /images/dopamine_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/dopamine/master/images/dopamine_logo.png -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | # This is the list of Dopamine authors for copyright purposes. 2 | # 3 | # This does not necessarily list everyone who has contributed code, since in 4 | # some cases, their employer may be the copyright holder. To see the full list 5 | # of contributors, see the revision history in source control. 6 | 7 | Google Inc. 8 | -------------------------------------------------------------------------------- /docs/api_docs/python/dqn_agent.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 |
5 | 6 | # Module: dqn_agent 7 | 8 | Compact implementation of a DQN agent. 9 | 10 | ## Classes 11 | 12 | [`class DQNAgent`](./dqn_agent/DQNAgent.md): An implementation of the DQN agent. 13 | -------------------------------------------------------------------------------- /docs/api_docs/python/logger.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 |
5 | 6 | # Module: logger 7 | 8 | A lightweight logging mechanism for dopamine agents. 9 | 10 | ## Classes 11 | 12 | [`class Logger`](./logger/Logger.md): Class for maintaining a dictionary of data 13 | to log. 14 | -------------------------------------------------------------------------------- /docs/api_docs/python/iteration_statistics.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 |
5 | 6 | # Module: iteration_statistics 7 | 8 | A class for storing iteration-specific metrics. 9 | 10 | ## Classes 11 | 12 | [`class IterationStatistics`](./iteration_statistics/IterationStatistics.md): A 13 | class for storing iteration-specific metrics. 14 | -------------------------------------------------------------------------------- /docs/api_docs/python/run_experiment.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 |
5 | 6 | # Module: run_experiment 7 | 8 | Module defining classes and helper methods for running Atari 2600 agents. 9 | 10 | ## Classes 11 | 12 | [`class Runner`](./run_experiment/Runner.md): Object that handles running Atari 13 | 2600 experiments. 14 | 15 | [`class TrainRunner`](./run_experiment/TrainRunner.md): Object that handles 16 | running Atari 2600 experiments. 17 | -------------------------------------------------------------------------------- /docs/api_docs/python/train.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 |
5 | 6 | # Module: train 7 | 8 | The entry point for running an agent on an Atari 2600 domain. 9 | 10 | 11 | ## Functions 12 | 13 | [`create_agent(...)`](./train/create_agent.md): Creates a DQN agent. 14 | 15 | [`create_runner(...)`](./train/create_runner.md): Creates an experiment Runner. 16 | 17 | [`launch_experiment(...)`](./train/launch_experiment.md): Launches the 18 | experiment. 19 | -------------------------------------------------------------------------------- /docs/api_docs/python/utils/get_latest_file.md: -------------------------------------------------------------------------------- 1 |
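Read together, the three `train` functions above form the entire entry point: `launch_experiment` asks a runner factory for a `Runner` built around an agent factory and then drives it. Below is a minimal sketch of that composition; the `dopamine.atari.train` import path and the omission of flag parsing are assumptions of the sketch, not details taken from this page.

```python
# Hedged sketch of how the train module's documented pieces compose.
# The import path is an assumption; flag handling from the real entry
# point is omitted for brevity.
from dopamine.atari import train


def main():
  # create_runner builds a Runner around create_agent for a base directory;
  # launch_experiment wires the two factories together and starts the run.
  train.launch_experiment(train.create_runner, train.create_agent)


if __name__ == '__main__':
  main()
```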
2 | 3 | 4 |
5 | 6 | # utils.get_latest_file 7 | 8 | ```python 9 | utils.get_latest_file(path) 10 | ``` 11 | 12 | Return the file named 'path_[0-9]*' with the largest such number. 13 | 14 | #### Args: 15 | 16 | * `path`: The base path (including directory and base name) to search. 17 | 18 | #### Returns: 19 | 20 | The latest file (in terms of given numbers). 21 | -------------------------------------------------------------------------------- /docs/api_docs/python/implicit_quantile_agent.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 |
5 | 6 | # Module: implicit_quantile_agent 7 | 8 | The implicit quantile networks (IQN) agent. 9 | 10 | The agent follows the description given in "Implicit Quantile Networks for 11 | Distributional RL" (Dabney et. al, 2018). 12 | 13 | ## Classes 14 | 15 | [`class ImplicitQuantileAgent`](./implicit_quantile_agent/ImplicitQuantileAgent.md): 16 | An extension of Rainbow to perform implicit quantile regression. 17 | -------------------------------------------------------------------------------- /dopamine/common/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The Dopamine Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /dopamine/agents/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The Dopamine Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | -------------------------------------------------------------------------------- /dopamine/atari/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The Dopamine Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | -------------------------------------------------------------------------------- /dopamine/colab/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The Dopamine Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | -------------------------------------------------------------------------------- /dopamine/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The Dopamine Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | -------------------------------------------------------------------------------- /dopamine/agents/dqn/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The Dopamine Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | -------------------------------------------------------------------------------- /dopamine/agents/rainbow/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The Dopamine Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | -------------------------------------------------------------------------------- /dopamine/replay_memory/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The Dopamine Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | -------------------------------------------------------------------------------- /dopamine/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The Dopamine Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | name = 'dopamine' 15 | -------------------------------------------------------------------------------- /dopamine/agents/implicit_quantile/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The Dopamine Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | -------------------------------------------------------------------------------- /docs/api_docs/python/train/create_agent.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 |
5 | 6 | # train.create_agent 7 | 8 | ```python 9 | train.create_agent( 10 | sess, 11 | environment 12 | ) 13 | ``` 14 | 15 | Creates a DQN agent. 16 | 17 | #### Args: 18 | 19 | * `sess`: A `tf.Session` object for running associated ops. 20 | * `environment`: An Atari 2600 Gym environment. 21 | 22 | #### Returns: 23 | 24 | * `agent`: An RL agent. 25 | 26 | #### Raises: 27 | 28 | * `ValueError`: If `agent_name` is not in the supported list. 29 | -------------------------------------------------------------------------------- /docs/api_docs/python/utils/get_latest_iteration.md: -------------------------------------------------------------------------------- 1 |
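The `Raises` entry above implies a dispatch on an agent name. The sketch below illustrates that pattern; the `agent_name` argument and the exact constructor signatures are assumptions made for illustration, not the documented API.

```python
# Illustrative dispatch consistent with the ValueError documented above;
# constructor arguments are assumptions for the sketch.
from dopamine.agents.dqn import dqn_agent
from dopamine.agents.implicit_quantile import implicit_quantile_agent
from dopamine.agents.rainbow import rainbow_agent


def create_agent(sess, environment, agent_name='dqn'):
  num_actions = environment.action_space.n
  if agent_name == 'dqn':
    return dqn_agent.DQNAgent(sess, num_actions=num_actions)
  elif agent_name == 'rainbow':
    return rainbow_agent.RainbowAgent(sess, num_actions=num_actions)
  elif agent_name == 'implicit_quantile':
    return implicit_quantile_agent.ImplicitQuantileAgent(
        sess, num_actions=num_actions)
  raise ValueError('Unknown agent: {}'.format(agent_name))
```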
2 | 3 | 4 |
5 | 6 | # utils.get_latest_iteration 7 | 8 | ```python 9 | utils.get_latest_iteration(path) 10 | ``` 11 | 12 | Return the largest iteration number corresponding to the given path. 13 | 14 | #### Args: 15 | 16 | * `path`: The base path (including directory and base name) to search. 17 | 18 | #### Returns: 19 | 20 | The latest iteration number. 21 | 22 | #### Raises: 23 | 24 | * `ValueError`: if there is no available log data at the given path. 25 | -------------------------------------------------------------------------------- /docs/api_docs/python/utils/load_baselines.md: -------------------------------------------------------------------------------- 1 |
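A short usage sketch for the two lookup helpers documented above (`get_latest_file` and `get_latest_iteration`). It assumes the colab utilities module (`dopamine/colab/utils.py`, mentioned in the colab README further down) and files on disk named with an underscore-number suffix such as `log_0`, `log_1`, ...; the paths are illustrative.

```python
# Illustrative lookups; the directory and file prefix are made up.
from dopamine.colab import utils

log_prefix = '/tmp/dopamine_run/logs/log'   # matches files like log_0, log_1, ...
latest_file = utils.get_latest_file(log_prefix)            # e.g. '/tmp/dopamine_run/logs/log_199'
latest_iteration = utils.get_latest_iteration(log_prefix)  # e.g. 199
```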
2 | 3 | 4 |
5 | 6 | # utils.load_baselines 7 | 8 | ```python 9 | utils.load_baselines( 10 | base_dir, 11 | verbose=False 12 | ) 13 | ``` 14 | 15 | Reads in the baseline experimental data from a specified base directory. 16 | 17 | #### Args: 18 | 19 | * `base_dir`: string, base directory where to read data from. 20 | * `verbose`: bool, whether to print warning messages. 21 | 22 | #### Returns: 23 | 24 | A dict containing pandas DataFrames for all available agents and games. 25 | -------------------------------------------------------------------------------- /docs/api_docs/python/train/launch_experiment.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 |
5 | 6 | # train.launch_experiment 7 | 8 | ```python 9 | train.launch_experiment( 10 | create_runner_fn, 11 | create_agent_fn 12 | ) 13 | ``` 14 | 15 | Launches the experiment. 16 | 17 | #### Args: 18 | 19 | * `create_runner_fn`: A function that takes as args a base directory 20 | and a function for creating an agent and returns a `Runner`-like object. 21 | * `create_agent_fn`: A function that takes as args a Tensorflow session 22 | and an Atari 2600 Gym environment, and returns an agent. 23 | -------------------------------------------------------------------------------- /docs/api_docs/python/train/create_runner.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 |
5 | 6 | # train.create_runner 7 | 8 | ```python 9 | train.create_runner( 10 | base_dir, 11 | create_agent_fn 12 | ) 13 | ``` 14 | 15 | Creates an experiment Runner. 16 | 17 | #### Args: 18 | 19 | * `base_dir`: str, base directory for hosting all subdirectories. 20 | * `create_agent_fn`: A function that takes as args a Tensorflow session 21 | and an Atari 2600 Gym environment, and returns an agent. 22 | 23 | #### Returns: 24 | 25 | * `runner`: A 26 | run_experiment.Runner 27 | like object. 28 | 29 | #### Raises: 30 | 31 | * `ValueError`: When an unknown schedule is encountered. 32 | -------------------------------------------------------------------------------- /docs/api_docs/python/utils/summarize_data.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 |
5 | 6 | # utils.summarize_data 7 | 8 | ```python 9 | utils.summarize_data( 10 | data, 11 | summary_keys 12 | ) 13 | ``` 14 | 15 | Processes log data into a per-iteration summary. 16 | 17 | #### Args: 18 | 19 | * `data`: Dictionary loaded by load_statistics describing the data. 20 | This dictionary has keys iteration_0, iteration_1, ... describing 21 | per-iteration data. 22 | * `summary_keys`: List of per-iteration data to be summarized. 23 | 24 | Example: data = load_statistics(...) summarize_data(data, 25 | ['train_episode_returns', 'eval_episode_returns']) 26 | 27 | #### Returns: 28 | 29 | A dictionary mapping each key in summary_keys to a per-iteration summary. 30 | -------------------------------------------------------------------------------- /docs/api_docs/python/prioritized_replay_buffer.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 |
5 | 6 | # Module: prioritized_replay_buffer 7 | 8 | An implementation of Prioritized Experience Replay (PER). 9 | 10 | This implementation is based on the paper "Prioritized Experience Replay" by Tom 11 | Schaul et al. (2015). Many thanks to Tom Schaul, John Quan, and Matteo Hessel 12 | for providing useful pointers on the algorithm and its implementation. 13 | 14 | ## Classes 15 | 16 | [`class OutOfGraphPrioritizedReplayBuffer`](./prioritized_replay_buffer/OutOfGraphPrioritizedReplayBuffer.md): 17 | An out-of-graph Replay Buffer for Prioritized Experience Replay. 18 | 19 | [`class WrappedPrioritizedReplayBuffer`](./prioritized_replay_buffer/WrappedPrioritizedReplayBuffer.md): 20 | Wrapper of OutOfGraphPrioritizedReplayBuffer with in-graph sampling. 21 | -------------------------------------------------------------------------------- /docs/api_docs/python/circular_replay_buffer.md: -------------------------------------------------------------------------------- 1 |
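For intuition, the proportional scheme from Schaul et al. that the buffer above implements samples transition i with probability proportional to its priority raised to an exponent. The NumPy sketch below illustrates only that rule; it is not the sum-tree code the buffer actually uses, and the exponent value is an example assumption.

```python
# Illustrative proportional prioritization: P(i) = p_i**alpha / sum_k p_k**alpha.
# Not the buffer's real implementation; alpha here is just an example value.
import numpy as np


def sample_indices(priorities, batch_size, alpha=0.5):
  scaled = np.power(np.asarray(priorities, dtype=np.float64), alpha)
  probs = scaled / scaled.sum()
  return np.random.choice(len(priorities), size=batch_size, p=probs)
```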
2 | 3 | 4 |
5 | 6 | # Module: circular_replay_buffer 7 | 8 | The standard DQN replay memory. 9 | 10 | This implementation is an out-of-graph replay memory + in-graph wrapper. It 11 | supports vanilla n-step updates of the form typically found in the literature, 12 | i.e. where rewards are accumulated for n steps and the intermediate trajectory 13 | is not exposed to the agent. This does not allow, for example, performing 14 | off-policy corrections. 15 | 16 | ## Classes 17 | 18 | [`class OutOfGraphReplayBuffer`](./circular_replay_buffer/OutOfGraphReplayBuffer.md): 19 | A simple out-of-graph Replay Buffer. 20 | 21 | [`class WrappedReplayBuffer`](./circular_replay_buffer/WrappedReplayBuffer.md): 22 | Wrapper of OutOfGraphReplayBuffer with an in-graph sampling mechanism. 23 | -------------------------------------------------------------------------------- /docs/api_docs/python/utils/load_statistics.md: -------------------------------------------------------------------------------- 1 |
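The "vanilla n-step updates" mentioned for the replay memory above amount to summing discounted rewards over the horizon and bootstrapping once at the end. A small illustrative helper (not code from the buffer itself):

```python
# Illustrative n-step target: accumulate n rewards, bootstrap at step t+n.
# The intermediate trajectory is never exposed, which is why off-policy
# corrections cannot be applied here.
def n_step_return(rewards, bootstrap_value, gamma=0.99):
  """rewards: the n rewards r_t, ..., r_{t+n-1}; bootstrap_value: estimate at t+n."""
  ret = 0.0
  for i, r in enumerate(rewards):
    ret += (gamma ** i) * r
  return ret + (gamma ** len(rewards)) * bootstrap_value
```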
2 | 3 | 4 |
5 | 6 | # utils.load_statistics 7 | 8 | ```python 9 | utils.load_statistics( 10 | log_path, 11 | iteration_number=None, 12 | verbose=True 13 | ) 14 | ``` 15 | 16 | Reads in a statistics object from log_path. 17 | 18 | #### Args: 19 | 20 | * `log_path`: string, provides the full path to the training/eval 21 | statistics. 22 | * `iteration_number`: The iteration number of the statistics object we 23 | want to read. If set to None, load the latest version. 24 | * `verbose`: Whether to output information about the load procedure. 25 | 26 | #### Returns: 27 | 28 | * `data`: The requested statistics object. 29 | * `iteration`: The corresponding iteration number. 30 | 31 | #### Raises: 32 | 33 | * `Exception`: if data is not present. 34 | -------------------------------------------------------------------------------- /docs/api_docs/python/utils.md: -------------------------------------------------------------------------------- 1 |
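Putting `load_statistics` together with `summarize_data` (both documented nearby), a typical loading session looks like the hedged sketch below; the log directory is illustrative and the import path assumes the colab utilities module.

```python
# Illustrative loading session; the path is made up.
from dopamine.colab import utils

data, iteration = utils.load_statistics('/tmp/dopamine_run/logs', verbose=False)
summary = utils.summarize_data(
    data, ['train_episode_returns', 'eval_episode_returns'])
print('Loaded iteration:', iteration)
print('Per-iteration summary of train returns:', summary['train_episode_returns'])
```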
2 | 3 | 4 |
5 | 6 | # Module: utils 7 | 8 | This provides utilities for dealing with Dopamine data. 9 | 10 | See: dopamine/common/logger.py . 11 | 12 | ## Functions 13 | 14 | [`get_latest_file(...)`](./utils/get_latest_file.md): Return the file named 15 | 'path_[0-9]*' with the largest such number. 16 | 17 | [`get_latest_iteration(...)`](./utils/get_latest_iteration.md): Return the 18 | largest iteration number corresponding to the given path. 19 | 20 | [`load_baselines(...)`](./utils/load_baselines.md): Reads in the baseline 21 | experimental data from a specified base directory. 22 | 23 | [`load_statistics(...)`](./utils/load_statistics.md): Reads in a statistics 24 | object from log_path. 25 | 26 | [`read_experiment(...)`](./utils/read_experiment.md): Reads in a set of 27 | experimental results from log_path. 28 | 29 | [`summarize_data(...)`](./utils/summarize_data.md): Processes log data into a 30 | per-iteration summary. 31 | -------------------------------------------------------------------------------- /dopamine/colab/README.md: -------------------------------------------------------------------------------- 1 | # Colabs 2 | 3 | This directory contains 4 | [`utils.py`](https://github.com/google/dopamine/blob/master/dopamine/colab/utils.py), 5 | which provides a number of useful utilities for loading experiment statistics. 6 | 7 | We also provide a set of colabs to help illustrate how you can use Dopamine. 8 | 9 | ## Agents 10 | 11 | In this 12 | [colab](https://colab.research.google.com/github/google/dopamine/blob/master/dopamine/colab/agents.ipynb) 13 | we illustrate how to create a new agent by either subclassing 14 | [`DQN`](https://github.com/google/dopamine/blob/master/dopamine/agents/dqn/dqn_agent.py) 15 | or by creating a new agent from scratch. 16 | 17 | ## Loading statistics 18 | 19 | In this 20 | [colab](https://colab.research.google.com/github/google/dopamine/blob/master/dopamine/colab/load_statistics.ipynb) 21 | we illustrate how to load and visualize the logs data produced by Dopamine. 22 | 23 | ## Visualizing with Tensorboard 24 | In this 25 | [colab](https://colab.research.google.com/github/google/dopamine/blob/master/dopamine/colab/tensorboard.ipynb) 26 | we illustrate how to download and visualize different agents with Tensorboard. 27 | -------------------------------------------------------------------------------- /dopamine/utils/test_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The Dopamine Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | """Common testing utilities shared across agents.""" 15 | 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | 21 | 22 | import mock 23 | import tensorflow as tf 24 | 25 | 26 | class MockReplayBuffer(object): 27 | """Mock ReplayBuffer to verify the way the agent interacts with it.""" 28 | 29 | def __init__(self): 30 | with tf.variable_scope('MockReplayBuffer', reuse=tf.AUTO_REUSE): 31 | self.add = mock.Mock() 32 | self.memory = mock.Mock() 33 | self.memory.add_count = 0 34 | -------------------------------------------------------------------------------- /docs/api_docs/python/rainbow_agent.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 |
5 | 6 | # Module: rainbow_agent 7 | 8 | Compact implementation of a simplified Rainbow agent. 9 | 10 | Specifically, we implement the following components from Rainbow: 11 | 12 | * n-step updates; 13 | * prioritized replay; and 14 | * distributional RL. 15 | 16 | These three components were found to significantly impact the performance of the 17 | Atari game-playing agent. 18 | 19 | Furthermore, our implementation does away with some minor hyperparameter 20 | choices. Specifically, we 21 | 22 | * keep the beta exponent fixed at beta=0.5, rather than increase it linearly; 23 | * remove the alpha parameter, which was set to alpha=0.5 throughout the paper. 24 | 25 | Details in "Rainbow: Combining Improvements in Deep Reinforcement Learning" by 26 | Hessel et al. (2018). 27 | 28 | ## Classes 29 | 30 | [`class RainbowAgent`](./rainbow_agent/RainbowAgent.md): A compact 31 | implementation of a simplified Rainbow agent. 32 | 33 | ## Functions 34 | 35 | [`project_distribution(...)`](./rainbow_agent/project_distribution.md): Projects 36 | a batch of (support, weights) onto target_support. 37 | -------------------------------------------------------------------------------- /docs/api_docs/python/iteration_statistics/IterationStatistics.md: -------------------------------------------------------------------------------- 1 |
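`project_distribution`, listed above, is the core of the categorical (C51-style) update this agent uses: a distribution supported on arbitrary atoms is redistributed onto the agent's fixed support. The NumPy sketch below illustrates that projection for a single unbatched distribution; the real function is a batched, in-graph TensorFlow op, so treat this only as a reference for the arithmetic.

```python
# Illustrative single-sample categorical projection (Bellemare et al., 2017).
# `supports` and `weights` describe the source distribution; `target_support`
# is the fixed, evenly spaced support to project onto.
import numpy as np


def project_distribution(supports, weights, target_support):
  v_min, v_max = target_support[0], target_support[-1]
  delta_z = target_support[1] - target_support[0]
  clipped = np.clip(supports, v_min, v_max)
  projection = np.zeros(len(target_support))
  for z, w in zip(clipped, weights):
    b = (z - v_min) / delta_z            # continuous bin position
    lower, upper = int(np.floor(b)), int(np.ceil(b))
    if lower == upper:                   # atom falls exactly on a bin
      projection[lower] += w
    else:                                # split mass between neighbours
      projection[lower] += w * (upper - b)
      projection[upper] += w * (b - lower)
  return projection
```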
2 | 3 | 4 | 5 | 6 |
7 | 8 | # iteration_statistics.IterationStatistics 9 | 10 | ## Class `IterationStatistics` 11 | 12 | A class for storing iteration-specific metrics. 13 | 14 | The internal format is as follows: we maintain a mapping from keys to lists. 15 | Each list contains all the values corresponding to the given key. 16 | 17 | For example, self.data_lists['train_episode_returns'] might contain the 18 | per-episode returns achieved during this iteration. 19 | 20 | #### Attributes: 21 | 22 | * `data_lists`: dict mapping each metric_name (str) to a list of said 23 | metric across episodes. 24 | 25 | ## Methods 26 | 27 |
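A small usage sketch based on the class description above and the `append` method documented below; the metric names are the illustrative ones from the docstring and the recorded values are made up.

```python
# Illustrative use of IterationStatistics; values are made up.
from dopamine.common import iteration_statistics

stats = iteration_statistics.IterationStatistics()
stats.append({'train_episode_returns': 21.0, 'train_episode_lengths': 830})
stats.append({'train_episode_returns': 17.5, 'train_episode_lengths': 764})
# Each key now maps to the list of values recorded during the iteration.
print(stats.data_lists['train_episode_returns'])  # [21.0, 17.5]
```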

__init__

28 | 29 | ```python 30 | __init__() 31 | ``` 32 | 33 |

append

34 | 35 | ```python 36 | append(data_pairs) 37 | ``` 38 | 39 | Add the given values to their corresponding key-indexed lists. 40 | 41 | #### Args: 42 | 43 | * `data_pairs`: A dictionary of key-value pairs to be recorded. 44 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | # Issues 4 | 5 | * Please tag your issue with `bug`, `feature request`, or `question` to help us 6 | effectively respond. 7 | * Please include the version of Dopamine you are running 8 | (run `pip list | grep dopamine`) 9 | * Please provide the command line you ran as well as the log output. 10 | 11 | # Pull Requests 12 | 13 | We'd love to accept your patches and contributions to this project. There are 14 | just a few small guidelines you need to follow. 15 | 16 | ## Contributor License Agreement 17 | 18 | Contributions to this project must be accompanied by a Contributor License 19 | Agreement. You (or your employer) retain the copyright to your contribution, 20 | this simply gives us permission to use and redistribute your contributions as 21 | part of the project. Head over to to see 22 | your current agreements on file or to sign a new one. 23 | 24 | You generally only need to submit a CLA once, so if you've already submitted one 25 | (even if it was for a different project), you probably don't need to do it 26 | again. 27 | 28 | ## Code reviews 29 | 30 | All submissions, including submissions by project members, require review. We 31 | use GitHub pull requests for this purpose. Consult 32 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 33 | information on using pull requests. 34 | -------------------------------------------------------------------------------- /docs/api_docs/python/run_experiment/TrainRunner.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 | 5 | 6 |
7 | 8 | # run_experiment.TrainRunner 9 | 10 | ## Class `TrainRunner` 11 | 12 | Inherits From: [`Runner`](../run_experiment/Runner.md) 13 | 14 | Object that handles running Atari 2600 experiments. 15 | 16 | The `TrainRunner` differs from the base `Runner` class in that it does not run the 17 | evaluation phase. Checkpointing and logging for the train phase are preserved as 18 | before. 19 | 20 | ## Methods 21 | 22 |
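A hedged usage sketch based on the constructor arguments documented under `__init__` below; the import path matches the `dopamine.atari.run_experiment` module referenced by the gin configs later in this dump, and the agent factory is left as a stub.

```python
# Illustrative TrainRunner setup; the base directory is made up and the
# agent factory is a stub standing in for e.g. a DQNAgent constructor.
from dopamine.atari import run_experiment


def create_agent_fn(sess, environment):
  ...  # return an agent built for `environment`, e.g. a DQNAgent


runner = run_experiment.TrainRunner('/tmp/dopamine_train', create_agent_fn)
runner.run_experiment()  # training only; no evaluation phase
```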

__init__

23 | 24 | ```python 25 | __init__( 26 | *args, 27 | **kwargs 28 | ) 29 | ``` 30 | 31 | Initialize the TrainRunner object in charge of running a full experiment. 32 | 33 | #### Args: 34 | 35 | * `base_dir`: str, the base directory to host all required 36 | sub-directories. 37 | * `create_agent_fn`: A function that takes as args a Tensorflow session 38 | and an Atari 2600 Gym environment, and returns an agent. 39 | 40 |

run_experiment

41 | 42 | ```python 43 | run_experiment() 44 | ``` 45 | 46 | Runs a full experiment, spread over multiple iterations. 47 | -------------------------------------------------------------------------------- /dopamine/agents/dqn/configs/dqn_icml.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters used for reporting DQN results in Bellemare et al. (2017). 2 | import dopamine.atari.run_experiment 3 | import dopamine.agents.dqn.dqn_agent 4 | import dopamine.replay_memory.circular_replay_buffer 5 | import gin.tf.external_configurables 6 | 7 | DQNAgent.gamma = 0.99 8 | DQNAgent.update_horizon = 1 9 | DQNAgent.min_replay_history = 50000 # agent steps 10 | DQNAgent.update_period = 4 11 | DQNAgent.target_update_period = 10000 # agent steps 12 | DQNAgent.epsilon_train = 0.01 13 | DQNAgent.epsilon_eval = 0.001 14 | DQNAgent.epsilon_decay_period = 1000000 # agent steps 15 | DQNAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version 16 | DQNAgent.optimizer = @tf.train.RMSPropOptimizer() 17 | 18 | tf.train.RMSPropOptimizer.learning_rate = 0.00025 19 | tf.train.RMSPropOptimizer.decay = 0.95 20 | tf.train.RMSPropOptimizer.momentum = 0.0 21 | tf.train.RMSPropOptimizer.epsilon = 0.00001 22 | tf.train.RMSPropOptimizer.centered = True 23 | 24 | Runner.game_name = 'Pong' 25 | # Deterministic ALE version used in the DQN Nature paper (Mnih et al., 2015). 26 | Runner.sticky_actions = False 27 | Runner.num_iterations = 200 28 | Runner.training_steps = 250000 # agent steps 29 | Runner.evaluation_steps = 125000 # agent steps 30 | Runner.max_steps_per_episode = 27000 # agent steps 31 | 32 | WrappedReplayBuffer.replay_capacity = 1000000 33 | WrappedReplayBuffer.batch_size = 32 34 | -------------------------------------------------------------------------------- /dopamine/agents/dqn/configs/dqn_nature.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters used in Mnih et al. (2015). 2 | import dopamine.atari.preprocessing 3 | import dopamine.atari.run_experiment 4 | import dopamine.agents.dqn.dqn_agent 5 | import dopamine.replay_memory.circular_replay_buffer 6 | import gin.tf.external_configurables 7 | 8 | DQNAgent.gamma = 0.99 9 | DQNAgent.update_horizon = 1 10 | DQNAgent.min_replay_history = 50000 # agent steps 11 | DQNAgent.update_period = 4 12 | DQNAgent.target_update_period = 10000 # agent steps 13 | DQNAgent.epsilon_train = 0.1 14 | DQNAgent.epsilon_eval = 0.05 15 | DQNAgent.epsilon_decay_period = 1000000 # agent steps 16 | DQNAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version 17 | DQNAgent.optimizer = @tf.train.RMSPropOptimizer() 18 | 19 | tf.train.RMSPropOptimizer.learning_rate = 0.00025 20 | tf.train.RMSPropOptimizer.decay = 0.95 21 | tf.train.RMSPropOptimizer.momentum = 0.0 22 | tf.train.RMSPropOptimizer.epsilon = 0.00001 23 | tf.train.RMSPropOptimizer.centered = True 24 | 25 | Runner.game_name = 'Pong' 26 | # Deterministic ALE version used in the DQN Nature paper (Mnih et al., 2015). 
27 | Runner.sticky_actions = False 28 | Runner.num_iterations = 200 29 | Runner.training_steps = 250000 # agent steps 30 | Runner.evaluation_steps = 125000 # agent steps 31 | Runner.max_steps_per_episode = 27000 # agent steps 32 | 33 | AtariPreprocessing.terminal_on_life_loss = True 34 | 35 | WrappedReplayBuffer.replay_capacity = 1000000 36 | WrappedReplayBuffer.batch_size = 32 37 | -------------------------------------------------------------------------------- /dopamine/agents/dqn/configs/dqn.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow the classic Nature DQN, but we modify as necessary to 2 | # match those used in Rainbow (Hessel et al., 2018), to ensure apples-to-apples 3 | # comparison. 4 | import dopamine.atari.run_experiment 5 | import dopamine.agents.dqn.dqn_agent 6 | import dopamine.replay_memory.circular_replay_buffer 7 | import gin.tf.external_configurables 8 | 9 | DQNAgent.gamma = 0.99 10 | DQNAgent.update_horizon = 1 11 | DQNAgent.min_replay_history = 20000 # agent steps 12 | DQNAgent.update_period = 4 13 | DQNAgent.target_update_period = 8000 # agent steps 14 | DQNAgent.epsilon_train = 0.01 15 | DQNAgent.epsilon_eval = 0.001 16 | DQNAgent.epsilon_decay_period = 250000 # agent steps 17 | DQNAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version 18 | DQNAgent.optimizer = @tf.train.RMSPropOptimizer() 19 | 20 | tf.train.RMSPropOptimizer.learning_rate = 0.00025 21 | tf.train.RMSPropOptimizer.decay = 0.95 22 | tf.train.RMSPropOptimizer.momentum = 0.0 23 | tf.train.RMSPropOptimizer.epsilon = 0.00001 24 | tf.train.RMSPropOptimizer.centered = True 25 | 26 | Runner.game_name = 'Pong' 27 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017). 28 | Runner.sticky_actions = True 29 | Runner.num_iterations = 200 30 | Runner.training_steps = 250000 # agent steps 31 | Runner.evaluation_steps = 125000 # agent steps 32 | Runner.max_steps_per_episode = 27000 # agent steps 33 | 34 | WrappedReplayBuffer.replay_capacity = 1000000 35 | WrappedReplayBuffer.batch_size = 32 36 | -------------------------------------------------------------------------------- /dopamine/agents/implicit_quantile/configs/implicit_quantile_icml.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow Dabney et al. 
(2018) 2 | import dopamine.agents.implicit_quantile.implicit_quantile_agent 3 | import dopamine.agents.rainbow.rainbow_agent 4 | import dopamine.atari.run_experiment 5 | import dopamine.replay_memory.prioritized_replay_buffer 6 | import gin.tf.external_configurables 7 | 8 | ImplicitQuantileAgent.kappa = 1.0 9 | ImplicitQuantileAgent.num_tau_samples = 64 10 | ImplicitQuantileAgent.num_tau_prime_samples = 64 11 | ImplicitQuantileAgent.num_quantile_samples = 32 12 | RainbowAgent.gamma = 0.99 13 | RainbowAgent.update_horizon = 1 14 | RainbowAgent.min_replay_history = 50000 # agent steps 15 | RainbowAgent.update_period = 4 16 | RainbowAgent.target_update_period = 10000 # agent steps 17 | RainbowAgent.epsilon_train = 0.01 18 | RainbowAgent.epsilon_eval = 0.001 19 | RainbowAgent.epsilon_decay_period = 1000000 # agent steps 20 | RainbowAgent.replay_scheme = 'uniform' 21 | RainbowAgent.tf_device = '/gpu:0' # '/cpu:*' use for non-GPU version 22 | RainbowAgent.optimizer = @tf.train.AdamOptimizer() 23 | 24 | tf.train.AdamOptimizer.learning_rate = 0.00005 25 | tf.train.AdamOptimizer.epsilon = 0.0003125 26 | 27 | Runner.game_name = 'Pong' 28 | Runner.sticky_actions = False 29 | Runner.num_iterations = 200 30 | Runner.training_steps = 250000 31 | Runner.evaluation_steps = 125000 32 | Runner.max_steps_per_episode = 27000 33 | 34 | WrappedPrioritizedReplayBuffer.replay_capacity = 1000000 35 | WrappedPrioritizedReplayBuffer.batch_size = 32 36 | -------------------------------------------------------------------------------- /baselines/data/alien.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/alien.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/hero.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/hero.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% 
CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/krull.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/krull.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/pong.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/pong.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/qbert.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": 
"data/qbert.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/airraid.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/airraid.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/amidar.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/amidar.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | 
"color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/assault.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/assault.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/asterix.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/asterix.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/atlantis.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/atlantis.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% 
CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/berzerk.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/berzerk.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/bowling.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/bowling.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/boxing.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | 
"data": {"url": "data/boxing.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/breakout.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/breakout.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/carnival.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/carnival.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": 
"quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/enduro.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/enduro.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/freeway.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/freeway.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/gopher.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/gopher.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": 
{"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/gravitar.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/gravitar.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/kangaroo.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/kangaroo.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/mspacman.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 
| "padding": 5, 6 | 7 | "data": {"url": "data/mspacman.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/phoenix.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/phoenix.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/pitfall.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/pitfall.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 
| "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/pooyan.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/pooyan.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/robotank.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/robotank.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/seaquest.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/seaquest.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 
| "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/skiing.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/skiing.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/solaris.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/solaris.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/tennis.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 
5 | "padding": 5, 6 | 7 | "data": {"url": "data/tennis.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/upndown.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/upndown.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/venture.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/venture.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 
| "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/zaxxon.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/zaxxon.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /dopamine/agents/rainbow/configs/c51_icml.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters used in Bellemare et al. (2017). 2 | import dopamine.atari.preprocessing 3 | import dopamine.agents.rainbow.rainbow_agent 4 | import dopamine.atari.run_experiment 5 | import dopamine.replay_memory.prioritized_replay_buffer 6 | import gin.tf.external_configurables 7 | 8 | RainbowAgent.num_atoms = 51 9 | RainbowAgent.vmax = 10. 10 | RainbowAgent.gamma = 0.99 11 | RainbowAgent.update_horizon = 1 12 | RainbowAgent.min_replay_history = 50000 # agent steps 13 | RainbowAgent.update_period = 4 14 | RainbowAgent.target_update_period = 10000 # agent steps 15 | RainbowAgent.epsilon_train = 0.01 16 | RainbowAgent.epsilon_eval = 0.001 17 | RainbowAgent.epsilon_decay_period = 1000000 # agent steps 18 | RainbowAgent.replay_scheme = 'uniform' 19 | RainbowAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version 20 | RainbowAgent.optimizer = @tf.train.AdamOptimizer() 21 | 22 | tf.train.AdamOptimizer.learning_rate = 0.00025 23 | tf.train.AdamOptimizer.epsilon = 0.0003125 24 | 25 | Runner.game_name = 'Pong' 26 | # Deterministic ALE version used in the DQN Nature paper (Mnih et al., 2015). 
27 | Runner.sticky_actions = False 28 | Runner.num_iterations = 200 29 | Runner.training_steps = 250000 # agent steps 30 | Runner.evaluation_steps = 125000 # agent steps 31 | Runner.max_steps_per_episode = 27000 # agent steps 32 | 33 | AtariPreprocessing.terminal_on_life_loss = True 34 | 35 | WrappedPrioritizedReplayBuffer.replay_capacity = 1000000 36 | WrappedPrioritizedReplayBuffer.batch_size = 32 37 | -------------------------------------------------------------------------------- /baselines/data/asteroids.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/asteroids.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/bankheist.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/bankheist.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/battlezone.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/battlezone.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", 
"RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/beamrider.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/beamrider.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/centipede.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/centipede.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | 
-------------------------------------------------------------------------------- /baselines/data/doubledunk.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/doubledunk.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/frostbite.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/frostbite.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/icehockey.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/icehockey.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": 
"quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/jamesbond.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/jamesbond.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/privateeye.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/privateeye.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/riverraid.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/riverraid.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 
11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/roadrunner.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/roadrunner.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/stargunner.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/stargunner.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": 
"nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/timepilot.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/timepilot.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/tutankham.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/tutankham.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/crazyclimber.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/crazyclimber.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | 
"aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/demonattack.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/demonattack.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/fishingderby.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/fishingderby.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/journeyescape.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": 
{"url": "data/journeyescape.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/kungfumaster.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/kungfumaster.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/namethisgame.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/namethisgame.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | 
"type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/spaceinvaders.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/spaceinvaders.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/videopinball.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/videopinball.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/wizardofwor.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/wizardofwor.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | 
"type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/yarsrevenge.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/yarsrevenge.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/choppercommand.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/choppercommand.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/elevatoraction.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": 
"https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/elevatoraction.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /baselines/data/montezumarevenge.vg.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 3 | "width": 800, 4 | "height": 400, 5 | "padding": 5, 6 | 7 | "data": {"url": "data/montezumarevenge.json"}, 8 | "transform": [ 9 | { 10 | "filter": { 11 | "field": "Agent", 12 | "oneOf": ["DQN", "C51", "RAINBOW", "IMPLICIT_QUANTILE"] 13 | } 14 | } 15 | ], 16 | "layer": [ 17 | { 18 | "mark": "area", 19 | "encoding": { 20 | "x": { 21 | "field": "Iteration", 22 | "type": "quantitative" 23 | }, 24 | "y": { 25 | "aggregate": "ci0", 26 | "field": "Value", 27 | "type": "quantitative", 28 | "axis": {"title": "Return (95% CIs)"} 29 | }, 30 | "y2": { 31 | "aggregate": "ci1", 32 | "field": "Value", 33 | "type": "quantitative" 34 | }, 35 | "color": { 36 | "field": "Agent", 37 | "type": "nominal" 38 | }, 39 | "opacity": {"value": 0.3} 40 | } 41 | }, 42 | { 43 | "mark": "line", 44 | "encoding": { 45 | "x": { 46 | "field": "Iteration", 47 | "type": "quantitative" 48 | }, 49 | "y": { 50 | "aggregate": "mean", 51 | "field": "Value", 52 | "type": "quantitative" 53 | }, 54 | "color": { 55 | "field": "Agent", 56 | "type": "nominal" 57 | } 58 | } 59 | } 60 | ] 61 | } 62 | 63 | -------------------------------------------------------------------------------- /dopamine/agents/rainbow/configs/rainbow_aaai.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow Hessel et al. (2018). 2 | import dopamine.atari.preprocessing 3 | import dopamine.agents.rainbow.rainbow_agent 4 | import dopamine.atari.run_experiment 5 | import dopamine.replay_memory.prioritized_replay_buffer 6 | import gin.tf.external_configurables 7 | 8 | RainbowAgent.num_atoms = 51 9 | RainbowAgent.vmax = 10. 
10 | RainbowAgent.gamma = 0.99 11 | RainbowAgent.update_horizon = 3 12 | RainbowAgent.min_replay_history = 20000 # agent steps 13 | RainbowAgent.update_period = 4 14 | RainbowAgent.target_update_period = 8000 # agent steps 15 | RainbowAgent.epsilon_train = 0.01 16 | RainbowAgent.epsilon_eval = 0.001 17 | RainbowAgent.epsilon_decay_period = 250000 # agent steps 18 | RainbowAgent.replay_scheme = 'prioritized' 19 | RainbowAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version 20 | RainbowAgent.optimizer = @tf.train.AdamOptimizer() 21 | 22 | # Note these parameters are different from C51's. 23 | tf.train.AdamOptimizer.learning_rate = 0.0000625 24 | tf.train.AdamOptimizer.epsilon = 0.00015 25 | 26 | Runner.game_name = 'Pong' 27 | # Deterministic ALE version used in the AAAI paper. 28 | Runner.sticky_actions = False 29 | Runner.num_iterations = 200 30 | Runner.training_steps = 250000 # agent steps 31 | Runner.evaluation_steps = 125000 # agent steps 32 | Runner.max_steps_per_episode = 27000 # agent steps 33 | 34 | AtariPreprocessing.terminal_on_life_loss = True 35 | 36 | WrappedPrioritizedReplayBuffer.replay_capacity = 1000000 37 | WrappedPrioritizedReplayBuffer.batch_size = 32 38 | -------------------------------------------------------------------------------- /dopamine/agents/rainbow/configs/c51.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow the settings from Bellemare et al. (2017), but we 2 | # modify as necessary to match those used in Rainbow (Hessel et al., 2018), to 3 | # ensure apples-to-apples comparison. 4 | import dopamine.agents.rainbow.rainbow_agent 5 | import dopamine.atari.run_experiment 6 | import dopamine.replay_memory.prioritized_replay_buffer 7 | import gin.tf.external_configurables 8 | 9 | RainbowAgent.num_atoms = 51 10 | RainbowAgent.vmax = 10. 11 | RainbowAgent.gamma = 0.99 12 | RainbowAgent.update_horizon = 1 13 | RainbowAgent.min_replay_history = 20000 # agent steps 14 | RainbowAgent.update_period = 4 15 | RainbowAgent.target_update_period = 8000 # agent steps 16 | RainbowAgent.epsilon_train = 0.01 17 | RainbowAgent.epsilon_eval = 0.001 18 | RainbowAgent.epsilon_decay_period = 250000 # agent steps 19 | RainbowAgent.replay_scheme = 'uniform' 20 | RainbowAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version 21 | RainbowAgent.optimizer = @tf.train.AdamOptimizer() 22 | 23 | tf.train.AdamOptimizer.learning_rate = 0.00025 24 | tf.train.AdamOptimizer.epsilon = 0.0003125 25 | 26 | Runner.game_name = 'Pong' 27 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017). 28 | Runner.sticky_actions = True 29 | Runner.num_iterations = 200 30 | Runner.training_steps = 250000 # agent steps 31 | Runner.evaluation_steps = 125000 # agent steps 32 | Runner.max_steps_per_episode = 27000 # agent steps 33 | 34 | WrappedPrioritizedReplayBuffer.replay_capacity = 1000000 35 | WrappedPrioritizedReplayBuffer.batch_size = 32 36 | -------------------------------------------------------------------------------- /dopamine/agents/rainbow/configs/rainbow.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow Hessel et al. (2018), except for sticky_actions, 2 | # which was False (not using sticky actions) in the original paper. 
3 | import dopamine.agents.rainbow.rainbow_agent 4 | import dopamine.atari.run_experiment 5 | import dopamine.replay_memory.prioritized_replay_buffer 6 | import gin.tf.external_configurables 7 | 8 | RainbowAgent.num_atoms = 51 9 | RainbowAgent.vmax = 10. 10 | RainbowAgent.gamma = 0.99 11 | RainbowAgent.update_horizon = 3 12 | RainbowAgent.min_replay_history = 20000 # agent steps 13 | RainbowAgent.update_period = 4 14 | RainbowAgent.target_update_period = 8000 # agent steps 15 | RainbowAgent.epsilon_train = 0.01 16 | RainbowAgent.epsilon_eval = 0.001 17 | RainbowAgent.epsilon_decay_period = 250000 # agent steps 18 | RainbowAgent.replay_scheme = 'prioritized' 19 | RainbowAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version 20 | RainbowAgent.optimizer = @tf.train.AdamOptimizer() 21 | 22 | # Note these parameters are different from C51's. 23 | tf.train.AdamOptimizer.learning_rate = 0.0000625 24 | tf.train.AdamOptimizer.epsilon = 0.00015 25 | 26 | Runner.game_name = 'Pong' 27 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017). 28 | Runner.sticky_actions = True 29 | Runner.num_iterations = 200 30 | Runner.training_steps = 250000 # agent steps 31 | Runner.evaluation_steps = 125000 # agent steps 32 | Runner.max_steps_per_episode = 27000 # agent steps 33 | 34 | WrappedPrioritizedReplayBuffer.replay_capacity = 1000000 35 | WrappedPrioritizedReplayBuffer.batch_size = 32 36 | -------------------------------------------------------------------------------- /docs/api_docs/python/checkpointer.md: -------------------------------------------------------------------------------- 1 |
5 | 6 | # Module: checkpointer 7 | 8 | A checkpointing mechanism for Dopamine agents. 9 | 10 | This Checkpointer expects a base directory where checkpoints for different 11 | iterations are stored. Specifically, Checkpointer.save_checkpoint() takes in as 12 | input a dictionary 'data' to be pickled to disk. At each iteration, we write a 13 | file called 'ckpt.#', where # is the iteration number. The Checkpointer also 14 | cleans up old files, maintaining up to the CHECKPOINT_DURATION most recent 15 | iterations. 16 | 17 | The Checkpointer writes a sentinel file to indicate that checkpointing was 18 | globally successful. This means that all other checkpointing activities (saving 19 | the Tensorflow graph, the replay buffer) should be performed *prior* to calling 20 | Checkpointer.save_checkpoint(). This allows the Checkpointer to detect 21 | incomplete checkpoints. 22 | 23 | #### Example 24 | 25 | After running 10 iterations (numbered 0...9) with base_directory='/checkpoint', 26 | the following files will exist: `/checkpoint/ckpt.6 /checkpoint/ckpt.7 27 | /checkpoint/ckpt.8 /checkpoint/ckpt.9 /checkpoint/sentinel_checkpoint_complete.6 28 | /checkpoint/sentinel_checkpoint_complete.7 29 | /checkpoint/sentinel_checkpoint_complete.8 30 | /checkpoint/sentinel_checkpoint_complete.9` 31 | 32 | ## Classes 33 | 34 | [`class Checkpointer`](./checkpointer/Checkpointer.md): Class for managing 35 | checkpoints for Dopamine agents. 36 | -------------------------------------------------------------------------------- /tests/atari_init_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The Dopamine Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """A simple test for validating that the Atari env initializes.""" 15 | 16 | import datetime 17 | import os 18 | import shutil 19 | 20 | 21 | 22 | from absl import flags 23 | from dopamine.atari import train 24 | import tensorflow as tf 25 | 26 | 27 | FLAGS = flags.FLAGS 28 | 29 | 30 | class AtariInitTest(tf.test.TestCase): 31 | 32 | def setUp(self): 33 | FLAGS.base_dir = os.path.join( 34 | '/tmp/dopamine_tests', 35 | datetime.datetime.utcnow().strftime('run_%Y_%m_%d_%H_%M_%S')) 36 | FLAGS.agent_name = 'dqn' 37 | FLAGS.gin_files = ['dopamine/agents/dqn/configs/dqn.gin'] 38 | # `num_iterations` set to zero to prevent runner execution. 39 | FLAGS.gin_bindings = [ 40 | 'Runner.num_iterations=0', 41 | 'WrappedReplayBuffer.replay_capacity = 100' # To prevent OOM.
42 | ] 43 | FLAGS.alsologtostderr = True 44 | 45 | def test_atari_init(self): 46 | """Tests that a DQN agent is initialized.""" 47 | train.main([]) 48 | shutil.rmtree(FLAGS.base_dir) 49 | 50 | 51 | if __name__ == '__main__': 52 | tf.test.main() 53 | -------------------------------------------------------------------------------- /dopamine/agents/implicit_quantile/configs/implicit_quantile.gin: -------------------------------------------------------------------------------- 1 | # Hyperparameters follow Dabney et al. (2018), but we modify as necessary to 2 | # match those used in Rainbow (Hessel et al., 2018), to ensure apples-to-apples 3 | # comparison. 4 | 5 | import dopamine.agents.implicit_quantile.implicit_quantile_agent 6 | import dopamine.agents.rainbow.rainbow_agent 7 | import dopamine.atari.run_experiment 8 | import dopamine.replay_memory.prioritized_replay_buffer 9 | import gin.tf.external_configurables 10 | 11 | ImplicitQuantileAgent.kappa = 1.0 12 | ImplicitQuantileAgent.num_tau_samples = 64 13 | ImplicitQuantileAgent.num_tau_prime_samples = 64 14 | ImplicitQuantileAgent.num_quantile_samples = 32 15 | RainbowAgent.gamma = 0.99 16 | RainbowAgent.update_horizon = 3 17 | RainbowAgent.min_replay_history = 20000 # agent steps 18 | RainbowAgent.update_period = 4 19 | RainbowAgent.target_update_period = 8000 # agent steps 20 | RainbowAgent.epsilon_train = 0.01 21 | RainbowAgent.epsilon_eval = 0.001 22 | RainbowAgent.epsilon_decay_period = 250000 # agent steps 23 | # IQN currently does not support prioritized replay. 24 | RainbowAgent.replay_scheme = 'uniform' 25 | RainbowAgent.tf_device = '/gpu:0' # '/cpu:*' use for non-GPU version 26 | RainbowAgent.optimizer = @tf.train.AdamOptimizer() 27 | 28 | tf.train.AdamOptimizer.learning_rate = 0.0000625 29 | tf.train.AdamOptimizer.epsilon = 0.00015 30 | 31 | Runner.game_name = 'Pong' 32 | # Sticky actions with probability 0.25, as suggested by (Machado et al., 2017). 33 | Runner.sticky_actions = True 34 | Runner.num_iterations = 200 35 | Runner.training_steps = 250000 36 | Runner.evaluation_steps = 125000 37 | Runner.max_steps_per_episode = 27000 38 | 39 | WrappedPrioritizedReplayBuffer.replay_capacity = 1000000 40 | WrappedPrioritizedReplayBuffer.batch_size = 32 41 | -------------------------------------------------------------------------------- /dopamine/common/iteration_statistics.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The Dopamine Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """A class for storing iteration-specific metrics. 15 | """ 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | 22 | class IterationStatistics(object): 23 | """A class for storing iteration-specific metrics. 24 | 25 | The internal format is as follows: we maintain a mapping from keys to lists. 26 | Each list contains all the values corresponding to the given key. 
27 | 28 | For example, self.data_lists['train_episode_returns'] might contain the 29 | per-episode returns achieved during this iteration. 30 | 31 | Attributes: 32 | data_lists: dict mapping each metric_name (str) to a list of said metric 33 | across episodes. 34 | """ 35 | 36 | def __init__(self): 37 | self.data_lists = {} 38 | 39 | def append(self, data_pairs): 40 | """Add the given values to their corresponding key-indexed lists. 41 | 42 | Args: 43 | data_pairs: A dictionary of key-value pairs to be recorded. 44 | """ 45 | for key, value in data_pairs.items(): 46 | if key not in self.data_lists: 47 | self.data_lists[key] = [] 48 | self.data_lists[key].append(value) 49 | -------------------------------------------------------------------------------- /docs/api_docs/python/logger/Logger.md: -------------------------------------------------------------------------------- 1 |
9 | 10 | # logger.Logger 11 | 12 | ## Class `Logger` 13 | 14 | Class for maintaining a dictionary of data to log. 15 | 16 | ## Methods 17 | 18 |

### `__init__`

19 | 20 | ```python 21 | __init__(logging_dir) 22 | ``` 23 | 24 | Initializes Logger. 25 | 26 | #### Args: 27 | 28 | * `logging_dir`: str, Directory to which logs are written. 29 | 30 |

### `__setitem__`

31 | 32 | ```python 33 | __setitem__( 34 | key, 35 | value 36 | ) 37 | ``` 38 | 39 | This method will set an entry at key with value in the dictionary. 40 | 41 | It will effectively overwrite any previous data at the same key. 42 | 43 | #### Args: 44 | 45 | * `key`: str, indicating key where to write the entry. 46 | * `value`: A python object to store. 47 | 48 |
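For example, a minimal sketch of how `__setitem__` and `log_to_file` are typically combined (the directory, key, and values below are illustrative, not part of the API):

```python
from dopamine.common import logger

experiment_logger = logger.Logger('/tmp/dopamine_logs')
if experiment_logger.is_logging_enabled():
  # Overwrites any previous entry stored under 'episode_returns'.
  experiment_logger['episode_returns'] = [12.0, 8.5, 17.0]
  # Pickles the whole dictionary to /tmp/dopamine_logs/log_3.
  experiment_logger.log_to_file('log', 3)
```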

### `is_logging_enabled`

49 | 50 | ```python 51 | is_logging_enabled() 52 | ``` 53 | 54 | Return if logging is enabled. 55 | 56 |

### `log_to_file`

57 | 58 | ```python 59 | log_to_file( 60 | filename_prefix, 61 | iteration_number 62 | ) 63 | ``` 64 | 65 | Save the pickled dictionary to a file. 66 | 67 | #### Args: 68 | 69 | * `filename_prefix`: str, name of the file to use (without iteration 70 | number). 71 | * `iteration_number`: int, the iteration number, appended to the end of 72 | filename_prefix. 73 | -------------------------------------------------------------------------------- /docs/api_docs/python/utils/read_experiment.md: -------------------------------------------------------------------------------- 1 |
5 | 6 | # utils.read_experiment 7 | 8 | ```python 9 | utils.read_experiment( 10 | log_path, 11 | parameter_set=None, 12 | job_descriptor='', 13 | iteration_number=None, 14 | summary_keys=('train_episode_returns', 'eval_episode_returns'), 15 | verbose=False 16 | ) 17 | ``` 18 | 19 | Reads in a set of experimental results from log_path. 20 | 21 | The provided parameter_set is an ordered_dict which 1) defines the parameters of 22 | this experiment, 2) defines the order in which they occur in the job descriptor. 23 | 24 | The method reads all experiments of the form 25 | 26 | ${log_path}/${job_descriptor}.format(params)/logs, 27 | 28 | where params is constructed from the cross product of the elements in the 29 | parameter_set. 30 | 31 | For example: parameter_set = collections.OrderedDict([ ('game', ['Asterix', 32 | 'Pong']), ('epsilon', ['0', '0.1']) ]) read_experiment('/tmp/logs', 33 | parameter_set, job_descriptor='{}_{}') Will try to read logs from: - 34 | /tmp/logs/Asterix_0/logs - /tmp/logs/Asterix_0.1/logs - /tmp/logs/Pong_0/logs - 35 | /tmp/logs/Pong_0.1/logs 36 | 37 | #### Args: 38 | 39 | * `log_path`: string, base path specifying where results live. 40 | * `parameter_set`: An ordered_dict mapping parameter names to allowable 41 | values. 42 | * `job_descriptor`: A job descriptor string which is used to construct 43 | the full path for each trial within an experiment. 44 | * `iteration_number`: Int, if not None determines the iteration number 45 | at which we read in results. 46 | * `summary_keys`: Iterable of strings, iteration statistics to 47 | summarize. 48 | * `verbose`: If True, print out additional information. 49 | 50 | #### Returns: 51 | 52 | A Pandas dataframe containing experimental results. 53 | -------------------------------------------------------------------------------- /docs/api_docs/python/checkpointer/Checkpointer.md: -------------------------------------------------------------------------------- 1 |
8 | 9 | # checkpointer.Checkpointer 10 | 11 | ## Class `Checkpointer` 12 | 13 | Class for managing checkpoints for Dopamine agents. 14 | 15 | ## Methods 16 | 17 |

### `__init__`

18 | 19 | ```python 20 | __init__( 21 | base_directory, 22 | checkpoint_file_prefix='ckpt', 23 | checkpoint_frequency=1 24 | ) 25 | ``` 26 | 27 | Initializes Checkpointer. 28 | 29 | #### Args: 30 | 31 | * `base_directory`: str, directory where all checkpoints are 32 | saved/loaded. 33 | * `checkpoint_file_prefix`: str, prefix to use for naming checkpoint 34 | files. 35 | * `checkpoint_frequency`: int, the frequency at which to checkpoint. 36 | 37 | #### Raises: 38 | 39 | * `ValueError`: if base_directory is empty, or not creatable. 40 | 41 |
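For example, a rough sketch of the save/restore cycle described above (the import path, directory, and the contents of `data` are illustrative assumptions):

```python
from dopamine.common import checkpointer  # assumed module location

ckpt = checkpointer.Checkpointer('/tmp/dopamine_checkpoints')

# Save whatever picklable state is needed to resume iteration 7.
ckpt.save_checkpoint(7, {'iteration': 7, 'episode_returns': [12.0, 8.5]})

# Later (e.g. after a restart), try to restore that iteration.
data = ckpt.load_checkpoint(7)
if data is None:
  print('No checkpoint found for iteration 7; starting fresh.')
else:
  print('Resuming from iteration', data['iteration'])
```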

### `load_checkpoint`

42 | 43 | ```python 44 | load_checkpoint(iteration_number) 45 | ``` 46 | 47 | Tries to reload a checkpoint at the selected iteration number. 48 | 49 | #### Args: 50 | 51 | * `iteration_number`: The checkpoint iteration number to try to load. 52 | 53 | #### Returns: 54 | 55 | If the checkpoint files exist, the unpickled object that was passed in as data 56 | to save_checkpoint; returns None if the files do not exist. 57 | 58 |

### `save_checkpoint`

59 | 60 | ```python 61 | save_checkpoint( 62 | iteration_number, 63 | data 64 | ) 65 | ``` 66 | 67 | Saves a new checkpoint at the current iteration_number. 68 | 69 | #### Args: 70 | 71 | * `iteration_number`: int, the current iteration number for this 72 | checkpoint. 73 | * `data`: Any (picklable) python object containing the data to store in 74 | the checkpoint. 75 | -------------------------------------------------------------------------------- /docs/api_docs/python/rainbow_agent/project_distribution.md: -------------------------------------------------------------------------------- 1 |
5 | 6 | # rainbow_agent.project_distribution 7 | 8 | ```python 9 | rainbow_agent.project_distribution( 10 | supports, 11 | weights, 12 | target_support, 13 | validate_args=False 14 | ) 15 | ``` 16 | 17 | Projects a batch of (support, weights) onto target_support. 18 | 19 | Based on equation (7) in (Bellemare et al., 2017): 20 | https://arxiv.org/abs/1707.06887 In the rest of the comments we will refer to 21 | this equation simply as Eq7. 22 | 23 | This code is not easy to digest, so we will use a running example to clarify 24 | what is going on, with the following sample inputs: 25 | 26 | * supports = [[0, 2, 4, 6, 8], [1, 3, 4, 5, 6]] 27 | * weights = [[0.1, 0.6, 0.1, 0.1, 0.1], [0.1, 0.2, 0.5, 0.1, 0.1]] 28 | * target_support = [4, 5, 6, 7, 8] 29 | 30 | In the code below, comments preceded with 'Ex:' will be referencing the above 31 | values. 32 | 33 | #### Args: 34 | 35 | * `supports`: Tensor of shape (batch_size, num_dims) defining supports 36 | for the distribution. 37 | * `weights`: Tensor of shape (batch_size, num_dims) defining weights on 38 | the original support points. Although for the CategoricalDQN agent these 39 | weights are probabilities, it is not required that they are. 40 | * `target_support`: Tensor of shape (num_dims) defining support of the 41 | projected distribution. The values must be monotonically increasing. Vmin 42 | and Vmax will be inferred from the first and last elements of this tensor, 43 | respectively. The values in this tensor must be equally spaced. 44 | * `validate_args`: Whether we will verify the contents of the 45 | target_support parameter. 46 | 47 | #### Returns: 48 | 49 | A Tensor of shape (batch_size, num_dims) with the projection of a batch of 50 | (support, weights) onto target_support. 51 | 52 | #### Raises: 53 | 54 | * `ValueError`: If target_support has no dimensions, or if shapes of 55 | supports, weights, and target_support are incompatible. 56 | -------------------------------------------------------------------------------- /docs/api_docs/python/run_experiment/Runner.md: -------------------------------------------------------------------------------- 1 |
7 | 8 | # run_experiment.Runner 9 | 10 | ## Class `Runner` 11 | 12 | Object that handles running Atari 2600 experiments. 13 | 14 | Here we use the term 'experiment' to mean simulating interactions between the 15 | agent and the environment and reporting some statistics pertaining to these 16 | interactions. 17 | 18 | A simple scenario to train a DQN agent is as follows: 19 | 20 | ```python 21 | base_dir = '/tmp/simple_example' 22 | def create_agent(sess, environment): 23 | return dqn_agent.DQNAgent(sess, num_actions=environment.action_space.n) 24 | runner = Runner(base_dir, create_agent, game_name='Pong') 25 | runner.run_experiment() 26 | ``` 27 | 28 | ## Methods 29 | 30 |

### `__init__`

31 | 32 | ```python 33 | __init__( 34 | *args, 35 | **kwargs 36 | ) 37 | ``` 38 | 39 | Initialize the Runner object in charge of running a full experiment. 40 | 41 | #### Args: 42 | 43 | * `base_dir`: str, the base directory to host all required 44 | sub-directories. 45 | * `create_agent_fn`: A function that takes as args a Tensorflow session 46 | and an Atari 2600 Gym environment, and returns an agent. 47 | * `create_environment_fn`: A function which receives a game name and 48 | creates an Atari 2600 Gym environment. 49 | * `game_name`: str, name of the Atari 2600 domain to run (required). 50 | * `sticky_actions`: bool, whether to enable sticky actions in the 51 | environment. 52 | * `checkpoint_file_prefix`: str, the prefix to use for checkpoint 53 | files. 54 | * `logging_file_prefix`: str, prefix to use for the log files. 55 | * `log_every_n`: int, the frequency for writing logs. 56 | * `num_iterations`: int, the iteration number threshold (must be 57 | greater than start_iteration). 58 | * `training_steps`: int, the number of training steps to perform. 59 | * `evaluation_steps`: int, the number of evaluation steps to perform. 60 | * `max_steps_per_episode`: int, maximum number of steps after which an 61 | episode terminates. 62 | 63 | This constructor will take the following actions: - Initialize an environment. - 64 | Initialize a `tf.Session`. - Initialize a logger. - Initialize an agent. - 65 | Reload from the latest checkpoint, if available, and initialize the Checkpointer 66 | object. 67 | 68 |

### `run_experiment`

69 | 70 | ```python 71 | run_experiment() 72 | ``` 73 | 74 | Runs a full experiment, spread over multiple iterations. 75 | -------------------------------------------------------------------------------- /tests/common/iteration_statistics_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The Dopamine Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Tests for dopamine.common.iteration_statistics.""" 15 | 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | 21 | 22 | from dopamine.common import iteration_statistics 23 | import tensorflow as tf 24 | 25 | 26 | class IterationStatisticsTest(tf.test.TestCase): 27 | 28 | def setUp(self): 29 | pass 30 | 31 | def testMissingValue(self): 32 | statistics = iteration_statistics.IterationStatistics() 33 | with self.assertRaises(KeyError): 34 | _ = statistics.data_lists['missing_key'] 35 | 36 | def testAddOneValue(self): 37 | statistics = iteration_statistics.IterationStatistics() 38 | 39 | # The statistics data structure should be empty a-priori. 40 | self.assertEqual(0, len(statistics.data_lists)) 41 | 42 | statistics.append({'key1': 0}) 43 | # We should have exactly one list, containing one value. 44 | self.assertEqual(1, len(statistics.data_lists)) 45 | self.assertEqual(1, len(statistics.data_lists['key1'])) 46 | self.assertEqual(0, statistics.data_lists['key1'][0]) 47 | 48 | def testAddManyValues(self): 49 | my_pi = 3.14159 50 | 51 | statistics = iteration_statistics.IterationStatistics() 52 | 53 | # Add a number of items. Each item is added to the list corresponding to its 54 | # given key. 55 | statistics.append({'rewards': 0, 56 | 'nouns': 'reinforcement', 57 | 'angles': my_pi}) 58 | # Add a second item to the 'nouns' list. 59 | statistics.append({'nouns': 'learning'}) 60 | 61 | # There are three lists. 62 | self.assertEqual(3, len(statistics.data_lists)) 63 | self.assertEqual(1, len(statistics.data_lists['rewards'])) 64 | self.assertEqual(2, len(statistics.data_lists['nouns'])) 65 | self.assertEqual(1, len(statistics.data_lists['angles'])) 66 | 67 | self.assertEqual(0, statistics.data_lists['rewards'][0]) 68 | self.assertEqual('reinforcement', statistics.data_lists['nouns'][0]) 69 | self.assertEqual('learning', statistics.data_lists['nouns'][1]) 70 | self.assertEqual(my_pi, statistics.data_lists['angles'][0]) 71 | 72 | if __name__ == '__main__': 73 | tf.test.main() 74 | -------------------------------------------------------------------------------- /tests/train_runner_integration_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The Dopamine Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """End to end tests for TrainRunner.""" 15 | 16 | import datetime 17 | import os 18 | import shutil 19 | 20 | 21 | 22 | from absl import flags 23 | 24 | from dopamine.atari import train 25 | import tensorflow as tf 26 | 27 | FLAGS = flags.FLAGS 28 | 29 | 30 | class TrainRunnerIntegrationTest(tf.test.TestCase): 31 | """Tests for Atari environment with various agents. 32 | 33 | """ 34 | 35 | def setUp(self): 36 | FLAGS.base_dir = os.path.join( 37 | '/tmp/dopamine_tests', 38 | datetime.datetime.utcnow().strftime('run_%Y_%m_%d_%H_%M_%S')) 39 | self._checkpoint_dir = os.path.join(FLAGS.base_dir, 'checkpoints') 40 | self._logging_dir = os.path.join(FLAGS.base_dir, 'logs') 41 | FLAGS.schedule = 'continuous_train' 42 | 43 | def quickDqnFlags(self): 44 | """Assign flags for a quick run of DQN agent.""" 45 | FLAGS.agent_name = 'dqn' 46 | FLAGS.gin_files = ['dopamine/agents/dqn/configs/dqn.gin'] 47 | FLAGS.gin_bindings = [ 48 | 'Runner.training_steps=100', 'Runner.evaluation_steps=10', 49 | 'Runner.num_iterations=1', 'Runner.max_steps_per_episode=100', 50 | 'dqn_agent.DQNAgent.min_replay_history=500', 51 | 'WrappedReplayBuffer.replay_capacity=100' 52 | ] 53 | FLAGS.alsologtostderr = True 54 | 55 | def verifyFilesCreated(self, base_dir): 56 | """Verify that files have been created.""" 57 | # Check checkpoint files 58 | self.assertTrue( 59 | os.path.exists(os.path.join(self._checkpoint_dir, 'ckpt.0'))) 60 | self.assertTrue( 61 | os.path.exists(os.path.join(self._checkpoint_dir, 'checkpoint'))) 62 | self.assertTrue( 63 | os.path.exists( 64 | os.path.join(self._checkpoint_dir, 65 | 'sentinel_checkpoint_complete.0'))) 66 | # Check log files 67 | self.assertTrue(os.path.exists(os.path.join(self._logging_dir, 'log_0'))) 68 | 69 | def testIntegrationDqn(self): 70 | """Test the DQN agent.""" 71 | tf.logging.info('####### Training the DQN agent #####') 72 | tf.logging.info('####### DQN base_dir: {}'.format(FLAGS.base_dir)) 73 | self.quickDqnFlags() 74 | train.main([]) 75 | self.verifyFilesCreated(FLAGS.base_dir) 76 | shutil.rmtree(FLAGS.base_dir) 77 | 78 | 79 | if __name__ == '__main__': 80 | tf.test.main() 81 | -------------------------------------------------------------------------------- /docs/api_docs/python/index.md: -------------------------------------------------------------------------------- 1 | # All symbols in Dopamine 2 | 3 | * checkpointer 4 | * checkpointer.Checkpointer 5 | * circular_replay_buffer 6 | * circular_replay_buffer.OutOfGraphReplayBuffer 7 | * circular_replay_buffer.WrappedReplayBuffer 8 | * dqn_agent 9 | * dqn_agent.DQNAgent 10 | * implicit_quantile_agent 11 | * implicit_quantile_agent.ImplicitQuantileAgent 12 | * iteration_statistics 13 | * iteration_statistics.IterationStatistics 14 | * logger 15 | * logger.Logger 16 | * prioritized_replay_buffer 17 | * prioritized_replay_buffer.OutOfGraphPrioritizedReplayBuffer 18 | * prioritized_replay_buffer.WrappedPrioritizedReplayBuffer 19 | * rainbow_agent 20 | * rainbow_agent.RainbowAgent 21 | * rainbow_agent.project_distribution 22 | * run_experiment 23 | * run_experiment.Runner 24 | * 
run_experiment.TrainRunner 25 | * train 26 | * train.create_agent 27 | * train.create_runner 28 | * train.launch_experiment 29 | * utils 30 | * utils.get_latest_file 31 | * utils.get_latest_iteration 32 | * utils.load_baselines 33 | * utils.load_statistics 34 | * utils.read_experiment 35 | * utils.summarize_data 36 | -------------------------------------------------------------------------------- /baselines/README.md: -------------------------------------------------------------------------------- 1 | # Baseline data 2 | 3 | This directory provides information about the baseline data provided by 4 | Dopamine. The default hyperparameter configuration for the 4 agents we are 5 | providing yields a standardized "apples to apples" comparison between them. 6 | 7 | The default configuration files for each agent (set up with 8 | [gin configuration framework](https://github.com/google/gin-config)) are: 9 | 10 | * [`dopamine/agents/dqn/configs/dqn.gin`](https://github.com/google/dopamine/blob/master/dopamine/agents/dqn/configs/dqn.gin) 11 | * [`dopamine/agents/rainbow/configs/c51.gin`](https://github.com/google/dopamine/blob/master/dopamine/agents/rainbow/configs/c51.gin) 12 | * [`dopamine/agents/rainbow/configs/rainbow.gin`](https://github.com/google/dopamine/blob/master/dopamine/agents/rainbow/configs/rainbow.gin) 13 | * [`dopamine/agents/implicit_quantile/configs/implicit_quantile.gin`](https://github.com/google/dopamine/blob/master/dopamine/agents/implicit_quantile/configs/implicit_quantile.gin) 14 | 15 | ## Hyperparameter comparison 16 | Our results compare the agents with the same hyperparameters: target 17 | network update frequency, the rate at which exploratory actions are selected (ε), the 18 | length of the schedule over which ε is annealed, and the number of agent steps 19 | before training occurs. Changing these parameters can significantly affect 20 | performance, without necessarily being indicative of an algorithmic difference. 21 | Unsurprisingly, DQN performs much better when trained with 1% of exploratory 22 | actions instead of 10% (as used in the original Nature paper). Step size and 23 | optimizer were taken as published. The table below summarizes our choices. All 24 | numbers are in ALE frames; one agent step spans 4 ALE frames, so the agent-step values in the gin files above (e.g. an ε decay period of 250,000 agent steps) correspond to the frame counts listed here (1,000,000 frames). 25 | 26 | | | Our baseline results | [DQN][dqn] | [C51][c51] | [Rainbow][rainbow] | [IQN][iqn] | 27 | | :---------------------------------- | :------------------: | :--------: | :--------: | :----------------: | :--------: | 28 | | **Training ε** | 0.01 | 0.1 | 0.01 | 0.01 | 0.01 | 29 | | **Evaluation ε** | 0.001 | 0.01 | 0.001 | * | 0.001 | 30 | | **ε decay schedule** | 1,000,000 frames | 4,000,000 frames | 4,000,000 frames | 1,000,000 frames | 4,000,000 frames | 31 | | **Min. history to start learning** | 80,000 frames | 200,000 frames | 200,000 frames | 80,000 frames | 200,000 frames | 32 | | **Target network update frequency** | 32,000 frames | 40,000 frames | 40,000 frames | 32,000 frames | 40,000 frames | 33 | 34 | ## Visualization 35 | We provide a [website](https://google.github.io/dopamine/baselines/plots.html) 36 | where you can quickly visualize the training runs for all our default agents. 37 | 38 | The plots are rendered from a set of 39 | [JSON files](https://github.com/google/dopamine/tree/master/baselines/data) 40 | which we compiled. These may prove useful in their own right to compare 41 | against results obtained from other frameworks.
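To work with these data directly, something along the following lines should suffice (the file name and record layout are assumptions; the `Iteration`, `Value`, and `Agent` fields follow the accompanying Vega-Lite specs in `baselines/data`):

```python
import json

import pandas as pd

# Hypothetical path to one game's compiled training data.
with open('baselines/data/pong.json') as f:
  records = json.load(f)  # assumed: a list of {Iteration, Value, Agent} records

df = pd.DataFrame(records)
# Mean return per agent and iteration, mirroring the line layer in the plots.
mean_returns = df.groupby(['Agent', 'Iteration'])['Value'].mean().reset_index()
print(mean_returns.head())
```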
42 | 43 | 44 | [dqn]: https://storage.googleapis.com/deepmind-media/dqn/DQNNaturePaper.pdf 45 | [c51]: https://arxiv.org/abs/1707.06887 46 | [rainbow]: https://arxiv.org/abs/1710.02298 47 | [iqn]: https://arxiv.org/abs/1806.06923 48 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The Dopamine Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Setup script for Dopamine. 15 | 16 | This script will install Dopamine as a Python module. 17 | 18 | See: https://github.com/google/dopamine 19 | 20 | """ 21 | 22 | import codecs 23 | from os import path 24 | from setuptools import find_packages 25 | from setuptools import setup 26 | 27 | here = path.abspath(path.dirname(__file__)) 28 | 29 | # Get the long description from the README file. 30 | with codecs.open(path.join(here, 'README.md'), encoding='utf-8') as f: 31 | long_description = f.read() 32 | 33 | install_requires = ['gin-config >= 0.1.1', 'absl-py >= 0.2.2', 34 | 'tensorflow', 'opencv-python >= 3.4.1.15', 35 | 'gym >= 0.10.5'] 36 | tests_require = ['gin-config >= 0.1.1', 'absl-py >= 0.2.2', 37 | 'tensorflow >= 1.9.0', 'opencv-python >= 3.4.1.15', 38 | 'gym >= 0.10.5', 'mock >= 1.0.0'] 39 | 40 | dopamine_description = ( 41 | 'Dopamine: A framework for flexible Reinforcement Learning research') 42 | 43 | setup( 44 | name='dopamine_rl', 45 | version='1.0.2', 46 | include_package_data=True, 47 | packages=find_packages(exclude=['docs']), # Required 48 | package_data={'testdata': ['testdata/*.gin']}, 49 | install_requires=install_requires, 50 | tests_require=tests_require, 51 | description=dopamine_description, 52 | long_description=long_description, 53 | url='https://github.com/google/dopamine', # Optional 54 | author='The Dopamine Team', # Optional 55 | author_email='opensource@google.com', 56 | classifiers=[ # Optional 57 | 'Development Status :: 4 - Beta', 58 | 59 | # Indicate who your project is intended for 60 | 'Intended Audience :: Developers', 61 | 'Intended Audience :: Education', 62 | 'Intended Audience :: Science/Research', 63 | 64 | # Pick your license as you wish 65 | 'License :: OSI Approved :: Apache Software License', 66 | 67 | # Specify the Python versions you support here. In particular, ensure 68 | # that you indicate whether you support Python 2, Python 3 or both. 
69 | 'Programming Language :: Python :: 2', 70 | 'Programming Language :: Python :: 2.7', 71 | 'Programming Language :: Python :: 3', 72 | 'Programming Language :: Python :: 3.4', 73 | 'Programming Language :: Python :: 3.5', 74 | 'Programming Language :: Python :: 3.6', 75 | 76 | 'Topic :: Scientific/Engineering', 77 | 'Topic :: Scientific/Engineering :: Mathematics', 78 | 'Topic :: Scientific/Engineering :: Artificial Intelligence', 79 | 'Topic :: Software Development', 80 | 'Topic :: Software Development :: Libraries', 81 | 'Topic :: Software Development :: Libraries :: Python Modules', 82 | 83 | ], 84 | project_urls={ # Optional 85 | 'Documentation': 'https://github.com/google/dopamine', 86 | 'Bug Reports': 'https://github.com/google/dopamine/issues', 87 | 'Source': 'https://github.com/google/dopamine', 88 | }, 89 | license='Apache 2.0', 90 | keywords='dopamine reinforcement-learning python machine learning' 91 | ) 92 | -------------------------------------------------------------------------------- /docs/api_docs/python/_toc.yaml: -------------------------------------------------------------------------------- 1 | # Automatically generated file; please do not edit 2 | toc: 3 | - title: checkpointer 4 | section: 5 | - title: Overview 6 | path: /dopamine/api_docs/python/checkpointer 7 | - title: Checkpointer 8 | path: /dopamine/api_docs/python/checkpointer/Checkpointer 9 | - title: circular_replay_buffer 10 | section: 11 | - title: Overview 12 | path: /dopamine/api_docs/python/circular_replay_buffer 13 | - title: OutOfGraphReplayBuffer 14 | path: /dopamine/api_docs/python/circular_replay_buffer/OutOfGraphReplayBuffer 15 | - title: WrappedReplayBuffer 16 | path: /dopamine/api_docs/python/circular_replay_buffer/WrappedReplayBuffer 17 | - title: dqn_agent 18 | section: 19 | - title: Overview 20 | path: /dopamine/api_docs/python/dqn_agent 21 | - title: DQNAgent 22 | path: /dopamine/api_docs/python/dqn_agent/DQNAgent 23 | - title: implicit_quantile_agent 24 | section: 25 | - title: Overview 26 | path: /dopamine/api_docs/python/implicit_quantile_agent 27 | - title: ImplicitQuantileAgent 28 | path: /dopamine/api_docs/python/implicit_quantile_agent/ImplicitQuantileAgent 29 | - title: iteration_statistics 30 | section: 31 | - title: Overview 32 | path: /dopamine/api_docs/python/iteration_statistics 33 | - title: IterationStatistics 34 | path: /dopamine/api_docs/python/iteration_statistics/IterationStatistics 35 | - title: logger 36 | section: 37 | - title: Overview 38 | path: /dopamine/api_docs/python/logger 39 | - title: Logger 40 | path: /dopamine/api_docs/python/logger/Logger 41 | - title: prioritized_replay_buffer 42 | section: 43 | - title: Overview 44 | path: /dopamine/api_docs/python/prioritized_replay_buffer 45 | - title: OutOfGraphPrioritizedReplayBuffer 46 | path: /dopamine/api_docs/python/prioritized_replay_buffer/OutOfGraphPrioritizedReplayBuffer 47 | - title: WrappedPrioritizedReplayBuffer 48 | path: /dopamine/api_docs/python/prioritized_replay_buffer/WrappedPrioritizedReplayBuffer 49 | - title: rainbow_agent 50 | section: 51 | - title: Overview 52 | path: /dopamine/api_docs/python/rainbow_agent 53 | - title: project_distribution 54 | path: /dopamine/api_docs/python/rainbow_agent/project_distribution 55 | - title: RainbowAgent 56 | path: /dopamine/api_docs/python/rainbow_agent/RainbowAgent 57 | - title: run_experiment 58 | section: 59 | - title: Overview 60 | path: /dopamine/api_docs/python/run_experiment 61 | - title: Runner 62 | path: 
/dopamine/api_docs/python/run_experiment/Runner 63 | - title: TrainRunner 64 | path: /dopamine/api_docs/python/run_experiment/TrainRunner 65 | - title: train 66 | section: 67 | - title: Overview 68 | path: /dopamine/api_docs/python/train 69 | - title: create_agent 70 | path: /dopamine/api_docs/python/train/create_agent 71 | - title: create_runner 72 | path: /dopamine/api_docs/python/train/create_runner 73 | - title: launch_experiment 74 | path: /dopamine/api_docs/python/train/launch_experiment 75 | - title: utils 76 | section: 77 | - title: Overview 78 | path: /dopamine/api_docs/python/utils 79 | - title: get_latest_file 80 | path: /dopamine/api_docs/python/utils/get_latest_file 81 | - title: get_latest_iteration 82 | path: /dopamine/api_docs/python/utils/get_latest_iteration 83 | - title: load_baselines 84 | path: /dopamine/api_docs/python/utils/load_baselines 85 | - title: load_statistics 86 | path: /dopamine/api_docs/python/utils/load_statistics 87 | - title: read_experiment 88 | path: /dopamine/api_docs/python/utils/read_experiment 89 | - title: summarize_data 90 | path: /dopamine/api_docs/python/utils/summarize_data 91 | -------------------------------------------------------------------------------- /dopamine/common/logger.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The Dopamine Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """A lightweight logging mechanism for dopamine agents.""" 15 | 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | import os 21 | import pickle 22 | import tensorflow as tf 23 | 24 | 25 | CHECKPOINT_DURATION = 4 26 | 27 | 28 | class Logger(object): 29 | """Class for maintaining a dictionary of data to log.""" 30 | 31 | def __init__(self, logging_dir): 32 | """Initializes Logger. 33 | 34 | Args: 35 | logging_dir: str, Directory to which logs are written. 36 | """ 37 | # Dict used by logger to store data. 38 | self.data = {} 39 | self._logging_enabled = True 40 | 41 | if not logging_dir: 42 | tf.logging.info('Logging directory not specified, will not log.') 43 | self._logging_enabled = False 44 | return 45 | # Try to create logging directory. 46 | try: 47 | tf.gfile.MakeDirs(logging_dir) 48 | except tf.errors.PermissionDeniedError: 49 | # If it already exists, ignore exception. 50 | pass 51 | if not tf.gfile.Exists(logging_dir): 52 | tf.logging.warning( 53 | 'Could not create directory %s, logging will be disabled.', 54 | logging_dir) 55 | self._logging_enabled = False 56 | return 57 | self._logging_dir = logging_dir 58 | 59 | def __setitem__(self, key, value): 60 | """This method will set an entry at key with value in the dictionary. 61 | 62 | It will effectively overwrite any previous data at the same key. 63 | 64 | Args: 65 | key: str, indicating key where to write the entry. 66 | value: A python object to store. 
67 | """ 68 | if self._logging_enabled: 69 | self.data[key] = value 70 | 71 | def _generate_filename(self, filename_prefix, iteration_number): 72 | filename = '{}_{}'.format(filename_prefix, iteration_number) 73 | return os.path.join(self._logging_dir, filename) 74 | 75 | def log_to_file(self, filename_prefix, iteration_number): 76 | """Save the pickled dictionary to a file. 77 | 78 | Args: 79 | filename_prefix: str, name of the file to use (without iteration 80 | number). 81 | iteration_number: int, the iteration number, appended to the end of 82 | filename_prefix. 83 | """ 84 | if not self._logging_enabled: 85 | tf.logging.warning('Logging is disabled.') 86 | return 87 | log_file = self._generate_filename(filename_prefix, iteration_number) 88 | with tf.gfile.GFile(log_file, 'w') as fout: 89 | pickle.dump(self.data, fout, protocol=pickle.HIGHEST_PROTOCOL) 90 | # After writing a checkpoint file, we garbage collect the log file 91 | # that is CHECKPOINT_DURATION versions old. 92 | stale_iteration_number = iteration_number - CHECKPOINT_DURATION 93 | if stale_iteration_number >= 0: 94 | stale_file = self._generate_filename(filename_prefix, 95 | stale_iteration_number) 96 | try: 97 | tf.gfile.Remove(stale_file) 98 | except tf.errors.NotFoundError: 99 | # Ignore if file not found. 100 | pass 101 | 102 | def is_logging_enabled(self): 103 | """Return if logging is enabled.""" 104 | return self._logging_enabled 105 | -------------------------------------------------------------------------------- /tests/atari/preprocessing_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The Dopamine Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | """Tests for dopamine.atari.run_experiment.""" 15 | 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | 21 | 22 | from absl import flags 23 | from dopamine.atari import preprocessing 24 | import numpy as np 25 | import tensorflow as tf 26 | 27 | FLAGS = flags.FLAGS 28 | 29 | 30 | class MockALE(object): 31 | """Mock internal ALE for testing.""" 32 | 33 | def __init__(self): 34 | pass 35 | 36 | def lives(self): 37 | return 1 38 | 39 | def getScreenGrayscale(self, screen): # pylint: disable=invalid-name 40 | screen.fill(self.screen_value) 41 | 42 | 43 | class MockEnvironment(object): 44 | """Mock environment for testing.""" 45 | 46 | def __init__(self, screen_size=10, max_steps=10): 47 | self.max_steps = max_steps 48 | self.screen_size = screen_size 49 | self.ale = MockALE() 50 | self.observation_space = np.empty((screen_size, screen_size)) 51 | self.game_over = False 52 | 53 | def reset(self): 54 | self.ale.screen_value = 10 55 | self.num_steps = 0 56 | return self.get_observation() 57 | 58 | def get_observation(self): 59 | observation = np.empty((self.screen_size, self.screen_size)) 60 | return self.ale.getScreenGrayscale(observation) 61 | 62 | def step(self, action): 63 | reward = -1. if action > 0 else 1. 64 | self.num_steps += 1 65 | is_terminal = self.num_steps >= self.max_steps 66 | 67 | unused = 0 68 | self.ale.screen_value -= 2 69 | return (self.get_observation(), reward, is_terminal, unused) 70 | 71 | def render(self, mode): 72 | pass 73 | 74 | 75 | class AtariPreprocessingTest(tf.test.TestCase): 76 | 77 | def testResetPassesObservation(self): 78 | env = MockEnvironment() 79 | env = preprocessing.AtariPreprocessing(env, frame_skip=1, screen_size=16) 80 | observation = env.reset() 81 | 82 | self.assertEqual(observation.shape, (16, 16, 1)) 83 | 84 | def testTerminalPassedThrough(self): 85 | max_steps = 10 86 | env = MockEnvironment(max_steps=max_steps) 87 | env = preprocessing.AtariPreprocessing(env, frame_skip=1) 88 | env.reset() 89 | 90 | # Make sure we get the right number of steps. 91 | for _ in range(max_steps - 1): 92 | _, _, is_terminal, _ = env.step(0) 93 | self.assertFalse(is_terminal) 94 | 95 | _, _, is_terminal, _ = env.step(0) 96 | self.assertTrue(is_terminal) 97 | 98 | def testFrameSkipAccumulatesReward(self): 99 | frame_skip = 2 100 | env = MockEnvironment() 101 | env = preprocessing.AtariPreprocessing(env, frame_skip=frame_skip) 102 | env.reset() 103 | 104 | # Make sure we get the right number of steps. Reward is 1 when we 105 | # pass in action 0. 106 | _, reward, _, _ = env.step(0) 107 | self.assertEqual(reward, frame_skip) 108 | 109 | def testMaxFramePooling(self): 110 | frame_skip = 2 111 | env = MockEnvironment() 112 | env = preprocessing.AtariPreprocessing(env, frame_skip=frame_skip) 113 | env.reset() 114 | 115 | # The first observation is 2, the second 0; max is 2. 116 | observation, _, _, _ = env.step(0) 117 | self.assertTrue((observation == 8).all()) 118 | 119 | if __name__ == '__main__': 120 | tf.test.main() 121 | -------------------------------------------------------------------------------- /tests/integration_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The Dopamine Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """End to end integration tests for Dopamine package.""" 15 | 16 | import datetime 17 | import os 18 | import shutil 19 | 20 | 21 | 22 | from absl import flags 23 | from dopamine.atari import train 24 | import tensorflow as tf 25 | 26 | import gin.tf 27 | 28 | 29 | FLAGS = flags.FLAGS 30 | 31 | 32 | class AtariIntegrationTest(tf.test.TestCase): 33 | """Tests for Atari environment with various agents. 34 | 35 | """ 36 | 37 | def setUp(self): 38 | FLAGS.base_dir = os.path.join( 39 | '/tmp/dopamine_tests', 40 | datetime.datetime.utcnow().strftime('run_%Y_%m_%d_%H_%M_%S')) 41 | self._checkpoint_dir = os.path.join(FLAGS.base_dir, 'checkpoints') 42 | self._logging_dir = os.path.join(FLAGS.base_dir, 'logs') 43 | FLAGS.alsologtostderr = True 44 | gin.clear_config() 45 | 46 | def quickDqnFlags(self): 47 | """Assign flags for a quick run of DQNAgent.""" 48 | FLAGS.agent_name = 'dqn' 49 | FLAGS.gin_files = ['dopamine/agents/dqn/configs/dqn.gin'] 50 | FLAGS.gin_bindings = [ 51 | 'Runner.training_steps=100', 'Runner.evaluation_steps=10', 52 | 'Runner.num_iterations=1', 'Runner.max_steps_per_episode=100', 53 | 'dqn_agent.DQNAgent.min_replay_history=500', 54 | 'WrappedReplayBuffer.replay_capacity=100' 55 | ] 56 | 57 | def quickRainbowFlags(self): 58 | """Assign flags for a quick run of RainbowAgent.""" 59 | FLAGS.agent_name = 'rainbow' 60 | FLAGS.gin_files = [ 61 | 'dopamine/agents/rainbow/configs/rainbow.gin' 62 | ] 63 | FLAGS.gin_bindings = [ 64 | 'Runner.training_steps=100', 'Runner.evaluation_steps=10', 65 | 'Runner.num_iterations=1', 'Runner.max_steps_per_episode=100', 66 | "rainbow_agent.RainbowAgent.replay_scheme='prioritized'", 67 | 'rainbow_agent.RainbowAgent.min_replay_history=500', 68 | 'WrappedReplayBuffer.replay_capacity=100' 69 | ] 70 | 71 | def verifyFilesCreated(self, base_dir): 72 | """Verify that files have been created.""" 73 | # Check checkpoint files 74 | self.assertTrue( 75 | os.path.exists(os.path.join(self._checkpoint_dir, 'ckpt.0'))) 76 | self.assertTrue( 77 | os.path.exists(os.path.join(self._checkpoint_dir, 'checkpoint'))) 78 | self.assertTrue( 79 | os.path.exists( 80 | os.path.join(self._checkpoint_dir, 81 | 'sentinel_checkpoint_complete.0'))) 82 | # Check log files 83 | self.assertTrue(os.path.exists(os.path.join(self._logging_dir, 'log_0'))) 84 | 85 | def testIntegrationDqn(self): 86 | """Test the DQN agent.""" 87 | tf.logging.info('####### Training the DQN agent #####') 88 | tf.logging.info('####### DQN base_dir: {}'.format(FLAGS.base_dir)) 89 | self.quickDqnFlags() 90 | train.main([]) 91 | self.verifyFilesCreated(FLAGS.base_dir) 92 | shutil.rmtree(FLAGS.base_dir) 93 | 94 | def testIntegrationRainbow(self): 95 | """Test the rainbow agent.""" 96 | tf.logging.info('####### Training the Rainbow agent #####') 97 | tf.logging.info('####### Rainbow base_dir: {}'.format(FLAGS.base_dir)) 98 | self.quickRainbowFlags() 99 | train.main([]) 100 | self.verifyFilesCreated(FLAGS.base_dir) 101 | shutil.rmtree(FLAGS.base_dir) 102 | 103 | 104 | if __name__ == '__main__': 105 | tf.test.main() 106 | 
-------------------------------------------------------------------------------- /baselines/plots.html: -------------------------------------------------------------------------------- 36 | Baseline plots 37 | This page provides a quick visualization of the training runs for all our default agents. 38 | 39 | See the baselines main page for details on the hyperparameters used. 40 |
44 | 45 | 46 | 135 | -------------------------------------------------------------------------------- /tests/common/logger_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The Dopamine Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Tests for dopamine.logger.""" 15 | 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | import os 21 | import pickle 22 | import shutil 23 | 24 | 25 | from absl import flags 26 | from dopamine.common import logger 27 | import tensorflow as tf 28 | 29 | FLAGS = flags.FLAGS 30 | 31 | 32 | class LoggerTest(tf.test.TestCase): 33 | 34 | def setUp(self): 35 | self._test_subdir = os.path.join('/tmp/dopamine_tests', 'logging') 36 | shutil.rmtree(self._test_subdir, ignore_errors=True) 37 | os.makedirs(self._test_subdir) 38 | 39 | def testLoggingDisabledWithEmptyDirectory(self): 40 | exp_logger = logger.Logger('') 41 | self.assertFalse(exp_logger.is_logging_enabled()) 42 | 43 | def testLoggingDisabledWithInvalidDirectory(self): 44 | exp_logger = logger.Logger('/does/not/exist') 45 | self.assertFalse(exp_logger.is_logging_enabled()) 46 | 47 | def testLoggingEnabledWithValidDirectory(self): 48 | exp_logger = logger.Logger('/tmp/dopamine_tests') 49 | self.assertTrue(exp_logger.is_logging_enabled()) 50 | 51 | def testSetEntry(self): 52 | exp_logger = logger.Logger('/tmp/dopamine_tests') 53 | self.assertEqual(len(exp_logger.data.keys()), 0) 54 | key = 'key' 55 | val = [1, 2, 3, 4] 56 | exp_logger[key] = val 57 | expected_dictionary = {} 58 | expected_dictionary[key] = val 59 | self.assertEqual(expected_dictionary, exp_logger.data) 60 | # Calling __setitem__ with the same value should overwrite the previous 61 | # value. 
62 | val = 'new value' 63 | exp_logger[key] = val 64 | expected_dictionary[key] = val 65 | self.assertEqual(expected_dictionary, exp_logger.data) 66 | 67 | def testLogToFileWithInvalidDirectory(self): 68 | exp_logger = logger.Logger('/does/not/exist') 69 | self.assertFalse(exp_logger.is_logging_enabled()) 70 | exp_logger.log_to_file(None, None) 71 | 72 | def testLogToFileWithValidDirectory(self): 73 | exp_logger = logger.Logger(self._test_subdir) 74 | self.assertTrue(exp_logger.is_logging_enabled()) 75 | key = 'key' 76 | val = [1, 2, 3, 4] 77 | exp_logger[key] = val 78 | expected_dictionary = {} 79 | expected_dictionary[key] = val 80 | self.assertEqual(expected_dictionary, exp_logger.data) 81 | iteration_number = 7 82 | exp_logger.log_to_file('log', iteration_number) 83 | log_file = os.path.join(self._test_subdir, 84 | 'log_{}'.format(iteration_number)) 85 | with tf.gfile.GFile(log_file, 'rb') as f: 86 | contents = f.read() 87 | self.assertEqual(contents, pickle.dumps(expected_dictionary, 88 | protocol=pickle.HIGHEST_PROTOCOL)) 89 | 90 | def testGarbageCollection(self): 91 | exp_logger = logger.Logger(self._test_subdir) 92 | self.assertTrue(exp_logger.is_logging_enabled()) 93 | key = 'key' 94 | val = [1, 2, 3, 4] 95 | exp_logger[key] = val 96 | expected_dictionary = {} 97 | expected_dictionary[key] = val 98 | self.assertEqual(expected_dictionary, exp_logger.data) 99 | deleted_log_files = 7 100 | total_log_files = logger.CHECKPOINT_DURATION + deleted_log_files 101 | for iteration_number in range(total_log_files): 102 | exp_logger.log_to_file('log', iteration_number) 103 | for iteration_number in range(total_log_files): 104 | log_file = os.path.join(self._test_subdir, 105 | 'log_{}'.format(iteration_number)) 106 | if iteration_number < deleted_log_files: 107 | self.assertFalse(tf.gfile.Exists(log_file)) 108 | else: 109 | self.assertTrue(tf.gfile.Exists(log_file)) 110 | 111 | 112 | if __name__ == '__main__': 113 | tf.test.main() 114 | -------------------------------------------------------------------------------- /tests/atari/train_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The Dopamine Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | """Tests for dopamine.atari.train.""" 15 | 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | 21 | 22 | 23 | from absl import flags 24 | from absl.testing import flagsaver 25 | from dopamine.atari import run_experiment 26 | from dopamine.atari import train 27 | import mock 28 | import tensorflow as tf 29 | 30 | import gin.tf 31 | 32 | FLAGS = flags.FLAGS 33 | 34 | 35 | class TrainTest(tf.test.TestCase): 36 | 37 | def testCreateDQNAgent(self): 38 | FLAGS.agent_name = 'dqn' 39 | with mock.patch.object(train, 'dqn_agent') as mock_dqn_agent: 40 | 41 | def mock_fn(unused_sess, num_actions, summary_writer): 42 | del summary_writer 43 | return num_actions * 10 44 | 45 | mock_dqn_agent.DQNAgent.side_effect = mock_fn 46 | environment = mock.Mock() 47 | environment.action_space.n = 7 48 | self.assertEqual(70, train.create_agent(self.test_session(), environment)) 49 | 50 | def testCreateRainbowAgent(self): 51 | FLAGS.agent_name = 'rainbow' 52 | with mock.patch.object(train, 'rainbow_agent') as mock_rainbow_agent: 53 | 54 | def mock_fn(unused_sess, num_actions, summary_writer): 55 | del summary_writer 56 | return num_actions * 10 57 | 58 | mock_rainbow_agent.RainbowAgent.side_effect = mock_fn 59 | environment = mock.Mock() 60 | environment.action_space.n = 7 61 | self.assertEqual(70, train.create_agent(self.test_session(), environment)) 62 | 63 | @mock.patch.object(run_experiment, 'Runner') 64 | def testCreateRunnerUnknown(self, mock_runner_constructor): 65 | mock_create_agent = mock.Mock() 66 | base_dir = '/tmp' 67 | FLAGS.schedule = 'unknown_schedule' 68 | with self.assertRaisesRegexp(ValueError, 'Unknown schedule'): 69 | train.create_runner(base_dir, mock_create_agent) 70 | 71 | @mock.patch.object(run_experiment, 'Runner') 72 | def testCreateRunner(self, mock_runner_constructor): 73 | mock_create_agent = mock.Mock() 74 | base_dir = '/tmp' 75 | train.create_runner(base_dir, mock_create_agent) 76 | self.assertEqual(1, mock_runner_constructor.call_count) 77 | mock_args, _ = mock_runner_constructor.call_args 78 | self.assertEqual(base_dir, mock_args[0]) 79 | self.assertEqual(mock_create_agent, mock_args[1]) 80 | 81 | @flagsaver.flagsaver(schedule='continuous_train') 82 | @mock.patch.object(run_experiment, 'TrainRunner') 83 | def testCreateTrainRunner(self, mock_runner_constructor): 84 | mock_create_agent = mock.Mock() 85 | base_dir = '/tmp' 86 | train.create_runner(base_dir, mock_create_agent) 87 | self.assertEqual(1, mock_runner_constructor.call_count) 88 | mock_args, _ = mock_runner_constructor.call_args 89 | self.assertEqual(base_dir, mock_args[0]) 90 | self.assertEqual(mock_create_agent, mock_args[1]) 91 | 92 | @flagsaver.flagsaver(gin_files=['file1', 'file2', 'file3']) 93 | @flagsaver.flagsaver(gin_bindings=['binding1', 'binding2']) 94 | @mock.patch.object(gin, 'parse_config_files_and_bindings') 95 | @mock.patch.object(run_experiment, 'Runner') 96 | def testLaunchExperiment( 97 | self, mock_runner_constructor, mock_parse_config_files_and_bindings): 98 | mock_create_agent = mock.Mock() 99 | mock_runner = mock.Mock() 100 | mock_runner_constructor.return_value = mock_runner 101 | 102 | def mock_create_runner(unused_base_dir, unused_create_agent_fn): 103 | return mock_runner 104 | 105 | train.launch_experiment(mock_create_runner, mock_create_agent) 106 | self.assertEqual(1, mock_parse_config_files_and_bindings.call_count) 107 | mock_args, mock_kwargs = mock_parse_config_files_and_bindings.call_args 108 | 
self.assertEqual(FLAGS.gin_files, mock_args[0]) 109 | self.assertEqual(FLAGS.gin_bindings, mock_kwargs['bindings']) 110 | self.assertFalse(mock_kwargs['skip_unknown']) 111 | self.assertEqual(1, mock_runner.run_experiment.call_count) 112 | 113 | 114 | if __name__ == '__main__': 115 | tf.test.main() 116 | -------------------------------------------------------------------------------- /docs/api_docs/python/implicit_quantile_agent/ImplicitQuantileAgent.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 |
11 | 12 | # implicit_quantile_agent.ImplicitQuantileAgent 13 | 14 | ## Class `ImplicitQuantileAgent` 15 | 16 | Inherits From: [`RainbowAgent`](../rainbow_agent/RainbowAgent.md) 17 | 18 | An extension of Rainbow to perform implicit quantile regression. 19 | 20 | ## Methods 21 | 22 |

### `__init__`

23 | 24 | ```python 25 | __init__( 26 | *args, 27 | **kwargs 28 | ) 29 | ``` 30 | 31 | Initializes the agent and constructs the Graph. 32 | 33 | Most of this constructor's parameters are IQN-specific hyperparameters whose 34 | values are taken from Dabney et al. (2018). 35 | 36 | #### Args: 37 | 38 | * `sess`: `tf.Session` object for running associated ops. 39 | * `num_actions`: int, number of actions the agent can take at any 40 | state. 41 | * `kappa`: float, Huber loss cutoff. 42 | * `num_tau_samples`: int, number of online quantile samples for loss 43 | estimation. 44 | * `num_tau_prime_samples`: int, number of target quantile samples for 45 | loss estimation. 46 | * `num_quantile_samples`: int, number of quantile samples for computing 47 | Q-values. 48 | * `quantile_embedding_dim`: int, embedding dimension for the quantile 49 | input. 50 | 51 |
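The `kappa` and tau-sample parameters are easiest to read off the quantile Huber loss of Dabney et al. (2018). The NumPy sketch below only illustrates that loss for a single state; it is not the agent's TensorFlow implementation, and the array names and shapes are assumptions.

```python
import numpy as np

def quantile_huber_loss(td_errors, taus, kappa=1.0):
  """Illustrative quantile Huber loss for one state.

  td_errors: [num_tau_samples, num_tau_prime_samples] pairwise differences
    between target and online quantile estimates.
  taus: [num_tau_samples] quantile fractions sampled in (0, 1).
  kappa: float, Huber loss cutoff.
  """
  abs_errors = np.abs(td_errors)
  # Huber loss: quadratic inside the cutoff, linear outside.
  huber = np.where(abs_errors <= kappa,
                   0.5 * td_errors ** 2,
                   kappa * (abs_errors - 0.5 * kappa))
  # Asymmetric quantile weight |tau - 1{td_error < 0}|.
  weights = np.abs(taus[:, None] - (td_errors < 0.0).astype(np.float64))
  return np.sum(np.mean(weights * huber / kappa, axis=1))
```

Increasing `num_tau_samples` and `num_tau_prime_samples` simply grows the two axes of `td_errors`, trading compute for a lower-variance loss estimate.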

### `begin_episode`

52 | 53 | ```python 54 | begin_episode(observation) 55 | ``` 56 | 57 | Returns the agent's first action for this episode. 58 | 59 | #### Args: 60 | 61 | * `observation`: numpy array, the environment's initial observation. 62 | 63 | #### Returns: 64 | 65 | int, the selected action. 66 | 67 |

### `bundle_and_checkpoint`

68 | 69 | ```python 70 | bundle_and_checkpoint( 71 | checkpoint_dir, 72 | iteration_number 73 | ) 74 | ``` 75 | 76 | Returns a self-contained bundle of the agent's state. 77 | 78 | This is used for checkpointing. It will return a dictionary containing all 79 | non-TensorFlow objects (to be saved into a file by the caller), and it saves all 80 | TensorFlow objects into a checkpoint file. 81 | 82 | #### Args: 83 | 84 | * `checkpoint_dir`: str, directory where TensorFlow objects will be 85 | saved. 86 | * `iteration_number`: int, iteration number to use for naming the 87 | checkpoint file. 88 | 89 | #### Returns: 90 | 91 | A dict containing additional Python objects to be checkpointed by the 92 | experiment. If the checkpoint directory does not exist, returns None. 93 | 94 |

### `end_episode`

95 | 96 | ```python 97 | end_episode(reward) 98 | ``` 99 | 100 | Signals the end of the episode to the agent. 101 | 102 | We store the observation of the current time step, which is the last observation 103 | of the episode. 104 | 105 | #### Args: 106 | 107 | * `reward`: float, the last reward from the environment. 108 | 109 |

### `step`

110 | 111 | ```python 112 | step( 113 | reward, 114 | observation 115 | ) 116 | ``` 117 | 118 | Records the most recent transition and returns the agent's next action. 119 | 120 | We store the observation of the last time step since we want to store it with 121 | the reward. 122 | 123 | #### Args: 124 | 125 | * `reward`: float, the reward received from the agent's most recent 126 | action. 127 | * `observation`: numpy array, the most recent observation. 128 | 129 | #### Returns: 130 | 131 | int, the selected action. 132 | 133 |

### `unbundle`

134 | 135 | ```python 136 | unbundle( 137 | checkpoint_dir, 138 | iteration_number, 139 | bundle_dictionary 140 | ) 141 | ``` 142 | 143 | Restores the agent from a checkpoint. 144 | 145 | Restores the agent's Python objects to those specified in bundle_dictionary, and 146 | restores the TensorFlow objects to those specified in the checkpoint_dir. If the 147 | checkpoint_dir does not exist, will not reset the agent's state. 148 | 149 | #### Args: 150 | 151 | * `checkpoint_dir`: str, path to the checkpoint saved by tf.Save. 152 | * `iteration_number`: int, checkpoint version, used when restoring 153 | replay buffer. 154 | * `bundle_dictionary`: dict, containing additional Python objects owned 155 | by the agent. 156 | 157 | #### Returns: 158 | 159 | bool, True if unbundling was successful. 160 | -------------------------------------------------------------------------------- /dopamine/colab/tensorboard.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "tensorboard.ipynb", 7 | "version": "0.3.2", 8 | "provenance": [] 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | } 14 | }, 15 | "cells": [ 16 | { 17 | "metadata": { 18 | "id": "VYNA79KmgvbY", 19 | "colab_type": "text" 20 | }, 21 | "cell_type": "markdown", 22 | "source": [ 23 | "Copyright 2018 The Dopamine Authors.\n", 24 | "\n", 25 | "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. You may obtain a copy of the License at\n", 26 | "\n", 27 | "https://www.apache.org/licenses/LICENSE-2.0\n", 28 | "\n", 29 | "Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License." 
30 | ] 31 | }, 32 | { 33 | "metadata": { 34 | "id": "Ctd9k0h6wnqT", 35 | "colab_type": "text" 36 | }, 37 | "cell_type": "markdown", 38 | "source": [ 39 | "# Visualize Dopamine baselines with Tensorboard\n", 40 | "This colab allows you to easily view the trained baselines with Tensorboard (even if you don't have Tensorboard on your local machine!).\n", 41 | "\n", 42 | "Simply specify the game you would like to visualize and then run the cells in order.\n", 43 | "\n", 44 | "_The instructions for setting up Tensorboard were obtained from https://www.dlology.com/blog/quick-guide-to-run-tensorboard-in-google-colab/_" 45 | ] 46 | }, 47 | { 48 | "metadata": { 49 | "id": "s8r_45_0qpmb", 50 | "colab_type": "code", 51 | "colab": {}, 52 | "cellView": "form" 53 | }, 54 | "cell_type": "code", 55 | "source": [ 56 | "# @title Prepare all necessary files and binaries.\n", 57 | "!wget https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip\n", 58 | "!unzip ngrok-stable-linux-amd64.zip\n", 59 | "!gsutil -q -m cp -R gs://download-dopamine-rl/compiled_tb_event_files.tar.gz /content/\n", 60 | "!tar -xvzf /content/compiled_tb_event_files.tar.gz" 61 | ], 62 | "execution_count": 0, 63 | "outputs": [] 64 | }, 65 | { 66 | "metadata": { 67 | "id": "D-oZRzeWwHZN", 68 | "colab_type": "code", 69 | "colab": {}, 70 | "cellView": "form" 71 | }, 72 | "cell_type": "code", 73 | "source": [ 74 | "# @title Select which game to visualize.\n", 75 | "game = 'Asterix' # @param['AirRaid', 'Alien', 'Amidar', 'Assault', 'Asterix', 'Asteroids', 'Atlantis', 'BankHeist', 'BattleZone', 'BeamRider', 'Berzerk', 'Bowling', 'Boxing', 'Breakout', 'Carnival', 'Centipede', 'ChopperCommand', 'CrazyClimber', 'DemonAttack', 'DoubleDunk', 'ElevatorAction', 'Enduro', 'FishingDerby', 'Freeway', 'Frostbite', 'Gopher', 'Gravitar', 'Hero', 'IceHockey', 'Jamesbond', 'JourneyEscape', 'Kangaroo', 'Krull', 'KungFuMaster', 'MontezumaRevenge', 'MsPacman', 'NameThisGame', 'Phoenix', 'Pitfall', 'Pong', 'Pooyan', 'PrivateEye', 'Qbert', 'Riverraid', 'RoadRunner', 'Robotank', 'Seaquest', 'Skiing', 'Solaris', 'SpaceInvaders', 'StarGunner', 'Tennis', 'TimePilot', 'Tutankham', 'UpNDown', 'Venture', 'VideoPinball', 'WizardOfWor', 'YarsRevenge', 'Zaxxon']\n", 76 | "agents = ['dqn', 'c51', 'rainbow', 'implicit_quantile']\n", 77 | "for agent in agents:\n", 78 | " for run in range(1, 6):\n", 79 | " !mkdir -p \"/content/$game/$agent/$run\"\n", 80 | " !cp -r \"/content/$agent/$game/$run\" \"/content/$game/$agent/$run\"\n", 81 | "LOG_DIR = '/content/{}'.format(game)\n", 82 | "get_ipython().system_raw(\n", 83 | " 'tensorboard --logdir {} --host 0.0.0.0 --port 6006 &'\n", 84 | " .format(LOG_DIR)\n", 85 | ")" 86 | ], 87 | "execution_count": 0, 88 | "outputs": [] 89 | }, 90 | { 91 | "metadata": { 92 | "id": "zlKKnaP4y9FA", 93 | "colab_type": "code", 94 | "colab": { 95 | "base_uri": "https://localhost:8080/", 96 | "height": 35 97 | }, 98 | "cellView": "form", 99 | "outputId": "3abff714-c484-436e-dc5f-88b15511f4f2" 100 | }, 101 | "cell_type": "code", 102 | "source": [ 103 | "# @title Start the tensorboard\n", 104 | "get_ipython().system_raw('./ngrok http 6006 &')\n", 105 | "! 
curl -s http://localhost:4040/api/tunnels | python3 -c \\\n", 106 | " \"import sys, json; print(json.load(sys.stdin)['tunnels'][0]['public_url'])\"" 107 | ], 108 | "execution_count": 0, 109 | "outputs": [] 110 | } 111 | ] 112 | } 113 | -------------------------------------------------------------------------------- /docs/api_docs/python/circular_replay_buffer/WrappedReplayBuffer.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 |
11 | 12 | # circular_replay_buffer.WrappedReplayBuffer 13 | 14 | ## Class `WrappedReplayBuffer` 15 | 16 | Wrapper of OutOfGraphReplayBuffer with an in-graph sampling mechanism. 17 | 18 | Usage: To add a transition, call the add function. 19 | 20 | To sample a batch, construct operations that depend on any of the tensors in the 21 | transition dictionary. Every sess.run that requires any of these tensors will 22 | sample a new transition. 23 | 24 | ## Methods 25 | 26 |

### `__init__`

27 | 28 | ```python 29 | __init__( 30 | *args, 31 | **kwargs 32 | ) 33 | ``` 34 | 35 | Initializes WrappedReplayBuffer. 36 | 37 | #### Args: 38 | 39 | * `observation_shape`: tuple or int. If int, the observation is assumed 40 | to be a 2D square. 41 | * `stack_size`: int, number of frames to use in state stack. 42 | * `use_staging`: bool, when True, uses a staging area to 43 | prefetch the next sampling batch. 44 | * `replay_capacity`: int, number of transitions to keep in memory. 45 | * `batch_size`: int, number of transitions to sample per batch. 46 | * `update_horizon`: int, length of update ('n' in n-step update). 47 | * `gamma`: float, the discount factor. 48 | * `wrapped_memory`: The 'inner' memory data structure. If None, it 49 | creates the standard DQN replay memory. 50 | * `max_sample_attempts`: int, the maximum number of attempts allowed to 51 | get a sample. 52 | * `extra_storage_types`: list of ReplayElements defining the type of 53 | the extra contents that will be stored and returned by 54 | sample_transition_batch. 55 | * `observation_dtype`: np.dtype, type of the observations. Defaults to 56 | np.uint8 for Atari 2600. 57 | 58 | #### Raises: 59 | 60 | * `ValueError`: If update_horizon is not positive. 61 | * `ValueError`: If discount factor is not in [0, 1]. 62 | 63 |
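A minimal construction-and-sampling sketch following the usage note above. The import path, the keyword values, and the `transition['state']` lookup are assumptions for illustration; consult the class for the exact attribute and key names.

```python
import numpy as np
import tensorflow as tf
from dopamine.replay_memory import circular_replay_buffer

replay = circular_replay_buffer.WrappedReplayBuffer(
    observation_shape=84,   # int => 84x84 observations
    stack_size=4,
    use_staging=False,      # keep the sketch simple; True enables prefetching
    replay_capacity=1000,
    batch_size=32)

# Fill the buffer with (dummy) transitions before sampling.
for _ in range(200):
  observation = np.zeros((84, 84), dtype=np.uint8)
  replay.add(observation, 0, 0.0, 0)

# Ops built from the transition dictionary resample on every sess.run.
states = replay.transition['state']
with tf.Session() as sess:
  batch_of_states = sess.run(states)
```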

### `add`

64 | 65 | ```python 66 | add( 67 | observation, 68 | action, 69 | reward, 70 | terminal, 71 | *args 72 | ) 73 | ``` 74 | 75 | Adds a transition to the replay memory. 76 | 77 | Since the next_observation in the transition will be the observation added next, 78 | there is no need to pass it. 79 | 80 | If the replay memory is at capacity, the oldest transition will be discarded. 81 | 82 | #### Args: 83 | 84 | * `observation`: np.array with shape observation_shape. 85 | * `action`: int, the action in the transition. 86 | * `reward`: float, the reward received in the transition. 87 | * `terminal`: A uint8 acting as a boolean indicating whether the 88 | transition was terminal (1) or not (0). 89 | * `*args`: extra contents with shapes and dtypes according to 90 | extra_storage_types. 91 | 92 |

### `create_sampling_ops`

93 | 94 | ```python 95 | create_sampling_ops(use_staging) 96 | ``` 97 | 98 | Creates the ops necessary to sample from the replay buffer. 99 | 100 | Creates the transition dictionary containing the sampling tensors. 101 | 102 | #### Args: 103 | 104 | * `use_staging`: bool, when True, uses a staging area to 105 | prefetch the next sampling batch. 106 | 107 |

### `load`

108 | 109 | ```python 110 | load( 111 | checkpoint_dir, 112 | suffix 113 | ) 114 | ``` 115 | 116 | Loads the replay buffer's state from a saved file. 117 | 118 | #### Args: 119 | 120 | * `checkpoint_dir`: str, the directory from which to read the numpy 121 | checkpoint files. 122 | * `suffix`: str, the suffix to use in numpy checkpoint files. 123 | 124 |

### `save`

125 | 126 | ```python 127 | save( 128 | checkpoint_dir, 129 | iteration_number 130 | ) 131 | ``` 132 | 133 | Saves the underlying replay buffer's contents to a file. 134 | 135 | #### Args: 136 | 137 | * `checkpoint_dir`: str, the directory where the numpy 138 | checkpoint files will be written. 139 | * `iteration_number`: int, the iteration number to use as a suffix in 140 | naming numpy checkpoint files. 141 | 142 |
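The save/load pair works as a simple round trip; here is a sketch. The directory, the suffix convention, and reusing the iteration number as the load suffix are assumptions based on the argument descriptions here and under `load` above.

```python
from dopamine.replay_memory import circular_replay_buffer

replay = circular_replay_buffer.WrappedReplayBuffer(
    observation_shape=84, stack_size=4, replay_capacity=1000, batch_size=32)

checkpoint_dir = '/tmp/replay_checkpoints'  # must already exist
iteration_number = 7

# Write the buffer's contents as numpy checkpoint files...
replay.save(checkpoint_dir, iteration_number)

# ...and restore them later, using the same number as the suffix.
replay.load(checkpoint_dir, suffix=str(iteration_number))
```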

### `unpack_transition`

143 | 144 | ```python 145 | unpack_transition( 146 | transition_tensors, 147 | transition_type 148 | ) 149 | ``` 150 | 151 | Unpacks the given transition into member variables. 152 | 153 | #### Args: 154 | 155 | * `transition_tensors`: tuple of tf.Tensors. 156 | * `transition_type`: tuple of ReplayElements matching 157 | transition_tensors. 158 | -------------------------------------------------------------------------------- /docs/api_docs/python/dqn_agent/DQNAgent.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 |
11 | 12 | # dqn_agent.DQNAgent 13 | 14 | ## Class `DQNAgent` 15 | 16 | An implementation of the DQN agent. 17 | 18 | ## Methods 19 | 20 |

### `__init__`

21 | 22 | ```python 23 | __init__( 24 | *args, 25 | **kwargs 26 | ) 27 | ``` 28 | 29 | Initializes the agent and constructs the components of its graph. 30 | 31 | #### Args: 32 | 33 | * `sess`: `tf.Session`, for executing ops. 34 | * `num_actions`: int, number of actions the agent can take at any 35 | state. 36 | * `gamma`: float, discount factor with the usual RL meaning. 37 | * `update_horizon`: int, horizon at which updates are performed, the 38 | 'n' in n-step update. 39 | * `min_replay_history`: int, number of transitions that should be 40 | experienced before the agent begins training its value function. 41 | * `update_period`: int, period between DQN updates. 42 | * `target_update_period`: int, update period for the target network. 43 | * `epsilon_fn`: function expecting 4 parameters: (decay_period, step, 44 | warmup_steps, epsilon). This function should return the epsilon value used 45 | for exploration during training. 46 | * `epsilon_train`: float, the value to which the agent's epsilon is 47 | eventually decayed during training. 48 | * `epsilon_eval`: float, epsilon used when evaluating the agent. 49 | * `epsilon_decay_period`: int, length of the epsilon decay schedule. 50 | * `tf_device`: str, Tensorflow device on which the agent's graph is 51 | executed. 52 | * `use_staging`: bool, when True use a staging area to prefetch the 53 | next training batch, speeding training up by about 30%. 54 | * `max_tf_checkpoints_to_keep`: int, the number of TensorFlow 55 | checkpoints to keep. 56 | * `optimizer`: `tf.train.Optimizer`, for training the value function. 57 | 58 |
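For example, `epsilon_fn` can be any callable with the four-parameter signature described above. The linear schedule below is a sketch of one such function; the name is illustrative and it is not necessarily the schedule the agent ships with.

```python
def linear_epsilon_schedule(decay_period, step, warmup_steps, epsilon):
  """Keeps epsilon at 1.0 during warmup, then decays linearly to `epsilon`."""
  steps_left = decay_period + warmup_steps - step
  bonus = (1.0 - epsilon) * steps_left / decay_period
  bonus = max(0.0, min(bonus, 1.0 - epsilon))
  return epsilon + bonus

# Hypothetical usage when constructing the agent:
# agent = dqn_agent.DQNAgent(sess, num_actions=4,
#                            epsilon_fn=linear_epsilon_schedule)
```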

### `begin_episode`

59 | 60 | ```python 61 | begin_episode(observation) 62 | ``` 63 | 64 | Returns the agent's first action for this episode. 65 | 66 | #### Args: 67 | 68 | * `observation`: numpy array, the environment's initial observation. 69 | 70 | #### Returns: 71 | 72 | int, the selected action. 73 | 74 |

### `bundle_and_checkpoint`

75 | 76 | ```python 77 | bundle_and_checkpoint( 78 | checkpoint_dir, 79 | iteration_number 80 | ) 81 | ``` 82 | 83 | Returns a self-contained bundle of the agent's state. 84 | 85 | This is used for checkpointing. It will return a dictionary containing all 86 | non-TensorFlow objects (to be saved into a file by the caller), and it saves all 87 | TensorFlow objects into a checkpoint file. 88 | 89 | #### Args: 90 | 91 | * `checkpoint_dir`: str, directory where TensorFlow objects will be 92 | saved. 93 | * `iteration_number`: int, iteration number to use for naming the 94 | checkpoint file. 95 | 96 | #### Returns: 97 | 98 | A dict containing additional Python objects to be checkpointed by the 99 | experiment. If the checkpoint directory does not exist, returns None. 100 | 101 |
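A sketch of how an experiment loop might combine `bundle_and_checkpoint` with `unbundle` (documented below). The file naming and the use of `pickle` here are assumptions; Dopamine's own runner delegates this bookkeeping to its checkpointing utilities.

```python
import os
import pickle
import tensorflow as tf

def checkpoint_agent(agent, checkpoint_dir, iteration_number):
  # The agent writes its TensorFlow variables itself and returns the
  # remaining Python state for the caller to persist.
  bundle = agent.bundle_and_checkpoint(checkpoint_dir, iteration_number)
  if bundle is None:
    return  # Checkpoint directory does not exist.
  path = os.path.join(checkpoint_dir, 'bundle_{}.pkl'.format(iteration_number))
  with tf.gfile.GFile(path, 'wb') as f:
    pickle.dump(bundle, f)

def restore_agent(agent, checkpoint_dir, iteration_number):
  path = os.path.join(checkpoint_dir, 'bundle_{}.pkl'.format(iteration_number))
  with tf.gfile.GFile(path, 'rb') as f:
    bundle = pickle.load(f)
  return agent.unbundle(checkpoint_dir, iteration_number, bundle)
```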

### `end_episode`

102 | 103 | ```python 104 | end_episode(reward) 105 | ``` 106 | 107 | Signals the end of the episode to the agent. 108 | 109 | We store the observation of the current time step, which is the last observation 110 | of the episode. 111 | 112 | #### Args: 113 | 114 | * `reward`: float, the last reward from the environment. 115 | 116 |

### `step`

117 | 118 | ```python 119 | step( 120 | reward, 121 | observation 122 | ) 123 | ``` 124 | 125 | Records the most recent transition and returns the agent's next action. 126 | 127 | We store the observation of the last time step since we want to store it with 128 | the reward. 129 | 130 | #### Args: 131 | 132 | * `reward`: float, the reward received from the agent's most recent 133 | action. 134 | * `observation`: numpy array, the most recent observation. 135 | 136 | #### Returns: 137 | 138 | int, the selected action. 139 | 140 |
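Together with `begin_episode` and `end_episode` above, this method gives the per-episode control flow sketched below; `env` stands in for a Gym-style environment and is not part of this class.

```python
def run_one_episode(agent, env, max_steps=27000):
  observation = env.reset()
  action = agent.begin_episode(observation)
  total_reward = 0.0
  for _ in range(max_steps):
    observation, reward, is_terminal, _ = env.step(action)
    total_reward += reward
    if is_terminal:
      agent.end_episode(reward)
      break
    action = agent.step(reward, observation)
  return total_reward
```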

### `unbundle`

141 | 142 | ```python 143 | unbundle( 144 | checkpoint_dir, 145 | iteration_number, 146 | bundle_dictionary 147 | ) 148 | ``` 149 | 150 | Restores the agent from a checkpoint. 151 | 152 | Restores the agent's Python objects to those specified in bundle_dictionary, and 153 | restores the TensorFlow objects to those specified in the checkpoint_dir. If the 154 | checkpoint_dir does not exist, will not reset the agent's state. 155 | 156 | #### Args: 157 | 158 | * `checkpoint_dir`: str, path to the checkpoint saved by tf.train.Saver. 159 | * `iteration_number`: int, checkpoint version, used when restoring 160 | replay buffer. 161 | * `bundle_dictionary`: dict, containing additional Python objects owned 162 | by the agent. 163 | 164 | #### Returns: 165 | 166 | bool, True if unbundling was successful. 167 | --------------------------------------------------------------------------------