├── .DS_Store
├── .gitignore
├── .idea
    ├── ioc.iml
    ├── misc.xml
    ├── modules.xml
    └── vcs.xml
├── README.md
├── control
    ├── .benchmark_pattern
    ├── .gitignore
    ├── .travis.yml
    ├── Dockerfile
    ├── LICENSE
    ├── README.md
    ├── baselines
    │   ├── __init__.py
    │   ├── bench
    │   │   ├── __init__.py
    │   │   ├── benchmarks.py
    │   │   └── monitor.py
    │   ├── common
    │   │   ├── __init__.py
    │   │   ├── atari_wrappers.py
    │   │   ├── cg.py
    │   │   ├── cmd_util.py
    │   │   ├── console_util.py
    │   │   ├── dataset.py
    │   │   ├── distributions.py
    │   │   ├── input.py
    │   │   ├── math_util.py
    │   │   ├── misc_util.py
    │   │   ├── models.py
    │   │   ├── mpi_adam.py
    │   │   ├── mpi_adam_optimizer.py
    │   │   ├── mpi_fork.py
    │   │   ├── mpi_moments.py
    │   │   ├── mpi_running_mean_std.py
    │   │   ├── mpi_util.py
    │   │   ├── plot_util.py
    │   │   ├── policies.py
    │   │   ├── retro_wrappers.py
    │   │   ├── runners.py
    │   │   ├── running_mean_std.py
    │   │   ├── schedules.py
    │   │   ├── segment_tree.py
    │   │   ├── tests
    │   │   │   ├── __init__.py
    │   │   │   ├── envs
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── fixed_sequence_env.py
    │   │   │   │   ├── identity_env.py
    │   │   │   │   └── mnist_env.py
    │   │   │   ├── test_cartpole.py
    │   │   │   ├── test_doc_examples.py
    │   │   │   ├── test_env_after_learn.py
    │   │   │   ├── test_fetchreach.py
    │   │   │   ├── test_fixed_sequence.py
    │   │   │   ├── test_identity.py
    │   │   │   ├── test_mnist.py
    │   │   │   ├── test_schedules.py
    │   │   │   ├── test_segment_tree.py
    │   │   │   ├── test_serialization.py
    │   │   │   ├── test_tf_util.py
    │   │   │   └── util.py
    │   │   ├── tf_util.py
    │   │   ├── tile_images.py
    │   │   └── vec_env
    │   │   │   ├── __init__.py
    │   │   │   ├── dummy_vec_env.py
    │   │   │   ├── shmem_vec_env.py
    │   │   │   ├── subproc_vec_env.py
    │   │   │   ├── test_vec_env.py
    │   │   │   ├── test_video_recorder.py
    │   │   │   ├── util.py
    │   │   │   ├── vec_frame_stack.py
    │   │   │   ├── vec_monitor.py
    │   │   │   ├── vec_normalize.py
    │   │   │   └── vec_video_recorder.py
    │   ├── logger.py
    │   ├── ppoc_int
    │   │   ├── __init__.py
    │   │   ├── assets
    │   │   │   ├── half_cheetah.xml
    │   │   │   └── twod_tmaze.xml
    │   │   ├── half_cheetah.py
    │   │   ├── mlp_policy.py
    │   │   ├── normalized_env.py
    │   │   ├── plot_res.py
    │   │   ├── pposgd_simple.py
    │   │   ├── run_mujoco.py
    │   │   ├── seeding.py
    │   │   └── twod_tmaze.py
    │   ├── results_plotter.py
    │   └── run.py
    ├── benchmarks_atari10M.htm
    ├── benchmarks_mujoco1M.htm
    ├── data
    │   ├── cartpole.gif
    │   ├── fetchPickAndPlaceContrast.png
    │   └── logo.jpg
    ├── docs
    │   └── viz
    │   │   └── viz.ipynb
    ├── setup.cfg
    └── setup.py
├── launcher_miniworld.sh
├── launcher_mujoco.sh
├── miniworld
    ├── .benchmark_pattern
    ├── .gitignore
    ├── .travis.yml
    ├── Dockerfile
    ├── LICENSE
    ├── README.md
    ├── baselines
    │   ├── __init__.py
    │   ├── bench
    │   │   ├── __init__.py
    │   │   ├── benchmarks.py
    │   │   └── monitor.py
    │   ├── common
    │   │   ├── __init__.py
    │   │   ├── atari_wrappers.py
    │   │   ├── cg.py
    │   │   ├── cmd_util.py
    │   │   ├── console_util.py
    │   │   ├── dataset.py
    │   │   ├── distributions.py
    │   │   ├── input.py
    │   │   ├── math_util.py
    │   │   ├── misc_util.py
    │   │   ├── models.py
    │   │   ├── mpi_adam.py
    │   │   ├── mpi_adam_optimizer.py
    │   │   ├── mpi_fork.py
    │   │   ├── mpi_moments.py
    │   │   ├── mpi_running_mean_std.py
    │   │   ├── mpi_util.py
    │   │   ├── plot_util.py
    │   │   ├── policies.py
    │   │   ├── retro_wrappers.py
    │   │   ├── runners.py
    │   │   ├── running_mean_std.py
    │   │   ├── schedules.py
    │   │   ├── segment_tree.py
    │   │   ├── tests
    │   │   │   ├── __init__.py
    │   │   │   ├── envs
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── fixed_sequence_env.py
    │   │   │   │   ├── identity_env.py
    │   │   │   │   └── mnist_env.py
    │   │   │   ├── test_cartpole.py
    │   │   │   ├── test_doc_examples.py
    │   │   │   ├── test_env_after_learn.py
    │   │   │   ├── test_fetchreach.py
    │   │   │   ├── test_fixed_sequence.py
    │   │   │   ├── test_identity.py
    │   │   │   ├── test_mnist.py
    │   │   │   ├── test_schedules.py
    │   │   │   ├── test_segment_tree.py
    │   │   │   ├── test_serialization.py
    │   │   │   ├── test_tf_util.py
    │   │   │   └── util.py
    │   │   ├── tf_util.py
    │   │   ├── tile_images.py
    │   │   └── vec_env
    │   │   │   ├── __init__.py
    │   │   │   ├── dummy_vec_env.py
    │   │   │   ├── shmem_vec_env.py
    │   │   │   ├── subproc_vec_env.py
    │   │   │   ├── test_vec_env.py
    │   │   │   ├── test_video_recorder.py
    │   │   │   ├── util.py
    │   │   │   ├── vec_frame_stack.py
    │   │   │   ├── vec_monitor.py
    │   │   │   ├── vec_normalize.py
    │   │   │   └── vec_video_recorder.py
    │   ├── logger.py
    │   ├── ppoc_int
    │   │   ├── README.md
    │   │   ├── __init__.py
    │   │   ├── cnn_policy.py
    │   │   ├── mlp_policy.py
    │   │   ├── muj.py
    │   │   ├── oneroom.py
    │   │   ├── plot_res.py
    │   │   ├── pposgd_simple.py
    │   │   ├── run_miniw.py
    │   │   └── run_mujoco.py
    │   ├── results_plotter.py
    │   └── run.py
    ├── data
    │   ├── cartpole.gif
    │   ├── fetchPickAndPlaceContrast.png
    │   └── logo.jpg
    ├── docs
    │   └── viz
    │   │   └── viz.ipynb
    ├── setup.cfg
    └── setup.py
└── tabular
    ├── .DS_Store
    ├── .ipynb_checkpoints
        ├── fr_analysis_heatmaps-checkpoint.ipynb
        ├── fr_analysis_performance-checkpoint.ipynb
        └── fr_env_plots-checkpoint.ipynb
    ├── FR_Return_10Runs_Smooth_MisspecifiedPiO.pdf
    ├── FR_Steps_10Runs_Smooth_MisspecifiedPiO.pdf
    ├── GoalG62.png
    ├── InterestOptionCritic
        └── Runs10_Epsds500_Eps0.01_NOpt4_LRT0.25_LRI0.25_LRC0.5_temp0.01_IF0.15_LReg0.0_seed7200
        │   ├── History.npy
        │   ├── IOC_Task1_IntraOptionPolicy_Opt_0.png
        │   ├── IOC_Task1_IntraOptionPolicy_Opt_1.png
        │   ├── IOC_Task1_IntraOptionPolicy_Opt_2.png
        │   ├── IOC_Task1_IntraOptionPolicy_Opt_3.png
        │   ├── Params.txt
        │   ├── StateFreq.npy
        │   ├── Weights_ActionValueFunction.npy
        │   ├── Weights_InterestFunction.npy
        │   ├── Weights_IntraOption.npy
        │   ├── Weights_OptionValueFunction.npy
        │   ├── Weights_Policy.npy
        │   └── Weights_Termination.npy
    ├── OptionCritic
        └── Runs10_Epsds500_Eps0.01_NOpt4_LRT0.25_LRI0.25_LRC0.5_temp0.01_seed7200
        │   ├── History.npy
        │   ├── Params.txt
        │   ├── StateFreq.npy
        │   ├── Weights_ActionValueFunction.npy
        │   ├── Weights_IntraOption.npy
        │   ├── Weights_OptionValueFunction.npy
        │   └── Weights_Termination.npy
    ├── TransferVisual.png
    ├── __pycache__
        └── fourrooms.cpython-36.pyc
    ├── fourrooms.py
    ├── fr_analysis_heatmaps.ipynb
    ├── fr_analysis_performance.ipynb
    ├── fr_env_plots.ipynb
    ├── interestoptioncritic_tabular_fr.py
    └── optioncritic_tabular_fr.py


/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kkhetarpal/ioc/fb88d5b881e0b5c317020b23874495e614b3ddc7/.DS_Store


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea/workspace.xml 
2 | .idea/tasks.xml
3 | 


--------------------------------------------------------------------------------
/.idea/ioc.iml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <module type="PYTHON_MODULE" version="4">
 3 |   <component name="NewModuleRootManager">
 4 |     <content url="file://$MODULE_DIR$" />
 5 |     <orderEntry type="inheritedJdk" />
 6 |     <orderEntry type="sourceFolder" forTests="false" />
 7 |   </component>
 8 |   <component name="TestRunnerService">
 9 |     <option name="PROJECT_TEST_RUNNER" value="Unittests" />
10 |   </component>
11 | </module>


--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7" project-jdk-type="Python SDK" />
4 | </project>


--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="ProjectModuleManager">
4 |     <modules>
5 |       <module fileurl="file://$PROJECT_DIR$/.idea/ioc.iml" filepath="$PROJECT_DIR$/.idea/ioc.iml" />
6 |     </modules>
7 |   </component>
8 | </project>


--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="VcsDirectoryMappings">
4 |     <mapping directory="$PROJECT_DIR$" vcs="Git" />
5 |   </component>
6 | </project>


--------------------------------------------------------------------------------
/control/.benchmark_pattern:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/control/.gitignore:
--------------------------------------------------------------------------------
 1 | *.swp
 2 | *.pyc
 3 | *.pkl
 4 | *.py~
 5 | .pytest_cache
 6 | .DS_Store
 7 | .idea
 8 | 
 9 | # Setuptools distribution and build folders.
10 | /dist/
11 | /build
12 | keys/
13 | 
14 | # Virtualenv
15 | /env
16 | 
17 | 
18 | *.sublime-project
19 | *.sublime-workspace
20 | 
21 | .idea
22 | 
23 | logs/
24 | 
25 | .ipynb_checkpoints
26 | ghostdriver.log
27 | 
28 | htmlcov
29 | 
30 | junk
31 | src
32 | 
33 | *.egg-info
34 | .cache
35 | 
36 | MUJOCO_LOG.TXT
37 | 


--------------------------------------------------------------------------------
/control/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: python
 2 | python:
 3 |     - "3.6"
 4 | 
 5 | services:
 6 |     - docker
 7 | 
 8 | install:
 9 |     - pip install flake8
10 |     - docker build . -t baselines-test
11 | 
12 | script:
13 |     - flake8 . --show-source --statistics
14 |     - docker run baselines-test pytest -v --forked .
15 | 


--------------------------------------------------------------------------------
/control/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM python:3.6
 2 | 
 3 | RUN apt-get -y update && apt-get -y install ffmpeg
 4 | # RUN apt-get -y update && apt-get -y install git wget python-dev python3-dev libopenmpi-dev python-pip zlib1g-dev cmake python-opencv
 5 | 
 6 | ENV CODE_DIR /root/code
 7 | 
 8 | COPY . $CODE_DIR/baselines
 9 | WORKDIR $CODE_DIR/baselines
10 | 
11 | # Clean up pycache and pyc files
12 | RUN rm -rf __pycache__ && \
13 |     find . -name "*.pyc" -delete && \
14 |     pip install tensorflow && \
15 |     pip install -e .[test]
16 | 
17 | 
18 | CMD /bin/bash
19 | 


--------------------------------------------------------------------------------
/control/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License
 2 | 
 3 | Copyright (c) 2017 OpenAI (http://openai.com)
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/control/baselines/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kkhetarpal/ioc/fb88d5b881e0b5c317020b23874495e614b3ddc7/control/baselines/__init__.py


--------------------------------------------------------------------------------
/control/baselines/bench/__init__.py:
--------------------------------------------------------------------------------
1 | from baselines.bench.benchmarks import *
2 | from baselines.bench.monitor import *
3 | 


--------------------------------------------------------------------------------
/control/baselines/common/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa F403
2 | from baselines.common.console_util import *
3 | from baselines.common.dataset import Dataset
4 | from baselines.common.math_util import *
5 | from baselines.common.misc_util import *
6 | 


--------------------------------------------------------------------------------
/control/baselines/common/cg.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | def cg(f_Ax, b, cg_iters=10, callback=None, verbose=False, residual_tol=1e-10):
 3 |     """
 4 |     Demmel p 312
 5 |     """
 6 |     p = b.copy()
 7 |     r = b.copy()
 8 |     x = np.zeros_like(b)
 9 |     rdotr = r.dot(r)
10 | 
11 |     fmtstr =  "%10i %10.3g %10.3g"
12 |     titlestr =  "%10s %10s %10s"
13 |     if verbose: print(titlestr % ("iter", "residual norm", "soln norm"))
14 | 
15 |     for i in range(cg_iters):
16 |         if callback is not None:
17 |             callback(x)
18 |         if verbose: print(fmtstr % (i, rdotr, np.linalg.norm(x)))
19 |         z = f_Ax(p)
20 |         v = rdotr / p.dot(z)
21 |         x += v*p
22 |         r -= v*z
23 |         newrdotr = r.dot(r)
24 |         mu = newrdotr/rdotr
25 |         p = r + mu*p
26 | 
27 |         rdotr = newrdotr
28 |         if rdotr < residual_tol:
29 |             break
30 | 
31 |     if callback is not None:
32 |         callback(x)
33 |     if verbose: print(fmtstr % (i+1, rdotr, np.linalg.norm(x)))  # pylint: disable=W0631
34 |     return x
35 | 


--------------------------------------------------------------------------------
/control/baselines/common/console_util.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function
 2 | from contextlib import contextmanager
 3 | import numpy as np
 4 | import time
 5 | import shlex
 6 | import subprocess
 7 | 
 8 | # ================================================================
 9 | # Misc
10 | # ================================================================
11 | 
12 | def fmt_row(width, row, header=False):
13 |     out = " | ".join(fmt_item(x, width) for x in row)
14 |     if header: out = out + "\n" + "-"*len(out)
15 |     return out
16 | 
17 | def fmt_item(x, l):
18 |     if isinstance(x, np.ndarray):
19 |         assert x.ndim==0
20 |         x = x.item()
21 |     if isinstance(x, (float, np.float32, np.float64)):
22 |         v = abs(x)
23 |         if (v < 1e-4 or v > 1e+4) and v > 0:
24 |             rep = "%7.2e" % x
25 |         else:
26 |             rep = "%7.5f" % x
27 |     else: rep = str(x)
28 |     return " "*(l - len(rep)) + rep
29 | 
30 | color2num = dict(
31 |     gray=30,
32 |     red=31,
33 |     green=32,
34 |     yellow=33,
35 |     blue=34,
36 |     magenta=35,
37 |     cyan=36,
38 |     white=37,
39 |     crimson=38
40 | )
41 | 
42 | def colorize(string, color='green', bold=False, highlight=False):
43 |     attr = []
44 |     num = color2num[color]
45 |     if highlight: num += 10
46 |     attr.append(str(num))
47 |     if bold: attr.append('1')
48 |     return '\x1b[%sm%s\x1b[0m' % (';'.join(attr), string)
49 | 
50 | def print_cmd(cmd, dry=False):
51 |     if isinstance(cmd, str):  # for shell=True
52 |         pass
53 |     else:
54 |         cmd = ' '.join(shlex.quote(arg) for arg in cmd)
55 |     print(colorize(('CMD: ' if not dry else 'DRY: ') + cmd))
56 | 
57 | 
58 | def get_git_commit(cwd=None):
59 |     return subprocess.check_output(['git', 'rev-parse', '--short', 'HEAD'], cwd=cwd).decode('utf8')
60 | 
61 | def get_git_commit_message(cwd=None):
62 |     return subprocess.check_output(['git', 'show', '-s', '--format=%B', 'HEAD'], cwd=cwd).decode('utf8')
63 | 
64 | def ccap(cmd, dry=False, env=None, **kwargs):
65 |     print_cmd(cmd, dry)
66 |     if not dry:
67 |         subprocess.check_call(cmd, env=env, **kwargs)
68 | 
69 | 
70 | MESSAGE_DEPTH = 0
71 | 
72 | @contextmanager
73 | def timed(msg):
74 |     global MESSAGE_DEPTH #pylint: disable=W0603
75 |     print(colorize('\t'*MESSAGE_DEPTH + '=: ' + msg, color='magenta'))
76 |     tstart = time.time()
77 |     MESSAGE_DEPTH += 1
78 |     yield
79 |     MESSAGE_DEPTH -= 1
80 |     print(colorize('\t'*MESSAGE_DEPTH + "done in %.3f seconds"%(time.time() - tstart), color='magenta'))
81 | 


--------------------------------------------------------------------------------
/control/baselines/common/dataset.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | class Dataset(object):
 4 |     def __init__(self, data_map, deterministic=False, shuffle=True):
 5 |         self.data_map = data_map
 6 |         self.deterministic = deterministic
 7 |         self.enable_shuffle = shuffle
 8 |         self.n = next(iter(data_map.values())).shape[0]
 9 |         self._next_id = 0
10 |         self.shuffle()
11 | 
12 |     def shuffle(self):
13 |         if self.deterministic:
14 |             return
15 |         perm = np.arange(self.n)
16 |         np.random.shuffle(perm)
17 | 
18 |         for key in self.data_map:
19 |             self.data_map[key] = self.data_map[key][perm]
20 | 
21 |         self._next_id = 0
22 | 
23 |     def next_batch(self, batch_size):
24 |         if self._next_id >= self.n and self.enable_shuffle:
25 |             self.shuffle()
26 | 
27 |         cur_id = self._next_id
28 |         cur_batch_size = min(batch_size, self.n - self._next_id)
29 |         self._next_id += cur_batch_size
30 | 
31 |         data_map = dict()
32 |         for key in self.data_map:
33 |             data_map[key] = self.data_map[key][cur_id:cur_id+cur_batch_size]
34 |         return data_map
35 | 
36 |     def iterate_once(self, batch_size):
37 |         if self.enable_shuffle: self.shuffle()
38 | 
39 |         while self._next_id <= self.n - batch_size:
40 |             yield self.next_batch(batch_size)
41 |         self._next_id = 0
42 | 
43 |     def subset(self, num_elements, deterministic=True):
44 |         data_map = dict()
45 |         for key in self.data_map:
46 |             data_map[key] = self.data_map[key][:num_elements]
47 |         return Dataset(data_map, deterministic)
48 | 
49 | 
50 | def iterbatches(arrays, *, num_batches=None, batch_size=None, shuffle=True, include_final_partial_batch=True):
51 |     assert (num_batches is None) != (batch_size is None), 'Provide num_batches or batch_size, but not both'
52 |     arrays = tuple(map(np.asarray, arrays))
53 |     n = arrays[0].shape[0]
54 |     assert all(a.shape[0] == n for a in arrays[1:])
55 |     inds = np.arange(n)
56 |     if shuffle: np.random.shuffle(inds)
57 |     sections = np.arange(0, n, batch_size)[1:] if num_batches is None else num_batches
58 |     for batch_inds in np.array_split(inds, sections):
59 |         if include_final_partial_batch or len(batch_inds) == batch_size:
60 |             yield tuple(a[batch_inds] for a in arrays)
61 | 


--------------------------------------------------------------------------------
/control/baselines/common/input.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import tensorflow as tf
 3 | from gym.spaces import Discrete, Box, MultiDiscrete
 4 | 
 5 | def observation_placeholder(ob_space, batch_size=None, name='Ob'):
 6 |     '''
 7 |     Create placeholder to feed observations into of the size appropriate to the observation space
 8 | 
 9 |     Parameters:
10 |     ----------
11 | 
12 |     ob_space: gym.Space     observation space
13 | 
14 |     batch_size: int         size of the batch to be fed into input. Can be left None in most cases.
15 | 
16 |     name: str               name of the placeholder
17 | 
18 |     Returns:
19 |     -------
20 | 
21 |     tensorflow placeholder tensor
22 |     '''
23 | 
24 |     assert isinstance(ob_space, Discrete) or isinstance(ob_space, Box) or isinstance(ob_space, MultiDiscrete), \
25 |         'Can only deal with Discrete and Box observation spaces for now'
26 | 
27 |     dtype = ob_space.dtype
28 |     if dtype == np.int8:
29 |         dtype = np.uint8
30 | 
31 |     return tf.placeholder(shape=(batch_size,) + ob_space.shape, dtype=dtype, name=name)
32 | 
33 | 
34 | def observation_input(ob_space, batch_size=None, name='Ob'):
35 |     '''
36 |     Create placeholder to feed observations into of the size appropriate to the observation space, and add input
37 |     encoder of the appropriate type.
38 |     '''
39 | 
40 |     placeholder = observation_placeholder(ob_space, batch_size, name)
41 |     return placeholder, encode_observation(ob_space, placeholder)
42 | 
43 | def encode_observation(ob_space, placeholder):
44 |     '''
45 |     Encode input in the way that is appropriate to the observation space
46 | 
47 |     Parameters:
48 |     ----------
49 | 
50 |     ob_space: gym.Space             observation space
51 | 
52 |     placeholder: tf.placeholder     observation input placeholder
53 |     '''
54 |     if isinstance(ob_space, Discrete):
55 |         return tf.to_float(tf.one_hot(placeholder, ob_space.n))
56 |     elif isinstance(ob_space, Box):
57 |         return tf.to_float(placeholder)
58 |     elif isinstance(ob_space, MultiDiscrete):
59 |         placeholder = tf.cast(placeholder, tf.int32)
60 |         one_hots = [tf.to_float(tf.one_hot(placeholder[..., i], ob_space.nvec[i])) for i in range(placeholder.shape[-1])]
61 |         return tf.concat(one_hots, axis=-1)
62 |     else:
63 |         raise NotImplementedError
64 | 
65 | 


--------------------------------------------------------------------------------
/control/baselines/common/math_util.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import scipy.signal
 3 | 
 4 | 
 5 | def discount(x, gamma):
 6 |     """
 7 |     computes discounted sums along 0th dimension of x.
 8 | 
 9 |     inputs
10 |     ------
11 |     x: ndarray
12 |     gamma: float
13 | 
14 |     outputs
15 |     -------
16 |     y: ndarray with same shape as x, satisfying
17 | 
18 |         y[t] = x[t] + gamma*x[t+1] + gamma^2*x[t+2] + ... + gamma^k x[t+k],
19 |                 where k = len(x) - t - 1
20 | 
21 |     """
22 |     assert x.ndim >= 1
23 |     return scipy.signal.lfilter([1],[1,-gamma],x[::-1], axis=0)[::-1]
24 | 
25 | def explained_variance(ypred,y):
26 |     """
27 |     Computes fraction of variance that ypred explains about y.
28 |     Returns 1 - Var[y-ypred] / Var[y]
29 | 
30 |     interpretation:
31 |         ev=0  =>  might as well have predicted zero
32 |         ev=1  =>  perfect prediction
33 |         ev<0  =>  worse than just predicting zero
34 | 
35 |     """
36 |     assert y.ndim == 1 and ypred.ndim == 1
37 |     vary = np.var(y)
38 |     return np.nan if vary==0 else 1 - np.var(y-ypred)/vary
39 | 
40 | def explained_variance_2d(ypred, y):
41 |     assert y.ndim == 2 and ypred.ndim == 2
42 |     vary = np.var(y, axis=0)
43 |     out = 1 - np.var(y-ypred)/vary
44 |     out[vary < 1e-10] = 0
45 |     return out
46 | 
47 | def ncc(ypred, y):
48 |     return np.corrcoef(ypred, y)[1,0]
49 | 
50 | def flatten_arrays(arrs):
51 |     return np.concatenate([arr.flat for arr in arrs])
52 | 
53 | def unflatten_vector(vec, shapes):
54 |     i=0
55 |     arrs = []
56 |     for shape in shapes:
57 |         size = np.prod(shape)
58 |         arr = vec[i:i+size].reshape(shape)
59 |         arrs.append(arr)
60 |         i += size
61 |     return arrs
62 | 
63 | def discount_with_boundaries(X, New, gamma):
64 |     """
65 |     X: 2d array of floats, time x features
66 |     New: 2d array of bools, indicating when a new episode has started
67 |     """
68 |     Y = np.zeros_like(X)
69 |     T = X.shape[0]
70 |     Y[T-1] = X[T-1]
71 |     for t in range(T-2, -1, -1):
72 |         Y[t] = X[t] + gamma * Y[t+1] * (1 - New[t+1])
73 |     return Y
74 | 
75 | def test_discount_with_boundaries():
76 |     gamma=0.9
77 |     x = np.array([1.0, 2.0, 3.0, 4.0], 'float32')
78 |     starts = [1.0, 0.0, 0.0, 1.0]
79 |     y = discount_with_boundaries(x, starts, gamma)
80 |     assert np.allclose(y, [
81 |         1 + gamma * 2 + gamma**2 * 3,
82 |         2 + gamma * 3,
83 |         3,
84 |         4
85 |     ])
86 | 


--------------------------------------------------------------------------------
/control/baselines/common/mpi_adam.py:
--------------------------------------------------------------------------------
  1 | import baselines.common.tf_util as U
  2 | import tensorflow as tf
  3 | import numpy as np
  4 | try:
  5 |     from mpi4py import MPI
  6 | except ImportError:
  7 |     MPI = None
  8 | 
  9 | 
 10 | class MpiAdam(object):
 11 |     def __init__(self, var_list, *, beta1=0.9, beta2=0.999, epsilon=1e-08, scale_grad_by_procs=True, comm=None):
 12 |         self.var_list = var_list
 13 |         self.beta1 = beta1
 14 |         self.beta2 = beta2
 15 |         self.epsilon = epsilon
 16 |         self.scale_grad_by_procs = scale_grad_by_procs
 17 |         size = sum(U.numel(v) for v in var_list)
 18 |         self.m = np.zeros(size, 'float32')
 19 |         self.v = np.zeros(size, 'float32')
 20 |         self.t = 0
 21 |         self.setfromflat = U.SetFromFlat(var_list)
 22 |         self.getflat = U.GetFlat(var_list)
 23 |         self.comm = MPI.COMM_WORLD if comm is None and MPI is not None else comm
 24 | 
 25 |     def update(self, localg, stepsize):
 26 |         if self.t % 100 == 0:
 27 |             self.check_synced()
 28 |         localg = localg.astype('float32')
 29 |         if self.comm is not None:
 30 |             globalg = np.zeros_like(localg)
 31 |             self.comm.Allreduce(localg, globalg, op=MPI.SUM)
 32 |             if self.scale_grad_by_procs:
 33 |                 globalg /= self.comm.Get_size()
 34 |         else:
 35 |             globalg = np.copy(localg)
 36 | 
 37 |         self.t += 1
 38 |         a = stepsize * np.sqrt(1 - self.beta2**self.t)/(1 - self.beta1**self.t)
 39 |         self.m = self.beta1 * self.m + (1 - self.beta1) * globalg
 40 |         self.v = self.beta2 * self.v + (1 - self.beta2) * (globalg * globalg)
 41 |         step = (- a) * self.m / (np.sqrt(self.v) + self.epsilon)
 42 |         self.setfromflat(self.getflat() + step)
 43 | 
 44 |     def sync(self):
 45 |         if self.comm is None:
 46 |             return
 47 |         theta = self.getflat()
 48 |         self.comm.Bcast(theta, root=0)
 49 |         self.setfromflat(theta)
 50 | 
 51 |     def check_synced(self):
 52 |         if self.comm is None:
 53 |             return
 54 |         if self.comm.Get_rank() == 0: # this is root
 55 |             theta = self.getflat()
 56 |             self.comm.Bcast(theta, root=0)
 57 |         else:
 58 |             thetalocal = self.getflat()
 59 |             thetaroot = np.empty_like(thetalocal)
 60 |             self.comm.Bcast(thetaroot, root=0)
 61 |             assert (thetaroot == thetalocal).all(), (thetaroot, thetalocal)
 62 | 
 63 | @U.in_session
 64 | def test_MpiAdam():
 65 |     np.random.seed(0)
 66 |     tf.set_random_seed(0)
 67 | 
 68 |     a = tf.Variable(np.random.randn(3).astype('float32'))
 69 |     b = tf.Variable(np.random.randn(2,5).astype('float32'))
 70 |     loss = tf.reduce_sum(tf.square(a)) + tf.reduce_sum(tf.sin(b))
 71 | 
 72 |     stepsize = 1e-2
 73 |     update_op = tf.train.AdamOptimizer(stepsize).minimize(loss)
 74 |     do_update = U.function([], loss, updates=[update_op])
 75 | 
 76 |     tf.get_default_session().run(tf.global_variables_initializer())
 77 |     losslist_ref = []
 78 |     for i in range(10):
 79 |         l = do_update()
 80 |         print(i, l)
 81 |         losslist_ref.append(l)
 82 | 
 83 | 
 84 | 
 85 |     tf.set_random_seed(0)
 86 |     tf.get_default_session().run(tf.global_variables_initializer())
 87 | 
 88 |     var_list = [a,b]
 89 |     lossandgrad = U.function([], [loss, U.flatgrad(loss, var_list)])
 90 |     adam = MpiAdam(var_list)
 91 | 
 92 |     losslist_test = []
 93 |     for i in range(10):
 94 |         l,g = lossandgrad()
 95 |         adam.update(g, stepsize)
 96 |         print(i,l)
 97 |         losslist_test.append(l)
 98 | 
 99 |     np.testing.assert_allclose(np.array(losslist_ref), np.array(losslist_test), atol=1e-4)
100 | 
101 | 
102 | if __name__ == '__main__':
103 |     test_MpiAdam()
104 | 


--------------------------------------------------------------------------------
/control/baselines/common/mpi_adam_optimizer.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import tensorflow as tf
 3 | from mpi4py import MPI
 4 | 
 5 | class MpiAdamOptimizer(tf.train.AdamOptimizer):
 6 |     """Adam optimizer that averages gradients across mpi processes."""
 7 |     def __init__(self, comm, **kwargs):
 8 |         self.comm = comm
 9 |         tf.train.AdamOptimizer.__init__(self, **kwargs)
10 |     def compute_gradients(self, loss, var_list, **kwargs):
11 |         grads_and_vars = tf.train.AdamOptimizer.compute_gradients(self, loss, var_list, **kwargs)
12 |         grads_and_vars = [(g, v) for g, v in grads_and_vars if g is not None]
13 |         flat_grad = tf.concat([tf.reshape(g, (-1,)) for g, v in grads_and_vars], axis=0)
14 |         shapes = [v.shape.as_list() for g, v in grads_and_vars]
15 |         sizes = [int(np.prod(s)) for s in shapes]
16 | 
17 |         num_tasks = self.comm.Get_size()
18 |         buf = np.zeros(sum(sizes), np.float32)
19 | 
20 |         def _collect_grads(flat_grad):
21 |             self.comm.Allreduce(flat_grad, buf, op=MPI.SUM)
22 |             np.divide(buf, float(num_tasks), out=buf)
23 |             return buf
24 | 
25 |         avg_flat_grad = tf.py_func(_collect_grads, [flat_grad], tf.float32)
26 |         avg_flat_grad.set_shape(flat_grad.shape)
27 |         avg_grads = tf.split(avg_flat_grad, sizes, axis=0)
28 |         avg_grads_and_vars = [(tf.reshape(g, v.shape), v)
29 |                     for g, (_, v) in zip(avg_grads, grads_and_vars)]
30 | 
31 |         return avg_grads_and_vars
32 | 


--------------------------------------------------------------------------------
/control/baselines/common/mpi_fork.py:
--------------------------------------------------------------------------------
 1 | import os, subprocess, sys
 2 | 
 3 | def mpi_fork(n, bind_to_core=False):
 4 |     """Re-launches the current script with workers
 5 |     Returns "parent" for original parent, "child" for MPI children
 6 |     """
 7 |     if n<=1:
 8 |         return "child"
 9 |     if os.getenv("IN_MPI") is None:
10 |         env = os.environ.copy()
11 |         env.update(
12 |             MKL_NUM_THREADS="1",
13 |             OMP_NUM_THREADS="1",
14 |             IN_MPI="1"
15 |         )
16 |         args = ["mpirun", "-np", str(n)]
17 |         if bind_to_core:
18 |             args += ["-bind-to", "core"]
19 |         args += [sys.executable] + sys.argv
20 |         subprocess.check_call(args, env=env)
21 |         return "parent"
22 |     else:
23 |         return "child"
24 | 


--------------------------------------------------------------------------------
/control/baselines/common/mpi_moments.py:
--------------------------------------------------------------------------------
 1 | from mpi4py import MPI
 2 | import numpy as np
 3 | from baselines.common import zipsame
 4 | 
 5 | 
 6 | def mpi_mean(x, axis=0, comm=None, keepdims=False):
 7 |     x = np.asarray(x)
 8 |     assert x.ndim > 0
 9 |     if comm is None: comm = MPI.COMM_WORLD
10 |     xsum = x.sum(axis=axis, keepdims=keepdims)
11 |     n = xsum.size
12 |     localsum = np.zeros(n+1, x.dtype)
13 |     localsum[:n] = xsum.ravel()
14 |     localsum[n] = x.shape[axis]
15 |     globalsum = np.zeros_like(localsum)
16 |     comm.Allreduce(localsum, globalsum, op=MPI.SUM)
17 |     return globalsum[:n].reshape(xsum.shape) / globalsum[n], globalsum[n]
18 | 
19 | def mpi_moments(x, axis=0, comm=None, keepdims=False):
20 |     x = np.asarray(x)
21 |     assert x.ndim > 0
22 |     mean, count = mpi_mean(x, axis=axis, comm=comm, keepdims=True)
23 |     sqdiffs = np.square(x - mean)
24 |     meansqdiff, count1 = mpi_mean(sqdiffs, axis=axis, comm=comm, keepdims=True)
25 |     assert count1 == count
26 |     std = np.sqrt(meansqdiff)
27 |     if not keepdims:
28 |         newshape = mean.shape[:axis] + mean.shape[axis+1:]
29 |         mean = mean.reshape(newshape)
30 |         std = std.reshape(newshape)
31 |     return mean, std, count
32 | 
33 | 
34 | def test_runningmeanstd():
35 |     import subprocess
36 |     subprocess.check_call(['mpirun', '-np', '3',
37 |         'python','-c',
38 |         'from baselines.common.mpi_moments import _helper_runningmeanstd; _helper_runningmeanstd()'])
39 | 
40 | def _helper_runningmeanstd():
41 |     comm = MPI.COMM_WORLD
42 |     np.random.seed(0)
43 |     for (triple,axis) in [
44 |         ((np.random.randn(3), np.random.randn(4), np.random.randn(5)),0),
45 |         ((np.random.randn(3,2), np.random.randn(4,2), np.random.randn(5,2)),0),
46 |         ((np.random.randn(2,3), np.random.randn(2,4), np.random.randn(2,4)),1),
47 |         ]:
48 | 
49 | 
50 |         x = np.concatenate(triple, axis=axis)
51 |         ms1 = [x.mean(axis=axis), x.std(axis=axis), x.shape[axis]]
52 | 
53 | 
54 |         ms2 = mpi_moments(triple[comm.Get_rank()],axis=axis)
55 | 
56 |         for (a1,a2) in zipsame(ms1, ms2):
57 |             print(a1, a2)
58 |             assert np.allclose(a1, a2)
59 |             print("ok!")
60 | 
61 | 


--------------------------------------------------------------------------------
/control/baselines/common/mpi_running_mean_std.py:
--------------------------------------------------------------------------------
  1 | try:
  2 |     from mpi4py import MPI
  3 | except ImportError:
  4 |     MPI = None
  5 | 
  6 | import tensorflow as tf, baselines.common.tf_util as U, numpy as np
  7 | 
  8 | class RunningMeanStd(object):
  9 |     # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm
 10 |     def __init__(self, epsilon=1e-2, shape=()):
 11 | 
 12 |         self._sum = tf.get_variable(
 13 |             dtype=tf.float64,
 14 |             shape=shape,
 15 |             initializer=tf.constant_initializer(0.0),
 16 |             name="runningsum", trainable=False)
 17 |         self._sumsq = tf.get_variable(
 18 |             dtype=tf.float64,
 19 |             shape=shape,
 20 |             initializer=tf.constant_initializer(epsilon),
 21 |             name="runningsumsq", trainable=False)
 22 |         self._count = tf.get_variable(
 23 |             dtype=tf.float64,
 24 |             shape=(),
 25 |             initializer=tf.constant_initializer(epsilon),
 26 |             name="count", trainable=False)
 27 |         self.shape = shape
 28 | 
 29 |         self.mean = tf.to_float(self._sum / self._count)
 30 |         self.std = tf.sqrt( tf.maximum( tf.to_float(self._sumsq / self._count) - tf.square(self.mean) , 1e-2 ))
 31 | 
 32 |         newsum = tf.placeholder(shape=self.shape, dtype=tf.float64, name='sum')
 33 |         newsumsq = tf.placeholder(shape=self.shape, dtype=tf.float64, name='var')
 34 |         newcount = tf.placeholder(shape=[], dtype=tf.float64, name='count')
 35 |         self.incfiltparams = U.function([newsum, newsumsq, newcount], [],
 36 |             updates=[tf.assign_add(self._sum, newsum),
 37 |                      tf.assign_add(self._sumsq, newsumsq),
 38 |                      tf.assign_add(self._count, newcount)])
 39 | 
 40 | 
 41 |     def update(self, x):
 42 |         x = x.astype('float64')
 43 |         n = int(np.prod(self.shape))
 44 |         totalvec = np.zeros(n*2+1, 'float64')
 45 |         addvec = np.concatenate([x.sum(axis=0).ravel(), np.square(x).sum(axis=0).ravel(), np.array([len(x)],dtype='float64')])
 46 |         if MPI is not None:
 47 |             MPI.COMM_WORLD.Allreduce(addvec, totalvec, op=MPI.SUM)
 48 |         self.incfiltparams(totalvec[0:n].reshape(self.shape), totalvec[n:2*n].reshape(self.shape), totalvec[2*n])
 49 | 
 50 | @U.in_session
 51 | def test_runningmeanstd():
 52 |     for (x1, x2, x3) in [
 53 |         (np.random.randn(3), np.random.randn(4), np.random.randn(5)),
 54 |         (np.random.randn(3,2), np.random.randn(4,2), np.random.randn(5,2)),
 55 |         ]:
 56 | 
 57 |         rms = RunningMeanStd(epsilon=0.0, shape=x1.shape[1:])
 58 |         U.initialize()
 59 | 
 60 |         x = np.concatenate([x1, x2, x3], axis=0)
 61 |         ms1 = [x.mean(axis=0), x.std(axis=0)]
 62 |         rms.update(x1)
 63 |         rms.update(x2)
 64 |         rms.update(x3)
 65 |         ms2 = [rms.mean.eval(), rms.std.eval()]
 66 | 
 67 |         assert np.allclose(ms1, ms2)
 68 | 
 69 | @U.in_session
 70 | def test_dist():
 71 |     np.random.seed(0)
 72 |     p1,p2,p3=(np.random.randn(3,1), np.random.randn(4,1), np.random.randn(5,1))
 73 |     q1,q2,q3=(np.random.randn(6,1), np.random.randn(7,1), np.random.randn(8,1))
 74 | 
 75 |     # p1,p2,p3=(np.random.randn(3), np.random.randn(4), np.random.randn(5))
 76 |     # q1,q2,q3=(np.random.randn(6), np.random.randn(7), np.random.randn(8))
 77 | 
 78 |     comm = MPI.COMM_WORLD
 79 |     assert comm.Get_size()==2
 80 |     if comm.Get_rank()==0:
 81 |         x1,x2,x3 = p1,p2,p3
 82 |     elif comm.Get_rank()==1:
 83 |         x1,x2,x3 = q1,q2,q3
 84 |     else:
 85 |         assert False
 86 | 
 87 |     rms = RunningMeanStd(epsilon=0.0, shape=(1,))
 88 |     U.initialize()
 89 | 
 90 |     rms.update(x1)
 91 |     rms.update(x2)
 92 |     rms.update(x3)
 93 | 
 94 |     bigvec = np.concatenate([p1,p2,p3,q1,q2,q3])
 95 | 
 96 |     def checkallclose(x,y):
 97 |         print(x,y)
 98 |         return np.allclose(x,y)
 99 | 
100 |     assert checkallclose(
101 |         bigvec.mean(axis=0),
102 |         rms.mean.eval(),
103 |     )
104 |     assert checkallclose(
105 |         bigvec.std(axis=0),
106 |         rms.std.eval(),
107 |     )
108 | 
109 | 
110 | if __name__ == "__main__":
111 |     # Run with mpirun -np 2 python <filename>
112 |     test_dist()
113 | 


--------------------------------------------------------------------------------
/control/baselines/common/mpi_util.py:
--------------------------------------------------------------------------------
  1 | from collections import defaultdict
  2 | from mpi4py import MPI
  3 | import os, numpy as np
  4 | import platform
  5 | import shutil
  6 | import subprocess
  7 | 
  8 | def sync_from_root(sess, variables, comm=None):
  9 |     """
 10 |     Send the root node's parameters to every worker.
 11 |     Arguments:
 12 |       sess: the TensorFlow session.
 13 |       variables: all parameter variables including optimizer's
 14 |     """
 15 |     if comm is None: comm = MPI.COMM_WORLD
 16 |     rank = comm.Get_rank()
 17 |     for var in variables:
 18 |         if rank == 0:
 19 |             comm.Bcast(sess.run(var))
 20 |         else:
 21 |             import tensorflow as tf
 22 |             returned_var = np.empty(var.shape, dtype='float32')
 23 |             comm.Bcast(returned_var)
 24 |             sess.run(tf.assign(var, returned_var))
 25 | 
 26 | def gpu_count():
 27 |     """
 28 |     Count the GPUs on this machine.
 29 |     """
 30 |     if shutil.which('nvidia-smi') is None:
 31 |         return 0
 32 |     output = subprocess.check_output(['nvidia-smi', '--query-gpu=gpu_name', '--format=csv'])
 33 |     return max(0, len(output.split(b'\n')) - 2)
 34 | 
 35 | def setup_mpi_gpus():
 36 |     """
 37 |     Set CUDA_VISIBLE_DEVICES using MPI.
 38 |     """
 39 |     num_gpus = gpu_count()
 40 |     if num_gpus == 0:
 41 |         return
 42 |     local_rank, _ = get_local_rank_size(MPI.COMM_WORLD)
 43 |     os.environ['CUDA_VISIBLE_DEVICES'] = str(local_rank % num_gpus)
 44 | 
 45 | def get_local_rank_size(comm):
 46 |     """
 47 |     Returns the rank of each process on its machine
 48 |     The processes on a given machine will be assigned ranks
 49 |         0, 1, 2, ..., N-1,
 50 |     where N is the number of processes on this machine.
 51 | 
 52 |     Useful if you want to assign one gpu per machine
 53 |     """
 54 |     this_node = platform.node()
 55 |     ranks_nodes = comm.allgather((comm.Get_rank(), this_node))
 56 |     node2rankssofar = defaultdict(int)
 57 |     local_rank = None
 58 |     for (rank, node) in ranks_nodes:
 59 |         if rank == comm.Get_rank():
 60 |             local_rank = node2rankssofar[node]
 61 |         node2rankssofar[node] += 1
 62 |     assert local_rank is not None
 63 |     return local_rank, node2rankssofar[this_node]
 64 | 
 65 | def share_file(comm, path):
 66 |     """
 67 |     Copies the file from rank 0 to all other ranks
 68 |     Puts it in the same place on all machines
 69 |     """
 70 |     localrank, _ = get_local_rank_size(comm)
 71 |     if comm.Get_rank() == 0:
 72 |         with open(path, 'rb') as fh:
 73 |             data = fh.read()
 74 |         comm.bcast(data)
 75 |     else:
 76 |         data = comm.bcast(None)
 77 |         if localrank == 0:
 78 |             os.makedirs(os.path.dirname(path), exist_ok=True)
 79 |             with open(path, 'wb') as fh:
 80 |                 fh.write(data)
 81 |     comm.Barrier()
 82 | 
 83 | def dict_gather(comm, d, op='mean', assert_all_have_data=True):
 84 |     if comm is None: return d
 85 |     alldicts = comm.allgather(d)
 86 |     size = comm.size
 87 |     k2li = defaultdict(list)
 88 |     for d in alldicts:
 89 |         for (k,v) in d.items():
 90 |             k2li[k].append(v)
 91 |     result = {}
 92 |     for (k,li) in k2li.items():
 93 |         if assert_all_have_data:
 94 |             assert len(li)==size, "only %i out of %i MPI workers have sent '%s'" % (len(li), size, k)
 95 |         if op=='mean':
 96 |             result[k] = np.mean(li, axis=0)
 97 |         elif op=='sum':
 98 |             result[k] = np.sum(li, axis=0)
 99 |         else:
100 |             assert 0, op
101 |     return result
102 | 


--------------------------------------------------------------------------------
/control/baselines/common/runners.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from abc import ABC, abstractmethod
 3 | 
 4 | class AbstractEnvRunner(ABC):
 5 |     def __init__(self, *, env, model, nsteps):
 6 |         self.env = env
 7 |         self.model = model
 8 |         self.nenv = nenv = env.num_envs if hasattr(env, 'num_envs') else 1
 9 |         self.batch_ob_shape = (nenv*nsteps,) + env.observation_space.shape
10 |         self.obs = np.zeros((nenv,) + env.observation_space.shape, dtype=env.observation_space.dtype.name)
11 |         self.obs[:] = env.reset()
12 |         self.nsteps = nsteps
13 |         self.states = model.initial_state
14 |         self.dones = [False for _ in range(nenv)]
15 | 
16 |     @abstractmethod
17 |     def run(self):
18 |         raise NotImplementedError
19 | 
20 | 


--------------------------------------------------------------------------------
/control/baselines/common/schedules.py:
--------------------------------------------------------------------------------
  1 | """This file is used for specifying various schedules that evolve over
  2 | time throughout the execution of the algorithm, such as:
  3 |  - learning rate for the optimizer
  4 |  - exploration epsilon for the epsilon greedy exploration strategy
  5 |  - beta parameter for beta parameter in prioritized replay
  6 | 
  7 | Each schedule has a function `value(t)` which returns the current value
  8 | of the parameter given the timestep t of the optimization procedure.
  9 | """
 10 | 
 11 | 
 12 | class Schedule(object):
 13 |     def value(self, t):
 14 |         """Value of the schedule at time t"""
 15 |         raise NotImplementedError()
 16 | 
 17 | 
 18 | class ConstantSchedule(object):
 19 |     def __init__(self, value):
 20 |         """Value remains constant over time.
 21 | 
 22 |         Parameters
 23 |         ----------
 24 |         value: float
 25 |             Constant value of the schedule
 26 |         """
 27 |         self._v = value
 28 | 
 29 |     def value(self, t):
 30 |         """See Schedule.value"""
 31 |         return self._v
 32 | 
 33 | 
 34 | def linear_interpolation(l, r, alpha):
 35 |     return l + alpha * (r - l)
 36 | 
 37 | 
 38 | class PiecewiseSchedule(object):
 39 |     def __init__(self, endpoints, interpolation=linear_interpolation, outside_value=None):
 40 |         """Piecewise schedule.
 41 | 
 42 |         endpoints: [(int, int)]
 43 |             list of pairs `(time, value)` meanining that schedule should output
 44 |             `value` when `t==time`. All the values for time must be sorted in
 45 |             an increasing order. When t is between two times, e.g. `(time_a, value_a)`
 46 |             and `(time_b, value_b)`, such that `time_a <= t < time_b` then value outputs
 47 |             `interpolation(value_a, value_b, alpha)` where alpha is a fraction of
 48 |             time passed between `time_a` and `time_b` for time `t`.
 49 |         interpolation: lambda float, float, float: float
 50 |             a function that takes value to the left and to the right of t according
 51 |             to the `endpoints`. Alpha is the fraction of distance from left endpoint to
 52 |             right endpoint that t has covered. See linear_interpolation for example.
 53 |         outside_value: float
 54 |             if the value is requested outside of all the intervals sepecified in
 55 |             `endpoints` this value is returned. If None then AssertionError is
 56 |             raised when outside value is requested.
 57 |         """
 58 |         idxes = [e[0] for e in endpoints]
 59 |         assert idxes == sorted(idxes)
 60 |         self._interpolation = interpolation
 61 |         self._outside_value = outside_value
 62 |         self._endpoints = endpoints
 63 | 
 64 |     def value(self, t):
 65 |         """See Schedule.value"""
 66 |         for (l_t, l), (r_t, r) in zip(self._endpoints[:-1], self._endpoints[1:]):
 67 |             if l_t <= t and t < r_t:
 68 |                 alpha = float(t - l_t) / (r_t - l_t)
 69 |                 return self._interpolation(l, r, alpha)
 70 | 
 71 |         # t does not belong to any of the pieces, so doom.
 72 |         assert self._outside_value is not None
 73 |         return self._outside_value
 74 | 
 75 | 
 76 | class LinearSchedule(object):
 77 |     def __init__(self, schedule_timesteps, final_p, initial_p=1.0):
 78 |         """Linear interpolation between initial_p and final_p over
 79 |         schedule_timesteps. After this many timesteps pass final_p is
 80 |         returned.
 81 | 
 82 |         Parameters
 83 |         ----------
 84 |         schedule_timesteps: int
 85 |             Number of timesteps for which to linearly anneal initial_p
 86 |             to final_p
 87 |         initial_p: float
 88 |             initial output value
 89 |         final_p: float
 90 |             final output value
 91 |         """
 92 |         self.schedule_timesteps = schedule_timesteps
 93 |         self.final_p = final_p
 94 |         self.initial_p = initial_p
 95 | 
 96 |     def value(self, t):
 97 |         """See Schedule.value"""
 98 |         fraction = min(float(t) / self.schedule_timesteps, 1.0)
 99 |         return self.initial_p + fraction * (self.final_p - self.initial_p)
100 | 


--------------------------------------------------------------------------------
/control/baselines/common/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kkhetarpal/ioc/fb88d5b881e0b5c317020b23874495e614b3ddc7/control/baselines/common/tests/__init__.py


--------------------------------------------------------------------------------
/control/baselines/common/tests/envs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kkhetarpal/ioc/fb88d5b881e0b5c317020b23874495e614b3ddc7/control/baselines/common/tests/envs/__init__.py


--------------------------------------------------------------------------------
/control/baselines/common/tests/envs/fixed_sequence_env.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from gym import Env
 3 | from gym.spaces import Discrete
 4 | 
 5 | 
 6 | class FixedSequenceEnv(Env):
 7 |     def __init__(
 8 |             self,
 9 |             n_actions=10,
10 |             seed=0,
11 |             episode_len=100
12 |     ):
13 |         self.np_random = np.random.RandomState()
14 |         self.np_random.seed(seed)
15 |         self.sequence = [self.np_random.randint(0, n_actions-1) for _ in range(episode_len)]
16 | 
17 |         self.action_space = Discrete(n_actions)
18 |         self.observation_space = Discrete(1)
19 | 
20 |         self.episode_len = episode_len
21 |         self.time = 0
22 |         self.reset()
23 | 
24 |     def reset(self):
25 |         self.time = 0
26 |         return 0
27 | 
28 |     def step(self, actions):
29 |         rew = self._get_reward(actions)
30 |         self._choose_next_state()
31 |         done = False
32 |         if self.episode_len and self.time >= self.episode_len:
33 |             rew = 0
34 |             done = True
35 | 
36 |         return 0, rew, done, {}
37 | 
38 |     def _choose_next_state(self):
39 |         self.time += 1
40 | 
41 |     def _get_reward(self, actions):
42 |         return 1 if actions == self.sequence[self.time] else 0
43 | 
44 | 
45 | 


--------------------------------------------------------------------------------
/control/baselines/common/tests/envs/identity_env.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from abc import abstractmethod
 3 | from gym import Env
 4 | from gym.spaces import MultiDiscrete, Discrete, Box
 5 | 
 6 | 
 7 | class IdentityEnv(Env):
 8 |     def __init__(
 9 |             self,
10 |             episode_len=None
11 |     ):
12 | 
13 |         self.episode_len = episode_len
14 |         self.time = 0
15 |         self.reset()
16 | 
17 |     def reset(self):
18 |         self._choose_next_state()
19 |         self.time = 0
20 |         self.observation_space = self.action_space
21 | 
22 |         return self.state
23 | 
24 |     def step(self, actions):
25 |         rew = self._get_reward(actions)
26 |         self._choose_next_state()
27 |         done = False
28 |         if self.episode_len and self.time >= self.episode_len:
29 |             rew = 0
30 |             done = True
31 | 
32 |         return self.state, rew, done, {}
33 | 
34 |     def _choose_next_state(self):
35 |         self.state = self.action_space.sample()
36 |         self.time += 1
37 | 
38 |     @abstractmethod
39 |     def _get_reward(self, actions):
40 |         raise NotImplementedError
41 | 
42 | 
43 | class DiscreteIdentityEnv(IdentityEnv):
44 |     def __init__(
45 |             self,
46 |             dim,
47 |             episode_len=None,
48 |     ):
49 | 
50 |         self.action_space = Discrete(dim)
51 |         super().__init__(episode_len=episode_len)
52 | 
53 |     def _get_reward(self, actions):
54 |         return 1 if self.state == actions else 0
55 | 
56 | class MultiDiscreteIdentityEnv(IdentityEnv):
57 |     def __init__(
58 |             self,
59 |             dims,
60 |             episode_len=None,
61 |     ):
62 | 
63 |         self.action_space = MultiDiscrete(dims)
64 |         super().__init__(episode_len=episode_len)
65 | 
66 |     def _get_reward(self, actions):
67 |         return 1 if all(self.state == actions) else 0
68 | 
69 | 
70 | class BoxIdentityEnv(IdentityEnv):
71 |     def __init__(
72 |             self,
73 |             shape,
74 |             episode_len=None,
75 |     ):
76 | 
77 |         self.action_space = Box(low=-1.0, high=1.0, shape=shape)
78 |         super().__init__(episode_len=episode_len)
79 | 
80 |     def _get_reward(self, actions):
81 |         diff = actions - self.state
82 |         diff = diff[:]
83 |         return -0.5 * np.dot(diff, diff)
84 | 


--------------------------------------------------------------------------------
/control/baselines/common/tests/envs/mnist_env.py:
--------------------------------------------------------------------------------
 1 | import os.path as osp
 2 | import numpy as np
 3 | import tempfile
 4 | from gym import Env
 5 | from gym.spaces import Discrete, Box
 6 | 
 7 | 
 8 | 
 9 | class MnistEnv(Env):
10 |     def __init__(
11 |             self,
12 |             seed=0,
13 |             episode_len=None,
14 |             no_images=None
15 |     ):
16 |         import filelock
17 |         from tensorflow.examples.tutorials.mnist import input_data
18 |         # we could use temporary directory for this with a context manager and
19 |         # TemporaryDirecotry, but then each test that uses mnist would re-download the data
20 |         # this way the data is not cleaned up, but we only download it once per machine
21 |         mnist_path = osp.join(tempfile.gettempdir(), 'MNIST_data')
22 |         with filelock.FileLock(mnist_path + '.lock'):
23 |            self.mnist = input_data.read_data_sets(mnist_path)
24 | 
25 |         self.np_random = np.random.RandomState()
26 |         self.np_random.seed(seed)
27 | 
28 |         self.observation_space = Box(low=0.0, high=1.0, shape=(28,28,1))
29 |         self.action_space = Discrete(10)
30 |         self.episode_len = episode_len
31 |         self.time = 0
32 |         self.no_images = no_images
33 | 
34 |         self.train_mode()
35 |         self.reset()
36 | 
37 |     def reset(self):
38 |         self._choose_next_state()
39 |         self.time = 0
40 | 
41 |         return self.state[0]
42 | 
43 |     def step(self, actions):
44 |         rew = self._get_reward(actions)
45 |         self._choose_next_state()
46 |         done = False
47 |         if self.episode_len and self.time >= self.episode_len:
48 |             rew = 0
49 |             done = True
50 | 
51 |         return self.state[0], rew, done, {}
52 | 
53 |     def train_mode(self):
54 |         self.dataset = self.mnist.train
55 | 
56 |     def test_mode(self):
57 |         self.dataset = self.mnist.test
58 | 
59 |     def _choose_next_state(self):
60 |         max_index = (self.no_images if self.no_images is not None else self.dataset.num_examples) - 1
61 |         index = self.np_random.randint(0, max_index)
62 |         image = self.dataset.images[index].reshape(28,28,1)*255
63 |         label = self.dataset.labels[index]
64 |         self.state = (image, label)
65 |         self.time += 1
66 | 
67 |     def _get_reward(self, actions):
68 |         return 1 if self.state[1] == actions else 0
69 | 
70 | 
71 | 


--------------------------------------------------------------------------------
/control/baselines/common/tests/test_cartpole.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import gym
 3 | 
 4 | from baselines.run import get_learn_function
 5 | from baselines.common.tests.util import reward_per_episode_test
 6 | 
 7 | common_kwargs = dict(
 8 |     total_timesteps=30000,
 9 |     network='mlp',
10 |     gamma=1.0,
11 |     seed=0,
12 | )
13 | 
14 | learn_kwargs = {
15 |     'a2c' : dict(nsteps=32, value_network='copy', lr=0.05),
16 |     'acer': dict(value_network='copy'),
17 |     'acktr': dict(nsteps=32, value_network='copy', is_async=False),
18 |     'deepq': dict(total_timesteps=20000),
19 |     'ppo2': dict(value_network='copy'),
20 |     'trpo_mpi': {}
21 | }
22 | 
23 | @pytest.mark.slow
24 | @pytest.mark.parametrize("alg", learn_kwargs.keys())
25 | def test_cartpole(alg):
26 |     '''
27 |     Test if the algorithm (with an mlp policy)
28 |     can learn to balance the cartpole
29 |     '''
30 | 
31 |     kwargs = common_kwargs.copy()
32 |     kwargs.update(learn_kwargs[alg])
33 | 
34 |     learn_fn = lambda e: get_learn_function(alg)(env=e, **kwargs)
35 |     def env_fn():
36 | 
37 |         env = gym.make('CartPole-v0')
38 |         env.seed(0)
39 |         return env
40 | 
41 |     reward_per_episode_test(env_fn, learn_fn, 100)
42 | 
43 | if __name__ == '__main__':
44 |     test_cartpole('acer')
45 | 


--------------------------------------------------------------------------------
/control/baselines/common/tests/test_doc_examples.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | try:
 3 |     import mujoco_py
 4 |     _mujoco_present = True
 5 | except BaseException:
 6 |     mujoco_py = None
 7 |     _mujoco_present = False
 8 | 
 9 | 
10 | @pytest.mark.skipif(
11 |     not _mujoco_present,
12 |     reason='error loading mujoco - either mujoco / mujoco key not present, or LD_LIBRARY_PATH is not pointing to mujoco library'
13 | )
14 | def test_lstm_example():
15 |     import tensorflow as tf
16 |     from baselines.common import policies, models, cmd_util
17 |     from baselines.common.vec_env.dummy_vec_env import DummyVecEnv
18 | 
19 |     # create vectorized environment
20 |     venv = DummyVecEnv([lambda: cmd_util.make_mujoco_env('Reacher-v2', seed=0)])
21 | 
22 |     with tf.Session() as sess:
23 |         # build policy based on lstm network with 128 units
24 |         policy = policies.build_policy(venv, models.lstm(128))(nbatch=1, nsteps=1)
25 | 
26 |         # initialize tensorflow variables
27 |         sess.run(tf.global_variables_initializer())
28 | 
29 |         # prepare environment variables
30 |         ob = venv.reset()
31 |         state = policy.initial_state
32 |         done = [False]
33 |         step_counter = 0
34 | 
35 |         # run a single episode until the end (i.e. until done)
36 |         while True:
37 |             action, _, state, _ = policy.step(ob, S=state, M=done)
38 |             ob, reward, done, _ = venv.step(action)
39 |             step_counter += 1
40 |             if done:
41 |                 break
42 | 
43 | 
44 |         assert step_counter > 5
45 | 
46 | 
47 | 
48 | 
49 | 


--------------------------------------------------------------------------------
/control/baselines/common/tests/test_env_after_learn.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import gym
 3 | import tensorflow as tf
 4 | 
 5 | from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv
 6 | from baselines.run import get_learn_function
 7 | from baselines.common.tf_util import make_session
 8 | 
 9 | algos = ['a2c', 'acer', 'acktr', 'deepq', 'ppo2', 'trpo_mpi']
10 | 
11 | @pytest.mark.parametrize('algo', algos)
12 | def test_env_after_learn(algo):
13 |     def make_env():
14 |         # acktr requires too much RAM, fails on travis
15 |         env = gym.make('CartPole-v1' if algo == 'acktr' else 'PongNoFrameskip-v4')
16 |         return env
17 | 
18 |     make_session(make_default=True, graph=tf.Graph())
19 |     env = SubprocVecEnv([make_env])
20 | 
21 |     learn = get_learn_function(algo)
22 | 
23 |     # Commenting out the following line resolves the issue, though crash happens at env.reset().
24 |     learn(network='mlp', env=env, total_timesteps=0, load_path=None, seed=None)
25 | 
26 |     env.reset()
27 |     env.close()
28 | 


--------------------------------------------------------------------------------
/control/baselines/common/tests/test_fetchreach.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import gym
 3 | 
 4 | from baselines.run import get_learn_function
 5 | from baselines.common.tests.util import reward_per_episode_test
 6 | 
 7 | pytest.importorskip('mujoco_py')
 8 | 
 9 | common_kwargs = dict(
10 |     network='mlp',
11 |     seed=0,
12 | )
13 | 
14 | learn_kwargs = {
15 |     'her': dict(total_timesteps=2000)
16 | }
17 | 
18 | @pytest.mark.slow
19 | @pytest.mark.parametrize("alg", learn_kwargs.keys())
20 | def test_fetchreach(alg):
21 |     '''
22 |     Test if the algorithm (with an mlp policy)
23 |     can learn the FetchReach task
24 |     '''
25 | 
26 |     kwargs = common_kwargs.copy()
27 |     kwargs.update(learn_kwargs[alg])
28 | 
29 |     learn_fn = lambda e: get_learn_function(alg)(env=e, **kwargs)
30 |     def env_fn():
31 | 
32 |         env = gym.make('FetchReach-v1')
33 |         env.seed(0)
34 |         return env
35 | 
36 |     reward_per_episode_test(env_fn, learn_fn, -15)
37 | 
38 | if __name__ == '__main__':
39 |     test_fetchreach('her')
40 | 


--------------------------------------------------------------------------------
/control/baselines/common/tests/test_fixed_sequence.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | from baselines.common.tests.envs.fixed_sequence_env import FixedSequenceEnv
 3 | 
 4 | from baselines.common.tests.util import simple_test
 5 | from baselines.run import get_learn_function
 6 | 
 7 | common_kwargs = dict(
 8 |     seed=0,
 9 |     total_timesteps=50000,
10 | )
11 | 
12 | learn_kwargs = {
13 |     'a2c': {},
14 |     'ppo2': dict(nsteps=10, ent_coef=0.0, nminibatches=1),
15 |     # TODO enable sequential models for trpo_mpi (proper handling of nbatch and nsteps)
16 |     # github issue: https://github.com/openai/baselines/issues/188
17 |     # 'trpo_mpi': lambda e, p: trpo_mpi.learn(policy_fn=p(env=e), env=e, max_timesteps=30000, timesteps_per_batch=100, cg_iters=10, gamma=0.9, lam=1.0, max_kl=0.001)
18 | }
19 | 
20 | 
21 | alg_list = learn_kwargs.keys()
22 | rnn_list = ['lstm']
23 | 
24 | @pytest.mark.slow
25 | @pytest.mark.parametrize("alg", alg_list)
26 | @pytest.mark.parametrize("rnn", rnn_list)
27 | def test_fixed_sequence(alg, rnn):
28 |     '''
29 |     Test if the algorithm (with a given policy)
30 |     can learn an identity transformation (i.e. return observation as an action)
31 |     '''
32 | 
33 |     kwargs = learn_kwargs[alg]
34 |     kwargs.update(common_kwargs)
35 | 
36 |     episode_len = 5
37 |     env_fn = lambda: FixedSequenceEnv(10, episode_len=episode_len)
38 |     learn = lambda e: get_learn_function(alg)(
39 |         env=e,
40 |         network=rnn,
41 |         **kwargs
42 |     )
43 | 
44 |     simple_test(env_fn, learn, 0.7)
45 | 
46 | 
47 | if __name__ == '__main__':
48 |     test_fixed_sequence('ppo2', 'lstm')
49 | 
50 | 
51 | 
52 | 


--------------------------------------------------------------------------------
/control/baselines/common/tests/test_identity.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | from baselines.common.tests.envs.identity_env import DiscreteIdentityEnv, BoxIdentityEnv, MultiDiscreteIdentityEnv
 3 | from baselines.run import get_learn_function
 4 | from baselines.common.tests.util import simple_test
 5 | 
 6 | common_kwargs = dict(
 7 |     total_timesteps=30000,
 8 |     network='mlp',
 9 |     gamma=0.9,
10 |     seed=0,
11 | )
12 | 
13 | learn_kwargs = {
14 |     'a2c' : {},
15 |     'acktr': {},
16 |     'deepq': {},
17 |     'ddpg': dict(layer_norm=True),
18 |     'ppo2': dict(lr=1e-3, nsteps=64, ent_coef=0.0),
19 |     'trpo_mpi': dict(timesteps_per_batch=100, cg_iters=10, gamma=0.9, lam=1.0, max_kl=0.01)
20 | }
21 | 
22 | 
23 | algos_disc = ['a2c', 'acktr', 'deepq', 'ppo2', 'trpo_mpi']
24 | algos_multidisc = ['a2c', 'acktr', 'ppo2', 'trpo_mpi']
25 | algos_cont = ['a2c', 'acktr', 'ddpg',  'ppo2', 'trpo_mpi']
26 | 
27 | @pytest.mark.slow
28 | @pytest.mark.parametrize("alg", algos_disc)
29 | def test_discrete_identity(alg):
30 |     '''
31 |     Test if the algorithm (with an mlp policy)
32 |     can learn an identity transformation (i.e. return observation as an action)
33 |     '''
34 | 
35 |     kwargs = learn_kwargs[alg]
36 |     kwargs.update(common_kwargs)
37 | 
38 |     learn_fn = lambda e: get_learn_function(alg)(env=e, **kwargs)
39 |     env_fn = lambda: DiscreteIdentityEnv(10, episode_len=100)
40 |     simple_test(env_fn, learn_fn, 0.9)
41 | 
42 | @pytest.mark.slow
43 | @pytest.mark.parametrize("alg", algos_multidisc)
44 | def test_multidiscrete_identity(alg):
45 |     '''
46 |     Test if the algorithm (with an mlp policy)
47 |     can learn an identity transformation (i.e. return observation as an action)
48 |     '''
49 | 
50 |     kwargs = learn_kwargs[alg]
51 |     kwargs.update(common_kwargs)
52 | 
53 |     learn_fn = lambda e: get_learn_function(alg)(env=e, **kwargs)
54 |     env_fn = lambda: MultiDiscreteIdentityEnv((3,3), episode_len=100)
55 |     simple_test(env_fn, learn_fn, 0.9)
56 | 
57 | @pytest.mark.slow
58 | @pytest.mark.parametrize("alg", algos_cont)
59 | def test_continuous_identity(alg):
60 |     '''
61 |     Test if the algorithm (with an mlp policy)
62 |     can learn an identity transformation (i.e. return observation as an action)
63 |     to a required precision
64 |     '''
65 | 
66 |     kwargs = learn_kwargs[alg]
67 |     kwargs.update(common_kwargs)
68 |     learn_fn = lambda e: get_learn_function(alg)(env=e, **kwargs)
69 | 
70 |     env_fn = lambda: BoxIdentityEnv((1,), episode_len=100)
71 |     simple_test(env_fn, learn_fn, -0.1)
72 | 
73 | if __name__ == '__main__':
74 |     test_multidiscrete_identity('acktr')
75 | 
76 | 


--------------------------------------------------------------------------------
/control/baselines/common/tests/test_mnist.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | # from baselines.acer import acer_simple as acer
 4 | from baselines.common.tests.envs.mnist_env import MnistEnv
 5 | from baselines.common.tests.util import simple_test
 6 | from baselines.run import get_learn_function
 7 | 
 8 | 
 9 | # TODO investigate a2c and ppo2 failures - is it due to bad hyperparameters for this problem?
10 | # GitHub issue https://github.com/openai/baselines/issues/189
11 | common_kwargs = {
12 |     'seed': 0,
13 |     'network':'cnn',
14 |     'gamma':0.9,
15 |     'pad':'SAME'
16 | }
17 | 
18 | learn_args = {
19 |     'a2c': dict(total_timesteps=50000),
20 |     'acer': dict(total_timesteps=20000),
21 |     'deepq': dict(total_timesteps=5000),
22 |     'acktr': dict(total_timesteps=30000),
23 |     'ppo2': dict(total_timesteps=50000, lr=1e-3, nsteps=128, ent_coef=0.0),
24 |     'trpo_mpi': dict(total_timesteps=80000, timesteps_per_batch=100, cg_iters=10, lam=1.0, max_kl=0.001)
25 | }
26 | 
27 | 
28 | #tests pass, but are too slow on travis. Same algorithms are covered
29 | # by other tests with less compute-hungry nn's and by benchmarks
30 | @pytest.mark.skip
31 | @pytest.mark.slow
32 | @pytest.mark.parametrize("alg", learn_args.keys())
33 | def test_mnist(alg):
34 |     '''
35 |     Test if the algorithm can learn to classify MNIST digits.
36 |     Uses CNN policy.
37 |     '''
38 | 
39 |     learn_kwargs = learn_args[alg]
40 |     learn_kwargs.update(common_kwargs)
41 | 
42 |     learn = get_learn_function(alg)
43 |     learn_fn = lambda e: learn(env=e, **learn_kwargs)
44 |     env_fn = lambda: MnistEnv(seed=0, episode_len=100)
45 | 
46 |     simple_test(env_fn, learn_fn, 0.6)
47 | 
48 | if __name__ == '__main__':
49 |     test_mnist('acer')
50 | 


--------------------------------------------------------------------------------
/control/baselines/common/tests/test_schedules.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | from baselines.common.schedules import ConstantSchedule, PiecewiseSchedule
 4 | 
 5 | 
 6 | def test_piecewise_schedule():
 7 |     ps = PiecewiseSchedule([(-5, 100), (5, 200), (10, 50), (100, 50), (200, -50)], outside_value=500)
 8 | 
 9 |     assert np.isclose(ps.value(-10), 500)
10 |     assert np.isclose(ps.value(0), 150)
11 |     assert np.isclose(ps.value(5), 200)
12 |     assert np.isclose(ps.value(9), 80)
13 |     assert np.isclose(ps.value(50), 50)
14 |     assert np.isclose(ps.value(80), 50)
15 |     assert np.isclose(ps.value(150), 0)
16 |     assert np.isclose(ps.value(175), -25)
17 |     assert np.isclose(ps.value(201), 500)
18 |     assert np.isclose(ps.value(500), 500)
19 | 
20 |     assert np.isclose(ps.value(200 - 1e-10), -50)
21 | 
22 | 
23 | def test_constant_schedule():
24 |     cs = ConstantSchedule(5)
25 |     for i in range(-100, 100):
26 |         assert np.isclose(cs.value(i), 5)
27 | 


--------------------------------------------------------------------------------
/control/baselines/common/tests/test_segment_tree.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | 
  3 | from baselines.common.segment_tree import SumSegmentTree, MinSegmentTree
  4 | 
  5 | 
  6 | def test_tree_set():
  7 |     tree = SumSegmentTree(4)
  8 | 
  9 |     tree[2] = 1.0
 10 |     tree[3] = 3.0
 11 | 
 12 |     assert np.isclose(tree.sum(), 4.0)
 13 |     assert np.isclose(tree.sum(0, 2), 0.0)
 14 |     assert np.isclose(tree.sum(0, 3), 1.0)
 15 |     assert np.isclose(tree.sum(2, 3), 1.0)
 16 |     assert np.isclose(tree.sum(2, -1), 1.0)
 17 |     assert np.isclose(tree.sum(2, 4), 4.0)
 18 | 
 19 | 
 20 | def test_tree_set_overlap():
 21 |     tree = SumSegmentTree(4)
 22 | 
 23 |     tree[2] = 1.0
 24 |     tree[2] = 3.0
 25 | 
 26 |     assert np.isclose(tree.sum(), 3.0)
 27 |     assert np.isclose(tree.sum(2, 3), 3.0)
 28 |     assert np.isclose(tree.sum(2, -1), 3.0)
 29 |     assert np.isclose(tree.sum(2, 4), 3.0)
 30 |     assert np.isclose(tree.sum(1, 2), 0.0)
 31 | 
 32 | 
 33 | def test_prefixsum_idx():
 34 |     tree = SumSegmentTree(4)
 35 | 
 36 |     tree[2] = 1.0
 37 |     tree[3] = 3.0
 38 | 
 39 |     assert tree.find_prefixsum_idx(0.0) == 2
 40 |     assert tree.find_prefixsum_idx(0.5) == 2
 41 |     assert tree.find_prefixsum_idx(0.99) == 2
 42 |     assert tree.find_prefixsum_idx(1.01) == 3
 43 |     assert tree.find_prefixsum_idx(3.00) == 3
 44 |     assert tree.find_prefixsum_idx(4.00) == 3
 45 | 
 46 | 
 47 | def test_prefixsum_idx2():
 48 |     tree = SumSegmentTree(4)
 49 | 
 50 |     tree[0] = 0.5
 51 |     tree[1] = 1.0
 52 |     tree[2] = 1.0
 53 |     tree[3] = 3.0
 54 | 
 55 |     assert tree.find_prefixsum_idx(0.00) == 0
 56 |     assert tree.find_prefixsum_idx(0.55) == 1
 57 |     assert tree.find_prefixsum_idx(0.99) == 1
 58 |     assert tree.find_prefixsum_idx(1.51) == 2
 59 |     assert tree.find_prefixsum_idx(3.00) == 3
 60 |     assert tree.find_prefixsum_idx(5.50) == 3
 61 | 
 62 | 
 63 | def test_max_interval_tree():
 64 |     tree = MinSegmentTree(4)
 65 | 
 66 |     tree[0] = 1.0
 67 |     tree[2] = 0.5
 68 |     tree[3] = 3.0
 69 | 
 70 |     assert np.isclose(tree.min(), 0.5)
 71 |     assert np.isclose(tree.min(0, 2), 1.0)
 72 |     assert np.isclose(tree.min(0, 3), 0.5)
 73 |     assert np.isclose(tree.min(0, -1), 0.5)
 74 |     assert np.isclose(tree.min(2, 4), 0.5)
 75 |     assert np.isclose(tree.min(3, 4), 3.0)
 76 | 
 77 |     tree[2] = 0.7
 78 | 
 79 |     assert np.isclose(tree.min(), 0.7)
 80 |     assert np.isclose(tree.min(0, 2), 1.0)
 81 |     assert np.isclose(tree.min(0, 3), 0.7)
 82 |     assert np.isclose(tree.min(0, -1), 0.7)
 83 |     assert np.isclose(tree.min(2, 4), 0.7)
 84 |     assert np.isclose(tree.min(3, 4), 3.0)
 85 | 
 86 |     tree[2] = 4.0
 87 | 
 88 |     assert np.isclose(tree.min(), 1.0)
 89 |     assert np.isclose(tree.min(0, 2), 1.0)
 90 |     assert np.isclose(tree.min(0, 3), 1.0)
 91 |     assert np.isclose(tree.min(0, -1), 1.0)
 92 |     assert np.isclose(tree.min(2, 4), 3.0)
 93 |     assert np.isclose(tree.min(2, 3), 4.0)
 94 |     assert np.isclose(tree.min(2, -1), 4.0)
 95 |     assert np.isclose(tree.min(3, 4), 3.0)
 96 | 
 97 | 
 98 | if __name__ == '__main__':
 99 |     test_tree_set()
100 |     test_tree_set_overlap()
101 |     test_prefixsum_idx()
102 |     test_prefixsum_idx2()
103 |     test_max_interval_tree()
104 | 


--------------------------------------------------------------------------------
/control/baselines/common/tests/test_tf_util.py:
--------------------------------------------------------------------------------
 1 | # tests for tf_util
 2 | import tensorflow as tf
 3 | from baselines.common.tf_util import (
 4 |     function,
 5 |     initialize,
 6 |     single_threaded_session
 7 | )
 8 | 
 9 | 
10 | def test_function():
11 |     with tf.Graph().as_default():
12 |         x = tf.placeholder(tf.int32, (), name="x")
13 |         y = tf.placeholder(tf.int32, (), name="y")
14 |         z = 3 * x + 2 * y
15 |         lin = function([x, y], z, givens={y: 0})
16 | 
17 |         with single_threaded_session():
18 |             initialize()
19 | 
20 |             assert lin(2) == 6
21 |             assert lin(2, 2) == 10
22 | 
23 | 
24 | def test_multikwargs():
25 |     with tf.Graph().as_default():
26 |         x = tf.placeholder(tf.int32, (), name="x")
27 |         with tf.variable_scope("other"):
28 |             x2 = tf.placeholder(tf.int32, (), name="x")
29 |         z = 3 * x + 2 * x2
30 | 
31 |         lin = function([x, x2], z, givens={x2: 0})
32 |         with single_threaded_session():
33 |             initialize()
34 |             assert lin(2) == 6
35 |             assert lin(2, 2) == 10
36 | 
37 | 
38 | if __name__ == '__main__':
39 |     test_function()
40 |     test_multikwargs()
41 | 


--------------------------------------------------------------------------------
/control/baselines/common/tests/util.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | import numpy as np
 3 | from gym.spaces import np_random
 4 | from baselines.common.vec_env.dummy_vec_env import DummyVecEnv
 5 | 
 6 | N_TRIALS = 10000
 7 | N_EPISODES = 100
 8 | 
 9 | def simple_test(env_fn, learn_fn, min_reward_fraction, n_trials=N_TRIALS):
10 |     np.random.seed(0)
11 |     np_random.seed(0)
12 | 
13 |     env = DummyVecEnv([env_fn])
14 | 
15 | 
16 |     with tf.Graph().as_default(), tf.Session(config=tf.ConfigProto(allow_soft_placement=True)).as_default():
17 |         tf.set_random_seed(0)
18 | 
19 |         model = learn_fn(env)
20 | 
21 |         sum_rew = 0
22 |         done = True
23 | 
24 |         for i in range(n_trials):
25 |             if done:
26 |                 obs = env.reset()
27 |                 state = model.initial_state
28 | 
29 |             if state is not None:
30 |                 a, v, state, _ = model.step(obs, S=state, M=[False])
31 |             else:
32 |                 a, v, _, _ = model.step(obs)
33 | 
34 |             obs, rew, done, _ = env.step(a)
35 |             sum_rew += float(rew)
36 | 
37 |         print("Reward in {} trials is {}".format(n_trials, sum_rew))
38 |         assert sum_rew > min_reward_fraction * n_trials, \
39 |             'sum of rewards {} is less than {} of the total number of trials {}'.format(sum_rew, min_reward_fraction, n_trials)
40 | 
41 | 
42 | 
43 | def reward_per_episode_test(env_fn, learn_fn, min_avg_reward, n_trials=N_EPISODES):
44 |     env = DummyVecEnv([env_fn])
45 | 
46 |     with tf.Graph().as_default(), tf.Session(config=tf.ConfigProto(allow_soft_placement=True)).as_default():
47 |         model = learn_fn(env)
48 | 
49 |         N_TRIALS = 100
50 | 
51 |         observations, actions, rewards = rollout(env, model, N_TRIALS)
52 |         rewards = [sum(r) for r in rewards]
53 | 
54 |         avg_rew = sum(rewards) / N_TRIALS
55 |         print("Average reward in {} episodes is {}".format(n_trials, avg_rew))
56 |         assert avg_rew > min_avg_reward, \
57 |             'average reward in {} episodes ({}) is less than {}'.format(n_trials, avg_rew, min_avg_reward)
58 | 
59 | def rollout(env, model, n_trials):
60 |     rewards = []
61 |     actions = []
62 |     observations = []
63 | 
64 |     for i in range(n_trials):
65 |         obs = env.reset()
66 |         state = model.initial_state if hasattr(model, 'initial_state') else None
67 |         episode_rew = []
68 |         episode_actions = []
69 |         episode_obs = []
70 | 
71 |         while True:
72 |             if state is not None:
73 |                 a, v, state, _ = model.step(obs, S=state, M=[False])
74 |             else:
75 |                 a,v, _, _ = model.step(obs)
76 | 
77 |             obs, rew, done, _ = env.step(a)
78 | 
79 |             episode_rew.append(rew)
80 |             episode_actions.append(a)
81 |             episode_obs.append(obs)
82 | 
83 |             if done:
84 |                 break
85 | 
86 |         rewards.append(episode_rew)
87 |         actions.append(episode_actions)
88 |         observations.append(episode_obs)
89 | 
90 |     return observations, actions, rewards
91 | 
92 | 


--------------------------------------------------------------------------------
/control/baselines/common/tile_images.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | def tile_images(img_nhwc):
 4 |     """
 5 |     Tile N images into one big PxQ image
 6 |     (P,Q) are chosen to be as close as possible, and if N
 7 |     is square, then P=Q.
 8 | 
 9 |     input: img_nhwc, list or array of images, ndim=4 once turned into array
10 |         n = batch index, h = height, w = width, c = channel
11 |     returns:
12 |         bigim_HWc, ndarray with ndim=3
13 |     """
14 |     img_nhwc = np.asarray(img_nhwc)
15 |     N, h, w, c = img_nhwc.shape
16 |     H = int(np.ceil(np.sqrt(N)))
17 |     W = int(np.ceil(float(N)/H))
18 |     img_nhwc = np.array(list(img_nhwc) + [img_nhwc[0]*0 for _ in range(N, H*W)])
19 |     img_HWhwc = img_nhwc.reshape(H, W, h, w, c)
20 |     img_HhWwc = img_HWhwc.transpose(0, 2, 1, 3, 4)
21 |     img_Hh_Ww_c = img_HhWwc.reshape(H*h, W*w, c)
22 |     return img_Hh_Ww_c
23 | 
24 | 


--------------------------------------------------------------------------------
/control/baselines/common/vec_env/dummy_vec_env.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from gym import spaces
 3 | from . import VecEnv
 4 | from .util import copy_obs_dict, dict_to_obs, obs_space_info
 5 | 
 6 | class DummyVecEnv(VecEnv):
 7 |     """
 8 |     VecEnv that does runs multiple environments sequentially, that is,
 9 |     the step and reset commands are send to one environment at a time.
10 |     Useful when debugging and when num_env == 1 (in the latter case,
11 |     avoids communication overhead)
12 |     """
13 |     def __init__(self, env_fns):
14 |         """
15 |         Arguments:
16 | 
17 |         env_fns: iterable of callables      functions that build environments
18 |         """
19 |         self.envs = [fn() for fn in env_fns]
20 |         env = self.envs[0]
21 |         VecEnv.__init__(self, len(env_fns), env.observation_space, env.action_space)
22 |         obs_space = env.observation_space
23 |         self.keys, shapes, dtypes = obs_space_info(obs_space)
24 | 
25 |         self.buf_obs = { k: np.zeros((self.num_envs,) + tuple(shapes[k]), dtype=dtypes[k]) for k in self.keys }
26 |         self.buf_dones = np.zeros((self.num_envs,), dtype=np.bool)
27 |         self.buf_rews  = np.zeros((self.num_envs,), dtype=np.float32)
28 |         self.buf_infos = [{} for _ in range(self.num_envs)]
29 |         self.actions = None
30 |         self.specs = [e.spec for e in self.envs]
31 | 
32 |     def step_async(self, actions):
33 |         listify = True
34 |         try:
35 |             if len(actions) == self.num_envs:
36 |                 listify = False
37 |         except TypeError:
38 |             pass
39 | 
40 |         if not listify:
41 |             self.actions = actions
42 |         else:
43 |             assert self.num_envs == 1, "actions {} is either not a list or has a wrong size - cannot match to {} environments".format(actions, self.num_envs)
44 |             self.actions = [actions]
45 | 
46 |     def step_wait(self):
47 |         for e in range(self.num_envs):
48 |             action = self.actions[e]
49 |             if isinstance(self.envs[e].action_space, spaces.Discrete):
50 |                 action = int(action)
51 | 
52 |             obs, self.buf_rews[e], self.buf_dones[e], self.buf_infos[e] = self.envs[e].step(action)
53 |             if self.buf_dones[e]:
54 |                 obs = self.envs[e].reset()
55 |             self._save_obs(e, obs)
56 |         return (self._obs_from_buf(), np.copy(self.buf_rews), np.copy(self.buf_dones),
57 |                 self.buf_infos.copy())
58 | 
59 |     def reset(self):
60 |         for e in range(self.num_envs):
61 |             obs = self.envs[e].reset()
62 |             self._save_obs(e, obs)
63 |         return self._obs_from_buf()
64 | 
65 |     def _save_obs(self, e, obs):
66 |         for k in self.keys:
67 |             if k is None:
68 |                 self.buf_obs[k][e] = obs
69 |             else:
70 |                 self.buf_obs[k][e] = obs[k]
71 | 
72 |     def _obs_from_buf(self):
73 |         return dict_to_obs(copy_obs_dict(self.buf_obs))
74 | 
75 |     def get_images(self):
76 |         return [env.render(mode='rgb_array') for env in self.envs]
77 | 
78 |     def render(self, mode='human'):
79 |         if self.num_envs == 1:
80 |             return self.envs[0].render(mode=mode)
81 |         else:
82 |             return super().render(mode=mode)
83 | 


--------------------------------------------------------------------------------
/control/baselines/common/vec_env/test_vec_env.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Tests for asynchronous vectorized environments.
  3 | """
  4 | 
  5 | import gym
  6 | import numpy as np
  7 | import pytest
  8 | from .dummy_vec_env import DummyVecEnv
  9 | from .shmem_vec_env import ShmemVecEnv
 10 | from .subproc_vec_env import SubprocVecEnv
 11 | 
 12 | 
 13 | def assert_envs_equal(env1, env2, num_steps):
 14 |     """
 15 |     Compare two environments over num_steps steps and make sure
 16 |     that the observations produced by each are the same when given
 17 |     the same actions.
 18 |     """
 19 |     assert env1.num_envs == env2.num_envs
 20 |     assert env1.action_space.shape == env2.action_space.shape
 21 |     assert env1.action_space.dtype == env2.action_space.dtype
 22 |     joint_shape = (env1.num_envs,) + env1.action_space.shape
 23 | 
 24 |     try:
 25 |         obs1, obs2 = env1.reset(), env2.reset()
 26 |         assert np.array(obs1).shape == np.array(obs2).shape
 27 |         assert np.array(obs1).shape == joint_shape
 28 |         assert np.allclose(obs1, obs2)
 29 |         np.random.seed(1337)
 30 |         for _ in range(num_steps):
 31 |             actions = np.array(np.random.randint(0, 0x100, size=joint_shape),
 32 |                                dtype=env1.action_space.dtype)
 33 |             for env in [env1, env2]:
 34 |                 env.step_async(actions)
 35 |             outs1 = env1.step_wait()
 36 |             outs2 = env2.step_wait()
 37 |             for out1, out2 in zip(outs1[:3], outs2[:3]):
 38 |                 assert np.array(out1).shape == np.array(out2).shape
 39 |                 assert np.allclose(out1, out2)
 40 |             assert list(outs1[3]) == list(outs2[3])
 41 |     finally:
 42 |         env1.close()
 43 |         env2.close()
 44 | 
 45 | 
 46 | @pytest.mark.parametrize('klass', (ShmemVecEnv, SubprocVecEnv))
 47 | @pytest.mark.parametrize('dtype', ('uint8', 'float32'))
 48 | def test_vec_env(klass, dtype):  # pylint: disable=R0914
 49 |     """
 50 |     Test that a vectorized environment is equivalent to
 51 |     DummyVecEnv, since DummyVecEnv is less likely to be
 52 |     error prone.
 53 |     """
 54 |     num_envs = 3
 55 |     num_steps = 100
 56 |     shape = (3, 8)
 57 | 
 58 |     def make_fn(seed):
 59 |         """
 60 |         Get an environment constructor with a seed.
 61 |         """
 62 |         return lambda: SimpleEnv(seed, shape, dtype)
 63 |     fns = [make_fn(i) for i in range(num_envs)]
 64 |     env1 = DummyVecEnv(fns)
 65 |     env2 = klass(fns)
 66 |     assert_envs_equal(env1, env2, num_steps=num_steps)
 67 | 
 68 | 
 69 | class SimpleEnv(gym.Env):
 70 |     """
 71 |     An environment with a pre-determined observation space
 72 |     and RNG seed.
 73 |     """
 74 | 
 75 |     def __init__(self, seed, shape, dtype):
 76 |         np.random.seed(seed)
 77 |         self._dtype = dtype
 78 |         self._start_obs = np.array(np.random.randint(0, 0x100, size=shape),
 79 |                                    dtype=dtype)
 80 |         self._max_steps = seed + 1
 81 |         self._cur_obs = None
 82 |         self._cur_step = 0
 83 |         # this is 0xFF instead of 0x100 because the Box space includes
 84 |         # the high end, while randint does not
 85 |         self.action_space = gym.spaces.Box(low=0, high=0xFF, shape=shape, dtype=dtype)
 86 |         self.observation_space = self.action_space
 87 | 
 88 |     def step(self, action):
 89 |         self._cur_obs += np.array(action, dtype=self._dtype)
 90 |         self._cur_step += 1
 91 |         done = self._cur_step >= self._max_steps
 92 |         reward = self._cur_step / self._max_steps
 93 |         return self._cur_obs, reward, done, {'foo': 'bar' + str(reward)}
 94 | 
 95 |     def reset(self):
 96 |         self._cur_obs = self._start_obs
 97 |         self._cur_step = 0
 98 |         return self._cur_obs
 99 | 
100 |     def render(self, mode=None):
101 |         raise NotImplementedError
102 | 


--------------------------------------------------------------------------------
/control/baselines/common/vec_env/test_video_recorder.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Tests for asynchronous vectorized environments.
 3 | """
 4 | 
 5 | import gym
 6 | import pytest
 7 | import os
 8 | import glob
 9 | import tempfile
10 | 
11 | from .dummy_vec_env import DummyVecEnv
12 | from .shmem_vec_env import ShmemVecEnv
13 | from .subproc_vec_env import SubprocVecEnv
14 | from .vec_video_recorder import VecVideoRecorder
15 | 
16 | @pytest.mark.parametrize('klass', (DummyVecEnv, ShmemVecEnv, SubprocVecEnv))
17 | @pytest.mark.parametrize('num_envs', (1, 4))
18 | @pytest.mark.parametrize('video_length', (10, 100))
19 | @pytest.mark.parametrize('video_interval', (1, 50))
20 | def test_video_recorder(klass, num_envs, video_length, video_interval):
21 |     """
22 |     Wrap an existing VecEnv with VevVideoRecorder,
23 |     Make (video_interval + video_length + 1) steps,
24 |     then check that the file is present
25 |     """
26 | 
27 |     def make_fn():
28 |         env = gym.make('PongNoFrameskip-v4')
29 |         return env
30 |     fns = [make_fn for _ in range(num_envs)]
31 |     env = klass(fns)
32 | 
33 |     with tempfile.TemporaryDirectory() as video_path:
34 |         env = VecVideoRecorder(env, video_path, record_video_trigger=lambda x: x % video_interval == 0, video_length=video_length)
35 | 
36 |         env.reset()
37 |         for _ in range(video_interval + video_length + 1):
38 |             env.step([0] * num_envs)
39 |         env.close()
40 | 
41 | 
42 |         recorded_video = glob.glob(os.path.join(video_path, "*.mp4"))
43 | 
44 |         # first and second step
45 |         assert len(recorded_video) == 2
46 |         # Files are not empty
47 |         assert all(os.stat(p).st_size != 0 for p in recorded_video)
48 | 
49 | 
50 | 


--------------------------------------------------------------------------------
/control/baselines/common/vec_env/util.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Helpers for dealing with vectorized environments.
 3 | """
 4 | 
 5 | from collections import OrderedDict
 6 | 
 7 | import gym
 8 | import numpy as np
 9 | 
10 | 
11 | def copy_obs_dict(obs):
12 |     """
13 |     Deep-copy an observation dict.
14 |     """
15 |     return {k: np.copy(v) for k, v in obs.items()}
16 | 
17 | 
18 | def dict_to_obs(obs_dict):
19 |     """
20 |     Convert an observation dict into a raw array if the
21 |     original observation space was not a Dict space.
22 |     """
23 |     if set(obs_dict.keys()) == {None}:
24 |         return obs_dict[None]
25 |     return obs_dict
26 | 
27 | 
28 | def obs_space_info(obs_space):
29 |     """
30 |     Get dict-structured information about a gym.Space.
31 | 
32 |     Returns:
33 |       A tuple (keys, shapes, dtypes):
34 |         keys: a list of dict keys.
35 |         shapes: a dict mapping keys to shapes.
36 |         dtypes: a dict mapping keys to dtypes.
37 |     """
38 |     if isinstance(obs_space, gym.spaces.Dict):
39 |         assert isinstance(obs_space.spaces, OrderedDict)
40 |         subspaces = obs_space.spaces
41 |     else:
42 |         subspaces = {None: obs_space}
43 |     keys = []
44 |     shapes = {}
45 |     dtypes = {}
46 |     for key, box in subspaces.items():
47 |         keys.append(key)
48 |         shapes[key] = box.shape
49 |         dtypes[key] = box.dtype
50 |     return keys, shapes, dtypes
51 | 
52 | 
53 | def obs_to_dict(obs):
54 |     """
55 |     Convert an observation into a dict.
56 |     """
57 |     if isinstance(obs, dict):
58 |         return obs
59 |     return {None: obs}
60 | 


--------------------------------------------------------------------------------
/control/baselines/common/vec_env/vec_frame_stack.py:
--------------------------------------------------------------------------------
 1 | from . import VecEnvWrapper
 2 | import numpy as np
 3 | from gym import spaces
 4 | 
 5 | 
 6 | class VecFrameStack(VecEnvWrapper):
 7 |     def __init__(self, venv, nstack):
 8 |         self.venv = venv
 9 |         self.nstack = nstack
10 |         wos = venv.observation_space  # wrapped ob space
11 |         low = np.repeat(wos.low, self.nstack, axis=-1)
12 |         high = np.repeat(wos.high, self.nstack, axis=-1)
13 |         self.stackedobs = np.zeros((venv.num_envs,) + low.shape, low.dtype)
14 |         observation_space = spaces.Box(low=low, high=high, dtype=venv.observation_space.dtype)
15 |         VecEnvWrapper.__init__(self, venv, observation_space=observation_space)
16 | 
17 |     def step_wait(self):
18 |         obs, rews, news, infos = self.venv.step_wait()
19 |         self.stackedobs = np.roll(self.stackedobs, shift=-1, axis=-1)
20 |         for (i, new) in enumerate(news):
21 |             if new:
22 |                 self.stackedobs[i] = 0
23 |         self.stackedobs[..., -obs.shape[-1]:] = obs
24 |         return self.stackedobs, rews, news, infos
25 | 
26 |     def reset(self):
27 |         obs = self.venv.reset()
28 |         self.stackedobs[...] = 0
29 |         self.stackedobs[..., -obs.shape[-1]:] = obs
30 |         return self.stackedobs
31 | 


--------------------------------------------------------------------------------
/control/baselines/common/vec_env/vec_monitor.py:
--------------------------------------------------------------------------------
 1 | from . import VecEnvWrapper
 2 | from baselines.bench.monitor import ResultsWriter
 3 | import numpy as np
 4 | import time
 5 | 
 6 | 
 7 | class VecMonitor(VecEnvWrapper):
 8 |     def __init__(self, venv, filename=None):
 9 |         VecEnvWrapper.__init__(self, venv)
10 |         self.eprets = None
11 |         self.eplens = None
12 |         self.tstart = time.time()
13 |         self.results_writer = ResultsWriter(filename, header={'t_start': self.tstart})
14 | 
15 |     def reset(self):
16 |         obs = self.venv.reset()
17 |         self.eprets = np.zeros(self.num_envs, 'f')
18 |         self.eplens = np.zeros(self.num_envs, 'i')
19 |         return obs
20 | 
21 |     def step_wait(self):
22 |         obs, rews, dones, infos = self.venv.step_wait()
23 |         self.eprets += rews
24 |         self.eplens += 1
25 |         newinfos = []
26 |         for (i, (done, ret, eplen, info)) in enumerate(zip(dones, self.eprets, self.eplens, infos)):
27 |             info = info.copy()
28 |             if done:
29 |                 epinfo = {'r': ret, 'l': eplen, 't': round(time.time() - self.tstart, 6)}
30 |                 info['episode'] = epinfo
31 |                 self.eprets[i] = 0
32 |                 self.eplens[i] = 0
33 |                 self.results_writer.write_row(epinfo)
34 | 
35 |             newinfos.append(info)
36 | 
37 |         return obs, rews, dones, newinfos
38 | 


--------------------------------------------------------------------------------
/control/baselines/common/vec_env/vec_normalize.py:
--------------------------------------------------------------------------------
 1 | from . import VecEnvWrapper
 2 | from baselines.common.running_mean_std import RunningMeanStd
 3 | import numpy as np
 4 | 
 5 | 
 6 | class VecNormalize(VecEnvWrapper):
 7 |     """
 8 |     A vectorized wrapper that normalizes the observations
 9 |     and returns from an environment.
10 |     """
11 | 
12 |     def __init__(self, venv, ob=True, ret=True, clipob=10., cliprew=10., gamma=0.99, epsilon=1e-8):
13 |         VecEnvWrapper.__init__(self, venv)
14 |         self.ob_rms = RunningMeanStd(shape=self.observation_space.shape) if ob else None
15 |         self.ret_rms = RunningMeanStd(shape=()) if ret else None
16 |         self.clipob = clipob
17 |         self.cliprew = cliprew
18 |         self.ret = np.zeros(self.num_envs)
19 |         self.gamma = gamma
20 |         self.epsilon = epsilon
21 | 
22 |     def step_wait(self):
23 |         obs, rews, news, infos = self.venv.step_wait()
24 |         self.ret = self.ret * self.gamma + rews
25 |         obs = self._obfilt(obs)
26 |         if self.ret_rms:
27 |             self.ret_rms.update(self.ret)
28 |             rews = np.clip(rews / np.sqrt(self.ret_rms.var + self.epsilon), -self.cliprew, self.cliprew)
29 |         self.ret[news] = 0.
30 |         return obs, rews, news, infos
31 | 
32 |     def _obfilt(self, obs):
33 |         if self.ob_rms:
34 |             self.ob_rms.update(obs)
35 |             obs = np.clip((obs - self.ob_rms.mean) / np.sqrt(self.ob_rms.var + self.epsilon), -self.clipob, self.clipob)
36 |             return obs
37 |         else:
38 |             return obs
39 | 
40 |     def reset(self):
41 |         self.ret = np.zeros(self.num_envs)
42 |         obs = self.venv.reset()
43 |         return self._obfilt(obs)
44 | 


--------------------------------------------------------------------------------
/control/baselines/common/vec_env/vec_video_recorder.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from baselines import logger
 3 | from baselines.common.vec_env import VecEnvWrapper
 4 | from gym.wrappers.monitoring import video_recorder
 5 | 
 6 | 
 7 | class VecVideoRecorder(VecEnvWrapper):
 8 |     """
 9 |     Wrap VecEnv to record rendered image as mp4 video.
10 |     """
11 | 
12 |     def __init__(self, venv, directory, record_video_trigger, video_length=200):
13 |         """
14 |         # Arguments
15 |             venv: VecEnv to wrap
16 |             directory: Where to save videos
17 |             record_video_trigger:
18 |                 Function that defines when to start recording.
19 |                 The function takes the current number of step,
20 |                 and returns whether we should start recording or not.
21 |             video_length: Length of recorded video
22 |         """
23 | 
24 |         VecEnvWrapper.__init__(self, venv)
25 |         self.record_video_trigger = record_video_trigger
26 |         self.video_recorder = None
27 | 
28 |         self.directory = os.path.abspath(directory)
29 |         if not os.path.exists(self.directory): os.mkdir(self.directory)
30 | 
31 |         self.file_prefix = "vecenv"
32 |         self.file_infix = '{}'.format(os.getpid())
33 |         self.step_id = 0
34 |         self.video_length = video_length
35 | 
36 |         self.recording = False
37 |         self.recorded_frames = 0
38 | 
39 |     def reset(self):
40 |         obs = self.venv.reset()
41 | 
42 |         self.start_video_recorder()
43 | 
44 |         return obs
45 | 
46 |     def start_video_recorder(self):
47 |         self.close_video_recorder()
48 | 
49 |         base_path = os.path.join(self.directory, '{}.video.{}.video{:06}'.format(self.file_prefix, self.file_infix, self.step_id))
50 |         self.video_recorder = video_recorder.VideoRecorder(
51 |                 env=self.venv,
52 |                 base_path=base_path,
53 |                 metadata={'step_id': self.step_id}
54 |                 )
55 | 
56 |         self.video_recorder.capture_frame()
57 |         self.recorded_frames = 1
58 |         self.recording = True
59 | 
60 |     def _video_enabled(self):
61 |         return self.record_video_trigger(self.step_id)
62 | 
63 |     def step_wait(self):
64 |         obs, rews, dones, infos = self.venv.step_wait()
65 | 
66 |         self.step_id += 1
67 |         if self.recording:
68 |             self.video_recorder.capture_frame()
69 |             self.recorded_frames += 1
70 |             if self.recorded_frames > self.video_length:
71 |                 logger.info("Saving video to ", self.video_recorder.path)
72 |                 self.close_video_recorder()
73 |         elif self._video_enabled():
74 |                 self.start_video_recorder()
75 | 
76 |         return obs, rews, dones, infos
77 | 
78 |     def close_video_recorder(self):
79 |         if self.recording:
80 |             self.video_recorder.close()
81 |         self.recording = False
82 |         self.recorded_frames = 0
83 | 
84 |     def close(self):
85 |         VecEnvWrapper.close(self)
86 |         self.close_video_recorder()
87 | 
88 |     def __del__(self):
89 |         self.close()
90 | 


--------------------------------------------------------------------------------
/control/baselines/ppoc_int/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kkhetarpal/ioc/fb88d5b881e0b5c317020b23874495e614b3ddc7/control/baselines/ppoc_int/__init__.py


--------------------------------------------------------------------------------
/control/baselines/ppoc_int/assets/twod_tmaze.xml:
--------------------------------------------------------------------------------
 1 | <mujoco model="twod_point">
 2 |     <compiler inertiafromgeom="true" angle="radian" coordinate="local" />
 3 |     <option timestep="0.01" gravity="0 0 0" iterations="20" integrator="Euler" />
 4 |     <default>
 5 |         <joint limited="false" damping="1" />
 6 |         <geom contype="2" conaffinity="1" condim="1" friction=".5 .1 .1" density="1000" margin="0.002" />
 7 |     </default>
 8 | 
 9 |     <worldbody>
10 |         <light directional="true" cutoff="100" exponent="1" diffuse="1 1 1" specular=".1 .1 .1" pos="0 0 1.3" dir="-0 0 -1.3" />
11 |         <!-- Pointmass -->
12 |         <body name="particle" pos="0. -0.1 0">
13 |             <geom name="particle_geom" type="sphere" size="0.03" rgba="0.0 0.0 1.0 1" contype="1"/>
14 |             <site name="particle_site" pos="0 0 0" size="0.01" />
15 |             <joint name="ball_x" type="slide" pos="0 0 0" axis="1 0 0" />
16 |             <joint name="ball_y" type="slide" pos="0 0 0" axis="0 1 0" />
17 |         </body>
18 | 
19 | 
20 | 
21 |    
22 |         <!-- Arena -->
23 |         <geom conaffinity="1" fromto="-.1 -.2 .01 .1 -.2 .01" name="sideS" rgba="0.9 0.4 0.6 1" size=".02" type="capsule"/>
24 | 
25 | 		<geom conaffinity="1" fromto=" .1 -.2 .01 .1  .2 .01" name="sideE" rgba="0.9 0.4 0.6 1" size=".02" type="capsule"/>
26 |         <geom conaffinity="1" fromto=" -.1 -.2 .01 -.1  .2 .01" name="sideW" rgba="0.9 0.4 0.6 1" size=".02" type="capsule"/>
27 | 
28 |         <geom conaffinity="1" fromto=" .1 .2 .01 .4  .2 .01" name="sideEbottom" rgba="0.9 0.4 0.6 1" size=".02" type="capsule"/>
29 |         <geom conaffinity="1" fromto=" -.1 .2 .01 -.4  .2 .01" name="sideWbottom" rgba="0.9 0.4 0.6 1" size=".02" type="capsule"/>
30 | 
31 |         <geom conaffinity="1" fromto=" .4 .2 .01 .4  .45 .01" name="sideEside" rgba="0.9 0.4 0.6 1" size=".02" type="capsule"/>
32 |         <geom conaffinity="1" fromto=" -.4 .2 .01 -.4  .45 .01" name="sideWside" rgba="0.9 0.4 0.6 1" size=".02" type="capsule"/>
33 | 
34 | 		<geom conaffinity="1" fromto="-.4  .45 .01 .4  .45 .01" name="sideN" rgba="0.9 0.4 0.6 1" size=".02" type="capsule"/>
35 | 
36 | 
37 |         <!-- Target -->
38 |         <body name="target" pos="0.3 0.3 0">
39 |             <geom conaffinity="2" name="target_geom" type="sphere" size="0.03" rgba="0 0.9 0.1 1"/>
40 |         </body>         
41 |     </worldbody>
42 | 
43 |     <actuator>
44 |         <motor joint="ball_x" ctrlrange="-1 1" ctrllimited="true"/>
45 |         <motor joint="ball_y" ctrlrange="-1 1" ctrllimited="true"/>
46 |     </actuator>
47 | </mujoco>
48 | 


--------------------------------------------------------------------------------
/control/baselines/ppoc_int/normalized_env.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import gym
 3 | from gym import spaces
 4 | 
 5 | from gym.envs.registration import load
 6 | 
 7 | 
 8 | class NormalizedActionWrapper(gym.ActionWrapper):
 9 |     """Environment wrapper to normalize the action space to [-1, 1]. This 
10 |     wrapper is adapted from rllab's [1] wrapper `NormalizedEnv`
11 |     https://github.com/rll/rllab/blob/b3a28992eca103cab3cb58363dd7a4bb07f250a0/rllab/envs/normalized_env.py
12 |     [1] Yan Duan, Xi Chen, Rein Houthooft, John Schulman, Pieter Abbeel, 
13 |         "Benchmarking Deep Reinforcement Learning for Continuous Control", 2016 
14 |         (https://arxiv.org/abs/1604.06778)
15 |     """
16 |     def __init__(self, env):
17 |         super(NormalizedActionWrapper, self).__init__(env)
18 |         self.action_space = spaces.Box(low=-1.0, high=1.0,
19 |             shape=self.env.action_space.shape)
20 | 
21 |     def action(self, action):
22 |         # Clip the action in [-1, 1]
23 |         action = np.clip(action, -1.0, 1.0)
24 |         # Map the normalized action to original action space
25 |         lb, ub = self.env.action_space.low, self.env.action_space.high
26 |         action = lb + 0.5 * (action + 1.0) * (ub - lb)
27 |         return action
28 | 
29 |     def reverse_action(self, action):
30 |         # Map the original action to normalized action space
31 |         lb, ub = self.env.action_space.low, self.env.action_space.high
32 |         action = 2.0 * (action - lb) / (ub - lb) - 1.0
33 |         # Clip the action in [-1, 1]
34 |         action = np.clip(action, -1.0, 1.0)
35 |         return action
36 | 
37 | 
38 | 
39 | def mujoco_wrapper(entry_point, **kwargs):
40 |     # Load the environment from its entry point
41 |     env_cls = load(entry_point)
42 |     env = env_cls(**kwargs)
43 |     # Normalization wrapper
44 |     env = NormalizedActionWrapper(env)
45 |     return env


--------------------------------------------------------------------------------
/control/baselines/ppoc_int/plot_res.py:
--------------------------------------------------------------------------------
 1 | import matplotlib.pyplot as plt
 2 | import seaborn as sns; sns.set(color_codes=True)
 3 | import numpy as np
 4 | from collections import deque
 5 | import pdb
 6 | sns.set(style='ticks')
 7 | 
 8 | name='TMaze'
 9 | 
10 | seeds = [0,1,2,3,4,]
11 | shortest=np.inf
12 | data=[]
13 | axes=[]
14 | direc='res'
15 | for seed in seeds:
16 | 	dat = np.genfromtxt('{}/{}seed{}_intfc1_2opts.csv'.format(direc,name,seed), delimiter=',')[1:200,1]
17 | 	print(len(dat))
18 | 	if len(dat) < shortest:
19 | 		shortest=len(dat)
20 | 
21 | 	rewbuffer = deque(maxlen=100)
22 | 	real_dat=[]
23 | 	for d in dat:
24 | 		rewbuffer.append(d)
25 | 		real_dat.append(np.mean(rewbuffer))
26 | 	data.append(real_dat)
27 | for i in range(len(data)):
28 | 	data[i] = data[i][:shortest]
29 | axes.append(sns.tsplot(data=data,legend=True,condition='IOC',color='red'))
30 | 
31 | 
32 | 
33 | shortest=np.inf
34 | data=[]
35 | for seed in seeds:
36 | 	dat = np.genfromtxt('{}/{}seed{}_intfc0_2opts.csv'.format(direc,name,seed), delimiter=',')[1:200,1]
37 | 	print(len(dat))
38 | 	if len(dat) < shortest:
39 | 		shortest=len(dat)
40 | 
41 | 	rewbuffer = deque(maxlen=100)
42 | 	real_dat=[]
43 | 	for d in dat:
44 | 		rewbuffer.append(d)
45 | 		real_dat.append(np.mean(rewbuffer))
46 | 	data.append(real_dat)
47 | for i in range(len(data)):
48 | 	data[i] = data[i][:shortest]
49 | axes.append(sns.tsplot(data=data,legend=True,condition='OC',color='blue'))
50 | 
51 | 
52 | plt.gcf().subplots_adjust(bottom=0.15)
53 | plt.xlabel('Iterations',fontsize=18)
54 | plt.ylabel('Average Rewards',fontsize=18)
55 | plt.legend()
56 | plt.title("Results on {}-v0".format(name))
57 | plt.savefig('plots/{}_notrans.png'.format(name))
58 | plt.clf()
59 | 


--------------------------------------------------------------------------------
/control/baselines/ppoc_int/run_mujoco.py:
--------------------------------------------------------------------------------
 1 | # !/usr/bin/env python
 2 | from baselines.common import set_global_seeds, tf_util as U
 3 | import gym, logging
 4 | from baselines import logger
 5 | from half_cheetah import *
 6 | 
 7 | 
 8 | def train(env_id, num_timesteps, seed, num_options,app, saves ,wsaves, epoch,dc,plots,w_intfc,switch,mainlr,intlr,piolr,fewshot,k):
 9 |     from baselines.ppoc_int import mlp_policy, pposgd_simple
10 |     U.make_session(num_cpu=1).__enter__()
11 |     set_global_seeds(seed)
12 |     
13 |     if env_id=="TMaze":
14 |         from twod_tmaze import TMaze
15 |         env=TMaze()
16 |         env.seed(seed)             
17 |     else:
18 |         env = gym.make(env_id)
19 |         env._seed(seed)
20 | 
21 | 
22 |     def policy_fn(name, ob_space, ac_space):
23 |         return mlp_policy.MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
24 |             hid_size=64, num_hid_layers=2, num_options=num_options, dc=dc, w_intfc=w_intfc,k=k)
25 | 
26 |     gym.logger.setLevel(logging.WARN)
27 | 
28 |     if num_options ==1:
29 |         optimsize=64
30 |     elif num_options ==2:
31 |         optimsize=32
32 |     else:
33 |         optimsize=int(64/num_options)
34 | 
35 | 
36 |     num_timesteps = num_timesteps #if env_id!="TMaze" else 5e5
37 |     tperbatch = 2048 if not epoch else int(1e4)
38 |     pposgd_simple.learn(env, policy_fn, 
39 |             max_timesteps=num_timesteps,
40 |             timesteps_per_batch=tperbatch,
41 |             clip_param=0.2, entcoeff=0.0,
42 |             optim_epochs=10, optim_stepsize=mainlr, optim_batchsize=optimsize,
43 |             gamma=0.99, lam=0.95, schedule='constant', num_options=num_options,
44 |             app=app, saves=saves, wsaves=wsaves, epoch=epoch, seed=seed,dc=dc,plots=plots,
45 |             w_intfc=w_intfc,switch=switch,intlr=intlr,piolr=piolr,fewshot=fewshot,k=k
46 |         )
47 |     env.close()
48 | 
49 | def main():
50 |     import argparse
51 |     parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
52 |     parser.add_argument('--env', help='environment ID', default='TMaze')
53 |     parser.add_argument('--timesteps', help='number of timesteps', type=int, default=1e6) 
54 |     parser.add_argument('--seed', help='RNG seed', type=int, default=1)
55 |     parser.add_argument('--opt', help='number of options', type=int, default=2) 
56 |     parser.add_argument('--app', help='Append to folder name', type=str, default='')        
57 |     parser.add_argument('--saves', help='Save the returns at each iteration', dest='saves', action='store_true', default=False)
58 |     parser.add_argument('--wsaves', help='Save the weights',dest='wsaves', action='store_true', default=False)    
59 |     parser.add_argument('--plots',  help='Plot some visualization', dest='plots', action='store_true', default=False)    
60 |     parser.add_argument('--switch', help='Switch task after 150 iterations', dest='switch', action='store_true', default=False)    
61 |     parser.add_argument('--fewshot', help='Value learning after 150 iterations', dest='fewshot', action='store_true', default=False)    
62 |     parser.add_argument('--nointfc', help='Disables interet functions', dest='w_intfc', action='store_false', default=True)    
63 |     parser.add_argument('--epoch', help='Load weights from a certain epoch', type=int, default=0) 
64 |     parser.add_argument('--dc', help='Deliberation cost  (not used)', type=float, default=0.)
65 |     parser.add_argument('--mainlr', type=float, default=3e-4)
66 |     parser.add_argument('--intlr', type=float, default=1e-4)
67 |     parser.add_argument('--piolr', type=float, default=1e-4)
68 |     parser.add_argument('--k', type=float, default=0., help='threshold for interest function')
69 | 
70 | 
71 | 
72 | 
73 |     args = parser.parse_args()
74 | 
75 |     train(args.env, num_timesteps=args.timesteps, seed=args.seed, num_options=args.opt, app=args.app,
76 |      saves=args.saves, wsaves=args.wsaves, epoch=args.epoch,dc=args.dc,plots=args.plots,
77 |      w_intfc=args.w_intfc,switch=args.switch,mainlr=args.mainlr,intlr=args.intlr,piolr=args.piolr,fewshot=args.fewshot,k=args.k)
78 | 
79 | 
80 | if __name__ == '__main__':
81 |     main()
82 | 


--------------------------------------------------------------------------------
/control/baselines/ppoc_int/seeding.py:
--------------------------------------------------------------------------------
  1 | import hashlib
  2 | import numpy as np
  3 | import os
  4 | import random as _random
  5 | import struct
  6 | import sys
  7 | 
  8 | from gym import error
  9 | 
 10 | if sys.version_info < (3,):
 11 |     integer_types = (int, long)
 12 | else:
 13 |     integer_types = (int,)
 14 | 
 15 | # Fortunately not needed right now!
 16 | #
 17 | # def random(seed=None):
 18 | #     seed = _seed(seed)
 19 | #
 20 | #     rng = _random.Random()
 21 | #     rng.seed(hash_seed(seed))
 22 | #     return rng, seed
 23 | 
 24 | def np_random(seed=None):
 25 |     if seed is not None and not (isinstance(seed, integer_types) and 0 <= seed):
 26 |         raise error.Error('Seed must be a non-negative integer or omitted, not {}'.format(seed))
 27 | 
 28 |     seed = _seed(seed)
 29 | 
 30 |     rng = np.random.RandomState()
 31 |     rng.seed(_int_list_from_bigint(hash_seed(seed)))
 32 |     return rng, seed
 33 | 
 34 | def hash_seed(seed=None, max_bytes=8):
 35 |     """Any given evaluation is likely to have many PRNG's active at
 36 |     once. (Most commonly, because the environment is running in
 37 |     multiple processes.) There's literature indicating that having
 38 |     linear correlations between seeds of multiple PRNG's can correlate
 39 |     the outputs:
 40 | 
 41 |     http://blogs.unity3d.com/2015/01/07/a-primer-on-repeatable-random-numbers/
 42 |     http://stackoverflow.com/questions/1554958/how-different-do-random-seeds-need-to-be
 43 |     http://dl.acm.org/citation.cfm?id=1276928
 44 | 
 45 |     Thus, for sanity we hash the seeds before using them. (This scheme
 46 |     is likely not crypto-strength, but it should be good enough to get
 47 |     rid of simple correlations.)
 48 | 
 49 |     Args:
 50 |         seed (Optional[int]): None seeds from an operating system specific randomness source.
 51 |         max_bytes: Maximum number of bytes to use in the hashed seed.
 52 |     """
 53 |     if seed is None:
 54 |         seed = _seed(max_bytes=max_bytes)
 55 |     hash = hashlib.sha512(str(seed).encode('utf8')).digest()
 56 |     return _bigint_from_bytes(hash[:max_bytes])
 57 | 
 58 | def _seed(a=None, max_bytes=8):
 59 |     """Create a strong random seed. Otherwise, Python 2 would seed using
 60 |     the system time, which might be non-robust especially in the
 61 |     presence of concurrency.
 62 | 
 63 |     Args:
 64 |         a (Optional[int, str]): None seeds from an operating system specific randomness source.
 65 |         max_bytes: Maximum number of bytes to use in the seed.
 66 |     """
 67 |     # Adapted from https://svn.python.org/projects/python/tags/r32/Lib/random.py
 68 |     if a is None:
 69 |         a = _bigint_from_bytes(os.urandom(max_bytes))
 70 |     elif isinstance(a, str):
 71 |         a = a.encode('utf8')
 72 |         a += hashlib.sha512(a).digest()
 73 |         a = _bigint_from_bytes(a[:max_bytes])
 74 |     elif isinstance(a, integer_types):
 75 |         a = a % 2**(8 * max_bytes)
 76 |     else:
 77 |         raise error.Error('Invalid type for seed: {} ({})'.format(type(a), a))
 78 | 
 79 |     return a
 80 | 
 81 | # TODO: don't hardcode sizeof_int here
 82 | def _bigint_from_bytes(bytes):
 83 |     sizeof_int = 4
 84 |     padding = sizeof_int - len(bytes) % sizeof_int
 85 |     bytes += b'\0' * padding
 86 |     int_count = int(len(bytes) / sizeof_int)
 87 |     unpacked = struct.unpack("{}I".format(int_count), bytes)
 88 |     accum = 0
 89 |     for i, val in enumerate(unpacked):
 90 |         accum += 2 ** (sizeof_int * 8 * i) * val
 91 |     return accum
 92 | 
 93 | def _int_list_from_bigint(bigint):
 94 |     # Special case 0
 95 |     if bigint < 0:
 96 |         raise error.Error('Seed must be non-negative, not {}'.format(bigint))
 97 |     elif bigint == 0:
 98 |         return [0]
 99 | 
100 |     ints = []
101 |     while bigint > 0:
102 |         bigint, mod = divmod(bigint, 2 ** 32)
103 |         ints.append(mod)
104 |     return ints
105 | 


--------------------------------------------------------------------------------
/control/baselines/ppoc_int/twod_tmaze.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from gym.envs.mujoco import mujoco_env
 3 | from gym.spaces import Box
 4 | import seeding
 5 | 
 6 | 
 7 | class TwoDEnv(mujoco_env.MujocoEnv):
 8 |     def __init__(self, model_path, frame_skip, xbounds, ybounds):
 9 |         super(TwoDEnv, self).__init__(model_path=model_path, frame_skip=frame_skip)
10 |         assert isinstance(self.observation_space, Box)
11 |         assert self.observation_space.shape == (2,)
12 |         
13 |     def get_viewer(self):
14 |         return self._get_viewer()
15 | 
16 | import numpy as np
17 | from gym import utils
18 | import os
19 | 
20 | 
21 | 
22 | 
23 | def get_asset_xml(xml_name):
24 |     return os.path.join(os.path.join(os.path.dirname(__file__), 'assets'), xml_name)
25 |     
26 | class TMaze(TwoDEnv, utils.EzPickle):
27 |     NAME='TMaze'
28 |     def __init__(self, verbose=False,change_goal=None):
29 |         self.verbose = verbose
30 |         self.steps = 0
31 |         self.change_goal = change_goal
32 |         utils.EzPickle.__init__(self)
33 |         TwoDEnv.__init__(self, get_asset_xml('twod_tmaze.xml'), 2, xbounds=[-0.3,0.3], ybounds=[-0.3,0.3])
34 |         
35 | 
36 |     def _step(self, a):
37 |         self.do_simulation(a, self.frame_skip)
38 |         ob = self._get_obs()
39 |         pos = ob[0:2]
40 |         
41 |         if not self.change_goal:
42 |             target = self.model.body_pos.copy()[-1][:2]
43 |         else: 
44 |             target = self.change_goal
45 |         dist_thresh = 0.1
46 | 
47 | 
48 | 
49 |         if pos[0]>target[0]-dist_thresh and pos[0]<target[0]+dist_thresh\
50 |          and pos[1]<target[1]+dist_thresh and pos[1]>target[1]-dist_thresh:
51 |             reward = 1.
52 |         else:
53 |             reward = 0.
54 | 
55 |         self.steps += 1
56 |         if self.verbose:
57 |             print(pos, reward)
58 |         done = self.steps >= 500 or int(reward)
59 |         return ob, reward, done, np.concatenate([self.model.data.qvel]).ravel()
60 | 
61 |     def reset_model(self):
62 |         qpos = self.init_qpos + self.np_random.uniform(size=self.model.nq, low=-0.01, high=0.01)
63 |         qvel = self.init_qvel + self.np_random.uniform(size=self.model.nv, low=-0.01, high=0.01)
64 |         self.set_state(qpos, qvel)
65 |         self.steps = 0
66 |         return self._get_obs()
67 | 
68 |     def _get_obs(self):
69 |         init_pos = self.model.body_pos.copy()[1][:2]
70 |         return np.concatenate([self.model.data.qpos]).ravel() + init_pos
71 | 
72 |     def viewer_setup(self):
73 |         v = self.viewer
74 | 
75 |     def seed(self, seed=None):
76 |         self.np_random, seed = seeding.np_random(seed)
77 | 


--------------------------------------------------------------------------------
/control/baselines/results_plotter.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib
 3 | matplotlib.use('TkAgg') # Can change to 'Agg' for non-interactive mode
 4 | 
 5 | import matplotlib.pyplot as plt
 6 | plt.rcParams['svg.fonttype'] = 'none'
 7 | 
 8 | from baselines.common import plot_util
 9 | 
10 | X_TIMESTEPS = 'timesteps'
11 | X_EPISODES = 'episodes'
12 | X_WALLTIME = 'walltime_hrs'
13 | Y_REWARD = 'reward'
14 | Y_TIMESTEPS = 'timesteps'
15 | POSSIBLE_X_AXES = [X_TIMESTEPS, X_EPISODES, X_WALLTIME]
16 | EPISODES_WINDOW = 100
17 | COLORS = ['blue', 'green', 'red', 'cyan', 'magenta', 'yellow', 'black', 'purple', 'pink',
18 |         'brown', 'orange', 'teal', 'coral', 'lightblue', 'lime', 'lavender', 'turquoise',
19 |         'darkgreen', 'tan', 'salmon', 'gold', 'darkred', 'darkblue']
20 | 
21 | def rolling_window(a, window):
22 |     shape = a.shape[:-1] + (a.shape[-1] - window + 1, window)
23 |     strides = a.strides + (a.strides[-1],)
24 |     return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)
25 | 
26 | def window_func(x, y, window, func):
27 |     yw = rolling_window(y, window)
28 |     yw_func = func(yw, axis=-1)
29 |     return x[window-1:], yw_func
30 | 
31 | def ts2xy(ts, xaxis, yaxis):
32 |     if xaxis == X_TIMESTEPS:
33 |         x = np.cumsum(ts.l.values)
34 |     elif xaxis == X_EPISODES:
35 |         x = np.arange(len(ts))
36 |     elif xaxis == X_WALLTIME:
37 |         x = ts.t.values / 3600.
38 |     else:
39 |         raise NotImplementedError
40 |     if yaxis == Y_REWARD:
41 |         y = ts.r.values
42 |     elif yaxis == Y_TIMESTEPS:
43 |         y = ts.l.values
44 |     else:
45 |         raise NotImplementedError
46 |     return x, y
47 | 
48 | def plot_curves(xy_list, xaxis, yaxis, title):
49 |     fig = plt.figure(figsize=(8,2))
50 |     maxx = max(xy[0][-1] for xy in xy_list)
51 |     minx = 0
52 |     for (i, (x, y)) in enumerate(xy_list):
53 |         color = COLORS[i % len(COLORS)]
54 |         plt.scatter(x, y, s=2)
55 |         x, y_mean = window_func(x, y, EPISODES_WINDOW, np.mean) #So returns average of last EPISODE_WINDOW episodes
56 |         plt.plot(x, y_mean, color=color)
57 |     plt.xlim(minx, maxx)
58 |     plt.title(title)
59 |     plt.xlabel(xaxis)
60 |     plt.ylabel(yaxis)
61 |     plt.tight_layout()
62 |     fig.canvas.mpl_connect('resize_event', lambda event: plt.tight_layout())
63 |     plt.grid(True)
64 | 
65 | 
66 | def split_by_task(taskpath):
67 |     return taskpath['dirname'].split('/')[-1].split('-')[0]
68 | 
69 | def plot_results(dirs, num_timesteps=10e6, xaxis=X_TIMESTEPS, yaxis=Y_REWARD, title='', split_fn=split_by_task):
70 |     results = plot_util.load_results(dirs)
71 |     plot_util.plot_results(results, xy_fn=lambda r: ts2xy(r['monitor'], xaxis, yaxis), split_fn=split_fn, average_group=True, resample=int(1e6))
72 | 
73 | # Example usage in jupyter-notebook
74 | # from baselines.results_plotter import plot_results
75 | # %matplotlib inline
76 | # plot_results("./log")
77 | # Here ./log is a directory containing the monitor.csv files
78 | 
79 | def main():
80 |     import argparse
81 |     import os
82 |     parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
83 |     parser.add_argument('--dirs', help='List of log directories', nargs = '*', default=['./log'])
84 |     parser.add_argument('--num_timesteps', type=int, default=int(10e6))
85 |     parser.add_argument('--xaxis', help = 'Varible on X-axis', default = X_TIMESTEPS)
86 |     parser.add_argument('--yaxis', help = 'Varible on Y-axis', default = Y_REWARD)
87 |     parser.add_argument('--task_name', help = 'Title of plot', default = 'Breakout')
88 |     args = parser.parse_args()
89 |     args.dirs = [os.path.abspath(dir) for dir in args.dirs]
90 |     plot_results(args.dirs, args.num_timesteps, args.xaxis, args.yaxis, args.task_name)
91 |     plt.show()
92 | 
93 | if __name__ == '__main__':
94 |     main()
95 | 


--------------------------------------------------------------------------------
/control/data/cartpole.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kkhetarpal/ioc/fb88d5b881e0b5c317020b23874495e614b3ddc7/control/data/cartpole.gif


--------------------------------------------------------------------------------
/control/data/fetchPickAndPlaceContrast.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kkhetarpal/ioc/fb88d5b881e0b5c317020b23874495e614b3ddc7/control/data/fetchPickAndPlaceContrast.png


--------------------------------------------------------------------------------
/control/data/logo.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kkhetarpal/ioc/fb88d5b881e0b5c317020b23874495e614b3ddc7/control/data/logo.jpg


--------------------------------------------------------------------------------
/control/setup.cfg:
--------------------------------------------------------------------------------
1 | [flake8]
2 | select = F,E999,W291,W293
3 | exclude = 
4 |     .git,
5 |     __pycache__,
6 |     baselines/ppo1,
7 |     baselines/bench,
8 | 


--------------------------------------------------------------------------------
/control/setup.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | from setuptools import setup, find_packages
 3 | import sys
 4 | 
 5 | if sys.version_info.major != 3:
 6 |     print('This Python is only compatible with Python 3, but you are running '
 7 |           'Python {}. The installation will likely fail.'.format(sys.version_info.major))
 8 | 
 9 | 
10 | extras = {
11 |     'test': [
12 |         'filelock',
13 |         'pytest',
14 |         'pytest-forked',
15 |         'atari-py'
16 |     ],
17 |     'bullet': [
18 |         'pybullet',
19 |     ],
20 |     'mpi': [
21 |         'mpi4py'
22 |     ]
23 | }
24 | 
25 | all_deps = []
26 | for group_name in extras:
27 |     all_deps += extras[group_name]
28 | 
29 | extras['all'] = all_deps
30 | 
31 | setup(name='baselines',
32 |       packages=[package for package in find_packages()
33 |                 if package.startswith('baselines')],
34 |       install_requires=[
35 |           'gym',
36 |           'scipy',
37 |           'tqdm',
38 |           'joblib',
39 |           'dill',
40 |           'progressbar2',
41 |           'cloudpickle',
42 |           'click',
43 |           'opencv-python'
44 |       ],
45 |       extras_require=extras,
46 |       description='OpenAI baselines: high quality implementations of reinforcement learning algorithms',
47 |       author='OpenAI',
48 |       url='https://github.com/openai/baselines',
49 |       author_email='gym@openai.com',
50 |       version='0.1.5')
51 | 
52 | 
53 | # ensure there is some tensorflow build with version above 1.4
54 | import pkg_resources
55 | tf_pkg = None
56 | for tf_pkg_name in ['tensorflow', 'tensorflow-gpu', 'tf-nightly', 'tf-nightly-gpu']:
57 |     try:
58 |         tf_pkg = pkg_resources.get_distribution(tf_pkg_name)
59 |     except pkg_resources.DistributionNotFound:
60 |         pass
61 | assert tf_pkg is not None, 'TensorFlow needed, of version above 1.4'
62 | from distutils.version import LooseVersion
63 | assert LooseVersion(re.sub(r'-?rc\d+$', '', tf_pkg.version)) >= LooseVersion('1.4.0')
64 | 


--------------------------------------------------------------------------------
/launcher_miniworld.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | main_lr=(1e-4) #(1e-4 3e-4 7e-4 5e-4)
 4 | int_lr=(9e-5)  #(1e-4 3e-3 8e-4 8e-5 5e-4 3e-4 9e-5) #(3e-3 8e-5 8e-4)
 5 | seed=(0)
 6 | port=({4000..4020})
 7 | envname="MiniWorld-OneRoom-v0"  #"MiniWorld-PickupObjs-v0" #MiniWorld-PutNext-v0
 8 | numoption=2
 9 | 
10 | count=0
11 | for _main_lr in ${main_lr[@]}
12 | do
13 |    for _int_lr in ${int_lr[@]}
14 |    do
15 |         for _seed in ${seed[@]}
16 |         do
17 |                 if [ -f temprun.sh ] ; then
18 |                         rm temprun.sh
19 |                 fi
20 | 
21 |                 echo "#!/bin/bash" >> temprun.sh
22 |                 echo "#SBATCH --account=addccaccounthere" >> temprun.sh
23 |                 echo "#SBATCH --output=\"/scratch/username/slurm-%j.out\"" >> temprun.sh
24 |                 echo "#SBATCH --gres=gpu:1" >> temprun.sh
25 |                 echo "#SBATCH --mem=30G" >> temprun.sh
26 |                 echo "#SBATCH --time=10:00:00" >> temprun.sh
27 |                 echo "source $HOME/intf/bin/activate" >> temprun.sh
28 |                 echo "cd $HOME/ioc/miniworld/baselines/ppoc_int/" >> temprun.sh
29 |                 k="xvfb-run -n "${port[$count]}" -s \"-screen 0 1024x768x24 -ac +extension GLX +render -noreset\" python run_miniw.py --env "$envname" --seed $_seed --opt $numoption --saves --mainlr $_main_lr --intlr $_int_lr --switch --wsaves"
30 |                 echo $k >> temprun.sh
31 |                 echo $k
32 |                 eval "sbatch temprun.sh"
33 |                 rm temprun.sh
34 |                 count=$((count + 1))
35 |         done
36 |    done
37 | done
38 | 


--------------------------------------------------------------------------------
/launcher_mujoco.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | seed=(0 1 2 3 4)
 4 | mainlr=(1e-4)
 5 | #intfclr=(1e-4 3e-4 5e-4 7e-4 9e-4)
 6 | intfclr=(5e-4)
 7 | #piolr=(7e-4 9e-4 3e-4 5e-4)
 8 | piolr=(3e-4)
 9 | 
10 | port=($(seq 4000 1 4100))
11 | 
12 | envname="HalfCheetahDir-v1"
13 | 
14 | count=0
15 | 
16 | for _piolr in ${piolr[@]}
17 | do
18 |     for _intfclr in ${intfclr[@]}
19 |     do
20 |         for _mainlr in ${mainlr[@]}
21 |         do
22 |             for _seed in ${seed[@]}
23 |             do
24 |                 if [ -f temprun.sh ] ; then
25 |                     rm temprun.sh
26 |                 fi
27 |                 echo "#!/bin/bash" >> temprun.sh
28 |                 echo "#SBATCH --account=addaccounthere" >> temprun.sh
29 |                 echo "#SBATCH --output=\"/scratch/username/maml/Maml_seed${_seed}_mainlr${_mainlr}_intfclr_${_intfclr}_piolr_${_piolr}-%j.out\"" >> temprun.sh
30 |                 echo "#SBATCH --job-name=Maml_seed${_seed}_mainlr${_mainlr}_intfclr_${_intfclr}_piolr_${_piolr}" >> temprun.sh
31 |                 echo "#SBATCH --gres=gpu:0" >> temprun.sh
32 |                 echo "#SBATCH --mem=5G" >> temprun.sh
33 |                 echo "#SBATCH --time=1:00:00" >> temprun.sh
34 |                 echo "source $HOME/miniconda3/etc/profile.d/conda.sh" >> temprun.sh
35 |                 echo "conda activate intfc" >> temprun.sh
36 |                 echo "cd $HOME/ioc/control/baselines/ppoc_int/" >> temprun.sh
37 |                 k="xvfb-run -n "${port[$count]}" -s \"-screen 0 1024x768x24 -ac +extension GLX +render -noreset\" python run_mujoco.py --env "$envname" --saves --opt 2 --seed ${_seed} --mainlr ${_mainlr} --intlr ${_intfclr} --piolr ${_piolr} --switch --wsaves"
38 |                 echo $k >> temprun.sh
39 |                 echo $k
40 |                 eval "sbatch temprun.sh"
41 |                 rm temprun.sh
42 |                 count=$((count + 1))
43 |             done
44 |         done
45 |     done
46 | done
47 | 


--------------------------------------------------------------------------------
/miniworld/.benchmark_pattern:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/miniworld/.gitignore:
--------------------------------------------------------------------------------
 1 | *.swp
 2 | *.pyc
 3 | *.pkl
 4 | *.py~
 5 | .pytest_cache
 6 | .DS_Store
 7 | .idea
 8 | 
 9 | # Setuptools distribution and build folders.
10 | /dist/
11 | /build
12 | keys/
13 | 
14 | # Virtualenv
15 | /env
16 | 
17 | 
18 | *.sublime-project
19 | *.sublime-workspace
20 | 
21 | .idea
22 | 
23 | logs/
24 | 
25 | .ipynb_checkpoints
26 | ghostdriver.log
27 | 
28 | htmlcov
29 | 
30 | junk
31 | src
32 | 
33 | *.egg-info
34 | .cache
35 | 
36 | MUJOCO_LOG.TXT
37 | 


--------------------------------------------------------------------------------
/miniworld/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: python
 2 | python:
 3 |     - "3.6"
 4 | 
 5 | services:
 6 |     - docker
 7 | 
 8 | install:
 9 |     - pip install flake8
10 |     - docker build . -t baselines-test
11 | 
12 | script:
13 |     - flake8 . --show-source --statistics
14 |     - docker run baselines-test pytest -v --forked .
15 | 


--------------------------------------------------------------------------------
/miniworld/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM python:3.6
 2 | 
 3 | RUN apt-get -y update && apt-get -y install ffmpeg
 4 | # RUN apt-get -y update && apt-get -y install git wget python-dev python3-dev libopenmpi-dev python-pip zlib1g-dev cmake python-opencv
 5 | 
 6 | ENV CODE_DIR /root/code
 7 | 
 8 | COPY . $CODE_DIR/baselines
 9 | WORKDIR $CODE_DIR/baselines
10 | 
11 | # Clean up pycache and pyc files
12 | RUN rm -rf __pycache__ && \
13 |     find . -name "*.pyc" -delete && \
14 |     pip install tensorflow && \
15 |     pip install -e .[test]
16 | 
17 | 
18 | CMD /bin/bash
19 | 


--------------------------------------------------------------------------------
/miniworld/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License
 2 | 
 3 | Copyright (c) 2017 OpenAI (http://openai.com)
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/miniworld/baselines/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kkhetarpal/ioc/fb88d5b881e0b5c317020b23874495e614b3ddc7/miniworld/baselines/__init__.py


--------------------------------------------------------------------------------
/miniworld/baselines/bench/__init__.py:
--------------------------------------------------------------------------------
1 | from baselines.bench.benchmarks import *
2 | from baselines.bench.monitor import *
3 | 


--------------------------------------------------------------------------------
/miniworld/baselines/common/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa F403
2 | from baselines.common.console_util import *
3 | from baselines.common.dataset import Dataset
4 | from baselines.common.math_util import *
5 | from baselines.common.misc_util import *
6 | 


--------------------------------------------------------------------------------
/miniworld/baselines/common/cg.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | def cg(f_Ax, b, cg_iters=10, callback=None, verbose=False, residual_tol=1e-10):
 3 |     """
 4 |     Demmel p 312
 5 |     """
 6 |     p = b.copy()
 7 |     r = b.copy()
 8 |     x = np.zeros_like(b)
 9 |     rdotr = r.dot(r)
10 | 
11 |     fmtstr =  "%10i %10.3g %10.3g"
12 |     titlestr =  "%10s %10s %10s"
13 |     if verbose: print(titlestr % ("iter", "residual norm", "soln norm"))
14 | 
15 |     for i in range(cg_iters):
16 |         if callback is not None:
17 |             callback(x)
18 |         if verbose: print(fmtstr % (i, rdotr, np.linalg.norm(x)))
19 |         z = f_Ax(p)
20 |         v = rdotr / p.dot(z)
21 |         x += v*p
22 |         r -= v*z
23 |         newrdotr = r.dot(r)
24 |         mu = newrdotr/rdotr
25 |         p = r + mu*p
26 | 
27 |         rdotr = newrdotr
28 |         if rdotr < residual_tol:
29 |             break
30 | 
31 |     if callback is not None:
32 |         callback(x)
33 |     if verbose: print(fmtstr % (i+1, rdotr, np.linalg.norm(x)))  # pylint: disable=W0631
34 |     return x
35 | 


--------------------------------------------------------------------------------
/miniworld/baselines/common/console_util.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function
 2 | from contextlib import contextmanager
 3 | import numpy as np
 4 | import time
 5 | import shlex
 6 | import subprocess
 7 | 
 8 | # ================================================================
 9 | # Misc
10 | # ================================================================
11 | 
12 | def fmt_row(width, row, header=False):
13 |     out = " | ".join(fmt_item(x, width) for x in row)
14 |     if header: out = out + "\n" + "-"*len(out)
15 |     return out
16 | 
17 | def fmt_item(x, l):
18 |     if isinstance(x, np.ndarray):
19 |         assert x.ndim==0
20 |         x = x.item()
21 |     if isinstance(x, (float, np.float32, np.float64)):
22 |         v = abs(x)
23 |         if (v < 1e-4 or v > 1e+4) and v > 0:
24 |             rep = "%7.2e" % x
25 |         else:
26 |             rep = "%7.5f" % x
27 |     else: rep = str(x)
28 |     return " "*(l - len(rep)) + rep
29 | 
30 | color2num = dict(
31 |     gray=30,
32 |     red=31,
33 |     green=32,
34 |     yellow=33,
35 |     blue=34,
36 |     magenta=35,
37 |     cyan=36,
38 |     white=37,
39 |     crimson=38
40 | )
41 | 
42 | def colorize(string, color='green', bold=False, highlight=False):
43 |     attr = []
44 |     num = color2num[color]
45 |     if highlight: num += 10
46 |     attr.append(str(num))
47 |     if bold: attr.append('1')
48 |     return '\x1b[%sm%s\x1b[0m' % (';'.join(attr), string)
49 | 
50 | def print_cmd(cmd, dry=False):
51 |     if isinstance(cmd, str):  # for shell=True
52 |         pass
53 |     else:
54 |         cmd = ' '.join(shlex.quote(arg) for arg in cmd)
55 |     print(colorize(('CMD: ' if not dry else 'DRY: ') + cmd))
56 | 
57 | 
58 | def get_git_commit(cwd=None):
59 |     return subprocess.check_output(['git', 'rev-parse', '--short', 'HEAD'], cwd=cwd).decode('utf8')
60 | 
61 | def get_git_commit_message(cwd=None):
62 |     return subprocess.check_output(['git', 'show', '-s', '--format=%B', 'HEAD'], cwd=cwd).decode('utf8')
63 | 
64 | def ccap(cmd, dry=False, env=None, **kwargs):
65 |     print_cmd(cmd, dry)
66 |     if not dry:
67 |         subprocess.check_call(cmd, env=env, **kwargs)
68 | 
69 | 
70 | MESSAGE_DEPTH = 0
71 | 
72 | @contextmanager
73 | def timed(msg):
74 |     global MESSAGE_DEPTH #pylint: disable=W0603
75 |     print(colorize('\t'*MESSAGE_DEPTH + '=: ' + msg, color='magenta'))
76 |     tstart = time.time()
77 |     MESSAGE_DEPTH += 1
78 |     yield
79 |     MESSAGE_DEPTH -= 1
80 |     print(colorize('\t'*MESSAGE_DEPTH + "done in %.3f seconds"%(time.time() - tstart), color='magenta'))
81 | 


--------------------------------------------------------------------------------
/miniworld/baselines/common/dataset.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | class Dataset(object):
 4 |     def __init__(self, data_map, deterministic=False, shuffle=True):
 5 |         self.data_map = data_map
 6 |         self.deterministic = deterministic
 7 |         self.enable_shuffle = shuffle
 8 |         self.n = next(iter(data_map.values())).shape[0]
 9 |         self._next_id = 0
10 |         self.shuffle()
11 | 
12 |     def shuffle(self):
13 |         if self.deterministic:
14 |             return
15 |         perm = np.arange(self.n)
16 |         np.random.shuffle(perm)
17 | 
18 |         for key in self.data_map:
19 |             self.data_map[key] = self.data_map[key][perm]
20 | 
21 |         self._next_id = 0
22 | 
23 |     def next_batch(self, batch_size):
24 |         if self._next_id >= self.n and self.enable_shuffle:
25 |             self.shuffle()
26 | 
27 |         cur_id = self._next_id
28 |         cur_batch_size = min(batch_size, self.n - self._next_id)
29 |         self._next_id += cur_batch_size
30 | 
31 |         data_map = dict()
32 |         for key in self.data_map:
33 |             data_map[key] = self.data_map[key][cur_id:cur_id+cur_batch_size]
34 |         return data_map
35 | 
36 |     def iterate_once(self, batch_size):
37 |         if self.enable_shuffle: self.shuffle()
38 | 
39 |         while self._next_id <= self.n - batch_size:
40 |             yield self.next_batch(batch_size)
41 |         self._next_id = 0
42 | 
43 |     def subset(self, num_elements, deterministic=True):
44 |         data_map = dict()
45 |         for key in self.data_map:
46 |             data_map[key] = self.data_map[key][:num_elements]
47 |         return Dataset(data_map, deterministic)
48 | 
49 | 
50 | def iterbatches(arrays, *, num_batches=None, batch_size=None, shuffle=True, include_final_partial_batch=True):
51 |     assert (num_batches is None) != (batch_size is None), 'Provide num_batches or batch_size, but not both'
52 |     arrays = tuple(map(np.asarray, arrays))
53 |     n = arrays[0].shape[0]
54 |     assert all(a.shape[0] == n for a in arrays[1:])
55 |     inds = np.arange(n)
56 |     if shuffle: np.random.shuffle(inds)
57 |     sections = np.arange(0, n, batch_size)[1:] if num_batches is None else num_batches
58 |     for batch_inds in np.array_split(inds, sections):
59 |         if include_final_partial_batch or len(batch_inds) == batch_size:
60 |             yield tuple(a[batch_inds] for a in arrays)
61 | 


--------------------------------------------------------------------------------
/miniworld/baselines/common/input.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import tensorflow as tf
 3 | from gym.spaces import Discrete, Box, MultiDiscrete
 4 | 
 5 | def observation_placeholder(ob_space, batch_size=None, name='Ob'):
 6 |     '''
 7 |     Create placeholder to feed observations into of the size appropriate to the observation space
 8 | 
 9 |     Parameters:
10 |     ----------
11 | 
12 |     ob_space: gym.Space     observation space
13 | 
14 |     batch_size: int         size of the batch to be fed into input. Can be left None in most cases.
15 | 
16 |     name: str               name of the placeholder
17 | 
18 |     Returns:
19 |     -------
20 | 
21 |     tensorflow placeholder tensor
22 |     '''
23 | 
24 |     assert isinstance(ob_space, Discrete) or isinstance(ob_space, Box) or isinstance(ob_space, MultiDiscrete), \
25 |         'Can only deal with Discrete and Box observation spaces for now'
26 | 
27 |     dtype = ob_space.dtype
28 |     if dtype == np.int8:
29 |         dtype = np.uint8
30 | 
31 |     return tf.placeholder(shape=(batch_size,) + ob_space.shape, dtype=dtype, name=name)
32 | 
33 | 
34 | def observation_input(ob_space, batch_size=None, name='Ob'):
35 |     '''
36 |     Create placeholder to feed observations into of the size appropriate to the observation space, and add input
37 |     encoder of the appropriate type.
38 |     '''
39 | 
40 |     placeholder = observation_placeholder(ob_space, batch_size, name)
41 |     return placeholder, encode_observation(ob_space, placeholder)
42 | 
43 | def encode_observation(ob_space, placeholder):
44 |     '''
45 |     Encode input in the way that is appropriate to the observation space
46 | 
47 |     Parameters:
48 |     ----------
49 | 
50 |     ob_space: gym.Space             observation space
51 | 
52 |     placeholder: tf.placeholder     observation input placeholder
53 |     '''
54 |     if isinstance(ob_space, Discrete):
55 |         return tf.to_float(tf.one_hot(placeholder, ob_space.n))
56 |     elif isinstance(ob_space, Box):
57 |         return tf.to_float(placeholder)
58 |     elif isinstance(ob_space, MultiDiscrete):
59 |         placeholder = tf.cast(placeholder, tf.int32)
60 |         one_hots = [tf.to_float(tf.one_hot(placeholder[..., i], ob_space.nvec[i])) for i in range(placeholder.shape[-1])]
61 |         return tf.concat(one_hots, axis=-1)
62 |     else:
63 |         raise NotImplementedError
64 | 
65 | 


--------------------------------------------------------------------------------
/miniworld/baselines/common/math_util.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import scipy.signal
 3 | 
 4 | 
 5 | def discount(x, gamma):
 6 |     """
 7 |     computes discounted sums along 0th dimension of x.
 8 | 
 9 |     inputs
10 |     ------
11 |     x: ndarray
12 |     gamma: float
13 | 
14 |     outputs
15 |     -------
16 |     y: ndarray with same shape as x, satisfying
17 | 
18 |         y[t] = x[t] + gamma*x[t+1] + gamma^2*x[t+2] + ... + gamma^k x[t+k],
19 |                 where k = len(x) - t - 1
20 | 
21 |     """
22 |     assert x.ndim >= 1
23 |     return scipy.signal.lfilter([1],[1,-gamma],x[::-1], axis=0)[::-1]
24 | 
25 | def explained_variance(ypred,y):
26 |     """
27 |     Computes fraction of variance that ypred explains about y.
28 |     Returns 1 - Var[y-ypred] / Var[y]
29 | 
30 |     interpretation:
31 |         ev=0  =>  might as well have predicted zero
32 |         ev=1  =>  perfect prediction
33 |         ev<0  =>  worse than just predicting zero
34 | 
35 |     """
36 |     assert y.ndim == 1 and ypred.ndim == 1
37 |     vary = np.var(y)
38 |     return np.nan if vary==0 else 1 - np.var(y-ypred)/vary
39 | 
40 | def explained_variance_2d(ypred, y):
41 |     assert y.ndim == 2 and ypred.ndim == 2
42 |     vary = np.var(y, axis=0)
43 |     out = 1 - np.var(y-ypred)/vary
44 |     out[vary < 1e-10] = 0
45 |     return out
46 | 
47 | def ncc(ypred, y):
48 |     return np.corrcoef(ypred, y)[1,0]
49 | 
50 | def flatten_arrays(arrs):
51 |     return np.concatenate([arr.flat for arr in arrs])
52 | 
53 | def unflatten_vector(vec, shapes):
54 |     i=0
55 |     arrs = []
56 |     for shape in shapes:
57 |         size = np.prod(shape)
58 |         arr = vec[i:i+size].reshape(shape)
59 |         arrs.append(arr)
60 |         i += size
61 |     return arrs
62 | 
63 | def discount_with_boundaries(X, New, gamma):
64 |     """
65 |     X: 2d array of floats, time x features
66 |     New: 2d array of bools, indicating when a new episode has started
67 |     """
68 |     Y = np.zeros_like(X)
69 |     T = X.shape[0]
70 |     Y[T-1] = X[T-1]
71 |     for t in range(T-2, -1, -1):
72 |         Y[t] = X[t] + gamma * Y[t+1] * (1 - New[t+1])
73 |     return Y
74 | 
75 | def test_discount_with_boundaries():
76 |     gamma=0.9
77 |     x = np.array([1.0, 2.0, 3.0, 4.0], 'float32')
78 |     starts = [1.0, 0.0, 0.0, 1.0]
79 |     y = discount_with_boundaries(x, starts, gamma)
80 |     assert np.allclose(y, [
81 |         1 + gamma * 2 + gamma**2 * 3,
82 |         2 + gamma * 3,
83 |         3,
84 |         4
85 |     ])
86 | 


--------------------------------------------------------------------------------
/miniworld/baselines/common/mpi_adam.py:
--------------------------------------------------------------------------------
  1 | import baselines.common.tf_util as U
  2 | import tensorflow as tf
  3 | import numpy as np
  4 | try:
  5 |     from mpi4py import MPI
  6 | except ImportError:
  7 |     MPI = None
  8 | 
  9 | 
 10 | class MpiAdam(object):
 11 |     def __init__(self, var_list, *, beta1=0.9, beta2=0.999, epsilon=1e-08, scale_grad_by_procs=True, comm=None):
 12 |         self.var_list = var_list
 13 |         self.beta1 = beta1
 14 |         self.beta2 = beta2
 15 |         self.epsilon = epsilon
 16 |         self.scale_grad_by_procs = scale_grad_by_procs
 17 |         size = sum(U.numel(v) for v in var_list)
 18 |         self.m = np.zeros(size, 'float32')
 19 |         self.v = np.zeros(size, 'float32')
 20 |         self.t = 0
 21 |         self.setfromflat = U.SetFromFlat(var_list)
 22 |         self.getflat = U.GetFlat(var_list)
 23 |         self.comm = MPI.COMM_WORLD if comm is None and MPI is not None else comm
 24 | 
 25 |     def update(self, localg, stepsize):
 26 |         if self.t % 100 == 0:
 27 |             self.check_synced()
 28 |         localg = localg.astype('float32')
 29 |         if self.comm is not None:
 30 |             globalg = np.zeros_like(localg)
 31 |             self.comm.Allreduce(localg, globalg, op=MPI.SUM)
 32 |             if self.scale_grad_by_procs:
 33 |                 globalg /= self.comm.Get_size()
 34 |         else:
 35 |             globalg = np.copy(localg)
 36 | 
 37 |         self.t += 1
 38 |         a = stepsize * np.sqrt(1 - self.beta2**self.t)/(1 - self.beta1**self.t)
 39 |         self.m = self.beta1 * self.m + (1 - self.beta1) * globalg
 40 |         self.v = self.beta2 * self.v + (1 - self.beta2) * (globalg * globalg)
 41 |         step = (- a) * self.m / (np.sqrt(self.v) + self.epsilon)
 42 |         self.setfromflat(self.getflat() + step)
 43 | 
 44 |     def sync(self):
 45 |         if self.comm is None:
 46 |             return
 47 |         theta = self.getflat()
 48 |         self.comm.Bcast(theta, root=0)
 49 |         self.setfromflat(theta)
 50 | 
 51 |     def check_synced(self):
 52 |         if self.comm is None:
 53 |             return
 54 |         if self.comm.Get_rank() == 0: # this is root
 55 |             theta = self.getflat()
 56 |             self.comm.Bcast(theta, root=0)
 57 |         else:
 58 |             thetalocal = self.getflat()
 59 |             thetaroot = np.empty_like(thetalocal)
 60 |             self.comm.Bcast(thetaroot, root=0)
 61 |             assert (thetaroot == thetalocal).all(), (thetaroot, thetalocal)
 62 | 
 63 | @U.in_session
 64 | def test_MpiAdam():
 65 |     np.random.seed(0)
 66 |     tf.set_random_seed(0)
 67 | 
 68 |     a = tf.Variable(np.random.randn(3).astype('float32'))
 69 |     b = tf.Variable(np.random.randn(2,5).astype('float32'))
 70 |     loss = tf.reduce_sum(tf.square(a)) + tf.reduce_sum(tf.sin(b))
 71 | 
 72 |     stepsize = 1e-2
 73 |     update_op = tf.train.AdamOptimizer(stepsize).minimize(loss)
 74 |     do_update = U.function([], loss, updates=[update_op])
 75 | 
 76 |     tf.get_default_session().run(tf.global_variables_initializer())
 77 |     losslist_ref = []
 78 |     for i in range(10):
 79 |         l = do_update()
 80 |         print(i, l)
 81 |         losslist_ref.append(l)
 82 | 
 83 | 
 84 | 
 85 |     tf.set_random_seed(0)
 86 |     tf.get_default_session().run(tf.global_variables_initializer())
 87 | 
 88 |     var_list = [a,b]
 89 |     lossandgrad = U.function([], [loss, U.flatgrad(loss, var_list)])
 90 |     adam = MpiAdam(var_list)
 91 | 
 92 |     losslist_test = []
 93 |     for i in range(10):
 94 |         l,g = lossandgrad()
 95 |         adam.update(g, stepsize)
 96 |         print(i,l)
 97 |         losslist_test.append(l)
 98 | 
 99 |     np.testing.assert_allclose(np.array(losslist_ref), np.array(losslist_test), atol=1e-4)
100 | 
101 | 
102 | if __name__ == '__main__':
103 |     test_MpiAdam()
104 | 


--------------------------------------------------------------------------------
/miniworld/baselines/common/mpi_adam_optimizer.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import tensorflow as tf
 3 | from mpi4py import MPI
 4 | 
 5 | class MpiAdamOptimizer(tf.train.AdamOptimizer):
 6 |     """Adam optimizer that averages gradients across mpi processes."""
 7 |     def __init__(self, comm, **kwargs):
 8 |         self.comm = comm
 9 |         tf.train.AdamOptimizer.__init__(self, **kwargs)
10 |     def compute_gradients(self, loss, var_list, **kwargs):
11 |         grads_and_vars = tf.train.AdamOptimizer.compute_gradients(self, loss, var_list, **kwargs)
12 |         grads_and_vars = [(g, v) for g, v in grads_and_vars if g is not None]
13 |         flat_grad = tf.concat([tf.reshape(g, (-1,)) for g, v in grads_and_vars], axis=0)
14 |         shapes = [v.shape.as_list() for g, v in grads_and_vars]
15 |         sizes = [int(np.prod(s)) for s in shapes]
16 | 
17 |         num_tasks = self.comm.Get_size()
18 |         buf = np.zeros(sum(sizes), np.float32)
19 | 
20 |         def _collect_grads(flat_grad):
21 |             self.comm.Allreduce(flat_grad, buf, op=MPI.SUM)
22 |             np.divide(buf, float(num_tasks), out=buf)
23 |             return buf
24 | 
25 |         avg_flat_grad = tf.py_func(_collect_grads, [flat_grad], tf.float32)
26 |         avg_flat_grad.set_shape(flat_grad.shape)
27 |         avg_grads = tf.split(avg_flat_grad, sizes, axis=0)
28 |         avg_grads_and_vars = [(tf.reshape(g, v.shape), v)
29 |                     for g, (_, v) in zip(avg_grads, grads_and_vars)]
30 | 
31 |         return avg_grads_and_vars
32 | 


--------------------------------------------------------------------------------
/miniworld/baselines/common/mpi_fork.py:
--------------------------------------------------------------------------------
 1 | import os, subprocess, sys
 2 | 
 3 | def mpi_fork(n, bind_to_core=False):
 4 |     """Re-launches the current script with workers
 5 |     Returns "parent" for original parent, "child" for MPI children
 6 |     """
 7 |     if n<=1:
 8 |         return "child"
 9 |     if os.getenv("IN_MPI") is None:
10 |         env = os.environ.copy()
11 |         env.update(
12 |             MKL_NUM_THREADS="1",
13 |             OMP_NUM_THREADS="1",
14 |             IN_MPI="1"
15 |         )
16 |         args = ["mpirun", "-np", str(n)]
17 |         if bind_to_core:
18 |             args += ["-bind-to", "core"]
19 |         args += [sys.executable] + sys.argv
20 |         subprocess.check_call(args, env=env)
21 |         return "parent"
22 |     else:
23 |         return "child"
24 | 


--------------------------------------------------------------------------------
/miniworld/baselines/common/mpi_moments.py:
--------------------------------------------------------------------------------
 1 | from mpi4py import MPI
 2 | import numpy as np
 3 | from baselines.common import zipsame
 4 | 
 5 | 
 6 | def mpi_mean(x, axis=0, comm=None, keepdims=False):
 7 |     x = np.asarray(x)
 8 |     assert x.ndim > 0
 9 |     if comm is None: comm = MPI.COMM_WORLD
10 |     xsum = x.sum(axis=axis, keepdims=keepdims)
11 |     n = xsum.size
12 |     localsum = np.zeros(n+1, x.dtype)
13 |     localsum[:n] = xsum.ravel()
14 |     localsum[n] = x.shape[axis]
15 |     globalsum = np.zeros_like(localsum)
16 |     comm.Allreduce(localsum, globalsum, op=MPI.SUM)
17 |     return globalsum[:n].reshape(xsum.shape) / globalsum[n], globalsum[n]
18 | 
19 | def mpi_moments(x, axis=0, comm=None, keepdims=False):
20 |     x = np.asarray(x)
21 |     assert x.ndim > 0
22 |     mean, count = mpi_mean(x, axis=axis, comm=comm, keepdims=True)
23 |     sqdiffs = np.square(x - mean)
24 |     meansqdiff, count1 = mpi_mean(sqdiffs, axis=axis, comm=comm, keepdims=True)
25 |     assert count1 == count
26 |     std = np.sqrt(meansqdiff)
27 |     if not keepdims:
28 |         newshape = mean.shape[:axis] + mean.shape[axis+1:]
29 |         mean = mean.reshape(newshape)
30 |         std = std.reshape(newshape)
31 |     return mean, std, count
32 | 
33 | 
34 | def test_runningmeanstd():
35 |     import subprocess
36 |     subprocess.check_call(['mpirun', '-np', '3',
37 |         'python','-c',
38 |         'from baselines.common.mpi_moments import _helper_runningmeanstd; _helper_runningmeanstd()'])
39 | 
40 | def _helper_runningmeanstd():
41 |     comm = MPI.COMM_WORLD
42 |     np.random.seed(0)
43 |     for (triple,axis) in [
44 |         ((np.random.randn(3), np.random.randn(4), np.random.randn(5)),0),
45 |         ((np.random.randn(3,2), np.random.randn(4,2), np.random.randn(5,2)),0),
46 |         ((np.random.randn(2,3), np.random.randn(2,4), np.random.randn(2,4)),1),
47 |         ]:
48 | 
49 | 
50 |         x = np.concatenate(triple, axis=axis)
51 |         ms1 = [x.mean(axis=axis), x.std(axis=axis), x.shape[axis]]
52 | 
53 | 
54 |         ms2 = mpi_moments(triple[comm.Get_rank()],axis=axis)
55 | 
56 |         for (a1,a2) in zipsame(ms1, ms2):
57 |             print(a1, a2)
58 |             assert np.allclose(a1, a2)
59 |             print("ok!")
60 | 
61 | 


--------------------------------------------------------------------------------
/miniworld/baselines/common/mpi_util.py:
--------------------------------------------------------------------------------
  1 | from collections import defaultdict
  2 | from mpi4py import MPI
  3 | import os, numpy as np
  4 | import platform
  5 | import shutil
  6 | import subprocess
  7 | 
  8 | def sync_from_root(sess, variables, comm=None):
  9 |     """
 10 |     Send the root node's parameters to every worker.
 11 |     Arguments:
 12 |       sess: the TensorFlow session.
 13 |       variables: all parameter variables including optimizer's
 14 |     """
 15 |     if comm is None: comm = MPI.COMM_WORLD
 16 |     rank = comm.Get_rank()
 17 |     for var in variables:
 18 |         if rank == 0:
 19 |             comm.Bcast(sess.run(var))
 20 |         else:
 21 |             import tensorflow as tf
 22 |             returned_var = np.empty(var.shape, dtype='float32')
 23 |             comm.Bcast(returned_var)
 24 |             sess.run(tf.assign(var, returned_var))
 25 | 
 26 | def gpu_count():
 27 |     """
 28 |     Count the GPUs on this machine.
 29 |     """
 30 |     if shutil.which('nvidia-smi') is None:
 31 |         return 0
 32 |     output = subprocess.check_output(['nvidia-smi', '--query-gpu=gpu_name', '--format=csv'])
 33 |     return max(0, len(output.split(b'\n')) - 2)
 34 | 
 35 | def setup_mpi_gpus():
 36 |     """
 37 |     Set CUDA_VISIBLE_DEVICES using MPI.
 38 |     """
 39 |     num_gpus = gpu_count()
 40 |     if num_gpus == 0:
 41 |         return
 42 |     local_rank, _ = get_local_rank_size(MPI.COMM_WORLD)
 43 |     os.environ['CUDA_VISIBLE_DEVICES'] = str(local_rank % num_gpus)
 44 | 
 45 | def get_local_rank_size(comm):
 46 |     """
 47 |     Returns the rank of each process on its machine
 48 |     The processes on a given machine will be assigned ranks
 49 |         0, 1, 2, ..., N-1,
 50 |     where N is the number of processes on this machine.
 51 | 
 52 |     Useful if you want to assign one gpu per machine
 53 |     """
 54 |     this_node = platform.node()
 55 |     ranks_nodes = comm.allgather((comm.Get_rank(), this_node))
 56 |     node2rankssofar = defaultdict(int)
 57 |     local_rank = None
 58 |     for (rank, node) in ranks_nodes:
 59 |         if rank == comm.Get_rank():
 60 |             local_rank = node2rankssofar[node]
 61 |         node2rankssofar[node] += 1
 62 |     assert local_rank is not None
 63 |     return local_rank, node2rankssofar[this_node]
 64 | 
 65 | def share_file(comm, path):
 66 |     """
 67 |     Copies the file from rank 0 to all other ranks
 68 |     Puts it in the same place on all machines
 69 |     """
 70 |     localrank, _ = get_local_rank_size(comm)
 71 |     if comm.Get_rank() == 0:
 72 |         with open(path, 'rb') as fh:
 73 |             data = fh.read()
 74 |         comm.bcast(data)
 75 |     else:
 76 |         data = comm.bcast(None)
 77 |         if localrank == 0:
 78 |             os.makedirs(os.path.dirname(path), exist_ok=True)
 79 |             with open(path, 'wb') as fh:
 80 |                 fh.write(data)
 81 |     comm.Barrier()
 82 | 
 83 | def dict_gather(comm, d, op='mean', assert_all_have_data=True):
 84 |     if comm is None: return d
 85 |     alldicts = comm.allgather(d)
 86 |     size = comm.size
 87 |     k2li = defaultdict(list)
 88 |     for d in alldicts:
 89 |         for (k,v) in d.items():
 90 |             k2li[k].append(v)
 91 |     result = {}
 92 |     for (k,li) in k2li.items():
 93 |         if assert_all_have_data:
 94 |             assert len(li)==size, "only %i out of %i MPI workers have sent '%s'" % (len(li), size, k)
 95 |         if op=='mean':
 96 |             result[k] = np.mean(li, axis=0)
 97 |         elif op=='sum':
 98 |             result[k] = np.sum(li, axis=0)
 99 |         else:
100 |             assert 0, op
101 |     return result
102 | 


--------------------------------------------------------------------------------
/miniworld/baselines/common/runners.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from abc import ABC, abstractmethod
 3 | 
 4 | class AbstractEnvRunner(ABC):
 5 |     def __init__(self, *, env, model, nsteps):
 6 |         self.env = env
 7 |         self.model = model
 8 |         self.nenv = nenv = env.num_envs if hasattr(env, 'num_envs') else 1
 9 |         self.batch_ob_shape = (nenv*nsteps,) + env.observation_space.shape
10 |         self.obs = np.zeros((nenv,) + env.observation_space.shape, dtype=env.observation_space.dtype.name)
11 |         self.obs[:] = env.reset()
12 |         self.nsteps = nsteps
13 |         self.states = model.initial_state
14 |         self.dones = [False for _ in range(nenv)]
15 | 
16 |     @abstractmethod
17 |     def run(self):
18 |         raise NotImplementedError
19 | 
20 | 


--------------------------------------------------------------------------------
/miniworld/baselines/common/schedules.py:
--------------------------------------------------------------------------------
  1 | """This file is used for specifying various schedules that evolve over
  2 | time throughout the execution of the algorithm, such as:
  3 |  - learning rate for the optimizer
  4 |  - exploration epsilon for the epsilon greedy exploration strategy
  5 |  - beta parameter for beta parameter in prioritized replay
  6 | 
  7 | Each schedule has a function `value(t)` which returns the current value
  8 | of the parameter given the timestep t of the optimization procedure.
  9 | """
 10 | 
 11 | 
 12 | class Schedule(object):
 13 |     def value(self, t):
 14 |         """Value of the schedule at time t"""
 15 |         raise NotImplementedError()
 16 | 
 17 | 
 18 | class ConstantSchedule(object):
 19 |     def __init__(self, value):
 20 |         """Value remains constant over time.
 21 | 
 22 |         Parameters
 23 |         ----------
 24 |         value: float
 25 |             Constant value of the schedule
 26 |         """
 27 |         self._v = value
 28 | 
 29 |     def value(self, t):
 30 |         """See Schedule.value"""
 31 |         return self._v
 32 | 
 33 | 
 34 | def linear_interpolation(l, r, alpha):
 35 |     return l + alpha * (r - l)
 36 | 
 37 | 
 38 | class PiecewiseSchedule(object):
 39 |     def __init__(self, endpoints, interpolation=linear_interpolation, outside_value=None):
 40 |         """Piecewise schedule.
 41 | 
 42 |         endpoints: [(int, int)]
 43 |             list of pairs `(time, value)` meanining that schedule should output
 44 |             `value` when `t==time`. All the values for time must be sorted in
 45 |             an increasing order. When t is between two times, e.g. `(time_a, value_a)`
 46 |             and `(time_b, value_b)`, such that `time_a <= t < time_b` then value outputs
 47 |             `interpolation(value_a, value_b, alpha)` where alpha is a fraction of
 48 |             time passed between `time_a` and `time_b` for time `t`.
 49 |         interpolation: lambda float, float, float: float
 50 |             a function that takes value to the left and to the right of t according
 51 |             to the `endpoints`. Alpha is the fraction of distance from left endpoint to
 52 |             right endpoint that t has covered. See linear_interpolation for example.
 53 |         outside_value: float
 54 |             if the value is requested outside of all the intervals sepecified in
 55 |             `endpoints` this value is returned. If None then AssertionError is
 56 |             raised when outside value is requested.
 57 |         """
 58 |         idxes = [e[0] for e in endpoints]
 59 |         assert idxes == sorted(idxes)
 60 |         self._interpolation = interpolation
 61 |         self._outside_value = outside_value
 62 |         self._endpoints = endpoints
 63 | 
 64 |     def value(self, t):
 65 |         """See Schedule.value"""
 66 |         for (l_t, l), (r_t, r) in zip(self._endpoints[:-1], self._endpoints[1:]):
 67 |             if l_t <= t and t < r_t:
 68 |                 alpha = float(t - l_t) / (r_t - l_t)
 69 |                 return self._interpolation(l, r, alpha)
 70 | 
 71 |         # t does not belong to any of the pieces, so doom.
 72 |         assert self._outside_value is not None
 73 |         return self._outside_value
 74 | 
 75 | 
 76 | class LinearSchedule(object):
 77 |     def __init__(self, schedule_timesteps, final_p, initial_p=1.0):
 78 |         """Linear interpolation between initial_p and final_p over
 79 |         schedule_timesteps. After this many timesteps pass final_p is
 80 |         returned.
 81 | 
 82 |         Parameters
 83 |         ----------
 84 |         schedule_timesteps: int
 85 |             Number of timesteps for which to linearly anneal initial_p
 86 |             to final_p
 87 |         initial_p: float
 88 |             initial output value
 89 |         final_p: float
 90 |             final output value
 91 |         """
 92 |         self.schedule_timesteps = schedule_timesteps
 93 |         self.final_p = final_p
 94 |         self.initial_p = initial_p
 95 | 
 96 |     def value(self, t):
 97 |         """See Schedule.value"""
 98 |         fraction = min(float(t) / self.schedule_timesteps, 1.0)
 99 |         return self.initial_p + fraction * (self.final_p - self.initial_p)
100 | 


--------------------------------------------------------------------------------
/miniworld/baselines/common/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kkhetarpal/ioc/fb88d5b881e0b5c317020b23874495e614b3ddc7/miniworld/baselines/common/tests/__init__.py


--------------------------------------------------------------------------------
/miniworld/baselines/common/tests/envs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kkhetarpal/ioc/fb88d5b881e0b5c317020b23874495e614b3ddc7/miniworld/baselines/common/tests/envs/__init__.py


--------------------------------------------------------------------------------
/miniworld/baselines/common/tests/envs/fixed_sequence_env.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from gym import Env
 3 | from gym.spaces import Discrete
 4 | 
 5 | 
 6 | class FixedSequenceEnv(Env):
 7 |     def __init__(
 8 |             self,
 9 |             n_actions=10,
10 |             seed=0,
11 |             episode_len=100
12 |     ):
13 |         self.np_random = np.random.RandomState()
14 |         self.np_random.seed(seed)
15 |         self.sequence = [self.np_random.randint(0, n_actions-1) for _ in range(episode_len)]
16 | 
17 |         self.action_space = Discrete(n_actions)
18 |         self.observation_space = Discrete(1)
19 | 
20 |         self.episode_len = episode_len
21 |         self.time = 0
22 |         self.reset()
23 | 
24 |     def reset(self):
25 |         self.time = 0
26 |         return 0
27 | 
28 |     def step(self, actions):
29 |         rew = self._get_reward(actions)
30 |         self._choose_next_state()
31 |         done = False
32 |         if self.episode_len and self.time >= self.episode_len:
33 |             rew = 0
34 |             done = True
35 | 
36 |         return 0, rew, done, {}
37 | 
38 |     def _choose_next_state(self):
39 |         self.time += 1
40 | 
41 |     def _get_reward(self, actions):
42 |         return 1 if actions == self.sequence[self.time] else 0
43 | 
44 | 
45 | 


--------------------------------------------------------------------------------
/miniworld/baselines/common/tests/envs/identity_env.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from abc import abstractmethod
 3 | from gym import Env
 4 | from gym.spaces import MultiDiscrete, Discrete, Box
 5 | 
 6 | 
 7 | class IdentityEnv(Env):
 8 |     def __init__(
 9 |             self,
10 |             episode_len=None
11 |     ):
12 | 
13 |         self.episode_len = episode_len
14 |         self.time = 0
15 |         self.reset()
16 | 
17 |     def reset(self):
18 |         self._choose_next_state()
19 |         self.time = 0
20 |         self.observation_space = self.action_space
21 | 
22 |         return self.state
23 | 
24 |     def step(self, actions):
25 |         rew = self._get_reward(actions)
26 |         self._choose_next_state()
27 |         done = False
28 |         if self.episode_len and self.time >= self.episode_len:
29 |             rew = 0
30 |             done = True
31 | 
32 |         return self.state, rew, done, {}
33 | 
34 |     def _choose_next_state(self):
35 |         self.state = self.action_space.sample()
36 |         self.time += 1
37 | 
38 |     @abstractmethod
39 |     def _get_reward(self, actions):
40 |         raise NotImplementedError
41 | 
42 | 
43 | class DiscreteIdentityEnv(IdentityEnv):
44 |     def __init__(
45 |             self,
46 |             dim,
47 |             episode_len=None,
48 |     ):
49 | 
50 |         self.action_space = Discrete(dim)
51 |         super().__init__(episode_len=episode_len)
52 | 
53 |     def _get_reward(self, actions):
54 |         return 1 if self.state == actions else 0
55 | 
56 | class MultiDiscreteIdentityEnv(IdentityEnv):
57 |     def __init__(
58 |             self,
59 |             dims,
60 |             episode_len=None,
61 |     ):
62 | 
63 |         self.action_space = MultiDiscrete(dims)
64 |         super().__init__(episode_len=episode_len)
65 | 
66 |     def _get_reward(self, actions):
67 |         return 1 if all(self.state == actions) else 0
68 | 
69 | 
70 | class BoxIdentityEnv(IdentityEnv):
71 |     def __init__(
72 |             self,
73 |             shape,
74 |             episode_len=None,
75 |     ):
76 | 
77 |         self.action_space = Box(low=-1.0, high=1.0, shape=shape)
78 |         super().__init__(episode_len=episode_len)
79 | 
80 |     def _get_reward(self, actions):
81 |         diff = actions - self.state
82 |         diff = diff[:]
83 |         return -0.5 * np.dot(diff, diff)
84 | 


--------------------------------------------------------------------------------
/miniworld/baselines/common/tests/envs/mnist_env.py:
--------------------------------------------------------------------------------
 1 | import os.path as osp
 2 | import numpy as np
 3 | import tempfile
 4 | from gym import Env
 5 | from gym.spaces import Discrete, Box
 6 | 
 7 | 
 8 | 
 9 | class MnistEnv(Env):
10 |     def __init__(
11 |             self,
12 |             seed=0,
13 |             episode_len=None,
14 |             no_images=None
15 |     ):
16 |         import filelock
17 |         from tensorflow.examples.tutorials.mnist import input_data
18 |         # we could use temporary directory for this with a context manager and
19 |         # TemporaryDirecotry, but then each test that uses mnist would re-download the data
20 |         # this way the data is not cleaned up, but we only download it once per machine
21 |         mnist_path = osp.join(tempfile.gettempdir(), 'MNIST_data')
22 |         with filelock.FileLock(mnist_path + '.lock'):
23 |            self.mnist = input_data.read_data_sets(mnist_path)
24 | 
25 |         self.np_random = np.random.RandomState()
26 |         self.np_random.seed(seed)
27 | 
28 |         self.observation_space = Box(low=0.0, high=1.0, shape=(28,28,1))
29 |         self.action_space = Discrete(10)
30 |         self.episode_len = episode_len
31 |         self.time = 0
32 |         self.no_images = no_images
33 | 
34 |         self.train_mode()
35 |         self.reset()
36 | 
37 |     def reset(self):
38 |         self._choose_next_state()
39 |         self.time = 0
40 | 
41 |         return self.state[0]
42 | 
43 |     def step(self, actions):
44 |         rew = self._get_reward(actions)
45 |         self._choose_next_state()
46 |         done = False
47 |         if self.episode_len and self.time >= self.episode_len:
48 |             rew = 0
49 |             done = True
50 | 
51 |         return self.state[0], rew, done, {}
52 | 
53 |     def train_mode(self):
54 |         self.dataset = self.mnist.train
55 | 
56 |     def test_mode(self):
57 |         self.dataset = self.mnist.test
58 | 
59 |     def _choose_next_state(self):
60 |         max_index = (self.no_images if self.no_images is not None else self.dataset.num_examples) - 1
61 |         index = self.np_random.randint(0, max_index)
62 |         image = self.dataset.images[index].reshape(28,28,1)*255
63 |         label = self.dataset.labels[index]
64 |         self.state = (image, label)
65 |         self.time += 1
66 | 
67 |     def _get_reward(self, actions):
68 |         return 1 if self.state[1] == actions else 0
69 | 
70 | 
71 | 


--------------------------------------------------------------------------------
/miniworld/baselines/common/tests/test_cartpole.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import gym
 3 | 
 4 | from baselines.run import get_learn_function
 5 | from baselines.common.tests.util import reward_per_episode_test
 6 | 
 7 | common_kwargs = dict(
 8 |     total_timesteps=30000,
 9 |     network='mlp',
10 |     gamma=1.0,
11 |     seed=0,
12 | )
13 | 
14 | learn_kwargs = {
15 |     'a2c' : dict(nsteps=32, value_network='copy', lr=0.05),
16 |     'acer': dict(value_network='copy'),
17 |     'acktr': dict(nsteps=32, value_network='copy', is_async=False),
18 |     'deepq': dict(total_timesteps=20000),
19 |     'ppo2': dict(value_network='copy'),
20 |     'trpo_mpi': {}
21 | }
22 | 
23 | @pytest.mark.slow
24 | @pytest.mark.parametrize("alg", learn_kwargs.keys())
25 | def test_cartpole(alg):
26 |     '''
27 |     Test if the algorithm (with an mlp policy)
28 |     can learn to balance the cartpole
29 |     '''
30 | 
31 |     kwargs = common_kwargs.copy()
32 |     kwargs.update(learn_kwargs[alg])
33 | 
34 |     learn_fn = lambda e: get_learn_function(alg)(env=e, **kwargs)
35 |     def env_fn():
36 | 
37 |         env = gym.make('CartPole-v0')
38 |         env.seed(0)
39 |         return env
40 | 
41 |     reward_per_episode_test(env_fn, learn_fn, 100)
42 | 
43 | if __name__ == '__main__':
44 |     test_cartpole('acer')
45 | 


--------------------------------------------------------------------------------
/miniworld/baselines/common/tests/test_doc_examples.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | try:
 3 |     import mujoco_py
 4 |     _mujoco_present = True
 5 | except BaseException:
 6 |     mujoco_py = None
 7 |     _mujoco_present = False
 8 | 
 9 | 
10 | @pytest.mark.skipif(
11 |     not _mujoco_present,
12 |     reason='error loading mujoco - either mujoco / mujoco key not present, or LD_LIBRARY_PATH is not pointing to mujoco library'
13 | )
14 | def test_lstm_example():
15 |     import tensorflow as tf
16 |     from baselines.common import policies, models, cmd_util
17 |     from baselines.common.vec_env.dummy_vec_env import DummyVecEnv
18 | 
19 |     # create vectorized environment
20 |     venv = DummyVecEnv([lambda: cmd_util.make_mujoco_env('Reacher-v2', seed=0)])
21 | 
22 |     with tf.Session() as sess:
23 |         # build policy based on lstm network with 128 units
24 |         policy = policies.build_policy(venv, models.lstm(128))(nbatch=1, nsteps=1)
25 | 
26 |         # initialize tensorflow variables
27 |         sess.run(tf.global_variables_initializer())
28 | 
29 |         # prepare environment variables
30 |         ob = venv.reset()
31 |         state = policy.initial_state
32 |         done = [False]
33 |         step_counter = 0
34 | 
35 |         # run a single episode until the end (i.e. until done)
36 |         while True:
37 |             action, _, state, _ = policy.step(ob, S=state, M=done)
38 |             ob, reward, done, _ = venv.step(action)
39 |             step_counter += 1
40 |             if done:
41 |                 break
42 | 
43 | 
44 |         assert step_counter > 5
45 | 
46 | 
47 | 
48 | 
49 | 


--------------------------------------------------------------------------------
/miniworld/baselines/common/tests/test_env_after_learn.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import gym
 3 | import tensorflow as tf
 4 | 
 5 | from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv
 6 | from baselines.run import get_learn_function
 7 | from baselines.common.tf_util import make_session
 8 | 
 9 | algos = ['a2c', 'acer', 'acktr', 'deepq', 'ppo2', 'trpo_mpi']
10 | 
11 | @pytest.mark.parametrize('algo', algos)
12 | def test_env_after_learn(algo):
13 |     def make_env():
14 |         # acktr requires too much RAM, fails on travis
15 |         env = gym.make('CartPole-v1' if algo == 'acktr' else 'PongNoFrameskip-v4')
16 |         return env
17 | 
18 |     make_session(make_default=True, graph=tf.Graph())
19 |     env = SubprocVecEnv([make_env])
20 | 
21 |     learn = get_learn_function(algo)
22 | 
23 |     # Commenting out the following line resolves the issue, though crash happens at env.reset().
24 |     learn(network='mlp', env=env, total_timesteps=0, load_path=None, seed=None)
25 | 
26 |     env.reset()
27 |     env.close()
28 | 


--------------------------------------------------------------------------------
/miniworld/baselines/common/tests/test_fetchreach.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import gym
 3 | 
 4 | from baselines.run import get_learn_function
 5 | from baselines.common.tests.util import reward_per_episode_test
 6 | 
 7 | pytest.importorskip('mujoco_py')
 8 | 
 9 | common_kwargs = dict(
10 |     network='mlp',
11 |     seed=0,
12 | )
13 | 
14 | learn_kwargs = {
15 |     'her': dict(total_timesteps=2000)
16 | }
17 | 
18 | @pytest.mark.slow
19 | @pytest.mark.parametrize("alg", learn_kwargs.keys())
20 | def test_fetchreach(alg):
21 |     '''
22 |     Test if the algorithm (with an mlp policy)
23 |     can learn the FetchReach task
24 |     '''
25 | 
26 |     kwargs = common_kwargs.copy()
27 |     kwargs.update(learn_kwargs[alg])
28 | 
29 |     learn_fn = lambda e: get_learn_function(alg)(env=e, **kwargs)
30 |     def env_fn():
31 | 
32 |         env = gym.make('FetchReach-v1')
33 |         env.seed(0)
34 |         return env
35 | 
36 |     reward_per_episode_test(env_fn, learn_fn, -15)
37 | 
38 | if __name__ == '__main__':
39 |     test_fetchreach('her')
40 | 


--------------------------------------------------------------------------------
/miniworld/baselines/common/tests/test_fixed_sequence.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | from baselines.common.tests.envs.fixed_sequence_env import FixedSequenceEnv
 3 | 
 4 | from baselines.common.tests.util import simple_test
 5 | from baselines.run import get_learn_function
 6 | 
 7 | common_kwargs = dict(
 8 |     seed=0,
 9 |     total_timesteps=50000,
10 | )
11 | 
12 | learn_kwargs = {
13 |     'a2c': {},
14 |     'ppo2': dict(nsteps=10, ent_coef=0.0, nminibatches=1),
15 |     # TODO enable sequential models for trpo_mpi (proper handling of nbatch and nsteps)
16 |     # github issue: https://github.com/openai/baselines/issues/188
17 |     # 'trpo_mpi': lambda e, p: trpo_mpi.learn(policy_fn=p(env=e), env=e, max_timesteps=30000, timesteps_per_batch=100, cg_iters=10, gamma=0.9, lam=1.0, max_kl=0.001)
18 | }
19 | 
20 | 
21 | alg_list = learn_kwargs.keys()
22 | rnn_list = ['lstm']
23 | 
24 | @pytest.mark.slow
25 | @pytest.mark.parametrize("alg", alg_list)
26 | @pytest.mark.parametrize("rnn", rnn_list)
27 | def test_fixed_sequence(alg, rnn):
28 |     '''
29 |     Test if the algorithm (with a given policy)
30 |     can learn an identity transformation (i.e. return observation as an action)
31 |     '''
32 | 
33 |     kwargs = learn_kwargs[alg]
34 |     kwargs.update(common_kwargs)
35 | 
36 |     episode_len = 5
37 |     env_fn = lambda: FixedSequenceEnv(10, episode_len=episode_len)
38 |     learn = lambda e: get_learn_function(alg)(
39 |         env=e,
40 |         network=rnn,
41 |         **kwargs
42 |     )
43 | 
44 |     simple_test(env_fn, learn, 0.7)
45 | 
46 | 
47 | if __name__ == '__main__':
48 |     test_fixed_sequence('ppo2', 'lstm')
49 | 
50 | 
51 | 
52 | 


--------------------------------------------------------------------------------
/miniworld/baselines/common/tests/test_identity.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | from baselines.common.tests.envs.identity_env import DiscreteIdentityEnv, BoxIdentityEnv, MultiDiscreteIdentityEnv
 3 | from baselines.run import get_learn_function
 4 | from baselines.common.tests.util import simple_test
 5 | 
 6 | common_kwargs = dict(
 7 |     total_timesteps=30000,
 8 |     network='mlp',
 9 |     gamma=0.9,
10 |     seed=0,
11 | )
12 | 
13 | learn_kwargs = {
14 |     'a2c' : {},
15 |     'acktr': {},
16 |     'deepq': {},
17 |     'ddpg': dict(layer_norm=True),
18 |     'ppo2': dict(lr=1e-3, nsteps=64, ent_coef=0.0),
19 |     'trpo_mpi': dict(timesteps_per_batch=100, cg_iters=10, gamma=0.9, lam=1.0, max_kl=0.01)
20 | }
21 | 
22 | 
23 | algos_disc = ['a2c', 'acktr', 'deepq', 'ppo2', 'trpo_mpi']
24 | algos_multidisc = ['a2c', 'acktr', 'ppo2', 'trpo_mpi']
25 | algos_cont = ['a2c', 'acktr', 'ddpg',  'ppo2', 'trpo_mpi']
26 | 
27 | @pytest.mark.slow
28 | @pytest.mark.parametrize("alg", algos_disc)
29 | def test_discrete_identity(alg):
30 |     '''
31 |     Test if the algorithm (with an mlp policy)
32 |     can learn an identity transformation (i.e. return observation as an action)
33 |     '''
34 | 
35 |     kwargs = learn_kwargs[alg]
36 |     kwargs.update(common_kwargs)
37 | 
38 |     learn_fn = lambda e: get_learn_function(alg)(env=e, **kwargs)
39 |     env_fn = lambda: DiscreteIdentityEnv(10, episode_len=100)
40 |     simple_test(env_fn, learn_fn, 0.9)
41 | 
42 | @pytest.mark.slow
43 | @pytest.mark.parametrize("alg", algos_multidisc)
44 | def test_multidiscrete_identity(alg):
45 |     '''
46 |     Test if the algorithm (with an mlp policy)
47 |     can learn an identity transformation (i.e. return observation as an action)
48 |     '''
49 | 
50 |     kwargs = learn_kwargs[alg]
51 |     kwargs.update(common_kwargs)
52 | 
53 |     learn_fn = lambda e: get_learn_function(alg)(env=e, **kwargs)
54 |     env_fn = lambda: MultiDiscreteIdentityEnv((3,3), episode_len=100)
55 |     simple_test(env_fn, learn_fn, 0.9)
56 | 
57 | @pytest.mark.slow
58 | @pytest.mark.parametrize("alg", algos_cont)
59 | def test_continuous_identity(alg):
60 |     '''
61 |     Test if the algorithm (with an mlp policy)
62 |     can learn an identity transformation (i.e. return observation as an action)
63 |     to a required precision
64 |     '''
65 | 
66 |     kwargs = learn_kwargs[alg]
67 |     kwargs.update(common_kwargs)
68 |     learn_fn = lambda e: get_learn_function(alg)(env=e, **kwargs)
69 | 
70 |     env_fn = lambda: BoxIdentityEnv((1,), episode_len=100)
71 |     simple_test(env_fn, learn_fn, -0.1)
72 | 
73 | if __name__ == '__main__':
74 |     test_multidiscrete_identity('acktr')
75 | 
76 | 


--------------------------------------------------------------------------------
/miniworld/baselines/common/tests/test_mnist.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | # from baselines.acer import acer_simple as acer
 4 | from baselines.common.tests.envs.mnist_env import MnistEnv
 5 | from baselines.common.tests.util import simple_test
 6 | from baselines.run import get_learn_function
 7 | 
 8 | 
 9 | # TODO investigate a2c and ppo2 failures - is it due to bad hyperparameters for this problem?
10 | # GitHub issue https://github.com/openai/baselines/issues/189
11 | common_kwargs = {
12 |     'seed': 0,
13 |     'network':'cnn',
14 |     'gamma':0.9,
15 |     'pad':'SAME'
16 | }
17 | 
18 | learn_args = {
19 |     'a2c': dict(total_timesteps=50000),
20 |     'acer': dict(total_timesteps=20000),
21 |     'deepq': dict(total_timesteps=5000),
22 |     'acktr': dict(total_timesteps=30000),
23 |     'ppo2': dict(total_timesteps=50000, lr=1e-3, nsteps=128, ent_coef=0.0),
24 |     'trpo_mpi': dict(total_timesteps=80000, timesteps_per_batch=100, cg_iters=10, lam=1.0, max_kl=0.001)
25 | }
26 | 
27 | 
28 | #tests pass, but are too slow on travis. Same algorithms are covered
29 | # by other tests with less compute-hungry nn's and by benchmarks
30 | @pytest.mark.skip
31 | @pytest.mark.slow
32 | @pytest.mark.parametrize("alg", learn_args.keys())
33 | def test_mnist(alg):
34 |     '''
35 |     Test if the algorithm can learn to classify MNIST digits.
36 |     Uses CNN policy.
37 |     '''
38 | 
39 |     learn_kwargs = learn_args[alg]
40 |     learn_kwargs.update(common_kwargs)
41 | 
42 |     learn = get_learn_function(alg)
43 |     learn_fn = lambda e: learn(env=e, **learn_kwargs)
44 |     env_fn = lambda: MnistEnv(seed=0, episode_len=100)
45 | 
46 |     simple_test(env_fn, learn_fn, 0.6)
47 | 
48 | if __name__ == '__main__':
49 |     test_mnist('acer')
50 | 


--------------------------------------------------------------------------------
/miniworld/baselines/common/tests/test_schedules.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | from baselines.common.schedules import ConstantSchedule, PiecewiseSchedule
 4 | 
 5 | 
 6 | def test_piecewise_schedule():
 7 |     ps = PiecewiseSchedule([(-5, 100), (5, 200), (10, 50), (100, 50), (200, -50)], outside_value=500)
 8 | 
 9 |     assert np.isclose(ps.value(-10), 500)
10 |     assert np.isclose(ps.value(0), 150)
11 |     assert np.isclose(ps.value(5), 200)
12 |     assert np.isclose(ps.value(9), 80)
13 |     assert np.isclose(ps.value(50), 50)
14 |     assert np.isclose(ps.value(80), 50)
15 |     assert np.isclose(ps.value(150), 0)
16 |     assert np.isclose(ps.value(175), -25)
17 |     assert np.isclose(ps.value(201), 500)
18 |     assert np.isclose(ps.value(500), 500)
19 | 
20 |     assert np.isclose(ps.value(200 - 1e-10), -50)
21 | 
22 | 
23 | def test_constant_schedule():
24 |     cs = ConstantSchedule(5)
25 |     for i in range(-100, 100):
26 |         assert np.isclose(cs.value(i), 5)
27 | 


--------------------------------------------------------------------------------
/miniworld/baselines/common/tests/test_segment_tree.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | 
  3 | from baselines.common.segment_tree import SumSegmentTree, MinSegmentTree
  4 | 
  5 | 
  6 | def test_tree_set():
  7 |     tree = SumSegmentTree(4)
  8 | 
  9 |     tree[2] = 1.0
 10 |     tree[3] = 3.0
 11 | 
 12 |     assert np.isclose(tree.sum(), 4.0)
 13 |     assert np.isclose(tree.sum(0, 2), 0.0)
 14 |     assert np.isclose(tree.sum(0, 3), 1.0)
 15 |     assert np.isclose(tree.sum(2, 3), 1.0)
 16 |     assert np.isclose(tree.sum(2, -1), 1.0)
 17 |     assert np.isclose(tree.sum(2, 4), 4.0)
 18 | 
 19 | 
 20 | def test_tree_set_overlap():
 21 |     tree = SumSegmentTree(4)
 22 | 
 23 |     tree[2] = 1.0
 24 |     tree[2] = 3.0
 25 | 
 26 |     assert np.isclose(tree.sum(), 3.0)
 27 |     assert np.isclose(tree.sum(2, 3), 3.0)
 28 |     assert np.isclose(tree.sum(2, -1), 3.0)
 29 |     assert np.isclose(tree.sum(2, 4), 3.0)
 30 |     assert np.isclose(tree.sum(1, 2), 0.0)
 31 | 
 32 | 
 33 | def test_prefixsum_idx():
 34 |     tree = SumSegmentTree(4)
 35 | 
 36 |     tree[2] = 1.0
 37 |     tree[3] = 3.0
 38 | 
 39 |     assert tree.find_prefixsum_idx(0.0) == 2
 40 |     assert tree.find_prefixsum_idx(0.5) == 2
 41 |     assert tree.find_prefixsum_idx(0.99) == 2
 42 |     assert tree.find_prefixsum_idx(1.01) == 3
 43 |     assert tree.find_prefixsum_idx(3.00) == 3
 44 |     assert tree.find_prefixsum_idx(4.00) == 3
 45 | 
 46 | 
 47 | def test_prefixsum_idx2():
 48 |     tree = SumSegmentTree(4)
 49 | 
 50 |     tree[0] = 0.5
 51 |     tree[1] = 1.0
 52 |     tree[2] = 1.0
 53 |     tree[3] = 3.0
 54 | 
 55 |     assert tree.find_prefixsum_idx(0.00) == 0
 56 |     assert tree.find_prefixsum_idx(0.55) == 1
 57 |     assert tree.find_prefixsum_idx(0.99) == 1
 58 |     assert tree.find_prefixsum_idx(1.51) == 2
 59 |     assert tree.find_prefixsum_idx(3.00) == 3
 60 |     assert tree.find_prefixsum_idx(5.50) == 3
 61 | 
 62 | 
 63 | def test_max_interval_tree():
 64 |     tree = MinSegmentTree(4)
 65 | 
 66 |     tree[0] = 1.0
 67 |     tree[2] = 0.5
 68 |     tree[3] = 3.0
 69 | 
 70 |     assert np.isclose(tree.min(), 0.5)
 71 |     assert np.isclose(tree.min(0, 2), 1.0)
 72 |     assert np.isclose(tree.min(0, 3), 0.5)
 73 |     assert np.isclose(tree.min(0, -1), 0.5)
 74 |     assert np.isclose(tree.min(2, 4), 0.5)
 75 |     assert np.isclose(tree.min(3, 4), 3.0)
 76 | 
 77 |     tree[2] = 0.7
 78 | 
 79 |     assert np.isclose(tree.min(), 0.7)
 80 |     assert np.isclose(tree.min(0, 2), 1.0)
 81 |     assert np.isclose(tree.min(0, 3), 0.7)
 82 |     assert np.isclose(tree.min(0, -1), 0.7)
 83 |     assert np.isclose(tree.min(2, 4), 0.7)
 84 |     assert np.isclose(tree.min(3, 4), 3.0)
 85 | 
 86 |     tree[2] = 4.0
 87 | 
 88 |     assert np.isclose(tree.min(), 1.0)
 89 |     assert np.isclose(tree.min(0, 2), 1.0)
 90 |     assert np.isclose(tree.min(0, 3), 1.0)
 91 |     assert np.isclose(tree.min(0, -1), 1.0)
 92 |     assert np.isclose(tree.min(2, 4), 3.0)
 93 |     assert np.isclose(tree.min(2, 3), 4.0)
 94 |     assert np.isclose(tree.min(2, -1), 4.0)
 95 |     assert np.isclose(tree.min(3, 4), 3.0)
 96 | 
 97 | 
 98 | if __name__ == '__main__':
 99 |     test_tree_set()
100 |     test_tree_set_overlap()
101 |     test_prefixsum_idx()
102 |     test_prefixsum_idx2()
103 |     test_max_interval_tree()
104 | 


--------------------------------------------------------------------------------
/miniworld/baselines/common/tests/test_tf_util.py:
--------------------------------------------------------------------------------
 1 | # tests for tf_util
 2 | import tensorflow as tf
 3 | from baselines.common.tf_util import (
 4 |     function,
 5 |     initialize,
 6 |     single_threaded_session
 7 | )
 8 | 
 9 | 
10 | def test_function():
11 |     with tf.Graph().as_default():
12 |         x = tf.placeholder(tf.int32, (), name="x")
13 |         y = tf.placeholder(tf.int32, (), name="y")
14 |         z = 3 * x + 2 * y
15 |         lin = function([x, y], z, givens={y: 0})
16 | 
17 |         with single_threaded_session():
18 |             initialize()
19 | 
20 |             assert lin(2) == 6
21 |             assert lin(2, 2) == 10
22 | 
23 | 
24 | def test_multikwargs():
25 |     with tf.Graph().as_default():
26 |         x = tf.placeholder(tf.int32, (), name="x")
27 |         with tf.variable_scope("other"):
28 |             x2 = tf.placeholder(tf.int32, (), name="x")
29 |         z = 3 * x + 2 * x2
30 | 
31 |         lin = function([x, x2], z, givens={x2: 0})
32 |         with single_threaded_session():
33 |             initialize()
34 |             assert lin(2) == 6
35 |             assert lin(2, 2) == 10
36 | 
37 | 
38 | if __name__ == '__main__':
39 |     test_function()
40 |     test_multikwargs()
41 | 


--------------------------------------------------------------------------------
/miniworld/baselines/common/tests/util.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | import numpy as np
 3 | from gym.spaces import np_random
 4 | from baselines.common.vec_env.dummy_vec_env import DummyVecEnv
 5 | 
 6 | N_TRIALS = 10000
 7 | N_EPISODES = 100
 8 | 
 9 | def simple_test(env_fn, learn_fn, min_reward_fraction, n_trials=N_TRIALS):
10 |     np.random.seed(0)
11 |     np_random.seed(0)
12 | 
13 |     env = DummyVecEnv([env_fn])
14 | 
15 | 
16 |     with tf.Graph().as_default(), tf.Session(config=tf.ConfigProto(allow_soft_placement=True)).as_default():
17 |         tf.set_random_seed(0)
18 | 
19 |         model = learn_fn(env)
20 | 
21 |         sum_rew = 0
22 |         done = True
23 | 
24 |         for i in range(n_trials):
25 |             if done:
26 |                 obs = env.reset()
27 |                 state = model.initial_state
28 | 
29 |             if state is not None:
30 |                 a, v, state, _ = model.step(obs, S=state, M=[False])
31 |             else:
32 |                 a, v, _, _ = model.step(obs)
33 | 
34 |             obs, rew, done, _ = env.step(a)
35 |             sum_rew += float(rew)
36 | 
37 |         print("Reward in {} trials is {}".format(n_trials, sum_rew))
38 |         assert sum_rew > min_reward_fraction * n_trials, \
39 |             'sum of rewards {} is less than {} of the total number of trials {}'.format(sum_rew, min_reward_fraction, n_trials)
40 | 
41 | 
42 | 
43 | def reward_per_episode_test(env_fn, learn_fn, min_avg_reward, n_trials=N_EPISODES):
44 |     env = DummyVecEnv([env_fn])
45 | 
46 |     with tf.Graph().as_default(), tf.Session(config=tf.ConfigProto(allow_soft_placement=True)).as_default():
47 |         model = learn_fn(env)
48 | 
49 |         N_TRIALS = 100
50 | 
51 |         observations, actions, rewards = rollout(env, model, N_TRIALS)
52 |         rewards = [sum(r) for r in rewards]
53 | 
54 |         avg_rew = sum(rewards) / N_TRIALS
55 |         print("Average reward in {} episodes is {}".format(n_trials, avg_rew))
56 |         assert avg_rew > min_avg_reward, \
57 |             'average reward in {} episodes ({}) is less than {}'.format(n_trials, avg_rew, min_avg_reward)
58 | 
59 | def rollout(env, model, n_trials):
60 |     rewards = []
61 |     actions = []
62 |     observations = []
63 | 
64 |     for i in range(n_trials):
65 |         obs = env.reset()
66 |         state = model.initial_state if hasattr(model, 'initial_state') else None
67 |         episode_rew = []
68 |         episode_actions = []
69 |         episode_obs = []
70 | 
71 |         while True:
72 |             if state is not None:
73 |                 a, v, state, _ = model.step(obs, S=state, M=[False])
74 |             else:
75 |                 a,v, _, _ = model.step(obs)
76 | 
77 |             obs, rew, done, _ = env.step(a)
78 | 
79 |             episode_rew.append(rew)
80 |             episode_actions.append(a)
81 |             episode_obs.append(obs)
82 | 
83 |             if done:
84 |                 break
85 | 
86 |         rewards.append(episode_rew)
87 |         actions.append(episode_actions)
88 |         observations.append(episode_obs)
89 | 
90 |     return observations, actions, rewards
91 | 
92 | 


--------------------------------------------------------------------------------
/miniworld/baselines/common/tile_images.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | def tile_images(img_nhwc):
 4 |     """
 5 |     Tile N images into one big PxQ image
 6 |     (P,Q) are chosen to be as close as possible, and if N
 7 |     is square, then P=Q.
 8 | 
 9 |     input: img_nhwc, list or array of images, ndim=4 once turned into array
10 |         n = batch index, h = height, w = width, c = channel
11 |     returns:
12 |         bigim_HWc, ndarray with ndim=3
13 |     """
14 |     img_nhwc = np.asarray(img_nhwc)
15 |     N, h, w, c = img_nhwc.shape
16 |     H = int(np.ceil(np.sqrt(N)))
17 |     W = int(np.ceil(float(N)/H))
18 |     img_nhwc = np.array(list(img_nhwc) + [img_nhwc[0]*0 for _ in range(N, H*W)])
19 |     img_HWhwc = img_nhwc.reshape(H, W, h, w, c)
20 |     img_HhWwc = img_HWhwc.transpose(0, 2, 1, 3, 4)
21 |     img_Hh_Ww_c = img_HhWwc.reshape(H*h, W*w, c)
22 |     return img_Hh_Ww_c
23 | 
24 | 


--------------------------------------------------------------------------------
/miniworld/baselines/common/vec_env/dummy_vec_env.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from gym import spaces
 3 | from . import VecEnv
 4 | from .util import copy_obs_dict, dict_to_obs, obs_space_info
 5 | 
 6 | class DummyVecEnv(VecEnv):
 7 |     """
 8 |     VecEnv that does runs multiple environments sequentially, that is,
 9 |     the step and reset commands are send to one environment at a time.
10 |     Useful when debugging and when num_env == 1 (in the latter case,
11 |     avoids communication overhead)
12 |     """
13 |     def __init__(self, env_fns):
14 |         """
15 |         Arguments:
16 | 
17 |         env_fns: iterable of callables      functions that build environments
18 |         """
19 |         self.envs = [fn() for fn in env_fns]
20 |         env = self.envs[0]
21 |         VecEnv.__init__(self, len(env_fns), env.observation_space, env.action_space)
22 |         obs_space = env.observation_space
23 |         self.keys, shapes, dtypes = obs_space_info(obs_space)
24 | 
25 |         self.buf_obs = { k: np.zeros((self.num_envs,) + tuple(shapes[k]), dtype=dtypes[k]) for k in self.keys }
26 |         self.buf_dones = np.zeros((self.num_envs,), dtype=np.bool)
27 |         self.buf_rews  = np.zeros((self.num_envs,), dtype=np.float32)
28 |         self.buf_infos = [{} for _ in range(self.num_envs)]
29 |         self.actions = None
30 |         self.specs = [e.spec for e in self.envs]
31 | 
32 |     def step_async(self, actions):
33 |         listify = True
34 |         try:
35 |             if len(actions) == self.num_envs:
36 |                 listify = False
37 |         except TypeError:
38 |             pass
39 | 
40 |         if not listify:
41 |             self.actions = actions
42 |         else:
43 |             assert self.num_envs == 1, "actions {} is either not a list or has a wrong size - cannot match to {} environments".format(actions, self.num_envs)
44 |             self.actions = [actions]
45 | 
46 |     def step_wait(self):
47 |         for e in range(self.num_envs):
48 |             action = self.actions[e]
49 |             if isinstance(self.envs[e].action_space, spaces.Discrete):
50 |                 action = int(action)
51 | 
52 |             obs, self.buf_rews[e], self.buf_dones[e], self.buf_infos[e] = self.envs[e].step(action)
53 |             if self.buf_dones[e]:
54 |                 obs = self.envs[e].reset()
55 |             self._save_obs(e, obs)
56 |         return (self._obs_from_buf(), np.copy(self.buf_rews), np.copy(self.buf_dones),
57 |                 self.buf_infos.copy())
58 | 
59 |     def reset(self):
60 |         for e in range(self.num_envs):
61 |             obs = self.envs[e].reset()
62 |             self._save_obs(e, obs)
63 |         return self._obs_from_buf()
64 | 
65 |     def _save_obs(self, e, obs):
66 |         for k in self.keys:
67 |             if k is None:
68 |                 self.buf_obs[k][e] = obs
69 |             else:
70 |                 self.buf_obs[k][e] = obs[k]
71 | 
72 |     def _obs_from_buf(self):
73 |         return dict_to_obs(copy_obs_dict(self.buf_obs))
74 | 
75 |     def get_images(self):
76 |         return [env.render(mode='rgb_array') for env in self.envs]
77 | 
78 |     def render(self, mode='human'):
79 |         if self.num_envs == 1:
80 |             return self.envs[0].render(mode=mode)
81 |         else:
82 |             return super().render(mode=mode)
83 | 


--------------------------------------------------------------------------------
/miniworld/baselines/common/vec_env/test_vec_env.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Tests for asynchronous vectorized environments.
  3 | """
  4 | 
  5 | import gym
  6 | import numpy as np
  7 | import pytest
  8 | from .dummy_vec_env import DummyVecEnv
  9 | from .shmem_vec_env import ShmemVecEnv
 10 | from .subproc_vec_env import SubprocVecEnv
 11 | 
 12 | 
 13 | def assert_envs_equal(env1, env2, num_steps):
 14 |     """
 15 |     Compare two environments over num_steps steps and make sure
 16 |     that the observations produced by each are the same when given
 17 |     the same actions.
 18 |     """
 19 |     assert env1.num_envs == env2.num_envs
 20 |     assert env1.action_space.shape == env2.action_space.shape
 21 |     assert env1.action_space.dtype == env2.action_space.dtype
 22 |     joint_shape = (env1.num_envs,) + env1.action_space.shape
 23 | 
 24 |     try:
 25 |         obs1, obs2 = env1.reset(), env2.reset()
 26 |         assert np.array(obs1).shape == np.array(obs2).shape
 27 |         assert np.array(obs1).shape == joint_shape
 28 |         assert np.allclose(obs1, obs2)
 29 |         np.random.seed(1337)
 30 |         for _ in range(num_steps):
 31 |             actions = np.array(np.random.randint(0, 0x100, size=joint_shape),
 32 |                                dtype=env1.action_space.dtype)
 33 |             for env in [env1, env2]:
 34 |                 env.step_async(actions)
 35 |             outs1 = env1.step_wait()
 36 |             outs2 = env2.step_wait()
 37 |             for out1, out2 in zip(outs1[:3], outs2[:3]):
 38 |                 assert np.array(out1).shape == np.array(out2).shape
 39 |                 assert np.allclose(out1, out2)
 40 |             assert list(outs1[3]) == list(outs2[3])
 41 |     finally:
 42 |         env1.close()
 43 |         env2.close()
 44 | 
 45 | 
 46 | @pytest.mark.parametrize('klass', (ShmemVecEnv, SubprocVecEnv))
 47 | @pytest.mark.parametrize('dtype', ('uint8', 'float32'))
 48 | def test_vec_env(klass, dtype):  # pylint: disable=R0914
 49 |     """
 50 |     Test that a vectorized environment is equivalent to
 51 |     DummyVecEnv, since DummyVecEnv is less likely to be
 52 |     error prone.
 53 |     """
 54 |     num_envs = 3
 55 |     num_steps = 100
 56 |     shape = (3, 8)
 57 | 
 58 |     def make_fn(seed):
 59 |         """
 60 |         Get an environment constructor with a seed.
 61 |         """
 62 |         return lambda: SimpleEnv(seed, shape, dtype)
 63 |     fns = [make_fn(i) for i in range(num_envs)]
 64 |     env1 = DummyVecEnv(fns)
 65 |     env2 = klass(fns)
 66 |     assert_envs_equal(env1, env2, num_steps=num_steps)
 67 | 
 68 | 
 69 | class SimpleEnv(gym.Env):
 70 |     """
 71 |     An environment with a pre-determined observation space
 72 |     and RNG seed.
 73 |     """
 74 | 
 75 |     def __init__(self, seed, shape, dtype):
 76 |         np.random.seed(seed)
 77 |         self._dtype = dtype
 78 |         self._start_obs = np.array(np.random.randint(0, 0x100, size=shape),
 79 |                                    dtype=dtype)
 80 |         self._max_steps = seed + 1
 81 |         self._cur_obs = None
 82 |         self._cur_step = 0
 83 |         # this is 0xFF instead of 0x100 because the Box space includes
 84 |         # the high end, while randint does not
 85 |         self.action_space = gym.spaces.Box(low=0, high=0xFF, shape=shape, dtype=dtype)
 86 |         self.observation_space = self.action_space
 87 | 
 88 |     def step(self, action):
 89 |         self._cur_obs += np.array(action, dtype=self._dtype)
 90 |         self._cur_step += 1
 91 |         done = self._cur_step >= self._max_steps
 92 |         reward = self._cur_step / self._max_steps
 93 |         return self._cur_obs, reward, done, {'foo': 'bar' + str(reward)}
 94 | 
 95 |     def reset(self):
 96 |         self._cur_obs = self._start_obs
 97 |         self._cur_step = 0
 98 |         return self._cur_obs
 99 | 
100 |     def render(self, mode=None):
101 |         raise NotImplementedError
102 | 


--------------------------------------------------------------------------------
/miniworld/baselines/common/vec_env/test_video_recorder.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Tests for asynchronous vectorized environments.
 3 | """
 4 | 
 5 | import gym
 6 | import pytest
 7 | import os
 8 | import glob
 9 | import tempfile
10 | 
11 | from .dummy_vec_env import DummyVecEnv
12 | from .shmem_vec_env import ShmemVecEnv
13 | from .subproc_vec_env import SubprocVecEnv
14 | from .vec_video_recorder import VecVideoRecorder
15 | 
16 | @pytest.mark.parametrize('klass', (DummyVecEnv, ShmemVecEnv, SubprocVecEnv))
17 | @pytest.mark.parametrize('num_envs', (1, 4))
18 | @pytest.mark.parametrize('video_length', (10, 100))
19 | @pytest.mark.parametrize('video_interval', (1, 50))
20 | def test_video_recorder(klass, num_envs, video_length, video_interval):
21 |     """
22 |     Wrap an existing VecEnv with VevVideoRecorder,
23 |     Make (video_interval + video_length + 1) steps,
24 |     then check that the file is present
25 |     """
26 | 
27 |     def make_fn():
28 |         env = gym.make('PongNoFrameskip-v4')
29 |         return env
30 |     fns = [make_fn for _ in range(num_envs)]
31 |     env = klass(fns)
32 | 
33 |     with tempfile.TemporaryDirectory() as video_path:
34 |         env = VecVideoRecorder(env, video_path, record_video_trigger=lambda x: x % video_interval == 0, video_length=video_length)
35 | 
36 |         env.reset()
37 |         for _ in range(video_interval + video_length + 1):
38 |             env.step([0] * num_envs)
39 |         env.close()
40 | 
41 | 
42 |         recorded_video = glob.glob(os.path.join(video_path, "*.mp4"))
43 | 
44 |         # first and second step
45 |         assert len(recorded_video) == 2
46 |         # Files are not empty
47 |         assert all(os.stat(p).st_size != 0 for p in recorded_video)
48 | 
49 | 
50 | 


--------------------------------------------------------------------------------
/miniworld/baselines/common/vec_env/util.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Helpers for dealing with vectorized environments.
 3 | """
 4 | 
 5 | from collections import OrderedDict
 6 | 
 7 | import gym
 8 | import numpy as np
 9 | 
10 | 
11 | def copy_obs_dict(obs):
12 |     """
13 |     Deep-copy an observation dict.
14 |     """
15 |     return {k: np.copy(v) for k, v in obs.items()}
16 | 
17 | 
18 | def dict_to_obs(obs_dict):
19 |     """
20 |     Convert an observation dict into a raw array if the
21 |     original observation space was not a Dict space.
22 |     """
23 |     if set(obs_dict.keys()) == {None}:
24 |         return obs_dict[None]
25 |     return obs_dict
26 | 
27 | 
28 | def obs_space_info(obs_space):
29 |     """
30 |     Get dict-structured information about a gym.Space.
31 | 
32 |     Returns:
33 |       A tuple (keys, shapes, dtypes):
34 |         keys: a list of dict keys.
35 |         shapes: a dict mapping keys to shapes.
36 |         dtypes: a dict mapping keys to dtypes.
37 |     """
38 |     if isinstance(obs_space, gym.spaces.Dict):
39 |         assert isinstance(obs_space.spaces, OrderedDict)
40 |         subspaces = obs_space.spaces
41 |     else:
42 |         subspaces = {None: obs_space}
43 |     keys = []
44 |     shapes = {}
45 |     dtypes = {}
46 |     for key, box in subspaces.items():
47 |         keys.append(key)
48 |         shapes[key] = box.shape
49 |         dtypes[key] = box.dtype
50 |     return keys, shapes, dtypes
51 | 
52 | 
53 | def obs_to_dict(obs):
54 |     """
55 |     Convert an observation into a dict.
56 |     """
57 |     if isinstance(obs, dict):
58 |         return obs
59 |     return {None: obs}
60 | 


--------------------------------------------------------------------------------
/miniworld/baselines/common/vec_env/vec_frame_stack.py:
--------------------------------------------------------------------------------
 1 | from . import VecEnvWrapper
 2 | import numpy as np
 3 | from gym import spaces
 4 | 
 5 | 
 6 | class VecFrameStack(VecEnvWrapper):
 7 |     def __init__(self, venv, nstack):
 8 |         self.venv = venv
 9 |         self.nstack = nstack
10 |         wos = venv.observation_space  # wrapped ob space
11 |         low = np.repeat(wos.low, self.nstack, axis=-1)
12 |         high = np.repeat(wos.high, self.nstack, axis=-1)
13 |         self.stackedobs = np.zeros((venv.num_envs,) + low.shape, low.dtype)
14 |         observation_space = spaces.Box(low=low, high=high, dtype=venv.observation_space.dtype)
15 |         VecEnvWrapper.__init__(self, venv, observation_space=observation_space)
16 | 
17 |     def step_wait(self):
18 |         obs, rews, news, infos = self.venv.step_wait()
19 |         self.stackedobs = np.roll(self.stackedobs, shift=-1, axis=-1)
20 |         for (i, new) in enumerate(news):
21 |             if new:
22 |                 self.stackedobs[i] = 0
23 |         self.stackedobs[..., -obs.shape[-1]:] = obs
24 |         return self.stackedobs, rews, news, infos
25 | 
26 |     def reset(self):
27 |         obs = self.venv.reset()
28 |         self.stackedobs[...] = 0
29 |         self.stackedobs[..., -obs.shape[-1]:] = obs
30 |         return self.stackedobs
31 | 


--------------------------------------------------------------------------------
/miniworld/baselines/common/vec_env/vec_monitor.py:
--------------------------------------------------------------------------------
 1 | from . import VecEnvWrapper
 2 | from baselines.bench.monitor import ResultsWriter
 3 | import numpy as np
 4 | import time
 5 | 
 6 | 
 7 | class VecMonitor(VecEnvWrapper):
 8 |     def __init__(self, venv, filename=None):
 9 |         VecEnvWrapper.__init__(self, venv)
10 |         self.eprets = None
11 |         self.eplens = None
12 |         self.tstart = time.time()
13 |         self.results_writer = ResultsWriter(filename, header={'t_start': self.tstart})
14 | 
15 |     def reset(self):
16 |         obs = self.venv.reset()
17 |         self.eprets = np.zeros(self.num_envs, 'f')
18 |         self.eplens = np.zeros(self.num_envs, 'i')
19 |         return obs
20 | 
21 |     def step_wait(self):
22 |         obs, rews, dones, infos = self.venv.step_wait()
23 |         self.eprets += rews
24 |         self.eplens += 1
25 |         newinfos = []
26 |         for (i, (done, ret, eplen, info)) in enumerate(zip(dones, self.eprets, self.eplens, infos)):
27 |             info = info.copy()
28 |             if done:
29 |                 epinfo = {'r': ret, 'l': eplen, 't': round(time.time() - self.tstart, 6)}
30 |                 info['episode'] = epinfo
31 |                 self.eprets[i] = 0
32 |                 self.eplens[i] = 0
33 |                 self.results_writer.write_row(epinfo)
34 | 
35 |             newinfos.append(info)
36 | 
37 |         return obs, rews, dones, newinfos
38 | 


--------------------------------------------------------------------------------
/miniworld/baselines/common/vec_env/vec_normalize.py:
--------------------------------------------------------------------------------
 1 | from . import VecEnvWrapper
 2 | from baselines.common.running_mean_std import RunningMeanStd
 3 | import numpy as np
 4 | 
 5 | 
 6 | class VecNormalize(VecEnvWrapper):
 7 |     """
 8 |     A vectorized wrapper that normalizes the observations
 9 |     and returns from an environment.
10 |     """
11 | 
12 |     def __init__(self, venv, ob=True, ret=True, clipob=10., cliprew=10., gamma=0.99, epsilon=1e-8):
13 |         VecEnvWrapper.__init__(self, venv)
14 |         self.ob_rms = RunningMeanStd(shape=self.observation_space.shape) if ob else None
15 |         self.ret_rms = RunningMeanStd(shape=()) if ret else None
16 |         self.clipob = clipob
17 |         self.cliprew = cliprew
18 |         self.ret = np.zeros(self.num_envs)
19 |         self.gamma = gamma
20 |         self.epsilon = epsilon
21 | 
22 |     def step_wait(self):
23 |         obs, rews, news, infos = self.venv.step_wait()
24 |         self.ret = self.ret * self.gamma + rews
25 |         obs = self._obfilt(obs)
26 |         if self.ret_rms:
27 |             self.ret_rms.update(self.ret)
28 |             rews = np.clip(rews / np.sqrt(self.ret_rms.var + self.epsilon), -self.cliprew, self.cliprew)
29 |         self.ret[news] = 0.
30 |         return obs, rews, news, infos
31 | 
32 |     def _obfilt(self, obs):
33 |         if self.ob_rms:
34 |             self.ob_rms.update(obs)
35 |             obs = np.clip((obs - self.ob_rms.mean) / np.sqrt(self.ob_rms.var + self.epsilon), -self.clipob, self.clipob)
36 |             return obs
37 |         else:
38 |             return obs
39 | 
40 |     def reset(self):
41 |         self.ret = np.zeros(self.num_envs)
42 |         obs = self.venv.reset()
43 |         return self._obfilt(obs)
44 | 


--------------------------------------------------------------------------------
/miniworld/baselines/common/vec_env/vec_video_recorder.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from baselines import logger
 3 | from baselines.common.vec_env import VecEnvWrapper
 4 | from gym.wrappers.monitoring import video_recorder
 5 | 
 6 | 
 7 | class VecVideoRecorder(VecEnvWrapper):
 8 |     """
 9 |     Wrap VecEnv to record rendered image as mp4 video.
10 |     """
11 | 
12 |     def __init__(self, venv, directory, record_video_trigger, video_length=200):
13 |         """
14 |         # Arguments
15 |             venv: VecEnv to wrap
16 |             directory: Where to save videos
17 |             record_video_trigger:
18 |                 Function that defines when to start recording.
19 |                 The function takes the current number of step,
20 |                 and returns whether we should start recording or not.
21 |             video_length: Length of recorded video
22 |         """
23 | 
24 |         VecEnvWrapper.__init__(self, venv)
25 |         self.record_video_trigger = record_video_trigger
26 |         self.video_recorder = None
27 | 
28 |         self.directory = os.path.abspath(directory)
29 |         if not os.path.exists(self.directory): os.mkdir(self.directory)
30 | 
31 |         self.file_prefix = "vecenv"
32 |         self.file_infix = '{}'.format(os.getpid())
33 |         self.step_id = 0
34 |         self.video_length = video_length
35 | 
36 |         self.recording = False
37 |         self.recorded_frames = 0
38 | 
39 |     def reset(self):
40 |         obs = self.venv.reset()
41 | 
42 |         self.start_video_recorder()
43 | 
44 |         return obs
45 | 
46 |     def start_video_recorder(self):
47 |         self.close_video_recorder()
48 | 
49 |         base_path = os.path.join(self.directory, '{}.video.{}.video{:06}'.format(self.file_prefix, self.file_infix, self.step_id))
50 |         self.video_recorder = video_recorder.VideoRecorder(
51 |                 env=self.venv,
52 |                 base_path=base_path,
53 |                 metadata={'step_id': self.step_id}
54 |                 )
55 | 
56 |         self.video_recorder.capture_frame()
57 |         self.recorded_frames = 1
58 |         self.recording = True
59 | 
60 |     def _video_enabled(self):
61 |         return self.record_video_trigger(self.step_id)
62 | 
63 |     def step_wait(self):
64 |         obs, rews, dones, infos = self.venv.step_wait()
65 | 
66 |         self.step_id += 1
67 |         if self.recording:
68 |             self.video_recorder.capture_frame()
69 |             self.recorded_frames += 1
70 |             if self.recorded_frames > self.video_length:
71 |                 logger.info("Saving video to ", self.video_recorder.path)
72 |                 self.close_video_recorder()
73 |         elif self._video_enabled():
74 |                 self.start_video_recorder()
75 | 
76 |         return obs, rews, dones, infos
77 | 
78 |     def close_video_recorder(self):
79 |         if self.recording:
80 |             self.video_recorder.close()
81 |         self.recording = False
82 |         self.recorded_frames = 0
83 | 
84 |     def close(self):
85 |         VecEnvWrapper.close(self)
86 |         self.close_video_recorder()
87 | 
88 |     def __del__(self):
89 |         self.close()
90 | 


--------------------------------------------------------------------------------
/miniworld/baselines/ppoc_int/README.md:
--------------------------------------------------------------------------------
 1 | # PPOSGD
 2 | 
 3 | - Original paper: https://arxiv.org/abs/1707.06347
 4 | - Baselines blog post: https://blog.openai.com/openai-baselines-ppo/
 5 | - `mpirun -np 8 python -m baselines.ppo1.run_atari` runs the algorithm for 40M frames = 10M timesteps on an Atari game. See help (`-h`) for more options.
 6 | - `python -m baselines.ppo1.run_mujoco` runs the algorithm for 1M frames on a Mujoco environment.
 7 | 
 8 | - Train mujoco 3d humanoid (with optimal-ish hyperparameters): `mpirun -np 16 python -m baselines.ppo1.run_humanoid --model-path=/path/to/model`
 9 | - Render the 3d humanoid: `python -m baselines.ppo1.run_humanoid --play --model-path=/path/to/model`
10 | 


--------------------------------------------------------------------------------
/miniworld/baselines/ppoc_int/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kkhetarpal/ioc/fb88d5b881e0b5c317020b23874495e614b3ddc7/miniworld/baselines/ppoc_int/__init__.py


--------------------------------------------------------------------------------
/miniworld/baselines/ppoc_int/muj.py:
--------------------------------------------------------------------------------
 1 | 
 2 | # from rllab.envs.box2d.cartpole_swingup_env import CartpoleSwingupEnv
 3 | # from rllab.envs.mujoco.maze.point_maze_env import PointMazeEnv
 4 | # from rllab.envs.mujoco.maze.ant_maze_env import AntMazeEnv
 5 | 
 6 | # from rllab.envs.mujoco.hill.half_cheetah_hill_env import HalfCheetahHillEnv
 7 | # from rllab.envs.mujoco.hill.swimmer3d_hill_env import Swimmer3DHillEnv
 8 | import pdb
 9 | import time
10 | import gym
11 | import numpy as np
12 | # import my_gym;
13 | #from rllab.envs.mujoco.gather.swimmer_gather_env import SwimmerGatherEnv
14 | # from rllab.envs.mujoco.gather.ant_gather_env import AntGatherEnv
15 | # from rllab.envs.mujoco.gather.point_gather_env import PointGatherEnv
16 | # from rllab.envs.box2d.mountain_car_env import MountainCarEnv
17 | #from twod_tmaze2 import TMaze2
18 | #from antwalls import AntWallsEnv
19 | 
20 | import time
21 | import gym_miniworld
22 | from gym_miniworld.entity import Box as miniBox
23 | from gym_miniworld.envs.oneroom import OneRoom
24 | 
25 | 
26 | #from antmaze import AntMazeEnv
27 | 
28 | # from wheeled import WheeledEnv
29 | # from wheeled_maze import WheeledMazeEnv
30 | # from blockplaypen import BlockPlayPen
31 | # from twod_multi import TwoDMultiEnv
32 | # env = BlockPlayPen()
33 | # env = TwoDMaze()
34 | # env = TwoDMultiEnv()
35 | 
36 | 
37 | #env = SwimmerGatherEnv()
38 | #env = AntMazeEnv()
39 | 
40 | #env = gym.make('MiniWorld-Hallway-v0')
41 | #env = gym.make('MiniWorld-OneRoom-v0')
42 | #env = gym.make('MiniWorld-PutNext-v0')
43 | env = gym.make('MiniWorld-PickupObjs-v0')
44 | 
45 | 
46 | #env=AntWallsEnv()
47 | #env= TMaze2()
48 | # env= gym.make("Reacher-v1")
49 | # env.seed(0)
50 | # pdb.set_trace()
51 | # env= PointMazeEnv()
52 | # env = gym.make("Acrobot-v1")
53 | #env.reset()
54 | # env.render()
55 | # state,reward, done, _ = env.step(np.array([0.,10.]))
56 | # env.render()
57 | # state,reward, done, _ = env.step(np.array([0.,10.]))
58 | # env.render()
59 | # state,reward, done, _ = env.step(np.array([0.,10.]))
60 | 
61 | episodes = 0
62 | 
63 | for step in range(500):
64 | 	env.render()
65 | 	time.sleep(1)
66 | 	# pdb.set_trace()
67 | 	# print(t)
68 | 	# if True:
69 | 	# 	continue
70 | 	# 	print("aaa")
71 | 	# state,reward, done, _ = env.step(np.array([0.,0.]))
72 | 	# pdb.set_trace()
73 | 	state,reward, done, _ = env.step(env.action_space.sample())
74 | 	#print(env.box.pos)
75 | 
76 | 	done = True
77 | 	if done:
78 | 		#pdb.set_trace()
79 | 		env.reset()
80 | 		episodes += 1
81 | 
82 | 		# if episodes == 10:
83 | 		# 	env = OneRoom(change_goal=True)
84 | 		#
85 | 		# time.sleep(0.1)


--------------------------------------------------------------------------------
/miniworld/baselines/ppoc_int/oneroom.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import math
 3 | from ..miniworld import MiniWorldEnv, Room
 4 | from ..entity import Box
 5 | 
 6 | class OneRoom(MiniWorldEnv):
 7 |     """
 8 |     Environment in which the goal is to go to a red box
 9 |     placed randomly in one big room.
10 |     """
11 | 
12 |     def __init__(self, size=10,change_goal=None, **kwargs):
13 |         assert size >= 2
14 |         self.size = size
15 |         self.change_goal = change_goal
16 | 
17 |         super().__init__(
18 |             max_episode_steps=180,
19 |             **kwargs
20 |         )
21 | 
22 |     def _gen_world(self):
23 |         room = self.add_rect_room(
24 |             min_x=0,
25 |             max_x=self.size,
26 |             min_z=0,
27 |             max_z=self.size
28 |         )
29 | 
30 |         if not self.change_goal:
31 |             self.box = self.place_entity(Box(color='red'))
32 |         else:
33 |             self.box = self.place_entity(Box(color='blue'))
34 |         self.place_agent()
35 | 
36 |     def step(self, action):
37 |         obs, reward, done, info = super().step(action)
38 | 
39 |         if self.near(self.box):
40 |             reward += self._reward()
41 |             done = True
42 | 
43 |         return obs, reward, done, info
44 | 


--------------------------------------------------------------------------------
/miniworld/baselines/ppoc_int/run_miniw.py:
--------------------------------------------------------------------------------
 1 | # !/usr/bin/env python
 2 | from baselines.common import set_global_seeds, tf_util as U
 3 | from mpi4py import MPI
 4 | from baselines import bench
 5 | import os.path as osp
 6 | import gym, logging
 7 | import gym_miniworld
 8 | from baselines import logger
 9 | 
10 | 
11 | 
12 | def train(env_id, num_timesteps, seed, num_options, app, saves, wsaves, epoch, dc, plots, w_intfc, switch, mainlr, intlr, piolr, fewshot):
13 |     from baselines.ppoc_int import cnn_policy, pposgd_simple
14 |     rank = MPI.COMM_WORLD.Get_rank()
15 |     sess = U.single_threaded_session()
16 |     sess.__enter__()
17 |     if rank == 0:
18 |         logger.configure()
19 |     else:
20 |         logger.configure(format_strs=[])
21 |     workerseed = seed + 10000 * MPI.COMM_WORLD.Get_rank() if seed is not None else None
22 |     set_global_seeds(workerseed)
23 | 
24 |     env = gym.make(env_id)
25 |     env.seed(workerseed)
26 | 
27 | 
28 |     def policy_fn(name, ob_space, ac_space):
29 |         return cnn_policy.CnnPolicy(name=name, ob_space=ob_space, ac_space=ac_space, num_options=num_options, dc=dc, w_intfc=w_intfc)
30 | 
31 |     env = bench.Monitor(env, logger.get_dir() and
32 |                         osp.join(logger.get_dir(), str(rank)))
33 | 
34 |     optimsize = int(64 / num_options)
35 | 
36 | 
37 |     num_timesteps = num_timesteps
38 |     tperbatch = 2048 if not epoch else int(1e4)
39 |     pposgd_simple.learn(env, policy_fn,
40 |                         max_timesteps=num_timesteps,
41 |                         timesteps_per_batch=tperbatch,
42 |                         clip_param=0.2, entcoeff=0.01,
43 |                         optim_epochs=4, optim_stepsize=mainlr, optim_batchsize=optimsize,
44 |                         gamma=0.99, lam=0.95, schedule='linear', num_options=num_options,
45 |                         app=app, saves=saves, wsaves=wsaves, epoch=epoch, seed=seed, dc=dc, plots=plots,
46 |                         w_intfc=w_intfc, switch=switch, intlr=intlr, piolr=piolr, fewshot=fewshot
47 |                         )
48 |     env.close()
49 | 
50 | 
51 | def main():
52 |     import argparse
53 |     parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
54 |     parser.add_argument('--env', help='environment ID', default='MiniWorld-OneRoom-v0')
55 |     parser.add_argument('--timesteps', help='number of timesteps', type=int, default=1000000)
56 |     parser.add_argument('--seed', help='RNG seed', type=int, default=1)
57 |     parser.add_argument('--opt', help='number of options', type=int, default=2)
58 |     parser.add_argument('--app', help='Append to folder name', type=str, default='')
59 |     parser.add_argument('--saves', dest='saves', action='store_true', default=False)
60 |     parser.add_argument('--wsaves', dest='wsaves', action='store_true', default=False)
61 |     parser.add_argument('--plots', dest='plots', action='store_true', default=False)
62 |     parser.add_argument('--switch', dest='switch', help='switch task after 150 iterations', action='store_true', default=False)
63 |     parser.add_argument('--fewshot', dest='fewshot', help='value learning after 150 iterations', action='store_true', default=False)
64 |     parser.add_argument('--nointfc', dest='w_intfc', help='disables interet functions', action='store_false', default=True)
65 |     parser.add_argument('--epoch', help='Epoch', type=int, default=0)
66 |     parser.add_argument('--dc', type=float, default=0.)
67 |     parser.add_argument('--mainlr', type=float, default=3e-4)
68 |     parser.add_argument('--intlr', type=float, default=1e-4)
69 |     parser.add_argument('--piolr', type=float, default=1e-4)
70 | 
71 | 
72 |     args = parser.parse_args()
73 | 
74 |     train(args.env, num_timesteps=args.timesteps, seed=args.seed, num_options=args.opt, app=args.app, saves=args.saves,
75 |           wsaves=args.wsaves, epoch=args.epoch, dc=args.dc, plots=args.plots, w_intfc=args.w_intfc, switch=args.switch,
76 |           mainlr=args.mainlr, intlr=args.intlr, piolr=args.piolr, fewshot=args.fewshot)
77 | 
78 | 
79 | if __name__ == '__main__':
80 |     main()


--------------------------------------------------------------------------------
/miniworld/baselines/ppoc_int/run_mujoco.py:
--------------------------------------------------------------------------------
 1 | # !/usr/bin/env python
 2 | from baselines.common import set_global_seeds, tf_util as U
 3 | from baselines import bench
 4 | import os.path as osp
 5 | import gym, logging
 6 | # import gym_miniworld
 7 | import pdb
 8 | from baselines import logger
 9 | import sys
10 | # from gym_miniworld.wrappers import GreyscaleWrapper
11 | 
12 | def train(env_id, num_timesteps, seed, num_options,app, saves ,wsaves, epoch,dc,plots,w_intfc,switch,mainlr,intlr,fewshot):
13 |     from baselines.ppoc_int import mlp_policy, pposgd_simple
14 |     U.make_session(num_cpu=1).__enter__()
15 |     set_global_seeds(seed)
16 |     
17 |     if env_id=="TMaze":
18 |         from twod_tmaze import TMaze
19 |         env=TMaze()
20 |         env.seed(seed) 
21 |     elif env_id=="TMaze2":
22 |         from twod_tmaze2 import TMaze2
23 |         env=TMaze2()
24 |         env.seed(seed)         
25 |     elif env_id=="AntWalls":      
26 |         from antwalls import AntWallsEnv
27 |         env=AntWallsEnv()
28 |         env.seed(seed)
29 |     elif env_id=="AntMaze":
30 |         from ant_maze_env import AntMazeEnv
31 |         mazeid = 'Maze'
32 |         env = AntMazeEnv(mazeid)
33 |         env.seed(seed)
34 |     else:
35 |         env = gym.make(env_id)
36 |         env._seed(seed)
37 | 
38 | 
39 |     def policy_fn(name, ob_space, ac_space):
40 |         return mlp_policy.MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
41 |             hid_size=64, num_hid_layers=2, num_options=num_options, dc=dc, w_intfc=w_intfc)
42 | 
43 |     gym.logger.setLevel(logging.WARN)
44 | 
45 |     optimsize=int(64/num_options)
46 | 
47 |     # pdb.set_trace()
48 |     num_timesteps = num_timesteps if env_id!="TMaze" else 5e5
49 |     tperbatch = 2048 if not epoch else int(1e4)
50 |     pposgd_simple.learn(env, policy_fn, 
51 |             max_timesteps=num_timesteps,
52 |             timesteps_per_batch=tperbatch,
53 |             clip_param=0.2, entcoeff=0.0,
54 |             optim_epochs=10, optim_stepsize=mainlr, optim_batchsize=optimsize,
55 |             gamma=0.99, lam=0.95, schedule='constant', num_options=num_options,
56 |             app=app, saves=saves, wsaves=wsaves, epoch=epoch, seed=seed,dc=dc,plots=plots,
57 |             w_intfc=w_intfc,switch=switch,intlr=intlr,fewshot=fewshot
58 |         )
59 |     env.close()
60 | 
61 | def main():
62 |     import argparse
63 |     parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
64 |     parser.add_argument('--env', help='environment ID', default='TMaze')
65 |     parser.add_argument('--timesteps', help='number of timesteps', type=int, default=1000000)
66 |     parser.add_argument('--seed', help='RNG seed', type=int, default=1)
67 |     parser.add_argument('--opt', help='number of options', type=int, default=2) 
68 |     parser.add_argument('--app', help='Append to folder name', type=str, default='')        
69 |     parser.add_argument('--saves', dest='saves', action='store_true', default=False)
70 |     parser.add_argument('--wsaves', dest='wsaves', action='store_true', default=False)    
71 |     parser.add_argument('--plots', dest='plots', action='store_true', default=False)    
72 |     parser.add_argument('--switch', dest='switch', action='store_true', default=False)    
73 |     parser.add_argument('--fewshot', dest='fewshot', action='store_true', default=False)
74 |     parser.add_argument('--nointfc', dest='w_intfc', action='store_false', default=True)
75 |     parser.add_argument('--epoch', help='Epoch', type=int, default=0) 
76 |     parser.add_argument('--dc', type=float, default=0.)
77 |     parser.add_argument('--mainlr', type=float, default=3e-4)
78 |     parser.add_argument('--intlr', type=float, default=1e-4)
79 | 
80 |     # pdb.set_trace()
81 |     args = parser.parse_args()
82 | 
83 |     train(args.env, num_timesteps=args.timesteps, seed=args.seed, num_options=args.opt, app=args.app,
84 |      saves=args.saves, wsaves=args.wsaves, epoch=args.epoch,dc=args.dc,plots=args.plots,
85 |      w_intfc=args.w_intfc,switch=args.switch,mainlr=args.mainlr,intlr=args.intlr,fewshot=args.fewshot)
86 | 
87 | 
88 | if __name__ == '__main__':
89 |     main()


--------------------------------------------------------------------------------
/miniworld/baselines/results_plotter.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib
 3 | matplotlib.use('TkAgg') # Can change to 'Agg' for non-interactive mode
 4 | 
 5 | import matplotlib.pyplot as plt
 6 | plt.rcParams['svg.fonttype'] = 'none'
 7 | 
 8 | from baselines.common import plot_util
 9 | 
10 | X_TIMESTEPS = 'timesteps'
11 | X_EPISODES = 'episodes'
12 | X_WALLTIME = 'walltime_hrs'
13 | Y_REWARD = 'reward'
14 | Y_TIMESTEPS = 'timesteps'
15 | POSSIBLE_X_AXES = [X_TIMESTEPS, X_EPISODES, X_WALLTIME]
16 | EPISODES_WINDOW = 100
17 | COLORS = ['blue', 'green', 'red', 'cyan', 'magenta', 'yellow', 'black', 'purple', 'pink',
18 |         'brown', 'orange', 'teal', 'coral', 'lightblue', 'lime', 'lavender', 'turquoise',
19 |         'darkgreen', 'tan', 'salmon', 'gold', 'darkred', 'darkblue']
20 | 
21 | def rolling_window(a, window):
22 |     shape = a.shape[:-1] + (a.shape[-1] - window + 1, window)
23 |     strides = a.strides + (a.strides[-1],)
24 |     return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)
25 | 
26 | def window_func(x, y, window, func):
27 |     yw = rolling_window(y, window)
28 |     yw_func = func(yw, axis=-1)
29 |     return x[window-1:], yw_func
30 | 
31 | def ts2xy(ts, xaxis, yaxis):
32 |     if xaxis == X_TIMESTEPS:
33 |         x = np.cumsum(ts.l.values)
34 |     elif xaxis == X_EPISODES:
35 |         x = np.arange(len(ts))
36 |     elif xaxis == X_WALLTIME:
37 |         x = ts.t.values / 3600.
38 |     else:
39 |         raise NotImplementedError
40 |     if yaxis == Y_REWARD:
41 |         y = ts.r.values
42 |     elif yaxis == Y_TIMESTEPS:
43 |         y = ts.l.values
44 |     else:
45 |         raise NotImplementedError
46 |     return x, y
47 | 
48 | def plot_curves(xy_list, xaxis, yaxis, title):
49 |     fig = plt.figure(figsize=(8,2))
50 |     maxx = max(xy[0][-1] for xy in xy_list)
51 |     minx = 0
52 |     for (i, (x, y)) in enumerate(xy_list):
53 |         color = COLORS[i % len(COLORS)]
54 |         plt.scatter(x, y, s=2)
55 |         x, y_mean = window_func(x, y, EPISODES_WINDOW, np.mean) #So returns average of last EPISODE_WINDOW episodes
56 |         plt.plot(x, y_mean, color=color)
57 |     plt.xlim(minx, maxx)
58 |     plt.title(title)
59 |     plt.xlabel(xaxis)
60 |     plt.ylabel(yaxis)
61 |     plt.tight_layout()
62 |     fig.canvas.mpl_connect('resize_event', lambda event: plt.tight_layout())
63 |     plt.grid(True)
64 | 
65 | 
66 | def split_by_task(taskpath):
67 |     return taskpath['dirname'].split('/')[-1].split('-')[0]
68 | 
69 | def plot_results(dirs, num_timesteps=10e6, xaxis=X_TIMESTEPS, yaxis=Y_REWARD, title='', split_fn=split_by_task):
70 |     results = plot_util.load_results(dirs)
71 |     plot_util.plot_results(results, xy_fn=lambda r: ts2xy(r['monitor'], xaxis, yaxis), split_fn=split_fn, average_group=True, resample=int(1e6))
72 | 
73 | # Example usage in jupyter-notebook
74 | # from baselines.results_plotter import plot_results
75 | # %matplotlib inline
76 | # plot_results("./log")
77 | # Here ./log is a directory containing the monitor.csv files
78 | 
79 | def main():
80 |     import argparse
81 |     import os
82 |     parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
83 |     parser.add_argument('--dirs', help='List of log directories', nargs = '*', default=['./log'])
84 |     parser.add_argument('--num_timesteps', type=int, default=int(10e6))
85 |     parser.add_argument('--xaxis', help = 'Varible on X-axis', default = X_TIMESTEPS)
86 |     parser.add_argument('--yaxis', help = 'Varible on Y-axis', default = Y_REWARD)
87 |     parser.add_argument('--task_name', help = 'Title of plot', default = 'Breakout')
88 |     args = parser.parse_args()
89 |     args.dirs = [os.path.abspath(dir) for dir in args.dirs]
90 |     plot_results(args.dirs, args.num_timesteps, args.xaxis, args.yaxis, args.task_name)
91 |     plt.show()
92 | 
93 | if __name__ == '__main__':
94 |     main()
95 | 


--------------------------------------------------------------------------------
/miniworld/data/cartpole.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kkhetarpal/ioc/fb88d5b881e0b5c317020b23874495e614b3ddc7/miniworld/data/cartpole.gif


--------------------------------------------------------------------------------
/miniworld/data/fetchPickAndPlaceContrast.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kkhetarpal/ioc/fb88d5b881e0b5c317020b23874495e614b3ddc7/miniworld/data/fetchPickAndPlaceContrast.png


--------------------------------------------------------------------------------
/miniworld/data/logo.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kkhetarpal/ioc/fb88d5b881e0b5c317020b23874495e614b3ddc7/miniworld/data/logo.jpg


--------------------------------------------------------------------------------
/miniworld/setup.cfg:
--------------------------------------------------------------------------------
1 | [flake8]
2 | select = F,E999,W291,W293
3 | exclude = 
4 |     .git,
5 |     __pycache__,
6 |     baselines/ppo1,
7 |     baselines/bench,
8 | 


--------------------------------------------------------------------------------
/miniworld/setup.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | from setuptools import setup, find_packages
 3 | import sys
 4 | 
 5 | if sys.version_info.major != 3:
 6 |     print('This Python is only compatible with Python 3, but you are running '
 7 |           'Python {}. The installation will likely fail.'.format(sys.version_info.major))
 8 | 
 9 | 
10 | extras = {
11 |     'test': [
12 |         'filelock',
13 |         'pytest',
14 |         'pytest-forked',
15 |         'atari-py'
16 |     ],
17 |     'bullet': [
18 |         'pybullet',
19 |     ],
20 |     'mpi': [
21 |         'mpi4py'
22 |     ]
23 | }
24 | 
25 | all_deps = []
26 | for group_name in extras:
27 |     all_deps += extras[group_name]
28 | 
29 | extras['all'] = all_deps
30 | 
31 | setup(name='baselines',
32 |       packages=[package for package in find_packages()
33 |                 if package.startswith('baselines')],
34 |       install_requires=[
35 |           'gym',
36 |           'scipy',
37 |           'tqdm',
38 |           'joblib',
39 |           'dill',
40 |           'progressbar2',
41 |           'cloudpickle',
42 |           'click',
43 |           'opencv-python'
44 |       ],
45 |       extras_require=extras,
46 |       description='OpenAI baselines: high quality implementations of reinforcement learning algorithms',
47 |       author='OpenAI',
48 |       url='https://github.com/openai/baselines',
49 |       author_email='gym@openai.com',
50 |       version='0.1.5')
51 | 
52 | 
53 | # ensure there is some tensorflow build with version above 1.4
54 | import pkg_resources
55 | tf_pkg = None
56 | for tf_pkg_name in ['tensorflow', 'tensorflow-gpu', 'tf-nightly', 'tf-nightly-gpu']:
57 |     try:
58 |         tf_pkg = pkg_resources.get_distribution(tf_pkg_name)
59 |     except pkg_resources.DistributionNotFound:
60 |         pass
61 | assert tf_pkg is not None, 'TensorFlow needed, of version above 1.4'
62 | from distutils.version import LooseVersion
63 | assert LooseVersion(re.sub(r'-?rc\d+$', '', tf_pkg.version)) >= LooseVersion('1.4.0')
64 | 


--------------------------------------------------------------------------------
/tabular/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kkhetarpal/ioc/fb88d5b881e0b5c317020b23874495e614b3ddc7/tabular/.DS_Store


--------------------------------------------------------------------------------
/tabular/FR_Return_10Runs_Smooth_MisspecifiedPiO.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kkhetarpal/ioc/fb88d5b881e0b5c317020b23874495e614b3ddc7/tabular/FR_Return_10Runs_Smooth_MisspecifiedPiO.pdf


--------------------------------------------------------------------------------
/tabular/FR_Steps_10Runs_Smooth_MisspecifiedPiO.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kkhetarpal/ioc/fb88d5b881e0b5c317020b23874495e614b3ddc7/tabular/FR_Steps_10Runs_Smooth_MisspecifiedPiO.pdf


--------------------------------------------------------------------------------
/tabular/GoalG62.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kkhetarpal/ioc/fb88d5b881e0b5c317020b23874495e614b3ddc7/tabular/GoalG62.png


--------------------------------------------------------------------------------
/tabular/InterestOptionCritic/Runs10_Epsds500_Eps0.01_NOpt4_LRT0.25_LRI0.25_LRC0.5_temp0.01_IF0.15_LReg0.0_seed7200/History.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kkhetarpal/ioc/fb88d5b881e0b5c317020b23874495e614b3ddc7/tabular/InterestOptionCritic/Runs10_Epsds500_Eps0.01_NOpt4_LRT0.25_LRI0.25_LRC0.5_temp0.01_IF0.15_LReg0.0_seed7200/History.npy


--------------------------------------------------------------------------------
/tabular/InterestOptionCritic/Runs10_Epsds500_Eps0.01_NOpt4_LRT0.25_LRI0.25_LRC0.5_temp0.01_IF0.15_LReg0.0_seed7200/IOC_Task1_IntraOptionPolicy_Opt_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kkhetarpal/ioc/fb88d5b881e0b5c317020b23874495e614b3ddc7/tabular/InterestOptionCritic/Runs10_Epsds500_Eps0.01_NOpt4_LRT0.25_LRI0.25_LRC0.5_temp0.01_IF0.15_LReg0.0_seed7200/IOC_Task1_IntraOptionPolicy_Opt_0.png


--------------------------------------------------------------------------------
/tabular/InterestOptionCritic/Runs10_Epsds500_Eps0.01_NOpt4_LRT0.25_LRI0.25_LRC0.5_temp0.01_IF0.15_LReg0.0_seed7200/IOC_Task1_IntraOptionPolicy_Opt_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kkhetarpal/ioc/fb88d5b881e0b5c317020b23874495e614b3ddc7/tabular/InterestOptionCritic/Runs10_Epsds500_Eps0.01_NOpt4_LRT0.25_LRI0.25_LRC0.5_temp0.01_IF0.15_LReg0.0_seed7200/IOC_Task1_IntraOptionPolicy_Opt_1.png


--------------------------------------------------------------------------------
/tabular/InterestOptionCritic/Runs10_Epsds500_Eps0.01_NOpt4_LRT0.25_LRI0.25_LRC0.5_temp0.01_IF0.15_LReg0.0_seed7200/IOC_Task1_IntraOptionPolicy_Opt_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kkhetarpal/ioc/fb88d5b881e0b5c317020b23874495e614b3ddc7/tabular/InterestOptionCritic/Runs10_Epsds500_Eps0.01_NOpt4_LRT0.25_LRI0.25_LRC0.5_temp0.01_IF0.15_LReg0.0_seed7200/IOC_Task1_IntraOptionPolicy_Opt_2.png


--------------------------------------------------------------------------------
/tabular/InterestOptionCritic/Runs10_Epsds500_Eps0.01_NOpt4_LRT0.25_LRI0.25_LRC0.5_temp0.01_IF0.15_LReg0.0_seed7200/IOC_Task1_IntraOptionPolicy_Opt_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kkhetarpal/ioc/fb88d5b881e0b5c317020b23874495e614b3ddc7/tabular/InterestOptionCritic/Runs10_Epsds500_Eps0.01_NOpt4_LRT0.25_LRI0.25_LRC0.5_temp0.01_IF0.15_LReg0.0_seed7200/IOC_Task1_IntraOptionPolicy_Opt_3.png


--------------------------------------------------------------------------------
/tabular/InterestOptionCritic/Runs10_Epsds500_Eps0.01_NOpt4_LRT0.25_LRI0.25_LRC0.5_temp0.01_IF0.15_LReg0.0_seed7200/Params.txt:
--------------------------------------------------------------------------------
 1 | baseline:True
 2 | discount:0.99
 3 | epsilon:0.01
 4 | lr_critic:0.5
 5 | lr_interestfn:0.15
 6 | lr_intra:0.25
 7 | lr_reg:0.0
 8 | lr_term:0.25
 9 | nepisodes:500
10 | noptions:4
11 | nruns:10
12 | nsteps:2000
13 | primitive:False
14 | regularize:False
15 | seed:7200
16 | seed_startstate:10
17 | temperature:0.01
18 | 


--------------------------------------------------------------------------------
/tabular/InterestOptionCritic/Runs10_Epsds500_Eps0.01_NOpt4_LRT0.25_LRI0.25_LRC0.5_temp0.01_IF0.15_LReg0.0_seed7200/StateFreq.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kkhetarpal/ioc/fb88d5b881e0b5c317020b23874495e614b3ddc7/tabular/InterestOptionCritic/Runs10_Epsds500_Eps0.01_NOpt4_LRT0.25_LRI0.25_LRC0.5_temp0.01_IF0.15_LReg0.0_seed7200/StateFreq.npy


--------------------------------------------------------------------------------
/tabular/InterestOptionCritic/Runs10_Epsds500_Eps0.01_NOpt4_LRT0.25_LRI0.25_LRC0.5_temp0.01_IF0.15_LReg0.0_seed7200/Weights_ActionValueFunction.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kkhetarpal/ioc/fb88d5b881e0b5c317020b23874495e614b3ddc7/tabular/InterestOptionCritic/Runs10_Epsds500_Eps0.01_NOpt4_LRT0.25_LRI0.25_LRC0.5_temp0.01_IF0.15_LReg0.0_seed7200/Weights_ActionValueFunction.npy


--------------------------------------------------------------------------------
/tabular/InterestOptionCritic/Runs10_Epsds500_Eps0.01_NOpt4_LRT0.25_LRI0.25_LRC0.5_temp0.01_IF0.15_LReg0.0_seed7200/Weights_InterestFunction.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kkhetarpal/ioc/fb88d5b881e0b5c317020b23874495e614b3ddc7/tabular/InterestOptionCritic/Runs10_Epsds500_Eps0.01_NOpt4_LRT0.25_LRI0.25_LRC0.5_temp0.01_IF0.15_LReg0.0_seed7200/Weights_InterestFunction.npy


--------------------------------------------------------------------------------
/tabular/InterestOptionCritic/Runs10_Epsds500_Eps0.01_NOpt4_LRT0.25_LRI0.25_LRC0.5_temp0.01_IF0.15_LReg0.0_seed7200/Weights_IntraOption.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kkhetarpal/ioc/fb88d5b881e0b5c317020b23874495e614b3ddc7/tabular/InterestOptionCritic/Runs10_Epsds500_Eps0.01_NOpt4_LRT0.25_LRI0.25_LRC0.5_temp0.01_IF0.15_LReg0.0_seed7200/Weights_IntraOption.npy


--------------------------------------------------------------------------------
/tabular/InterestOptionCritic/Runs10_Epsds500_Eps0.01_NOpt4_LRT0.25_LRI0.25_LRC0.5_temp0.01_IF0.15_LReg0.0_seed7200/Weights_OptionValueFunction.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kkhetarpal/ioc/fb88d5b881e0b5c317020b23874495e614b3ddc7/tabular/InterestOptionCritic/Runs10_Epsds500_Eps0.01_NOpt4_LRT0.25_LRI0.25_LRC0.5_temp0.01_IF0.15_LReg0.0_seed7200/Weights_OptionValueFunction.npy


--------------------------------------------------------------------------------
/tabular/InterestOptionCritic/Runs10_Epsds500_Eps0.01_NOpt4_LRT0.25_LRI0.25_LRC0.5_temp0.01_IF0.15_LReg0.0_seed7200/Weights_Policy.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kkhetarpal/ioc/fb88d5b881e0b5c317020b23874495e614b3ddc7/tabular/InterestOptionCritic/Runs10_Epsds500_Eps0.01_NOpt4_LRT0.25_LRI0.25_LRC0.5_temp0.01_IF0.15_LReg0.0_seed7200/Weights_Policy.npy


--------------------------------------------------------------------------------
/tabular/InterestOptionCritic/Runs10_Epsds500_Eps0.01_NOpt4_LRT0.25_LRI0.25_LRC0.5_temp0.01_IF0.15_LReg0.0_seed7200/Weights_Termination.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kkhetarpal/ioc/fb88d5b881e0b5c317020b23874495e614b3ddc7/tabular/InterestOptionCritic/Runs10_Epsds500_Eps0.01_NOpt4_LRT0.25_LRI0.25_LRC0.5_temp0.01_IF0.15_LReg0.0_seed7200/Weights_Termination.npy


--------------------------------------------------------------------------------
/tabular/OptionCritic/Runs10_Epsds500_Eps0.01_NOpt4_LRT0.25_LRI0.25_LRC0.5_temp0.01_seed7200/History.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kkhetarpal/ioc/fb88d5b881e0b5c317020b23874495e614b3ddc7/tabular/OptionCritic/Runs10_Epsds500_Eps0.01_NOpt4_LRT0.25_LRI0.25_LRC0.5_temp0.01_seed7200/History.npy


--------------------------------------------------------------------------------
/tabular/OptionCritic/Runs10_Epsds500_Eps0.01_NOpt4_LRT0.25_LRI0.25_LRC0.5_temp0.01_seed7200/Params.txt:
--------------------------------------------------------------------------------
 1 | baseline:True
 2 | discount:0.99
 3 | epsilon:0.01
 4 | lr_critic:0.5
 5 | lr_intra:0.25
 6 | lr_term:0.25
 7 | nepisodes:500
 8 | noptions:4
 9 | nruns:10
10 | nsteps:2000
11 | primitive:False
12 | seed:7200
13 | seed_startstate:10
14 | temperature:0.01
15 | 


--------------------------------------------------------------------------------
/tabular/OptionCritic/Runs10_Epsds500_Eps0.01_NOpt4_LRT0.25_LRI0.25_LRC0.5_temp0.01_seed7200/StateFreq.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kkhetarpal/ioc/fb88d5b881e0b5c317020b23874495e614b3ddc7/tabular/OptionCritic/Runs10_Epsds500_Eps0.01_NOpt4_LRT0.25_LRI0.25_LRC0.5_temp0.01_seed7200/StateFreq.npy


--------------------------------------------------------------------------------
/tabular/OptionCritic/Runs10_Epsds500_Eps0.01_NOpt4_LRT0.25_LRI0.25_LRC0.5_temp0.01_seed7200/Weights_ActionValueFunction.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kkhetarpal/ioc/fb88d5b881e0b5c317020b23874495e614b3ddc7/tabular/OptionCritic/Runs10_Epsds500_Eps0.01_NOpt4_LRT0.25_LRI0.25_LRC0.5_temp0.01_seed7200/Weights_ActionValueFunction.npy


--------------------------------------------------------------------------------
/tabular/OptionCritic/Runs10_Epsds500_Eps0.01_NOpt4_LRT0.25_LRI0.25_LRC0.5_temp0.01_seed7200/Weights_IntraOption.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kkhetarpal/ioc/fb88d5b881e0b5c317020b23874495e614b3ddc7/tabular/OptionCritic/Runs10_Epsds500_Eps0.01_NOpt4_LRT0.25_LRI0.25_LRC0.5_temp0.01_seed7200/Weights_IntraOption.npy


--------------------------------------------------------------------------------
/tabular/OptionCritic/Runs10_Epsds500_Eps0.01_NOpt4_LRT0.25_LRI0.25_LRC0.5_temp0.01_seed7200/Weights_OptionValueFunction.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kkhetarpal/ioc/fb88d5b881e0b5c317020b23874495e614b3ddc7/tabular/OptionCritic/Runs10_Epsds500_Eps0.01_NOpt4_LRT0.25_LRI0.25_LRC0.5_temp0.01_seed7200/Weights_OptionValueFunction.npy


--------------------------------------------------------------------------------
/tabular/OptionCritic/Runs10_Epsds500_Eps0.01_NOpt4_LRT0.25_LRI0.25_LRC0.5_temp0.01_seed7200/Weights_Termination.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kkhetarpal/ioc/fb88d5b881e0b5c317020b23874495e614b3ddc7/tabular/OptionCritic/Runs10_Epsds500_Eps0.01_NOpt4_LRT0.25_LRI0.25_LRC0.5_temp0.01_seed7200/Weights_Termination.npy


--------------------------------------------------------------------------------
/tabular/TransferVisual.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kkhetarpal/ioc/fb88d5b881e0b5c317020b23874495e614b3ddc7/tabular/TransferVisual.png


--------------------------------------------------------------------------------
/tabular/__pycache__/fourrooms.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kkhetarpal/ioc/fb88d5b881e0b5c317020b23874495e614b3ddc7/tabular/__pycache__/fourrooms.cpython-36.pyc


--------------------------------------------------------------------------------
/tabular/fourrooms.py:
--------------------------------------------------------------------------------
  1 | #Environment File for Classic Fourrooms Grid World
  2 | import numpy as np
  3 | import gym
  4 | from gym import core, spaces
  5 | from gym.envs.registration import register
  6 | from random import uniform
  7 | 
  8 | #class Fourrooms(gym.Env):
  9 | class Fourrooms():
 10 |     def __init__(self,initstate_seed):
 11 |         layout = """\
 12 | wwwwwwwwwwwww
 13 | w     w     w
 14 | w     w     w
 15 | w           w
 16 | w     w     w
 17 | w     w     w
 18 | ww wwww     w
 19 | w     www www
 20 | w     w     w
 21 | w     w     w
 22 | w           w
 23 | w     w     w
 24 | wwwwwwwwwwwww
 25 | """
 26 | 
 27 | 
 28 |         self.occupancy = np.array([list(map(lambda c: 1 if c=='w' else 0, line)) for line in layout.splitlines()])
 29 | 
 30 |         # Action Space: from any state the agent can perform one of the four actions; Up, Down, Left and Right
 31 |         self.action_space = spaces.Discrete(4)
 32 | 
 33 |         # Observation Space
 34 |         self.observation_space = spaces.Discrete(np.sum(self.occupancy == 0))
 35 | 
 36 |         self.directions = [np.array((-1,0)), np.array((1,0)), np.array((0,-1)), np.array((0,1))]
 37 | 
 38 |         self.rng = np.random.RandomState(1234)
 39 | 
 40 |         self.initstate_seed = initstate_seed
 41 |         self.rng_init_state = np.random.RandomState(self.initstate_seed)
 42 | 
 43 |         self.tostate = {}
 44 | 
 45 |         self.occ_dict = dict(zip(range(self.observation_space.n),
 46 |                                  np.argwhere(self.occupancy.flatten() == 0).squeeze()))
 47 | 
 48 | 
 49 |         statenum = 0
 50 |         for i in range(13):
 51 |             for j in range(13):
 52 |                 if self.occupancy[i, j] == 0:
 53 |                     self.tostate[(i, j)] = statenum
 54 |                     statenum += 1
 55 | 
 56 |         self.tocell = {v:k for k,v in self.tostate.items()}
 57 | 
 58 |         self.goal = 62
 59 |         self.init_states = list(range(self.observation_space.n))
 60 |         self.init_states.remove(self.goal)
 61 | 
 62 | 
 63 |     def empty_around(self, cell):
 64 |         avail = []
 65 |         for action in range(self.action_space.n):
 66 |             nextcell = tuple(cell + self.directions[action])
 67 |             if not self.occupancy[nextcell]:
 68 |                 avail.append(nextcell)
 69 |         return avail
 70 | 
 71 |     # def reset(self):
 72 |     #     state = self.rng.choice(self.init_states)
 73 |     #     self.currentcell = self.tocell[state]
 74 |     #     return state
 75 | 
 76 | 
 77 |     def reset(self):
 78 |         state = self.rng_init_state.choice(self.init_states)
 79 |         self.currentcell = self.tocell[state]
 80 |         return state
 81 | 
 82 |     def step(self, action):
 83 |         """
 84 |         The agent can perform one of four actions,
 85 |         up, down, left or right, which have a stochastic effect. 
 86 |         We consider a case in which rewards are zero on all state transitions 
 87 |         except the goal state which has a reward of +50.
 88 |         """
 89 | 
 90 |         reward = 0
 91 |         if self.rng.uniform() < 1/3:
 92 |             empty_cells = self.empty_around(self.currentcell)
 93 |             nextcell = empty_cells[self.rng.randint(len(empty_cells))]
 94 |         else:
 95 |             nextcell = tuple(self.currentcell + self.directions[action])
 96 | 
 97 |         if not self.occupancy[nextcell]:
 98 |             self.currentcell = nextcell
 99 | 
100 |         state = self.tostate[self.currentcell]
101 | 
102 |         if state == self.goal:
103 |             reward = 50
104 | 
105 |         done = state == self.goal
106 |         return state, reward, float(done), None
107 | 
108 |     register(
109 |         id='Fourrooms-v0',
110 |         entry_point='fourrooms:Fourrooms',
111 |         timestep_limit=20000,
112 |         reward_threshold=1,
113 |     )
114 | 


--------------------------------------------------------------------------------