├── .dockerignore ├── .gitignore ├── .jenkins ├── Jenkinsfile └── run_jenkins.sh ├── Dockerfile ├── LICENSE ├── README.md ├── adept ├── __init__.py ├── actor │ ├── __init__.py │ ├── ac_eval.py │ ├── ac_rollout.py │ ├── base │ │ ├── __init__.py │ │ ├── ac_helper.py │ │ └── actor_module.py │ ├── impala.py │ └── ppo.py ├── agent │ ├── __init__.py │ ├── actor_critic.py │ ├── base │ │ ├── __init__.py │ │ └── agent_module.py │ └── ppo.py ├── app.py ├── container │ ├── __init__.py │ ├── actorlearner │ │ ├── __init__.py │ │ ├── learner_container.py │ │ ├── rollout_queuer.py │ │ └── rollout_worker.py │ ├── base │ │ ├── __init__.py │ │ ├── container.py │ │ ├── nccl_optimizer.py │ │ └── updater.py │ ├── distrib.py │ ├── evaluation.py │ ├── evaluation_thread.py │ ├── init.py │ ├── local.py │ └── render.py ├── env │ ├── __init__.py │ ├── _gym_wrappers.py │ ├── _spaces.py │ ├── base │ │ ├── __init__.py │ │ ├── _env.py │ │ └── env_module.py │ └── openai_gym.py ├── exp │ ├── __init__.py │ ├── base │ │ ├── __init__.py │ │ ├── exp_module.py │ │ └── spec_builder.py │ ├── replay.py │ └── rollout.py ├── globals.py ├── learner │ ├── __init__.py │ ├── ac_rollout.py │ ├── base │ │ ├── __init__.py │ │ ├── dm_return_scale.py │ │ └── learner_module.py │ └── impala.py ├── manager │ ├── __init__.py │ ├── base │ │ ├── __init__.py │ │ └── manager_module.py │ ├── simple_env_manager.py │ └── subproc_env_manager.py ├── modules │ ├── __init__.py │ ├── attention.py │ ├── memory.py │ ├── mlp.py │ ├── norm.py │ ├── sequence.py │ └── spatial.py ├── network │ ├── __init__.py │ ├── base │ │ ├── __init__.py │ │ ├── base.py │ │ ├── network_module.py │ │ └── submodule.py │ ├── modular_network.py │ ├── net1d │ │ ├── __init__.py │ │ ├── identity_1d.py │ │ ├── linear.py │ │ ├── lstm.py │ │ └── submodule_1d.py │ ├── net2d │ │ ├── __init__.py │ │ ├── identity_2d.py │ │ └── submodule_2d.py │ ├── net3d │ │ ├── __init__.py │ │ ├── _resnets.py │ │ ├── four_conv.py │ │ ├── identity_3d.py │ │ ├── networks.py │ │ ├── rmc.py │ │ └── submodule_3d.py │ └── net4d │ │ ├── __init__.py │ │ ├── identity_4d.py │ │ └── submodule_4d.py ├── preprocess │ ├── __init__.py │ ├── base │ │ ├── __init__.py │ │ ├── ops.py │ │ └── preprocessor.py │ └── ops.py ├── registry │ ├── __init__.py │ └── registry.py ├── rewardnorm │ ├── __init__.py │ ├── base │ │ ├── __init__.py │ │ └── rewnorm_module.py │ └── normalizers.py ├── scripts │ ├── __init__.py │ ├── _distrib.py │ ├── actorlearner.py │ ├── distrib.py │ ├── evaluate.py │ ├── local.py │ └── render.py └── utils │ ├── __init__.py │ ├── logging.py │ ├── requires_args.py │ ├── script_helpers.py │ └── util.py ├── docker ├── Dockerfile ├── Dockerfile.nogpu ├── README.md ├── connect.py └── startup.sh ├── docs ├── api_overview.md ├── modular_network.md ├── new_api.md └── resume_training.md ├── examples ├── custom_agent_stub.py ├── custom_environment_stub.py ├── custom_network_stub.py └── custom_submodule_stub.py ├── images ├── architecture.png ├── banner.png ├── benchmark.png └── modular_network.png ├── setup.py └── tests ├── __init__.py ├── distrib ├── __init__.py ├── allreduce.py ├── container_sync.py ├── control_flow_zmq.py ├── exp_sync_broadcast.py ├── hello_ray.py ├── launch.py ├── multi_group.py ├── nccl_typecheck.py └── ray_container.py ├── exp └── test_rollout.py ├── learner ├── __init__.py └── nstep.py ├── network ├── __init__.py └── test_modular_network.py ├── registry ├── __init__.py └── test_registry.py └── utils ├── __init__.py ├── test_requires_args.py └── test_util.py /.dockerignore: 
-------------------------------------------------------------------------------- 1 | logs 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | .DS_Store 3 | *.xml 4 | .idea/ 5 | .vscode/ 6 | __pycache__/ 7 | logs/ 8 | trained_models/ 9 | *egg-info 10 | dist/ 11 | archive/ 12 | build/ 13 | .pytest_cache/ 14 | test_reports/ -------------------------------------------------------------------------------- /.jenkins/Jenkinsfile: -------------------------------------------------------------------------------- 1 | pipeline { 2 | agent { 3 | dockerfile { 4 | filename 'Dockerfile' 5 | args("""--runtime nvidia \ 6 | -v /tmp/adept_logs:/tmp/adept_logs \ 7 | --net host \ 8 | --cap-add SYS_PTRACE""") 9 | } 10 | } 11 | triggers { pollSCM('H/15 * * * *') } 12 | 13 | stages { 14 | stage('Build') { 15 | steps { 16 | echo 'Checking build...' 17 | sh 'nvidia-smi' 18 | sh 'python -m adept.scripts.local -h' 19 | } 20 | } 21 | stage('Test') { 22 | steps { 23 | echo 'Running unit tests...' 24 | sh 'pytest --verbose' 25 | } 26 | } 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /.jenkins/run_jenkins.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | docker run \ 4 | --rm \ 5 | -u root \ 6 | -p 8080:8080 \ 7 | -v /var/jenkins_home:/var/jenkins_home \ 8 | -v /var/run/docker.sock:/var/run/docker.sock \ 9 | -v "$HOME":/home \ 10 | jenkinsci/blueocean 11 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | ./docker/Dockerfile -------------------------------------------------------------------------------- /adept/__init__.py: -------------------------------------------------------------------------------- 1 | def register_agent(agent_cls): 2 | from adept.registry import REGISTRY 3 | 4 | REGISTRY.register_agent(agent_cls) 5 | 6 | 7 | def register_actor(actor_cls): 8 | from adept.registry import REGISTRY 9 | 10 | REGISTRY.register_actor(actor_cls) 11 | 12 | 13 | def register_exp(exp_cls): 14 | from adept.registry import REGISTRY 15 | 16 | REGISTRY.register_exp(exp_cls) 17 | 18 | 19 | def register_learner(learner_cls): 20 | from adept.registry import REGISTRY 21 | 22 | REGISTRY.register_learner(learner_cls) 23 | 24 | 25 | def register_env(env_cls): 26 | from adept.registry import REGISTRY 27 | 28 | REGISTRY.register_env(env_cls) 29 | 30 | 31 | def register_reward_norm(rwd_norm_cls): 32 | from adept.registry import REGISTRY 33 | 34 | REGISTRY.register_reward_normalizer(rwd_norm_cls) 35 | 36 | 37 | def register_network(network_cls): 38 | from adept.registry import REGISTRY 39 | 40 | REGISTRY.register_network(network_cls) 41 | 42 | 43 | def register_submodule(submod_cls): 44 | from adept.registry import REGISTRY 45 | 46 | REGISTRY.register_submodule(submod_cls) 47 | 48 | 49 | def register_manager(manager_cls): 50 | from adept.registry import REGISTRY 51 | 52 | REGISTRY.register_manager(manager_cls) 53 | -------------------------------------------------------------------------------- /adept/actor/__init__.py: -------------------------------------------------------------------------------- 1 | from .base.actor_module import ActorModule 2 | from .ac_rollout import ACRolloutActorTrain 3 | from .ppo import PPOActorTrain 4 | from adept.actor.ac_eval import 
ACActorEval, ACActorEvalSample 5 | from .impala import ImpalaHostActor, ImpalaWorkerActor 6 | 7 | ACTOR_REG = [ 8 | ACRolloutActorTrain, 9 | ACActorEval, 10 | ACActorEvalSample, 11 | PPOActorTrain, 12 | ImpalaHostActor, 13 | ImpalaWorkerActor, 14 | ] 15 | -------------------------------------------------------------------------------- /adept/actor/ac_eval.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | from adept.actor import ActorModule 4 | from adept.actor.base.ac_helper import ACActorHelperMixin 5 | 6 | 7 | class ACActorEval(ActorModule, ACActorHelperMixin): 8 | args = {} 9 | 10 | @classmethod 11 | def from_args(cls, args, action_space): 12 | return cls(action_space) 13 | 14 | @staticmethod 15 | def output_space(action_space): 16 | head_dict = {"critic": (1,), **action_space} 17 | return head_dict 18 | 19 | def compute_action_exp(self, preds, internals, obs, available_actions): 20 | actions = OrderedDict() 21 | 22 | for key in self.action_keys: 23 | logit = self.flatten_logits(preds[key]) 24 | 25 | softmax = self.softmax(logit) 26 | action = self.select_action(softmax) 27 | 28 | actions[key] = action.cpu() 29 | return actions, {"value": preds["critic"].squeeze(-1)} 30 | 31 | @classmethod 32 | def _exp_spec( 33 | cls, rollout_len, batch_sz, obs_space, act_space, internal_space 34 | ): 35 | return {} 36 | 37 | 38 | class ACActorEvalSample(ACActorEval): 39 | def compute_action_exp(self, preds, internals, obs, available_actions): 40 | actions = OrderedDict() 41 | 42 | for key in self.action_keys: 43 | logit = self.flatten_logits(preds[key]) 44 | 45 | softmax = self.softmax(logit) 46 | action = self.sample_action(softmax) 47 | 48 | actions[key] = action.cpu() 49 | return actions, {"value": preds["critic"].squeeze(-1)} 50 | -------------------------------------------------------------------------------- /adept/actor/ac_rollout.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Heron Systems, Inc. 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 
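# ACRolloutActorTrain: the on-policy actor used by the ActorCritic agent. For each action key it samples from the softmaxed logits and records per-step log_probs, entropies, and critic values in the experience spec below, for the rollout learner to read back at update time.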
15 | from collections import OrderedDict 16 | 17 | import torch 18 | 19 | from adept.actor.base.ac_helper import ACActorHelperMixin 20 | from adept.actor.base.actor_module import ActorModule 21 | 22 | 23 | class ACRolloutActorTrain(ActorModule, ACActorHelperMixin): 24 | args = {} 25 | 26 | @classmethod 27 | def from_args(cls, args, action_space): 28 | return cls(action_space) 29 | 30 | @staticmethod 31 | def output_space(action_space): 32 | head_dict = {"critic": (1,), **action_space} 33 | return head_dict 34 | 35 | def compute_action_exp(self, preds, internals, obs, available_actions): 36 | values = preds["critic"].squeeze(1) 37 | 38 | actions = OrderedDict() 39 | log_probs = [] 40 | entropies = [] 41 | 42 | for key in self.action_keys: 43 | logit = self.flatten_logits(preds[key]) 44 | 45 | log_softmax, softmax = self.log_softmax(logit), self.softmax(logit) 46 | entropy = self.entropy(log_softmax, softmax) 47 | action = self.sample_action(softmax) 48 | 49 | entropies.append(entropy) 50 | log_probs.append(self.log_probability(log_softmax, action)) 51 | actions[key] = action.cpu() 52 | 53 | log_probs = torch.cat(log_probs, dim=1) 54 | entropies = torch.cat(entropies, dim=1) 55 | 56 | return ( 57 | actions, 58 | {"log_probs": log_probs, "entropies": entropies, "values": values}, 59 | ) 60 | 61 | @classmethod 62 | def _exp_spec(cls, exp_len, batch_sz, obs_space, act_space, internal_space): 63 | act_key_len = len(act_space.keys()) 64 | 65 | spec = { 66 | "log_probs": (exp_len, batch_sz, act_key_len), 67 | "entropies": (exp_len, batch_sz, act_key_len), 68 | "values": (exp_len, batch_sz), 69 | } 70 | 71 | return spec 72 | -------------------------------------------------------------------------------- /adept/actor/base/__init__.py: -------------------------------------------------------------------------------- 1 | from .actor_module import ActorModule 2 | -------------------------------------------------------------------------------- /adept/actor/base/ac_helper.py: -------------------------------------------------------------------------------- 1 | import abc 2 | import torch 3 | from torch.nn import functional as F 4 | 5 | 6 | class ACActorHelperMixin(metaclass=abc.ABCMeta): 7 | """ 8 | A helper class for actor critic actors. 
9 | """ 10 | 11 | @staticmethod 12 | def flatten_logits(logit): 13 | """ 14 | :param logits: Tensor of arbitrary dim 15 | :return: logits flattened to (N, X) 16 | """ 17 | size = logit.size() 18 | dim = logit.dim() 19 | 20 | if dim == 3: 21 | n, f, l = size 22 | logit = logit.view(n, f * l) 23 | elif dim == 4: 24 | n, f, h, w = size 25 | logit = logit.view(n, f * h * w) 26 | elif dim == 5: 27 | n, f, d, h, w = size 28 | logit = logit.view(n, f * d * h * w) 29 | return logit 30 | 31 | @staticmethod 32 | def softmax(logit): 33 | """ 34 | :param logit: torch.Tensor (N, X) 35 | :return: torch.Tensor (N, X) 36 | """ 37 | return F.softmax(logit, dim=1) 38 | 39 | @staticmethod 40 | def log_softmax(logit): 41 | """ 42 | :param logit: torch.Tensor (N, X) 43 | :return: torch.Tensor (N, X) 44 | """ 45 | return F.log_softmax(logit, dim=1) 46 | 47 | @staticmethod 48 | def log_probability(log_softmax, action): 49 | """ 50 | :param log_softmax: Tensor (N, X) 51 | :param action: LongTensor (N) 52 | :return: Tensor (N, 1) 53 | """ 54 | return log_softmax.gather(1, action.unsqueeze(1)) 55 | 56 | @staticmethod 57 | def entropy(log_softmax, softmax): 58 | """ 59 | :param log_softmax: Tensor (N, X) 60 | :param softmax: Tensor (N, X) 61 | :return: Tensor (N, 1) 62 | """ 63 | return -(log_softmax * softmax).sum(1, keepdim=True) 64 | 65 | @staticmethod 66 | def sample_action(softmax): 67 | """ 68 | Samples an action from a softmax distribution. 69 | 70 | :param softmax: torch.Tensor (N, X) 71 | :return: torch.Tensor (N) 72 | """ 73 | return softmax.multinomial(1).squeeze(1) 74 | 75 | @staticmethod 76 | def select_action(softmax): 77 | """ 78 | Selects the action with the highest probability. 79 | 80 | :param softmax: 81 | :return: 82 | """ 83 | return torch.argmax(softmax, dim=1) 84 | -------------------------------------------------------------------------------- /adept/actor/base/actor_module.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Heron Systems, Inc. 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 15 | """ 16 | An actor observes the environment and decides actions. It also outputs extra 17 | info necessary for model updates (learning) to occur. 
18 | """ 19 | import abc 20 | from collections import defaultdict 21 | 22 | from adept.exp.base.spec_builder import ExpSpecBuilder 23 | from adept.utils.requires_args import RequiresArgsMixin 24 | 25 | 26 | class ActorModule(RequiresArgsMixin, metaclass=abc.ABCMeta): 27 | def __init__(self, action_space): 28 | self._action_space = action_space 29 | 30 | @property 31 | def action_space(self): 32 | return self._action_space 33 | 34 | @property 35 | def action_keys(self): 36 | return sorted(self.action_space.keys()) 37 | 38 | @staticmethod 39 | @abc.abstractmethod 40 | def output_space(action_space): 41 | raise NotImplementedError 42 | 43 | @classmethod 44 | def exp_spec_builder(cls, obs_space, act_space, internal_space, batch_sz): 45 | def build_fn(exp_len): 46 | exp_space = cls._exp_spec( 47 | exp_len, batch_sz, obs_space, act_space, internal_space 48 | ) 49 | env_space = { 50 | "rewards": (exp_len, batch_sz), 51 | "terminals": (exp_len, batch_sz), 52 | } 53 | return {**exp_space, **env_space} 54 | 55 | key_types = cls._key_types(obs_space, act_space, internal_space) 56 | exp_keys = cls._exp_keys(obs_space, act_space, internal_space) 57 | return ExpSpecBuilder( 58 | obs_space, act_space, internal_space, key_types, exp_keys, build_fn 59 | ) 60 | 61 | @classmethod 62 | @abc.abstractmethod 63 | def _exp_spec(cls, exp_len, batch_sz, obs_space, act_space, internal_space): 64 | raise NotImplementedError 65 | 66 | @classmethod 67 | def _exp_keys(cls, obs_space, act_space, internal_space): 68 | dummy = cls._exp_spec(1, 1, obs_space, act_space, internal_space) 69 | return dummy.keys() 70 | 71 | @classmethod 72 | def _key_types(cls, obs_space, act_space, internal_space): 73 | return defaultdict(lambda: "float") 74 | 75 | @abc.abstractmethod 76 | def from_args(self, args, action_space): 77 | raise NotImplementedError 78 | 79 | @abc.abstractmethod 80 | def compute_action_exp(self, preds, internals, obs, available_actions): 81 | """ 82 | B = Batch Size 83 | 84 | :param preds: Dict[str, torch.Tensor] 85 | :return: 86 | actions: Dict[ActionKey, Tensor (B)] 87 | experience: Dict[str, Tensor (B, X)] 88 | """ 89 | raise NotImplementedError 90 | 91 | def act(self, network, obs, prev_internals): 92 | """ 93 | :param obs: Dict[str, Tensor] 94 | :param prev_internals: previous interal states. Dict[str, Tensor] 95 | :return: 96 | actions: Dict[ActionKey, Tensor (B)] 97 | experience: Dict[str, Tensor (B, X)] 98 | internal_states: Dict[str, Tensor] 99 | """ 100 | 101 | predictions, internal_states, pobs = network(obs, prev_internals) 102 | 103 | if "available_actions" in obs: 104 | av_actions = obs["available_actions"] 105 | else: 106 | av_actions = None 107 | 108 | actions, exp = self.compute_action_exp( 109 | predictions, prev_internals, pobs, av_actions 110 | ) 111 | return actions, exp, internal_states 112 | -------------------------------------------------------------------------------- /adept/actor/impala.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Heron Systems, Inc. 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 15 | from collections import OrderedDict, defaultdict 16 | from functools import reduce 17 | 18 | import torch 19 | 20 | from adept.actor.base.ac_helper import ACActorHelperMixin 21 | from adept.actor.base.actor_module import ActorModule 22 | 23 | 24 | class ImpalaHostActor(ActorModule, ACActorHelperMixin): 25 | args = {} 26 | 27 | @classmethod 28 | def from_args(cls, args, action_space): 29 | return cls(action_space) 30 | 31 | @staticmethod 32 | def output_space(action_space): 33 | head_dict = {"critic": (1,), **action_space} 34 | return head_dict 35 | 36 | def compute_action_exp(self, preds, internals, obs, available_actions): 37 | values = preds["critic"].squeeze(1) 38 | 39 | log_softmaxes = [] 40 | entropies = [] 41 | 42 | for key in self.action_keys: 43 | logit = self.flatten_logits(preds[key]) 44 | 45 | # print(logit) NaNs 46 | 47 | log_softmax, softmax = self.log_softmax(logit), self.softmax(logit) 48 | entropy = self.entropy(log_softmax, softmax) 49 | 50 | entropies.append(entropy) 51 | log_softmaxes.append(log_softmax) 52 | 53 | log_softmaxes = torch.stack(log_softmaxes, dim=1) 54 | entropies = torch.cat(entropies, dim=1) 55 | 56 | return ( 57 | None, 58 | { 59 | "log_softmaxes": log_softmaxes, 60 | "entropies": entropies, 61 | "values": values, 62 | }, 63 | ) 64 | 65 | @classmethod 66 | def _exp_spec(cls, exp_len, batch_sz, obs_space, act_space, internal_space): 67 | flat_act_space = 0 68 | for k, shape in act_space.items(): 69 | flat_act_space += reduce(lambda a, b: a * b, shape) 70 | act_key_len = len(act_space.keys()) 71 | 72 | obs_spec = { 73 | k: (exp_len + 1, batch_sz, *shape) for k, shape in obs_space.items() 74 | } 75 | action_spec = {k: (exp_len, batch_sz) for k in act_space.keys()} 76 | internal_spec = { 77 | k: (exp_len, batch_sz, *shape) 78 | for k, shape in internal_space.items() 79 | } 80 | 81 | spec = { 82 | "log_softmaxes": (exp_len, batch_sz, act_key_len, flat_act_space), 83 | "entropies": (exp_len, batch_sz, act_key_len), 84 | "values": (exp_len, batch_sz), 85 | # From Workers 86 | "log_probs": (exp_len, batch_sz, act_key_len), 87 | **obs_spec, 88 | **action_spec, 89 | **internal_spec, 90 | } 91 | 92 | return spec 93 | 94 | @classmethod 95 | def _key_types(cls, obs_space, act_space, internal_space): 96 | d = defaultdict(lambda: "float") 97 | for k in act_space.keys(): 98 | d[k] = "long" 99 | # TODO this needs a better solution 100 | for k in obs_space.keys(): 101 | d[k] = "byte" 102 | return d 103 | 104 | 105 | class ImpalaWorkerActor(ActorModule, ACActorHelperMixin): 106 | args = {} 107 | 108 | @classmethod 109 | def from_args(cls, args, action_space): 110 | return cls(action_space) 111 | 112 | @staticmethod 113 | def output_space(action_space): 114 | head_dict = {"critic": (1,), **action_space} 115 | return head_dict 116 | 117 | def compute_action_exp(self, preds, internals, obs, available_actions): 118 | log_probs = [] 119 | actions_gpu = OrderedDict() 120 | actions_cpu = OrderedDict() 121 | 122 | for key in self.action_keys: 123 | logit = self.flatten_logits(preds[key]) 124 | 125 | log_softmax, softmax = self.log_softmax(logit), self.softmax(logit) 126 | action = self.sample_action(softmax) 127 | 128 | log_probs.append(self.log_probability(log_softmax, action)) 129 | actions_gpu[key] = action 130 | actions_cpu[key] = action.cpu() 131 | 132 | log_probs = torch.cat(log_probs, 
dim=1) 133 | 134 | internals = {k: torch.stack(vs) for k, vs in internals.items()} 135 | 136 | return actions_cpu, {"log_probs": log_probs, **actions_gpu, **internals} 137 | 138 | @classmethod 139 | def _exp_spec(cls, exp_len, batch_sz, obs_space, act_space, internal_space): 140 | act_key_len = len(act_space.keys()) 141 | 142 | obs_spec = { 143 | k: (exp_len + 1, batch_sz, *shape) for k, shape in obs_space.items() 144 | } 145 | action_spec = {k: (exp_len, batch_sz) for k in act_space.keys()} 146 | internal_spec = { 147 | k: (exp_len, batch_sz, *shape) 148 | for k, shape in internal_space.items() 149 | } 150 | 151 | spec = { 152 | "log_probs": (exp_len, batch_sz, act_key_len), 153 | **obs_spec, 154 | **action_spec, 155 | **internal_spec, 156 | } 157 | 158 | return spec 159 | 160 | @classmethod 161 | def _key_types(cls, obs_space, act_space, internal_space): 162 | d = defaultdict(lambda: "float") 163 | for k in act_space.keys(): 164 | d[k] = "long" 165 | # TODO this needs a better solution 166 | for k in obs_space.keys(): 167 | d[k] = "byte" 168 | return d 169 | -------------------------------------------------------------------------------- /adept/actor/ppo.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Heron Systems, Inc. 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 
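# PPOActorTrain: like ACRolloutActorTrain it samples one action per key, but its experience spec additionally stores the chosen actions, observations (exp_len + 1 steps), and internal states, presumably so the PPO learner can re-evaluate the policy and form probability ratios against the cached log_probs.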
15 | from collections import OrderedDict 16 | 17 | import torch 18 | 19 | from adept.actor.base.ac_helper import ACActorHelperMixin 20 | from adept.actor.base.actor_module import ActorModule 21 | 22 | 23 | class PPOActorTrain(ActorModule, ACActorHelperMixin): 24 | args = {} 25 | 26 | @classmethod 27 | def from_args(cls, args, action_space): 28 | return cls(action_space) 29 | 30 | @staticmethod 31 | def output_space(action_space): 32 | head_dict = {'critic': (1,), **action_space} 33 | return head_dict 34 | 35 | def compute_action_exp(self, preds, internals, obs, available_actions): 36 | values = preds['critic'].squeeze(1) 37 | 38 | actions = OrderedDict() 39 | actions_gpu = OrderedDict() 40 | log_probs = [] 41 | 42 | for key in self.action_keys: 43 | logit = self.flatten_logits(preds[key]) 44 | 45 | log_softmax, softmax = self.log_softmax(logit), self.softmax(logit) 46 | action = self.sample_action(softmax) 47 | 48 | log_probs.append(self.log_probability(log_softmax, action)) 49 | actions_gpu[key] = action 50 | actions[key] = action.cpu() 51 | 52 | log_probs = torch.cat(log_probs, dim=1) 53 | internals = {k: torch.stack(vs) for k, vs in internals.items()} 54 | 55 | return actions, { 56 | 'log_probs': log_probs, 57 | 'values': values, 58 | **internals, 59 | **actions_gpu 60 | } 61 | 62 | @classmethod 63 | def _exp_spec(cls, exp_len, batch_sz, obs_space, act_space, internal_space): 64 | act_key_len = len(act_space.keys()) 65 | action_spec = {k: (exp_len, batch_sz) for k in act_space.keys()} 66 | internal_spec = { 67 | k: (exp_len, batch_sz, *shape) for k, shape in internal_space.items() 68 | } 69 | obs_spec = {k: (exp_len + 1, batch_sz, *shape) for k, shape in 70 | obs_space.items()} 71 | 72 | spec = { 73 | 'log_probs': (exp_len, batch_sz, act_key_len), 74 | 'values': (exp_len, batch_sz), 75 | **action_spec, 76 | **obs_spec, 77 | **internal_spec 78 | } 79 | 80 | return spec 81 | -------------------------------------------------------------------------------- /adept/agent/__init__.py: -------------------------------------------------------------------------------- 1 | from .base.agent_module import AgentModule 2 | from .actor_critic import ActorCritic 3 | from .ppo import PPO 4 | 5 | AGENT_REG = [ 6 | ActorCritic, 7 | PPO 8 | ] 9 | -------------------------------------------------------------------------------- /adept/agent/actor_critic.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Heron Systems, Inc. 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 
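# ActorCritic composes three pieces registered elsewhere in the package: a Rollout experience cache, an ACRolloutActorTrain actor, and an ACRolloutLearner learner; its args dict is simply the union of theirs.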
15 | from adept.actor import ACRolloutActorTrain 16 | from adept.exp import Rollout 17 | from adept.learner import ACRolloutLearner 18 | from .base.agent_module import AgentModule 19 | 20 | 21 | class ActorCritic(AgentModule): 22 | args = {**Rollout.args, **ACRolloutActorTrain.args, **ACRolloutLearner.args} 23 | 24 | def __init__( 25 | self, 26 | reward_normalizer, 27 | action_space, 28 | spec_builder, 29 | rollout_len, 30 | discount, 31 | normalize_advantage, 32 | entropy_weight, 33 | return_scale, 34 | ): 35 | super(ActorCritic, self).__init__(reward_normalizer, action_space) 36 | self.discount = discount 37 | self.normalize_advantage = normalize_advantage 38 | self.entropy_weight = entropy_weight 39 | 40 | self._exp_cache = Rollout(spec_builder, rollout_len) 41 | self._actor = ACRolloutActorTrain(action_space) 42 | self._learner = ACRolloutLearner( 43 | reward_normalizer, 44 | discount, 45 | normalize_advantage, 46 | entropy_weight, 47 | return_scale, 48 | ) 49 | 50 | @classmethod 51 | def from_args( 52 | cls, args, reward_normalizer, action_space, spec_builder, **kwargs 53 | ): 54 | return cls( 55 | reward_normalizer, 56 | action_space, 57 | spec_builder, 58 | rollout_len=args.rollout_len, 59 | discount=args.discount, 60 | normalize_advantage=args.normalize_advantage, 61 | entropy_weight=args.entropy_weight, 62 | return_scale=args.return_scale, 63 | ) 64 | 65 | @property 66 | def exp_cache(self): 67 | return self._exp_cache 68 | 69 | @classmethod 70 | def _exp_spec(cls, exp_len, batch_sz, obs_space, act_space, internal_space): 71 | return ACRolloutActorTrain._exp_spec( 72 | exp_len, batch_sz, obs_space, act_space, internal_space 73 | ) 74 | 75 | @staticmethod 76 | def output_space(action_space): 77 | return ACRolloutActorTrain.output_space(action_space) 78 | 79 | def compute_action_exp( 80 | self, predictions, internals, obs, available_actions 81 | ): 82 | return self._actor.compute_action_exp( 83 | predictions, internals, obs, available_actions 84 | ) 85 | 86 | def learn_step(self, updater, network, next_obs, internals): 87 | return self._learner.learn_step( 88 | updater, network, self.exp_cache.read(), next_obs, internals 89 | ) 90 | -------------------------------------------------------------------------------- /adept/agent/base/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heronsystems/adeptRL/d8554d134c1dfee6659baafd886684351c1dd982/adept/agent/base/__init__.py -------------------------------------------------------------------------------- /adept/agent/base/agent_module.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Heron Systems, Inc. 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 15 | """ 16 | An Agent interacts with the environment and accumulates experience. 
17 | """ 18 | from collections import defaultdict 19 | 20 | import abc 21 | 22 | from adept.exp import ExpSpecBuilder 23 | from adept.utils.requires_args import RequiresArgsMixin 24 | 25 | 26 | class AgentModule(RequiresArgsMixin, metaclass=abc.ABCMeta): 27 | """ 28 | An Agent is an Actor (chooses actions) and a Learner (updates parameters) 29 | and maintains a cache to store a rollout or experience replay. 30 | 31 | Actors and Learners are treated separately for Actor-Learner architectures 32 | where multiple actors send their experience to a single learner. 33 | """ 34 | 35 | def __init__(self, reward_normalizer, action_space): 36 | self._reward_normalizer = reward_normalizer 37 | self._action_space = action_space 38 | 39 | @classmethod 40 | @abc.abstractmethod 41 | def from_args( 42 | cls, args, reward_normalizer, action_space, spec_builder, **kwargs 43 | ): 44 | raise NotImplementedError 45 | 46 | @property 47 | @abc.abstractmethod 48 | def exp_cache(self): 49 | """Get experience cache""" 50 | raise NotImplementedError 51 | 52 | @property 53 | def action_space(self): 54 | return self._action_space 55 | 56 | @property 57 | def action_keys(self): 58 | return list(sorted(self.action_space.keys())) 59 | 60 | @classmethod 61 | def exp_spec_builder(cls, obs_space, act_space, internal_space, batch_sz): 62 | def build_fn(exp_len): 63 | exp_space = cls._exp_spec( 64 | exp_len, batch_sz, obs_space, act_space, internal_space 65 | ) 66 | env_space = { 67 | "rewards": (exp_len, batch_sz), 68 | "terminals": (exp_len, batch_sz), 69 | } 70 | return {**exp_space, **env_space} 71 | 72 | key_types = cls._key_types(obs_space, act_space, internal_space) 73 | exp_keys = cls._exp_keys(obs_space, act_space, internal_space) 74 | return ExpSpecBuilder( 75 | obs_space, act_space, internal_space, key_types, exp_keys, build_fn 76 | ) 77 | 78 | @classmethod 79 | @abc.abstractmethod 80 | def _exp_spec(cls, exp_len, batch_sz, obs_space, act_space, internal_space): 81 | raise NotImplementedError 82 | 83 | @classmethod 84 | def _exp_keys(cls, obs_space, act_space, internal_space): 85 | dummy = cls._exp_spec(1, 1, obs_space, act_space, internal_space) 86 | return dummy.keys() 87 | 88 | @classmethod 89 | def _key_types(cls, obs_space, act_space, internal_space): 90 | return defaultdict(lambda: "float") 91 | 92 | @staticmethod 93 | @abc.abstractmethod 94 | def output_space(action_space): 95 | raise NotImplementedError 96 | 97 | @abc.abstractmethod 98 | def compute_action_exp( 99 | self, predictions, internals, obs, available_actions 100 | ): 101 | raise NotImplementedError 102 | 103 | @abc.abstractmethod 104 | def learn_step(self, updater, network, next_obs, internals): 105 | raise NotImplementedError 106 | 107 | def is_ready(self): 108 | return self.exp_cache.is_ready() 109 | 110 | def clear(self): 111 | self.exp_cache.clear() 112 | 113 | def act(self, network, obs, prev_internals): 114 | """ 115 | :param network: NetworkModule 116 | :param obs: Dict[str, Tensor] 117 | :param prev_internals: previous interal states. 
Dict[str, Tensor] 118 | :return: 119 | actions: Dict[ActionKey, LongTensor (B)] 120 | internal_states: Dict[str, Tensor] 121 | """ 122 | predictions, internal_states, pobs = network(obs, prev_internals) 123 | 124 | if "available_actions" in obs: 125 | av_actions = obs["available_actions"] 126 | else: 127 | av_actions = None 128 | 129 | actions, experience = self.compute_action_exp( 130 | predictions, prev_internals, pobs, av_actions 131 | ) 132 | self.exp_cache.write_actor(experience) 133 | return actions, internal_states 134 | 135 | def observe(self, obs, rewards, terminals, infos): 136 | self.exp_cache.write_env(obs, rewards, terminals, infos) 137 | return rewards, terminals, infos 138 | 139 | def to(self, device): 140 | self.exp_cache.to(device) 141 | return self 142 | -------------------------------------------------------------------------------- /adept/app.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (C) 2018 Heron Systems, Inc. 3 | # 4 | # This program is free software: you can redistribute it and/or modify 5 | # it under the terms of the GNU General Public License as published by 6 | # the Free Software Foundation, either version 3 of the License, or 7 | # (at your option) any later version. 8 | # 9 | # This program is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | # GNU General Public License for more details. 13 | # 14 | # You should have received a copy of the GNU General Public License 15 | # along with this program. If not, see . 16 | """ 17 | __ __ 18 | ____ _____/ /__ ____ / /_ 19 | / __ `/ __ / _ \/ __ \/ __/ 20 | / /_/ / /_/ / __/ /_/ / /_ 21 | \__,_/\__,_/\___/ .___/\__/ 22 | /_/ 23 | 24 | Usage: 25 | adept.app [...] 26 | adept.app (-h | --help) 27 | adept.app --version 28 | 29 | Commands: 30 | local Train an agent on a single GPU. 31 | distrib Train an agent on multiple machines and/or GPUs. 32 | actorlearner Train an agent on multiple machines and/or GPUs. 33 | evaluate Evaluate a trained agent. 34 | render Visualize an agent playing an Atari game. 35 | replay_gen_sc2 Generate SC2 replay files of an agent playing SC2. 36 | 37 | See 'adept.app help ' for more information on a specific command. 
38 | """ 39 | from docopt import docopt 40 | from adept.globals import VERSION 41 | from subprocess import call 42 | import os 43 | 44 | 45 | def parse_args(): 46 | args = docopt( 47 | __doc__, version="adept version " + VERSION, options_first=True 48 | ) 49 | 50 | env = os.environ 51 | argv = args[""] 52 | if args[""] == "local": 53 | exit(call(["python", "-m", "adept.scripts.local"] + argv, env=env)) 54 | elif args[""] == "distrib": 55 | exit(call(["python", "-m", "adept.scripts.distrib"] + argv, env=env)) 56 | elif args[""] == "actorlearner": 57 | exit( 58 | call(["python", "-m", "adept.scripts.actorlearner"] + argv, env=env) 59 | ) 60 | elif args[""] == "evaluate": 61 | exit(call(["python", "-m", "adept.scripts.evaluate"] + argv, env=env)) 62 | elif args[""] == "render": 63 | exit(call(["python", "-m", "adept.scripts.render"] + argv, env=env)) 64 | elif args[""] == "replay_gen_sc2": 65 | exit( 66 | call( 67 | ["python", "-m", "adept.scripts.replay_gen_sc2"] + argv, env=env 68 | ) 69 | ) 70 | elif args[""] == "help": 71 | if "local" in args[""]: 72 | exit(call(["python", "-m", "adept.scripts.local", "-h"])) 73 | elif "distrib" in args[""]: 74 | exit(call(["python", "-m", "adept.scripts.distrib", "-h"])) 75 | elif "actorlearner" in args[""]: 76 | exit(call(["python", "-m", "adept.scripts.actorlearner", "-h"])) 77 | elif "evaluate" in args[""]: 78 | exit(call(["python", "-m", "adept.scripts.evaluate", "-h"])) 79 | elif "render" in args[""]: 80 | exit(call(["python", "-m", "adept.scripts.render", "-h"])) 81 | elif "replay_gen_sc2" in args[""]: 82 | exit(call(["python", "-m", "adept.scripts.replay_gen_sc2", "-h"])) 83 | else: 84 | exit( 85 | "{} is not a valid command. See 'adept.app --help'.".format( 86 | args[""] 87 | ) 88 | ) 89 | 90 | 91 | if __name__ == "__main__": 92 | parse_args() 93 | -------------------------------------------------------------------------------- /adept/container/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Heron Systems, Inc. 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 15 | from .local import Local 16 | from .distrib import DistribHost, DistribWorker 17 | from .actorlearner import ActorLearnerHost, ActorLearnerWorker 18 | from .evaluation import EvalContainer 19 | from .init import Init 20 | from .base.updater import Updater 21 | -------------------------------------------------------------------------------- /adept/container/actorlearner/__init__.py: -------------------------------------------------------------------------------- 1 | from .learner_container import ActorLearnerHost 2 | from .rollout_worker import ActorLearnerWorker 3 | -------------------------------------------------------------------------------- /adept/container/actorlearner/rollout_queuer.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Heron Systems, Inc. 
2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 15 | from time import time 16 | 17 | import queue 18 | import ray 19 | import threading 20 | import torch 21 | 22 | 23 | class RolloutQueuerAsync: 24 | def __init__(self, workers, num_rollouts, queue_max_size, timeout=15.0): 25 | self.workers = workers 26 | self.num_rollouts = num_rollouts 27 | self.queue_max_size = queue_max_size 28 | self.queue_timeout = timeout 29 | 30 | self.futures = [w.run.remote() for w in self.workers] 31 | self.future_inds = [w for w in range(len(self.workers))] 32 | self._should_stop = True 33 | self.rollout_queue = queue.Queue(self.queue_max_size) 34 | self._worker_wait_time = 0 35 | self._host_wait_time = 0 36 | 37 | def _background_queing_thread(self): 38 | while not self._should_stop: 39 | ready_ids, remaining_ids = ray.wait(self.futures, num_returns=1) 40 | 41 | # if ray returns an empty list that means all objects have been gotten 42 | # this should never happen? 43 | if len(ready_ids) == 0: 44 | print("WARNING: ray returned no ready rollouts") 45 | # otherwise rollout was returned 46 | else: 47 | for ready in ready_ids: 48 | # get object and add to queue 49 | rollouts = ray.get(ready) 50 | # this will block if queue is at max size 51 | self._add_to_queue(rollouts) 52 | 53 | # remove from futures 54 | self._idle_workers = [] 55 | for ready in ready_ids: 56 | index = self.futures.index(ready) 57 | del self.futures[index] 58 | self._idle_workers.append(self.future_inds[index]) 59 | del self.future_inds[index] 60 | 61 | # tell the worker(s) to start another rollout 62 | self._restart_idle_workers() 63 | 64 | # done, wait for all remaining to finish 65 | dones, not_dones = ray.wait( 66 | self.futures, len(self.futures), timeout=self.queue_timeout 67 | ) 68 | 69 | if len(not_dones) > 0: 70 | print("WARNING: Not all rollout workers finished") 71 | 72 | def _add_to_queue(self, rollout): 73 | st = time() 74 | self.rollout_queue.put(rollout, timeout=self.queue_timeout) 75 | et = time() 76 | self._worker_wait_time += et - st 77 | 78 | def start(self): 79 | self._should_stop = False 80 | self.background_thread = threading.Thread( 81 | target=self._background_queing_thread 82 | ) 83 | self.background_thread.start() 84 | 85 | def get(self): 86 | st = time() 87 | worker_data = [ 88 | self.rollout_queue.get(True) for _ in range(self.num_rollouts) 89 | ] 90 | et = time() 91 | self._host_wait_time += et - st 92 | 93 | rollouts = [] 94 | terminal_rewards = [] 95 | terminal_infos = [] 96 | for w in worker_data: 97 | r, t, i = w["rollout"], w["terminal_rewards"], w["terminal_infos"] 98 | rollouts.append(r) 99 | terminal_rewards.append(t) 100 | terminal_infos.append(i) 101 | 102 | return rollouts, terminal_rewards, terminal_infos 103 | 104 | def close(self): 105 | self._should_stop = True 106 | 107 | # try to join background thread 108 | self.background_thread.join() 109 | 110 | def metrics(self): 111 | return { 112 | "Host wait 
time": self._host_wait_time, 113 | "Worker wait time": self._worker_wait_time, 114 | } 115 | 116 | def _restart_idle_workers(self): 117 | del_inds = [] 118 | for d_ind, w_ind in enumerate(self._idle_workers): 119 | worker = self.workers[w_ind] 120 | self.futures.append(worker.run.remote()) 121 | self.future_inds.append(w_ind) 122 | del_inds.append(d_ind) 123 | 124 | for d in del_inds: 125 | del self._idle_workers[d] 126 | 127 | def __len__(self): 128 | return len(self.rollout_queue) 129 | -------------------------------------------------------------------------------- /adept/container/base/__init__.py: -------------------------------------------------------------------------------- 1 | from .container import Container 2 | from .nccl_optimizer import NCCLOptimizer 3 | -------------------------------------------------------------------------------- /adept/container/base/container.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class Container: 5 | @staticmethod 6 | def load_network(network, path): 7 | network.load_state_dict( 8 | torch.load(path, map_location=lambda storage, loc: storage) 9 | ) 10 | return network 11 | 12 | @staticmethod 13 | def load_optim(optimizer, path): 14 | optimizer.load_state_dict( 15 | torch.load(path, map_location=lambda storage, loc: storage) 16 | ) 17 | return optimizer 18 | 19 | @staticmethod 20 | def init_next_save(initial_step_count, epoch_len): 21 | next_save = 0 22 | if initial_step_count > 0: 23 | while next_save <= initial_step_count: 24 | next_save += epoch_len 25 | return next_save 26 | 27 | @staticmethod 28 | def count_parameters(net): 29 | return sum(p.numel() for p in net.parameters() if p.requires_grad) 30 | 31 | @staticmethod 32 | def write_summaries( 33 | writer, step_count, total_loss, loss_dict, metric_dict, n_params 34 | ): 35 | writer.add_scalar("loss/total_loss", total_loss.item(), step_count) 36 | for l_name, loss in loss_dict.items(): 37 | writer.add_scalar("loss/" + l_name, loss.item(), step_count) 38 | for m_name, metric in metric_dict.items(): 39 | writer.add_scalar("metric/" + m_name, metric.item(), step_count) 40 | for p_name, param in n_params: 41 | p_name = p_name.replace(".", "/") 42 | writer.add_scalar(p_name, torch.norm(param).item(), step_count) 43 | if param.grad is not None: 44 | writer.add_scalar( 45 | p_name + ".grad", torch.norm(param.grad).item(), step_count 46 | ) 47 | -------------------------------------------------------------------------------- /adept/container/base/nccl_optimizer.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Heron Systems, Inc. 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 
15 | 16 | 17 | class NCCLOptimizer: 18 | def __init__(self, optimizer_fn, network, world_size, param_sync_rate=1000): 19 | self.network = network 20 | self.optimizer = optimizer_fn(self.network.parameters()) 21 | self.param_sync_rate = param_sync_rate 22 | self._opt_count = 0 23 | self.process_group = None 24 | self.world_size = world_size 25 | 26 | def set_process_group(self, pg): 27 | self.process_group = pg 28 | 29 | def step(self): 30 | handles = [] 31 | for param in self.network.parameters(): 32 | handles.append(self.process_group.allreduce(param.grad)) 33 | for handle in handles: 34 | handle.wait() 35 | for param in self.network.parameters(): 36 | param.grad.mul_(1.0 / self.world_size) 37 | self.optimizer.step() 38 | self._opt_count += 1 39 | 40 | # sync params every once in a while to reduce numerical errors 41 | if self._opt_count % self.param_sync_rate == 0: 42 | self.sync_parameters() 43 | # can't just sync buffers, some are int and don't mean well 44 | # self.sync_buffers() 45 | 46 | def sync_parameters(self): 47 | for param in self.network.parameters(): 48 | self.process_group.allreduce(param.data) 49 | param.data.mul_(1.0 / self.world_size) 50 | 51 | def sync_buffers(self): 52 | for b in self.network.buffers(): 53 | self.process_group.allreduce(b.data) 54 | b.data.mul_(1.0 / self.world_size) 55 | 56 | def zero_grad(self): 57 | self.optimizer.zero_grad() 58 | 59 | def state_dict(self): 60 | return self.optimizer.state_dict() 61 | 62 | def load_state_dict(self, d): 63 | return self.optimizer.load_state_dict(d) 64 | -------------------------------------------------------------------------------- /adept/container/base/updater.py: -------------------------------------------------------------------------------- 1 | import abc 2 | 3 | 4 | class Updater(metaclass=abc.ABCMeta): 5 | def __init__(self, optimizer, network, grad_norm_clip): 6 | self.optimizer = optimizer 7 | self.network = network 8 | self.grad_norm_clip = grad_norm_clip 9 | 10 | def step(self, loss): 11 | raise NotImplementedError 12 | -------------------------------------------------------------------------------- /adept/container/evaluation_thread.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Heron Systems, Inc. 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 
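# EvaluationThread runs evaluation episodes on a background thread, optionally rate-limited, and reloads weights from the training network whenever an episode ends. Note that its base class, LogsAndSummarizesRewards (which this class relies on for update_buffers, write_reward_summaries, and the network attribute), is not imported in this module as written.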
15 | import numpy as np 16 | from threading import Thread 17 | from time import time, sleep 18 | 19 | 20 | class EvaluationThread(LogsAndSummarizesRewards): 21 | def __init__( 22 | self, 23 | training_network, 24 | agent, 25 | env, 26 | nb_env, 27 | logger, 28 | summary_writer, 29 | step_rate_limit, 30 | override_step_count_fn=None, 31 | ): 32 | self._training_network = training_network 33 | self._agent = agent 34 | self._environment = env 35 | self._nb_env = nb_env 36 | self._logger = logger 37 | self._summary_writer = summary_writer 38 | self._step_rate_limit = step_rate_limit 39 | self._override_step_count_fn = override_step_count_fn 40 | self._thread = Thread(target=self._run) 41 | self._should_stop = False 42 | 43 | def start(self): 44 | self._thread.start() 45 | 46 | def stop(self): 47 | self._should_stop = True 48 | self._thread.join() 49 | 50 | def _run(self): 51 | next_obs = self.environment.reset() 52 | self.start_time = time() 53 | while not self._should_stop: 54 | if self._step_rate_limit > 0: 55 | sleep(1 / self._step_rate_limit) 56 | obs = next_obs 57 | actions = self.agent.act_eval(obs) 58 | next_obs, rewards, terminals, infos = self.environment.step(actions) 59 | 60 | self.agent.reset_internals(terminals) 61 | # Perform state updates 62 | terminal_rewards, terminal_infos = self.update_buffers( 63 | rewards, terminals, infos 64 | ) 65 | self.log_episode_results( 66 | terminal_rewards, terminal_infos, self.local_step_count 67 | ) 68 | self.write_reward_summaries(terminal_rewards, self.local_step_count) 69 | 70 | if np.any(terminals) and np.any(infos): 71 | self.network.load_state_dict( 72 | self._training_network.state_dict() 73 | ) 74 | 75 | def log_episode_results( 76 | self, terminal_rewards, terminal_infos, step_count, initial_step_count=0 77 | ): 78 | if terminal_rewards: 79 | ep_reward = np.mean(terminal_rewards) 80 | self.logger.info( 81 | "eval_frames: {} reward: {} avg_eval_fps: {}".format( 82 | step_count, 83 | ep_reward, 84 | (step_count - initial_step_count) 85 | / (time() - self.start_time), 86 | ) 87 | ) 88 | return terminal_rewards 89 | 90 | @property 91 | def agent(self): 92 | return self._agent 93 | 94 | @property 95 | def environment(self): 96 | return self._environment 97 | 98 | @environment.setter 99 | def environment(self, new_env): 100 | self._environment = new_env 101 | 102 | @property 103 | def nb_env(self): 104 | return self._nb_env 105 | 106 | @property 107 | def logger(self): 108 | return self._logger 109 | 110 | @property 111 | def summary_writer(self): 112 | return self._summary_writer 113 | 114 | @property 115 | def summary_name(self): 116 | return "reward/eval" 117 | 118 | @property 119 | def local_step_count(self): 120 | if self._override_step_count_fn is not None: 121 | return self._override_step_count_fn() 122 | else: 123 | return self._local_step_count 124 | 125 | @local_step_count.setter 126 | def local_step_count(self, step_count): 127 | self._local_step_count = step_count 128 | -------------------------------------------------------------------------------- /adept/container/render.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Heron Systems, Inc. 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 
7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 15 | import torch 16 | 17 | from adept.network import ModularNetwork 18 | from adept.registry import REGISTRY 19 | from adept.utils import dtensor_to_dev, listd_to_dlist 20 | from adept.utils.script_helpers import LogDirHelper 21 | from adept.utils.util import DotDict 22 | 23 | 24 | class RenderContainer: 25 | def __init__( 26 | self, 27 | actor, 28 | epoch_id, 29 | start, 30 | end, 31 | logger, 32 | log_id_dir, 33 | gpu_id, 34 | seed, 35 | manager, 36 | extra_args={}, 37 | ): 38 | self.log_dir_helper = log_dir_helper = LogDirHelper(log_id_dir) 39 | self.train_args = train_args = log_dir_helper.load_args() 40 | self.train_args = DotDict({**self.train_args, **extra_args}) 41 | self.device = device = self._device_from_gpu_id(gpu_id) 42 | self.logger = logger 43 | 44 | if epoch_id: 45 | epoch_ids = [epoch_id] 46 | else: 47 | epoch_ids = self.log_dir_helper.epochs() 48 | epoch_ids = filter(lambda eid: eid >= start, epoch_ids) 49 | if end != -1.0: 50 | epoch_ids = filter(lambda eid: eid <= end, epoch_ids) 51 | epoch_ids = list(epoch_ids) 52 | self.epoch_ids = epoch_ids 53 | 54 | engine = REGISTRY.lookup_engine(train_args.env) 55 | env_cls = REGISTRY.lookup_env(train_args.env) 56 | manager_cls = REGISTRY.lookup_manager(manager) 57 | self.env_mgr = manager_cls.from_args( 58 | self.train_args, engine, env_cls, seed=seed, nb_env=1 59 | ) 60 | if train_args.agent: 61 | agent = train_args.agent 62 | else: 63 | agent = train_args.actor_host 64 | output_space = REGISTRY.lookup_output_space( 65 | agent, self.env_mgr.action_space 66 | ) 67 | actor_cls = REGISTRY.lookup_actor(actor) 68 | self.actor = actor_cls.from_args( 69 | actor_cls.prompt(), self.env_mgr.action_space 70 | ) 71 | 72 | self.network = self._init_network( 73 | train_args, 74 | self.env_mgr.observation_space, 75 | self.env_mgr.gpu_preprocessor, 76 | output_space, 77 | REGISTRY, 78 | ).to(device) 79 | 80 | @staticmethod 81 | def _device_from_gpu_id(gpu_id): 82 | return torch.device( 83 | "cuda:{}".format(gpu_id) 84 | if (torch.cuda.is_available() and gpu_id >= 0) 85 | else "cpu" 86 | ) 87 | 88 | @staticmethod 89 | def _init_network( 90 | train_args, obs_space, gpu_preprocessor, output_space, net_reg 91 | ): 92 | if train_args.custom_network: 93 | net_cls = net_reg.lookup_network(train_args.custom_network) 94 | else: 95 | net_cls = ModularNetwork 96 | 97 | return net_cls.from_args( 98 | train_args, obs_space, output_space, gpu_preprocessor, net_reg 99 | ) 100 | 101 | def run(self): 102 | for epoch_id in self.epoch_ids: 103 | reward_buf = 0 104 | for net_path in self.log_dir_helper.network_paths_at_epoch( 105 | epoch_id 106 | ): 107 | self.network.load_state_dict( 108 | torch.load( 109 | net_path, map_location=lambda storage, loc: storage 110 | ) 111 | ) 112 | self.network.eval() 113 | 114 | internals = listd_to_dlist( 115 | [self.network.new_internals(self.device)] 116 | ) 117 | next_obs = dtensor_to_dev(self.env_mgr.reset(), self.device) 118 | self.env_mgr.render() 119 | 120 | episode_complete = False 121 | while not episode_complete: 122 | obs = next_obs 123 | with torch.no_grad(): 124 | actions, _, internals = self.actor.act( 125 | self.network, obs, 
internals 126 | ) 127 | next_obs, rewards, terminals, infos = self.env_mgr.step( 128 | actions 129 | ) 130 | self.env_mgr.render() 131 | next_obs = dtensor_to_dev(next_obs, self.device) 132 | 133 | reward_buf += rewards[0] 134 | 135 | if terminals[0]: 136 | episode_complete = True 137 | 138 | print(f"EPOCH_ID: {epoch_id} REWARD: {reward_buf}") 139 | 140 | def close(self): 141 | self.env_mgr.close() 142 | -------------------------------------------------------------------------------- /adept/env/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Heron Systems, Inc. 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 15 | from adept.env.openai_gym import ATARI_ENVS 16 | from .base.env_module import EnvModule 17 | from .openai_gym import AdeptGymEnv 18 | 19 | ENV_REG = [AdeptGymEnv] 20 | -------------------------------------------------------------------------------- /adept/env/_spaces.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Heron Systems, Inc. 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 
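# Space flattens gym spaces into a plain dict of shape tuples, e.g. (values shown for illustration):
#
#   Space.from_gym(spaces.Discrete(6))              -> {"Discrete": (6,)}
#   Space.from_gym(spaces.Box(0, 1, (84, 84)))      -> {"Box": (84, 84)}
#   Space.dtypes_from_gym(spaces.Discrete(6))       -> {"Discrete": <the space's dtype>}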
15 | from gym import spaces 16 | 17 | 18 | class Space(dict): 19 | def __init__(self, entries_by_name): 20 | super(Space, self).__init__(entries_by_name) 21 | 22 | @classmethod 23 | def from_gym(cls, gym_space): 24 | entries_by_name = Space._detect_gym_spaces(gym_space) 25 | return cls(entries_by_name) 26 | 27 | @staticmethod 28 | def _detect_gym_spaces(gym_space): 29 | if isinstance(gym_space, spaces.Discrete): 30 | return {"Discrete": (gym_space.n,)} 31 | elif isinstance(gym_space, spaces.MultiDiscrete): 32 | raise NotImplementedError 33 | elif isinstance(gym_space, spaces.MultiBinary): 34 | return {"MultiBinary": (gym_space.n,)} 35 | elif isinstance(gym_space, spaces.Box): 36 | return {"Box": gym_space.shape} 37 | elif isinstance(gym_space, spaces.Dict): 38 | return { 39 | name: list(Space._detect_gym_spaces(s).values())[0] 40 | for name, s in gym_space.spaces.items() 41 | } 42 | elif isinstance(gym_space, spaces.Tuple): 43 | return { 44 | idx: list(Space._detect_gym_spaces(s).values())[0] 45 | for idx, s in enumerate(gym_space.spaces) 46 | } 47 | 48 | @staticmethod 49 | def dtypes_from_gym(gym_space): 50 | if isinstance(gym_space, spaces.Discrete): 51 | return {"Discrete": gym_space.dtype} 52 | elif isinstance(gym_space, spaces.MultiDiscrete): 53 | raise NotImplementedError 54 | elif isinstance(gym_space, spaces.MultiBinary): 55 | return {"MultiBinary": gym_space.dtype} 56 | elif isinstance(gym_space, spaces.Box): 57 | return {"Box": gym_space.dtype} 58 | elif isinstance(gym_space, spaces.Dict): 59 | return { 60 | name: list(Space.dtypes_from_gym(s).values())[0] 61 | for name, s in gym_space.spaces.items() 62 | } 63 | elif isinstance(gym_space, spaces.Tuple): 64 | return { 65 | idx: list(Space.dtypes_from_gym(s).values())[0] 66 | for idx, s in enumerate(gym_space.spaces) 67 | } 68 | else: 69 | raise NotImplementedError 70 | -------------------------------------------------------------------------------- /adept/env/base/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heronsystems/adeptRL/d8554d134c1dfee6659baafd886684351c1dd982/adept/env/base/__init__.py -------------------------------------------------------------------------------- /adept/env/base/_env.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Heron Systems, Inc. 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 
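A quick illustration of how Space.from_gym and Space.dtypes_from_gym above flatten gym spaces into plain {name: shape} and {name: dtype} dicts (hypothetical usage; assumes gym and numpy are installed):

import numpy as np
from gym import spaces

from adept.env._spaces import Space

pixels = spaces.Box(low=0, high=255, shape=(84, 84, 4), dtype=np.uint8)

print(Space.from_gym(pixels))              # {'Box': (84, 84, 4)}
print(Space.from_gym(spaces.Discrete(6)))  # {'Discrete': (6,)}
print(Space.dtypes_from_gym(pixels))       # {'Box': dtype('uint8')}

# Dict spaces are flattened by sub-space name
composite = spaces.Dict({"screen": pixels, "available": spaces.MultiBinary(6)})
print(Space.from_gym(composite))           # {'available': (6,), 'screen': (84, 84, 4)}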
15 | import abc 16 | 17 | 18 | class HasEnvMetaData(metaclass=abc.ABCMeta): 19 | @property 20 | @abc.abstractmethod 21 | def observation_space(self): 22 | raise NotImplementedError 23 | 24 | @property 25 | @abc.abstractmethod 26 | def action_space(self): 27 | raise NotImplementedError 28 | 29 | @property 30 | @abc.abstractmethod 31 | def cpu_preprocessor(self): 32 | raise NotImplementedError 33 | 34 | @property 35 | @abc.abstractmethod 36 | def gpu_preprocessor(self): 37 | raise NotImplementedError 38 | 39 | 40 | class EnvBase(HasEnvMetaData, metaclass=abc.ABCMeta): 41 | @abc.abstractmethod 42 | def step(self, action): 43 | """ 44 | :param action: Dict[ActionID, Any] Action dictionary 45 | :return: Tuple[Observation, Reward, Terminal, Info] 46 | """ 47 | raise NotImplementedError 48 | 49 | @abc.abstractmethod 50 | def reset(self, **kwargs): 51 | """ 52 | :param kwargs: 53 | :return: Dict[ObservationID, Any] Observation dictionary 54 | """ 55 | raise NotImplementedError 56 | 57 | @abc.abstractmethod 58 | def close(self): 59 | """ 60 | Close environment. Release resources. 61 | 62 | :return: 63 | """ 64 | raise NotImplementedError 65 | -------------------------------------------------------------------------------- /adept/env/base/env_module.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Heron Systems, Inc. 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 15 | import abc 16 | 17 | from adept.utils.requires_args import RequiresArgsMixin 18 | from adept.env.base._env import EnvBase 19 | 20 | 21 | class EnvModule(EnvBase, RequiresArgsMixin, metaclass=abc.ABCMeta): 22 | ids = None 23 | 24 | """ 25 | Implement this class to add your custom environment. Don't forget to 26 | implement args. 27 | """ 28 | 29 | def __init__(self, action_space, cpu_preprocessor, gpu_preprocessor): 30 | """ 31 | :param observation_space: ._spaces.Spaces 32 | :param action_space: ._spaces.Spaces 33 | :param cpu_preprocessor: adept.preprocess.observation.ObsPreprocessor 34 | :param gpu_preprocessor: adept.preprocess.observation.ObsPreprocessor 35 | """ 36 | self._action_space = action_space 37 | self._cpu_preprocessor = cpu_preprocessor 38 | self._gpu_preprocessor = gpu_preprocessor 39 | 40 | @classmethod 41 | @abc.abstractmethod 42 | def from_args(cls, args, seed, **kwargs): 43 | """ 44 | Construct from arguments. For convenience. 45 | 46 | :param args: Arguments object 47 | :param seed: Integer used to seed this environment. 48 | :param kwargs: Any custom arguments are passed through kwargs. 49 | :return: EnvModule instance. 
50 | """ 51 | raise NotImplementedError 52 | 53 | @classmethod 54 | def from_args_curry(cls, args, seed, **kwargs): 55 | def _f(): 56 | return cls.from_args(args, seed, **kwargs) 57 | 58 | return _f 59 | 60 | @property 61 | def observation_space(self): 62 | return self._gpu_preprocessor.observation_space 63 | 64 | @property 65 | def action_space(self): 66 | return self._action_space 67 | 68 | @property 69 | def cpu_preprocessor(self): 70 | return self._cpu_preprocessor 71 | 72 | @property 73 | def gpu_preprocessor(self): 74 | return self._gpu_preprocessor 75 | 76 | @classmethod 77 | def check_ids_implemented(cls): 78 | if cls.ids is None: 79 | raise NotImplementedError( 80 | 'Subclass must define class attribute "ids"' 81 | ) 82 | -------------------------------------------------------------------------------- /adept/exp/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import ExpModule 2 | from .base import ExpSpecBuilder 3 | from .replay import ExperienceReplay, PrioritizedExperienceReplay 4 | from .rollout import Rollout 5 | 6 | EXP_REG = [ 7 | Rollout, # , ExperienceReplay, PrioritizedExperienceReplay 8 | ] 9 | -------------------------------------------------------------------------------- /adept/exp/base/__init__.py: -------------------------------------------------------------------------------- 1 | from .spec_builder import ExpSpecBuilder 2 | from .exp_module import ExpModule 3 | -------------------------------------------------------------------------------- /adept/exp/base/exp_module.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Heron Systems, Inc. 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 15 | import abc 16 | 17 | from adept.utils.requires_args import RequiresArgsMixin 18 | 19 | 20 | class ExpModule(RequiresArgsMixin, metaclass=abc.ABCMeta): 21 | @abc.abstractmethod 22 | def write_actor(self, experience): 23 | raise NotImplementedError 24 | 25 | @abc.abstractmethod 26 | def write_env(self, obs, rewards, terminals, infos): 27 | raise NotImplementedError 28 | 29 | @abc.abstractmethod 30 | def read(self): 31 | raise NotImplementedError 32 | 33 | @abc.abstractmethod 34 | def is_ready(self): 35 | raise NotImplementedError 36 | -------------------------------------------------------------------------------- /adept/exp/base/spec_builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Heron Systems, Inc. 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 
7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 15 | 16 | 17 | class ExpSpecBuilder: 18 | def __init__( 19 | self, obs_keys, act_keys, internal_keys, key_types, exp_keys, build_fn 20 | ): 21 | self.obs_keys = sorted(obs_keys.keys()) 22 | self.action_keys = sorted(act_keys.keys()) 23 | self.internal_keys = sorted(internal_keys.keys()) 24 | self.key_types = key_types 25 | self.exp_keys = sorted(exp_keys) 26 | self.build_fn = build_fn 27 | 28 | def __call__(self, rollout_len): 29 | return self.build_fn(rollout_len) 30 | -------------------------------------------------------------------------------- /adept/exp/replay.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Heron Systems, Inc. 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 15 | from adept.exp.base.exp_module import ExpModule 16 | 17 | 18 | class ExperienceReplay(ExpModule): 19 | def write_actor(self, items): 20 | pass 21 | 22 | def write_env(self, obs, rewards, terminals, infos): 23 | pass 24 | 25 | def read(self): 26 | pass 27 | 28 | def is_ready(self): 29 | pass 30 | 31 | 32 | class PrioritizedExperienceReplay(ExpModule): 33 | def write_actor(self, items): 34 | pass 35 | 36 | def write_env(self, obs, rewards, terminals, infos): 37 | pass 38 | 39 | def read(self): 40 | pass 41 | 42 | def is_ready(self): 43 | pass 44 | -------------------------------------------------------------------------------- /adept/globals.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Heron Systems, Inc. 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 
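ExpSpecBuilder above is a thin container: it sorts the key names it is given and defers the actual shape construction to build_fn(rollout_len). A hypothetical sketch of how one might be built and queried:

from adept.exp.base.spec_builder import ExpSpecBuilder


def build_fn(rollout_len):
    # purely illustrative (rollout_len, nb_env) shapes
    return {
        "rewards": (rollout_len, 32),
        "terminals": (rollout_len, 32),
        "values": (rollout_len, 32),
    }


spec_builder = ExpSpecBuilder(
    obs_keys={"Box": (4, 84, 84)},
    act_keys={"Discrete": (6,)},
    internal_keys={"hx": (512,), "cx": (512,)},
    key_types={"Box": "float32"},
    exp_keys=["rewards", "terminals", "values"],
    build_fn=build_fn,
)
print(spec_builder.obs_keys)        # ['Box']
print(spec_builder(20)["rewards"])  # (20, 32)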
15 | VERSION = "0.4.0dev0" 16 | -------------------------------------------------------------------------------- /adept/learner/__init__.py: -------------------------------------------------------------------------------- 1 | from adept.learner.base.learner_module import LearnerModule 2 | from .ac_rollout import ACRolloutLearner 3 | from .impala import ImpalaLearner 4 | 5 | LEARNER_REG = [ACRolloutLearner, ImpalaLearner] 6 | -------------------------------------------------------------------------------- /adept/learner/ac_rollout.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .base.learner_module import LearnerModule 4 | from .base.dm_return_scale import DeepMindReturnScaler 5 | 6 | 7 | class ACRolloutLearner(LearnerModule): 8 | """ 9 | Actor Critic Rollout Learner 10 | """ 11 | 12 | args = { 13 | "discount": 0.99, 14 | "normalize_advantage": False, 15 | "entropy_weight": 0.01, 16 | "return_scale": False, 17 | } 18 | 19 | def __init__( 20 | self, 21 | reward_normalizer, 22 | discount, 23 | normalize_advantage, 24 | entropy_weight, 25 | return_scale, 26 | ): 27 | self.reward_normalizer = reward_normalizer 28 | self.discount = discount 29 | self.normalize_advantage = normalize_advantage 30 | self.entropy_weight = entropy_weight 31 | self.return_scale = return_scale 32 | if return_scale: 33 | self.dm_scaler = DeepMindReturnScaler(10.0 ** -3) 34 | 35 | @classmethod 36 | def from_args(cls, args, reward_normalizer): 37 | return cls( 38 | reward_normalizer, 39 | args.discount, 40 | args.normalize_advantage, 41 | args.entropy_weight, 42 | args.return_scale, 43 | ) 44 | 45 | def learn_step(self, updater, network, experiences, next_obs, internals): 46 | # normalize rewards 47 | rewards = self.reward_normalizer(torch.stack(experiences.rewards)) 48 | 49 | # torch stack rollouts 50 | r_log_probs_action = torch.stack(experiences.log_probs) 51 | r_values = torch.stack(experiences.values) 52 | r_entropies = torch.stack(experiences.entropies) 53 | 54 | # estimate value of next state 55 | with torch.no_grad(): 56 | results, _, _ = network(next_obs, internals) 57 | last_values = results["critic"].squeeze(1).data 58 | 59 | # compute nstep return and advantage over batch 60 | r_tgt_returns = self.compute_returns( 61 | last_values, rewards, experiences.terminals 62 | ) 63 | r_advantages = r_tgt_returns - r_values.data 64 | 65 | # normalize advantage so that an even number 66 | # of actions are reinforced and penalized 67 | if self.normalize_advantage: 68 | r_advantages = (r_advantages - r_advantages.mean()) / ( 69 | r_advantages.std() + 1e-5 70 | ) 71 | 72 | # batched losses 73 | policy_loss = -(r_log_probs_action) * r_advantages.unsqueeze(-1) 74 | # mean over actions, seq, batch 75 | policy_loss = policy_loss.mean() 76 | entropy_loss = -r_entropies.mean() * self.entropy_weight 77 | value_loss = 0.5 * (r_tgt_returns - r_values).pow(2).mean() 78 | 79 | updater.step(value_loss + policy_loss + entropy_loss) 80 | 81 | losses = { 82 | "value_loss": value_loss, 83 | "policy_loss": policy_loss, 84 | "entropy_loss": entropy_loss, 85 | } 86 | metrics = {} 87 | return losses, metrics 88 | 89 | def compute_returns(self, bootstrap_value, rewards, terminals): 90 | # First step of nstep reward target is estimated value of t+1 91 | target_return = bootstrap_value 92 | rollout_len = len(rewards) 93 | nstep_target_returns = [] 94 | for i in reversed(range(rollout_len)): 95 | reward = rewards[i] 96 | terminal_mask = 1.0 - terminals[i].float() 97 | 98 | if 
self.return_scale: 99 | target_return = self.dm_scaler.calc_scale( 100 | reward 101 | + self.discount 102 | * self.dm_scaler.calc_inverse_scale(target_return) 103 | * terminal_mask 104 | ) 105 | else: 106 | target_return = reward + ( 107 | self.discount * target_return * terminal_mask 108 | ) 109 | nstep_target_returns.append(target_return) 110 | 111 | # reverse lists 112 | nstep_target_returns = torch.stack( 113 | list(reversed(nstep_target_returns)) 114 | ).data 115 | 116 | return nstep_target_returns 117 | -------------------------------------------------------------------------------- /adept/learner/base/__init__.py: -------------------------------------------------------------------------------- 1 | from .learner_module import LearnerModule 2 | -------------------------------------------------------------------------------- /adept/learner/base/dm_return_scale.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class DeepMindReturnScaler: 5 | """ 6 | Scale returns as in R2D2. 7 | https://openreview.net/pdf?id=r1lyTjAqYX 8 | """ 9 | 10 | def __init__(self, scale): 11 | self.scale = scale 12 | 13 | def calc_scale(self, x): 14 | return ( 15 | torch.sign(x) * (torch.sqrt(torch.abs(x) + 1) - 1) + self.scale * x 16 | ) 17 | 18 | def calc_inverse_scale(self, x): 19 | sign = torch.sign(x) 20 | sqrt = torch.sqrt(1 + 4 * self.scale * (torch.abs(x) + 1 + self.scale)) 21 | return sign * ((((sqrt - 1) / (2 * self.scale)) ** 2) - 1) 22 | -------------------------------------------------------------------------------- /adept/learner/base/learner_module.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Heron Systems, Inc. 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 15 | """ 16 | A Learner receives agent-environment interactions which are used to compute 17 | loss. 18 | """ 19 | import abc 20 | from adept.utils.requires_args import RequiresArgsMixin 21 | 22 | 23 | class LearnerModule(RequiresArgsMixin, metaclass=abc.ABCMeta): 24 | """ 25 | This one of the modules to use for custom Actor-Learner code. 
26 | """ 27 | 28 | @classmethod 29 | @abc.abstractmethod 30 | def from_args(self, args, reward_normalizer): 31 | raise NotImplementedError 32 | 33 | @abc.abstractmethod 34 | def learn_step(self, updater, network, experiences, next_obs, internals): 35 | raise NotImplementedError 36 | -------------------------------------------------------------------------------- /adept/manager/__init__.py: -------------------------------------------------------------------------------- 1 | from adept.manager.simple_env_manager import SimpleEnvManager 2 | from adept.manager.subproc_env_manager import SubProcEnvManager 3 | from adept.manager.base.manager_module import EnvManagerModule 4 | 5 | 6 | MANAGER_REG = [SimpleEnvManager, SubProcEnvManager] 7 | -------------------------------------------------------------------------------- /adept/manager/base/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heronsystems/adeptRL/d8554d134c1dfee6659baafd886684351c1dd982/adept/manager/base/__init__.py -------------------------------------------------------------------------------- /adept/manager/base/manager_module.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Heron Systems, Inc. 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 15 | import abc 16 | 17 | from adept.env.base._env import EnvBase 18 | from adept.utils.requires_args import RequiresArgsMixin 19 | 20 | 21 | class EnvManagerModule(EnvBase, RequiresArgsMixin, metaclass=abc.ABCMeta): 22 | def __init__(self, env_fns, engine): 23 | self._env_fns = env_fns 24 | self._engine = engine 25 | 26 | @property 27 | def env_fns(self): 28 | return self._env_fns 29 | 30 | @property 31 | def engine(self): 32 | return self._engine 33 | 34 | @property 35 | def nb_env(self): 36 | return len(self._env_fns) 37 | 38 | @classmethod 39 | def from_args(cls, args, engine, env_cls, seed=None, nb_env=None, **kwargs): 40 | if seed is None: 41 | seed = int(args.seed) 42 | if nb_env is None: 43 | nb_env = args.nb_env 44 | 45 | env_fns = [] 46 | for i in range(nb_env): 47 | env_fns.append(env_cls.from_args_curry(args, seed + i, **kwargs)) 48 | return cls(env_fns, engine) 49 | -------------------------------------------------------------------------------- /adept/manager/simple_env_manager.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Heron Systems, Inc. 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 
7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 15 | import numpy as np 16 | import torch 17 | 18 | from adept.utils import listd_to_dlist 19 | from adept.utils.util import dlist_to_listd 20 | from .base.manager_module import EnvManagerModule 21 | 22 | 23 | class SimpleEnvManager(EnvManagerModule): 24 | """ 25 | Manages multiple env in the same process. This is slower than a 26 | SubProcEnvManager but allows debugging. 27 | """ 28 | 29 | args = {} 30 | 31 | def __init__(self, env_fns, engine): 32 | super(SimpleEnvManager, self).__init__(env_fns, engine) 33 | self.envs = [fn() for fn in env_fns] 34 | env = self.envs[0] 35 | self._observation_space = env.observation_space 36 | self._action_space = env.action_space 37 | self._cpu_preprocessor = env.cpu_preprocessor 38 | self._gpu_preprocessor = env.gpu_preprocessor 39 | 40 | self.buf_obs = [None for _ in range(self.nb_env)] 41 | self.buf_dones = [None for _ in range(self.nb_env)] 42 | self.buf_rews = [None for _ in range(self.nb_env)] 43 | self.buf_infos = [None for _ in range(self.nb_env)] 44 | self.actions = None 45 | 46 | @property 47 | def cpu_preprocessor(self): 48 | return self._cpu_preprocessor 49 | 50 | @property 51 | def gpu_preprocessor(self): 52 | return self._gpu_preprocessor 53 | 54 | @property 55 | def observation_space(self): 56 | return self._observation_space 57 | 58 | @property 59 | def action_space(self): 60 | return self._action_space 61 | 62 | def step(self, actions): 63 | self.step_async(actions) 64 | return self.step_wait() 65 | 66 | def step_async(self, actions): 67 | self.actions = dlist_to_listd(actions) 68 | 69 | def step_wait(self): 70 | obs = [] 71 | for e in range(self.nb_env): 72 | ( 73 | ob, 74 | self.buf_rews[e], 75 | self.buf_dones[e], 76 | self.buf_infos[e], 77 | ) = self.envs[e].step(self.actions[e]) 78 | if self.buf_dones[e]: 79 | ob = self.envs[e].reset() 80 | obs.append(ob) 81 | obs = listd_to_dlist(obs) 82 | new_obs = {} 83 | for k, v in dummy_handle_ob(obs).items(): 84 | if self._is_tensor_key(k): 85 | new_obs[k] = torch.stack(v) 86 | else: 87 | new_obs[k] = v 88 | self.buf_obs = new_obs 89 | 90 | return ( 91 | self.buf_obs, 92 | torch.tensor(self.buf_rews), 93 | torch.tensor(self.buf_dones), 94 | self.buf_infos, 95 | ) 96 | 97 | def reset(self): 98 | obs = [] 99 | for e in range(self.nb_env): 100 | ob = self.envs[e].reset() 101 | obs.append(ob) 102 | obs = listd_to_dlist(obs) 103 | new_obs = {} 104 | for k, v in dummy_handle_ob(obs).items(): 105 | if self._is_tensor_key(k): 106 | new_obs[k] = torch.stack(v) 107 | else: 108 | new_obs[k] = v 109 | self.buf_obs = new_obs 110 | return self.buf_obs 111 | 112 | def close(self): 113 | return [e.close() for e in self.envs] 114 | 115 | def render(self, mode="human"): 116 | return [e.render(mode=mode) for e in self.envs] 117 | 118 | def _is_tensor_key(self, key): 119 | return None not in self.cpu_preprocessor.observation_space[key] 120 | 121 | 122 | def dummy_handle_ob(ob): 123 | new_ob = {} 124 | for k, v in ob.items(): 125 | if isinstance(v, np.ndarray): 126 | new_ob[k] = torch.from_numpy(v) 127 | else: 128 | new_ob[k] = v 129 | return new_ob 130 | 
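step_wait and reset above use listd_to_dlist to pivot the per-environment observation dicts into one dict of lists, then torch.stack the tensor entries into a batch. The pivot itself is simple; a standalone illustration of what the manager hands back (not adept code):

import torch

per_env_obs = [{"Box": torch.zeros(4, 84, 84)} for _ in range(2)]  # one dict per env

# list-of-dicts -> dict-of-lists, then stack along a new batch dimension
dict_of_lists = {k: [ob[k] for ob in per_env_obs] for k in per_env_obs[0]}
batched = {k: torch.stack(v) for k, v in dict_of_lists.items()}
print(batched["Box"].shape)  # torch.Size([2, 4, 84, 84])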
-------------------------------------------------------------------------------- /adept/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Heron Systems, Inc. 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 15 | from .norm import Identity 16 | from .attention import MultiHeadSelfAttention, RMCCell 17 | from .sequence import LSTMCellLayerNorm 18 | from .spatial import Residual2DPreact 19 | -------------------------------------------------------------------------------- /adept/modules/mlp.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Heron Systems, Inc. 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 
15 | import math 16 | 17 | import torch 18 | from torch import nn 19 | from torch.nn import Parameter, functional as F 20 | 21 | 22 | class GaussianLinear(nn.Module): 23 | def __init__(self, fan_in, nodes): 24 | super().__init__() 25 | self.mu = nn.Linear(fan_in, nodes) 26 | # init_tflearn_fc_(self.mu) 27 | self.std = nn.Linear(fan_in, nodes) 28 | # init_tflearn_fc_(self.std) 29 | 30 | def forward(self, x): 31 | mu = self.mu(x) 32 | if self.training: 33 | std = self.std(x) 34 | std = torch.exp(0.5 * std) 35 | eps = torch.randn_like(std) 36 | return eps.mul(std).add_(mu) 37 | else: 38 | return mu 39 | 40 | def get_parameter_names(self): 41 | return ["Mu_W", "Mu_b", "Std_W", "Std_b"] 42 | 43 | 44 | class NoisyLinear(nn.Linear): 45 | """ 46 | Reference implementation: 47 | https://github.com/Kaixhin/NoisyNet-A3C/blob/master/model.py 48 | """ 49 | 50 | def __init__(self, in_features, out_features, sigma_init=0.017, bias=True): 51 | super(NoisyLinear, self).__init__(in_features, out_features, bias=True) 52 | # µ^w and µ^b reuse self.weight and self.bias 53 | self.sigma_init = sigma_init 54 | self.sigma_weight = Parameter( 55 | torch.Tensor(out_features, in_features) 56 | ) # σ^w 57 | self.sigma_bias = Parameter(torch.Tensor(out_features)) # σ^b 58 | self.init_params() 59 | 60 | def init_params(self): 61 | limit = math.sqrt(3 / self.in_features) 62 | 63 | self.weight.data.uniform_(-limit, limit) 64 | self.bias.data.uniform_(-limit, limit) 65 | self.sigma_weight.data.fill_(self.sigma_init) 66 | self.sigma_bias.data.fill_(self.sigma_init) 67 | 68 | def forward(self, x, internals): 69 | if self.training: 70 | w = self.weight + self.sigma_weight * internals[0] 71 | b = self.bias + self.sigma_bias * internals[1] 72 | else: 73 | w = self.weight + self.sigma_weight 74 | b = self.bias + self.sigma_bias 75 | return F.linear(x, w, b) 76 | 77 | def batch_forward(self, x, internals, batch_size=None): 78 | print( 79 | "WARNING: calling forward multiple times is actually" 80 | "faster than this and takes less memory" 81 | ) 82 | batch_size = batch_size if batch_size is not None else x.shape[0] 83 | x = x.unsqueeze(1) 84 | # internals come in as [[w, b], ...] reshape to [w, ...], [b, ...] 85 | eps_w, eps_b = zip(*internals) 86 | eps_w = torch.stack(eps_w) 87 | eps_b = torch.stack(eps_b) 88 | batch_w = self.weight.unsqueeze(0).expand( 89 | batch_size, -1, -1 90 | ) + self.sigma_weight.unsqueeze(0).expand(batch_size, -1, -1) 91 | batch_w += eps_w 92 | # permute to b x m x p 93 | batch_w = batch_w.permute(0, 2, 1) 94 | batch_b = self.bias.expand(batch_size, -1) + self.sigma_bias.expand( 95 | batch_size, -1 96 | ) 97 | batch_b += eps_b 98 | 99 | bmm = torch.bmm(x, batch_w).squeeze(1) 100 | 101 | return bmm + batch_b 102 | 103 | def reset(self, gpu=False, device=None): 104 | # sample new noise 105 | if not gpu: 106 | return ( 107 | torch.randn(self.out_features, self.in_features).detach(), 108 | torch.randn(self.out_features).detach(), 109 | ) 110 | else: 111 | return ( 112 | torch.randn(self.out_features, self.in_features) 113 | .cuda(device, non_blocking=True) 114 | .detach(), 115 | torch.randn(self.out_features) 116 | .cuda(device, non_blocking=True) 117 | .detach(), 118 | ) 119 | 120 | def get_parameter_names(self): 121 | return ["W", "b", "sigma_W", "sigma_b"] 122 | -------------------------------------------------------------------------------- /adept/modules/norm.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Heron Systems, Inc. 
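GaussianLinear above samples its output with the reparameterization trick only while training, and NoisyLinear expects the caller to hold the noise tensors returned by reset() and pass them back on every forward. A hypothetical usage sketch:

import torch

from adept.modules.mlp import GaussianLinear, NoisyLinear

noisy = NoisyLinear(in_features=16, out_features=4)
eps = noisy.reset()           # fresh (weight, bias) noise, CPU variant
out = noisy(torch.randn(8, 16), eps)
print(out.shape)              # torch.Size([8, 4])

gaussian = GaussianLinear(fan_in=16, nodes=4)
gaussian.train()              # stochastic: mu + eps * exp(0.5 * std-head output)
out = gaussian(torch.randn(8, 16))
gaussian.eval()               # deterministic: forward returns mu only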
2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 15 | import torch 16 | 17 | 18 | class Identity(torch.nn.Module): 19 | def forward(self, x): 20 | return x 21 | -------------------------------------------------------------------------------- /adept/modules/sequence.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Heron Systems, Inc. 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 15 | import torch 16 | from torch.nn import Module, Linear, LayerNorm 17 | 18 | 19 | class LSTMCellLayerNorm(Module): 20 | """ 21 | A lstm cell that layer norms the cell state 22 | https://github.com/seba-1511/lstms.pth/blob/master/lstms/lstm.py for reference. 23 | Original License Apache 2.0 24 | """ 25 | 26 | def __init__(self, input_size, hidden_size, bias=True, forget_bias=0): 27 | super().__init__() 28 | self.input_size = input_size 29 | self.hidden_size = hidden_size 30 | self.ih = Linear(input_size, 4 * hidden_size, bias=bias) 31 | self.hh = Linear(hidden_size, 4 * hidden_size, bias=bias) 32 | 33 | if bias: 34 | self.ih.bias.data.fill_(0) 35 | self.hh.bias.data.fill_(0) 36 | # forget bias init 37 | self.ih.bias.data[hidden_size : hidden_size * 2].fill_(forget_bias) 38 | self.hh.bias.data[hidden_size : hidden_size * 2].fill_(forget_bias) 39 | 40 | self.ln_cell = LayerNorm(hidden_size) 41 | 42 | def forward(self, x, hidden): 43 | """ 44 | LSTM Cell that layer normalizes the cell state. 
45 | :param x: Tensor{B, C} 46 | :param hidden: A Tuple[Tensor{B, C}, Tensor{B, C}] of (previous output, cell state) 47 | :return: 48 | """ 49 | h, c = hidden 50 | 51 | # Linear mappings 52 | i2h = self.ih(x) 53 | h2h = self.hh(h) 54 | preact = i2h + h2h 55 | 56 | # activations 57 | gates = preact[:, : 3 * self.hidden_size].sigmoid() 58 | g_t = preact[:, 3 * self.hidden_size :].tanh() 59 | i_t = gates[:, : self.hidden_size] 60 | f_t = gates[:, self.hidden_size : 2 * self.hidden_size] 61 | o_t = gates[:, -self.hidden_size :] 62 | 63 | # cell computations 64 | c_t = torch.mul(c, f_t) + torch.mul(i_t, g_t) 65 | c_t = self.ln_cell(c_t) 66 | h_t = torch.mul(o_t, c_t.tanh()) 67 | 68 | return h_t, c_t 69 | -------------------------------------------------------------------------------- /adept/modules/spatial.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Heron Systems, Inc. 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 15 | from torch import nn as nn 16 | from torch.nn import functional as F 17 | 18 | 19 | class Residual2DPreact(nn.Module): 20 | def __init__(self, nb_in_chan, nb_out_chan, stride=1): 21 | super(Residual2DPreact, self).__init__() 22 | 23 | self.nb_in_chan = nb_in_chan 24 | self.nb_out_chan = nb_out_chan 25 | self.stride = stride 26 | 27 | self.bn1 = nn.BatchNorm2d(nb_in_chan) 28 | self.conv1 = nn.Conv2d( 29 | nb_in_chan, nb_out_chan, 3, stride=stride, padding=1, bias=False 30 | ) 31 | self.bn2 = nn.BatchNorm2d(nb_out_chan) 32 | self.conv2 = nn.Conv2d( 33 | nb_out_chan, nb_out_chan, 3, stride=1, padding=1, bias=False 34 | ) 35 | 36 | relu_gain = nn.init.calculate_gain("relu") 37 | self.conv1.weight.data.mul_(relu_gain) 38 | self.conv2.weight.data.mul_(relu_gain) 39 | 40 | self.do_projection = ( 41 | self.nb_in_chan != self.nb_out_chan or self.stride > 1 42 | ) 43 | if self.do_projection: 44 | self.projection = nn.Conv2d( 45 | nb_in_chan, nb_out_chan, 3, stride=stride, padding=1 46 | ) 47 | self.projection.weight.data.mul_(relu_gain) 48 | 49 | def forward(self, x): 50 | first = F.relu(self.bn1(x)) 51 | if self.do_projection: 52 | projection = self.projection(first) 53 | else: 54 | projection = x 55 | x = self.conv1(first) 56 | x = self.conv2(F.relu(self.bn2(x))) 57 | return x + projection 58 | -------------------------------------------------------------------------------- /adept/network/__init__.py: -------------------------------------------------------------------------------- 1 | from .base.network_module import NetworkModule 2 | from .modular_network import ModularNetwork 3 | from .net1d.submodule_1d import SubModule1D 4 | from .net2d.submodule_2d import SubModule2D 5 | from .net3d.submodule_3d import SubModule3D 6 | from .net4d.submodule_4d import SubModule4D 7 | 8 | from .net1d.linear import Linear 9 | from .net1d.identity_1d import Identity1D 10 | from .net1d.lstm import LSTM 11 | 12 | from .net2d.identity_2d 
import Identity2D 13 | 14 | from .net3d.identity_3d import Identity3D 15 | from .net3d.four_conv import FourConv 16 | 17 | from .net4d.identity_4d import Identity4D 18 | 19 | NET_REG = [] 20 | SUBMOD_REG = [ 21 | Identity1D, 22 | Linear, 23 | LSTM, 24 | Identity2D, 25 | Identity3D, 26 | FourConv, 27 | Identity4D, 28 | ] 29 | -------------------------------------------------------------------------------- /adept/network/base/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heronsystems/adeptRL/d8554d134c1dfee6659baafd886684351c1dd982/adept/network/base/__init__.py -------------------------------------------------------------------------------- /adept/network/base/base.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Heron Systems, Inc. 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 15 | import abc 16 | 17 | import torch 18 | from torch import distributed as dist 19 | 20 | 21 | class BaseNetwork(torch.nn.Module): 22 | @classmethod 23 | @abc.abstractmethod 24 | def from_args( 25 | cls, args, observation_space, output_space, gpu_preprocessor, net_reg 26 | ): 27 | raise NotImplementedError 28 | 29 | @abc.abstractmethod 30 | def new_internals(self, device): 31 | """ 32 | :return: Dict[InternalKey, torch.Tensor (ND)] 33 | """ 34 | raise NotImplementedError 35 | 36 | @abc.abstractmethod 37 | def forward(self, observation, internals): 38 | raise NotImplementedError 39 | 40 | def internal_space(self): 41 | return {k: t.shape for k, t in self.new_internals("cpu").items()} 42 | 43 | def sync(self, src, grp=None, async_op=False): 44 | 45 | keys = [] 46 | handles = [] 47 | 48 | for k, t in self.state_dict().items(): 49 | if grp is None: 50 | h = dist.broadcast(t, src, async_op=True) 51 | else: 52 | h = dist.broadcast(t, src, grp, async_op=True) 53 | 54 | keys.append(k) 55 | handles.append(h) 56 | 57 | if not async_op: 58 | for k, h in zip(keys, handles): 59 | h.wait() 60 | 61 | return handles 62 | -------------------------------------------------------------------------------- /adept/network/base/network_module.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Heron Systems, Inc. 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 
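LSTMCellLayerNorm (adept/modules/sequence.py above) keeps the torch.nn.LSTMCell interface, taking and returning the usual (hidden, cell) pair while layer-normalizing the cell state. A quick hypothetical usage:

import torch

from adept.modules import LSTMCellLayerNorm

cell = LSTMCellLayerNorm(input_size=32, hidden_size=64)
h = torch.zeros(8, 64)   # previous output
c = torch.zeros(8, 64)   # previous cell state
h, c = cell(torch.randn(8, 32), (h, c))
print(h.shape, c.shape)  # torch.Size([8, 64]) torch.Size([8, 64])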
12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 15 | import abc 16 | 17 | from adept.network.base.base import BaseNetwork 18 | from adept.utils.requires_args import RequiresArgsMixin 19 | 20 | 21 | class NetworkModule(BaseNetwork, RequiresArgsMixin, metaclass=abc.ABCMeta): 22 | pass 23 | -------------------------------------------------------------------------------- /adept/network/net1d/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heronsystems/adeptRL/d8554d134c1dfee6659baafd886684351c1dd982/adept/network/net1d/__init__.py -------------------------------------------------------------------------------- /adept/network/net1d/identity_1d.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Heron Systems, Inc. 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 15 | from .submodule_1d import SubModule1D 16 | 17 | 18 | class Identity1D(SubModule1D): 19 | args = {} 20 | 21 | def __init__(self, input_shape, id): 22 | super().__init__(input_shape, id) 23 | 24 | @classmethod 25 | def from_args(cls, args, input_shape, id): 26 | return cls(input_shape, id) 27 | 28 | @property 29 | def _output_shape(self): 30 | return self.input_shape 31 | 32 | def _forward(self, input, internals, **kwargs): 33 | return input, {} 34 | 35 | def _new_internals(self): 36 | return {} 37 | -------------------------------------------------------------------------------- /adept/network/net1d/linear.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Heron Systems, Inc. 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 
15 | from __future__ import division 16 | 17 | from torch import nn 18 | from torch.nn import functional as F 19 | 20 | from adept.modules import Identity 21 | 22 | from .submodule_1d import SubModule1D 23 | 24 | 25 | class Linear(SubModule1D): 26 | args = {"linear_normalize": "bn", "linear_nb_hidden": 512, "nb_layer": 3} 27 | 28 | def __init__(self, input_shape, id, normalize, nb_hidden, nb_layer): 29 | super().__init__(input_shape, id) 30 | self._nb_hidden = nb_hidden 31 | 32 | nb_input_channel = input_shape[0] 33 | 34 | bias = not normalize 35 | self.linears = nn.ModuleList( 36 | [ 37 | nn.Linear( 38 | nb_input_channel if i == 0 else nb_hidden, nb_hidden, bias 39 | ) 40 | for i in range(nb_layer) 41 | ] 42 | ) 43 | if normalize == "bn": 44 | self.norms = nn.ModuleList( 45 | [nn.BatchNorm1d(nb_hidden) for _ in range(nb_layer)] 46 | ) 47 | elif normalize == "gn": 48 | if nb_hidden % 16 != 0: 49 | raise Exception( 50 | "linear_nb_hidden must be divisible by 16 for Group Norm" 51 | ) 52 | self.norms = nn.ModuleList( 53 | [ 54 | nn.GroupNorm(nb_hidden // 16, nb_hidden) 55 | for _ in range(nb_layer) 56 | ] 57 | ) 58 | else: 59 | self.norms = nn.ModuleList([Identity() for _ in range(nb_layer)]) 60 | 61 | @classmethod 62 | def from_args(cls, args, input_shape, id): 63 | return cls( 64 | input_shape, 65 | id, 66 | args.linear_normalize, 67 | args.linear_nb_hidden, 68 | args.nb_layer, 69 | ) 70 | 71 | def _forward(self, xs, internals, **kwargs): 72 | for linear, norm in zip(self.linears, self.norms): 73 | xs = F.relu(norm(linear(xs))) 74 | return xs, {} 75 | 76 | def _new_internals(self): 77 | return {} 78 | 79 | @property 80 | def _output_shape(self): 81 | return (self._nb_hidden,) 82 | -------------------------------------------------------------------------------- /adept/network/net1d/lstm.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Heron Systems, Inc. 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 
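The Linear submodule above pulls its hyperparameters off the argument object, so the quickest way to try the normalization options is to build it from a DotDict directly. A hypothetical sketch (the "gn" branch requires linear_nb_hidden to be divisible by 16; _forward is called directly here only for illustration, since ModularNetwork normally drives submodules):

import torch

from adept.network.net1d.linear import Linear
from adept.utils.util import DotDict

args = DotDict({"linear_normalize": "gn", "linear_nb_hidden": 512, "nb_layer": 3})
mlp = Linear.from_args(args, input_shape=(64,), id="body")
features, _ = mlp._forward(torch.randn(8, 64), internals={})
print(features.shape)  # torch.Size([8, 512])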
15 | import torch 16 | from torch.nn import LSTMCell 17 | 18 | from adept.modules import LSTMCellLayerNorm 19 | from .submodule_1d import SubModule1D 20 | 21 | 22 | class LSTM(SubModule1D): 23 | args = {"lstm_normalize": True, "lstm_nb_hidden": 512} 24 | 25 | def __init__(self, input_shape, id, normalize, nb_hidden): 26 | super().__init__(input_shape, id) 27 | self._nb_hidden = nb_hidden 28 | 29 | if normalize: 30 | self.lstm = LSTMCellLayerNorm(input_shape[0], nb_hidden) 31 | else: 32 | self.lstm = LSTMCell(input_shape[0], nb_hidden) 33 | self.lstm.bias_ih.data.fill_(0) 34 | self.lstm.bias_hh.data.fill_(0) 35 | 36 | @classmethod 37 | def from_args(cls, args, input_shape, id): 38 | return cls(input_shape, id, args.lstm_normalize, args.lstm_nb_hidden) 39 | 40 | @property 41 | def _output_shape(self): 42 | return (self._nb_hidden,) 43 | 44 | def _forward(self, xs, internals, **kwargs): 45 | hxs = self.stacked_internals("hx", internals) 46 | cxs = self.stacked_internals("cx", internals) 47 | hxs, cxs = self.lstm(xs, (hxs, cxs)) 48 | 49 | return ( 50 | hxs, 51 | { 52 | "hx": list(torch.unbind(hxs, dim=0)), 53 | "cx": list(torch.unbind(cxs, dim=0)), 54 | }, 55 | ) 56 | 57 | def _new_internals(self): 58 | return { 59 | "hx": torch.zeros(self._nb_hidden), 60 | "cx": torch.zeros(self._nb_hidden), 61 | } 62 | -------------------------------------------------------------------------------- /adept/network/net1d/submodule_1d.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Heron Systems, Inc. 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 15 | from adept.network.base.submodule import SubModule 16 | import abc 17 | 18 | 19 | class SubModule1D(SubModule, metaclass=abc.ABCMeta): 20 | dim = 1 21 | 22 | def __init__(self, input_shape, id): 23 | super(SubModule1D, self).__init__(input_shape, id) 24 | 25 | def _to_1d_shape(self): 26 | return self._output_shape 27 | 28 | def _to_2d_shape(self): 29 | return self._output_shape + (1,) 30 | 31 | def _to_3d_shape(self): 32 | return self._output_shape + (1, 1) 33 | 34 | def _to_4d_shape(self): 35 | return self._output_shape + (1, 1, 1) 36 | -------------------------------------------------------------------------------- /adept/network/net2d/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heronsystems/adeptRL/d8554d134c1dfee6659baafd886684351c1dd982/adept/network/net2d/__init__.py -------------------------------------------------------------------------------- /adept/network/net2d/identity_2d.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Heron Systems, Inc. 
2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 15 | from .submodule_2d import SubModule2D 16 | 17 | 18 | class Identity2D(SubModule2D): 19 | args = {} 20 | 21 | def __init__(self, input_shape, id): 22 | super().__init__(input_shape, id) 23 | 24 | @classmethod 25 | def from_args(cls, args, input_shape, id): 26 | return cls(input_shape, id) 27 | 28 | @property 29 | def _output_shape(self): 30 | return self.input_shape 31 | 32 | def _forward(self, input, internals, **kwargs): 33 | return input, {} 34 | 35 | def _new_internals(self): 36 | return {} 37 | -------------------------------------------------------------------------------- /adept/network/net2d/submodule_2d.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Heron Systems, Inc. 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 15 | from ..base.submodule import SubModule 16 | import abc 17 | import math 18 | 19 | 20 | class SubModule2D(SubModule, metaclass=abc.ABCMeta): 21 | dim = 2 22 | 23 | def __init__(self, input_shape, id): 24 | super(SubModule2D, self).__init__(input_shape, id) 25 | 26 | def _to_1d_shape(self): 27 | f, s = self._output_shape 28 | return (f * s,) 29 | 30 | def _to_2d_shape(self): 31 | return self._output_shape 32 | 33 | def _to_3d_shape(self): 34 | f, s = self._output_shape 35 | return (f * s, 1, 1) 36 | 37 | def _to_4d_shape(self): 38 | f, s = self._output_shape 39 | return (f, s, 1, 1) 40 | -------------------------------------------------------------------------------- /adept/network/net3d/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heronsystems/adeptRL/d8554d134c1dfee6659baafd886684351c1dd982/adept/network/net3d/__init__.py -------------------------------------------------------------------------------- /adept/network/net3d/four_conv.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Heron Systems, Inc. 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 
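The _to_*d_shape helpers on SubModule1D and SubModule2D above are what let ModularNetwork connect submodules of different ranks: a source's output shape is re-advertised at the target rank by flattening feature dimensions or appending singleton ones. For instance (hypothetical, using the identity submodules and assuming the SubModule base stores its input_shape):

from adept.network import Identity1D, Identity2D

print(Identity1D((512,), "id1d")._to_3d_shape())    # (512, 1, 1)
print(Identity2D((8, 100), "id2d")._to_1d_shape())  # (800,)
print(Identity2D((8, 100), "id2d")._to_4d_shape())  # (8, 100, 1, 1)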
7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 15 | from torch.nn import Conv2d, BatchNorm2d, GroupNorm, init, functional as F 16 | 17 | from adept.modules import Identity 18 | from .submodule_3d import SubModule3D 19 | 20 | 21 | class FourConv(SubModule3D): 22 | args = {"fourconv_norm": "bn"} 23 | 24 | def __init__(self, in_shape, id, normalize): 25 | super().__init__(in_shape, id) 26 | bias = not normalize 27 | self._in_shape = in_shape 28 | self._out_shape = None 29 | self.conv1 = Conv2d(in_shape[0], 32, 7, stride=2, padding=1, bias=bias) 30 | self.conv2 = Conv2d(32, 32, 3, stride=2, padding=1, bias=bias) 31 | self.conv3 = Conv2d(32, 32, 3, stride=2, padding=1, bias=bias) 32 | self.conv4 = Conv2d(32, 32, 3, stride=2, padding=1, bias=bias) 33 | 34 | if normalize == "bn": 35 | self.bn1 = BatchNorm2d(32) 36 | self.bn2 = BatchNorm2d(32) 37 | self.bn3 = BatchNorm2d(32) 38 | self.bn4 = BatchNorm2d(32) 39 | elif normalize == "gn": 40 | self.bn1 = GroupNorm(8, 32) 41 | self.bn2 = GroupNorm(8, 32) 42 | self.bn3 = GroupNorm(8, 32) 43 | self.bn4 = GroupNorm(8, 32) 44 | else: 45 | self.bn1 = Identity() 46 | self.bn2 = Identity() 47 | self.bn3 = Identity() 48 | self.bn4 = Identity() 49 | 50 | relu_gain = init.calculate_gain("relu") 51 | self.conv1.weight.data.mul_(relu_gain) 52 | self.conv2.weight.data.mul_(relu_gain) 53 | self.conv3.weight.data.mul_(relu_gain) 54 | self.conv4.weight.data.mul_(relu_gain) 55 | 56 | @classmethod 57 | def from_args(cls, args, in_shape, id): 58 | return cls(in_shape, id, args.fourconv_norm) 59 | 60 | @property 61 | def _output_shape(self): 62 | # For 84x84, (32, 5, 5) 63 | if self._out_shape is None: 64 | output_dim = calc_output_dim(self._in_shape[1], 7, 2, 1, 1) 65 | output_dim = calc_output_dim(output_dim, 3, 2, 1, 1) 66 | output_dim = calc_output_dim(output_dim, 3, 2, 1, 1) 67 | output_dim = calc_output_dim(output_dim, 3, 2, 1, 1) 68 | self._out_shape = 32, output_dim, output_dim 69 | return self._out_shape 70 | 71 | def _forward(self, xs, internals, **kwargs): 72 | xs = F.relu(self.bn1(self.conv1(xs))) 73 | xs = F.relu(self.bn2(self.conv2(xs))) 74 | xs = F.relu(self.bn3(self.conv3(xs))) 75 | xs = F.relu(self.bn4(self.conv4(xs))) 76 | return xs, {} 77 | 78 | def _new_internals(self): 79 | return {} 80 | 81 | 82 | def calc_output_dim(dim_size, kernel_size, stride, padding, dilation): 83 | numerator = dim_size + 2 * padding - dilation * (kernel_size - 1) - 1 84 | return numerator // stride + 1 85 | 86 | 87 | if __name__ == "__main__": 88 | output_dim = 84 89 | output_dim = calc_output_dim(output_dim, 7, 2, 1, 1) 90 | output_dim = calc_output_dim(output_dim, 3, 2, 1, 1) 91 | output_dim = calc_output_dim(output_dim, 3, 2, 1, 1) 92 | output_dim = calc_output_dim(output_dim, 3, 2, 1, 1) 93 | print(output_dim) # should be 5 94 | -------------------------------------------------------------------------------- /adept/network/net3d/identity_3d.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Heron Systems, Inc. 
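calc_output_dim above is the standard convolution arithmetic, floor((dim + 2*padding - dilation*(kernel - 1) - 1) / stride) + 1, which is how FourConv arrives at (32, 5, 5) features for an 84x84 input. Checking it end to end with a hypothetical batch:

import torch

from adept.network.net3d.four_conv import FourConv

conv = FourConv(in_shape=(4, 84, 84), id="body", normalize="bn")
feats, _ = conv._forward(torch.randn(2, 4, 84, 84), internals={})
print(feats.shape)         # torch.Size([2, 32, 5, 5])
print(conv._output_shape)  # (32, 5, 5)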
2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 15 | from .submodule_3d import SubModule3D 16 | 17 | 18 | class Identity3D(SubModule3D): 19 | args = {} 20 | 21 | def __init__(self, input_shape, id): 22 | super().__init__(input_shape, id) 23 | 24 | @classmethod 25 | def from_args(cls, args, input_shape, id): 26 | return cls(input_shape, id) 27 | 28 | @property 29 | def _output_shape(self): 30 | return self.input_shape 31 | 32 | def _forward(self, input, internals, **kwargs): 33 | return input, {} 34 | 35 | def _new_internals(self): 36 | return {} 37 | -------------------------------------------------------------------------------- /adept/network/net3d/rmc.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Heron Systems, Inc. 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 
15 | import torch 16 | from torch.nn import Conv2d, Linear, BatchNorm2d, BatchNorm1d, functional as F 17 | 18 | from adept.modules import RMCCell, Identity 19 | from ..base.network_module import NetworkModule 20 | 21 | 22 | # TODO 23 | class RMC(NetworkModule): 24 | """ 25 | Relational Memory Core 26 | https://arxiv.org/pdf/1806.01822.pdf 27 | """ 28 | 29 | def __init__(self, nb_in_chan, output_shape_dict, normalize): 30 | self.embedding_size = 512 31 | super(RMC, self).__init__(self.embedding_size, output_shape_dict) 32 | bias = not normalize 33 | self.conv1 = Conv2d( 34 | nb_in_chan, 32, kernel_size=3, stride=2, padding=1, bias=bias 35 | ) 36 | self.conv2 = Conv2d( 37 | 32, 32, kernel_size=3, stride=2, padding=1, bias=bias 38 | ) 39 | self.conv3 = Conv2d( 40 | 32, 32, kernel_size=3, stride=2, padding=1, bias=bias 41 | ) 42 | self.attention = RMCCell(100, 100, 34) 43 | self.conv4 = Conv2d( 44 | 34, 8, kernel_size=3, stride=1, padding=1, bias=bias 45 | ) 46 | # BATCH x 8 x 10 x 10 47 | self.linear = Linear(800, 512, bias=bias) 48 | 49 | if normalize: 50 | self.bn1 = BatchNorm2d(32) 51 | self.bn2 = BatchNorm2d(32) 52 | self.bn3 = BatchNorm2d(32) 53 | self.bn4 = BatchNorm2d(8) 54 | self.bn_linear = BatchNorm1d(512) 55 | else: 56 | self.bn1 = Identity() 57 | self.bn2 = Identity() 58 | self.bn3 = Identity() 59 | self.bn_linear = Identity() 60 | 61 | def forward(self, input, prev_memories): 62 | """ 63 | :param input: Tensor{B, C, H, W} 64 | :param prev_memories: Tuple{B}[Tensor{C}] 65 | :return: 66 | """ 67 | 68 | x = F.relu(self.bn1(self.conv1(input))) 69 | x = F.relu(self.bn2(self.conv2(x))) 70 | x = F.relu(self.bn3(self.conv3(x))) 71 | 72 | h = x.size(2) 73 | w = x.size(3) 74 | xs_chan = ( 75 | torch.linspace(-1, 1, w) 76 | .view(1, 1, 1, w) 77 | .expand(input.size(0), 1, w, w) 78 | .to(input.device) 79 | ) 80 | ys_chan = ( 81 | torch.linspace(-1, 1, h) 82 | .view(1, 1, h, 1) 83 | .expand(input.size(0), 1, h, h) 84 | .to(input.device) 85 | ) 86 | x = torch.cat([x, xs_chan, ys_chan], dim=1) 87 | 88 | # need to transpose because attention expects 89 | # attention dim before channel dim 90 | x = x.view(x.size(0), x.size(1), h * w).transpose(1, 2) 91 | prev_memories = torch.stack(prev_memories) 92 | x = next_memories = self.attention(x.contiguous(), prev_memories) 93 | # need to undo the transpose before output 94 | x = x.transpose(1, 2) 95 | x = x.view(x.size(0), x.size(1), h, w) 96 | 97 | x = F.relu(self.bn4(self.conv4(x))) 98 | x = x.view(x.size(0), -1) 99 | x = F.relu(self.bn_linear(self.linear(x))) 100 | return x, list(torch.unbind(next_memories, 0)) 101 | 102 | def new_internals(self, device): 103 | pass 104 | -------------------------------------------------------------------------------- /adept/network/net3d/submodule_3d.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Heron Systems, Inc. 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. 
If not, see . 15 | from ..base.submodule import SubModule 16 | import abc 17 | 18 | 19 | class SubModule3D(SubModule, metaclass=abc.ABCMeta): 20 | dim = 3 21 | 22 | def __init__(self, input_shape, id): 23 | super(SubModule3D, self).__init__(input_shape, id) 24 | 25 | def _to_1d_shape(self): 26 | f, h, w = self._output_shape 27 | return (f * h * w,) 28 | 29 | def _to_2d_shape(self): 30 | f, h, w = self._output_shape 31 | return (f, h * w) 32 | 33 | def _to_3d_shape(self): 34 | return self._output_shape 35 | 36 | def _to_4d_shape(self): 37 | f, h, w = self._output_shape 38 | return (f, 1, h, w) 39 | -------------------------------------------------------------------------------- /adept/network/net4d/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heronsystems/adeptRL/d8554d134c1dfee6659baafd886684351c1dd982/adept/network/net4d/__init__.py -------------------------------------------------------------------------------- /adept/network/net4d/identity_4d.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Heron Systems, Inc. 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 15 | from .submodule_4d import SubModule4D 16 | 17 | 18 | class Identity4D(SubModule4D): 19 | args = {} 20 | 21 | def __init__(self, input_shape, id): 22 | super().__init__(input_shape, id) 23 | 24 | @classmethod 25 | def from_args(cls, args, input_shape, id): 26 | return cls(input_shape, id) 27 | 28 | @property 29 | def _output_shape(self): 30 | return self.input_shape 31 | 32 | def _forward(self, input, internals, **kwargs): 33 | return input, {} 34 | 35 | def _new_internals(self): 36 | return {} 37 | -------------------------------------------------------------------------------- /adept/network/net4d/submodule_4d.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Heron Systems, Inc. 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 
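# SubModule4D: base class for submodules whose output is a 4D feature map (F, D, H, W); it provides the shape conversions (_to_1d_shape through _to_4d_shape) used to route its output to heads of other dimensionalities.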
15 | from ..base.submodule import SubModule 16 | import abc 17 | 18 | 19 | class SubModule4D(SubModule, metaclass=abc.ABCMeta): 20 | dim = 4 21 | 22 | def __init__(self, input_shape, id): 23 | super(SubModule4D, self).__init__(input_shape, id) 24 | 25 | def _to_1d_shape(self): 26 | f, d, h, w = self._output_shape 27 | return (f * d * h * w,) 28 | 29 | def _to_2d_shape(self): 30 | f, d, h, w = self._output_shape 31 | return (f, d * h * w) 32 | 33 | def _to_3d_shape(self): 34 | f, d, h, w = self._output_shape 35 | return (f * d, h, w) 36 | 37 | def _to_4d_shape(self): 38 | return self._output_shape 39 | -------------------------------------------------------------------------------- /adept/preprocess/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (C) 2018 Heron Systems, Inc. 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 16 | """ 17 | from .base import Operation, SimpleOperation, MultiOperation 18 | from .base import CPUPreprocessor, GPUPreprocessor 19 | -------------------------------------------------------------------------------- /adept/preprocess/base/__init__.py: -------------------------------------------------------------------------------- 1 | from .ops import Operation, SimpleOperation, MultiOperation 2 | from .preprocessor import CPUPreprocessor, GPUPreprocessor -------------------------------------------------------------------------------- /adept/preprocess/base/ops.py: -------------------------------------------------------------------------------- 1 | import abc 2 | 3 | 4 | class Operation(abc.ABC): 5 | @abc.abstractmethod 6 | def update_shape(self, old_shape): 7 | raise NotImplementedError 8 | 9 | @abc.abstractmethod 10 | def update_dtype(self, old_dtype): 11 | raise NotImplementedError 12 | 13 | def reset(self): 14 | pass 15 | 16 | def to(self, device): 17 | return self 18 | 19 | 20 | class MultiOperation(Operation, metaclass=abc.ABCMeta): 21 | """Modifies multiple keys of an observation dictionary.""" 22 | 23 | def __init__(self, input_fields, output_fields): 24 | self.input_fields = input_fields 25 | self.output_fields = output_fields 26 | 27 | @abc.abstractmethod 28 | def preprocess_cpu(self, tensors): 29 | """Preprocess multiple observation fields on the CPU. 30 | 31 | Parameters 32 | ---------- 33 | tensors : list[torch.Tensor] 34 | 35 | Returns 36 | ------- 37 | list[torch.Tensor] 38 | """ 39 | raise NotImplementedError 40 | 41 | @abc.abstractmethod 42 | def preprocess_gpu(self, tensors): 43 | """Preprocess multiple observation fields on the GPU.
44 | 45 | Parameters 46 | ---------- 47 | tensors : list[torch.Tensor] 48 | 49 | Returns 50 | ------- 51 | list[torch.Tensor] 52 | """ 53 | raise NotImplementedError 54 | 55 | 56 | class SimpleOperation(Operation, metaclass=abc.ABCMeta): 57 | """Modifies a single key in the observation dictionary.""" 58 | 59 | def __init__(self, input_field, output_field): 60 | self.input_field = input_field 61 | self.output_field = output_field 62 | 63 | @abc.abstractmethod 64 | def preprocess_cpu(self, tensor): 65 | """Preprocess a specific field of an observation on the CPU. 66 | 67 | Parameters 68 | ---------- 69 | tensor : torch.Tensor 70 | 71 | Returns 72 | ------- 73 | torch.Tensor 74 | """ 75 | raise NotImplementedError 76 | 77 | @abc.abstractmethod 78 | def preprocess_gpu(self, tensor): 79 | """Preprocess a specific field of an observation on the GPU. 80 | 81 | Parameters 82 | ---------- 83 | tensor : torch.Tensor 84 | 85 | Returns 86 | ------- 87 | torch.Tensor 88 | """ 89 | raise NotImplementedError 90 | -------------------------------------------------------------------------------- /adept/preprocess/base/preprocessor.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Heron Systems, Inc. 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see .
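# The preprocessors below apply an ordered list of Operations to an observation dict: SimpleOperations read and write a single field, MultiOperations handle several fields at once, and CPUPreprocessor/GPUPreprocessor differ only in whether they dispatch to each op's preprocess_cpu() or preprocess_gpu().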
15 | from copy import copy, deepcopy 16 | from adept.preprocess.base import MultiOperation, SimpleOperation 17 | 18 | 19 | class _Preprocessor: 20 | def __init__(self, ops, observation_space, observation_dtypes=None): 21 | """ 22 | Parameters 23 | ---------- 24 | ops : list[gamebreaker.preprocess.Operation] 25 | observation_space : dict[str, Shape] 26 | observation_dtypes : dict[str, dtype] 27 | """ 28 | cur_space = deepcopy(observation_space) 29 | cur_dtypes = deepcopy(observation_dtypes) 30 | 31 | self.ops = ops 32 | self.observation_space, self.observation_dtypes = self._update( 33 | cur_space, cur_dtypes 34 | ) 35 | 36 | def _update(self, cur_space, cur_dtypes): 37 | cur_space = copy(cur_space) 38 | cur_dtypes = copy(cur_dtypes) 39 | for op in self.ops: 40 | if isinstance(op, SimpleOperation): 41 | output_shape = op.update_shape( 42 | cur_space[op.input_field] 43 | ) 44 | if output_shape: 45 | cur_space[op.output_field] = output_shape 46 | else: 47 | del cur_space[op.output_field] 48 | if cur_dtypes: 49 | output_dtype = op.update_dtype( 50 | cur_dtypes[op.input_field] 51 | ) 52 | if output_dtype: 53 | cur_dtypes[op.output_field] = output_dtype 54 | else: 55 | del cur_dtypes[op.output_field] 56 | elif isinstance(op, MultiOperation): 57 | input_shapes = [cur_space[k] for k in op.input_fields] 58 | input_dtypes = [cur_dtypes[k] for k in op.input_fields] 59 | output_shapes = op.update_shape(input_shapes) 60 | output_dtypes = op.update_dtype(input_dtypes) 61 | for k, shape, dtype in zip( 62 | op.output_fields, output_shapes, output_dtypes 63 | ): 64 | if shape: 65 | cur_space[k] = shape 66 | else: 67 | del cur_space[k] 68 | if cur_dtypes: 69 | if dtype: 70 | cur_dtypes[k] = dtype 71 | else: 72 | del cur_dtypes[k] 73 | return cur_space, cur_dtypes 74 | 75 | 76 | class CPUPreprocessor(_Preprocessor): 77 | def __call__(self, obs): 78 | obs = copy(obs) 79 | for op in self.ops: 80 | if isinstance(op, SimpleOperation): 81 | output_tensor = op.preprocess_cpu(obs[op.input_field]) 82 | if output_tensor is not None: 83 | obs[op.output_field] = output_tensor 84 | else: 85 | del obs[op.output_field] 86 | elif isinstance(op, MultiOperation): 87 | input_tensors = [obs[k] for k in op.input_fields] 88 | output_tensors = op.preprocess_cpu(input_tensors) 89 | for k, tensor in zip(op.output_fields, output_tensors): 90 | if tensor is not None: 91 | obs[k] = tensor 92 | else: 93 | del obs[k] 94 | return obs 95 | 96 | def reset(self): 97 | for o in self.ops: 98 | o.reset() 99 | 100 | 101 | class GPUPreprocessor(_Preprocessor): 102 | def __call__(self, obs): 103 | obs = copy(obs) 104 | for op in self.ops: 105 | if isinstance(op, SimpleOperation): 106 | output_tensor = op.preprocess_gpu(obs[op.input_field]) 107 | if output_tensor is not None: 108 | obs[op.output_field] = output_tensor 109 | else: 110 | del obs[op.output_field] 111 | elif isinstance(op, MultiOperation): 112 | input_tensors = [obs[k] for k in op.input_fields] 113 | output_tensors = op.preprocess_gpu(input_tensors) 114 | for k, tensor in zip(op.output_fields, output_tensors): 115 | if tensor is not None: 116 | obs[k] = tensor 117 | else: 118 | del obs[k] 119 | return obs 120 | 121 | def to(self, device): 122 | self.ops = [op.to(device) for op in self.ops] 123 | return self 124 | -------------------------------------------------------------------------------- /adept/registry/__init__.py: -------------------------------------------------------------------------------- 1 | from .registry import Registry 2 | 3 | REGISTRY = Registry() 4 | 
-------------------------------------------------------------------------------- /adept/rewardnorm/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import RewardNormModule 2 | from .normalizers import Scale, Clip, Identity 3 | 4 | REWARD_NORM_REG = [Scale, Clip, Identity] 5 | -------------------------------------------------------------------------------- /adept/rewardnorm/base/__init__.py: -------------------------------------------------------------------------------- 1 | from .rewnorm_module import RewardNormModule 2 | -------------------------------------------------------------------------------- /adept/rewardnorm/base/rewnorm_module.py: -------------------------------------------------------------------------------- 1 | import abc 2 | 3 | from adept.utils.requires_args import RequiresArgsMixin 4 | 5 | 6 | class RewardNormModule(RequiresArgsMixin, metaclass=abc.ABCMeta): 7 | def __call__(self, reward): 8 | """ 9 | Normalizes a reward tensor. 10 | 11 | :param reward: torch.Tensor (1D) 12 | :return: 13 | """ 14 | raise NotImplementedError 15 | -------------------------------------------------------------------------------- /adept/rewardnorm/normalizers.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Heron Systems, Inc. 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 15 | import torch 16 | from .base import RewardNormModule 17 | 18 | 19 | class Clip(RewardNormModule): 20 | args = {"floor": -1, "ceil": 1} 21 | 22 | def __init__(self, floor, ceil): 23 | self.floor = floor 24 | self.ceil = ceil 25 | 26 | @classmethod 27 | def from_args(cls, args): 28 | return cls(args.floor, args.ceil) 29 | 30 | def __call__(self, reward): 31 | return torch.clamp(reward, self.floor, self.ceil) 32 | 33 | 34 | class Scale(RewardNormModule): 35 | args = {"coefficient": 0.1} 36 | 37 | def __init__(self, coefficient): 38 | self.coefficient = coefficient 39 | 40 | @classmethod 41 | def from_args(cls, args): 42 | return cls(args.coefficient) 43 | 44 | def __call__(self, reward): 45 | return self.coefficient * reward 46 | 47 | 48 | class Identity(RewardNormModule): 49 | args = {} 50 | 51 | @classmethod 52 | def from_args(cls, args): 53 | return cls() 54 | 55 | def __call__(self, item): 56 | return item 57 | -------------------------------------------------------------------------------- /adept/scripts/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (C) 2018 Heron Systems, Inc. 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 
8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 16 | """ 17 | -------------------------------------------------------------------------------- /adept/scripts/_distrib.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (C) 2018 Heron Systems, Inc. 3 | # 4 | # This program is free software: you can redistribute it and/or modify 5 | # it under the terms of the GNU General Public License as published by 6 | # the Free Software Foundation, either version 3 of the License, or 7 | # (at your option) any later version. 8 | # 9 | # This program is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | # GNU General Public License for more details. 13 | # 14 | # You should have received a copy of the GNU General Public License 15 | # along with this program. If not, see . 16 | """ 17 | Distributed worker script. Called from launcher (distrib.py). 18 | """ 19 | import argparse 20 | import json 21 | import os 22 | import torch.distributed as dist 23 | 24 | from adept.container import Init, DistribHost, DistribWorker 25 | from adept.registry import REGISTRY as R 26 | from adept.utils.script_helpers import LogDirHelper 27 | from adept.utils.util import DotDict 28 | 29 | MODE = "Distrib" 30 | WORLD_SIZE = int(os.environ["WORLD_SIZE"]) 31 | GLOBAL_RANK = int(os.environ["RANK"]) 32 | LOCAL_RANK = int(os.environ["LOCAL_RANK"]) 33 | 34 | 35 | def str2bool(v): 36 | if v.lower() in ("yes", "true", "t", "y", "1"): 37 | return True 38 | elif v.lower() in ("no", "false", "f", "n", "0"): 39 | return False 40 | else: 41 | raise argparse.ArgumentTypeError("Boolean value expected.") 42 | 43 | 44 | def parse_args(): 45 | parser = argparse.ArgumentParser() 46 | parser.add_argument("--log-id-dir", required=True) 47 | parser.add_argument( 48 | "--resume", type=str2bool, nargs="?", const=True, default=False 49 | ) 50 | parser.add_argument("--load-network", default=None) 51 | parser.add_argument("--load-optim", default=None) 52 | parser.add_argument("--initial-step-count", type=int, default=0) 53 | parser.add_argument("--init-method", default=None) 54 | parser.add_argument("--custom-network", default=None) 55 | args = parser.parse_args() 56 | return args 57 | 58 | 59 | def main(local_args): 60 | """ 61 | Run distributed training. 
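Assumes the launcher has set the WORLD_SIZE, RANK, and LOCAL_RANK environment variables, which are read at module import time.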
62 | 63 | :param local_args: Dict[str, Any] 64 | :return: 65 | """ 66 | log_id_dir = local_args.log_id_dir 67 | initial_step_count = local_args.initial_step_count 68 | 69 | R.load_extern_classes(log_id_dir) 70 | logger = Init.setup_logger( 71 | log_id_dir, "train{}".format(GLOBAL_RANK) 72 | ) 73 | 74 | helper = LogDirHelper(log_id_dir) 75 | with open(helper.args_file_path(), "r") as args_file: 76 | args = DotDict(json.load(args_file)) 77 | 78 | if local_args.resume: 79 | args = DotDict({**args, **vars(local_args)}) 80 | 81 | dist.init_process_group( 82 | backend="nccl", 83 | init_method=args.init_method, 84 | world_size=WORLD_SIZE, 85 | rank=LOCAL_RANK, 86 | ) 87 | logger.info("Rank {} initialized.".format(GLOBAL_RANK)) 88 | 89 | if LOCAL_RANK == 0: 90 | container = DistribHost( 91 | args, 92 | logger, 93 | log_id_dir, 94 | initial_step_count, 95 | LOCAL_RANK, 96 | GLOBAL_RANK, 97 | WORLD_SIZE, 98 | ) 99 | else: 100 | container = DistribWorker( 101 | args, 102 | logger, 103 | log_id_dir, 104 | initial_step_count, 105 | LOCAL_RANK, 106 | GLOBAL_RANK, 107 | WORLD_SIZE, 108 | ) 109 | 110 | try: 111 | container.run() 112 | finally: 113 | container.close() 114 | 115 | 116 | if __name__ == "__main__": 117 | main(parse_args()) 118 | -------------------------------------------------------------------------------- /adept/scripts/evaluate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (C) 2018 Heron Systems, Inc. 3 | # 4 | # This program is free software: you can redistribute it and/or modify 5 | # it under the terms of the GNU General Public License as published by 6 | # the Free Software Foundation, either version 3 of the License, or 7 | # (at your option) any later version. 8 | # 9 | # This program is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | # GNU General Public License for more details. 13 | # 14 | # You should have received a copy of the GNU General Public License 15 | # along with this program. If not, see . 16 | """ 17 | __ __ 18 | ____ _____/ /__ ____ / /_ 19 | / __ `/ __ / _ \/ __ \/ __/ 20 | / /_/ / /_/ / __/ /_/ / /_ 21 | \__,_/\__,_/\___/ .___/\__/ 22 | /_/ 23 | Evaluate 24 | 25 | Evaluates an agent after training. Computes N-episode average reward by 26 | loading a saved model from each epoch. N-episode averages are computed by 27 | running N env in parallel. 
28 | 29 | Usage: 30 | evaluate (--logdir ) [options] 31 | evaluate (-h | --help) 32 | 33 | Required: 34 | --logdir Path to train logs (.../logs//) 35 | 36 | Options: 37 | --epoch Epoch number to load [default: None] 38 | --actor Name of the eval actor [default: ACActorEval] 39 | --gpu-id CUDA device ID of GPU [default: 0] 40 | --nb-episode Number of episodes to average [default: 30] 41 | --start Epoch to start from [default: 0] 42 | --end Epoch to end on [default: -1] 43 | --seed Seed for random variables [default: 512] 44 | --custom-network Name of custom network class 45 | """ 46 | from adept.container import EvalContainer 47 | from adept.container import Init 48 | from adept.registry import REGISTRY as R 49 | from adept.utils.script_helpers import parse_path, parse_none 50 | from adept.utils.util import DotDict 51 | 52 | 53 | def parse_args(): 54 | from docopt import docopt 55 | 56 | args = docopt(__doc__) 57 | args = {k.strip("--").replace("-", "_"): v for k, v in args.items()} 58 | del args["h"] 59 | del args["help"] 60 | args = DotDict(args) 61 | args.logdir = parse_path(args.logdir) 62 | # TODO implement Option utility 63 | epoch_option = parse_none(args.epoch) 64 | if epoch_option: 65 | args.epoch = int(float(epoch_option)) 66 | else: 67 | args.epoch = epoch_option 68 | args.gpu_id = int(args.gpu_id) 69 | args.nb_episode = int(args.nb_episode) 70 | args.start = float(args.start) 71 | args.end = float(args.end) 72 | args.seed = int(args.seed) 73 | return args 74 | 75 | 76 | def main(args): 77 | """ 78 | Run an evaluation. 79 | :param args: Dict[str, Any] 80 | :return: 81 | """ 82 | args = DotDict(args) 83 | 84 | Init.print_ascii_logo() 85 | logger = Init.setup_logger(args.logdir, "eval") 86 | Init.log_args(logger, args) 87 | R.load_extern_classes(args.logdir) 88 | 89 | eval_container = EvalContainer( 90 | args.actor, 91 | args.epoch, 92 | logger, 93 | args.logdir, 94 | args.gpu_id, 95 | args.nb_episode, 96 | args.start, 97 | args.end, 98 | args.seed, 99 | ) 100 | try: 101 | eval_container.run() 102 | finally: 103 | eval_container.close() 104 | 105 | 106 | if __name__ == "__main__": 107 | main(parse_args()) 108 | -------------------------------------------------------------------------------- /adept/scripts/render.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (C) 2018 Heron Systems, Inc. 3 | # 4 | # This program is free software: you can redistribute it and/or modify 5 | # it under the terms of the GNU General Public License as published by 6 | # the Free Software Foundation, either version 3 of the License, or 7 | # (at your option) any later version. 8 | # 9 | # This program is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | # GNU General Public License for more details. 13 | # 14 | # You should have received a copy of the GNU General Public License 15 | # along with this program. If not, see . 16 | """ 17 | __ __ 18 | ____ _____/ /__ ____ / /_ 19 | / __ `/ __ / _ \/ __ \/ __/ 20 | / /_/ / /_/ / __/ /_/ / /_ 21 | \__,_/\__,_/\___/ .___/\__/ 22 | /_/ 23 | 24 | Render Atari 25 | 26 | Renders an agent interacting with an environment. 
27 | 28 | Usage: 29 | render --logdir [options] 30 | render (-h | --help) 31 | 32 | Required: 33 | --logdir Path to train logs (.../logs//) 34 | 35 | Options: 36 | --epoch Epoch number to load [default: None] 37 | --actor Name of the Actor [default: ACActorEval] 38 | --start Epoch number to start from [default: 0] 39 | --end Epoch number to end on [default: -1] 40 | --gpu-id CUDA device ID of GPU [default: 0] 41 | --seed Seed for random variables [default: 512] 42 | --manager Manager to use [default: SimpleEnvManager] 43 | """ 44 | 45 | from adept.container import Init 46 | from adept.container.render import RenderContainer 47 | from adept.registry import REGISTRY as R 48 | from adept.utils.script_helpers import parse_path, parse_none 49 | from adept.utils.util import DotDict 50 | 51 | 52 | def parse_args(): 53 | from docopt import docopt 54 | 55 | args = docopt(__doc__) 56 | args = {k.strip("--").replace("-", "_"): v for k, v in args.items()} 57 | del args["h"] 58 | del args["help"] 59 | args = DotDict(args) 60 | 61 | args.logdir = parse_path(args.logdir) 62 | 63 | # TODO implement Option utility 64 | epoch_option = parse_none(args.epoch) 65 | if epoch_option: 66 | args.epoch = int(float(epoch_option)) 67 | else: 68 | args.epoch = epoch_option 69 | args.start = int(float(args.start)) 70 | args.end = int(float(args.end)) 71 | args.gpu_id = int(args.gpu_id) 72 | args.seed = int(args.seed) 73 | return args 74 | 75 | 76 | def main(args): 77 | """ 78 | Run an evaluation training. 79 | 80 | :param args: Dict[str, Any] 81 | :return: 82 | """ 83 | # construct logging objects 84 | args = DotDict(args) 85 | 86 | Init.print_ascii_logo() 87 | logger = Init.setup_logger(args.logdir, "eval") 88 | Init.log_args(logger, args) 89 | R.load_extern_classes(args.logdir) 90 | 91 | container = RenderContainer( 92 | args.actor, 93 | args.epoch, 94 | args.start, 95 | args.end, 96 | logger, 97 | args.logdir, 98 | args.gpu_id, 99 | args.seed, 100 | args.manager, 101 | ) 102 | try: 103 | container.run() 104 | finally: 105 | container.close() 106 | 107 | 108 | if __name__ == "__main__": 109 | main(parse_args()) 110 | -------------------------------------------------------------------------------- /adept/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Heron Systems, Inc. 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 15 | from .util import listd_to_dlist, dlist_to_listd, dtensor_to_dev 16 | -------------------------------------------------------------------------------- /adept/utils/logging.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Heron Systems, Inc. 
2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 15 | import os 16 | from collections import namedtuple 17 | from time import time 18 | 19 | import torch 20 | 21 | from adept.utils.util import HeapQueue 22 | 23 | 24 | class ModelSaver: 25 | BufferEntry = namedtuple( 26 | "BufferEntry", ["reward", "priority", "network", "optimizer"] 27 | ) 28 | 29 | def __init__(self, nb_top_model, log_id_dir): 30 | self.nb_top_model = nb_top_model 31 | self._buffer = HeapQueue(nb_top_model) 32 | self._log_id_dir = log_id_dir 33 | 34 | def append_if_better(self, reward, network, optimizer): 35 | self._buffer.push( 36 | self.BufferEntry( 37 | reward, time(), network.state_dict(), optimizer.state_dict() 38 | ) 39 | ) 40 | 41 | def write_state_dicts(self, epoch_id): 42 | save_dir = os.path.join(self._log_id_dir, str(epoch_id)) 43 | if len(self._buffer) > 0: 44 | os.makedirs(save_dir) 45 | for j, buff_entry in enumerate(self._buffer.flush()): 46 | torch.save( 47 | buff_entry.network, 48 | os.path.join( 49 | save_dir, 50 | "model_{}_{}.pth".format(j + 1, int(buff_entry.reward)), 51 | ), 52 | ) 53 | torch.save( 54 | buff_entry.optimizer, 55 | os.path.join( 56 | save_dir, 57 | "optimizer_{}_{}.pth".format(j + 1, int(buff_entry.reward)), 58 | ), 59 | ) 60 | 61 | 62 | class SimpleModelSaver: 63 | def __init__(self, log_id_dir): 64 | self._log_id_dir = log_id_dir 65 | 66 | def save_state_dicts(self, network, step_count, optimizer=None): 67 | save_dir = os.path.join(self._log_id_dir, str(step_count)) 68 | os.makedirs(save_dir) 69 | torch.save( 70 | network.state_dict(), 71 | os.path.join(save_dir, "model_{}.pth".format(step_count)), 72 | ) 73 | if optimizer is not None: 74 | torch.save( 75 | optimizer.state_dict(), 76 | os.path.join(save_dir, "optimizer_{}.pth".format(step_count)), 77 | ) 78 | -------------------------------------------------------------------------------- /adept/utils/requires_args.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Heron Systems, Inc. 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 15 | import json 16 | import abc 17 | 18 | 19 | class RequiresArgsMixin(metaclass=abc.ABCMeta): 20 | """ 21 | This mixin makes it so that subclasses must implement an args class 22 | attribute. 
These arguments are parsed at runtime and the user is offered a 23 | chance to change any desired args. Classes that use this mixin must 24 | implement the from_args() class method. from_args() is essentially a 25 | secondary constructor. 26 | """ 27 | 28 | args = None # Dict[str, Any] 29 | 30 | @classmethod 31 | def check_args_implemented(cls): 32 | if cls.args is None: 33 | raise NotImplementedError( 34 | 'Subclass must define class attribute "args"' 35 | ) 36 | 37 | @classmethod 38 | def prompt(cls, provided=None): 39 | """ 40 | Display defaults as JSON, prompt user for changes. 41 | 42 | :param provided: Dict[str, Any], Overrides for the default args. 43 | :return: Dict[str, Any] Updated config dictionary. 44 | """ 45 | if provided is not None: 46 | overrides = {k: v for k, v in provided.items() if k in cls.args} 47 | args = {**cls.args, **overrides} 48 | else: 49 | args = cls.args 50 | return cls._prompt(cls.__name__, args) 51 | 52 | @staticmethod 53 | def _prompt(name, args): 54 | """ 55 | Display defaults as JSON, prompt user for changes. 56 | 57 | :param name: str Name of class 58 | :param args: Dict[str, Any] 59 | :return: Dict[str, Any] Updated config dictionary. 60 | """ 61 | if not args: 62 | return args 63 | 64 | user_input = input( 65 | "\n{} Defaults:\n{}\n" 66 | "Press ENTER to use defaults. Otherwise, " 67 | "modify JSON keys then press ENTER.\n".format( 68 | name, json.dumps(args, indent=2, sort_keys=True) 69 | ) 70 | + 'Example: {"x": true, "gamma": 0.001}\n' 71 | ) 72 | 73 | # use defaults if no changes specified 74 | if user_input == "": 75 | return args 76 | 77 | updates = json.loads(user_input) 78 | return {**args, **updates} 79 | 80 | @classmethod 81 | @abc.abstractmethod 82 | def from_args(cls, *args, **kwargs): 83 | raise NotImplementedError 84 | -------------------------------------------------------------------------------- /adept/utils/script_helpers.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Heron Systems, Inc. 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 15 | import json 16 | import os 17 | from time import sleep 18 | 19 | from adept.utils.util import DotDict 20 | 21 | 22 | def parse_bool_str(bool_str): 23 | """ 24 | Convert string to boolean.
25 | 26 | :param bool_str: str 27 | :return: Bool 28 | """ 29 | if bool_str.lower() == "false": 30 | return False 31 | elif bool_str.lower() == "true": 32 | return True 33 | else: 34 | raise ValueError('Unable to parse "{}"'.format(bool_str)) 35 | 36 | 37 | def parse_list_str(list_str, item_type): 38 | items = list_str.split(",") 39 | return [item_type(item) for item in items] 40 | 41 | 42 | def parse_none(none_str): 43 | if none_str == "None": 44 | return None 45 | else: 46 | return none_str 47 | 48 | 49 | def parse_path(rel_path): 50 | """ 51 | :param rel_path: (str) relative path 52 | :return: (str) absolute path 53 | """ 54 | return os.path.abspath(rel_path) 55 | 56 | 57 | class LogDirHelper: 58 | def __init__(self, log_id_path): 59 | """ 60 | :param log_id_path: str Path to Log ID 61 | """ 62 | 63 | self._log_id_path = log_id_path 64 | 65 | def epochs(self): 66 | epochs = [] 67 | for item in os.listdir(self._log_id_path): 68 | item_path = os.path.join(self._log_id_path, item) 69 | if os.path.isdir(item_path): 70 | if item.isnumeric(): 71 | item_int = int(item) 72 | if item_int >= 0: 73 | epochs.append(item_int) 74 | return list(sorted(epochs)) 75 | 76 | def latest_epoch(self): 77 | epochs = self.epochs() 78 | return max(epochs) if epochs else 0 79 | 80 | def latest_epoch_path(self): 81 | return os.path.join(self._log_id_path, str(self.latest_epoch())) 82 | 83 | def latest_network_path(self): 84 | network_file = [ 85 | f for f in os.listdir(self.latest_epoch_path()) if ("model" in f) 86 | ][0] 87 | return os.path.join(self.latest_epoch_path(), network_file) 88 | 89 | def latest_optim_path(self): 90 | optim_file = [ 91 | f for f in os.listdir(self.latest_epoch_path()) if ("optim" in f) 92 | ][0] 93 | return os.path.join(self.latest_epoch_path(), optim_file) 94 | 95 | def epoch_path_at_epoch(self, epoch): 96 | return os.path.join(self._log_id_path, str(epoch)) 97 | 98 | def network_path_at_epoch(self, epoch, num_tries=1, retry_delay=3): 99 | """Find network path at epoch 100 | 101 | Parameters 102 | ---------- 103 | epoch : int 104 | epoch to find network path for 105 | num_tries : int, optional 106 | number of tries to do, by default 1 (no retries) 107 | retry_delay : int, optional 108 | delay between retry attempts, by default 3 (seconds) 109 | 110 | Returns 111 | ------- 112 | str 113 | path to network file 114 | """ 115 | assert num_tries, "num_tries must be greater than 0" 116 | 117 | epoch_path = self.epoch_path_at_epoch(epoch) 118 | 119 | for try_idx in range(num_tries): 120 | if try_idx > 0: 121 | sleep(retry_delay) 122 | 123 | network_files = [f for f in os.listdir(epoch_path) if ("model" in f)] 124 | 125 | if len(network_files): 126 | break 127 | else: 128 | raise AssertionError( 129 | f"No network files found at epoch {epoch} for {self._log_id_path} after {num_tries} tries" 130 | ) 131 | 132 | assert len(network_files) <= 1, ( 133 | f"More than one network path at epoch {epoch}, " 134 | "maybe you want network_paths_at_epoch()" 135 | ) 136 | 137 | network_file = network_files[0] 138 | return os.path.join(epoch_path, network_file) 139 | 140 | def network_paths_at_epoch(self, epoch): 141 | epoch_path = self.epoch_path_at_epoch(epoch) 142 | return [ 143 | os.path.join(epoch_path, f) 144 | for f in os.listdir(epoch_path) 145 | if ("model" in f) 146 | ] 147 | 148 | def optim_path_at_epoch(self, epoch): 149 | epoch_path = self.epoch_path_at_epoch(epoch) 150 | optim_file = [f for f in os.listdir(epoch_path) if ("optim" in f)][0] 151 | return os.path.join(epoch_path, optim_file) 152 | 
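# Note: timestamp() below assumes the log id directory name ends in two underscore-separated timestamp components and simply rejoins them.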
153 | def timestamp(self): 154 | splits = self._log_id_path.split("_") 155 | timestamp = splits[-2] + "_" + splits[-1] 156 | return timestamp 157 | 158 | def args_file_path(self): 159 | return os.path.join(self._log_id_path, "args.json") 160 | 161 | def load_args(self): 162 | with open(self.args_file_path()) as args_file: 163 | return DotDict(json.load(args_file)) 164 | 165 | def eval_path(self): 166 | return os.path.join(self._log_id_path, "eval.csv") 167 | -------------------------------------------------------------------------------- /adept/utils/util.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Heron Systems, Inc. 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 15 | import json 16 | import heapq 17 | import inspect 18 | from collections import OrderedDict 19 | 20 | import numpy as np 21 | import torch 22 | 23 | 24 | def listd_to_dlist(list_of_dicts): 25 | """ 26 | Converts a list of dictionaries to a dictionary of lists. Preserves key 27 | order. 28 | 29 | K is type of key. 30 | V is type of value. 31 | :param list_of_dicts: List[Dict[K, V]] 32 | :return: Dict[K, List[V]] 33 | """ 34 | new_dict = OrderedDict() 35 | for d in list_of_dicts: 36 | for k, v in d.items(): 37 | if k not in new_dict: 38 | new_dict[k] = [v] 39 | else: 40 | new_dict[k].append(v) 41 | return new_dict 42 | 43 | 44 | def dlist_to_listd(dict_of_lists): 45 | """ 46 | Converts a dictionary of lists to a list of dictionaries. Preserves key 47 | order. 48 | 49 | K is type of key. 50 | V is type of value. 51 | :param dict_of_lists: Dict[K, List[V]] 52 | :return: List[Dict[K, V]] 53 | """ 54 | keys = dict_of_lists.keys() 55 | list_len = len(dict_of_lists[next(iter(keys))]) 56 | new_list = [] 57 | for i in range(list_len): 58 | temp_d = OrderedDict() 59 | for k in keys: 60 | temp_d[k] = dict_of_lists[k][i] 61 | new_list.append(temp_d) 62 | return new_list 63 | 64 | 65 | def dtensor_to_dev(d_tensor, device): 66 | """ 67 | Move a dictionary of tensors to a device. 68 | 69 | :param d_tensor: Dict[str, Tensor] 70 | :param device: torch.device 71 | :return: Dict[str, Tensor] on desired device. 
72 | """ 73 | return {k: v.to(device) for k, v in d_tensor.items()} 74 | 75 | 76 | def json_to_dict(file_path): 77 | """Read JSON config.""" 78 | json_object = json.load(open(file_path, "r")) 79 | return json_object 80 | 81 | 82 | _numpy_to_torch_dtype = { 83 | np.float16: torch.float16, 84 | np.float32:torch.float32, 85 | np.float64: torch.float64, 86 | np.uint8: torch.uint8, 87 | np.int8: torch.int8, 88 | np.int16: torch.int16, 89 | np.int32: torch.int32, 90 | np.int64: torch.int64, 91 | } 92 | _torch_to_numpy_dtype = {v: k for k, v in _numpy_to_torch_dtype.items()} 93 | 94 | 95 | def numpy_to_torch_dtype(dtype): 96 | if inspect.isclass(dtype): 97 | name = dtype 98 | else: 99 | name = type(dtype) 100 | if name not in _numpy_to_torch_dtype: 101 | raise ValueError( 102 | f"Could not convert numpy dtype {dtype.name} to a torch dtype." 103 | ) 104 | return _numpy_to_torch_dtype[name] 105 | 106 | 107 | def torch_to_numpy_dtype(dtype): 108 | if dtype not in _torch_to_numpy_dtype: 109 | raise ValueError( 110 | "Could not convert torch dtype {} to a numpy dtype.".format( 111 | dtype 112 | ) 113 | ) 114 | 115 | return _torch_to_numpy_dtype[dtype] 116 | 117 | 118 | class CircularBuffer(object): 119 | def __init__(self, size): 120 | self.index = 0 121 | self.size = size 122 | self._data = [] 123 | 124 | def append(self, value): 125 | if len(self._data) == self.size: 126 | self._data[self.index] = value 127 | else: 128 | self._data.append(value) 129 | self.index = (self.index + 1) % self.size 130 | 131 | def is_empty(self): 132 | return self._data == [] 133 | 134 | def not_empty(self): 135 | return not self.is_empty() 136 | 137 | def is_full(self): 138 | return len(self) == self.size 139 | 140 | def not_full(self): 141 | return not self.is_full() 142 | 143 | def __getitem__(self, key): 144 | """get element by index like a regular array""" 145 | return self._data[key] 146 | 147 | def __setitem__(self, key, value): 148 | self._data[key] = value 149 | 150 | def __repr__(self): 151 | """return string representation""" 152 | return self._data.__repr__() + " (" + str(len(self._data)) + " items)" 153 | 154 | def __len__(self): 155 | return len(self._data) 156 | 157 | 158 | class HeapQueue: 159 | def __init__(self, maxlen): 160 | self.q = [] 161 | self.maxlen = maxlen 162 | 163 | def push(self, item): 164 | if len(self.q) < self.maxlen: 165 | heapq.heappush(self.q, item) 166 | else: 167 | heapq.heappushpop(self.q, item) 168 | 169 | def flush(self): 170 | q = self.q 171 | self.q = [] 172 | return q 173 | 174 | def __len__(self): 175 | return len(self.q) 176 | 177 | 178 | class DotDict(dict): 179 | """ 180 | Dictionary to access attributes 181 | """ 182 | 183 | __getattr__ = dict.get 184 | __setattr__ = dict.__setitem__ 185 | __delattr__ = dict.__delitem__ 186 | 187 | # Support pickling 188 | def __getstate__(obj): 189 | return dict(obj.items()) 190 | 191 | def __setstate__(cls, attributes): 192 | return DotDict(**attributes) 193 | -------------------------------------------------------------------------------- /docker/connect.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2020 Heron Systems, Inc. 2 | """ 3 | Sample command: 4 | python connect.py --dockerfile ./Dockerfile --username gburdell \ 5 | --email gburdell@heronsystems.com \ 6 | --fullname "George P. 
Burdell" 7 | Given a path to a Dockerfile, this script (1) builds a Docker image from the 8 | Dockerfile and (2) outputs a command that can directly be pasted into a shell 9 | that connects to a Docker instance spawned from the Docker image built in (1). 10 | """ 11 | import argparse 12 | import os 13 | import sys 14 | 15 | 16 | def parse_args(): 17 | parser = argparse.ArgumentParser() 18 | parser.add_argument( 19 | "--dockerfile", 20 | default="./Dockerfile", 21 | help=( 22 | "Path to the Dockerfile you wish to work with, " 23 | "e.g., /some/path/to/Dockerfile" 24 | ), 25 | ) 26 | parser.add_argument( 27 | "--username", 28 | required=True, 29 | help="Your local username in the docker instance that you connect to.", 30 | ) 31 | parser.add_argument( 32 | "--email", 33 | required=True, 34 | help=( 35 | "Your email address, used for git purposes, e.g., gburdell@heronsystems.com." 36 | ), 37 | ) 38 | parser.add_argument( 39 | "--fullname", 40 | required=True, 41 | help="Your full name, used for git purposes.", 42 | ) 43 | return parser.parse_args() 44 | 45 | 46 | def runcmd(cmd): 47 | print("Running --\n{}\n--".format(cmd)) 48 | return os.system(cmd) 49 | 50 | 51 | def connect_local(args): 52 | # Check for .ssh under /mnt directory; this is important to be able to pull 53 | # from github within Docker instances. 54 | if not os.path.exists( 55 | "/mnt/users/{username}".format(username=args.username) 56 | ): 57 | print( 58 | ( 59 | "Please create the directory /mnt/users/{username} and run:" 60 | "chmod -R a+rw /mnt/users/{username}." 61 | ).format(username=args.username) 62 | ) 63 | 64 | # Check for .ssh under /mnt directory; this is important to be able to pull 65 | # from github within Docker instances. 66 | if not os.path.exists( 67 | "/mnt/users/{username}/.ssh".format(username=args.username) 68 | ): 69 | print( 70 | "Please run the following before running connect.py. 
This is to " 71 | "allow your Docker instance to be able to pull/push from " 72 | "private github repositories:\n" 73 | "ln -s ~/.ssh /mnt/users/{username}/.ssh".format( 74 | username=args.username 75 | ) 76 | ) 77 | sys.exit(1) 78 | 79 | # Build the docker image 80 | dockerfile_dir = os.path.dirname(os.path.abspath(args.dockerfile)) 81 | cmd = ( 82 | "cd {dockerfile_dir}; docker build " 83 | "-f {dockerfile} " 84 | "--build-arg USERNAME={username} " 85 | "--build-arg EMAIL={email} " 86 | '--build-arg FULLNAME="{fullname}" ' 87 | "-t {username}-dev .".format( 88 | dockerfile=args.dockerfile, 89 | dockerfile_dir=dockerfile_dir, 90 | username=args.username, 91 | email=args.email, 92 | fullname=args.fullname, 93 | ) 94 | ) 95 | success = runcmd(cmd) 96 | 97 | if success != 0: 98 | print("Please fix the errors that occurred above.") 99 | sys.exit(1) 100 | 101 | # Construct the docker instance creation command 102 | cmd = ( 103 | 'xhost +"local:docker"; docker run -it --rm ' 104 | # Attach gpus 105 | "--gpus=all " 106 | # Take care of ctrl-p issues 107 | '--detach-keys="ctrl-@" ' 108 | # Configure X 109 | "-e DISPLAY=$DISPLAY " 110 | "-v /tmp/.X11-unix:/tmp/.X11-unix:ro " 111 | # Expose all ports 112 | "--network host " 113 | # Mount volumes 114 | "-v /mnt/:/mnt/ " 115 | '{}-dev; xhost -"local:docker"'.format(args.username) 116 | ) 117 | print("Run this to connect:\n{cmd}".format(cmd=cmd)) 118 | 119 | # For convenience, copy to the clipboard if possible 120 | try: 121 | import pyperclip 122 | 123 | pyperclip.copy(cmd) 124 | except: 125 | pass 126 | 127 | 128 | def main(): 129 | connect_local(parse_args()) 130 | 131 | 132 | if __name__ == "__main__": 133 | main() 134 | -------------------------------------------------------------------------------- /docker/startup.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/zsh 2 | # Copyright (C) 2020 Heron Systems, Inc. 3 | # 4 | # This script's purpose is to run preliminary setup for the dev environment 5 | # once an image has been created. 6 | 7 | # retain state across sessions 8 | mkdir -p "/mnt/users/$USERNAME" 9 | sudo chown -R $USERNAME "/mnt/users/$USERNAME" 10 | cd /home/$USERNAME/ 11 | persistents=".bashrc .bash_logout .zshrc" 12 | for f in ${persistents}; 13 | do if [ ! -e /mnt/users/$USERNAME/${f} ]; 14 | then 15 | cp ${f} /mnt/users/$USERNAME/${f}; 16 | fi; 17 | done 18 | 19 | # zhistory and clients aren't created by default, so create them if they don't 20 | # exist already 21 | test -e /mnt/users/$USERNAME/.zhistory || touch /mnt/users/$USERNAME/.zhistory 22 | mkdir -p /mnt/users/$USERNAME/clients 23 | mkdir -p /mnt/users/$USERNAME/data 24 | 25 | # symlink these files to the persistent versions 26 | persistents=".bashrc .bash_logout .zshrc .zhistory .tmux.conf .ssh clients data" 27 | for f in ${persistents}; 28 | do if [ -e "/mnt/users/${USERNAME}/$f" ]; 29 | then 30 | rm -f ${f} && ln -s "/mnt/users/${USERNAME}/$f" $f; 31 | fi 32 | done 33 | 34 | # git setup 35 | export GIT_SSL_NO_VERIFY=1 36 | git config --global user.email "$EMAIL" 37 | git config --global user.name "$FULLNAME" 38 | -------------------------------------------------------------------------------- /docs/api_overview.md: -------------------------------------------------------------------------------- 1 | # API Overview 2 | ![architecture](../images/architecture.png) 3 | ### Containers 4 | Containers hold all of the application state. Each subprocess gets a container 5 | in Distributed and IMPALA modes. 
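As a rough lifecycle sketch (modeled on the evaluation, render, and distributed entry points elsewhere in this repo; `SomeContainer` is a hypothetical stand-in, and each real container takes its own constructor arguments):
```python
# Sketch only: substitute EvalContainer, RenderContainer, DistribHost, etc.
container = SomeContainer(args, logger, log_id_dir)
try:
    container.run()    # interact with the env / train until finished
finally:
    container.close()  # always release environments and other resources
```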
6 | ### Agents 7 | An Agent acts on and observes the environment. 8 | Currently only ActorCritic is supported. Other agents, such as DQN or ACER may 9 | be added later. 10 | ### Networks 11 | Networks are not PyTorch modules, they need to implement our abstract 12 | NetworkModule or ModularNetwork classes. A ModularNetwork consists of a 13 | source nets, body, and heads. 14 | ### Environments 15 | Environments run in subprocesses and send their observation, rewards, 16 | terminals, and infos to the host process. They work pretty much the same way as 17 | OpenAI's code. 18 | ### Experience Caches 19 | An Experience Cache is a Rollout or Experience Replay that is written to after 20 | stepping and read before learning. 21 | 22 | Inheritance Tree: 23 | * [Container](#container) 24 | * LocalWorker 25 | * DistribWorker 26 | * ImpalaHost 27 | * ImpalaWorkerCPU 28 | * ImpalaWorkerGPU 29 | * [Agent](#agent) 30 | * ActorCritic 31 | * ActorCriticVTrace 32 | * ACER 33 | * ACERVTrace 34 | * [ExperienceCache](#experiencecache) 35 | * ExperienceReplay 36 | * RolloutCache 37 | * [Environment](#environment) 38 | * Gym Env 39 | * SC2Feat1DEnv (1d action space) 40 | * SC2Feat3DEnv (3d action space) 41 | * SC2RawEnv (new proto) 42 | * [EnvironmentManager](#environmentmanager) 43 | * SimpleEnvManager (synchronous, same process, for debugging / rendering) 44 | * SubProcEnvManager (use torch.multiprocessing.Pipe) 45 | * [Network](#network) 46 | * ModularNetwork 47 | * CustomNetwork 48 | * [SubModule](#submodule) 49 | * SubModule1D 50 | * Identity1D 51 | * LSTM 52 | * SubModule2D 53 | * Identity2D 54 | * TransformerEnc 55 | * TransformerDec 56 | * SubModule3D 57 | * Identity3D 58 | * SC2LEEncoder - encoder from SC2LE paper 59 | * SC2LEDecoder - decoder from SC2LE paper 60 | * ConvLSTM 61 | * SubModule4D 62 | * Identity4D 63 | 64 | ### Container 65 | * `agent:` [Agent](#agent) 66 | * `env:` [EnvironmentManager](#environment) 67 | * `exp_cache:` [ExperienceCache](#experiencecache) 68 | * `network:` [Network](#network) 69 | * `local_step_count: int` 70 | * `reward_buffer: List[float], keep track of running rewards` 71 | * `hidden_state: Dict[HSName, torch.Tensor], keep track of current LSTM states` 72 | ```python 73 | def run(nb_step, initial_step=0): ... 74 | """ 75 | args: 76 | nb_step: int, number of steps to train for 77 | return: 78 | self 79 | """ 80 | ``` 81 | 82 | ### Agent 83 | * [network](#network) 84 | ```python 85 | def observe(observations, rewards, terminals, infos): ... 86 | """ 87 | args: 88 | observations: Dict[ObsName, List[torch.Tensor]] 89 | rewards: List[int] 90 | terminals: List[Bool] 91 | infos: List[Any] 92 | return: 93 | None 94 | """ 95 | def act(observation): ... 96 | """ 97 | legend: 98 | ObsName = str 99 | ActionName = str 100 | args: 101 | observation: Dict[ObsName, torch.Tensor] 102 | return: 103 | actions: Dict[ActionName, torch.Tensor], 104 | experience: Dict[ExpName, torch.Tensor] 105 | """ 106 | def compute_loss(experience, next_obs): ... 107 | """ 108 | args: 109 | experience: torch.Tensor 110 | next_obs: Dict[ObsName, torch.Tensor] 111 | return: 112 | losses: Dict[LossName, torch.Tensor] 113 | metrics: Dict[MetricName, torch.Tensor] 114 | """ 115 | ``` 116 | 117 | ### EnvironmentManager 118 | * `obs_preprocessor_gpu: ObsPreprocessor` 119 | * `env_cls: type, environment class` 120 | ```python 121 | def step(actions): ... 
122 | """ 123 | args: 124 | actions: Dict[str, List[np.ndarray]] 125 | return: 126 | Tuple[Observations, Rewards, Terminals, Infos] 127 | """ 128 | def reset(): ... 129 | """ 130 | description: 131 | Reset the environment to its initial state. 132 | return: 133 | observation: Dict[ObsName, torch.Tensor] 134 | """ 135 | def close(): ... 136 | """ 137 | description: 138 | Close environments. 139 | return: 140 | None 141 | """ 142 | ``` 143 | 144 | ### Environment 145 | * [obs_preprocessor_cpu](#) 146 | * [action_preprocessor](#) 147 | ```python 148 | def step(action): ... 149 | """ 150 | args: 151 | action: Dict[str, np.ndarray] 152 | return: 153 | Tuple[Observation, Reward, Terminal, Info] 154 | """ 155 | ``` 156 | 157 | ### ExperienceCache 158 | ```python 159 | def read(): ... 160 | def env_write(observations, rewards, terminals): ... 161 | def agent_write(log_prob, entropy): ... 162 | def write(observation, reward, terminal, value, log_prob, entropy, hidden_state): ... 163 | def ready(): ... 164 | def submit(): ... 165 | def receive(): ... 166 | ``` 167 | 168 | ### Network 169 | ```python 170 | def forward(observation, hidden_state): ... 171 | """ 172 | args: 173 | observation: Dict[ObsName, torch.Tensor] 174 | hidden_state: Dict[HSName, torch.Tensor] 175 | """ 176 | ``` 177 | 178 | ### SubModule 179 | * `input_shape: Tuple[int]` 180 | ```python 181 | def forward(): ... 182 | def output_shape(dim): ... 183 | """ 184 | args: 185 | dim: int, dimensionality of 186 | """ 187 | ``` 188 | 189 | ### HiddenState 190 | ```python 191 | def detach(): ... 192 | ``` 193 | -------------------------------------------------------------------------------- /docs/modular_network.md: -------------------------------------------------------------------------------- 1 | ![modular_network](../images/modular_network.png) 2 | -------------------------------------------------------------------------------- /docs/new_api.md: -------------------------------------------------------------------------------- 1 | Inheritance Tree: 2 | * [Container](#container) 3 | * LocalWorker 4 | * DistribWorker 5 | * ImpalaHost 6 | * ImpalaWorkerCPU 7 | * ImpalaWorkerGPU 8 | * [Agent](#agent) 9 | * ActorCritic 10 | * VTrace 11 | * [TrajectoryCache](#trajectorycache) 12 | * ExperienceReplay 13 | * RolloutCache 14 | * [Environment](#environment) 15 | * GymEnv 16 | * SC2Feat1DEnv (1d action space) 17 | * SC2Feat3DEnv (3d action space) 18 | * SC2RawEnv (new proto) 19 | * [EnvironmentManager](#environmentmanager) 20 | * SimpleEnvManager (synchronous, same process, for debugging / rendering) 21 | * SubProcEnvManager (use torch.multiprocessing.Pipe, default start method) 22 | * SelfPlayEnvManager 23 | * [Network](#network) 24 | * ModularNetwork 25 | * CustomNetwork 26 | * [SubModule](#submodule) 27 | * SubModule1D 28 | * Identity1D 29 | * LSTM 30 | * SubModule2D 31 | * Identity2D 32 | * TransformerEnc 33 | * TransformerDec 34 | * SubModule3D 35 | * Identity3D 36 | * SC2LEEncoder - encoder from SC2LE paper 37 | * SC2LEDecoder - decoder from SC2LE paper 38 | * ConvLSTM 39 | * SubModule4D 40 | * Identity4D 41 | 42 | ### Container 43 | * `_agent:` [Agent](#agent) 44 | * `_environment:` [Environment](#environment) 45 | 46 | * `_network:` [Network](#network) 47 | * `_local_step_count: int` 48 | * `_reward_buffer: List[float], keep track of running rewards` 49 | * `_hidden_state: Dict[Id, torch.Tensor], keep track of current LSTM states` 50 | ```python 51 | def run(nb_step, initial_step=0): ... 
52 | """ 53 | args: 54 | nb_step: int, number of steps to train for 55 | return: 56 | None 57 | """ 58 | ``` 59 | 60 | ### Agent 61 | * `_trajectory_cache:` [TrajectoryCache](#trajectorycache) 62 | ```python 63 | 64 | @staticmethod 65 | def output_space(action_space): ... 66 | """ 67 | args: 68 | action_space: Dict[ActionName, Shape] 69 | 70 | """ 71 | def observe(cls): ... 72 | def compute_loss(prediction, trajectory): ... # ? 73 | ``` 74 | 75 | ### EnvironmentManager 76 | * _obs_preprocessor_gpu: [ObsPreprocessor](#) 77 | * `env_cls: type, environment class` 78 | ```python 79 | def step_train(policy_logits): ... 80 | """ 81 | args: 82 | policy_logits: Dict[ActionName, torch.Tensor] 83 | return: 84 | Tuple[Obs, Reward, Done, Info, LogProb, Entropy] 85 | """ 86 | def step_eval(policy_logits): ... 87 | """ 88 | args: 89 | policy_logits: Dict[ActionName, torch.Tensor] 90 | return: 91 | Tuple[Obs, Reward, Done, Info] 92 | """ 93 | 94 | ``` 95 | 96 | ### Environment 97 | * _obs_preprocessor_cpu: [ObsPreprocessor](#) 98 | * _action_preprocessor: [ActionPreprocessor](#) 99 | * `_action_space: Dict[]` 100 | ```python 101 | def step(observation): ... 102 | def reset(): ... 103 | def close(): ... 104 | 105 | 106 | ``` 107 | 108 | ### TrajectoryCache 109 | ```python 110 | def read(): ... 111 | def write(): ... 112 | def ready(): ... 113 | """ 114 | return: 115 | bool, whether the cache is ready 116 | """ 117 | ``` 118 | 119 | ### Network 120 | ```python 121 | def forward(observation, hidden_state): ... 122 | """ 123 | args: 124 | observation: Dict[ObsName, torch.Tensor] 125 | hidden_state: Dict[ 126 | """ 127 | ``` 128 | 129 | ### SubModule 130 | * `input_shape: Tuple[int]` 131 | ```python 132 | def forward(): ... 133 | def output_shape(dim): ... 134 | """ 135 | args: 136 | dim: int, dimensionality of 137 | """ 138 | ``` 139 | 140 | ### ObsPreprocessor 141 | -------------------------------------------------------------------------------- /docs/resume_training.md: -------------------------------------------------------------------------------- 1 | To resume training, you need to navigate to the log folder of the training job. 2 | By default, logs are saved in `/tmp/adept_logs`. 3 | ```bash 4 | # Change directory to the desired log directory 5 | cd /tmp/adept_logs/// 6 | # To continue training on a single GPU 7 | python -m adept.app local --resume . 8 | # To continue training on multiple GPUs 9 | python -m adept.app distrib --resume . 10 | ``` 11 | -------------------------------------------------------------------------------- /examples/custom_agent_stub.py: -------------------------------------------------------------------------------- 1 | """ 2 | Use a custom agent. 
3 | """ 4 | from adept.agents import AgentModule, AgentRegistry 5 | from adept.scripts.local import parse_args, main 6 | 7 | 8 | class MyCustomAgent(AgentModule): 9 | # You will be prompted for these when training script starts 10 | args = {"example_arg1": True, "example_arg2": 5} 11 | 12 | def __init__( 13 | self, 14 | network, 15 | device, 16 | reward_normalizer, 17 | gpu_preprocessor, 18 | engine, 19 | action_space, 20 | nb_env, 21 | *args, 22 | **kwargs 23 | ): 24 | super(MyCustomAgent, self).__init__( 25 | network, 26 | device, 27 | reward_normalizer, 28 | gpu_preprocessor, 29 | engine, 30 | action_space, 31 | nb_env, 32 | ) 33 | 34 | @classmethod 35 | def from_args( 36 | cls, 37 | args, 38 | network, 39 | device, 40 | reward_normalizer, 41 | gpu_preprocessor, 42 | engine, 43 | action_space, 44 | **kwargs 45 | ): 46 | """ 47 | 48 | ArgName = str 49 | 50 | :param args: Dict[ArgName, Any] 51 | :param network: BaseNetwork 52 | :param device: torch.device 53 | :param reward_normalizer: Callable[[float], float] 54 | :param gpu_preprocessor: ObsPreprocessor 55 | :param engine: env_registry.Engines 56 | :param action_space: Dict[ActionKey, torch.Tensor] 57 | :param kwargs: 58 | :return: MyCustomAgent 59 | """ 60 | pass 61 | 62 | @property 63 | def exp_cache(self): 64 | """ 65 | Experience cache, probably a RolloutCache or ExperienceReplay. 66 | 67 | :return: BaseExperience 68 | """ 69 | pass 70 | 71 | @staticmethod 72 | def output_space(action_space): 73 | """ 74 | Merge action space with any agent-based outputs to get an output_space. 75 | 76 | ActionKey = str 77 | Shape = Tuple[*int] 78 | 79 | :param action_space: Dict[ActionKey, Shape] 80 | :return: 81 | """ 82 | pass 83 | 84 | def compute_loss(self, experience, next_obs): 85 | """ 86 | Compute losses. 87 | 88 | ObsKey = str 89 | LossKey = str 90 | 91 | :param experience: Tuple[*Any] 92 | :param next_obs: Dict[ObsKey, torch.Tensor] 93 | :return: Dict[LossKey, torch.Tensor (0D)] 94 | """ 95 | pass 96 | 97 | def act(self, obs): 98 | """ 99 | Generate an action. 100 | 101 | ObsKey = str 102 | ActionKey = str 103 | 104 | :param obs: Dict[ObsKey, torch.Tensor] 105 | :return: Dict[ActionKey, np.ndarray] 106 | """ 107 | pass 108 | 109 | def act_eval(self, obs): 110 | """ 111 | Generate an action in an evaluation. 112 | 113 | ObsKey = str 114 | ActionKey = str 115 | 116 | :param obs: Dict[ObsKey, torch.Tensor] 117 | :return: Dict[ActionKey, np.ndarray] 118 | """ 119 | pass 120 | 121 | 122 | if __name__ == "__main__": 123 | args = parse_args() 124 | agent_reg = AgentRegistry() 125 | agent_reg.register_agent(MyCustomAgent) 126 | 127 | main(args, agent_registry=agent_reg) 128 | 129 | # Call script like this to train agent: 130 | # python -m custom_agent_stub.py --agent MyCustomAgent 131 | -------------------------------------------------------------------------------- /examples/custom_environment_stub.py: -------------------------------------------------------------------------------- 1 | """ 2 | Use a custom environment. 
3 | """ 4 | from adept.env import EnvModule 5 | from adept.scripts.local import parse_args, main 6 | 7 | 8 | class MyCustomEnv(EnvModule): 9 | # You will be prompted for these when training script starts 10 | args = {"example_arg1": True, "example_arg2": 5} 11 | ids = ["scenario1", "scenario2"] 12 | 13 | def __init__(self, action_space, cpu_ops, gpu_ops, *args, **kwargs): 14 | super(MyCustomEnv, self).__init__(action_space, cpu_ops, gpu_ops) 15 | 16 | @classmethod 17 | def from_args(cls, args, seed, **kwargs): 18 | """ 19 | Construct from arguments. For convenience. 20 | 21 | :param args: Arguments object 22 | :param seed: Integer used to seed this environment. 23 | :param kwargs: Any custom arguments are passed through kwargs. 24 | :return: EnvModule instance. 25 | """ 26 | pass 27 | 28 | def step(self, action): 29 | """ 30 | Perform action. 31 | 32 | ActionID = str 33 | Observation = Dict[ObsKey, Any] 34 | Reward = np.ndarray 35 | Terminal = bool 36 | Info = Dict[Any, Any] 37 | 38 | :param action: Dict[ActionID, Any] Action dictionary 39 | :return: Tuple[Observation, Reward, Terminal, Info] 40 | """ 41 | pass 42 | 43 | def reset(self, **kwargs): 44 | """ 45 | Reset environment. 46 | 47 | ObsKey = str 48 | 49 | :param kwargs: 50 | :return: Dict[ObsKey, Any] Observation dictionary 51 | """ 52 | pass 53 | 54 | def close(self): 55 | """ 56 | Close any connections / resources. 57 | 58 | :return: 59 | """ 60 | pass 61 | 62 | 63 | if __name__ == "__main__": 64 | import adept 65 | 66 | adept.register_env(MyCustomEnv) 67 | main(parse_args()) 68 | 69 | # Call script like this to train agent: 70 | # python -m custom_env_stub.py --env scenario1 71 | -------------------------------------------------------------------------------- /examples/custom_network_stub.py: -------------------------------------------------------------------------------- 1 | """ 2 | Use a custom network. 3 | """ 4 | from adept.networks import NetworkModule, NetworkRegistry 5 | from adept.scripts.local import parse_args, main 6 | 7 | 8 | class MyCustomNetwork(NetworkModule): 9 | # You will be prompted for these when training script starts 10 | args = {"example_arg1": True, "example_arg2": 5} 11 | 12 | def __init__(self): 13 | super(MyCustomNetwork, self).__init__() 14 | # Set properties and whatnot here 15 | 16 | @classmethod 17 | def from_args(cls, args, observation_space, output_space, net_reg): 18 | """ 19 | Construct a MyCustomNetwork from arguments. 20 | ArgName = str 21 | ObsKey = str 22 | OutputKey = str 23 | Shape = Tuple[*int] 24 | :param args: Dict[ArgName, Any] 25 | :param observation_space: Dict[ObsKey, Shape] 26 | :param output_space: Dict[OutputKey, Shape] 27 | :param net_reg: NetworkRegistry 28 | :return: MyCustomNetwork 29 | """ 30 | pass 31 | 32 | def new_internals(self, device): 33 | """ 34 | Define any initial hidden states here, move them to device if necessary. 35 | InternalKey=str 36 | :return: Dict[InternalKey, torch.Tensor (ND)] 37 | """ 38 | pass 39 | 40 | def forward(self, observation, internals): 41 | """ 42 | Compute forward pass. 
43 | ObsKey = str 44 | InternalKey = str 45 | :param observation: Dict[ObsKey, torch.Tensor (1D | 2D | 3D | 4D)] 46 | :param internals: Dict[InternalKey, torch.Tensor (ND)] 47 | :return: torch.Tensor 48 | """ 49 | pass 50 | 51 | 52 | if __name__ == "__main__": 53 | args = parse_args() 54 | network_reg = NetworkRegistry() 55 | network_reg.register_network(MyCustomNetwork) 56 | 57 | main(args, net_registry=network_reg) 58 | 59 | # Call script like this to train agent: 60 | # python -m custom_network_stub.py --custom-network MyCustomNetwork 61 | -------------------------------------------------------------------------------- /examples/custom_submodule_stub.py: -------------------------------------------------------------------------------- 1 | """ 2 | Custom submodule stub 3 | """ 4 | from adept.networks import ( 5 | SubModule1D, 6 | SubModule2D, 7 | SubModule3D, 8 | SubModule4D, 9 | NetworkRegistry, 10 | ) 11 | from adept.scripts.local import parse_args, main 12 | 13 | 14 | # If your Module processes 2D, then inherit SubModule2D and so on. 15 | # Dimensionality refers to feature map dimensions not including batch. 16 | # ie. (F, ) = 1D, (F, L) = 2D, (F, H, W) = 3D, (F, D, H, W) = 4D 17 | class MyCustomSubModule1D(SubModule1D): 18 | # You will be prompted for these when training script starts 19 | args = {"example_arg1": True, "example_arg2": 5} 20 | 21 | def __init__(self, input_shape, id): 22 | super(MyCustomSubModule1D, self).__init__(input_shape, id) 23 | 24 | @classmethod 25 | def from_args(cls, args, input_shape, id): 26 | """ 27 | Construct a MyCustomSubModule1D from arguments. 28 | 29 | :param args: Dict[ArgName, Any] 30 | :param input_shape: Tuple[*int] 31 | :param id: str 32 | :return: MyCustomSubModule1D 33 | """ 34 | pass 35 | 36 | @property 37 | def _output_shape(self): 38 | """ 39 | Return the output shape. If it's a function of the input shape, you can 40 | access the input shape via ``self.input_shape``. 41 | 42 | :return: Tuple[*int] 43 | """ 44 | pass 45 | 46 | def _forward(self, input, internals, **kwargs): 47 | """ 48 | Compute forward pass. 49 | 50 | ObsKey = str 51 | InternalKey = str 52 | 53 | :param observation: Dict[ObsKey, torch.Tensor] 54 | :param internals: Dict[InternalKey, torch.Tensor (ND)] 55 | :return: torch.Tensor 56 | """ 57 | pass 58 | 59 | def _new_internals(self): 60 | """ 61 | Define any initial hidden states here, move them to device if necessary. 
62 | 63 | InternalKey=str 64 | 65 | :return: Dict[InternalKey, torch.Tensor (ND)] 66 | """ 67 | pass 68 | 69 | 70 | if __name__ == "__main__": 71 | args = parse_args() 72 | network_reg = NetworkRegistry() 73 | network_reg.register_submodule(MyCustomSubModule1D) 74 | 75 | main(args, net_registry=network_reg) 76 | 77 | # Call script like this to train agent: 78 | # python -m custom_submodule_stub.py --net1d MyCustomSubModule1D 79 | -------------------------------------------------------------------------------- /images/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heronsystems/adeptRL/d8554d134c1dfee6659baafd886684351c1dd982/images/architecture.png -------------------------------------------------------------------------------- /images/banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heronsystems/adeptRL/d8554d134c1dfee6659baafd886684351c1dd982/images/banner.png -------------------------------------------------------------------------------- /images/benchmark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heronsystems/adeptRL/d8554d134c1dfee6659baafd886684351c1dd982/images/benchmark.png -------------------------------------------------------------------------------- /images/modular_network.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heronsystems/adeptRL/d8554d134c1dfee6659baafd886684351c1dd982/images/modular_network.png -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | from adept.globals import VERSION 3 | 4 | # https://github.com/kennethreitz/setup.py/blob/master/setup.py 5 | 6 | 7 | with open("README.md", "r") as fh: 8 | long_description = fh.read() 9 | 10 | extras = { 11 | "profiler": ["pyinstrument>=2.0"], 12 | "atari": [ 13 | "gym[atari]>=0.10", 14 | "opencv-python-headless<4,>=3.4", 15 | ] 16 | } 17 | test_deps = ["pytest"] 18 | 19 | all_deps = [] 20 | for group_name in extras: 21 | all_deps += extras[group_name] 22 | all_deps = all_deps + test_deps 23 | extras["all"] = all_deps 24 | 25 | 26 | setup( 27 | name="adeptRL", 28 | version=VERSION, 29 | author="heron", 30 | author_email="adept@heronsystems.com", 31 | description="Reinforcement Learning Framework", 32 | long_description=long_description, 33 | long_description_content_type="text/markdown", 34 | url="https://github.com/heronsystems/adeptRL", 35 | license="GNU", 36 | python_requires=">=3.5.0", 37 | packages=find_packages(), 38 | install_requires=[ 39 | "protobuf>=3.15.3", 40 | "numpy>=1.14", 41 | "tensorflow<3,>=2.4.0", 42 | "cloudpickle>=0.5", 43 | "pyzmq>=17.1.2", 44 | "docopt>=0.6", 45 | "torch>=1.3.1", 46 | "torchvision>=0.4.2", 47 | "ray>=1.3.0", 48 | "pandas>=1.0.5", 49 | "msgpack<2,>=1.0.2", 50 | "msgpack-numpy<1,>=0.4.7", 51 | ], 52 | test_requires=test_deps, 53 | extras_require=extras, 54 | include_package_data=True, 55 | ) 56 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heronsystems/adeptRL/d8554d134c1dfee6659baafd886684351c1dd982/tests/__init__.py 
-------------------------------------------------------------------------------- /tests/distrib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heronsystems/adeptRL/d8554d134c1dfee6659baafd886684351c1dd982/tests/distrib/__init__.py -------------------------------------------------------------------------------- /tests/distrib/allreduce.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | import torch.distributed as dist 5 | 6 | WORLD_SIZE = int(os.environ["WORLD_SIZE"]) 7 | GLOBAL_RANK = int(os.environ["RANK"]) 8 | LOCAL_RANK = int(os.environ["LOCAL_RANK"]) 9 | NB_NODE = int(os.environ["NB_NODE"]) 10 | LOCAL_SIZE = WORLD_SIZE // NB_NODE 11 | 12 | print("w", WORLD_SIZE) 13 | print("g", GLOBAL_RANK) 14 | print("l", LOCAL_RANK) 15 | print("n", NB_NODE) 16 | 17 | 18 | def on_worker(): 19 | return LOCAL_RANK != 0 20 | 21 | 22 | def on_host(): 23 | return LOCAL_RANK == 0 24 | 25 | 26 | if __name__ == "__main__": 27 | nb_gpu = torch.cuda.device_count() 28 | print("Device Count", nb_gpu) 29 | 30 | dist.init_process_group( 31 | backend="nccl", world_size=WORLD_SIZE, rank=LOCAL_RANK 32 | ) 33 | 34 | print("LOCAL_RANK", LOCAL_RANK, "initialized.") 35 | t = torch.tensor([1.0, 2.0, 3.0]).to(f"cuda:{LOCAL_RANK}") 36 | 37 | # tags to identify tensors 38 | # loop thru workers 39 | dist.barrier() 40 | handle = dist.all_reduce(t, async_op=True) 41 | handle.wait() 42 | 43 | print(t) 44 | -------------------------------------------------------------------------------- /tests/distrib/control_flow_zmq.py: -------------------------------------------------------------------------------- 1 | import zmq 2 | import os 3 | import time 4 | from collections import deque 5 | 6 | WORLD_SIZE = int(os.environ["WORLD_SIZE"]) 7 | GLOBAL_RANK = int(os.environ["RANK"]) 8 | LOCAL_RANK = int(os.environ["LOCAL_RANK"]) 9 | NB_NODE = int(os.environ["NB_NODE"]) 10 | LOCAL_SIZE = WORLD_SIZE // NB_NODE 11 | 12 | if __name__ == "__main__": 13 | if LOCAL_RANK == 0: 14 | context = zmq.Context() 15 | h_to_w = context.socket(zmq.PUB)  # host broadcasts commands to workers 16 | w_to_h = context.socket(zmq.PULL) 17 | 18 | h_to_w.bind("tcp://*:5556") 19 | w_to_h.bind("tcp://*:5557") 20 | 21 | step_count = 0 22 | nb_batch = 2 23 | # while step_count < 100: 24 | # q, q_lookup = deque(), set() 25 | # while len(q) < nb_batch: 26 | # for i, hand in enumerate(handles): 27 | # if i not in q_lookup: 28 | 29 | else: 30 | context = zmq.Context() 31 | h_to_w = context.socket(zmq.SUB)  # worker listens for host broadcasts; still needs a "" subscription before it receives 32 | w_to_h = context.socket(zmq.PUSH) 33 | 34 | h_to_w.connect("tcp://localhost:5556") 35 | w_to_h.connect("tcp://localhost:5557") 36 | 37 | done = False 38 | 39 | while not done: 40 | 41 | print("worker received") 42 | 43 | time.sleep(1) 44 | 45 | # Host event loop 46 | # check for rollouts 47 | # batch rollouts 48 | # tell q workers to do another 49 | # learn on batch 50 | # send new model 51 | 52 | # Worker event loop 53 | # step the actor, write to exp 54 | # if new model, receive new model params 55 | # if exp ready, notify host 56 | # wait for host to be ready for another 57 | 58 | # Commands: 59 | # CALC_EXPS 60 | # GET_ROLLOUT_i 61 | # GET_ 62 | -------------------------------------------------------------------------------- /tests/distrib/exp_sync_broadcast.py: -------------------------------------------------------------------------------- 1 | import os 2 | from itertools import chain 3 | 4 | import torch 5 | import torch.distributed as dist 6 
| 7 | WORLD_SIZE = int(os.environ["WORLD_SIZE"]) 8 | GLOBAL_RANK = int(os.environ["RANK"]) 9 | LOCAL_RANK = int(os.environ["LOCAL_RANK"]) 10 | NB_NODE = int(os.environ["NB_NODE"]) 11 | LOCAL_SIZE = WORLD_SIZE // NB_NODE 12 | 13 | print("w", WORLD_SIZE) 14 | print("g", GLOBAL_RANK) 15 | print("l", LOCAL_RANK) 16 | print("n", NB_NODE) 17 | 18 | 19 | cache_spec = {"xs": (2, 6, 3, 3), "ys": (2, 6, 12), "rewards": (2, 6, 16)} 20 | 21 | 22 | def gpu_id(local_rank, device_count): 23 | if local_rank == 0: 24 | return 0 25 | elif device_count == 1: 26 | return 0 27 | else: 28 | return (local_rank % (device_count - 1)) + 1 29 | 30 | 31 | class WorkerCache(dict): 32 | def __init__(self, cache_spec, gpu_id): 33 | super(WorkerCache, self).__init__() 34 | self.sorted_keys = sorted(cache_spec.keys()) 35 | self.gpu_id = gpu_id 36 | 37 | for k in self.sorted_keys: 38 | self[k] = self._init_rollout(cache_spec, k) 39 | 40 | def _init_rollout(self, spec, key): 41 | return [ 42 | torch.ones(*spec[key][1:]).to(f"cuda:{self.gpu_id}") 43 | for _ in range(spec[key][0]) 44 | ] 45 | 46 | def sync(self, src, grp): 47 | handles = [] 48 | for k in self.sorted_keys: 49 | for t in self[k]: 50 | handles.append( 51 | dist.broadcast(t, src=src, group=grp, async_op=True) 52 | ) 53 | return handles 54 | 55 | def iter_tensors(self): 56 | return chain(*[self[k] for k in self.sorted_keys]) 57 | 58 | 59 | class HostCache(WorkerCache): 60 | def _init_rollout(self, spec, key): 61 | return [ 62 | torch.zeros(*spec[key][1:]).to(f"cuda:{self.gpu_id}") 63 | for _ in range(spec[key][0]) 64 | ] 65 | 66 | 67 | def on_worker(): 68 | return LOCAL_RANK != 0 69 | 70 | 71 | def on_host(): 72 | return LOCAL_RANK == 0 73 | 74 | 75 | if __name__ == "__main__": 76 | nb_gpu = torch.cuda.device_count() 77 | print("Device Count", nb_gpu) 78 | 79 | dist.init_process_group( 80 | backend="nccl", world_size=WORLD_SIZE, rank=LOCAL_RANK 81 | ) 82 | 83 | groups = [] 84 | for i in range(1, LOCAL_SIZE): 85 | grp = [0, i] 86 | groups.append(dist.new_group(grp)) 87 | 88 | print("LOCAL_RANK", LOCAL_RANK, "initialized.") 89 | if on_worker(): 90 | cache = WorkerCache(cache_spec, gpu_id(LOCAL_RANK, nb_gpu)) 91 | [t.fill_(LOCAL_RANK) for t in cache.iter_tensors()] 92 | else: 93 | caches = [ 94 | HostCache(cache_spec, gpu_id(LOCAL_RANK, nb_gpu)) 95 | for _ in range(LOCAL_SIZE - 1) 96 | ] 97 | 98 | # tags to identify tensors 99 | # loop thru workers 100 | 101 | if on_worker(): 102 | handle = cache.sync(LOCAL_RANK, groups[LOCAL_RANK - 1]) 103 | else: 104 | handles = [] 105 | for i, cache in enumerate(caches): 106 | handles.append(cache.sync(i + 1, groups[i])) 107 | 108 | if on_worker(): 109 | [h.wait() for h in handle] 110 | else: 111 | for handle in handles: 112 | [h.wait() for h in handle] 113 | 114 | if on_host(): 115 | for cache in caches: 116 | for t in cache.iter_tensors(): 117 | print(t) 118 | -------------------------------------------------------------------------------- /tests/distrib/hello_ray.py: -------------------------------------------------------------------------------- 1 | import time 2 | from collections import namedtuple 3 | 4 | import ray 5 | 6 | 7 | @ray.remote(num_cpus=2) 8 | class Worker: 9 | State = namedtuple("State", ["asdf"]) 10 | 11 | def __init__(self): 12 | pass 13 | 14 | def sleep(self, t): 15 | time.sleep(t) 16 | print(f"slept for {t}") 17 | 18 | def sleep5(self): 19 | time.sleep(5) 20 | 21 | def sleep10(self): 22 | time.sleep(10) 23 | 24 | 25 | def main(): 26 | ray.init(num_cpus=4) 27 | remote_worker = Worker.remote() 28 | 29 | t_zero 
= time.time() 30 | 31 | f5 = remote_worker.sleep5.remote() 32 | f10 = remote_worker.sleep10.remote() 33 | 34 | ray.wait([f5, f10], num_returns=2) 35 | print("delta", time.time() - t_zero) 36 | 37 | 38 | def main_async(): 39 | import asyncio 40 | from ray.experimental import async_api 41 | 42 | ray.init(num_cpus=4) 43 | remote_worker = Worker.remote() 44 | loop = asyncio.get_event_loop() 45 | 46 | t_zero = time.time() 47 | 48 | tasks = [ 49 | async_api.as_future(remote_worker.sleep.remote(i)) for i in range(1, 3) 50 | ] 51 | loop.run_until_complete(asyncio.gather(tasks)) 52 | 53 | print("delta", time.time() - t_zero) 54 | 55 | 56 | if __name__ == "__main__": 57 | main() 58 | -------------------------------------------------------------------------------- /tests/distrib/launch.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import subprocess 4 | 5 | NB_NODE = 1 6 | NODE_RANK = 0 7 | MASTER_ADDR = "127.0.0.1" 8 | MASTER_PORT = "29500" 9 | 10 | 11 | def main(args): 12 | processes = [] 13 | for local_rank in range(args.nb_proc): 14 | # each process's rank 15 | dist_rank = args.nb_proc * NODE_RANK + local_rank 16 | current_env["RANK"] = str(dist_rank) 17 | current_env["LOCAL_RANK"] = str(local_rank) 18 | 19 | cmd = [sys.executable, "-u", args.script] 20 | 21 | proc = subprocess.Popen(cmd, env=current_env) 22 | processes.append(proc) 23 | 24 | for process in processes: 25 | process.wait() 26 | 27 | 28 | if __name__ == "__main__": 29 | from argparse import ArgumentParser 30 | 31 | parser = ArgumentParser() 32 | parser.add_argument("--script", default="exp_sync_broadcast.py") 33 | parser.add_argument("--nb-proc", type=int, default=2) 34 | args = parser.parse_args() 35 | 36 | dist_world_size = args.nb_proc * NB_NODE 37 | 38 | current_env = os.environ.copy() 39 | current_env["MASTER_ADDR"] = MASTER_ADDR 40 | current_env["MASTER_PORT"] = MASTER_PORT 41 | current_env["WORLD_SIZE"] = str(dist_world_size) 42 | current_env["NB_NODE"] = str(NB_NODE) 43 | 44 | main(args) 45 | -------------------------------------------------------------------------------- /tests/distrib/multi_group.py: -------------------------------------------------------------------------------- 1 | import os 2 | from threading import Thread 3 | 4 | import torch 5 | import torch.distributed as dist 6 | 7 | WORLD_SIZE = int(os.environ["WORLD_SIZE"]) 8 | GLOBAL_RANK = int(os.environ["RANK"]) 9 | LOCAL_RANK = int(os.environ["LOCAL_RANK"]) 10 | NB_NODE = int(os.environ["NB_NODE"]) 11 | LOCAL_SIZE = WORLD_SIZE // NB_NODE 12 | 13 | print("w", WORLD_SIZE) 14 | print("g", GLOBAL_RANK) 15 | print("l", LOCAL_RANK) 16 | print("n", NB_NODE) 17 | 18 | 19 | def on_worker(): 20 | return LOCAL_RANK != 0 21 | 22 | 23 | def on_host(): 24 | return LOCAL_RANK == 0 25 | 26 | 27 | if __name__ == "__main__": 28 | nb_gpu = torch.cuda.device_count() 29 | print("Device Count", nb_gpu) 30 | 31 | dist.init_process_group( 32 | backend="nccl", world_size=WORLD_SIZE, rank=LOCAL_RANK 33 | ) 34 | wh_group = dist.new_group([0, 1]) 35 | hw_group = dist.new_group([0, 1]) 36 | 37 | print("LOCAL_RANK", LOCAL_RANK, "initialized.") 38 | 39 | def wh_loop(): 40 | count = 0 41 | while count < 10: 42 | dist.barrier(wh_group) 43 | dist.broadcast(t_a, 1, wh_group) 44 | count += 1 45 | print(f"wh {count}") 46 | return True 47 | 48 | def hw_loop(): 49 | count = 0 50 | while count < 5: 51 | dist.barrier(hw_group) 52 | dist.broadcast(t_b, 0, hw_group) 53 | count += 1 54 | print(f"hw {count}") 55 | return True 56 | 57 
| if on_host(): 58 | t_a = torch.tensor([1, 2, 3]).to("cuda:0") 59 | t_b = torch.tensor([1, 2, 3]).to("cuda:0") 60 | 61 | if on_worker(): 62 | t_a = torch.tensor([4, 5, 6]).to("cuda:0") 63 | t_b = torch.tensor([4, 5, 6]).to("cuda:0") 64 | 65 | thread_wh = Thread(target=wh_loop) 66 | thread_hw = Thread(target=hw_loop) 67 | 68 | thread_wh.start() 69 | thread_hw.start() 70 | 71 | thread_wh.join() 72 | thread_hw.join() 73 | 74 | print("t_a", t_a, "should be [4, 5, 6]") 75 | print("t_b", t_b, "should be [1, 2, 3]") 76 | -------------------------------------------------------------------------------- /tests/distrib/nccl_typecheck.py: -------------------------------------------------------------------------------- 1 | import os 2 | from itertools import chain 3 | 4 | import torch 5 | import torch.distributed as dist 6 | 7 | WORLD_SIZE = int(os.environ["WORLD_SIZE"]) 8 | GLOBAL_RANK = int(os.environ["RANK"]) 9 | LOCAL_RANK = int(os.environ["LOCAL_RANK"]) 10 | NB_NODE = int(os.environ["NB_NODE"]) 11 | LOCAL_SIZE = WORLD_SIZE // NB_NODE 12 | 13 | print("w", WORLD_SIZE) 14 | print("g", GLOBAL_RANK) 15 | print("l", LOCAL_RANK) 16 | print("n", NB_NODE) 17 | 18 | 19 | def on_worker(): 20 | return LOCAL_RANK != 0 21 | 22 | 23 | def on_host(): 24 | return LOCAL_RANK == 0 25 | 26 | 27 | if __name__ == "__main__": 28 | nb_gpu = torch.cuda.device_count() 29 | print("Device Count", nb_gpu) 30 | 31 | dist.init_process_group( 32 | backend="nccl", world_size=WORLD_SIZE, rank=LOCAL_RANK 33 | ) 34 | 35 | print("LOCAL_RANK", LOCAL_RANK, "initialized.") 36 | if on_host(): 37 | t = torch.tensor([1, 2, 3]).to("cuda:0") 38 | if on_worker(): 39 | t = torch.tensor([1.0, 2.0, 3.0]).to("cuda:0") 40 | 41 | # tags to identify tensors 42 | # loop thru workers 43 | dist.barrier() 44 | handle = dist.broadcast(t, 0, async_op=True) 45 | handle.wait() 46 | 47 | print(t.long()) 48 | print(t.type()) 49 | -------------------------------------------------------------------------------- /tests/exp/test_rollout.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import torch 4 | from adept.exp import Rollout, ExpSpecBuilder 5 | 6 | obs_space = {"obs_a": (2, 2), "obs_b": (3, 3)} 7 | act_space = {"act_a": (5,), "act_b": (6,)} 8 | internal_space = {"internal_a": (2,), "internal_b": (3,)} 9 | obs_keys = ["obs_a", "obs_b"] 10 | act_keys = ["act_a", "act_b"] 11 | internal_keys = ["internal_a", "internal_b"] 12 | batch_size = 16 13 | exp_len = 20 14 | 15 | 16 | def build_fn(exp_len): 17 | return { 18 | "obs_a": (exp_len + 1, batch_size, 2, 2), 19 | "obs_b": (exp_len + 1, batch_size, 3, 3), 20 | "act_a": (exp_len, batch_size), 21 | "act_b": (exp_len, batch_size), 22 | "internal_a": (exp_len, batch_size, 2), 23 | "internal_b": (exp_len, batch_size, 3), 24 | "rewards": (exp_len, batch_size), 25 | "terminals": (exp_len, batch_size), 26 | } 27 | 28 | 29 | spec_builder = ExpSpecBuilder( 30 | obs_keys=obs_space, 31 | act_keys=act_space, 32 | internal_keys=internal_space, 33 | key_types={ 34 | "obs_a": "long", 35 | "obs_b": "long", 36 | "act_a": "long", 37 | "act_b": "long", 38 | "internal_a": "float", 39 | "internal_b": "float", 40 | "rewards": "float", 41 | "terminals": "float", 42 | }, 43 | exp_keys=obs_keys + act_keys + internal_keys + ["rewards", "terminals"], 44 | build_fn=build_fn, 45 | ) 46 | 47 | 48 | class TestRollout(unittest.TestCase): 49 | def test_next_obs(self): 50 | r = Rollout(spec_builder, 20) 51 | next_obs = { 52 | "obs_a": torch.ones(batch_size, 2, 2), 53 | "obs_b": 
torch.ones(batch_size, 3, 3), 54 | } 55 | r.write_next_obs(next_obs) 56 | next_obs = r.read().next_observation 57 | print(next_obs["obs_a"].shape) 58 | print(next_obs["obs_b"].shape) 59 | # print(next_obs) 60 | self.assertEqual(next_obs["obs_a"][0][0][0].item(), 1) 61 | self.assertEqual(next_obs["obs_b"][0][0][0].item(), 1) 62 | -------------------------------------------------------------------------------- /tests/learner/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heronsystems/adeptRL/d8554d134c1dfee6659baafd886684351c1dd982/tests/learner/__init__.py -------------------------------------------------------------------------------- /tests/learner/nstep.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | rollout_len = 20 4 | 5 | rewards = torch.zeros(rollout_len) 6 | rewards[-1] = 1.0 7 | terminals = torch.zeros(rollout_len) 8 | terminals[-1] = 1.0 9 | terminal_masks = 1.0 - terminals 10 | bootstrap_value = 0.0 11 | 12 | target = bootstrap_value 13 | nsteps = [] 14 | for i in reversed(range(rollout_len)): 15 | target = rewards[i] + 0.99 * target * terminal_masks[i] 16 | nsteps.append(target) 17 | 18 | print(nsteps) 19 | -------------------------------------------------------------------------------- /tests/network/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heronsystems/adeptRL/d8554d134c1dfee6659baafd886684351c1dd982/tests/network/__init__.py -------------------------------------------------------------------------------- /tests/network/test_modular_network.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from adept.network.modular_network import ModularNetwork 4 | from adept.network.net1d.identity_1d import Identity1D 5 | from adept.network.net2d.identity_2d import Identity2D 6 | from adept.network.net3d.identity_3d import Identity3D 7 | from adept.network.net4d.identity_4d import Identity4D 8 | 9 | 10 | def dummy_gpu_preprocessor(obs): 11 | return obs 12 | 13 | 14 | class TestModularNetwork(unittest.TestCase): 15 | # Example of valid structure 16 | source_nets = { 17 | "source_1d": Identity1D((16,), "source_1d"), 18 | "source_2d": Identity2D((16, 8 * 8), "source_2d"), 19 | "source_3d": Identity3D((16, 8, 8), "source_3d"), 20 | "source_4d": Identity4D((16, 8, 8, 8), "source_4d"), 21 | } 22 | body = Identity3D((176, 8, 8), "body") 23 | heads = { 24 | "1": Identity1D((11264,), "head1d"), 25 | "2": Identity2D((176, 64), "head2d"), 26 | "3": Identity3D((176, 8, 8), "head3d"), 27 | } 28 | output_space = { 29 | "output_1d": (16,), 30 | "output_2d": (16, 8 * 8), 31 | "output_3d": (16, 8, 8), 32 | } 33 | 34 | def test_heads_not_higher_dim_than_body(self): 35 | stub_1d = Identity1D((32,), "stub_1d") 36 | stub_2d = Identity2D((32, 32), "stub_2d") 37 | 38 | source_nets = {"source": stub_1d} 39 | body = stub_1d 40 | heads = {"2": stub_2d} 41 | output_space = {"output": (32, 32)} 42 | 43 | with self.assertRaises(AssertionError): 44 | ModularNetwork( 45 | source_nets, body, heads, output_space, dummy_gpu_preprocessor 46 | ) 47 | 48 | def test_source_nets_match_body(self): 49 | stub_32 = Identity2D((32, 32), "stub_32") 50 | stub_64 = Identity2D((32, 64), "stub_64") 51 | 52 | source_nets = {"source": stub_32} 53 | body = stub_64 # should error 54 | heads = {"2": stub_64} 55 | output_space = {"output": (32, 64)} 56 | 57 
| with self.assertRaises(AssertionError): 58 | ModularNetwork( 59 | source_nets, body, heads, output_space, dummy_gpu_preprocessor 60 | ) 61 | 62 | def test_body_matches_heads(self): 63 | stub_32 = Identity2D((32, 32), "stub_32") 64 | stub_64 = Identity2D((32, 64), "stub_64") 65 | 66 | source_nets = {"source": stub_32} 67 | body = stub_32 68 | heads = {"2": stub_64} # should error 69 | output_space = {"output": (32, 64)} 70 | 71 | with self.assertRaises(AssertionError): 72 | ModularNetwork( 73 | source_nets, body, heads, output_space, dummy_gpu_preprocessor 74 | ) 75 | 76 | def test_output_has_a_head(self): 77 | stub_2d = Identity2D((32, 32), "stub_2d") 78 | 79 | source_nets = {"source": stub_2d} 80 | body = stub_2d 81 | heads = {"2": stub_2d} 82 | output_space = {"output": (32, 32, 32)} # should error 83 | with self.assertRaises(AssertionError): 84 | ModularNetwork( 85 | source_nets, body, heads, output_space, dummy_gpu_preprocessor 86 | ) 87 | 88 | def test_heads_match_out_shapes(self): 89 | stub_2d = Identity2D((32, 32), "stub_2d") 90 | 91 | source_nets = {"source": stub_2d} 92 | body = stub_2d 93 | heads = {"2": stub_2d} 94 | output_space = {"output": (32, 64)} # should error 95 | with self.assertRaises(AssertionError): 96 | ModularNetwork( 97 | source_nets, body, heads, output_space, dummy_gpu_preprocessor 98 | ) 99 | 100 | def test_valid_structure(self): 101 | try: 102 | ModularNetwork( 103 | self.source_nets, 104 | self.body, 105 | self.heads, 106 | self.output_space, 107 | dummy_gpu_preprocessor, 108 | ) 109 | except: 110 | self.fail("Unexpected exception") 111 | 112 | def test_forward(self): 113 | import torch 114 | 115 | BATCH = 32 116 | obs = { 117 | "source_1d": torch.zeros((BATCH, 16,)), 118 | "source_2d": torch.zeros((BATCH, 16, 8 * 8)), 119 | "source_3d": torch.zeros((BATCH, 16, 8, 8)), 120 | "source_4d": torch.zeros((BATCH, 16, 8, 8, 8)), 121 | } 122 | try: 123 | net = ModularNetwork( 124 | self.source_nets, 125 | self.body, 126 | self.heads, 127 | self.output_space, 128 | dummy_gpu_preprocessor, 129 | ) 130 | outputs, _ = net.forward(obs, {}) 131 | except: 132 | self.fail("Unexpected exception") 133 | 134 | 135 | if __name__ == "__main__": 136 | unittest.main(verbosity=1) 137 | -------------------------------------------------------------------------------- /tests/registry/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heronsystems/adeptRL/d8554d134c1dfee6659baafd886684351c1dd982/tests/registry/__init__.py -------------------------------------------------------------------------------- /tests/registry/test_registry.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import torch 3 | 4 | from adept.agent import AgentModule 5 | from adept.env import EnvModule 6 | from adept.preprocess.base.preprocessor import ObsPreprocessor 7 | from adept.preprocess import CPUPreprocessor, GPUPreprocessor 8 | from adept.registry import Registry 9 | 10 | 11 | class NotSubclass: 12 | pass 13 | 14 | 15 | class ArgsNotImplemented(AgentModule): 16 | pass 17 | 18 | 19 | class DummyEnv(EnvModule): 20 | 21 | args = {} 22 | ids = ["dummy"] 23 | 24 | def __init__(self): 25 | obs_space = {"screen": (3, 84, 84)} 26 | action_space = {"action": (8,)} 27 | cpu_preprocessor = CPUPreprocessor([], obs_space) 28 | gpu_preprocessor = GPUPreprocessor( 29 | [], cpu_preprocessor.observation_space 30 | ) 31 | super(DummyEnv, self).__init__( 32 | action_space, cpu_preprocessor, 
gpu_preprocessor 33 | ) 34 | 35 | @classmethod 36 | def from_args(cls, args, seed, **kwargs): 37 | return cls() 38 | 39 | def step(self, action): 40 | return {"screen": torch.rand((3, 84, 84))}, 1, False, {} 41 | 42 | def reset(self, **kwargs): 43 | return torch.rand((3, 84, 84)) 44 | 45 | def close(self): 46 | return None 47 | 48 | 49 | class TestRegistry(unittest.TestCase): 50 | def test_register_invalid_class(self): 51 | registry = Registry() 52 | with self.assertRaises(AssertionError): 53 | registry.register_agent(NotSubclass) 54 | 55 | def test_register_args_not_implemented(self): 56 | registry = Registry() 57 | with self.assertRaises(NotImplementedError): 58 | registry.register_agent(ArgsNotImplemented) 59 | 60 | def test_save_classes(self): 61 | dummy_log_id_dir = "/tmp/adept_test/test_save_classes" 62 | registry = Registry() 63 | registry.register_env(DummyEnv) 64 | registry.save_extern_classes(dummy_log_id_dir) 65 | 66 | other = Registry() 67 | other.load_extern_classes(dummy_log_id_dir) 68 | env_cls = other.lookup_env("dummy") 69 | env = env_cls() 70 | env.reset() 71 | 72 | 73 | if __name__ == "__main__": 74 | unittest.main(verbosity=1) 75 | -------------------------------------------------------------------------------- /tests/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heronsystems/adeptRL/d8554d134c1dfee6659baafd886684351c1dd982/tests/utils/__init__.py -------------------------------------------------------------------------------- /tests/utils/test_requires_args.py: -------------------------------------------------------------------------------- 1 | import io 2 | import sys 3 | import unittest 4 | 5 | from adept.utils.requires_args import RequiresArgsMixin 6 | 7 | 8 | class Stub(RequiresArgsMixin): 9 | args = {"k1": 0, "k2": False, "k3": 1.5, "k4": "hello"} 10 | 11 | 12 | class TestRequiresArgs(unittest.TestCase): 13 | stub = Stub() 14 | 15 | def test_prompt_no_changes(self): 16 | sys.stdin = io.StringIO("\n") 17 | new_conf = self.stub.prompt() 18 | assert new_conf == self.stub.args 19 | 20 | def test_prompt_modify(self): 21 | sys.stdin = io.StringIO('{"k1": 5}') 22 | new_conf = self.stub.prompt() 23 | assert new_conf["k1"] == 5 24 | assert new_conf["k2"] == self.stub.args["k2"] 25 | assert new_conf["k3"] == self.stub.args["k3"] 26 | assert new_conf["k4"] == self.stub.args["k4"] 27 | 28 | 29 | if __name__ == "__main__": 30 | unittest.main(verbosity=1) 31 | -------------------------------------------------------------------------------- /tests/utils/test_util.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from adept.utils.util import listd_to_dlist, dlist_to_listd 4 | 5 | 6 | class TestUtil(unittest.TestCase): 7 | def test_dlist_to_listd(self): 8 | assert dlist_to_listd({"a": [1]}) == [{"a": 1}] 9 | 10 | def test_listd_to_dlist(self): 11 | assert listd_to_dlist([{"a": 1}]) == {"a": [1]} 12 | 13 | 14 | if __name__ == "__main__": 15 | unittest.main(verbosity=2) 16 | --------------------------------------------------------------------------------