├── .gitignore ├── Dockerfile ├── README.md ├── __init__.py ├── demo_scripts ├── PETS.sh ├── POPLINA_INIT.sh ├── POPLINA_REPLAN.sh ├── POPLINP_AVG.sh ├── POPLINP_BC.sh └── POPLINP_UNI.sh ├── dmbrl ├── __init__.py ├── config │ ├── __init__.py │ ├── default.py │ ├── gym_acrobot.py │ ├── gym_ant.py │ ├── gym_cartpole.py │ ├── gym_cheetah.py │ ├── gym_fhopper.py │ ├── gym_fswimmer.py │ ├── gym_hopper.py │ ├── gym_invertedPendulum.py │ ├── gym_pendulum.py │ ├── gym_reacher.py │ ├── gym_swimmer.py │ ├── gym_walker2d.py │ ├── halfcheetah.py │ ├── pusher.py │ ├── reacher.py │ ├── reward_util.py │ ├── template.py │ └── view_humanoid.py ├── controllers │ ├── Controller.py │ ├── MPC.py │ └── __init__.py ├── env │ ├── __init__.py │ ├── assets │ │ ├── cartpole.xml │ │ ├── half_cheetah.xml │ │ ├── pusher.xml │ │ └── reacher3d.xml │ ├── cartpole.py │ ├── half_cheetah.py │ ├── pusher.py │ └── reacher.py ├── misc │ ├── Agent.py │ ├── DotmapUtils.py │ ├── MBExp.py │ ├── __init__.py │ ├── logger.py │ └── optimizers │ │ ├── POPLIN_A.py │ │ ├── POPLIN_P.py │ │ ├── __init__.py │ │ ├── cem.py │ │ ├── gbp_cem.py │ │ ├── gbp_rs.py │ │ ├── optimizer.py │ │ ├── pgcem.py │ │ ├── policy_network │ │ ├── BC_A_policy.py │ │ ├── BC_WA_policy.py │ │ ├── BC_WD_policy.py │ │ ├── __init__.py │ │ ├── base_policy.py │ │ ├── gan_policy.py │ │ ├── gmm_policy.py │ │ ├── gmm_util.py │ │ ├── tf_networks.py │ │ ├── tf_norm.py │ │ ├── tf_utils.py │ │ ├── wgan_policy.py │ │ └── whitening_util.py │ │ └── random.py └── modeling │ ├── layers │ ├── FC.py │ └── __init__.py │ ├── models │ ├── BNN.py │ ├── GT_dynamics.py │ ├── NN.py │ ├── TFGP.py │ └── __init__.py │ └── utils │ ├── TensorStandardScaler.py │ └── __init__.py ├── img ├── curve.png ├── policy_control.png ├── reward.png └── table.png ├── mbexp.py ├── requirements.txt ├── scripts ├── mbexp.py └── render.py ├── show_result.py └── show_with_test_result.py /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | tags 3 | *.pyc 4 | log/ 5 | *.swp 6 | *.swo 7 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04 2 | 3 | RUN rm /bin/sh && ln -s /bin/bash /bin/sh 4 | 5 | # Install pip 6 | RUN apt-get update 7 | RUN apt-get -y install python3 python3-pip python3-dev python3-tk 8 | RUN apt-get -y install libglu1-mesa libxi-dev libxmu-dev libglu1-mesa-dev 9 | 10 | # Install basic libraries 11 | RUN pip3 install --upgrade pip 12 | RUN pip3 install numpy tensorflow-gpu==1.9 matplotlib scipy scikit-learn future 13 | 14 | # Install MuJoCo + OpenAI gym 15 | RUN pip3 install gym==0.9.4 16 | RUN apt-get update 17 | RUN apt-get -y install unzip unetbootin wget 18 | RUN mkdir -p /.mujoco && cd /.mujoco && wget https://www.roboti.us/download/mjpro131_linux.zip && unzip mjpro131_linux.zip 19 | ENV MUJOCO_PY_MJKEY_PATH="/root/.mujoco/mjkey.txt" 20 | ENV MUJOCO_PY_MJPRO_PATH="/root/.mujoco/mjpro131" 21 | RUN pip3 install mujoco-py==0.5.7 22 | 23 | # Install additional requirements 24 | RUN pip3 install datetime gitpython h5py tqdm dotmap cython 25 | 26 | # GPFlow 27 | RUN apt-get -y install git 28 | RUN git clone https://github.com/GPflow/GPflow.git 29 | RUN pip3 install pandas multipledispatch pytest 30 | RUN cd GPflow/ && pip install . 
--no-deps 31 | 32 | # Create copy of Deep MBRL repo and place in ~/handful-of-trials 33 | RUN cd ~ && git clone https://github.com/kchua/handful-of-trials.git 34 | 35 | # Environment setup 36 | RUN echo 'LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/lib/x86_64-linux-gnu' >> /root/.bashrc 37 | RUN echo 'alias python=python3' >> /root/.bashrc 38 | 39 | CMD /bin/bash 40 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WilsonWangTHU/POPLIN/edd8dba50f9049c6164eda774602bef0c299cb51/__init__.py -------------------------------------------------------------------------------- /demo_scripts/PETS.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python mbexp.py -logdir ./log/PETS \ 3 | -env halfcheetah \ 4 | -o exp_cfg.exp_cfg.ntrain_iters 50 \ 5 | -ca opt-type CEM \ 6 | -ca model-type PE \ 7 | -ca prop-type E 8 | -------------------------------------------------------------------------------- /demo_scripts/POPLINA_INIT.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # The following script will run POPLIN-A with INIT methods on halfcheetah 3 | 4 | python mbexp.py -logdir ./log/POPLIN_A \ 5 | -env halfcheetah \ 6 | -o exp_cfg.exp_cfg.ntrain_iters 50 \ 7 | -o ctrl_cfg.cem_cfg.cem_type POPLINA-INIT \ 8 | -o ctrl_cfg.cem_cfg.training_scheme BC-AI \ 9 | -o ctrl_cfg.cem_cfg.test_policy 1 \ 10 | -ca model-type PE -ca prop-type E \ 11 | -ca opt-type POPLIN-A 12 | -------------------------------------------------------------------------------- /demo_scripts/POPLINA_REPLAN.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # The following script will run POPLIN-A with REPLAN methods on halfcheetah 3 | 4 | python mbexp.py -logdir ./log/POPLINA_REPLAN \ 5 | -env halfcheetah \ 6 | -o exp_cfg.exp_cfg.ntrain_iters 50 \ 7 | -o ctrl_cfg.cem_cfg.cem_type POPLINA-REPLAN \ 8 | -o ctrl_cfg.cem_cfg.training_scheme BC-AI \ 9 | -o ctrl_cfg.cem_cfg.test_policy 1 \ 10 | -ca model-type PE -ca prop-type E \ 11 | -ca opt-type POPLIN-A 12 | -------------------------------------------------------------------------------- /demo_scripts/POPLINP_AVG.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # The following script will run POPLIN-P using the AVG training methods on halfcheetah 3 | 4 | python mbexp.py -logdir ./log/POPLINP_AVG -env halfcheetah \ 5 | -o exp_cfg.exp_cfg.ntrain_iters 50 \ 6 | -o ctrl_cfg.cem_cfg.cem_type POPLINP-SEP \ 7 | -o ctrl_cfg.cem_cfg.training_scheme AVG-R \ 8 | -o ctrl_cfg.cem_cfg.policy_network_shape [32] \ 9 | -o ctrl_cfg.opt_cfg.init_var 0.1 \ 10 | -o ctrl_cfg.cem_cfg.test_policy 1 \ 11 | -ca model-type PE -ca prop-type E \ 12 | -ca opt-type POPLIN-P 13 | -------------------------------------------------------------------------------- /demo_scripts/POPLINP_BC.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # The following script will run POPLIN-P using the BC training methods. 
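# Compared to the AVG variant above, it keeps the same dynamics ensemble (PE), propagation
# method (E) and POPLIN-P optimizer; only the training scheme (BC-PR instead of AVG-R) and
# the smaller initial CEM variance (init_var 0.03 instead of 0.1) differ.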
3 | 4 | python mbexp.py -logdir ./log/POPLINP_BC -env halfcheetah \ 5 | -o exp_cfg.exp_cfg.ntrain_iters 50 \ 6 | -o ctrl_cfg.cem_cfg.cem_type POPLINP-SEP \ 7 | -o ctrl_cfg.cem_cfg.training_scheme BC-PR \ 8 | -o ctrl_cfg.cem_cfg.policy_network_shape [32] \ 9 | -o ctrl_cfg.opt_cfg.init_var 0.03 \ 10 | -o ctrl_cfg.cem_cfg.test_policy 1 \ 11 | -ca model-type PE -ca prop-type E \ 12 | -ca opt-type POPLIN-P 13 | -------------------------------------------------------------------------------- /demo_scripts/POPLINP_UNI.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | python mbexp.py -logdir ./log/POPLINP_UNI -env halfcheetah \ 4 | -o exp_cfg.exp_cfg.ntrain_iters 50 \ 5 | -o ctrl_cfg.cem_cfg.cem_type POPLINP-UNI\ 6 | -o ctrl_cfg.cem_cfg.training_scheme AVG-R \ 7 | -o ctrl_cfg.cem_cfg.policy_network_shape [32] \ 8 | -o ctrl_cfg.opt_cfg.init_var 0.1 \ 9 | -o ctrl_cfg.cem_cfg.test_policy 1 \ 10 | -ca model-type PE -ca prop-type E \ 11 | -ca opt-type POPLIN-P 12 | -------------------------------------------------------------------------------- /dmbrl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WilsonWangTHU/POPLIN/edd8dba50f9049c6164eda774602bef0c299cb51/dmbrl/__init__.py -------------------------------------------------------------------------------- /dmbrl/config/__init__.py: -------------------------------------------------------------------------------- 1 | from .default import create_config -------------------------------------------------------------------------------- /dmbrl/config/gym_acrobot.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | from __future__ import absolute_import 4 | 5 | import numpy as np 6 | import tensorflow as tf 7 | from dotmap import DotMap 8 | 9 | from dmbrl.misc.DotmapUtils import get_required_argument 10 | from dmbrl.modeling.layers import FC 11 | """ 12 | Module name, 13 | MODEL_IN, MODEL_OUT, 14 | import env, env_name 15 | """ 16 | 17 | 18 | class GymAcrobotConfigModule: 19 | ENV_NAME = "MBRLGYM_acrobot-v0" 20 | TASK_HORIZON = 1000 21 | NTRAIN_ITERS = 300 22 | NROLLOUTS_PER_ITER = 1 23 | PLAN_HOR = 30 24 | INIT_VAR = 0.25 25 | MODEL_IN, MODEL_OUT = 7, 6 # obs -> 6, action -> 1 26 | GP_NINDUCING_POINTS = 300 27 | 28 | def __init__(self): 29 | # self.ENV = gym.make(self.ENV_NAME) 30 | from mbbl.env.gym_env import acrobot 31 | self.ENV = acrobot.env(env_name='gym_acrobot', rand_seed=1234, 32 | misc_info={'reset_type': 'gym'}) 33 | cfg = tf.ConfigProto() 34 | cfg.gpu_options.allow_growth = True 35 | self.SESS = tf.Session(config=cfg) 36 | self.NN_TRAIN_CFG = {"epochs": 5} 37 | self.OPT_CFG = { 38 | "Random": { 39 | "popsize": 2500 40 | }, 41 | "GBPRandom": { 42 | "popsize": 2500 43 | }, 44 | "GBPCEM": { 45 | "popsize": 500, 46 | "num_elites": 50, 47 | "max_iters": 5, 48 | "alpha": 0.1 49 | }, 50 | "CEM": { 51 | "popsize": 500, 52 | "num_elites": 50, 53 | "max_iters": 5, 54 | "alpha": 0.1 55 | }, 56 | "POPLIN-P": { 57 | "popsize": 500, 58 | "num_elites": 50, 59 | "max_iters": 5, 60 | "alpha": 0.1 61 | }, 62 | "POPLIN-A": { 63 | "popsize": 500, 64 | "num_elites": 50, 65 | "max_iters": 5, 66 | "alpha": 0.1 67 | } 68 | } 69 | 70 | @staticmethod 71 | def obs_preproc(obs): 72 | """ @brief: no cheating of the observation function 73 | """ 74 | if isinstance(obs, np.ndarray): 75 | return obs 76 | else: 77 | return obs 78 | 
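    # obs_postproc and targ_proc below implement the delta-dynamics convention shared
    # by all config modules in this repo: the network is trained on
    # targ_proc(obs, next_obs) = next_obs - obs, and planning rollouts recover the
    # predicted next state with obs_postproc(obs, pred) = obs + pred.
    # A small numeric illustration with hypothetical values:
    #
    #   >>> obs      = np.array([[0.0, 1.0]])
    #   >>> next_obs = np.array([[0.5, 1.0]])
    #   >>> delta = GymAcrobotConfigModule.targ_proc(obs, next_obs)     # [[0.5, 0.0]]
    #   >>> GymAcrobotConfigModule.obs_postproc(obs, delta)             # [[0.5, 1.0]]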
79 | @staticmethod 80 | def obs_postproc(obs, pred): 81 | if isinstance(obs, np.ndarray): 82 | return obs + pred 83 | else: 84 | return obs + pred 85 | 86 | @staticmethod 87 | def targ_proc(obs, next_obs): 88 | return next_obs - obs 89 | 90 | @staticmethod 91 | def obs_cost_fn(obs): 92 | """ @brief: 93 | 94 | def reward(data_dict): 95 | def height(obs): 96 | h1 = obs[0] # Height of first arm 97 | h2 = obs[0] * obs[2] - obs[1] * obs[3] # Height of second arm 98 | return -(h1 + h2) # total height 99 | 100 | start_height = height(data_dict['start_state']) 101 | 102 | reward = { 103 | 'gym_acrobot': start_height, 104 | 'gym_acrobot_sparse': (start_height > 1) - 1 105 | }[self._env_name] # gets gt reward based on sparse/dense 106 | return reward 107 | self.reward = reward 108 | """ 109 | return obs[:, 0] + obs[:, 0] * obs[:, 2] - obs[:, 1] * obs[:, 3] 110 | 111 | @staticmethod 112 | def ac_cost_fn(acs): 113 | if isinstance(acs, np.ndarray): 114 | return np.sum(np.square(acs), axis=1) * 0.0 115 | else: 116 | return tf.reduce_sum(tf.square(acs), axis=1) * 0.0 117 | 118 | def nn_constructor(self, model_init_cfg, misc=None): 119 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap( 120 | name="model", num_networks=get_required_argument(model_init_cfg, "num_nets", "Must provide ensemble size"), 121 | sess=self.SESS, load_model=model_init_cfg.get("load_model", False), 122 | model_dir=model_init_cfg.get("model_dir", None), 123 | misc=misc 124 | )) 125 | if not model_init_cfg.get("load_model", False): 126 | model.add(FC(200, input_dim=self.MODEL_IN, activation="swish", weight_decay=0.000025)) 127 | model.add(FC(200, activation="swish", weight_decay=0.00005)) 128 | model.add(FC(200, activation="swish", weight_decay=0.000075)) 129 | model.add(FC(200, activation="swish", weight_decay=0.000075)) 130 | model.add(FC(self.MODEL_OUT, weight_decay=0.0001)) 131 | model.finalize(tf.train.AdamOptimizer, {"learning_rate": 0.001}) 132 | return model 133 | 134 | def gp_constructor(self, model_init_cfg): 135 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap( 136 | name="model", 137 | kernel_class=get_required_argument(model_init_cfg, "kernel_class", "Must provide kernel class"), 138 | kernel_args=model_init_cfg.get("kernel_args", {}), 139 | num_inducing_points=get_required_argument( 140 | model_init_cfg, "num_inducing_points", "Must provide number of inducing points." 
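                # Number of inducing points for the sparse-GP dynamics model
                # (GP_NINDUCING_POINTS = 300 above); this constructor is only used
                # when the GP model class (dmbrl/modeling/models/TFGP.py) is selected
                # instead of the neural-network ensemble.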
141 | ), 142 | sess=self.SESS 143 | )) 144 | return model 145 | 146 | 147 | CONFIG_MODULE = GymAcrobotConfigModule 148 | -------------------------------------------------------------------------------- /dmbrl/config/gym_ant.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | from __future__ import absolute_import 4 | 5 | import numpy as np 6 | import tensorflow as tf 7 | from dotmap import DotMap 8 | 9 | from dmbrl.misc.DotmapUtils import get_required_argument 10 | from dmbrl.modeling.layers import FC 11 | 12 | 13 | class AntConfigModule: 14 | """ 15 | @brief: migrate the gym module from the mbbl repo 16 | 'gym_cheetah': { 17 | 'path': 'mbbl.env.gym_env.walker', 18 | 'ob_size': 17, 'action_size': 6, 'max_length': 1000 19 | } 20 | """ 21 | ENV_NAME = "MBRLGYM_ANT-v0" 22 | TASK_HORIZON = 1000 23 | NTRAIN_ITERS = 300 24 | NROLLOUTS_PER_ITER = 1 25 | PLAN_HOR = 30 26 | INIT_VAR = 0.25 27 | MODEL_IN, MODEL_OUT = 35, 27 # obs - > 27, action 8 28 | GP_NINDUCING_POINTS = 300 29 | 30 | def __init__(self): 31 | # self.ENV = gym.make(self.ENV_NAME) 32 | from mbbl.env.gym_env import walker 33 | self.ENV = walker.env(env_name='gym_ant', rand_seed=1234, 34 | misc_info={'reset_type': 'gym'}) 35 | cfg = tf.ConfigProto() 36 | cfg.gpu_options.allow_growth = True 37 | self.SESS = tf.Session(config=cfg) 38 | self.NN_TRAIN_CFG = {"epochs": 5} 39 | self.OPT_CFG = { 40 | "Random": { 41 | "popsize": 2500 42 | }, 43 | "GBPRandom": { 44 | "popsize": 2500 45 | }, 46 | "GBPCEM": { 47 | "popsize": 500, 48 | "num_elites": 50, 49 | "max_iters": 5, 50 | "alpha": 0.1 51 | }, 52 | "CEM": { 53 | "popsize": 500, 54 | "num_elites": 50, 55 | "max_iters": 5, 56 | "alpha": 0.1 57 | }, 58 | "POPLIN-P": { 59 | "popsize": 500, 60 | "num_elites": 50, 61 | "max_iters": 5, 62 | "alpha": 0.1 63 | }, 64 | "POPLIN-A": { 65 | "popsize": 500, 66 | "num_elites": 50, 67 | "max_iters": 5, 68 | "alpha": 0.1 69 | } 70 | } 71 | 72 | @staticmethod 73 | def obs_preproc(obs): 74 | """ @brief: no cheating of the observation function 75 | """ 76 | if isinstance(obs, np.ndarray): 77 | return obs 78 | else: 79 | return obs 80 | 81 | @staticmethod 82 | def obs_postproc(obs, pred): 83 | if isinstance(obs, np.ndarray): 84 | return obs + pred 85 | else: 86 | return obs + pred 87 | 88 | @staticmethod 89 | def targ_proc(obs, next_obs): 90 | return next_obs - obs 91 | 92 | @staticmethod 93 | def obs_cost_fn(obs): 94 | """ @brief: 95 | see mbbl.env.gym_env.walker.py for reward details 96 | """ 97 | if isinstance(obs, np.ndarray): 98 | velocity_cost = -obs[:, 13] # the qvel for the root-x joint 99 | height_cost = 3 * np.square(obs[:, 0] - 0.57) # the height 100 | return velocity_cost + height_cost 101 | else: 102 | velocity_cost = -obs[:, 13] # the qvel for the root-x joint 103 | height_cost = 3 * tf.square(obs[:, 0] - 0.57) # the height 104 | return velocity_cost + height_cost 105 | 106 | @staticmethod 107 | def ac_cost_fn(acs): 108 | if isinstance(acs, np.ndarray): 109 | return 0.1 * np.sum(np.square(acs), axis=1) 110 | else: 111 | return 0.1 * tf.reduce_sum(tf.square(acs), axis=1) 112 | 113 | def nn_constructor(self, model_init_cfg, misc=None): 114 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap( 115 | name="model", num_networks=get_required_argument(model_init_cfg, "num_nets", "Must provide ensemble size"), 116 | sess=self.SESS, load_model=model_init_cfg.get("load_model", False), 117 | 
model_dir=model_init_cfg.get("model_dir", None), 118 | misc=misc 119 | )) 120 | if not model_init_cfg.get("load_model", False): 121 | model.add(FC(200, input_dim=self.MODEL_IN, activation="swish", weight_decay=0.000025)) 122 | model.add(FC(200, activation="swish", weight_decay=0.00005)) 123 | model.add(FC(200, activation="swish", weight_decay=0.000075)) 124 | model.add(FC(200, activation="swish", weight_decay=0.000075)) 125 | model.add(FC(self.MODEL_OUT, weight_decay=0.0001)) 126 | model.finalize(tf.train.AdamOptimizer, {"learning_rate": 0.001}) 127 | return model 128 | 129 | def gp_constructor(self, model_init_cfg): 130 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap( 131 | name="model", 132 | kernel_class=get_required_argument(model_init_cfg, "kernel_class", "Must provide kernel class"), 133 | kernel_args=model_init_cfg.get("kernel_args", {}), 134 | num_inducing_points=get_required_argument( 135 | model_init_cfg, "num_inducing_points", "Must provide number of inducing points." 136 | ), 137 | sess=self.SESS 138 | )) 139 | return model 140 | 141 | 142 | CONFIG_MODULE = AntConfigModule 143 | -------------------------------------------------------------------------------- /dmbrl/config/gym_cartpole.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | from __future__ import absolute_import 4 | 5 | import numpy as np 6 | import tensorflow as tf 7 | from dotmap import DotMap 8 | 9 | from dmbrl.misc.DotmapUtils import get_required_argument 10 | from dmbrl.modeling.layers import FC 11 | """ 12 | Module name, 13 | MODEL_IN, MODEL_OUT, 14 | import env, env_name 15 | """ 16 | 17 | 18 | class GymCartpoleConfigModule: 19 | ENV_NAME = "MBRLGYM_cartpole-v0" 20 | TASK_HORIZON = 1000 21 | NTRAIN_ITERS = 300 22 | NROLLOUTS_PER_ITER = 1 23 | PLAN_HOR = 30 24 | INIT_VAR = 0.25 25 | MODEL_IN, MODEL_OUT = 5, 4 # obs -> 3, action -> 1 26 | GP_NINDUCING_POINTS = 300 27 | 28 | def __init__(self): 29 | # self.ENV = gym.make(self.ENV_NAME) 30 | from mbbl.env.gym_env import cartpole 31 | self.ENV = cartpole.env(env_name='gym_cartpole', rand_seed=1234, 32 | misc_info={'reset_type': 'gym'}) 33 | cfg = tf.ConfigProto() 34 | cfg.gpu_options.allow_growth = True 35 | self.SESS = tf.Session(config=cfg) 36 | self.NN_TRAIN_CFG = {"epochs": 5} 37 | self.OPT_CFG = { 38 | "Random": { 39 | "popsize": 2500 40 | }, 41 | "GBPRandom": { 42 | "popsize": 2500 43 | }, 44 | "GBPCEM": { 45 | "popsize": 500, 46 | "num_elites": 50, 47 | "max_iters": 5, 48 | "alpha": 0.1 49 | }, 50 | "CEM": { 51 | "popsize": 500, 52 | "num_elites": 50, 53 | "max_iters": 5, 54 | "alpha": 0.1 55 | }, 56 | "POPLIN-P": { 57 | "popsize": 500, 58 | "num_elites": 50, 59 | "max_iters": 5, 60 | "alpha": 0.1 61 | }, 62 | "POPLIN-A": { 63 | "popsize": 500, 64 | "num_elites": 50, 65 | "max_iters": 5, 66 | "alpha": 0.1 67 | } 68 | } 69 | 70 | @staticmethod 71 | def obs_preproc(obs): 72 | """ @brief: no cheating of the observation function 73 | """ 74 | if isinstance(obs, np.ndarray): 75 | return obs 76 | else: 77 | return obs 78 | 79 | @staticmethod 80 | def obs_postproc(obs, pred): 81 | if isinstance(obs, np.ndarray): 82 | return obs + pred 83 | else: 84 | return obs + pred 85 | 86 | @staticmethod 87 | def targ_proc(obs, next_obs): 88 | return next_obs - obs 89 | 90 | @staticmethod 91 | def obs_cost_fn(obs): 92 | """ @brief: 93 | 94 | x, _, theta, _ = data_dict['start_state'] 95 | up_reward = np.cos(theta) 
96 | distance_penalty_reward = -0.01 * (x ** 2) 97 | return up_reward + distance_penalty_reward 98 | """ 99 | x = obs[:, 0] 100 | theta = obs[:, 2] 101 | if isinstance(obs, np.ndarray): 102 | return -(np.cos(theta) - 0.01 * (x ** 2)) 103 | else: 104 | return -(tf.cos(theta) - 0.01 * (x ** 2)) 105 | 106 | @staticmethod 107 | def ac_cost_fn(acs): 108 | if isinstance(acs, np.ndarray): 109 | return np.sum(np.square(acs), axis=1) * 0.0 110 | else: 111 | return tf.reduce_sum(tf.square(acs), axis=1) * 0.0 112 | 113 | def nn_constructor(self, model_init_cfg, misc=None): 114 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap( 115 | name="model", num_networks=get_required_argument(model_init_cfg, "num_nets", "Must provide ensemble size"), 116 | sess=self.SESS, load_model=model_init_cfg.get("load_model", False), 117 | model_dir=model_init_cfg.get("model_dir", None), 118 | misc=misc 119 | )) 120 | if not model_init_cfg.get("load_model", False): 121 | model.add(FC(200, input_dim=self.MODEL_IN, activation="swish", weight_decay=0.000025)) 122 | model.add(FC(200, activation="swish", weight_decay=0.00005)) 123 | model.add(FC(200, activation="swish", weight_decay=0.000075)) 124 | model.add(FC(200, activation="swish", weight_decay=0.000075)) 125 | model.add(FC(self.MODEL_OUT, weight_decay=0.0001)) 126 | model.finalize(tf.train.AdamOptimizer, {"learning_rate": 0.001}) 127 | return model 128 | 129 | def gp_constructor(self, model_init_cfg): 130 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap( 131 | name="model", 132 | kernel_class=get_required_argument(model_init_cfg, "kernel_class", "Must provide kernel class"), 133 | kernel_args=model_init_cfg.get("kernel_args", {}), 134 | num_inducing_points=get_required_argument( 135 | model_init_cfg, "num_inducing_points", "Must provide number of inducing points." 
136 | ), 137 | sess=self.SESS 138 | )) 139 | return model 140 | 141 | 142 | CONFIG_MODULE = GymCartpoleConfigModule 143 | -------------------------------------------------------------------------------- /dmbrl/config/gym_cheetah.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | from __future__ import absolute_import 4 | 5 | import numpy as np 6 | import tensorflow as tf 7 | from dotmap import DotMap 8 | 9 | from dmbrl.misc.DotmapUtils import get_required_argument 10 | from dmbrl.modeling.layers import FC 11 | 12 | 13 | class HalfCheetahConfigModule: 14 | """ 15 | @brief: migrate the gym module from the mbbl repo 16 | 'gym_cheetah': { 17 | 'path': 'mbbl.env.gym_env.walker', 18 | 'ob_size': 17, 'action_size': 6, 'max_length': 1000 19 | } 20 | """ 21 | ENV_NAME = "MBRLGYM_HalfCheetah-v0" 22 | TASK_HORIZON = 1000 23 | NTRAIN_ITERS = 300 24 | NROLLOUTS_PER_ITER = 1 25 | PLAN_HOR = 30 26 | INIT_VAR = 0.25 27 | MODEL_IN, MODEL_OUT = 23, 17 # obs - > 17, action 6 28 | GP_NINDUCING_POINTS = 300 29 | 30 | def __init__(self): 31 | # self.ENV = gym.make(self.ENV_NAME) 32 | from mbbl.env.gym_env import walker 33 | self.ENV = walker.env(env_name='gym_cheetah', rand_seed=1234, 34 | misc_info={'reset_type': 'gym'}) 35 | cfg = tf.ConfigProto() 36 | cfg.gpu_options.allow_growth = True 37 | self.SESS = tf.Session(config=cfg) 38 | self.NN_TRAIN_CFG = {"epochs": 5} 39 | self.OPT_CFG = { 40 | "Random": { 41 | "popsize": 2500 42 | }, 43 | "GBPRandom": { 44 | "popsize": 2500 45 | }, 46 | "GBPCEM": { 47 | "popsize": 500, 48 | "num_elites": 50, 49 | "max_iters": 5, 50 | "alpha": 0.1 51 | }, 52 | "CEM": { 53 | "popsize": 500, 54 | "num_elites": 50, 55 | "max_iters": 5, 56 | "alpha": 0.1 57 | }, 58 | "POPLIN-P": { 59 | "popsize": 500, 60 | "num_elites": 50, 61 | "max_iters": 5, 62 | "alpha": 0.1 63 | }, 64 | "POPLIN-A": { 65 | "popsize": 500, 66 | "num_elites": 50, 67 | "max_iters": 5, 68 | "alpha": 0.1 69 | } 70 | } 71 | 72 | @staticmethod 73 | def obs_preproc(obs): 74 | """ @brief: no cheating of the observation function 75 | """ 76 | if isinstance(obs, np.ndarray): 77 | return obs 78 | else: 79 | return obs 80 | 81 | @staticmethod 82 | def obs_postproc(obs, pred): 83 | if isinstance(obs, np.ndarray): 84 | return obs + pred 85 | else: 86 | return obs + pred 87 | 88 | @staticmethod 89 | def targ_proc(obs, next_obs): 90 | return next_obs - obs 91 | 92 | @staticmethod 93 | def obs_cost_fn(obs): 94 | """ @brief: 95 | see mbbl.env.gym_env.walker.py for reward details 96 | """ 97 | return -obs[:, 8] # the qvel for the root-x joint 98 | 99 | @staticmethod 100 | def ac_cost_fn(acs): 101 | if isinstance(acs, np.ndarray): 102 | return 0.1 * np.sum(np.square(acs), axis=1) 103 | else: 104 | return 0.1 * tf.reduce_sum(tf.square(acs), axis=1) 105 | 106 | def nn_constructor(self, model_init_cfg, misc=None): 107 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap( 108 | name="model", num_networks=get_required_argument(model_init_cfg, "num_nets", "Must provide ensemble size"), 109 | sess=self.SESS, load_model=model_init_cfg.get("load_model", False), 110 | model_dir=model_init_cfg.get("model_dir", None), 111 | misc=misc 112 | )) 113 | if not model_init_cfg.get("load_model", False): 114 | model.add(FC(200, input_dim=self.MODEL_IN, activation="swish", weight_decay=0.000025)) 115 | model.add(FC(200, activation="swish", weight_decay=0.00005)) 116 | model.add(FC(200, 
activation="swish", weight_decay=0.000075)) 117 | model.add(FC(200, activation="swish", weight_decay=0.000075)) 118 | model.add(FC(self.MODEL_OUT, weight_decay=0.0001)) 119 | model.finalize(tf.train.AdamOptimizer, {"learning_rate": 0.001}) 120 | return model 121 | 122 | def gp_constructor(self, model_init_cfg): 123 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap( 124 | name="model", 125 | kernel_class=get_required_argument(model_init_cfg, "kernel_class", "Must provide kernel class"), 126 | kernel_args=model_init_cfg.get("kernel_args", {}), 127 | num_inducing_points=get_required_argument( 128 | model_init_cfg, "num_inducing_points", "Must provide number of inducing points." 129 | ), 130 | sess=self.SESS 131 | )) 132 | return model 133 | 134 | 135 | CONFIG_MODULE = HalfCheetahConfigModule 136 | -------------------------------------------------------------------------------- /dmbrl/config/gym_fhopper.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | from __future__ import absolute_import 4 | 5 | import numpy as np 6 | import tensorflow as tf 7 | from dotmap import DotMap 8 | 9 | from dmbrl.misc.DotmapUtils import get_required_argument 10 | from dmbrl.modeling.layers import FC 11 | 12 | 13 | class FixedHopperConfigModule: 14 | ENV_NAME = "MBRLGYM_Hopper-v0" 15 | TASK_HORIZON = 1000 16 | NTRAIN_ITERS = 300 17 | NROLLOUTS_PER_ITER = 1 18 | PLAN_HOR = 30 19 | INIT_VAR = 0.25 20 | MODEL_IN, MODEL_OUT = 14, 11 # obs - > 11, action 3 21 | GP_NINDUCING_POINTS = 300 22 | 23 | def __init__(self): 24 | # self.ENV = gym.make(self.ENV_NAME) 25 | from mbbl.env.gym_env import fixed_walker 26 | self.ENV = fixed_walker.env(env_name='gym_fhopper', rand_seed=1234, 27 | misc_info={'reset_type': 'gym'}) 28 | cfg = tf.ConfigProto() 29 | cfg.gpu_options.allow_growth = True 30 | self.SESS = tf.Session(config=cfg) 31 | self.NN_TRAIN_CFG = {"epochs": 5} 32 | self.OPT_CFG = { 33 | "Random": { 34 | "popsize": 2500 35 | }, 36 | "GBPRandom": { 37 | "popsize": 2500 38 | }, 39 | "GBPCEM": { 40 | "popsize": 500, 41 | "num_elites": 50, 42 | "max_iters": 5, 43 | "alpha": 0.1 44 | }, 45 | "CEM": { 46 | "popsize": 500, 47 | "num_elites": 50, 48 | "max_iters": 5, 49 | "alpha": 0.1 50 | }, 51 | "POPLIN-P": { 52 | "popsize": 500, 53 | "num_elites": 50, 54 | "max_iters": 5, 55 | "alpha": 0.1 56 | }, 57 | "POPLIN-A": { 58 | "popsize": 500, 59 | "num_elites": 50, 60 | "max_iters": 5, 61 | "alpha": 0.1 62 | } 63 | } 64 | 65 | @staticmethod 66 | def obs_preproc(obs): 67 | """ @brief: no cheating of the observation function 68 | """ 69 | if isinstance(obs, np.ndarray): 70 | return obs 71 | else: 72 | return obs 73 | 74 | @staticmethod 75 | def obs_postproc(obs, pred): 76 | if isinstance(obs, np.ndarray): 77 | return obs + pred 78 | else: 79 | return obs + pred 80 | 81 | @staticmethod 82 | def targ_proc(obs, next_obs): 83 | return next_obs - obs 84 | 85 | @staticmethod 86 | def obs_cost_fn(obs): 87 | """ @brief: 88 | see mbbl.env.gym_env.walker.py for reward details 89 | """ 90 | if isinstance(obs, np.ndarray): 91 | velocity_cost = -obs[:, 5] # the qvel for the root-x joint 92 | height_cost = 3 * np.square(obs[:, 0] - 1.3) # the height 93 | # height, ang = ob[0], ob[1] 94 | done = (obs[:, 0] <= 0.7) or (abs(obs[:, 1]) >= 0.2) 95 | alive_reward = 1.0 - np.array(done, dtype=np.float) 96 | return velocity_cost + height_cost - alive_reward 97 | else: 98 | velocity_cost = -obs[:, 
5] # the qvel for the root-x joint 99 | height_cost = 3 * tf.square(obs[:, 0] - 1.3) # the height 100 | done = tf.logical_or(obs[:, 0] <= 0.7, tf.abs(obs[:, 1]) >= 0.2) 101 | alive_reward = 1.0 - tf.cast(done, dtype=velocity_cost.dtype) 102 | return velocity_cost + height_cost - alive_reward 103 | 104 | @staticmethod 105 | def ac_cost_fn(acs): 106 | if isinstance(acs, np.ndarray): 107 | return 0.1 * np.sum(np.square(acs), axis=1) 108 | else: 109 | return 0.1 * tf.reduce_sum(tf.square(acs), axis=1) 110 | 111 | def nn_constructor(self, model_init_cfg, misc=None): 112 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap( 113 | name="model", num_networks=get_required_argument(model_init_cfg, "num_nets", "Must provide ensemble size"), 114 | sess=self.SESS, load_model=model_init_cfg.get("load_model", False), 115 | model_dir=model_init_cfg.get("model_dir", None), 116 | misc=misc 117 | )) 118 | if not model_init_cfg.get("load_model", False): 119 | model.add(FC(200, input_dim=self.MODEL_IN, activation="swish", weight_decay=0.000025)) 120 | model.add(FC(200, activation="swish", weight_decay=0.00005)) 121 | model.add(FC(200, activation="swish", weight_decay=0.000075)) 122 | model.add(FC(200, activation="swish", weight_decay=0.000075)) 123 | model.add(FC(self.MODEL_OUT, weight_decay=0.0001)) 124 | model.finalize(tf.train.AdamOptimizer, {"learning_rate": 0.001}) 125 | return model 126 | 127 | def gp_constructor(self, model_init_cfg): 128 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap( 129 | name="model", 130 | kernel_class=get_required_argument(model_init_cfg, "kernel_class", "Must provide kernel class"), 131 | kernel_args=model_init_cfg.get("kernel_args", {}), 132 | num_inducing_points=get_required_argument( 133 | model_init_cfg, "num_inducing_points", "Must provide number of inducing points." 
134 | ), 135 | sess=self.SESS 136 | )) 137 | return model 138 | 139 | 140 | CONFIG_MODULE = FixedHopperConfigModule 141 | -------------------------------------------------------------------------------- /dmbrl/config/gym_fswimmer.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | from __future__ import absolute_import 4 | 5 | import numpy as np 6 | import tensorflow as tf 7 | from dotmap import DotMap 8 | 9 | from dmbrl.misc.DotmapUtils import get_required_argument 10 | from dmbrl.modeling.layers import FC 11 | 12 | 13 | class FixedSwimmerConfigModule: 14 | """ 15 | """ 16 | ENV_NAME = "MBRLGYM_SWIMMER-v0" 17 | TASK_HORIZON = 1000 18 | NTRAIN_ITERS = 300 19 | NROLLOUTS_PER_ITER = 1 20 | PLAN_HOR = 30 21 | INIT_VAR = 0.25 22 | MODEL_IN, MODEL_OUT = 11, 9 # obs - > 8 + 1, action 2 23 | GP_NINDUCING_POINTS = 300 24 | 25 | def __init__(self): 26 | # self.ENV = gym.make(self.ENV_NAME) 27 | from mbbl.env.gym_env import fixed_swimmer 28 | self.ENV = fixed_swimmer.env(env_name='gym_fswimmer', rand_seed=1234, 29 | misc_info={'reset_type': 'gym'}) 30 | cfg = tf.ConfigProto() 31 | cfg.gpu_options.allow_growth = True 32 | self.SESS = tf.Session(config=cfg) 33 | self.NN_TRAIN_CFG = {"epochs": 5} 34 | self.OPT_CFG = { 35 | "Random": { 36 | "popsize": 2500 37 | }, 38 | "GBPRandom": { 39 | "popsize": 2500 40 | }, 41 | "GBPCEM": { 42 | "popsize": 500, 43 | "num_elites": 50, 44 | "max_iters": 5, 45 | "alpha": 0.1 46 | }, 47 | "CEM": { 48 | "popsize": 500, 49 | "num_elites": 50, 50 | "max_iters": 5, 51 | "alpha": 0.1 52 | }, 53 | "POPLIN-P": { 54 | "popsize": 500, 55 | "num_elites": 50, 56 | "max_iters": 5, 57 | "alpha": 0.1 58 | }, 59 | "POPLIN-A": { 60 | "popsize": 500, 61 | "num_elites": 50, 62 | "max_iters": 5, 63 | "alpha": 0.1 64 | } 65 | } 66 | 67 | @staticmethod 68 | def obs_preproc(obs): 69 | """ @brief: no cheating of the observation function 70 | """ 71 | if isinstance(obs, np.ndarray): 72 | return obs 73 | else: 74 | return obs 75 | 76 | @staticmethod 77 | def obs_postproc(obs, pred): 78 | if isinstance(obs, np.ndarray): 79 | return obs + pred 80 | else: 81 | return obs + pred 82 | 83 | @staticmethod 84 | def targ_proc(obs, next_obs): 85 | return next_obs - obs 86 | 87 | @staticmethod 88 | def obs_cost_fn(obs): 89 | """ @brief: 90 | see mbbl.env.gym_env.walker.py for reward details 91 | """ 92 | if isinstance(obs, np.ndarray): 93 | velocity_cost = -obs[:, -1] # the qvel for the root-x joint 94 | return velocity_cost 95 | else: 96 | velocity_cost = -obs[:, -1] # the qvel for the root-x joint 97 | return velocity_cost 98 | 99 | @staticmethod 100 | def ac_cost_fn(acs): 101 | if isinstance(acs, np.ndarray): 102 | return 0.0001 * np.sum(np.square(acs), axis=1) 103 | else: 104 | return 0.0001 * tf.reduce_sum(tf.square(acs), axis=1) 105 | 106 | def nn_constructor(self, model_init_cfg, misc=None): 107 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap( 108 | name="model", num_networks=get_required_argument(model_init_cfg, "num_nets", "Must provide ensemble size"), 109 | sess=self.SESS, load_model=model_init_cfg.get("load_model", False), 110 | model_dir=model_init_cfg.get("model_dir", None), 111 | misc=misc 112 | )) 113 | if not model_init_cfg.get("load_model", False): 114 | model.add(FC(200, input_dim=self.MODEL_IN, activation="swish", weight_decay=0.000025)) 115 | model.add(FC(200, activation="swish", weight_decay=0.00005)) 116 | 
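            # Two more 200-unit swish layers follow, then a linear head of width
            # MODEL_OUT (here 9: the 8 swimmer observation dims plus the appended
            # root-x velocity), matching the architecture of the other config modules.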
model.add(FC(200, activation="swish", weight_decay=0.000075)) 117 | model.add(FC(200, activation="swish", weight_decay=0.000075)) 118 | model.add(FC(self.MODEL_OUT, weight_decay=0.0001)) 119 | model.finalize(tf.train.AdamOptimizer, {"learning_rate": 0.001}) 120 | return model 121 | 122 | def gp_constructor(self, model_init_cfg): 123 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap( 124 | name="model", 125 | kernel_class=get_required_argument(model_init_cfg, "kernel_class", "Must provide kernel class"), 126 | kernel_args=model_init_cfg.get("kernel_args", {}), 127 | num_inducing_points=get_required_argument( 128 | model_init_cfg, "num_inducing_points", "Must provide number of inducing points." 129 | ), 130 | sess=self.SESS 131 | )) 132 | return model 133 | 134 | 135 | CONFIG_MODULE = FixedSwimmerConfigModule 136 | -------------------------------------------------------------------------------- /dmbrl/config/gym_hopper.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | from __future__ import absolute_import 4 | 5 | import numpy as np 6 | import tensorflow as tf 7 | from dotmap import DotMap 8 | 9 | from dmbrl.misc.DotmapUtils import get_required_argument 10 | from dmbrl.modeling.layers import FC 11 | 12 | 13 | class HopperConfigModule: 14 | ENV_NAME = "MBRLGYM_Hopper-v0" 15 | TASK_HORIZON = 1000 16 | NTRAIN_ITERS = 300 17 | NROLLOUTS_PER_ITER = 1 18 | PLAN_HOR = 30 19 | INIT_VAR = 0.25 20 | MODEL_IN, MODEL_OUT = 14, 11 # obs - > 11, action 3 21 | GP_NINDUCING_POINTS = 300 22 | 23 | def __init__(self): 24 | # self.ENV = gym.make(self.ENV_NAME) 25 | from mbbl.env.gym_env import walker 26 | self.ENV = walker.env(env_name='gym_hopper', rand_seed=1234, 27 | misc_info={'reset_type': 'gym'}) 28 | cfg = tf.ConfigProto() 29 | cfg.gpu_options.allow_growth = True 30 | self.SESS = tf.Session(config=cfg) 31 | self.NN_TRAIN_CFG = {"epochs": 5} 32 | self.OPT_CFG = { 33 | "Random": { 34 | "popsize": 2500 35 | }, 36 | "GBPRandom": { 37 | "popsize": 2500 38 | }, 39 | "GBPCEM": { 40 | "popsize": 500, 41 | "num_elites": 50, 42 | "max_iters": 5, 43 | "alpha": 0.1 44 | }, 45 | "CEM": { 46 | "popsize": 500, 47 | "num_elites": 50, 48 | "max_iters": 5, 49 | "alpha": 0.1 50 | }, 51 | "POPLIN-P": { 52 | "popsize": 500, 53 | "num_elites": 50, 54 | "max_iters": 5, 55 | "alpha": 0.1 56 | }, 57 | "POPLIN-A": { 58 | "popsize": 500, 59 | "num_elites": 50, 60 | "max_iters": 5, 61 | "alpha": 0.1 62 | } 63 | } 64 | 65 | @staticmethod 66 | def obs_preproc(obs): 67 | """ @brief: no cheating of the observation function 68 | """ 69 | if isinstance(obs, np.ndarray): 70 | return obs 71 | else: 72 | return obs 73 | 74 | @staticmethod 75 | def obs_postproc(obs, pred): 76 | if isinstance(obs, np.ndarray): 77 | return obs + pred 78 | else: 79 | return obs + pred 80 | 81 | @staticmethod 82 | def targ_proc(obs, next_obs): 83 | return next_obs - obs 84 | 85 | @staticmethod 86 | def obs_cost_fn(obs): 87 | """ @brief: 88 | see mbbl.env.gym_env.walker.py for reward details 89 | """ 90 | if isinstance(obs, np.ndarray): 91 | velocity_cost = -obs[:, 5] # the qvel for the root-x joint 92 | height_cost = 3 * np.square(obs[:, 0] - 1.3) # the height 93 | return velocity_cost + height_cost 94 | else: 95 | velocity_cost = -obs[:, 5] # the qvel for the root-x joint 96 | height_cost = 3 * tf.square(obs[:, 0] - 1.3) # the height 97 | return velocity_cost + height_cost 98 | 99 | @staticmethod 
100 | def ac_cost_fn(acs): 101 | if isinstance(acs, np.ndarray): 102 | return 0.1 * np.sum(np.square(acs), axis=1) 103 | else: 104 | return 0.1 * tf.reduce_sum(tf.square(acs), axis=1) 105 | 106 | def nn_constructor(self, model_init_cfg, misc=None): 107 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap( 108 | name="model", num_networks=get_required_argument(model_init_cfg, "num_nets", "Must provide ensemble size"), 109 | sess=self.SESS, load_model=model_init_cfg.get("load_model", False), 110 | model_dir=model_init_cfg.get("model_dir", None), 111 | misc=misc 112 | )) 113 | if not model_init_cfg.get("load_model", False): 114 | model.add(FC(200, input_dim=self.MODEL_IN, activation="swish", weight_decay=0.000025)) 115 | model.add(FC(200, activation="swish", weight_decay=0.00005)) 116 | model.add(FC(200, activation="swish", weight_decay=0.000075)) 117 | model.add(FC(200, activation="swish", weight_decay=0.000075)) 118 | model.add(FC(self.MODEL_OUT, weight_decay=0.0001)) 119 | model.finalize(tf.train.AdamOptimizer, {"learning_rate": 0.001}) 120 | return model 121 | 122 | def gp_constructor(self, model_init_cfg): 123 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap( 124 | name="model", 125 | kernel_class=get_required_argument(model_init_cfg, "kernel_class", "Must provide kernel class"), 126 | kernel_args=model_init_cfg.get("kernel_args", {}), 127 | num_inducing_points=get_required_argument( 128 | model_init_cfg, "num_inducing_points", "Must provide number of inducing points." 129 | ), 130 | sess=self.SESS 131 | )) 132 | return model 133 | 134 | 135 | CONFIG_MODULE = HopperConfigModule 136 | -------------------------------------------------------------------------------- /dmbrl/config/gym_invertedPendulum.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | from __future__ import absolute_import 4 | 5 | import numpy as np 6 | import tensorflow as tf 7 | from dotmap import DotMap 8 | 9 | from dmbrl.misc.DotmapUtils import get_required_argument 10 | from dmbrl.modeling.layers import FC 11 | """ 12 | Module name, ENV_NAME 13 | MODEL_IN, MODEL_OUT, 14 | import env, env_name 15 | """ 16 | 17 | 18 | class GymINVPendulumConfigModule: 19 | ENV_NAME = "MBRLGYM_invpendulum-v0" 20 | TASK_HORIZON = 1000 21 | NTRAIN_ITERS = 300 22 | NROLLOUTS_PER_ITER = 1 23 | PLAN_HOR = 30 24 | INIT_VAR = 0.25 25 | MODEL_IN, MODEL_OUT = 5, 4 # obs -> 4, action -> 1 26 | GP_NINDUCING_POINTS = 300 27 | 28 | def __init__(self): 29 | # self.ENV = gym.make(self.ENV_NAME) 30 | from mbbl.env.gym_env import invertedPendulum 31 | self.ENV = invertedPendulum.env( 32 | env_name='gym_invertedPendulum', rand_seed=1234, 33 | misc_info={'reset_type': 'gym'} 34 | ) 35 | cfg = tf.ConfigProto() 36 | cfg.gpu_options.allow_growth = True 37 | self.SESS = tf.Session(config=cfg) 38 | self.NN_TRAIN_CFG = {"epochs": 5} 39 | self.OPT_CFG = { 40 | "Random": { 41 | "popsize": 2500 42 | }, 43 | "GBPRandom": { 44 | "popsize": 2500 45 | }, 46 | "GBPCEM": { 47 | "popsize": 500, 48 | "num_elites": 50, 49 | "max_iters": 5, 50 | "alpha": 0.1 51 | }, 52 | "CEM": { 53 | "popsize": 500, 54 | "num_elites": 50, 55 | "max_iters": 5, 56 | "alpha": 0.1 57 | }, 58 | "POPLIN-P": { 59 | "popsize": 500, 60 | "num_elites": 50, 61 | "max_iters": 5, 62 | "alpha": 0.1 63 | }, 64 | "POPLIN-A": { 65 | "popsize": 500, 66 | "num_elites": 50, 67 | "max_iters": 5, 68 | 
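                # alpha is the smoothing coefficient of the CEM-style update: after each
                # of the max_iters iterations, the sampling mean and variance move toward
                # the statistics of the num_elites best candidates while retaining an
                # alpha-weighted fraction of the previous distribution.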
"alpha": 0.1 69 | } 70 | } 71 | 72 | @staticmethod 73 | def obs_preproc(obs): 74 | """ @brief: no cheating of the observation function 75 | """ 76 | if isinstance(obs, np.ndarray): 77 | return obs 78 | else: 79 | return obs 80 | 81 | @staticmethod 82 | def obs_postproc(obs, pred): 83 | if isinstance(obs, np.ndarray): 84 | return obs + pred 85 | else: 86 | return obs + pred 87 | 88 | @staticmethod 89 | def targ_proc(obs, next_obs): 90 | return next_obs - obs 91 | 92 | @staticmethod 93 | def obs_cost_fn(obs): 94 | """ @brief: 95 | see mbbl.env.gym_env.walker.py for reward details 96 | 97 | # ypos penalty 98 | ypos = data_dict['start_state'][ypos_ob_pos] 99 | ypos_reward = -(ypos - ypos_target) ** 2 100 | """ 101 | return obs[:, 1] ** 2 102 | 103 | @staticmethod 104 | def ac_cost_fn(acs): 105 | if isinstance(acs, np.ndarray): 106 | return np.sum(np.square(acs), axis=1) * 0.0 107 | else: 108 | return tf.reduce_sum(tf.square(acs), axis=1) * 0.0 109 | 110 | def nn_constructor(self, model_init_cfg, misc=None): 111 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap( 112 | name="model", num_networks=get_required_argument(model_init_cfg, "num_nets", "Must provide ensemble size"), 113 | sess=self.SESS, load_model=model_init_cfg.get("load_model", False), 114 | model_dir=model_init_cfg.get("model_dir", None), 115 | misc=misc 116 | )) 117 | if not model_init_cfg.get("load_model", False): 118 | model.add(FC(200, input_dim=self.MODEL_IN, activation="swish", weight_decay=0.000025)) 119 | model.add(FC(200, activation="swish", weight_decay=0.00005)) 120 | model.add(FC(200, activation="swish", weight_decay=0.000075)) 121 | model.add(FC(200, activation="swish", weight_decay=0.000075)) 122 | model.add(FC(self.MODEL_OUT, weight_decay=0.0001)) 123 | model.finalize(tf.train.AdamOptimizer, {"learning_rate": 0.001}) 124 | return model 125 | 126 | def gp_constructor(self, model_init_cfg): 127 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap( 128 | name="model", 129 | kernel_class=get_required_argument(model_init_cfg, "kernel_class", "Must provide kernel class"), 130 | kernel_args=model_init_cfg.get("kernel_args", {}), 131 | num_inducing_points=get_required_argument( 132 | model_init_cfg, "num_inducing_points", "Must provide number of inducing points." 
133 | ), 134 | sess=self.SESS 135 | )) 136 | return model 137 | 138 | 139 | CONFIG_MODULE = GymINVPendulumConfigModule 140 | -------------------------------------------------------------------------------- /dmbrl/config/gym_pendulum.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | from __future__ import absolute_import 4 | 5 | import numpy as np 6 | import tensorflow as tf 7 | from dotmap import DotMap 8 | 9 | from dmbrl.misc.DotmapUtils import get_required_argument 10 | from dmbrl.modeling.layers import FC 11 | """ 12 | Module name, 13 | MODEL_IN, MODEL_OUT, 14 | import env, env_name 15 | """ 16 | 17 | 18 | class GymPendulumConfigModule: 19 | ENV_NAME = "MBRLGYM_pendulum-v0" 20 | TASK_HORIZON = 1000 21 | NTRAIN_ITERS = 300 22 | NROLLOUTS_PER_ITER = 1 23 | PLAN_HOR = 30 24 | INIT_VAR = 0.25 25 | MODEL_IN, MODEL_OUT = 4, 3 # obs -> 3, action -> 1 26 | GP_NINDUCING_POINTS = 300 27 | 28 | def __init__(self): 29 | # self.ENV = gym.make(self.ENV_NAME) 30 | from mbbl.env.gym_env import pendulum 31 | self.ENV = pendulum.env(env_name='gym_pendulum', rand_seed=1234, 32 | misc_info={'reset_type': 'gym'}) 33 | cfg = tf.ConfigProto() 34 | cfg.gpu_options.allow_growth = True 35 | self.SESS = tf.Session(config=cfg) 36 | self.NN_TRAIN_CFG = {"epochs": 5} 37 | self.OPT_CFG = { 38 | "Random": { 39 | "popsize": 2500 40 | }, 41 | "GBPRandom": { 42 | "popsize": 2500 43 | }, 44 | "GBPCEM": { 45 | "popsize": 500, 46 | "num_elites": 50, 47 | "max_iters": 5, 48 | "alpha": 0.1 49 | }, 50 | "CEM": { 51 | "popsize": 500, 52 | "num_elites": 50, 53 | "max_iters": 5, 54 | "alpha": 0.1 55 | }, 56 | "POPLIN-P": { 57 | "popsize": 500, 58 | "num_elites": 50, 59 | "max_iters": 5, 60 | "alpha": 0.1 61 | }, 62 | "POPLIN-A": { 63 | "popsize": 500, 64 | "num_elites": 50, 65 | "max_iters": 5, 66 | "alpha": 0.1 67 | } 68 | } 69 | 70 | @staticmethod 71 | def obs_preproc(obs): 72 | """ @brief: no cheating of the observation function 73 | """ 74 | if isinstance(obs, np.ndarray): 75 | return obs 76 | else: 77 | return obs 78 | 79 | @staticmethod 80 | def obs_postproc(obs, pred): 81 | if isinstance(obs, np.ndarray): 82 | return obs + pred 83 | else: 84 | return obs + pred 85 | 86 | @staticmethod 87 | def targ_proc(obs, next_obs): 88 | return next_obs - obs 89 | 90 | @staticmethod 91 | def obs_cost_fn(obs): 92 | """ @brief: 93 | see mbbl.env.gym_env.walker.py for reward details 94 | 95 | def reward(data_dict): 96 | action = data_dict['action'] 97 | true_action = action * self._env.env.max_torque 98 | 99 | max_torque = self._env.env.max_torque 100 | torque = np.clip(true_action, -max_torque, max_torque)[0] 101 | 102 | y, x, thetadot = data_dict['start_state'] 103 | 104 | costs = y + .1 * x + .1 * (thetadot ** 2) + .001 * (torque ** 2) 105 | # note: reward is the negative cost 106 | return -costs 107 | """ 108 | y = obs[:, 0] 109 | x = obs[:, 1] 110 | thetadot = obs[:, 2] 111 | cost = y + tf.abs(0.1 * x) + 0.1 * (thetadot ** 2) 112 | return cost 113 | 114 | @staticmethod 115 | def ac_cost_fn(acs): 116 | max_torque = 2.0 117 | 118 | if isinstance(acs, np.ndarray): 119 | clip_torque = np.clip(acs, -max_torque, max_torque) 120 | return 0.001 * np.sum(np.square(clip_torque), axis=1) 121 | else: 122 | clip_torque = tf.clip_by_value(acs, -max_torque, max_torque) 123 | return 0.001 * tf.reduce_sum(tf.square(clip_torque), axis=1) 124 | 125 | def nn_constructor(self, model_init_cfg, misc=None): 126 | model = 
get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap( 127 | name="model", num_networks=get_required_argument(model_init_cfg, "num_nets", "Must provide ensemble size"), 128 | sess=self.SESS, load_model=model_init_cfg.get("load_model", False), 129 | model_dir=model_init_cfg.get("model_dir", None), 130 | misc=misc 131 | )) 132 | if not model_init_cfg.get("load_model", False): 133 | model.add(FC(200, input_dim=self.MODEL_IN, activation="swish", weight_decay=0.000025)) 134 | model.add(FC(200, activation="swish", weight_decay=0.00005)) 135 | model.add(FC(200, activation="swish", weight_decay=0.000075)) 136 | model.add(FC(200, activation="swish", weight_decay=0.000075)) 137 | model.add(FC(self.MODEL_OUT, weight_decay=0.0001)) 138 | model.finalize(tf.train.AdamOptimizer, {"learning_rate": 0.001}) 139 | return model 140 | 141 | def gp_constructor(self, model_init_cfg): 142 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap( 143 | name="model", 144 | kernel_class=get_required_argument(model_init_cfg, "kernel_class", "Must provide kernel class"), 145 | kernel_args=model_init_cfg.get("kernel_args", {}), 146 | num_inducing_points=get_required_argument( 147 | model_init_cfg, "num_inducing_points", "Must provide number of inducing points." 148 | ), 149 | sess=self.SESS 150 | )) 151 | return model 152 | 153 | 154 | CONFIG_MODULE = GymPendulumConfigModule 155 | -------------------------------------------------------------------------------- /dmbrl/config/gym_reacher.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | from __future__ import absolute_import 4 | 5 | import numpy as np 6 | import tensorflow as tf 7 | from dotmap import DotMap 8 | 9 | from dmbrl.misc.DotmapUtils import get_required_argument 10 | from dmbrl.modeling.layers import FC 11 | 12 | 13 | class ReacherConfigModule: 14 | ENV_NAME = "MBRLGYM_Reacher-v0" 15 | TASK_HORIZON = 1000 16 | NTRAIN_ITERS = 300 17 | NROLLOUTS_PER_ITER = 1 18 | PLAN_HOR = 30 19 | INIT_VAR = 0.25 20 | MODEL_IN, MODEL_OUT = 13, 11 # obs - > 11, action 2 21 | GP_NINDUCING_POINTS = 300 22 | 23 | def __init__(self): 24 | # self.ENV = gym.make(self.ENV_NAME) 25 | from mbbl.env.gym_env import reacher 26 | self.ENV = reacher.env(env_name='gym_reacher', rand_seed=1234, 27 | misc_info={'reset_type': 'gym'}) 28 | cfg = tf.ConfigProto() 29 | cfg.gpu_options.allow_growth = True 30 | self.SESS = tf.Session(config=cfg) 31 | self.NN_TRAIN_CFG = {"epochs": 5} 32 | self.OPT_CFG = { 33 | "Random": { 34 | "popsize": 2500 35 | }, 36 | "GBPRandom": { 37 | "popsize": 2500 38 | }, 39 | "GBPCEM": { 40 | "popsize": 500, 41 | "num_elites": 50, 42 | "max_iters": 5, 43 | "alpha": 0.1 44 | }, 45 | "CEM": { 46 | "popsize": 500, 47 | "num_elites": 50, 48 | "max_iters": 5, 49 | "alpha": 0.1 50 | }, 51 | "POPLIN-P": { 52 | "popsize": 500, 53 | "num_elites": 50, 54 | "max_iters": 5, 55 | "alpha": 0.1 56 | }, 57 | "POPLIN-A": { 58 | "popsize": 500, 59 | "num_elites": 50, 60 | "max_iters": 5, 61 | "alpha": 0.1 62 | } 63 | } 64 | 65 | @staticmethod 66 | def obs_preproc(obs): 67 | """ @brief: no cheating of the observation function 68 | """ 69 | if isinstance(obs, np.ndarray): 70 | return obs 71 | else: 72 | return obs 73 | 74 | @staticmethod 75 | def obs_postproc(obs, pred): 76 | if isinstance(obs, np.ndarray): 77 | return obs + pred 78 | else: 79 | return obs + pred 80 | 81 | @staticmethod 82 | def 
targ_proc(obs, next_obs): 83 | return next_obs - obs 84 | 85 | @staticmethod 86 | def obs_cost_fn(obs): 87 | """ @brief: 88 | see mbbl.env.gym_env.reacher.py for reward details 89 | """ 90 | if isinstance(obs, np.ndarray): 91 | return np.linalg.norm(obs[:, -3:], axis=1) 92 | else: 93 | return tf.linalg.norm(obs[:, -3:], axis=1) 94 | 95 | @staticmethod 96 | def ac_cost_fn(acs): 97 | if isinstance(acs, np.ndarray): 98 | return np.sum(np.square(acs), axis=1) 99 | else: 100 | return tf.reduce_sum(tf.square(acs), axis=1) 101 | 102 | def nn_constructor(self, model_init_cfg, misc=None): 103 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap( 104 | name="model", num_networks=get_required_argument(model_init_cfg, "num_nets", "Must provide ensemble size"), 105 | sess=self.SESS, load_model=model_init_cfg.get("load_model", False), 106 | model_dir=model_init_cfg.get("model_dir", None), 107 | misc=misc 108 | )) 109 | if not model_init_cfg.get("load_model", False): 110 | model.add(FC(200, input_dim=self.MODEL_IN, activation="swish", weight_decay=0.000025)) 111 | model.add(FC(200, activation="swish", weight_decay=0.00005)) 112 | model.add(FC(200, activation="swish", weight_decay=0.000075)) 113 | model.add(FC(200, activation="swish", weight_decay=0.000075)) 114 | model.add(FC(self.MODEL_OUT, weight_decay=0.0001)) 115 | model.finalize(tf.train.AdamOptimizer, {"learning_rate": 0.001}) 116 | return model 117 | 118 | def gp_constructor(self, model_init_cfg): 119 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap( 120 | name="model", 121 | kernel_class=get_required_argument(model_init_cfg, "kernel_class", "Must provide kernel class"), 122 | kernel_args=model_init_cfg.get("kernel_args", {}), 123 | num_inducing_points=get_required_argument( 124 | model_init_cfg, "num_inducing_points", "Must provide number of inducing points."
125 | ), 126 | sess=self.SESS 127 | )) 128 | return model 129 | 130 | 131 | CONFIG_MODULE = ReacherConfigModule 132 | -------------------------------------------------------------------------------- /dmbrl/config/gym_swimmer.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | from __future__ import absolute_import 4 | 5 | import numpy as np 6 | import tensorflow as tf 7 | from dotmap import DotMap 8 | 9 | from dmbrl.misc.DotmapUtils import get_required_argument 10 | from dmbrl.modeling.layers import FC 11 | 12 | 13 | class SwimmerConfigModule: 14 | """ 15 | @brief: migrate the gym module from the mbbl repo 16 | 'gym_cheetah': { 17 | 'path': 'mbbl.env.gym_env.walker', 18 | 'ob_size': 17, 'action_size': 6, 'max_length': 1000 19 | } 20 | """ 21 | ENV_NAME = "MBRLGYM_SWIMMER-v0" 22 | TASK_HORIZON = 1000 23 | NTRAIN_ITERS = 300 24 | NROLLOUTS_PER_ITER = 1 25 | PLAN_HOR = 30 26 | INIT_VAR = 0.25 27 | MODEL_IN, MODEL_OUT = 10, 8 # obs - > 8, action 2 28 | GP_NINDUCING_POINTS = 300 29 | 30 | def __init__(self): 31 | # self.ENV = gym.make(self.ENV_NAME) 32 | from mbbl.env.gym_env import walker 33 | self.ENV = walker.env(env_name='gym_swimmer', rand_seed=1234, 34 | misc_info={'reset_type': 'gym'}) 35 | cfg = tf.ConfigProto() 36 | cfg.gpu_options.allow_growth = True 37 | self.SESS = tf.Session(config=cfg) 38 | self.NN_TRAIN_CFG = {"epochs": 5} 39 | self.OPT_CFG = { 40 | "Random": { 41 | "popsize": 2500 42 | }, 43 | "GBPRandom": { 44 | "popsize": 2500 45 | }, 46 | "GBPCEM": { 47 | "popsize": 500, 48 | "num_elites": 50, 49 | "max_iters": 5, 50 | "alpha": 0.1 51 | }, 52 | "CEM": { 53 | "popsize": 500, 54 | "num_elites": 50, 55 | "max_iters": 5, 56 | "alpha": 0.1 57 | }, 58 | "POPLIN-P": { 59 | "popsize": 500, 60 | "num_elites": 50, 61 | "max_iters": 5, 62 | "alpha": 0.1 63 | }, 64 | "POPLIN-A": { 65 | "popsize": 500, 66 | "num_elites": 50, 67 | "max_iters": 5, 68 | "alpha": 0.1 69 | } 70 | } 71 | 72 | @staticmethod 73 | def obs_preproc(obs): 74 | """ @brief: no cheating of the observation function 75 | """ 76 | if isinstance(obs, np.ndarray): 77 | return obs 78 | else: 79 | return obs 80 | 81 | @staticmethod 82 | def obs_postproc(obs, pred): 83 | if isinstance(obs, np.ndarray): 84 | return obs + pred 85 | else: 86 | return obs + pred 87 | 88 | @staticmethod 89 | def targ_proc(obs, next_obs): 90 | return next_obs - obs 91 | 92 | @staticmethod 93 | def obs_cost_fn(obs): 94 | """ @brief: 95 | see mbbl.env.gym_env.walker.py for reward details 96 | """ 97 | if isinstance(obs, np.ndarray): 98 | velocity_cost = -obs[:, 3] # the qvel for the root-x joint 99 | return velocity_cost 100 | else: 101 | velocity_cost = -obs[:, 3] # the qvel for the root-x joint 102 | return velocity_cost 103 | 104 | @staticmethod 105 | def ac_cost_fn(acs): 106 | if isinstance(acs, np.ndarray): 107 | return 0.0001 * np.sum(np.square(acs), axis=1) 108 | else: 109 | return 0.0001 * tf.reduce_sum(tf.square(acs), axis=1) 110 | 111 | def nn_constructor(self, model_init_cfg, misc=None): 112 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap( 113 | name="model", num_networks=get_required_argument(model_init_cfg, "num_nets", "Must provide ensemble size"), 114 | sess=self.SESS, load_model=model_init_cfg.get("load_model", False), 115 | model_dir=model_init_cfg.get("model_dir", None), 116 | misc=misc 117 | )) 118 | if not model_init_cfg.get("load_model", False): 119 | model.add(FC(200, 
input_dim=self.MODEL_IN, activation="swish", weight_decay=0.000025)) 120 | model.add(FC(200, activation="swish", weight_decay=0.00005)) 121 | model.add(FC(200, activation="swish", weight_decay=0.000075)) 122 | model.add(FC(200, activation="swish", weight_decay=0.000075)) 123 | model.add(FC(self.MODEL_OUT, weight_decay=0.0001)) 124 | model.finalize(tf.train.AdamOptimizer, {"learning_rate": 0.001}) 125 | return model 126 | 127 | def gp_constructor(self, model_init_cfg): 128 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap( 129 | name="model", 130 | kernel_class=get_required_argument(model_init_cfg, "kernel_class", "Must provide kernel class"), 131 | kernel_args=model_init_cfg.get("kernel_args", {}), 132 | num_inducing_points=get_required_argument( 133 | model_init_cfg, "num_inducing_points", "Must provide number of inducing points." 134 | ), 135 | sess=self.SESS 136 | )) 137 | return model 138 | 139 | 140 | CONFIG_MODULE = SwimmerConfigModule 141 | -------------------------------------------------------------------------------- /dmbrl/config/gym_walker2d.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | from __future__ import absolute_import 4 | 5 | import numpy as np 6 | import tensorflow as tf 7 | from dotmap import DotMap 8 | 9 | from dmbrl.misc.DotmapUtils import get_required_argument 10 | from dmbrl.modeling.layers import FC 11 | 12 | 13 | class WalkerConfigModule: 14 | """ 15 | @brief: migrate the gym module from the mbbl repo 16 | 'gym_cheetah': { 17 | 'path': 'mbbl.env.gym_env.walker', 18 | 'ob_size': 17, 'action_size': 6, 'max_length': 1000 19 | } 20 | """ 21 | ENV_NAME = "MBRLGYM_Walker-v0" 22 | TASK_HORIZON = 1000 23 | NTRAIN_ITERS = 300 24 | NROLLOUTS_PER_ITER = 1 25 | PLAN_HOR = 30 26 | INIT_VAR = 0.25 27 | MODEL_IN, MODEL_OUT = 23, 17 # obs - > 17, action 6 28 | GP_NINDUCING_POINTS = 300 29 | 30 | def __init__(self): 31 | # self.ENV = gym.make(self.ENV_NAME) 32 | from mbbl.env.gym_env import walker 33 | self.ENV = walker.env(env_name='gym_walker2d', rand_seed=1234, 34 | misc_info={'reset_type': 'gym'}) 35 | cfg = tf.ConfigProto() 36 | cfg.gpu_options.allow_growth = True 37 | self.SESS = tf.Session(config=cfg) 38 | self.NN_TRAIN_CFG = {"epochs": 5} 39 | self.OPT_CFG = { 40 | "Random": { 41 | "popsize": 2500 42 | }, 43 | "GBPRandom": { 44 | "popsize": 2500 45 | }, 46 | "GBPCEM": { 47 | "popsize": 500, 48 | "num_elites": 50, 49 | "max_iters": 5, 50 | "alpha": 0.1 51 | }, 52 | "CEM": { 53 | "popsize": 500, 54 | "num_elites": 50, 55 | "max_iters": 5, 56 | "alpha": 0.1 57 | }, 58 | "POPLIN-P": { 59 | "popsize": 500, 60 | "num_elites": 50, 61 | "max_iters": 5, 62 | "alpha": 0.1 63 | }, 64 | "POPLIN-A": { 65 | "popsize": 500, 66 | "num_elites": 50, 67 | "max_iters": 5, 68 | "alpha": 0.1 69 | } 70 | } 71 | 72 | @staticmethod 73 | def obs_preproc(obs): 74 | """ @brief: no cheating of the observation function 75 | """ 76 | if isinstance(obs, np.ndarray): 77 | return obs 78 | else: 79 | return obs 80 | 81 | @staticmethod 82 | def obs_postproc(obs, pred): 83 | if isinstance(obs, np.ndarray): 84 | return obs + pred 85 | else: 86 | return obs + pred 87 | 88 | @staticmethod 89 | def targ_proc(obs, next_obs): 90 | return next_obs - obs 91 | 92 | @staticmethod 93 | def obs_cost_fn(obs): 94 | """ @brief: 95 | see mbbl.env.gym_env.walker.py for reward details 96 | """ 97 | if isinstance(obs, np.ndarray): 98 | velocity_cost = -obs[:, 
8] # the qvel for the root-x joint 99 | height_cost = 3 * np.square(obs[:, 0] - 1.3) # the height 100 | return velocity_cost + height_cost 101 | else: 102 | velocity_cost = -obs[:, 8] # the qvel for the root-x joint 103 | height_cost = 3 * tf.square(obs[:, 0] - 1.3) # the height 104 | return velocity_cost + height_cost 105 | 106 | @staticmethod 107 | def ac_cost_fn(acs): 108 | if isinstance(acs, np.ndarray): 109 | return 0.1 * np.sum(np.square(acs), axis=1) 110 | else: 111 | return 0.1 * tf.reduce_sum(tf.square(acs), axis=1) 112 | 113 | def nn_constructor(self, model_init_cfg, misc=None): 114 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap( 115 | name="model", num_networks=get_required_argument(model_init_cfg, "num_nets", "Must provide ensemble size"), 116 | sess=self.SESS, load_model=model_init_cfg.get("load_model", False), 117 | model_dir=model_init_cfg.get("model_dir", None), 118 | misc=misc 119 | )) 120 | if not model_init_cfg.get("load_model", False): 121 | model.add(FC(200, input_dim=self.MODEL_IN, activation="swish", weight_decay=0.000025)) 122 | model.add(FC(200, activation="swish", weight_decay=0.00005)) 123 | model.add(FC(200, activation="swish", weight_decay=0.000075)) 124 | model.add(FC(200, activation="swish", weight_decay=0.000075)) 125 | model.add(FC(self.MODEL_OUT, weight_decay=0.0001)) 126 | model.finalize(tf.train.AdamOptimizer, {"learning_rate": 0.001}) 127 | return model 128 | 129 | def gp_constructor(self, model_init_cfg): 130 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap( 131 | name="model", 132 | kernel_class=get_required_argument(model_init_cfg, "kernel_class", "Must provide kernel class"), 133 | kernel_args=model_init_cfg.get("kernel_args", {}), 134 | num_inducing_points=get_required_argument( 135 | model_init_cfg, "num_inducing_points", "Must provide number of inducing points." 
136 | ), 137 | sess=self.SESS 138 | )) 139 | return model 140 | 141 | 142 | CONFIG_MODULE = WalkerConfigModule 143 | -------------------------------------------------------------------------------- /dmbrl/config/halfcheetah.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | from __future__ import absolute_import 4 | 5 | import numpy as np 6 | import tensorflow as tf 7 | from dotmap import DotMap 8 | import gym 9 | 10 | from dmbrl.misc.DotmapUtils import get_required_argument 11 | from dmbrl.modeling.layers import FC 12 | import dmbrl.env 13 | 14 | 15 | class HalfCheetahConfigModule: 16 | ENV_NAME = "MBRLHalfCheetah-v0" 17 | TASK_HORIZON = 1000 18 | NTRAIN_ITERS = 300 19 | NROLLOUTS_PER_ITER = 1 20 | PLAN_HOR = 30 21 | INIT_VAR = 0.25 22 | MODEL_IN, MODEL_OUT = 24, 18 # obs - > 18, action 6 23 | GP_NINDUCING_POINTS = 300 24 | 25 | def __init__(self): 26 | self.ENV = gym.make(self.ENV_NAME) 27 | cfg = tf.ConfigProto() 28 | cfg.gpu_options.allow_growth = True 29 | self.SESS = tf.Session(config=cfg) 30 | self.NN_TRAIN_CFG = {"epochs": 5} 31 | self.OPT_CFG = { 32 | "Random": { 33 | "popsize": 2500 34 | }, 35 | "GBPRandom": { 36 | "popsize": 2500 37 | }, 38 | "GBPCEM": { 39 | "popsize": 500, 40 | "num_elites": 50, 41 | "max_iters": 5, 42 | "alpha": 0.1 43 | }, 44 | "CEM": { 45 | "popsize": 500, 46 | "num_elites": 50, 47 | "max_iters": 5, 48 | "alpha": 0.1 49 | }, 50 | "POPLIN-P": { 51 | "popsize": 500, 52 | "num_elites": 50, 53 | "max_iters": 5, 54 | "alpha": 0.1 55 | }, 56 | "POPLIN-A": { 57 | "popsize": 500, 58 | "num_elites": 50, 59 | "max_iters": 5, 60 | "alpha": 0.1 61 | } 62 | } 63 | 64 | @staticmethod 65 | def obs_preproc(obs): 66 | if isinstance(obs, np.ndarray): 67 | return np.concatenate([obs[:, 1:2], np.sin(obs[:, 2:3]), np.cos(obs[:, 2:3]), obs[:, 3:]], axis=1) 68 | else: 69 | return tf.concat([obs[:, 1:2], tf.sin(obs[:, 2:3]), tf.cos(obs[:, 2:3]), obs[:, 3:]], axis=1) 70 | 71 | @staticmethod 72 | def obs_postproc(obs, pred): 73 | if isinstance(obs, np.ndarray): 74 | return np.concatenate([pred[:, :1], obs[:, 1:] + pred[:, 1:]], axis=1) 75 | else: 76 | return tf.concat([pred[:, :1], obs[:, 1:] + pred[:, 1:]], axis=1) 77 | 78 | @staticmethod 79 | def targ_proc(obs, next_obs): 80 | return np.concatenate([next_obs[:, :1], next_obs[:, 1:] - obs[:, 1:]], axis=1) 81 | 82 | @staticmethod 83 | def obs_cost_fn(obs): 84 | return -obs[:, 0] 85 | 86 | @staticmethod 87 | def ac_cost_fn(acs): 88 | if isinstance(acs, np.ndarray): 89 | return 0.1 * np.sum(np.square(acs), axis=1) 90 | else: 91 | return 0.1 * tf.reduce_sum(tf.square(acs), axis=1) 92 | 93 | def nn_constructor(self, model_init_cfg, misc=None): 94 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap( 95 | name="model", num_networks=get_required_argument(model_init_cfg, "num_nets", "Must provide ensemble size"), 96 | sess=self.SESS, load_model=model_init_cfg.get("load_model", False), 97 | model_dir=model_init_cfg.get("model_dir", None), 98 | misc=misc 99 | )) 100 | if not model_init_cfg.get("load_model", False): 101 | model.add(FC(200, input_dim=self.MODEL_IN, activation="swish", weight_decay=0.000025)) 102 | model.add(FC(200, activation="swish", weight_decay=0.00005)) 103 | model.add(FC(200, activation="swish", weight_decay=0.000075)) 104 | model.add(FC(200, activation="swish", weight_decay=0.000075)) 105 | model.add(FC(self.MODEL_OUT, weight_decay=0.0001)) 106 | 
model.finalize(tf.train.AdamOptimizer, {"learning_rate": 0.001}) 107 | return model 108 | 109 | def gp_constructor(self, model_init_cfg): 110 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap( 111 | name="model", 112 | kernel_class=get_required_argument(model_init_cfg, "kernel_class", "Must provide kernel class"), 113 | kernel_args=model_init_cfg.get("kernel_args", {}), 114 | num_inducing_points=get_required_argument( 115 | model_init_cfg, "num_inducing_points", "Must provide number of inducing points." 116 | ), 117 | sess=self.SESS 118 | )) 119 | return model 120 | 121 | 122 | CONFIG_MODULE = HalfCheetahConfigModule 123 | -------------------------------------------------------------------------------- /dmbrl/config/pusher.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | from __future__ import absolute_import 4 | 5 | import numpy as np 6 | import tensorflow as tf 7 | from dotmap import DotMap 8 | import gym 9 | 10 | from dmbrl.misc.DotmapUtils import get_required_argument 11 | from dmbrl.modeling.layers import FC 12 | import dmbrl.env 13 | 14 | 15 | class PusherConfigModule: 16 | ENV_NAME = "MBRLPusher-v0" 17 | TASK_HORIZON = 150 18 | NTRAIN_ITERS = 100 19 | NROLLOUTS_PER_ITER = 1 20 | PLAN_HOR = 25 21 | INIT_VAR = 0.25 22 | MODEL_IN, MODEL_OUT = 27, 20 23 | GP_NINDUCING_POINTS = 200 24 | 25 | def __init__(self): 26 | self.ENV = gym.make(self.ENV_NAME) 27 | cfg = tf.ConfigProto() 28 | cfg.gpu_options.allow_growth = True 29 | self.SESS = tf.Session(config=cfg) 30 | self.NN_TRAIN_CFG = {"epochs": 5} 31 | self.OPT_CFG = { 32 | "Random": { 33 | "popsize": 2500 34 | }, 35 | "CEM": { 36 | "popsize": 500, 37 | "num_elites": 50, 38 | "max_iters": 5, 39 | "alpha": 0.1 40 | }, 41 | "GBPRandom": { 42 | "popsize": 2500 43 | }, 44 | "GBPCEM": { 45 | "popsize": 500, 46 | "num_elites": 50, 47 | "max_iters": 5, 48 | "alpha": 0.1 49 | }, 50 | "POPLIN-P": { 51 | "popsize": 500, 52 | "num_elites": 50, 53 | "max_iters": 5, 54 | "alpha": 0.1 55 | }, 56 | "POPLIN-A": { 57 | "popsize": 500, 58 | "num_elites": 50, 59 | "max_iters": 5, 60 | "alpha": 0.1 61 | } 62 | } 63 | 64 | @staticmethod 65 | def obs_postproc(obs, pred): 66 | return obs + pred 67 | 68 | @staticmethod 69 | def targ_proc(obs, next_obs): 70 | return next_obs - obs 71 | 72 | def obs_cost_fn(self, obs): 73 | to_w, og_w = 0.5, 1.25 74 | tip_pos, obj_pos, goal_pos = obs[:, 14:17], obs[:, 17:20], self.ENV.ac_goal_pos 75 | 76 | if isinstance(obs, np.ndarray): 77 | tip_obj_dist = np.sum(np.abs(tip_pos - obj_pos), axis=1) 78 | obj_goal_dist = np.sum(np.abs(goal_pos - obj_pos), axis=1) 79 | return to_w * tip_obj_dist + og_w * obj_goal_dist 80 | else: 81 | tip_obj_dist = tf.reduce_sum(tf.abs(tip_pos - obj_pos), axis=1) 82 | obj_goal_dist = tf.reduce_sum(tf.abs(goal_pos - obj_pos), axis=1) 83 | return to_w * tip_obj_dist + og_w * obj_goal_dist 84 | 85 | @staticmethod 86 | def ac_cost_fn(acs): 87 | if isinstance(acs, np.ndarray): 88 | return 0.1 * np.sum(np.square(acs), axis=1) 89 | else: 90 | return 0.1 * tf.reduce_sum(tf.square(acs), axis=1) 91 | 92 | def nn_constructor(self, model_init_cfg, misc): 93 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap( 94 | name="model", num_networks=get_required_argument(model_init_cfg, "num_nets", "Must provide ensemble size"), 95 | sess=self.SESS, load_model=model_init_cfg.get("load_model", False), 96 | 
model_dir=model_init_cfg.get("model_dir", None), 97 | misc=misc 98 | )) 99 | if not model_init_cfg.get("load_model", False): 100 | model.add(FC(200, input_dim=self.MODEL_IN, activation="swish", weight_decay=0.00025)) 101 | model.add(FC(200, activation="swish", weight_decay=0.0005)) 102 | model.add(FC(200, activation="swish", weight_decay=0.0005)) 103 | model.add(FC(self.MODEL_OUT, weight_decay=0.00075)) 104 | model.finalize(tf.train.AdamOptimizer, {"learning_rate": 0.001}) 105 | return model 106 | 107 | def gp_constructor(self, model_init_cfg): 108 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap( 109 | name="model", 110 | kernel_class=get_required_argument(model_init_cfg, "kernel_class", "Must provide kernel class"), 111 | kernel_args=model_init_cfg.get("kernel_args", {}), 112 | num_inducing_points=get_required_argument( 113 | model_init_cfg, "num_inducing_points", "Must provide number of inducing points." 114 | ), 115 | sess=self.SESS 116 | )) 117 | return model 118 | 119 | 120 | CONFIG_MODULE = PusherConfigModule 121 | -------------------------------------------------------------------------------- /dmbrl/config/reacher.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | from __future__ import absolute_import 4 | 5 | import numpy as np 6 | import tensorflow as tf 7 | from dotmap import DotMap 8 | import gym 9 | 10 | from dmbrl.misc.DotmapUtils import get_required_argument 11 | from dmbrl.modeling.layers import FC 12 | import dmbrl.env 13 | 14 | 15 | class ReacherConfigModule: 16 | ENV_NAME = "MBRLReacher3D-v0" 17 | TASK_HORIZON = 150 18 | NTRAIN_ITERS = 100 19 | NROLLOUTS_PER_ITER = 1 20 | PLAN_HOR = 25 21 | INIT_VAR = 0.25 22 | MODEL_IN, MODEL_OUT = 24, 17 23 | GP_NINDUCING_POINTS = 200 24 | 25 | def __init__(self): 26 | self.ENV = gym.make(self.ENV_NAME) 27 | self.ENV.reset() 28 | cfg = tf.ConfigProto() 29 | cfg.gpu_options.allow_growth = True 30 | self.SESS = tf.Session(config=cfg) 31 | self.NN_TRAIN_CFG = {"epochs": 5} 32 | self.OPT_CFG = { 33 | "Random": { 34 | "popsize": 2000 35 | }, 36 | "CEM": { 37 | "popsize": 400, 38 | "num_elites": 40, 39 | "max_iters": 5, 40 | "alpha": 0.1 41 | }, 42 | "GBPRandom": { 43 | "popsize": 2000 44 | }, 45 | "GBPCEM": { 46 | "popsize": 400, 47 | "num_elites": 40, 48 | "max_iters": 5, 49 | "alpha": 0.1 50 | }, 51 | "POPLIN-P": { 52 | "popsize": 400, 53 | "num_elites": 40, 54 | "max_iters": 5, 55 | "alpha": 0.1 56 | }, 57 | "POPLIN-A": { 58 | "popsize": 400, 59 | "num_elites": 40, 60 | "max_iters": 5, 61 | "alpha": 0.1 62 | } 63 | } 64 | self.UPDATE_FNS = [self.update_goal] 65 | 66 | self.goal = tf.Variable(self.ENV.goal, dtype=tf.float32) 67 | self.SESS.run(self.goal.initializer) 68 | 69 | @staticmethod 70 | def obs_postproc(obs, pred): 71 | return obs + pred 72 | 73 | @staticmethod 74 | def targ_proc(obs, next_obs): 75 | return next_obs - obs 76 | 77 | def update_goal(self, sess=None): 78 | if sess is not None: 79 | self.goal.load(self.ENV.goal, sess) 80 | 81 | def obs_cost_fn(self, obs): 82 | if isinstance(obs, np.ndarray): 83 | return np.sum(np.square(ReacherConfigModule.get_ee_pos(obs, are_tensors=False) - self.ENV.goal), axis=1) 84 | else: 85 | return tf.reduce_sum(tf.square(ReacherConfigModule.get_ee_pos(obs, are_tensors=True) - self.goal), axis=1) 86 | 87 | @staticmethod 88 | def ac_cost_fn(acs): 89 | if isinstance(acs, np.ndarray): 90 | return 0.01 * np.sum(np.square(acs), axis=1) 
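            # Rough arithmetic, purely illustrative: the reacher action is
            # 7-dimensional (MODEL_IN - MODEL_OUT = 24 - 17 = 7), so an action
            # of all ones would add only 0.01 * 7 = 0.07 to the per-step cost.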
91 | else: 92 | return 0.01 * tf.reduce_sum(tf.square(acs), axis=1) 93 | 94 | def nn_constructor(self, model_init_cfg, misc=None): 95 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap( 96 | name="model", num_networks=get_required_argument(model_init_cfg, "num_nets", "Must provide ensemble size"), 97 | sess=self.SESS, load_model=model_init_cfg.get("load_model", False), 98 | model_dir=model_init_cfg.get("model_dir", None), 99 | misc=misc 100 | )) 101 | if not model_init_cfg.get("load_model", False): 102 | model.add(FC(200, input_dim=self.MODEL_IN, activation="swish", weight_decay=0.00025)) 103 | model.add(FC(200, activation="swish", weight_decay=0.0005)) 104 | model.add(FC(200, activation="swish", weight_decay=0.0005)) 105 | model.add(FC(200, activation="swish", weight_decay=0.0005)) 106 | model.add(FC(self.MODEL_OUT, weight_decay=0.00075)) 107 | model.finalize(tf.train.AdamOptimizer, {"learning_rate": 0.00075}) 108 | return model 109 | 110 | def gp_constructor(self, model_init_cfg): 111 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap( 112 | name="model", 113 | kernel_class=get_required_argument(model_init_cfg, "kernel_class", "Must provide kernel class"), 114 | kernel_args=model_init_cfg.get("kernel_args", {}), 115 | num_inducing_points=get_required_argument( 116 | model_init_cfg, "num_inducing_points", "Must provide number of inducing points." 117 | ), 118 | sess=self.SESS 119 | )) 120 | return model 121 | 122 | @staticmethod 123 | def get_ee_pos(states, are_tensors=False): 124 | theta1, theta2, theta3, theta4, theta5, theta6, theta7 = \ 125 | states[:, :1], states[:, 1:2], states[:, 2:3], states[:, 3:4], states[:, 4:5], states[:, 5:6], states[:, 6:] 126 | if are_tensors: 127 | rot_axis = tf.concat([tf.cos(theta2) * tf.cos(theta1), tf.cos(theta2) * tf.sin(theta1), -tf.sin(theta2)], 128 | axis=1) 129 | rot_perp_axis = tf.concat([-tf.sin(theta1), tf.cos(theta1), tf.zeros(tf.shape(theta1))], axis=1) 130 | cur_end = tf.concat([ 131 | 0.1 * tf.cos(theta1) + 0.4 * tf.cos(theta1) * tf.cos(theta2), 132 | 0.1 * tf.sin(theta1) + 0.4 * tf.sin(theta1) * tf.cos(theta2) - 0.188, 133 | -0.4 * tf.sin(theta2) 134 | ], axis=1) 135 | 136 | for length, hinge, roll in [(0.321, theta4, theta3), (0.16828, theta6, theta5)]: 137 | perp_all_axis = tf.cross(rot_axis, rot_perp_axis) 138 | x = tf.cos(hinge) * rot_axis 139 | y = tf.sin(hinge) * tf.sin(roll) * rot_perp_axis 140 | z = -tf.sin(hinge) * tf.cos(roll) * perp_all_axis 141 | new_rot_axis = x + y + z 142 | new_rot_perp_axis = tf.cross(new_rot_axis, rot_axis) 143 | new_rot_perp_axis = tf.where(tf.less(tf.norm(new_rot_perp_axis, axis=1), 1e-30), 144 | rot_perp_axis, new_rot_perp_axis) 145 | new_rot_perp_axis /= tf.norm(new_rot_perp_axis, axis=1, keepdims=True) 146 | rot_axis, rot_perp_axis, cur_end = new_rot_axis, new_rot_perp_axis, cur_end + length * new_rot_axis 147 | else: 148 | rot_axis = np.concatenate([np.cos(theta2) * np.cos(theta1), np.cos(theta2) * np.sin(theta1), -np.sin(theta2)], 149 | axis=1) 150 | rot_perp_axis = np.concatenate([-np.sin(theta1), np.cos(theta1), np.zeros(theta1.shape)], axis=1) 151 | cur_end = np.concatenate([ 152 | 0.1 * np.cos(theta1) + 0.4 * np.cos(theta1) * np.cos(theta2), 153 | 0.1 * np.sin(theta1) + 0.4 * np.sin(theta1) * np.cos(theta2) - 0.188, 154 | -0.4 * np.sin(theta2) 155 | ], axis=1) 156 | 157 | for length, hinge, roll in [(0.321, theta4, theta3), (0.16828, theta6, theta5)]: 158 | perp_all_axis = np.cross(rot_axis, 
rot_perp_axis) 159 | x = np.cos(hinge) * rot_axis 160 | y = np.sin(hinge) * np.sin(roll) * rot_perp_axis 161 | z = -np.sin(hinge) * np.cos(roll) * perp_all_axis 162 | new_rot_axis = x + y + z 163 | new_rot_perp_axis = np.cross(new_rot_axis, rot_axis) 164 | new_rot_perp_axis[np.linalg.norm(new_rot_perp_axis, axis=1) < 1e-30] = \ 165 | rot_perp_axis[np.linalg.norm(new_rot_perp_axis, axis=1) < 1e-30] 166 | new_rot_perp_axis /= np.linalg.norm(new_rot_perp_axis, axis=1, keepdims=True) 167 | rot_axis, rot_perp_axis, cur_end = new_rot_axis, new_rot_perp_axis, cur_end + length * new_rot_axis 168 | 169 | return cur_end 170 | 171 | 172 | CONFIG_MODULE = ReacherConfigModule 173 | -------------------------------------------------------------------------------- /dmbrl/config/reward_util.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | import tensorflow as tf 5 | 6 | import numpy as np 7 | 8 | # The value returned by tolerance() at `margin` distance from `bounds` interval. 9 | _DEFAULT_VALUE_AT_MARGIN = 0.1 10 | 11 | 12 | def _sigmoids(x, value_at_1, sigmoid): 13 | """Returns 1 when `x` == 0, between 0 and 1 otherwise. 14 | Args: 15 | x: A scalar or numpy array. 16 | value_at_1: A float between 0 and 1 specifying the output when `x` == 1. 17 | sigmoid: String, choice of sigmoid type. 18 | Returns: 19 | A numpy array with values between 0.0 and 1.0. 20 | Raises: 21 | ValueError: If not 0 < `value_at_1` < 1, except for `linear`, `cosine` and 22 | `quadratic` sigmoids which allow `value_at_1` == 0. 23 | ValueError: If `sigmoid` is of an unknown type. 24 | """ 25 | if sigmoid in ('cosine', 'linear', 'quadratic'): 26 | if not 0 <= value_at_1 < 1: 27 | raise ValueError('`value_at_1` must be nonnegative and smaller than 1, ' 28 | 'got {}.'.format(value_at_1)) 29 | else: 30 | if not 0 < value_at_1 < 1: 31 | raise ValueError('`value_at_1` must be strictly between 0 and 1, ' 32 | 'got {}.'.format(value_at_1)) 33 | 34 | if sigmoid == 'gaussian': 35 | scale = tf.sqrt(-2 * tf.log(value_at_1)) 36 | return tf.exp(-0.5 * (x * scale) ** 2) 37 | 38 | elif sigmoid == 'hyperbolic': 39 | scale = tf.acosh(1 / value_at_1) 40 | return 1 / tf.cosh(x * scale) 41 | 42 | elif sigmoid == 'long_tail': 43 | scale = tf.sqrt(1 / value_at_1 - 1) 44 | return 1 / ((x * scale) ** 2 + 1) 45 | 46 | elif sigmoid == 'cosine': 47 | scale = tf.acos(2 * value_at_1 - 1) / np.pi 48 | scaled_x = x * scale 49 | return tf.where(abs(scaled_x) < 1, 50 | (1 + tf.cos(np.pi * scaled_x)) / 2, 0.0 * scaled_x) 51 | 52 | elif sigmoid == 'linear': 53 | scale = 1.0 - value_at_1 54 | scaled_x = x * scale 55 | return tf.where(abs(scaled_x) < 1, 1 - scaled_x, 0.0 * scaled_x) 56 | 57 | elif sigmoid == 'quadratic': 58 | scale = tf.sqrt(1.0 - value_at_1) 59 | scaled_x = x * scale 60 | return tf.where(abs(scaled_x) < 1, 1 - scaled_x ** 2, 0.0 * scaled_x) 61 | 62 | elif sigmoid == 'tanh_squared': 63 | scale = tf.arctanh(tf.sqrt(1 - value_at_1)) 64 | return 1 - tf.tanh(x * scale) ** 2 65 | 66 | else: 67 | raise ValueError('Unknown sigmoid type {!r}.'.format(sigmoid)) 68 | 69 | 70 | def tolerance(x, bounds=(0.0, 0.0), margin=0.0, sigmoid='gaussian', 71 | value_at_margin=_DEFAULT_VALUE_AT_MARGIN): 72 | """Returns 1 when `x` falls inside the bounds, between 0 and 1 otherwise. 73 | Args: 74 | x: A scalar or numpy array. 
75 | bounds: A tuple of floats specifying inclusive `(lower, upper)` bounds for 76 | the target interval. These can be infinite if the interval is unbounded 77 | at one or both ends, or they can be equal to one another if the target 78 | value is exact. 79 | margin: Float. Parameter that controls how steeply the output decreases as 80 | `x` moves out-of-bounds. 81 | * If `margin == 0` then the output will be 0 for all values of `x` 82 | outside of `bounds`. 83 | * If `margin > 0` then the output will decrease sigmoidally with 84 | increasing distance from the nearest bound. 85 | sigmoid: String, choice of sigmoid type. Valid values are: 'gaussian', 86 | 'linear', 'hyperbolic', 'long_tail', 'cosine', 'tanh_squared'. 87 | value_at_margin: A float between 0 and 1 specifying the output value when 88 | the distance from `x` to the nearest bound is equal to `margin`. Ignored 89 | if `margin == 0`. 90 | Returns: 91 | A float or numpy array with values between 0.0 and 1.0. 92 | Raises: 93 | ValueError: If `bounds[0] > bounds[1]`. 94 | ValueError: If `margin` is negative. 95 | """ 96 | lower, upper = bounds 97 | if lower > upper: 98 | raise ValueError('Lower bound must be <= upper bound.') 99 | if margin < 0: 100 | raise ValueError('`margin` must be non-negative.') 101 | 102 | in_bounds = tf.logical_and(lower <= x, x <= upper) 103 | if margin == 0: 104 | value = tf.where(in_bounds, 1.0, 0.0) 105 | else: 106 | d = tf.where(x < lower, lower - x, x - upper) / margin 107 | value = tf.where(in_bounds, 108 | 1.0 + d * 0.0, 109 | _sigmoids(d, value_at_margin, sigmoid)) 110 | 111 | return value 112 | -------------------------------------------------------------------------------- /dmbrl/config/template.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | from __future__ import absolute_import 4 | 5 | import numpy as np 6 | import tensorflow as tf 7 | from dotmap import DotMap 8 | import gym 9 | 10 | from dmbrl.misc.DotmapUtils import get_required_argument 11 | from dmbrl.modeling.layers import FC 12 | 13 | 14 | class EnvConfigModule: 15 | ENV_NAME = None 16 | TASK_HORIZON = None 17 | NTRAIN_ITERS = None 18 | NROLLOUTS_PER_ITER = None 19 | PLAN_HOR = None 20 | 21 | def __init__(self): 22 | self.ENV = gym.make(self.ENV_NAME) 23 | cfg = tf.ConfigProto() 24 | cfg.gpu_options.allow_growth = True 25 | self.SESS = tf.Session(config=cfg) 26 | self.NN_TRAIN_CFG = {"epochs": None} 27 | self.OPT_CFG = { 28 | "Random": { 29 | "popsize": None 30 | }, 31 | "CEM": { 32 | "popsize": None, 33 | "num_elites": None, 34 | "max_iters": None, 35 | "alpha": None 36 | } 37 | } 38 | self.UPDATE_FNS = [] 39 | 40 | # Fill in other things to be done here. 41 | 42 | @staticmethod 43 | def obs_preproc(obs): 44 | # Note: Must be able to process both NumPy and Tensorflow arrays. 45 | if isinstance(obs, np.ndarray): 46 | raise NotImplementedError() 47 | else: 48 | raise NotImplementedError 49 | 50 | @staticmethod 51 | def obs_postproc(obs, pred): 52 | # Note: Must be able to process both NumPy and Tensorflow arrays. 53 | if isinstance(obs, np.ndarray): 54 | raise NotImplementedError() 55 | else: 56 | raise NotImplementedError() 57 | 58 | @staticmethod 59 | def targ_proc(obs, next_obs): 60 | # Note: Only needs to process NumPy arrays. 61 | raise NotImplementedError() 62 | 63 | @staticmethod 64 | def obs_cost_fn(obs): 65 | # Note: Must be able to process both NumPy and Tensorflow arrays. 
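        # A minimal sketch of a filled-in cost (purely illustrative; it mirrors
        # the halfcheetah config above, where the first observation entry holds
        # the forward velocity being maximized):
        #     if isinstance(obs, np.ndarray):
        #         return -obs[:, 0]
        #     else:
        #         return -obs[:, 0]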
66 | if isinstance(obs, np.ndarray): 67 | raise NotImplementedError() 68 | else: 69 | raise NotImplementedError() 70 | 71 | @staticmethod 72 | def ac_cost_fn(acs): 73 | # Note: Must be able to process both NumPy and Tensorflow arrays. 74 | if isinstance(acs, np.ndarray): 75 | raise NotImplementedError() 76 | else: 77 | raise NotImplementedError() 78 | 79 | def nn_constructor(self, model_init_cfg): 80 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap( 81 | name="model", num_networks=get_required_argument(model_init_cfg, "num_nets", "Must provide ensemble size"), 82 | sess=self.SESS 83 | )) 84 | # Construct model below. For example: 85 | # model.add(FC(*args)) 86 | # ... 87 | # model.finalize(tf.train.AdamOptimizer, {"learning_rate": 0.001}) 88 | return model 89 | 90 | 91 | CONFIG_MODULE = EnvConfigModule 92 | 93 | -------------------------------------------------------------------------------- /dmbrl/config/view_humanoid.py: -------------------------------------------------------------------------------- 1 | if __name__ == '__main__': 2 | ''' 3 | from dm_control import suite 4 | from dm_control import viewer 5 | import numpy as np 6 | 7 | test_env = suite.load(domain_name="humanoid", task_name="stand") 8 | action_spec = test_env.action_spec() 9 | 10 | def initialize_episode(physics): 11 | with physics.reset_context(): 12 | physics.data.qpos[:] = 0.0 13 | physics.data.qpos[2] = 1.33 14 | physics.data.qvel[:] = 0.0 15 | print(physics.head_height()) 16 | print(physics.head_height()) 17 | print(physics.head_height()) 18 | test_env.task.initialize_episode = initialize_episode 19 | 20 | # Define a uniform random policy. 21 | def random_policy(time_step): 22 | del time_step # Unused. 23 | return np.random.uniform(low=action_spec.minimum, 24 | high=action_spec.maximum, 25 | size=action_spec.shape) 26 | 27 | # Launch the viewer application. 28 | viewer.launch(test_env, policy=random_policy) 29 | ''' 30 | from dm_control import suite 31 | import matplotlib.pyplot as plt 32 | import numpy as np 33 | 34 | max_frame = 90 35 | 36 | width = 480 37 | height = 480 38 | video = np.zeros((90, height, 2 * width, 3), dtype=np.uint8) 39 | 40 | # Load one task: 41 | env = suite.load(domain_name="humanoid", task_name="walk") 42 | 43 | # Step through an episode and print out reward, discount and observation. 44 | action_spec = env.action_spec() 45 | time_step = env.reset() 46 | 47 | with env.physics.reset_context(): 48 | env.physics.data.qpos[:] = 0.0 49 | env.physics.data.qpos[2] = 1.33 50 | env.physics.data.qvel[:] = 0.0 51 | head_pos = [] 52 | while not time_step.last(): 53 | for i in range(max_frame): 54 | action = np.random.uniform(action_spec.minimum, 55 | action_spec.maximum, 56 | size=action_spec.shape) 57 | time_step = env.step(action) 58 | 59 | head_pos.append(env.physics.head_height()) 60 | video[i] = np.hstack([env.physics.render(height, width, camera_id=0), 61 | env.physics.render(height, width, camera_id=1)]) 62 | # print(time_step.reward, time_step.discount, time_step.observation) 63 | for i in range(max_frame): 64 | print(head_pos[i]) 65 | img = plt.imshow(video[i]) 66 | plt.pause(1) # Need min display time > 0.0. 
67 | plt.draw() 68 | -------------------------------------------------------------------------------- /dmbrl/controllers/Controller.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | from __future__ import absolute_import 4 | 5 | 6 | class Controller: 7 | def __init__(self, *args, **kwargs): 8 | """Creates class instance. 9 | """ 10 | self._policy_network = None 11 | pass 12 | 13 | def train(self, obs_trajs, acs_trajs, rews_trajs): 14 | """Trains this controller using lists of trajectories. 15 | """ 16 | raise NotImplementedError("Must be implemented in subclass.") 17 | 18 | def reset(self): 19 | """Resets this controller. 20 | """ 21 | raise NotImplementedError("Must be implemented in subclass.") 22 | 23 | def act(self, obs, t, get_pred_cost=False): 24 | """Performs an action. 25 | """ 26 | raise NotImplementedError("Must be implemented in subclass.") 27 | 28 | def dump_logs(self, primary_logdir, iter_logdir): 29 | """Dumps logs into primary log directory and per-train iteration log directory. 30 | """ 31 | raise NotImplementedError("Must be implemented in subclass.") 32 | 33 | def get_policy_network(self): 34 | return self._policy_network 35 | 36 | def train_policy_network(self): 37 | return False 38 | -------------------------------------------------------------------------------- /dmbrl/controllers/__init__.py: -------------------------------------------------------------------------------- 1 | from .MPC import MPC 2 | -------------------------------------------------------------------------------- /dmbrl/env/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.registration import register 2 | 3 | 4 | register( 5 | id='MBRLCartpole-v0', 6 | entry_point='dmbrl.env.cartpole:CartpoleEnv' 7 | ) 8 | 9 | 10 | register( 11 | id='MBRLReacher3D-v0', 12 | entry_point='dmbrl.env.reacher:Reacher3DEnv' 13 | ) 14 | 15 | 16 | register( 17 | id='MBRLPusher-v0', 18 | entry_point='dmbrl.env.pusher:PusherEnv' 19 | ) 20 | 21 | 22 | register( 23 | id='MBRLHalfCheetah-v0', 24 | entry_point='dmbrl.env.half_cheetah:HalfCheetahEnv' 25 | ) 26 | -------------------------------------------------------------------------------- /dmbrl/env/assets/cartpole.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 35 | 36 | -------------------------------------------------------------------------------- /dmbrl/env/assets/half_cheetah.xml: -------------------------------------------------------------------------------- 1 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 96 | -------------------------------------------------------------------------------- /dmbrl/env/assets/pusher.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 102 | -------------------------------------------------------------------------------- /dmbrl/env/cartpole.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | from __future__ import absolute_import 4 | 5 | import os 6 | 7 | import numpy as np 8 | from gym import utils 9 | from gym.envs.mujoco import mujoco_env 10 | 11 | 12 | class CartpoleEnv(mujoco_env.MujocoEnv, utils.EzPickle): 13 | PENDULUM_LENGTH = 0.6 14 | 15 | def __init__(self): 16 | 
utils.EzPickle.__init__(self) 17 | dir_path = os.path.dirname(os.path.realpath(__file__)) 18 | mujoco_env.MujocoEnv.__init__(self, '%s/assets/cartpole.xml' % dir_path, 2) 19 | 20 | def _step(self, a): 21 | self.do_simulation(a, self.frame_skip) 22 | ob = self._get_obs() 23 | 24 | cost_lscale = CartpoleEnv.PENDULUM_LENGTH 25 | reward = np.exp( 26 | -np.sum(np.square(self._get_ee_pos(ob) - np.array([0.0, CartpoleEnv.PENDULUM_LENGTH]))) / (cost_lscale ** 2) 27 | ) 28 | reward -= 0.01 * np.sum(np.square(a)) 29 | 30 | done = False 31 | return ob, reward, done, {} 32 | 33 | def reset_model(self): 34 | qpos = self.init_qpos + np.random.normal(0, 0.1, np.shape(self.init_qpos)) 35 | qvel = self.init_qvel + np.random.normal(0, 0.1, np.shape(self.init_qvel)) 36 | self.set_state(qpos, qvel) 37 | return self._get_obs() 38 | 39 | def _get_obs(self): 40 | return np.concatenate([self.model.data.qpos, self.model.data.qvel]).ravel() 41 | 42 | @staticmethod 43 | def _get_ee_pos(x): 44 | x0, theta = x[0], x[1] 45 | return np.array([ 46 | x0 - CartpoleEnv.PENDULUM_LENGTH * np.sin(theta), 47 | -CartpoleEnv.PENDULUM_LENGTH * np.cos(theta) 48 | ]) 49 | 50 | def viewer_setup(self): 51 | v = self.viewer 52 | v.cam.trackbodyid = 0 53 | v.cam.distance = v.model.stat.extent 54 | -------------------------------------------------------------------------------- /dmbrl/env/half_cheetah.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | from __future__ import absolute_import 4 | 5 | import os 6 | 7 | import numpy as np 8 | from gym import utils 9 | from gym.envs.mujoco import mujoco_env 10 | 11 | 12 | class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle): 13 | 14 | def __init__(self): 15 | self.prev_qpos = None 16 | dir_path = os.path.dirname(os.path.realpath(__file__)) 17 | mujoco_env.MujocoEnv.__init__(self, '%s/assets/half_cheetah.xml' % dir_path, 5) 18 | utils.EzPickle.__init__(self) 19 | 20 | def _step(self, action): 21 | self.prev_qpos = np.copy(self.model.data.qpos.flat) 22 | self.do_simulation(action, self.frame_skip) 23 | ob = self._get_obs() 24 | 25 | reward_ctrl = -0.1 * np.square(action).sum() 26 | reward_run = ob[0] - 0.0 * np.square(ob[2]) 27 | reward = reward_run + reward_ctrl 28 | 29 | done = False 30 | return ob, reward, done, {} 31 | 32 | def _get_obs(self): 33 | return np.concatenate([ 34 | (self.model.data.qpos.flat[:1] - self.prev_qpos[:1]) / self.dt, 35 | self.model.data.qpos.flat[1:], 36 | self.model.data.qvel.flat, 37 | ]) 38 | 39 | def reset_model(self): 40 | qpos = self.init_qpos + np.random.normal(loc=0, scale=0.001, size=self.model.nq) 41 | qvel = self.init_qvel + np.random.normal(loc=0, scale=0.001, size=self.model.nv) 42 | self.set_state(qpos, qvel) 43 | self.prev_qpos = np.copy(self.model.data.qpos.flat) 44 | return self._get_obs() 45 | 46 | def viewer_setup(self): 47 | self.viewer.cam.distance = self.model.stat.extent * 0.25 48 | self.viewer.cam.elevation = -55 49 | -------------------------------------------------------------------------------- /dmbrl/env/pusher.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | from __future__ import absolute_import 4 | 5 | import os 6 | 7 | import numpy as np 8 | from gym import utils 9 | from gym.envs.mujoco import mujoco_env 10 | 11 | 12 | class PusherEnv(mujoco_env.MujocoEnv, utils.EzPickle): 13 | def __init__(self): 14 
| dir_path = os.path.dirname(os.path.realpath(__file__)) 15 | mujoco_env.MujocoEnv.__init__(self, '%s/assets/pusher.xml' % dir_path, 4) 16 | utils.EzPickle.__init__(self) 17 | self.reset_model() 18 | 19 | def _step(self, a): 20 | obj_pos = self.get_body_com("object"), 21 | vec_1 = obj_pos - self.get_body_com("tips_arm") 22 | vec_2 = obj_pos - self.get_body_com("goal") 23 | 24 | reward_near = -np.sum(np.abs(vec_1)) 25 | reward_dist = -np.sum(np.abs(vec_2)) 26 | reward_ctrl = -np.square(a).sum() 27 | reward = 1.25 * reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near 28 | 29 | self.do_simulation(a, self.frame_skip) 30 | ob = self._get_obs() 31 | done = False 32 | return ob, reward, done, {} 33 | 34 | def viewer_setup(self): 35 | self.viewer.cam.trackbodyid = -1 36 | self.viewer.cam.distance = 4.0 37 | 38 | def reset_model(self): 39 | qpos = self.init_qpos 40 | 41 | self.goal_pos = np.asarray([0, 0]) 42 | self.cylinder_pos = np.array([-0.25, 0.15]) + np.random.normal(0, 0.025, [2]) 43 | 44 | qpos[-4:-2] = self.cylinder_pos 45 | qpos[-2:] = self.goal_pos 46 | qvel = self.init_qvel + self.np_random.uniform(low=-0.005, 47 | high=0.005, size=self.model.nv) 48 | qvel[-4:] = 0 49 | self.set_state(qpos, qvel) 50 | self.ac_goal_pos = self.get_body_com("goal") 51 | 52 | return self._get_obs() 53 | 54 | def _get_obs(self): 55 | return np.concatenate([ 56 | self.model.data.qpos.flat[:7], 57 | self.model.data.qvel.flat[:7], 58 | self.get_body_com("tips_arm"), 59 | self.get_body_com("object"), 60 | ]) 61 | -------------------------------------------------------------------------------- /dmbrl/env/reacher.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | from __future__ import absolute_import 4 | 5 | import os 6 | 7 | import numpy as np 8 | from gym import utils 9 | from gym.envs.mujoco import mujoco_env 10 | 11 | 12 | class Reacher3DEnv(mujoco_env.MujocoEnv, utils.EzPickle): 13 | def __init__(self): 14 | self.viewer = None 15 | utils.EzPickle.__init__(self) 16 | dir_path = os.path.dirname(os.path.realpath(__file__)) 17 | self.goal = np.zeros(3) 18 | mujoco_env.MujocoEnv.__init__(self, os.path.join(dir_path, 'assets/reacher3d.xml'), 2) 19 | 20 | def _step(self, a): 21 | self.do_simulation(a, self.frame_skip) 22 | ob = self._get_obs() 23 | reward = -np.sum(np.square(self.get_EE_pos(ob[None]) - self.goal)) 24 | reward -= 0.01 * np.square(a).sum() 25 | done = False 26 | return ob, reward, done, dict(reward_dist=0, reward_ctrl=0) 27 | 28 | def viewer_setup(self): 29 | self.viewer.cam.trackbodyid = 1 30 | self.viewer.cam.distance = 2.5 31 | self.viewer.cam.elevation = -30 32 | self.viewer.cam.azimuth = 270 33 | 34 | def reset_model(self): 35 | qpos, qvel = np.copy(self.init_qpos), np.copy(self.init_qvel) 36 | qpos[-3:] += np.random.normal(loc=0, scale=0.1, size=[3]) 37 | qvel[-3:] = 0 38 | self.goal = qpos[-3:] 39 | self.set_state(qpos, qvel) 40 | return self._get_obs() 41 | 42 | def _get_obs(self): 43 | return np.concatenate([ 44 | self.model.data.qpos.flat, 45 | self.model.data.qvel.flat[:-3], 46 | ]) 47 | 48 | def get_EE_pos(self, states): 49 | theta1, theta2, theta3, theta4, theta5, theta6, theta7 = \ 50 | states[:, :1], states[:, 1:2], states[:, 2:3], states[:, 3:4], states[:, 4:5], states[:, 5:6], states[:, 6:] 51 | 52 | rot_axis = np.concatenate([np.cos(theta2) * np.cos(theta1), np.cos(theta2) * np.sin(theta1), -np.sin(theta2)], 53 | axis=1) 54 | rot_perp_axis = 
np.concatenate([-np.sin(theta1), np.cos(theta1), np.zeros(theta1.shape)], axis=1) 55 | cur_end = np.concatenate([ 56 | 0.1 * np.cos(theta1) + 0.4 * np.cos(theta1) * np.cos(theta2), 57 | 0.1 * np.sin(theta1) + 0.4 * np.sin(theta1) * np.cos(theta2) - 0.188, 58 | -0.4 * np.sin(theta2) 59 | ], axis=1) 60 | 61 | for length, hinge, roll in [(0.321, theta4, theta3), (0.16828, theta6, theta5)]: 62 | perp_all_axis = np.cross(rot_axis, rot_perp_axis) 63 | x = np.cos(hinge) * rot_axis 64 | y = np.sin(hinge) * np.sin(roll) * rot_perp_axis 65 | z = -np.sin(hinge) * np.cos(roll) * perp_all_axis 66 | new_rot_axis = x + y + z 67 | new_rot_perp_axis = np.cross(new_rot_axis, rot_axis) 68 | new_rot_perp_axis[np.linalg.norm(new_rot_perp_axis, axis=1) < 1e-30] = \ 69 | rot_perp_axis[np.linalg.norm(new_rot_perp_axis, axis=1) < 1e-30] 70 | new_rot_perp_axis /= np.linalg.norm(new_rot_perp_axis, axis=1, keepdims=True) 71 | rot_axis, rot_perp_axis, cur_end = new_rot_axis, new_rot_perp_axis, cur_end + length * new_rot_axis 72 | 73 | return cur_end 74 | 75 | -------------------------------------------------------------------------------- /dmbrl/misc/Agent.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | from __future__ import absolute_import 4 | 5 | import numpy as np 6 | from gym.monitoring import VideoRecorder 7 | from dotmap import DotMap 8 | from dmbrl.misc import logger 9 | 10 | import time 11 | 12 | 13 | class Agent: 14 | """An general class for RL agents. 15 | """ 16 | 17 | def __init__(self, params): 18 | """Initializes an agent. 19 | 20 | Arguments: 21 | params: (DotMap) A DotMap of agent parameters. 22 | .env: (OpenAI gym environment) The environment for this agent. 23 | .noisy_actions: (bool) Indicates whether random Gaussian noise will 24 | be added to the actions of this agent. 25 | .noise_stddev: (float) The standard deviation to be used for the 26 | action noise if params.noisy_actions is True. 27 | """ 28 | self.env = params.env 29 | 30 | # load the imitation data if needed 31 | if hasattr(self.env, '_expert_data_loaded') and \ 32 | (not self.env._expert_data_loaded): 33 | self.env.load_expert_data( 34 | params.params.misc.ctrl_cfg.il_cfg.expert_amc_dir 35 | ) 36 | 37 | self.noise_stddev = params.noise_stddev if params.get("noisy_actions", False) else None 38 | 39 | if isinstance(self.env, DotMap): 40 | raise ValueError("Environment must be provided to the agent at initialization.") 41 | if (not isinstance(self.noise_stddev, float)) and params.get("noisy_actions", False): 42 | raise ValueError("Must provide standard deviation for noise for noisy actions.") 43 | 44 | if self.noise_stddev is not None: 45 | self.dU = self.env.action_space.shape[0] 46 | self._debug = 1 47 | 48 | def sample(self, horizon, policy, record_fname=None, test_policy=False, average=False): 49 | """Samples a rollout from the agent. 50 | 51 | Arguments: 52 | horizon: (int) The length of the rollout to generate from the agent. 53 | policy: (policy) The policy that the agent will use for actions. 54 | record_fname: (str/None) The name of the file to which a recording of the rollout 55 | will be saved. If None, the rollout will not be recorded. 56 | 57 | Returns: (dict) A dictionary containing data from the rollout. 58 | The keys of the dictionary are 'obs', 'ac', and 'reward_sum'. 
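            The dictionary also contains a per-step 'rewards' array. For a
            rollout that runs the full horizon, 'obs' holds horizon + 1
            observations (the initial observation plus one per step), 'ac' and
            'rewards' hold one entry per step, and 'reward_sum' is the scalar
            sum of 'rewards'.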
59 | """ 60 | if test_policy: 61 | logger.info('Testing the policy') 62 | video_record = record_fname is not None 63 | recorder = None if not video_record else VideoRecorder(self.env, record_fname) 64 | 65 | times, rewards = [], [] 66 | O, A, reward_sum, done = [self.env.reset()], [], 0, False 67 | self._debug += 1 68 | 69 | policy.reset() 70 | # for t in range(20): 71 | for t in range(horizon): 72 | if hasattr(self.env, 'render_imitation'): 73 | self.env.render_imitation() 74 | if t % 50 == 10 and t > 1: 75 | logger.info('Current timesteps: %d / %d, average time: %.5f' 76 | % (t, horizon, np.mean(times))) 77 | if video_record: 78 | recorder.capture_frame() 79 | start = time.time() 80 | if test_policy: 81 | A.append(policy.act(O[t], t, test_policy=test_policy, average=average)) 82 | else: 83 | A.append(policy.act(O[t], t)) 84 | times.append(time.time() - start) 85 | 86 | if self.noise_stddev is None: 87 | obs, reward, done, info = self.env.step(A[t]) 88 | else: 89 | action = A[t] + np.random.normal(loc=0, scale=self.noise_stddev, 90 | size=[self.dU]) 91 | action = np.minimum(np.maximum(action, 92 | self.env.action_space.low), 93 | self.env.action_space.high) 94 | obs, reward, done, info = self.env.step(action) 95 | O.append(obs) 96 | reward_sum += reward 97 | rewards.append(reward) 98 | if done: 99 | break 100 | 101 | if video_record: 102 | recorder.capture_frame() 103 | recorder.close() 104 | 105 | logger.info("Average action selection time: %.4f" % np.mean(times)) 106 | logger.info("Rollout length: %d" % len(A)) 107 | 108 | return { 109 | "obs": np.array(O), 110 | "ac": np.array(A), 111 | "reward_sum": reward_sum, 112 | "rewards": np.array(rewards), 113 | } 114 | -------------------------------------------------------------------------------- /dmbrl/misc/DotmapUtils.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | from __future__ import absolute_import 4 | 5 | 6 | def get_required_argument(dotmap, key, message, default=None): 7 | val = dotmap.get(key, default) 8 | if val is default: 9 | raise ValueError(message) 10 | return val 11 | -------------------------------------------------------------------------------- /dmbrl/misc/MBExp.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | from __future__ import absolute_import 4 | 5 | import os 6 | from time import localtime, strftime 7 | 8 | from scipy.io import savemat 9 | from dotmap import DotMap 10 | 11 | from dmbrl.misc.DotmapUtils import get_required_argument 12 | from dmbrl.misc.Agent import Agent 13 | from dmbrl.misc import logger 14 | import copy 15 | import numpy as np 16 | 17 | 18 | class MBExperiment: 19 | 20 | def __init__(self, params): 21 | """Initializes class instance. 22 | 23 | Argument: 24 | params (DotMap): A DotMap containing the following: 25 | .sim_cfg: 26 | .env (gym.env): Environment for this experiment 27 | .task_hor (int): Task horizon 28 | .stochastic (bool): (optional) If True, agent adds noise to its actions. 29 | Must provide noise_std (see below). Defaults to False. 30 | .noise_std (float): for stochastic agents, noise of the form N(0, noise_std^2I) 31 | will be added. 32 | 33 | .exp_cfg: 34 | .ntrain_iters (int): Number of training iterations to be performed. 35 | .nrollouts_per_iter (int): (optional) Number of rollouts done between training 36 | iterations. Defaults to 1. 
37 | .ninit_rollouts (int): (optional) Number of initial rollouts. Defaults to 1. 38 | .policy (controller): Policy that will be trained. 39 | 40 | .log_cfg: 41 | .logdir (str): Parent of directory path where experiment data will be saved. 42 | Experiment will be saved in logdir/ 43 | .nrecord (int): (optional) Number of rollouts to record for every iteration. 44 | Defaults to 0. 45 | .neval (int): (optional) Number of rollouts for performance evaluation. 46 | Defaults to 1. 47 | """ 48 | self.env = get_required_argument(params.sim_cfg, "env", "Must provide environment.") 49 | self.task_hor = get_required_argument(params.sim_cfg, "task_hor", "Must provide task horizon.") 50 | self._params = params 51 | params.sim_cfg.misc = copy.copy(params) 52 | if params.sim_cfg.get("stochastic", False): 53 | self.agent = Agent(DotMap( 54 | env=self.env, noisy_actions=True, 55 | noise_stddev=get_required_argument( 56 | params.sim_cfg, 57 | "noise_std", 58 | "Must provide noise standard deviation in the case of a stochastic environment." 59 | ), 60 | params=params 61 | )) 62 | else: 63 | self.agent = Agent(DotMap(env=self.env, noisy_actions=False, params=params)) 64 | 65 | self.ntrain_iters = get_required_argument( 66 | params.exp_cfg, "ntrain_iters", "Must provide number of training iterations." 67 | ) 68 | self.nrollouts_per_iter = params.exp_cfg.get("nrollouts_per_iter", 1) 69 | self.ninit_rollouts = params.exp_cfg.get("ninit_rollouts", 1) 70 | self.policy = get_required_argument(params.exp_cfg, "policy", "Must provide a policy.") 71 | 72 | self.logdir = os.path.join( 73 | get_required_argument(params.log_cfg, "logdir", "Must provide log parent directory."), 74 | strftime("%Y-%m-%d--%H:%M:%S", localtime()) 75 | ) 76 | logger.set_file_handler(path=self.logdir) 77 | logger.info('Starting the experiments') 78 | self.nrecord = params.log_cfg.get("nrecord", 0) 79 | self.neval = params.log_cfg.get("neval", 1) 80 | 81 | def run_experiment(self): 82 | """Perform experiment. 83 | """ 84 | os.makedirs(self.logdir, exist_ok=True) 85 | 86 | traj_obs, traj_acs, traj_rets, traj_rews = [], [], [], [] 87 | test_traj_obs, test_traj_acs, test_traj_rets = [], [], [] 88 | episode_iter_id = [] 89 | 90 | # Perform initial rollouts 91 | samples = [] 92 | needed_num_steps = self.ninit_rollouts * self.task_hor 93 | finished_num_steps = 0 94 | """ 95 | # TODO DEBUG 96 | needed_num_steps = 64 97 | self.task_hor = 64 98 | """ 99 | while True: 100 | samples.append( 101 | self.agent.sample( 102 | self.task_hor, self.policy 103 | ) 104 | ) 105 | traj_obs.append(samples[-1]["obs"]) 106 | traj_acs.append(samples[-1]["ac"]) 107 | traj_rews.append(samples[-1]["rewards"]) 108 | finished_num_steps += len(samples[-1]["ac"]) 109 | 110 | if finished_num_steps >= needed_num_steps: 111 | break 112 | 113 | if self.ninit_rollouts > 0: 114 | self.policy.train( 115 | [sample["obs"] for sample in samples], 116 | [sample["ac"] for sample in samples], 117 | [sample["rewards"] for sample in samples] 118 | ) 119 | 120 | # Training loop 121 | for i in range(self.ntrain_iters): 122 | 123 | logger.info("####################################################################") 124 | logger.info("Starting training iteration %d." 
% (i + 1)) 125 | 126 | iter_dir = os.path.join(self.logdir, "train_iter%d" % (i + 1)) 127 | os.makedirs(iter_dir, exist_ok=True) 128 | 129 | samples = [] 130 | assert self.nrecord == 0 131 | 132 | needed_num_steps = self.task_hor * \ 133 | (max(self.neval, self.nrollouts_per_iter) - self.nrecord) 134 | finished_num_steps = 0 135 | while True: 136 | samples.append( 137 | self.agent.sample( 138 | self.task_hor, self.policy 139 | ) 140 | ) 141 | finished_num_steps += len(samples[-1]["ac"]) 142 | 143 | if finished_num_steps >= needed_num_steps: 144 | break 145 | logger.info("Rewards obtained: {}".format( 146 | [sample["reward_sum"] for sample in samples[:self.neval]]) 147 | ) 148 | # test the policy if needed 149 | if self._params.misc.ctrl_cfg.cem_cfg.test_policy > 0: 150 | test_data = [] 151 | for _ in range(5): 152 | test_data.append( 153 | self.agent.sample(self.task_hor, self.policy, 154 | test_policy=True, average=False) 155 | ) 156 | test_traj_rets.extend([ 157 | np.mean([i_test_data["reward_sum"] for i_test_data in test_data]) 158 | ]) 159 | test_traj_obs.extend( 160 | [i_test_data["obs"] for i_test_data in test_data] 161 | ) 162 | test_traj_acs.extend( 163 | [i_test_data["ac"] for i_test_data in test_data] 164 | ) 165 | 166 | traj_obs.extend([sample["obs"] for sample in samples]) 167 | traj_acs.extend([sample["ac"] for sample in samples]) 168 | traj_rets.extend([sample["reward_sum"] for sample in samples]) 169 | traj_rews.extend([sample["rewards"] for sample in samples]) 170 | episode_iter_id.extend([i] * len(samples)) 171 | samples = samples[:self.nrollouts_per_iter] 172 | 173 | self.policy.dump_logs(self.logdir, iter_dir) 174 | savemat( 175 | os.path.join(self.logdir, "logs.mat"), 176 | { 177 | "observations": traj_obs, 178 | "actions": traj_acs, 179 | "returns": traj_rets, 180 | "rewards": traj_rews, 181 | "test_returns": test_traj_rets, 182 | "test_obs": test_traj_obs, 183 | "test_acs": test_traj_acs, 184 | 'episode_iter_id': episode_iter_id 185 | } 186 | ) 187 | # Delete iteration directory if not used 188 | if len(os.listdir(iter_dir)) == 0: 189 | os.rmdir(iter_dir) 190 | 191 | if i < self.ntrain_iters - 1: 192 | self.policy.train( 193 | [sample["obs"] for sample in samples], 194 | [sample["ac"] for sample in samples], 195 | [sample["rewards"] for sample in samples] 196 | ) 197 | 198 | # TODO: train the policy network 199 | -------------------------------------------------------------------------------- /dmbrl/misc/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WilsonWangTHU/POPLIN/edd8dba50f9049c6164eda774602bef0c299cb51/dmbrl/misc/__init__.py -------------------------------------------------------------------------------- /dmbrl/misc/logger.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------------------- 2 | # @brief: 3 | # The logger here will be called all across the project. 
It is inspired 4 | # by Yuxin Wu (ppwwyyxx@gmail.com) 5 | # 6 | # @author: 7 | # Tingwu Wang, 2017, Feb, 20th 8 | # ----------------------------------------------------------------------------- 9 | 10 | import logging 11 | import sys 12 | import os 13 | import datetime 14 | from termcolor import colored 15 | 16 | __all__ = ['set_file_handler'] # the actual worker is the '_logger' 17 | 18 | 19 | class _MyFormatter(logging.Formatter): 20 | ''' 21 | @brief: 22 | a class to make sure the format could be used 23 | ''' 24 | 25 | def format(self, record): 26 | date = colored('[%(asctime)s @%(filename)s:%(lineno)d]', 'green') 27 | msg = '%(message)s' 28 | 29 | if record.levelno == logging.WARNING: 30 | fmt = date + ' ' + \ 31 | colored('WRN', 'red', attrs=[]) + ' ' + msg 32 | elif record.levelno == logging.ERROR or \ 33 | record.levelno == logging.CRITICAL: 34 | fmt = date + ' ' + \ 35 | colored('ERR', 'red', attrs=['underline']) + ' ' + msg 36 | else: 37 | fmt = date + ' ' + msg 38 | 39 | if hasattr(self, '_style'): 40 | # Python3 compatibilty 41 | self._style._fmt = fmt 42 | self._fmt = fmt 43 | 44 | return super(self.__class__, self).format(record) 45 | 46 | 47 | _logger = logging.getLogger('joint_embedding') 48 | _logger.propagate = False 49 | _logger.setLevel(logging.INFO) 50 | 51 | # set the console output handler 52 | con_handler = logging.StreamHandler(sys.stdout) 53 | con_handler.setFormatter(_MyFormatter(datefmt='%m%d %H:%M:%S')) 54 | _logger.addHandler(con_handler) 55 | 56 | 57 | class GLOBAL_PATH(object): 58 | 59 | def __init__(self, path=None): 60 | if path is None: 61 | path = os.getcwd() 62 | self.path = path 63 | 64 | def _set_path(self, path): 65 | self.path = path 66 | 67 | def _get_path(self): 68 | return self.path 69 | 70 | 71 | PATH = GLOBAL_PATH() 72 | 73 | 74 | # set the file output handler 75 | def set_file_handler(path=None, prefix='', time_str=''): 76 | if time_str == '': 77 | file_name = prefix + \ 78 | datetime.datetime.now().strftime("%A_%d_%B_%Y_%I:%M%p") + '.log' 79 | else: 80 | file_name = prefix + time_str + '.log' 81 | 82 | path = os.path.abspath(path) 83 | 84 | path = os.path.join(path, file_name) 85 | if not os.path.exists(path): 86 | os.makedirs(path) 87 | 88 | PATH._set_path(path) 89 | # from tensorboard_logger import configure 90 | # configure(path) 91 | 92 | file_handler = logging.FileHandler( 93 | filename=os.path.join(path, 'logger.log'), encoding='utf-8', mode='w' 94 | ) 95 | file_handler.setFormatter(_MyFormatter(datefmt='%m%d %H:%M:%S')) 96 | _logger.addHandler(file_handler) 97 | 98 | _logger.info('Log file set to {}'.format(path)) 99 | return 100 | 101 | 102 | def _get_path(): 103 | return PATH._get_path() 104 | 105 | 106 | _LOGGING_METHOD = ['info', 'warning', 'error', 'critical', 107 | 'warn', 'exception', 'debug'] 108 | 109 | # export logger functions 110 | for func in _LOGGING_METHOD: 111 | locals()[func] = getattr(_logger, func) 112 | -------------------------------------------------------------------------------- /dmbrl/misc/optimizers/__init__.py: -------------------------------------------------------------------------------- 1 | from .cem import CEMOptimizer 2 | from .random import RandomOptimizer 3 | from .gbp_rs import GBPRandomOptimizer 4 | from .gbp_cem import GBPCEMOptimizer 5 | from .POPLIN_A import POPLINAOptimizer 6 | from .POPLIN_P import POPLINPOptimizer 7 | -------------------------------------------------------------------------------- /dmbrl/misc/optimizers/cem.py: 
-------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | from __future__ import absolute_import 4 | 5 | import tensorflow as tf 6 | import numpy as np 7 | import scipy.stats as stats 8 | from dmbrl.misc import logger 9 | 10 | from .optimizer import Optimizer 11 | 12 | 13 | class CEMOptimizer(Optimizer): 14 | """A Tensorflow-compatible CEM optimizer. 15 | """ 16 | 17 | def __init__(self, sol_dim, max_iters, popsize, num_elites, tf_session=None, 18 | upper_bound=None, lower_bound=None, epsilon=0.001, alpha=0.25, 19 | params=None): 20 | """Creates an instance of this class. 21 | 22 | Arguments: 23 | sol_dim (int): The dimensionality of the problem space 24 | max_iters (int): The maximum number of iterations to perform during optimization 25 | popsize (int): The number of candidate solutions to be sampled at every iteration 26 | num_elites (int): The number of top solutions that will be used to obtain the distribution 27 | at the next iteration. 28 | tf_session (tf.Session): (optional) Session to be used for this optimizer. Defaults to None, 29 | in which case any functions passed in cannot be tf.Tensor-valued. 30 | upper_bound (np.array): An array of upper bounds 31 | lower_bound (np.array): An array of lower bounds 32 | epsilon (float): A minimum variance. If the maximum variance drops below epsilon, optimization is 33 | stopped. 34 | alpha (float): Controls how much of the previous mean and variance is used for the next iteration. 35 | next_mean = alpha * old_mean + (1 - alpha) * elite_mean, and similarly for variance. 36 | """ 37 | from dmbrl.modeling.models import GT_dynamics 38 | self._gt_compile_cost = GT_dynamics.compile_cost 39 | super().__init__() 40 | self.sol_dim, self.max_iters, self.popsize, self.num_elites = \ 41 | sol_dim, max_iters, popsize, num_elites 42 | self.ub, self.lb = upper_bound, lower_bound 43 | self.epsilon, self.alpha = epsilon, alpha 44 | self.tf_sess = tf_session 45 | self.debug = False 46 | 47 | self._params = params 48 | 49 | if num_elites > popsize: 50 | raise ValueError("Number of elites must be at most the population size.") 51 | 52 | if self.tf_sess is not None: 53 | with self.tf_sess.graph.as_default(): 54 | with tf.variable_scope("CEMSolver"): 55 | self.init_mean = tf.placeholder(dtype=tf.float32, shape=[sol_dim]) 56 | self.init_var = tf.placeholder(dtype=tf.float32, shape=[sol_dim]) 57 | 58 | self.num_opt_iters, self.mean, self.var = None, None, None 59 | self.tf_compatible, self.cost_function = None, None 60 | 61 | if self._params.il_cfg.use_gt_dynamics: 62 | self._dynamics = GT_dynamics.GT(self._params) 63 | 64 | def setup(self, cost_function, tf_compatible): 65 | """Sets up this optimizer using a given cost function. 66 | 67 | Arguments: 68 | cost_function (func): A function for computing costs over a batch of candidate solutions. 69 | tf_compatible (bool): True if the cost function provided is tf.Tensor-valued. 
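                    The cost function is expected to map a [popsize, sol_dim]
                    batch of candidate action sequences to a [popsize] vector
                    of costs; this is what the elite selection below assumes.
                    In this codebase it is presumably constructed by the MPC
                    controller from the learned dynamics model.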
70 | 71 | Returns: None 72 | """ 73 | if tf_compatible and self.tf_sess is None: 74 | raise RuntimeError("Cannot pass in a tf.Tensor-valued cost function without passing in a TensorFlow " 75 | "session into the constructor") 76 | 77 | self.tf_compatible = tf_compatible 78 | 79 | if not tf_compatible: 80 | self.cost_function = cost_function 81 | else: 82 | def continue_optimization(t, mean, var, best_val, best_sol): 83 | return tf.logical_and(tf.less(t, self.max_iters), tf.reduce_max(var) > self.epsilon) 84 | 85 | def iteration(t, mean, var, best_val, best_sol): 86 | lb_dist, ub_dist = mean - self.lb, self.ub - mean 87 | constrained_var = tf.minimum(tf.minimum(tf.square(lb_dist / 2), tf.square(ub_dist / 2)), var) 88 | samples = tf.truncated_normal([self.popsize, self.sol_dim], mean, tf.sqrt(constrained_var)) 89 | 90 | costs = cost_function(samples) 91 | values, indices = tf.nn.top_k(-costs, k=self.num_elites, sorted=True) 92 | 93 | best_val, best_sol = tf.cond( 94 | tf.less(-values[0], best_val), 95 | lambda: (-values[0], samples[indices[0]]), 96 | lambda: (best_val, best_sol) 97 | ) 98 | 99 | elites = tf.gather(samples, indices) 100 | new_mean = tf.reduce_mean(elites, axis=0) 101 | new_var = tf.reduce_mean(tf.square(elites - new_mean), axis=0) 102 | 103 | mean = self.alpha * mean + (1 - self.alpha) * new_mean 104 | var = self.alpha * var + (1 - self.alpha) * new_var 105 | 106 | return t + 1, mean, var, best_val, best_sol 107 | 108 | with self.tf_sess.graph.as_default(): 109 | self.num_opt_iters, self.mean, self.var, self.best_val, self.best_sol = tf.while_loop( 110 | cond=continue_optimization, body=iteration, 111 | loop_vars=[0, self.init_mean, self.init_var, float("inf"), self.init_mean] 112 | ) 113 | 114 | def reset(self): 115 | pass 116 | 117 | def obtain_solution(self, init_mean, init_var, per, dU, obs=None): 118 | """Optimizes the cost function using the provided initial candidate distribution 119 | 120 | Arguments: 121 | init_mean (np.ndarray): The mean of the initial candidate distribution. 122 | init_var (np.ndarray): The variance of the initial candidate distribution. 
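                per, dU: Forwarded to update_prev_sol() when building the
                    warm-start mean for the next timestep; dU is the action
                    dimensionality.
                obs (np.ndarray): Current observation; only used by the
                    non-TF branch, which plans against the ground-truth
                    dynamics.

            Returns: (sol, prev_sol), where sol is the optimized flattened
                action-sequence mean and prev_sol is the re-initialized mean
                (from update_prev_sol) to be used at the next timestep.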
123 | """ 124 | if self.tf_compatible: 125 | sol, solvar = self.tf_sess.run( 126 | [self.mean, self.var], 127 | feed_dict={self.init_mean: init_mean, self.init_var: init_var} 128 | ) 129 | else: 130 | assert self._params.il_cfg.use_gt_dynamics 131 | mean, var, t = init_mean, init_var, 0 132 | X = stats.truncnorm(-2, 2, loc=np.zeros_like(mean), scale=np.ones_like(mean)) 133 | 134 | cfg = {'plan_hor': self._params.opt_cfg.plan_hor, 135 | 'dU': self._params.env.action_space.shape[0]} 136 | while (t < self.max_iters) and np.max(var) > self.epsilon: 137 | lb_dist, ub_dist = mean - self.lb, self.ub - mean 138 | constrained_var = np.minimum(np.minimum(np.square(lb_dist / 2), np.square(ub_dist / 2)), var) 139 | 140 | samples = X.rvs(size=[self.popsize, self.sol_dim]) * np.sqrt(constrained_var) + mean 141 | costs = self._gt_compile_cost( 142 | obs, samples, cfg, self._dynamics, 143 | self._dynamics._numpy_reward_function 144 | ) 145 | costs = np.reshape(costs, [-1]) 146 | elites = samples[np.argsort(costs)][:self.num_elites] 147 | 148 | new_mean = np.mean(elites, axis=0) 149 | new_var = np.var(elites, axis=0) 150 | 151 | mean = self.alpha * mean + (1 - self.alpha) * new_mean 152 | var = self.alpha * var + (1 - self.alpha) * new_var 153 | logger.info('variance of elite: {}'.format(np.var(elites))) 154 | logger.info('Mean perforamnce: {}'.format( 155 | np.mean(costs[np.argsort(costs)][:self.num_elites])) 156 | ) 157 | 158 | t += 1 159 | sol, solvar = mean, var 160 | sol = np.reshape(sol, [-1]) 161 | 162 | # prev_sol is going to be used next timestep 163 | prev_sol = self.update_prev_sol(per, dU, sol) 164 | return sol, prev_sol 165 | -------------------------------------------------------------------------------- /dmbrl/misc/optimizers/gbp_rs.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import absolute_import 3 | from __future__ import print_function 4 | 5 | import numpy as np 6 | import tensorflow as tf 7 | 8 | from .optimizer import Optimizer 9 | from dmbrl.misc import logger 10 | 11 | 12 | class GBPRandomOptimizer(Optimizer): 13 | """ @brief: use gradient based planning to update the policy network 14 | """ 15 | 16 | def __init__(self, sol_dim, popsize, tf_session, 17 | upper_bound=None, lower_bound=None, params=None): 18 | """Creates an instance of this class. 19 | 20 | Arguments: 21 | sol_dim (int): The dimensionality of the problem space 22 | popsize (int): The number of candidate solutions to be sampled at every iteration 23 | num_elites (int): The number of top solutions that will be used to obtain the distribution 24 | at the next iteration. 25 | tf_session (tf.Session): (optional) Session to be used for this optimizer. Defaults to None, 26 | in which case any functions passed in cannot be tf.Tensor-valued. 27 | upper_bound (np.array): An array of upper bounds 28 | lower_bound (np.array): An array of lower bounds 29 | """ 30 | super().__init__() 31 | self._params = params 32 | self._print_count = 0 33 | 34 | self.sol_dim = sol_dim 35 | self.popsize = popsize 36 | self.ub, self.lb = upper_bound, lower_bound 37 | self.tf_sess = tf_session 38 | self.solution = None 39 | self.tf_compatible, self.cost_function = None, None 40 | 41 | self._debug = {} 42 | self._debug['old_sol'] = 0.0 43 | self._debug_start = False 44 | 45 | def setup(self, cost_function, tf_compatible): 46 | """Sets up this optimizer using a given cost function. 
47 | 48 | Arguments: 49 | cost_function (func): A function for computing costs over a batch of candidate solutions. 50 | tf_compatible (bool): True if the cost function provided is tf.Tensor-valued. 51 | 52 | Returns: None 53 | """ 54 | if tf_compatible and self.tf_sess is None: 55 | raise RuntimeError("Cannot pass in a tf.Tensor-valued cost function without passing in a TensorFlow " 56 | "session into the constructor") 57 | 58 | if not tf_compatible: 59 | self.tf_compatible = False 60 | self.cost_function = cost_function 61 | else: 62 | with self.tf_sess.graph.as_default(): 63 | self.tf_compatible = True 64 | self._candidate_solutions = tf.Variable( 65 | np.random.uniform(self.lb, self.ub, [self.popsize, self.sol_dim]), 66 | dtype=tf.float32 67 | ) 68 | self.tf_sess.run( 69 | tf.variables_initializer([self._candidate_solutions]) 70 | ) 71 | 72 | self._costs = costs = cost_function(self._candidate_solutions) 73 | self._choice = tf.argmin(costs) 74 | self.solution = \ 75 | self._candidate_solutions[tf.cast(self._choice, tf.int32)] 76 | 77 | # the update loss 78 | self._adam_optimizer = \ 79 | tf.train.AdamOptimizer(learning_rate=self._params.gbp_cfg.lr) 80 | self._planning_optimizer = self._adam_optimizer.minimize( 81 | costs, var_list=[self._candidate_solutions] 82 | ) 83 | self.tf_sess.run( 84 | tf.variables_initializer(self._adam_optimizer.variables()) 85 | ) 86 | self._average_cost = tf.reduce_mean(costs) 87 | self._min_cost = tf.reduce_min(costs) 88 | self._values, self._indices = tf.nn.top_k(-costs, k=10, sorted=True) 89 | 90 | # debug information 91 | self._debug_actions = self.solution 92 | 93 | def reset(self): 94 | pass 95 | 96 | def obtain_solution(self, init_mean, init_var, per, dU, obs=None): 97 | """Optimizes the cost function provided in setup(). 98 | do gradient based planning 99 | 100 | Arguments: 101 | init_mean (np.ndarray): The mean of the initial candidate distribution. 102 | init_var (np.ndarray): The variance of the initial candidate distribution. 
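            Note: the routine below warm-starts the candidate pool by shifting the previous solutions
            forward by one control step and refilling the freed trailing slots with uniform noise (the
            hard-coded width 6 appears to assume the halfcheetah action dimension), then refines all
            candidates with gbp_cfg.plan_iter Adam steps directly on the planning cost. A rough sketch,
            writing dU for the action dimension and adam_step/cost as stand-in names:

                pool = np.concatenate(
                    [old_pool[:, dU:], np.random.uniform(lb, ub, [popsize, dU])], axis=1)
                for _ in range(plan_iter):
                    pool = adam_step(cost, pool)      # one gradient step on the planning cost
                sol = pool[np.argmin(cost(pool))]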
103 | """ 104 | assert self.tf_compatible 105 | self._print_count = (self._print_count + 1) % 20 106 | self._print = self._print_count == 0 107 | 108 | # step 1: initialize the action candidates TODO: use init_mean 109 | self._old_solutions = np.concatenate( 110 | [self.tf_sess.run(self._candidate_solutions)[:, 6:], 111 | np.random.uniform(self.lb[0], self.ub[0], [self.popsize, 6])], 112 | axis=1 113 | ) 114 | self._candidate_solutions.load(self._old_solutions, self.tf_sess) 115 | 116 | avg_cost, min_cost = self.tf_sess.run( 117 | [self._average_cost, self._min_cost] 118 | ) 119 | if self._print: 120 | logger.info('Init -> Avg_cost: %.3f, Min_cost: %.3f' % 121 | (avg_cost, min_cost)) 122 | 123 | # step 2: do gradient based planning 124 | for gbp_iteration in range(self._params.gbp_cfg.plan_iter): 125 | _, avg_cost, min_cost = self.tf_sess.run( 126 | [self._planning_optimizer, self._average_cost, self._min_cost] 127 | ) 128 | avg_cost, min_cost = self.tf_sess.run( 129 | [self._average_cost, self._min_cost] 130 | ) 131 | if self._print: 132 | logger.info('Iter %d > Avg_cost: %.3f, Min_cost: %.3f' % 133 | (self._params.gbp_cfg.plan_iter, avg_cost, min_cost)) 134 | 135 | sol = self.tf_sess.run(self.solution) 136 | prev_sol = self.update_prev_sol(per, dU, sol) 137 | 138 | return sol, prev_sol 139 | -------------------------------------------------------------------------------- /dmbrl/misc/optimizers/optimizer.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | from __future__ import division 4 | import numpy as np 5 | 6 | 7 | class Optimizer: 8 | 9 | def __init__(self, *args, **kwargs): 10 | self.sy_cur_obs = None 11 | self._proposed_act_seqs_ph = None 12 | pass 13 | 14 | def setup(self, cost_function, tf_compatible): 15 | raise NotImplementedError("Must be implemented in subclass.") 16 | 17 | def reset(self): 18 | raise NotImplementedError("Must be implemented in subclass.") 19 | 20 | def obtain_solution(self, *args, **kwargs): 21 | raise NotImplementedError("Must be implemented in subclass.") 22 | 23 | def get_policy_network(self): 24 | return None 25 | 26 | def train_policy_network(self): 27 | return False 28 | 29 | def set_sy_cur_obs(self, sy_cur_obs): 30 | # NOTE: it is a hack! be careful 31 | self.sy_cur_obs = sy_cur_obs 32 | 33 | def forward_policy_propose(self, predict_next_obs, sy_cur_obs): 34 | pass 35 | 36 | def reset_prev_sol(self, prev_sol): 37 | return prev_sol 38 | 39 | def update_prev_sol(self, per, dU, soln): 40 | prev_sol = np.concatenate([np.copy(soln)[per * dU:], 41 | np.zeros(per * dU)]) 42 | return prev_sol 43 | -------------------------------------------------------------------------------- /dmbrl/misc/optimizers/policy_network/BC_A_policy.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------------------- 2 | # @author: 3 | # Tingwu Wang 4 | # ----------------------------------------------------------------------------- 5 | import tensorflow as tf 6 | import numpy as np 7 | 8 | from . import base_policy 9 | from . import tf_networks 10 | from dmbrl.misc import logger 11 | 12 | 13 | class policy_network(base_policy.base_policy_network): 14 | ''' @brief: 15 | In this object class, we define the network structure, the restore 16 | function and save function. 
17 | 18 | @self.args.training_scheme 19 | @BC-AR: (action space) behavior cloning with the real data 20 | @BC-AI: (action space) behavior cloning using imaginary dataset. 21 | 22 | @AVG-R: (weight space) behavior cloning by setting the weight to 23 | the average of the weights selected during sampling 24 | @BC-PR: (weight space) behavior cloning by distilling the policy 25 | produced by the weights during sampling 26 | @AVG-I: (weight space) AVG-R but with imaginary dataset 27 | @BC-PI: (weight space) BC-PR but with imaginary dataset 28 | ''' 29 | 30 | def __init__(self, args, session, name_scope, 31 | observation_size, action_size): 32 | 33 | super(policy_network, self).__init__( 34 | args, session, name_scope, observation_size, action_size 35 | ) 36 | assert self.args.training_scheme in ['BC-AR', 'BC-AI'] 37 | assert self.args.cem_type in ['POPLINA-INIT', 'POPLINA-REPLAN'] 38 | 39 | def build_network(self): 40 | """ @brief: Note that build_network is only needed for the training 41 | """ 42 | network_shape = [self._observation_size] + \ 43 | self.args.policy_network_shape + [self._action_size] 44 | num_layer = len(network_shape) - 1 45 | act_type = ['tanh'] * (num_layer - 1) + [None] 46 | norm_type = [None] * (num_layer - 1) + [None] 47 | init_data = [] 48 | for _ in range(num_layer): 49 | init_data.append( 50 | {'w_init_method': 'normc', 'w_init_para': {'stddev': 1.0}, 51 | 'b_init_method': 'constant', 'b_init_para': {'val': 0.0}} 52 | ) 53 | init_data[-1]['w_init_para']['stddev'] = 0.01 # the output layer std 54 | 55 | self._MLP = tf_networks.MLP( 56 | dims=network_shape, scope='policy_mlp', train=True, 57 | activation_type=act_type, normalizer_type=norm_type, 58 | init_data=init_data 59 | ) 60 | 61 | # fetch all the trainable variables 62 | self._set_var_list() 63 | 64 | def build_loss(self): 65 | 66 | self._build_ph() 67 | self._tensor, self._update_operator = {}, {} 68 | 69 | # construct the input to the forward network, we normalize the state 70 | # input, and concatenate with the action 71 | self._tensor['normalized_start_state'] = ( 72 | self._input_ph['start_state'] - 73 | self._whitening_operator['state_mean'] 74 | ) / self._whitening_operator['state_std'] 75 | self._tensor['net_input'] = self._tensor['normalized_start_state'] 76 | 77 | # the output policy of the network 78 | self._tensor['action'] = self._MLP(self._tensor['net_input']) 79 | 80 | self._input_ph['target_action'] = tf.placeholder( 81 | tf.float32, [None, self._action_size], name='target_action' 82 | ) 83 | 84 | self._update_operator['loss'] = tf.reduce_mean( 85 | tf.square(self._input_ph['target_action'] - 86 | self._tensor['action']) 87 | ) 88 | 89 | self._update_operator['update_op'] = tf.train.AdamOptimizer( 90 | learning_rate=self.args.policy_lr, 91 | ).minimize(self._update_operator['loss']) 92 | logger.info("policy training learning rate: {}".format( 93 | self.args.policy_lr) 94 | ) 95 | 96 | def train(self, data_dict, training_info={}): 97 | 98 | # Step 1: update the running mean 99 | imaginary_dataset = training_info['imaginary_dataset'] 100 | 101 | # Step 2: data processing 102 | if self.args.training_scheme == 'BC-AR': 103 | data_dict['target_action'] = data_dict['action'] # for training 104 | elif self.args.training_scheme == 'BC-AI': 105 | # add imaginary data to the dataset 106 | for key in ['start_state', 'action']: 107 | data_dict[key] = \ 108 | np.concatenate([data_dict[key], imaginary_dataset[key]]) 109 | data_dict['target_action'] = data_dict['action'] # for training 110 | 111 | else: 112 | 
raise NotImplementedError 113 | 114 | self._set_whitening_var(data_dict['whitening_stats']) 115 | self.optimize_weights(data_dict, ['start_state', 'target_action']) 116 | -------------------------------------------------------------------------------- /dmbrl/misc/optimizers/policy_network/BC_WA_policy.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------------------- 2 | # @author: 3 | # Tingwu Wang 4 | # ----------------------------------------------------------------------------- 5 | import tensorflow as tf 6 | import numpy as np 7 | 8 | from . import base_policy 9 | from . import tf_networks 10 | from . import tf_utils 11 | from dmbrl.misc import logger 12 | 13 | 14 | class policy_network(base_policy.base_policy_network): 15 | ''' @brief: 16 | In this object class, we define the network structure, the restore 17 | function and save function. 18 | 19 | @self.args.training_scheme 20 | @BC-AR: (action space) behavior cloning with the real data 21 | @BC-AI: (action space) behavior cloning using imaginary dataset. 22 | 23 | @AVG-R: (weight space) behavior cloning by setting the weight to 24 | the average of the weights selected during sampling 25 | @BC-PR: (weight space) behavior cloning by distilling the policy 26 | produced by the weights during sampling 27 | @AVG-I: (weight space) AVG-R but with imaginary dataset 28 | @BC-PI: (weight space) BC-PR but with imaginary dataset 29 | ''' 30 | 31 | def __init__(self, args, session, name_scope, 32 | observation_size, action_size): 33 | 34 | super(policy_network, self).__init__( 35 | args, session, name_scope, observation_size, action_size 36 | ) 37 | assert self.args.training_scheme in ['AVG-R', 'AVG-I'] 38 | assert self.args.cem_type in ['POPLINP-SEP', 'POPLINP-UNI'] 39 | 40 | def build_network(self): 41 | """ @brief: Note that build_network is only needed for the training 42 | """ 43 | network_shape = [self._observation_size] + \ 44 | self.args.policy_network_shape + [self._action_size] 45 | num_layer = len(network_shape) - 1 46 | act_type = ['tanh'] * (num_layer - 1) + [None] 47 | norm_type = [None] * (num_layer - 1) + [None] 48 | init_data = [] 49 | for _ in range(num_layer): 50 | init_data.append( 51 | {'w_init_method': 'normc', 'w_init_para': {'stddev': 1.0}, 52 | 'b_init_method': 'constant', 'b_init_para': {'val': 0.0}} 53 | ) 54 | init_data[-1]['w_init_para']['stddev'] = 0.01 # the output layer std 55 | 56 | self._MLP = tf_networks.W_MLP( 57 | dims=network_shape, scope='policy_mlp', train=True, 58 | activation_type=act_type, normalizer_type=norm_type, 59 | init_data=init_data 60 | ) 61 | 62 | # fetch all the trainable variables 63 | self._set_var_list() 64 | 65 | def build_loss(self): 66 | 67 | self._build_ph() 68 | self._tensor, self._update_operator = {}, {} 69 | 70 | self._MLP_var_list = self._MLP.get_variable_list() 71 | self._set_weight = tf_utils.set_network_weights( 72 | self._session, self._MLP_var_list, '' 73 | ) 74 | logger.info("policy training learning rate: {}".format( 75 | self.args.policy_lr) 76 | ) 77 | 78 | self._session.run(tf.variables_initializer(tf.global_variables())) 79 | 80 | # synchronize the two networks if needed 81 | if self.args.cem_type in ['POPLINP-SEP', 'POPLINP-UNI'] and \ 82 | self.args.training_scheme in ['BC-PR', 'BC-PI']: 83 | weight_dict = self._get_weight() # get from MLP 84 | self._set_weight(weight_dict) # set the target MLP 85 | 86 | def train(self, data_dict, training_info={}): 87 | 88 | # Step 
1: update the running mean 89 | imaginary_dataset = training_info['imaginary_dataset'] 90 | 91 | # Step 2: data processing 92 | if self.args.training_scheme in ['AVG-R']: 93 | data_dict['target_weight'] = data_dict['weight'] # for training 94 | data_dict['weight'] = data_dict['target_weight'] # for training 95 | 96 | elif self.args.training_scheme in ['AVG-I']: 97 | for key in ['start_state', 'weight']: 98 | data_dict[key] = \ 99 | np.concatenate([data_dict[key], imaginary_dataset[key]]) 100 | data_dict['target_weight'] = data_dict['weight'] # for training 101 | data_dict['weight'] = data_dict['target_weight'] # for training 102 | 103 | else: 104 | raise NotImplementedError 105 | 106 | # Step 3: parse the test set and train the network 107 | # get the average of the weights 108 | self._set_whitening_var(data_dict['whitening_stats']) 109 | average_weights = \ 110 | np.reshape(np.mean(data_dict['target_weight'], axis=0), [1, -1]) 111 | 112 | if self.args.zero_weight == 'yes': 113 | average_weights *= 0.0 114 | logger.warning('Using Zero Weights') 115 | weight_dict = \ 116 | self._MLP.parse_np_weight_vec_into_dict(average_weights) 117 | 118 | # set the weights 119 | self._set_weight(weight_dict) 120 | -------------------------------------------------------------------------------- /dmbrl/misc/optimizers/policy_network/BC_WD_policy.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------------------- 2 | # @author: 3 | # Tingwu Wang 4 | # ----------------------------------------------------------------------------- 5 | import tensorflow as tf 6 | import numpy as np 7 | 8 | from . import base_policy 9 | from . import tf_networks 10 | from . import tf_utils 11 | from . import whitening_util 12 | from dmbrl.misc import logger 13 | 14 | 15 | class policy_network(base_policy.base_policy_network): 16 | ''' @brief: 17 | In this object class, we define the network structure, the restore 18 | function and save function. 19 | 20 | @self.args.training_scheme 21 | @BC-AR: (action space) behavior cloning with the real data 22 | @BC-AI: (action space) behavior cloning using imaginary dataset. 
23 | 24 | @AVG-R: (weight space) behavior cloning by setting the weight to 25 | the average of the weights selected during sampling 26 | @BC-PR: (weight space) behavior cloning by distilling the policy 27 | produced by the weights during sampling 28 | @AVG-I: (weight space) AVG-R but with imaginary dataset 29 | @BC-PI: (weight space) BC-PR but with imaginary dataset 30 | ''' 31 | 32 | def __init__(self, args, session, name_scope, 33 | observation_size, action_size): 34 | 35 | super(policy_network, self).__init__( 36 | args, session, name_scope, observation_size, action_size 37 | ) 38 | assert self.args.cem_type in ['POPLINP-SEP', 'POPLINP-UNI'] 39 | assert self.args.training_scheme in ['BC-PR', 'BC-PI'] 40 | 41 | def build_network(self): 42 | """ @brief: Note that build_network is only needed for the training 43 | """ 44 | network_shape = [self._observation_size] + \ 45 | self.args.policy_network_shape + [self._action_size] 46 | num_layer = len(network_shape) - 1 47 | act_type = ['tanh'] * (num_layer - 1) + [None] 48 | norm_type = [None] * (num_layer - 1) + [None] 49 | init_data = [] 50 | # TODO: be careful when it comes to batchnorm 51 | assert norm_type[0] is not 'batchnorm' and \ 52 | norm_type[0] is not 'batch_norm' 53 | 54 | for _ in range(num_layer): 55 | init_data.append( 56 | {'w_init_method': 'normc', 'w_init_para': {'stddev': 1.0}, 57 | 'b_init_method': 'constant', 'b_init_para': {'val': 0.0}} 58 | ) 59 | init_data[-1]['w_init_para']['stddev'] = 0.01 # the output layer std 60 | 61 | self._MLP = tf_networks.W_MLP( 62 | dims=network_shape, scope='policy_mlp', train=True, 63 | activation_type=act_type, normalizer_type=norm_type, 64 | init_data=init_data 65 | ) 66 | self._target_MLP = tf_networks.W_MLP( 67 | dims=network_shape, scope='target_policy_mlp', train=True, 68 | activation_type=act_type, normalizer_type=norm_type, 69 | init_data=init_data 70 | ) 71 | 72 | # fetch all the trainable variables 73 | self._set_var_list() 74 | 75 | def build_loss(self): 76 | """ @brief: the MLP is used to generate samples, 77 | while the target_MLP is used during the training. target_MLP is 78 | always older than the MLP, and we feed the dataset into target_MLP 79 | to train MLP. 80 | 81 | After each update, we synchronize target_MLP by copying weights from 82 | MLP. 
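            Loosely, the distillation objective assembled below is (illustrative notation, not the
            exact graph code):

                loss = mean || MLP(s; 0) - target_MLP(s; w_planned) ||^2

            i.e. the live MLP, queried with zero weight noise, is regressed onto the actions that
            target_MLP produces under the weight noise w_planned found during planning; the Adam
            update only touches the MLP variables, and train() then copies the MLP weights (and
            whitening statistics) back into target_MLP.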
83 | """ 84 | 85 | self._build_ph() 86 | self._tensor, self._update_operator = {}, {} 87 | whitening_util.add_whitening_operator( 88 | self._whitening_operator, self._whitening_variable, 89 | 'target_state', self._observation_size 90 | ) 91 | 92 | # the weight input_ph is always set to 0.0 93 | self._input_ph['weight'] = tf.placeholder( 94 | shape=[None, self._MLP.get_weight_size()], 95 | dtype=tf.float32, name='weight_noise' 96 | ) 97 | # the actual weight generated from the planning 98 | self._input_ph['target_weight'] = tf.placeholder( 99 | shape=[None, self._MLP.get_weight_size()], dtype=tf.float32, 100 | name='target_weight_noise' 101 | ) 102 | self._tensor['net_input'] = ( 103 | self._input_ph['start_state'] - 104 | self._whitening_operator['state_mean'] 105 | ) / self._whitening_operator['state_std'] 106 | self._tensor['target_net_input'] = ( 107 | self._input_ph['start_state'] - 108 | self._whitening_operator['target_state_mean'] 109 | ) / self._whitening_operator['target_state_std'] 110 | 111 | # the output policy of the network 112 | self._tensor['action'] = self._MLP( 113 | self._tensor['net_input'], self._input_ph['weight'] 114 | ) 115 | self._tensor['target_action'] = self._target_MLP( 116 | self._tensor['target_net_input'], 117 | self._input_ph['target_weight'] 118 | ) 119 | 120 | # the distillation loss 121 | self._update_operator['loss'] = tf.reduce_mean( 122 | tf.square(self._tensor['target_action'] - 123 | self._tensor['action']) 124 | ) 125 | self._target_MLP_var_list = self._target_MLP.get_variable_list() 126 | self._MLP_var_list = self._MLP.get_variable_list() 127 | 128 | self._update_operator['update_op'] = tf.train.AdamOptimizer( 129 | learning_rate=self.args.policy_lr, 130 | ).minimize(self._update_operator['loss'], 131 | var_list=self._MLP_var_list) 132 | logger.info("policy training learning rate: {}".format( 133 | self.args.policy_lr) 134 | ) 135 | 136 | # synchronize the weights 137 | self._get_weight = tf_utils.get_network_weights( 138 | self._session, self._MLP_var_list, 'policy_mlp' 139 | ) 140 | self._set_weight = tf_utils.set_network_weights( 141 | self._session, self._target_MLP_var_list, 'target_policy_mlp' 142 | ) 143 | 144 | self._session.run(tf.variables_initializer(tf.global_variables())) 145 | 146 | # synchronize the two networks if needed 147 | self._set_weight(self._get_weight()) # set the target MLP 148 | 149 | def train(self, data_dict, training_info={}): 150 | 151 | # Step 1: update the running mean 152 | imaginary_dataset = training_info['imaginary_dataset'] 153 | 154 | # Step 2: data processing 155 | if self.args.training_scheme in ['BC-PR']: 156 | data_dict['target_weight'] = data_dict['weight'] # for training 157 | data_dict['weight'] = 0.0 * data_dict['weight'] # for training 158 | 159 | elif self.args.training_scheme in ['BC-PI']: 160 | for key in ['start_state', 'weight']: 161 | data_dict[key] = \ 162 | np.concatenate([data_dict[key], imaginary_dataset[key]]) 163 | data_dict['target_weight'] = data_dict['weight'] # for training 164 | data_dict['weight'] = 0.0 * data_dict['weight'] # for training 165 | 166 | else: 167 | raise NotImplementedError 168 | 169 | self._set_whitening_var(data_dict['whitening_stats']) 170 | self.optimize_weights(data_dict, 171 | ['start_state', 'target_weight', 'weight']) 172 | 173 | # synchronize the networks 174 | whitening_util.copy_whitening_var(data_dict['whitening_stats'], 175 | 'state', 'target_state') 176 | whitening_util.set_whitening_var( 177 | self._session, self._whitening_operator, 178 | 
data_dict['whitening_stats'], ['target_state'] 179 | ) 180 | if self.args.zero_weight == 'yes': 181 | logger.warning('Using Random Weights') 182 | else: 183 | self._set_weight(self._get_weight()) 184 | -------------------------------------------------------------------------------- /dmbrl/misc/optimizers/policy_network/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WilsonWangTHU/POPLIN/edd8dba50f9049c6164eda774602bef0c299cb51/dmbrl/misc/optimizers/policy_network/__init__.py -------------------------------------------------------------------------------- /dmbrl/misc/optimizers/policy_network/base_policy.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------------------- 2 | # @author: 3 | # Tingwu Wang 4 | # ----------------------------------------------------------------------------- 5 | import numpy as np 6 | import tensorflow as tf 7 | 8 | from . import whitening_util 9 | from . import tf_utils 10 | from dmbrl.misc import logger 11 | 12 | 13 | def limit_action(action, lb=-1, ub=1): 14 | 15 | return tf.minimum(tf.maximum(action, lb), ub) 16 | 17 | 18 | class base_policy_network(object): 19 | ''' 20 | @brief: 21 | In this object class, we define the network structure, the restore 22 | function and save function. 23 | It will only be called in the agent/agent.py 24 | ''' 25 | 26 | def __init__(self, args, session, name_scope, 27 | observation_size, action_size): 28 | self.args = args 29 | 30 | self._session = session 31 | self._name_scope = name_scope 32 | 33 | self._observation_size = observation_size 34 | self._action_size = action_size 35 | 36 | # self._task_name = args.task_name 37 | self._network_shape = args.policy_network_shape 38 | 39 | self._npr = np.random.RandomState(args.seed) 40 | 41 | self._whitening_operator = {} 42 | self._whitening_variable = [] 43 | 44 | def build_network(self): 45 | raise NotImplementedError 46 | 47 | def build_loss(self): 48 | raise NotImplementedError 49 | 50 | def _build_ph(self): 51 | 52 | # initialize the running mean and std (whitening) 53 | whitening_util.add_whitening_operator( 54 | self._whitening_operator, self._whitening_variable, 55 | 'state', self._observation_size 56 | ) 57 | 58 | # initialize the input placeholder 59 | self._input_ph = { 60 | 'start_state': tf.placeholder( 61 | tf.float32, [None, self._observation_size], name='start_state' 62 | ) 63 | } 64 | 65 | def get_input_placeholder(self): 66 | return self._input_ph 67 | 68 | def get_weights(self): 69 | return None 70 | 71 | def set_weights(self, weights_dict): 72 | pass 73 | 74 | def forward_network(self, observation, weight_vec=None): 75 | normalized_start_state = ( 76 | observation - self._whitening_operator['state_mean'] 77 | ) / self._whitening_operator['state_std'] 78 | 79 | # the output policy of the network 80 | if weight_vec is None: 81 | action = self._MLP(normalized_start_state) 82 | else: 83 | action = self._MLP(normalized_start_state, weight_vec) 84 | 85 | action = limit_action(action) 86 | 87 | return action 88 | 89 | def _set_var_list(self): 90 | # collect the tf variable and the trainable tf variable 91 | self._trainable_var_list = [var for var in tf.trainable_variables() 92 | if self._name_scope in var.name] 93 | 94 | self._all_var_list = [var for var in tf.global_variables() 95 | if self._name_scope in var.name] 96 | 97 | # the weights that actually matter 98 | self._network_var_list = \ 
99 | self._trainable_var_list + self._whitening_variable 100 | 101 | self._set_network_weights = tf_utils.set_network_weights( 102 | self._session, self._network_var_list, self._name_scope 103 | ) 104 | 105 | self._get_network_weights = tf_utils.get_network_weights( 106 | self._session, self._network_var_list, self._name_scope 107 | ) 108 | 109 | def load_checkpoint(self, ckpt_path): 110 | pass 111 | 112 | def save_checkpoint(self, ckpt_path): 113 | pass 114 | 115 | def get_whitening_operator(self): 116 | return self._whitening_operator 117 | 118 | def _set_whitening_var(self, whitening_stats): 119 | whitening_util.set_whitening_var( 120 | self._session, self._whitening_operator, whitening_stats, ['state'] 121 | ) 122 | 123 | def train(self, data_dict, replay_buffer, training_info={}): 124 | raise NotImplementedError 125 | 126 | def eval(self, data_dict): 127 | raise NotImplementedError 128 | 129 | def act(self, data_dict): 130 | raise NotImplementedError 131 | 132 | def optimize_weights(self, data_dict, training_keys): 133 | 134 | test_set_id = np.arange(len(data_dict['start_state'])) 135 | num_test_data = int(len(test_set_id) * self.args.pct_testset) 136 | self._npr.shuffle(test_set_id) 137 | test_set = {key: data_dict[key][test_set_id][:num_test_data] 138 | for key in training_keys} 139 | train_set = {key: data_dict[key][test_set_id][num_test_data:] 140 | for key in training_keys} 141 | test_error = old_test_error = np.inf 142 | 143 | # supervised training the behavior (behavior cloning) 144 | for epoch in range(self.args.policy_epochs): 145 | total_batch_len = len(train_set['start_state']) 146 | total_batch_inds = np.arange(total_batch_len) 147 | self._npr.shuffle(total_batch_inds) 148 | num_minibatch = \ 149 | max(total_batch_len // self.args.minibatch_size, 1) 150 | train_error = [] 151 | 152 | for start in range(num_minibatch): 153 | start = start * self.args.minibatch_size 154 | end = min(start + self.args.minibatch_size, total_batch_len) 155 | batch_inds = total_batch_inds[start: end] 156 | feed_dict = {self._input_ph[key]: data_dict[key][batch_inds] 157 | for key in training_keys} 158 | 159 | error, _ = self._session.run( 160 | [self._update_operator['loss'], 161 | self._update_operator['update_op']], feed_dict=feed_dict 162 | ) 163 | train_error.append(error) 164 | 165 | # see the test error 166 | feed_dict = {self._input_ph[key]: test_set[key] 167 | for key in training_keys} 168 | 169 | test_error = self._session.run( 170 | self._update_operator['loss'], feed_dict=feed_dict 171 | ) 172 | logger.info('Epoch %d; Train Error: %.6f; Test Error: %.6f' % 173 | (epoch, np.mean(train_error), test_error)) 174 | 175 | if test_error > old_test_error and epoch % 5 == 0: 176 | # TODO: MAKE A COUNTER HERE 177 | logger.info('Early stoping') 178 | break 179 | else: 180 | old_test_error = test_error 181 | -------------------------------------------------------------------------------- /dmbrl/misc/optimizers/policy_network/gmm_util.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------------------- 2 | # @brief: 3 | # ----------------------------------------------------------------------------- 4 | 5 | import numpy as np 6 | 7 | 8 | def get_conditional_gaussian(mean, cov, observation_size): 9 | """ @brief: see the function with the same name in mbbl 10 | 11 | y = f_c + f_d.dot(x) 12 | cov(y) = pi_cov 13 | """ 14 | 15 | condition_size = observation_size 16 | pi_x = 
np.linalg.solve(cov[:condition_size, :condition_size], 17 | cov[:condition_size, condition_size:]).T 18 | pi_c = mean[condition_size:] - pi_x.dot(mean[:condition_size]) 19 | pi_cov = cov[condition_size:, condition_size:] - \ 20 | pi_x.dot(cov[:condition_size, :condition_size]).dot(pi_x.T) 21 | pi_cov = 0.5 * (pi_cov + pi_cov.T) 22 | 23 | # return {'pol_k': pi_c, 'pol_K': pi_x, 'pol_S': pi_cov} 24 | return {'f_c': pi_c, 'f_d': pi_x, 'cov': pi_cov} 25 | 26 | 27 | def get_gmm_posterior(gmm, gmm_weights, data): 28 | """ @brief: see the function with the same name in mbbl 29 | """ 30 | 31 | # posterior mean of gmm (C --> num_cluster, N --> num_data) 32 | response = gmm.predict_proba(np.reshape(data, [1, -1])) # (N, C) 33 | # (C, 1) 34 | avg_response = np.reshape(np.mean(np.array(response), axis=0), [-1, 1]) 35 | pos_mean = np.mean(avg_response * gmm_weights['mean'], axis=0) # (Vec) 36 | 37 | # posterior cov = (sum_i) res_i * (cov_i + \mu_i(\mu_i - \mu)^T) 38 | diff_mu = gmm_weights['mean'] - np.expand_dims(pos_mean, axis=0) # (C, Vec) 39 | mui_mui_muT = np.expand_dims(gmm_weights['mean'], axis=1) * \ 40 | np.expand_dims(diff_mu, axis=2) # (C, Vec, Vec), the outer product 41 | response_expand = np.expand_dims(avg_response, axis=2) 42 | pos_cov = np.sum((gmm_weights['cov'] + mui_mui_muT) * 43 | response_expand, axis=0) 44 | 45 | return pos_mean, pos_cov 46 | -------------------------------------------------------------------------------- /dmbrl/misc/optimizers/policy_network/tf_norm.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # @brief: define the batchnorm and layernorm in this function 3 | # ------------------------------------------------------------------------------ 4 | 5 | import tensorflow as tf 6 | 7 | 8 | def layer_norm(x, name_scope, epsilon=1e-5, use_bias=True, 9 | use_scale=True, gamma_init=None, data_format='NHWC'): 10 | """ 11 | @Brief: code modified from ppwwyyxx github.com/ppwwyyxx/tensorpack/, 12 | under layer_norm.py. 13 | Layer Normalization layer, as described in the paper: 14 | https://arxiv.org/abs/1607.06450. 15 | @input: 16 | x (tf.Tensor): a 4D or 2D tensor. When 4D, the layout should 17 | match data_format. 
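        Roughly, the op below computes, per example and with the statistics taken over all
        non-batch axes (illustrative formula):

            y = gamma * (x - mean(x)) / sqrt(var(x) + epsilon) + beta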
18 | """ 19 | with tf.variable_scope(name_scope): 20 | shape = x.get_shape().as_list() 21 | ndims = len(shape) 22 | assert ndims in [2, 4] 23 | 24 | mean, var = tf.nn.moments(x, list(range(1, len(shape))), keep_dims=True) 25 | 26 | if data_format == 'NCHW': 27 | chan = shape[1] 28 | new_shape = [1, chan, 1, 1] 29 | else: 30 | chan = shape[-1] 31 | new_shape = [1, 1, 1, chan] 32 | if ndims == 2: 33 | new_shape = [1, chan] 34 | 35 | if use_bias: 36 | beta = tf.get_variable( 37 | 'beta', [chan], initializer=tf.constant_initializer() 38 | ) 39 | beta = tf.reshape(beta, new_shape) 40 | else: 41 | beta = tf.zeros([1] * ndims, name='beta') 42 | if use_scale: 43 | if gamma_init is None: 44 | gamma_init = tf.constant_initializer(1.0) 45 | gamma = tf.get_variable('gamma', [chan], initializer=gamma_init) 46 | gamma = tf.reshape(gamma, new_shape) 47 | else: 48 | gamma = tf.ones([1] * ndims, name='gamma') 49 | 50 | ret = tf.nn.batch_normalization( 51 | x, mean, var, beta, gamma, epsilon, name='output' 52 | ) 53 | return ret 54 | 55 | 56 | def batch_norm_with_train(x, name_scope, epsilon=1e-5, momentum=0.9): 57 | ret = tf.contrib.layers.batch_norm( 58 | x, decay=momentum, updates_collections=None, epsilon=epsilon, 59 | scale=True, is_training=True, scope=name_scope 60 | ) 61 | return ret 62 | 63 | 64 | def batch_norm_without_train(x, name_scope, epsilon=1e-5, momentum=0.9): 65 | ret = tf.contrib.layers.batch_norm( 66 | x, decay=momentum, updates_collections=None, epsilon=epsilon, 67 | scale=True, is_training=False, scope=name_scope 68 | ) 69 | return ret 70 | -------------------------------------------------------------------------------- /dmbrl/misc/optimizers/policy_network/tf_utils.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------------------- 2 | # @brief: 3 | # ----------------------------------------------------------------------------- 4 | 5 | import tensorflow as tf 6 | import numpy as np 7 | 8 | 9 | def get_weight_decay_loss(var_list): 10 | weight_decay_dict = {} 11 | weight_decay_sum = 0.0 12 | for var in var_list: 13 | i_weight_decay = tf.nn.l2_loss(var) 14 | weight_decay_dict[var.name] = i_weight_decay 15 | weight_decay_sum += i_weight_decay  # add this variable's L2 penalty to the total 16 | return weight_decay_sum, weight_decay_dict 17 | 18 | 19 | def logsigmoid(x): 20 | return -tf.nn.softplus(-x) 21 | 22 | 23 | def logit_bernoulli_entropy(logits): 24 | ent = (1.
- tf.nn.sigmoid(logits)) * logits - logsigmoid(logits) 25 | return ent 26 | 27 | 28 | def gauss_selfKL_firstfixed(mu, logstd): 29 | ''' 30 | @brief: 31 | KL divergence with itself, holding first argument fixed 32 | Use stop gradient to cut the gradient flows 33 | ''' 34 | mu1, logstd1 = map(tf.stop_gradient, [mu, logstd]) 35 | mu2, logstd2 = mu, logstd 36 | 37 | return gauss_KL(mu1, logstd1, mu2, logstd2) 38 | 39 | 40 | def gauss_log_prob(mu, logstd, x): 41 | # log-probability of action x under the parameterized Gaussian distribution 42 | var = tf.exp(2 * logstd) 43 | gp = - tf.square(x - mu) / (2 * var) \ 44 | - .5 * tf.log(tf.constant(2 * np.pi)) \ 45 | - logstd 46 | return tf.reduce_sum(gp, [1]) 47 | 48 | 49 | def gauss_KL(mu1, logstd1, mu2, logstd2): 50 | # KL divergence between two parameterized Gaussian distributions 51 | var1 = tf.exp(2 * logstd1) 52 | var2 = tf.exp(2 * logstd2) 53 | 54 | kl = tf.reduce_sum( 55 | logstd2 - logstd1 + (var1 + tf.square(mu1 - mu2)) / (2 * var2) - 0.5 56 | ) 57 | return kl 58 | 59 | 60 | def gauss_ent(mu, logstd): 61 | # Shannon entropy of a parameterized Gaussian distribution 62 | h = tf.reduce_sum( 63 | logstd + tf.constant(0.5 * np.log(2 * np.pi * np.e), tf.float32) 64 | ) 65 | return h 66 | 67 | 68 | def slice_2d(x, inds0, inds1): 69 | inds0 = tf.cast(inds0, tf.int64) 70 | inds1 = tf.cast(inds1, tf.int64) 71 | shape = tf.cast(tf.shape(x), tf.int64) 72 | ncols = shape[1] 73 | x_flat = tf.reshape(x, [-1]) 74 | return tf.gather(x_flat, inds0 * ncols + inds1) 75 | 76 | 77 | def var_shape(x): 78 | out = [k.value for k in x.get_shape()] 79 | assert all(isinstance(a, int) for a in out), \ 80 | "shape function assumes that shape is fully known" 81 | return out 82 | 83 | 84 | def numel(x): 85 | return np.prod(var_shape(x)) 86 | 87 | 88 | def l2_loss(var_list): 89 | l2_norm = tf.constant(0.)
90 | for var in var_list: 91 | l2_norm += tf.nn.l2_loss(var) 92 | return l2_norm 93 | 94 | 95 | def flatgrad(loss, var_list): 96 | grads = tf.gradients(loss, var_list) 97 | return tf.concat( 98 | [tf.reshape(grad, [numel(v)]) for (v, grad) in zip(var_list, grads)], 0 99 | ) 100 | 101 | 102 | class SetFromFlat(object): 103 | 104 | def __init__(self, session, var_list): 105 | self.session = session 106 | assigns = [] 107 | shapes = list(map(var_shape, var_list))  # materialize: the shapes are iterated twice below 108 | total_size = sum(np.prod(shape) for shape in shapes) 109 | self.theta = theta = tf.placeholder(tf.float32, [total_size]) 110 | start = 0 111 | assigns = [] 112 | for (shape, v) in zip(shapes, var_list): 113 | size = np.prod(shape) 114 | assigns.append( 115 | tf.assign(v, tf.reshape(theta[start:start + size], shape)) 116 | ) 117 | start += size 118 | self.op = tf.group(*assigns) 119 | 120 | def __call__(self, theta): 121 | self.session.run(self.op, feed_dict={self.theta: theta}) 122 | 123 | 124 | class GetFlat(object): 125 | 126 | def __init__(self, session, var_list): 127 | self.session = session 128 | self.op = tf.concat([tf.reshape(v, [numel(v)]) for v in var_list], 0) 129 | 130 | def __call__(self): 131 | return self.op.eval(session=self.session) 132 | 133 | 134 | class get_network_weights(object): 135 | """ @brief: 136 | call this function to get the weights in the policy network 137 | """ 138 | 139 | def __init__(self, session, var_list, base_namescope): 140 | self._session = session 141 | self._base_namescope = base_namescope 142 | # self._op is a dict; note that the base namescope is removed, as the 143 | # worker and the trainer have different base_namescopes 144 | self._op = { 145 | var.name.replace(self._base_namescope, ''): var 146 | for var in var_list 147 | } 148 | 149 | def __call__(self): 150 | return self._session.run(self._op) 151 | 152 | 153 | class set_network_weights(object): 154 | """ @brief: 155 | Call this function to set the weights in the policy network 156 | """ 157 | 158 | def __init__(self, session, var_list, base_namescope): 159 | self._session = session 160 | self._base_namescope = base_namescope 161 | 162 | self._var_list = var_list 163 | self._placeholders = {} 164 | self._assigns = [] 165 | 166 | with tf.get_default_graph().as_default(): 167 | for var in self._var_list: 168 | var_name = var.name.replace(self._base_namescope, '') 169 | self._placeholders[var_name] = tf.placeholder( 170 | tf.float32, var.get_shape() 171 | ) 172 | self._assigns.append( 173 | tf.assign(var, self._placeholders[var_name]) 174 | ) 175 | 176 | def __call__(self, weight_dict): 177 | assert len(weight_dict) == len(self._var_list) 178 | 179 | feed_dict = {} 180 | for var in self._var_list: 181 | var_name = var.name.replace(self._base_namescope, '') 182 | assert var_name in weight_dict 183 | feed_dict[self._placeholders[var_name]] = weight_dict[var_name] 184 | 185 | self._session.run(self._assigns, feed_dict) 186 | 187 | 188 | def xavier_initializer(shape): 189 | dim_sum = np.sum(shape) 190 | if len(shape) == 1: 191 | dim_sum += 1 192 | bound = np.sqrt(6.0 / dim_sum) 193 | return tf.random_uniform(shape, minval=-bound, maxval=bound) 194 | 195 | 196 | def fully_connected(input_layer, input_size, output_size, weight_init, 197 | bias_init, scope, trainable): 198 | with tf.variable_scope(scope): 199 | w = tf.get_variable( 200 | "w", [input_size, output_size], 201 | initializer=weight_init, trainable=trainable 202 | ) 203 | b = tf.get_variable( 204 | "b", [output_size], initializer=bias_init, trainable=trainable 205 | ) 206 |
return tf.matmul(input_layer, w) + b 207 | -------------------------------------------------------------------------------- /dmbrl/misc/optimizers/policy_network/whitening_util.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------------------- 2 | # @author: 3 | # Tingwu Wang 4 | # ----------------------------------------------------------------------------- 5 | 6 | import numpy as np 7 | import tensorflow as tf 8 | 9 | _ALLOW_KEY = ['state', 'diff_state', 'action'] 10 | 11 | 12 | def init_whitening_stats(key_list): 13 | whitening_stats = {} 14 | for key in key_list: 15 | whitening_stats[key] = {'mean': 0.0, 'variance': 1, 'step': 0.01, 16 | 'square_sum': 0.01, 'sum': 0.0, 'std': np.nan} 17 | return whitening_stats 18 | 19 | 20 | def update_whitening_stats(whitening_stats, rollout_data, key): 21 | # collect the info 22 | new_sum, new_step_sum, new_sq_sum = 0.0, 0.0, 0.0 23 | 24 | if type(rollout_data) is dict: 25 | new_sum += rollout_data[key].sum(axis=0) 26 | new_sq_sum += (np.square(rollout_data[key])).sum(axis=0) 27 | new_step_sum += rollout_data[key].shape[0] 28 | else: 29 | assert type(rollout_data) is list 30 | for i_episode in rollout_data: 31 | if key == 'state': 32 | i_data = i_episode['obs'] 33 | elif key == 'action': 34 | i_data = i_episode['actions'] 35 | else: 36 | assert key == 'diff_state' 37 | i_data = i_episode['obs'][1:] - i_episode['obs'][:-1] 38 | 39 | new_sum += i_data.sum(axis=0) 40 | new_sq_sum += (np.square(i_data)).sum(axis=0) 41 | new_step_sum += i_data.shape[0] 42 | 43 | # update the whitening info 44 | whitening_stats[key]['step'] += new_step_sum 45 | whitening_stats[key]['sum'] += new_sum 46 | whitening_stats[key]['square_sum'] += new_sq_sum 47 | whitening_stats[key]['mean'] = \ 48 | whitening_stats[key]['sum'] / whitening_stats[key]['step'] 49 | whitening_stats[key]['variance'] = np.maximum( 50 | whitening_stats[key]['square_sum'] / whitening_stats[key]['step'] - 51 | np.square(whitening_stats[key]['mean']), 1e-2 52 | ) 53 | whitening_stats[key]['std'] = \ 54 | (whitening_stats[key]['variance'] + 1e-6) ** .5 55 | 56 | 57 | def add_whitening_operator(whitening_operator, whitening_variable, name, size): 58 | 59 | with tf.variable_scope('whitening_' + name): 60 | whitening_operator[name + '_mean'] = tf.Variable( 61 | np.zeros([1, size], np.float32), 62 | name=name + "_mean", trainable=False 63 | ) 64 | whitening_operator[name + '_std'] = tf.Variable( 65 | np.ones([1, size], np.float32), 66 | name=name + "_std", trainable=False 67 | ) 68 | whitening_variable.append(whitening_operator[name + '_mean']) 69 | whitening_variable.append(whitening_operator[name + '_std']) 70 | 71 | # the reset placeholders 72 | whitening_operator[name + '_mean_ph'] = tf.placeholder( 73 | tf.float32, shape=(1, size), name=name + '_reset_mean_ph' 74 | ) 75 | whitening_operator[name + '_std_ph'] = tf.placeholder( 76 | tf.float32, shape=(1, size), name=name + '_reset_std_ph' 77 | ) 78 | 79 | # the tensorflow operators 80 | whitening_operator[name + '_mean_op'] = \ 81 | whitening_operator[name + '_mean'].assign( 82 | whitening_operator[name + '_mean_ph'] 83 | ) 84 | 85 | whitening_operator[name + '_std_op'] = \ 86 | whitening_operator[name + '_std'].assign( 87 | whitening_operator[name + '_std_ph'] 88 | ) 89 | 90 | 91 | def copy_whitening_var(whitening_stats, input_name, output_name): 92 | whitening_stats[output_name] = {} 93 | whitening_stats[output_name]['mean'] = 
whitening_stats[input_name]['mean'] 94 | whitening_stats[output_name]['std'] = whitening_stats[input_name]['std'] 95 | 96 | 97 | def set_whitening_var(session, whitening_operator, whitening_stats, key_list): 98 | 99 | for i_key in key_list: 100 | for i_item in ['mean', 'std']: 101 | session.run( 102 | whitening_operator[i_key + '_' + i_item + '_op'], 103 | feed_dict={whitening_operator[i_key + '_' + i_item + '_ph']: 104 | np.reshape(whitening_stats[i_key][i_item], [1, -1])} 105 | ) 106 | 107 | 108 | def append_normalized_data_dict(data_dict, whitening_stats, 109 | target=['start_state', 'diff_state', 110 | 'end_state']): 111 | data_dict['n_start_state'] = \ 112 | (data_dict['start_state'] - whitening_stats['state']['mean']) / \ 113 | whitening_stats['state']['std'] 114 | data_dict['n_end_state'] = \ 115 | (data_dict['end_state'] - whitening_stats['state']['mean']) / \ 116 | whitening_stats['state']['std'] 117 | data_dict['n_diff_state'] = \ 118 | (data_dict['end_state'] - data_dict['start_state'] - 119 | whitening_stats['diff_state']['mean']) / \ 120 | whitening_stats['diff_state']['std'] 121 | data_dict['diff_state'] = \ 122 | data_dict['end_state'] - data_dict['start_state'] 123 | -------------------------------------------------------------------------------- /dmbrl/misc/optimizers/random.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import absolute_import 3 | from __future__ import print_function 4 | 5 | import numpy as np 6 | import tensorflow as tf 7 | 8 | from .optimizer import Optimizer 9 | 10 | 11 | class RandomOptimizer(Optimizer): 12 | 13 | def __init__(self, sol_dim, popsize, tf_session, 14 | upper_bound=None, lower_bound=None, params=None): 15 | """Creates an instance of this class. 16 | 17 | Arguments: 18 | sol_dim (int): The dimensionality of the problem space 19 | popsize (int): The number of candidate solutions to be sampled at every iteration 20 | num_elites (int): The number of top solutions that will be used to obtain the distribution 21 | at the next iteration. 22 | tf_session (tf.Session): (optional) Session to be used for this optimizer. Defaults to None, 23 | in which case any functions passed in cannot be tf.Tensor-valued. 24 | upper_bound (np.array): An array of upper bounds 25 | lower_bound (np.array): An array of lower bounds 26 | """ 27 | super().__init__() 28 | self.sol_dim = sol_dim 29 | self.popsize = popsize 30 | self.ub, self.lb = upper_bound, lower_bound 31 | self.tf_sess = tf_session 32 | self.solution = None 33 | self.tf_compatible, self.cost_function = None, None 34 | 35 | def setup(self, cost_function, tf_compatible): 36 | """Sets up this optimizer using a given cost function. 37 | 38 | Arguments: 39 | cost_function (func): A function for computing costs over a batch of candidate solutions. 40 | tf_compatible (bool): True if the cost function provided is tf.Tensor-valued. 
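                Note: random shooting has no inner optimization loop; setup() simply draws popsize
                candidates uniformly from [lb, ub] and keeps a handle to the cheapest one. A minimal
                NumPy-style sketch of the same computation:

                    solutions = np.random.uniform(lb, ub, [popsize, sol_dim])
                    best = solutions[np.argmin(cost_function(solutions))]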
41 | 42 | Returns: None 43 | """ 44 | if tf_compatible and self.tf_sess is None: 45 | raise RuntimeError("Cannot pass in a tf.Tensor-valued cost function without passing in a TensorFlow " 46 | "session into the constructor") 47 | 48 | if not tf_compatible: 49 | self.tf_compatible = False 50 | self.cost_function = cost_function 51 | else: 52 | with self.tf_sess.graph.as_default(): 53 | self.tf_compatible = True 54 | solutions = tf.random_uniform([self.popsize, self.sol_dim], self.lb, self.ub) 55 | costs = cost_function(solutions) 56 | self.solution = solutions[tf.cast(tf.argmin(costs), tf.int32)] 57 | 58 | def reset(self): 59 | pass 60 | 61 | def obtain_solution(self, init_mean, init_var, per, dU, obs=None): 62 | """Optimizes the cost function provided in setup(). 63 | 64 | Arguments: 65 | init_mean (np.ndarray): The mean of the initial candidate distribution. 66 | init_var (np.ndarray): The variance of the initial candidate distribution. 67 | """ 68 | if self.tf_compatible: 69 | sol = self.tf_sess.run(self.solution) 70 | return sol, self.update_prev_sol(per, dU, sol) 71 | else: 72 | solutions = np.random.uniform(self.lb, self.ub, [self.popsize, self.sol_dim]) 73 | costs = self.cost_function(solutions) 74 | return solutions[np.argmin(costs)], \ 75 | self.update_prev_sol(per, dU, solutions[np.argmin(costs)]) 76 | -------------------------------------------------------------------------------- /dmbrl/modeling/layers/__init__.py: -------------------------------------------------------------------------------- 1 | from .FC import FC -------------------------------------------------------------------------------- /dmbrl/modeling/models/GT_dynamics.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | from __future__ import absolute_import 4 | 5 | 6 | import numpy as np 7 | 8 | 9 | def none_constructor(model_init_cfg, misc=None): 10 | return GT(None) 11 | 12 | 13 | def compile_cost(init_obs, ac_seqs, cfg, gt_dynamics, numpy_reward_function, 14 | traj_id=0, cem_type=None, tf_data_dict=None): 15 | assert cem_type is None 16 | assert tf_data_dict is None 17 | 18 | t, nopt = 0, ac_seqs.shape[0] 19 | init_costs = np.zeros([nopt, 1]) 20 | ac_seqs = np.reshape(ac_seqs, [-1, cfg['plan_hor'], cfg['dU']]) 21 | ac_seqs = np.transpose(ac_seqs, [1, 0, 2]) 22 | init_obs = np.tile(init_obs[None], [nopt, 1]) 23 | cur_obs = init_obs 24 | total_cost = init_costs 25 | 26 | expert_obs = gt_dynamics.expert_obs(traj_id) 27 | timestep_left = int(len(expert_obs) - init_obs[0, -1] - 1) 28 | 29 | plan_depth = min(cfg['plan_hor'], timestep_left) 30 | 31 | for i_iter in range(plan_depth): 32 | cur_acs = ac_seqs[i_iter]  # actions planned for this step of the rollout 33 | next_obs, _ = gt_dynamics.predict(cur_obs, cur_acs) 34 | 35 | ''' 36 | if i_iter == plan_depth - 1: 37 | delta_cost = -numpy_reward_function(next_obs, cur_acs, expert_obs) 38 | total_cost += delta_cost.reshape(total_cost.shape) 39 | else: 40 | delta_cost = 0.0 41 | ''' 42 | delta_cost = -numpy_reward_function(next_obs, cur_acs, expert_obs) 43 | total_cost += delta_cost.reshape(total_cost.shape) 44 | cur_obs = next_obs 45 | 46 | return total_cost 47 | 48 | 49 | class GT: 50 | """ @brief: groundtruth dynamics 51 | """ 52 | 53 | def __init__(self, params): 54 | """Initializes a class instance. 55 | 56 | Arguments: 57 | params (DotMap): A dotmap of model parameters. 58 | .name (str): Model name, used for logging/use in variable scopes. 59 | Warning: Models with the same name will overwrite each other.
60 | .num_networks (int): (optional) The number of networks in the ensemble. Defaults to 1. 61 | Ignored if model is being loaded. 62 | .model_dir (str/None): (optional) Path to directory from which model will be loaded, and 63 | saved by default. Defaults to None. 64 | .load_model (bool): (optional) If True, model will be loaded from the model directory, 65 | assuming that the files are generated by a model of the same name. Defaults to False. 66 | .sess (tf.Session/None): The session that this model will use. 67 | If None, creates a session with its own associated graph. Defaults to None. 68 | """ 69 | # Instance variables 70 | self.finalized = False 71 | self.layers, self.decays, self.optvars, self.nonoptvars = [], [], [], [] 72 | self.scaler = None 73 | 74 | # Training objects 75 | self.optimizer = None 76 | self.sy_train_in, self.sy_train_targ = None, None 77 | self.train_op, self.mse_loss = None, None 78 | 79 | # Prediction objects 80 | self.sy_pred_in2d, self.sy_pred_mean2d_fac = None, None 81 | self.sy_pred_mean2d, self.sy_pred_var2d = None, None 82 | self.sy_pred_in3d, self.sy_pred_mean3d_fac = None, None 83 | self.num_nets = 1 84 | 85 | # the groundtruth dynamics environment 86 | if params is not None: 87 | self.name = 'non_tensorflow' 88 | self.model_dir = params.get('model_dir', None) 89 | 90 | self._misc_args = params.misc 91 | misc_info = {'reset_type': 'gym', 'groundtruth_model': True, 92 | 'expert_amc_dir': params.il_cfg.expert_amc_dir, 93 | 'add_timestep_into_ob': True} 94 | 95 | # TODO: 96 | from dmbrl.env import im_dmhumanoid 97 | self._dynamics_env = im_dmhumanoid.IMDMHumanoid( 98 | 'cmu-humanoid-imitation', 1234, misc_info 99 | ) 100 | self._numpy_reward_function = im_dmhumanoid.numpy_reward_function 101 | self._dynamics_env.reset() 102 | 103 | def expert_obs(self, traj_id): 104 | return self._dynamics_env.expert_obs(traj_id) 105 | 106 | @property 107 | def is_probabilistic(self): 108 | return True if self.num_nets > 1 else False 109 | 110 | @property 111 | def is_tf_model(self): 112 | return False 113 | 114 | @property 115 | def sess(self): 116 | return None 117 | 118 | ################################### 119 | # Network Structure Setup Methods # 120 | ################################### 121 | 122 | def add(self, layer): 123 | pass 124 | 125 | def pop(self): 126 | pass 127 | 128 | def finalize(self, optimizer, optimizer_args=None, *args, **kwargs): 129 | self.finalized = True 130 | 131 | ################# 132 | # Model Methods # 133 | ################# 134 | 135 | def train(self, inputs, targets, batch_size=32, epochs=100, 136 | hide_progress=False, holdout_ratio=0.0, max_logging=5000): 137 | pass 138 | 139 | def predict(self, observations, actions): 140 | num_data = observations.shape[0] 141 | end_state = [] 142 | for i_data in range(num_data): 143 | i_end_state = self._dynamics_env.fdynamics( 144 | {'start_state': observations[i_data], 'action': actions[i_data]} 145 | ) 146 | end_state.append(i_end_state) 147 | return np.array(end_state), None 148 | 149 | def save(self, savedir=None): 150 | pass 151 | 152 | def _load_structure(self): 153 | pass 154 | 155 | ####################### 156 | # Compilation methods # 157 | ####################### 158 | 159 | def _compile_outputs(self, inputs): 160 | return None 161 | 162 | def _compile_losses(self, inputs, targets): 163 | return None 164 | -------------------------------------------------------------------------------- /dmbrl/modeling/models/TFGP.py: 
-------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | from __future__ import absolute_import 4 | 5 | 6 | import tensorflow as tf 7 | import numpy as np 8 | import gpflow 9 | 10 | from dmbrl.misc.DotmapUtils import get_required_argument 11 | from dmbrl.misc import logger 12 | 13 | 14 | class TFGP: 15 | def __init__(self, params): 16 | """Initializes class instance. 17 | 18 | Arguments: 19 | params 20 | .name (str): Model name 21 | .kernel_class (class): Kernel class 22 | .kernel_args (args): Kernel args 23 | .num_inducing_points (int): Number of inducing points 24 | .sess (tf.Session): Tensorflow session 25 | """ 26 | self.name = params.get("name", "GP") 27 | self.kernel_class = get_required_argument(params, "kernel_class", "Must provide kernel class.") 28 | self.kernel_args = params.get("kernel_args", {}) 29 | self.num_inducing_points = get_required_argument( 30 | params, "num_inducing_points", "Must provide number of inducing points." 31 | ) 32 | 33 | if params.get("sess", None) is None: 34 | config = tf.ConfigProto() 35 | config.gpu_options.allow_growth = True 36 | self._sess = tf.Session(config=config) 37 | else: 38 | self._sess = params.get("sess") 39 | 40 | with self._sess.as_default(): 41 | with tf.variable_scope(self.name): 42 | output_dim = self.kernel_args["output_dim"] 43 | del self.kernel_args["output_dim"] 44 | self.model = gpflow.models.SGPR( 45 | np.zeros([1, self.kernel_args["input_dim"]]), 46 | np.zeros([1, output_dim]), 47 | kern=self.kernel_class(**self.kernel_args), 48 | Z=np.zeros([self.num_inducing_points, self.kernel_args["input_dim"]]) 49 | ) 50 | self.model.initialize() 51 | 52 | @property 53 | def is_probabilistic(self): 54 | return True 55 | 56 | @property 57 | def sess(self): 58 | return self._sess 59 | 60 | @property 61 | def is_tf_model(self): 62 | return True 63 | 64 | def train(self, inputs, targets, 65 | *args, **kwargs): 66 | """Optimizes the parameters of the internal GP model. 67 | 68 | Arguments: 69 | inputs: (np.ndarray) An array of inputs. 70 | targets: (np.ndarray) An array of targets. 71 | num_restarts: (int) The number of times that the optimization of 72 | the GP will be restarted to obtain a good set of parameters. 73 | 74 | Returns: None. 75 | """ 76 | perm = np.random.permutation(inputs.shape[0]) 77 | inputs, targets = inputs[perm], targets[perm] 78 | Z = np.copy(inputs[:self.num_inducing_points]) 79 | if Z.shape[0] < self.num_inducing_points: 80 | Z = np.concatenate([Z, np.zeros([self.num_inducing_points - Z.shape[0], Z.shape[1]])]) 81 | self.model.X = inputs 82 | self.model.Y = targets 83 | self.model.feature.Z = Z 84 | with self.sess.as_default(): 85 | self.model.compile() 86 | logger.info("Optimizing model... ", end="") 87 | gpflow.train.ScipyOptimizer().minimize(self.model) 88 | logger.info("Done.") 89 | 90 | def predict(self, inputs, *args, **kwargs): 91 | """Returns the predictions of this model on inputs. 92 | 93 | Arguments: 94 | inputs: (np.ndarray) The inputs on which predictions will be returned. 95 | ign_var: (bool) If True, only returns the mean prediction 96 | 97 | Returns: (np.ndarrays) The mean and variance of the model on the new points. 
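            Note: prediction is delegated to GPflow's SGPR.predict_y, so a minimal usage sketch
            (assuming gp is a TFGP instance already trained on data of matching dimensionality):

                mean, var = gp.predict(new_inputs)   # each of shape [N, output_dim]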
98 | """ 99 | if self.model is None: 100 | raise RuntimeError("Cannot make predictions without initial batch of data.") 101 | 102 | with self.sess.as_default(): 103 | mean, var = self.model.predict_y(inputs) 104 | return mean, var 105 | 106 | def create_prediction_tensors(self, inputs, *args, **kwargs): 107 | "" 108 | if self.model is None: 109 | raise RuntimeError("Cannot make predictions without initial batch of data.") 110 | 111 | inputs = tf.cast(inputs, tf.float64) 112 | mean, var = self.model._build_predict(inputs, full_cov=False) 113 | return tf.cast(mean, dtype=tf.float32), tf.cast(var, tf.float32) 114 | 115 | def save(self, *args, **kwargs): 116 | pass 117 | -------------------------------------------------------------------------------- /dmbrl/modeling/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .BNN import BNN 2 | from .NN import NN 3 | from .TFGP import TFGP 4 | -------------------------------------------------------------------------------- /dmbrl/modeling/utils/TensorStandardScaler.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | from __future__ import absolute_import 4 | 5 | import tensorflow as tf 6 | import numpy as np 7 | 8 | 9 | class TensorStandardScaler: 10 | """Helper class for automatically normalizing inputs into the network. 11 | """ 12 | def __init__(self, x_dim): 13 | """Initializes a scaler. 14 | 15 | Arguments: 16 | x_dim (int): The dimensionality of the inputs into the scaler. 17 | 18 | Returns: None. 19 | """ 20 | self.fitted = False 21 | with tf.variable_scope("Scaler"): 22 | self.mu = tf.get_variable( 23 | name="scaler_mu", shape=[1, x_dim], initializer=tf.constant_initializer(0.0), 24 | trainable=False 25 | ) 26 | self.sigma = tf.get_variable( 27 | name="scaler_std", shape=[1, x_dim], initializer=tf.constant_initializer(1.0), 28 | trainable=False 29 | ) 30 | 31 | self.cached_mu, self.cached_sigma = np.zeros([0, x_dim]), np.ones([1, x_dim]) 32 | 33 | def fit(self, data): 34 | """Runs two ops, one for assigning the mean of the data to the internal mean, and 35 | another for assigning the standard deviation of the data to the internal standard deviation. 36 | This function must be called within a 'with .as_default()' block. 37 | 38 | Arguments: 39 | data (np.ndarray): A numpy array containing the input 40 | 41 | Returns: None. 42 | """ 43 | mu = np.mean(data, axis=0, keepdims=True) 44 | sigma = np.std(data, axis=0, keepdims=True) 45 | sigma[sigma < 1e-12] = 1.0 46 | 47 | self.mu.load(mu) 48 | self.sigma.load(sigma) 49 | self.fitted = True 50 | self.cache() 51 | 52 | def transform(self, data): 53 | """Transforms the input matrix data using the parameters of this scaler. 54 | 55 | Arguments: 56 | data (np.array): A numpy array containing the points to be transformed. 57 | 58 | Returns: (np.array) The transformed dataset. 59 | """ 60 | return (data - self.mu) / self.sigma 61 | 62 | def inverse_transform(self, data): 63 | """Undoes the transformation performed by this scaler. 64 | 65 | Arguments: 66 | data (np.array): A numpy array containing the points to be transformed. 67 | 68 | Returns: (np.array) The transformed dataset. 69 | """ 70 | return self.sigma * data + self.mu 71 | 72 | def get_vars(self): 73 | """Returns a list of variables managed by this object. 74 | 75 | Returns: (list) The list of variables. 
76 | """ 77 | return [self.mu, self.sigma] 78 | 79 | def cache(self): 80 | """Caches current values of this scaler. 81 | 82 | Returns: None. 83 | """ 84 | self.cached_mu = self.mu.eval() 85 | self.cached_sigma = self.sigma.eval() 86 | 87 | def load_cache(self): 88 | """Loads values from the cache 89 | 90 | Returns: None. 91 | """ 92 | self.mu.load(self.cached_mu) 93 | self.sigma.load(self.cached_sigma) 94 | -------------------------------------------------------------------------------- /dmbrl/modeling/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .TensorStandardScaler import TensorStandardScaler -------------------------------------------------------------------------------- /img/curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WilsonWangTHU/POPLIN/edd8dba50f9049c6164eda774602bef0c299cb51/img/curve.png -------------------------------------------------------------------------------- /img/policy_control.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WilsonWangTHU/POPLIN/edd8dba50f9049c6164eda774602bef0c299cb51/img/policy_control.png -------------------------------------------------------------------------------- /img/reward.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WilsonWangTHU/POPLIN/edd8dba50f9049c6164eda774602bef0c299cb51/img/reward.png -------------------------------------------------------------------------------- /img/table.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WilsonWangTHU/POPLIN/edd8dba50f9049c6164eda774602bef0c299cb51/img/table.png -------------------------------------------------------------------------------- /mbexp.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | from __future__ import absolute_import 4 | 5 | import os 6 | import argparse 7 | import pprint 8 | import copy 9 | 10 | from dotmap import DotMap 11 | 12 | from dmbrl.misc.MBExp import MBExperiment 13 | from dmbrl.controllers.MPC import MPC 14 | from dmbrl.config import create_config 15 | from dmbrl.misc import logger 16 | 17 | 18 | def main(env, ctrl_type, ctrl_args, overrides, logdir, args): 19 | ctrl_args = DotMap(**{key: val for (key, val) in ctrl_args}) 20 | cfg = create_config(env, ctrl_type, ctrl_args, overrides, logdir) 21 | logger.info('\n' + pprint.pformat(cfg)) 22 | 23 | # add the part of popsize 24 | if ctrl_type == "MPC": 25 | cfg.exp_cfg.exp_cfg.policy = MPC(cfg.ctrl_cfg) 26 | 27 | cfg.exp_cfg.misc = copy.copy(cfg) 28 | exp = MBExperiment(cfg.exp_cfg) 29 | 30 | if not os.path.exists(exp.logdir): 31 | os.makedirs(exp.logdir) 32 | with open(os.path.join(exp.logdir, "config.txt"), "w") as f: 33 | f.write(pprint.pformat(cfg.toDict())) 34 | 35 | exp.run_experiment() 36 | 37 | 38 | if __name__ == "__main__": 39 | parser = argparse.ArgumentParser() 40 | parser.add_argument('-env', type=str, required=True, 41 | help='Environment name: select from [cartpole, reacher, pusher, halfcheetah]') 42 | parser.add_argument('-ca', '--ctrl_arg', action='append', nargs=2, default=[], 43 | help='Controller arguments, see https://github.com/kchua/handful-of-trials#controller-arguments') 44 | parser.add_argument('-o', '--override', 
action='append', nargs=2, default=[], 45 | help='Override default parameters, see https://github.com/kchua/handful-of-trials#overrides') 46 | parser.add_argument('-logdir', type=str, default='log', 47 | help='Directory to which results will be logged (default: ./log)') 48 | parser.add_argument('-e_popsize', type=int, default=500, 49 | help='different popsize to use') 50 | args = parser.parse_args() 51 | 52 | main(args.env, "MPC", args.ctrl_arg, args.override, args.logdir, args) 53 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | dotmap==1.2.20 2 | future==0.16.0 3 | gpflow 4 | gym==0.9.4 5 | mujoco-py==0.5.7 6 | numpy==1.14.0 7 | scipy==0.19.0 8 | tensorflow-gpu==1.9.0 9 | tqdm==4.19.4 10 | termcolor 11 | -------------------------------------------------------------------------------- /scripts/mbexp.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | from __future__ import absolute_import 4 | 5 | import os 6 | import argparse 7 | import pprint 8 | import copy 9 | 10 | from dotmap import DotMap 11 | 12 | from dmbrl.misc.MBExp import MBExperiment 13 | from dmbrl.controllers.MPC import MPC 14 | from dmbrl.config import create_config 15 | from dmbrl.misc import logger 16 | 17 | 18 | def main(env, ctrl_type, ctrl_args, overrides, logdir, args): 19 | ctrl_args = DotMap(**{key: val for (key, val) in ctrl_args}) 20 | cfg = create_config(env, ctrl_type, ctrl_args, overrides, logdir) 21 | logger.info('\n' + pprint.pformat(cfg)) 22 | 23 | # add the part of popsize 24 | if ctrl_type == "MPC": 25 | cfg.exp_cfg.exp_cfg.policy = MPC(cfg.ctrl_cfg) 26 | 27 | cfg.exp_cfg.misc = copy.copy(cfg) 28 | exp = MBExperiment(cfg.exp_cfg) 29 | 30 | if not os.path.exists(exp.logdir): 31 | os.makedirs(exp.logdir) 32 | with open(os.path.join(exp.logdir, "config.txt"), "w") as f: 33 | f.write(pprint.pformat(cfg.toDict())) 34 | 35 | exp.run_experiment() 36 | 37 | 38 | if __name__ == "__main__": 39 | parser = argparse.ArgumentParser() 40 | parser.add_argument('-env', type=str, required=True, 41 | help='Environment name: select from [cartpole, reacher, pusher, halfcheetah]') 42 | parser.add_argument('-ca', '--ctrl_arg', action='append', nargs=2, default=[], 43 | help='Controller arguments, see https://github.com/kchua/handful-of-trials#controller-arguments') 44 | parser.add_argument('-o', '--override', action='append', nargs=2, default=[], 45 | help='Override default parameters, see https://github.com/kchua/handful-of-trials#overrides') 46 | parser.add_argument('-logdir', type=str, default='log', 47 | help='Directory to which results will be logged (default: ./log)') 48 | parser.add_argument('-e_popsize', type=int, default=500, 49 | help='different popsize to use') 50 | args = parser.parse_args() 51 | 52 | main(args.env, "MPC", args.ctrl_arg, args.override, args.logdir, args) 53 | -------------------------------------------------------------------------------- /scripts/render.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | from __future__ import absolute_import 4 | 5 | import os 6 | import argparse 7 | import pprint 8 | 9 | from dotmap import DotMap 10 | 11 | from dmbrl.misc.MBExp import MBExperiment 12 | from dmbrl.controllers.MPC import MPC 13 | from 
dmbrl.config import create_config 14 | 15 | 16 | def main(env, ctrl_type, ctrl_args, overrides, model_dir, logdir): 17 | ctrl_args = DotMap(**{key: val for (key, val) in ctrl_args}) 18 | 19 | overrides.append(["ctrl_cfg.prop_cfg.model_init_cfg.model_dir", model_dir]) 20 | overrides.append(["ctrl_cfg.prop_cfg.model_init_cfg.load_model", "True"]) 21 | overrides.append(["ctrl_cfg.prop_cfg.model_pretrained", "True"]) 22 | overrides.append(["exp_cfg.exp_cfg.ninit_rollouts", "0"]) 23 | overrides.append(["exp_cfg.exp_cfg.ntrain_iters", "1"]) 24 | overrides.append(["exp_cfg.log_cfg.nrecord", "1"]) 25 | 26 | cfg = create_config(env, ctrl_type, ctrl_args, overrides, logdir) 27 | cfg.pprint() 28 | 29 | if ctrl_type == "MPC": 30 | cfg.exp_cfg.exp_cfg.policy = MPC(cfg.ctrl_cfg) 31 | exp = MBExperiment(cfg.exp_cfg) 32 | 33 | os.makedirs(exp.logdir) 34 | with open(os.path.join(exp.logdir, "config.txt"), "w") as f: 35 | f.write(pprint.pformat(cfg.toDict())) 36 | 37 | exp.run_experiment() 38 | 39 | 40 | if __name__ == "__main__": 41 | parser = argparse.ArgumentParser() 42 | parser.add_argument('-env', type=str, required=True) 43 | parser.add_argument('-ca', '--ctrl_arg', action='append', nargs=2, default=[]) 44 | parser.add_argument('-o', '--override', action='append', nargs=2, default=[]) 45 | parser.add_argument('-model-dir', type=str, required=True) 46 | parser.add_argument('-logdir', type=str, required=True) 47 | args = parser.parse_args() 48 | 49 | main(args.env, "MPC", args.ctrl_arg, args.override, args.model_dir, args.logdir) 50 | -------------------------------------------------------------------------------- /show_result.py: -------------------------------------------------------------------------------- 1 | import glob 2 | from scipy.io import loadmat 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | 6 | file_list = glob.glob('./log/*/*/logs.mat') 7 | file_list = [name for name in file_list if 'old' not in name] 8 | file_list = [name for name in file_list if '2500' in name] 9 | legend_label = [] 10 | 11 | colormap = plt.cm.gist_ncar 12 | # plt.gca().set_color_cycle([colormap(i) for i in np.linspace(0, 0.9, len(file_list))]) 13 | 14 | for name in file_list: 15 | returns = loadmat(name)['returns'] 16 | print(name + '\n') 17 | print(returns) 18 | print('\n\n') 19 | # import pdb; pdb.set_trace() 20 | plt.plot(returns.reshape([-1])) 21 | legend_label.append(name.split('/')[2]) 22 | 23 | plt.legend(legend_label) 24 | plt.show() 25 | -------------------------------------------------------------------------------- /show_with_test_result.py: -------------------------------------------------------------------------------- 1 | import glob 2 | from scipy.io import loadmat 3 | import matplotlib.pyplot as plt 4 | 5 | file_list = glob.glob('./log/*/*/logs.mat') 6 | # file_list = [name for name in file_list if 'WRA' in name] 7 | file_list = [name for name in file_list if 'GAN-I' in name] 8 | # file_list = [name for name in file_list if 'R_0.1__' in name] 9 | # mode = 'full' # full, all 10 | mode = 'test' # full, all 11 | legend_label = [] 12 | 13 | colormap = plt.cm.gist_ncar 14 | # plt.gca().set_color_cycle([colormap(i) for i in np.linspace(0, 0.9, len(file_list))]) 15 | 16 | for name in file_list: 17 | returns = loadmat(name)['test_returns'] 18 | print(name + '\n') 19 | print(returns) 20 | print('\n\n') 21 | # import pdb; pdb.set_trace() 22 | if mode in ['test', 'all']: 23 | plt.plot(returns.reshape([-1])) 24 | legend_label.append('test_' + name.split('/')[2]) 25 | 26 | returns = 
loadmat(name)['returns'] 27 | if mode in ['full', 'all']: 28 | plt.plot(returns.reshape([-1])) 29 | legend_label.append('full_' + name.split('/')[2]) 30 | 31 | plt.legend(legend_label) 32 | plt.show() 33 | --------------------------------------------------------------------------------
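A minimal usage sketch for the TensorStandardScaler helper shown above. It is not part of the repository; the 4-dimensional input batch and the session handling are illustrative assumptions, written in the TensorFlow 1.x graph-mode style the rest of the codebase uses.

import numpy as np
import tensorflow as tf
from dmbrl.modeling.utils import TensorStandardScaler

# Hypothetical batch: 1000 samples of a 4-dimensional model input.
data = np.random.randn(1000, 4).astype(np.float32)
scaler = TensorStandardScaler(x_dim=4)  # creates the non-trainable mu/sigma variables under the "Scaler" scope

with tf.Session() as sess:  # becomes the default session, which fit() relies on to load the variables
    sess.run(tf.global_variables_initializer())
    scaler.fit(data)                                   # assigns the empirical mean/std and caches them
    normalized = scaler.transform(tf.constant(data))   # symbolic (data - mu) / sigma
    restored = scaler.inverse_transform(normalized)    # undoes the normalization symbolically
    print(sess.run(normalized).mean(axis=0))           # roughly zero after fitting

Since transform() and inverse_transform() only combine the stored variables with their argument, they can be applied to symbolic inputs as well; the sketch wraps the batch in tf.constant so the result can be evaluated with sess.run.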