├── .gitignore
├── Dockerfile
├── README.md
├── __init__.py
├── demo_scripts
│   ├── PETS.sh
│   ├── POPLINA_INIT.sh
│   ├── POPLINA_REPLAN.sh
│   ├── POPLINP_AVG.sh
│   ├── POPLINP_BC.sh
│   └── POPLINP_UNI.sh
├── dmbrl
│   ├── __init__.py
│   ├── config
│   │   ├── __init__.py
│   │   ├── default.py
│   │   ├── gym_acrobot.py
│   │   ├── gym_ant.py
│   │   ├── gym_cartpole.py
│   │   ├── gym_cheetah.py
│   │   ├── gym_fhopper.py
│   │   ├── gym_fswimmer.py
│   │   ├── gym_hopper.py
│   │   ├── gym_invertedPendulum.py
│   │   ├── gym_pendulum.py
│   │   ├── gym_reacher.py
│   │   ├── gym_swimmer.py
│   │   ├── gym_walker2d.py
│   │   ├── halfcheetah.py
│   │   ├── pusher.py
│   │   ├── reacher.py
│   │   ├── reward_util.py
│   │   ├── template.py
│   │   └── view_humanoid.py
│   ├── controllers
│   │   ├── Controller.py
│   │   ├── MPC.py
│   │   └── __init__.py
│   ├── env
│   │   ├── __init__.py
│   │   ├── assets
│   │   │   ├── cartpole.xml
│   │   │   ├── half_cheetah.xml
│   │   │   ├── pusher.xml
│   │   │   └── reacher3d.xml
│   │   ├── cartpole.py
│   │   ├── half_cheetah.py
│   │   ├── pusher.py
│   │   └── reacher.py
│   ├── misc
│   │   ├── Agent.py
│   │   ├── DotmapUtils.py
│   │   ├── MBExp.py
│   │   ├── __init__.py
│   │   ├── logger.py
│   │   └── optimizers
│   │       ├── POPLIN_A.py
│   │       ├── POPLIN_P.py
│   │       ├── __init__.py
│   │       ├── cem.py
│   │       ├── gbp_cem.py
│   │       ├── gbp_rs.py
│   │       ├── optimizer.py
│   │       ├── pgcem.py
│   │       ├── policy_network
│   │       │   ├── BC_A_policy.py
│   │       │   ├── BC_WA_policy.py
│   │       │   ├── BC_WD_policy.py
│   │       │   ├── __init__.py
│   │       │   ├── base_policy.py
│   │       │   ├── gan_policy.py
│   │       │   ├── gmm_policy.py
│   │       │   ├── gmm_util.py
│   │       │   ├── tf_networks.py
│   │       │   ├── tf_norm.py
│   │       │   ├── tf_utils.py
│   │       │   ├── wgan_policy.py
│   │       │   └── whitening_util.py
│   │       └── random.py
│   └── modeling
│       ├── layers
│       │   ├── FC.py
│       │   └── __init__.py
│       ├── models
│       │   ├── BNN.py
│       │   ├── GT_dynamics.py
│       │   ├── NN.py
│       │   ├── TFGP.py
│       │   └── __init__.py
│       └── utils
│           ├── TensorStandardScaler.py
│           └── __init__.py
├── img
│   ├── curve.png
│   ├── policy_control.png
│   ├── reward.png
│   └── table.png
├── mbexp.py
├── requirements.txt
├── scripts
│   ├── mbexp.py
│   └── render.py
├── show_result.py
└── show_with_test_result.py
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 | tags
3 | *.pyc
4 | log/
5 | *.swp
6 | *.swo
7 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04
2 |
3 | RUN rm /bin/sh && ln -s /bin/bash /bin/sh
4 |
5 | # Install pip
6 | RUN apt-get update
7 | RUN apt-get -y install python3 python3-pip python3-dev python3-tk
8 | RUN apt-get -y install libglu1-mesa libxi-dev libxmu-dev libglu1-mesa-dev
9 |
10 | # Install basic libraries
11 | RUN pip3 install --upgrade pip
12 | RUN pip3 install numpy tensorflow-gpu==1.9 matplotlib scipy scikit-learn future
13 |
14 | # Install MuJoCo + OpenAI gym
15 | RUN pip3 install gym==0.9.4
16 | RUN apt-get update
17 | RUN apt-get -y install unzip unetbootin wget
18 | RUN mkdir -p /root/.mujoco && cd /root/.mujoco && wget https://www.roboti.us/download/mjpro131_linux.zip && unzip mjpro131_linux.zip
19 | ENV MUJOCO_PY_MJKEY_PATH="/root/.mujoco/mjkey.txt"
20 | ENV MUJOCO_PY_MJPRO_PATH="/root/.mujoco/mjpro131"
21 | RUN pip3 install mujoco-py==0.5.7
22 |
23 | # Install additional requirements
24 | RUN pip3 install datetime gitpython h5py tqdm dotmap cython
25 |
26 | # GPFlow
27 | RUN apt-get -y install git
28 | RUN git clone https://github.com/GPflow/GPflow.git
29 | RUN pip3 install pandas multipledispatch pytest
30 | RUN cd GPflow/ && pip install . --no-deps
31 |
32 | # Create copy of Deep MBRL repo and place in ~/handful-of-trials
33 | RUN cd ~ && git clone https://github.com/kchua/handful-of-trials.git
34 |
35 | # Environment setup
36 | RUN echo 'LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/lib/x86_64-linux-gnu' >> /root/.bashrc
37 | RUN echo 'alias python=python3' >> /root/.bashrc
38 |
39 | CMD /bin/bash
40 |
--------------------------------------------------------------------------------
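The image above pins CUDA 9.0, TensorFlow 1.9, gym 0.9.4 and MuJoCo 1.31 (mujoco-py 0.5.7). A minimal, purely illustrative sketch of building and entering the image, assuming the Dockerfile sits at the repository root, an NVIDIA container runtime is installed, and a licensed mjkey.txt is supplied at the path expected by MUJOCO_PY_MJKEY_PATH:

#!/bin/bash
# Hypothetical usage sketch; image tag and host paths are placeholders.
docker build -t poplin:cuda9 .
docker run --runtime=nvidia -it \
    -v "$PWD/mjkey.txt":/root/.mujoco/mjkey.txt \
    poplin:cuda9 /bin/bash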
/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WilsonWangTHU/POPLIN/edd8dba50f9049c6164eda774602bef0c299cb51/__init__.py
--------------------------------------------------------------------------------
/demo_scripts/PETS.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | python mbexp.py -logdir ./log/PETS \
3 | -env halfcheetah \
4 | -o exp_cfg.exp_cfg.ntrain_iters 50 \
5 | -ca opt-type CEM \
6 | -ca model-type PE \
7 | -ca prop-type E
8 |
--------------------------------------------------------------------------------
/demo_scripts/POPLINA_INIT.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # The following script will run POPLIN-A with INIT methods on halfcheetah
3 |
4 | python mbexp.py -logdir ./log/POPLIN_A \
5 | -env halfcheetah \
6 | -o exp_cfg.exp_cfg.ntrain_iters 50 \
7 | -o ctrl_cfg.cem_cfg.cem_type POPLINA-INIT \
8 | -o ctrl_cfg.cem_cfg.training_scheme BC-AI \
9 | -o ctrl_cfg.cem_cfg.test_policy 1 \
10 | -ca model-type PE -ca prop-type E \
11 | -ca opt-type POPLIN-A
12 |
--------------------------------------------------------------------------------
/demo_scripts/POPLINA_REPLAN.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # The following script will run POPLIN-A with REPLAN methods on halfcheetah
3 |
4 | python mbexp.py -logdir ./log/POPLINA_REPLAN \
5 | -env halfcheetah \
6 | -o exp_cfg.exp_cfg.ntrain_iters 50 \
7 | -o ctrl_cfg.cem_cfg.cem_type POPLINA-REPLAN \
8 | -o ctrl_cfg.cem_cfg.training_scheme BC-AI \
9 | -o ctrl_cfg.cem_cfg.test_policy 1 \
10 | -ca model-type PE -ca prop-type E \
11 | -ca opt-type POPLIN-A
12 |
--------------------------------------------------------------------------------
/demo_scripts/POPLINP_AVG.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # The following script will run POPLIN-P using the AVG training methods on halfcheetah
3 |
4 | python mbexp.py -logdir ./log/POPLINP_AVG -env halfcheetah \
5 | -o exp_cfg.exp_cfg.ntrain_iters 50 \
6 | -o ctrl_cfg.cem_cfg.cem_type POPLINP-SEP \
7 | -o ctrl_cfg.cem_cfg.training_scheme AVG-R \
8 | -o ctrl_cfg.cem_cfg.policy_network_shape [32] \
9 | -o ctrl_cfg.opt_cfg.init_var 0.1 \
10 | -o ctrl_cfg.cem_cfg.test_policy 1 \
11 | -ca model-type PE -ca prop-type E \
12 | -ca opt-type POPLIN-P
13 |
--------------------------------------------------------------------------------
/demo_scripts/POPLINP_BC.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # The following script will run POPLIN-P using the BC training methods.
3 |
4 | python mbexp.py -logdir ./log/POPLINP_BC -env halfcheetah \
5 | -o exp_cfg.exp_cfg.ntrain_iters 50 \
6 | -o ctrl_cfg.cem_cfg.cem_type POPLINP-SEP \
7 | -o ctrl_cfg.cem_cfg.training_scheme BC-PR \
8 | -o ctrl_cfg.cem_cfg.policy_network_shape [32] \
9 | -o ctrl_cfg.opt_cfg.init_var 0.03 \
10 | -o ctrl_cfg.cem_cfg.test_policy 1 \
11 | -ca model-type PE -ca prop-type E \
12 | -ca opt-type POPLIN-P
13 |
--------------------------------------------------------------------------------
/demo_scripts/POPLINP_UNI.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | python mbexp.py -logdir ./log/POPLINP_UNI -env halfcheetah \
4 | -o exp_cfg.exp_cfg.ntrain_iters 50 \
5 | -o ctrl_cfg.cem_cfg.cem_type POPLINP-UNI \
6 | -o ctrl_cfg.cem_cfg.training_scheme AVG-R \
7 | -o ctrl_cfg.cem_cfg.policy_network_shape [32] \
8 | -o ctrl_cfg.opt_cfg.init_var 0.1 \
9 | -o ctrl_cfg.cem_cfg.test_policy 1 \
10 | -ca model-type PE -ca prop-type E \
11 | -ca opt-type POPLIN-P
12 |
--------------------------------------------------------------------------------
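All demo scripts share the same runner interface: -env picks a config module from dmbrl/config, -o overrides a dotted config entry (e.g. exp_cfg.exp_cfg.ntrain_iters or ctrl_cfg.cem_cfg.cem_type) with a value, and -ca sets controller arguments such as the optimizer, model and propagation types. As a hedged sketch, a PETS-style run on the gym_cheetah config might look like the following (assuming the config module name doubles as the -env argument, which the demo scripts do not show):

#!/bin/bash
# Hypothetical variant run; the env name gym_cheetah is assumed, not taken from the demo scripts.
python mbexp.py -logdir ./log/PETS_gym_cheetah \
    -env gym_cheetah \
    -o exp_cfg.exp_cfg.ntrain_iters 100 \
    -ca opt-type CEM -ca model-type PE -ca prop-type E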
/dmbrl/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WilsonWangTHU/POPLIN/edd8dba50f9049c6164eda774602bef0c299cb51/dmbrl/__init__.py
--------------------------------------------------------------------------------
/dmbrl/config/__init__.py:
--------------------------------------------------------------------------------
1 | from .default import create_config
--------------------------------------------------------------------------------
/dmbrl/config/gym_acrobot.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | from __future__ import print_function
3 | from __future__ import absolute_import
4 |
5 | import numpy as np
6 | import tensorflow as tf
7 | from dotmap import DotMap
8 |
9 | from dmbrl.misc.DotmapUtils import get_required_argument
10 | from dmbrl.modeling.layers import FC
11 | """
12 | Checklist when adapting this config template: the module name,
13 | MODEL_IN and MODEL_OUT,
14 | and the env import / env_name.
15 | """
16 |
17 |
18 | class GymAcrobotConfigModule:
19 | ENV_NAME = "MBRLGYM_acrobot-v0"
20 | TASK_HORIZON = 1000
21 | NTRAIN_ITERS = 300
22 | NROLLOUTS_PER_ITER = 1
23 | PLAN_HOR = 30
24 | INIT_VAR = 0.25
25 | MODEL_IN, MODEL_OUT = 7, 6 # obs -> 6, action -> 1
26 | GP_NINDUCING_POINTS = 300
27 |
28 | def __init__(self):
29 | # self.ENV = gym.make(self.ENV_NAME)
30 | from mbbl.env.gym_env import acrobot
31 | self.ENV = acrobot.env(env_name='gym_acrobot', rand_seed=1234,
32 | misc_info={'reset_type': 'gym'})
33 | cfg = tf.ConfigProto()
34 | cfg.gpu_options.allow_growth = True
35 | self.SESS = tf.Session(config=cfg)
36 | self.NN_TRAIN_CFG = {"epochs": 5}
37 | self.OPT_CFG = {
38 | "Random": {
39 | "popsize": 2500
40 | },
41 | "GBPRandom": {
42 | "popsize": 2500
43 | },
44 | "GBPCEM": {
45 | "popsize": 500,
46 | "num_elites": 50,
47 | "max_iters": 5,
48 | "alpha": 0.1
49 | },
50 | "CEM": {
51 | "popsize": 500,
52 | "num_elites": 50,
53 | "max_iters": 5,
54 | "alpha": 0.1
55 | },
56 | "POPLIN-P": {
57 | "popsize": 500,
58 | "num_elites": 50,
59 | "max_iters": 5,
60 | "alpha": 0.1
61 | },
62 | "POPLIN-A": {
63 | "popsize": 500,
64 | "num_elites": 50,
65 | "max_iters": 5,
66 | "alpha": 0.1
67 | }
68 | }
69 |
70 | @staticmethod
71 | def obs_preproc(obs):
72 | """ @brief: no cheating of the observation function
73 | """
74 | if isinstance(obs, np.ndarray):
75 | return obs
76 | else:
77 | return obs
78 |
79 | @staticmethod
80 | def obs_postproc(obs, pred):
81 | if isinstance(obs, np.ndarray):
82 | return obs + pred
83 | else:
84 | return obs + pred
85 |
86 | @staticmethod
87 | def targ_proc(obs, next_obs):
88 | return next_obs - obs
89 |
90 | @staticmethod
91 | def obs_cost_fn(obs):
92 | """ @brief:
93 |
94 | def reward(data_dict):
95 | def height(obs):
96 | h1 = obs[0] # Height of first arm
97 | h2 = obs[0] * obs[2] - obs[1] * obs[3] # Height of second arm
98 | return -(h1 + h2) # total height
99 |
100 | start_height = height(data_dict['start_state'])
101 |
102 | reward = {
103 | 'gym_acrobot': start_height,
104 | 'gym_acrobot_sparse': (start_height > 1) - 1
105 | }[self._env_name] # gets gt reward based on sparse/dense
106 | return reward
107 | self.reward = reward
108 | """
109 | return obs[:, 0] + obs[:, 0] * obs[:, 2] - obs[:, 1] * obs[:, 3]
110 |
111 | @staticmethod
112 | def ac_cost_fn(acs):
113 | if isinstance(acs, np.ndarray):
114 | return np.sum(np.square(acs), axis=1) * 0.0
115 | else:
116 | return tf.reduce_sum(tf.square(acs), axis=1) * 0.0
117 |
118 | def nn_constructor(self, model_init_cfg, misc=None):
119 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap(
120 | name="model", num_networks=get_required_argument(model_init_cfg, "num_nets", "Must provide ensemble size"),
121 | sess=self.SESS, load_model=model_init_cfg.get("load_model", False),
122 | model_dir=model_init_cfg.get("model_dir", None),
123 | misc=misc
124 | ))
125 | if not model_init_cfg.get("load_model", False):
126 | model.add(FC(200, input_dim=self.MODEL_IN, activation="swish", weight_decay=0.000025))
127 | model.add(FC(200, activation="swish", weight_decay=0.00005))
128 | model.add(FC(200, activation="swish", weight_decay=0.000075))
129 | model.add(FC(200, activation="swish", weight_decay=0.000075))
130 | model.add(FC(self.MODEL_OUT, weight_decay=0.0001))
131 | model.finalize(tf.train.AdamOptimizer, {"learning_rate": 0.001})
132 | return model
133 |
134 | def gp_constructor(self, model_init_cfg):
135 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap(
136 | name="model",
137 | kernel_class=get_required_argument(model_init_cfg, "kernel_class", "Must provide kernel class"),
138 | kernel_args=model_init_cfg.get("kernel_args", {}),
139 | num_inducing_points=get_required_argument(
140 | model_init_cfg, "num_inducing_points", "Must provide number of inducing points."
141 | ),
142 | sess=self.SESS
143 | ))
144 | return model
145 |
146 |
147 | CONFIG_MODULE = GymAcrobotConfigModule
148 |
--------------------------------------------------------------------------------
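Each config module exposes its cost terms as batch-wise static methods, so the same functions serve both numpy rollouts and the TensorFlow planning graph. A small illustrative sketch (not part of the repo) of how the per-step cost of a batch of predicted acrobot states could be evaluated outside the controller:

# Hypothetical usage sketch; the MPC controller normally evaluates these inside its compiled graph.
import numpy as np
from dmbrl.config.gym_acrobot import GymAcrobotConfigModule as Cfg

obs = np.random.randn(500, 6)                  # batch of predicted 6-dim observations
acs = np.random.uniform(-1.0, 1.0, (500, 1))   # matching batch of 1-dim actions
step_cost = Cfg.obs_cost_fn(obs) + Cfg.ac_cost_fn(acs)   # shape (500,); the action term is zeroed for acrobot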
/dmbrl/config/gym_ant.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | from __future__ import print_function
3 | from __future__ import absolute_import
4 |
5 | import numpy as np
6 | import tensorflow as tf
7 | from dotmap import DotMap
8 |
9 | from dmbrl.misc.DotmapUtils import get_required_argument
10 | from dmbrl.modeling.layers import FC
11 |
12 |
13 | class AntConfigModule:
14 | """
15 | @brief: migrate the gym module from the mbbl repo
16 | 'gym_ant': {
17 | 'path': 'mbbl.env.gym_env.walker',
18 | 'ob_size': 27, 'action_size': 8, 'max_length': 1000
19 | }
20 | """
21 | ENV_NAME = "MBRLGYM_ANT-v0"
22 | TASK_HORIZON = 1000
23 | NTRAIN_ITERS = 300
24 | NROLLOUTS_PER_ITER = 1
25 | PLAN_HOR = 30
26 | INIT_VAR = 0.25
27 | MODEL_IN, MODEL_OUT = 35, 27 # obs - > 27, action 8
28 | GP_NINDUCING_POINTS = 300
29 |
30 | def __init__(self):
31 | # self.ENV = gym.make(self.ENV_NAME)
32 | from mbbl.env.gym_env import walker
33 | self.ENV = walker.env(env_name='gym_ant', rand_seed=1234,
34 | misc_info={'reset_type': 'gym'})
35 | cfg = tf.ConfigProto()
36 | cfg.gpu_options.allow_growth = True
37 | self.SESS = tf.Session(config=cfg)
38 | self.NN_TRAIN_CFG = {"epochs": 5}
39 | self.OPT_CFG = {
40 | "Random": {
41 | "popsize": 2500
42 | },
43 | "GBPRandom": {
44 | "popsize": 2500
45 | },
46 | "GBPCEM": {
47 | "popsize": 500,
48 | "num_elites": 50,
49 | "max_iters": 5,
50 | "alpha": 0.1
51 | },
52 | "CEM": {
53 | "popsize": 500,
54 | "num_elites": 50,
55 | "max_iters": 5,
56 | "alpha": 0.1
57 | },
58 | "POPLIN-P": {
59 | "popsize": 500,
60 | "num_elites": 50,
61 | "max_iters": 5,
62 | "alpha": 0.1
63 | },
64 | "POPLIN-A": {
65 | "popsize": 500,
66 | "num_elites": 50,
67 | "max_iters": 5,
68 | "alpha": 0.1
69 | }
70 | }
71 |
72 | @staticmethod
73 | def obs_preproc(obs):
74 | """ @brief: no cheating of the observation function
75 | """
76 | if isinstance(obs, np.ndarray):
77 | return obs
78 | else:
79 | return obs
80 |
81 | @staticmethod
82 | def obs_postproc(obs, pred):
83 | if isinstance(obs, np.ndarray):
84 | return obs + pred
85 | else:
86 | return obs + pred
87 |
88 | @staticmethod
89 | def targ_proc(obs, next_obs):
90 | return next_obs - obs
91 |
92 | @staticmethod
93 | def obs_cost_fn(obs):
94 | """ @brief:
95 | see mbbl.env.gym_env.walker.py for reward details
96 | """
97 | if isinstance(obs, np.ndarray):
98 | velocity_cost = -obs[:, 13] # the qvel for the root-x joint
99 | height_cost = 3 * np.square(obs[:, 0] - 0.57) # the height
100 | return velocity_cost + height_cost
101 | else:
102 | velocity_cost = -obs[:, 13] # the qvel for the root-x joint
103 | height_cost = 3 * tf.square(obs[:, 0] - 0.57) # the height
104 | return velocity_cost + height_cost
105 |
106 | @staticmethod
107 | def ac_cost_fn(acs):
108 | if isinstance(acs, np.ndarray):
109 | return 0.1 * np.sum(np.square(acs), axis=1)
110 | else:
111 | return 0.1 * tf.reduce_sum(tf.square(acs), axis=1)
112 |
113 | def nn_constructor(self, model_init_cfg, misc=None):
114 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap(
115 | name="model", num_networks=get_required_argument(model_init_cfg, "num_nets", "Must provide ensemble size"),
116 | sess=self.SESS, load_model=model_init_cfg.get("load_model", False),
117 | model_dir=model_init_cfg.get("model_dir", None),
118 | misc=misc
119 | ))
120 | if not model_init_cfg.get("load_model", False):
121 | model.add(FC(200, input_dim=self.MODEL_IN, activation="swish", weight_decay=0.000025))
122 | model.add(FC(200, activation="swish", weight_decay=0.00005))
123 | model.add(FC(200, activation="swish", weight_decay=0.000075))
124 | model.add(FC(200, activation="swish", weight_decay=0.000075))
125 | model.add(FC(self.MODEL_OUT, weight_decay=0.0001))
126 | model.finalize(tf.train.AdamOptimizer, {"learning_rate": 0.001})
127 | return model
128 |
129 | def gp_constructor(self, model_init_cfg):
130 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap(
131 | name="model",
132 | kernel_class=get_required_argument(model_init_cfg, "kernel_class", "Must provide kernel class"),
133 | kernel_args=model_init_cfg.get("kernel_args", {}),
134 | num_inducing_points=get_required_argument(
135 | model_init_cfg, "num_inducing_points", "Must provide number of inducing points."
136 | ),
137 | sess=self.SESS
138 | ))
139 | return model
140 |
141 |
142 | CONFIG_MODULE = AntConfigModule
143 |
--------------------------------------------------------------------------------
/dmbrl/config/gym_cartpole.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | from __future__ import print_function
3 | from __future__ import absolute_import
4 |
5 | import numpy as np
6 | import tensorflow as tf
7 | from dotmap import DotMap
8 |
9 | from dmbrl.misc.DotmapUtils import get_required_argument
10 | from dmbrl.modeling.layers import FC
11 | """
12 | Checklist when adapting this config template: the module name,
13 | MODEL_IN and MODEL_OUT,
14 | and the env import / env_name.
15 | """
16 |
17 |
18 | class GymCartpoleConfigModule:
19 | ENV_NAME = "MBRLGYM_cartpole-v0"
20 | TASK_HORIZON = 1000
21 | NTRAIN_ITERS = 300
22 | NROLLOUTS_PER_ITER = 1
23 | PLAN_HOR = 30
24 | INIT_VAR = 0.25
25 | MODEL_IN, MODEL_OUT = 5, 4 # obs -> 4, action -> 1
26 | GP_NINDUCING_POINTS = 300
27 |
28 | def __init__(self):
29 | # self.ENV = gym.make(self.ENV_NAME)
30 | from mbbl.env.gym_env import cartpole
31 | self.ENV = cartpole.env(env_name='gym_cartpole', rand_seed=1234,
32 | misc_info={'reset_type': 'gym'})
33 | cfg = tf.ConfigProto()
34 | cfg.gpu_options.allow_growth = True
35 | self.SESS = tf.Session(config=cfg)
36 | self.NN_TRAIN_CFG = {"epochs": 5}
37 | self.OPT_CFG = {
38 | "Random": {
39 | "popsize": 2500
40 | },
41 | "GBPRandom": {
42 | "popsize": 2500
43 | },
44 | "GBPCEM": {
45 | "popsize": 500,
46 | "num_elites": 50,
47 | "max_iters": 5,
48 | "alpha": 0.1
49 | },
50 | "CEM": {
51 | "popsize": 500,
52 | "num_elites": 50,
53 | "max_iters": 5,
54 | "alpha": 0.1
55 | },
56 | "POPLIN-P": {
57 | "popsize": 500,
58 | "num_elites": 50,
59 | "max_iters": 5,
60 | "alpha": 0.1
61 | },
62 | "POPLIN-A": {
63 | "popsize": 500,
64 | "num_elites": 50,
65 | "max_iters": 5,
66 | "alpha": 0.1
67 | }
68 | }
69 |
70 | @staticmethod
71 | def obs_preproc(obs):
72 | """ @brief: no cheating of the observation function
73 | """
74 | if isinstance(obs, np.ndarray):
75 | return obs
76 | else:
77 | return obs
78 |
79 | @staticmethod
80 | def obs_postproc(obs, pred):
81 | if isinstance(obs, np.ndarray):
82 | return obs + pred
83 | else:
84 | return obs + pred
85 |
86 | @staticmethod
87 | def targ_proc(obs, next_obs):
88 | return next_obs - obs
89 |
90 | @staticmethod
91 | def obs_cost_fn(obs):
92 | """ @brief:
93 |
94 | x, _, theta, _ = data_dict['start_state']
95 | up_reward = np.cos(theta)
96 | distance_penalty_reward = -0.01 * (x ** 2)
97 | return up_reward + distance_penalty_reward
98 | """
99 | x = obs[:, 0]
100 | theta = obs[:, 2]
101 | if isinstance(obs, np.ndarray):
102 | return -(np.cos(theta) - 0.01 * (x ** 2))
103 | else:
104 | return -(tf.cos(theta) - 0.01 * (x ** 2))
105 |
106 | @staticmethod
107 | def ac_cost_fn(acs):
108 | if isinstance(acs, np.ndarray):
109 | return np.sum(np.square(acs), axis=1) * 0.0
110 | else:
111 | return tf.reduce_sum(tf.square(acs), axis=1) * 0.0
112 |
113 | def nn_constructor(self, model_init_cfg, misc=None):
114 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap(
115 | name="model", num_networks=get_required_argument(model_init_cfg, "num_nets", "Must provide ensemble size"),
116 | sess=self.SESS, load_model=model_init_cfg.get("load_model", False),
117 | model_dir=model_init_cfg.get("model_dir", None),
118 | misc=misc
119 | ))
120 | if not model_init_cfg.get("load_model", False):
121 | model.add(FC(200, input_dim=self.MODEL_IN, activation="swish", weight_decay=0.000025))
122 | model.add(FC(200, activation="swish", weight_decay=0.00005))
123 | model.add(FC(200, activation="swish", weight_decay=0.000075))
124 | model.add(FC(200, activation="swish", weight_decay=0.000075))
125 | model.add(FC(self.MODEL_OUT, weight_decay=0.0001))
126 | model.finalize(tf.train.AdamOptimizer, {"learning_rate": 0.001})
127 | return model
128 |
129 | def gp_constructor(self, model_init_cfg):
130 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap(
131 | name="model",
132 | kernel_class=get_required_argument(model_init_cfg, "kernel_class", "Must provide kernel class"),
133 | kernel_args=model_init_cfg.get("kernel_args", {}),
134 | num_inducing_points=get_required_argument(
135 | model_init_cfg, "num_inducing_points", "Must provide number of inducing points."
136 | ),
137 | sess=self.SESS
138 | ))
139 | return model
140 |
141 |
142 | CONFIG_MODULE = GymCartpoleConfigModule
143 |
--------------------------------------------------------------------------------
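The cartpole cost above is just the negated dense reward cos(theta) - 0.01 * x^2, so an upright pole at the origin receives the minimum per-step cost of -1. A quick, purely illustrative check:

# Hypothetical sanity check, not part of the repo.
import numpy as np
from dmbrl.config.gym_cartpole import GymCartpoleConfigModule as Cfg

upright = np.array([[0.0, 0.0, 0.0, 0.0]])   # x, x_dot, theta, theta_dot
print(Cfg.obs_cost_fn(upright))              # -> [-1.]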
/dmbrl/config/gym_cheetah.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | from __future__ import print_function
3 | from __future__ import absolute_import
4 |
5 | import numpy as np
6 | import tensorflow as tf
7 | from dotmap import DotMap
8 |
9 | from dmbrl.misc.DotmapUtils import get_required_argument
10 | from dmbrl.modeling.layers import FC
11 |
12 |
13 | class HalfCheetahConfigModule:
14 | """
15 | @brief: migrate the gym module from the mbbl repo
16 | 'gym_cheetah': {
17 | 'path': 'mbbl.env.gym_env.walker',
18 | 'ob_size': 17, 'action_size': 6, 'max_length': 1000
19 | }
20 | """
21 | ENV_NAME = "MBRLGYM_HalfCheetah-v0"
22 | TASK_HORIZON = 1000
23 | NTRAIN_ITERS = 300
24 | NROLLOUTS_PER_ITER = 1
25 | PLAN_HOR = 30
26 | INIT_VAR = 0.25
27 | MODEL_IN, MODEL_OUT = 23, 17 # obs - > 17, action 6
28 | GP_NINDUCING_POINTS = 300
29 |
30 | def __init__(self):
31 | # self.ENV = gym.make(self.ENV_NAME)
32 | from mbbl.env.gym_env import walker
33 | self.ENV = walker.env(env_name='gym_cheetah', rand_seed=1234,
34 | misc_info={'reset_type': 'gym'})
35 | cfg = tf.ConfigProto()
36 | cfg.gpu_options.allow_growth = True
37 | self.SESS = tf.Session(config=cfg)
38 | self.NN_TRAIN_CFG = {"epochs": 5}
39 | self.OPT_CFG = {
40 | "Random": {
41 | "popsize": 2500
42 | },
43 | "GBPRandom": {
44 | "popsize": 2500
45 | },
46 | "GBPCEM": {
47 | "popsize": 500,
48 | "num_elites": 50,
49 | "max_iters": 5,
50 | "alpha": 0.1
51 | },
52 | "CEM": {
53 | "popsize": 500,
54 | "num_elites": 50,
55 | "max_iters": 5,
56 | "alpha": 0.1
57 | },
58 | "POPLIN-P": {
59 | "popsize": 500,
60 | "num_elites": 50,
61 | "max_iters": 5,
62 | "alpha": 0.1
63 | },
64 | "POPLIN-A": {
65 | "popsize": 500,
66 | "num_elites": 50,
67 | "max_iters": 5,
68 | "alpha": 0.1
69 | }
70 | }
71 |
72 | @staticmethod
73 | def obs_preproc(obs):
74 | """ @brief: no cheating of the observation function
75 | """
76 | if isinstance(obs, np.ndarray):
77 | return obs
78 | else:
79 | return obs
80 |
81 | @staticmethod
82 | def obs_postproc(obs, pred):
83 | if isinstance(obs, np.ndarray):
84 | return obs + pred
85 | else:
86 | return obs + pred
87 |
88 | @staticmethod
89 | def targ_proc(obs, next_obs):
90 | return next_obs - obs
91 |
92 | @staticmethod
93 | def obs_cost_fn(obs):
94 | """ @brief:
95 | see mbbl.env.gym_env.walker.py for reward details
96 | """
97 | return -obs[:, 8] # the qvel for the root-x joint
98 |
99 | @staticmethod
100 | def ac_cost_fn(acs):
101 | if isinstance(acs, np.ndarray):
102 | return 0.1 * np.sum(np.square(acs), axis=1)
103 | else:
104 | return 0.1 * tf.reduce_sum(tf.square(acs), axis=1)
105 |
106 | def nn_constructor(self, model_init_cfg, misc=None):
107 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap(
108 | name="model", num_networks=get_required_argument(model_init_cfg, "num_nets", "Must provide ensemble size"),
109 | sess=self.SESS, load_model=model_init_cfg.get("load_model", False),
110 | model_dir=model_init_cfg.get("model_dir", None),
111 | misc=misc
112 | ))
113 | if not model_init_cfg.get("load_model", False):
114 | model.add(FC(200, input_dim=self.MODEL_IN, activation="swish", weight_decay=0.000025))
115 | model.add(FC(200, activation="swish", weight_decay=0.00005))
116 | model.add(FC(200, activation="swish", weight_decay=0.000075))
117 | model.add(FC(200, activation="swish", weight_decay=0.000075))
118 | model.add(FC(self.MODEL_OUT, weight_decay=0.0001))
119 | model.finalize(tf.train.AdamOptimizer, {"learning_rate": 0.001})
120 | return model
121 |
122 | def gp_constructor(self, model_init_cfg):
123 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap(
124 | name="model",
125 | kernel_class=get_required_argument(model_init_cfg, "kernel_class", "Must provide kernel class"),
126 | kernel_args=model_init_cfg.get("kernel_args", {}),
127 | num_inducing_points=get_required_argument(
128 | model_init_cfg, "num_inducing_points", "Must provide number of inducing points."
129 | ),
130 | sess=self.SESS
131 | ))
132 | return model
133 |
134 |
135 | CONFIG_MODULE = HalfCheetahConfigModule
136 |
--------------------------------------------------------------------------------
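As in the other config modules, the dynamics model is trained on state differences: targ_proc builds the regression target next_obs - obs, and obs_postproc adds the model's prediction back onto the current observation. A short illustrative sketch of that convention (training itself happens inside the MPC/BNN code, not like this):

# Hypothetical sketch of the delta-state convention, not part of the repo.
import numpy as np
from dmbrl.config.gym_cheetah import HalfCheetahConfigModule as Cfg

obs = np.zeros((3, 17))                      # current observations
next_obs = np.ones((3, 17))                  # observed successors
target = Cfg.targ_proc(obs, next_obs)        # what the network is trained to predict
pred_next = Cfg.obs_postproc(obs, target)    # adding a perfect prediction recovers next_obs
assert np.allclose(pred_next, next_obs)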
/dmbrl/config/gym_fhopper.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | from __future__ import print_function
3 | from __future__ import absolute_import
4 |
5 | import numpy as np
6 | import tensorflow as tf
7 | from dotmap import DotMap
8 |
9 | from dmbrl.misc.DotmapUtils import get_required_argument
10 | from dmbrl.modeling.layers import FC
11 |
12 |
13 | class FixedHopperConfigModule:
14 | ENV_NAME = "MBRLGYM_Hopper-v0"
15 | TASK_HORIZON = 1000
16 | NTRAIN_ITERS = 300
17 | NROLLOUTS_PER_ITER = 1
18 | PLAN_HOR = 30
19 | INIT_VAR = 0.25
20 | MODEL_IN, MODEL_OUT = 14, 11 # obs - > 11, action 3
21 | GP_NINDUCING_POINTS = 300
22 |
23 | def __init__(self):
24 | # self.ENV = gym.make(self.ENV_NAME)
25 | from mbbl.env.gym_env import fixed_walker
26 | self.ENV = fixed_walker.env(env_name='gym_fhopper', rand_seed=1234,
27 | misc_info={'reset_type': 'gym'})
28 | cfg = tf.ConfigProto()
29 | cfg.gpu_options.allow_growth = True
30 | self.SESS = tf.Session(config=cfg)
31 | self.NN_TRAIN_CFG = {"epochs": 5}
32 | self.OPT_CFG = {
33 | "Random": {
34 | "popsize": 2500
35 | },
36 | "GBPRandom": {
37 | "popsize": 2500
38 | },
39 | "GBPCEM": {
40 | "popsize": 500,
41 | "num_elites": 50,
42 | "max_iters": 5,
43 | "alpha": 0.1
44 | },
45 | "CEM": {
46 | "popsize": 500,
47 | "num_elites": 50,
48 | "max_iters": 5,
49 | "alpha": 0.1
50 | },
51 | "POPLIN-P": {
52 | "popsize": 500,
53 | "num_elites": 50,
54 | "max_iters": 5,
55 | "alpha": 0.1
56 | },
57 | "POPLIN-A": {
58 | "popsize": 500,
59 | "num_elites": 50,
60 | "max_iters": 5,
61 | "alpha": 0.1
62 | }
63 | }
64 |
65 | @staticmethod
66 | def obs_preproc(obs):
67 | """ @brief: no cheating of the observation function
68 | """
69 | if isinstance(obs, np.ndarray):
70 | return obs
71 | else:
72 | return obs
73 |
74 | @staticmethod
75 | def obs_postproc(obs, pred):
76 | if isinstance(obs, np.ndarray):
77 | return obs + pred
78 | else:
79 | return obs + pred
80 |
81 | @staticmethod
82 | def targ_proc(obs, next_obs):
83 | return next_obs - obs
84 |
85 | @staticmethod
86 | def obs_cost_fn(obs):
87 | """ @brief:
88 | see mbbl.env.gym_env.fixed_walker.py for reward details
89 | """
90 | if isinstance(obs, np.ndarray):
91 | velocity_cost = -obs[:, 5] # the qvel for the root-x joint
92 | height_cost = 3 * np.square(obs[:, 0] - 1.3) # the height
93 | # height, ang = ob[0], ob[1]
94 | done = np.logical_or(obs[:, 0] <= 0.7, np.abs(obs[:, 1]) >= 0.2)
95 | alive_reward = 1.0 - np.array(done, dtype=np.float64)
96 | return velocity_cost + height_cost - alive_reward
97 | else:
98 | velocity_cost = -obs[:, 5] # the qvel for the root-x joint
99 | height_cost = 3 * tf.square(obs[:, 0] - 1.3) # the height
100 | done = tf.logical_or(obs[:, 0] <= 0.7, tf.abs(obs[:, 1]) >= 0.2)
101 | alive_reward = 1.0 - tf.cast(done, dtype=velocity_cost.dtype)
102 | return velocity_cost + height_cost - alive_reward
103 |
104 | @staticmethod
105 | def ac_cost_fn(acs):
106 | if isinstance(acs, np.ndarray):
107 | return 0.1 * np.sum(np.square(acs), axis=1)
108 | else:
109 | return 0.1 * tf.reduce_sum(tf.square(acs), axis=1)
110 |
111 | def nn_constructor(self, model_init_cfg, misc=None):
112 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap(
113 | name="model", num_networks=get_required_argument(model_init_cfg, "num_nets", "Must provide ensemble size"),
114 | sess=self.SESS, load_model=model_init_cfg.get("load_model", False),
115 | model_dir=model_init_cfg.get("model_dir", None),
116 | misc=misc
117 | ))
118 | if not model_init_cfg.get("load_model", False):
119 | model.add(FC(200, input_dim=self.MODEL_IN, activation="swish", weight_decay=0.000025))
120 | model.add(FC(200, activation="swish", weight_decay=0.00005))
121 | model.add(FC(200, activation="swish", weight_decay=0.000075))
122 | model.add(FC(200, activation="swish", weight_decay=0.000075))
123 | model.add(FC(self.MODEL_OUT, weight_decay=0.0001))
124 | model.finalize(tf.train.AdamOptimizer, {"learning_rate": 0.001})
125 | return model
126 |
127 | def gp_constructor(self, model_init_cfg):
128 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap(
129 | name="model",
130 | kernel_class=get_required_argument(model_init_cfg, "kernel_class", "Must provide kernel class"),
131 | kernel_args=model_init_cfg.get("kernel_args", {}),
132 | num_inducing_points=get_required_argument(
133 | model_init_cfg, "num_inducing_points", "Must provide number of inducing points."
134 | ),
135 | sess=self.SESS
136 | ))
137 | return model
138 |
139 |
140 | CONFIG_MODULE = FixedHopperConfigModule
141 |
--------------------------------------------------------------------------------
/dmbrl/config/gym_fswimmer.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | from __future__ import print_function
3 | from __future__ import absolute_import
4 |
5 | import numpy as np
6 | import tensorflow as tf
7 | from dotmap import DotMap
8 |
9 | from dmbrl.misc.DotmapUtils import get_required_argument
10 | from dmbrl.modeling.layers import FC
11 |
12 |
13 | class FixedSwimmerConfigModule:
14 | """
15 | """
16 | ENV_NAME = "MBRLGYM_SWIMMER-v0"
17 | TASK_HORIZON = 1000
18 | NTRAIN_ITERS = 300
19 | NROLLOUTS_PER_ITER = 1
20 | PLAN_HOR = 30
21 | INIT_VAR = 0.25
22 | MODEL_IN, MODEL_OUT = 11, 9 # obs - > 8 + 1, action 2
23 | GP_NINDUCING_POINTS = 300
24 |
25 | def __init__(self):
26 | # self.ENV = gym.make(self.ENV_NAME)
27 | from mbbl.env.gym_env import fixed_swimmer
28 | self.ENV = fixed_swimmer.env(env_name='gym_fswimmer', rand_seed=1234,
29 | misc_info={'reset_type': 'gym'})
30 | cfg = tf.ConfigProto()
31 | cfg.gpu_options.allow_growth = True
32 | self.SESS = tf.Session(config=cfg)
33 | self.NN_TRAIN_CFG = {"epochs": 5}
34 | self.OPT_CFG = {
35 | "Random": {
36 | "popsize": 2500
37 | },
38 | "GBPRandom": {
39 | "popsize": 2500
40 | },
41 | "GBPCEM": {
42 | "popsize": 500,
43 | "num_elites": 50,
44 | "max_iters": 5,
45 | "alpha": 0.1
46 | },
47 | "CEM": {
48 | "popsize": 500,
49 | "num_elites": 50,
50 | "max_iters": 5,
51 | "alpha": 0.1
52 | },
53 | "POPLIN-P": {
54 | "popsize": 500,
55 | "num_elites": 50,
56 | "max_iters": 5,
57 | "alpha": 0.1
58 | },
59 | "POPLIN-A": {
60 | "popsize": 500,
61 | "num_elites": 50,
62 | "max_iters": 5,
63 | "alpha": 0.1
64 | }
65 | }
66 |
67 | @staticmethod
68 | def obs_preproc(obs):
69 | """ @brief: no cheating of the observation function
70 | """
71 | if isinstance(obs, np.ndarray):
72 | return obs
73 | else:
74 | return obs
75 |
76 | @staticmethod
77 | def obs_postproc(obs, pred):
78 | if isinstance(obs, np.ndarray):
79 | return obs + pred
80 | else:
81 | return obs + pred
82 |
83 | @staticmethod
84 | def targ_proc(obs, next_obs):
85 | return next_obs - obs
86 |
87 | @staticmethod
88 | def obs_cost_fn(obs):
89 | """ @brief:
90 | see mbbl.env.gym_env.fixed_swimmer.py for reward details
91 | """
92 | if isinstance(obs, np.ndarray):
93 | velocity_cost = -obs[:, -1] # the qvel for the root-x joint
94 | return velocity_cost
95 | else:
96 | velocity_cost = -obs[:, -1] # the qvel for the root-x joint
97 | return velocity_cost
98 |
99 | @staticmethod
100 | def ac_cost_fn(acs):
101 | if isinstance(acs, np.ndarray):
102 | return 0.0001 * np.sum(np.square(acs), axis=1)
103 | else:
104 | return 0.0001 * tf.reduce_sum(tf.square(acs), axis=1)
105 |
106 | def nn_constructor(self, model_init_cfg, misc=None):
107 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap(
108 | name="model", num_networks=get_required_argument(model_init_cfg, "num_nets", "Must provide ensemble size"),
109 | sess=self.SESS, load_model=model_init_cfg.get("load_model", False),
110 | model_dir=model_init_cfg.get("model_dir", None),
111 | misc=misc
112 | ))
113 | if not model_init_cfg.get("load_model", False):
114 | model.add(FC(200, input_dim=self.MODEL_IN, activation="swish", weight_decay=0.000025))
115 | model.add(FC(200, activation="swish", weight_decay=0.00005))
116 | model.add(FC(200, activation="swish", weight_decay=0.000075))
117 | model.add(FC(200, activation="swish", weight_decay=0.000075))
118 | model.add(FC(self.MODEL_OUT, weight_decay=0.0001))
119 | model.finalize(tf.train.AdamOptimizer, {"learning_rate": 0.001})
120 | return model
121 |
122 | def gp_constructor(self, model_init_cfg):
123 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap(
124 | name="model",
125 | kernel_class=get_required_argument(model_init_cfg, "kernel_class", "Must provide kernel class"),
126 | kernel_args=model_init_cfg.get("kernel_args", {}),
127 | num_inducing_points=get_required_argument(
128 | model_init_cfg, "num_inducing_points", "Must provide number of inducing points."
129 | ),
130 | sess=self.SESS
131 | ))
132 | return model
133 |
134 |
135 | CONFIG_MODULE = FixedSwimmerConfigModule
136 |
--------------------------------------------------------------------------------
/dmbrl/config/gym_hopper.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | from __future__ import print_function
3 | from __future__ import absolute_import
4 |
5 | import numpy as np
6 | import tensorflow as tf
7 | from dotmap import DotMap
8 |
9 | from dmbrl.misc.DotmapUtils import get_required_argument
10 | from dmbrl.modeling.layers import FC
11 |
12 |
13 | class HopperConfigModule:
14 | ENV_NAME = "MBRLGYM_Hopper-v0"
15 | TASK_HORIZON = 1000
16 | NTRAIN_ITERS = 300
17 | NROLLOUTS_PER_ITER = 1
18 | PLAN_HOR = 30
19 | INIT_VAR = 0.25
20 | MODEL_IN, MODEL_OUT = 14, 11 # obs - > 11, action 3
21 | GP_NINDUCING_POINTS = 300
22 |
23 | def __init__(self):
24 | # self.ENV = gym.make(self.ENV_NAME)
25 | from mbbl.env.gym_env import walker
26 | self.ENV = walker.env(env_name='gym_hopper', rand_seed=1234,
27 | misc_info={'reset_type': 'gym'})
28 | cfg = tf.ConfigProto()
29 | cfg.gpu_options.allow_growth = True
30 | self.SESS = tf.Session(config=cfg)
31 | self.NN_TRAIN_CFG = {"epochs": 5}
32 | self.OPT_CFG = {
33 | "Random": {
34 | "popsize": 2500
35 | },
36 | "GBPRandom": {
37 | "popsize": 2500
38 | },
39 | "GBPCEM": {
40 | "popsize": 500,
41 | "num_elites": 50,
42 | "max_iters": 5,
43 | "alpha": 0.1
44 | },
45 | "CEM": {
46 | "popsize": 500,
47 | "num_elites": 50,
48 | "max_iters": 5,
49 | "alpha": 0.1
50 | },
51 | "POPLIN-P": {
52 | "popsize": 500,
53 | "num_elites": 50,
54 | "max_iters": 5,
55 | "alpha": 0.1
56 | },
57 | "POPLIN-A": {
58 | "popsize": 500,
59 | "num_elites": 50,
60 | "max_iters": 5,
61 | "alpha": 0.1
62 | }
63 | }
64 |
65 | @staticmethod
66 | def obs_preproc(obs):
67 | """ @brief: no cheating of the observation function
68 | """
69 | if isinstance(obs, np.ndarray):
70 | return obs
71 | else:
72 | return obs
73 |
74 | @staticmethod
75 | def obs_postproc(obs, pred):
76 | if isinstance(obs, np.ndarray):
77 | return obs + pred
78 | else:
79 | return obs + pred
80 |
81 | @staticmethod
82 | def targ_proc(obs, next_obs):
83 | return next_obs - obs
84 |
85 | @staticmethod
86 | def obs_cost_fn(obs):
87 | """ @brief:
88 | see mbbl.env.gym_env.walker.py for reward details
89 | """
90 | if isinstance(obs, np.ndarray):
91 | velocity_cost = -obs[:, 5] # the qvel for the root-x joint
92 | height_cost = 3 * np.square(obs[:, 0] - 1.3) # the height
93 | return velocity_cost + height_cost
94 | else:
95 | velocity_cost = -obs[:, 5] # the qvel for the root-x joint
96 | height_cost = 3 * tf.square(obs[:, 0] - 1.3) # the height
97 | return velocity_cost + height_cost
98 |
99 | @staticmethod
100 | def ac_cost_fn(acs):
101 | if isinstance(acs, np.ndarray):
102 | return 0.1 * np.sum(np.square(acs), axis=1)
103 | else:
104 | return 0.1 * tf.reduce_sum(tf.square(acs), axis=1)
105 |
106 | def nn_constructor(self, model_init_cfg, misc=None):
107 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap(
108 | name="model", num_networks=get_required_argument(model_init_cfg, "num_nets", "Must provide ensemble size"),
109 | sess=self.SESS, load_model=model_init_cfg.get("load_model", False),
110 | model_dir=model_init_cfg.get("model_dir", None),
111 | misc=misc
112 | ))
113 | if not model_init_cfg.get("load_model", False):
114 | model.add(FC(200, input_dim=self.MODEL_IN, activation="swish", weight_decay=0.000025))
115 | model.add(FC(200, activation="swish", weight_decay=0.00005))
116 | model.add(FC(200, activation="swish", weight_decay=0.000075))
117 | model.add(FC(200, activation="swish", weight_decay=0.000075))
118 | model.add(FC(self.MODEL_OUT, weight_decay=0.0001))
119 | model.finalize(tf.train.AdamOptimizer, {"learning_rate": 0.001})
120 | return model
121 |
122 | def gp_constructor(self, model_init_cfg):
123 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap(
124 | name="model",
125 | kernel_class=get_required_argument(model_init_cfg, "kernel_class", "Must provide kernel class"),
126 | kernel_args=model_init_cfg.get("kernel_args", {}),
127 | num_inducing_points=get_required_argument(
128 | model_init_cfg, "num_inducing_points", "Must provide number of inducing points."
129 | ),
130 | sess=self.SESS
131 | ))
132 | return model
133 |
134 |
135 | CONFIG_MODULE = HopperConfigModule
136 |
--------------------------------------------------------------------------------
/dmbrl/config/gym_invertedPendulum.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | from __future__ import print_function
3 | from __future__ import absolute_import
4 |
5 | import numpy as np
6 | import tensorflow as tf
7 | from dotmap import DotMap
8 |
9 | from dmbrl.misc.DotmapUtils import get_required_argument
10 | from dmbrl.modeling.layers import FC
11 | """
12 | Checklist when adapting this config template: the module name, ENV_NAME,
13 | MODEL_IN and MODEL_OUT,
14 | and the env import / env_name.
15 | """
16 |
17 |
18 | class GymINVPendulumConfigModule:
19 | ENV_NAME = "MBRLGYM_invpendulum-v0"
20 | TASK_HORIZON = 1000
21 | NTRAIN_ITERS = 300
22 | NROLLOUTS_PER_ITER = 1
23 | PLAN_HOR = 30
24 | INIT_VAR = 0.25
25 | MODEL_IN, MODEL_OUT = 5, 4 # obs -> 4, action -> 1
26 | GP_NINDUCING_POINTS = 300
27 |
28 | def __init__(self):
29 | # self.ENV = gym.make(self.ENV_NAME)
30 | from mbbl.env.gym_env import invertedPendulum
31 | self.ENV = invertedPendulum.env(
32 | env_name='gym_invertedPendulum', rand_seed=1234,
33 | misc_info={'reset_type': 'gym'}
34 | )
35 | cfg = tf.ConfigProto()
36 | cfg.gpu_options.allow_growth = True
37 | self.SESS = tf.Session(config=cfg)
38 | self.NN_TRAIN_CFG = {"epochs": 5}
39 | self.OPT_CFG = {
40 | "Random": {
41 | "popsize": 2500
42 | },
43 | "GBPRandom": {
44 | "popsize": 2500
45 | },
46 | "GBPCEM": {
47 | "popsize": 500,
48 | "num_elites": 50,
49 | "max_iters": 5,
50 | "alpha": 0.1
51 | },
52 | "CEM": {
53 | "popsize": 500,
54 | "num_elites": 50,
55 | "max_iters": 5,
56 | "alpha": 0.1
57 | },
58 | "POPLIN-P": {
59 | "popsize": 500,
60 | "num_elites": 50,
61 | "max_iters": 5,
62 | "alpha": 0.1
63 | },
64 | "POPLIN-A": {
65 | "popsize": 500,
66 | "num_elites": 50,
67 | "max_iters": 5,
68 | "alpha": 0.1
69 | }
70 | }
71 |
72 | @staticmethod
73 | def obs_preproc(obs):
74 | """ @brief: no cheating of the observation function
75 | """
76 | if isinstance(obs, np.ndarray):
77 | return obs
78 | else:
79 | return obs
80 |
81 | @staticmethod
82 | def obs_postproc(obs, pred):
83 | if isinstance(obs, np.ndarray):
84 | return obs + pred
85 | else:
86 | return obs + pred
87 |
88 | @staticmethod
89 | def targ_proc(obs, next_obs):
90 | return next_obs - obs
91 |
92 | @staticmethod
93 | def obs_cost_fn(obs):
94 | """ @brief:
95 | see mbbl.env.gym_env.invertedPendulum.py for reward details
96 |
97 | # ypos penalty
98 | ypos = data_dict['start_state'][ypos_ob_pos]
99 | ypos_reward = -(ypos - ypos_target) ** 2
100 | """
101 | return obs[:, 1] ** 2
102 |
103 | @staticmethod
104 | def ac_cost_fn(acs):
105 | if isinstance(acs, np.ndarray):
106 | return np.sum(np.square(acs), axis=1) * 0.0
107 | else:
108 | return tf.reduce_sum(tf.square(acs), axis=1) * 0.0
109 |
110 | def nn_constructor(self, model_init_cfg, misc=None):
111 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap(
112 | name="model", num_networks=get_required_argument(model_init_cfg, "num_nets", "Must provide ensemble size"),
113 | sess=self.SESS, load_model=model_init_cfg.get("load_model", False),
114 | model_dir=model_init_cfg.get("model_dir", None),
115 | misc=misc
116 | ))
117 | if not model_init_cfg.get("load_model", False):
118 | model.add(FC(200, input_dim=self.MODEL_IN, activation="swish", weight_decay=0.000025))
119 | model.add(FC(200, activation="swish", weight_decay=0.00005))
120 | model.add(FC(200, activation="swish", weight_decay=0.000075))
121 | model.add(FC(200, activation="swish", weight_decay=0.000075))
122 | model.add(FC(self.MODEL_OUT, weight_decay=0.0001))
123 | model.finalize(tf.train.AdamOptimizer, {"learning_rate": 0.001})
124 | return model
125 |
126 | def gp_constructor(self, model_init_cfg):
127 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap(
128 | name="model",
129 | kernel_class=get_required_argument(model_init_cfg, "kernel_class", "Must provide kernel class"),
130 | kernel_args=model_init_cfg.get("kernel_args", {}),
131 | num_inducing_points=get_required_argument(
132 | model_init_cfg, "num_inducing_points", "Must provide number of inducing points."
133 | ),
134 | sess=self.SESS
135 | ))
136 | return model
137 |
138 |
139 | CONFIG_MODULE = GymINVPendulumConfigModule
140 |
--------------------------------------------------------------------------------
/dmbrl/config/gym_pendulum.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | from __future__ import print_function
3 | from __future__ import absolute_import
4 |
5 | import numpy as np
6 | import tensorflow as tf
7 | from dotmap import DotMap
8 |
9 | from dmbrl.misc.DotmapUtils import get_required_argument
10 | from dmbrl.modeling.layers import FC
11 | """
12 | Checklist when adapting this config template: the module name,
13 | MODEL_IN and MODEL_OUT,
14 | and the env import / env_name.
15 | """
16 |
17 |
18 | class GymPendulumConfigModule:
19 | ENV_NAME = "MBRLGYM_pendulum-v0"
20 | TASK_HORIZON = 1000
21 | NTRAIN_ITERS = 300
22 | NROLLOUTS_PER_ITER = 1
23 | PLAN_HOR = 30
24 | INIT_VAR = 0.25
25 | MODEL_IN, MODEL_OUT = 4, 3 # obs -> 3, action -> 1
26 | GP_NINDUCING_POINTS = 300
27 |
28 | def __init__(self):
29 | # self.ENV = gym.make(self.ENV_NAME)
30 | from mbbl.env.gym_env import pendulum
31 | self.ENV = pendulum.env(env_name='gym_pendulum', rand_seed=1234,
32 | misc_info={'reset_type': 'gym'})
33 | cfg = tf.ConfigProto()
34 | cfg.gpu_options.allow_growth = True
35 | self.SESS = tf.Session(config=cfg)
36 | self.NN_TRAIN_CFG = {"epochs": 5}
37 | self.OPT_CFG = {
38 | "Random": {
39 | "popsize": 2500
40 | },
41 | "GBPRandom": {
42 | "popsize": 2500
43 | },
44 | "GBPCEM": {
45 | "popsize": 500,
46 | "num_elites": 50,
47 | "max_iters": 5,
48 | "alpha": 0.1
49 | },
50 | "CEM": {
51 | "popsize": 500,
52 | "num_elites": 50,
53 | "max_iters": 5,
54 | "alpha": 0.1
55 | },
56 | "POPLIN-P": {
57 | "popsize": 500,
58 | "num_elites": 50,
59 | "max_iters": 5,
60 | "alpha": 0.1
61 | },
62 | "POPLIN-A": {
63 | "popsize": 500,
64 | "num_elites": 50,
65 | "max_iters": 5,
66 | "alpha": 0.1
67 | }
68 | }
69 |
70 | @staticmethod
71 | def obs_preproc(obs):
72 | """ @brief: no cheating of the observation function
73 | """
74 | if isinstance(obs, np.ndarray):
75 | return obs
76 | else:
77 | return obs
78 |
79 | @staticmethod
80 | def obs_postproc(obs, pred):
81 | if isinstance(obs, np.ndarray):
82 | return obs + pred
83 | else:
84 | return obs + pred
85 |
86 | @staticmethod
87 | def targ_proc(obs, next_obs):
88 | return next_obs - obs
89 |
90 | @staticmethod
91 | def obs_cost_fn(obs):
92 | """ @brief:
93 | see mbbl.env.gym_env.pendulum.py for reward details
94 |
95 | def reward(data_dict):
96 | action = data_dict['action']
97 | true_action = action * self._env.env.max_torque
98 |
99 | max_torque = self._env.env.max_torque
100 | torque = np.clip(true_action, -max_torque, max_torque)[0]
101 |
102 | y, x, thetadot = data_dict['start_state']
103 |
104 | costs = y + .1 * x + .1 * (thetadot ** 2) + .001 * (torque ** 2)
105 | # note: reward is the negative cost
106 | return -costs
107 | """
108 | y = obs[:, 0]
109 | x = obs[:, 1]
110 | thetadot = obs[:, 2]
111 | cost = y + tf.abs(0.1 * x) + 0.1 * (thetadot ** 2)
112 | return cost
113 |
114 | @staticmethod
115 | def ac_cost_fn(acs):
116 | max_torque = 2.0
117 |
118 | if isinstance(acs, np.ndarray):
119 | clip_torque = np.clip(acs, -max_torque, max_torque)
120 | return 0.001 * np.sum(np.square(clip_torque), axis=1)
121 | else:
122 | clip_torque = tf.clip_by_value(acs, -max_torque, max_torque)
123 | return 0.001 * tf.reduce_sum(tf.square(clip_torque), axis=1)
124 |
125 | def nn_constructor(self, model_init_cfg, misc=None):
126 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap(
127 | name="model", num_networks=get_required_argument(model_init_cfg, "num_nets", "Must provide ensemble size"),
128 | sess=self.SESS, load_model=model_init_cfg.get("load_model", False),
129 | model_dir=model_init_cfg.get("model_dir", None),
130 | misc=misc
131 | ))
132 | if not model_init_cfg.get("load_model", False):
133 | model.add(FC(200, input_dim=self.MODEL_IN, activation="swish", weight_decay=0.000025))
134 | model.add(FC(200, activation="swish", weight_decay=0.00005))
135 | model.add(FC(200, activation="swish", weight_decay=0.000075))
136 | model.add(FC(200, activation="swish", weight_decay=0.000075))
137 | model.add(FC(self.MODEL_OUT, weight_decay=0.0001))
138 | model.finalize(tf.train.AdamOptimizer, {"learning_rate": 0.001})
139 | return model
140 |
141 | def gp_constructor(self, model_init_cfg):
142 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap(
143 | name="model",
144 | kernel_class=get_required_argument(model_init_cfg, "kernel_class", "Must provide kernel class"),
145 | kernel_args=model_init_cfg.get("kernel_args", {}),
146 | num_inducing_points=get_required_argument(
147 | model_init_cfg, "num_inducing_points", "Must provide number of inducing points."
148 | ),
149 | sess=self.SESS
150 | ))
151 | return model
152 |
153 |
154 | CONFIG_MODULE = GymPendulumConfigModule
155 |
--------------------------------------------------------------------------------
/dmbrl/config/gym_reacher.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | from __future__ import print_function
3 | from __future__ import absolute_import
4 |
5 | import numpy as np
6 | import tensorflow as tf
7 | from dotmap import DotMap
8 |
9 | from dmbrl.misc.DotmapUtils import get_required_argument
10 | from dmbrl.modeling.layers import FC
11 |
12 |
13 | class ReacherConfigModule:
14 | ENV_NAME = "MBRLGYM_Reacher-v0"
15 | TASK_HORIZON = 1000
16 | NTRAIN_ITERS = 300
17 | NROLLOUTS_PER_ITER = 1
18 | PLAN_HOR = 30
19 | INIT_VAR = 0.25
20 | MODEL_IN, MODEL_OUT = 13, 11 # obs - > 11, action 2
21 | GP_NINDUCING_POINTS = 300
22 |
23 | def __init__(self):
24 | # self.ENV = gym.make(self.ENV_NAME)
25 | from mbbl.env.gym_env import reacher
26 | self.ENV = reacher.env(env_name='gym_reacher', rand_seed=1234,
27 | misc_info={'reset_type': 'gym'})
28 | cfg = tf.ConfigProto()
29 | cfg.gpu_options.allow_growth = True
30 | self.SESS = tf.Session(config=cfg)
31 | self.NN_TRAIN_CFG = {"epochs": 5}
32 | self.OPT_CFG = {
33 | "Random": {
34 | "popsize": 2500
35 | },
36 | "GBPRandom": {
37 | "popsize": 2500
38 | },
39 | "GBPCEM": {
40 | "popsize": 500,
41 | "num_elites": 50,
42 | "max_iters": 5,
43 | "alpha": 0.1
44 | },
45 | "CEM": {
46 | "popsize": 500,
47 | "num_elites": 50,
48 | "max_iters": 5,
49 | "alpha": 0.1
50 | },
51 | "POPLIN-P": {
52 | "popsize": 500,
53 | "num_elites": 50,
54 | "max_iters": 5,
55 | "alpha": 0.1
56 | },
57 | "POPLIN-A": {
58 | "popsize": 500,
59 | "num_elites": 50,
60 | "max_iters": 5,
61 | "alpha": 0.1
62 | }
63 | }
64 |
65 | @staticmethod
66 | def obs_preproc(obs):
67 | """ @brief: no cheating of the observation function
68 | """
69 | if isinstance(obs, np.ndarray):
70 | return obs
71 | else:
72 | return obs
73 |
74 | @staticmethod
75 | def obs_postproc(obs, pred):
76 | if isinstance(obs, np.ndarray):
77 | return obs + pred
78 | else:
79 | return obs + pred
80 |
81 | @staticmethod
82 | def targ_proc(obs, next_obs):
83 | return next_obs - obs
84 |
85 | @staticmethod
86 | def obs_cost_fn(obs):
87 | """ @brief:
88 | see mbbl.env.gym_env.reacher.py for reward details
89 | """
90 | if isinstance(obs, np.ndarray):
91 | return np.linalg.norm(obs[:, -3:], axis=1)  # per-sample norm of the last three obs entries (distance to goal)
92 | else:
93 | return tf.linalg.norm(obs[:, -3:], axis=1)  # per-sample norm of the last three obs entries (distance to goal)
94 |
95 | @staticmethod
96 | def ac_cost_fn(acs):
97 | if isinstance(acs, np.ndarray):
98 | return np.sum(np.square(acs), axis=1)
99 | else:
100 | return tf.reduce_sum(tf.square(acs), axis=1)
101 |
102 | def nn_constructor(self, model_init_cfg, misc=None):
103 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap(
104 | name="model", num_networks=get_required_argument(model_init_cfg, "num_nets", "Must provide ensemble size"),
105 | sess=self.SESS, load_model=model_init_cfg.get("load_model", False),
106 | model_dir=model_init_cfg.get("model_dir", None),
107 | misc=misc
108 | ))
109 | if not model_init_cfg.get("load_model", False):
110 | model.add(FC(200, input_dim=self.MODEL_IN, activation="swish", weight_decay=0.000025))
111 | model.add(FC(200, activation="swish", weight_decay=0.00005))
112 | model.add(FC(200, activation="swish", weight_decay=0.000075))
113 | model.add(FC(200, activation="swish", weight_decay=0.000075))
114 | model.add(FC(self.MODEL_OUT, weight_decay=0.0001))
115 | model.finalize(tf.train.AdamOptimizer, {"learning_rate": 0.001})
116 | return model
117 |
118 | def gp_constructor(self, model_init_cfg):
119 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap(
120 | name="model",
121 | kernel_class=get_required_argument(model_init_cfg, "kernel_class", "Must provide kernel class"),
122 | kernel_args=model_init_cfg.get("kernel_args", {}),
123 | num_inducing_points=get_required_argument(
124 | model_init_cfg, "num_inducing_points", "Must provide number of inducing points."
125 | ),
126 | sess=self.SESS
127 | ))
128 | return model
129 |
130 |
131 | CONFIG_MODULE = ReacherConfigModule
132 |
--------------------------------------------------------------------------------
/dmbrl/config/gym_swimmer.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | from __future__ import print_function
3 | from __future__ import absolute_import
4 |
5 | import numpy as np
6 | import tensorflow as tf
7 | from dotmap import DotMap
8 |
9 | from dmbrl.misc.DotmapUtils import get_required_argument
10 | from dmbrl.modeling.layers import FC
11 |
12 |
13 | class SwimmerConfigModule:
14 | """
15 | @brief: migrate the gym module from the mbbl repo
16 | 'gym_swimmer': {
17 | 'path': 'mbbl.env.gym_env.walker',
18 | 'ob_size': 8, 'action_size': 2, 'max_length': 1000
19 | }
20 | """
21 | ENV_NAME = "MBRLGYM_SWIMMER-v0"
22 | TASK_HORIZON = 1000
23 | NTRAIN_ITERS = 300
24 | NROLLOUTS_PER_ITER = 1
25 | PLAN_HOR = 30
26 | INIT_VAR = 0.25
27 | MODEL_IN, MODEL_OUT = 10, 8 # obs - > 8, action 2
28 | GP_NINDUCING_POINTS = 300
29 |
30 | def __init__(self):
31 | # self.ENV = gym.make(self.ENV_NAME)
32 | from mbbl.env.gym_env import walker
33 | self.ENV = walker.env(env_name='gym_swimmer', rand_seed=1234,
34 | misc_info={'reset_type': 'gym'})
35 | cfg = tf.ConfigProto()
36 | cfg.gpu_options.allow_growth = True
37 | self.SESS = tf.Session(config=cfg)
38 | self.NN_TRAIN_CFG = {"epochs": 5}
39 | self.OPT_CFG = {
40 | "Random": {
41 | "popsize": 2500
42 | },
43 | "GBPRandom": {
44 | "popsize": 2500
45 | },
46 | "GBPCEM": {
47 | "popsize": 500,
48 | "num_elites": 50,
49 | "max_iters": 5,
50 | "alpha": 0.1
51 | },
52 | "CEM": {
53 | "popsize": 500,
54 | "num_elites": 50,
55 | "max_iters": 5,
56 | "alpha": 0.1
57 | },
58 | "POPLIN-P": {
59 | "popsize": 500,
60 | "num_elites": 50,
61 | "max_iters": 5,
62 | "alpha": 0.1
63 | },
64 | "POPLIN-A": {
65 | "popsize": 500,
66 | "num_elites": 50,
67 | "max_iters": 5,
68 | "alpha": 0.1
69 | }
70 | }
71 |
72 | @staticmethod
73 | def obs_preproc(obs):
74 | """ @brief: no cheating of the observation function
75 | """
76 | if isinstance(obs, np.ndarray):
77 | return obs
78 | else:
79 | return obs
80 |
81 | @staticmethod
82 | def obs_postproc(obs, pred):
83 | if isinstance(obs, np.ndarray):
84 | return obs + pred
85 | else:
86 | return obs + pred
87 |
88 | @staticmethod
89 | def targ_proc(obs, next_obs):
90 | return next_obs - obs
91 |
92 | @staticmethod
93 | def obs_cost_fn(obs):
94 | """ @brief:
95 | see mbbl.env.gym_env.walker.py for reward details
96 | """
97 | if isinstance(obs, np.ndarray):
98 | velocity_cost = -obs[:, 3] # the qvel for the root-x joint
99 | return velocity_cost
100 | else:
101 | velocity_cost = -obs[:, 3] # the qvel for the root-x joint
102 | return velocity_cost
103 |
104 | @staticmethod
105 | def ac_cost_fn(acs):
106 | if isinstance(acs, np.ndarray):
107 | return 0.0001 * np.sum(np.square(acs), axis=1)
108 | else:
109 | return 0.0001 * tf.reduce_sum(tf.square(acs), axis=1)
110 |
111 | def nn_constructor(self, model_init_cfg, misc=None):
112 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap(
113 | name="model", num_networks=get_required_argument(model_init_cfg, "num_nets", "Must provide ensemble size"),
114 | sess=self.SESS, load_model=model_init_cfg.get("load_model", False),
115 | model_dir=model_init_cfg.get("model_dir", None),
116 | misc=misc
117 | ))
118 | if not model_init_cfg.get("load_model", False):
119 | model.add(FC(200, input_dim=self.MODEL_IN, activation="swish", weight_decay=0.000025))
120 | model.add(FC(200, activation="swish", weight_decay=0.00005))
121 | model.add(FC(200, activation="swish", weight_decay=0.000075))
122 | model.add(FC(200, activation="swish", weight_decay=0.000075))
123 | model.add(FC(self.MODEL_OUT, weight_decay=0.0001))
124 | model.finalize(tf.train.AdamOptimizer, {"learning_rate": 0.001})
125 | return model
126 |
127 | def gp_constructor(self, model_init_cfg):
128 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap(
129 | name="model",
130 | kernel_class=get_required_argument(model_init_cfg, "kernel_class", "Must provide kernel class"),
131 | kernel_args=model_init_cfg.get("kernel_args", {}),
132 | num_inducing_points=get_required_argument(
133 | model_init_cfg, "num_inducing_points", "Must provide number of inducing points."
134 | ),
135 | sess=self.SESS
136 | ))
137 | return model
138 |
139 |
140 | CONFIG_MODULE = SwimmerConfigModule
141 |
--------------------------------------------------------------------------------
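A standalone NumPy sketch (not from this repository) of how obs_cost_fn and ac_cost_fn above combine into the per-timestep planning cost for a batch of candidate rollouts; the arrays are made-up placeholders.

import numpy as np

# Hypothetical batch of 3 predicted swimmer observations (8-dim) and actions (2-dim).
obs = np.random.randn(3, 8)
acs = np.random.randn(3, 2)

# Same arithmetic as obs_cost_fn / ac_cost_fn in the config above.
velocity_cost = -obs[:, 3]                              # reward forward root-x velocity
action_cost = 0.0001 * np.sum(np.square(acs), axis=1)   # small action penalty
step_cost = velocity_cost + action_cost                 # one cost value per candidate
print(step_cost.shape)                                   # (3,)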
/dmbrl/config/gym_walker2d.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | from __future__ import print_function
3 | from __future__ import absolute_import
4 |
5 | import numpy as np
6 | import tensorflow as tf
7 | from dotmap import DotMap
8 |
9 | from dmbrl.misc.DotmapUtils import get_required_argument
10 | from dmbrl.modeling.layers import FC
11 |
12 |
13 | class WalkerConfigModule:
14 | """
15 | @brief: environment migrated from the mbbl repo, where it is registered as
16 | 'gym_walker2d': {
17 | 'path': 'mbbl.env.gym_env.walker',
18 | 'ob_size': 17, 'action_size': 6, 'max_length': 1000
19 | }
20 | """
21 | ENV_NAME = "MBRLGYM_Walker-v0"
22 | TASK_HORIZON = 1000
23 | NTRAIN_ITERS = 300
24 | NROLLOUTS_PER_ITER = 1
25 | PLAN_HOR = 30
26 | INIT_VAR = 0.25
27 | MODEL_IN, MODEL_OUT = 23, 17  # obs -> 17, action -> 6
28 | GP_NINDUCING_POINTS = 300
29 |
30 | def __init__(self):
31 | # self.ENV = gym.make(self.ENV_NAME)
32 | from mbbl.env.gym_env import walker
33 | self.ENV = walker.env(env_name='gym_walker2d', rand_seed=1234,
34 | misc_info={'reset_type': 'gym'})
35 | cfg = tf.ConfigProto()
36 | cfg.gpu_options.allow_growth = True
37 | self.SESS = tf.Session(config=cfg)
38 | self.NN_TRAIN_CFG = {"epochs": 5}
39 | self.OPT_CFG = {
40 | "Random": {
41 | "popsize": 2500
42 | },
43 | "GBPRandom": {
44 | "popsize": 2500
45 | },
46 | "GBPCEM": {
47 | "popsize": 500,
48 | "num_elites": 50,
49 | "max_iters": 5,
50 | "alpha": 0.1
51 | },
52 | "CEM": {
53 | "popsize": 500,
54 | "num_elites": 50,
55 | "max_iters": 5,
56 | "alpha": 0.1
57 | },
58 | "POPLIN-P": {
59 | "popsize": 500,
60 | "num_elites": 50,
61 | "max_iters": 5,
62 | "alpha": 0.1
63 | },
64 | "POPLIN-A": {
65 | "popsize": 500,
66 | "num_elites": 50,
67 | "max_iters": 5,
68 | "alpha": 0.1
69 | }
70 | }
71 |
72 | @staticmethod
73 | def obs_preproc(obs):
74 | """ @brief: no cheating of the observation function
75 | """
76 | if isinstance(obs, np.ndarray):
77 | return obs
78 | else:
79 | return obs
80 |
81 | @staticmethod
82 | def obs_postproc(obs, pred):
83 | if isinstance(obs, np.ndarray):
84 | return obs + pred
85 | else:
86 | return obs + pred
87 |
88 | @staticmethod
89 | def targ_proc(obs, next_obs):
90 | return next_obs - obs
91 |
92 | @staticmethod
93 | def obs_cost_fn(obs):
94 | """ @brief:
95 | see mbbl.env.gym_env.walker.py for reward details
96 | """
97 | if isinstance(obs, np.ndarray):
98 | velocity_cost = -obs[:, 8] # the qvel for the root-x joint
99 | height_cost = 3 * np.square(obs[:, 0] - 1.3) # the height
100 | return velocity_cost + height_cost
101 | else:
102 | velocity_cost = -obs[:, 8] # the qvel for the root-x joint
103 | height_cost = 3 * tf.square(obs[:, 0] - 1.3) # the height
104 | return velocity_cost + height_cost
105 |
106 | @staticmethod
107 | def ac_cost_fn(acs):
108 | if isinstance(acs, np.ndarray):
109 | return 0.1 * np.sum(np.square(acs), axis=1)
110 | else:
111 | return 0.1 * tf.reduce_sum(tf.square(acs), axis=1)
112 |
113 | def nn_constructor(self, model_init_cfg, misc=None):
114 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap(
115 | name="model", num_networks=get_required_argument(model_init_cfg, "num_nets", "Must provide ensemble size"),
116 | sess=self.SESS, load_model=model_init_cfg.get("load_model", False),
117 | model_dir=model_init_cfg.get("model_dir", None),
118 | misc=misc
119 | ))
120 | if not model_init_cfg.get("load_model", False):
121 | model.add(FC(200, input_dim=self.MODEL_IN, activation="swish", weight_decay=0.000025))
122 | model.add(FC(200, activation="swish", weight_decay=0.00005))
123 | model.add(FC(200, activation="swish", weight_decay=0.000075))
124 | model.add(FC(200, activation="swish", weight_decay=0.000075))
125 | model.add(FC(self.MODEL_OUT, weight_decay=0.0001))
126 | model.finalize(tf.train.AdamOptimizer, {"learning_rate": 0.001})
127 | return model
128 |
129 | def gp_constructor(self, model_init_cfg):
130 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap(
131 | name="model",
132 | kernel_class=get_required_argument(model_init_cfg, "kernel_class", "Must provide kernel class"),
133 | kernel_args=model_init_cfg.get("kernel_args", {}),
134 | num_inducing_points=get_required_argument(
135 | model_init_cfg, "num_inducing_points", "Must provide number of inducing points."
136 | ),
137 | sess=self.SESS
138 | ))
139 | return model
140 |
141 |
142 | CONFIG_MODULE = WalkerConfigModule
143 |
--------------------------------------------------------------------------------
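A standalone NumPy sketch (not from this repository) of the delta-prediction convention used by the swimmer and walker configs above: targ_proc builds the regression target as a state difference, and obs_postproc adds the model's prediction back onto the current state.

import numpy as np

obs = np.random.randn(5, 17)         # current walker observations
next_obs = np.random.randn(5, 17)    # observed successor states

target = next_obs - obs              # targ_proc(obs, next_obs): what the model is trained on
pred = target                        # pretend the dynamics model is perfect
recovered = obs + pred               # obs_postproc(obs, pred): prediction back in state space
assert np.allclose(recovered, next_obs)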
/dmbrl/config/halfcheetah.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | from __future__ import print_function
3 | from __future__ import absolute_import
4 |
5 | import numpy as np
6 | import tensorflow as tf
7 | from dotmap import DotMap
8 | import gym
9 |
10 | from dmbrl.misc.DotmapUtils import get_required_argument
11 | from dmbrl.modeling.layers import FC
12 | import dmbrl.env
13 |
14 |
15 | class HalfCheetahConfigModule:
16 | ENV_NAME = "MBRLHalfCheetah-v0"
17 | TASK_HORIZON = 1000
18 | NTRAIN_ITERS = 300
19 | NROLLOUTS_PER_ITER = 1
20 | PLAN_HOR = 30
21 | INIT_VAR = 0.25
22 | MODEL_IN, MODEL_OUT = 24, 18  # obs -> 18, action -> 6
23 | GP_NINDUCING_POINTS = 300
24 |
25 | def __init__(self):
26 | self.ENV = gym.make(self.ENV_NAME)
27 | cfg = tf.ConfigProto()
28 | cfg.gpu_options.allow_growth = True
29 | self.SESS = tf.Session(config=cfg)
30 | self.NN_TRAIN_CFG = {"epochs": 5}
31 | self.OPT_CFG = {
32 | "Random": {
33 | "popsize": 2500
34 | },
35 | "GBPRandom": {
36 | "popsize": 2500
37 | },
38 | "GBPCEM": {
39 | "popsize": 500,
40 | "num_elites": 50,
41 | "max_iters": 5,
42 | "alpha": 0.1
43 | },
44 | "CEM": {
45 | "popsize": 500,
46 | "num_elites": 50,
47 | "max_iters": 5,
48 | "alpha": 0.1
49 | },
50 | "POPLIN-P": {
51 | "popsize": 500,
52 | "num_elites": 50,
53 | "max_iters": 5,
54 | "alpha": 0.1
55 | },
56 | "POPLIN-A": {
57 | "popsize": 500,
58 | "num_elites": 50,
59 | "max_iters": 5,
60 | "alpha": 0.1
61 | }
62 | }
63 |
64 | @staticmethod
65 | def obs_preproc(obs):
66 | if isinstance(obs, np.ndarray):
67 | return np.concatenate([obs[:, 1:2], np.sin(obs[:, 2:3]), np.cos(obs[:, 2:3]), obs[:, 3:]], axis=1)
68 | else:
69 | return tf.concat([obs[:, 1:2], tf.sin(obs[:, 2:3]), tf.cos(obs[:, 2:3]), obs[:, 3:]], axis=1)
70 |
71 | @staticmethod
72 | def obs_postproc(obs, pred):
73 | if isinstance(obs, np.ndarray):
74 | return np.concatenate([pred[:, :1], obs[:, 1:] + pred[:, 1:]], axis=1)
75 | else:
76 | return tf.concat([pred[:, :1], obs[:, 1:] + pred[:, 1:]], axis=1)
77 |
78 | @staticmethod
79 | def targ_proc(obs, next_obs):
80 | return np.concatenate([next_obs[:, :1], next_obs[:, 1:] - obs[:, 1:]], axis=1)
81 |
82 | @staticmethod
83 | def obs_cost_fn(obs):
84 | return -obs[:, 0]
85 |
86 | @staticmethod
87 | def ac_cost_fn(acs):
88 | if isinstance(acs, np.ndarray):
89 | return 0.1 * np.sum(np.square(acs), axis=1)
90 | else:
91 | return 0.1 * tf.reduce_sum(tf.square(acs), axis=1)
92 |
93 | def nn_constructor(self, model_init_cfg, misc=None):
94 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap(
95 | name="model", num_networks=get_required_argument(model_init_cfg, "num_nets", "Must provide ensemble size"),
96 | sess=self.SESS, load_model=model_init_cfg.get("load_model", False),
97 | model_dir=model_init_cfg.get("model_dir", None),
98 | misc=misc
99 | ))
100 | if not model_init_cfg.get("load_model", False):
101 | model.add(FC(200, input_dim=self.MODEL_IN, activation="swish", weight_decay=0.000025))
102 | model.add(FC(200, activation="swish", weight_decay=0.00005))
103 | model.add(FC(200, activation="swish", weight_decay=0.000075))
104 | model.add(FC(200, activation="swish", weight_decay=0.000075))
105 | model.add(FC(self.MODEL_OUT, weight_decay=0.0001))
106 | model.finalize(tf.train.AdamOptimizer, {"learning_rate": 0.001})
107 | return model
108 |
109 | def gp_constructor(self, model_init_cfg):
110 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap(
111 | name="model",
112 | kernel_class=get_required_argument(model_init_cfg, "kernel_class", "Must provide kernel class"),
113 | kernel_args=model_init_cfg.get("kernel_args", {}),
114 | num_inducing_points=get_required_argument(
115 | model_init_cfg, "num_inducing_points", "Must provide number of inducing points."
116 | ),
117 | sess=self.SESS
118 | ))
119 | return model
120 |
121 |
122 | CONFIG_MODULE = HalfCheetahConfigModule
123 |
--------------------------------------------------------------------------------
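The half-cheetah config treats the first observation dimension differently from the configs above: it is predicted as an absolute value while the remaining dimensions are predicted as deltas, and obs_preproc replaces the root angle with its sine/cosine. A standalone NumPy sketch (not from this repository) of that round trip:

import numpy as np

obs = np.random.randn(4, 18)
next_obs = np.random.randn(4, 18)

# obs_preproc: drop dim 0, keep dim 1, encode dim 2 as (sin, cos), pass dims 3..17 through
# -> 18 model-input features; appending the 6-dim action gives MODEL_IN = 24.
model_in = np.concatenate(
    [obs[:, 1:2], np.sin(obs[:, 2:3]), np.cos(obs[:, 2:3]), obs[:, 3:]], axis=1)
assert model_in.shape == (4, 18)

# targ_proc: dim 0 as an absolute target, the rest as deltas ...
target = np.concatenate([next_obs[:, :1], next_obs[:, 1:] - obs[:, 1:]], axis=1)

# ... and obs_postproc inverts it, so a perfect prediction recovers next_obs exactly.
pred = target
recovered = np.concatenate([pred[:, :1], obs[:, 1:] + pred[:, 1:]], axis=1)
assert np.allclose(recovered, next_obs)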
/dmbrl/config/pusher.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | from __future__ import print_function
3 | from __future__ import absolute_import
4 |
5 | import numpy as np
6 | import tensorflow as tf
7 | from dotmap import DotMap
8 | import gym
9 |
10 | from dmbrl.misc.DotmapUtils import get_required_argument
11 | from dmbrl.modeling.layers import FC
12 | import dmbrl.env
13 |
14 |
15 | class PusherConfigModule:
16 | ENV_NAME = "MBRLPusher-v0"
17 | TASK_HORIZON = 150
18 | NTRAIN_ITERS = 100
19 | NROLLOUTS_PER_ITER = 1
20 | PLAN_HOR = 25
21 | INIT_VAR = 0.25
22 | MODEL_IN, MODEL_OUT = 27, 20
23 | GP_NINDUCING_POINTS = 200
24 |
25 | def __init__(self):
26 | self.ENV = gym.make(self.ENV_NAME)
27 | cfg = tf.ConfigProto()
28 | cfg.gpu_options.allow_growth = True
29 | self.SESS = tf.Session(config=cfg)
30 | self.NN_TRAIN_CFG = {"epochs": 5}
31 | self.OPT_CFG = {
32 | "Random": {
33 | "popsize": 2500
34 | },
35 | "CEM": {
36 | "popsize": 500,
37 | "num_elites": 50,
38 | "max_iters": 5,
39 | "alpha": 0.1
40 | },
41 | "GBPRandom": {
42 | "popsize": 2500
43 | },
44 | "GBPCEM": {
45 | "popsize": 500,
46 | "num_elites": 50,
47 | "max_iters": 5,
48 | "alpha": 0.1
49 | },
50 | "POPLIN-P": {
51 | "popsize": 500,
52 | "num_elites": 50,
53 | "max_iters": 5,
54 | "alpha": 0.1
55 | },
56 | "POPLIN-A": {
57 | "popsize": 500,
58 | "num_elites": 50,
59 | "max_iters": 5,
60 | "alpha": 0.1
61 | }
62 | }
63 |
64 | @staticmethod
65 | def obs_postproc(obs, pred):
66 | return obs + pred
67 |
68 | @staticmethod
69 | def targ_proc(obs, next_obs):
70 | return next_obs - obs
71 |
72 | def obs_cost_fn(self, obs):
73 | to_w, og_w = 0.5, 1.25
74 | tip_pos, obj_pos, goal_pos = obs[:, 14:17], obs[:, 17:20], self.ENV.ac_goal_pos
75 |
76 | if isinstance(obs, np.ndarray):
77 | tip_obj_dist = np.sum(np.abs(tip_pos - obj_pos), axis=1)
78 | obj_goal_dist = np.sum(np.abs(goal_pos - obj_pos), axis=1)
79 | return to_w * tip_obj_dist + og_w * obj_goal_dist
80 | else:
81 | tip_obj_dist = tf.reduce_sum(tf.abs(tip_pos - obj_pos), axis=1)
82 | obj_goal_dist = tf.reduce_sum(tf.abs(goal_pos - obj_pos), axis=1)
83 | return to_w * tip_obj_dist + og_w * obj_goal_dist
84 |
85 | @staticmethod
86 | def ac_cost_fn(acs):
87 | if isinstance(acs, np.ndarray):
88 | return 0.1 * np.sum(np.square(acs), axis=1)
89 | else:
90 | return 0.1 * tf.reduce_sum(tf.square(acs), axis=1)
91 |
92 | def nn_constructor(self, model_init_cfg, misc):
93 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap(
94 | name="model", num_networks=get_required_argument(model_init_cfg, "num_nets", "Must provide ensemble size"),
95 | sess=self.SESS, load_model=model_init_cfg.get("load_model", False),
96 | model_dir=model_init_cfg.get("model_dir", None),
97 | misc=misc
98 | ))
99 | if not model_init_cfg.get("load_model", False):
100 | model.add(FC(200, input_dim=self.MODEL_IN, activation="swish", weight_decay=0.00025))
101 | model.add(FC(200, activation="swish", weight_decay=0.0005))
102 | model.add(FC(200, activation="swish", weight_decay=0.0005))
103 | model.add(FC(self.MODEL_OUT, weight_decay=0.00075))
104 | model.finalize(tf.train.AdamOptimizer, {"learning_rate": 0.001})
105 | return model
106 |
107 | def gp_constructor(self, model_init_cfg):
108 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap(
109 | name="model",
110 | kernel_class=get_required_argument(model_init_cfg, "kernel_class", "Must provide kernel class"),
111 | kernel_args=model_init_cfg.get("kernel_args", {}),
112 | num_inducing_points=get_required_argument(
113 | model_init_cfg, "num_inducing_points", "Must provide number of inducing points."
114 | ),
115 | sess=self.SESS
116 | ))
117 | return model
118 |
119 |
120 | CONFIG_MODULE = PusherConfigModule
121 |
--------------------------------------------------------------------------------
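A standalone NumPy sketch (not from this repository) of the weighted pusher cost above: observation dims 14:17 hold the arm-tip position, dims 17:20 the object position, and the goal position is read from the environment (a fixed placeholder here).

import numpy as np

obs = np.random.randn(3, 20)     # hypothetical batch of predicted observations
goal_pos = np.zeros(3)           # stands in for self.ENV.ac_goal_pos

tip_obj_dist = np.sum(np.abs(obs[:, 14:17] - obs[:, 17:20]), axis=1)
obj_goal_dist = np.sum(np.abs(goal_pos - obs[:, 17:20]), axis=1)
step_cost = 0.5 * tip_obj_dist + 1.25 * obj_goal_dist    # to_w, og_w from the config
print(step_cost.shape)                                    # (3,)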
/dmbrl/config/reacher.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | from __future__ import print_function
3 | from __future__ import absolute_import
4 |
5 | import numpy as np
6 | import tensorflow as tf
7 | from dotmap import DotMap
8 | import gym
9 |
10 | from dmbrl.misc.DotmapUtils import get_required_argument
11 | from dmbrl.modeling.layers import FC
12 | import dmbrl.env
13 |
14 |
15 | class ReacherConfigModule:
16 | ENV_NAME = "MBRLReacher3D-v0"
17 | TASK_HORIZON = 150
18 | NTRAIN_ITERS = 100
19 | NROLLOUTS_PER_ITER = 1
20 | PLAN_HOR = 25
21 | INIT_VAR = 0.25
22 | MODEL_IN, MODEL_OUT = 24, 17
23 | GP_NINDUCING_POINTS = 200
24 |
25 | def __init__(self):
26 | self.ENV = gym.make(self.ENV_NAME)
27 | self.ENV.reset()
28 | cfg = tf.ConfigProto()
29 | cfg.gpu_options.allow_growth = True
30 | self.SESS = tf.Session(config=cfg)
31 | self.NN_TRAIN_CFG = {"epochs": 5}
32 | self.OPT_CFG = {
33 | "Random": {
34 | "popsize": 2000
35 | },
36 | "CEM": {
37 | "popsize": 400,
38 | "num_elites": 40,
39 | "max_iters": 5,
40 | "alpha": 0.1
41 | },
42 | "GBPRandom": {
43 | "popsize": 2000
44 | },
45 | "GBPCEM": {
46 | "popsize": 400,
47 | "num_elites": 40,
48 | "max_iters": 5,
49 | "alpha": 0.1
50 | },
51 | "POPLIN-P": {
52 | "popsize": 400,
53 | "num_elites": 40,
54 | "max_iters": 5,
55 | "alpha": 0.1
56 | },
57 | "POPLIN-A": {
58 | "popsize": 400,
59 | "num_elites": 40,
60 | "max_iters": 5,
61 | "alpha": 0.1
62 | }
63 | }
64 | self.UPDATE_FNS = [self.update_goal]
65 |
66 | self.goal = tf.Variable(self.ENV.goal, dtype=tf.float32)
67 | self.SESS.run(self.goal.initializer)
68 |
69 | @staticmethod
70 | def obs_postproc(obs, pred):
71 | return obs + pred
72 |
73 | @staticmethod
74 | def targ_proc(obs, next_obs):
75 | return next_obs - obs
76 |
77 | def update_goal(self, sess=None):
78 | if sess is not None:
79 | self.goal.load(self.ENV.goal, sess)
80 |
81 | def obs_cost_fn(self, obs):
82 | if isinstance(obs, np.ndarray):
83 | return np.sum(np.square(ReacherConfigModule.get_ee_pos(obs, are_tensors=False) - self.ENV.goal), axis=1)
84 | else:
85 | return tf.reduce_sum(tf.square(ReacherConfigModule.get_ee_pos(obs, are_tensors=True) - self.goal), axis=1)
86 |
87 | @staticmethod
88 | def ac_cost_fn(acs):
89 | if isinstance(acs, np.ndarray):
90 | return 0.01 * np.sum(np.square(acs), axis=1)
91 | else:
92 | return 0.01 * tf.reduce_sum(tf.square(acs), axis=1)
93 |
94 | def nn_constructor(self, model_init_cfg, misc=None):
95 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap(
96 | name="model", num_networks=get_required_argument(model_init_cfg, "num_nets", "Must provide ensemble size"),
97 | sess=self.SESS, load_model=model_init_cfg.get("load_model", False),
98 | model_dir=model_init_cfg.get("model_dir", None),
99 | misc=misc
100 | ))
101 | if not model_init_cfg.get("load_model", False):
102 | model.add(FC(200, input_dim=self.MODEL_IN, activation="swish", weight_decay=0.00025))
103 | model.add(FC(200, activation="swish", weight_decay=0.0005))
104 | model.add(FC(200, activation="swish", weight_decay=0.0005))
105 | model.add(FC(200, activation="swish", weight_decay=0.0005))
106 | model.add(FC(self.MODEL_OUT, weight_decay=0.00075))
107 | model.finalize(tf.train.AdamOptimizer, {"learning_rate": 0.00075})
108 | return model
109 |
110 | def gp_constructor(self, model_init_cfg):
111 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap(
112 | name="model",
113 | kernel_class=get_required_argument(model_init_cfg, "kernel_class", "Must provide kernel class"),
114 | kernel_args=model_init_cfg.get("kernel_args", {}),
115 | num_inducing_points=get_required_argument(
116 | model_init_cfg, "num_inducing_points", "Must provide number of inducing points."
117 | ),
118 | sess=self.SESS
119 | ))
120 | return model
121 |
122 | @staticmethod
123 | def get_ee_pos(states, are_tensors=False):
124 | theta1, theta2, theta3, theta4, theta5, theta6, theta7 = \
125 | states[:, :1], states[:, 1:2], states[:, 2:3], states[:, 3:4], states[:, 4:5], states[:, 5:6], states[:, 6:]
126 | if are_tensors:
127 | rot_axis = tf.concat([tf.cos(theta2) * tf.cos(theta1), tf.cos(theta2) * tf.sin(theta1), -tf.sin(theta2)],
128 | axis=1)
129 | rot_perp_axis = tf.concat([-tf.sin(theta1), tf.cos(theta1), tf.zeros(tf.shape(theta1))], axis=1)
130 | cur_end = tf.concat([
131 | 0.1 * tf.cos(theta1) + 0.4 * tf.cos(theta1) * tf.cos(theta2),
132 | 0.1 * tf.sin(theta1) + 0.4 * tf.sin(theta1) * tf.cos(theta2) - 0.188,
133 | -0.4 * tf.sin(theta2)
134 | ], axis=1)
135 |
136 | for length, hinge, roll in [(0.321, theta4, theta3), (0.16828, theta6, theta5)]:
137 | perp_all_axis = tf.cross(rot_axis, rot_perp_axis)
138 | x = tf.cos(hinge) * rot_axis
139 | y = tf.sin(hinge) * tf.sin(roll) * rot_perp_axis
140 | z = -tf.sin(hinge) * tf.cos(roll) * perp_all_axis
141 | new_rot_axis = x + y + z
142 | new_rot_perp_axis = tf.cross(new_rot_axis, rot_axis)
143 | new_rot_perp_axis = tf.where(tf.less(tf.norm(new_rot_perp_axis, axis=1), 1e-30),
144 | rot_perp_axis, new_rot_perp_axis)
145 | new_rot_perp_axis /= tf.norm(new_rot_perp_axis, axis=1, keepdims=True)
146 | rot_axis, rot_perp_axis, cur_end = new_rot_axis, new_rot_perp_axis, cur_end + length * new_rot_axis
147 | else:
148 | rot_axis = np.concatenate([np.cos(theta2) * np.cos(theta1), np.cos(theta2) * np.sin(theta1), -np.sin(theta2)],
149 | axis=1)
150 | rot_perp_axis = np.concatenate([-np.sin(theta1), np.cos(theta1), np.zeros(theta1.shape)], axis=1)
151 | cur_end = np.concatenate([
152 | 0.1 * np.cos(theta1) + 0.4 * np.cos(theta1) * np.cos(theta2),
153 | 0.1 * np.sin(theta1) + 0.4 * np.sin(theta1) * np.cos(theta2) - 0.188,
154 | -0.4 * np.sin(theta2)
155 | ], axis=1)
156 |
157 | for length, hinge, roll in [(0.321, theta4, theta3), (0.16828, theta6, theta5)]:
158 | perp_all_axis = np.cross(rot_axis, rot_perp_axis)
159 | x = np.cos(hinge) * rot_axis
160 | y = np.sin(hinge) * np.sin(roll) * rot_perp_axis
161 | z = -np.sin(hinge) * np.cos(roll) * perp_all_axis
162 | new_rot_axis = x + y + z
163 | new_rot_perp_axis = np.cross(new_rot_axis, rot_axis)
164 | new_rot_perp_axis[np.linalg.norm(new_rot_perp_axis, axis=1) < 1e-30] = \
165 | rot_perp_axis[np.linalg.norm(new_rot_perp_axis, axis=1) < 1e-30]
166 | new_rot_perp_axis /= np.linalg.norm(new_rot_perp_axis, axis=1, keepdims=True)
167 | rot_axis, rot_perp_axis, cur_end = new_rot_axis, new_rot_perp_axis, cur_end + length * new_rot_axis
168 |
169 | return cur_end
170 |
171 |
172 | CONFIG_MODULE = ReacherConfigModule
173 |
--------------------------------------------------------------------------------
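A usage sketch of the forward-kinematics helper above, assuming the dmbrl package and its dependencies import cleanly; the joint angles are arbitrary.

import numpy as np
from dmbrl.config.reacher import ReacherConfigModule

# One row of seven joint angles (all zeros here); get_ee_pos is a staticmethod,
# so no MuJoCo environment or TensorFlow session is needed for the NumPy branch.
states = np.zeros((1, 7))
ee_pos = ReacherConfigModule.get_ee_pos(states, are_tensors=False)
print(ee_pos)     # 3-D end-effector position of the zero configuration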
/dmbrl/config/reward_util.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | import tensorflow as tf
5 |
6 | import numpy as np
7 |
8 | # The value returned by tolerance() at `margin` distance from `bounds` interval.
9 | _DEFAULT_VALUE_AT_MARGIN = 0.1
10 |
11 |
12 | def _sigmoids(x, value_at_1, sigmoid):
13 | """Returns 1 when `x` == 0, between 0 and 1 otherwise.
14 | Args:
15 | x: A scalar or numpy array.
16 | value_at_1: A float between 0 and 1 specifying the output when `x` == 1.
17 | sigmoid: String, choice of sigmoid type.
18 | Returns:
19 | A numpy array with values between 0.0 and 1.0.
20 | Raises:
21 | ValueError: If not 0 < `value_at_1` < 1, except for `linear`, `cosine` and
22 | `quadratic` sigmoids which allow `value_at_1` == 0.
23 | ValueError: If `sigmoid` is of an unknown type.
24 | """
25 | if sigmoid in ('cosine', 'linear', 'quadratic'):
26 | if not 0 <= value_at_1 < 1:
27 | raise ValueError('`value_at_1` must be nonnegative and smaller than 1, '
28 | 'got {}.'.format(value_at_1))
29 | else:
30 | if not 0 < value_at_1 < 1:
31 | raise ValueError('`value_at_1` must be strictly between 0 and 1, '
32 | 'got {}.'.format(value_at_1))
33 |
34 | if sigmoid == 'gaussian':
35 | scale = tf.sqrt(-2 * tf.log(value_at_1))
36 | return tf.exp(-0.5 * (x * scale) ** 2)
37 |
38 | elif sigmoid == 'hyperbolic':
39 | scale = tf.acosh(1 / value_at_1)
40 | return 1 / tf.cosh(x * scale)
41 |
42 | elif sigmoid == 'long_tail':
43 | scale = tf.sqrt(1 / value_at_1 - 1)
44 | return 1 / ((x * scale) ** 2 + 1)
45 |
46 | elif sigmoid == 'cosine':
47 | scale = tf.acos(2 * value_at_1 - 1) / np.pi
48 | scaled_x = x * scale
49 | return tf.where(abs(scaled_x) < 1,
50 | (1 + tf.cos(np.pi * scaled_x)) / 2, 0.0 * scaled_x)
51 |
52 | elif sigmoid == 'linear':
53 | scale = 1.0 - value_at_1
54 | scaled_x = x * scale
55 | return tf.where(abs(scaled_x) < 1, 1 - scaled_x, 0.0 * scaled_x)
56 |
57 | elif sigmoid == 'quadratic':
58 | scale = tf.sqrt(1.0 - value_at_1)
59 | scaled_x = x * scale
60 | return tf.where(abs(scaled_x) < 1, 1 - scaled_x ** 2, 0.0 * scaled_x)
61 |
62 | elif sigmoid == 'tanh_squared':
63 | scale = tf.atanh(tf.sqrt(1 - value_at_1))
64 | return 1 - tf.tanh(x * scale) ** 2
65 |
66 | else:
67 | raise ValueError('Unknown sigmoid type {!r}.'.format(sigmoid))
68 |
69 |
70 | def tolerance(x, bounds=(0.0, 0.0), margin=0.0, sigmoid='gaussian',
71 | value_at_margin=_DEFAULT_VALUE_AT_MARGIN):
72 | """Returns 1 when `x` falls inside the bounds, between 0 and 1 otherwise.
73 | Args:
74 | x: A scalar or numpy array.
75 | bounds: A tuple of floats specifying inclusive `(lower, upper)` bounds for
76 | the target interval. These can be infinite if the interval is unbounded
77 | at one or both ends, or they can be equal to one another if the target
78 | value is exact.
79 | margin: Float. Parameter that controls how steeply the output decreases as
80 | `x` moves out-of-bounds.
81 | * If `margin == 0` then the output will be 0 for all values of `x`
82 | outside of `bounds`.
83 | * If `margin > 0` then the output will decrease sigmoidally with
84 | increasing distance from the nearest bound.
85 | sigmoid: String, choice of sigmoid type. Valid values are: 'gaussian',
86 | 'linear', 'hyperbolic', 'long_tail', 'cosine', 'tanh_squared'.
87 | value_at_margin: A float between 0 and 1 specifying the output value when
88 | the distance from `x` to the nearest bound is equal to `margin`. Ignored
89 | if `margin == 0`.
90 | Returns:
91 | A float or numpy array with values between 0.0 and 1.0.
92 | Raises:
93 | ValueError: If `bounds[0] > bounds[1]`.
94 | ValueError: If `margin` is negative.
95 | """
96 | lower, upper = bounds
97 | if lower > upper:
98 | raise ValueError('Lower bound must be <= upper bound.')
99 | if margin < 0:
100 | raise ValueError('`margin` must be non-negative.')
101 |
102 | in_bounds = tf.logical_and(lower <= x, x <= upper)
103 | if margin == 0:
104 | value = tf.where(in_bounds, tf.ones_like(x), tf.zeros_like(x))
105 | else:
106 | d = tf.where(x < lower, lower - x, x - upper) / margin
107 | value = tf.where(in_bounds,
108 | 1.0 + d * 0.0,
109 | _sigmoids(d, value_at_margin, sigmoid))
110 |
111 | return value
112 |
--------------------------------------------------------------------------------
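A minimal usage sketch of tolerance(), assuming TensorFlow 1.x (tf.Session); it shows the reward decaying from 1 inside the bounds down to value_at_margin at a distance of margin.

import numpy as np
import tensorflow as tf
from dmbrl.config.reward_util import tolerance

# Distances from a point target: exactly 0 is "inside the bounds", larger values decay.
x = tf.constant(np.array([0.0, 0.05, 0.1, 0.5], dtype=np.float32))
r = tolerance(x, bounds=(0.0, 0.0), margin=0.1, sigmoid='gaussian', value_at_margin=0.1)

with tf.Session() as sess:
    print(sess.run(r))   # approximately [1.0, 0.56, 0.1, 0.0]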
/dmbrl/config/template.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | from __future__ import print_function
3 | from __future__ import absolute_import
4 |
5 | import numpy as np
6 | import tensorflow as tf
7 | from dotmap import DotMap
8 | import gym
9 |
10 | from dmbrl.misc.DotmapUtils import get_required_argument
11 | from dmbrl.modeling.layers import FC
12 |
13 |
14 | class EnvConfigModule:
15 | ENV_NAME = None
16 | TASK_HORIZON = None
17 | NTRAIN_ITERS = None
18 | NROLLOUTS_PER_ITER = None
19 | PLAN_HOR = None
20 |
21 | def __init__(self):
22 | self.ENV = gym.make(self.ENV_NAME)
23 | cfg = tf.ConfigProto()
24 | cfg.gpu_options.allow_growth = True
25 | self.SESS = tf.Session(config=cfg)
26 | self.NN_TRAIN_CFG = {"epochs": None}
27 | self.OPT_CFG = {
28 | "Random": {
29 | "popsize": None
30 | },
31 | "CEM": {
32 | "popsize": None,
33 | "num_elites": None,
34 | "max_iters": None,
35 | "alpha": None
36 | }
37 | }
38 | self.UPDATE_FNS = []
39 |
40 | # Fill in other things to be done here.
41 |
42 | @staticmethod
43 | def obs_preproc(obs):
44 | # Note: Must be able to process both NumPy and Tensorflow arrays.
45 | if isinstance(obs, np.ndarray):
46 | raise NotImplementedError()
47 | else:
48 | raise NotImplementedError
49 |
50 | @staticmethod
51 | def obs_postproc(obs, pred):
52 | # Note: Must be able to process both NumPy and Tensorflow arrays.
53 | if isinstance(obs, np.ndarray):
54 | raise NotImplementedError()
55 | else:
56 | raise NotImplementedError()
57 |
58 | @staticmethod
59 | def targ_proc(obs, next_obs):
60 | # Note: Only needs to process NumPy arrays.
61 | raise NotImplementedError()
62 |
63 | @staticmethod
64 | def obs_cost_fn(obs):
65 | # Note: Must be able to process both NumPy and Tensorflow arrays.
66 | if isinstance(obs, np.ndarray):
67 | raise NotImplementedError()
68 | else:
69 | raise NotImplementedError()
70 |
71 | @staticmethod
72 | def ac_cost_fn(acs):
73 | # Note: Must be able to process both NumPy and Tensorflow arrays.
74 | if isinstance(acs, np.ndarray):
75 | raise NotImplementedError()
76 | else:
77 | raise NotImplementedError()
78 |
79 | def nn_constructor(self, model_init_cfg):
80 | model = get_required_argument(model_init_cfg, "model_class", "Must provide model class")(DotMap(
81 | name="model", num_networks=get_required_argument(model_init_cfg, "num_nets", "Must provide ensemble size"),
82 | sess=self.SESS
83 | ))
84 | # Construct model below. For example:
85 | # model.add(FC(*args))
86 | # ...
87 | # model.finalize(tf.train.AdamOptimizer, {"learning_rate": 0.001})
88 | return model
89 |
90 |
91 | CONFIG_MODULE = EnvConfigModule
92 |
93 |
--------------------------------------------------------------------------------
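A hypothetical fill-in of the template above (not from this repository): identity preprocessing, delta targets, and a made-up quadratic cost. The environment id and cost terms are assumptions chosen only for illustration.

import numpy as np
import tensorflow as tf
from dmbrl.config.template import EnvConfigModule

class MyEnvConfigModule(EnvConfigModule):
    ENV_NAME = "Pendulum-v0"      # any registered gym id
    TASK_HORIZON, NTRAIN_ITERS, NROLLOUTS_PER_ITER, PLAN_HOR = 200, 50, 1, 25

    @staticmethod
    def obs_preproc(obs):
        return obs                # same code path for NumPy arrays and tensors

    @staticmethod
    def obs_postproc(obs, pred):
        return obs + pred         # model predicts state deltas

    @staticmethod
    def targ_proc(obs, next_obs):
        return next_obs - obs

    @staticmethod
    def obs_cost_fn(obs):
        if isinstance(obs, np.ndarray):
            return np.square(obs[:, 0])
        return tf.square(obs[:, 0])

    @staticmethod
    def ac_cost_fn(acs):
        if isinstance(acs, np.ndarray):
            return 0.01 * np.sum(np.square(acs), axis=1)
        return 0.01 * tf.reduce_sum(tf.square(acs), axis=1)

CONFIG_MODULE = MyEnvConfigModule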
/dmbrl/config/view_humanoid.py:
--------------------------------------------------------------------------------
1 | if __name__ == '__main__':
2 | '''
3 | from dm_control import suite
4 | from dm_control import viewer
5 | import numpy as np
6 |
7 | test_env = suite.load(domain_name="humanoid", task_name="stand")
8 | action_spec = test_env.action_spec()
9 |
10 | def initialize_episode(physics):
11 | with physics.reset_context():
12 | physics.data.qpos[:] = 0.0
13 | physics.data.qpos[2] = 1.33
14 | physics.data.qvel[:] = 0.0
15 | print(physics.head_height())
16 | print(physics.head_height())
17 | print(physics.head_height())
18 | test_env.task.initialize_episode = initialize_episode
19 |
20 | # Define a uniform random policy.
21 | def random_policy(time_step):
22 | del time_step # Unused.
23 | return np.random.uniform(low=action_spec.minimum,
24 | high=action_spec.maximum,
25 | size=action_spec.shape)
26 |
27 | # Launch the viewer application.
28 | viewer.launch(test_env, policy=random_policy)
29 | '''
30 | from dm_control import suite
31 | import matplotlib.pyplot as plt
32 | import numpy as np
33 |
34 | max_frame = 90
35 |
36 | width = 480
37 | height = 480
38 | video = np.zeros((90, height, 2 * width, 3), dtype=np.uint8)
39 |
40 | # Load one task:
41 | env = suite.load(domain_name="humanoid", task_name="walk")
42 |
43 | # Step through an episode and print out reward, discount and observation.
44 | action_spec = env.action_spec()
45 | time_step = env.reset()
46 |
47 | with env.physics.reset_context():
48 | env.physics.data.qpos[:] = 0.0
49 | env.physics.data.qpos[2] = 1.33
50 | env.physics.data.qvel[:] = 0.0
51 | head_pos = []
52 | while not time_step.last():
53 | for i in range(max_frame):
54 | action = np.random.uniform(action_spec.minimum,
55 | action_spec.maximum,
56 | size=action_spec.shape)
57 | time_step = env.step(action)
58 |
59 | head_pos.append(env.physics.head_height())
60 | video[i] = np.hstack([env.physics.render(height, width, camera_id=0),
61 | env.physics.render(height, width, camera_id=1)])
62 | # print(time_step.reward, time_step.discount, time_step.observation)
63 | for i in range(max_frame):
64 | print(head_pos[i])
65 | img = plt.imshow(video[i])
66 | plt.pause(1) # Need min display time > 0.0.
67 | plt.draw()
68 |
--------------------------------------------------------------------------------
/dmbrl/controllers/Controller.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | from __future__ import print_function
3 | from __future__ import absolute_import
4 |
5 |
6 | class Controller:
7 | def __init__(self, *args, **kwargs):
8 | """Creates class instance.
9 | """
10 | self._policy_network = None
11 | pass
12 |
13 | def train(self, obs_trajs, acs_trajs, rews_trajs):
14 | """Trains this controller using lists of trajectories.
15 | """
16 | raise NotImplementedError("Must be implemented in subclass.")
17 |
18 | def reset(self):
19 | """Resets this controller.
20 | """
21 | raise NotImplementedError("Must be implemented in subclass.")
22 |
23 | def act(self, obs, t, get_pred_cost=False):
24 | """Performs an action.
25 | """
26 | raise NotImplementedError("Must be implemented in subclass.")
27 |
28 | def dump_logs(self, primary_logdir, iter_logdir):
29 | """Dumps logs into primary log directory and per-train iteration log directory.
30 | """
31 | raise NotImplementedError("Must be implemented in subclass.")
32 |
33 | def get_policy_network(self):
34 | return self._policy_network
35 |
36 | def train_policy_network(self):
37 | return False
38 |
--------------------------------------------------------------------------------
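A hypothetical minimal subclass (not from this repository) that satisfies the Controller interface above by acting uniformly at random; it is only meant to show which methods a concrete controller such as MPC has to implement.

import numpy as np
from dmbrl.controllers.Controller import Controller

class RandomController(Controller):
    def __init__(self, action_space):
        super(RandomController, self).__init__()
        self.action_space = action_space

    def train(self, obs_trajs, acs_trajs, rews_trajs):
        pass                                   # nothing to learn

    def reset(self):
        pass

    def act(self, obs, t, get_pred_cost=False):
        return np.random.uniform(self.action_space.low, self.action_space.high)

    def dump_logs(self, primary_logdir, iter_logdir):
        pass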
/dmbrl/controllers/__init__.py:
--------------------------------------------------------------------------------
1 | from .MPC import MPC
2 |
--------------------------------------------------------------------------------
/dmbrl/env/__init__.py:
--------------------------------------------------------------------------------
1 | from gym.envs.registration import register
2 |
3 |
4 | register(
5 | id='MBRLCartpole-v0',
6 | entry_point='dmbrl.env.cartpole:CartpoleEnv'
7 | )
8 |
9 |
10 | register(
11 | id='MBRLReacher3D-v0',
12 | entry_point='dmbrl.env.reacher:Reacher3DEnv'
13 | )
14 |
15 |
16 | register(
17 | id='MBRLPusher-v0',
18 | entry_point='dmbrl.env.pusher:PusherEnv'
19 | )
20 |
21 |
22 | register(
23 | id='MBRLHalfCheetah-v0',
24 | entry_point='dmbrl.env.half_cheetah:HalfCheetahEnv'
25 | )
26 |
--------------------------------------------------------------------------------
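Importing dmbrl.env is enough to make the four MBRL* ids above visible to gym. A short sketch (it assumes mujoco-py is set up as in the Dockerfile):

import gym
import dmbrl.env          # side effect: registers the MBRL* environments above

env = gym.make("MBRLCartpole-v0")
obs = env.reset()
print(obs.shape)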
/dmbrl/env/assets/cartpole.xml:
--------------------------------------------------------------------------------
[XML markup not preserved in this export; see the repository for the MuJoCo cartpole model definition]
--------------------------------------------------------------------------------
/dmbrl/env/assets/half_cheetah.xml:
--------------------------------------------------------------------------------
[XML markup not preserved in this export; see the repository for the MuJoCo half-cheetah model definition]
--------------------------------------------------------------------------------
/dmbrl/env/assets/pusher.xml:
--------------------------------------------------------------------------------
[XML markup not preserved in this export; see the repository for the MuJoCo pusher model definition]
--------------------------------------------------------------------------------
/dmbrl/env/cartpole.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | from __future__ import print_function
3 | from __future__ import absolute_import
4 |
5 | import os
6 |
7 | import numpy as np
8 | from gym import utils
9 | from gym.envs.mujoco import mujoco_env
10 |
11 |
12 | class CartpoleEnv(mujoco_env.MujocoEnv, utils.EzPickle):
13 | PENDULUM_LENGTH = 0.6
14 |
15 | def __init__(self):
16 | utils.EzPickle.__init__(self)
17 | dir_path = os.path.dirname(os.path.realpath(__file__))
18 | mujoco_env.MujocoEnv.__init__(self, '%s/assets/cartpole.xml' % dir_path, 2)
19 |
20 | def _step(self, a):
21 | self.do_simulation(a, self.frame_skip)
22 | ob = self._get_obs()
23 |
24 | cost_lscale = CartpoleEnv.PENDULUM_LENGTH
25 | reward = np.exp(
26 | -np.sum(np.square(self._get_ee_pos(ob) - np.array([0.0, CartpoleEnv.PENDULUM_LENGTH]))) / (cost_lscale ** 2)
27 | )
28 | reward -= 0.01 * np.sum(np.square(a))
29 |
30 | done = False
31 | return ob, reward, done, {}
32 |
33 | def reset_model(self):
34 | qpos = self.init_qpos + np.random.normal(0, 0.1, np.shape(self.init_qpos))
35 | qvel = self.init_qvel + np.random.normal(0, 0.1, np.shape(self.init_qvel))
36 | self.set_state(qpos, qvel)
37 | return self._get_obs()
38 |
39 | def _get_obs(self):
40 | return np.concatenate([self.model.data.qpos, self.model.data.qvel]).ravel()
41 |
42 | @staticmethod
43 | def _get_ee_pos(x):
44 | x0, theta = x[0], x[1]
45 | return np.array([
46 | x0 - CartpoleEnv.PENDULUM_LENGTH * np.sin(theta),
47 | -CartpoleEnv.PENDULUM_LENGTH * np.cos(theta)
48 | ])
49 |
50 | def viewer_setup(self):
51 | v = self.viewer
52 | v.cam.trackbodyid = 0
53 | v.cam.distance = v.model.stat.extent
54 |
--------------------------------------------------------------------------------
/dmbrl/env/half_cheetah.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | from __future__ import print_function
3 | from __future__ import absolute_import
4 |
5 | import os
6 |
7 | import numpy as np
8 | from gym import utils
9 | from gym.envs.mujoco import mujoco_env
10 |
11 |
12 | class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle):
13 |
14 | def __init__(self):
15 | self.prev_qpos = None
16 | dir_path = os.path.dirname(os.path.realpath(__file__))
17 | mujoco_env.MujocoEnv.__init__(self, '%s/assets/half_cheetah.xml' % dir_path, 5)
18 | utils.EzPickle.__init__(self)
19 |
20 | def _step(self, action):
21 | self.prev_qpos = np.copy(self.model.data.qpos.flat)
22 | self.do_simulation(action, self.frame_skip)
23 | ob = self._get_obs()
24 |
25 | reward_ctrl = -0.1 * np.square(action).sum()
26 | reward_run = ob[0] - 0.0 * np.square(ob[2])
27 | reward = reward_run + reward_ctrl
28 |
29 | done = False
30 | return ob, reward, done, {}
31 |
32 | def _get_obs(self):
33 | return np.concatenate([
34 | (self.model.data.qpos.flat[:1] - self.prev_qpos[:1]) / self.dt,
35 | self.model.data.qpos.flat[1:],
36 | self.model.data.qvel.flat,
37 | ])
38 |
39 | def reset_model(self):
40 | qpos = self.init_qpos + np.random.normal(loc=0, scale=0.001, size=self.model.nq)
41 | qvel = self.init_qvel + np.random.normal(loc=0, scale=0.001, size=self.model.nv)
42 | self.set_state(qpos, qvel)
43 | self.prev_qpos = np.copy(self.model.data.qpos.flat)
44 | return self._get_obs()
45 |
46 | def viewer_setup(self):
47 | self.viewer.cam.distance = self.model.stat.extent * 0.25
48 | self.viewer.cam.elevation = -55
49 |
--------------------------------------------------------------------------------
/dmbrl/env/pusher.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | from __future__ import print_function
3 | from __future__ import absolute_import
4 |
5 | import os
6 |
7 | import numpy as np
8 | from gym import utils
9 | from gym.envs.mujoco import mujoco_env
10 |
11 |
12 | class PusherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
13 | def __init__(self):
14 | dir_path = os.path.dirname(os.path.realpath(__file__))
15 | mujoco_env.MujocoEnv.__init__(self, '%s/assets/pusher.xml' % dir_path, 4)
16 | utils.EzPickle.__init__(self)
17 | self.reset_model()
18 |
19 | def _step(self, a):
20 | obj_pos = self.get_body_com("object")
21 | vec_1 = obj_pos - self.get_body_com("tips_arm")
22 | vec_2 = obj_pos - self.get_body_com("goal")
23 |
24 | reward_near = -np.sum(np.abs(vec_1))
25 | reward_dist = -np.sum(np.abs(vec_2))
26 | reward_ctrl = -np.square(a).sum()
27 | reward = 1.25 * reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near
28 |
29 | self.do_simulation(a, self.frame_skip)
30 | ob = self._get_obs()
31 | done = False
32 | return ob, reward, done, {}
33 |
34 | def viewer_setup(self):
35 | self.viewer.cam.trackbodyid = -1
36 | self.viewer.cam.distance = 4.0
37 |
38 | def reset_model(self):
39 | qpos = self.init_qpos
40 |
41 | self.goal_pos = np.asarray([0, 0])
42 | self.cylinder_pos = np.array([-0.25, 0.15]) + np.random.normal(0, 0.025, [2])
43 |
44 | qpos[-4:-2] = self.cylinder_pos
45 | qpos[-2:] = self.goal_pos
46 | qvel = self.init_qvel + self.np_random.uniform(low=-0.005,
47 | high=0.005, size=self.model.nv)
48 | qvel[-4:] = 0
49 | self.set_state(qpos, qvel)
50 | self.ac_goal_pos = self.get_body_com("goal")
51 |
52 | return self._get_obs()
53 |
54 | def _get_obs(self):
55 | return np.concatenate([
56 | self.model.data.qpos.flat[:7],
57 | self.model.data.qvel.flat[:7],
58 | self.get_body_com("tips_arm"),
59 | self.get_body_com("object"),
60 | ])
61 |
--------------------------------------------------------------------------------
/dmbrl/env/reacher.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | from __future__ import print_function
3 | from __future__ import absolute_import
4 |
5 | import os
6 |
7 | import numpy as np
8 | from gym import utils
9 | from gym.envs.mujoco import mujoco_env
10 |
11 |
12 | class Reacher3DEnv(mujoco_env.MujocoEnv, utils.EzPickle):
13 | def __init__(self):
14 | self.viewer = None
15 | utils.EzPickle.__init__(self)
16 | dir_path = os.path.dirname(os.path.realpath(__file__))
17 | self.goal = np.zeros(3)
18 | mujoco_env.MujocoEnv.__init__(self, os.path.join(dir_path, 'assets/reacher3d.xml'), 2)
19 |
20 | def _step(self, a):
21 | self.do_simulation(a, self.frame_skip)
22 | ob = self._get_obs()
23 | reward = -np.sum(np.square(self.get_EE_pos(ob[None]) - self.goal))
24 | reward -= 0.01 * np.square(a).sum()
25 | done = False
26 | return ob, reward, done, dict(reward_dist=0, reward_ctrl=0)
27 |
28 | def viewer_setup(self):
29 | self.viewer.cam.trackbodyid = 1
30 | self.viewer.cam.distance = 2.5
31 | self.viewer.cam.elevation = -30
32 | self.viewer.cam.azimuth = 270
33 |
34 | def reset_model(self):
35 | qpos, qvel = np.copy(self.init_qpos), np.copy(self.init_qvel)
36 | qpos[-3:] += np.random.normal(loc=0, scale=0.1, size=[3])
37 | qvel[-3:] = 0
38 | self.goal = qpos[-3:]
39 | self.set_state(qpos, qvel)
40 | return self._get_obs()
41 |
42 | def _get_obs(self):
43 | return np.concatenate([
44 | self.model.data.qpos.flat,
45 | self.model.data.qvel.flat[:-3],
46 | ])
47 |
48 | def get_EE_pos(self, states):
49 | theta1, theta2, theta3, theta4, theta5, theta6, theta7 = \
50 | states[:, :1], states[:, 1:2], states[:, 2:3], states[:, 3:4], states[:, 4:5], states[:, 5:6], states[:, 6:]
51 |
52 | rot_axis = np.concatenate([np.cos(theta2) * np.cos(theta1), np.cos(theta2) * np.sin(theta1), -np.sin(theta2)],
53 | axis=1)
54 | rot_perp_axis = np.concatenate([-np.sin(theta1), np.cos(theta1), np.zeros(theta1.shape)], axis=1)
55 | cur_end = np.concatenate([
56 | 0.1 * np.cos(theta1) + 0.4 * np.cos(theta1) * np.cos(theta2),
57 | 0.1 * np.sin(theta1) + 0.4 * np.sin(theta1) * np.cos(theta2) - 0.188,
58 | -0.4 * np.sin(theta2)
59 | ], axis=1)
60 |
61 | for length, hinge, roll in [(0.321, theta4, theta3), (0.16828, theta6, theta5)]:
62 | perp_all_axis = np.cross(rot_axis, rot_perp_axis)
63 | x = np.cos(hinge) * rot_axis
64 | y = np.sin(hinge) * np.sin(roll) * rot_perp_axis
65 | z = -np.sin(hinge) * np.cos(roll) * perp_all_axis
66 | new_rot_axis = x + y + z
67 | new_rot_perp_axis = np.cross(new_rot_axis, rot_axis)
68 | new_rot_perp_axis[np.linalg.norm(new_rot_perp_axis, axis=1) < 1e-30] = \
69 | rot_perp_axis[np.linalg.norm(new_rot_perp_axis, axis=1) < 1e-30]
70 | new_rot_perp_axis /= np.linalg.norm(new_rot_perp_axis, axis=1, keepdims=True)
71 | rot_axis, rot_perp_axis, cur_end = new_rot_axis, new_rot_perp_axis, cur_end + length * new_rot_axis
72 |
73 | return cur_end
74 |
75 |
--------------------------------------------------------------------------------
/dmbrl/misc/Agent.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | from __future__ import print_function
3 | from __future__ import absolute_import
4 |
5 | import numpy as np
6 | from gym.monitoring import VideoRecorder
7 | from dotmap import DotMap
8 | from dmbrl.misc import logger
9 |
10 | import time
11 |
12 |
13 | class Agent:
14 | """An general class for RL agents.
15 | """
16 |
17 | def __init__(self, params):
18 | """Initializes an agent.
19 |
20 | Arguments:
21 | params: (DotMap) A DotMap of agent parameters.
22 | .env: (OpenAI gym environment) The environment for this agent.
23 | .noisy_actions: (bool) Indicates whether random Gaussian noise will
24 | be added to the actions of this agent.
25 | .noise_stddev: (float) The standard deviation to be used for the
26 | action noise if params.noisy_actions is True.
27 | """
28 | self.env = params.env
29 |
30 | # load the imitation data if needed
31 | if hasattr(self.env, '_expert_data_loaded') and \
32 | (not self.env._expert_data_loaded):
33 | self.env.load_expert_data(
34 | params.params.misc.ctrl_cfg.il_cfg.expert_amc_dir
35 | )
36 |
37 | self.noise_stddev = params.noise_stddev if params.get("noisy_actions", False) else None
38 |
39 | if isinstance(self.env, DotMap):
40 | raise ValueError("Environment must be provided to the agent at initialization.")
41 | if (not isinstance(self.noise_stddev, float)) and params.get("noisy_actions", False):
42 | raise ValueError("Must provide standard deviation for noise for noisy actions.")
43 |
44 | if self.noise_stddev is not None:
45 | self.dU = self.env.action_space.shape[0]
46 | self._debug = 1
47 |
48 | def sample(self, horizon, policy, record_fname=None, test_policy=False, average=False):
49 | """Samples a rollout from the agent.
50 |
51 | Arguments:
52 | horizon: (int) The length of the rollout to generate from the agent.
53 | policy: (policy) The policy that the agent will use for actions.
54 | record_fname: (str/None) The name of the file to which a recording of the rollout
55 | will be saved. If None, the rollout will not be recorded.
56 |
57 | Returns: (dict) A dictionary containing data from the rollout.
58 | The keys of the dictionary are 'obs', 'ac', and 'reward_sum'.
59 | """
60 | if test_policy:
61 | logger.info('Testing the policy')
62 | video_record = record_fname is not None
63 | recorder = None if not video_record else VideoRecorder(self.env, record_fname)
64 |
65 | times, rewards = [], []
66 | O, A, reward_sum, done = [self.env.reset()], [], 0, False
67 | self._debug += 1
68 |
69 | policy.reset()
70 | # for t in range(20):
71 | for t in range(horizon):
72 | if hasattr(self.env, 'render_imitation'):
73 | self.env.render_imitation()
74 | if t % 50 == 10 and t > 1:
75 | logger.info('Current timesteps: %d / %d, average time: %.5f'
76 | % (t, horizon, np.mean(times)))
77 | if video_record:
78 | recorder.capture_frame()
79 | start = time.time()
80 | if test_policy:
81 | A.append(policy.act(O[t], t, test_policy=test_policy, average=average))
82 | else:
83 | A.append(policy.act(O[t], t))
84 | times.append(time.time() - start)
85 |
86 | if self.noise_stddev is None:
87 | obs, reward, done, info = self.env.step(A[t])
88 | else:
89 | action = A[t] + np.random.normal(loc=0, scale=self.noise_stddev,
90 | size=[self.dU])
91 | action = np.minimum(np.maximum(action,
92 | self.env.action_space.low),
93 | self.env.action_space.high)
94 | obs, reward, done, info = self.env.step(action)
95 | O.append(obs)
96 | reward_sum += reward
97 | rewards.append(reward)
98 | if done:
99 | break
100 |
101 | if video_record:
102 | recorder.capture_frame()
103 | recorder.close()
104 |
105 | logger.info("Average action selection time: %.4f" % np.mean(times))
106 | logger.info("Rollout length: %d" % len(A))
107 |
108 | return {
109 | "obs": np.array(O),
110 | "ac": np.array(A),
111 | "reward_sum": reward_sum,
112 | "rewards": np.array(rewards),
113 | }
114 |
--------------------------------------------------------------------------------
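A usage sketch of Agent.sample with a hypothetical zero-action policy (not from this repository) exposing the reset()/act() interface the agent expects; it assumes a MuJoCo-backed environment can be created.

import gym
import numpy as np
from dotmap import DotMap

import dmbrl.env                       # registers the MBRL* environments
from dmbrl.misc.Agent import Agent

class ZeroPolicy:
    """Hypothetical stand-in policy: always outputs the zero action."""
    def __init__(self, action_space):
        self._shape = action_space.shape
    def reset(self):
        pass
    def act(self, obs, t):
        return np.zeros(self._shape)

env = gym.make("MBRLCartpole-v0")
agent = Agent(DotMap(env=env, noisy_actions=False))
rollout = agent.sample(horizon=50, policy=ZeroPolicy(env.action_space))
print(rollout["reward_sum"], rollout["obs"].shape)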
/dmbrl/misc/DotmapUtils.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | from __future__ import print_function
3 | from __future__ import absolute_import
4 |
5 |
6 | def get_required_argument(dotmap, key, message, default=None):
7 | val = dotmap.get(key, default)
8 | if val is default:
9 | raise ValueError(message)
10 | return val
11 |
--------------------------------------------------------------------------------
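A short usage sketch of get_required_argument: it behaves like DotMap.get, but raises with the supplied message when the key is missing.

from dotmap import DotMap
from dmbrl.misc.DotmapUtils import get_required_argument

cfg = DotMap(popsize=500)
print(get_required_argument(cfg, "popsize", "Must provide population size."))   # 500

try:
    get_required_argument(cfg, "num_elites", "Must provide number of elites.")
except ValueError as e:
    print(e)                                  # Must provide number of elites.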
/dmbrl/misc/MBExp.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | from __future__ import print_function
3 | from __future__ import absolute_import
4 |
5 | import os
6 | from time import localtime, strftime
7 |
8 | from scipy.io import savemat
9 | from dotmap import DotMap
10 |
11 | from dmbrl.misc.DotmapUtils import get_required_argument
12 | from dmbrl.misc.Agent import Agent
13 | from dmbrl.misc import logger
14 | import copy
15 | import numpy as np
16 |
17 |
18 | class MBExperiment:
19 |
20 | def __init__(self, params):
21 | """Initializes class instance.
22 |
23 | Argument:
24 | params (DotMap): A DotMap containing the following:
25 | .sim_cfg:
26 | .env (gym.env): Environment for this experiment
27 | .task_hor (int): Task horizon
28 | .stochastic (bool): (optional) If True, agent adds noise to its actions.
29 | Must provide noise_std (see below). Defaults to False.
30 | .noise_std (float): for stochastic agents, noise of the form N(0, noise_std^2I)
31 | will be added.
32 |
33 | .exp_cfg:
34 | .ntrain_iters (int): Number of training iterations to be performed.
35 | .nrollouts_per_iter (int): (optional) Number of rollouts done between training
36 | iterations. Defaults to 1.
37 | .ninit_rollouts (int): (optional) Number of initial rollouts. Defaults to 1.
38 | .policy (controller): Policy that will be trained.
39 |
40 | .log_cfg:
41 | .logdir (str): Parent of directory path where experiment data will be saved.
42 | Experiment data will be saved in a timestamped subdirectory of logdir.
43 | .nrecord (int): (optional) Number of rollouts to record for every iteration.
44 | Defaults to 0.
45 | .neval (int): (optional) Number of rollouts for performance evaluation.
46 | Defaults to 1.
47 | """
48 | self.env = get_required_argument(params.sim_cfg, "env", "Must provide environment.")
49 | self.task_hor = get_required_argument(params.sim_cfg, "task_hor", "Must provide task horizon.")
50 | self._params = params
51 | params.sim_cfg.misc = copy.copy(params)
52 | if params.sim_cfg.get("stochastic", False):
53 | self.agent = Agent(DotMap(
54 | env=self.env, noisy_actions=True,
55 | noise_stddev=get_required_argument(
56 | params.sim_cfg,
57 | "noise_std",
58 | "Must provide noise standard deviation in the case of a stochastic environment."
59 | ),
60 | params=params
61 | ))
62 | else:
63 | self.agent = Agent(DotMap(env=self.env, noisy_actions=False, params=params))
64 |
65 | self.ntrain_iters = get_required_argument(
66 | params.exp_cfg, "ntrain_iters", "Must provide number of training iterations."
67 | )
68 | self.nrollouts_per_iter = params.exp_cfg.get("nrollouts_per_iter", 1)
69 | self.ninit_rollouts = params.exp_cfg.get("ninit_rollouts", 1)
70 | self.policy = get_required_argument(params.exp_cfg, "policy", "Must provide a policy.")
71 |
72 | self.logdir = os.path.join(
73 | get_required_argument(params.log_cfg, "logdir", "Must provide log parent directory."),
74 | strftime("%Y-%m-%d--%H:%M:%S", localtime())
75 | )
76 | logger.set_file_handler(path=self.logdir)
77 | logger.info('Starting the experiments')
78 | self.nrecord = params.log_cfg.get("nrecord", 0)
79 | self.neval = params.log_cfg.get("neval", 1)
80 |
81 | def run_experiment(self):
82 | """Perform experiment.
83 | """
84 | os.makedirs(self.logdir, exist_ok=True)
85 |
86 | traj_obs, traj_acs, traj_rets, traj_rews = [], [], [], []
87 | test_traj_obs, test_traj_acs, test_traj_rets = [], [], []
88 | episode_iter_id = []
89 |
90 | # Perform initial rollouts
91 | samples = []
92 | needed_num_steps = self.ninit_rollouts * self.task_hor
93 | finished_num_steps = 0
94 | """
95 | # TODO DEBUG
96 | needed_num_steps = 64
97 | self.task_hor = 64
98 | """
99 | while True:
100 | samples.append(
101 | self.agent.sample(
102 | self.task_hor, self.policy
103 | )
104 | )
105 | traj_obs.append(samples[-1]["obs"])
106 | traj_acs.append(samples[-1]["ac"])
107 | traj_rews.append(samples[-1]["rewards"])
108 | finished_num_steps += len(samples[-1]["ac"])
109 |
110 | if finished_num_steps >= needed_num_steps:
111 | break
112 |
113 | if self.ninit_rollouts > 0:
114 | self.policy.train(
115 | [sample["obs"] for sample in samples],
116 | [sample["ac"] for sample in samples],
117 | [sample["rewards"] for sample in samples]
118 | )
119 |
120 | # Training loop
121 | for i in range(self.ntrain_iters):
122 |
123 | logger.info("####################################################################")
124 | logger.info("Starting training iteration %d." % (i + 1))
125 |
126 | iter_dir = os.path.join(self.logdir, "train_iter%d" % (i + 1))
127 | os.makedirs(iter_dir, exist_ok=True)
128 |
129 | samples = []
130 | assert self.nrecord == 0
131 |
132 | needed_num_steps = self.task_hor * \
133 | (max(self.neval, self.nrollouts_per_iter) - self.nrecord)
134 | finished_num_steps = 0
135 | while True:
136 | samples.append(
137 | self.agent.sample(
138 | self.task_hor, self.policy
139 | )
140 | )
141 | finished_num_steps += len(samples[-1]["ac"])
142 |
143 | if finished_num_steps >= needed_num_steps:
144 | break
145 | logger.info("Rewards obtained: {}".format(
146 | [sample["reward_sum"] for sample in samples[:self.neval]])
147 | )
148 | # test the policy if needed
149 | if self._params.misc.ctrl_cfg.cem_cfg.test_policy > 0:
150 | test_data = []
151 | for _ in range(5):
152 | test_data.append(
153 | self.agent.sample(self.task_hor, self.policy,
154 | test_policy=True, average=False)
155 | )
156 | test_traj_rets.extend([
157 | np.mean([i_test_data["reward_sum"] for i_test_data in test_data])
158 | ])
159 | test_traj_obs.extend(
160 | [i_test_data["obs"] for i_test_data in test_data]
161 | )
162 | test_traj_acs.extend(
163 | [i_test_data["ac"] for i_test_data in test_data]
164 | )
165 |
166 | traj_obs.extend([sample["obs"] for sample in samples])
167 | traj_acs.extend([sample["ac"] for sample in samples])
168 | traj_rets.extend([sample["reward_sum"] for sample in samples])
169 | traj_rews.extend([sample["rewards"] for sample in samples])
170 | episode_iter_id.extend([i] * len(samples))
171 | samples = samples[:self.nrollouts_per_iter]
172 |
173 | self.policy.dump_logs(self.logdir, iter_dir)
174 | savemat(
175 | os.path.join(self.logdir, "logs.mat"),
176 | {
177 | "observations": traj_obs,
178 | "actions": traj_acs,
179 | "returns": traj_rets,
180 | "rewards": traj_rews,
181 | "test_returns": test_traj_rets,
182 | "test_obs": test_traj_obs,
183 | "test_acs": test_traj_acs,
184 | 'episode_iter_id': episode_iter_id
185 | }
186 | )
187 | # Delete iteration directory if not used
188 | if len(os.listdir(iter_dir)) == 0:
189 | os.rmdir(iter_dir)
190 |
191 | if i < self.ntrain_iters - 1:
192 | self.policy.train(
193 | [sample["obs"] for sample in samples],
194 | [sample["ac"] for sample in samples],
195 | [sample["rewards"] for sample in samples]
196 | )
197 |
198 | # TODO: train the policy network
199 |
--------------------------------------------------------------------------------
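A schematic of the params DotMap that MBExperiment expects, based on the docstring above. The env and policy here are placeholder objects purely to show the required keys; the real ones come from dmbrl/config and the MPC controller, and constructing the experiment creates a timestamped log directory under logdir.

from dotmap import DotMap
from dmbrl.misc.MBExp import MBExperiment

dummy_env, dummy_policy = object(), object()   # placeholders for illustration only
params = DotMap(
    sim_cfg=DotMap(env=dummy_env, task_hor=1000),
    exp_cfg=DotMap(ntrain_iters=300, nrollouts_per_iter=1,
                   ninit_rollouts=1, policy=dummy_policy),
    log_cfg=DotMap(logdir="log", nrecord=0, neval=1),
)
exp = MBExperiment(params)
# exp.run_experiment() would then alternate data collection and model training.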
/dmbrl/misc/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WilsonWangTHU/POPLIN/edd8dba50f9049c6164eda774602bef0c299cb51/dmbrl/misc/__init__.py
--------------------------------------------------------------------------------
/dmbrl/misc/logger.py:
--------------------------------------------------------------------------------
1 | # -----------------------------------------------------------------------------
2 | # @brief:
3 | # The logger here is used all across the project. It is inspired
4 | # by Yuxin Wu (ppwwyyxx@gmail.com)
5 | #
6 | # @author:
7 | # Tingwu Wang, 2017, Feb, 20th
8 | # -----------------------------------------------------------------------------
9 |
10 | import logging
11 | import sys
12 | import os
13 | import datetime
14 | from termcolor import colored
15 |
16 | __all__ = ['set_file_handler'] # the actual worker is the '_logger'
17 |
18 |
19 | class _MyFormatter(logging.Formatter):
20 | '''
21 | @brief:
22 | a formatter that colors the timestamp and tags warning/error records
23 | '''
24 |
25 | def format(self, record):
26 | date = colored('[%(asctime)s @%(filename)s:%(lineno)d]', 'green')
27 | msg = '%(message)s'
28 |
29 | if record.levelno == logging.WARNING:
30 | fmt = date + ' ' + \
31 | colored('WRN', 'red', attrs=[]) + ' ' + msg
32 | elif record.levelno == logging.ERROR or \
33 | record.levelno == logging.CRITICAL:
34 | fmt = date + ' ' + \
35 | colored('ERR', 'red', attrs=['underline']) + ' ' + msg
36 | else:
37 | fmt = date + ' ' + msg
38 |
39 | if hasattr(self, '_style'):
40 | # Python3 compatibility
41 | self._style._fmt = fmt
42 | self._fmt = fmt
43 |
44 | return super(self.__class__, self).format(record)
45 |
46 |
47 | _logger = logging.getLogger('joint_embedding')
48 | _logger.propagate = False
49 | _logger.setLevel(logging.INFO)
50 |
51 | # set the console output handler
52 | con_handler = logging.StreamHandler(sys.stdout)
53 | con_handler.setFormatter(_MyFormatter(datefmt='%m%d %H:%M:%S'))
54 | _logger.addHandler(con_handler)
55 |
56 |
57 | class GLOBAL_PATH(object):
58 |
59 | def __init__(self, path=None):
60 | if path is None:
61 | path = os.getcwd()
62 | self.path = path
63 |
64 | def _set_path(self, path):
65 | self.path = path
66 |
67 | def _get_path(self):
68 | return self.path
69 |
70 |
71 | PATH = GLOBAL_PATH()
72 |
73 |
74 | # set the file output handler
75 | def set_file_handler(path=None, prefix='', time_str=''):
76 | if time_str == '':
77 | file_name = prefix + \
78 | datetime.datetime.now().strftime("%A_%d_%B_%Y_%I:%M%p") + '.log'
79 | else:
80 | file_name = prefix + time_str + '.log'
81 |
82 | path = os.path.abspath(path)
83 |
84 | path = os.path.join(path, file_name)
85 | if not os.path.exists(path):
86 | os.makedirs(path)
87 |
88 | PATH._set_path(path)
89 | # from tensorboard_logger import configure
90 | # configure(path)
91 |
92 | file_handler = logging.FileHandler(
93 | filename=os.path.join(path, 'logger.log'), encoding='utf-8', mode='w'
94 | )
95 | file_handler.setFormatter(_MyFormatter(datefmt='%m%d %H:%M:%S'))
96 | _logger.addHandler(file_handler)
97 |
98 | _logger.info('Log file set to {}'.format(path))
99 | return
100 |
101 |
102 | def _get_path():
103 | return PATH._get_path()
104 |
105 |
106 | _LOGGING_METHOD = ['info', 'warning', 'error', 'critical',
107 | 'warn', 'exception', 'debug']
108 |
109 | # export logger functions
110 | for func in _LOGGING_METHOD:
111 | locals()[func] = getattr(_logger, func)
112 |
--------------------------------------------------------------------------------
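A short usage sketch of the logger: the module-level functions (info, warning, ...) write to stdout, and set_file_handler additionally mirrors the output to a log file under the given path.

from dmbrl.misc import logger

logger.set_file_handler(path="./log", prefix="demo_")
logger.info("experiment started")
logger.warning("this line is tagged WRN")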
/dmbrl/misc/optimizers/__init__.py:
--------------------------------------------------------------------------------
1 | from .cem import CEMOptimizer
2 | from .random import RandomOptimizer
3 | from .gbp_rs import GBPRandomOptimizer
4 | from .gbp_cem import GBPCEMOptimizer
5 | from .POPLIN_A import POPLINAOptimizer
6 | from .POPLIN_P import POPLINPOptimizer
7 |
--------------------------------------------------------------------------------
/dmbrl/misc/optimizers/cem.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | from __future__ import print_function
3 | from __future__ import absolute_import
4 |
5 | import tensorflow as tf
6 | import numpy as np
7 | import scipy.stats as stats
8 | from dmbrl.misc import logger
9 |
10 | from .optimizer import Optimizer
11 |
12 |
13 | class CEMOptimizer(Optimizer):
14 | """A Tensorflow-compatible CEM optimizer.
15 | """
16 |
17 | def __init__(self, sol_dim, max_iters, popsize, num_elites, tf_session=None,
18 | upper_bound=None, lower_bound=None, epsilon=0.001, alpha=0.25,
19 | params=None):
20 | """Creates an instance of this class.
21 |
22 | Arguments:
23 | sol_dim (int): The dimensionality of the problem space
24 | max_iters (int): The maximum number of iterations to perform during optimization
25 | popsize (int): The number of candidate solutions to be sampled at every iteration
26 | num_elites (int): The number of top solutions that will be used to obtain the distribution
27 | at the next iteration.
28 | tf_session (tf.Session): (optional) Session to be used for this optimizer. Defaults to None,
29 | in which case any functions passed in cannot be tf.Tensor-valued.
30 | upper_bound (np.array): An array of upper bounds
31 | lower_bound (np.array): An array of lower bounds
32 | epsilon (float): A minimum variance. If the maximum variance drops below epsilon, optimization is
33 | stopped.
34 | alpha (float): Controls how much of the previous mean and variance is used for the next iteration.
35 | next_mean = alpha * old_mean + (1 - alpha) * elite_mean, and similarly for variance.
36 | """
37 | from dmbrl.modeling.models import GT_dynamics
38 | self._gt_compile_cost = GT_dynamics.compile_cost
39 | super().__init__()
40 | self.sol_dim, self.max_iters, self.popsize, self.num_elites = \
41 | sol_dim, max_iters, popsize, num_elites
42 | self.ub, self.lb = upper_bound, lower_bound
43 | self.epsilon, self.alpha = epsilon, alpha
44 | self.tf_sess = tf_session
45 | self.debug = False
46 |
47 | self._params = params
48 |
49 | if num_elites > popsize:
50 | raise ValueError("Number of elites must be at most the population size.")
51 |
52 | if self.tf_sess is not None:
53 | with self.tf_sess.graph.as_default():
54 | with tf.variable_scope("CEMSolver"):
55 | self.init_mean = tf.placeholder(dtype=tf.float32, shape=[sol_dim])
56 | self.init_var = tf.placeholder(dtype=tf.float32, shape=[sol_dim])
57 |
58 | self.num_opt_iters, self.mean, self.var = None, None, None
59 | self.tf_compatible, self.cost_function = None, None
60 |
61 | if self._params.il_cfg.use_gt_dynamics:
62 | self._dynamics = GT_dynamics.GT(self._params)
63 |
64 | def setup(self, cost_function, tf_compatible):
65 | """Sets up this optimizer using a given cost function.
66 |
67 | Arguments:
68 | cost_function (func): A function for computing costs over a batch of candidate solutions.
69 | tf_compatible (bool): True if the cost function provided is tf.Tensor-valued.
70 |
71 | Returns: None
72 | """
73 | if tf_compatible and self.tf_sess is None:
74 | raise RuntimeError("Cannot pass in a tf.Tensor-valued cost function without passing in a TensorFlow "
75 | "session into the constructor")
76 |
77 | self.tf_compatible = tf_compatible
78 |
79 | if not tf_compatible:
80 | self.cost_function = cost_function
81 | else:
82 | def continue_optimization(t, mean, var, best_val, best_sol):
83 | return tf.logical_and(tf.less(t, self.max_iters), tf.reduce_max(var) > self.epsilon)
84 |
85 | def iteration(t, mean, var, best_val, best_sol):
86 | lb_dist, ub_dist = mean - self.lb, self.ub - mean
87 | constrained_var = tf.minimum(tf.minimum(tf.square(lb_dist / 2), tf.square(ub_dist / 2)), var)
88 | samples = tf.truncated_normal([self.popsize, self.sol_dim], mean, tf.sqrt(constrained_var))
89 |
90 | costs = cost_function(samples)
91 | values, indices = tf.nn.top_k(-costs, k=self.num_elites, sorted=True)
92 |
93 | best_val, best_sol = tf.cond(
94 | tf.less(-values[0], best_val),
95 | lambda: (-values[0], samples[indices[0]]),
96 | lambda: (best_val, best_sol)
97 | )
98 |
99 | elites = tf.gather(samples, indices)
100 | new_mean = tf.reduce_mean(elites, axis=0)
101 | new_var = tf.reduce_mean(tf.square(elites - new_mean), axis=0)
102 |
103 | mean = self.alpha * mean + (1 - self.alpha) * new_mean
104 | var = self.alpha * var + (1 - self.alpha) * new_var
105 |
106 | return t + 1, mean, var, best_val, best_sol
107 |
108 | with self.tf_sess.graph.as_default():
109 | self.num_opt_iters, self.mean, self.var, self.best_val, self.best_sol = tf.while_loop(
110 | cond=continue_optimization, body=iteration,
111 | loop_vars=[0, self.init_mean, self.init_var, float("inf"), self.init_mean]
112 | )
113 |
114 | def reset(self):
115 | pass
116 |
117 | def obtain_solution(self, init_mean, init_var, per, dU, obs=None):
118 | """Optimizes the cost function using the provided initial candidate distribution
119 |
120 | Arguments:
121 | init_mean (np.ndarray): The mean of the initial candidate distribution.
122 | init_var (np.ndarray): The variance of the initial candidate distribution.
123 | """
124 | if self.tf_compatible:
125 | sol, solvar = self.tf_sess.run(
126 | [self.mean, self.var],
127 | feed_dict={self.init_mean: init_mean, self.init_var: init_var}
128 | )
129 | else:
130 | assert self._params.il_cfg.use_gt_dynamics
131 | mean, var, t = init_mean, init_var, 0
132 | X = stats.truncnorm(-2, 2, loc=np.zeros_like(mean), scale=np.ones_like(mean))
133 |
134 | cfg = {'plan_hor': self._params.opt_cfg.plan_hor,
135 | 'dU': self._params.env.action_space.shape[0]}
136 | while (t < self.max_iters) and np.max(var) > self.epsilon:
137 | lb_dist, ub_dist = mean - self.lb, self.ub - mean
138 | constrained_var = np.minimum(np.minimum(np.square(lb_dist / 2), np.square(ub_dist / 2)), var)
139 |
140 | samples = X.rvs(size=[self.popsize, self.sol_dim]) * np.sqrt(constrained_var) + mean
141 | costs = self._gt_compile_cost(
142 | obs, samples, cfg, self._dynamics,
143 | self._dynamics._numpy_reward_function
144 | )
145 | costs = np.reshape(costs, [-1])
146 | elites = samples[np.argsort(costs)][:self.num_elites]
147 |
148 | new_mean = np.mean(elites, axis=0)
149 | new_var = np.var(elites, axis=0)
150 |
151 | mean = self.alpha * mean + (1 - self.alpha) * new_mean
152 | var = self.alpha * var + (1 - self.alpha) * new_var
153 | logger.info('variance of elite: {}'.format(np.var(elites)))
154 |                 logger.info('Mean performance: {}'.format(
155 | np.mean(costs[np.argsort(costs)][:self.num_elites]))
156 | )
157 |
158 | t += 1
159 | sol, solvar = mean, var
160 | sol = np.reshape(sol, [-1])
161 |
162 | # prev_sol is going to be used next timestep
163 | prev_sol = self.update_prev_sol(per, dU, sol)
164 | return sol, prev_sol
165 |
--------------------------------------------------------------------------------
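
For reference, a minimal NumPy sketch of the update that the tf.while_loop above implements: sample from a truncated normal, keep the num_elites cheapest samples, and blend the previous mean/variance with the elite statistics using alpha. The quadratic cost and the dimensions below are made up.

    import numpy as np
    import scipy.stats as stats

    def cem(cost_fn, sol_dim, lb, ub, max_iters=5, popsize=400,
            num_elites=40, alpha=0.1, epsilon=0.001):
        mean, var = np.zeros(sol_dim), 0.25 * np.ones(sol_dim)
        X = stats.truncnorm(-2, 2, loc=np.zeros(sol_dim), scale=np.ones(sol_dim))
        t = 0
        while t < max_iters and np.max(var) > epsilon:
            lb_dist, ub_dist = mean - lb, ub - mean
            constrained_var = np.minimum(
                np.minimum(np.square(lb_dist / 2), np.square(ub_dist / 2)), var)
            samples = X.rvs(size=[popsize, sol_dim]) * np.sqrt(constrained_var) + mean
            elites = samples[np.argsort(cost_fn(samples))][:num_elites]
            # smooth the distribution update, as in iteration() above
            mean = alpha * mean + (1 - alpha) * np.mean(elites, axis=0)
            var = alpha * var + (1 - alpha) * np.var(elites, axis=0)
            t += 1
        return mean

    # hypothetical quadratic cost with its minimum at 0.5 in every dimension
    sol = cem(lambda x: np.sum((x - 0.5) ** 2, axis=1), sol_dim=4,
              lb=-np.ones(4), ub=np.ones(4))
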
/dmbrl/misc/optimizers/gbp_rs.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | from __future__ import absolute_import
3 | from __future__ import print_function
4 |
5 | import numpy as np
6 | import tensorflow as tf
7 |
8 | from .optimizer import Optimizer
9 | from dmbrl.misc import logger
10 |
11 |
12 | class GBPRandomOptimizer(Optimizer):
13 |     """ @brief: use gradient-based planning to refine the candidate action sequences
14 | """
15 |
16 | def __init__(self, sol_dim, popsize, tf_session,
17 | upper_bound=None, lower_bound=None, params=None):
18 | """Creates an instance of this class.
19 |
20 | Arguments:
21 | sol_dim (int): The dimensionality of the problem space
22 | popsize (int): The number of candidate solutions to be sampled at every iteration
23 | num_elites (int): The number of top solutions that will be used to obtain the distribution
24 | at the next iteration.
25 | tf_session (tf.Session): (optional) Session to be used for this optimizer. Defaults to None,
26 | in which case any functions passed in cannot be tf.Tensor-valued.
27 | upper_bound (np.array): An array of upper bounds
28 | lower_bound (np.array): An array of lower bounds
29 | """
30 | super().__init__()
31 | self._params = params
32 | self._print_count = 0
33 |
34 | self.sol_dim = sol_dim
35 | self.popsize = popsize
36 | self.ub, self.lb = upper_bound, lower_bound
37 | self.tf_sess = tf_session
38 | self.solution = None
39 | self.tf_compatible, self.cost_function = None, None
40 |
41 | self._debug = {}
42 | self._debug['old_sol'] = 0.0
43 | self._debug_start = False
44 |
45 | def setup(self, cost_function, tf_compatible):
46 | """Sets up this optimizer using a given cost function.
47 |
48 | Arguments:
49 | cost_function (func): A function for computing costs over a batch of candidate solutions.
50 | tf_compatible (bool): True if the cost function provided is tf.Tensor-valued.
51 |
52 | Returns: None
53 | """
54 | if tf_compatible and self.tf_sess is None:
55 | raise RuntimeError("Cannot pass in a tf.Tensor-valued cost function without passing in a TensorFlow "
56 | "session into the constructor")
57 |
58 | if not tf_compatible:
59 | self.tf_compatible = False
60 | self.cost_function = cost_function
61 | else:
62 | with self.tf_sess.graph.as_default():
63 | self.tf_compatible = True
64 | self._candidate_solutions = tf.Variable(
65 | np.random.uniform(self.lb, self.ub, [self.popsize, self.sol_dim]),
66 | dtype=tf.float32
67 | )
68 | self.tf_sess.run(
69 | tf.variables_initializer([self._candidate_solutions])
70 | )
71 |
72 | self._costs = costs = cost_function(self._candidate_solutions)
73 | self._choice = tf.argmin(costs)
74 | self.solution = \
75 | self._candidate_solutions[tf.cast(self._choice, tf.int32)]
76 |
77 | # the update loss
78 | self._adam_optimizer = \
79 | tf.train.AdamOptimizer(learning_rate=self._params.gbp_cfg.lr)
80 | self._planning_optimizer = self._adam_optimizer.minimize(
81 | costs, var_list=[self._candidate_solutions]
82 | )
83 | self.tf_sess.run(
84 | tf.variables_initializer(self._adam_optimizer.variables())
85 | )
86 | self._average_cost = tf.reduce_mean(costs)
87 | self._min_cost = tf.reduce_min(costs)
88 | self._values, self._indices = tf.nn.top_k(-costs, k=10, sorted=True)
89 |
90 | # debug information
91 | self._debug_actions = self.solution
92 |
93 | def reset(self):
94 | pass
95 |
96 | def obtain_solution(self, init_mean, init_var, per, dU, obs=None):
97 | """Optimizes the cost function provided in setup().
98 | do gradient based planning
99 |
100 | Arguments:
101 | init_mean (np.ndarray): The mean of the initial candidate distribution.
102 | init_var (np.ndarray): The variance of the initial candidate distribution.
103 | """
104 | assert self.tf_compatible
105 | self._print_count = (self._print_count + 1) % 20
106 | self._print = self._print_count == 0
107 |
108 | # step 1: initialize the action candidates TODO: use init_mean
109 | self._old_solutions = np.concatenate(
110 | [self.tf_sess.run(self._candidate_solutions)[:, 6:],
111 | np.random.uniform(self.lb[0], self.ub[0], [self.popsize, 6])],
112 | axis=1
113 | )
114 | self._candidate_solutions.load(self._old_solutions, self.tf_sess)
115 |
116 | avg_cost, min_cost = self.tf_sess.run(
117 | [self._average_cost, self._min_cost]
118 | )
119 | if self._print:
120 | logger.info('Init -> Avg_cost: %.3f, Min_cost: %.3f' %
121 | (avg_cost, min_cost))
122 |
123 | # step 2: do gradient based planning
124 | for gbp_iteration in range(self._params.gbp_cfg.plan_iter):
125 | _, avg_cost, min_cost = self.tf_sess.run(
126 | [self._planning_optimizer, self._average_cost, self._min_cost]
127 | )
128 | avg_cost, min_cost = self.tf_sess.run(
129 | [self._average_cost, self._min_cost]
130 | )
131 | if self._print:
132 | logger.info('Iter %d > Avg_cost: %.3f, Min_cost: %.3f' %
133 |                             (gbp_iteration, avg_cost, min_cost))
134 |
135 | sol = self.tf_sess.run(self.solution)
136 | prev_sol = self.update_prev_sol(per, dU, sol)
137 |
138 | return sol, prev_sol
139 |
--------------------------------------------------------------------------------
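
The optimizer above keeps the candidate action sequences in a tf.Variable and takes Adam steps directly on the differentiable planning cost. A compact TF 1.x sketch of the same idea with a made-up quadratic cost:

    import numpy as np
    import tensorflow as tf

    popsize, sol_dim, lr, plan_iter = 32, 10, 0.01, 50

    candidates = tf.Variable(
        np.random.uniform(-1.0, 1.0, [popsize, sol_dim]), dtype=tf.float32)
    # hypothetical differentiable cost per candidate action sequence
    costs = tf.reduce_sum(tf.square(candidates - 0.3), axis=1)
    adam = tf.train.AdamOptimizer(learning_rate=lr)
    plan_step = adam.minimize(tf.reduce_sum(costs), var_list=[candidates])
    best = candidates[tf.cast(tf.argmin(costs), tf.int32)]

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for _ in range(plan_iter):
            sess.run(plan_step)         # gradient-based planning step
        solution = sess.run(best)       # lowest-cost candidate
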
/dmbrl/misc/optimizers/optimizer.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import print_function
3 | from __future__ import division
4 | import numpy as np
5 |
6 |
7 | class Optimizer:
8 |
9 | def __init__(self, *args, **kwargs):
10 | self.sy_cur_obs = None
11 | self._proposed_act_seqs_ph = None
12 | pass
13 |
14 | def setup(self, cost_function, tf_compatible):
15 | raise NotImplementedError("Must be implemented in subclass.")
16 |
17 | def reset(self):
18 | raise NotImplementedError("Must be implemented in subclass.")
19 |
20 | def obtain_solution(self, *args, **kwargs):
21 | raise NotImplementedError("Must be implemented in subclass.")
22 |
23 | def get_policy_network(self):
24 | return None
25 |
26 | def train_policy_network(self):
27 | return False
28 |
29 | def set_sy_cur_obs(self, sy_cur_obs):
30 | # NOTE: it is a hack! be careful
31 | self.sy_cur_obs = sy_cur_obs
32 |
33 | def forward_policy_propose(self, predict_next_obs, sy_cur_obs):
34 | pass
35 |
36 | def reset_prev_sol(self, prev_sol):
37 | return prev_sol
38 |
39 | def update_prev_sol(self, per, dU, soln):
40 | prev_sol = np.concatenate([np.copy(soln)[per * dU:],
41 | np.zeros(per * dU)])
42 | return prev_sol
43 |
--------------------------------------------------------------------------------
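
update_prev_sol, shared by all the optimizers in this directory, shifts the previous plan forward by the per * dU entries that were just executed and zero-pads the tail; the result warm-starts the next MPC step. A tiny worked example with made-up numbers:

    import numpy as np

    soln = np.array([1., 2., 3., 4., 5., 6.])   # plan_hor=3, dU=2, flattened
    per, dU = 1, 2                              # one 2-dimensional action executed
    prev_sol = np.concatenate([soln[per * dU:], np.zeros(per * dU)])
    # prev_sol == [3., 4., 5., 6., 0., 0.]
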
/dmbrl/misc/optimizers/policy_network/BC_A_policy.py:
--------------------------------------------------------------------------------
1 | # -----------------------------------------------------------------------------
2 | # @author:
3 | # Tingwu Wang
4 | # -----------------------------------------------------------------------------
5 | import tensorflow as tf
6 | import numpy as np
7 |
8 | from . import base_policy
9 | from . import tf_networks
10 | from dmbrl.misc import logger
11 |
12 |
13 | class policy_network(base_policy.base_policy_network):
14 | ''' @brief:
15 | In this object class, we define the network structure, the restore
16 | function and save function.
17 |
18 | @self.args.training_scheme
19 | @BC-AR: (action space) behavior cloning with the real data
20 | @BC-AI: (action space) behavior cloning using imaginary dataset.
21 |
22 | @AVG-R: (weight space) behavior cloning by setting the weight to
23 | the average of the weights selected during sampling
24 | @BC-PR: (weight space) behavior cloning by distilling the policy
25 | produced by the weights during sampling
26 | @AVG-I: (weight space) AVG-R but with imaginary dataset
27 | @BC-PI: (weight space) BC-PR but with imaginary dataset
28 | '''
29 |
30 | def __init__(self, args, session, name_scope,
31 | observation_size, action_size):
32 |
33 | super(policy_network, self).__init__(
34 | args, session, name_scope, observation_size, action_size
35 | )
36 | assert self.args.training_scheme in ['BC-AR', 'BC-AI']
37 | assert self.args.cem_type in ['POPLINA-INIT', 'POPLINA-REPLAN']
38 |
39 | def build_network(self):
40 | """ @brief: Note that build_network is only needed for the training
41 | """
42 | network_shape = [self._observation_size] + \
43 | self.args.policy_network_shape + [self._action_size]
44 | num_layer = len(network_shape) - 1
45 | act_type = ['tanh'] * (num_layer - 1) + [None]
46 | norm_type = [None] * (num_layer - 1) + [None]
47 | init_data = []
48 | for _ in range(num_layer):
49 | init_data.append(
50 | {'w_init_method': 'normc', 'w_init_para': {'stddev': 1.0},
51 | 'b_init_method': 'constant', 'b_init_para': {'val': 0.0}}
52 | )
53 | init_data[-1]['w_init_para']['stddev'] = 0.01 # the output layer std
54 |
55 | self._MLP = tf_networks.MLP(
56 | dims=network_shape, scope='policy_mlp', train=True,
57 | activation_type=act_type, normalizer_type=norm_type,
58 | init_data=init_data
59 | )
60 |
61 | # fetch all the trainable variables
62 | self._set_var_list()
63 |
64 | def build_loss(self):
65 |
66 | self._build_ph()
67 | self._tensor, self._update_operator = {}, {}
68 |
69 | # construct the input to the forward network, we normalize the state
70 | # input, and concatenate with the action
71 | self._tensor['normalized_start_state'] = (
72 | self._input_ph['start_state'] -
73 | self._whitening_operator['state_mean']
74 | ) / self._whitening_operator['state_std']
75 | self._tensor['net_input'] = self._tensor['normalized_start_state']
76 |
77 | # the output policy of the network
78 | self._tensor['action'] = self._MLP(self._tensor['net_input'])
79 |
80 | self._input_ph['target_action'] = tf.placeholder(
81 | tf.float32, [None, self._action_size], name='target_action'
82 | )
83 |
84 | self._update_operator['loss'] = tf.reduce_mean(
85 | tf.square(self._input_ph['target_action'] -
86 | self._tensor['action'])
87 | )
88 |
89 | self._update_operator['update_op'] = tf.train.AdamOptimizer(
90 | learning_rate=self.args.policy_lr,
91 | ).minimize(self._update_operator['loss'])
92 | logger.info("policy training learning rate: {}".format(
93 | self.args.policy_lr)
94 | )
95 |
96 | def train(self, data_dict, training_info={}):
97 |
98 | # Step 1: update the running mean
99 | imaginary_dataset = training_info['imaginary_dataset']
100 |
101 | # Step 2: data processing
102 | if self.args.training_scheme == 'BC-AR':
103 | data_dict['target_action'] = data_dict['action'] # for training
104 | elif self.args.training_scheme == 'BC-AI':
105 | # add imaginary data to the dataset
106 | for key in ['start_state', 'action']:
107 | data_dict[key] = \
108 | np.concatenate([data_dict[key], imaginary_dataset[key]])
109 | data_dict['target_action'] = data_dict['action'] # for training
110 |
111 | else:
112 | raise NotImplementedError
113 |
114 | self._set_whitening_var(data_dict['whitening_stats'])
115 | self.optimize_weights(data_dict, ['start_state', 'target_action'])
116 |
--------------------------------------------------------------------------------
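
The loss built above is plain behavior cloning in action space: whiten the state, run the policy MLP, and minimize the mean squared error to the planner's action. A stripped-down TF 1.x sketch of the same objective (the layer sizes and names are illustrative, not the repo's tf_networks.MLP):

    import tensorflow as tf

    obs_dim, act_dim = 17, 6
    state_ph = tf.placeholder(tf.float32, [None, obs_dim])
    target_action_ph = tf.placeholder(tf.float32, [None, act_dim])
    state_mean = tf.Variable(tf.zeros([1, obs_dim]), trainable=False)
    state_std = tf.Variable(tf.ones([1, obs_dim]), trainable=False)

    # whiten the state, then map it to an action with a small MLP
    net_in = (state_ph - state_mean) / state_std
    hidden = tf.layers.dense(net_in, 64, activation=tf.tanh)
    action = tf.layers.dense(hidden, act_dim)

    loss = tf.reduce_mean(tf.square(target_action_ph - action))   # BC loss
    update_op = tf.train.AdamOptimizer(1e-3).minimize(loss)
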
/dmbrl/misc/optimizers/policy_network/BC_WA_policy.py:
--------------------------------------------------------------------------------
1 | # -----------------------------------------------------------------------------
2 | # @author:
3 | # Tingwu Wang
4 | # -----------------------------------------------------------------------------
5 | import tensorflow as tf
6 | import numpy as np
7 |
8 | from . import base_policy
9 | from . import tf_networks
10 | from . import tf_utils
11 | from dmbrl.misc import logger
12 |
13 |
14 | class policy_network(base_policy.base_policy_network):
15 | ''' @brief:
16 | In this object class, we define the network structure, the restore
17 | function and save function.
18 |
19 | @self.args.training_scheme
20 | @BC-AR: (action space) behavior cloning with the real data
21 | @BC-AI: (action space) behavior cloning using imaginary dataset.
22 |
23 | @AVG-R: (weight space) behavior cloning by setting the weight to
24 | the average of the weights selected during sampling
25 | @BC-PR: (weight space) behavior cloning by distilling the policy
26 | produced by the weights during sampling
27 | @AVG-I: (weight space) AVG-R but with imaginary dataset
28 | @BC-PI: (weight space) BC-PR but with imaginary dataset
29 | '''
30 |
31 | def __init__(self, args, session, name_scope,
32 | observation_size, action_size):
33 |
34 | super(policy_network, self).__init__(
35 | args, session, name_scope, observation_size, action_size
36 | )
37 | assert self.args.training_scheme in ['AVG-R', 'AVG-I']
38 | assert self.args.cem_type in ['POPLINP-SEP', 'POPLINP-UNI']
39 |
40 | def build_network(self):
41 | """ @brief: Note that build_network is only needed for the training
42 | """
43 | network_shape = [self._observation_size] + \
44 | self.args.policy_network_shape + [self._action_size]
45 | num_layer = len(network_shape) - 1
46 | act_type = ['tanh'] * (num_layer - 1) + [None]
47 | norm_type = [None] * (num_layer - 1) + [None]
48 | init_data = []
49 | for _ in range(num_layer):
50 | init_data.append(
51 | {'w_init_method': 'normc', 'w_init_para': {'stddev': 1.0},
52 | 'b_init_method': 'constant', 'b_init_para': {'val': 0.0}}
53 | )
54 | init_data[-1]['w_init_para']['stddev'] = 0.01 # the output layer std
55 |
56 | self._MLP = tf_networks.W_MLP(
57 | dims=network_shape, scope='policy_mlp', train=True,
58 | activation_type=act_type, normalizer_type=norm_type,
59 | init_data=init_data
60 | )
61 |
62 | # fetch all the trainable variables
63 | self._set_var_list()
64 |
65 | def build_loss(self):
66 |
67 | self._build_ph()
68 | self._tensor, self._update_operator = {}, {}
69 |
70 | self._MLP_var_list = self._MLP.get_variable_list()
71 | self._set_weight = tf_utils.set_network_weights(
72 | self._session, self._MLP_var_list, ''
73 | )
74 | logger.info("policy training learning rate: {}".format(
75 | self.args.policy_lr)
76 | )
77 |
78 | self._session.run(tf.variables_initializer(tf.global_variables()))
79 |
80 | # synchronize the two networks if needed
81 | if self.args.cem_type in ['POPLINP-SEP', 'POPLINP-UNI'] and \
82 | self.args.training_scheme in ['BC-PR', 'BC-PI']:
83 | weight_dict = self._get_weight() # get from MLP
84 | self._set_weight(weight_dict) # set the target MLP
85 |
86 | def train(self, data_dict, training_info={}):
87 |
88 | # Step 1: update the running mean
89 | imaginary_dataset = training_info['imaginary_dataset']
90 |
91 | # Step 2: data processing
92 | if self.args.training_scheme in ['AVG-R']:
93 | data_dict['target_weight'] = data_dict['weight'] # for training
94 | data_dict['weight'] = data_dict['target_weight'] # for training
95 |
96 | elif self.args.training_scheme in ['AVG-I']:
97 | for key in ['start_state', 'weight']:
98 | data_dict[key] = \
99 | np.concatenate([data_dict[key], imaginary_dataset[key]])
100 | data_dict['target_weight'] = data_dict['weight'] # for training
101 | data_dict['weight'] = data_dict['target_weight'] # for training
102 |
103 | else:
104 | raise NotImplementedError
105 |
106 | # Step 3: parse the test set and train the network
107 | # get the average of the weights
108 | self._set_whitening_var(data_dict['whitening_stats'])
109 | average_weights = \
110 | np.reshape(np.mean(data_dict['target_weight'], axis=0), [1, -1])
111 |
112 | if self.args.zero_weight == 'yes':
113 | average_weights *= 0.0
114 | logger.warning('Using Zero Weights')
115 | weight_dict = \
116 | self._MLP.parse_np_weight_vec_into_dict(average_weights)
117 |
118 | # set the weights
119 | self._set_weight(weight_dict)
120 |
--------------------------------------------------------------------------------
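
The AVG schemes above take no gradient step at all: the flat weight vectors selected by the planner are averaged and the average is loaded back into the policy network. A minimal NumPy illustration (the shapes are made up):

    import numpy as np

    # weight vectors chosen by the planner, one per planning step (made-up sizes)
    selected_weights = np.random.randn(1000, 2816)

    average_weights = np.reshape(np.mean(selected_weights, axis=0), [1, -1])
    # parse_np_weight_vec_into_dict() would then split this flat vector back
    # into per-layer matrices/biases before _set_weight() loads it
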
/dmbrl/misc/optimizers/policy_network/BC_WD_policy.py:
--------------------------------------------------------------------------------
1 | # -----------------------------------------------------------------------------
2 | # @author:
3 | # Tingwu Wang
4 | # -----------------------------------------------------------------------------
5 | import tensorflow as tf
6 | import numpy as np
7 |
8 | from . import base_policy
9 | from . import tf_networks
10 | from . import tf_utils
11 | from . import whitening_util
12 | from dmbrl.misc import logger
13 |
14 |
15 | class policy_network(base_policy.base_policy_network):
16 | ''' @brief:
17 | In this object class, we define the network structure, the restore
18 | function and save function.
19 |
20 | @self.args.training_scheme
21 | @BC-AR: (action space) behavior cloning with the real data
22 | @BC-AI: (action space) behavior cloning using imaginary dataset.
23 |
24 | @AVG-R: (weight space) behavior cloning by setting the weight to
25 | the average of the weights selected during sampling
26 | @BC-PR: (weight space) behavior cloning by distilling the policy
27 | produced by the weights during sampling
28 | @AVG-I: (weight space) AVG-R but with imaginary dataset
29 | @BC-PI: (weight space) BC-PR but with imaginary dataset
30 | '''
31 |
32 | def __init__(self, args, session, name_scope,
33 | observation_size, action_size):
34 |
35 | super(policy_network, self).__init__(
36 | args, session, name_scope, observation_size, action_size
37 | )
38 | assert self.args.cem_type in ['POPLINP-SEP', 'POPLINP-UNI']
39 | assert self.args.training_scheme in ['BC-PR', 'BC-PI']
40 |
41 | def build_network(self):
42 | """ @brief: Note that build_network is only needed for the training
43 | """
44 | network_shape = [self._observation_size] + \
45 | self.args.policy_network_shape + [self._action_size]
46 | num_layer = len(network_shape) - 1
47 | act_type = ['tanh'] * (num_layer - 1) + [None]
48 | norm_type = [None] * (num_layer - 1) + [None]
49 | init_data = []
50 | # TODO: be careful when it comes to batchnorm
51 |         assert norm_type[0] != 'batchnorm' and \
52 |             norm_type[0] != 'batch_norm'
53 |
54 | for _ in range(num_layer):
55 | init_data.append(
56 | {'w_init_method': 'normc', 'w_init_para': {'stddev': 1.0},
57 | 'b_init_method': 'constant', 'b_init_para': {'val': 0.0}}
58 | )
59 | init_data[-1]['w_init_para']['stddev'] = 0.01 # the output layer std
60 |
61 | self._MLP = tf_networks.W_MLP(
62 | dims=network_shape, scope='policy_mlp', train=True,
63 | activation_type=act_type, normalizer_type=norm_type,
64 | init_data=init_data
65 | )
66 | self._target_MLP = tf_networks.W_MLP(
67 | dims=network_shape, scope='target_policy_mlp', train=True,
68 | activation_type=act_type, normalizer_type=norm_type,
69 | init_data=init_data
70 | )
71 |
72 | # fetch all the trainable variables
73 | self._set_var_list()
74 |
75 | def build_loss(self):
76 | """ @brief: the MLP is used to generate samples,
77 | while the target_MLP is used during the training. target_MLP is
78 | always older than the MLP, and we feed the dataset into target_MLP
79 | to train MLP.
80 |
81 | After each update, we synchronize target_MLP by copying weights from
82 | MLP.
83 | """
84 |
85 | self._build_ph()
86 | self._tensor, self._update_operator = {}, {}
87 | whitening_util.add_whitening_operator(
88 | self._whitening_operator, self._whitening_variable,
89 | 'target_state', self._observation_size
90 | )
91 |
92 | # the weight input_ph is always set to 0.0
93 | self._input_ph['weight'] = tf.placeholder(
94 | shape=[None, self._MLP.get_weight_size()],
95 | dtype=tf.float32, name='weight_noise'
96 | )
97 | # the actual weight generated from the planning
98 | self._input_ph['target_weight'] = tf.placeholder(
99 | shape=[None, self._MLP.get_weight_size()], dtype=tf.float32,
100 | name='target_weight_noise'
101 | )
102 | self._tensor['net_input'] = (
103 | self._input_ph['start_state'] -
104 | self._whitening_operator['state_mean']
105 | ) / self._whitening_operator['state_std']
106 | self._tensor['target_net_input'] = (
107 | self._input_ph['start_state'] -
108 | self._whitening_operator['target_state_mean']
109 | ) / self._whitening_operator['target_state_std']
110 |
111 | # the output policy of the network
112 | self._tensor['action'] = self._MLP(
113 | self._tensor['net_input'], self._input_ph['weight']
114 | )
115 | self._tensor['target_action'] = self._target_MLP(
116 | self._tensor['target_net_input'],
117 | self._input_ph['target_weight']
118 | )
119 |
120 | # the distillation loss
121 | self._update_operator['loss'] = tf.reduce_mean(
122 | tf.square(self._tensor['target_action'] -
123 | self._tensor['action'])
124 | )
125 | self._target_MLP_var_list = self._target_MLP.get_variable_list()
126 | self._MLP_var_list = self._MLP.get_variable_list()
127 |
128 | self._update_operator['update_op'] = tf.train.AdamOptimizer(
129 | learning_rate=self.args.policy_lr,
130 | ).minimize(self._update_operator['loss'],
131 | var_list=self._MLP_var_list)
132 | logger.info("policy training learning rate: {}".format(
133 | self.args.policy_lr)
134 | )
135 |
136 | # synchronize the weights
137 | self._get_weight = tf_utils.get_network_weights(
138 | self._session, self._MLP_var_list, 'policy_mlp'
139 | )
140 | self._set_weight = tf_utils.set_network_weights(
141 | self._session, self._target_MLP_var_list, 'target_policy_mlp'
142 | )
143 |
144 | self._session.run(tf.variables_initializer(tf.global_variables()))
145 |
146 | # synchronize the two networks if needed
147 | self._set_weight(self._get_weight()) # set the target MLP
148 |
149 | def train(self, data_dict, training_info={}):
150 |
151 | # Step 1: update the running mean
152 | imaginary_dataset = training_info['imaginary_dataset']
153 |
154 | # Step 2: data processing
155 | if self.args.training_scheme in ['BC-PR']:
156 | data_dict['target_weight'] = data_dict['weight'] # for training
157 | data_dict['weight'] = 0.0 * data_dict['weight'] # for training
158 |
159 | elif self.args.training_scheme in ['BC-PI']:
160 | for key in ['start_state', 'weight']:
161 | data_dict[key] = \
162 | np.concatenate([data_dict[key], imaginary_dataset[key]])
163 | data_dict['target_weight'] = data_dict['weight'] # for training
164 | data_dict['weight'] = 0.0 * data_dict['weight'] # for training
165 |
166 | else:
167 | raise NotImplementedError
168 |
169 | self._set_whitening_var(data_dict['whitening_stats'])
170 | self.optimize_weights(data_dict,
171 | ['start_state', 'target_weight', 'weight'])
172 |
173 | # synchronize the networks
174 | whitening_util.copy_whitening_var(data_dict['whitening_stats'],
175 | 'state', 'target_state')
176 | whitening_util.set_whitening_var(
177 | self._session, self._whitening_operator,
178 | data_dict['whitening_stats'], ['target_state']
179 | )
180 | if self.args.zero_weight == 'yes':
181 | logger.warning('Using Random Weights')
182 | else:
183 | self._set_weight(self._get_weight())
184 |
--------------------------------------------------------------------------------
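
The loss above distills planner behaviour from weight space back into a single network: the frozen target_MLP is evaluated with the planner-selected weight noise, the trainable MLP with zero noise, and the MSE between their outputs is minimized over the MLP variables only. A rough TF 1.x sketch of that objective; w_mlp below is a simplified stand-in for tf_networks.W_MLP, not the repo's implementation:

    import tensorflow as tf

    obs_dim, act_dim, weight_size = 17, 6, 2816
    state_ph = tf.placeholder(tf.float32, [None, obs_dim])
    weight_ph = tf.placeholder(tf.float32, [None, weight_size])         # fed as zeros
    target_weight_ph = tf.placeholder(tf.float32, [None, weight_size])  # planner output

    def w_mlp(state, weight_vec, scope):
        # stand-in: an MLP whose output is perturbed by the weight vector
        with tf.variable_scope(scope):
            hidden = tf.layers.dense(state, 64, activation=tf.tanh)
            shift = tf.reduce_mean(weight_vec, axis=1, keepdims=True)
            return tf.layers.dense(hidden, act_dim) + shift

    action = w_mlp(state_ph, weight_ph, 'policy_mlp')                    # trainable
    target_action = w_mlp(state_ph, target_weight_ph, 'target_policy_mlp')

    loss = tf.reduce_mean(tf.square(target_action - action))
    update_op = tf.train.AdamOptimizer(1e-3).minimize(
        loss, var_list=tf.trainable_variables('policy_mlp'))
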
/dmbrl/misc/optimizers/policy_network/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WilsonWangTHU/POPLIN/edd8dba50f9049c6164eda774602bef0c299cb51/dmbrl/misc/optimizers/policy_network/__init__.py
--------------------------------------------------------------------------------
/dmbrl/misc/optimizers/policy_network/base_policy.py:
--------------------------------------------------------------------------------
1 | # -----------------------------------------------------------------------------
2 | # @author:
3 | # Tingwu Wang
4 | # -----------------------------------------------------------------------------
5 | import numpy as np
6 | import tensorflow as tf
7 |
8 | from . import whitening_util
9 | from . import tf_utils
10 | from dmbrl.misc import logger
11 |
12 |
13 | def limit_action(action, lb=-1, ub=1):
14 |
15 | return tf.minimum(tf.maximum(action, lb), ub)
16 |
17 |
18 | class base_policy_network(object):
19 | '''
20 | @brief:
21 | In this object class, we define the network structure, the restore
22 | function and save function.
23 | It will only be called in the agent/agent.py
24 | '''
25 |
26 | def __init__(self, args, session, name_scope,
27 | observation_size, action_size):
28 | self.args = args
29 |
30 | self._session = session
31 | self._name_scope = name_scope
32 |
33 | self._observation_size = observation_size
34 | self._action_size = action_size
35 |
36 | # self._task_name = args.task_name
37 | self._network_shape = args.policy_network_shape
38 |
39 | self._npr = np.random.RandomState(args.seed)
40 |
41 | self._whitening_operator = {}
42 | self._whitening_variable = []
43 |
44 | def build_network(self):
45 | raise NotImplementedError
46 |
47 | def build_loss(self):
48 | raise NotImplementedError
49 |
50 | def _build_ph(self):
51 |
52 | # initialize the running mean and std (whitening)
53 | whitening_util.add_whitening_operator(
54 | self._whitening_operator, self._whitening_variable,
55 | 'state', self._observation_size
56 | )
57 |
58 | # initialize the input placeholder
59 | self._input_ph = {
60 | 'start_state': tf.placeholder(
61 | tf.float32, [None, self._observation_size], name='start_state'
62 | )
63 | }
64 |
65 | def get_input_placeholder(self):
66 | return self._input_ph
67 |
68 | def get_weights(self):
69 | return None
70 |
71 | def set_weights(self, weights_dict):
72 | pass
73 |
74 | def forward_network(self, observation, weight_vec=None):
75 | normalized_start_state = (
76 | observation - self._whitening_operator['state_mean']
77 | ) / self._whitening_operator['state_std']
78 |
79 | # the output policy of the network
80 | if weight_vec is None:
81 | action = self._MLP(normalized_start_state)
82 | else:
83 | action = self._MLP(normalized_start_state, weight_vec)
84 |
85 | action = limit_action(action)
86 |
87 | return action
88 |
89 | def _set_var_list(self):
90 | # collect the tf variable and the trainable tf variable
91 | self._trainable_var_list = [var for var in tf.trainable_variables()
92 | if self._name_scope in var.name]
93 |
94 | self._all_var_list = [var for var in tf.global_variables()
95 | if self._name_scope in var.name]
96 |
97 | # the weights that actually matter
98 | self._network_var_list = \
99 | self._trainable_var_list + self._whitening_variable
100 |
101 | self._set_network_weights = tf_utils.set_network_weights(
102 | self._session, self._network_var_list, self._name_scope
103 | )
104 |
105 | self._get_network_weights = tf_utils.get_network_weights(
106 | self._session, self._network_var_list, self._name_scope
107 | )
108 |
109 | def load_checkpoint(self, ckpt_path):
110 | pass
111 |
112 | def save_checkpoint(self, ckpt_path):
113 | pass
114 |
115 | def get_whitening_operator(self):
116 | return self._whitening_operator
117 |
118 | def _set_whitening_var(self, whitening_stats):
119 | whitening_util.set_whitening_var(
120 | self._session, self._whitening_operator, whitening_stats, ['state']
121 | )
122 |
123 | def train(self, data_dict, replay_buffer, training_info={}):
124 | raise NotImplementedError
125 |
126 | def eval(self, data_dict):
127 | raise NotImplementedError
128 |
129 | def act(self, data_dict):
130 | raise NotImplementedError
131 |
132 | def optimize_weights(self, data_dict, training_keys):
133 |
134 | test_set_id = np.arange(len(data_dict['start_state']))
135 | num_test_data = int(len(test_set_id) * self.args.pct_testset)
136 | self._npr.shuffle(test_set_id)
137 | test_set = {key: data_dict[key][test_set_id][:num_test_data]
138 | for key in training_keys}
139 | train_set = {key: data_dict[key][test_set_id][num_test_data:]
140 | for key in training_keys}
141 | test_error = old_test_error = np.inf
142 |
143 | # supervised training the behavior (behavior cloning)
144 | for epoch in range(self.args.policy_epochs):
145 | total_batch_len = len(train_set['start_state'])
146 | total_batch_inds = np.arange(total_batch_len)
147 | self._npr.shuffle(total_batch_inds)
148 | num_minibatch = \
149 | max(total_batch_len // self.args.minibatch_size, 1)
150 | train_error = []
151 |
152 | for start in range(num_minibatch):
153 | start = start * self.args.minibatch_size
154 | end = min(start + self.args.minibatch_size, total_batch_len)
155 | batch_inds = total_batch_inds[start: end]
156 |                 feed_dict = {self._input_ph[key]: train_set[key][batch_inds]
157 | for key in training_keys}
158 |
159 | error, _ = self._session.run(
160 | [self._update_operator['loss'],
161 | self._update_operator['update_op']], feed_dict=feed_dict
162 | )
163 | train_error.append(error)
164 |
165 | # see the test error
166 | feed_dict = {self._input_ph[key]: test_set[key]
167 | for key in training_keys}
168 |
169 | test_error = self._session.run(
170 | self._update_operator['loss'], feed_dict=feed_dict
171 | )
172 | logger.info('Epoch %d; Train Error: %.6f; Test Error: %.6f' %
173 | (epoch, np.mean(train_error), test_error))
174 |
175 | if test_error > old_test_error and epoch % 5 == 0:
176 | # TODO: MAKE A COUNTER HERE
177 |                 logger.info('Early stopping')
178 | break
179 | else:
180 | old_test_error = test_error
181 |
--------------------------------------------------------------------------------
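
optimize_weights above holds out a pct_testset fraction of the data, trains on shuffled minibatches, and stops early once the held-out error stops improving. A compressed NumPy sketch of that control flow; run_update and eval_loss are hypothetical callables standing in for the TF session calls, and data is a dict of equally sized arrays:

    import numpy as np

    def optimize_weights(data, run_update, eval_loss, pct_testset=0.1,
                         epochs=50, minibatch_size=128, rng=np.random):
        ids = rng.permutation(len(data['start_state']))
        n_test = int(len(ids) * pct_testset)
        test_ids, train_ids = ids[:n_test], ids[n_test:]
        old_test_error = np.inf
        for epoch in range(epochs):
            order = rng.permutation(train_ids)
            for start in range(0, len(order), minibatch_size):
                batch = order[start: start + minibatch_size]
                run_update({key: val[batch] for key, val in data.items()})
            test_error = eval_loss({key: val[test_ids] for key, val in data.items()})
            if test_error > old_test_error and epoch % 5 == 0:
                break                      # simple early stopping
            old_test_error = test_error
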
/dmbrl/misc/optimizers/policy_network/gmm_util.py:
--------------------------------------------------------------------------------
1 | # -----------------------------------------------------------------------------
2 | # @brief:
3 | # -----------------------------------------------------------------------------
4 |
5 | import numpy as np
6 |
7 |
8 | def get_conditional_gaussian(mean, cov, observation_size):
9 | """ @brief: see the function with the same name in mbbl
10 |
11 | y = f_c + f_d.dot(x)
12 | cov(y) = pi_cov
13 | """
14 |
15 | condition_size = observation_size
16 | pi_x = np.linalg.solve(cov[:condition_size, :condition_size],
17 | cov[:condition_size, condition_size:]).T
18 | pi_c = mean[condition_size:] - pi_x.dot(mean[:condition_size])
19 | pi_cov = cov[condition_size:, condition_size:] - \
20 | pi_x.dot(cov[:condition_size, :condition_size]).dot(pi_x.T)
21 | pi_cov = 0.5 * (pi_cov + pi_cov.T)
22 |
23 | # return {'pol_k': pi_c, 'pol_K': pi_x, 'pol_S': pi_cov}
24 | return {'f_c': pi_c, 'f_d': pi_x, 'cov': pi_cov}
25 |
26 |
27 | def get_gmm_posterior(gmm, gmm_weights, data):
28 | """ @brief: see the function with the same name in mbbl
29 | """
30 |
31 | # posterior mean of gmm (C --> num_cluster, N --> num_data)
32 | response = gmm.predict_proba(np.reshape(data, [1, -1])) # (N, C)
33 | # (C, 1)
34 | avg_response = np.reshape(np.mean(np.array(response), axis=0), [-1, 1])
35 | pos_mean = np.mean(avg_response * gmm_weights['mean'], axis=0) # (Vec)
36 |
37 | # posterior cov = (sum_i) res_i * (cov_i + \mu_i(\mu_i - \mu)^T)
38 | diff_mu = gmm_weights['mean'] - np.expand_dims(pos_mean, axis=0) # (C, Vec)
39 | mui_mui_muT = np.expand_dims(gmm_weights['mean'], axis=1) * \
40 | np.expand_dims(diff_mu, axis=2) # (C, Vec, Vec), the outer product
41 | response_expand = np.expand_dims(avg_response, axis=2)
42 | pos_cov = np.sum((gmm_weights['cov'] + mui_mui_muT) *
43 | response_expand, axis=0)
44 |
45 | return pos_mean, pos_cov
46 |
--------------------------------------------------------------------------------
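
get_conditional_gaussian conditions a joint Gaussian over [x; y] on the first observation_size dimensions and returns y = f_c + f_d.dot(x) with covariance cov. A small 2-D self-check with arbitrary numbers:

    import numpy as np

    mean = np.array([0.5, 1.0])                  # joint mean over [x, y]
    cov = np.array([[1.0, 0.3],
                    [0.3, 0.5]])

    # condition on the first dimension (observation_size = 1)
    f_d = np.linalg.solve(cov[:1, :1], cov[:1, 1:]).T        # 0.3
    f_c = mean[1:] - f_d.dot(mean[:1])                       # 1.0 - 0.3 * 0.5 = 0.85
    pi_cov = cov[1:, 1:] - f_d.dot(cov[:1, :1]).dot(f_d.T)   # 0.5 - 0.09 = 0.41
    # so y | x ~ N(0.85 + 0.3 * x, 0.41)
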
/dmbrl/misc/optimizers/policy_network/tf_norm.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # @brief: define the batchnorm and layernorm in this function
3 | # ------------------------------------------------------------------------------
4 |
5 | import tensorflow as tf
6 |
7 |
8 | def layer_norm(x, name_scope, epsilon=1e-5, use_bias=True,
9 | use_scale=True, gamma_init=None, data_format='NHWC'):
10 | """
11 | @Brief: code modified from ppwwyyxx github.com/ppwwyyxx/tensorpack/,
12 | under layer_norm.py.
13 | Layer Normalization layer, as described in the paper:
14 | https://arxiv.org/abs/1607.06450.
15 | @input:
16 | x (tf.Tensor): a 4D or 2D tensor. When 4D, the layout should
17 | match data_format.
18 | """
19 | with tf.variable_scope(name_scope):
20 | shape = x.get_shape().as_list()
21 | ndims = len(shape)
22 | assert ndims in [2, 4]
23 |
24 | mean, var = tf.nn.moments(x, list(range(1, len(shape))), keep_dims=True)
25 |
26 | if data_format == 'NCHW':
27 | chan = shape[1]
28 | new_shape = [1, chan, 1, 1]
29 | else:
30 | chan = shape[-1]
31 | new_shape = [1, 1, 1, chan]
32 | if ndims == 2:
33 | new_shape = [1, chan]
34 |
35 | if use_bias:
36 | beta = tf.get_variable(
37 | 'beta', [chan], initializer=tf.constant_initializer()
38 | )
39 | beta = tf.reshape(beta, new_shape)
40 | else:
41 | beta = tf.zeros([1] * ndims, name='beta')
42 | if use_scale:
43 | if gamma_init is None:
44 | gamma_init = tf.constant_initializer(1.0)
45 | gamma = tf.get_variable('gamma', [chan], initializer=gamma_init)
46 | gamma = tf.reshape(gamma, new_shape)
47 | else:
48 | gamma = tf.ones([1] * ndims, name='gamma')
49 |
50 | ret = tf.nn.batch_normalization(
51 | x, mean, var, beta, gamma, epsilon, name='output'
52 | )
53 | return ret
54 |
55 |
56 | def batch_norm_with_train(x, name_scope, epsilon=1e-5, momentum=0.9):
57 | ret = tf.contrib.layers.batch_norm(
58 | x, decay=momentum, updates_collections=None, epsilon=epsilon,
59 | scale=True, is_training=True, scope=name_scope
60 | )
61 | return ret
62 |
63 |
64 | def batch_norm_without_train(x, name_scope, epsilon=1e-5, momentum=0.9):
65 | ret = tf.contrib.layers.batch_norm(
66 | x, decay=momentum, updates_collections=None, epsilon=epsilon,
67 | scale=True, is_training=False, scope=name_scope
68 | )
69 | return ret
70 |
--------------------------------------------------------------------------------
/dmbrl/misc/optimizers/policy_network/tf_utils.py:
--------------------------------------------------------------------------------
1 | # -----------------------------------------------------------------------------
2 | # @brief:
3 | # -----------------------------------------------------------------------------
4 |
5 | import tensorflow as tf
6 | import numpy as np
7 |
8 |
9 | def get_weight_decay_loss(var_list):
10 | weight_decay_dict = {}
11 | weight_decay_sum = 0.0
12 | for var in var_list:
13 | i_weight_decay = tf.nn.l2_loss(var)
14 | weight_decay_dict[var.name] = i_weight_decay
15 |         weight_decay_sum += tf.reduce_mean(i_weight_decay)
16 | return weight_decay_sum, weight_decay_dict
17 |
18 |
19 | def logsigmoid(x):
20 | return -tf.nn.softplus(-x)
21 |
22 |
23 | def logit_bernoulli_entropy(logits):
24 | ent = (1. - tf.nn.sigmoid(logits)) * logits - logsigmoid(logits)
25 | return ent
26 |
27 |
28 | def gauss_selfKL_firstfixed(mu, logstd):
29 | '''
30 | @brief:
31 | KL divergence with itself, holding first argument fixed
32 | Use stop gradient to cut the gradient flows
33 | '''
34 | mu1, logstd1 = map(tf.stop_gradient, [mu, logstd])
35 | mu2, logstd2 = mu, logstd
36 |
37 | return gauss_KL(mu1, logstd1, mu2, logstd2)
38 |
39 |
40 | def gauss_log_prob(mu, logstd, x):
41 |     # probability of taking action x under the parameterized Gaussian distribution
42 | var = tf.exp(2 * logstd)
43 | gp = - tf.square(x - mu) / (2 * var) \
44 | - .5 * tf.log(tf.constant(2 * np.pi)) \
45 | - logstd
46 | return tf.reduce_sum(gp, [1])
47 |
48 |
49 | def gauss_KL(mu1, logstd1, mu2, logstd2):
50 |     # KL divergence between two parameterized Gaussian distributions
51 | var1 = tf.exp(2 * logstd1)
52 | var2 = tf.exp(2 * logstd2)
53 |
54 | kl = tf.reduce_sum(
55 | logstd2 - logstd1 + (var1 + tf.square(mu1 - mu2)) / (2 * var2) - 0.5
56 | )
57 | return kl
58 |
59 |
60 | def gauss_ent(mu, logstd):
61 |     # Shannon entropy of a parameterized Gaussian distribution
62 | h = tf.reduce_sum(
63 | logstd + tf.constant(0.5 * np.log(2 * np.pi * np.e), tf.float32)
64 | )
65 | return h
66 |
67 |
68 | def slice_2d(x, inds0, inds1):
69 | inds0 = tf.cast(inds0, tf.int64)
70 | inds1 = tf.cast(inds1, tf.int64)
71 | shape = tf.cast(tf.shape(x), tf.int64)
72 | ncols = shape[1]
73 | x_flat = tf.reshape(x, [-1])
74 | return tf.gather(x_flat, inds0 * ncols + inds1)
75 |
76 |
77 | def var_shape(x):
78 | out = [k.value for k in x.get_shape()]
79 | assert all(isinstance(a, int) for a in out), \
80 | "shape function assumes that shape is fully known"
81 | return out
82 |
83 |
84 | def numel(x):
85 | return np.prod(var_shape(x))
86 |
87 |
88 | def l2_loss(var_list):
89 | l2_norm = tf.constant(0.)
90 | for var in var_list:
91 | l2_norm += tf.nn.l2_loss(var)
92 | return l2_norm
93 |
94 |
95 | def flatgrad(loss, var_list):
96 | grads = tf.gradients(loss, var_list)
97 | return tf.concat(
98 | [tf.reshape(grad, [numel(v)]) for (v, grad) in zip(var_list, grads)], 0
99 | )
100 |
101 |
102 | class SetFromFlat(object):
103 |
104 | def __init__(self, session, var_list):
105 | self.session = session
106 | assigns = []
107 | shapes = map(var_shape, var_list)
108 | total_size = sum(np.prod(shape) for shape in shapes)
109 | self.theta = theta = tf.placeholder(tf.float32, [total_size])
110 | start = 0
111 | assigns = []
112 | for (shape, v) in zip(shapes, var_list):
113 | size = np.prod(shape)
114 | assigns.append(
115 | tf.assign(v, tf.reshape(theta[start:start + size], shape))
116 | )
117 | start += size
118 | self.op = tf.group(*assigns)
119 |
120 | def __call__(self, theta):
121 | self.session.run(self.op, feed_dict={self.theta: theta})
122 |
123 |
124 | class GetFlat(object):
125 |
126 | def __init__(self, session, var_list):
127 | self.session = session
128 | self.op = tf.concat([tf.reshape(v, [numel(v)]) for v in var_list], 0)
129 |
130 | def __call__(self):
131 | return self.op.eval(session=self.session)
132 |
133 |
134 | class get_network_weights(object):
135 | """ @brief:
136 | call this function to get the weights in the policy network
137 | """
138 |
139 | def __init__(self, session, var_list, base_namescope):
140 | self._session = session
141 | self._base_namescope = base_namescope
142 | # self._op is a dict, note that the base namescope is removed, as the
143 |         # worker and the trainer have different base_namescope
144 | self._op = {
145 | var.name.replace(self._base_namescope, ''): var
146 | for var in var_list
147 | }
148 |
149 | def __call__(self):
150 | return self._session.run(self._op)
151 |
152 |
153 | class set_network_weights(object):
154 | """ @brief:
155 | Call this function to set the weights in the policy network
156 | """
157 |
158 | def __init__(self, session, var_list, base_namescope):
159 | self._session = session
160 | self._base_namescope = base_namescope
161 |
162 | self._var_list = var_list
163 | self._placeholders = {}
164 | self._assigns = []
165 |
166 | with tf.get_default_graph().as_default():
167 | for var in self._var_list:
168 | var_name = var.name.replace(self._base_namescope, '')
169 | self._placeholders[var_name] = tf.placeholder(
170 | tf.float32, var.get_shape()
171 | )
172 | self._assigns.append(
173 | tf.assign(var, self._placeholders[var_name])
174 | )
175 |
176 | def __call__(self, weight_dict):
177 | assert len(weight_dict) == len(self._var_list)
178 |
179 | feed_dict = {}
180 | for var in self._var_list:
181 | var_name = var.name.replace(self._base_namescope, '')
182 | assert var_name in weight_dict
183 | feed_dict[self._placeholders[var_name]] = weight_dict[var_name]
184 |
185 | self._session.run(self._assigns, feed_dict)
186 |
187 |
188 | def xavier_initializer(shape):
189 | dim_sum = np.sum(shape)
190 | if len(shape) == 1:
191 | dim_sum += 1
192 | bound = np.sqrt(6.0 / dim_sum)
193 | return tf.random_uniform(shape, minval=-bound, maxval=bound)
194 |
195 |
196 | def fully_connected(input_layer, input_size, output_size, weight_init,
197 | bias_init, scope, trainable):
198 | with tf.variable_scope(scope):
199 | w = tf.get_variable(
200 | "w", [input_size, output_size],
201 | initializer=weight_init, trainable=trainable
202 | )
203 | b = tf.get_variable(
204 | "b", [output_size], initializer=bias_init, trainable=trainable
205 | )
206 | return tf.matmul(input_layer, w) + b
207 |
--------------------------------------------------------------------------------
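
GetFlat and SetFromFlat flatten a list of variables into a single parameter vector and write such a vector back. A quick round-trip sketch, assuming the repo root is importable as a package (TF 1.x):

    import numpy as np
    import tensorflow as tf
    from dmbrl.misc.optimizers.policy_network.tf_utils import GetFlat, SetFromFlat

    v1 = tf.Variable(tf.zeros([2, 3]))
    v2 = tf.Variable(tf.zeros([3]))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        get_flat = GetFlat(sess, [v1, v2])
        set_from_flat = SetFromFlat(sess, [v1, v2])

        theta = np.arange(9, dtype=np.float32)   # 6 + 3 parameters
        set_from_flat(theta)                     # scatter back into v1, v2
        assert np.allclose(get_flat(), theta)    # flatten again and compare
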
/dmbrl/misc/optimizers/policy_network/whitening_util.py:
--------------------------------------------------------------------------------
1 | # -----------------------------------------------------------------------------
2 | # @author:
3 | # Tingwu Wang
4 | # -----------------------------------------------------------------------------
5 |
6 | import numpy as np
7 | import tensorflow as tf
8 |
9 | _ALLOW_KEY = ['state', 'diff_state', 'action']
10 |
11 |
12 | def init_whitening_stats(key_list):
13 | whitening_stats = {}
14 | for key in key_list:
15 | whitening_stats[key] = {'mean': 0.0, 'variance': 1, 'step': 0.01,
16 | 'square_sum': 0.01, 'sum': 0.0, 'std': np.nan}
17 | return whitening_stats
18 |
19 |
20 | def update_whitening_stats(whitening_stats, rollout_data, key):
21 | # collect the info
22 | new_sum, new_step_sum, new_sq_sum = 0.0, 0.0, 0.0
23 |
24 | if type(rollout_data) is dict:
25 | new_sum += rollout_data[key].sum(axis=0)
26 | new_sq_sum += (np.square(rollout_data[key])).sum(axis=0)
27 | new_step_sum += rollout_data[key].shape[0]
28 | else:
29 | assert type(rollout_data) is list
30 | for i_episode in rollout_data:
31 | if key == 'state':
32 | i_data = i_episode['obs']
33 | elif key == 'action':
34 | i_data = i_episode['actions']
35 | else:
36 | assert key == 'diff_state'
37 | i_data = i_episode['obs'][1:] - i_episode['obs'][:-1]
38 |
39 | new_sum += i_data.sum(axis=0)
40 | new_sq_sum += (np.square(i_data)).sum(axis=0)
41 | new_step_sum += i_data.shape[0]
42 |
43 | # update the whitening info
44 | whitening_stats[key]['step'] += new_step_sum
45 | whitening_stats[key]['sum'] += new_sum
46 | whitening_stats[key]['square_sum'] += new_sq_sum
47 | whitening_stats[key]['mean'] = \
48 | whitening_stats[key]['sum'] / whitening_stats[key]['step']
49 | whitening_stats[key]['variance'] = np.maximum(
50 | whitening_stats[key]['square_sum'] / whitening_stats[key]['step'] -
51 | np.square(whitening_stats[key]['mean']), 1e-2
52 | )
53 | whitening_stats[key]['std'] = \
54 | (whitening_stats[key]['variance'] + 1e-6) ** .5
55 |
56 |
57 | def add_whitening_operator(whitening_operator, whitening_variable, name, size):
58 |
59 | with tf.variable_scope('whitening_' + name):
60 | whitening_operator[name + '_mean'] = tf.Variable(
61 | np.zeros([1, size], np.float32),
62 | name=name + "_mean", trainable=False
63 | )
64 | whitening_operator[name + '_std'] = tf.Variable(
65 | np.ones([1, size], np.float32),
66 | name=name + "_std", trainable=False
67 | )
68 | whitening_variable.append(whitening_operator[name + '_mean'])
69 | whitening_variable.append(whitening_operator[name + '_std'])
70 |
71 | # the reset placeholders
72 | whitening_operator[name + '_mean_ph'] = tf.placeholder(
73 | tf.float32, shape=(1, size), name=name + '_reset_mean_ph'
74 | )
75 | whitening_operator[name + '_std_ph'] = tf.placeholder(
76 | tf.float32, shape=(1, size), name=name + '_reset_std_ph'
77 | )
78 |
79 | # the tensorflow operators
80 | whitening_operator[name + '_mean_op'] = \
81 | whitening_operator[name + '_mean'].assign(
82 | whitening_operator[name + '_mean_ph']
83 | )
84 |
85 | whitening_operator[name + '_std_op'] = \
86 | whitening_operator[name + '_std'].assign(
87 | whitening_operator[name + '_std_ph']
88 | )
89 |
90 |
91 | def copy_whitening_var(whitening_stats, input_name, output_name):
92 | whitening_stats[output_name] = {}
93 | whitening_stats[output_name]['mean'] = whitening_stats[input_name]['mean']
94 | whitening_stats[output_name]['std'] = whitening_stats[input_name]['std']
95 |
96 |
97 | def set_whitening_var(session, whitening_operator, whitening_stats, key_list):
98 |
99 | for i_key in key_list:
100 | for i_item in ['mean', 'std']:
101 | session.run(
102 | whitening_operator[i_key + '_' + i_item + '_op'],
103 | feed_dict={whitening_operator[i_key + '_' + i_item + '_ph']:
104 | np.reshape(whitening_stats[i_key][i_item], [1, -1])}
105 | )
106 |
107 |
108 | def append_normalized_data_dict(data_dict, whitening_stats,
109 | target=['start_state', 'diff_state',
110 | 'end_state']):
111 | data_dict['n_start_state'] = \
112 | (data_dict['start_state'] - whitening_stats['state']['mean']) / \
113 | whitening_stats['state']['std']
114 | data_dict['n_end_state'] = \
115 | (data_dict['end_state'] - whitening_stats['state']['mean']) / \
116 | whitening_stats['state']['std']
117 | data_dict['n_diff_state'] = \
118 | (data_dict['end_state'] - data_dict['start_state'] -
119 | whitening_stats['diff_state']['mean']) / \
120 | whitening_stats['diff_state']['std']
121 | data_dict['diff_state'] = \
122 | data_dict['end_state'] - data_dict['start_state']
123 |
--------------------------------------------------------------------------------
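
update_whitening_stats keeps running sums so that the state mean and standard deviation can be recomputed incrementally as new rollouts arrive. A compact NumPy check of the same arithmetic on synthetic data:

    import numpy as np

    stats = {'sum': 0.0, 'square_sum': 0.01, 'step': 0.01}
    for _ in range(3):                               # three synthetic "rollouts"
        batch = np.random.randn(100, 5)              # 100 states of dimension 5
        stats['sum'] += batch.sum(axis=0)
        stats['square_sum'] += np.square(batch).sum(axis=0)
        stats['step'] += batch.shape[0]

    mean = stats['sum'] / stats['step']
    variance = np.maximum(
        stats['square_sum'] / stats['step'] - np.square(mean), 1e-2)
    std = (variance + 1e-6) ** 0.5                   # roughly (0, 1) here
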
/dmbrl/misc/optimizers/random.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | from __future__ import absolute_import
3 | from __future__ import print_function
4 |
5 | import numpy as np
6 | import tensorflow as tf
7 |
8 | from .optimizer import Optimizer
9 |
10 |
11 | class RandomOptimizer(Optimizer):
12 |
13 | def __init__(self, sol_dim, popsize, tf_session,
14 | upper_bound=None, lower_bound=None, params=None):
15 | """Creates an instance of this class.
16 |
17 | Arguments:
18 | sol_dim (int): The dimensionality of the problem space
19 | popsize (int): The number of candidate solutions to be sampled at every iteration
20 | num_elites (int): The number of top solutions that will be used to obtain the distribution
21 | at the next iteration.
22 | tf_session (tf.Session): (optional) Session to be used for this optimizer. Defaults to None,
23 | in which case any functions passed in cannot be tf.Tensor-valued.
24 | upper_bound (np.array): An array of upper bounds
25 | lower_bound (np.array): An array of lower bounds
26 | """
27 | super().__init__()
28 | self.sol_dim = sol_dim
29 | self.popsize = popsize
30 | self.ub, self.lb = upper_bound, lower_bound
31 | self.tf_sess = tf_session
32 | self.solution = None
33 | self.tf_compatible, self.cost_function = None, None
34 |
35 | def setup(self, cost_function, tf_compatible):
36 | """Sets up this optimizer using a given cost function.
37 |
38 | Arguments:
39 | cost_function (func): A function for computing costs over a batch of candidate solutions.
40 | tf_compatible (bool): True if the cost function provided is tf.Tensor-valued.
41 |
42 | Returns: None
43 | """
44 | if tf_compatible and self.tf_sess is None:
45 | raise RuntimeError("Cannot pass in a tf.Tensor-valued cost function without passing in a TensorFlow "
46 | "session into the constructor")
47 |
48 | if not tf_compatible:
49 | self.tf_compatible = False
50 | self.cost_function = cost_function
51 | else:
52 | with self.tf_sess.graph.as_default():
53 | self.tf_compatible = True
54 |                 solutions = tf.random_uniform([self.popsize, self.sol_dim], self.lb, self.ub)
55 | costs = cost_function(solutions)
56 | self.solution = solutions[tf.cast(tf.argmin(costs), tf.int32)]
57 |
58 | def reset(self):
59 | pass
60 |
61 | def obtain_solution(self, init_mean, init_var, per, dU, obs=None):
62 | """Optimizes the cost function provided in setup().
63 |
64 | Arguments:
65 | init_mean (np.ndarray): The mean of the initial candidate distribution.
66 | init_var (np.ndarray): The variance of the initial candidate distribution.
67 | """
68 | if self.tf_compatible:
69 | sol = self.tf_sess.run(self.solution)
70 | return sol, self.update_prev_sol(per, dU, sol)
71 | else:
72 | solutions = np.random.uniform(self.lb, self.ub, [self.popsize, self.sol_dim])
73 | costs = self.cost_function(solutions)
74 | return solutions[np.argmin(costs)], \
75 |                 self.update_prev_sol(per, dU, solutions[np.argmin(costs)])
76 |
--------------------------------------------------------------------------------
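
Random shooting is the simplest planner in this directory: sample popsize action sequences uniformly within the bounds, score them with the cost function, and keep the cheapest one. A NumPy sketch with a made-up cost:

    import numpy as np

    popsize, sol_dim = 500, 10
    lb, ub = -1.0, 1.0

    solutions = np.random.uniform(lb, ub, [popsize, sol_dim])
    costs = np.sum((solutions - 0.25) ** 2, axis=1)   # hypothetical cost function
    sol = solutions[np.argmin(costs)]
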
/dmbrl/modeling/layers/__init__.py:
--------------------------------------------------------------------------------
1 | from .FC import FC
--------------------------------------------------------------------------------
/dmbrl/modeling/models/GT_dynamics.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | from __future__ import print_function
3 | from __future__ import absolute_import
4 |
5 |
6 | import numpy as np
7 |
8 |
9 | def none_constructor(model_init_cfg, misc=None):
10 | return GT(None)
11 |
12 |
13 | def compile_cost(init_obs, ac_seqs, cfg, gt_dynamics, numpy_reward_function,
14 | traj_id=0, cem_type=None, tf_data_dict=None):
15 | assert cem_type is None
16 | assert tf_data_dict is None
17 |
18 | t, nopt = 0, ac_seqs.shape[0]
19 | init_costs = np.zeros([nopt, 1])
20 | ac_seqs = np.reshape(ac_seqs, [-1, cfg['plan_hor'], cfg['dU']])
21 | ac_seqs = np.transpose(ac_seqs, [1, 0, 2])
22 | init_obs = np.tile(init_obs[None], [nopt, 1])
23 | cur_obs = init_obs
24 | total_cost = init_costs
25 |
26 | expert_obs = gt_dynamics.expert_obs(traj_id)
27 | timestep_left = int(len(expert_obs) - init_obs[0, -1] - 1)
28 |
29 | plan_depth = min(cfg['plan_hor'], timestep_left)
30 |
31 | for i_iter in range(plan_depth):
32 | cur_acs = ac_seqs[t]
33 | next_obs, _ = gt_dynamics.predict(cur_obs, cur_acs)
34 |
35 | '''
36 | if i_iter == plan_depth - 1:
37 | delta_cost = -numpy_reward_function(next_obs, cur_acs, expert_obs)
38 | total_cost += delta_cost.reshape(total_cost.shape)
39 | else:
40 | delta_cost = 0.0
41 | '''
42 | delta_cost = -numpy_reward_function(next_obs, cur_acs, expert_obs)
43 | total_cost += delta_cost.reshape(total_cost.shape)
44 | cur_obs = next_obs
45 |
46 | return total_cost
47 |
48 |
49 | class GT:
50 | """ @brief: groundtruth dynamics
51 | """
52 |
53 | def __init__(self, params):
54 | """Initializes a class instance.
55 |
56 | Arguments:
57 |             params (DotMap): A dotmap of model parameters, or None to create a placeholder
58 |                 instance without an underlying ground-truth environment.
59 |                 .model_dir (str/None): (optional) Path to directory from which the model will
60 |                     be loaded, and to which it is saved by default. Defaults to None.
61 |                 .misc (DotMap): Miscellaneous arguments forwarded to the dynamics environment.
62 |                 .il_cfg.expert_amc_dir (str): Path to the expert motion-capture (amc) data used
63 |                     for imitation.
64 | 
65 |         Note:
66 |             Unlike the learned models, this class wraps the true simulator dynamics, so no
67 |             network structure, training op, or TensorFlow session is created.
68 |         """
69 | # Instance variables
70 | self.finalized = False
71 | self.layers, self.decays, self.optvars, self.nonoptvars = [], [], [], []
72 | self.scaler = None
73 |
74 | # Training objects
75 | self.optimizer = None
76 | self.sy_train_in, self.sy_train_targ = None, None
77 | self.train_op, self.mse_loss = None, None
78 |
79 | # Prediction objects
80 | self.sy_pred_in2d, self.sy_pred_mean2d_fac = None, None
81 | self.sy_pred_mean2d, self.sy_pred_var2d = None, None
82 | self.sy_pred_in3d, self.sy_pred_mean3d_fac = None, None
83 | self.num_nets = 1
84 |
85 | # the groundtruth dynamics environment
86 | if params is not None:
87 | self.name = 'non_tensorflow'
88 | self.model_dir = params.get('model_dir', None)
89 |
90 | self._misc_args = params.misc
91 | misc_info = {'reset_type': 'gym', 'groundtruth_model': True,
92 | 'expert_amc_dir': params.il_cfg.expert_amc_dir,
93 | 'add_timestep_into_ob': True}
94 |
95 | # TODO:
96 | from dmbrl.env import im_dmhumanoid
97 | self._dynamics_env = im_dmhumanoid.IMDMHumanoid(
98 | 'cmu-humanoid-imitation', 1234, misc_info
99 | )
100 | self._numpy_reward_function = im_dmhumanoid.numpy_reward_function
101 | self._dynamics_env.reset()
102 |
103 | def expert_obs(self, traj_id):
104 | return self._dynamics_env.expert_obs(traj_id)
105 |
106 | @property
107 | def is_probabilistic(self):
108 |         return self.num_nets > 1
109 |
110 | @property
111 | def is_tf_model(self):
112 | return False
113 |
114 | @property
115 | def sess(self):
116 | return None
117 |
118 | ###################################
119 | # Network Structure Setup Methods #
120 | ###################################
121 |
122 | def add(self, layer):
123 | pass
124 |
125 | def pop(self):
126 | pass
127 |
128 | def finalize(self, optimizer, optimizer_args=None, *args, **kwargs):
129 | self.finalized = True
130 |
131 | #################
132 | # Model Methods #
133 | #################
134 |
135 | def train(self, inputs, targets, batch_size=32, epochs=100,
136 | hide_progress=False, holdout_ratio=0.0, max_logging=5000):
137 | pass
138 |
139 | def predict(self, observations, actions):
140 | num_data = observations.shape[0]
141 | end_state = []
142 | for i_data in range(num_data):
143 | i_end_state = self._dynamics_env.fdynamics(
144 | {'start_state': observations[i_data], 'action': actions[i_data]}
145 | )
146 | end_state.append(i_end_state)
147 | return np.array(end_state), None
148 |
149 | def save(self, savedir=None):
150 | pass
151 |
152 | def _load_structure(self):
153 | pass
154 |
155 | #######################
156 | # Compilation methods #
157 | #######################
158 |
159 | def _compile_outputs(self, inputs):
160 | return None
161 |
162 | def _compile_losses(self, inputs, targets):
163 | return None
164 |
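Roughly, compile_cost() above scores each candidate action sequence by stepping it through the ground-truth dynamics and accumulating the negative rewards; the lowest total cost identifies the best sequence. A minimal sketch of that rollout loop, with a hypothetical one-step dynamics and reward standing in for GT.predict() and numpy_reward_function():

import numpy as np

# Hypothetical sizes; the real ones come from cfg['plan_hor'] and cfg['dU'].
nopt, plan_hor, dU = 8, 5, 3

def fake_predict(obs, acs):
    # Stand-in for gt_dynamics.predict(): nudge the state by the action.
    return obs + 0.1 * acs, None

def fake_reward(next_obs, acs, expert_obs):
    # Stand-in for numpy_reward_function(): reward closeness to the origin.
    return -np.sum(next_obs ** 2, axis=1)

ac_seqs = np.random.uniform(-1, 1, [nopt, plan_hor * dU])
ac_seqs = np.transpose(np.reshape(ac_seqs, [-1, plan_hor, dU]), [1, 0, 2])  # [plan_hor, nopt, dU]
cur_obs = np.zeros([nopt, dU])
total_cost = np.zeros([nopt, 1])

for i_iter in range(plan_hor):
    next_obs, _ = fake_predict(cur_obs, ac_seqs[i_iter])
    delta_cost = -fake_reward(next_obs, ac_seqs[i_iter], expert_obs=None)
    total_cost += delta_cost.reshape(total_cost.shape)
    cur_obs = next_obs

best_seq_idx = int(np.argmin(total_cost))  # index of the lowest-cost candidate sequence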
--------------------------------------------------------------------------------
/dmbrl/modeling/models/TFGP.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | from __future__ import print_function
3 | from __future__ import absolute_import
4 |
5 |
6 | import tensorflow as tf
7 | import numpy as np
8 | import gpflow
9 |
10 | from dmbrl.misc.DotmapUtils import get_required_argument
11 | from dmbrl.misc import logger
12 |
13 |
14 | class TFGP:
15 | def __init__(self, params):
16 | """Initializes class instance.
17 |
18 | Arguments:
19 | params
20 | .name (str): Model name
21 | .kernel_class (class): Kernel class
22 | .kernel_args (args): Kernel args
23 | .num_inducing_points (int): Number of inducing points
24 | .sess (tf.Session): Tensorflow session
25 | """
26 | self.name = params.get("name", "GP")
27 | self.kernel_class = get_required_argument(params, "kernel_class", "Must provide kernel class.")
28 | self.kernel_args = params.get("kernel_args", {})
29 | self.num_inducing_points = get_required_argument(
30 | params, "num_inducing_points", "Must provide number of inducing points."
31 | )
32 |
33 | if params.get("sess", None) is None:
34 | config = tf.ConfigProto()
35 | config.gpu_options.allow_growth = True
36 | self._sess = tf.Session(config=config)
37 | else:
38 | self._sess = params.get("sess")
39 |
40 | with self._sess.as_default():
41 | with tf.variable_scope(self.name):
42 | output_dim = self.kernel_args["output_dim"]
43 | del self.kernel_args["output_dim"]
44 | self.model = gpflow.models.SGPR(
45 | np.zeros([1, self.kernel_args["input_dim"]]),
46 | np.zeros([1, output_dim]),
47 | kern=self.kernel_class(**self.kernel_args),
48 | Z=np.zeros([self.num_inducing_points, self.kernel_args["input_dim"]])
49 | )
50 | self.model.initialize()
51 |
52 | @property
53 | def is_probabilistic(self):
54 | return True
55 |
56 | @property
57 | def sess(self):
58 | return self._sess
59 |
60 | @property
61 | def is_tf_model(self):
62 | return True
63 |
64 | def train(self, inputs, targets,
65 | *args, **kwargs):
66 | """Optimizes the parameters of the internal GP model.
67 |
68 | Arguments:
69 | inputs: (np.ndarray) An array of inputs.
70 | targets: (np.ndarray) An array of targets.
71 | num_restarts: (int) The number of times that the optimization of
72 | the GP will be restarted to obtain a good set of parameters.
73 |
74 | Returns: None.
75 | """
76 | perm = np.random.permutation(inputs.shape[0])
77 | inputs, targets = inputs[perm], targets[perm]
78 | Z = np.copy(inputs[:self.num_inducing_points])
79 | if Z.shape[0] < self.num_inducing_points:
80 | Z = np.concatenate([Z, np.zeros([self.num_inducing_points - Z.shape[0], Z.shape[1]])])
81 | self.model.X = inputs
82 | self.model.Y = targets
83 | self.model.feature.Z = Z
84 | with self.sess.as_default():
85 | self.model.compile()
86 | logger.info("Optimizing model... ", end="")
87 | gpflow.train.ScipyOptimizer().minimize(self.model)
88 | logger.info("Done.")
89 |
90 | def predict(self, inputs, *args, **kwargs):
91 | """Returns the predictions of this model on inputs.
92 |
93 | Arguments:
94 | inputs: (np.ndarray) The inputs on which predictions will be returned.
95 | ign_var: (bool) If True, only returns the mean prediction
96 |
97 | Returns: (np.ndarrays) The mean and variance of the model on the new points.
98 | """
99 | if self.model is None:
100 | raise RuntimeError("Cannot make predictions without initial batch of data.")
101 |
102 | with self.sess.as_default():
103 | mean, var = self.model.predict_y(inputs)
104 | return mean, var
105 |
106 | def create_prediction_tensors(self, inputs, *args, **kwargs):
107 |         """Returns tf.Tensors for the predictive mean and variance of the model on the given inputs."""
108 | if self.model is None:
109 | raise RuntimeError("Cannot make predictions without initial batch of data.")
110 |
111 | inputs = tf.cast(inputs, tf.float64)
112 | mean, var = self.model._build_predict(inputs, full_cov=False)
113 | return tf.cast(mean, dtype=tf.float32), tf.cast(var, tf.float32)
114 |
115 | def save(self, *args, **kwargs):
116 | pass
117 |
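A minimal construction-and-use sketch for the class above, assuming gpflow 1.x kernels and hypothetical observation/action dimensions (in the repo, kernel_class, kernel_args, and num_inducing_points come from the environment config):

import numpy as np
import gpflow
from dotmap import DotMap

from dmbrl.modeling.models import TFGP

obs_dim, act_dim = 17, 6   # hypothetical dimensions

gp_params = DotMap(
    name="dyn_gp",
    kernel_class=gpflow.kernels.RBF,                   # gpflow 1.x kernel class
    kernel_args={"input_dim": obs_dim + act_dim,       # forwarded to the kernel constructor
                 "output_dim": obs_dim, "ARD": True},  # output_dim is consumed by TFGP itself
    num_inducing_points=200,
)
model = TFGP(gp_params)                                # creates its own session if none is given

# Fit on (state, action) -> target pairs, then query the predictive mean and variance.
train_in = np.random.randn(512, obs_dim + act_dim)
train_targ = np.random.randn(512, obs_dim)
model.train(train_in, train_targ)
mean, var = model.predict(train_in[:5])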
--------------------------------------------------------------------------------
/dmbrl/modeling/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .BNN import BNN
2 | from .NN import NN
3 | from .TFGP import TFGP
4 |
--------------------------------------------------------------------------------
/dmbrl/modeling/utils/TensorStandardScaler.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | from __future__ import print_function
3 | from __future__ import absolute_import
4 |
5 | import tensorflow as tf
6 | import numpy as np
7 |
8 |
9 | class TensorStandardScaler:
10 | """Helper class for automatically normalizing inputs into the network.
11 | """
12 | def __init__(self, x_dim):
13 | """Initializes a scaler.
14 |
15 | Arguments:
16 | x_dim (int): The dimensionality of the inputs into the scaler.
17 |
18 | Returns: None.
19 | """
20 | self.fitted = False
21 | with tf.variable_scope("Scaler"):
22 | self.mu = tf.get_variable(
23 | name="scaler_mu", shape=[1, x_dim], initializer=tf.constant_initializer(0.0),
24 | trainable=False
25 | )
26 | self.sigma = tf.get_variable(
27 | name="scaler_std", shape=[1, x_dim], initializer=tf.constant_initializer(1.0),
28 | trainable=False
29 | )
30 |
31 |         self.cached_mu, self.cached_sigma = np.zeros([1, x_dim]), np.ones([1, x_dim])
32 |
33 | def fit(self, data):
34 | """Runs two ops, one for assigning the mean of the data to the internal mean, and
35 | another for assigning the standard deviation of the data to the internal standard deviation.
36 |         This function must be called within a 'with sess.as_default():' block for the session that owns these variables.
37 |
38 | Arguments:
39 | data (np.ndarray): A numpy array containing the input
40 |
41 | Returns: None.
42 | """
43 | mu = np.mean(data, axis=0, keepdims=True)
44 | sigma = np.std(data, axis=0, keepdims=True)
45 | sigma[sigma < 1e-12] = 1.0
46 |
47 | self.mu.load(mu)
48 | self.sigma.load(sigma)
49 | self.fitted = True
50 | self.cache()
51 |
52 | def transform(self, data):
53 | """Transforms the input matrix data using the parameters of this scaler.
54 |
55 | Arguments:
56 | data (np.array): A numpy array containing the points to be transformed.
57 |
58 | Returns: (np.array) The transformed dataset.
59 | """
60 | return (data - self.mu) / self.sigma
61 |
62 | def inverse_transform(self, data):
63 | """Undoes the transformation performed by this scaler.
64 |
65 | Arguments:
66 | data (np.array): A numpy array containing the points to be transformed.
67 |
68 | Returns: (np.array) The transformed dataset.
69 | """
70 | return self.sigma * data + self.mu
71 |
72 | def get_vars(self):
73 | """Returns a list of variables managed by this object.
74 |
75 | Returns: (list) The list of variables.
76 | """
77 | return [self.mu, self.sigma]
78 |
79 | def cache(self):
80 | """Caches current values of this scaler.
81 |
82 | Returns: None.
83 | """
84 | self.cached_mu = self.mu.eval()
85 | self.cached_sigma = self.sigma.eval()
86 |
87 | def load_cache(self):
88 | """Loads values from the cache
89 |
90 | Returns: None.
91 | """
92 | self.mu.load(self.cached_mu)
93 | self.sigma.load(self.cached_sigma)
94 |
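A short usage sketch for the scaler under a TF1 session (as the fit() docstring notes, the load/eval calls need a default session; the data here is hypothetical):

import numpy as np
import tensorflow as tf

from dmbrl.modeling.utils import TensorStandardScaler

data = np.random.randn(100, 3).astype(np.float32)    # hypothetical 3-D inputs

scaler = TensorStandardScaler(x_dim=3)
with tf.Session() as sess:                           # also installs itself as the default session
    sess.run(tf.global_variables_initializer())
    scaler.fit(data)                                 # loads the data mean/std into the TF variables
    normalized = sess.run(scaler.transform(data))
    restored = sess.run(scaler.inverse_transform(normalized))
    assert np.allclose(restored, data, atol=1e-5)    # round trip recovers the original data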
--------------------------------------------------------------------------------
/dmbrl/modeling/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .TensorStandardScaler import TensorStandardScaler
--------------------------------------------------------------------------------
/img/curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WilsonWangTHU/POPLIN/edd8dba50f9049c6164eda774602bef0c299cb51/img/curve.png
--------------------------------------------------------------------------------
/img/policy_control.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WilsonWangTHU/POPLIN/edd8dba50f9049c6164eda774602bef0c299cb51/img/policy_control.png
--------------------------------------------------------------------------------
/img/reward.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WilsonWangTHU/POPLIN/edd8dba50f9049c6164eda774602bef0c299cb51/img/reward.png
--------------------------------------------------------------------------------
/img/table.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WilsonWangTHU/POPLIN/edd8dba50f9049c6164eda774602bef0c299cb51/img/table.png
--------------------------------------------------------------------------------
/mbexp.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | from __future__ import print_function
3 | from __future__ import absolute_import
4 |
5 | import os
6 | import argparse
7 | import pprint
8 | import copy
9 |
10 | from dotmap import DotMap
11 |
12 | from dmbrl.misc.MBExp import MBExperiment
13 | from dmbrl.controllers.MPC import MPC
14 | from dmbrl.config import create_config
15 | from dmbrl.misc import logger
16 |
17 |
18 | def main(env, ctrl_type, ctrl_args, overrides, logdir, args):
19 | ctrl_args = DotMap(**{key: val for (key, val) in ctrl_args})
20 | cfg = create_config(env, ctrl_type, ctrl_args, overrides, logdir)
21 | logger.info('\n' + pprint.pformat(cfg))
22 |
23 |     # construct the MPC policy from the controller config
24 | if ctrl_type == "MPC":
25 | cfg.exp_cfg.exp_cfg.policy = MPC(cfg.ctrl_cfg)
26 |
27 | cfg.exp_cfg.misc = copy.copy(cfg)
28 | exp = MBExperiment(cfg.exp_cfg)
29 |
30 | if not os.path.exists(exp.logdir):
31 | os.makedirs(exp.logdir)
32 | with open(os.path.join(exp.logdir, "config.txt"), "w") as f:
33 | f.write(pprint.pformat(cfg.toDict()))
34 |
35 | exp.run_experiment()
36 |
37 |
38 | if __name__ == "__main__":
39 | parser = argparse.ArgumentParser()
40 | parser.add_argument('-env', type=str, required=True,
41 | help='Environment name: select from [cartpole, reacher, pusher, halfcheetah]')
42 | parser.add_argument('-ca', '--ctrl_arg', action='append', nargs=2, default=[],
43 | help='Controller arguments, see https://github.com/kchua/handful-of-trials#controller-arguments')
44 | parser.add_argument('-o', '--override', action='append', nargs=2, default=[],
45 | help='Override default parameters, see https://github.com/kchua/handful-of-trials#overrides')
46 | parser.add_argument('-logdir', type=str, default='log',
47 | help='Directory to which results will be logged (default: ./log)')
48 | parser.add_argument('-e_popsize', type=int, default=500,
49 |                         help='Population size to use for the optimizer (default: 500)')
50 | args = parser.parse_args()
51 |
52 | main(args.env, "MPC", args.ctrl_arg, args.override, args.logdir, args)
53 |
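The parsed flags above are forwarded to main(); a hypothetical programmatic call looks like the following (the override path is the one used in scripts/render.py, the value is illustrative only):

# Roughly equivalent to:
#   python mbexp.py -env halfcheetah -logdir log -o exp_cfg.exp_cfg.ntrain_iters 50
main(
    env="halfcheetah",
    ctrl_type="MPC",
    ctrl_args=[],                                        # key/value pairs collected from repeated -ca flags
    overrides=[["exp_cfg.exp_cfg.ntrain_iters", "50"]],  # key/value pairs collected from repeated -o flags
    logdir="log",
    args=None,                                           # argparse namespace; not used inside main() itself
)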
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | dotmap==1.2.20
2 | future==0.16.0
3 | gpflow
4 | gym==0.9.4
5 | mujoco-py==0.5.7
6 | numpy==1.14.0
7 | scipy==0.19.0
8 | tensorflow-gpu==1.9.0
9 | tqdm==4.19.4
10 | termcolor
11 |
--------------------------------------------------------------------------------
/scripts/mbexp.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | from __future__ import print_function
3 | from __future__ import absolute_import
4 |
5 | import os
6 | import argparse
7 | import pprint
8 | import copy
9 |
10 | from dotmap import DotMap
11 |
12 | from dmbrl.misc.MBExp import MBExperiment
13 | from dmbrl.controllers.MPC import MPC
14 | from dmbrl.config import create_config
15 | from dmbrl.misc import logger
16 |
17 |
18 | def main(env, ctrl_type, ctrl_args, overrides, logdir, args):
19 | ctrl_args = DotMap(**{key: val for (key, val) in ctrl_args})
20 | cfg = create_config(env, ctrl_type, ctrl_args, overrides, logdir)
21 | logger.info('\n' + pprint.pformat(cfg))
22 |
23 |     # construct the MPC policy from the controller config
24 | if ctrl_type == "MPC":
25 | cfg.exp_cfg.exp_cfg.policy = MPC(cfg.ctrl_cfg)
26 |
27 | cfg.exp_cfg.misc = copy.copy(cfg)
28 | exp = MBExperiment(cfg.exp_cfg)
29 |
30 | if not os.path.exists(exp.logdir):
31 | os.makedirs(exp.logdir)
32 | with open(os.path.join(exp.logdir, "config.txt"), "w") as f:
33 | f.write(pprint.pformat(cfg.toDict()))
34 |
35 | exp.run_experiment()
36 |
37 |
38 | if __name__ == "__main__":
39 | parser = argparse.ArgumentParser()
40 | parser.add_argument('-env', type=str, required=True,
41 | help='Environment name: select from [cartpole, reacher, pusher, halfcheetah]')
42 | parser.add_argument('-ca', '--ctrl_arg', action='append', nargs=2, default=[],
43 | help='Controller arguments, see https://github.com/kchua/handful-of-trials#controller-arguments')
44 | parser.add_argument('-o', '--override', action='append', nargs=2, default=[],
45 | help='Override default parameters, see https://github.com/kchua/handful-of-trials#overrides')
46 | parser.add_argument('-logdir', type=str, default='log',
47 | help='Directory to which results will be logged (default: ./log)')
48 | parser.add_argument('-e_popsize', type=int, default=500,
49 |                         help='Population size to use for the optimizer (default: 500)')
50 | args = parser.parse_args()
51 |
52 | main(args.env, "MPC", args.ctrl_arg, args.override, args.logdir, args)
53 |
--------------------------------------------------------------------------------
/scripts/render.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | from __future__ import print_function
3 | from __future__ import absolute_import
4 |
5 | import os
6 | import argparse
7 | import pprint
8 |
9 | from dotmap import DotMap
10 |
11 | from dmbrl.misc.MBExp import MBExperiment
12 | from dmbrl.controllers.MPC import MPC
13 | from dmbrl.config import create_config
14 |
15 |
16 | def main(env, ctrl_type, ctrl_args, overrides, model_dir, logdir):
17 | ctrl_args = DotMap(**{key: val for (key, val) in ctrl_args})
18 |
19 | overrides.append(["ctrl_cfg.prop_cfg.model_init_cfg.model_dir", model_dir])
20 | overrides.append(["ctrl_cfg.prop_cfg.model_init_cfg.load_model", "True"])
21 | overrides.append(["ctrl_cfg.prop_cfg.model_pretrained", "True"])
22 | overrides.append(["exp_cfg.exp_cfg.ninit_rollouts", "0"])
23 | overrides.append(["exp_cfg.exp_cfg.ntrain_iters", "1"])
24 | overrides.append(["exp_cfg.log_cfg.nrecord", "1"])
25 |
26 | cfg = create_config(env, ctrl_type, ctrl_args, overrides, logdir)
27 | cfg.pprint()
28 |
29 | if ctrl_type == "MPC":
30 | cfg.exp_cfg.exp_cfg.policy = MPC(cfg.ctrl_cfg)
31 | exp = MBExperiment(cfg.exp_cfg)
32 |
33 | os.makedirs(exp.logdir)
34 | with open(os.path.join(exp.logdir, "config.txt"), "w") as f:
35 | f.write(pprint.pformat(cfg.toDict()))
36 |
37 | exp.run_experiment()
38 |
39 |
40 | if __name__ == "__main__":
41 | parser = argparse.ArgumentParser()
42 | parser.add_argument('-env', type=str, required=True)
43 | parser.add_argument('-ca', '--ctrl_arg', action='append', nargs=2, default=[])
44 | parser.add_argument('-o', '--override', action='append', nargs=2, default=[])
45 | parser.add_argument('-model-dir', type=str, required=True)
46 | parser.add_argument('-logdir', type=str, required=True)
47 | args = parser.parse_args()
48 |
49 | main(args.env, "MPC", args.ctrl_arg, args.override, args.model_dir, args.logdir)
50 |
--------------------------------------------------------------------------------
/show_result.py:
--------------------------------------------------------------------------------
1 | import glob
2 | from scipy.io import loadmat
3 | import matplotlib.pyplot as plt
4 | import numpy as np
5 |
6 | file_list = glob.glob('./log/*/*/logs.mat')
7 | file_list = [name for name in file_list if 'old' not in name]
8 | file_list = [name for name in file_list if '2500' in name]
9 | legend_label = []
10 |
11 | colormap = plt.cm.gist_ncar
12 | # plt.gca().set_color_cycle([colormap(i) for i in np.linspace(0, 0.9, len(file_list))])
13 |
14 | for name in file_list:
15 | returns = loadmat(name)['returns']
16 | print(name + '\n')
17 | print(returns)
18 | print('\n\n')
19 | # import pdb; pdb.set_trace()
20 | plt.plot(returns.reshape([-1]))
21 |     legend_label.append(name.split('/')[2])
22 | 
23 | plt.legend(legend_label)
24 | plt.show()
25 |
--------------------------------------------------------------------------------
/show_with_test_result.py:
--------------------------------------------------------------------------------
1 | import glob
2 | from scipy.io import loadmat
3 | import matplotlib.pyplot as plt
4 |
5 | file_list = glob.glob('./log/*/*/logs.mat')
6 | # file_list = [name for name in file_list if 'WRA' in name]
7 | file_list = [name for name in file_list if 'GAN-I' in name]
8 | # file_list = [name for name in file_list if 'R_0.1__' in name]
9 | # mode = 'full'  # one of: 'full', 'test', 'all'
10 | mode = 'test'  # one of: 'full', 'test', 'all'
11 | legend_label = []
12 |
13 | colormap = plt.cm.gist_ncar
14 | # plt.gca().set_color_cycle([colormap(i) for i in np.linspace(0, 0.9, len(file_list))])
15 |
16 | for name in file_list:
17 | returns = loadmat(name)['test_returns']
18 | print(name + '\n')
19 | print(returns)
20 | print('\n\n')
21 | # import pdb; pdb.set_trace()
22 | if mode in ['test', 'all']:
23 | plt.plot(returns.reshape([-1]))
24 |         legend_label.append('test_' + name.split('/')[2])
25 |
26 | returns = loadmat(name)['returns']
27 | if mode in ['full', 'all']:
28 | plt.plot(returns.reshape([-1]))
29 |         legend_label.append('full_' + name.split('/')[2])
30 | 
31 | plt.legend(legend_label)
32 | plt.show()
33 |
--------------------------------------------------------------------------------