├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── mbirl ├── README.md ├── __init__.py ├── env │ ├── __init__.py │ └── kuka_iiwa │ │ ├── meshes │ │ ├── iiwa7 │ │ │ ├── collision │ │ │ │ ├── link_0.stl │ │ │ │ ├── link_1.stl │ │ │ │ ├── link_2.stl │ │ │ │ ├── link_3.stl │ │ │ │ ├── link_4.stl │ │ │ │ ├── link_5.stl │ │ │ │ ├── link_6.stl │ │ │ │ └── link_7.stl │ │ │ └── visual │ │ │ │ ├── link_0.stl │ │ │ │ ├── link_1.stl │ │ │ │ ├── link_2.stl │ │ │ │ ├── link_3.stl │ │ │ │ ├── link_4.stl │ │ │ │ ├── link_5.stl │ │ │ │ ├── link_6.stl │ │ │ │ └── link_7.stl │ │ └── robotiq-ft300 │ │ │ ├── collision │ │ │ ├── robotiq_fts150.stl │ │ │ └── robotiq_fts300.dae │ │ │ └── visual │ │ │ ├── robotiq_fts150.stl │ │ │ └── robotiq_fts300.dae │ │ └── urdf │ │ └── iiwa7_ft_with_obj_keypts.urdf ├── experiments │ ├── __init__.py │ ├── plot_mbirl_training_and_eval.ipynb │ └── run_model_based_irl.py ├── generate_expert_demo.py ├── keypoint_mpc.py └── learnable_costs.py ├── ml3 ├── README.md ├── __init__.py ├── envs │ ├── __init__.py │ ├── bullet_sim.py │ ├── mountain_car.py │ ├── mujoco_robots │ │ ├── ground_plane.xml │ │ └── reacher.xml │ └── reacher_sim.py ├── experiments │ ├── Loss shaping visualization.ipynb │ ├── __init__.py │ ├── ml3_sine_regression_exp_viz.ipynb │ ├── run_mbrl_reacher_exp.py │ ├── run_mountain_car_exp.py │ ├── run_shaped_sine_exp.py │ └── run_sine_regression_exp.py ├── learnable_losses.py ├── mbrl_utils.py ├── ml3_test.py ├── ml3_train.py ├── optimizee.py ├── shaped_sine_utils.py ├── sine_regression_task.py └── sine_task_sampler.py └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.idea 2 | .DS_Store 3 | __pycache__ 4 | *.egg-info 5 | **/data/* 6 | **/model_data/* 7 | **/traj_data/* 8 | **/plots/* 9 | **/.ipynb_checkpoints/* 10 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: 
-------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | Facebook has adopted a Code of Conduct that we expect project participants to adhere to. 4 | Please read the [full text](https://code.fb.com/codeofconduct/) 5 | so that you can understand what actions will and will not be tolerated. 6 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to learning-to-learn 2 | We want to make contributing to this project as easy and transparent as 3 | possible. 4 | 5 | ## Pull Requests 6 | We actively welcome your pull requests. 7 | 8 | 1. Fork the repo and create your branch from `master`. 9 | 2. If you've added code that should be tested, add tests. 10 | 3. If you've changed APIs, update the documentation. 11 | 4. Ensure the test suite passes. 12 | 5. Make sure your code lints. 13 | 6. If you haven't already, complete the Contributor License Agreement ("CLA"). 14 | 15 | ## Contributor License Agreement ("CLA") 16 | In order to accept your pull request, we need you to submit a CLA. You only need 17 | to do this once to work on any of Facebook's open source projects. 18 | 19 | Complete your CLA here: <https://code.facebook.com/cla> 20 | 21 | ## Issues 22 | We use GitHub issues to track public bugs. Please ensure your description is 23 | clear and has sufficient instructions to be able to reproduce the issue. 24 | 25 | Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe 26 | disclosure of security bugs. In those cases, please go through the process 27 | outlined on that page and do not file a public issue. 28 | 29 | ## License 30 | By contributing to learning-to-learn, you agree that your contributions will be licensed 31 | under the LICENSE file in the root directory of this source tree. 
32 | 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Facebook, Inc. and its affiliates. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LearningToLearn 2 | This repository contains code for 3 | * ML3: Meta-Learning via Learned Losses, presented at ICPR 2020, *won best student award* ([pdf](https://arxiv.org/pdf/1906.05374.pdf)) 4 | * MBIRL: Model-Based Inverse Reinforcement Learning from Visual Demonstrations, presented at CoRL 2020 ([pdf](https://arxiv.org/pdf/2010.09034.pdf)) 5 | 6 | ## Setup 7 | In the LearningToLearn folder, run: 8 | 9 | ``` 10 | conda create -n l2l python=3.7 11 | conda activate l2l 12 | python setup.py develop 13 | ``` 14 | 15 | ## ML3 paper experiments and citation 16 | To reproduce results of the ML3 paper follow the README instructions in the `ml3` folder 17 | 18 | #### Citation 19 | ``` 20 | @inproceedings{ml3, 21 | author = {Sarah Bechtle and Artem Molchanov and Yevgen Chebotar and Edward Grefenstette and Ludovic Righetti and Gaurav Sukhatme and Franziska Meier}, 22 | title = {Meta Learning via Learned Loss}, 23 | booktitle = {International Conference on Pattern Recognition, {ICPR}, Italy, January 10-15, 2021}, 24 | year = {2021} } 25 | ``` 26 | 27 | ## MBIRL paper experiments and citation 28 | To test our MBIRL algorithm follow the README instructions in the `mbirl` folder 29 | 30 | #### Citation 31 | ``` 32 | @InProceedings{mbirl, 33 | author = {Neha Das and Sarah Bechtle and Todor Davchev and Dinesh Jayaraman and Akshara Rai and Franziska Meier}, 34 | booktitle = {Conference on Robot Learning (CoRL)}, 35 | title = {Model-Based Inverse Reinforcement Learning from Visual Demonstrations}, 36 | year = {2020}, 37 | video = {https://www.youtube.com/watch?v=sRrNhtLk12M&t=52s}, 38 | } 39 | ``` 40 | 41 | ## License 42 | 43 | `LearningToLearn` is released under the MIT license. See [LICENSE](LICENSE) for additional details about it. 
44 | See also our [Terms of Use](https://opensource.facebook.com/legal/terms) and [Privacy Policy](https://opensource.facebook.com/legal/privacy). 45 | -------------------------------------------------------------------------------- /mbirl/README.md: -------------------------------------------------------------------------------- 1 | ## MBIRL - Model Based Inverse Reinforcement Learning 2 | 3 | ### Simulation with ground truth keypoint predictions 4 | 5 | #### Generate expert demonstrations 6 | 1. ```python mbirl/generate_expert_demo.py``` 7 | 2. Check the data and visualizations of the demonstration in 'mbirl/experiments/traj_data' 8 | 9 | #### Run Our Method 10 | 1. ```python mbirl/experiments/run_model_based_irl.py``` 11 | 2. Check the trajectories predicted during training in 'mbirl/experiments/model_data/placing/' 12 | 13 | #### Plot the losses, evaluate our method 14 | 1. ```jupyter notebook``` 15 | 2. Access the notebook in the browser in 'mbirl/experiments/plot_mbirl_training_and_eval.ipynb' 16 | 17 | ### Simulation with learned keypoint representation and dynamics 18 | COMING SOON 19 | -------------------------------------------------------------------------------- /mbirl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/LearningToLearn/fa32b98b40402fa15982b450ed09d9d3735ec924/mbirl/__init__.py -------------------------------------------------------------------------------- /mbirl/env/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/LearningToLearn/fa32b98b40402fa15982b450ed09d9d3735ec924/mbirl/env/__init__.py -------------------------------------------------------------------------------- /mbirl/env/kuka_iiwa/meshes/iiwa7/collision/link_0.stl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/facebookresearch/LearningToLearn/fa32b98b40402fa15982b450ed09d9d3735ec924/mbirl/env/kuka_iiwa/meshes/iiwa7/collision/link_0.stl -------------------------------------------------------------------------------- /mbirl/env/kuka_iiwa/meshes/iiwa7/collision/link_1.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/LearningToLearn/fa32b98b40402fa15982b450ed09d9d3735ec924/mbirl/env/kuka_iiwa/meshes/iiwa7/collision/link_1.stl -------------------------------------------------------------------------------- /mbirl/env/kuka_iiwa/meshes/iiwa7/collision/link_2.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/LearningToLearn/fa32b98b40402fa15982b450ed09d9d3735ec924/mbirl/env/kuka_iiwa/meshes/iiwa7/collision/link_2.stl -------------------------------------------------------------------------------- /mbirl/env/kuka_iiwa/meshes/iiwa7/collision/link_3.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/LearningToLearn/fa32b98b40402fa15982b450ed09d9d3735ec924/mbirl/env/kuka_iiwa/meshes/iiwa7/collision/link_3.stl -------------------------------------------------------------------------------- /mbirl/env/kuka_iiwa/meshes/iiwa7/collision/link_4.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/LearningToLearn/fa32b98b40402fa15982b450ed09d9d3735ec924/mbirl/env/kuka_iiwa/meshes/iiwa7/collision/link_4.stl -------------------------------------------------------------------------------- /mbirl/env/kuka_iiwa/meshes/iiwa7/collision/link_5.stl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/facebookresearch/LearningToLearn/fa32b98b40402fa15982b450ed09d9d3735ec924/mbirl/env/kuka_iiwa/meshes/iiwa7/collision/link_5.stl -------------------------------------------------------------------------------- /mbirl/env/kuka_iiwa/meshes/iiwa7/collision/link_6.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/LearningToLearn/fa32b98b40402fa15982b450ed09d9d3735ec924/mbirl/env/kuka_iiwa/meshes/iiwa7/collision/link_6.stl -------------------------------------------------------------------------------- /mbirl/env/kuka_iiwa/meshes/iiwa7/collision/link_7.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/LearningToLearn/fa32b98b40402fa15982b450ed09d9d3735ec924/mbirl/env/kuka_iiwa/meshes/iiwa7/collision/link_7.stl -------------------------------------------------------------------------------- /mbirl/env/kuka_iiwa/meshes/iiwa7/visual/link_0.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/LearningToLearn/fa32b98b40402fa15982b450ed09d9d3735ec924/mbirl/env/kuka_iiwa/meshes/iiwa7/visual/link_0.stl -------------------------------------------------------------------------------- /mbirl/env/kuka_iiwa/meshes/iiwa7/visual/link_1.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/LearningToLearn/fa32b98b40402fa15982b450ed09d9d3735ec924/mbirl/env/kuka_iiwa/meshes/iiwa7/visual/link_1.stl -------------------------------------------------------------------------------- /mbirl/env/kuka_iiwa/meshes/iiwa7/visual/link_2.stl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/facebookresearch/LearningToLearn/fa32b98b40402fa15982b450ed09d9d3735ec924/mbirl/env/kuka_iiwa/meshes/iiwa7/visual/link_2.stl -------------------------------------------------------------------------------- /mbirl/env/kuka_iiwa/meshes/iiwa7/visual/link_3.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/LearningToLearn/fa32b98b40402fa15982b450ed09d9d3735ec924/mbirl/env/kuka_iiwa/meshes/iiwa7/visual/link_3.stl -------------------------------------------------------------------------------- /mbirl/env/kuka_iiwa/meshes/iiwa7/visual/link_4.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/LearningToLearn/fa32b98b40402fa15982b450ed09d9d3735ec924/mbirl/env/kuka_iiwa/meshes/iiwa7/visual/link_4.stl -------------------------------------------------------------------------------- /mbirl/env/kuka_iiwa/meshes/iiwa7/visual/link_5.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/LearningToLearn/fa32b98b40402fa15982b450ed09d9d3735ec924/mbirl/env/kuka_iiwa/meshes/iiwa7/visual/link_5.stl -------------------------------------------------------------------------------- /mbirl/env/kuka_iiwa/meshes/iiwa7/visual/link_6.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/LearningToLearn/fa32b98b40402fa15982b450ed09d9d3735ec924/mbirl/env/kuka_iiwa/meshes/iiwa7/visual/link_6.stl -------------------------------------------------------------------------------- /mbirl/env/kuka_iiwa/meshes/iiwa7/visual/link_7.stl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/facebookresearch/LearningToLearn/fa32b98b40402fa15982b450ed09d9d3735ec924/mbirl/env/kuka_iiwa/meshes/iiwa7/visual/link_7.stl -------------------------------------------------------------------------------- /mbirl/env/kuka_iiwa/meshes/robotiq-ft300/collision/robotiq_fts150.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/LearningToLearn/fa32b98b40402fa15982b450ed09d9d3735ec924/mbirl/env/kuka_iiwa/meshes/robotiq-ft300/collision/robotiq_fts150.stl -------------------------------------------------------------------------------- /mbirl/env/kuka_iiwa/meshes/robotiq-ft300/visual/robotiq_fts150.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/LearningToLearn/fa32b98b40402fa15982b450ed09d9d3735ec924/mbirl/env/kuka_iiwa/meshes/robotiq-ft300/visual/robotiq_fts150.stl -------------------------------------------------------------------------------- /mbirl/env/kuka_iiwa/urdf/iiwa7_ft_with_obj_keypts.urdf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 
153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | 306 | 307 | 308 | 309 | 310 | 311 | 312 | 313 | 314 | 315 | 316 | 317 | 318 | 319 | 320 | 321 | 322 | 323 | 324 | 325 | 326 | 327 | 328 | 329 | 330 | 331 | 332 | 333 | 334 | 335 | 336 | 337 | 338 | 339 | 340 | 341 | 342 | 343 | 344 | 345 | 346 | 347 | 348 | 349 | 350 | 351 | 352 | 353 | 354 | 355 | 356 | 357 | 358 | 359 | 360 | 361 | 362 | 363 | 364 | 365 | 366 | 367 | 368 | 369 | 370 | 371 | 372 | 373 | 374 | 375 | 376 | 377 | 378 | 379 | 380 | 381 | 382 | 383 | 384 | 385 | 386 | 387 | 388 | 389 | 390 | 391 | 392 | 393 | 394 | 395 | 396 | -------------------------------------------------------------------------------- /mbirl/experiments/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/LearningToLearn/fa32b98b40402fa15982b450ed09d9d3735ec924/mbirl/experiments/__init__.py -------------------------------------------------------------------------------- /mbirl/experiments/plot_mbirl_training_and_eval.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 3, 6 | "metadata": { 7 | "scrolled": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import os, sys\n", 12 | "import torch\n", 13 | "import numpy as np\n", 14 | "import matplotlib.pyplot as plt\n", 15 | "from os.path import dirname, abspath\n", 16 | "from differentiable_robot_model import DifferentiableRobotModel\n", 17 | "\n", 18 | "from mbirl.keypoint_mpc import KeypointMPCWrapper\n", 19 | "from mbirl.learnable_costs import *\n", 20 | "import mbirl\n", 21 | "import warnings\n", 22 | "warnings.filterwarnings('ignore')\n", 23 | "\n", 24 | "EXP_FOLDER = os.path.join(mbirl.__path__[0], \"experiments\")\n", 25 | "traj_data_dir = os.path.join(EXP_FOLDER, 'traj_data')\n", 26 | "model_data_dir = os.path.join(EXP_FOLDER, 'model_data')\n" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 4, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "experiment_type = 'placing'" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 5, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "# Get data saved during training\n", 45 | "\n", 46 | "if not os.path.exists(\n", 47 | " f\"{model_data_dir}/{experiment_type}_TimeDep\") or not os.path.exists(\n", 48 | " f\"{model_data_dir}/{experiment_type}_Weighted\") or not os.path.exists(f\"{model_data_dir}/{experiment_type}_RBF\"):\n", 49 | " assert False, \"Path does not exist\"\n", 50 | "\n", 51 | "timedep = torch.load(f\"{model_data_dir}/{experiment_type}_TimeDep\")\n", 52 | "weighted = torch.load(f\"{model_data_dir}/{experiment_type}_Weighted\")\n", 53 | "rbf = torch.load(f\"{model_data_dir}/{experiment_type}_RBF\")" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 6, 59 | "metadata": {}, 60 | "outputs": [ 61 | { 62 | "data": { 63 | "image/png": "\n", 64 | "text/plain": [ 65 | "
" 66 | ] 67 | }, 68 | "metadata": { 69 | "needs_background": "light" 70 | }, 71 | "output_type": "display_data" 72 | } 73 | ], 74 | "source": [ 75 | "# IRL Loss on train trajectories, as a function of cost function updates\n", 76 | "\n", 77 | "plt.figure()\n", 78 | "plt.plot(weighted['irl_loss_train'].detach(), color='orange', label=\"Weighted Ours\")\n", 79 | "plt.plot(timedep['irl_loss_train'].detach(), color='green', label=\"Time Dep Weighted Ours\")\n", 80 | "plt.plot(rbf['irl_loss_train'].detach(), color='violet', label=\"RBF Weighted Ours\")\n", 81 | "plt.xlabel(\"iterations\")\n", 82 | "plt.ylabel(\"IRL Loss on train\")\n", 83 | "plt.ylim([0, 2000])\n", 84 | "plt.legend()\n", 85 | "\n", 86 | "plt.savefig(f\"{model_data_dir}/{experiment_type}_IRL_loss_train.png\")" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 7, 92 | "metadata": {}, 93 | "outputs": [ 94 | { 95 | "data": { 96 | "image/png": "\n", 97 | "text/plain": [ 98 | "
" 99 | ] 100 | }, 101 | "metadata": { 102 | "needs_background": "light" 103 | }, 104 | "output_type": "display_data" 105 | } 106 | ], 107 | "source": [ 108 | "# IRL Loss on test trajectories, as a function of cost function updates\n", 109 | "\n", 110 | "plt.figure()\n", 111 | "weighted_trace = weighted['irl_loss_test'].detach()\n", 112 | "w_mean = weighted_trace.mean(dim=-1)\n", 113 | "w_std = weighted_trace.std(dim=-1)\n", 114 | "timedep_trace = timedep['irl_loss_test'].detach()\n", 115 | "t_mean = timedep_trace.mean(dim=-1)\n", 116 | "t_std = timedep_trace.std(dim=-1)\n", 117 | "rbf_trace = rbf['irl_loss_test'].detach()\n", 118 | "r_mean = rbf_trace.mean(dim=-1)\n", 119 | "r_std = rbf_trace.std(dim=-1)\n", 120 | "plt.plot(w_mean, color='orange', label=\"Weighted Ours\")\n", 121 | "plt.fill_between(np.arange(len(w_mean)), w_mean - w_std, w_mean + w_std, color='orange', alpha=0.1)\n", 122 | "plt.plot(t_mean, color='green', label=\"Time Dep Weighted Ours\")\n", 123 | "plt.fill_between(np.arange(len(t_mean)), t_mean - t_std, t_mean + t_std, color='green', alpha=0.1)\n", 124 | "plt.plot(r_mean, color='violet', label=\"RBF Weighted Ours\")\n", 125 | "plt.fill_between(np.arange(len(r_mean)), r_mean - r_std, r_mean + r_std, color='blueviolet', alpha=0.1)\n", 126 | "plt.xlabel(\"iterations\")\n", 127 | "plt.ylabel(\"IRL Loss on test\")\n", 128 | "plt.legend()\n", 129 | "\n", 130 | "plt.savefig(f\"{model_data_dir}/{experiment_type}_IRL_loss_test.png\")\n", 131 | "plt.show()" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": {}, 138 | "outputs": [], 139 | "source": [] 140 | } 141 | ], 142 | "metadata": { 143 | "kernelspec": { 144 | "display_name": "Python 3", 145 | "language": "python", 146 | "name": "python3" 147 | }, 148 | "language_info": { 149 | "codemirror_mode": { 150 | "name": "ipython", 151 | "version": 3 152 | }, 153 | "file_extension": ".py", 154 | "mimetype": "text/x-python", 155 | "name": "python", 156 | 
"nbconvert_exporter": "python", 157 | "pygments_lexer": "ipython3", 158 | "version": "3.7.9" 159 | } 160 | }, 161 | "nbformat": 4, 162 | "nbformat_minor": 4 163 | } 164 | -------------------------------------------------------------------------------- /mbirl/experiments/run_model_based_irl.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import random 3 | import os 4 | import torch 5 | import numpy as np 6 | import higher 7 | import mbirl 8 | import matplotlib.pyplot as plt 9 | 10 | from differentiable_robot_model import DifferentiableRobotModel 11 | 12 | from mbirl.learnable_costs import LearnableWeightedCost, LearnableTimeDepWeightedCost, LearnableRBFWeightedCost 13 | from mbirl.keypoint_mpc import GroundTruthKeypointMPCWrapper 14 | 15 | EXP_FOLDER = os.path.join(mbirl.__path__[0], "experiments") 16 | traj_data_dir = os.path.join(EXP_FOLDER, 'traj_data') 17 | model_data_dir = os.path.join(EXP_FOLDER, 'model_data') 18 | 19 | 20 | # The IRL Loss, the learning objective for the learnable cost functions. 
# The IRL loss measures the distance between the demonstrated trajectory and predicted trajectory
class IRLLoss(object):
    """Squared-error objective between a predicted and a demonstrated trajectory."""

    def __call__(self, pred_traj, target_traj):
        """Return the IRL loss as a scalar tensor.

        Only the last six columns of each argument are compared. The squared
        differences are summed over the first axis and then averaged over the
        remaining columns.
        """
        # NOTE(review): presumably the last six columns are the keypoint
        # coordinates of the rollout state -- confirm against roll_out().
        loss = ((pred_traj[:, -6:] - target_traj[:, -6:]) ** 2).sum(dim=0)
        return loss.mean()


def _unpack_demo(traj):
    """Return ``(start_pose, expert_demo)`` for one demonstration dict.

    ``expert_demo`` is ``traj['desired_keypoints']`` flattened to two
    dimensions, one row per entry along its first axis. The row count is taken
    from the demonstration itself so callers do not depend on any global.
    """
    keypoints = traj['desired_keypoints']
    start_pose = traj['start_joint_config'].squeeze()
    expert_demo = torch.Tensor(keypoints.reshape(len(keypoints), -1))
    return start_pose, expert_demo


def evaluate_action_optimization(learned_cost, robot_model, irl_loss_fn, trajs, n_inner_iter, action_lr=0.001):
    """Score ``learned_cost`` by optimizing actions against it on ``trajs``.

    For every trajectory a fresh ``GroundTruthKeypointMPCWrapper`` is created,
    its parameters are updated with ``n_inner_iter`` SGD steps on the learned
    cost, and the resulting rollout is compared to the demonstration with
    ``irl_loss_fn``.

    Returns:
        A detached tensor with one IRL loss per trajectory.
    """
    eval_costs = []
    for traj in trajs:
        start_pose, expert_demo = _unpack_demo(traj)
        time_horizon, n_keypt_dim = expert_demo.shape

        keypoint_mpc_wrapper = GroundTruthKeypointMPCWrapper(robot_model, time_horizon=time_horizon - 1,
                                                             n_keypt_dim=n_keypt_dim)
        action_optimizer = torch.optim.SGD(keypoint_mpc_wrapper.parameters(), lr=action_lr)

        # The original reused ``i`` here and shadowed the outer index; the
        # step counter is not needed.
        for _ in range(n_inner_iter):
            action_optimizer.zero_grad()

            pred_traj = keypoint_mpc_wrapper.roll_out(start_pose.clone())
            # use the learned loss to update the action sequence
            learned_cost_val = learned_cost(pred_traj, expert_demo[-1])
            learned_cost_val.backward(retain_graph=True)
            action_optimizer.step()

        # Actually take the next step after optimizing the action
        pred_state_traj_new = keypoint_mpc_wrapper.roll_out(start_pose.clone())
        eval_costs.append(irl_loss_fn(pred_state_traj_new, expert_demo).mean())

    return torch.stack(eval_costs).detach()


# Helper function for the irl learning loop
def irl_training(learnable_cost, robot_model, irl_loss_fn, train_trajs, test_trajs, n_outer_iter, n_inner_iter,
                 data_type, cost_type, cost_lr=1e-2, action_lr=1e-3):
    """Train ``learnable_cost`` so that optimizing it reproduces the demonstrations.

    Each outer iteration visits every training demonstration: one
    differentiable SGD step (via ``higher``) is taken on the action parameters
    using the learned cost, the IRL loss of the resulting rollout is
    back-propagated into the cost parameters, and Adam updates the cost.
    After each outer iteration the cost is evaluated on ``test_trajs``.

    ``n_inner_iter`` is only forwarded to ``evaluate_action_optimization``;
    the training inner loop always takes a single action step.

    Plots are written to ``model_data_dir/data_type/cost_type``.

    Returns:
        ``(irl_loss_on_train, irl_loss_on_test, learnable_cost_params, pred_traj)``
        where the first two are stacked tensors, the third maps parameter
        names to parameters, and the last is the final predicted trajectory.

    Raises:
        ValueError: if ``train_trajs`` is empty.
    """
    if len(train_trajs) == 0:
        raise ValueError("irl_training needs at least one training trajectory")

    irl_loss_on_train = []
    irl_loss_on_test = []

    learnable_cost_opt = torch.optim.Adam(learnable_cost.parameters(), lr=cost_lr)

    plots_dir = os.path.join(model_data_dir, data_type, cost_type)
    os.makedirs(plots_dir, exist_ok=True)

    # initial loss before training
    irl_loss_dems = []
    for demo_i, expert_demo_dict in enumerate(train_trajs):
        # The length is derived per demonstration. The original read a
        # ``traj_len`` global that only exists when this file runs as a script.
        start_pose, expert_demo = _unpack_demo(expert_demo_dict)
        time_horizon, n_keypt_dim = expert_demo.shape

        keypoint_mpc_wrapper = GroundTruthKeypointMPCWrapper(robot_model, time_horizon=time_horizon - 1,
                                                             n_keypt_dim=n_keypt_dim)
        # unroll and extract expected features
        pred_traj = keypoint_mpc_wrapper.roll_out(start_pose.clone())

        # get initial irl loss
        irl_loss = irl_loss_fn(pred_traj, expert_demo).mean()
        irl_loss_dems.append(irl_loss.item())

    irl_loss_on_train.append(torch.Tensor(irl_loss_dems).mean())
    print("irl cost training iter: {} loss: {}".format(0, irl_loss_on_train[-1]))

    print("Cost function parameters to be optimized:")
    for name, param in learnable_cost.named_parameters():
        print(name)
        print(param)

    # start of inverse RL loop
    for outer_i in range(n_outer_iter):
        irl_loss_dems = []

        for demo_i, expert_demo_dict in enumerate(train_trajs):
            learnable_cost_opt.zero_grad()

            start_pose, expert_demo = _unpack_demo(expert_demo_dict)
            time_horizon, n_keypt_dim = expert_demo.shape

            keypoint_mpc_wrapper = GroundTruthKeypointMPCWrapper(robot_model, time_horizon=time_horizon - 1,
                                                                 n_keypt_dim=n_keypt_dim)
            action_optimizer = torch.optim.SGD(keypoint_mpc_wrapper.parameters(), lr=action_lr)

            with higher.innerloop_ctx(keypoint_mpc_wrapper, action_optimizer) as (fpolicy, diffopt):
                pred_traj = fpolicy.roll_out(start_pose.clone())

                # use the learned loss to update the action sequence
                learned_cost_val = learnable_cost(pred_traj, expert_demo[-1])
                diffopt.step(learned_cost_val)

                pred_traj = fpolicy.roll_out(start_pose)
                # compute task loss
                irl_loss = irl_loss_fn(pred_traj, expert_demo).mean()
                # backprop gradient of learned cost parameters wrt irl loss
                irl_loss.backward(retain_graph=True)
                irl_loss_dems.append(irl_loss.detach())

            learnable_cost_opt.step()

            if outer_i % 25 == 0:
                plt.figure()
                plt.plot(pred_traj[:, 7].detach(), pred_traj[:, 9].detach(), 'o')
                plt.plot(expert_demo[:, 0], expert_demo[:, 2], 'x')
                plt.title("outer i: {}".format(outer_i))
                plt.savefig(os.path.join(plots_dir, f'{demo_i}_{outer_i}.png'))

        irl_loss_on_train.append(torch.Tensor(irl_loss_dems).mean())
        # NOTE(review): ``.eval()`` switches the cost module to eval mode and it
        # is never switched back, so later outer iterations train in eval mode.
        # Confirm this is harmless for the cost modules in use.
        test_irl_losses = evaluate_action_optimization(learnable_cost.eval(), robot_model, irl_loss_fn, test_trajs,
                                                       n_inner_iter)
        print("irl loss (on train) training iter: {} loss: {}".format(outer_i + 1, irl_loss_on_train[-1]))
        print("irl loss (on test) training iter: {} loss: {}".format(outer_i + 1, test_irl_losses.mean().item()))
        print("")
        irl_loss_on_test.append(test_irl_losses)
        learnable_cost_params = dict(learnable_cost.named_parameters())

        if len(learnable_cost_params) == 0:
            # For RBF Weighted Cost
            learnable_cost_params = dict(learnable_cost.weights_fn.named_parameters())

    plt.figure()
    plt.plot(pred_traj[:, 7].detach(), pred_traj[:, 9].detach(), 'o')
    plt.plot(expert_demo[:, 0], expert_demo[:, 2], 'x')
    plt.title("final")
    plt.savefig(os.path.join(plots_dir, f'{demo_i}_final.png'))

    return torch.stack(irl_loss_on_train), torch.stack(irl_loss_on_test), learnable_cost_params, pred_traj


if __name__ == '__main__':
    random.seed(10)
    np.random.seed(10)
    torch.manual_seed(0)

    rel_urdf_path = 'env/kuka_iiwa/urdf/iiwa7_ft_with_obj_keypts.urdf'
    urdf_path = os.path.join(mbirl.__path__[0], rel_urdf_path)
    robot_model = DifferentiableRobotModel(urdf_path=urdf_path, name="kuka_w_obj_keypts")

    data_type = 'placing'
    trajs = torch.load(f'{traj_data_dir}/traj_data_{data_type}.pt')

    # The first demonstration fixes the cost dimensions.
    traj = trajs[0]
    traj_len = len(traj['desired_keypoints'])

    expert_demo = traj['desired_keypoints'].reshape(traj_len, -1)
    expert_demo = torch.Tensor(expert_demo)
    print(expert_demo.shape)
    n_keypt_dim = expert_demo.shape[1]
    time_horizon = expert_demo.shape[0]

    # type of cost: one of 'Weighted', 'TimeDep', 'RBF'
    cost_type = 'RBF'

    if cost_type == 'Weighted':
        learnable_cost = LearnableWeightedCost(dim=n_keypt_dim)
    elif cost_type == 'TimeDep':
        learnable_cost = LearnableTimeDepWeightedCost(time_horizon=time_horizon, dim=n_keypt_dim)
    elif cost_type == 'RBF':
        learnable_cost = LearnableRBFWeightedCost(time_horizon=time_horizon, dim=n_keypt_dim)
    else:
        # Fail here with a clear message instead of crashing later on a
        # ``None`` cost object.
        raise ValueError(f'Cost not implemented: {cost_type}')

    irl_loss_fn = IRLLoss()

    cost_lr = 1e-2
    action_lr = 1e-3
    n_outer_iter = 100
    n_inner_iter = 1
    n_test_traj = 2
    train_trajs = trajs[0:3]
    test_trajs = trajs[3:3 + n_test_traj]
    irl_loss_train, irl_loss_test, learnable_cost_params, pred_traj = irl_training(learnable_cost, robot_model,
                                                                                   irl_loss_fn,
                                                                                   train_trajs, test_trajs,
                                                                                   n_outer_iter, n_inner_iter,
                                                                                   cost_type=cost_type,
                                                                                   data_type=data_type,
                                                                                   cost_lr=cost_lr,
                                                                                   action_lr=action_lr)

    os.makedirs(model_data_dir, exist_ok=True)

    # The key 'fina_pred_traj' is kept exactly as spelled: already-saved
    # result files use it.
    torch.save({
        'irl_loss_train': irl_loss_train,
        'irl_loss_test': irl_loss_test,
        'cost_parameters': learnable_cost_params,
        'fina_pred_traj': pred_traj,
        'n_inner_iter': n_inner_iter,
        'action_lr': action_lr
    }, f=f'{model_data_dir}/{data_type}_{cost_type}')

# --------------------------------------------------------------------------------
# /mbirl/generate_expert_demo.py:
# --------------------------------------------------------------------------------
# Copyright (c) Facebook, Inc. and its affiliates.
import os
import random
import torch
import numpy as np
import mbirl
import matplotlib.pyplot as plt

from differentiable_robot_model import DifferentiableRobotModel

EXP_FOLDER = os.path.join(mbirl.__path__[0], "experiments")
traj_data_dir = os.path.join(EXP_FOLDER, 'traj_data')


class GroundTruthForwardModel(torch.nn.Module):
    """Computes keypoint positions from the robot model's forward kinematics."""

    def __init__(self, model):
        """Store ``model``, which must provide ``compute_forward_kinematics``."""
        super().__init__()
        self.robot_model = model

    def forward_kin(self, x):
        """Return the stacked, squeezed keypoint positions for joint state ``x``.

        Positions of links ``kp_link_1`` and ``kp_link_2`` are scaled by 100.
        """
        keypoints = []
        for link in [1, 2]:  # , 3]:
            kp_pos, _ = self.robot_model.compute_forward_kinematics(x, 'kp_link_' + str(link))
            # ``extend`` iterates over the first axis of the tensor, exactly as
            # the original ``list += tensor`` did: one list entry per row.
            # NOTE(review): presumably x holds a single configuration so each
            # link contributes one row -- confirm for batched inputs.
            keypoints.extend(100.0 * kp_pos)

        return torch.stack(keypoints).squeeze()


if __name__ == '__main__':

    random.seed(10)
    np.random.seed(10)
    torch.manual_seed(0)
    curr_dir = os.path.dirname(__file__)

    rel_urdf_path = 'env/kuka_iiwa/urdf/iiwa7_ft_with_obj_keypts.urdf'
    urdf_path = os.path.join(mbirl.__path__[0], rel_urdf_path)
    robot_model = DifferentiableRobotModel(urdf_path=urdf_path, name="kuka_w_obj_keypts")

dmodel = GroundTruthForwardModel(robot_model) 41 | 42 | rest_pose = [0.0, 0.0, 0.0, 1.57079633, 0.0, 1.03672558, 0.0] 43 | rest_pose = torch.Tensor(rest_pose).unsqueeze(dim=0) 44 | 45 | experiment_type = 'placing' 46 | 47 | regenerate_data = True 48 | 49 | if not os.path.exists(traj_data_dir): 50 | os.makedirs(traj_data_dir) 51 | 52 | joint_limits = [2.967, 2.094, 2.967, 2.094, 2.967, 2.094, 3.054] 53 | if regenerate_data or not os.path.exists(f'{traj_data_dir}/traj_data_{experiment_type}.pt'): 54 | trajectories = [] 55 | for traj_it in range(6): 56 | print(traj_it) 57 | traj_data = {} 58 | start_pose = rest_pose.clone() 59 | start_keypts = dmodel.forward_kin(start_pose) 60 | print(f"cur keypts: {start_keypts}") 61 | goal_keypts1 = start_keypts[-3:].clone() 62 | goal_keypts1[:, 0] = goal_keypts1[:, 0] + torch.Tensor([-20.0]) + torch.randn(1)[0] 63 | goal_keypts2 = goal_keypts1.clone() 64 | goal_keypts2[:, 2] = goal_keypts2[:, 2] + torch.Tensor([-30.0]) + torch.randn(1)[0] 65 | 66 | desired_keypt_traj = torch.stack([start_keypts.clone() for i in range(5)] + [goal_keypts1.clone() for i in range(5)]) 67 | 68 | for kp_idx in range(2): 69 | desired_keypt_traj[:5, kp_idx, 0] = torch.linspace(start_keypts[kp_idx, 0], goal_keypts1[kp_idx, 0], 5) 70 | desired_keypt_traj[5:, kp_idx, 2] = torch.linspace(goal_keypts1[kp_idx, 2], goal_keypts2[kp_idx, 2], 5) 71 | 72 | traj_data['start_joint_config'] = start_pose 73 | traj_data['desired_keypoints'] = desired_keypt_traj 74 | trajectories.append(traj_data) 75 | 76 | torch.save(trajectories, f"{traj_data_dir}/traj_data_{experiment_type}.pt") 77 | 78 | # visualization - matplotlib 79 | trajs = torch.load(f"{traj_data_dir}/traj_data_{experiment_type}.pt") 80 | 81 | n_trajs = len(trajs) 82 | 83 | fig = plt.figure(figsize=(2 * 5, int(np.ceil(n_trajs/2)) * 5)) 84 | for i, traj in enumerate(trajs): 85 | ax = fig.add_subplot(2, int(np.ceil(n_trajs/2)), i + 1, projection='3d') 86 | ax.plot(trajs[i]['desired_keypoints'][:, 0, 0], 
trajs[i]['desired_keypoints'][:, 0, 1], trajs[i]['desired_keypoints'][:, 0, 2]) 87 | ax.scatter(trajs[i]['desired_keypoints'][:, 0, 0], trajs[i]['desired_keypoints'][:, 0, 1], trajs[i]['desired_keypoints'][:, 0, 2], 88 | color='blue') 89 | ax.scatter(start_keypts[0, 0], start_keypts[0, 1], start_keypts[0, 2], 90 | color='red') 91 | ax.scatter(trajs[i]['desired_keypoints'][-1, 0, 0], trajs[i]['desired_keypoints'][-1, 0, 1], trajs[i]['desired_keypoints'][-1, 0, 2], 92 | color='green') 93 | min_x = -100.0; max_x = -50.0 94 | min_y = 0.0; max_y = 30 95 | min_z = 50; max_z = 100 96 | ax.set_xlim([min_x, max_x]) 97 | ax.set_ylim([min_y, max_y]) 98 | ax.set_zlim([min_z, max_z]) 99 | ax.set_xlabel("x") 100 | ax.set_ylabel("y") 101 | ax.set_zlabel("z") 102 | ax.set_title(f"Trajectory {i}") 103 | 104 | plt.tight_layout() 105 | plt.savefig(f'{traj_data_dir}/traj_data_{experiment_type}.png') 106 | plt.show() 107 | 108 | -------------------------------------------------------------------------------- /mbirl/keypoint_mpc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
import torch
import numpy as np


joint_limits = [2.967, 2.094, 2.967, 2.094, 2.967, 2.094, 3.054]


def _clamp_to_joint_limits(q):
    """Clamp joint values element-wise to the symmetric range [-joint_limits, joint_limits]."""
    # torch.where (rather than a min/max rewrite) keeps the original gradient
    # behaviour: gradient 1 inside the limits (inclusive), 0 where clamped.
    tl = torch.Tensor(joint_limits)
    q = torch.where(q > tl, tl, q)
    return torch.where(q < -tl, -tl, q)


class GroundTruthKeypointMPCWrapper(torch.nn.Module):
    """Keypoint MPC wrapper whose action sequence is the parameter to be optimized.

    This implementation assumes object keypoints are known and part of the
    robot model, so the keypoint dynamics model is implemented through a
    forward kinematics call on links ``kp_link_1`` and ``kp_link_2``.

    Args:
        model: object exposing ``compute_forward_kinematics(q, link_name)``;
            the first returned value is indexed with ``[0]`` here
            (presumably a (1, 3) position tensor -- confirm against the
            robot model in use).
        time_horizon: number of actions in the optimized sequence.
        n_keypt_dim: stored on the instance; not used inside this class.
    """

    def __init__(self, model, time_horizon, n_keypt_dim):
        super().__init__()
        self.time_horizon = time_horizon
        self.n_keypt_dim = n_keypt_dim
        # One joint-space delta per time step, initialised to zero.
        self.action_seq = torch.nn.Parameter(torch.zeros(time_horizon, len(joint_limits)))
        self.robot_model = model

    def forward(self, x, u=0):
        """Apply joint delta ``u`` to joint state ``x`` and compute the keypoints.

        Returns:
            Tuple ``(xdesired, keypoints)``: the joint state after adding ``u``
            and clamping to the joint limits, and the stacked (then squeezed)
            keypoint coordinates of both keypoint links, scaled by 100.
        """
        xdesired = _clamp_to_joint_limits(x + u)
        n_joints = len(joint_limits)
        keypoints = []
        for link in (1, 2):
            kp_pos, _ = self.robot_model.compute_forward_kinematics(
                xdesired.reshape(1, n_joints), 'kp_link_' + str(link))
            # Deliberately extend with the individual entries of the scaled
            # position (what the original ``list += tensor`` did implicitly),
            # so both links end up flattened into one vector after stacking.
            keypoints.extend(100.0 * kp_pos[0])
        return xdesired, torch.stack(keypoints).squeeze()

    def roll_out(self, joint_state):
        """Roll the action sequence out from ``joint_state``.

        Returns:
            Tensor with ``time_horizon + 1`` rows (the initial state plus one
            row per action); each row is the joint state concatenated with
            the corresponding keypoints.
        """
        joint_state, keypts = self.forward(joint_state)
        qs = [joint_state]
        key_pos = [keypts]
        for t in range(self.time_horizon):
            # forward() already clamps to the joint limits, so no second
            # clamp is needed here (it would be an exact no-op).
            joint_state, keypts = self.forward(joint_state, self.action_seq[t])
            qs.append(joint_state.clone())
            key_pos.append(keypts.clone())
        return torch.cat((torch.stack(qs), torch.stack(key_pos)), dim=1)

    def reset_actions(self):
        """Reset the action sequence to all zeros, keeping the same Parameter object."""
        self.action_seq.data = torch.zeros(self.time_horizon, len(joint_limits))
/mbirl/learnable_costs.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import torch 3 | 4 | 5 | # The learned weighted cost, with fixed weights ### 6 | class LearnableWeightedCost(torch.nn.Module): 7 | def __init__(self, dim=9, weights=None): 8 | super(LearnableWeightedCost, self).__init__() 9 | if weights is None: 10 | self.weights = torch.nn.Parameter(0.1 * torch.ones([dim, 1])) 11 | else: 12 | self.weights = weights 13 | self.dim = dim 14 | self.clip = torch.nn.ReLU() 15 | self.meta_grads = [[] for _, _ in enumerate(self.parameters())] 16 | 17 | def forward(self, y_in, y_target): 18 | assert y_in.dim() == 2 19 | mse = ((y_in[:,-self.dim:] - y_target[-self.dim:]) ** 2).squeeze() 20 | 21 | # weighted mse 22 | #wmse = torch.mm(mse, self.clip(self.weights)) 23 | wmse = torch.mm(mse, self.weights) 24 | return wmse.mean() 25 | 26 | 27 | # The learned weighted cost, with time dependent weights ### 28 | class LearnableTimeDepWeightedCost(torch.nn.Module): 29 | def __init__(self, time_horizon, dim=9, weights=None): 30 | super(LearnableTimeDepWeightedCost, self).__init__() 31 | if weights is None: 32 | self.weights = torch.nn.Parameter(0.01 * torch.ones([time_horizon, dim])) 33 | else: 34 | self.weights = weights 35 | self.clip = torch.nn.ReLU() 36 | self.dim = dim 37 | self.meta_grads = [[] for _, _ in enumerate(self.parameters())] 38 | 39 | def forward(self, y_in, y_target): 40 | assert y_in.dim() == 2 41 | mse = ((y_in[:,-self.dim:] - y_target[-self.dim:]) ** 2).squeeze() 42 | # weighted mse 43 | #wmse = mse * self.clip(self.weights) 44 | wmse = mse * self.weights 45 | return wmse.mean() 46 | 47 | 48 | class RBFWeights(torch.nn.Module): 49 | 50 | def __init__(self, time_horizon, dim, width, weights=None): 51 | super(RBFWeights, self).__init__() 52 | k_list = torch.linspace(0, time_horizon-1, 5) 53 | if weights is None: 54 | self.weights = torch.nn.Parameter(0.01 * 
torch.ones(len(k_list), dim)) 55 | else: 56 | self.weights = weights 57 | 58 | self.dim = dim 59 | 60 | x = torch.arange(0, 10) 61 | self.K = torch.stack([torch.exp(-(int(k) - x) ** 2 / width) for k in k_list]).T 62 | print(f"\nRBFWEIGHTS: {k_list}") 63 | 64 | self.clip = torch.nn.ReLU() 65 | 66 | def forward(self): 67 | #return self.K.matmul(self.clip(self.weights)) 68 | return self.K.matmul(self.weights) 69 | 70 | 71 | class LearnableRBFWeightedCost(torch.nn.Module): 72 | def __init__(self, time_horizon, dim=9, width=2.0, weights=None): 73 | super(LearnableRBFWeightedCost, self).__init__() 74 | self.dim = dim 75 | self.weights_fn = RBFWeights(time_horizon=time_horizon, dim=dim, width=width, weights=weights) 76 | self.weights = self.weights_fn() 77 | 78 | def forward(self, y_in, y_target): 79 | assert y_in.dim() == 2 80 | mse = (y_in[:, -self.dim:] - y_target[-self.dim:]) ** 2 81 | 82 | self.weights = self.weights_fn() 83 | wmse = self.weights * mse 84 | 85 | return wmse.sum(dim=0).mean() 86 | 87 | 88 | class BaselineCost(object): 89 | def __init__(self, dim, weights): 90 | self.weights = weights 91 | self.dim = dim 92 | 93 | def __call__(self, y_in, y_target): 94 | assert y_in.dim() == 2 95 | mse = ((y_in[:, -self.dim:] - y_target[-self.dim:]) ** 2).squeeze() 96 | 97 | # weighted mse 98 | wmse = mse * self.weights 99 | return wmse.mean() 100 | 101 | 102 | class IRLLoss(object): 103 | def __init__(self, dim): 104 | self.dim = dim 105 | 106 | def __call__(self, pred_traj, target_traj): 107 | loss = ((pred_traj[:, -self.dim:] - target_traj[:, -self.dim:]) ** 2).sum(dim=0) 108 | return loss.mean() 109 | -------------------------------------------------------------------------------- /ml3/README.md: -------------------------------------------------------------------------------- 1 | # LearningToLearn 2 | 3 | ## ML3 paper experiments and citation 4 | To reproduce results of the ML3 paper follow the instructions. 
5 | All loss models are stored in `experiments/data`; all plots are stored in `./plots`. 6 | 7 | #### Loss Learning for Regression (ML3 paper experiment section IV.A.1) 8 | For meta learning the loss run 9 | 10 | ``` 11 | python experiments/run_sine_regression_exp.py 12 | ``` 13 | 14 | For visualizing the results run `jupyter notebook` and open `ml3_sine_regression_exp_viz` 15 | 16 | #### Reward Learning for Model-based RL (MBRL) Reacher (ML3 section IV.A.2) 17 | For meta learning the reward, run 18 | 19 | ``` 20 | python experiments/run_mbrl_reacher_exp.py train 21 | ``` 22 | 23 | For testing the reward, run 24 | 25 | ``` 26 | python experiments/run_mbrl_reacher_exp.py test 27 | ``` 28 | 29 | #### Learning with extra information at meta-train time (ML3 section IV.B) 30 | The following scripts require two arguments: the first one is `train` or `test`, the 2nd one 31 | indicates whether to use extra information by setting `True` or `False` (with or without extra info) 32 | 33 | ##### For meta learning the loss with extra information on sine function run: 34 | In this experiment we show how the extra info can be used to shape the loss function for easier optimization. 35 | ``` 36 | python experiments/run_shaped_sine_exp.py train True 37 | ``` 38 | To test the loss with extra information run: 39 | ``` 40 | python experiments/run_shaped_sine_exp.py test True 41 | ``` 42 | To see how these results compare to not using the extra info, run the above scripts with the 2nd argument being `False` 43 | To visualize the loss landscapes for this experiment run `jupyter notebook` and open `Loss shaping visualization.ipynb` 44 | 45 | ##### For meta learning the loss with additional goal in the mountain car experiment run: 46 | In this experiment we show how the extra info can be used to guide exploration for an RL task. 
47 | ``` 48 | python experiments/run_mountain_car_exp.py train True 49 | ``` 50 | To test the loss with extra goal run: 51 | ``` 52 | python experiments/run_mountain_car_exp.py test True 53 | ``` 54 | The test script generates a gif of the final policy, and stores it in the experiment folder 55 | To see how these results compare to not using the extra info, run the above scripts with the 2nd argument being `False` 56 | -------------------------------------------------------------------------------- /ml3/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/LearningToLearn/fa32b98b40402fa15982b450ed09d9d3735ec924/ml3/__init__.py -------------------------------------------------------------------------------- /ml3/envs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/LearningToLearn/fa32b98b40402fa15982b450ed09d9d3735ec924/ml3/envs/__init__.py -------------------------------------------------------------------------------- /ml3/envs/bullet_sim.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | import os 3 | 4 | import numpy as np 5 | 6 | import pybullet_utils.bullet_client as bc 7 | import pybullet_data 8 | import pybullet 9 | 10 | class BulletSimulation(object): 11 | 12 | def __init__(self, gui, controlled_joints, ee_idx, torque_limits,target_pos): 13 | 14 | if gui: 15 | self.sim = bc.BulletClient(connection_mode=pybullet.GUI) 16 | else: 17 | self.sim = bc.BulletClient(connection_mode=pybullet.DIRECT) 18 | self.sim.setAdditionalSearchPath(pybullet_data.getDataPath()) 19 | 20 | self.ee_idx = ee_idx 21 | 22 | self.cur_joint_pos = None 23 | self.cur_joint_vel = None 24 | self.curr_ee = None 25 | self.babbling_torque_limits = None 26 | self.logger = None 27 | 28 | self.controlled_joints = controlled_joints 29 | self.torque_limits = torque_limits 30 | 31 | self.n_dofs = len(controlled_joints) 32 | 33 | #pybullet.setAdditionalSearchPath(pybullet_data.getDataPath()) 34 | # TODO: the following should be extracted into some world model that is loaded independently of the robot 35 | #self.planeId = pybullet.loadURDF("plane.urdf",[0,0,0]) 36 | if target_pos is not None: 37 | self.cubeId = pybullet.loadURDF("sphere_small.urdf",target_pos) 38 | 39 | def disconnect(self): 40 | self.sim.disconnect() 41 | #pybullet.disconnect() 42 | 43 | def get_random_torques(self, time_horizon, bounds_low, bounds_high): 44 | trajectory= [] 45 | for t in range(time_horizon): 46 | torque_limits_babbling = bounds_high 47 | torques = np.random.uniform(-torque_limits_babbling, torque_limits_babbling) 48 | trajectory.append(np.array(torques)) 49 | return np.array(trajectory) 50 | 51 | def get_random_torques_uinit(self, time_horizon): 52 | trajectory= [] 53 | for t in range(time_horizon): 54 | torques = np.random.uniform(-0.3*self.torque_limits, 0.3*self.torque_limits) 55 | trajectory.append(np.array(torques)) 56 | return np.array(trajectory) 57 | 58 | def get_target_joint_configuration(self, target_position): 59 | self.reset() 60 | des_joint_state = 
self.sim.calculateInverseKinematics(self.robot_id, 61 | self.ee_idx, 62 | np.array(target_position), jointDamping = [0.1 for i in range(self.n_dofs)]) 63 | return np.asarray(des_joint_state) 64 | 65 | def reset(self, joint_pos=None, joint_vel=None): 66 | if joint_vel is None: 67 | joint_vel = list(np.zeros(self.n_dofs)) 68 | 69 | if joint_pos is None: 70 | joint_pos = list(np.zeros(self.n_dofs)) 71 | 72 | for i in range(self.n_dofs): 73 | self.sim.resetJointState(bodyUniqueId=self.robot_id, 74 | jointIndex=self.controlled_joints[i], 75 | targetValue=joint_pos[i], 76 | targetVelocity=joint_vel[i]) 77 | 78 | self.sim.stepSimulation() 79 | self.cur_joint_pos = self.get_current_joint_pos() 80 | self.cur_joint_vel = self.get_current_joint_vel() 81 | self.curr_ee = self.get_current_ee_state() 82 | return np.hstack([self.get_current_joint_pos(),self.get_current_joint_vel()]) 83 | 84 | def move_to_joint_positions(self, joint_pos, joint_vel=None): 85 | if joint_vel is None: 86 | joint_vel = [0]*len(joint_pos) 87 | 88 | for i in range(self.n_dofs): 89 | self.sim.resetJointState(bodyUniqueId=self.robot_id, 90 | jointIndex=self.controlled_joints[i], 91 | targetValue=joint_pos[i], 92 | targetVelocity=joint_vel[i]) 93 | 94 | self.sim.stepSimulation() 95 | 96 | self.cur_joint_pos = self.get_current_joint_pos() 97 | self.cur_joint_vel = self.get_current_joint_vel() 98 | self.curr_ee = self.get_current_ee_state() 99 | return np.hstack([self.cur_joint_pos,self.cur_joint_vel]) 100 | 101 | def get_MassM(self,angles): 102 | for link_idx in self.controlled_joints: 103 | self.sim.changeDynamics(self.robot_id, link_idx, linearDamping=0.0, angularDamping=0.0, jointDamping=0.0) 104 | cur_joint_angles = list(angles) 105 | mass_m = self.sim.calculateMassMatrix(bodyUniqueId=self.robot_id, 106 | objPositions = cur_joint_angles) 107 | 108 | return np.array(mass_m) 109 | 110 | def get_F(self,angles,vel): 111 | for link_idx in self.controlled_joints: 112 | self.sim.changeDynamics(self.robot_id, 
link_idx, linearDamping=0.0, angularDamping=0.0, jointDamping=0.0) 113 | cur_joint_angles = list(angles) 114 | cur_joint_vel = list(vel) 115 | torques = self.sim.calculateInverseDynamics(self.robot_id, 116 | cur_joint_angles, 117 | cur_joint_vel, 118 | [0]*self.action_dim) 119 | return np.asarray(torques) 120 | 121 | 122 | 123 | def joint_angles(self): 124 | return self.cur_joint_pos 125 | 126 | def joint_velocities(self): 127 | return self.cur_joint_vel 128 | 129 | def forwad_kin(self,state): 130 | return self.endeffector_pos() 131 | 132 | def endeffector_pos(self): 133 | return self.curr_ee 134 | 135 | def get_target_ee(self, state): 136 | 137 | for i in range(self.n_dofs): 138 | self.sim.resetJointState(bodyUniqueId=self.robot_id, 139 | jointIndex=self.controlled_joints[i], 140 | targetValue=state[i], 141 | targetVelocity=0.0) 142 | self.sim.stepSimulation() 143 | 144 | ls = self.sim.getLinkState(self.robot_id, self.ee_idx)[0] 145 | return ls 146 | 147 | def reset_then_step(self, des_joint_state, torque): 148 | # for link_idx in self.controlled_joints: 149 | # self.sim.changeDynamics(self.robot_id, link_idx, linearDamping=0.0, angularDamping=0.0, jointDamping=0.0) 150 | for i in range(self.n_dofs): 151 | self.sim.resetJointState(bodyUniqueId=self.robot_id, 152 | jointIndex=self.controlled_joints[i], 153 | targetValue=des_joint_state[i], 154 | targetVelocity=des_joint_state[(i+self.n_dofs)]) 155 | 156 | return self.apply_joint_torque(torque)[0] 157 | 158 | def step_model(self,state,torque): 159 | return self.sim_step(state,torque) 160 | 161 | def sim_step(self,state,torque): 162 | for link_idx in self.controlled_joints: 163 | self.sim.changeDynamics(self.robot_id, link_idx, linearDamping=0.0, angularDamping=0.0, jointDamping=0.0) 164 | if str(state.dtype).startswith('torch'): 165 | state = state.clone().detach().numpy() 166 | if str(torque.dtype).startswith('torch'): 167 | torque = torque.clone().detach().numpy() 168 | return self.reset_then_step(state,torque) 
169 | 170 | def step(self,state,torque): 171 | for link_idx in self.controlled_joints: 172 | self.sim.changeDynamics(self.robot_id, link_idx, linearDamping=0.0, angularDamping=0.0, jointDamping=0.0) 173 | if str(state.dtype).startswith('torch'): 174 | state = state.clone().detach().numpy() 175 | if str(torque.dtype).startswith('torch'): 176 | torque = torque.clone().detach().numpy() 177 | return self.reset_then_step(state,torque) 178 | 179 | def apply_joint_torque(self, torque): 180 | 181 | 182 | self.grav_comp = self.inverse_dynamics([0] * self.action_dim) 183 | torque = torque + self.grav_comp 184 | full_torque = torque.copy() 185 | 186 | 187 | #torque = torque.clip(-self.torque_limits, self.torque_limits) 188 | 189 | self.sim.setJointMotorControlArray(bodyIndex=self.robot_id, 190 | jointIndices=self.controlled_joints, 191 | controlMode=pybullet.TORQUE_CONTROL, 192 | forces=torque) 193 | self.sim.stepSimulation() 194 | 195 | cur_joint_states = self.sim.getJointStates(self.robot_id, self.controlled_joints) 196 | cur_joint_angles = [cur_joint_states[i][0] for i in range(self.n_dofs)] 197 | cur_joint_vel = [cur_joint_states[i][1] for i in range(self.n_dofs)] 198 | 199 | next_state = cur_joint_angles + cur_joint_vel 200 | 201 | ls = list(self.sim.getLinkState(self.robot_id, self.ee_idx)[0]) 202 | self.cur_joint_pos = self.get_current_joint_pos() 203 | self.cur_joint_vel = self.get_current_joint_vel() 204 | self.curr_ee = self.get_current_ee_state() 205 | return np.hstack([self.cur_joint_pos,self.cur_joint_vel]),self.curr_ee 206 | 207 | def get_current_ee_state(self): 208 | ee_state = self.sim.getLinkState(self.robot_id, self.ee_idx) 209 | return np.array(ee_state[0]) 210 | 211 | def get_current_joint_pos(self): 212 | cur_joint_states = self.sim.getJointStates(self.robot_id, self.controlled_joints) 213 | cur_joint_angles = [cur_joint_states[i][0] for i in range(self.n_dofs)] 214 | return np.array(cur_joint_angles) 215 | 216 | def get_current_joint_vel(self): 217 | 
cur_joint_states = self.sim.getJointStates(self.robot_id, self.controlled_joints) 218 | cur_joint_vel = [cur_joint_states[i][1] for i in range(self.n_dofs)] 219 | return np.array(cur_joint_vel) 220 | 221 | def get_current_joint_state(self): 222 | cur_joint_states = self.sim.getJointStates(self.robot_id, self.controlled_joints) 223 | cur_joint_angles = [cur_joint_states[i][0] for i in range(self.n_dofs)] 224 | cur_joint_vel = [cur_joint_states[i][1] for i in range(self.n_dofs)] 225 | return np.hstack([cur_joint_angles, cur_joint_vel]) 226 | 227 | def get_ee_jacobian(self): 228 | cur_joint_states = self.sim.getJointStates(self.robot_id, self.controlled_joints) 229 | cur_joint_angles = [cur_joint_states[i][0] for i in range(self.n_dofs)] 230 | cur_joint_vel = [cur_joint_states[i][1] for i in range(self.n_dofs)] 231 | bullet_jac_lin, bullet_jac_ang = self.sim.calculateJacobian( 232 | bodyUniqueId=self.robot_id, 233 | linkIndex=self.ee_idx, 234 | localPosition=[0, 0, 0], 235 | objPositions=cur_joint_angles, 236 | objVelocities=cur_joint_vel, 237 | objAccelerations=[0] * self.n_dofs, 238 | ) 239 | return np.asarray(bullet_jac_lin), np.asarray(bullet_jac_ang) 240 | 241 | def inverse_dynamics(self, des_acc): 242 | for link_idx in self.controlled_joints: 243 | self.sim.changeDynamics(self.robot_id, link_idx, linearDamping=0.0, angularDamping=0.0, jointDamping=0.0) 244 | cur_joint_states = self.sim.getJointStates(self.robot_id, self.controlled_joints) 245 | cur_joint_angles = [cur_joint_states[i][0] for i in range(self.n_dofs)] 246 | cur_joint_vel = [cur_joint_states[i][1] for i in range(self.n_dofs)] 247 | torques = self.sim.calculateInverseDynamics(self.robot_id, 248 | cur_joint_angles, 249 | cur_joint_vel, 250 | des_acc) 251 | return np.asarray(torques) 252 | 253 | 254 | def detect_collision(self): 255 | return False 256 | 257 | def return_grav_comp_torques(self): 258 | return 0.0 259 | 260 | def get_pred_error(self,x,u): 261 | return np.zeros(len(u)) 262 | 263 | def 
sim_step_un(self,x,u): 264 | return np.zeros(len(u)),np.zeros(len(u)) 265 | 266 | def get_gravity_comp(self): 267 | return 0.0 268 | 269 | 270 | class BulletSimulationFromURDF(BulletSimulation): 271 | def __init__(self, rel_urdf_path, gui, controlled_joints, ee_idx, torque_limits, target_pos): 272 | super(BulletSimulationFromURDF, self).__init__(gui, controlled_joints, ee_idx, torque_limits, target_pos) 273 | urdf_path = os.getcwd()+'/envs/'+rel_urdf_path 274 | print("loading urdf file: {}".format(urdf_path)) 275 | 276 | self.robot_id = self.sim.loadURDF(urdf_path, basePosition=[-0.5, 0, 0.0], useFixedBase=True) 277 | self.n_dofs = len(controlled_joints) 278 | 279 | pybullet.setAdditionalSearchPath(pybullet_data.getDataPath()) 280 | #self.planeId = pybullet.loadURDF("plane.urdf") 281 | 282 | self.sim.resetBasePositionAndOrientation(self.robot_id,[-0.5,0,0.0],[0,0,0,1]) 283 | self.sim.setGravity(0, 0, -9.81) 284 | dt = 1.0/240.0 285 | self.dt = dt 286 | self.sim.setTimeStep(dt) 287 | self.sim.setRealTimeSimulation(0) 288 | self.sim.setJointMotorControlArray(self.robot_id, 289 | self.controlled_joints, 290 | pybullet.VELOCITY_CONTROL, 291 | forces=np.zeros(self.n_dofs)) 292 | 293 | 294 | class BulletSimulationFromMJCF(BulletSimulation): 295 | 296 | def __init__(self, rel_mjcf_path, gui, controlled_joints, ee_idx, torque_limits): 297 | super(BulletSimulationFromMJCF, self).__init__(gui, controlled_joints, ee_idx, torque_limits, None) 298 | print('hierhierhierhier') 299 | 300 | xml_path = os.getcwd()+'/envs/'+rel_mjcf_path 301 | if rel_mjcf_path[0] != os.sep: xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_mjcf_path) 302 | else: xml_path = rel_mjcf_path 303 | 304 | #xml_path = '/Users/sarah/Documents/GitHub/LearningToLearn/ml3/envs/mujoco_robots/reacher.xml' 305 | print("loading this mjcf file: {}".format(xml_path)) 306 | 307 | self.world_id, self.robot_id = self.sim.loadMJCF(xml_path) 308 | 309 | 
pybullet.setAdditionalSearchPath(pybullet_data.getDataPath()) 310 | print(pybullet_data.getDataPath()) 311 | self.planeId = pybullet.loadURDF("plane.urdf") 312 | #self.cubeId = pybullet.loadURDF("sphere_small.urdf", [0.02534078, -0.19863741, 0.01]) #0.02534078, -0.19863741 0.10534078, 0.1663741 313 | 314 | self.n_dofs = len(controlled_joints) 315 | self.sim.setGravity(0, 0, -9.81) 316 | dt = 1.0/100.0 317 | self.dt = dt 318 | self.sim.setTimeStep(dt) 319 | self.sim.setRealTimeSimulation(0) 320 | self.sim.setJointMotorControlArray(self.robot_id, 321 | self.controlled_joints, 322 | pybullet.VELOCITY_CONTROL, 323 | forces=np.zeros(self.n_dofs)) 324 | -------------------------------------------------------------------------------- /ml3/envs/mountain_car.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import numpy as np 3 | import torch 4 | import matplotlib.pyplot as plt 5 | import matplotlib.animation as animation 6 | 7 | 8 | class MountainCar(): 9 | def __init__(self): 10 | self.m = 0.2 11 | self.g = -9.8 12 | self.k = 0.3 13 | self.max_position = 0.5 14 | self.min_position = -1.2 15 | self.max_speed = 1.5 16 | self.min_speed = -1.5 17 | self.delta_t = 0.1 18 | self.min_action = -0.25 19 | self.max_action = 0.25 20 | self.cur_pos = 0.0 21 | self.cur_vel = 0.0 22 | 23 | def sim_step_torch(self, state, action): 24 | 25 | position = state[0] 26 | velocity = state[1] 27 | 28 | action = torch.clamp(action, min=self.min_action, max=self.max_action) 29 | 30 | velocity = velocity + (self.g * self.m * torch.cos(3.0 * position) + (action / self.m) - ( 31 | self.k * velocity)) * self.delta_t 32 | position = position + (velocity * self.delta_t) 33 | 34 | if (velocity.data > self.max_speed): velocity.data = torch.Tensor([self.max_speed]) 35 | if (velocity.data < -self.max_speed): velocity.data = torch.Tensor([-self.max_speed]) 36 | 37 | if (position.data >= self.max_position): 
position.data = torch.Tensor([self.max_position]) 38 | if (position.data < self.min_position): position.data = torch.Tensor([self.min_position]) 39 | if (position.data == self.min_position and velocity.data < 0): velocity.data = torch.Tensor([0.0]) 40 | 41 | new_state = torch.stack([position.squeeze(), velocity.squeeze()]) 42 | return new_state 43 | 44 | def sim_step(self, state, action): 45 | position = state[0] 46 | velocity = state[1] 47 | 48 | velocity = velocity + (self.g * self.m * np.cos(3 * position) + (action / self.m) - (self.k * velocity)) * self.delta_t 49 | position = position + (velocity * self.delta_t) 50 | 51 | if (velocity > self.max_speed): velocity = self.max_speed 52 | if (velocity < -self.max_speed): velocity = -self.max_speed 53 | 54 | if (position > self.max_position): position = self.max_position 55 | if (position < self.min_position): position = self.min_position 56 | if (position==self.min_position and velocity<0): velocity = 0 57 | 58 | new_state = np.array([position, velocity]) 59 | return new_state.squeeze() 60 | 61 | def step(self, action): 62 | position = self.cur_pos 63 | velocity = self.cur_vel 64 | 65 | velocity = velocity + ( 66 | self.g * self.m * np.cos(3 * position) + (action / self.m) - (self.k * velocity)) * self.delta_t 67 | position = position + (velocity * self.delta_t) 68 | 69 | if (velocity > self.max_speed): velocity = self.max_speed 70 | if (velocity < -self.max_speed): velocity = -self.max_speed 71 | 72 | if (position > self.max_position): position = self.max_position 73 | if (position < self.min_position): position = self.min_position 74 | if (position == self.min_position and velocity < 0): velocity = 0 75 | 76 | new_state = np.array([position, velocity]) 77 | self.cur_pos = position 78 | self.cur_vel = velocity 79 | reward = 0 80 | if new_state[0] >= 0.5: 81 | reward = 100 82 | return np.array([self.cur_pos, self.cur_vel]), reward 83 | 84 | def reset(self): 85 | self.cur_pos = -0.55 86 | self.cur_vel = 0 87 | 
return np.array([self.cur_pos, self.cur_vel]) 88 | 89 | def reset_to(self, state): 90 | self.cur_pos = state[0] 91 | self.cur_vel = state[1] 92 | return np.array([self.cur_pos, self.cur_vel]) 93 | 94 | def render(self, position_list, file_path='./mountain_car.gif', mode='gif'): 95 | """ When the method is called it saves an animation 96 | of what happened until that point in the episode. 97 | Ideally it should be called at the end of the episode, 98 | and every k episodes. 99 | 100 | ATTENTION: It requires avconv and/or imagemagick installed. 101 | @param file_path: the name and path of the video file 102 | @param mode: the file can be saved as 'gif' or 'mp4' 103 | """ 104 | 105 | # Plot init 106 | fig = plt.figure(figsize=(4,4)) 107 | ax = fig.add_subplot(111, autoscale_on=False, xlim=(-1.3, 0.6), ylim=(-1.2, 1.5)) 108 | ax.grid(False) # disable the grid 109 | x_sin = np.linspace(start=-1.2, stop=0.5, num=100) 110 | y_sin = np.sin(3 * x_sin) 111 | ax.plot(x_sin, y_sin,c='black',linewidth=3) # plot the sine wave 112 | ax.plot(0.50, 1.16, marker="$\u2691$", markersize=25, color='green') 113 | 114 | dot, = ax.plot([], [],marker="$\u25A1$",markersize=15,color='red') 115 | time_text = ax.text(0.05, 0.9, '', transform=ax.transAxes) 116 | _position_list = position_list 117 | _delta_t = self.delta_t 118 | 119 | def _init(): 120 | dot.set_data([], []) 121 | time_text.set_text('') 122 | return dot, time_text 123 | 124 | def _animate(i): 125 | x = _position_list[i] 126 | y = np.sin(3 * x) 127 | dot.set_data(x, y) 128 | time_text.set_text("") 129 | return dot, time_text 130 | 131 | ani = animation.FuncAnimation(fig, _animate, np.arange(1, len(position_list)), 132 | blit=True, init_func=_init, repeat=False) 133 | 134 | if mode == 'gif': 135 | ani.save(file_path, writer='imagemagick', fps=int(1 / self.delta_t)) 136 | elif mode == 'mp4': 137 | ani.save(file_path, fps=int(1 / self.delta_t), writer='avconv', codec='libx264') 138 | # Clear the figure 139 | fig.clear() 140 | 
class ReacherSimulation(BulletSimulationFromMJCF):
    """Bullet simulation of the reacher robot described by an MJCF file.

    Thin configuration layer over ``BulletSimulationFromMJCF``: it fills in
    reacher-specific defaults, records the problem dimensions and prints the
    joint table of the loaded robot.
    """

    def __init__(self, gui, file_name='mujoco_robots/reacher.xml', controlled_joints=None,
                 ee_idx=None, torque_limits=None):
        """Create the simulation.

        Args:
            gui: passed to the base class; when truthy the debug camera is
                also repositioned.
            file_name: MJCF path, forwarded as ``rel_mjcf_path``.
            controlled_joints: joint indices to control; defaults to ``[0, 2]``.
            ee_idx: end-effector link index; defaults to ``4`` (the fingertip).
            torque_limits: per-joint torque limits; defaults to ``[1, 1]``.
        """
        # Resolve defaults first. Previously ``self.ee_idx`` was only assigned
        # when ``ee_idx`` was None, so passing an explicit index crashed with
        # AttributeError at the ``super().__init__`` call below.
        if ee_idx is None:
            ee_idx = 4  # fingertip
        if controlled_joints is None:
            controlled_joints = [0, 2]
        if torque_limits is None:
            torque_limits = np.asarray([1, 1])

        self.ee_idx = ee_idx
        self.action_dim = 2
        self.state_dim = 4
        self.pos_dim = 2

        super(ReacherSimulation, self).__init__(rel_mjcf_path=file_name,
                                                gui=gui,
                                                controlled_joints=controlled_joints,
                                                ee_idx=self.ee_idx,
                                                torque_limits=torque_limits)

        if gui:
            self.sim.resetDebugVisualizerCamera(cameraDistance=0.5, cameraYaw=-50, cameraPitch=-50,
                                                cameraTargetPosition=[0, 0, 0])

        # Print the joint table of the loaded robot.
        n_dofs_total = self.sim.getNumJoints(self.robot_id)
        print("n dofs total (including fixed joints): {}".format(n_dofs_total))

        for i in range(n_dofs_total):
            print(self.sim.getJointInfo(self.robot_id, i))
visualization.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "import ml3\n", 11 | "EXP_FOLDER = os.path.join(ml3.__path__[0], \"experiments/data/shaped_sine\")" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "# Shaping Loss Example" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "### Before running visualization please run in Terminal:" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 5, 31 | "metadata": {}, 32 | "outputs": [ 33 | { 34 | "name": "stdout", 35 | "output_type": "stream", 36 | "text": [ 37 | "task loss: 0.002699082251638174\n", 38 | "task loss: 14.513303756713867\n", 39 | "task loss: 0.0004392655100673437\n", 40 | "task loss: 0.1109960600733757\n", 41 | "task loss: 0.010903225280344486\n", 42 | "task loss: 0.00028027829830534756\n", 43 | "task loss: 0.00023076884099282324\n", 44 | "task loss: 0.01268570777028799\n", 45 | "task loss: 0.005605524405837059\n", 46 | "task loss: 0.0009330477914772928\n" 47 | ] 48 | } 49 | ], 50 | "source": [ 51 | "!python run_shaped_sine_exp.py train True" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 7, 57 | "metadata": {}, 58 | "outputs": [ 59 | { 60 | "name": "stdout", 61 | "output_type": "stream", 62 | "text": [ 63 | "task loss: 1.9977433112217113e-06\n", 64 | "task loss: 0.5099665522575378\n", 65 | "task loss: 0.48222532868385315\n", 66 | "task loss: 0.0001230674679391086\n", 67 | "task loss: 0.4575786292552948\n", 68 | "task loss: 0.4234389066696167\n", 69 | "task loss: 0.5580686330795288\n", 70 | "task loss: 0.48198580741882324\n", 71 | "task loss: 0.0002105423336615786\n", 72 | "task loss: 0.4930514395236969\n" 73 | ] 74 | } 75 | ], 76 | "source": [ 77 | "!python 
run_shaped_sine_exp.py train False" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 5, 83 | "metadata": {}, 84 | "outputs": [ 85 | { 86 | "name": "stdout", 87 | "output_type": "stream", 88 | "text": [ 89 | "Populating the interactive namespace from numpy and matplotlib\n" 90 | ] 91 | } 92 | ], 93 | "source": [ 94 | "%pylab inline\n", 95 | "from ml3.shaped_sine_utils import render\n", 96 | "from ml3.shaped_sine_utils import plot_loss\n", 97 | "def normalize_data(data):\n", 98 | " norm_data = []\n", 99 | " for d in data:\n", 100 | " norm_data.append((d-min(d))/(max(d)-min(d)))\n", 101 | " return norm_data" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 6, 107 | "metadata": {}, 108 | "outputs": [ 109 | { 110 | "data": { 111 | "text/plain": [ 112 | "" 113 | ] 114 | }, 115 | "execution_count": 6, 116 | "metadata": {}, 117 | "output_type": "execute_result" 118 | }, 119 | { 120 | "data": { 121 | "image/png": "\n", 122 | "text/plain": [ 123 | "
" 124 | ] 125 | }, 126 | "metadata": { 127 | "needs_background": "light" 128 | }, 129 | "output_type": "display_data" 130 | } 131 | ], 132 | "source": [ 133 | "figure(figsize=(10,3))\n", 134 | "\n", 135 | "freq=0.5\n", 136 | "theta_ranges, landscape_with_extra, landscape_mse = plot_loss(extra=True,exp_folder=EXP_FOLDER,freq=freq)\n", 137 | "subplot(2,2,1)\n", 138 | "plt.plot(theta_ranges,landscape_with_extra)\n", 139 | "plt.ylabel('Loss')\n", 140 | "plt.legend(['Shaped ML$^3$ Landscape'])\n", 141 | "plt.axvline(x=freq,c='red')\n", 142 | "subplot(2,2,3)\n", 143 | "plt.plot(theta_ranges,landscape_mse,c='C1')\n", 144 | "plt.xlabel('Theta')\n", 145 | "plt.ylabel('Loss')\n", 146 | "plt.legend(['MSE Loss Landscape'])\n", 147 | "plt.axvline(x=freq,c='red')\n", 148 | "\n", 149 | "\n", 150 | "theta_ranges, landscape_wo_extra, landscape_mse = plot_loss(extra=False,exp_folder=EXP_FOLDER,freq=freq)\n", 151 | "subplot(2,2,2)\n", 152 | "plt.plot(theta_ranges,landscape_wo_extra)\n", 153 | "plt.ylabel('Loss')\n", 154 | "plt.legend(['ML$^3$ Landscape'])\n", 155 | "plt.axvline(x=freq,c='red')\n", 156 | "subplot(2,2,4)\n", 157 | "plt.plot(theta_ranges,landscape_mse,c='C1')\n", 158 | "plt.xlabel('Theta')\n", 159 | "plt.ylabel('Loss')\n", 160 | "plt.legend(['MSE Loss Landscape'])\n", 161 | "plt.axvline(x=freq,c='red')" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 4, 167 | "metadata": {}, 168 | "outputs": [ 169 | { 170 | "name": "stderr", 171 | "output_type": "stream", 172 | "text": [ 173 | "MovieWriter imagemagick unavailable; using Pillow instead.\n", 174 | "MovieWriter imagemagick unavailable; using Pillow instead.\n", 175 | "MovieWriter imagemagick unavailable; using Pillow instead.\n" 176 | ] 177 | } 178 | ], 179 | "source": [ 180 | "theta_ranges = np.load(f'{EXP_FOLDER}/theta_ranges_True_.npy')\n", 181 | "ml3_extra_loss = normalize_data(np.load(f'{EXP_FOLDER}/landscape_with_extra_True_.npy'))\n", 182 | "ml3_mse_loss = 
normalize_data(np.load(f'{EXP_FOLDER}/landscape_mse_False_.npy'))\n", 183 | "ml3_not_shaped_loss = normalize_data(np.load(f'{EXP_FOLDER}/landscape_with_extra_False_.npy'))\n", 184 | "freq=0.5\n", 185 | "render(theta_ranges,ml3_extra_loss,'C0',freq=freq,file_path=f'{EXP_FOLDER}/ml3_shaped_loss_sine.gif')\n", 186 | "render(theta_ranges,ml3_not_shaped_loss,'C2',freq=freq,file_path=f'{EXP_FOLDER}/ml3_not_shaped_loss_sine.gif')\n", 187 | "render(theta_ranges,ml3_mse_loss,'C1',freq=freq,file_path=f'{EXP_FOLDER}/mse_loss_sine.gif')" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "metadata": {}, 194 | "outputs": [], 195 | "source": [] 196 | } 197 | ], 198 | "metadata": { 199 | "kernelspec": { 200 | "display_name": "Python 3", 201 | "language": "python", 202 | "name": "python3" 203 | }, 204 | "language_info": { 205 | "codemirror_mode": { 206 | "name": "ipython", 207 | "version": 3 208 | }, 209 | "file_extension": ".py", 210 | "mimetype": "text/x-python", 211 | "name": "python", 212 | "nbconvert_exporter": "python", 213 | "pygments_lexer": "ipython3", 214 | "version": "3.7.9" 215 | } 216 | }, 217 | "nbformat": 4, 218 | "nbformat_minor": 4 219 | } 220 | -------------------------------------------------------------------------------- /ml3/experiments/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/LearningToLearn/fa32b98b40402fa15982b450ed09d9d3735ec924/ml3/experiments/__init__.py -------------------------------------------------------------------------------- /ml3/experiments/run_mbrl_reacher_exp.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
def random_babbling(env, time_horizon):
    """Roll out uniformly random actions to collect exploration data.

    Args:
        env: object providing ``reset()`` and ``sim_step(state, action)``.
        time_horizon: number of actions to apply.

    Returns:
        ``(states, actions)``: ``states`` stacks the reset state followed by
        one state per action (``time_horizon + 1`` rows); ``actions`` is the
        ``[time_horizon, 2]`` array drawn from U(-1, 1).
    """
    # All actions are sampled up front, before the environment is reset.
    actions = np.random.uniform(-1.0, 1.0, [time_horizon, 2])

    state = env.reset()
    trajectory = [state]
    for action in actions:
        state = env.sim_step(state, action)
        # Store a copy so later in-place changes by the env cannot alter history.
        trajectory.append(state.copy())

    return np.array(trajectory), actions
sys.argv[1] == 'train': 69 | 70 | n_outer_iter = 3000 # 3000 71 | n_inner_iter = 1 72 | 73 | for random_data in range(3): 74 | states, actions = random_babbling(env, time_horizon) 75 | dmodel.train(torch.Tensor(states), torch.Tensor(actions)) 76 | 77 | meta_train(policy, ml3_loss,dmodel,env, task_loss, goals, n_outer_iter, n_inner_iter, time_horizon, EXP_FOLDER) 78 | 79 | if sys.argv[1] == 'test': 80 | ml3_loss.load_state_dict(torch.load(f"{EXP_FOLDER}/ml3_loss_reacher.pt")) 81 | ml3_loss.eval() 82 | opt_iter = 2 83 | 84 | xy = [0.05534078, 0.150863741] 85 | 86 | test_goal = np.array(env.get_target_joint_configuration(np.array([xy[0], xy[1], 0.0]))) 87 | test_goal = np.hstack([test_goal, np.zeros(2)]) 88 | args = (torch.Tensor(test_goal),time_horizon,None,env,True) 89 | print('goal joint position:', test_goal[:2]) 90 | states = test_ml3_loss(policy, ml3_loss,opt_iter,*args) 91 | print('achieved joint position',states[-1,:2]) 92 | -------------------------------------------------------------------------------- /ml3/experiments/run_mountain_car_exp.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
class Task_loss(object):
    """Task loss for the mountain-car experiment (distance of states to a goal)."""

    def __call__(self, a, s, goal, goal_exp, shaped_loss):
        """Return the scalar task loss of a state trajectory.

        Args:
            a: actions (accepted for interface compatibility, not used).
            s: state trajectory tensor.
            goal: goal state.
            goal_exp: intermediate goal used only by the shaped loss.
            shaped_loss: if truthy, the first 15 states are compared against
                ``goal_exp`` and the remaining ones against ``goal``.
        """
        distance_to_goal = torch.norm(s - goal).mean()
        if not shaped_loss:
            return distance_to_goal

        # Shaped variant: two segments with different targets, summed.
        early_term = torch.norm(s[:15] - goal_exp).mean()
        late_term = torch.norm(s[15:] - goal).mean()
        return early_term + late_term
def generate_sinusoid_batch(num_tasks, num_examples_task, num_steps, random_steps=False,
                            freq_range=(-5.0, 5.0), input_range=(-5.0, 5.0)):
    """Generate samples from random sine functions ``y = sin(freq * x)``.

    One frequency is drawn per task from ``U(freq_range)``; inputs are drawn
    from ``U(input_range)``.

    Args:
        num_tasks: number of sine functions (one frequency each).
        num_examples_task: number of input samples per task and step.
        num_steps: number of steps per task.
        random_steps: if True every step gets freshly sampled inputs; if
            False one set of inputs is sampled per task and reused for all
            steps.
        freq_range: ``(low, high)`` bounds for the frequency.
        input_range: ``(low, high)`` bounds for the inputs.

    Returns:
        ``(init_inputs, outputs, thetas)``, each an array of shape
        ``[num_tasks, num_steps, num_examples_task, 1]``; ``thetas`` holds the
        task frequency broadcast to that shape.
    """
    # Defaults are tuples (immutable) instead of shared mutable lists.
    freq_low, freq_high = freq_range[0], freq_range[1]
    in_low, in_high = input_range[0], input_range[1]

    # NOTE: the order of the random draws (frequencies first, then inputs
    # task by task) is kept so seeded runs reproduce the same data.
    freq = np.random.uniform(freq_low, freq_high, [num_tasks])

    batch_shape = [num_tasks, num_steps, num_examples_task, 1]
    init_inputs = np.zeros(batch_shape)
    outputs = np.zeros(batch_shape)
    thetas = np.zeros(batch_shape)

    for task, task_freq in enumerate(freq):
        if random_steps:
            init_inputs[task] = np.random.uniform(in_low, in_high,
                                                  [num_steps, num_examples_task, 1])
        else:
            # A single draw, broadcast over the step axis on assignment.
            init_inputs[task] = np.random.uniform(in_low, in_high,
                                                  [1, num_examples_task, 1])

        outputs[task] = np.sin(task_freq * init_inputs[task])
        thetas[task] = task_freq

    return init_inputs, outputs, thetas
if __name__ == "__main__":
    # Experiment configuration handed to the sine-regression meta-training
    # entry point. Key order matches the original incremental construction.
    exp_cfg = {
        'seed': 0,
        'num_train_tasks': 1,
        'num_test_tasks': 10,
        'n_outer_iter': 500,
        'n_gradient_steps_at_test': 100,
        'inner_lr': 0.001,
        'outer_lr': 0.001,
        # architecture of the model being optimized
        'model': {'in_dim': 1, 'hidden_dim': [100, 10]},
        # architecture of the learned (meta) loss
        'metaloss': {'in_dim': 2, 'hidden_dim': [50, 50]},
        'log_dir': f"{EXP_FOLDER}",
    }

    # Run the same experiment for five seeds. The same dict is reused and only
    # the seed and the log file name are overwritten per run.
    for seed in range(5):
        exp_cfg['seed'] = seed
        exp_cfg['exp_log_file_name'] = "sine_regression_seed_{}.pt".format(seed)
        meta_train(exp_cfg)
class ML3_SineRegressionLoss(nn.Module):
    """Learned loss network for the sine-regression experiment.

    A bias-free MLP maps the concatenated (prediction, target) pair to a
    non-negative value (Softplus output); the loss is the batch mean.
    """

    def __init__(self, in_dim, hidden_dim):
        """Build the network.

        Args:
            in_dim: width of the concatenated ``(y_in, y_target)`` input.
            hidden_dim: sequence with the two hidden layer widths.
        """
        super(ML3_SineRegressionLoss, self).__init__()
        # The layer widths come from ``hidden_dim`` only; the unused hard-coded
        # ``w = [50, 50]`` local of the original was removed.
        self.layers = nn.Sequential(
            nn.Linear(in_dim, hidden_dim[0], bias=False),
            nn.ReLU(),
            nn.Linear(hidden_dim[0], hidden_dim[1], bias=False),
            nn.ReLU(),
        )
        self.loss = nn.Sequential(nn.Linear(hidden_dim[1], 1, bias=False), nn.Softplus())
        self.reset()

    def forward(self, y_in, y_target):
        """Return the scalar learned loss for predictions ``y_in`` and targets ``y_target``.

        Both tensors are concatenated along ``dim=1`` before entering the network.
        """
        y = torch.cat((y_in, y_target), dim=1)
        yp = self.layers(y)
        return self.loss(yp).mean()

    def reset(self):
        """Re-initialize all linear layers with ``weight_init``."""
        self.layers.apply(weight_init)
        self.loss.apply(weight_init)
class Ml3_loss_shaped_sine(nn.Module):
    """Learned loss network for the shaped-sine experiment.

    A small ELU MLP (three hidden layers of 10 units) mapping ``meta_in``
    features per sample to ``meta_out`` loss values. ``learning_rate`` is
    stored on the module for use by the training code.
    """

    def __init__(self, meta_in=3, meta_out=1):
        """Build and initialize the network.

        Args:
            meta_in: number of input features per sample.
            meta_out: number of outputs per sample.
        """
        super(Ml3_loss_shaped_sine, self).__init__()

        def init_weights(m):
            # isinstance (rather than an exact type comparison) is the
            # idiomatic check and also covers subclasses of Linear.
            if isinstance(m, torch.nn.Linear):
                torch.nn.init.xavier_uniform_(m.weight)
                m.bias.data.fill_(0.01)

        activation = torch.nn.ELU
        num_neurons = 10
        self.loss_fn = torch.nn.Sequential(torch.nn.Linear(meta_in, num_neurons), activation(),
                                           torch.nn.Linear(num_neurons, num_neurons), activation(),
                                           torch.nn.Linear(num_neurons, num_neurons), activation(),
                                           torch.nn.Linear(num_neurons, meta_out))

        # Xavier-uniform weights and a small constant bias for every linear layer.
        self.loss_fn.apply(init_weights)

        self.learning_rate = 3e-3

    def forward(self, x):
        """Return the per-sample learned loss for input features ``x``."""
        return self.loss_fn(x)
class Dataset(torch.utils.data.Dataset):
    """In-memory dataset of ``(input, target)`` pairs stored as float tensors."""

    def __init__(self, x, y):
        """Convert the i-th entries of ``x`` and ``y`` into a FloatTensor pair.

        The number of samples is ``len(x)``.
        """
        def as_float_pair(i):
            return torch.FloatTensor(x[i]), torch.FloatTensor(y[i])

        self.dataset = [as_float_pair(i) for i in range(len(x))]

    def __len__(self):
        """Number of stored pairs."""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Return the ``(input, target)`` tuple at position ``idx``."""
        return self.dataset[idx]
class AbstractModel(object):
    """Interface for learned dynamics models.

    Deliberately a plain object rather than an ``nn.Module`` so that
    non-torch implementations can share it. ``forward`` is intentionally not
    part of the interface.
    """

    def train_model(self, training_dataset, testing_dataset, training_params):
        """Fit the model; must be overridden."""
        raise NotImplementedError("Subclass must implement")

    def predict_np(self, x):
        """Predict for numpy input ``x`` and return numpy output, converting
        to/from torch if necessary; must be overridden."""
        raise NotImplementedError("Subclass must implement")

    def get_input_size(self):
        """Return the model's input dimension; must be overridden."""
        raise NotImplementedError("Subclass must implement")

    def get_output_size(self):
        """Return the model's output dimension; must be overridden."""
        raise NotImplementedError("Subclass must implement")

    def get_hyperparameters(self):
        """Return the model's hyperparameters; the default has none."""
        return None
class NLLLoss(torch.nn.modules.loss._Loss):
    """Negative log-likelihood loss for networks predicting mean and variance.

    For each element the loss is
    ``0.5 * log(var) + 0.5 * (mean - target)**2 / var``.
    """

    def __init__(self, size_average=None, reduce=None, reduction="mean"):
        """Create the loss.

        Args:
            size_average, reduce: legacy arguments forwarded to the base class.
            reduction: ``"mean"`` (default), ``"sum"`` or ``"none"``.
        """
        super(NLLLoss, self).__init__(size_average, reduce, reduction)

    def forward(self, net_output, target):
        """Compute the loss.

        Args:
            net_output: 3-d tensor whose first dimension has size 2;
                ``net_output[0]`` is the predicted mean, ``net_output[1]``
                the predicted variance.
            target: tensor compatible with the mean.

        Returns:
            The element-wise loss reduced according to ``self.reduction``.
        """
        assert net_output.dim() == 3
        assert net_output.size(0) == 2
        mean = net_output[0]
        var = net_output[1]
        ret = 0.5 * torch.log(var) + 0.5 * ((mean - target) ** 2) / var

        # Honour the reduction configured at construction time. The original
        # always used "mean", silently ignoring the constructor argument.
        if self.reduction == "none":
            return ret
        if self.reduction == "mean":
            return torch.mean(ret)
        return torch.sum(ret)
= model_cfg['gpu_name'] 181 | else: 182 | self.device = "cpu" 183 | self.ensemble_size = model_cfg['ensemble_size'] 184 | self.model_cfg = model_cfg 185 | 186 | self.reset() 187 | 188 | def reset(self): 189 | self.models = [PModel(self.model_cfg) for _ in range(self.ensemble_size)] 190 | 191 | def forward(self, x): 192 | x = torch.Tensor(x) 193 | means = [] 194 | variances = [] 195 | for eid in range(self.ensemble_size): 196 | mean_and_var = self.models[eid](x) 197 | means.append(mean_and_var[0]) 198 | variances.append(mean_and_var[1]) 199 | 200 | mean = sum(means) / len(means) 201 | dum = torch.zeros_like(variances[0]) 202 | for i in range(len(means)): 203 | dum_var2 = variances[i] 204 | dum_mean2 = means[i] * means[i] 205 | dum += dum_var2 + dum_mean2 206 | 207 | var = (dum / len(means)) - (mean * mean) 208 | # Clipping the variance to a minimum of 1e-3, we can interpret this as saying weexpect a minimum 209 | # level of noise 210 | # the clipping here is probably not necessary anymore because we're now clipping at the individual model level 211 | var = var.clamp_min(1e-3) 212 | return torch.stack((mean, var)) 213 | 214 | def predict_np(self, x_np): 215 | x = torch.Tensor(x_np) 216 | pred = self.forward(x).detach().cpu().numpy() 217 | return pred[0].squeeze(), pred[1].squeeze() 218 | 219 | def train_model(self, training_dataset, testing_dataset, training_params): 220 | X = training_dataset["X"] 221 | Y = training_dataset["Y"] 222 | 223 | datasets = split_to_subsets(X, Y, self.ensemble_size) 224 | 225 | for m in range(self.ensemble_size): 226 | print(colored("training model={}".format(m), "green")) 227 | self.models[m].train_model(datasets[m]) 228 | 229 | def get_gradient(self, x_np): 230 | 231 | x = torch.Tensor(x_np).requires_grad_() 232 | output_mean, _ = self.forward(x) 233 | gradients = [] 234 | # get gradients of ENN with respect to x and u 235 | for output_dim in range(self.output_dimension): 236 | grads = torch.autograd.grad( 237 | output_mean[0, 
class PModel(nn.Module):
    """
    Probabilistic network predicting a mean and a variance per output.

    The output is a 3d tensor:
        d0 : always 2, first element is the mean and second element the variance
        d1 : batch size
        d2 : output size (number of dimensions in the output of the modeled function)
    """

    def __init__(self, config):
        """Build the network from ``config``.

        Keys read: ``device`` (and ``gpu_name`` when device is ``"gpu"``),
        ``state_dim``, ``action_dim``, ``output_dim``, ``learning_rate``,
        ``display_epoch``, ``epochs`` and ``hidden_size`` (two widths).
        """
        super(PModel, self).__init__()
        self.device = config["gpu_name"] if config["device"] == "gpu" else "cpu"
        self.input_sz = config['state_dim'] + config['action_dim']
        self.output_sz = config['output_dim']

        self.learning_rate = config["learning_rate"]
        self.display_epoch = config["display_epoch"]
        self.epochs = config["epochs"]

        first_width, second_width = config["hidden_size"][0], config["hidden_size"][1]

        # Shared trunk followed by separate heads for mean and variance.
        self.layers = nn.Sequential(
            nn.Linear(self.input_sz, first_width),
            nn.Tanh(),
            nn.Linear(first_width, second_width),
            nn.Tanh(),
        )
        self.mean = nn.Linear(second_width, self.output_sz)
        self.var = nn.Sequential(nn.Linear(second_width, self.output_sz), nn.Softplus())
        self.to(self.device)

    def forward(self, x):
        """Return ``stack((mean, var))`` for a 2-d batch ``x`` of width ``input_sz``."""
        x = x.to(device=self.device)
        assert x.dim() == 2, "Expected 2 dimensional input, got {}".format(x.dim())
        assert x.size(1) == self.input_sz
        features = self.layers(x)
        predicted_mean = self.mean(features)
        # Clip the variance to a minimum of 1e-3, which can be read as
        # expecting a minimum level of noise.
        predicted_var = self.var(features).clamp_min(1e-3)
        return torch.stack((predicted_mean, predicted_var))

    def predict_np(self, x_np):
        """Predict from array-like input; returns squeezed numpy ``(mean, var)``."""
        prediction = self.forward(torch.Tensor(x_np)).detach().cpu().numpy()
        mean_np, var_np = prediction[0], prediction[1]
        return mean_np.squeeze(), var_np.squeeze()

    def train_model(self, training_data):
        """Train with Adam on the NLL loss for ``self.epochs`` epochs.

        Batches of 64 are drawn from ``training_data``; the mean batch loss is
        printed every ``display_epoch`` epochs.
        """
        def to_float_2d(batch_tensor):
            # Cast to float on the model device; 1-d batches become a column.
            batch_tensor = batch_tensor.type(torch.FloatTensor).to(device=self.device)
            if batch_tensor.dim() == 1:
                batch_tensor = batch_tensor.unsqueeze(0).t()
            return batch_tensor

        train_loader = torch.utils.data.DataLoader(
            training_data, batch_size=64, num_workers=0
        )
        optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
        loss_fn = NLLLoss()

        for epoch in range(self.epochs):
            epoch_losses = []
            for data, target in train_loader:
                x = to_float_2d(data)
                y = to_float_2d(target)

                loss = loss_fn(self.forward(x), y)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                epoch_losses.append(loss.item())

            if epoch % self.display_epoch == 0:
                message = "epoch={}, loss={}".format(epoch, np.mean(epoch_losses))
                print(colored(message, "yellow"))
2 | import torch 3 | import matplotlib.pyplot as plt 4 | 5 | 6 | def test_ml3_loss_mountain_car(policy, ml3_loss, opt_iter, *args): 7 | 8 | opt = torch.optim.SGD(policy.parameters(), lr=policy.learning_rate) 9 | for i in range(opt_iter): 10 | s_tr, a_tr, g_tr = policy.roll_out(*args) 11 | pred_task_loss = ml3_loss(torch.cat([s_tr[:-1], a_tr, g_tr], dim=1)).mean() 12 | opt.zero_grad() 13 | pred_task_loss.backward() 14 | opt.step() 15 | s_tr, a_tr, g_tr = policy.roll_out(*args) 16 | print('last state: ', s_tr[-1]) 17 | return s_tr.detach().numpy() 18 | 19 | 20 | def test_ml3_loss_reacher(policy, ml3_loss, opt_iter, *args): 21 | opt = torch.optim.SGD(policy.parameters(), lr=policy.learning_rate) 22 | for i in range(opt_iter): 23 | s_tr, a_tr, g_tr = policy.roll_out(*args) 24 | meta_input = torch.cat([s_tr[:-1], a_tr, g_tr], dim=1) 25 | pred_task_loss = ml3_loss(meta_input).mean() 26 | opt.zero_grad() 27 | pred_task_loss.backward() 28 | opt.step() 29 | return s_tr.detach().numpy() 30 | 31 | 32 | def test_ml3_loss_shaped_sine(sine_model, ml3_loss, opt_iter, test_x, test_y): 33 | opt = torch.optim.SGD(sine_model.parameters(), lr=sine_model.learning_rate) 34 | for i in range(opt_iter): 35 | yp = sine_model(test_x) 36 | meta_input = torch.cat([test_x, yp, test_y], dim=1) 37 | pred_task_loss = ml3_loss(meta_input).mean() 38 | opt.zero_grad() 39 | pred_task_loss.backward() 40 | opt.step() 41 | yp = sine_model(test_x) 42 | print('last state: ', yp[-1]) 43 | print('label: ',test_y[-1]) 44 | plt.plot(yp.detach().numpy()) 45 | plt.plot(test_y.detach().numpy()) 46 | plt.show() -------------------------------------------------------------------------------- /ml3/ml3_train.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | import torch 3 | import higher 4 | import numpy as np 5 | from ml3.shaped_sine_utils import plot_loss, generate_sinusoid_batch 6 | from ml3.optimizee import ShapedSineModel 7 | 8 | 9 | def meta_train_mountain_car(policy,ml3_loss,task_loss_fn,s_0,goal,goal_extra,n_outer_iter,n_inner_iter,time_horizon,shaped_loss): 10 | s_0 = torch.Tensor(s_0) 11 | goal = torch.Tensor(goal) 12 | goal_extra = torch.Tensor(goal_extra) 13 | 14 | inner_opt = torch.optim.SGD(policy.parameters(), lr=policy.learning_rate) 15 | meta_opt = torch.optim.Adam(ml3_loss.parameters(), lr=ml3_loss.learning_rate) 16 | 17 | for outer_i in range(n_outer_iter): 18 | # set gradient with respect to meta loss parameters to 0 19 | meta_opt.zero_grad() 20 | for _ in range(n_inner_iter): 21 | inner_opt.zero_grad() 22 | with higher.innerloop_ctx(policy, inner_opt, copy_initial_weights=False) as (fpolicy, diffopt): 23 | # use current meta loss to update model 24 | s_tr, a_tr, g_tr = fpolicy.roll_out(s_0, goal, time_horizon) 25 | 26 | loss_input = torch.cat([s_tr[:-1], a_tr, g_tr], dim=1) 27 | pred_task_loss = ml3_loss(loss_input).mean() 28 | diffopt.step(pred_task_loss) 29 | 30 | # compute task loss 31 | s, a, g = fpolicy.roll_out(s_0, goal, time_horizon) 32 | task_loss = task_loss_fn(a, s[:], goal, goal_extra, shaped_loss) 33 | # backprop grad wrt to task loss 34 | task_loss.backward() 35 | 36 | meta_opt.step() 37 | 38 | if outer_i % 100 == 0: 39 | print("meta iter: {} loss: {}".format(outer_i, task_loss.item())) 40 | print('last state', s[-1]) 41 | 42 | 43 | def meta_train_mbrl_reacher(policy, ml3_loss, dmodel, env, task_loss_fn, goals, n_outer_iter, n_inner_iter, time_horizon, exp_folder): 44 | goals = torch.Tensor(goals) 45 | 46 | meta_opt = torch.optim.Adam(ml3_loss.parameters(), lr=ml3_loss.learning_rate) 47 | 48 | for outer_i in range(n_outer_iter): 49 | # set gradient with respect to meta loss parameters to 0 50 | meta_opt.zero_grad() 51 | all_loss = 0 52 | for goal in goals: 53 | goal = 
torch.Tensor(goal) 54 | policy.reset() 55 | inner_opt = torch.optim.SGD(policy.parameters(), lr=policy.learning_rate) 56 | for _ in range(n_inner_iter): 57 | inner_opt.zero_grad() 58 | with higher.innerloop_ctx(policy, inner_opt, copy_initial_weights=False) as (fpolicy, diffopt): 59 | # use current meta loss to update model 60 | s_tr, a_tr, g_tr = fpolicy.roll_out(goal, time_horizon, dmodel, env) 61 | meta_input = torch.cat([s_tr[:-1].detach(), a_tr, g_tr.detach()], dim=1) 62 | pred_task_loss = ml3_loss(meta_input).mean() 63 | diffopt.step(pred_task_loss) 64 | # compute task loss 65 | s, a, g = fpolicy.roll_out(goal, time_horizon, dmodel, env) 66 | task_loss = task_loss_fn(a, s[:], goal).mean() 67 | 68 | # collect losses for logging 69 | all_loss += task_loss 70 | # backprop grad wrt to task loss 71 | task_loss.backward() 72 | 73 | if outer_i % 100 == 0: 74 | # roll out in real environment, to monitor training and tp collect data for dynamics model update 75 | states, actions, _ = fpolicy.roll_out(goal, time_horizon, dmodel, env, real_rollout=True) 76 | print("meta iter: {} loss: {}".format(outer_i, (torch.mean((states[-1,:2]-goal[:2])**2)))) 77 | if outer_i % 300 == 0 and outer_i < 3001: 78 | # update dynamics model under current optimal policy 79 | dmodel.train(torch.Tensor(states), torch.Tensor(actions)) 80 | 81 | # step optimizer to update meta loss network 82 | meta_opt.step() 83 | torch.save(ml3_loss.state_dict(), f'{exp_folder}/ml3_loss_reacher.pt') 84 | 85 | 86 | def meta_train_shaped_sine(n_outer_iter,shaped,num_task,n_inner_iter,sine_model,ml3_loss,task_loss_fn, exp_folder): 87 | theta_ranges = [] 88 | landscape_with_extra = [] 89 | landscape_mse = [] 90 | 91 | meta_opt = torch.optim.Adam(ml3_loss.parameters(), lr=ml3_loss.learning_rate) 92 | 93 | for outer_i in range(n_outer_iter): 94 | # set gradient with respect to meta loss parameters to 0 95 | batch_inputs, batch_labels, batch_thetas = generate_sinusoid_batch(num_task, 64, n_inner_iter) 96 | for task 
in range(num_task): 97 | sine_model = ShapedSineModel() 98 | inner_opt = torch.optim.SGD([sine_model.freq], lr=sine_model.learning_rate) 99 | for step in range(n_inner_iter): 100 | inputs = torch.Tensor(batch_inputs[task, step, :]) 101 | labels = torch.Tensor(batch_labels[task, step, :]) 102 | label_thetas = torch.Tensor(batch_thetas[task, step, :]) 103 | 104 | ''' Updating the frequency parameters, taking gradient of theta wrt meta loss ''' 105 | with higher.innerloop_ctx(sine_model, inner_opt) as (fmodel, diffopt): 106 | # use current meta loss to update model 107 | yp = fmodel(inputs) 108 | meta_input = torch.cat([inputs, yp, labels], dim=1) 109 | 110 | meta_out = ml3_loss(meta_input) 111 | loss = meta_out.mean() 112 | diffopt.step(loss) 113 | 114 | yp = fmodel(inputs) 115 | task_loss = task_loss_fn(inputs, yp, labels, shaped, fmodel.freq, label_thetas) 116 | 117 | sine_model.freq = torch.nn.Parameter(fmodel.freq.clone().detach()) 118 | inner_opt = torch.optim.SGD([sine_model.freq], lr=sine_model.learning_rate) 119 | 120 | ''' updating the learned loss ''' 121 | meta_opt.zero_grad() 122 | task_loss.mean().backward() 123 | meta_opt.step() 124 | 125 | if outer_i % 100 == 0: 126 | print("task loss: {}".format(task_loss.mean().item())) 127 | 128 | torch.save(ml3_loss.state_dict(), f'{exp_folder}/ml3_loss_shaped_sine_' + str(shaped) + '.pt') 129 | 130 | if outer_i%10==0: 131 | t_range, l_with_extra, l_mse = plot_loss(shaped, exp_folder) 132 | theta_ranges.append(t_range) 133 | landscape_with_extra.append(l_with_extra) 134 | landscape_mse.append(l_mse) 135 | np.save(f'{exp_folder}/theta_ranges_'+str(shaped)+'_.npy', theta_ranges) 136 | np.save(f'{exp_folder}/landscape_with_extra_'+str(shaped)+'_.npy',landscape_with_extra) 137 | np.save(f'{exp_folder}/landscape_mse_'+str(shaped)+'_.npy',landscape_mse) 138 | 139 | -------------------------------------------------------------------------------- /ml3/optimizee.py: 
class SineModel(nn.Module):
    """Fully connected regressor: Linear+ReLU hidden layers and a linear output head."""

    def __init__(self, in_dim, hidden_dim, out_dim):
        """Create the network.

        Args:
            in_dim: size of the input features.
            hidden_dim: list with the width of every hidden layer.
            out_dim: size of the prediction produced by ``mean_pred``.
        """
        super().__init__()
        widths = [in_dim] + hidden_dim

        # One Linear + ReLU pair for every consecutive pair of widths.
        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack += [nn.Linear(fan_in, fan_out), nn.ReLU()]

        self.layers = nn.Sequential(*stack)
        self.mean_pred = nn.Linear(hidden_dim[-1], out_dim)

    def reset(self):
        """Re-initialize the hidden layers and the output head with ``weight_init``."""
        for block in (self.layers, self.mean_pred):
            block.apply(weight_init)

    def forward(self, x):
        """Return the prediction of the output head on the hidden features of ``x``."""
        return self.mean_pred(self.layers(x))
class Reacher_Policy(nn.Module):
    """Tanh MLP policy whose initial weights are stored on disk so it can be reset."""

    def __init__(self, pi_in, pi_out, exp_folder):
        """Build the policy and save its initial parameters.

        Args:
            pi_in: size of the policy input.
            pi_out: size of the action produced by the policy.
            exp_folder: folder in which ``init_policy.pt`` is written.
        """
        super().__init__()

        width = 64
        self.activation = torch.nn.Tanh
        self.policy = torch.nn.Sequential(
            torch.nn.Linear(pi_in, width),
            self.activation(),
            torch.nn.Linear(width, width),
            self.activation(),
            torch.nn.Linear(width, pi_out),
        )
        self.learning_rate = 1e-4
        # Element-wise divisor applied to the concatenated (state, goal) input.
        self.norm_in = torch.Tensor(np.array([1.0,1.0,8.0,8.0,1.0,1.0,1.0,1.0]))
        self.exp_folder = exp_folder
        # Snapshot of the initial weights, restored by ``reset``.
        torch.save(self.state_dict(), f"{self.exp_folder}/init_policy.pt")

    def forward(self, x):
        """Return the action computed by the MLP for input ``x``."""
        return self.policy(x)

    def reset(self):
        """Reload the parameters saved at construction and switch to eval mode."""
        initial_weights = torch.load(f"{self.exp_folder}/init_policy.pt")
        self.load_state_dict(initial_weights)
        self.eval()

    def roll_out(self, goal, time_horizon, dmodel, env, real_rollout=False):
        """Roll the policy out for ``time_horizon`` steps starting from ``env.reset()``.

        Args:
            goal: goal tensor, concatenated with the (detached) state as policy input.
            time_horizon: number of steps to simulate.
            dmodel: object whose ``step_model(state, action)`` is used when
                ``real_rollout`` is False.
            env: object providing ``reset()`` and, for real rollouts,
                ``step_model(state, action)`` on numpy inputs.
            real_rollout: when True, step ``env`` instead of ``dmodel``.

        Returns:
            Stacked states (``time_horizon + 1`` entries), stacked actions and
            stacked detached rewards; every reward entry is the distance of
            the final state to the goal.
        """
        current = torch.Tensor(env.reset())
        visited = [current.clone()]
        taken = []

        for _ in range(time_horizon):
            policy_input = torch.cat((current.detach(), goal[:]), dim=0) / self.norm_in
            action = self.forward(policy_input).clamp(-1.0, 1.0)

            if real_rollout:
                stepped = env.step_model(
                    current.squeeze().detach().numpy(),
                    action.squeeze().detach().numpy(),
                )
                following = torch.Tensor(stepped.copy())
            else:
                following = dmodel.step_model(current.squeeze(), action.squeeze()).clone()

            visited.append(following.clone())
            taken.append(action.clone())
            final_cost = torch.norm(following[:] - goal[:]).detach().unsqueeze(0)
            current = following.clone()

        # rewards to pass to meta loss: the last state cost repeated for every step
        rewards = [final_cost] * time_horizon
        return torch.stack(visited), torch.stack(taken), torch.stack(rewards).detach()
""" 14 | freq = np.random.uniform(freq_range[0], freq_range[1], [num_tasks]) 15 | outputs = np.zeros([num_tasks, num_steps, num_examples_task, 1]) 16 | thetas = np.zeros([num_tasks, num_steps, num_examples_task, 1]) 17 | init_inputs = np.zeros([num_tasks, num_steps, num_examples_task, 1]) 18 | 19 | for task in range(num_tasks): 20 | if random_steps: 21 | init_inputs[task] = np.random.uniform(input_range[0], input_range[1], 22 | [num_steps, num_examples_task, 1]) 23 | else: 24 | init_inputs[task] = np.repeat(np.random.uniform(input_range[0], input_range[1], 25 | [1, num_examples_task, 1]), num_steps, -3) 26 | 27 | outputs[task] = np.sin(freq[task]*init_inputs[task]) 28 | thetas[task] = np.zeros_like(outputs[task]) + freq[task] 29 | return init_inputs, outputs,thetas 30 | 31 | '''PLOTTING THE LOSS LANDSCAPES FOR ILLUSTRATION''' 32 | def plot_loss(extra, exp_folder, freq=0.5): 33 | meta = MetaNetwork() 34 | meta.load_state_dict(torch.load(f'{exp_folder}/ml3_loss_shaped_sine_'+str(extra)+'.pt')) 35 | meta.eval() 36 | 37 | loss_landscape = [] 38 | 39 | theta_ranges = np.arange(-7.0, 7.0, 0.1) 40 | test_x = np.expand_dims(np.arange(-5.0, 5.0, 0.1), 1) 41 | test_y = np.sin(freq * test_x) 42 | x = torch.Tensor(test_x) 43 | y = torch.Tensor(test_y) 44 | 45 | for theta in theta_ranges: 46 | pi = ShapedSineModel(theta) 47 | pi.learning_rate = 0.1 48 | pi_out = pi(x) 49 | loss = 0.5 * (pi_out - y) ** 2 50 | loss_landscape.append(loss.mean().detach().numpy()) 51 | 52 | meta_loss_landscape = [] 53 | for theta in theta_ranges: 54 | pi = ShapedSineModel(theta) 55 | policy_theta = torch.Tensor(np.zeros_like(test_y)) + pi.freq 56 | pi_out = pi(x) 57 | meta_input = torch.cat([x, pi_out, y], 1) 58 | loss = meta(meta_input).mean() 59 | meta_loss_landscape.append(loss.clone().mean().detach().numpy()) 60 | 61 | return theta_ranges, np.array(meta_loss_landscape), np.array(loss_landscape) 62 | 63 | 64 | def render(theta_ranges,loss,color,freq=0.5,file_path='./ml3_loss_sine.gif', 
mode='gif'): 65 | """ When the method is called it saves an animation 66 | of what happened until that point in the episode. 67 | Ideally it should be called at the end of the episode, 68 | and every k episodes. 69 | 70 | ATTENTION: It requires avconv and/or imagemagick installed. 71 | @param file_path: the name and path of the video file 72 | @param mode: the file can be saved as 'gif' or 'mp4' 73 | """ 74 | 75 | fig = plt.figure(figsize=(5,5)) 76 | ax = fig.add_subplot(111,autoscale_on=False, xlim=(-7.0, 7.0), ylim=(0.0, 1.0)) 77 | 78 | 79 | ax.axvline(x=freq, c='red') 80 | delta_t = 1.0/10.0 81 | dot, = ax.plot([], [],color=color) 82 | time_text = ax.text(0.25, 1.05, '', transform=ax.transAxes,fontsize=14) 83 | _theta_ranges = theta_ranges 84 | _loss = loss 85 | _delta_t = delta_t 86 | 87 | def _init(): 88 | dot.set_data([], []) 89 | time_text.set_text('') 90 | return dot, time_text 91 | 92 | def _animate(i): 93 | x = _theta_ranges[i] 94 | y = _loss[i] 95 | dot.set_data(x, y) 96 | time_text.set_text("Iteration: "+str(i)) 97 | return dot, time_text 98 | 99 | ani = animation.FuncAnimation(fig, _animate, np.arange(1, len(theta_ranges)), 100 | blit=True, init_func=_init, repeat=False) 101 | 102 | if mode == 'gif': 103 | ani.save(file_path, writer='imagemagick', fps=int(1 / delta_t)) 104 | # Clear the figure 105 | fig.clear() 106 | plt.close(fig) -------------------------------------------------------------------------------- /ml3/sine_regression_task.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
def regular_train(loss_fn, eval_loss_fn, task_model, x_tr, y_tr, exp_cfg):
    """Train ``task_model`` with plain SGD and record an evaluation loss per step.

    Args:
        loss_fn: loss that is back-propagated, called as ``loss_fn(pred, y_tr)``.
        eval_loss_fn: loss that is only recorded, called the same way.
        task_model: model to optimize in place.
        x_tr: training inputs.
        y_tr: training targets.
        exp_cfg: dict providing ``n_gradient_steps_at_test`` (number of SGD
            steps) and ``inner_lr`` (learning rate).

    Returns:
        List with one float per step: ``eval_loss_fn`` evaluated on the
        prediction that was computed before that step's parameter update.
    """
    num_steps = exp_cfg['n_gradient_steps_at_test']
    step_size = exp_cfg['inner_lr']
    sgd = torch.optim.SGD(task_model.parameters(), lr=step_size)

    def run_step():
        sgd.zero_grad()
        prediction = task_model(x_tr)
        loss_fn(prediction, y_tr).backward()
        sgd.step()
        # The recorded loss reuses the pre-update prediction.
        return eval_loss_fn(prediction, y_tr).item()

    return [run_step() for _ in range(num_steps)]
53 | 54 | x_spt, y_spt, x_qry, y_qry = task_sampler_train.sample() 55 | 56 | for i in range(num_tasks): 57 | task_models[i].reset() 58 | 59 | qry_losses = [] 60 | for _ in range(1): 61 | pred_losses = [] 62 | meta_optimizer.zero_grad() 63 | 64 | for i in range(num_tasks): 65 | # zero gradients wrt to meta loss parameters 66 | with higher.innerloop_ctx(task_models[i], task_opts[i], 67 | copy_initial_weights=False) as (fmodel, diffopt): 68 | 69 | # update model parameters via meta loss 70 | yp = fmodel(x_spt[i]) 71 | pred_loss = meta_loss_model(yp, y_spt[i]) 72 | diffopt.step(pred_loss) 73 | 74 | # compute task loss with new model 75 | yp = fmodel(x_spt[i]) 76 | task_loss = meta_objective(yp, y_spt[i]) 77 | 78 | # this accumulates gradients wrt to meta parameters 79 | task_loss.backward() 80 | qry_losses.append(task_loss.item()) 81 | 82 | meta_optimizer.step() 83 | 84 | avg_qry_loss = sum(qry_losses) / num_tasks 85 | if outer_i % 10 == 0: 86 | res_train_eval_reg = eval(task_sampler=task_sampler_train, exp_cfg=exp_cfg, 87 | train_loss_fn=nn.MSELoss(), eval_loss_fn=nn.MSELoss()) 88 | 89 | res_train_eval_ml3 = eval(task_sampler=task_sampler_train, exp_cfg=exp_cfg, 90 | train_loss_fn=meta_loss_model, eval_loss_fn=nn.MSELoss()) 91 | 92 | res_test_eval_reg = eval(task_sampler=task_sampler_test, exp_cfg=exp_cfg, 93 | train_loss_fn=nn.MSELoss(), eval_loss_fn=nn.MSELoss()) 94 | 95 | res_test_eval_ml3 = eval(task_sampler=task_sampler_test, exp_cfg=exp_cfg, 96 | train_loss_fn=meta_loss_model, eval_loss_fn=nn.MSELoss()) 97 | 98 | res = {} 99 | res['train_reg'] = res_train_eval_reg 100 | res['train_ml3'] = res_train_eval_ml3 101 | res['test_reg'] = res_test_eval_reg 102 | res['test_ml3'] = res_test_eval_ml3 103 | res['task_loss'] = {} 104 | res['task_loss']['mse'] = qry_losses 105 | results.append(res) 106 | test_loss_ml3 = np.mean(res_test_eval_ml3['mse']) 107 | test_loss_reg = np.mean(res_test_eval_reg['mse']) 108 | print( 109 | f'[Epoch {outer_i:.2f}] Train Loss: 
def main(exp_cfg):
    """Run the sine-regression meta-training experiment and save its results.

    Seeds numpy and torch, builds the learned loss, its Adam optimizer, an
    MSE meta objective and the train/test task samplers, calls
    ``meta_train`` and stores the returned results with ``torch.save``.

    Args:
        exp_cfg: experiment configuration. This function reads ``seed``,
            ``num_train_tasks``, ``num_test_tasks``, ``outer_lr``,
            ``metaloss['in_dim']``, ``metaloss['hidden_dim']``, ``log_dir``
            and ``exp_log_file_name``; the whole dict is also passed on to
            ``meta_train``.
    """
    seed = exp_cfg['seed']
    num_train_tasks = exp_cfg['num_train_tasks']
    num_test_tasks = exp_cfg['num_test_tasks']
    outer_lr = exp_cfg['outer_lr']

    np.random.seed(seed)
    torch.manual_seed(seed)

    meta_loss_model = ML3_SineRegressionLoss(in_dim=exp_cfg['metaloss']['in_dim'],
                                             hidden_dim=exp_cfg['metaloss']['hidden_dim'])

    meta_optimizer = torch.optim.Adam(meta_loss_model.parameters(), lr=outer_lr)

    meta_objective = nn.MSELoss()

    task_sampler_train = SineTaskSampler(num_tasks_total=num_train_tasks,
                                         num_tasks_per_batch=num_train_tasks,
                                         num_data_points=100,
                                         amp_range=[1.0, 1.0],
                                         input_range=[-2.0, 2.0],
                                         )

    task_sampler_test = SineTaskSampler(num_tasks_total=num_test_tasks,
                                        num_tasks_per_batch=num_test_tasks,
                                        num_data_points=100,
                                        input_range=[-5.0, 5.0],
                                        amp_range=[0.2, 5.0],
                                        phase_range=[-np.pi, np.pi]
                                        )

    res = meta_train(meta_loss_model=meta_loss_model, meta_optimizer=meta_optimizer,
                     meta_objective=meta_objective,
                     task_sampler_train=task_sampler_train, task_sampler_test=task_sampler_test,
                     exp_cfg=exp_cfg)

    data_file = os.path.join(exp_cfg['log_dir'], exp_cfg['exp_log_file_name'])

    data_dir = os.path.dirname(data_file)
    # Compare by value, not identity: ``data_dir is not ''`` depends on string
    # interning and triggers a SyntaxWarning on Python >= 3.8. ``exist_ok``
    # makes the creation safe if the directory appears in the meantime.
    if data_dir:
        os.makedirs(data_dir, exist_ok=True)
    torch.save(res, data_file)
class SineTaskSampler(object):
    """Samples regression tasks of the form ``amp * sin(freq * (x - phase))``.

    A fixed pool of training tasks and a fixed pool of validation tasks is
    drawn at construction time; ``sample`` then returns data for a random
    subset of each pool.
    """

    def __init__(self, num_tasks_total, num_tasks_per_batch, num_data_points,
                 input_range=(-5.0, 5.0),
                 amp_range=(1.0, 1.0),
                 freq_range=(1.0, 1.0),
                 phase_range=(np.pi, np.pi),
                 fun_type="sine"):
        """Draw the task pools.

        Args:
            num_tasks_total: number of tasks in each pool.
            num_tasks_per_batch: number of tasks returned by one ``sample`` call.
            num_data_points: number of input points drawn per task.
            input_range: ``[low, high]`` of the uniform input distribution.
            amp_range: ``[low, high]`` of the uniform amplitude distribution.
            freq_range: ``[low, high]`` of the uniform frequency distribution.
            phase_range: ``[low, high]`` of the uniform phase distribution.
            fun_type: stored on the instance; not otherwise used in this class.
        """
        # Defaults are immutable tuples and every range is copied into a fresh
        # list, so neither the defaults nor a caller's list are shared between
        # instances (mutating one sampler's range must not affect another).
        self.input_range = list(input_range)

        self.amp_range = list(amp_range)
        self.freq_range = list(freq_range)
        self.phase_range = list(phase_range)

        self.fun_type = fun_type

        self.observation_space = np.ones([1], dtype=np.float32)
        self.action_space = np.ones([1], dtype=np.float32)
        self.sample_space = np.ones([1], dtype=np.float32)

        self.num_tasks_total = num_tasks_total
        # NOTE: historical attribute name; it holds the number of tasks per batch.
        self.num_tasks_per_task = num_tasks_per_batch
        self.train_tasks = self._sample_tasks(num_tasks_total, num_data_points)
        self.valid_tasks = self._sample_tasks(num_tasks_total, num_data_points)

    def _sample_tasks(self, num_tasks, n_data_points):
        """
        Returns a list of task parameters, one ``[amp, freq, phase, inputs]``
        entry per task, where ``inputs`` has shape ``[n_data_points, 1]``.
        """
        # The draw order (amp, freq, phase, inputs) is kept fixed so seeded
        # runs stay reproducible.
        amp = np.random.uniform(self.amp_range[0], self.amp_range[1], [num_tasks]).astype(np.float32)
        freq = np.random.uniform(self.freq_range[0], self.freq_range[1], [num_tasks]).astype(np.float32)
        phase = np.random.uniform(self.phase_range[0], self.phase_range[1], [num_tasks]).astype(np.float32)
        inputs = np.random.uniform(self.input_range[0], self.input_range[1],
                                   [num_tasks, n_data_points, 1]).astype(np.float32)

        return [[amp[i], freq[i], phase[i], inputs[i]] for i in range(num_tasks)]

    def _sample_from_tasks(self, tasks):
        """Return inputs and targets (lists of float tensors) for a random subset of ``tasks``."""
        task_idx = np.random.permutation(self.num_tasks_total)[:self.num_tasks_per_task]
        inputs, targets = [], []
        for i in task_idx:
            amp, freq, phase, inputs_np = tasks[i]
            targets_np = (amp * np.sin(freq * (inputs_np - phase))).astype(np.float32)
            inputs.append(torch.FloatTensor(inputs_np))
            targets.append(torch.FloatTensor(targets_np))
        return inputs, targets

    def sample(self):
        """
        Samples a batch of tasks from the training pool and, independently,
        from the validation pool.

        Returns:
            ``(train_inputs, train_targets, valid_inputs, valid_targets)``,
            each a list with one float tensor per sampled task.
        """
        train_inputs, train_targets = self._sample_from_tasks(self.train_tasks)
        valid_inputs, valid_targets = self._sample_from_tasks(self.valid_tasks)

        return train_inputs, train_targets, valid_inputs, valid_targets