├── .gitignore ├── .idea ├── .gitignore ├── LTO-CMA.iml ├── inspectionProfiles │ └── profiles_settings.xml ├── misc.xml ├── modules.xml └── vcs.xml ├── LICENSE ├── README.md ├── data ├── PPSN_LTO_Data.zip ├── PPSN_LTO_policies.zip ├── README_Data.txt └── README_Policies.txt ├── examples ├── 10BBOB │ ├── GallaghersGaussian21hi_LTO.json │ ├── data_files │ │ ├── checkpoint │ │ ├── policy_itr_14.pkl │ │ ├── policy_itr_14_tf_data.ckpt.data-00000-of-00001 │ │ ├── policy_itr_14_tf_data.ckpt.index │ │ └── policy_itr_14_tf_data.ckpt.meta │ └── hyperparams.py └── BentCigar │ └── hyperparams.py ├── plots ├── Plot_ObjectiveValue_AttractiveSector.pdf ├── Plot_ObjectiveValue_BentCigar.pdf ├── Plot_ObjectiveValue_BuecheRastrigin.pdf ├── Plot_ObjectiveValue_CompositeGR.pdf ├── Plot_ObjectiveValue_DifferentPowers.pdf ├── Plot_ObjectiveValue_GG101me.pdf ├── Plot_ObjectiveValue_GG21hi.pdf ├── Plot_ObjectiveValue_LinearSlope.pdf ├── Plot_ObjectiveValue_LunacekBiRastrigin.pdf ├── Plot_ObjectiveValue_RosenbrockRotated.pdf ├── Plot_ObjectiveValue_SchaffersIllConditioned.pdf ├── Plot_ObjectiveValue_SharpRidge.pdf ├── Plot_ObjectiveValue_StepEllipsoidal.pdf ├── Plot_StepSize_AttractiveSector.pdf ├── Plot_StepSize_BentCigar.pdf ├── Plot_StepSize_BuecheRastrigin.pdf ├── Plot_StepSize_CompositeGR.pdf ├── Plot_StepSize_DifferentPowers.pdf ├── Plot_StepSize_GG101me.pdf ├── Plot_StepSize_GG21hi.pdf ├── Plot_StepSize_LinearSlope.pdf ├── Plot_StepSize_LunacekBiRastrigin.pdf ├── Plot_StepSize_RosenbrockRotated.pdf ├── Plot_StepSize_SchaffersIllConditioned.pdf ├── Plot_StepSize_SharpRidge.pdf └── Plot_StepSize_StepEllipsoidal.pdf ├── requirements.txt ├── scripts └── plot_performance.py └── source └── gps ├── LICENSE.md ├── README.md ├── __init__.py ├── agent ├── __init__.py ├── agent.py ├── config.py └── lto │ ├── __init__.py │ ├── agent_cmaes.py │ ├── cmaes_world.py │ └── fcn.py ├── algorithm ├── __init__.py ├── algorithm.py ├── algorithm_utils.py ├── config.py ├── cost │ ├── __init__.py │ ├── 
config.py │ ├── cost.py │ └── cost_utils.py ├── dynamics │ ├── __init__.py │ ├── config.py │ ├── dynamics_lr_prior.py │ └── dynamics_prior_gmm.py ├── policy │ ├── __init__.py │ ├── config.py │ ├── constant_policy.py │ ├── csa_policy.py │ ├── lin_gauss_init.py │ ├── lin_gauss_policy.py │ ├── policy.py │ ├── policy_prior_gmm.py │ └── tf_policy.py ├── policy_opt │ ├── __init__.py │ ├── config.py │ ├── lto_model.py │ ├── policy_opt.py │ └── tf_utils.py └── traj_opt │ ├── __init__.py │ ├── config.py │ ├── traj_opt.py │ └── traj_opt_utils.py ├── gps_test.py ├── gps_train.py ├── proto ├── __init__.py └── gps_pb2.py ├── sample ├── __init__.py ├── sample.py └── sample_list.py └── utility ├── __init__.py ├── display.py ├── general_utils.py └── gmm.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /workspace.xml 3 | -------------------------------------------------------------------------------- /.idea/LTO-CMA.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 12 | 13 | 15 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LTO-CMA 2 | Code for the paper "Learning Step-Size Adaptation in CMA-ES" 3 | ## License 4 | Our work is available under Apache-2.0. In order to learn step-size adaptation in CMA-ES we use guided policy search (GPS). 5 | We built upon the GPS version as given by Li and Malik. The original GPS code of Li and Malik can be found at https://www.math.ias.edu/~ke.li/downloads/lto_code.tar.gz 6 | 7 | In a nutshell, we modified the GPS code to be able to continuously sample from the starting teacher. To this end we introduce a sampling rate that determines how often we use new samples generated from the starting policy. 8 | 9 | The original code falls under GPLv3. In *source/gps* we list files that we modifed (thereby fall under GPLv3) and those that are of our creation (i.e. 
under Apache-2.0) 10 | 11 | ## Experiment Setup 12 | ### Training 13 | - Create experiment folder 14 | - Create file with hyperparameters of the experiment *hyperparams.py* in the experiment folder 15 | - Start learning step-size adaptation by executing the command: 16 | ``` 17 | python gps_train.py EXPERIMENT_FOLDER_NAME 18 | ``` 19 | - The output of training is the pickled version of the learned policy, saved in the path *EXPERIMENT_FOLDER_NAME/data_files*. 20 | ### Testing 21 | - Add the path to the learned policy in the hyperparameter file *hyperparams.py* 22 | - Start testing the performance of the learned policy on the test set by executing the command: 23 | ``` 24 | python gps_test.py EXPERIMENT_FOLDER_NAME 25 | ``` 26 | - The output of testing are the files *log_data_test_X.json* for the function name of each condition X of the test set, saved in the experiment folder. 27 | - The output file *test_data_X.json* contains: 28 | - The average objective values from 25 samples of running the learned policy on the test condition X, 29 | - The end objective values of the 25 samples, 30 | - The average step-size for each step of the optimization trajectory from 25 samples, and 31 | - The standard deviation of the objective value and the step-size for each step of the optimization trajectory. 32 | - To plot the results, run the *plot_performance.py* script in the *scripts* folder. 33 | ## Reference 34 | ``` 35 | @inproceedings{shala-ppsn20, 36 | author = {G.Shala and A. Biedenkapp and N.Awad and S. Adriaensen and M.Lindauer and F. 
Hutter}, 37 | title = {Learning Step-size Adaptation in CMA-ES}, 38 | booktitle = {Proceedings of the Sixteenth International Conference on Parallel Problem Solving from Nature ({PPSN}'20)}, 39 | year = {2020}, 40 | month = sep, 41 | } 42 | ``` 43 | -------------------------------------------------------------------------------- /data/PPSN_LTO_Data.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/data/PPSN_LTO_Data.zip -------------------------------------------------------------------------------- /data/PPSN_LTO_policies.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/data/PPSN_LTO_policies.zip -------------------------------------------------------------------------------- /data/README_Data.txt: -------------------------------------------------------------------------------- 1 | Data Archive for the paper 2 | "Learning step-size adaptation in CMA-ES" 3 | -CSA_Data (Contains the performance data of running CMA-ES with CSA) 4 | - -CSA_Plots_XD (X={5,10,15,...,60} Contains CSA data for functions of dimensionality 5-60D) 5 | - -CSA_Plots_X (X={50, 100, 150, 200, 250, 500, 1000} Contains CSA data for CMA-ES runs of 50-1000 generations) 6 | -LTO_Data (Contains the saved policies from the runs of CMA-ES with the learned policies for step-size adaptation) 7 | - -Sampling_Rate_Ablation (Contains the performance data of LTO with sampling rate 0-0.9) 8 | - - -Sampling 0.X (X = {0,1,...,9}) 9 | - -Train_5-30D_to_35-60D (Contains the train and test performance data of LTO trained on functions of 5-30D, tested on 35-60D) 10 | - -Transfer_Other_Fcns (Contains the performance data of LTO trained on 10 BBOB functions, tested on 12 different BBOB functions) 11 | - -Transfer_Longer_Traj (Contains the performance data of LTO trained 
on CMA-ES runs of 50 generations, tested on CMA-ES runs of 50-1000 generations) 12 | - -Transfer5-30D_from10D (Contains the performance data of LTO trained on functions of 10D, tested on 5-30D) -------------------------------------------------------------------------------- /data/README_Policies.txt: -------------------------------------------------------------------------------- 1 | Policy Archive for the paper 2 | "Learning step-size adaptation in CMA-ES" 3 | 4 | -LTO_Data (Contains the saved policies from the runs of CMA-ES with the learned policies for step-size adaptation) 5 | - -Sampling_Rate_Ablation (Contains the saved learned policies with sampling rate 0-0.9) 6 | - - -Sampling 0.X (X = {0,1,...,9}) 7 | - -Train_5-30D_to_35-60D (Contains the saved learned policies trained on functions of 5-30D) 8 | - -Transfer_Other_Fcns (Contains the saved learned policies trained on 10 BBOB functions) 9 | -------------------------------------------------------------------------------- /examples/10BBOB/GallaghersGaussian21hi_LTO.json: -------------------------------------------------------------------------------- 1 | {"Average costs LTO": [112.62379735434348, 111.3391294935969, 109.26043885899631, 103.50465549871282, 94.59429581291653, 84.85220229325782, 77.61487904143523, 73.09672733938768, 68.23321210764803, 64.02529556050979, 61.264454931406405, 57.422301371701494, 55.42630242063979, 54.50174089226569, 53.136916149149755, 51.98158278564496, 51.16316322814002, 50.23521801149499, 49.879771890939466, 49.3406015323143, 48.81505231270593, 48.32021789141222, 48.16056999659033, 47.92214862529059, 47.66926530314748, 47.380307245204804, 47.13012495591299, 46.94405863900299, 46.683562697681026, 46.56380402513933, 46.4175654420398, 46.34038930319992, 46.29502297233834, 46.13500183931653, 46.06886297010712, 45.963060523358806, 45.904999020500235, 45.876167776364106, 45.84578782589061, 45.80835552328669, 45.78785598740914, 45.764379926741185, 45.744466485144265, 45.72109054700329, 
45.71120003269648, 45.705404625222585, 45.68477710520333, 45.67598030492183, 45.66006137928655, 45.65477567766216], "End values LTO": [45.58425177849915, 45.59863009863613, 45.603145052739045, 45.61201690273372, 45.6134123141773, 45.61359841515806, 45.61438702954894, 45.6161682253263, 45.61647638193877, 45.61802862912683, 45.6183892188068, 45.61895058239136, 45.61937265202621, 45.61943089687926, 45.62214901997382, 45.622363190080925, 45.62245851643677, 45.623178210192584, 45.623711210241346, 45.62371554636879, 45.625113586144394, 45.62521121969189, 45.62615689529963, 45.62640282783731, 45.62750677605763], "Sigma LTO": [0.41302519076753247, 0.2926393218497182, 0.2977230347253698, 0.29782627594495187, 0.3015109857823612, 0.3254356160911449, 0.32692723299273585, 0.3363416406579729, 0.3437612871011416, 0.37097323391964243, 0.3738293573355639, 0.37632890637587735, 0.38888381867361804, 0.3886229773720293, 0.38878827040435926, 0.4040172978213582, 0.39160998779126466, 0.39509360197929294, 0.3709629475841213, 0.3942064625706532, 0.354022768365397, 0.34676055123771904, 0.33992057101149087, 0.329181679985184, 0.3261333482409759, 0.31179579282714975, 0.28437874762764515, 0.306736331453086, 0.2736518937732548, 0.286509352791504, 0.26251309283898505, 0.2717234340968551, 0.24336039441818147, 0.24486909156746256, 0.23267127409018246, 0.21863449950033412, 0.22589433230159012, 0.2085146743364903, 0.21118013421705886, 0.19435015563259692, 0.19438686756396586, 0.18964455108396447, 0.1810123647408295, 0.18250413453802442, 0.18364313730240783, 0.17302415340718397, 0.17068904530811732, 0.1656929317213275, 0.16353122194234754, 0.16907236761351602], "Std costs LTO": [2.754740698754516, 2.409498281178775, 2.8079918273574247, 3.5646825562559723, 5.461435381212568, 8.13005565272067, 10.50886796545814, 12.378945849280134, 13.435664393153894, 14.386911554310375, 14.64554780922917, 15.064299560675199, 14.717408416587924, 14.323257039815257, 13.862713382625122, 13.221380523371106, 
12.401666557469156, 11.58956449082783, 10.630010909239266, 9.52415147790265, 8.737926049247628, 7.966438436815966, 7.371977933270448, 6.835523964887072, 6.382606539180567, 6.048732647100209, 5.844663712137482, 5.714337356392995, 5.599890211229416, 5.552298819931983, 5.458436150543536, 5.406659014150698, 5.336915428966987, 5.265005839530419, 5.154414877920218, 5.050979665946451, 5.022267130624553, 4.9615487137075185, 4.891022328836538, 4.823307523248715, 4.777225833159941, 4.73513980295079, 4.703012572471365, 4.664328930351546, 4.636839174564745, 4.607799659624398, 4.569735913825095, 4.527669534979869, 4.500262686345359, 4.469846732288544], "Std Sigma LTO": [0.17923269515976697, 0.16356070517668442, 0.15718907435209512, 0.16778215946723338, 0.16821275422144688, 0.1879636627426515, 0.19894398911597808, 0.2092583706577774, 0.2196352169061549, 0.24238747987750348, 0.22864439472127326, 0.23156663011726647, 0.23033679270935198, 0.2295729406747637, 0.23017083420511353, 0.22164800911681012, 0.20696349039151168, 0.2007510177412861, 0.1814064055629177, 0.19515969632033472, 0.15740111398817108, 0.15068756117609325, 0.14634179931948976, 0.1357868290346, 0.1365103381027344, 0.13184222737234677, 0.10729688937425973, 0.1364989829742853, 0.10514342823455317, 0.1252798074542987, 0.10945311991717543, 0.12001588001012473, 0.09494951180325842, 0.10227374132424506, 0.08737016973389826, 0.07133715021518652, 0.09472027108427962, 0.0694132887668044, 0.07057152804376877, 0.057362791591105466, 0.061161598919881176, 0.05667473735617357, 0.0571830224310085, 0.06051263270568752, 0.06262075139748807, 0.05117954292315268, 0.04495258801748381, 0.04116835112442848, 0.04207894035101424, 0.059761939590193006]} -------------------------------------------------------------------------------- /examples/10BBOB/data_files/checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: "policy_itr_00_tf_data.ckpt" 2 | all_model_checkpoint_paths: 
"policy_itr_00_tf_data.ckpt" 3 | -------------------------------------------------------------------------------- /examples/10BBOB/data_files/policy_itr_14.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/examples/10BBOB/data_files/policy_itr_14.pkl -------------------------------------------------------------------------------- /examples/10BBOB/data_files/policy_itr_14_tf_data.ckpt.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/examples/10BBOB/data_files/policy_itr_14_tf_data.ckpt.data-00000-of-00001 -------------------------------------------------------------------------------- /examples/10BBOB/data_files/policy_itr_14_tf_data.ckpt.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/examples/10BBOB/data_files/policy_itr_14_tf_data.ckpt.index -------------------------------------------------------------------------------- /examples/10BBOB/data_files/policy_itr_14_tf_data.ckpt.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/examples/10BBOB/data_files/policy_itr_14_tf_data.ckpt.meta -------------------------------------------------------------------------------- /examples/10BBOB/hyperparams.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | import os.path 4 | from datetime import datetime 5 | import numpy as np 6 | import cma 7 | from cma import bbobbenchmarks as bn 8 | import gps 9 | from gps import __file__ as gps_filepath 10 | from gps.agent.lto.agent_cmaes import AgentCMAES 
11 | from gps.agent.lto.cmaes_world import CMAESWorld 12 | from gps.algorithm.algorithm import Algorithm 13 | from gps.algorithm.cost.cost import Cost 14 | from gps.algorithm.dynamics.dynamics_lr_prior import DynamicsLRPrior 15 | from gps.algorithm.dynamics.dynamics_prior_gmm import DynamicsPriorGMM 16 | from gps.algorithm.policy.policy_prior_gmm import PolicyPriorGMM 17 | from gps.algorithm.traj_opt.traj_opt import TrajOpt 18 | from gps.algorithm.policy_opt.policy_opt import PolicyOpt 19 | from gps.algorithm.policy_opt.lto_model import fully_connected_tf_network 20 | from gps.algorithm.policy.lin_gauss_init import init_cmaes_controller 21 | from gps.proto.gps_pb2 import CUR_LOC, PAST_OBJ_VAL_DELTAS, CUR_SIGMA, CUR_PS, PAST_LOC_DELTAS,PAST_SIGMA, ACTION 22 | from gps.algorithm.cost.cost_utils import RAMP_CONSTANT 23 | 24 | try: 25 | import cPickle as pickle 26 | except: 27 | import pickle 28 | import copy 29 | 30 | 31 | 32 | session = tf.Session() 33 | history_len = 40 34 | 35 | TRAIN = True 36 | 37 | input_dim = 10 38 | num_inits_per_fcn = 1 39 | init_locs = [] 40 | if TRAIN: 41 | num_fcns = 100 42 | train_fcns = range(num_fcns) 43 | test_fcns = range(num_fcns-10, num_fcns) 44 | fcn_ids = [12, 11, 2, 23, 15, 8, 17, 20, 1, 16] 45 | fcn_names = ["BentCigar", "Discus", "Ellipsoid", "Katsuura", "Rastrigin", "Rosenbrock", "Schaffers", "Schwefel", "Sphere", "Weierstrass"] 46 | init_sigma_test = [1.28, 0.38, 1.54, 1.18, 0.1, 1.66, 0.33, 0.1, 1.63, 0.1] 47 | #initialize the initial locations of the optimization trajectories 48 | init_locs.extend(list(np.random.randn(num_fcns-len(test_fcns), input_dim))) 49 | #initialize the initial sigma(step size) values 50 | init_sigmas = list(np.random.rand(num_fcns-len(test_fcns))) 51 | init_sigmas.extend(init_sigma_test) 52 | #append the initial locations of the conditions in the test set 53 | for i in test_fcns: 54 | init_locs.append([0]*input_dim) 55 | 56 | else: 57 | num_fcns = 12 58 | # We don't do any training so we evaluate on 
all the conditions in the 'training set' 59 | train_fcns = range(num_fcns) 60 | test_fcns = train_fcns 61 | fcn_ids = [6, 4, 19, 14, 5, 13, 7, 9, 18, 24, 21, 22] 62 | fcn_names = ["AttractiveSector", "BuecheRastrigin", "CompositeGR", "DifferentPowers", "LinearSlope", 63 | "SharpRidge", "StepEllipsoidal", "RosenbrockRotated", "SchaffersIllConditioned", 64 | "LunacekBiR", "GG101me", "GG21hi"] 65 | init_sigmas = [0.5]*len(test_fcns) 66 | for i in test_fcns: 67 | init_locs.append([0]*input_dim) 68 | 69 | 70 | cur_dir = os.path.dirname(os.path.abspath(__file__)) 71 | 72 | 73 | fcn_objs = [] 74 | fcns = [] 75 | for i in range(num_fcns//len(fcn_ids)): 76 | #instantiate BBOB functions based on their ID 77 | for i in fcn_ids: 78 | fcn_objs.append(bn.instantiate(i)[0]) 79 | 80 | for i,function in enumerate(fcn_objs): 81 | fcns.append({'fcn_obj': function, 'dim': input_dim, 'init_loc': list(init_locs[i]), 'init_sigma': init_sigmas[i]}) 82 | 83 | SENSOR_DIMS = { 84 | PAST_OBJ_VAL_DELTAS: history_len, 85 | CUR_PS: 1, 86 | CUR_SIGMA : 1, 87 | ACTION: 1, 88 | PAST_SIGMA: history_len 89 | } 90 | 91 | BASE_DIR = '/'.join(str.split(gps_filepath, '/')[:-2]) 92 | EXP_DIR = BASE_DIR + '/../examples/10BBOB' + '/' 93 | 94 | 95 | common = { 96 | 'experiment_name': 'CMA_' + \ 97 | datetime.strftime(datetime.now(), '%m-%d-%y_%H-%M'), 98 | 'experiment_dir': EXP_DIR, 99 | 'data_files_dir': EXP_DIR + 'data_files/', 100 | 'plot_filename': EXP_DIR + 'plot', 101 | 'log_filename': EXP_DIR + 'log_data', 102 | 'conditions': num_fcns, 103 | 'train_conditions': train_fcns, 104 | 'test_conditions': test_fcns, 105 | 'test_functions': fcn_names 106 | } 107 | 108 | if not os.path.exists(common['data_files_dir']): 109 | os.makedirs(common['data_files_dir']) 110 | 111 | agent = { 112 | 'type': AgentCMAES, 113 | 'world' : CMAESWorld, 114 | 'init_sigma': 0.3, 115 | 'popsize': 10, 116 | 'n_min':10, 117 | 'max_nfe': 200000, 118 | 'substeps': 1, 119 | 'conditions': common['conditions'], 120 | 'dt': 0.05, 121 | 
'T': 50, 122 | 'sensor_dims': SENSOR_DIMS, 123 | 'state_include': [PAST_OBJ_VAL_DELTAS, CUR_SIGMA, CUR_PS, PAST_SIGMA], 124 | 'obs_include': [PAST_OBJ_VAL_DELTAS, CUR_PS, PAST_SIGMA, CUR_SIGMA], 125 | 'history_len': history_len, 126 | 'fcns': fcns 127 | } 128 | 129 | algorithm = { 130 | 'type': Algorithm, 131 | 'conditions': common['conditions'], 132 | 'train_conditions': train_fcns, 133 | 'test_conditions': test_fcns, 134 | 'test_functions': fcn_names, 135 | 'iterations': 15, # must be > 1 136 | 'inner_iterations': 4, 137 | 'policy_dual_rate': 0.2, 138 | 'init_pol_wt': 0.01, 139 | 'ent_reg_schedule': 0.0, 140 | 'fixed_lg_step': 3, 141 | 'kl_step': 0.2, 142 | 'min_step_mult': 0.01, 143 | 'max_step_mult': 10.0, 144 | 'sample_decrease_var': 0.05, 145 | 'sample_increase_var': 0.1, 146 | 'policy_sample_mode': 'replace', 147 | 'exp_step_lower': 2, 148 | 'exp_step_upper': 2 149 | } 150 | 151 | algorithm['init_traj_distr'] = { 152 | 'type': init_cmaes_controller, 153 | 'init_var': 0.01, 154 | 'dt': agent['dt'], 155 | 'T': agent['T'] 156 | } 157 | 158 | algorithm['cost'] = { 159 | 'type': Cost, 160 | 'ramp_option': RAMP_CONSTANT, 161 | 'wp_final_multiplier': 1.0, 162 | 'weight': 1.0, 163 | } 164 | 165 | algorithm['dynamics'] = { 166 | 'type': DynamicsLRPrior, 167 | 'regularization': 1e-3, # Increase this if Qtt is not PD during DGD 168 | 'clipping_thresh': None, 169 | 'prior': { 170 | 'type': DynamicsPriorGMM, 171 | 'max_clusters': 20, 172 | 'min_samples_per_cluster': 20, 173 | 'max_samples': 20, 174 | 'strength': 1.0 # How much weight to give to prior relative to samples 175 | } 176 | } 177 | 178 | algorithm['traj_opt'] = { 179 | 'type': TrajOpt, 180 | } 181 | 182 | algorithm['policy_opt'] = { 183 | 'type': PolicyOpt, 184 | 'network_model': fully_connected_tf_network, 185 | 'iterations': 20000, 186 | 'init_var': 0.01, 187 | 'batch_size': 25, 188 | 'solver_type': 'adam', 189 | 'lr': 0.0001, 190 | 'lr_policy': 'fixed', 191 | 'momentum': 0.9, 192 | 'weight_decay': 0.005, 193 
| 'use_gpu': 0, 194 | 'weights_file_prefix': EXP_DIR + 'policy', 195 | 'network_params': { 196 | 'obs_include': agent['obs_include'], 197 | 'sensor_dims': agent['sensor_dims'], 198 | 'dim_hidden': [50, 50] 199 | } 200 | } 201 | 202 | algorithm['policy_prior'] = { 203 | 'type': PolicyPriorGMM, 204 | 'max_clusters': 20, 205 | 'min_samples_per_cluster': 20, 206 | 'max_samples': 20, 207 | 'strength': 1.0, 208 | 'clipping_thresh': None, 209 | 'init_regularization': 1e-3, 210 | 'subsequent_regularization': 1e-3 211 | } 212 | 213 | config = { 214 | 'iterations': algorithm['iterations'], 215 | 'num_samples': 25, 216 | 'common': common, 217 | 'agent': agent, 218 | 'algorithm': algorithm, 219 | 'train_conditions': train_fcns, 220 | 'test_conditions': test_fcns, 221 | 'test_functions': fcn_names, 222 | 'policy_path': EXP_DIR + 'data_files/policy_itr_14.pkl' 223 | } 224 | 225 | -------------------------------------------------------------------------------- /examples/BentCigar/hyperparams.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | import os.path 4 | from datetime import datetime 5 | import numpy as np 6 | import cma 7 | from cma import bbobbenchmarks as bn 8 | import gps 9 | from gps import __file__ as gps_filepath 10 | from gps.agent.lto.agent_cmaes import AgentCMAES 11 | from gps.agent.lto.cmaes_world import CMAESWorld 12 | from gps.algorithm.algorithm import Algorithm 13 | from gps.algorithm.cost.cost import Cost 14 | from gps.algorithm.dynamics.dynamics_lr_prior import DynamicsLRPrior 15 | from gps.algorithm.dynamics.dynamics_prior_gmm import DynamicsPriorGMM 16 | from gps.algorithm.policy.policy_prior_gmm import PolicyPriorGMM 17 | from gps.algorithm.traj_opt.traj_opt import TrajOpt 18 | from gps.algorithm.policy_opt.policy_opt import PolicyOpt 19 | from gps.algorithm.policy_opt.lto_model import fully_connected_tf_network 20 | from gps.algorithm.policy.lin_gauss_init import init_cmaes_controller 
21 | from gps.proto.gps_pb2 import CUR_LOC, PAST_OBJ_VAL_DELTAS, CUR_SIGMA, CUR_PS, PAST_LOC_DELTAS,PAST_SIGMA, ACTION 22 | from gps.algorithm.cost.cost_utils import RAMP_CONSTANT 23 | 24 | try: 25 | import cPickle as pickle 26 | except: 27 | import pickle 28 | import copy 29 | 30 | 31 | 32 | session = tf.Session() 33 | history_len = 40 34 | 35 | TRAIN = True 36 | 37 | input_dim = 10 38 | num_inits_per_fcn = 1 39 | init_locs = [] 40 | if TRAIN: 41 | num_fcns = 20 42 | train_fcns = range(num_fcns) 43 | test_fcns = range(num_fcns-1, num_fcns) 44 | fcn_ids = [12] 45 | fcn_names = ["BentCigar"] 46 | init_sigma_test = [1.28] 47 | #initialize the initial locations of the optimization trajectories 48 | init_locs.extend(list(np.random.randn(num_fcns-len(test_fcns), input_dim))) 49 | #initialize the initial sigma(step size) values 50 | init_sigmas = list(np.random.rand(num_fcns-len(test_fcns))) 51 | init_sigmas.extend(init_sigma_test) 52 | #append the initial locations of the conditions in the test set 53 | for i in test_fcns: 54 | init_locs.append([0]*input_dim) 55 | 56 | else: 57 | num_fcns = 1 58 | # We don't do any training so we evaluate on all the conditions in the 'training set' 59 | train_fcns = range(num_fcns) 60 | test_fcns = train_fcns 61 | fcn_ids = [12] 62 | fcn_names = ["BentCigar"] 63 | init_sigmas = [1.28]*len(test_fcns) 64 | for i in test_fcns: 65 | init_locs.append([0]*input_dim) 66 | 67 | 68 | cur_dir = os.path.dirname(os.path.abspath(__file__)) 69 | 70 | 71 | fcn_objs = [] 72 | fcns = [] 73 | for i in range(num_fcns//len(fcn_ids)): 74 | #instantiate BBOB functions based on their ID 75 | for i in fcn_ids: 76 | fcn_objs.append(bn.instantiate(i)[0]) 77 | 78 | for i,function in enumerate(fcn_objs): 79 | fcns.append({'fcn_obj': function, 'dim': input_dim, 'init_loc': list(init_locs[i]), 'init_sigma': init_sigmas[i]}) 80 | 81 | SENSOR_DIMS = { 82 | PAST_OBJ_VAL_DELTAS: history_len, 83 | CUR_PS: 1, 84 | CUR_SIGMA : 1, 85 | ACTION: 1, 86 | PAST_SIGMA: 
history_len 87 | } 88 | 89 | BASE_DIR = '/'.join(str.split(gps_filepath, '/')[:-2]) 90 | EXP_DIR = BASE_DIR + '/../examples/BentCigar' + '/' 91 | 92 | 93 | common = { 94 | 'experiment_name': 'CMA_' + \ 95 | datetime.strftime(datetime.now(), '%m-%d-%y_%H-%M'), 96 | 'experiment_dir': EXP_DIR, 97 | 'data_files_dir': EXP_DIR + 'data_files/', 98 | 'plot_filename': EXP_DIR + 'plot', 99 | 'log_filename': EXP_DIR + 'log_data', 100 | 'conditions': num_fcns, 101 | 'train_conditions': train_fcns, 102 | 'test_conditions': test_fcns, 103 | 'test_functions': fcn_names 104 | } 105 | 106 | if not os.path.exists(common['data_files_dir']): 107 | os.makedirs(common['data_files_dir']) 108 | 109 | agent = { 110 | 'type': AgentCMAES, 111 | 'world' : CMAESWorld, 112 | 'init_sigma': 0.3, 113 | 'popsize': 10, 114 | 'n_min':10, 115 | 'max_nfe': 200000, 116 | 'substeps': 1, 117 | 'conditions': common['conditions'], 118 | 'dt': 0.05, 119 | 'T': 50, 120 | 'sensor_dims': SENSOR_DIMS, 121 | 'state_include': [PAST_OBJ_VAL_DELTAS, CUR_SIGMA, CUR_PS, PAST_SIGMA], 122 | 'obs_include': [PAST_OBJ_VAL_DELTAS, CUR_PS, PAST_SIGMA, CUR_SIGMA], 123 | 'history_len': history_len, 124 | 'fcns': fcns 125 | } 126 | 127 | algorithm = { 128 | 'type': Algorithm, 129 | 'conditions': common['conditions'], 130 | 'train_conditions': train_fcns, 131 | 'test_conditions': test_fcns, 132 | 'test_functions': fcn_names, 133 | 'iterations': 15, 134 | 'inner_iterations': 4, 135 | 'policy_dual_rate': 0.2, 136 | 'init_pol_wt': 0.01, 137 | 'ent_reg_schedule': 0.0, 138 | 'fixed_lg_step': 3, 139 | 'kl_step': 0.2, 140 | 'min_step_mult': 0.01, 141 | 'max_step_mult': 10.0, 142 | 'sample_decrease_var': 0.05, 143 | 'sample_increase_var': 0.1, 144 | 'policy_sample_mode': 'replace', 145 | 'exp_step_lower': 2, 146 | 'exp_step_upper': 2 147 | } 148 | 149 | algorithm['init_traj_distr'] = { 150 | 'type': init_cmaes_controller, 151 | 'init_var': 0.01, 152 | 'dt': agent['dt'], 153 | 'T': agent['T'] 154 | } 155 | 156 | algorithm['cost'] = { 157 
| 'type': Cost, 158 | 'ramp_option': RAMP_CONSTANT, 159 | 'wp_final_multiplier': 1.0, 160 | 'weight': 1.0, 161 | } 162 | 163 | algorithm['dynamics'] = { 164 | 'type': DynamicsLRPrior, 165 | 'regularization': 1e-3, # Increase this if Qtt is not PD during DGD 166 | 'clipping_thresh': None, 167 | 'prior': { 168 | 'type': DynamicsPriorGMM, 169 | 'max_clusters': 20, 170 | 'min_samples_per_cluster': 20, 171 | 'max_samples': 20, 172 | 'strength': 1.0 # How much weight to give to prior relative to samples 173 | } 174 | } 175 | 176 | algorithm['traj_opt'] = { 177 | 'type': TrajOpt, 178 | } 179 | 180 | algorithm['policy_opt'] = { 181 | 'type': PolicyOpt, 182 | 'network_model': fully_connected_tf_network, 183 | 'iterations': 20000, 184 | 'init_var': 0.01, 185 | 'batch_size': 25, 186 | 'solver_type': 'adam', 187 | 'lr': 0.0001, 188 | 'lr_policy': 'fixed', 189 | 'momentum': 0.9, 190 | 'weight_decay': 0.005, 191 | 'use_gpu': 0, 192 | 'weights_file_prefix': EXP_DIR + 'policy', 193 | 'network_params': { 194 | 'obs_include': agent['obs_include'], 195 | 'sensor_dims': agent['sensor_dims'], 196 | 'dim_hidden': [50, 50] 197 | } 198 | } 199 | 200 | algorithm['policy_prior'] = { 201 | 'type': PolicyPriorGMM, 202 | 'max_clusters': 20, 203 | 'min_samples_per_cluster': 20, 204 | 'max_samples': 20, 205 | 'strength': 1.0, 206 | 'clipping_thresh': None, 207 | 'init_regularization': 1e-3, 208 | 'subsequent_regularization': 1e-3 209 | } 210 | 211 | config = { 212 | 'iterations': algorithm['iterations'], 213 | 'num_samples': 25, 214 | 'common': common, 215 | 'agent': agent, 216 | 'algorithm': algorithm, 217 | 'train_conditions': train_fcns, 218 | 'test_conditions': test_fcns, 219 | 'test_functions': fcn_names, 220 | 'policy_path': EXP_DIR + 'data_files/policy_itr_14.pkl' 221 | } 222 | 223 | -------------------------------------------------------------------------------- /plots/Plot_ObjectiveValue_AttractiveSector.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/plots/Plot_ObjectiveValue_AttractiveSector.pdf -------------------------------------------------------------------------------- /plots/Plot_ObjectiveValue_BentCigar.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/plots/Plot_ObjectiveValue_BentCigar.pdf -------------------------------------------------------------------------------- /plots/Plot_ObjectiveValue_BuecheRastrigin.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/plots/Plot_ObjectiveValue_BuecheRastrigin.pdf -------------------------------------------------------------------------------- /plots/Plot_ObjectiveValue_CompositeGR.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/plots/Plot_ObjectiveValue_CompositeGR.pdf -------------------------------------------------------------------------------- /plots/Plot_ObjectiveValue_DifferentPowers.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/plots/Plot_ObjectiveValue_DifferentPowers.pdf -------------------------------------------------------------------------------- /plots/Plot_ObjectiveValue_GG101me.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/plots/Plot_ObjectiveValue_GG101me.pdf -------------------------------------------------------------------------------- 
/plots/Plot_ObjectiveValue_GG21hi.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/plots/Plot_ObjectiveValue_GG21hi.pdf -------------------------------------------------------------------------------- /plots/Plot_ObjectiveValue_LinearSlope.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/plots/Plot_ObjectiveValue_LinearSlope.pdf -------------------------------------------------------------------------------- /plots/Plot_ObjectiveValue_LunacekBiRastrigin.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/plots/Plot_ObjectiveValue_LunacekBiRastrigin.pdf -------------------------------------------------------------------------------- /plots/Plot_ObjectiveValue_RosenbrockRotated.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/plots/Plot_ObjectiveValue_RosenbrockRotated.pdf -------------------------------------------------------------------------------- /plots/Plot_ObjectiveValue_SchaffersIllConditioned.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/plots/Plot_ObjectiveValue_SchaffersIllConditioned.pdf -------------------------------------------------------------------------------- /plots/Plot_ObjectiveValue_SharpRidge.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/plots/Plot_ObjectiveValue_SharpRidge.pdf 
-------------------------------------------------------------------------------- /plots/Plot_ObjectiveValue_StepEllipsoidal.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/plots/Plot_ObjectiveValue_StepEllipsoidal.pdf -------------------------------------------------------------------------------- /plots/Plot_StepSize_AttractiveSector.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/plots/Plot_StepSize_AttractiveSector.pdf -------------------------------------------------------------------------------- /plots/Plot_StepSize_BentCigar.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/plots/Plot_StepSize_BentCigar.pdf -------------------------------------------------------------------------------- /plots/Plot_StepSize_BuecheRastrigin.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/plots/Plot_StepSize_BuecheRastrigin.pdf -------------------------------------------------------------------------------- /plots/Plot_StepSize_CompositeGR.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/plots/Plot_StepSize_CompositeGR.pdf -------------------------------------------------------------------------------- /plots/Plot_StepSize_DifferentPowers.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/plots/Plot_StepSize_DifferentPowers.pdf 
-------------------------------------------------------------------------------- /plots/Plot_StepSize_GG101me.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/plots/Plot_StepSize_GG101me.pdf -------------------------------------------------------------------------------- /plots/Plot_StepSize_GG21hi.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/plots/Plot_StepSize_GG21hi.pdf -------------------------------------------------------------------------------- /plots/Plot_StepSize_LinearSlope.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/plots/Plot_StepSize_LinearSlope.pdf -------------------------------------------------------------------------------- /plots/Plot_StepSize_LunacekBiRastrigin.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/plots/Plot_StepSize_LunacekBiRastrigin.pdf -------------------------------------------------------------------------------- /plots/Plot_StepSize_RosenbrockRotated.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/plots/Plot_StepSize_RosenbrockRotated.pdf -------------------------------------------------------------------------------- /plots/Plot_StepSize_SchaffersIllConditioned.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/plots/Plot_StepSize_SchaffersIllConditioned.pdf 
-------------------------------------------------------------------------------- /plots/Plot_StepSize_SharpRidge.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/plots/Plot_StepSize_SharpRidge.pdf -------------------------------------------------------------------------------- /plots/Plot_StepSize_StepEllipsoidal.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/plots/Plot_StepSize_StepEllipsoidal.pdf -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | cma==3.0.3 2 | matplotlib 3 | numpy 4 | seaborn==0.11.1 5 | tensorflow==1.15.0 6 | protobuf==3.17.1 7 | -------------------------------------------------------------------------------- /scripts/plot_performance.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import numpy as np 4 | import seaborn as sns 5 | import matplotlib.pyplot as plt 6 | import argparse 7 | from datetime import datetime 8 | sns.set() 9 | 10 | from matplotlib import rcParams 11 | 12 | rcParams["font.size"] = "40" 13 | rcParams['text.usetex'] = False 14 | rcParams['font.family'] = 'serif' 15 | rcParams['figure.figsize'] = (16.0, 9.0) 16 | rcParams['figure.frameon'] = True 17 | rcParams['figure.edgecolor'] = 'k' 18 | rcParams['grid.color'] = 'k' 19 | rcParams['grid.linestyle'] = ':' 20 | rcParams['grid.linewidth'] = 0.5 21 | rcParams['axes.linewidth'] = 3 22 | rcParams['axes.edgecolor'] = 'k' 23 | rcParams['axes.grid.which'] = 'both' 24 | rcParams['legend.frameon'] = 'True' 25 | rcParams['legend.framealpha'] = 1 26 | rcParams['legend.fontsize'] = 30 27 | 28 | rcParams['ytick.major.size'] = 32 29 | 
rcParams['ytick.major.width'] = 6
rcParams['ytick.minor.size'] = 6
rcParams['ytick.minor.width'] = 1
rcParams['xtick.major.size'] = 32
rcParams['xtick.major.width'] = 6
rcParams['xtick.minor.size'] = 6
rcParams['xtick.minor.width'] = 1
rcParams['xtick.labelsize'] = 32
rcParams['ytick.labelsize'] = 32

def dir_path(path):
    """argparse type-checker: return `path` unchanged if it is an existing file.

    Raises:
        argparse.ArgumentTypeError: if `path` does not point to a file.
    """
    if os.path.isfile(path):
        return path
    # Original message claimed "readable_dir" although a *file* is required.
    raise argparse.ArgumentTypeError("%s is not a valid path to a file" % path)


parser = argparse.ArgumentParser(description='Script to plot LTO test data.')
parser.add_argument('--lto_path', type=dir_path, help="Path to the LTO data file.",
                    default=os.path.join("..", "examples", "10BBOB", "GallaghersGaussian21hi_LTO.json"))
parser.add_argument('--csa_path', type=dir_path, help="Path to the CSA data file.",
                    default=os.path.join("..", "data", "PPSN_LTO_Data", "CSA_Data", "CSA_Plots_10D", "GallaghersGaussian21hi.json"))
parser.add_argument('--function', type=str, help="Function being plotted",
                    default="GallaghersGaussian21hi")

args = parser.parse_args()
lto_path = args.lto_path
csa_path = args.csa_path
function = args.function
# One CMA-ES generation = `popsize` function evaluations (used for tick labels).
popsize = 10

# Load the per-generation statistics dumped by the LTO and CSA runs.
with open(lto_path) as json_file:
    data_LTO = json.load(json_file)
with open(csa_path) as json_file:
    data_CSA = json.load(json_file)
generations = len(data_LTO["Average costs LTO"])
num_feval = generations * popsize


def _plot_band(values, stds, label, color):
    """Plot `values` vs. generation index with a +/- `stds` shaded band."""
    x = np.arange(1, len(values) + 1)
    plt.fill_between(x, np.subtract(values, stds), np.add(values, stds),
                     color=color, alpha=0.1)
    plt.plot(x, values, linewidth=4, label=label, color=color)


def _feval_xticks():
    """Relabel x ticks in function evaluations (generation * popsize)."""
    # Guard: for runs shorter than 5 generations the original step
    # `generations//5` would be 0 and np.arange would raise.
    step = max(1, generations // 5)
    ticks = np.arange(start=1, stop=generations, step=step)
    labels = [str(10)] + [str(gen * 10) for gen in
                          np.arange(start=10, stop=generations, step=step)]
    plt.xticks(ticks, labels)


# ---- Figure 1: step size over time -------------------------------------
plt.tick_params(axis='x', which='minor')
plt.xlabel("Num FEval", fontsize=50)
plt.ylabel("Step Size", fontsize=50)
_feval_xticks()
plt.title(function)
_plot_band(data_LTO["Sigma LTO"], data_LTO["Std Sigma LTO"], "LTO",
           sns.xkcd_rgb["magenta"])
_plot_band(data_CSA["Sigma CSA"], data_CSA["Std Sigma CSA"], "CSA",
           sns.xkcd_rgb["green"])

# Build the legend only after the labelled artists exist; the original
# called plt.legend(...) before any plot, which drew nothing, emitted a
# warning, and silently dropped the loc/fontsize/ncol styling.
plt.legend(loc=0, fontsize=25, ncol=2)
plot_type = "StepSize"  # renamed from `type` to avoid shadowing the builtin
output_path = os.path.join("..", "plots")
os.makedirs(output_path, exist_ok=True)
timestamp = datetime.now()
time = str(timestamp)
plot_file = ('Plot_%s_%s_%s.pdf' % (plot_type, function, time))
plt.savefig(os.path.join(output_path, plot_file), bbox_inches='tight')
plt.clf()


# ---- Figure 2: objective value over time (CSA curve and save happen in
# the statements that follow this span) -----------------------------------
plt.tick_params(axis='x', which='minor')
plt.xlabel("Num FEval", fontsize=50)
plt.ylabel("Objective Value", fontsize=50)
plt.xscale("log")
plt.title(function)
_feval_xticks()

_plot_band(data_LTO["Average costs LTO"], data_LTO["Std costs LTO"], "LTO",
           sns.xkcd_rgb["magenta"])
117 | plt.fill_between(list(np.arange(1, len(data_CSA["Average costs CSA"]) + 1)), 118 | np.subtract(data_CSA["Average costs CSA"], data_CSA["Std costs CSA"]), 119 | np.add(data_CSA["Average costs CSA"], data_CSA["Std costs CSA"]), alpha=0.1, 120 | color=sns.xkcd_rgb["green"]) 121 | plt.plot(list(np.arange(1, len(data_CSA["Average costs CSA"]) + 1)), data_CSA["Average costs CSA"], 122 | linewidth=4, label="CSA", color=sns.xkcd_rgb["green"]) 123 | 124 | plt.legend() 125 | type = "ObjectiveValue" 126 | timestamp = datetime.now() 127 | time = str(timestamp) 128 | plot_file = ('Plot_%s_%s_%s.pdf' % (type, function, time)) 129 | plt.savefig(os.path.join(output_path, plot_file), bbox_inches='tight') 130 | plt.clf() 131 | -------------------------------------------------------------------------------- /source/gps/README.md: -------------------------------------------------------------------------------- 1 | We built upon the GPS version as given by Li and Malik [[1]](#1) 2 | The original GPS code of Li and Malik can be found at [https://www.math.ias.edu/~ke.li/downloads/lto_code.tar.gz](https://www.math.ias.edu/~ke.li/downloads/lto_code.tar.gz) 3 | 4 | In a nutshell, we modified the GPS code to be able to continuously sample from the starting teacher. To this end we introduce a sampling rate that determines how often we use new samples generated from the starting policy. 5 | 6 | [1] 7 | Li, K., Malik, J.: Learning to optimize. 
In: Proceedings of the International 8 | Conference on Learning Representations (ICLR’17) (2017), published online: 9 | [iclr.cc](iclr.cc) 10 | 11 | ### Contents 12 | ```bash 13 | |-gps 14 | | |-agent # code for the LTO agent 15 | | | |-lto # code for the CMAES world and agent 16 | | |-sample # handling the trajectory samples 17 | | |-utility # utilities, including the handling of logging and output 18 | | |-proto # the protocol buffers 19 | | |-algorithm 20 | | | |-cost # code for computing the cost of trajectories 21 | | | |-traj_opt # code for trajectory optimization 22 | | | |-policy # policies that are used to obtain samples (CSA, Linear Gaussian and NN) 23 | | | |-policy_opt # code for policy optimization 24 | | | |-dynamics # code for handling the dynamics 25 | ``` 26 | This *gps* directory contains the code to run LTO-CMA. The file tree above lists the directories it consists of, with short descriptions of the code they contain. The code in the directory is under a *GNU GENERAL PUBLIC LICENSE v3*, except the specific files mentioned in the Modifications section, which are under an *APACHE v2* license. 27 | 28 | #### Modifications to GPS code 29 | In order to implement our approach, we have made modifications to the GPS code provided by Li and Malik. Below is the file tree depicting the list of files that have been either added or modified. 
class Agent(abc.ABC):
    """
    Agent superclass. The agent interacts with the environment to
    collect samples.

    NOTE(review): the original declared ``__metaclass__ = abc.ABCMeta``,
    which Python 3 silently ignores, so ``sample`` was never actually
    enforced as abstract. Subclassing ``abc.ABC`` restores the intended
    enforcement; all concrete agents in this repo implement ``sample``.
    """

    def __init__(self, hyperparams):
        """
        Args:
            hyperparams: Dictionary merged over the AGENT defaults. Must
                contain 'conditions', 'T', 'sensor_dims', 'state_include'
                and 'obs_include'; 'meta_include' is optional.
        """
        config = copy.deepcopy(AGENT)
        config.update(hyperparams)
        self._hyperparams = config

        # Store samples, along with size/index information for samples.
        self._samples = [[] for _ in range(self._hyperparams['conditions'])]
        self.T = self._hyperparams['T']
        self.dU = self._hyperparams['sensor_dims'][ACTION]

        self.x_data_types = self._hyperparams['state_include']
        self.obs_data_types = self._hyperparams['obs_include']
        self.meta_data_types = self._hyperparams.get('meta_include', [])

        # Contiguous index ranges for each data type in state X,
        # observation O, and meta data M, plus their total dimensions.
        self._state_idx, self.dX = self._index_ranges(self.x_data_types)
        self._obs_idx, self.dO = self._index_ranges(self.obs_data_types)
        self._meta_idx, self.dM = self._index_ranges(self.meta_data_types)

        self._x_data_idx = dict(zip(self.x_data_types, self._state_idx))
        self._obs_data_idx = dict(zip(self.obs_data_types, self._obs_idx))
        self._meta_data_idx = dict(zip(self.meta_data_types, self._meta_idx))

    def _index_ranges(self, data_types):
        """
        Lay out sensors contiguously and return
        (list of per-sensor index lists, total dimension).
        """
        ranges, offset = [], 0
        for sensor in data_types:
            dim = self._hyperparams['sensor_dims'][sensor]
            ranges.append(list(range(offset, offset + dim)))
            offset += dim
        return ranges, offset

    @abc.abstractmethod
    def sample(self, policy, condition, verbose=True, save=True, noisy=True):
        """
        Draw a sample from the environment, using the specified policy
        and under the specified condition, with or without noise.
        """
        raise NotImplementedError("Must be implemented in subclass.")

    def reset(self, condition):
        """ Reset environment to the specified condition. """
        pass  # May be overridden in subclass.

    def get_samples(self, condition, start=0, end=None):
        """
        Return the requested samples based on the start and end indices.
        Args:
            start: Starting index of samples to return.
            end: End index of samples to return (exclusive); None = all.
        """
        return (SampleList(self._samples[condition][start:]) if end is None
                else SampleList(self._samples[condition][start:end]))

    def clear_samples(self, condition=None):
        """
        Reset the samples for a given condition, defaulting to all conditions.
        Args:
            condition: Condition for which to reset samples.
        """
        if condition is None:
            self._samples = [[] for _ in range(self._hyperparams['conditions'])]
        else:
            self._samples[condition] = []

    def delete_last_sample(self, condition):
        """ Delete the last sample from the specified condition. """
        self._samples[condition].pop()

    def get_idx_x(self, sensor_name):
        """ Return the state indices corresponding to a sensor name. """
        return self._x_data_idx[sensor_name]

    def get_idx_obs(self, sensor_name):
        """ Return the observation indices for a sensor name. """
        return self._obs_data_idx[sensor_name]

    def _check_axes(self, num_sensor, axes):
        """ Default axes to the last dimensions and validate their count. """
        if axes is None:
            # If axes not specified, assume indexing on last dimensions.
            return list(range(-1, -num_sensor - 1, -1))
        if num_sensor != len(axes):
            # BUGFIX: the original passed the arguments to ValueError as
            # extra positional args, so the message was never interpolated.
            raise ValueError(
                'Length of sensors (%d) must equal length of axes (%d)'
                % (num_sensor, len(axes)))
        return axes

    @staticmethod
    def _slice_index(data_idx, data_types, axes, ndim):
        """ Build the tuple index that selects each sensor's slice. """
        index = [slice(None)] * ndim
        for i, data_type in enumerate(data_types):
            idx = data_idx[data_type]
            index[axes[i]] = slice(idx[0], idx[-1] + 1)
        # BUGFIX: indexing an ndarray with a *list* of slices is deprecated
        # and an error in modern NumPy; a tuple is required.
        return tuple(index)

    def pack_data_obs(self, existing_mat, data_to_insert, data_types,
                      axes=None):
        """
        Update the observation matrix with new data.
        Args:
            existing_mat: Current observation matrix.
            data_to_insert: New data to insert into the existing matrix.
            data_types: Name of the sensors to insert data for.
            axes: Which axes to insert data. Defaults to the last axes.

        NOTE: unlike pack_data_x/pack_data_meta, there is deliberately no
        shape check here — the original disabled it (commented out).
        """
        num_sensor = len(data_types)
        axes = self._check_axes(num_sensor, axes)
        for i in range(num_sensor):
            # Make sure to slice along O.
            if existing_mat.shape[axes[i]] != self.dO:
                raise ValueError('Axes must be along an dO=%d dimensional axis'
                                 % self.dO)
        sl = self._slice_index(self._obs_data_idx, data_types, axes,
                               existing_mat.ndim)
        existing_mat[sl] = data_to_insert

    def pack_data_meta(self, existing_mat, data_to_insert, data_types,
                       axes=None):
        """
        Update the meta data matrix with new data.
        Args:
            existing_mat: Current meta data matrix.
            data_to_insert: New data to insert into the existing matrix.
            data_types: Name of the sensors to insert data for.
            axes: Which axes to insert data. Defaults to the last axes.
        """
        num_sensor = len(data_types)
        axes = self._check_axes(num_sensor, axes)
        insert_shape = list(existing_mat.shape)
        for i in range(num_sensor):
            # Make sure to slice along M.
            if existing_mat.shape[axes[i]] != self.dM:
                raise ValueError('Axes must be along an dM=%d dimensional axis'
                                 % self.dM)
            insert_shape[axes[i]] = len(self._meta_data_idx[data_types[i]])
        if tuple(insert_shape) != data_to_insert.shape:
            raise ValueError('Data has shape %s. Expected %s'
                             % (data_to_insert.shape, tuple(insert_shape)))
        sl = self._slice_index(self._meta_data_idx, data_types, axes,
                               existing_mat.ndim)
        existing_mat[sl] = data_to_insert

    def pack_data_x(self, existing_mat, data_to_insert, data_types, axes=None):
        """
        Update the state matrix with new data.
        Args:
            existing_mat: Current state matrix.
            data_to_insert: New data to insert into the existing matrix.
                A plain list is converted/reshaped to the expected shape.
            data_types: Name of the sensors to insert data for.
            axes: Which axes to insert data. Defaults to the last axes.
        """
        num_sensor = len(data_types)
        axes = self._check_axes(num_sensor, axes)
        insert_shape = list(existing_mat.shape)
        for i in range(num_sensor):
            # Make sure to slice along X.
            if existing_mat.shape[axes[i]] != self.dX:
                raise ValueError('Axes must be along an dX=%d dimensional axis'
                                 % self.dX)
            insert_shape[axes[i]] = len(self._x_data_idx[data_types[i]])
        if isinstance(data_to_insert, list):
            data_to_insert = np.array(data_to_insert).reshape(
                tuple(insert_shape))
        if tuple(insert_shape) != data_to_insert.shape:
            raise ValueError('Data has shape %s. Expected %s'
                             % (data_to_insert.shape, tuple(insert_shape)))
        sl = self._slice_index(self._x_data_idx, data_types, axes,
                               existing_mat.ndim)
        existing_mat[sl] = data_to_insert

    def unpack_data_x(self, existing_mat, data_types, axes=None):
        """
        Returns the requested data from the state matrix.
        Args:
            existing_mat: State matrix to unpack from.
            data_types: Names of the sensor to unpack.
            axes: Which axes to unpack along. Defaults to the last axes.
        """
        num_sensor = len(data_types)
        axes = self._check_axes(num_sensor, axes)
        for i in range(num_sensor):
            # Make sure to slice along X.
            if existing_mat.shape[axes[i]] != self.dX:
                raise ValueError('Axes must be along an dX=%d dimensional axis'
                                 % self.dX)
        sl = self._slice_index(self._x_data_idx, data_types, axes,
                               existing_mat.ndim)
        return existing_mat[sl]

    def get_vectorized_state(self, state, condition=None):
        """
        Flatten a {sensor: value} state dict into a (dX,) vector, laid
        out according to self._x_data_idx. Asserts every slot was filled.
        """
        state_vector = np.full((self.dX,), np.nan)
        for data_type in self.x_data_types:
            self.pack_data_x(state_vector, state[data_type],
                             data_types=[data_type])
        assert not np.any(np.isnan(state_vector))
        return state_vector
= self._hyperparams['fcns'] 19 | self.history_len = self._hyperparams['history_len'] 20 | self.init_sigma = self._hyperparams['init_sigma'] 21 | self.popsize = self._hyperparams['popsize'] 22 | 23 | def _setup_worlds(self): 24 | fcn = [] 25 | hpolib = False 26 | for i in range(self.conds): 27 | if 'fcn_obj' in self.fcns[i]: 28 | fcn.append(self.fcns[i]['fcn_obj']) 29 | else: 30 | fcn.append(None) 31 | if 'hpolib' in self.fcns[i]: 32 | hpolib = True 33 | benchmark = None 34 | if 'benchmark' in self.fcns[0]: 35 | benchmark = self.fcns[0]['benchmark'] 36 | self._worlds = [CMAESWorld(self.fcns[i]['dim'], self.fcns[i]['init_loc'], self.fcns[i]['init_sigma'], self.popsize, self.history_len, fcn=fcn[i], hpolib=hpolib, benchmark=benchmark) for i in range(self.conds)] 37 | self.x0 = [] 38 | 39 | for i in range(self.conds): 40 | self._worlds[i].reset_world() 41 | self._worlds[i].run() # Get noiseless initial state 42 | x0 = self.get_vectorized_state(self._worlds[i].get_state()) 43 | self.x0.append(x0) 44 | 45 | 46 | def sample(self, policy, condition, start_policy=None, verbose=False, save=True, noisy=True, ltorun=False, guided_steps=0, t_length=None): 47 | """ 48 | Runs a trial and constructs a new sample containing information 49 | about the trial. 50 | 51 | Args: 52 | policy: Policy to to used in the trial. 53 | condition (int): Which condition setup to run. 54 | verbose (boolean): Whether or not to plot the trial (not used here). 55 | save (boolean): Whether or not to store the trial into the samples. 56 | noisy (boolean): Whether or not to use noise during sampling. 
57 | """ 58 | if t_length == None: 59 | t_length = self.T 60 | self._worlds[condition].reset_world() 61 | self._worlds[condition].run(ltorun=ltorun) 62 | state = self._worlds[condition].get_state() 63 | new_sample = self._init_sample(self._worlds[condition].get_state()) 64 | #self._set_sample(new_sample, self._worlds[condition].get_state(), t=0) 65 | new_sample.trajectory.append(self._worlds[condition].fbest) 66 | U = np.zeros([t_length, self.dU]) 67 | if noisy: 68 | noise = np.random.randn(t_length, self.dU) 69 | else: 70 | noise = np.zeros((t_length, self.dU)) 71 | policy.reset() # To support non-Markovian policies 72 | for t in range(t_length): 73 | es = self._worlds[condition].es 74 | f_vals = self._worlds[condition].func_values 75 | obs_t = new_sample.get_obs(t=t) 76 | X_t = self.get_vectorized_state(self._worlds[condition].get_state(), condition) 77 | if np.any(np.isnan(X_t)): 78 | print("X_t: %s" % X_t) 79 | if ltorun and t < guided_steps * t_length and start_policy != None: 80 | U[t,:] = start_policy.act(es, f_vals, obs_t, t, noise[t,:]) 81 | else: 82 | U[t, :] = policy.act(X_t, obs_t, t, noise[t, :],es, f_vals) 83 | if (t+1) < t_length: 84 | next_action = U[t, :] #* es.sigma 85 | self._worlds[condition].run_next(next_action) 86 | self._set_sample(new_sample, self._worlds[condition].get_state(), t) 87 | new_sample.trajectory.append(self._worlds[condition].fbest) 88 | new_sample.set(ACTION, U) 89 | policy.finalize() 90 | if save: 91 | self._samples[condition].append(new_sample) 92 | return new_sample 93 | 94 | def _init_sample(self, init_X): 95 | """ 96 | Construct a new sample and fill in the first time step. 
97 | """ 98 | sample = Sample(self) 99 | self._set_sample(sample, init_X, -1) 100 | return sample 101 | 102 | def _set_sample(self, sample, X, t): 103 | for sensor in X.keys(): 104 | sample.set(sensor, np.array(X[sensor]), t=t+1) 105 | -------------------------------------------------------------------------------- /source/gps/agent/lto/cmaes_world.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import deque 3 | from cma.evolution_strategy import CMAEvolutionStrategy, CMAOptions 4 | from gps.proto.gps_pb2 import CUR_LOC, PAST_OBJ_VAL_DELTAS, CUR_PS, CUR_SIGMA, PAST_LOC_DELTAS, PAST_SIGMA 5 | import threading 6 | import concurrent.futures 7 | 8 | def _norm(x): return np.sqrt(np.sum(np.square(x))) 9 | class CMAESWorld(object): 10 | def __init__(self, dim, init_loc, init_sigma, init_popsize, history_len, fcn=None, hpolib=False, benchmark=None): 11 | if fcn is not None: 12 | self.fcn = fcn 13 | else: 14 | self.fcn = None 15 | self.hpolib = hpolib 16 | self.benchmark = benchmark 17 | self.b = None 18 | self.bounds = [None, None] 19 | self.dim = dim 20 | self.init_loc = init_loc 21 | self.init_sigma = init_sigma 22 | self.init_popsize = init_popsize 23 | self.fbest = None 24 | self.history_len = history_len 25 | self.past_locs = deque(maxlen=history_len) 26 | self.past_obj_vals = deque(maxlen=history_len) 27 | self.past_sigma = deque(maxlen=history_len) 28 | self.solutions = None 29 | self.func_values = [] 30 | self.f_vals = deque(maxlen=self.init_popsize) 31 | self.lock = threading.Lock() 32 | self.chi_N = dim**0.5 * (1 - 1. / (4.*dim) + 1. 
/ (21.*dim**2)) 33 | 34 | 35 | def run(self, batch_size="all", ltorun=False): 36 | """Initiates the first time step""" 37 | #self.fcn.new_sample(batch_size=batch_size) 38 | self.cur_loc = self.init_loc 39 | self.cur_sigma = self.init_sigma 40 | self.cur_ps = 0 41 | self.es = CMAEvolutionStrategy(self.cur_loc, self.init_sigma, {'popsize': self.init_popsize, 'bounds': self.bounds}) 42 | self.solutions, self.func_values = self.es.ask_and_eval(self.fcn) 43 | self.fbest = self.func_values[np.argmin(self.func_values)] 44 | self.cur_obj_val = self.fbest 45 | self.f_difference = np.abs(np.amax(self.func_values) - self.cur_obj_val)/float(self.cur_obj_val) 46 | self.velocity = np.abs(np.amin(self.func_values) - self.cur_obj_val)/float(self.cur_obj_val) 47 | self.es.mean_old = self.es.mean 48 | self.past_locs.append([self.f_difference, self.velocity]) 49 | 50 | # action is of shape (dU,) 51 | def run_next(self, action): 52 | self.past_locs.append([self.f_difference, self.velocity]) 53 | if not self.es.stop(): 54 | """Moves forward in time one step""" 55 | sigma = action 56 | self.es.tell(self.solutions, self.func_values) 57 | self.es.sigma = min(max(sigma, 0.05), 10) 58 | self.solutions, self.func_values = self.es.ask_and_eval(self.fcn) 59 | 60 | self.f_difference = np.nan_to_num(np.abs(np.amax(self.func_values) - self.cur_obj_val)/float(self.cur_obj_val)) 61 | self.velocity = np.nan_to_num(np.abs(np.amin(self.func_values) - self.cur_obj_val)/float(self.cur_obj_val)) 62 | self.fbest = min(self.es.best.f, np.amin(self.func_values)) 63 | 64 | self.past_obj_vals.append(self.cur_obj_val) 65 | self.past_sigma.append(self.cur_sigma) 66 | self.cur_ps = _norm(self.es.adapt_sigma.ps) / self.chi_N - 1 67 | self.cur_loc = self.es.best.x 68 | self.cur_sigma = self.es.sigma 69 | self.cur_obj_val = self.es.best.f 70 | 71 | def reset_world(self): 72 | self.past_locs.clear() 73 | self.past_obj_vals.clear() 74 | self.past_sigma.clear() 75 | self.cur_loc = self.init_loc 76 | self.cur_sigma = 
self.init_sigma 77 | self.cur_ps = 0 78 | self.func_values = [] 79 | 80 | 81 | def get_state(self): 82 | past_obj_val_deltas = [] 83 | for i in range(1,len(self.past_obj_vals)): 84 | past_obj_val_deltas.append((self.past_obj_vals[i] - self.past_obj_vals[i-1]+1e-3) / float(self.past_obj_vals[i-1])) 85 | if len(self.past_obj_vals) > 0: 86 | past_obj_val_deltas.append((self.cur_obj_val - self.past_obj_vals[-1]+1e-3)/ float(self.past_obj_vals[-1])) 87 | past_obj_val_deltas = np.array(past_obj_val_deltas).reshape(-1) 88 | 89 | past_loc_deltas = [] 90 | for i in range(len(self.past_locs)): 91 | past_loc_deltas.append(self.past_locs[i]) 92 | past_loc_deltas = np.array(past_loc_deltas).reshape(-1) 93 | past_sigma_deltas = [] 94 | for i in range(len(self.past_sigma)): 95 | past_sigma_deltas.append(self.past_sigma[i]) 96 | past_sigma_deltas = np.array(past_sigma_deltas).reshape(-1) 97 | past_obj_val_deltas = np.hstack((np.zeros((self.history_len-past_obj_val_deltas.shape[0],)), past_obj_val_deltas)) 98 | past_loc_deltas = np.hstack((np.zeros((self.history_len*2-past_loc_deltas.shape[0],)), past_loc_deltas)) 99 | past_sigma_deltas = np.hstack((np.zeros((self.history_len-past_sigma_deltas.shape[0],)), past_sigma_deltas)) 100 | 101 | cur_loc = self.cur_loc 102 | cur_ps = self.cur_ps 103 | cur_sigma = self.cur_sigma 104 | 105 | state = {CUR_LOC: cur_loc, 106 | PAST_OBJ_VAL_DELTAS: past_obj_val_deltas, 107 | CUR_PS: cur_ps, 108 | CUR_SIGMA: cur_sigma, 109 | PAST_LOC_DELTAS: past_loc_deltas, 110 | PAST_SIGMA: past_sigma_deltas 111 | } 112 | return state 113 | 114 | -------------------------------------------------------------------------------- /source/gps/algorithm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/source/gps/algorithm/__init__.py -------------------------------------------------------------------------------- 
class IterationData(BundleType):
    """ Collection of iteration variables. """
    def __init__(self):
        BundleType.__init__(self, dict(
            sample_list=None,      # Samples collected this iteration.
            traj_info=None,        # Current TrajectoryInfo object.
            pol_info=None,         # Current PolicyInfo object.
            traj_distr=None,       # Initial trajectory distribution.
            new_traj_distr=None,   # Updated trajectory distribution.
            cs=None,               # Sample costs of this iteration.
            step_mult=1.0,         # KL step multiplier for this iteration.
            eta=1.0,               # Dual variable for the LQR backward pass.
        ))


class TrajectoryInfo(BundleType):
    """ Collection of trajectory-related variables. """
    def __init__(self):
        BundleType.__init__(self, dict(
            dynamics=None,              # Dynamics object this iteration.
            x0mu=None,                  # Initial-state mean (for dynamics).
            x0sigma=None,               # Initial-state covariance.
            cc=None,                    # Cost estimate, constant term.
            cv=None,                    # Cost estimate, vector term.
            Cm=None,                    # Cost estimate, matrix term.
            last_kl_step=float('inf'),  # KL step of the previous iteration.
        ))


class PolicyInfo(BundleType):
    """ Collection of policy-related variables. """
    def __init__(self, hyperparams):
        T, dU, dX = hyperparams['T'], hyperparams['dU'], hyperparams['dX']
        # ('pol_mu' / 'pol_sig' — mean/covariance of the current policy
        # output — were present upstream but are disabled here.)
        BundleType.__init__(self, dict(
            lambda_k=np.zeros((T, dU)),          # Dual variables.
            lambda_K=np.zeros((T, dU, dX)),      # Dual variables.
            pol_wt=hyperparams['init_pol_wt'] * np.ones(T),  # Policy weight.
            pol_K=np.zeros((T, dU, dX)),         # Policy linearization.
            pol_k=np.zeros((T, dU)),             # Policy linearization.
            pol_S=np.zeros((T, dU, dU)),         # Linearization covariance.
            chol_pol_S=np.zeros((T, dU, dU)),    # Cholesky decomp of covar.
            prev_kl=None,                        # Previous KL divergence.
            init_kl=None,                        # KL before the iteration.
            policy_samples=[],                   # Current policy samples.
            policy_prior=None,                   # Prior for linearization.
        ))

    def traj_distr(self):
        """ Create a trajectory distribution object from policy info. """
        T, dU, dX = self.pol_K.shape
        # Invert each time step's policy covariance via its Cholesky factor.
        inv_pol_S = np.empty_like(self.chol_pol_S)
        identity = np.eye(dU)
        for t in range(T):
            chol = self.chol_pol_S[t, :, :]
            inv_pol_S[t, :, :] = np.linalg.solve(
                chol, np.linalg.solve(chol.T, identity))
        return LinearGaussianPolicy(self.pol_K, self.pol_k, self.pol_S,
                                    self.chol_pol_S, inv_pol_S)
""" 76 | N, T, dX = X.shape 77 | dU = mu.shape[-1] 78 | if len(covar.shape) == 3: 79 | covar = np.tile(covar, [N, 1, 1, 1]) 80 | Xmu = np.concatenate([X, mu], axis=2) 81 | ev = np.mean(Xmu, axis=0) 82 | em = np.zeros((N, T, dX+dU, dX+dU)) 83 | pad1 = np.zeros((dX, dX+dU)) 84 | pad2 = np.zeros((dU, dX)) 85 | for n in range(N): 86 | for t in range(T): 87 | covar_pad = np.vstack([pad1, np.hstack([pad2, covar[n, t, :, :]])]) 88 | em[n, t, :, :] = np.outer(Xmu[n, t, :], Xmu[n, t, :]) + covar_pad 89 | return ev, em 90 | 91 | 92 | def gauss_fit_joint_prior(pts, mu0, Phi, m, n0, dwts, dX, dU, sig_reg, clipping_thresh = None): 93 | """ Perform Gaussian fit to data with a prior. """ 94 | # Build weights matrix. 95 | #D = np.diag(dwts) 96 | # Compute empirical mean and covariance. 97 | mun = np.sum((pts.T * dwts).T, axis=0) 98 | diff = pts - mun 99 | #empsig = diff.T.dot(D).dot(diff) 100 | empsig = (diff.T * dwts).dot(diff) 101 | empsig = 0.5 * (empsig + empsig.T) 102 | # MAP estimate of joint distribution. 103 | N = dwts.shape[0] 104 | mu = mun 105 | sigma = (N * empsig + Phi + (N * m) / (N + m) * 106 | np.outer(mun - mu0, mun - mu0)) / (N + n0) 107 | sigma = 0.5 * (sigma + sigma.T) 108 | # Add sigma regularization. 109 | sigma += sig_reg 110 | # Conditioning to get dynamics. 
111 | fd = np.linalg.solve(sigma[:dX, :dX], sigma[:dX, dX:dX+dU]).T 112 | ori_fd = fd 113 | if clipping_thresh is not None: 114 | fd = np.maximum(np.minimum(fd, clipping_thresh), -clipping_thresh) 115 | fc = mu[dX:dX+dU] - fd.dot(mu[:dX]) 116 | #dynsig = sigma[dX:dX+dU, dX:dX+dU] - ori_fd.dot(sigma[:dX, :dX]).dot(ori_fd.T) 117 | dynsig = sigma[dX:dX+dU, dX:dX+dU] - ori_fd.dot(sigma[:dX, dX:dX+dU]) # Mathematically equivalent to the above 118 | dynsig = 0.5 * (dynsig + dynsig.T) 119 | return fd, fc, dynsig 120 | -------------------------------------------------------------------------------- /source/gps/algorithm/config.py: -------------------------------------------------------------------------------- 1 | """ Default configuration and hyperparameter values for algorithms. """ 2 | 3 | # Algorithm 4 | ALG = { 5 | 'inner_iterations': 4, 6 | 'min_eta': 1e-5, # Minimum initial lagrange multiplier in DGD for 7 | # trajectory optimization. 8 | 'kl_step':0.2, 9 | 'min_step_mult':0.01, 10 | 'max_step_mult':10.0, 11 | # Trajectory settings. 12 | 'initial_state_var':1e-6, 13 | 'init_traj_distr': None, # A function that takes in two arguments, agent and cond, and returns a policy 14 | # Trajectory optimization. 15 | 'traj_opt': None, 16 | # Dynamics hyperaparams. 17 | 'dynamics': None, 18 | # Costs. 19 | 'cost': None, # A list of Cost objects for each condition. 
20 | 'sample_on_policy': False, 21 | 22 | 'policy_dual_rate': 0.1, 23 | 'policy_dual_rate_covar': 0.0, 24 | 'fixed_lg_step': 0, 25 | 'lg_step_schedule': 10.0, 26 | 'ent_reg_schedule': 0.0, 27 | 'init_pol_wt': 0.01, 28 | 'policy_sample_mode': 'add', 29 | 'exp_step_increase': 2.0, 30 | 'exp_step_decrease': 0.5, 31 | 'exp_step_upper': 0.5, 32 | 'exp_step_lower': 1.0 33 | } 34 | -------------------------------------------------------------------------------- /source/gps/algorithm/cost/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/source/gps/algorithm/cost/__init__.py -------------------------------------------------------------------------------- /source/gps/algorithm/cost/config.py: -------------------------------------------------------------------------------- 1 | """ Default configuration and hyperparameter values for costs. """ 2 | import numpy as np 3 | 4 | from gps.algorithm.cost.cost_utils import RAMP_CONSTANT 5 | 6 | COST = { 7 | 'ramp_option': RAMP_CONSTANT, # How target cost ramps over time. 8 | 'wp_final_multiplier': 1.0, # Weight multiplier on final time step. 
class Cost(object):
    """
    Cost that reads per-time-step objective values off a sample's
    recorded trajectory. Gradient/Hessian terms are returned as zeros;
    the analytic-derivative path was disabled upstream.
    """

    def __init__(self, hyperparams):
        """
        Args:
            hyperparams: Dict merged over the COST defaults; must contain
                'cur_cond_idx' (condition this cost belongs to).
        """
        config = copy.deepcopy(COST)
        config.update(hyperparams)
        self._hyperparams = config
        # Used by _eval_cost in algorithm.py
        self.weight = self._hyperparams['weight']
        self.cur_cond_idx = self._hyperparams['cur_cond_idx']

    def eval(self, sample, obj_val_only=False):
        """
        Evaluate cost function and derivatives on a sample.
        Args:
            sample: A single sample.
            obj_val_only: If True, return only (final_l,).
        Returns:
            (final_l,) or (final_l, final_lx, final_lu, final_lxx,
            final_luu, final_lux); all derivative terms are zeros.
        """
        T = sample.T
        Du = sample.dU
        Dx = sample.dX

        final_l = np.zeros(T)
        if not obj_val_only:
            final_lu = np.zeros((T, Du))
            final_lx = np.zeros((T, Dx))
            final_luu = np.zeros((T, Du, Du))
            final_lxx = np.zeros((T, Dx, Dx))
            final_lux = np.zeros((T, Du, Dx))

        # Time step-specific weights.
        wpm = get_ramp_multiplier(
            self._hyperparams['ramp_option'], T,
            wp_final_multiplier=self._hyperparams['wp_final_multiplier'],
            wp_custom=self._hyperparams.get('wp_custom')
        )

        # Objective values were recorded during sampling; the original
        # re-evaluation via fcn_obj (and its grad/hess, plus the unused
        # CUR_LOC fetch and ls/lss buffers feeding it) is disabled.
        for t in range(T):
            final_l[t] = sample.trajectory[t]
        final_l = final_l * wpm

        if obj_val_only:
            return (final_l,)
        return final_l, final_lx, final_lu, final_lxx, final_luu, final_lux
class DynamicsLRPrior(object):
    """ Dynamics with linear regression, with arbitrary prior. """

    def __init__(self, hyperparams):
        """
        Args:
            hyperparams: Must contain 'prior' (with a 'type' constructor),
                'regularization', and 'clipping_thresh'.
        """
        self._hyperparams = hyperparams
        # Fitted dynamics: x_t+1 = Fm * [x_t; u_t] + fv.
        # All three stay None until fit() is called.
        # CLEANUP: the original first assigned np.array(np.nan) to each and
        # then immediately overwrote them with None; the dead assignments
        # are dropped.
        self.Fm = None
        self.fv = None
        self.dyn_covar = None  # Covariance.
        self.prior = \
            self._hyperparams['prior']['type'](self._hyperparams['prior'])

    def update_prior(self, samples):
        """ Update dynamics prior with a SampleList. """
        X = samples.get_X()
        U = samples.get_U()
        self.prior.update(X, U)

    def get_prior(self):
        """ Return the dynamics prior. """
        return self.prior

    def fit(self, X, U):
        """
        Fit time-varying linear dynamics to trajectory data.
        Args:
            X: N x T x dX states; U: N x T x dU controls.
        Returns:
            (Fm, fv, dyn_covar); the entries for the final time step
            remain zero since there is no successor state.
        """
        N, T, dX = X.shape
        dU = U.shape[2]

        if N == 1:
            raise ValueError("Cannot fit dynamics on 1 sample")

        self.Fm = np.zeros([T, dX, dX+dU])
        self.fv = np.zeros([T, dX])
        self.dyn_covar = np.zeros([T, dX, dX])

        it = slice(dX+dU)
        # Fit dynamics with least squares regression.
        dwts = (1.0 / N) * np.ones(N)
        for t in range(T - 1):
            # Stack [x_t, u_t, x_{t+1}] per sample.
            Ys = np.c_[X[:, t, :], U[:, t, :], X[:, t+1, :]]
            # Obtain Normal-inverse-Wishart prior.
            mu0, Phi, mm, n0 = self.prior.eval(dX, dU, Ys)
            sig_reg = np.zeros((dX+dU+dX, dX+dU+dX))
            # Regularize only the [x; u] block.
            sig_reg[it, it] = self._hyperparams['regularization']*np.eye(dX+dU)
            Fm, fv, dyn_covar = gauss_fit_joint_prior(
                Ys, mu0, Phi, mm, n0, dwts, dX+dU, dX, sig_reg,
                self._hyperparams['clipping_thresh'])
            self.Fm[t, :, :] = Fm
            self.fv[t, :] = fv
            # Fm * [x; u] + fv gives the predicted state.
            self.dyn_covar[t, :, :] = dyn_covar
        return self.Fm, self.fv, self.dyn_covar

    def copy(self):
        """ Return a copy of the dynamics estimate. """
        dyn = type(self)(self._hyperparams)
        dyn.Fm = np.copy(self.Fm)
        dyn.fv = np.copy(self.fv)
        dyn.dyn_covar = np.copy(self.dyn_covar)
        return dyn
49 | n0 = self.X.shape[2] * self._strength 50 | m = self.X.shape[2] * self._strength 51 | 52 | # Multiply Phi by m (since it was normalized before). 53 | Phi = Phi * m 54 | return mu0, Phi, m, n0 55 | 56 | def update(self, X, U): 57 | """ 58 | Update prior with additional data. 59 | Args: 60 | X: A N x T x dX matrix of sequential state data. 61 | U: A N x T x dU matrix of sequential control data. 62 | """ 63 | # Constants. 64 | T = X.shape[1] - 1 65 | 66 | # Append data to dataset. 67 | if self.X is None: 68 | self.X = X 69 | else: 70 | self.X = np.concatenate([self.X, X], axis=0) 71 | 72 | if self.U is None: 73 | self.U = U 74 | else: 75 | self.U = np.concatenate([self.U, U], axis=0) 76 | 77 | # Remove excess samples from dataset. 78 | start = max(0, self.X.shape[0] - self._max_samples + 1) 79 | self.X = self.X[start:, :] 80 | self.U = self.U[start:, :] 81 | 82 | # Compute cluster dimensionality. 83 | Do = X.shape[2] + U.shape[2] + X.shape[2] 84 | 85 | # Create dataset. 86 | N = self.X.shape[0] 87 | xux = np.reshape( 88 | np.c_[self.X[:, :T, :], self.U[:, :T, :], self.X[:, 1:(T+1), :]], 89 | [T * N, Do] 90 | ) 91 | 92 | # Choose number of clusters. 93 | K = int(max(2, min(self._max_clusters, 94 | np.floor(float(N * T) / self._min_samp)))) 95 | LOGGER.debug('Generating %d clusters for dynamics GMM.', K) 96 | 97 | # Update GMM. 98 | self.gmm.update(xux, K) 99 | 100 | def eval(self, Dx, Du, pts): 101 | """ 102 | Evaluate prior. 103 | Args: 104 | pts: A N x Dx+Du+Dx matrix. 105 | """ 106 | # Construct query data point by rearranging entries and adding 107 | # in reference. 108 | assert pts.shape[1] == Dx + Du + Dx 109 | 110 | # Perform query and fix mean. 111 | mu0, Phi, m, n0 = self.gmm.inference(pts) 112 | 113 | # Factor in multiplier. 114 | n0 = n0 * self._strength 115 | m = m * self._strength 116 | 117 | # Multiply Phi by m (since it was normalized before). 
118 | Phi *= m 119 | return mu0, Phi, m, n0 120 | -------------------------------------------------------------------------------- /source/gps/algorithm/policy/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/source/gps/algorithm/policy/__init__.py -------------------------------------------------------------------------------- /source/gps/algorithm/policy/config.py: -------------------------------------------------------------------------------- 1 | """ Default configuration and hyperparameter values for policies. """ 2 | INIT_LG = { 3 | 'init_var': 1.0, 4 | 'verbose': False 5 | } 6 | 7 | # PolicyPriorGMM 8 | POLICY_PRIOR_GMM = { 9 | 'min_samples_per_cluster': 20, 10 | 'max_clusters': 50, 11 | 'max_samples': 20, 12 | 'strength': 1.0, 13 | } 14 | -------------------------------------------------------------------------------- /source/gps/algorithm/policy/constant_policy.py: -------------------------------------------------------------------------------- 1 | """ This file defines the linear Gaussian policy class. """ 2 | import numpy as np 3 | from cma.sigma_adaptation import CMAAdaptSigmaCSA 4 | from gps.algorithm.policy.policy import Policy 5 | from gps.utility.general_utils import check_shape 6 | 7 | 8 | class ConstantPolicy(Policy): 9 | """ 10 | Constant policy 11 | Important for RL learning ability check 12 | """ 13 | def __init__(self, const=0.5): 14 | Policy.__init__(self) 15 | self.const = const 16 | self.adapt_sigma = CMAAdaptSigmaCSA() 17 | 18 | def act(self, x, obs, t, noise, es, f_vals): 19 | """ 20 | Return an action for a state. 21 | Args: 22 | x: State vector. 23 | obs: Observation vector. 24 | t: Time step. 25 | noise: Action noise. This will be scaled by the variance. 
class CSAPolicy(Policy):
    """
    Baseline step-size policy that follows CMA-ES's own Cumulative
    Step-size Adaptation (CSA): U = sigma * delta, where delta is the
    CSA multiplicative update.

    (The docstring previously described a time-varying linear Gaussian
    policy, which was copied from another class and did not match this
    implementation.)
    """

    def __init__(self, T=50):
        """
        Args:
            T: Episode length (number of time steps).
        """
        Policy.__init__(self)
        self.teacher = 0  # Kept for compatibility; no longer used.
        self.T = T
        self.adapt_sigma = CMAAdaptSigmaCSA()

    def act(self, x, obs, t, noise, es, f_vals):
        """
        Return the CSA-adapted step size for the current CMA-ES state.
        Args:
            x: State vector (unused).
            obs: Observation vector (unused).
            t: Time step (unused).
            noise: Action noise (unused).
            es: CMA-ES evolution strategy object.
            f_vals: Function values of the current population.
        Returns:
            The updated step size sigma * delta.
        """
        u = es.sigma
        # Update the stall indicator before adapting sigma.
        hsig = es.adapt_sigma.hsig(es)
        es.hsig = hsig
        delta = es.adapt_sigma.update2(es, function_values=f_vals)
        u *= delta
        return u
""" 2 | import copy 3 | import numpy as np 4 | import scipy as sp 5 | from gps.algorithm.policy.config import INIT_LG 6 | from gps.algorithm.policy.csa_policy import CSAPolicy 7 | from gps.algorithm.policy.lin_gauss_policy import LinearGaussianPolicy 8 | from gps.agent.lto.cmaes_world import CMAESWorld 9 | 10 | def init_cmaes_controller(hyperparams, agent): 11 | 12 | config = copy.deepcopy(INIT_LG) 13 | config.update(hyperparams) 14 | 15 | dX, dU = config['dX'], config['dU'] 16 | T = config['T'] 17 | cur_cond_idx = config['cur_cond_idx'] 18 | history_len = agent.history_len 19 | fcn = agent.fcns[cur_cond_idx] 20 | popsize = agent.popsize 21 | if 'fcn_obj' in fcn: 22 | fcn_obj = fcn['fcn_obj'] 23 | else: 24 | fcn_obj = None 25 | hpolib = False 26 | if 'hpolib' in fcn: 27 | hpolib = True 28 | benchmark = None 29 | if 'benchmark' in fcn: 30 | benchmark = fcn['benchmark'] 31 | # Create new world to avoiding changing the state of the original world 32 | world = CMAESWorld(dim=fcn['dim'], init_loc=fcn['init_loc'], init_sigma=fcn['init_sigma'], init_popsize=popsize, history_len=history_len, fcn=fcn_obj, hpolib=hpolib, benchmark=benchmark) 33 | 34 | if config['verbose']: 35 | print("Finding Initial Linear Gaussian Controller") 36 | action_mean = [] 37 | action_var = [] 38 | for i in range(25): 39 | f_values=[] 40 | cur_policy = CSAPolicy(T=T) 41 | 42 | world.reset_world() 43 | world.run() 44 | for t in range(T): 45 | X_t = agent.get_vectorized_state(world.get_state(), cur_cond_idx) 46 | es = world.es 47 | f_vals = world.func_values 48 | U_t = cur_policy.act(X_t, None, t, np.zeros((dU,)), es, f_vals) 49 | world.run_next(U_t) 50 | f_values.append(U_t) 51 | action_mean.append(f_values)# np.mean(f_values, axis=0)) 52 | action_var.append(f_values)# np.mean(f_values, axis=0)) 53 | mean_actions = np.mean(action_mean, axis=0) 54 | var_actions = np.std(action_var, axis=0) 55 | np.place(var_actions, var_actions==0, config["init_var"]) 56 | Kt = np.zeros((dU, dX)) # K matrix for a 
single time step. 57 | 58 | kt = mean_actions.reshape((T,1)) 59 | #print("Mean actions: %s" % kt, flush=True) 60 | 61 | K = np.tile(Kt[None,:,:], (T, 1, 1)) # Controller gains matrix. 62 | k = kt 63 | PSig = var_actions.reshape((T, 1, 1)) 64 | cholPSig = np.sqrt(var_actions).reshape((T, 1, 1)) 65 | invPSig = 1./var_actions.reshape((T, 1, 1)) 66 | 67 | return LinearGaussianPolicy(K, k, PSig, cholPSig, invPSig) 68 | 69 | 70 | 71 | -------------------------------------------------------------------------------- /source/gps/algorithm/policy/lin_gauss_policy.py: -------------------------------------------------------------------------------- 1 | """ This file defines the linear Gaussian policy class. """ 2 | import numpy as np 3 | from cma.sigma_adaptation import CMAAdaptSigmaCSA 4 | from gps.algorithm.policy.policy import Policy 5 | from gps.utility.general_utils import check_shape 6 | 7 | 8 | class LinearGaussianPolicy(Policy): 9 | """ 10 | Time-varying linear Gaussian policy. 11 | U = K*x + k + noise, where noise ~ N(0, chol_pol_covar) 12 | """ 13 | def __init__(self, K, k, pol_covar, chol_pol_covar, inv_pol_covar): 14 | Policy.__init__(self) 15 | 16 | # Assume K has the correct shape, and make sure others match. 17 | self.T = K.shape[0] 18 | self.dU = K.shape[1] 19 | self.dX = K.shape[2] 20 | 21 | check_shape(k, (self.T, self.dU)) 22 | check_shape(pol_covar, (self.T, self.dU, self.dU)) 23 | check_shape(chol_pol_covar, (self.T, self.dU, self.dU)) 24 | check_shape(inv_pol_covar, (self.T, self.dU, self.dU)) 25 | 26 | self.K = K 27 | self.k = k 28 | self.pol_covar = pol_covar 29 | self.chol_pol_covar = chol_pol_covar 30 | self.inv_pol_covar = inv_pol_covar 31 | self.adapt_sigma = CMAAdaptSigmaCSA() 32 | 33 | def act(self, x, obs, t, noise, es, f_vals): 34 | """ 35 | Return an action for a state. 36 | Args: 37 | x: State vector. 38 | obs: Observation vector. 39 | t: Time step. 40 | noise: Action noise. This will be scaled by the variance. 
class LinearGaussianPolicy(Policy):
    """
    Time-varying linear Gaussian policy.
    U = K*x + k + noise, where noise ~ N(0, chol_pol_covar)
    """

    def __init__(self, K, k, pol_covar, chol_pol_covar, inv_pol_covar):
        Policy.__init__(self)

        # Dimensions are taken from K; every other array must agree.
        self.T = K.shape[0]
        self.dU = K.shape[1]
        self.dX = K.shape[2]

        check_shape(k, (self.T, self.dU))
        check_shape(pol_covar, (self.T, self.dU, self.dU))
        check_shape(chol_pol_covar, (self.T, self.dU, self.dU))
        check_shape(inv_pol_covar, (self.T, self.dU, self.dU))

        self.K = K
        self.k = k
        self.pol_covar = pol_covar
        self.chol_pol_covar = chol_pol_covar
        self.inv_pol_covar = inv_pol_covar
        self.adapt_sigma = CMAAdaptSigmaCSA()

    def act(self, x, obs, t, noise, es, f_vals):
        """
        Return an action for a state.
        Args:
            x: State vector.
            obs: Observation vector.
            t: Time step.
            noise: Action noise. This will be scaled by the variance.
            es: CMA-ES evolution strategy object.
            f_vals: Function values of the current population.
        """
        if self.adapt_sigma is None:
            self.adapt_sigma = CMAAdaptSigmaCSA()
        self.adapt_sigma.sigma = es.sigma
        # Advance the CSA bookkeeping alongside the learned controller.
        es.hsig = es.adapt_sigma.hsig(es)
        es.adapt_sigma.update2(es, function_values=f_vals)
        action = self.K[t].dot(x) + self.k[t]
        action = action + self.chol_pol_covar[t].T.dot(noise)
        return np.nan_to_num(action)

    def fold_k(self, noise):
        """
        Fold noise into k.
        Args:
            noise: A T x Du noise vector with mean 0 and variance 1.
        Returns:
            k: A T x dU bias vector.
        """
        folded = np.zeros_like(self.k)
        for t in range(self.T):
            folded[t] = self.chol_pol_covar[t].T.dot(noise[t]) + self.k[t]
        return folded

    def nans_like(self):
        """
        Returns:
            A new linear Gaussian policy object with the same dimensions
            but all values filled with NaNs.
        """
        policy = LinearGaussianPolicy(
            np.zeros_like(self.K), np.zeros_like(self.k),
            np.zeros_like(self.pol_covar), np.zeros_like(self.chol_pol_covar),
            np.zeros_like(self.inv_pol_covar)
        )
        for arr in (policy.K, policy.k, policy.pol_covar,
                    policy.chol_pol_covar, policy.inv_pol_covar):
            arr.fill(np.nan)
        return policy
class Policy(object):
    """ Computes actions from states/observations. """
    # NOTE(review): Python-2 style metaclass assignment; it has no effect
    # on Python 3, so act() is not actually enforced as abstract there.
    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def act(self, es, f_vals, obs, t, noise):
        """
        Args:
            x: State vector.
            obs: Observation vector.
            t: Time step.
            noise: A dU-dimensional noise vector.
        Returns:
            A dU dimensional action vector.
        """
        raise NotImplementedError("Must be implemented in subclass.")

    def reset(self):
        """Reset internal state; no-op by default."""
        return

    def finalize(self):
        """Called when done using the object. reset() must be called
        before the policy is used again."""
        return

    def set_meta_data(self, meta):
        """
        Set meta data for policy (e.g., domain image, multi modal observation sizes)
        Args:
            meta: meta data.
        """
        return
class PolicyPriorGMM(object):
    """
    A policy prior encoded as a GMM over [x_t, u_t] points, where u_t is
    the output of the policy for the given state x_t. This prior is used
    when computing the linearization of the policy.

    See also the GMM dynamics prior in
    python/gps/algorithm/dynamics/dynamics_prior_gmm.py, a similar GMM
    prior used for the dynamics estimate.
    """

    def __init__(self, hyperparams):
        """
        Hyperparameters:
            min_samples_per_cluster: Minimum number of samples.
            max_clusters: Maximum number of clusters to fit.
            max_samples: Maximum number of trajectories to use for
                fitting the GMM at any given time.
            strength: Adjusts the strength of the prior.
        """
        config = copy.deepcopy(POLICY_PRIOR_GMM)
        config.update(hyperparams)
        self._hyperparams = config
        self.X = None
        self.obs = None
        self.gmm = GMM()
        self._min_samp = config['min_samples_per_cluster']
        self._max_samples = config['max_samples']
        self._max_clusters = config['max_clusters']
        self._strength = config['strength']
        self._init_sig_reg = config['init_regularization']
        self._subsequent_sig_reg = config['subsequent_regularization']

    def update(self, samples, policy_opt, mode='add'):
        """
        Update GMM using new samples or policy_opt.
        By default does not replace old samples.

        Args:
            samples: SampleList containing new samples
            policy_opt: PolicyOpt containing current policy
        """
        X, obs = samples.get_X(), samples.get_obs()
        if self.X is None or mode == 'replace':
            self.X = X
            self.obs = obs
        elif mode == 'add' and X.size > 0:
            self.X = np.concatenate([self.X, X], axis=0)
            self.obs = np.concatenate([self.obs, obs], axis=0)
            # Keep only the most recent samples.
            N = self.X.shape[0]
            if N > self._max_samples:
                start = N - self._max_samples
                self.X = self.X[start:, :, :]
                self.obs = self.obs[start:, :, :]

        # Mean policy action at every stored observation.
        U = policy_opt.prob(self.obs, diag_var=True)[0]
        # Flatten into a (T*N) x dO dataset of [x_t, u_t] points.
        N, T = self.X.shape[:2]
        dO = self.X.shape[2] + U.shape[2]
        XU = np.reshape(np.concatenate([self.X, U], axis=2), [T * N, dO])
        # Number of clusters grows with the data, within configured bounds.
        K = int(max(2, min(self._max_clusters,
                           np.floor(float(N * T) / self._min_samp))))

        LOGGER.debug('Generating %d clusters for policy prior GMM.', K)
        self.gmm.update(XU, K)

    def eval(self, Ts, Ps):
        """ Evaluate prior. """
        # Construct query data point.
        pts = np.concatenate((Ts, Ps), axis=1)
        # Perform query.
        mu0, Phi, m, n0 = self.gmm.inference(pts)
        # Scale by the configured prior strength.
        n0 *= self._strength
        m *= self._strength
        # Phi was normalized, so multiply the strength back in.
        Phi *= m
        return mu0, Phi, m, n0

    def fit(self, X, pol_mu, pol_sig):
        """
        Fit policy linearization.

        Args:
            X: Samples (N, T, dX)
            pol_mu: Policy means (N, T, dU)
            pol_sig: Policy covariance (N, T, dU)
        """
        N, T, dX = X.shape
        dU = pol_mu.shape[2]
        if N == 1:
            raise ValueError("Cannot fit dynamics on 1 sample")

        # Collapse policy covariances. (This is only correct because
        # the policy doesn't depend on state.)
        pol_sig = np.mean(pol_sig, axis=0)

        pol_K = np.zeros([T, dU, dX])
        pol_k = np.zeros([T, dU])
        pol_S = np.zeros([T, dU, dU])

        # Fit policy linearization with least squares regression.
        dwts = (1.0 / N) * np.ones(N)
        for t in range(T):
            Ts = X[:, t, :]
            Ps = pol_mu[:, t, :]
            Ys = np.concatenate([Ts, Ps], axis=1)
            # Obtain Normal-inverse-Wishart prior.
            mu0, Phi, mm, n0 = self.eval(Ts, Ps)
            sig_reg = np.zeros((dX+dU, dX+dU))
            # Regularize the state block; the first timestep uses its own
            # regularizer value.
            reg = self._init_sig_reg if t == 0 else self._subsequent_sig_reg
            sig_reg[:dX, :dX] = reg * np.eye(dX)
            pol_K[t, :, :], pol_k[t, :], pol_S[t, :, :] = \
                gauss_fit_joint_prior(Ys,
                        mu0, Phi, mm, n0, dwts, dX, dU, sig_reg)
        pol_S += pol_sig
        return pol_K, pol_k, pol_S
class TfPolicy(Policy):
    """
    A neural network policy implemented in TensorFlow. The network output is
    taken to be the mean, and Gaussian noise is added on top of it.
    U = net.forward(obs) + noise, where noise ~ N(0, diag(var))
    Args:
        obs_tensor: tensor representing tf observation. Used in feed dict for forward pass.
        act_op: tf op to execute the forward pass. Use sess.run on this op.
        var: Du-dimensional noise variance vector.
        sess: tf session.
        device_string: tf device string for running on either gpu or cpu.
    """

    def __init__(self, dU, obs_tensor, act_op, var, sess, device_string):
        Policy.__init__(self)
        self.dU = dU
        self.obs_tensor = obs_tensor
        self.act_op = act_op
        self.sess = sess
        self.device_string = device_string
        self.chol_pol_covar = np.diag(np.sqrt(var))
        # Observation normalization; must be set from elsewhere based on
        # observations before act() is called.
        self.scale = None
        self.bias = None

    def act(self, X_t, obs, t, noise, es, f_vals):
        """
        Return an action for a state.
        Args:
            X_t: State vector (unused; the network acts on obs).
            obs: Observation vector.
            t: Time step.
            noise: Action noise (unused; the network mean is returned).
            es: CMA-ES evolution strategy object.
            f_vals: Function values of the current population.
        """
        # Normalize obs with the externally supplied scale/bias.
        if len(obs.shape) == 1:
            obs = np.expand_dims(obs, axis=0)
        obs = obs.dot(self.scale) + self.bias
        with tf.device(self.device_string):
            action_mean = self.sess.run(self.act_op,
                                        feed_dict={self.obs_tensor: obs})
        # Batch size 1: take the first row of the network output.
        delta = action_mean[0]
        # Keep the CSA bookkeeping in sync even though the network acts.
        es.hsig = es.adapt_sigma.hsig(es)
        es.adapt_sigma.update2(es, function_values=f_vals)
        if np.any(np.isnan(delta)):
            print("Action %s" % delta)
        return delta

    def pickle_policy(self, deg_obs, deg_action, checkpoint_path, goal_state=None, should_hash=False):
        """
        Save just the policy so it can be run forward at a later point
        without needing a policy optimization class. Useful for debugging
        and deploying.
        """
        if should_hash is True:
            checkpoint_path += str(uuid.uuid4())
        state = {'deg_obs': deg_obs, 'deg_action': deg_action,
                 'chol_pol_covar': self.chol_pol_covar,
                 'checkpoint_path_tf': checkpoint_path + '_tf_data.ckpt',
                 'scale': self.scale, 'bias': self.bias,
                 'device_string': self.device_string,
                 'goal_state': goal_state}
        pickle.dump(state, open(checkpoint_path + '.pkl', "wb"))
        saver = tf.train.Saver()
        saver.save(self.sess, checkpoint_path + '_tf_data.ckpt')

    @classmethod
    def load_policy(cls, policy_dict_path, tf_generator, network_config=None):
        """
        Load a policy for the forward pass only — for instance, to run
        from a checkpointed policy.
        """
        from tensorflow.python.framework import ops
        # The default graph must be destroyed before re-init, or the
        # checkpoint will not restore.
        ops.reset_default_graph()
        pol_dict = pickle.load(open(policy_dict_path, "rb"))

        tf_map = tf_generator(dim_input=pol_dict['deg_obs'],
                              dim_output=pol_dict['deg_action'],
                              batch_size=1, network_config=network_config)

        sess = tf.Session()
        sess.run(tf.initialize_all_variables())
        saver = tf.train.Saver()
        check_file = '/'.join(str.split(policy_dict_path, '/')[:-1]) + '/' + \
            str.split(pol_dict['checkpoint_path_tf'], '/')[-1]
        saver.restore(sess, check_file)

        policy = cls(pol_dict['deg_action'], tf_map.get_input_tensor(),
                     tf_map.get_output_op(), np.zeros((1,)), sess,
                     pol_dict['device_string'])
        policy.chol_pol_covar = pol_dict['chol_pol_covar']
        policy.scale = pol_dict['scale']
        policy.bias = pol_dict['bias']
        return policy
def init_weights(shape, name=None):
    """Weight variable drawn from N(0, 0.01)."""
    return tf.Variable(tf.random_normal(shape, stddev=0.01), name=name)


def init_bias(shape, name=None):
    """Bias variable initialized to zero."""
    return tf.Variable(tf.zeros(shape, dtype='float'), name=name)


def batched_matrix_vector_multiply(vector, matrix):
    """ computes x^T A in mini-batches. """
    expanded = tf.expand_dims(vector, [1])
    product = tf.matmul(expanded, matrix)
    return tf.squeeze(product, [1])


def get_input_layer():
    """Placeholder inputs used to run ops forward and backward.
    net_input: usually an observation.
    action: mu, the ground-truth actions being learned.
    precision: precision matrix used to compute the loss."""
    net_input = tf.placeholder("float", [None, None], name='nn_input')  # (N*T) x dO
    action = tf.placeholder('float', [None, None], name='action')  # (N*T) x dU
    precision = tf.placeholder('float', [None, None, None], name='precision')  # (N*T) x dU x dU
    return net_input, action, precision


def get_loss_layer(mlp_out, action, precision, batch_size):
    """Precision-weighted quadratic loss between network output and target."""
    scale_factor = tf.constant(2*batch_size, dtype='float')
    err = action - mlp_out
    uP = batched_matrix_vector_multiply(err, precision)
    # The final dot product is summed over all elements at once.
    uPu = tf.reduce_sum(uP * err)
    return uPu / scale_factor


def fully_connected_tf_network(dim_input, dim_output, batch_size=25, network_config=None):
    """Build a fully-connected network; the final layer uses relu6, all
    earlier layers use relu."""
    dim_hidden = network_config['dim_hidden'] + [dim_output]
    n_layers = len(dim_hidden)

    nn_input, action, precision = get_input_layer()

    weights, biases = [], []
    in_shape = dim_input
    for idx, out_shape in enumerate(dim_hidden):
        weights.append(init_weights([in_shape, out_shape], name='w_' + str(idx)))
        biases.append(init_bias([out_shape], name='b_' + str(idx)))
        in_shape = out_shape

    cur_top = nn_input
    for idx in range(n_layers):
        pre_act = tf.matmul(cur_top, weights[idx]) + biases[idx]
        # Final layer applies relu6 instead of relu.
        cur_top = tf.nn.relu(pre_act) if idx != n_layers - 1 else tf.nn.relu6(pre_act)

    loss_out = get_loss_layer(mlp_out=cur_top, action=action,
                              precision=precision, batch_size=batch_size)

    return TfMap.init_from_lists([nn_input, action, precision], [cur_top], [loss_out])
class PolicyOpt(object):
    """ Policy optimization using TensorFlow for DAG computations /
    nonlinear function approximation. """

    def __init__(self, hyperparams, dO, dU):
        """
        Args:
            hyperparams: Dict overriding entries of POLICY_OPT.
            dO: Observation dimension.
            dU: Action dimension.
        """
        config = copy.deepcopy(POLICY_OPT)
        config.update(hyperparams)

        self._hyperparams = config
        self._dO = dO
        self._dU = dU

        tf.set_random_seed(self._hyperparams['random_seed'])

        self.tf_iter = 0
        self.batch_size = self._hyperparams['batch_size']
        self.device_string = "/cpu:0"
        if self._hyperparams['use_gpu'] == 1:
            self.gpu_device = self._hyperparams['gpu_id']
            self.device_string = "/gpu:" + str(self.gpu_device)
        self.act_op = None  # mu_hat (network output op)
        self.loss_scalar = None
        self.obs_tensor = None
        self.precision_tensor = None
        self.action_tensor = None  # mu true (training targets)
        self.solver = None
        self.init_network()
        self.init_solver()
        self.var = self._hyperparams['init_var'] * np.ones(dU)
        self.sess = tf.Session()
        self.policy = TfPolicy(dU, self.obs_tensor, self.act_op,
                               np.zeros(dU), self.sess, self.device_string)
        init_op = tf.initialize_all_variables()
        self.sess.run(init_op)

    def init_network(self):
        """ Helper method to initialize the tf networks used. """
        tf_map_generator = self._hyperparams['network_model']
        tf_map = tf_map_generator(dim_input=self._dO, dim_output=self._dU,
                                  batch_size=self.batch_size,
                                  network_config=self._hyperparams['network_params'])
        self.obs_tensor = tf_map.get_input_tensor()
        self.action_tensor = tf_map.get_target_output_tensor()
        self.precision_tensor = tf_map.get_precision_tensor()
        self.act_op = tf_map.get_output_op()
        self.loss_scalar = tf_map.get_loss_op()

    def init_solver(self):
        """ Helper method to initialize the solver. """
        self.solver = TfSolver(loss_scalar=self.loss_scalar,
                               solver_name=self._hyperparams['solver_type'],
                               base_lr=self._hyperparams['lr'],
                               lr_policy=self._hyperparams['lr_policy'],
                               momentum=self._hyperparams['momentum'],
                               momentum2=self._hyperparams['momentum2'],
                               epsilon=self._hyperparams['epsilon'],
                               weight_decay=self._hyperparams['weight_decay'])

    def update(self, obs, tgt_mu, tgt_prc, tgt_wt):
        """
        Update policy.
        Args:
            obs: Numpy array of observations, N x T x dO.
            tgt_mu: Numpy array of mean controller outputs, N x T x dU.
            tgt_prc: Numpy array of precision matrices, N x T x dU x dU.
            tgt_wt: Numpy array of weights, N x T.
        Returns:
            A tensorflow policy object with updated weights.
        """
        N, T = obs.shape[:2]
        dU, dO = self._dU, self._dO

        # Renormalize weights.
        tgt_wt *= (float(N * T) / np.sum(tgt_wt))
        # Allow weights to be at most twice the robust median.
        # FIX: previously an empty selection (all weights <= 1e-2) made
        # np.median return NaN, which then corrupted every weight via the
        # division below; fall back to 1.0 (a no-op) in that case.
        significant = tgt_wt[(tgt_wt > 1e-2).nonzero()]
        mn = np.median(significant) if significant.size > 0 else 1.0
        for n in range(N):
            for t in range(T):
                tgt_wt[n, t] = min(tgt_wt[n, t], 2 * mn)
        # Robust median should be around one.
        tgt_wt /= mn

        # Reshape inputs.
        obs = np.reshape(obs, (N*T, dO))
        tgt_mu = np.reshape(tgt_mu, (N*T, dU))
        tgt_prc = np.reshape(tgt_prc, (N*T, dU, dU))
        tgt_wt = np.reshape(tgt_wt, (N*T, 1, 1))

        # Fold weights into tgt_prc.
        tgt_prc = tgt_wt * tgt_prc

        # Normalize obs, but only compute normalization at the beginning.
        if self.policy.scale is None or self.policy.bias is None:
            # 1e-3 to avoid infs if some state dimensions don't change in
            # the first batch of samples.
            self.policy.scale = np.diag(
                1.0 / np.maximum(np.std(obs, axis=0), 1e-3))
            self.policy.bias = - np.mean(
                obs.dot(self.policy.scale), axis=0)
        obs = obs.dot(self.policy.scale) + self.policy.bias

        # Assuming that N*T >= self.batch_size.
        batches_per_epoch = int(np.floor(N*T / self.batch_size))
        idx = list(range(N*T))
        average_loss = 0
        np.random.shuffle(idx)

        # Actual training.
        for i in range(self._hyperparams['iterations']):
            # Load in data for this batch.
            start_idx = int(i * self.batch_size %
                            (batches_per_epoch * self.batch_size))
            idx_i = idx[start_idx:start_idx+self.batch_size]
            feed_dict = {self.obs_tensor: obs[idx_i],
                         self.action_tensor: tgt_mu[idx_i],
                         self.precision_tensor: tgt_prc[idx_i]}
            train_loss = self.solver(feed_dict, self.sess)

            average_loss += train_loss
            if (i+1) % 500 == 0:
                LOGGER.debug('tensorflow iteration %d, average loss %f',
                             i+1, average_loss / 500)
                print ('supervised tf loss is ' + str(average_loss))
                average_loss = 0

        # Keep track of tensorflow iterations for loading solver states.
        self.tf_iter += self._hyperparams['iterations']

        # Optimize variance.
        self.var = (np.sum(tgt_wt, axis=0)[:, 0] -
                    2*N*T*self._hyperparams['ent_reg']) / \
            np.sum(np.diagonal(tgt_prc, axis1=1, axis2=2), axis=0)

        self.policy.chol_pol_covar = np.diag(np.sqrt(self.var))

        return self.policy

    def prob(self, obs, diag_var=False):
        """
        Run policy forward.
        Args:
            obs: Numpy array of observations that is N x T x dO.
            diag_var: If True, return per-dimension variance vectors
                instead of full covariance matrices.
        Returns:
            Tuple of (mean actions, sigma, precision, det(sigma)).
        """
        dU = self._dU
        N, T = obs.shape[:2]

        output = np.zeros((N, T, dU))

        for i in range(N):
            for t in range(T):
                # Feed in data, normalizing when scale/bias are set.
                if self.policy.scale is not None:
                    feed_dict = {self.obs_tensor:
                                 np.expand_dims(obs[i, t], axis=0).dot(self.policy.scale) + self.policy.bias}
                else:
                    feed_dict = {self.obs_tensor: np.expand_dims(obs[i, t], axis=0)}
                with tf.device(self.device_string):
                    output[i, t, :] = self.sess.run(self.act_op, feed_dict=feed_dict)

        if diag_var:
            pol_sigma = np.tile(self.var, [N, T, 1])
            pol_prec = np.tile(1.0 / self.var, [N, T, 1])
            pol_det_sigma = np.tile(np.prod(self.var), [N, T])
        else:
            pol_sigma = np.tile(np.diag(self.var), [N, T, 1, 1])
            pol_prec = np.tile(np.diag(1.0 / self.var), [N, T, 1, 1])
            pol_det_sigma = np.tile(np.prod(self.var), [N, T])

        return output, pol_sigma, pol_prec, pol_det_sigma

    def set_ent_reg(self, ent_reg):
        """ Set the entropy regularization. """
        self._hyperparams['ent_reg'] = ent_reg

    # For pickling.
    def __getstate__(self):
        return {
            'hyperparams': self._hyperparams,
            'dO': self._dO,
            'dU': self._dU,
            'scale': self.policy.scale,
            'bias': self.policy.bias,
            'tf_iter': self.tf_iter,
        }

    # For unpickling.
    def __setstate__(self, state):
        self.__init__(state['hyperparams'], state['dO'], state['dU'])
        self.policy.scale = state['scale']
        self.policy.bias = state['bias']
        self.tf_iter = state['tf_iter']
26 | inputs += [None]*(3 - len(inputs)) 27 | return cls(inputs[0], inputs[1], inputs[2], outputs[0], loss[0]) 28 | 29 | def get_input_tensor(self): 30 | return self.input_tensor 31 | 32 | def set_input_tensor(self, input_tensor): 33 | self.input_tensor = input_tensor 34 | 35 | def get_target_output_tensor(self): 36 | return self.target_output_tensor 37 | 38 | def set_target_output_tensor(self, target_output_tensor): 39 | self.target_output_tensor = target_output_tensor 40 | 41 | def get_precision_tensor(self): 42 | return self.precision_tensor 43 | 44 | def set_precision_tensor(self, precision_tensor): 45 | self.precision_tensor = precision_tensor 46 | 47 | def get_output_op(self): 48 | return self.output_op 49 | 50 | def set_output_op(self, output_op): 51 | self.output_op = output_op 52 | 53 | def get_loss_op(self): 54 | return self.loss_op 55 | 56 | def set_loss_op(self, loss_op): 57 | self.loss_op = loss_op 58 | 59 | 60 | class TfSolver: 61 | """ A container for holding solver hyperparams in tensorflow. Used to execute backwards pass. 
""" 62 | def __init__(self, loss_scalar, solver_name='adam', base_lr=None, lr_policy=None, momentum=None, momentum2=None, epsilon=None, weight_decay=None): 63 | self.base_lr = base_lr 64 | self.lr_policy = lr_policy 65 | self.momentum = momentum 66 | self.momentum2 = momentum2 67 | self.epsilon = epsilon 68 | self.solver_name = solver_name 69 | self.loss_scalar = loss_scalar 70 | if self.lr_policy != 'fixed': 71 | raise NotImplementedError('learning rate policies other than fixed are not implemented') 72 | 73 | self.weight_decay = weight_decay 74 | if weight_decay is not None: 75 | trainable_vars = tf.trainable_variables() 76 | loss_with_reg = self.loss_scalar 77 | for var in trainable_vars: 78 | loss_with_reg += self.weight_decay*tf.nn.l2_loss(var) 79 | self.loss_scalar = loss_with_reg 80 | 81 | self.solver_op = self.get_solver_op() 82 | 83 | def get_solver_op(self): 84 | solver_string = self.solver_name.lower() 85 | if solver_string == 'adam': 86 | return tf.train.AdamOptimizer(learning_rate=self.base_lr,beta1=self.momentum,beta2=self.momentum2,epsilon=self.epsilon).minimize(self.loss_scalar) 87 | elif solver_string == 'rmsprop': 88 | return tf.train.RMSPropOptimizer(learning_rate=self.base_lr,decay=self.momentum).minimize(self.loss_scalar) 89 | elif solver_string == 'momentum': 90 | return tf.train.MomentumOptimizer(learning_rate=self.base_lr,momentum=self.momentum).minimize(self.loss_scalar) 91 | elif solver_string == 'adagrad': 92 | return tf.train.AdagradOptimizer(learning_rate=self.base_lr,initial_accumulator_value=self.momentum).minimize(self.loss_scalar) 93 | elif solver_string == 'sgd': 94 | return tf.train.GradientDescentOptimizer(learning_rate=self.base_lr).minimize(self.loss_scalar) 95 | else: 96 | raise NotImplementedError("Please select a valid optimizer.") 97 | 98 | def __call__(self, feed_dict, sess, device_string="/cpu:0", additional_tensors = None): 99 | if additional_tensors is None: 100 | with tf.device(device_string): 101 | loss = 
sess.run([self.loss_scalar, self.solver_op], feed_dict) 102 | return loss[0] 103 | else: 104 | with tf.device(device_string): 105 | loss = sess.run([self.loss_scalar] + additional_tensors + [self.solver_op], feed_dict) 106 | return loss[:-1] 107 | -------------------------------------------------------------------------------- /source/gps/algorithm/traj_opt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/source/gps/algorithm/traj_opt/__init__.py -------------------------------------------------------------------------------- /source/gps/algorithm/traj_opt/config.py: -------------------------------------------------------------------------------- 1 | """ Default configuration for trajectory optimization. """ 2 | 3 | TRAJ_OPT = { 4 | # Dual variable updates for non-PD Q-function. 5 | 'del0': 1e-4, 6 | 'min_eta': 1e-4, 7 | 'max_eta': 1e16, 8 | } 9 | -------------------------------------------------------------------------------- /source/gps/algorithm/traj_opt/traj_opt.py: -------------------------------------------------------------------------------- 1 | """ This file defines code for iLQG-based trajectory optimization. """ 2 | import logging 3 | import copy 4 | 5 | import numpy as np 6 | from numpy.linalg import LinAlgError 7 | import scipy as sp 8 | 9 | from gps.algorithm.traj_opt.config import TRAJ_OPT 10 | from gps.algorithm.traj_opt.traj_opt_utils import traj_distr_kl, DGD_MAX_ITER 11 | 12 | LOGGER = logging.getLogger(__name__) 13 | 14 | class TrajOpt(object): 15 | """ LQR trajectory optimization """ 16 | def __init__(self, hyperparams): 17 | config = copy.deepcopy(TRAJ_OPT) 18 | config.update(hyperparams) 19 | 20 | self._hyperparams = config 21 | 22 | def update(self, m, algorithm): 23 | """ Run dual gradient decent to optimize trajectories. 
""" 24 | T = algorithm.T 25 | eta = algorithm.cur[m].eta 26 | step_mult = algorithm.cur[m].step_mult 27 | traj_info = algorithm.cur[m].traj_info 28 | 29 | prev_traj_distr = algorithm.cur[m].traj_distr 30 | 31 | # Set KL-divergence step size (epsilon). 32 | kl_step = T * algorithm.base_kl_step * step_mult 33 | 34 | # We assume at min_eta, kl_div > kl_step, opposite for max_eta. 35 | min_eta = self._hyperparams['min_eta'] 36 | max_eta = self._hyperparams['max_eta'] 37 | 38 | LOGGER.debug("Running DGD for trajectory %d, eta: %f", m, eta) 39 | for itr in range(DGD_MAX_ITER): 40 | LOGGER.debug("Iteration %i, bracket: (%.2e , %.2e , %.2e)", 41 | itr, min_eta, eta, max_eta) 42 | 43 | # Run fwd/bwd pass, note that eta may be updated. 44 | # NOTE: we can just ignore case when the new eta is larger. 45 | traj_distr, eta = self.backward(prev_traj_distr, traj_info, 46 | eta, algorithm, m) 47 | new_mu, new_sigma = self.forward(traj_distr, traj_info) 48 | 49 | # Compute KL divergence constraint violation. 50 | kl_div = traj_distr_kl(new_mu, new_sigma, 51 | traj_distr, prev_traj_distr) 52 | con = kl_div - kl_step 53 | 54 | # Convergence check - constraint satisfaction. 55 | if (abs(con) < 0.1*kl_step): 56 | LOGGER.debug("KL: %f / %f, converged iteration %i", 57 | kl_div, kl_step, itr) 58 | break 59 | 60 | # Choose new eta (bisect bracket or multiply by constant) 61 | if con < 0: # Eta was too big. 62 | max_eta = eta 63 | geom = np.sqrt(min_eta*max_eta) # Geometric mean. 64 | new_eta = max(geom, 0.1*max_eta) 65 | LOGGER.debug("KL: %f / %f, eta too big, new eta: %f", 66 | kl_div, kl_step, new_eta) 67 | else: # Eta was too small. 68 | min_eta = eta 69 | geom = np.sqrt(min_eta*max_eta) # Geometric mean. 
70 | new_eta = min(geom, 10.0*min_eta) 71 | LOGGER.debug("KL: %f / %f, eta too small, new eta: %f", 72 | kl_div, kl_step, new_eta) 73 | 74 | # Logarithmic mean: log_mean(x,y) = (y - x)/(log(y) - log(x)) 75 | eta = new_eta 76 | 77 | if kl_div > kl_step and abs(kl_div - kl_step) > 0.1*kl_step: 78 | LOGGER.warning( 79 | "Final KL divergence after DGD convergence is too high." 80 | ) 81 | 82 | return traj_distr, eta 83 | 84 | def estimate_cost(self, traj_distr, traj_info): 85 | """ Compute Laplace approximation to expected cost. """ 86 | # Constants. 87 | T = traj_distr.T 88 | 89 | # Perform forward pass (note that we repeat this here, because 90 | # traj_info may have different dynamics from the ones that were 91 | # used to compute the distribution already saved in traj). 92 | mu, sigma = self.forward(traj_distr, traj_info) 93 | 94 | # Compute cost. 95 | predicted_cost = np.zeros(T) 96 | for t in range(T): 97 | predicted_cost[t] = traj_info.cc[t] + 0.5 * \ 98 | np.sum(sigma[t, :, :] * traj_info.Cm[t, :, :]) + 0.5 * \ 99 | mu[t, :].T.dot(traj_info.Cm[t, :, :]).dot(mu[t, :]) + \ 100 | mu[t, :].T.dot(traj_info.cv[t, :]) 101 | return predicted_cost 102 | 103 | def forward(self, traj_distr, traj_info): 104 | """ 105 | Perform LQR forward pass. Computes state-action marginals from 106 | dynamics and policy. 107 | Args: 108 | traj_distr: A linear Gaussian policy object. 109 | traj_info: A TrajectoryInfo object. 110 | Returns: 111 | mu: A T x dX mean action vector. 112 | sigma: A T x dX x dX covariance matrix. 113 | """ 114 | # Compute state-action marginals from specified conditional 115 | # parameters and current traj_info. 116 | T = traj_distr.T 117 | dU = traj_distr.dU 118 | dX = traj_distr.dX 119 | 120 | # Constants. 121 | idx_x = slice(dX) 122 | 123 | # Allocate space. 124 | sigma = np.zeros((T, dX+dU, dX+dU)) 125 | mu = np.zeros((T, dX+dU)) 126 | 127 | # Pull out dynamics. 
128 | Fm = traj_info.dynamics.Fm 129 | fv = traj_info.dynamics.fv 130 | dyn_covar = traj_info.dynamics.dyn_covar 131 | 132 | # Set initial covariance (initial mu is always zero). 133 | sigma[0, idx_x, idx_x] = traj_info.x0sigma 134 | mu[0, idx_x] = traj_info.x0mu 135 | 136 | for t in range(T): 137 | sigma[t, :, :] = np.vstack([ 138 | np.hstack([ 139 | sigma[t, idx_x, idx_x], 140 | sigma[t, idx_x, idx_x].dot(traj_distr.K[t, :, :].T) 141 | ]), 142 | np.hstack([ 143 | traj_distr.K[t, :, :].dot(sigma[t, idx_x, idx_x]), 144 | traj_distr.K[t, :, :].dot(sigma[t, idx_x, idx_x]).dot( 145 | traj_distr.K[t, :, :].T 146 | ) + traj_distr.pol_covar[t, :, :] 147 | ]) 148 | ]) 149 | mu[t, :] = np.hstack([ 150 | mu[t, idx_x], 151 | traj_distr.K[t, :, :].dot(mu[t, idx_x]) + traj_distr.k[t, :] 152 | ]) 153 | if t < T - 1: 154 | sigma[t+1, idx_x, idx_x] = \ 155 | Fm[t, :, :].dot(sigma[t, :, :]).dot(Fm[t, :, :].T) + \ 156 | dyn_covar[t, :, :] 157 | mu[t+1, idx_x] = Fm[t, :, :].dot(mu[t, :]) + fv[t, :] 158 | return mu, sigma 159 | 160 | def backward(self, prev_traj_distr, traj_info, eta, algorithm, m): 161 | """ 162 | Perform LQR backward pass. This computes a new linear Gaussian 163 | policy object. 164 | Args: 165 | prev_traj_distr: A linear Gaussian policy object from 166 | previous iteration. 167 | traj_info: A TrajectoryInfo object. 168 | eta: Dual variable. 169 | algorithm: Algorithm object needed to compute costs. 170 | m: Condition number. 171 | Returns: 172 | traj_distr: A new linear Gaussian policy. 173 | new_eta: The updated dual variable. Updates happen if the 174 | Q-function is not PD. 175 | """ 176 | # Constants. 177 | T = prev_traj_distr.T 178 | dU = prev_traj_distr.dU 179 | dX = prev_traj_distr.dX 180 | 181 | traj_distr = prev_traj_distr.nans_like() 182 | 183 | pol_wt = algorithm.cur[m].pol_info.pol_wt 184 | 185 | idx_x = slice(dX) 186 | idx_u = slice(dX, dX+dU) 187 | 188 | # Pull out dynamics. 
189 | Fm = traj_info.dynamics.Fm 190 | fv = traj_info.dynamics.fv 191 | 192 | # Non-SPD correction terms. 193 | del_ = self._hyperparams['del0'] 194 | eta0 = eta 195 | 196 | # Run dynamic programming. 197 | fail = True 198 | while fail: 199 | fail = False # Flip to true on non-symmetric PD. 200 | 201 | # Allocate. 202 | Vxx = np.zeros((T, dX, dX)) 203 | Vx = np.zeros((T, dX)) 204 | 205 | fCm, fcv = algorithm.compute_costs(m, eta) 206 | 207 | # Compute state-action-state function at each time step. 208 | for t in range(T - 1, -1, -1): 209 | # Add in the cost. 210 | Qtt = fCm[t, :, :] # (X+U) x (X+U) 211 | Qt = fcv[t, :] # (X+U) x 1 212 | 213 | # Add in the value function from the next time step. 214 | #if t < T - 1: 215 | # multiplier = (pol_wt[t+1] + eta)/(pol_wt[t] + eta) 216 | # Qtt = Qtt + multiplier * \ 217 | # Fm[t, :, :].T.dot(Vxx[t+1, :, :]).dot(Fm[t, :, :]) 218 | # Qt = Qt + multiplier * \ 219 | # Fm[t, :, :].T.dot(Vx[t+1, :] + 220 | # Vxx[t+1, :, :].dot(fv[t, :])) 221 | 222 | # Symmetrize quadratic component. 223 | Qtt = 0.5 * (Qtt + Qtt.T) 224 | 225 | # Compute Cholesky decomposition of Q function action 226 | # component. 227 | try: 228 | U = sp.linalg.cholesky(Qtt[idx_u, idx_u]) 229 | L = U.T 230 | except LinAlgError as e: 231 | # Error thrown when Qtt[idx_u, idx_u] is not 232 | # symmetric positive definite. 233 | LOGGER.debug('LinAlgError: %s', e) 234 | fail = True 235 | break 236 | 237 | # Store conditional covariance, inverse, and Cholesky. 238 | traj_distr.inv_pol_covar[t, :, :] = Qtt[idx_u, idx_u] 239 | traj_distr.pol_covar[t, :, :] = sp.linalg.solve_triangular( 240 | U, sp.linalg.solve_triangular(L, np.eye(dU), lower=True) 241 | ) 242 | traj_distr.chol_pol_covar[t, :, :] = sp.linalg.cholesky( 243 | traj_distr.pol_covar[t, :, :] 244 | ) 245 | 246 | # Compute mean terms. 
247 | traj_distr.k[t, :] = -sp.linalg.solve_triangular( 248 | U, sp.linalg.solve_triangular(L, Qt[idx_u], lower=True) 249 | ) 250 | traj_distr.K[t, :, :] = -sp.linalg.solve_triangular( 251 | U, sp.linalg.solve_triangular(L, Qtt[idx_u, idx_x], 252 | lower=True) 253 | ) 254 | 255 | # Compute value function. 256 | Vxx[t, :, :] = Qtt[idx_x, idx_x] + \ 257 | Qtt[idx_x, idx_u].dot(traj_distr.K[t, :, :]) 258 | Vx[t, :] = Qt[idx_x] + Qtt[idx_x, idx_u].dot(traj_distr.k[t, :]) 259 | Vxx[t, :, :] = 0.5 * (Vxx[t, :, :] + Vxx[t, :, :].T) 260 | 261 | # Increment eta on non-SPD Q-function. 262 | if fail: 263 | old_eta = eta 264 | eta = eta0 + del_ 265 | LOGGER.debug('Increasing eta: %f -> %f', old_eta, eta) 266 | del_ *= 2 # Increase del_ exponentially on failure. 267 | if eta >= 1e16: 268 | if np.any(np.isnan(Fm)) or np.any(np.isnan(fv)): 269 | raise ValueError('NaNs encountered in dynamics!') 270 | raise ValueError('Failed to find PD solution even for very \ 271 | large eta (check that dynamics and cost are \ 272 | reasonably well conditioned)!') 273 | return traj_distr, eta 274 | -------------------------------------------------------------------------------- /source/gps/algorithm/traj_opt/traj_opt_utils.py: -------------------------------------------------------------------------------- 1 | """ This file defines utilities for trajectory optimization. """ 2 | import numpy as np 3 | import scipy as sp 4 | 5 | # Constants used in TrajOptLQR. 6 | DGD_MAX_ITER = 50 7 | 8 | def traj_distr_kl(new_mu, new_sigma, new_traj_distr, prev_traj_distr): 9 | """ 10 | Compute KL divergence between new and previous trajectory 11 | distributions. 12 | Args: 13 | new_mu: T x dX, mean of new trajectory distribution. 14 | new_sigma: T x dX x dX, variance of new trajectory distribution. 15 | new_traj_distr: A linear Gaussian policy object, new 16 | distribution. 17 | prev_traj_distr: A linear Gaussian policy object, previous 18 | distribution. 
19 | Returns: 20 | kl_div: The KL divergence between the new and previous 21 | trajectories. 22 | """ 23 | # Constants. 24 | T = new_mu.shape[0] 25 | dU = new_traj_distr.dU 26 | 27 | # Initialize vector of divergences for each time step. 28 | kl_div = np.zeros(T) 29 | 30 | # Step through trajectory. 31 | for t in range(T): 32 | # Fetch matrices and vectors from trajectory distributions. 33 | mu_t = new_mu[t, :] 34 | sigma_t = new_sigma[t, :, :] 35 | K_prev = prev_traj_distr.K[t, :, :] 36 | K_new = new_traj_distr.K[t, :, :] 37 | k_prev = prev_traj_distr.k[t, :] 38 | k_new = new_traj_distr.k[t, :] 39 | chol_prev = prev_traj_distr.chol_pol_covar[t, :, :] 40 | chol_new = new_traj_distr.chol_pol_covar[t, :, :] 41 | 42 | # Compute log determinants and precision matrices. 43 | logdet_prev = 2 * sum(np.log(np.diag(chol_prev))) 44 | logdet_new = 2 * sum(np.log(np.diag(chol_new))) 45 | prc_prev = sp.linalg.solve_triangular( 46 | chol_prev, sp.linalg.solve_triangular(chol_prev.T, np.eye(dU), 47 | lower=True) 48 | ) 49 | prc_new = sp.linalg.solve_triangular( 50 | chol_new, sp.linalg.solve_triangular(chol_new.T, np.eye(dU), 51 | lower=True) 52 | ) 53 | 54 | # Construct matrix, vector, and constants. 55 | M_prev = np.r_[ 56 | np.c_[K_prev.T.dot(prc_prev).dot(K_prev), -K_prev.T.dot(prc_prev)], 57 | np.c_[-prc_prev.dot(K_prev), prc_prev] 58 | ] 59 | M_new = np.r_[ 60 | np.c_[K_new.T.dot(prc_new).dot(K_new), -K_new.T.dot(prc_new)], 61 | np.c_[-prc_new.dot(K_new), prc_new] 62 | ] 63 | v_prev = np.r_[K_prev.T.dot(prc_prev).dot(k_prev), 64 | -prc_prev.dot(k_prev)] 65 | v_new = np.r_[K_new.T.dot(prc_new).dot(k_new), -prc_new.dot(k_new)] 66 | c_prev = 0.5 * k_prev.T.dot(prc_prev).dot(k_prev) 67 | c_new = 0.5 * k_new.T.dot(prc_new).dot(k_new) 68 | 69 | # Compute KL divergence at timestep t. 
70 | kl_div[t] = max( 71 | 0, 72 | -0.5 * mu_t.T.dot(M_new - M_prev).dot(mu_t) - 73 | mu_t.T.dot(v_new - v_prev) - c_new + c_prev - 74 | 0.5 * np.sum(sigma_t * (M_new-M_prev)) - 0.5 * logdet_new + 75 | 0.5 * logdet_prev 76 | ) 77 | 78 | # Add up divergences across time to get total divergence. 79 | return np.sum(kl_div) 80 | -------------------------------------------------------------------------------- /source/gps/gps_test.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import imp 3 | import os 4 | import os.path 5 | import sys 6 | import argparse 7 | import time 8 | import numpy as np 9 | import random 10 | 11 | # Add gps/python to path so that imports work. 12 | sys.path.append('/'.join(str.split(__file__, '/')[:-2])) 13 | import gps as gps_globals 14 | from gps.utility.display import Display 15 | from gps.sample.sample_list import SampleList 16 | from gps.algorithm.policy.tf_policy import TfPolicy 17 | from gps.algorithm.policy_opt.lto_model import fully_connected_tf_network 18 | from gps.algorithm.policy.csa_policy import CSAPolicy 19 | 20 | class GPSMain(object): 21 | #""" Main class to run algorithms and experiments. 
""" 22 | def __init__(self, config): 23 | """ 24 | Initialize GPSMain 25 | Args: 26 | config: Hyperparameters for experiment 27 | """ 28 | self._hyperparams = config 29 | self._conditions = config['common']['conditions'] 30 | if 'train_conditions' in config: 31 | self._train_idx = config['train_conditions'] 32 | self._test_idx = config['test_conditions'] 33 | else: 34 | self._train_idx = range(self._conditions) 35 | config['train_conditions'] = config['common']['conditions'] 36 | self._hyperparams=config 37 | self._test_idx = self._train_idx 38 | self._test_fncs = config['test_functions'] 39 | 40 | self._data_files_dir = config['common']['data_files_dir'] 41 | self.policy_path = config['policy_path'] 42 | self.network_config = config['algorithm']['policy_opt']['network_params'] 43 | self.agent = config['agent']['type'](config['agent']) 44 | config['common']['log_filename'] += '_test' 45 | self.disp = Display(config['common']) # For logging 46 | 47 | config['algorithm']['agent'] = self.agent 48 | self.algorithm = config['algorithm']['type'](config['algorithm']) 49 | 50 | def run(self): 51 | 52 | #itr_start = 0 53 | #guided_steps = [0.5, 0.4, 0.3, 0.2, 0.1] 54 | self.algorithm.policy_opt.policy = TfPolicy.load_policy(policy_dict_path=self.policy_path, tf_generator=fully_connected_tf_network, network_config=self.network_config) 55 | 56 | #for itr in range(itr_start, self._hyperparams['iterations']): 57 | #for m, cond in enumerate(self._train_idx): 58 | # for i in range(self._hyperparams['num_samples']): 59 | # self._take_sample(itr, cond, m, i) 60 | # print('Iteration %d' % (itr)) 61 | # traj_sample_lists = [self.agent.get_samples(cond, -self._hyperparams['num_samples']) for cond in self._train_idx] 62 | # # Clear agent samples. 
63 | # self.agent.clear_samples() 64 | # self.algorithm.iteration(traj_sample_lists) 65 | # 66 | # #pol_sample_lists = self._take_policy_samples(self._train_idx) 67 | # 68 | # #self._prev_traj_costs, self._prev_pol_costs = self.disp.update(itr, self.algorithm, self.agent, traj_sample_lists, pol_sample_lists) 69 | # self.algorithm.policy_opt.policy.pickle_policy(self.algorithm.policy_opt._dO, self.algorithm.policy_opt._dU, self._data_files_dir + ('policy_itr_%02d' % itr)) 70 | # self._test_peformance(t_length=50) 71 | #self.algorithm.policy_opt.policy = TfPolicy.load_policy(policy_dict_path=self.policy_path, tf_generator=fully_connected_tf_network, network_config=self.network_config) 72 | self._test_peformance(t_length=50) 73 | 74 | #pol_sample_lists = self._take_policy_samples(self._test_idx) 75 | #self._prev_traj_costs, self._prev_pol_costs = self.disp.update(self.alg orithm, self.agent, self._test_idx, pol_sample_lists) 76 | 77 | if 'on_exit' in self._hyperparams: 78 | self._hyperparams['on_exit'](self._hyperparams) 79 | 80 | def _train_peformance(self, guided_steps=0, t_length=50): 81 | pol_sample_lists = self._take_policy_samples(self._train_idx, guided_steps=guided_steps,t_length=t_length) 82 | for m, cond in enumerate(self._train_idx): 83 | for i in range(self._hyperparams['num_samples']): 84 | self._take_sample(11, cond, m, i, t_length=t_length) 85 | traj_sample_lists = [self.agent.get_samples(cond, -self._hyperparams['num_samples']) for cond in self._train_idx] 86 | self.agent.clear_samples() 87 | self.disp.update(self.algorithm, self.agent,self._test_idx, self._test_fncs, pol_sample_lists, traj_sample_lists) 88 | 89 | 90 | def _test_peformance(self, guided_steps=0, t_length=50): 91 | pol_sample_lists = self._take_policy_samples(self._test_idx, guided_steps=guided_steps,t_length=t_length) 92 | for m, cond in enumerate(self._test_idx): 93 | for i in range(self._hyperparams['num_samples']): 94 | self._take_sample(11, cond, m, i, t_length=t_length) 95 | 
traj_sample_lists = [self.agent.get_samples(cond, -self._hyperparams['num_samples']) for cond in self._test_idx] 96 | self.agent.clear_samples() 97 | self.disp.update(self.algorithm, self.agent,self._test_idx, self._test_fncs, pol_sample_lists, traj_sample_lists) 98 | 99 | def _take_sample(self, itr, cond, m, i, t_length=50): 100 | 101 | if self.algorithm.iteration_count == 0: 102 | pol = self.algorithm.cur[m].traj_distr 103 | else: 104 | if self.algorithm._hyperparams['sample_on_policy']: 105 | pol = self.algorithm.policy_opt.policy 106 | else: 107 | if np.random.rand() < 1.0: 108 | pol = self.algorithm.cur[m].traj_distr 109 | else: 110 | pol = CSAPolicy(T=self.agent.T) 111 | 112 | 113 | self.agent.sample(pol, cond, t_length=t_length) 114 | 115 | def _take_policy_samples(self, cond_list, guided_steps=0, t_length=50): 116 | pol_samples = [[] for _ in range(len(cond_list))] 117 | for cond in range(len(cond_list)): 118 | for i in range(self._hyperparams['num_samples']): 119 | pol_samples[cond].append(self.agent.sample(self.algorithm.policy_opt.policy, cond_list[cond], start_policy=self.algorithm.cur[cond].traj_distr, save=False, ltorun=True, guided_steps=guided_steps, t_length=t_length)) 120 | return [SampleList(samples) for samples in pol_samples] 121 | 122 | def main(): 123 | parser = argparse.ArgumentParser(description='Run the Guided Policy Search algorithm.') 124 | parser.add_argument('experiment', type=str, help='experiment name') 125 | args = parser.parse_args() 126 | 127 | exp_name = args.experiment 128 | 129 | from gps import __file__ as gps_filepath 130 | gps_filepath = os.path.abspath(gps_filepath) 131 | gps_dir = '/'.join(str.split(gps_filepath, '/')[:-3]) + '/' 132 | exp_dir = gps_dir + 'examples/' + exp_name + '/' 133 | hyperparams_file = exp_dir + 'hyperparams.py' 134 | 135 | logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO) 136 | 137 | if not os.path.exists(hyperparams_file): 138 | sys.exit("Experiment '%s' does not 
exist.\nDid you create '%s'?" % (exp_name, hyperparams_file)) 139 | 140 | 141 | # May be used by hyperparams.py to load different conditions 142 | gps_globals.phase = "TRAIN" 143 | hyperparams = imp.load_source('hyperparams', hyperparams_file) 144 | 145 | seed = hyperparams.config.get('random_seed', 0) 146 | random.seed(seed) 147 | np.random.seed(seed) 148 | 149 | gps = GPSMain(hyperparams.config) 150 | gps.run() 151 | 152 | if 'on_exit' in hyperparams.config: 153 | hyperparams.config['on_exit'](hyperparams.config) 154 | 155 | 156 | if __name__ == "__main__": 157 | main() 158 | 159 | -------------------------------------------------------------------------------- /source/gps/gps_train.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import imp 3 | import os 4 | import os.path 5 | import sys 6 | import argparse 7 | import time 8 | import numpy as np 9 | import random 10 | 11 | # Add gps/python to path so that imports work. 12 | sys.path.append('/'.join(str.split(__file__, '/')[:-2])) 13 | import gps as gps_globals 14 | from gps.utility.display import Display 15 | from gps.sample.sample_list import SampleList 16 | from gps.algorithm.policy.tf_policy import TfPolicy 17 | from gps.algorithm.policy_opt.lto_model import fully_connected_tf_network 18 | from gps.algorithm.policy.csa_policy import CSAPolicy 19 | 20 | class GPSMain(object): 21 | #""" Main class to run algorithms and experiments. 
""" 22 | def __init__(self, config): 23 | """ 24 | Initialize GPSMain 25 | Args: 26 | config: Hyperparameters for experiment 27 | """ 28 | self._hyperparams = config 29 | self._conditions = config['common']['conditions'] 30 | if 'train_conditions' in config: 31 | self._train_idx = config['train_conditions'] 32 | self._test_idx = config['test_conditions'] 33 | else: 34 | self._train_idx = range(self._conditions) 35 | config['train_conditions'] = config['common']['conditions'] 36 | self._hyperparams=config 37 | self._test_idx = self._train_idx 38 | self._test_fncs = config['test_functions'] 39 | 40 | self._data_files_dir = config['common']['data_files_dir'] 41 | self.policy_path = config['policy_path'] 42 | self.network_config = config['algorithm']['policy_opt']['network_params'] 43 | self.agent = config['agent']['type'](config['agent']) 44 | self.disp = Display(config['common']) # For logging 45 | 46 | config['algorithm']['agent'] = self.agent 47 | self.algorithm = config['algorithm']['type'](config['algorithm']) 48 | 49 | def run(self): 50 | 51 | itr_start = 0 52 | #guided_steps = [0.5, 0.4, 0.3, 0.2, 0.1] 53 | #self.algorithm.policy_opt.policy = TfPolicy.load_policy(policy_dict_path=self.policy_path, tf_generator=fully_connected_tf_network, network_config=self.network_config) 54 | 55 | for itr in range(itr_start, self._hyperparams['iterations']): 56 | for m, cond in enumerate(self._train_idx): 57 | for i in range(self._hyperparams['num_samples']): 58 | self._take_sample(itr, cond, m, i) 59 | print('Iteration %d' % (itr)) 60 | traj_sample_lists = [self.agent.get_samples(cond, -self._hyperparams['num_samples']) for cond in self._train_idx] 61 | # Clear agent samples. 
62 | self.agent.clear_samples() 63 | self.algorithm.iteration(traj_sample_lists) 64 | 65 | #pol_sample_lists = self._take_policy_samples(self._train_idx) 66 | 67 | #self._prev_traj_costs, self._prev_pol_costs = self.disp.update(itr, self.algorithm, self.agent, traj_sample_lists, pol_sample_lists) 68 | self.algorithm.policy_opt.policy.pickle_policy(self.algorithm.policy_opt._dO, self.algorithm.policy_opt._dU, self._data_files_dir + ('policy_itr_%02d' % itr)) 69 | self._test_peformance(t_length=50, iteration=itr) 70 | #self.algorithm.policy_opt.policy = TfPolicy.load_policy(policy_dict_path=self.policy_path, tf_generator=fully_connected_tf_network, network_config=self.network_config) 71 | self._test_peformance(t_length=50) 72 | 73 | #pol_sample_lists = self._take_policy_samples(self._test_idx) 74 | #self._prev_traj_costs, self._prev_pol_costs = self.disp.update(self.alg orithm, self.agent, self._test_idx, pol_sample_lists) 75 | 76 | if 'on_exit' in self._hyperparams: 77 | self._hyperparams['on_exit'](self._hyperparams) 78 | 79 | def _train_peformance(self, guided_steps=0, t_length=50): 80 | pol_sample_lists = self._take_policy_samples(self._train_idx, guided_steps=guided_steps,t_length=t_length) 81 | for m, cond in enumerate(self._train_idx): 82 | for i in range(self._hyperparams['num_samples']): 83 | self._take_sample(11, cond, m, i, t_length=t_length) 84 | traj_sample_lists = [self.agent.get_samples(cond, -self._hyperparams['num_samples']) for cond in self._train_idx] 85 | self.agent.clear_samples() 86 | self.disp.update(self.algorithm, self.agent,self._test_idx, self._test_fncs, pol_sample_lists, traj_sample_lists) 87 | 88 | 89 | def _test_peformance(self, guided_steps=0, t_length=50, iteration=15): 90 | pol_sample_lists = self._take_policy_samples(self._test_idx, guided_steps=guided_steps,t_length=t_length) 91 | for m, cond in enumerate(self._test_idx): 92 | for i in range(self._hyperparams['num_samples']): 93 | self._take_sample(11, cond, m, i, 
t_length=t_length) 94 | traj_sample_lists = [self.agent.get_samples(cond, -self._hyperparams['num_samples']) for cond in self._test_idx] 95 | self.agent.clear_samples() 96 | self.disp.update(self.algorithm, self.agent,self._test_idx, self._test_fncs, pol_sample_lists, traj_sample_lists, iteration=iteration) 97 | 98 | def _take_sample(self, itr, cond, m, i, t_length=50): 99 | 100 | if self.algorithm.iteration_count == 0: 101 | pol = self.algorithm.cur[m].traj_distr 102 | else: 103 | if self.algorithm._hyperparams['sample_on_policy']: 104 | pol = self.algorithm.policy_opt.policy 105 | else: 106 | if np.random.rand() < 0.7: 107 | pol = self.algorithm.cur[m].traj_distr 108 | else: 109 | pol = CSAPolicy(T=self.agent.T) 110 | 111 | 112 | self.agent.sample(pol, cond, t_length=t_length) 113 | 114 | def _take_policy_samples(self, cond_list, guided_steps=0, t_length=50): 115 | pol_samples = [[] for _ in range(len(cond_list))] 116 | for cond in range(len(cond_list)): 117 | for i in range(self._hyperparams['num_samples']): 118 | pol_samples[cond].append(self.agent.sample(self.algorithm.policy_opt.policy, cond_list[cond], start_policy=self.algorithm.cur[cond].traj_distr, save=False, ltorun=True, guided_steps=guided_steps, t_length=t_length)) 119 | return [SampleList(samples) for samples in pol_samples] 120 | 121 | def main(): 122 | parser = argparse.ArgumentParser(description='Run the Guided Policy Search algorithm.') 123 | parser.add_argument('experiment', type=str, help='experiment name') 124 | args = parser.parse_args() 125 | 126 | exp_name = args.experiment 127 | 128 | from gps import __file__ as gps_filepath 129 | gps_filepath = os.path.abspath(gps_filepath) 130 | gps_dir = '/'.join(str.split(gps_filepath, '/')[:-3]) + '/' 131 | exp_dir = gps_dir + 'examples/' + exp_name + '/' 132 | hyperparams_file = exp_dir + 'hyperparams.py' 133 | 134 | logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO) 135 | 136 | if not os.path.exists(hyperparams_file): 137 | 
sys.exit("Experiment '%s' does not exist.\nDid you create '%s'?" % (exp_name, hyperparams_file)) 138 | 139 | # May be used by hyperparams.py to load different conditions 140 | gps_globals.phase = "TRAIN" 141 | hyperparams = imp.load_source('hyperparams', hyperparams_file) 142 | 143 | seed = hyperparams.config.get('random_seed', 0) 144 | random.seed(seed) 145 | np.random.seed(seed) 146 | 147 | gps = GPSMain(hyperparams.config) 148 | gps.run() 149 | print("Done with sampling") 150 | if 'on_exit' in hyperparams.config: 151 | hyperparams.config['on_exit'](hyperparams.config) 152 | 153 | 154 | if __name__ == "__main__": 155 | main() 156 | 157 | -------------------------------------------------------------------------------- /source/gps/proto/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/source/gps/proto/__init__.py -------------------------------------------------------------------------------- /source/gps/proto/gps_pb2.py: -------------------------------------------------------------------------------- 1 | # Generated by the protocol buffer compiler. DO NOT EDIT! 
2 | # source: gps.proto 3 | 4 | import sys 5 | _b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) 6 | from google.protobuf.internal import enum_type_wrapper 7 | from google.protobuf import descriptor as _descriptor 8 | from google.protobuf import message as _message 9 | from google.protobuf import reflection as _reflection 10 | from google.protobuf import symbol_database as _symbol_database 11 | # @@protoc_insertion_point(imports) 12 | 13 | _sym_db = _symbol_database.Default() 14 | 15 | 16 | 17 | 18 | DESCRIPTOR = _descriptor.FileDescriptor( 19 | name='gps.proto', 20 | package='gps', 21 | syntax='proto2', 22 | serialized_options=None, 23 | serialized_pb=_b('\n\tgps.proto\x12\x03gps\"x\n\x06Sample\x12\t\n\x01T\x18\x01 \x01(\r\x12\n\n\x02\x64X\x18\x02 \x01(\r\x12\n\n\x02\x64U\x18\x03 \x01(\r\x12\n\n\x02\x64O\x18\x04 \x01(\r\x12\r\n\x01X\x18\x05 \x03(\x02\x42\x02\x10\x01\x12\r\n\x01U\x18\x06 \x03(\x02\x42\x02\x10\x01\x12\x0f\n\x03obs\x18\x07 \x03(\x02\x42\x02\x10\x01\x12\x10\n\x04meta\x18\x08 \x03(\x02\x42\x02\x10\x01*\xc7\x01\n\nSampleType\x12\n\n\x06\x41\x43TION\x10\x00\x12\x0b\n\x07\x43UR_LOC\x10\x01\x12\x17\n\x13PAST_OBJ_VAL_DELTAS\x10\x02\x12\r\n\tCUR_SIGMA\x10\x03\x12\n\n\x06\x43UR_PS\x10\x04\x12\x13\n\x0fPAST_LOC_DELTAS\x10\x05\x12\x0e\n\nPAST_SIGMA\x10\x06\x12\x0e\n\nMULTIMODAL\x10\x07\x12\x12\n\x0eGAUSSIAN_NOISE\x10\x08\x12\x10\n\x0c\x43\x41UCHY_NOISE\x10\t\x12\x11\n\rUNIFORM_NOISE\x10\n') 24 | ) 25 | 26 | _SAMPLETYPE = _descriptor.EnumDescriptor( 27 | name='SampleType', 28 | full_name='gps.SampleType', 29 | filename=None, 30 | file=DESCRIPTOR, 31 | values=[ 32 | _descriptor.EnumValueDescriptor( 33 | name='ACTION', index=0, number=0, 34 | serialized_options=None, 35 | type=None), 36 | _descriptor.EnumValueDescriptor( 37 | name='CUR_LOC', index=1, number=1, 38 | serialized_options=None, 39 | type=None), 40 | _descriptor.EnumValueDescriptor( 41 | name='PAST_OBJ_VAL_DELTAS', index=2, number=2, 42 | serialized_options=None, 43 | 
type=None), 44 | _descriptor.EnumValueDescriptor( 45 | name='CUR_SIGMA', index=3, number=3, 46 | serialized_options=None, 47 | type=None), 48 | _descriptor.EnumValueDescriptor( 49 | name='CUR_PS', index=4, number=4, 50 | serialized_options=None, 51 | type=None), 52 | _descriptor.EnumValueDescriptor( 53 | name='PAST_LOC_DELTAS', index=5, number=5, 54 | serialized_options=None, 55 | type=None), 56 | _descriptor.EnumValueDescriptor( 57 | name='PAST_SIGMA', index=6, number=6, 58 | serialized_options=None, 59 | type=None), 60 | _descriptor.EnumValueDescriptor( 61 | name='MULTIMODAL', index=7, number=7, 62 | serialized_options=None, 63 | type=None), 64 | _descriptor.EnumValueDescriptor( 65 | name='GAUSSIAN_NOISE', index=8, number=8, 66 | serialized_options=None, 67 | type=None), 68 | _descriptor.EnumValueDescriptor( 69 | name='CAUCHY_NOISE', index=9, number=9, 70 | serialized_options=None, 71 | type=None), 72 | _descriptor.EnumValueDescriptor( 73 | name='UNIFORM_NOISE', index=10, number=10, 74 | serialized_options=None, 75 | type=None), 76 | ], 77 | containing_type=None, 78 | serialized_options=None, 79 | serialized_start=141, 80 | serialized_end=340, 81 | ) 82 | _sym_db.RegisterEnumDescriptor(_SAMPLETYPE) 83 | 84 | SampleType = enum_type_wrapper.EnumTypeWrapper(_SAMPLETYPE) 85 | ACTION = 0 86 | CUR_LOC = 1 87 | PAST_OBJ_VAL_DELTAS = 2 88 | CUR_SIGMA = 3 89 | CUR_PS = 4 90 | PAST_LOC_DELTAS = 5 91 | PAST_SIGMA = 6 92 | MULTIMODAL = 7 93 | GAUSSIAN_NOISE = 8 94 | CAUCHY_NOISE = 9 95 | UNIFORM_NOISE = 10 96 | 97 | 98 | 99 | _SAMPLE = _descriptor.Descriptor( 100 | name='Sample', 101 | full_name='gps.Sample', 102 | filename=None, 103 | file=DESCRIPTOR, 104 | containing_type=None, 105 | fields=[ 106 | _descriptor.FieldDescriptor( 107 | name='T', full_name='gps.Sample.T', index=0, 108 | number=1, type=13, cpp_type=3, label=1, 109 | has_default_value=False, default_value=0, 110 | message_type=None, enum_type=None, containing_type=None, 111 | is_extension=False, 
extension_scope=None, 112 | serialized_options=None, file=DESCRIPTOR), 113 | _descriptor.FieldDescriptor( 114 | name='dX', full_name='gps.Sample.dX', index=1, 115 | number=2, type=13, cpp_type=3, label=1, 116 | has_default_value=False, default_value=0, 117 | message_type=None, enum_type=None, containing_type=None, 118 | is_extension=False, extension_scope=None, 119 | serialized_options=None, file=DESCRIPTOR), 120 | _descriptor.FieldDescriptor( 121 | name='dU', full_name='gps.Sample.dU', index=2, 122 | number=3, type=13, cpp_type=3, label=1, 123 | has_default_value=False, default_value=0, 124 | message_type=None, enum_type=None, containing_type=None, 125 | is_extension=False, extension_scope=None, 126 | serialized_options=None, file=DESCRIPTOR), 127 | _descriptor.FieldDescriptor( 128 | name='dO', full_name='gps.Sample.dO', index=3, 129 | number=4, type=13, cpp_type=3, label=1, 130 | has_default_value=False, default_value=0, 131 | message_type=None, enum_type=None, containing_type=None, 132 | is_extension=False, extension_scope=None, 133 | serialized_options=None, file=DESCRIPTOR), 134 | _descriptor.FieldDescriptor( 135 | name='X', full_name='gps.Sample.X', index=4, 136 | number=5, type=2, cpp_type=6, label=3, 137 | has_default_value=False, default_value=[], 138 | message_type=None, enum_type=None, containing_type=None, 139 | is_extension=False, extension_scope=None, 140 | serialized_options=_b('\020\001'), file=DESCRIPTOR), 141 | _descriptor.FieldDescriptor( 142 | name='U', full_name='gps.Sample.U', index=5, 143 | number=6, type=2, cpp_type=6, label=3, 144 | has_default_value=False, default_value=[], 145 | message_type=None, enum_type=None, containing_type=None, 146 | is_extension=False, extension_scope=None, 147 | serialized_options=_b('\020\001'), file=DESCRIPTOR), 148 | _descriptor.FieldDescriptor( 149 | name='obs', full_name='gps.Sample.obs', index=6, 150 | number=7, type=2, cpp_type=6, label=3, 151 | has_default_value=False, default_value=[], 152 | 
message_type=None, enum_type=None, containing_type=None, 153 | is_extension=False, extension_scope=None, 154 | serialized_options=_b('\020\001'), file=DESCRIPTOR), 155 | _descriptor.FieldDescriptor( 156 | name='meta', full_name='gps.Sample.meta', index=7, 157 | number=8, type=2, cpp_type=6, label=3, 158 | has_default_value=False, default_value=[], 159 | message_type=None, enum_type=None, containing_type=None, 160 | is_extension=False, extension_scope=None, 161 | serialized_options=_b('\020\001'), file=DESCRIPTOR), 162 | ], 163 | extensions=[ 164 | ], 165 | nested_types=[], 166 | enum_types=[ 167 | ], 168 | serialized_options=None, 169 | is_extendable=False, 170 | syntax='proto2', 171 | extension_ranges=[], 172 | oneofs=[ 173 | ], 174 | serialized_start=18, 175 | serialized_end=138, 176 | ) 177 | 178 | DESCRIPTOR.message_types_by_name['Sample'] = _SAMPLE 179 | DESCRIPTOR.enum_types_by_name['SampleType'] = _SAMPLETYPE 180 | _sym_db.RegisterFileDescriptor(DESCRIPTOR) 181 | 182 | Sample = _reflection.GeneratedProtocolMessageType('Sample', (_message.Message,), dict( 183 | DESCRIPTOR = _SAMPLE, 184 | __module__ = 'gps_pb2' 185 | # @@protoc_insertion_point(class_scope:gps.Sample) 186 | )) 187 | _sym_db.RegisterMessage(Sample) 188 | 189 | 190 | _SAMPLE.fields_by_name['X']._options = None 191 | _SAMPLE.fields_by_name['U']._options = None 192 | _SAMPLE.fields_by_name['obs']._options = None 193 | _SAMPLE.fields_by_name['meta']._options = None 194 | # @@protoc_insertion_point(module_scope) 195 | 196 | -------------------------------------------------------------------------------- /source/gps/sample/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/source/gps/sample/__init__.py -------------------------------------------------------------------------------- /source/gps/sample/sample.py: 
class Sample(object):
    """
    Class that handles the representation of a trajectory and stores a
    single trajectory.
    Note: must be serializable for easy saving, no C++ references!
    """
    def __init__(self, agent):
        """
        Args:
            agent: Agent the sample is drawn from; provides the dimensions
                T (timesteps), dX (state), dU (action), dO (observation),
                dM (meta data).
        """
        self.agent = agent

        self.T = agent.T
        self.dX = agent.dX
        self.dU = agent.dU
        self.dO = agent.dO
        self.dM = agent.dM

        # Dictionary containing the sample data from various sensors.
        self._data = {}

        # Lazily-assembled cached views; NaN marks "not yet computed".
        self._X = np.empty((self.T, self.dX))
        self._X.fill(np.nan)
        self._obs = np.empty((self.T, self.dO))
        self._obs.fill(np.nan)
        self._meta = np.empty(self.dM)
        self._meta.fill(np.nan)
        # Objective values along the trajectory (filled in by the agent).
        self.trajectory = []

    def set(self, sensor_name, sensor_data, t=None):
        """ Set trajectory data for a particular sensor.

        If t is None, `sensor_data` covers the whole trajectory; otherwise it
        is the reading for timestep t only.
        """
        if t is None:
            self._data[sensor_name] = sensor_data
            self._X.fill(np.nan)  # Invalidate existing X.
            self._obs.fill(np.nan)  # Invalidate existing obs.
            self._meta.fill(np.nan)  # Invalidate existing meta data.
        else:
            if sensor_name not in self._data:
                # Allocate T rows on first write; scalar sensors get a single
                # column so the row assignment below is well-formed.
                if sensor_data.size > 1:
                    self._data[sensor_name] = \
                        np.empty((self.T,) + sensor_data.shape)
                else:
                    self._data[sensor_name] = np.empty((self.T, 1))
                self._data[sensor_name].fill(np.nan)
            self._data[sensor_name][t, :] = sensor_data
            self._X[t, :].fill(np.nan)
            self._obs[t, :].fill(np.nan)

    def get(self, sensor_name, t=None):
        """ Get trajectory data for a particular sensor. """
        return (self._data[sensor_name] if t is None
                else self._data[sensor_name][t, :])

    def get_X(self, t=None):
        """ Get the state. Put it together if not precomputed. """
        X = self._X if t is None else self._X[t, :]
        if np.any(np.isnan(X)):
            for data_type in self._data:
                if data_type not in self.agent.x_data_types:
                    continue
                data = (self._data[data_type] if t is None
                        else self._data[data_type][t, :])
                self.agent.pack_data_x(X, data, data_types=[data_type])
        return X

    def get_U(self, t=None):
        """ Get the action. """
        return self._data[ACTION] if t is None else self._data[ACTION][t, :]

    def get_obs(self, t=None):
        """ Get the observation. Put it together if not precomputed. """
        obs = self._obs if t is None else self._obs[t, :]
        if np.any(np.isnan(obs)):
            for data_type in self._data:
                if data_type not in self.agent.obs_data_types:
                    continue
                if data_type in self.agent.meta_data_types:
                    continue
                data = (self._data[data_type] if t is None
                        else self._data[data_type][t, :])
                self.agent.pack_data_obs(obs, data, data_types=[data_type])
        return obs

    def get_meta(self):
        """ Get the meta data. Put it together if not precomputed. """
        meta = self._meta
        if np.any(np.isnan(meta)):
            for data_type in self._data:
                if data_type not in self.agent.meta_data_types:
                    continue
                data = self._data[data_type]
                self.agent.pack_data_meta(meta, data, data_types=[data_type])
        return meta

    def __copy__(self):
        # Shallow copy: the new instance shares every attribute value
        # (including the agent) with the original.
        cls = self.__class__
        result = cls.__new__(cls)  # Bypass __init__ and its allocations.
        result.__dict__.update(self.__dict__)
        return result

    def __deepcopy__(self, memo):
        # Deep-copy everything except the agent, which is shared by
        # reference (it may hold heavy / unpicklable state).
        # Fixes two defects of the original: `copy` was used without ever
        # being imported (NameError), and `result.agent` was never assigned,
        # so deep copies lacked the attribute entirely.
        import copy  # Local import: module previously had no `import copy`.
        cls = self.__class__
        result = cls.__new__(cls)
        memo[id(self)] = result
        result.agent = self.agent
        for name, value in self.__dict__.items():
            if name != "agent":  # Do not deepcopy self.agent.
                setattr(result, name, copy.deepcopy(value, memo))
        return result

    # For pickling.
    def __getstate__(self):
        state = self.__dict__.copy()
        state.pop('agent')  # The agent is not serializable; drop it.
        return state

    # For unpickling.
    def __setstate__(self, state):
        self.__dict__ = state
        self.__dict__['agent'] = None
class PickleSampleWriter(object):
    """ Pickles samples into data_file. """
    def __init__(self, data_file):
        """
        Args:
            data_file: Path of the file samples are written to.
        """
        self._data_file = data_file

    def write(self, samples):
        """ Write samples to the data file.

        Fixes the original, which called ``cPickle.dump(data_file, samples)``:
        ``cPickle`` was never imported (this module imports ``pickle``) and
        the object/file arguments were swapped, so every call raised
        NameError and, even with the name fixed, would have pickled the file
        handle instead of the samples.
        """
        with open(self._data_file, 'wb') as data_file:
            pickle.dump(samples, data_file)
""" 88 | LOGGER.debug('Collected %d samples', len(samples)) 89 | -------------------------------------------------------------------------------- /source/gps/utility/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/source/gps/utility/__init__.py -------------------------------------------------------------------------------- /source/gps/utility/display.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import json 3 | from gps.proto.gps_pb2 import CUR_LOC, ACTION, CUR_PS, CUR_SIGMA, PAST_SIGMA, PAST_OBJ_VAL_DELTAS, PAST_LOC_DELTAS 4 | import seaborn as sns 5 | import matplotlib.pyplot as plt 6 | sns.set() 7 | 8 | from matplotlib import rcParams 9 | rcParams["font.size"] = "30" 10 | rcParams['text.usetex'] = False 11 | rcParams['font.family'] = 'serif' 12 | rcParams['figure.figsize'] = (16.0, 9.0) 13 | rcParams['figure.frameon'] = True 14 | rcParams['figure.edgecolor'] = 'k' 15 | rcParams['grid.color'] = 'k' 16 | rcParams['grid.linestyle'] = ':' 17 | rcParams['grid.linewidth'] = 0.5 18 | rcParams['axes.linewidth'] = 1 19 | rcParams['axes.edgecolor'] = 'k' 20 | rcParams['axes.grid.which'] = 'both' 21 | rcParams['legend.frameon'] = 'True' 22 | rcParams['legend.framealpha'] = 1 23 | 24 | rcParams['ytick.major.size'] = 12 25 | rcParams['ytick.major.width'] = 1.5 26 | rcParams['ytick.minor.size'] = 6 27 | rcParams['ytick.minor.width'] = 1 28 | rcParams['xtick.major.size'] = 12 29 | rcParams['xtick.major.width'] = 1.5 30 | rcParams['xtick.minor.size'] = 6 31 | rcParams['xtick.minor.width'] = 1 32 | 33 | from datetime import datetime 34 | class Display(object): 35 | 36 | def __init__(self, hyperparams): 37 | self._hyperparams = hyperparams 38 | self._log_filename = self._hyperparams['log_filename'] 39 | self._plot_filename = self._hyperparams['plot_filename'] 40 | 
self._first_update = True 41 | 42 | def _output_column_titles(self, algorithm, policy_titles=False): 43 | """ 44 | Setup iteration data column titles: iteration, average cost, and for 45 | each condition the mean cost over samples, step size, linear Guassian 46 | controller entropies, and initial/final KL divergences for BADMM. 47 | """ 48 | condition_titles = '%3s | %8s %12s' % ('', '', '') 49 | itr_data_fields = '%3s | %8s %12s' % ('itr', 'avg_cost', 'avg_pol_cost') 50 | for m in range(algorithm.M): 51 | condition_titles += ' | %8s %9s %-7d' % ('', 'condition', m) 52 | itr_data_fields += ' | %8s %8s %8s' % (' cost ', ' step ', 'entropy ') 53 | condition_titles += ' %8s %8s %8s' % ('', '', '') 54 | itr_data_fields += ' %8s %8s %8s %s ' % ('pol_cost', 'kl_div_i', 'kl_div_f', 'samples') 55 | self.append_output_text(condition_titles) 56 | self.append_output_text(itr_data_fields) 57 | 58 | def eval(self, sample, cur_cond_idx): 59 | """ 60 | Evaluate cost function and derivatives on a sample. 61 | Args: 62 | sample: A single sample 63 | """ 64 | T = sample.T 65 | Du = sample.dU 66 | Dx = sample.dX 67 | 68 | cur_fcn = sample.agent.fcns[cur_cond_idx]['fcn_obj'] 69 | 70 | final_l = np.zeros(T) 71 | 72 | x = sample.get(CUR_LOC) 73 | sigma_ = sample.get(CUR_SIGMA) 74 | sigma = [sigma_[i][0] for i in range(sigma_.shape[0])] 75 | _, dim = x.shape 76 | 77 | 78 | for t in range(T): 79 | final_l[t] = sample.trajectory[t] 80 | 81 | return x, sigma, final_l 82 | 83 | def get_sample_data(self, sample, cur_cond_idx): 84 | """ 85 | Evaluate cost function and derivatives on a sample. 
86 | Args: 87 | sample: A single sample 88 | """ 89 | T = sample.T 90 | Du = sample.dU 91 | Dx = sample.dX 92 | 93 | cur_fcn = sample.agent.fcns[cur_cond_idx]['fcn_obj'] 94 | 95 | ps_ = sample.get(CUR_PS) 96 | ps = [ps_[i][0] for i in range(ps_.shape[0])] 97 | past_sigma = sample.get(PAST_SIGMA) 98 | past_obj_val_deltas = sample.get(PAST_OBJ_VAL_DELTAS) 99 | past_loc_deltas = sample.get(PAST_LOC_DELTAS) 100 | 101 | return ps, past_sigma, past_obj_val_deltas, past_loc_deltas 102 | 103 | def _update_iteration_data(self, algorithm, test_idx, test_fcns, pol_sample_lists, traj_sample_lists, iteration=15): 104 | """ 105 | Update iteration data information: iteration, average cost, and for 106 | each condition the mean cost over samples, step size, linear Guassian 107 | controller entropies, and initial/final KL divergences for BADMM. 108 | """ 109 | #data = {} 110 | #if pol_sample_lists is not None: 111 | # pol_costs = [[np.sum(algorithm.cost[m].eval(pol_sample_lists[m][i],True)[0]) for i in range(len(pol_sample_lists[m]))] 112 | # for m in range(len(cond_idx_list))] 113 | #if traj_sample_lists is not None: 114 | # traj_costs = [[np.sum(algorithm.cost[m].eval(traj_sample_lists[m][i],True)[0]) for i in range(len(traj_sample_lists[m]))] 115 | # for m in range(len(cond_idx_list))] 116 | 117 | #data['avg_cost'] = np.mean(pol_costs) 118 | #itr_data = '%s : %12.2f' % ('avg_cost', np.mean(pol_costs)) 119 | #self.append_output_text(itr_data) 120 | #else: 121 | # pol_costs = None 122 | # itr_data = '%3d | %8.2f' % (itr, avg_cost) 123 | for m,idx in enumerate(test_idx): 124 | samples = len(pol_sample_lists[m]) 125 | sample = np.random.randint(samples) 126 | sample_ = 'Sample_' + str(sample) 127 | test_fcn = test_fcns[m % len(test_fcns)] 128 | #itr_data = '%s%d' % ('Sample_', i) 129 | #self.append_output_text(itr_data) 130 | pol_avg_cost, pol_std, traj_avg_cost, traj_std, pol_avg_sigma, pol_sigma_std, traj_avg_sigma, traj_sigma_std, end_values = self.get_data(pol_sample_lists[m], 
traj_sample_lists[m], idx) 131 | self.plot_data(pol_sample_lists[m][0], traj_sample_lists[m][0], test_fcn, pol_avg_cost, traj_avg_cost, pol_avg_sigma, traj_avg_sigma, pol_std, traj_std, pol_sigma_std, traj_sigma_std, end_values, iteration=iteration) 132 | 133 | #data[function_str][sample_] = {'obj_values': list(obj_val)} 134 | #itr_data = '%s : %s ' % ('cur_loc', x) 135 | #self.append_output_text(itr_data) 136 | #itr_data = '%s : %s ' % ('obj_values', obj_val) 137 | #self.append_output_text(itr_data) 138 | #self.append_output_text(data) 139 | return pol_avg_cost 140 | 141 | def get_data(self, pol_samples, traj_samples, cur_cond): 142 | pol_avg_obj = [] 143 | pol_avg_sigma = [] 144 | traj_avg_obj = [] 145 | traj_avg_sigma = [] 146 | end_values = [] 147 | for m in range(len(pol_samples)): 148 | _,p_sigma,p_obj_val = self.eval(pol_samples[m], cur_cond) 149 | _,t_sigma,t_obj_val = self.eval(traj_samples[m], cur_cond) 150 | pol_avg_obj.append(p_obj_val) 151 | pol_avg_sigma.append(p_sigma) 152 | traj_avg_obj.append(t_obj_val) 153 | traj_avg_sigma.append(t_sigma) 154 | end_values.append(p_obj_val[-1]) 155 | return np.mean(pol_avg_obj, axis=0), np.std(pol_avg_obj, axis=0), np.mean(traj_avg_obj, axis=0), np.std(traj_avg_obj, axis=0), np.mean(pol_avg_sigma, axis=0), np.std(pol_avg_sigma, axis=0), np.mean(traj_avg_sigma, axis=0), np.std(traj_avg_sigma, axis=0), end_values 156 | 157 | def plot_data(self, pol_sample, traj_sample, cur_cond, pol_costs, traj_costs, pol_sigma, traj_sigma, pol_std, traj_std, pol_sigma_std, traj_sigma_std, end_values, iteration=15): 158 | #pol_ps, pol_past_sigma, pol_past_obj_val_deltas, pol_past_loc_deltas = self.get_sample_data(pol_sample,cur_cond) 159 | #traj_ps, traj_past_sigma, traj_past_obj_val_deltas, traj_past_loc_deltas = self.get_sample_data(traj_sample, cur_cond) 160 | log_text = {} 161 | log_text['Average costs LTO'] = list(pol_costs) 162 | log_text['Average costs controller'] = list(traj_costs) 163 | log_text['End values LTO'] = 
list(end_values) 164 | log_text['Sigma LTO'] = list(pol_sigma) 165 | log_text['Sigma controller'] = list(traj_sigma) 166 | log_text['Std costs LTO'] = list(pol_std) 167 | log_text['Std costs controller'] = list(traj_std) 168 | log_text['Std Sigma LTO'] = list(pol_sigma_std) 169 | log_text['Std Sigma controller'] = list(traj_sigma_std) 170 | 171 | # log_text += 'Ps LTO: %s \n' % (pol_ps) 172 | # log_text += 'Ps CSA: %s \n' % (traj_ps) 173 | # log_text += 'Past Sigma LTO: %s \n' % (pol_past_sigma) 174 | # log_text += 'Past Sigma CSA: %s \n' % (traj_past_sigma) 175 | # log_text += 'Past Obj Val Deltas LTO: %s \n' % (pol_past_obj_val_deltas) 176 | # log_text += 'Past Obj Val Deltas CSA: %s \n' % (traj_past_obj_val_deltas) 177 | # log_text += 'Past Loc Deltas LTO: %s \n' % (pol_past_loc_deltas) 178 | # log_text += 'Past Loc Deltas CSA: %s \n' % (traj_past_loc_deltas) 179 | self.append_output_text(log_text, iteration, fcn_name=cur_cond) 180 | 181 | plt.tick_params(axis='x', which='minor') 182 | plt.legend(loc=0, fontsize=25, ncol=2) 183 | plt.title(cur_cond, fontsize=50) 184 | plt.xlabel("iteration", fontsize=50) 185 | plt.ylabel("objective value", fontsize=50) 186 | plt.fill_between(list(range(len(pol_costs))), np.subtract(pol_costs,pol_std), np.add(pol_costs,pol_std), color=sns.xkcd_rgb["medium green"], alpha=0.5) 187 | plt.plot(pol_costs,color=sns.xkcd_rgb["medium green"], 188 | linewidth=4, label='LTO') 189 | plt.fill_between(list(range(len(traj_costs))),np.subtract(traj_costs,traj_std), np.add(traj_costs,traj_std), color=sns.xkcd_rgb["denim blue"], alpha=0.5) 190 | plt.plot(traj_costs,color=sns.xkcd_rgb["denim blue"], 191 | linewidth=4, label='LG Controller') 192 | plt.legend() 193 | timestamp = datetime.now() 194 | time = str(timestamp) 195 | method = "Objective_value" 196 | plot_file = ('%s_%s_%s_%s.pdf' % (self._plot_filename, method, cur_cond, time)) 197 | plt.savefig(plot_file, bbox_inches='tight') 198 | plt.show() 199 | plt.clf() 200 | 201 | 
plt.tick_params(axis='x', which='minor') 202 | plt.legend(loc=0, fontsize=25, ncol=2) 203 | plt.title(cur_cond, fontsize=50) 204 | plt.xlabel("iteration", fontsize=50) 205 | plt.ylabel("Step size", fontsize=50) 206 | plt.fill_between(list(range(len(pol_sigma))),np.subtract(pol_sigma,pol_sigma_std), np.add(pol_sigma,pol_sigma_std), color=sns.xkcd_rgb["medium green"], alpha=0.5) 207 | plt.plot(pol_sigma, color=sns.xkcd_rgb["medium green"], 208 | linewidth=4, label='LTO') 209 | plt.fill_between(list(range(len(traj_sigma))),np.subtract(traj_sigma,traj_sigma_std), np.add(traj_sigma,traj_sigma_std), color=sns.xkcd_rgb["denim blue"], alpha=0.5) 210 | plt.plot(traj_sigma,color=sns.xkcd_rgb["denim blue"], 211 | linewidth=4, label='LG Controller') 212 | plt.legend() 213 | timestamp = datetime.now() 214 | time = str(timestamp) 215 | method = "Step size" 216 | plot_file = ('%s_%s_%s_%s.pdf' % (self._plot_filename, method, cur_cond, time)) 217 | plt.savefig(plot_file, bbox_inches='tight') 218 | plt.show() 219 | plt.clf() 220 | 221 | 222 | def update(self, algorithm, agent, test_fcns, cond_idx_list, pol_sample_lists, traj_sample_lists, iteration=15): 223 | 224 | if self._first_update: 225 | #self._output_column_titles(algorithm) 226 | self._first_update = False 227 | #costs = [np.mean(np.sum(algorithm.prev[m].cs, axis=1)) for m in range(algorithm.M)] 228 | pol_costs = self._update_iteration_data(algorithm, test_fcns, cond_idx_list, pol_sample_lists, traj_sample_lists, iteration=iteration) 229 | 230 | return pol_costs 231 | 232 | def append_output_text(self, text, iteration=15, fcn_name=""): 233 | log_file = '%s_iteration%s_%s.json' % (self._log_filename, iteration, fcn_name) 234 | with open(log_file, 'a') as f: 235 | #f.write('%s \n' % (str(text))) 236 | json.dump(text, f) 237 | f.write('\n') 238 | #print(text) 239 | 240 | -------------------------------------------------------------------------------- /source/gps/utility/general_utils.py: 
class BundleType(object):
    """
    Bundle many named fields together, like a record or a mutable
    namedtuple. The field set is frozen at construction time: existing
    fields can be reassigned, but creating a new field raises
    AttributeError.
    """
    def __init__(self, variables):
        # Go through object.__setattr__ directly so the frozen-field check
        # below does not reject the initial field creation.
        for name, value in variables.items():
            object.__setattr__(self, name, value)

    def __setattr__(self, key, value):
        # Only already-existing fields may be assigned.
        if hasattr(self, key):
            object.__setattr__(self, key, value)
        else:
            raise AttributeError("%r has no attribute %s" % (self, key))
def logsum(vec, axis=0, keepdims=True):
    """
    Numerically stable log-sum-exp of `vec` along `axis`:
    log(sum(exp(vec))) computed by shifting by the per-slice maximum.

    Args:
        vec: Array of log values.
        axis: Axis to reduce over.
        keepdims: Whether the reduced axis is kept with size 1.
    Returns:
        Array of log-sums with the reduced axis kept or dropped per
        `keepdims`.
    """
    peak = np.max(vec, axis=axis, keepdims=keepdims)
    # A slice that is entirely -inf would yield exp(-inf - (-inf)) = nan;
    # shifting such slices by 0 keeps the result a clean -inf.
    peak[peak == -float('inf')] = 0
    shifted = np.exp(vec - peak)
    return np.log(np.sum(shifted, axis=axis, keepdims=keepdims)) + peak
""" 19 | def __init__(self, init_sequential=False, eigreg=False, warmstart=True): 20 | self.init_sequential = init_sequential 21 | self.eigreg = eigreg 22 | self.warmstart = warmstart 23 | self.sigma = None 24 | 25 | def inference(self, pts): 26 | """ 27 | Evaluate dynamics prior. 28 | Args: 29 | pts: A N x D array of points. 30 | """ 31 | # Compute posterior cluster weights. 32 | logwts = self.clusterwts(pts) 33 | 34 | # Compute posterior mean and covariance. 35 | mu0, Phi = self.moments(logwts) 36 | 37 | # Set hyperparameters. 38 | m = self.N 39 | n0 = m - 2 - mu0.shape[0] 40 | 41 | # Normalize. 42 | m = float(m) / self.N 43 | n0 = float(n0) / self.N 44 | return mu0, Phi, m, n0 45 | 46 | def estep(self, data): 47 | """ 48 | Compute log observation probabilities under GMM. 49 | Args: 50 | data: A N x D array of points. 51 | Returns: 52 | logobs: A N x K array of log probabilities (for each point 53 | on each cluster). 54 | """ 55 | # Constants. 56 | K = self.sigma.shape[0] 57 | Di = data.shape[1] 58 | N = data.shape[0] 59 | 60 | # Compute probabilities. 61 | data = data.T 62 | mu = self.mu[:, 0:Di].T 63 | mu_expand = np.expand_dims(np.expand_dims(mu, axis=1), axis=1) 64 | assert mu_expand.shape == (Di, 1, 1, K) 65 | # Calculate for each point distance to each cluster. 
66 | data_expand = np.tile(data, [K, 1, 1, 1]).transpose([2, 3, 1, 0]) 67 | diff = data_expand - np.tile(mu_expand, [1, N, 1, 1]) 68 | assert diff.shape == (Di, N, 1, K) 69 | Pdiff = np.zeros_like(diff) 70 | cconst = np.zeros((1, 1, 1, K)) 71 | 72 | for i in range(K): 73 | U = scipy.linalg.cholesky(self.sigma[i, :Di, :Di], 74 | check_finite=False) 75 | Pdiff[:, :, 0, i] = scipy.linalg.solve_triangular( 76 | U, scipy.linalg.solve_triangular( 77 | U.T, diff[:, :, 0, i], lower=True, check_finite=False 78 | ), check_finite=False 79 | ) 80 | cconst[0, 0, 0, i] = -np.sum(np.log(np.diag(U))) - 0.5 * Di * \ 81 | np.log(2 * np.pi) 82 | 83 | logobs = -0.5 * np.sum(diff * Pdiff, axis=0, keepdims=True) + cconst 84 | assert logobs.shape == (1, N, 1, K) 85 | logobs = logobs[0, :, 0, :] + self.logmass.T 86 | return logobs 87 | 88 | def moments(self, logwts): 89 | """ 90 | Compute the moments of the cluster mixture with logwts. 91 | Args: 92 | logwts: A K x 1 array of log cluster probabilities. 93 | Returns: 94 | mu: A (D,) mean vector. 95 | sigma: A D x D covariance matrix. 96 | """ 97 | # Exponentiate. 98 | wts = np.exp(logwts) 99 | 100 | # Compute overall mean. 101 | mu = np.sum(self.mu * wts, axis=0) 102 | 103 | # Compute overall covariance. 104 | # For some reason this version works way better than the "right" 105 | # one... could we be computing xxt wrong? 106 | diff = self.mu - np.expand_dims(mu, axis=0) 107 | diff_expand = np.expand_dims(diff, axis=1) * \ 108 | np.expand_dims(diff, axis=2) 109 | wts_expand = np.expand_dims(wts, axis=2) 110 | sigma = np.sum((self.sigma + diff_expand) * wts_expand, axis=0) 111 | return mu, sigma 112 | 113 | def clusterwts(self, data): 114 | """ 115 | Compute cluster weights for specified points under GMM. 116 | Args: 117 | data: An N x D array of points 118 | Returns: 119 | A K x 1 array of average cluster log probabilities. 120 | """ 121 | # Compute probability of each point under each cluster. 
122 | logobs = self.estep(data) 123 | 124 | # Renormalize to get cluster weights. 125 | logwts = logobs - logsum(logobs, axis=1) 126 | 127 | # Average the cluster probabilities. 128 | logwts = logsum(logwts, axis=0) - np.log(data.shape[0]) 129 | return logwts.T 130 | 131 | def update(self, data, K, max_iterations=100): 132 | """ 133 | Run EM to update clusters. 134 | Args: 135 | data: An N x D data matrix, where N = number of data points. 136 | K: Number of clusters to use. 137 | """ 138 | # Constants. 139 | N = data.shape[0] 140 | Do = data.shape[1] 141 | 142 | LOGGER.debug('Fitting GMM with %d clusters on %d points', K, N) 143 | 144 | if (not self.warmstart or self.sigma is None or 145 | K != self.sigma.shape[0]): 146 | # Initialization. 147 | LOGGER.debug('Initializing GMM.') 148 | self.sigma = np.zeros((K, Do, Do)) 149 | self.mu = np.zeros((K, Do)) 150 | self.logmass = np.log(1.0 / K) * np.ones((K, 1)) 151 | self.mass = (1.0 / K) * np.ones((K, 1)) 152 | self.N = data.shape[0] 153 | N = self.N 154 | 155 | # Set initial cluster indices. 156 | if not self.init_sequential: 157 | cidx = np.random.randint(0, K, size=(1, N)) 158 | else: 159 | raise NotImplementedError() 160 | 161 | # Initialize. 162 | for i in range(K): 163 | cluster_idx = (cidx == i)[0] 164 | mu = np.mean(data[cluster_idx, :], axis=0) 165 | diff = (data[cluster_idx, :] - mu).T 166 | sigma = (1.0 / cluster_idx.shape[0]) * (diff.dot(diff.T)) 167 | self.mu[i, :] = mu 168 | self.sigma[i, :, :] = sigma + np.eye(Do) * 2e-6 169 | 170 | prevll = -float('inf') 171 | for itr in range(max_iterations): 172 | # E-step: compute cluster probabilities. 173 | logobs = self.estep(data) 174 | 175 | # Compute log-likelihood. 176 | ll = np.sum(logsum(logobs, axis=1)) 177 | LOGGER.debug('GMM itr %d/%d. Log likelihood: %f', 178 | itr, max_iterations, ll) 179 | if ll < prevll: 180 | LOGGER.debug('Log-likelihood decreased! 
Ending on itr=%d/%d', 181 | itr, max_iterations) 182 | break 183 | if np.abs(ll-prevll) < 1e-5*prevll: 184 | LOGGER.debug('GMM converged on itr=%d/%d', 185 | itr, max_iterations) 186 | break 187 | prevll = ll 188 | 189 | # Renormalize to get cluster weights. 190 | logw = logobs - logsum(logobs, axis=1) 191 | assert logw.shape == (N, K) 192 | 193 | # Renormalize again to get weights for refitting clusters. 194 | logwn = logw - logsum(logw, axis=0) 195 | assert logwn.shape == (N, K) 196 | w = np.exp(logwn) 197 | 198 | # M-step: update clusters. 199 | # Fit cluster mass. 200 | self.logmass = logsum(logw, axis=0).T 201 | self.logmass = self.logmass - logsum(self.logmass, axis=0) 202 | assert self.logmass.shape == (K, 1) 203 | self.mass = np.exp(self.logmass) 204 | # Reboot small clusters. 205 | w[:, (self.mass < (1.0 / K) * 1e-4)[:, 0]] = 1.0 / N 206 | # Fit cluster means. 207 | w_expand = np.expand_dims(w, axis=2) 208 | data_expand = np.expand_dims(data, axis=1) 209 | self.mu = np.sum(w_expand * data_expand, axis=0) 210 | # Fit covariances. 211 | wdata = data_expand * np.sqrt(w_expand) 212 | assert wdata.shape == (N, K, Do) 213 | for i in range(K): 214 | # Compute weighted outer product. 215 | XX = wdata[:, i, :].T.dot(wdata[:, i, :]) 216 | mu = self.mu[i, :] 217 | self.sigma[i, :, :] = XX - np.outer(mu, mu) 218 | 219 | if self.eigreg: # Use eigenvalue regularization. 220 | raise NotImplementedError() 221 | else: # Use quick and dirty regularization. 222 | sigma = self.sigma[i, :, :] 223 | self.sigma[i, :, :] = 0.5 * (sigma + sigma.T) + \ 224 | 1e-6 * np.eye(Do) 225 | --------------------------------------------------------------------------------