├── .gitignore
├── .idea
│   ├── .gitignore
│   ├── LTO-CMA.iml
│   ├── inspectionProfiles
│   │   └── profiles_settings.xml
│   ├── misc.xml
│   ├── modules.xml
│   └── vcs.xml
├── LICENSE
├── README.md
├── data
│   ├── PPSN_LTO_Data.zip
│   ├── PPSN_LTO_policies.zip
│   ├── README_Data.txt
│   └── README_Policies.txt
├── examples
│   ├── 10BBOB
│   │   ├── GallaghersGaussian21hi_LTO.json
│   │   ├── data_files
│   │   │   ├── checkpoint
│   │   │   ├── policy_itr_14.pkl
│   │   │   ├── policy_itr_14_tf_data.ckpt.data-00000-of-00001
│   │   │   ├── policy_itr_14_tf_data.ckpt.index
│   │   │   └── policy_itr_14_tf_data.ckpt.meta
│   │   └── hyperparams.py
│   └── BentCigar
│       └── hyperparams.py
├── plots
│   ├── Plot_ObjectiveValue_AttractiveSector.pdf
│   ├── Plot_ObjectiveValue_BentCigar.pdf
│   ├── Plot_ObjectiveValue_BuecheRastrigin.pdf
│   ├── Plot_ObjectiveValue_CompositeGR.pdf
│   ├── Plot_ObjectiveValue_DifferentPowers.pdf
│   ├── Plot_ObjectiveValue_GG101me.pdf
│   ├── Plot_ObjectiveValue_GG21hi.pdf
│   ├── Plot_ObjectiveValue_LinearSlope.pdf
│   ├── Plot_ObjectiveValue_LunacekBiRastrigin.pdf
│   ├── Plot_ObjectiveValue_RosenbrockRotated.pdf
│   ├── Plot_ObjectiveValue_SchaffersIllConditioned.pdf
│   ├── Plot_ObjectiveValue_SharpRidge.pdf
│   ├── Plot_ObjectiveValue_StepEllipsoidal.pdf
│   ├── Plot_StepSize_AttractiveSector.pdf
│   ├── Plot_StepSize_BentCigar.pdf
│   ├── Plot_StepSize_BuecheRastrigin.pdf
│   ├── Plot_StepSize_CompositeGR.pdf
│   ├── Plot_StepSize_DifferentPowers.pdf
│   ├── Plot_StepSize_GG101me.pdf
│   ├── Plot_StepSize_GG21hi.pdf
│   ├── Plot_StepSize_LinearSlope.pdf
│   ├── Plot_StepSize_LunacekBiRastrigin.pdf
│   ├── Plot_StepSize_RosenbrockRotated.pdf
│   ├── Plot_StepSize_SchaffersIllConditioned.pdf
│   ├── Plot_StepSize_SharpRidge.pdf
│   └── Plot_StepSize_StepEllipsoidal.pdf
├── requirements.txt
├── scripts
│   └── plot_performance.py
└── source
    └── gps
        ├── LICENSE.md
        ├── README.md
        ├── __init__.py
        ├── agent
        │   ├── __init__.py
        │   ├── agent.py
        │   ├── config.py
        │   └── lto
        │       ├── __init__.py
        │       ├── agent_cmaes.py
        │       ├── cmaes_world.py
        │       └── fcn.py
        ├── algorithm
        │   ├── __init__.py
        │   ├── algorithm.py
        │   ├── algorithm_utils.py
        │   ├── config.py
        │   ├── cost
        │   │   ├── __init__.py
        │   │   ├── config.py
        │   │   ├── cost.py
        │   │   └── cost_utils.py
        │   ├── dynamics
        │   │   ├── __init__.py
        │   │   ├── config.py
        │   │   ├── dynamics_lr_prior.py
        │   │   └── dynamics_prior_gmm.py
        │   ├── policy
        │   │   ├── __init__.py
        │   │   ├── config.py
        │   │   ├── constant_policy.py
        │   │   ├── csa_policy.py
        │   │   ├── lin_gauss_init.py
        │   │   ├── lin_gauss_policy.py
        │   │   ├── policy.py
        │   │   ├── policy_prior_gmm.py
        │   │   └── tf_policy.py
        │   ├── policy_opt
        │   │   ├── __init__.py
        │   │   ├── config.py
        │   │   ├── lto_model.py
        │   │   ├── policy_opt.py
        │   │   └── tf_utils.py
        │   └── traj_opt
        │       ├── __init__.py
        │       ├── config.py
        │       ├── traj_opt.py
        │       └── traj_opt_utils.py
        ├── gps_test.py
        ├── gps_train.py
        ├── proto
        │   ├── __init__.py
        │   └── gps_pb2.py
        ├── sample
        │   ├── __init__.py
        │   ├── sample.py
        │   └── sample_list.py
        └── utility
            ├── __init__.py
            ├── display.py
            ├── general_utils.py
            └── gmm.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |
--------------------------------------------------------------------------------
/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /workspace.xml
3 |
--------------------------------------------------------------------------------
/.idea/LTO-CMA.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # LTO-CMA
2 | Code for the paper "Learning Step-Size Adaptation in CMA-ES"
3 | ## License
4 | Our work is available under the Apache-2.0 license. To learn step-size adaptation in CMA-ES we use guided policy search (GPS).
5 | We built upon the GPS version of Li and Malik. Their original GPS code can be found at https://www.math.ias.edu/~ke.li/downloads/lto_code.tar.gz
6 |
7 | In a nutshell, we modified the GPS code to be able to continuously sample from the starting teacher. To this end we introduce a sampling rate that determines how often we use new samples generated from the starting policy.
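A minimal sketch of that idea (the function and policy names below are illustrative placeholders, not this repository's actual API):

```
import random

def pick_sampling_policy(teacher_policy, learned_policy, sampling_rate):
    """With probability `sampling_rate`, generate the next trajectory
    sample with the starting (teacher) policy; otherwise sample from
    the current learned policy."""
    if random.random() < sampling_rate:
        return teacher_policy
    return learned_policy
```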
8 |
9 | The original code falls under GPLv3. In *source/gps* we list the files that we modified (which thereby fall under GPLv3) and those that are of our own creation (i.e., under Apache-2.0).
10 |
11 | ## Experiment Setup
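Before running an experiment, install the pinned dependencies (TensorFlow 1.15, cma 3.0.3, seaborn 0.11.1; see *requirements.txt*):
```
pip install -r requirements.txt
```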
12 | ### Training
13 | - Create an experiment folder
14 | - Create a file *hyperparams.py* with the hyperparameters of the experiment in the experiment folder (see *examples/10BBOB/hyperparams.py* for a complete example)
15 | - Start learning step-size adaptation by executing the command:
16 | ```
17 | python gps_train.py EXPERIMENT_FOLDER_NAME
18 | ```
19 | - The output of training is the pickled learned policy, saved under *EXPERIMENT_FOLDER_NAME/data_files*; it can be loaded as sketched below.
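A minimal loading sketch (the path is the one used in the shipped *examples/10BBOB* folder; unpickling assumes the *gps* package from *source* is importable, since the pickle references its classes):

```
import pickle

# Load a policy saved by gps_train.py after 15 iterations (itr_14).
with open("examples/10BBOB/data_files/policy_itr_14.pkl", "rb") as f:
    policy = pickle.load(f)
```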
20 | ### Testing
21 | - Add the path to the learned policy in the hyperparameter file *hyperparams.py*
22 | - Start testing the performance of the learned policy on the test set by executing the command:
23 | ```
24 | python gps_test.py EXPERIMENT_FOLDER_NAME
25 | ```
26 | - The output of testing is one file *log_data_test_X.json* per condition X of the test set, named after the condition's function and saved in the experiment folder.
27 | - Each output file contains:
28 | - The average objective values from 25 samples of running the learned policy on the test condition X,
29 | - The end objective values of the 25 samples,
30 | - The average step-size for each step of the optimization trajectory from 25 samples, and
31 | - The standard deviation of the objective value and the step-size for each step of the optimization trajectory.
32 | - To plot the results, run the *plot_performance.py* script in the *scripts* folder.
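For example, with the script's default arguments (which assume *data/PPSN_LTO_Data.zip* has been extracted in place):
```
cd scripts
python plot_performance.py --lto_path ../examples/10BBOB/GallaghersGaussian21hi_LTO.json \
                           --csa_path ../data/PPSN_LTO_Data/CSA_Data/CSA_Plots_10D/GallaghersGaussian21hi.json \
                           --function GallaghersGaussian21hi
```
The keys the plotting script reads are the ones visible in the shipped *examples/10BBOB/GallaghersGaussian21hi_LTO.json*: `Average costs LTO`, `End values LTO`, `Sigma LTO`, `Std costs LTO` and `Std Sigma LTO` (with the corresponding `CSA` keys in the CSA files).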
33 | ## Reference
34 | ```
35 | @inproceedings{shala-ppsn20,
36 |   author    = {G. Shala and A. Biedenkapp and N. Awad and S. Adriaensen and M. Lindauer and F. Hutter},
37 |   title     = {Learning Step-size Adaptation in CMA-ES},
38 |   booktitle = {Proceedings of the Sixteenth International Conference on Parallel Problem Solving from Nature ({PPSN}'20)},
39 |   year      = {2020},
40 |   month     = sep,
41 | }
42 | ```
43 |
--------------------------------------------------------------------------------
/data/PPSN_LTO_Data.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/data/PPSN_LTO_Data.zip
--------------------------------------------------------------------------------
/data/PPSN_LTO_policies.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/data/PPSN_LTO_policies.zip
--------------------------------------------------------------------------------
/data/README_Data.txt:
--------------------------------------------------------------------------------
1 | Data Archive for the paper
2 | "Learning step-size adaptation in CMA-ES"
3 | - CSA_Data (contains the performance data of running CMA-ES with CSA)
4 |   - CSA_Plots_XD (X = {5,10,15,...,60}; contains CSA data for functions of dimensionality 5-60D)
5 |   - CSA_Plots_X (X = {50, 100, 150, 200, 250, 500, 1000}; contains CSA data for CMA-ES runs of 50-1000 generations)
6 | - LTO_Data (contains the performance data from the runs of CMA-ES with the learned policies for step-size adaptation)
7 |   - Sampling_Rate_Ablation (contains the performance data of LTO with sampling rate 0-0.9)
8 |     - Sampling 0.X (X = {0,1,...,9})
9 |   - Train_5-30D_to_35-60D (contains the train and test performance data of LTO trained on functions of 5-30D, tested on 35-60D)
10 |   - Transfer_Other_Fcns (contains the performance data of LTO trained on 10 BBOB functions, tested on 12 different BBOB functions)
11 |   - Transfer_Longer_Traj (contains the performance data of LTO trained on CMA-ES runs of 50 generations, tested on CMA-ES runs of 50-1000 generations)
12 |   - Transfer5-30D_from10D (contains the performance data of LTO trained on functions of 10D, tested on 5-30D)
--------------------------------------------------------------------------------
/data/README_Policies.txt:
--------------------------------------------------------------------------------
1 | Policy Archive for the paper
2 | "Learning step-size adaptation in CMA-ES"
3 |
4 | - LTO_Data (contains the saved policies from the runs of CMA-ES with the learned policies for step-size adaptation)
5 |   - Sampling_Rate_Ablation (contains the saved learned policies with sampling rate 0-0.9)
6 |     - Sampling 0.X (X = {0,1,...,9})
7 |   - Train_5-30D_to_35-60D (contains the saved learned policies trained on functions of 5-30D)
8 |   - Transfer_Other_Fcns (contains the saved learned policies trained on 10 BBOB functions)
9 |
--------------------------------------------------------------------------------
/examples/10BBOB/GallaghersGaussian21hi_LTO.json:
--------------------------------------------------------------------------------
1 | {"Average costs LTO": [112.62379735434348, 111.3391294935969, 109.26043885899631, 103.50465549871282, 94.59429581291653, 84.85220229325782, 77.61487904143523, 73.09672733938768, 68.23321210764803, 64.02529556050979, 61.264454931406405, 57.422301371701494, 55.42630242063979, 54.50174089226569, 53.136916149149755, 51.98158278564496, 51.16316322814002, 50.23521801149499, 49.879771890939466, 49.3406015323143, 48.81505231270593, 48.32021789141222, 48.16056999659033, 47.92214862529059, 47.66926530314748, 47.380307245204804, 47.13012495591299, 46.94405863900299, 46.683562697681026, 46.56380402513933, 46.4175654420398, 46.34038930319992, 46.29502297233834, 46.13500183931653, 46.06886297010712, 45.963060523358806, 45.904999020500235, 45.876167776364106, 45.84578782589061, 45.80835552328669, 45.78785598740914, 45.764379926741185, 45.744466485144265, 45.72109054700329, 45.71120003269648, 45.705404625222585, 45.68477710520333, 45.67598030492183, 45.66006137928655, 45.65477567766216], "End values LTO": [45.58425177849915, 45.59863009863613, 45.603145052739045, 45.61201690273372, 45.6134123141773, 45.61359841515806, 45.61438702954894, 45.6161682253263, 45.61647638193877, 45.61802862912683, 45.6183892188068, 45.61895058239136, 45.61937265202621, 45.61943089687926, 45.62214901997382, 45.622363190080925, 45.62245851643677, 45.623178210192584, 45.623711210241346, 45.62371554636879, 45.625113586144394, 45.62521121969189, 45.62615689529963, 45.62640282783731, 45.62750677605763], "Sigma LTO": [0.41302519076753247, 0.2926393218497182, 0.2977230347253698, 0.29782627594495187, 0.3015109857823612, 0.3254356160911449, 0.32692723299273585, 0.3363416406579729, 0.3437612871011416, 0.37097323391964243, 0.3738293573355639, 0.37632890637587735, 0.38888381867361804, 0.3886229773720293, 0.38878827040435926, 0.4040172978213582, 0.39160998779126466, 0.39509360197929294, 0.3709629475841213, 0.3942064625706532, 0.354022768365397, 0.34676055123771904, 0.33992057101149087, 0.329181679985184, 0.3261333482409759, 0.31179579282714975, 0.28437874762764515, 0.306736331453086, 0.2736518937732548, 0.286509352791504, 0.26251309283898505, 0.2717234340968551, 0.24336039441818147, 0.24486909156746256, 0.23267127409018246, 0.21863449950033412, 0.22589433230159012, 0.2085146743364903, 0.21118013421705886, 0.19435015563259692, 0.19438686756396586, 0.18964455108396447, 0.1810123647408295, 0.18250413453802442, 0.18364313730240783, 0.17302415340718397, 0.17068904530811732, 0.1656929317213275, 0.16353122194234754, 0.16907236761351602], "Std costs LTO": [2.754740698754516, 2.409498281178775, 2.8079918273574247, 3.5646825562559723, 5.461435381212568, 8.13005565272067, 10.50886796545814, 12.378945849280134, 13.435664393153894, 14.386911554310375, 14.64554780922917, 15.064299560675199, 14.717408416587924, 14.323257039815257, 13.862713382625122, 13.221380523371106, 12.401666557469156, 11.58956449082783, 10.630010909239266, 9.52415147790265, 8.737926049247628, 7.966438436815966, 7.371977933270448, 6.835523964887072, 6.382606539180567, 6.048732647100209, 5.844663712137482, 5.714337356392995, 5.599890211229416, 5.552298819931983, 5.458436150543536, 5.406659014150698, 5.336915428966987, 5.265005839530419, 5.154414877920218, 5.050979665946451, 5.022267130624553, 4.9615487137075185, 4.891022328836538, 4.823307523248715, 4.777225833159941, 4.73513980295079, 4.703012572471365, 4.664328930351546, 4.636839174564745, 4.607799659624398, 4.569735913825095, 4.527669534979869, 4.500262686345359, 4.469846732288544], "Std Sigma LTO": [0.17923269515976697, 
0.16356070517668442, 0.15718907435209512, 0.16778215946723338, 0.16821275422144688, 0.1879636627426515, 0.19894398911597808, 0.2092583706577774, 0.2196352169061549, 0.24238747987750348, 0.22864439472127326, 0.23156663011726647, 0.23033679270935198, 0.2295729406747637, 0.23017083420511353, 0.22164800911681012, 0.20696349039151168, 0.2007510177412861, 0.1814064055629177, 0.19515969632033472, 0.15740111398817108, 0.15068756117609325, 0.14634179931948976, 0.1357868290346, 0.1365103381027344, 0.13184222737234677, 0.10729688937425973, 0.1364989829742853, 0.10514342823455317, 0.1252798074542987, 0.10945311991717543, 0.12001588001012473, 0.09494951180325842, 0.10227374132424506, 0.08737016973389826, 0.07133715021518652, 0.09472027108427962, 0.0694132887668044, 0.07057152804376877, 0.057362791591105466, 0.061161598919881176, 0.05667473735617357, 0.0571830224310085, 0.06051263270568752, 0.06262075139748807, 0.05117954292315268, 0.04495258801748381, 0.04116835112442848, 0.04207894035101424, 0.059761939590193006]}
--------------------------------------------------------------------------------
/examples/10BBOB/data_files/checkpoint:
--------------------------------------------------------------------------------
1 | model_checkpoint_path: "policy_itr_00_tf_data.ckpt"
2 | all_model_checkpoint_paths: "policy_itr_00_tf_data.ckpt"
3 |
--------------------------------------------------------------------------------
/examples/10BBOB/data_files/policy_itr_14.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/examples/10BBOB/data_files/policy_itr_14.pkl
--------------------------------------------------------------------------------
/examples/10BBOB/data_files/policy_itr_14_tf_data.ckpt.data-00000-of-00001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/examples/10BBOB/data_files/policy_itr_14_tf_data.ckpt.data-00000-of-00001
--------------------------------------------------------------------------------
/examples/10BBOB/data_files/policy_itr_14_tf_data.ckpt.index:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/examples/10BBOB/data_files/policy_itr_14_tf_data.ckpt.index
--------------------------------------------------------------------------------
/examples/10BBOB/data_files/policy_itr_14_tf_data.ckpt.meta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/examples/10BBOB/data_files/policy_itr_14_tf_data.ckpt.meta
--------------------------------------------------------------------------------
/examples/10BBOB/hyperparams.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 | import os.path
4 | from datetime import datetime
5 | import numpy as np
6 | import cma
7 | from cma import bbobbenchmarks as bn
8 | import gps
9 | from gps import __file__ as gps_filepath
10 | from gps.agent.lto.agent_cmaes import AgentCMAES
11 | from gps.agent.lto.cmaes_world import CMAESWorld
12 | from gps.algorithm.algorithm import Algorithm
13 | from gps.algorithm.cost.cost import Cost
14 | from gps.algorithm.dynamics.dynamics_lr_prior import DynamicsLRPrior
15 | from gps.algorithm.dynamics.dynamics_prior_gmm import DynamicsPriorGMM
16 | from gps.algorithm.policy.policy_prior_gmm import PolicyPriorGMM
17 | from gps.algorithm.traj_opt.traj_opt import TrajOpt
18 | from gps.algorithm.policy_opt.policy_opt import PolicyOpt
19 | from gps.algorithm.policy_opt.lto_model import fully_connected_tf_network
20 | from gps.algorithm.policy.lin_gauss_init import init_cmaes_controller
21 | from gps.proto.gps_pb2 import CUR_LOC, PAST_OBJ_VAL_DELTAS, CUR_SIGMA, CUR_PS, PAST_LOC_DELTAS,PAST_SIGMA, ACTION
22 | from gps.algorithm.cost.cost_utils import RAMP_CONSTANT
23 |
24 | try:
25 |     import cPickle as pickle
26 | except ImportError:
27 |     import pickle
28 | import copy
29 |
30 |
31 |
32 | session = tf.Session()
33 | history_len = 40
34 |
35 | TRAIN = True
36 |
37 | input_dim = 10
38 | num_inits_per_fcn = 1
39 | init_locs = []
40 | if TRAIN:
41 | num_fcns = 100
42 | train_fcns = range(num_fcns)
43 | test_fcns = range(num_fcns-10, num_fcns)
44 | fcn_ids = [12, 11, 2, 23, 15, 8, 17, 20, 1, 16]
45 | fcn_names = ["BentCigar", "Discus", "Ellipsoid", "Katsuura", "Rastrigin", "Rosenbrock", "Schaffers", "Schwefel", "Sphere", "Weierstrass"]
46 | init_sigma_test = [1.28, 0.38, 1.54, 1.18, 0.1, 1.66, 0.33, 0.1, 1.63, 0.1]
47 | #initialize the initial locations of the optimization trajectories
48 | init_locs.extend(list(np.random.randn(num_fcns-len(test_fcns), input_dim)))
49 | #initialize the initial sigma(step size) values
50 | init_sigmas = list(np.random.rand(num_fcns-len(test_fcns)))
51 | init_sigmas.extend(init_sigma_test)
52 | #append the initial locations of the conditions in the test set
53 | for i in test_fcns:
54 | init_locs.append([0]*input_dim)
55 |
56 | else:
57 | num_fcns = 12
58 | # We don't do any training so we evaluate on all the conditions in the 'training set'
59 | train_fcns = range(num_fcns)
60 | test_fcns = train_fcns
61 | fcn_ids = [6, 4, 19, 14, 5, 13, 7, 9, 18, 24, 21, 22]
62 | fcn_names = ["AttractiveSector", "BuecheRastrigin", "CompositeGR", "DifferentPowers", "LinearSlope",
63 | "SharpRidge", "StepEllipsoidal", "RosenbrockRotated", "SchaffersIllConditioned",
64 | "LunacekBiR", "GG101me", "GG21hi"]
65 | init_sigmas = [0.5]*len(test_fcns)
66 | for i in test_fcns:
67 | init_locs.append([0]*input_dim)
68 |
69 |
70 | cur_dir = os.path.dirname(os.path.abspath(__file__))
71 |
72 |
73 | fcn_objs = []
74 | fcns = []
75 | for _ in range(num_fcns//len(fcn_ids)):
76 |     # instantiate the BBOB functions based on their ID
77 |     for fcn_id in fcn_ids:
78 |         fcn_objs.append(bn.instantiate(fcn_id)[0])
79 |
80 | for i, function in enumerate(fcn_objs):
81 |     fcns.append({'fcn_obj': function, 'dim': input_dim, 'init_loc': list(init_locs[i]), 'init_sigma': init_sigmas[i]})
82 |
83 | SENSOR_DIMS = {
84 | PAST_OBJ_VAL_DELTAS: history_len,
85 | CUR_PS: 1,
86 | CUR_SIGMA : 1,
87 | ACTION: 1,
88 | PAST_SIGMA: history_len
89 | }
90 |
91 | BASE_DIR = '/'.join(str.split(gps_filepath, '/')[:-2])
92 | EXP_DIR = BASE_DIR + '/../examples/10BBOB' + '/'
93 |
94 |
95 | common = {
96 | 'experiment_name': 'CMA_' + \
97 | datetime.strftime(datetime.now(), '%m-%d-%y_%H-%M'),
98 | 'experiment_dir': EXP_DIR,
99 | 'data_files_dir': EXP_DIR + 'data_files/',
100 | 'plot_filename': EXP_DIR + 'plot',
101 | 'log_filename': EXP_DIR + 'log_data',
102 | 'conditions': num_fcns,
103 | 'train_conditions': train_fcns,
104 | 'test_conditions': test_fcns,
105 | 'test_functions': fcn_names
106 | }
107 |
108 | if not os.path.exists(common['data_files_dir']):
109 | os.makedirs(common['data_files_dir'])
110 |
111 | agent = {
112 | 'type': AgentCMAES,
113 | 'world' : CMAESWorld,
114 | 'init_sigma': 0.3,
115 | 'popsize': 10,
116 | 'n_min':10,
117 | 'max_nfe': 200000,
118 | 'substeps': 1,
119 | 'conditions': common['conditions'],
120 | 'dt': 0.05,
121 | 'T': 50,
122 | 'sensor_dims': SENSOR_DIMS,
123 | 'state_include': [PAST_OBJ_VAL_DELTAS, CUR_SIGMA, CUR_PS, PAST_SIGMA],
124 | 'obs_include': [PAST_OBJ_VAL_DELTAS, CUR_PS, PAST_SIGMA, CUR_SIGMA],
125 | 'history_len': history_len,
126 | 'fcns': fcns
127 | }
128 |
129 | algorithm = {
130 | 'type': Algorithm,
131 | 'conditions': common['conditions'],
132 | 'train_conditions': train_fcns,
133 | 'test_conditions': test_fcns,
134 | 'test_functions': fcn_names,
135 | 'iterations': 15, # must be > 1
136 | 'inner_iterations': 4,
137 | 'policy_dual_rate': 0.2,
138 | 'init_pol_wt': 0.01,
139 | 'ent_reg_schedule': 0.0,
140 | 'fixed_lg_step': 3,
141 | 'kl_step': 0.2,
142 | 'min_step_mult': 0.01,
143 | 'max_step_mult': 10.0,
144 | 'sample_decrease_var': 0.05,
145 | 'sample_increase_var': 0.1,
146 | 'policy_sample_mode': 'replace',
147 | 'exp_step_lower': 2,
148 | 'exp_step_upper': 2
149 | }
150 |
151 | algorithm['init_traj_distr'] = {
152 | 'type': init_cmaes_controller,
153 | 'init_var': 0.01,
154 | 'dt': agent['dt'],
155 | 'T': agent['T']
156 | }
157 |
158 | algorithm['cost'] = {
159 | 'type': Cost,
160 | 'ramp_option': RAMP_CONSTANT,
161 | 'wp_final_multiplier': 1.0,
162 | 'weight': 1.0,
163 | }
164 |
165 | algorithm['dynamics'] = {
166 | 'type': DynamicsLRPrior,
167 | 'regularization': 1e-3, # Increase this if Qtt is not PD during DGD
168 | 'clipping_thresh': None,
169 | 'prior': {
170 | 'type': DynamicsPriorGMM,
171 | 'max_clusters': 20,
172 | 'min_samples_per_cluster': 20,
173 | 'max_samples': 20,
174 | 'strength': 1.0 # How much weight to give to prior relative to samples
175 | }
176 | }
177 |
178 | algorithm['traj_opt'] = {
179 | 'type': TrajOpt,
180 | }
181 |
182 | algorithm['policy_opt'] = {
183 | 'type': PolicyOpt,
184 | 'network_model': fully_connected_tf_network,
185 | 'iterations': 20000,
186 | 'init_var': 0.01,
187 | 'batch_size': 25,
188 | 'solver_type': 'adam',
189 | 'lr': 0.0001,
190 | 'lr_policy': 'fixed',
191 | 'momentum': 0.9,
192 | 'weight_decay': 0.005,
193 | 'use_gpu': 0,
194 | 'weights_file_prefix': EXP_DIR + 'policy',
195 | 'network_params': {
196 | 'obs_include': agent['obs_include'],
197 | 'sensor_dims': agent['sensor_dims'],
198 | 'dim_hidden': [50, 50]
199 | }
200 | }
201 |
202 | algorithm['policy_prior'] = {
203 | 'type': PolicyPriorGMM,
204 | 'max_clusters': 20,
205 | 'min_samples_per_cluster': 20,
206 | 'max_samples': 20,
207 | 'strength': 1.0,
208 | 'clipping_thresh': None,
209 | 'init_regularization': 1e-3,
210 | 'subsequent_regularization': 1e-3
211 | }
212 |
213 | config = {
214 | 'iterations': algorithm['iterations'],
215 | 'num_samples': 25,
216 | 'common': common,
217 | 'agent': agent,
218 | 'algorithm': algorithm,
219 | 'train_conditions': train_fcns,
220 | 'test_conditions': test_fcns,
221 | 'test_functions': fcn_names,
222 | 'policy_path': EXP_DIR + 'data_files/policy_itr_14.pkl'
223 | }
224 |
225 |
--------------------------------------------------------------------------------
/examples/BentCigar/hyperparams.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 | import os.path
4 | from datetime import datetime
5 | import numpy as np
6 | import cma
7 | from cma import bbobbenchmarks as bn
8 | import gps
9 | from gps import __file__ as gps_filepath
10 | from gps.agent.lto.agent_cmaes import AgentCMAES
11 | from gps.agent.lto.cmaes_world import CMAESWorld
12 | from gps.algorithm.algorithm import Algorithm
13 | from gps.algorithm.cost.cost import Cost
14 | from gps.algorithm.dynamics.dynamics_lr_prior import DynamicsLRPrior
15 | from gps.algorithm.dynamics.dynamics_prior_gmm import DynamicsPriorGMM
16 | from gps.algorithm.policy.policy_prior_gmm import PolicyPriorGMM
17 | from gps.algorithm.traj_opt.traj_opt import TrajOpt
18 | from gps.algorithm.policy_opt.policy_opt import PolicyOpt
19 | from gps.algorithm.policy_opt.lto_model import fully_connected_tf_network
20 | from gps.algorithm.policy.lin_gauss_init import init_cmaes_controller
21 | from gps.proto.gps_pb2 import CUR_LOC, PAST_OBJ_VAL_DELTAS, CUR_SIGMA, CUR_PS, PAST_LOC_DELTAS,PAST_SIGMA, ACTION
22 | from gps.algorithm.cost.cost_utils import RAMP_CONSTANT
23 |
24 | try:
25 |     import cPickle as pickle
26 | except ImportError:
27 |     import pickle
28 | import copy
29 |
30 |
31 |
32 | session = tf.Session()
33 | history_len = 40
34 |
35 | TRAIN = True
36 |
37 | input_dim = 10
38 | num_inits_per_fcn = 1
39 | init_locs = []
40 | if TRAIN:
41 | num_fcns = 20
42 | train_fcns = range(num_fcns)
43 | test_fcns = range(num_fcns-1, num_fcns)
44 | fcn_ids = [12]
45 | fcn_names = ["BentCigar"]
46 | init_sigma_test = [1.28]
47 | #initialize the initial locations of the optimization trajectories
48 | init_locs.extend(list(np.random.randn(num_fcns-len(test_fcns), input_dim)))
49 | #initialize the initial sigma(step size) values
50 | init_sigmas = list(np.random.rand(num_fcns-len(test_fcns)))
51 | init_sigmas.extend(init_sigma_test)
52 | #append the initial locations of the conditions in the test set
53 | for i in test_fcns:
54 | init_locs.append([0]*input_dim)
55 |
56 | else:
57 | num_fcns = 1
58 | # We don't do any training so we evaluate on all the conditions in the 'training set'
59 | train_fcns = range(num_fcns)
60 | test_fcns = train_fcns
61 | fcn_ids = [12]
62 | fcn_names = ["BentCigar"]
63 | init_sigmas = [1.28]*len(test_fcns)
64 | for i in test_fcns:
65 | init_locs.append([0]*input_dim)
66 |
67 |
68 | cur_dir = os.path.dirname(os.path.abspath(__file__))
69 |
70 |
71 | fcn_objs = []
72 | fcns = []
73 | for _ in range(num_fcns//len(fcn_ids)):
74 |     # instantiate the BBOB functions based on their ID
75 |     for fcn_id in fcn_ids:
76 |         fcn_objs.append(bn.instantiate(fcn_id)[0])
77 |
78 | for i, function in enumerate(fcn_objs):
79 |     fcns.append({'fcn_obj': function, 'dim': input_dim, 'init_loc': list(init_locs[i]), 'init_sigma': init_sigmas[i]})
80 |
81 | SENSOR_DIMS = {
82 | PAST_OBJ_VAL_DELTAS: history_len,
83 | CUR_PS: 1,
84 | CUR_SIGMA : 1,
85 | ACTION: 1,
86 | PAST_SIGMA: history_len
87 | }
88 |
89 | BASE_DIR = '/'.join(str.split(gps_filepath, '/')[:-2])
90 | EXP_DIR = BASE_DIR + '/../examples/BentCigar' + '/'
91 |
92 |
93 | common = {
94 | 'experiment_name': 'CMA_' + \
95 | datetime.strftime(datetime.now(), '%m-%d-%y_%H-%M'),
96 | 'experiment_dir': EXP_DIR,
97 | 'data_files_dir': EXP_DIR + 'data_files/',
98 | 'plot_filename': EXP_DIR + 'plot',
99 | 'log_filename': EXP_DIR + 'log_data',
100 | 'conditions': num_fcns,
101 | 'train_conditions': train_fcns,
102 | 'test_conditions': test_fcns,
103 | 'test_functions': fcn_names
104 | }
105 |
106 | if not os.path.exists(common['data_files_dir']):
107 | os.makedirs(common['data_files_dir'])
108 |
109 | agent = {
110 | 'type': AgentCMAES,
111 | 'world' : CMAESWorld,
112 | 'init_sigma': 0.3,
113 | 'popsize': 10,
114 | 'n_min':10,
115 | 'max_nfe': 200000,
116 | 'substeps': 1,
117 | 'conditions': common['conditions'],
118 | 'dt': 0.05,
119 | 'T': 50,
120 | 'sensor_dims': SENSOR_DIMS,
121 | 'state_include': [PAST_OBJ_VAL_DELTAS, CUR_SIGMA, CUR_PS, PAST_SIGMA],
122 | 'obs_include': [PAST_OBJ_VAL_DELTAS, CUR_PS, PAST_SIGMA, CUR_SIGMA],
123 | 'history_len': history_len,
124 | 'fcns': fcns
125 | }
126 |
127 | algorithm = {
128 | 'type': Algorithm,
129 | 'conditions': common['conditions'],
130 | 'train_conditions': train_fcns,
131 | 'test_conditions': test_fcns,
132 | 'test_functions': fcn_names,
133 | 'iterations': 15,
134 | 'inner_iterations': 4,
135 | 'policy_dual_rate': 0.2,
136 | 'init_pol_wt': 0.01,
137 | 'ent_reg_schedule': 0.0,
138 | 'fixed_lg_step': 3,
139 | 'kl_step': 0.2,
140 | 'min_step_mult': 0.01,
141 | 'max_step_mult': 10.0,
142 | 'sample_decrease_var': 0.05,
143 | 'sample_increase_var': 0.1,
144 | 'policy_sample_mode': 'replace',
145 | 'exp_step_lower': 2,
146 | 'exp_step_upper': 2
147 | }
148 |
149 | algorithm['init_traj_distr'] = {
150 | 'type': init_cmaes_controller,
151 | 'init_var': 0.01,
152 | 'dt': agent['dt'],
153 | 'T': agent['T']
154 | }
155 |
156 | algorithm['cost'] = {
157 | 'type': Cost,
158 | 'ramp_option': RAMP_CONSTANT,
159 | 'wp_final_multiplier': 1.0,
160 | 'weight': 1.0,
161 | }
162 |
163 | algorithm['dynamics'] = {
164 | 'type': DynamicsLRPrior,
165 | 'regularization': 1e-3, # Increase this if Qtt is not PD during DGD
166 | 'clipping_thresh': None,
167 | 'prior': {
168 | 'type': DynamicsPriorGMM,
169 | 'max_clusters': 20,
170 | 'min_samples_per_cluster': 20,
171 | 'max_samples': 20,
172 | 'strength': 1.0 # How much weight to give to prior relative to samples
173 | }
174 | }
175 |
176 | algorithm['traj_opt'] = {
177 | 'type': TrajOpt,
178 | }
179 |
180 | algorithm['policy_opt'] = {
181 | 'type': PolicyOpt,
182 | 'network_model': fully_connected_tf_network,
183 | 'iterations': 20000,
184 | 'init_var': 0.01,
185 | 'batch_size': 25,
186 | 'solver_type': 'adam',
187 | 'lr': 0.0001,
188 | 'lr_policy': 'fixed',
189 | 'momentum': 0.9,
190 | 'weight_decay': 0.005,
191 | 'use_gpu': 0,
192 | 'weights_file_prefix': EXP_DIR + 'policy',
193 | 'network_params': {
194 | 'obs_include': agent['obs_include'],
195 | 'sensor_dims': agent['sensor_dims'],
196 | 'dim_hidden': [50, 50]
197 | }
198 | }
199 |
200 | algorithm['policy_prior'] = {
201 | 'type': PolicyPriorGMM,
202 | 'max_clusters': 20,
203 | 'min_samples_per_cluster': 20,
204 | 'max_samples': 20,
205 | 'strength': 1.0,
206 | 'clipping_thresh': None,
207 | 'init_regularization': 1e-3,
208 | 'subsequent_regularization': 1e-3
209 | }
210 |
211 | config = {
212 | 'iterations': algorithm['iterations'],
213 | 'num_samples': 25,
214 | 'common': common,
215 | 'agent': agent,
216 | 'algorithm': algorithm,
217 | 'train_conditions': train_fcns,
218 | 'test_conditions': test_fcns,
219 | 'test_functions': fcn_names,
220 | 'policy_path': EXP_DIR + 'data_files/policy_itr_14.pkl'
221 | }
222 |
223 |
--------------------------------------------------------------------------------
/plots/Plot_ObjectiveValue_AttractiveSector.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/plots/Plot_ObjectiveValue_AttractiveSector.pdf
--------------------------------------------------------------------------------
/plots/Plot_ObjectiveValue_BentCigar.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/plots/Plot_ObjectiveValue_BentCigar.pdf
--------------------------------------------------------------------------------
/plots/Plot_ObjectiveValue_BuecheRastrigin.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/plots/Plot_ObjectiveValue_BuecheRastrigin.pdf
--------------------------------------------------------------------------------
/plots/Plot_ObjectiveValue_CompositeGR.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/plots/Plot_ObjectiveValue_CompositeGR.pdf
--------------------------------------------------------------------------------
/plots/Plot_ObjectiveValue_DifferentPowers.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/plots/Plot_ObjectiveValue_DifferentPowers.pdf
--------------------------------------------------------------------------------
/plots/Plot_ObjectiveValue_GG101me.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/plots/Plot_ObjectiveValue_GG101me.pdf
--------------------------------------------------------------------------------
/plots/Plot_ObjectiveValue_GG21hi.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/plots/Plot_ObjectiveValue_GG21hi.pdf
--------------------------------------------------------------------------------
/plots/Plot_ObjectiveValue_LinearSlope.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/plots/Plot_ObjectiveValue_LinearSlope.pdf
--------------------------------------------------------------------------------
/plots/Plot_ObjectiveValue_LunacekBiRastrigin.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/plots/Plot_ObjectiveValue_LunacekBiRastrigin.pdf
--------------------------------------------------------------------------------
/plots/Plot_ObjectiveValue_RosenbrockRotated.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/plots/Plot_ObjectiveValue_RosenbrockRotated.pdf
--------------------------------------------------------------------------------
/plots/Plot_ObjectiveValue_SchaffersIllConditioned.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/plots/Plot_ObjectiveValue_SchaffersIllConditioned.pdf
--------------------------------------------------------------------------------
/plots/Plot_ObjectiveValue_SharpRidge.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/plots/Plot_ObjectiveValue_SharpRidge.pdf
--------------------------------------------------------------------------------
/plots/Plot_ObjectiveValue_StepEllipsoidal.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/plots/Plot_ObjectiveValue_StepEllipsoidal.pdf
--------------------------------------------------------------------------------
/plots/Plot_StepSize_AttractiveSector.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/plots/Plot_StepSize_AttractiveSector.pdf
--------------------------------------------------------------------------------
/plots/Plot_StepSize_BentCigar.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/plots/Plot_StepSize_BentCigar.pdf
--------------------------------------------------------------------------------
/plots/Plot_StepSize_BuecheRastrigin.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/plots/Plot_StepSize_BuecheRastrigin.pdf
--------------------------------------------------------------------------------
/plots/Plot_StepSize_CompositeGR.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/plots/Plot_StepSize_CompositeGR.pdf
--------------------------------------------------------------------------------
/plots/Plot_StepSize_DifferentPowers.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/plots/Plot_StepSize_DifferentPowers.pdf
--------------------------------------------------------------------------------
/plots/Plot_StepSize_GG101me.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/plots/Plot_StepSize_GG101me.pdf
--------------------------------------------------------------------------------
/plots/Plot_StepSize_GG21hi.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/plots/Plot_StepSize_GG21hi.pdf
--------------------------------------------------------------------------------
/plots/Plot_StepSize_LinearSlope.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/plots/Plot_StepSize_LinearSlope.pdf
--------------------------------------------------------------------------------
/plots/Plot_StepSize_LunacekBiRastrigin.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/plots/Plot_StepSize_LunacekBiRastrigin.pdf
--------------------------------------------------------------------------------
/plots/Plot_StepSize_RosenbrockRotated.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/plots/Plot_StepSize_RosenbrockRotated.pdf
--------------------------------------------------------------------------------
/plots/Plot_StepSize_SchaffersIllConditioned.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/plots/Plot_StepSize_SchaffersIllConditioned.pdf
--------------------------------------------------------------------------------
/plots/Plot_StepSize_SharpRidge.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/plots/Plot_StepSize_SharpRidge.pdf
--------------------------------------------------------------------------------
/plots/Plot_StepSize_StepEllipsoidal.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/plots/Plot_StepSize_StepEllipsoidal.pdf
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | cma==3.0.3
2 | matplotlib
3 | numpy
4 | seaborn==0.11.1
5 | tensorflow==1.15.0
6 | protobuf==3.17.1
7 |
--------------------------------------------------------------------------------
/scripts/plot_performance.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 | import numpy as np
4 | import seaborn as sns
5 | import matplotlib.pyplot as plt
6 | import argparse
7 | from datetime import datetime
8 | sns.set()
9 |
10 | from matplotlib import rcParams
11 |
12 | rcParams["font.size"] = "40"
13 | rcParams['text.usetex'] = False
14 | rcParams['font.family'] = 'serif'
15 | rcParams['figure.figsize'] = (16.0, 9.0)
16 | rcParams['figure.frameon'] = True
17 | rcParams['figure.edgecolor'] = 'k'
18 | rcParams['grid.color'] = 'k'
19 | rcParams['grid.linestyle'] = ':'
20 | rcParams['grid.linewidth'] = 0.5
21 | rcParams['axes.linewidth'] = 3
22 | rcParams['axes.edgecolor'] = 'k'
23 | rcParams['axes.grid.which'] = 'both'
24 | rcParams['legend.frameon'] = 'True'
25 | rcParams['legend.framealpha'] = 1
26 | rcParams['legend.fontsize'] = 30
27 |
28 | rcParams['ytick.major.size'] = 32
29 | rcParams['ytick.major.width'] = 6
30 | rcParams['ytick.minor.size'] = 6
31 | rcParams['ytick.minor.width'] = 1
32 | rcParams['xtick.major.size'] = 32
33 | rcParams['xtick.major.width'] = 6
34 | rcParams['xtick.minor.size'] = 6
35 | rcParams['xtick.minor.width'] = 1
36 | rcParams['xtick.labelsize'] = 32
37 | rcParams['ytick.labelsize'] = 32
38 |
39 | def dir_path(path):
40 |     if os.path.isfile(path):
41 |         return path
42 |     else:
43 |         raise argparse.ArgumentTypeError("%s is not a valid path to a file" % path)
44 |
45 |
46 | parser = argparse.ArgumentParser(description='Script to plot LTO test data.')
47 | parser.add_argument('--lto_path', type=dir_path, help="Path to the LTO data file.",
48 | default=os.path.join("..","examples","10BBOB","GallaghersGaussian21hi_LTO.json"))
49 | parser.add_argument('--csa_path', type=dir_path, help="Path to the CSA data file.",
50 | default=os.path.join("..","data","PPSN_LTO_Data","CSA_Data","CSA_Plots_10D","GallaghersGaussian21hi.json"))
51 | parser.add_argument('--function', type=str, help="Function being plotted",
52 | default="GallaghersGaussian21hi")
53 |
54 | args = parser.parse_args()
55 | lto_path = args.lto_path
56 | csa_path = args.csa_path
57 | function = args.function
58 | popsize = 10
59 |
60 | data_LTO = {}
61 | data_CSA = {}
62 | with open(lto_path) as json_file:
63 | data_LTO = json.load(json_file)
64 | with open(csa_path) as json_file:
65 | data_CSA = json.load(json_file)
66 | generations = len(data_LTO["Average costs LTO"])
67 | num_feval = generations * popsize
68 |
69 | plt.tick_params(axis='x', which='minor')
70 |
71 | plt.xlabel("Num FEval", fontsize=50)
72 | plt.ylabel("Step Size", fontsize=50)
73 | plt.xticks(np.arange(start=1, stop=generations, step=generations//5),
74 | [str(10)] + [str(gen * 10) for gen in np.arange(start=10, stop=generations, step=generations//5)])
75 |
76 | plt.title(function)
77 | plt.fill_between(list(np.arange(1, len(data_LTO["Sigma LTO"]) + 1)),
78 | np.subtract(data_LTO["Sigma LTO"], data_LTO["Std Sigma LTO"]),
79 | np.add(data_LTO["Sigma LTO"], data_LTO["Std Sigma LTO"]),
80 | color=sns.xkcd_rgb["magenta"], alpha=0.1)
81 | plt.plot(list(np.arange(1, len(data_LTO["Sigma LTO"]) + 1)), data_LTO["Sigma LTO"], linewidth=4,
82 | label="LTO", color=sns.xkcd_rgb["magenta"])
83 | plt.fill_between(list(np.arange(1, len(data_CSA["Sigma CSA"]) + 1)),
84 | np.subtract(data_CSA["Sigma CSA"], data_CSA["Std Sigma CSA"]),
85 | np.add(data_CSA["Sigma CSA"], data_CSA["Std Sigma CSA"]),
86 | color=sns.xkcd_rgb["green"], alpha=0.1)
87 | plt.plot(list(np.arange(1, len(data_CSA["Sigma CSA"]) + 1)), data_CSA["Sigma CSA"], linewidth=4,
88 | label="CSA", color=sns.xkcd_rgb["green"])
89 |
90 | plt.legend(loc=0, fontsize=25, ncol=2)
91 | plot_type = "StepSize"  # avoid shadowing the built-in 'type'
92 | output_path = os.path.join("..","plots")
93 | os.makedirs(output_path, exist_ok=True)
94 | # Filesystem-friendly timestamp (str(datetime.now()) contains spaces/colons).
95 | timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
96 | plot_file = ('Plot_%s_%s_%s.pdf' % (plot_type, function, timestamp))
97 | plt.savefig(os.path.join(output_path, plot_file), bbox_inches='tight')
98 | plt.clf()
99 |
100 |
101 | plt.tick_params(axis='x', which='minor')
102 | plt.xlabel("Num FEval", fontsize=50)
103 | plt.ylabel("Objective Value", fontsize=50)
104 | plt.xscale("log")
105 | plt.title(function)
106 | # As above: tick positions are generation indices, labels are the
107 | # corresponding numbers of function evaluations (generation * popsize).
108 | plt.xticks(tick_positions, [str(gen * popsize) for gen in tick_positions])
109 |
110 | plt.fill_between(list(np.arange(1, len(data_LTO["Average costs LTO"]) + 1)),
111 | np.subtract(data_LTO["Average costs LTO"], data_LTO["Std costs LTO"]),
112 | np.add(data_LTO["Average costs LTO"], data_LTO["Std costs LTO"]), alpha=0.1,
113 | color=sns.xkcd_rgb["magenta"])
114 | plt.plot(list(np.arange(1, len(data_LTO["Average costs LTO"]) + 1)), data_LTO["Average costs LTO"],
115 | linewidth=4, label="LTO", color=sns.xkcd_rgb["magenta"])
116 |
117 | plt.fill_between(list(np.arange(1, len(data_CSA["Average costs CSA"]) + 1)),
118 | np.subtract(data_CSA["Average costs CSA"], data_CSA["Std costs CSA"]),
119 | np.add(data_CSA["Average costs CSA"], data_CSA["Std costs CSA"]), alpha=0.1,
120 | color=sns.xkcd_rgb["green"])
121 | plt.plot(list(np.arange(1, len(data_CSA["Average costs CSA"]) + 1)), data_CSA["Average costs CSA"],
122 | linewidth=4, label="CSA", color=sns.xkcd_rgb["green"])
123 |
124 | plt.legend(loc=0, fontsize=25, ncol=2)
125 | plot_type = "ObjectiveValue"
126 | # Same filesystem-friendly timestamp format as above.
127 | timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
128 | plot_file = ('Plot_%s_%s_%s.pdf' % (plot_type, function, timestamp))
129 | plt.savefig(os.path.join(output_path, plot_file), bbox_inches='tight')
130 | plt.clf()
131 |
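For reference, a minimal sketch of the JSON layout the script above expects. The key names are taken from the script; the file name and all numbers below are made-up toy values (the CSA file uses the same layout with "CSA" in place of "LTO").

```python
import json

# Toy stand-in for an LTO data file, with the keys read by plot_performance.py.
toy_lto = {
    "Average costs LTO": [10.0, 4.2, 1.3],  # mean objective value per generation
    "Std costs LTO": [1.0, 0.8, 0.2],       # its standard deviation
    "Sigma LTO": [0.5, 0.4, 0.3],           # mean step size per generation
    "Std Sigma LTO": [0.05, 0.04, 0.02],
}

with open("toy_lto.json", "w") as f:
    json.dump(toy_lto, f)
```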
--------------------------------------------------------------------------------
/source/gps/README.md:
--------------------------------------------------------------------------------
1 | We built upon the GPS version provided by Li and Malik [[1]](#1).
2 | The original GPS code of Li and Malik can be found at [https://www.math.ias.edu/~ke.li/downloads/lto_code.tar.gz](https://www.math.ias.edu/~ke.li/downloads/lto_code.tar.gz).
3 |
4 | In a nutshell, we modified the GPS code to allow continuous sampling from the starting teacher. To this end, we introduce a sampling rate that determines how often new samples generated from the starting policy are used (a sketch of this idea follows at the end of this README).
5 |
6 | [1]
7 | Li, K., Malik, J.: Learning to optimize. In: Proceedings of the International
8 | Conference on Learning Representations (ICLR’17) (2017), published online:
9 | [iclr.cc](https://iclr.cc)
10 |
11 | ### Contents
12 | ```bash
13 | |-gps
14 | | |-agent # code for the LTO agent
15 | | | |-lto # code for the CMAES world and agent
16 | | |-sample # handling the trajectory samples
17 | | |-utility # utilities, including logging and output handling
18 | | |-proto # the protocol buffers
19 | | |-algorithm
20 | | | |-cost # code for computing the cost of trajectories
21 | | | |-traj_opt # code for trajectory optimization
22 | | | |-policy # policies that are used to obtain samples (CSA, Linear Gaussian and NN)
23 | | | |-policy_opt # code for policy optimization
24 | | | |-dynamics # code for handling the dynamics
25 | ```
26 | This *gps* directory contains the code to run LTO-CMA. The file tree above lists its subdirectories, with short descriptions of the code they contain. The code in this directory is licensed under the *GNU GENERAL PUBLIC LICENSE v3*, except for the specific files mentioned in the Modifications section below, which are under an *APACHE v2* license.
27 |
28 | #### Modifications to GPS code
29 | In order to implement our approach, we made modifications to the GPS code provided by Li and Malik. The file tree below lists the files that have been either added or modified. Newly created files fall under the Apache 2.0 license, whereas modified files keep their GPLv3 license.
30 | ```bash
31 | |-gps
32 | | |-agent
33 | | | |-lto
34 | | | | |-agent_cmaes.py (under Apache 2.0 license)
35 | | | | |-cmaes_world.py (under Apache 2.0 license)
36 | | |-sample
37 | | | |-sample.py
38 | | |-utility
39 | | | |-display.py
40 | | |-proto
41 | | | |-gps_pb2.py
42 | | |-algorithm
43 | | | |-policy
44 | | | | |- lin_gauss_init.py
45 | | | | |- lin_gauss_policy.py
46 | | | | |- csa_policy.py (under Apache 2.0 license)
47 | | | |-policy_opt
48 | | | | |-lto_model.py
49 | ```
50 |
51 |
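As referenced above, a minimal sketch of the sampling-rate idea. The helper name `draw_sample` and its arguments are hypothetical; the actual logic lives in `agent/lto/agent_cmaes.py` and the training loop in `gps_train.py`.

```python
import random

def draw_sample(policy, start_policy, sampling_rate, rollout):
    """Hypothetical helper: with probability `sampling_rate`, draw a fresh
    trajectory from the starting (teacher) policy instead of the current one.

    `rollout(pi)` is assumed to run one trajectory under policy `pi`.
    """
    if random.random() < sampling_rate:
        return rollout(start_policy)  # new sample from the starting policy
    return rollout(policy)            # sample from the current policy
```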
--------------------------------------------------------------------------------
/source/gps/__init__.py:
--------------------------------------------------------------------------------
1 | """ This Python module houses the guided policy search codebase. """
2 |
--------------------------------------------------------------------------------
/source/gps/agent/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/source/gps/agent/__init__.py
--------------------------------------------------------------------------------
/source/gps/agent/agent.py:
--------------------------------------------------------------------------------
1 | """ This file defines the base agent class. """
2 | import abc
3 | import copy
4 | import numpy as np
5 |
6 | from gps.agent.config import AGENT
7 | from gps.proto.gps_pb2 import ACTION
8 | from gps.sample.sample_list import SampleList
9 |
10 |
11 | class Agent(object):
12 | """
13 | Agent superclass. The agent interacts with the environment to
14 | collect samples.
15 | """
16 | __metaclass__ = abc.ABCMeta
17 |
18 | def __init__(self, hyperparams):
19 | config = copy.deepcopy(AGENT)
20 | config.update(hyperparams)
21 | self._hyperparams = config
22 |
23 | # Store samples, along with size/index information for samples.
24 | self._samples = [[] for _ in range(self._hyperparams['conditions'])]
25 | self.T = self._hyperparams['T']
26 | self.dU = self._hyperparams['sensor_dims'][ACTION]
27 |
28 | self.x_data_types = self._hyperparams['state_include']
29 | self.obs_data_types = self._hyperparams['obs_include']
30 | if 'meta_include' in self._hyperparams:
31 | self.meta_data_types = self._hyperparams['meta_include']
32 | else:
33 | self.meta_data_types = []
34 |
35 | # List of indices for each data type in state X.
36 | self._state_idx, i = [], 0
37 | for sensor in self.x_data_types:
38 | dim = self._hyperparams['sensor_dims'][sensor]
39 | self._state_idx.append(list(range(i, i+dim)))
40 | i += dim
41 | self.dX = i
42 |
43 | # List of indices for each data type in observation.
44 | self._obs_idx, i = [], 0
45 | for sensor in self.obs_data_types:
46 | dim = self._hyperparams['sensor_dims'][sensor]
47 | self._obs_idx.append(list(range(i, i+dim)))
48 | i += dim
49 | self.dO = i
50 |
51 | # List of indices for each data type in meta data.
52 | self._meta_idx, i = [], 0
53 | for sensor in self.meta_data_types:
54 | dim = self._hyperparams['sensor_dims'][sensor]
55 | self._meta_idx.append(list(range(i, i+dim)))
56 | i += dim
57 | self.dM = i
58 |
59 | self._x_data_idx = {d: i for d, i in zip(self.x_data_types,
60 | self._state_idx)}
61 | self._obs_data_idx = {d: i for d, i in zip(self.obs_data_types,
62 | self._obs_idx)}
63 | self._meta_data_idx = {d: i for d, i in zip(self.meta_data_types,
64 | self._meta_idx)}
65 |
66 | @abc.abstractmethod
67 | def sample(self, policy, condition, verbose=True, save=True, noisy=True):
68 | """
69 | Draw a sample from the environment, using the specified policy
70 | and under the specified condition, with or without noise.
71 | """
72 | raise NotImplementedError("Must be implemented in subclass.")
73 |
74 | def reset(self, condition):
75 | """ Reset environment to the specified condition. """
76 | pass # May be overridden in subclass.
77 |
78 | def get_samples(self, condition, start=0, end=None):
79 | """
80 | Return the requested samples based on the start and end indices.
81 | Args:
82 | start: Starting index of samples to return.
83 | end: End index of samples to return.
84 | """
85 | return (SampleList(self._samples[condition][start:]) if end is None
86 | else SampleList(self._samples[condition][start:end]))
87 |
88 | def clear_samples(self, condition=None):
89 | """
90 | Reset the samples for a given condition, defaulting to all conditions.
91 | Args:
92 | condition: Condition for which to reset samples.
93 | """
94 | if condition is None:
95 | self._samples = [[] for _ in range(self._hyperparams['conditions'])]
96 | else:
97 | self._samples[condition] = []
98 |
99 | def delete_last_sample(self, condition):
100 | """ Delete the last sample from the specified condition. """
101 | self._samples[condition].pop()
102 |
103 | def get_idx_x(self, sensor_name):
104 | """
105 | Return the indices corresponding to a certain state sensor name.
106 | Args:
107 | sensor_name: The name of the sensor.
108 | """
109 | return self._x_data_idx[sensor_name]
110 |
111 | def get_idx_obs(self, sensor_name):
112 | """
113 | Return the indices corresponding to a certain observation sensor name.
114 | Args:
115 | sensor_name: The name of the sensor.
116 | """
117 | return self._obs_data_idx[sensor_name]
118 |
119 | def pack_data_obs(self, existing_mat, data_to_insert, data_types,
120 | axes=None):
121 | """
122 | Update the observation matrix with new data.
123 | Args:
124 | existing_mat: Current observation matrix.
125 | data_to_insert: New data to insert into the existing matrix.
126 | data_types: Name of the sensors to insert data for.
127 | axes: Which axes to insert data. Defaults to the last axes.
128 | """
129 | num_sensor = len(data_types)
130 | if axes is None:
131 | # If axes not specified, assume indexing on last dimensions.
132 | axes = list(range(-1, -num_sensor - 1, -1))
133 | else:
134 | # Make sure number of sensors and axes are consistent.
135 | if num_sensor != len(axes):
136 | raise ValueError(
137 |                     'Length of sensors (%d) must equal length of axes (%d)' %
138 |                     (num_sensor, len(axes))
139 | )
140 |
141 | # Shape checks.
142 | insert_shape = list(existing_mat.shape)
143 | for i in range(num_sensor):
144 | # Make sure to slice along X.
145 |             if existing_mat.shape[axes[i]] != self.dO:
146 |                 raise ValueError('Axes must be along a dO=%d dimensional axis' %
147 |                                  self.dO)
148 |             insert_shape[axes[i]] = len(self._obs_data_idx[data_types[i]])
149 |         # NOTE: unlike pack_data_meta and pack_data_x below, this method
150 |         # performs no shape check on data_to_insert before inserting it
151 |         # into the observation matrix.
152 |
153 |
154 | # Actually perform the slice.
155 | index = [slice(None) for _ in range(len(existing_mat.shape))]
156 | for i in range(num_sensor):
157 | index[axes[i]] = slice(self._obs_data_idx[data_types[i]][0],
158 | self._obs_data_idx[data_types[i]][-1] + 1)
159 |         existing_mat[tuple(index)] = data_to_insert  # tuple(): list indexing is deprecated in NumPy
160 |
161 | def pack_data_meta(self, existing_mat, data_to_insert, data_types,
162 | axes=None):
163 | """
164 | Update the meta data matrix with new data.
165 | Args:
166 | existing_mat: Current meta data matrix.
167 | data_to_insert: New data to insert into the existing matrix.
168 | data_types: Name of the sensors to insert data for.
169 | axes: Which axes to insert data. Defaults to the last axes.
170 | """
171 | num_sensor = len(data_types)
172 | if axes is None:
173 | # If axes not specified, assume indexing on last dimensions.
174 | axes = list(range(-1, -num_sensor - 1, -1))
175 | else:
176 | # Make sure number of sensors and axes are consistent.
177 | if num_sensor != len(axes):
178 | raise ValueError(
179 |                     'Length of sensors (%d) must equal length of axes (%d)' %
180 |                     (num_sensor, len(axes))
181 | )
182 |
183 | # Shape checks.
184 | insert_shape = list(existing_mat.shape)
185 | for i in range(num_sensor):
186 | # Make sure to slice along X.
187 | if existing_mat.shape[axes[i]] != self.dM:
188 |                 raise ValueError('Axes must be along a dM=%d dimensional axis' %
189 |                                  self.dM)
190 | insert_shape[axes[i]] = len(self._meta_data_idx[data_types[i]])
191 | if tuple(insert_shape) != data_to_insert.shape:
192 |                 raise ValueError('Data has shape %s. Expected %s' %
193 |                                  (data_to_insert.shape, tuple(insert_shape)))
194 |
195 | # Actually perform the slice.
196 | index = [slice(None) for _ in range(len(existing_mat.shape))]
197 | for i in range(num_sensor):
198 | index[axes[i]] = slice(self._meta_data_idx[data_types[i]][0],
199 | self._meta_data_idx[data_types[i]][-1] + 1)
200 |         existing_mat[tuple(index)] = data_to_insert
201 |
202 | def pack_data_x(self, existing_mat, data_to_insert, data_types, axes=None):
203 | """
204 | Update the state matrix with new data.
205 | Args:
206 | existing_mat: Current state matrix.
207 | data_to_insert: New data to insert into the existing matrix.
208 | data_types: Name of the sensors to insert data for.
209 | axes: Which axes to insert data. Defaults to the last axes.
210 | """
211 | num_sensor = len(data_types)
212 | if axes is None:
213 | # If axes not specified, assume indexing on last dimensions.
214 | axes = list(range(-1, -num_sensor - 1, -1))
215 | else:
216 | # Make sure number of sensors and axes are consistent.
217 | if num_sensor != len(axes):
218 | raise ValueError(
219 |                     'Length of sensors (%d) must equal length of axes (%d)' %
220 |                     (num_sensor, len(axes))
221 | )
222 |
223 | # Shape checks.
224 | insert_shape = list(existing_mat.shape)
225 | for i in range(num_sensor):
226 | # Make sure to slice along X.
227 | if existing_mat.shape[axes[i]] != self.dX:
228 |                 raise ValueError('Axes must be along a dX=%d dimensional axis' %
229 |                                  self.dX)
230 | insert_shape[axes[i]] = len(self._x_data_idx[data_types[i]])
231 |         if isinstance(data_to_insert, list):
232 | data_to_insert = np.array(data_to_insert).reshape(tuple(insert_shape))
233 | if tuple(insert_shape) != data_to_insert.shape:
234 |             raise ValueError('Data has shape %s. Expected %s' %
235 |                              (data_to_insert.shape, tuple(insert_shape)))
236 |
237 | # Actually perform the slice.
238 | index = [slice(None) for _ in range(len(existing_mat.shape))]
239 | for i in range(num_sensor):
240 | index[axes[i]] = slice(self._x_data_idx[data_types[i]][0],
241 | self._x_data_idx[data_types[i]][-1] + 1)
242 |         existing_mat[tuple(index)] = data_to_insert
243 |
244 | def unpack_data_x(self, existing_mat, data_types, axes=None):
245 | """
246 | Returns the requested data from the state matrix.
247 | Args:
248 | existing_mat: State matrix to unpack from.
249 | data_types: Names of the sensor to unpack.
250 | axes: Which axes to unpack along. Defaults to the last axes.
251 | """
252 | num_sensor = len(data_types)
253 | if axes is None:
254 | # If axes not specified, assume indexing on last dimensions.
255 | axes = list(range(-1, -num_sensor - 1, -1))
256 | else:
257 | # Make sure number of sensors and axes are consistent.
258 | if num_sensor != len(axes):
259 | raise ValueError(
260 |                     'Length of sensors (%d) must equal length of axes (%d)' %
261 |                     (num_sensor, len(axes))
262 | )
263 |
264 | # Shape checks.
265 | for i in range(num_sensor):
266 | # Make sure to slice along X.
267 | if existing_mat.shape[axes[i]] != self.dX:
268 |                 raise ValueError('Axes must be along a dX=%d dimensional axis' %
269 |                                  self.dX)
270 |
271 | # Actually perform the slice.
272 | index = [slice(None) for _ in range(len(existing_mat.shape))]
273 | for i in range(num_sensor):
274 | index[axes[i]] = slice(self._x_data_idx[data_types[i]][0],
275 | self._x_data_idx[data_types[i]][-1] + 1)
276 |         return existing_mat[tuple(index)]
277 |
278 |     def get_vectorized_state(self, state, condition=None):
279 |         """ Pack a state dictionary into a flat state vector. """
280 | state_vector = np.empty((self.dX,))
281 | state_vector.fill(np.nan)
282 | for data_type in self.x_data_types:
283 | self.pack_data_x(state_vector, state[data_type], data_types=[data_type])
284 | assert(not np.any(np.isnan(state_vector)))
285 | return state_vector
286 |
287 |
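The index bookkeeping above is easiest to see on a toy example. A self-contained sketch with two made-up sensors (the names 'a' and 'b' are placeholders, not the real proto constants):

```python
import numpy as np

# Mimic Agent's index layout for two toy sensors: 'a' (dim 2) and 'b' (dim 3).
sensor_dims = {'a': 2, 'b': 3}
x_data_types = ['a', 'b']

idx, i = {}, 0
for sensor in x_data_types:
    dim = sensor_dims[sensor]
    idx[sensor] = list(range(i, i + dim))
    i += dim
dX = i  # total state dimension: 5

x = np.full((dX,), np.nan)
# pack_data_x equivalent: write sensor 'b' into its slice of the state.
x[idx['b'][0]:idx['b'][-1] + 1] = [1.0, 2.0, 3.0]
# unpack_data_x equivalent: read the same slice back.
print(x[idx['b'][0]:idx['b'][-1] + 1])  # [1. 2. 3.]
```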
--------------------------------------------------------------------------------
/source/gps/agent/config.py:
--------------------------------------------------------------------------------
1 | """ Default configuration and hyperparameters for agent objects. """
2 | import numpy as np
3 |
4 | # Agent
5 | AGENT = {
6 | 'substeps': 1,
7 | }
8 |
--------------------------------------------------------------------------------
/source/gps/agent/lto/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/source/gps/agent/lto/__init__.py
--------------------------------------------------------------------------------
/source/gps/agent/lto/agent_cmaes.py:
--------------------------------------------------------------------------------
1 | from copy import deepcopy
2 | import numpy as np
3 | from gps.agent.agent import Agent
4 | from gps.proto.gps_pb2 import ACTION
5 | from gps.sample.sample import Sample
6 | from gps.agent.lto.cmaes_world import CMAESWorld
7 |
8 | class AgentCMAES(Agent):
9 |
10 | def __init__(self, hyperparams):
11 | Agent.__init__(self, hyperparams)
12 |
13 | self._setup_conditions()
14 | self._setup_worlds()
15 |
16 | def _setup_conditions(self):
17 | self.conds = self._hyperparams['conditions']
18 | self.fcns = self._hyperparams['fcns']
19 | self.history_len = self._hyperparams['history_len']
20 | self.init_sigma = self._hyperparams['init_sigma']
21 | self.popsize = self._hyperparams['popsize']
22 |
23 | def _setup_worlds(self):
24 | fcn = []
25 | hpolib = False
26 | for i in range(self.conds):
27 | if 'fcn_obj' in self.fcns[i]:
28 | fcn.append(self.fcns[i]['fcn_obj'])
29 | else:
30 | fcn.append(None)
31 | if 'hpolib' in self.fcns[i]:
32 | hpolib = True
33 | benchmark = None
34 | if 'benchmark' in self.fcns[0]:
35 | benchmark = self.fcns[0]['benchmark']
36 | self._worlds = [CMAESWorld(self.fcns[i]['dim'], self.fcns[i]['init_loc'], self.fcns[i]['init_sigma'], self.popsize, self.history_len, fcn=fcn[i], hpolib=hpolib, benchmark=benchmark) for i in range(self.conds)]
37 | self.x0 = []
38 |
39 | for i in range(self.conds):
40 | self._worlds[i].reset_world()
41 | self._worlds[i].run() # Get noiseless initial state
42 | x0 = self.get_vectorized_state(self._worlds[i].get_state())
43 | self.x0.append(x0)
44 |
45 |
46 | def sample(self, policy, condition, start_policy=None, verbose=False, save=True, noisy=True, ltorun=False, guided_steps=0, t_length=None):
47 | """
48 | Runs a trial and constructs a new sample containing information
49 | about the trial.
50 |
51 | Args:
52 |             policy: Policy to be used in the trial.
53 | condition (int): Which condition setup to run.
54 | verbose (boolean): Whether or not to plot the trial (not used here).
55 | save (boolean): Whether or not to store the trial into the samples.
56 | noisy (boolean): Whether or not to use noise during sampling.
57 | """
58 |         if t_length is None:
59 | t_length = self.T
60 | self._worlds[condition].reset_world()
61 | self._worlds[condition].run(ltorun=ltorun)
62 | state = self._worlds[condition].get_state()
63 | new_sample = self._init_sample(self._worlds[condition].get_state())
64 | #self._set_sample(new_sample, self._worlds[condition].get_state(), t=0)
65 | new_sample.trajectory.append(self._worlds[condition].fbest)
66 | U = np.zeros([t_length, self.dU])
67 | if noisy:
68 | noise = np.random.randn(t_length, self.dU)
69 | else:
70 | noise = np.zeros((t_length, self.dU))
71 | policy.reset() # To support non-Markovian policies
72 | for t in range(t_length):
73 | es = self._worlds[condition].es
74 | f_vals = self._worlds[condition].func_values
75 | obs_t = new_sample.get_obs(t=t)
76 | X_t = self.get_vectorized_state(self._worlds[condition].get_state(), condition)
77 | if np.any(np.isnan(X_t)):
78 | print("X_t: %s" % X_t)
79 |             if ltorun and t < guided_steps * t_length and start_policy is not None:
80 | U[t,:] = start_policy.act(es, f_vals, obs_t, t, noise[t,:])
81 | else:
82 | U[t, :] = policy.act(X_t, obs_t, t, noise[t, :],es, f_vals)
83 | if (t+1) < t_length:
84 | next_action = U[t, :] #* es.sigma
85 | self._worlds[condition].run_next(next_action)
86 | self._set_sample(new_sample, self._worlds[condition].get_state(), t)
87 | new_sample.trajectory.append(self._worlds[condition].fbest)
88 | new_sample.set(ACTION, U)
89 | policy.finalize()
90 | if save:
91 | self._samples[condition].append(new_sample)
92 | return new_sample
93 |
94 | def _init_sample(self, init_X):
95 | """
96 | Construct a new sample and fill in the first time step.
97 | """
98 | sample = Sample(self)
99 | self._set_sample(sample, init_X, -1)
100 | return sample
101 |
102 | def _set_sample(self, sample, X, t):
103 | for sensor in X.keys():
104 | sample.set(sensor, np.array(X[sensor]), t=t+1)
105 |
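Note the switch in `sample` above: for the first `guided_steps * t_length` steps of a rollout, actions come from `start_policy` (the teacher) rather than the current policy. A tiny sketch of that schedule with toy numbers:

```python
# First guided_steps * t_length steps of a rollout use the teacher policy.
t_length, guided_steps = 50, 0.2
schedule = ["teacher" if t < guided_steps * t_length else "learner"
            for t in range(t_length)]
print(schedule[:12])  # the first 10 steps come from the teacher
```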
--------------------------------------------------------------------------------
/source/gps/agent/lto/cmaes_world.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from collections import deque
3 | from cma.evolution_strategy import CMAEvolutionStrategy, CMAOptions
4 | from gps.proto.gps_pb2 import CUR_LOC, PAST_OBJ_VAL_DELTAS, CUR_PS, CUR_SIGMA, PAST_LOC_DELTAS, PAST_SIGMA
5 | import threading
6 |
7 |
8 | def _norm(x): return np.sqrt(np.sum(np.square(x)))  # Euclidean norm
9 | class CMAESWorld(object):
10 | def __init__(self, dim, init_loc, init_sigma, init_popsize, history_len, fcn=None, hpolib=False, benchmark=None):
11 |         # fcn is the objective function to optimize; it is None when no
12 |         # 'fcn_obj' was configured for this condition.
13 |         self.fcn = fcn
14 |
15 | self.hpolib = hpolib
16 | self.benchmark = benchmark
17 | self.b = None
18 | self.bounds = [None, None]
19 | self.dim = dim
20 | self.init_loc = init_loc
21 | self.init_sigma = init_sigma
22 | self.init_popsize = init_popsize
23 | self.fbest = None
24 | self.history_len = history_len
25 | self.past_locs = deque(maxlen=history_len)
26 | self.past_obj_vals = deque(maxlen=history_len)
27 | self.past_sigma = deque(maxlen=history_len)
28 | self.solutions = None
29 | self.func_values = []
30 | self.f_vals = deque(maxlen=self.init_popsize)
31 | self.lock = threading.Lock()
32 | self.chi_N = dim**0.5 * (1 - 1. / (4.*dim) + 1. / (21.*dim**2))
33 |
34 |
35 | def run(self, batch_size="all", ltorun=False):
36 | """Initiates the first time step"""
37 | #self.fcn.new_sample(batch_size=batch_size)
38 | self.cur_loc = self.init_loc
39 | self.cur_sigma = self.init_sigma
40 | self.cur_ps = 0
41 | self.es = CMAEvolutionStrategy(self.cur_loc, self.init_sigma, {'popsize': self.init_popsize, 'bounds': self.bounds})
42 | self.solutions, self.func_values = self.es.ask_and_eval(self.fcn)
43 | self.fbest = self.func_values[np.argmin(self.func_values)]
44 | self.cur_obj_val = self.fbest
45 | self.f_difference = np.abs(np.amax(self.func_values) - self.cur_obj_val)/float(self.cur_obj_val)
46 | self.velocity = np.abs(np.amin(self.func_values) - self.cur_obj_val)/float(self.cur_obj_val)
47 | self.es.mean_old = self.es.mean
48 | self.past_locs.append([self.f_difference, self.velocity])
49 |
50 | # action is of shape (dU,)
51 |     def run_next(self, action):
52 |         """Moves forward in time one step."""
53 |         self.past_locs.append([self.f_difference, self.velocity])
54 |         if not self.es.stop():
55 |             sigma = action
56 | self.es.tell(self.solutions, self.func_values)
57 | self.es.sigma = min(max(sigma, 0.05), 10)
58 | self.solutions, self.func_values = self.es.ask_and_eval(self.fcn)
59 |
60 | self.f_difference = np.nan_to_num(np.abs(np.amax(self.func_values) - self.cur_obj_val)/float(self.cur_obj_val))
61 | self.velocity = np.nan_to_num(np.abs(np.amin(self.func_values) - self.cur_obj_val)/float(self.cur_obj_val))
62 | self.fbest = min(self.es.best.f, np.amin(self.func_values))
63 |
64 | self.past_obj_vals.append(self.cur_obj_val)
65 | self.past_sigma.append(self.cur_sigma)
66 | self.cur_ps = _norm(self.es.adapt_sigma.ps) / self.chi_N - 1
67 | self.cur_loc = self.es.best.x
68 | self.cur_sigma = self.es.sigma
69 | self.cur_obj_val = self.es.best.f
70 |
71 | def reset_world(self):
72 | self.past_locs.clear()
73 | self.past_obj_vals.clear()
74 | self.past_sigma.clear()
75 | self.cur_loc = self.init_loc
76 | self.cur_sigma = self.init_sigma
77 | self.cur_ps = 0
78 | self.func_values = []
79 |
80 |
81 | def get_state(self):
82 | past_obj_val_deltas = []
83 | for i in range(1,len(self.past_obj_vals)):
84 | past_obj_val_deltas.append((self.past_obj_vals[i] - self.past_obj_vals[i-1]+1e-3) / float(self.past_obj_vals[i-1]))
85 | if len(self.past_obj_vals) > 0:
86 | past_obj_val_deltas.append((self.cur_obj_val - self.past_obj_vals[-1]+1e-3)/ float(self.past_obj_vals[-1]))
87 | past_obj_val_deltas = np.array(past_obj_val_deltas).reshape(-1)
88 |
89 | past_loc_deltas = []
90 | for i in range(len(self.past_locs)):
91 | past_loc_deltas.append(self.past_locs[i])
92 | past_loc_deltas = np.array(past_loc_deltas).reshape(-1)
93 | past_sigma_deltas = []
94 | for i in range(len(self.past_sigma)):
95 | past_sigma_deltas.append(self.past_sigma[i])
96 | past_sigma_deltas = np.array(past_sigma_deltas).reshape(-1)
97 | past_obj_val_deltas = np.hstack((np.zeros((self.history_len-past_obj_val_deltas.shape[0],)), past_obj_val_deltas))
98 | past_loc_deltas = np.hstack((np.zeros((self.history_len*2-past_loc_deltas.shape[0],)), past_loc_deltas))
99 | past_sigma_deltas = np.hstack((np.zeros((self.history_len-past_sigma_deltas.shape[0],)), past_sigma_deltas))
100 |
101 | cur_loc = self.cur_loc
102 | cur_ps = self.cur_ps
103 | cur_sigma = self.cur_sigma
104 |
105 | state = {CUR_LOC: cur_loc,
106 | PAST_OBJ_VAL_DELTAS: past_obj_val_deltas,
107 | CUR_PS: cur_ps,
108 | CUR_SIGMA: cur_sigma,
109 | PAST_LOC_DELTAS: past_loc_deltas,
110 | PAST_SIGMA: past_sigma_deltas
111 | }
112 | return state
113 |
114 |
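A small illustration of the zero-padding in `get_state` above: each history deque is left-padded with zeros up to `history_len`, so the state vector keeps a fixed size even in the first few generations.

```python
import numpy as np
from collections import deque

history_len = 5
past_sigma = deque([0.5, 0.45], maxlen=history_len)  # only 2 steps seen so far

vals = np.array(past_sigma).reshape(-1)
padded = np.hstack((np.zeros(history_len - vals.shape[0]), vals))
print(padded)  # [0.   0.   0.   0.5  0.45]
```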
--------------------------------------------------------------------------------
/source/gps/algorithm/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/source/gps/algorithm/__init__.py
--------------------------------------------------------------------------------
/source/gps/algorithm/algorithm_utils.py:
--------------------------------------------------------------------------------
1 | """ This file defines utility classes and functions for algorithms. """
2 | import numpy as np
3 |
4 | from gps.utility.general_utils import BundleType
5 | from gps.algorithm.policy.lin_gauss_policy import LinearGaussianPolicy
6 |
7 |
8 | class IterationData(BundleType):
9 | """ Collection of iteration variables. """
10 | def __init__(self):
11 | variables = {
12 | 'sample_list': None, # List of samples for the current iteration.
13 | 'traj_info': None, # Current TrajectoryInfo object.
14 | 'pol_info': None, # Current PolicyInfo object.
15 | 'traj_distr': None, # Initial trajectory distribution.
16 | 'new_traj_distr': None, # Updated trajectory distribution.
17 | 'cs': None, # Sample costs of the current iteration.
18 | 'step_mult': 1.0, # KL step multiplier for the current iteration.
19 | 'eta': 1.0, # Dual variable used in LQR backward pass.
20 | }
21 | BundleType.__init__(self, variables)
22 |
23 |
24 | class TrajectoryInfo(BundleType):
25 | """ Collection of trajectory-related variables. """
26 | def __init__(self):
27 | variables = {
28 | 'dynamics': None, # Dynamics object for the current iteration.
29 | 'x0mu': None, # Mean for the initial state, used by the dynamics.
30 | 'x0sigma': None, # Covariance for the initial state distribution.
31 | 'cc': None, # Cost estimate constant term.
32 | 'cv': None, # Cost estimate vector term.
33 | 'Cm': None, # Cost estimate matrix term.
34 | 'last_kl_step': float('inf'), # KL step of the previous iteration.
35 | }
36 | BundleType.__init__(self, variables)
37 |
38 |
39 | class PolicyInfo(BundleType):
40 | """ Collection of policy-related variables. """
41 | def __init__(self, hyperparams):
42 | T, dU, dX = hyperparams['T'], hyperparams['dU'], hyperparams['dX']
43 | variables = {
44 | 'lambda_k': np.zeros((T, dU)), # Dual variables.
45 | 'lambda_K': np.zeros((T, dU, dX)), # Dual variables.
46 | 'pol_wt': hyperparams['init_pol_wt'] * np.ones(T), # Policy weight.
47 | #'pol_mu': None, # Mean of the current policy output.
48 | #'pol_sig': None, # Covariance of the current policy output.
49 | 'pol_K': np.zeros((T, dU, dX)), # Policy linearization.
50 | 'pol_k': np.zeros((T, dU)), # Policy linearization.
51 | 'pol_S': np.zeros((T, dU, dU)), # Policy linearization covariance.
52 | 'chol_pol_S': np.zeros((T, dU, dU)), # Cholesky decomp of covar.
53 | 'prev_kl': None, # Previous KL divergence.
54 | 'init_kl': None, # The initial KL divergence, before the iteration.
55 | 'policy_samples': [], # List of current policy samples.
56 | 'policy_prior': None, # Current prior for policy linearization.
57 | }
58 | BundleType.__init__(self, variables)
59 |
60 | def traj_distr(self):
61 | """ Create a trajectory distribution object from policy info. """
62 | T, dU, dX = self.pol_K.shape
63 | # Compute inverse policy covariances.
64 | inv_pol_S = np.empty_like(self.chol_pol_S)
65 | for t in range(T):
66 | inv_pol_S[t, :, :] = np.linalg.solve(
67 | self.chol_pol_S[t, :, :],
68 | np.linalg.solve(self.chol_pol_S[t, :, :].T, np.eye(dU))
69 | )
70 | return LinearGaussianPolicy(self.pol_K, self.pol_k, self.pol_S,
71 | self.chol_pol_S, inv_pol_S)
72 |
73 |
74 | def estimate_moments(X, mu, covar):
75 | """ Estimate the moments for a given linearized policy. """
76 | N, T, dX = X.shape
77 | dU = mu.shape[-1]
78 | if len(covar.shape) == 3:
79 | covar = np.tile(covar, [N, 1, 1, 1])
80 | Xmu = np.concatenate([X, mu], axis=2)
81 | ev = np.mean(Xmu, axis=0)
82 | em = np.zeros((N, T, dX+dU, dX+dU))
83 | pad1 = np.zeros((dX, dX+dU))
84 | pad2 = np.zeros((dU, dX))
85 | for n in range(N):
86 | for t in range(T):
87 | covar_pad = np.vstack([pad1, np.hstack([pad2, covar[n, t, :, :]])])
88 | em[n, t, :, :] = np.outer(Xmu[n, t, :], Xmu[n, t, :]) + covar_pad
89 | return ev, em
90 |
91 |
92 | def gauss_fit_joint_prior(pts, mu0, Phi, m, n0, dwts, dX, dU, sig_reg, clipping_thresh = None):
93 | """ Perform Gaussian fit to data with a prior. """
94 | # Build weights matrix.
95 | #D = np.diag(dwts)
96 | # Compute empirical mean and covariance.
97 | mun = np.sum((pts.T * dwts).T, axis=0)
98 | diff = pts - mun
99 | #empsig = diff.T.dot(D).dot(diff)
100 | empsig = (diff.T * dwts).dot(diff)
101 | empsig = 0.5 * (empsig + empsig.T)
102 | # MAP estimate of joint distribution.
103 | N = dwts.shape[0]
104 | mu = mun
105 | sigma = (N * empsig + Phi + (N * m) / (N + m) *
106 | np.outer(mun - mu0, mun - mu0)) / (N + n0)
107 | sigma = 0.5 * (sigma + sigma.T)
108 | # Add sigma regularization.
109 | sigma += sig_reg
110 | # Conditioning to get dynamics.
111 | fd = np.linalg.solve(sigma[:dX, :dX], sigma[:dX, dX:dX+dU]).T
112 | ori_fd = fd
113 | if clipping_thresh is not None:
114 | fd = np.maximum(np.minimum(fd, clipping_thresh), -clipping_thresh)
115 | fc = mu[dX:dX+dU] - fd.dot(mu[:dX])
116 | #dynsig = sigma[dX:dX+dU, dX:dX+dU] - ori_fd.dot(sigma[:dX, :dX]).dot(ori_fd.T)
117 | dynsig = sigma[dX:dX+dU, dX:dX+dU] - ori_fd.dot(sigma[:dX, dX:dX+dU]) # Mathematically equivalent to the above
118 | dynsig = 0.5 * (dynsig + dynsig.T)
119 | return fd, fc, dynsig
120 |
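The conditioning step at the end of `gauss_fit_joint_prior` is the standard Gaussian conditional: given the joint covariance over `[x; u]`, the linear gain comes from `Sigma_xx^{-1} Sigma_xu` and the offset from the means. A self-contained toy check of just that step (the matrix and mean below are made up, and the prior/clipping parts are omitted):

```python
import numpy as np

# Toy joint covariance and mean over [x; u] with dX = 2, dU = 1.
dX, dU = 2, 1
sigma = np.array([[2.0, 0.3, 0.5],
                  [0.3, 1.5, 0.2],
                  [0.5, 0.2, 1.0]])
mu = np.array([0.1, -0.2, 0.4])

# Same conditioning as in gauss_fit_joint_prior:
fd = np.linalg.solve(sigma[:dX, :dX], sigma[:dX, dX:dX + dU]).T
fc = mu[dX:dX + dU] - fd.dot(mu[:dX])
dynsig = sigma[dX:dX + dU, dX:dX + dU] - fd.dot(sigma[:dX, dX:dX + dU])

print(fd, fc, dynsig)  # linear gain, offset, conditional covariance
```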
--------------------------------------------------------------------------------
/source/gps/algorithm/config.py:
--------------------------------------------------------------------------------
1 | """ Default configuration and hyperparameter values for algorithms. """
2 |
3 | # Algorithm
4 | ALG = {
5 | 'inner_iterations': 4,
6 | 'min_eta': 1e-5, # Minimum initial lagrange multiplier in DGD for
7 | # trajectory optimization.
8 |     'kl_step': 0.2,
9 |     'min_step_mult': 0.01,
10 |     'max_step_mult': 10.0,
11 |     # Trajectory settings.
12 |     'initial_state_var': 1e-6,
13 | 'init_traj_distr': None, # A function that takes in two arguments, agent and cond, and returns a policy
14 | # Trajectory optimization.
15 | 'traj_opt': None,
16 |     # Dynamics hyperparams.
17 | 'dynamics': None,
18 | # Costs.
19 | 'cost': None, # A list of Cost objects for each condition.
20 | 'sample_on_policy': False,
21 |
22 | 'policy_dual_rate': 0.1,
23 | 'policy_dual_rate_covar': 0.0,
24 | 'fixed_lg_step': 0,
25 | 'lg_step_schedule': 10.0,
26 | 'ent_reg_schedule': 0.0,
27 | 'init_pol_wt': 0.01,
28 | 'policy_sample_mode': 'add',
29 | 'exp_step_increase': 2.0,
30 | 'exp_step_decrease': 0.5,
31 | 'exp_step_upper': 0.5,
32 | 'exp_step_lower': 1.0
33 | }
34 |
--------------------------------------------------------------------------------
/source/gps/algorithm/cost/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/source/gps/algorithm/cost/__init__.py
--------------------------------------------------------------------------------
/source/gps/algorithm/cost/config.py:
--------------------------------------------------------------------------------
1 | """ Default configuration and hyperparameter values for costs. """
2 | import numpy as np
3 |
4 | from gps.algorithm.cost.cost_utils import RAMP_CONSTANT
5 |
6 | COST = {
7 | 'ramp_option': RAMP_CONSTANT, # How target cost ramps over time.
8 | 'wp_final_multiplier': 1.0, # Weight multiplier on final time step.
9 | 'weight': 1.0
10 | }
11 |
--------------------------------------------------------------------------------
/source/gps/algorithm/cost/cost.py:
--------------------------------------------------------------------------------
1 | import copy
2 |
3 | import numpy as np
4 |
5 | from gps.algorithm.cost.config import COST
6 | from gps.algorithm.cost.cost_utils import get_ramp_multiplier
7 |
8 | from gps.proto.gps_pb2 import CUR_LOC
9 |
10 | class Cost(object):
11 | def __init__(self, hyperparams):
12 | config = copy.deepcopy(COST)
13 | config.update(hyperparams)
14 | self._hyperparams = config
15 | # Used by _eval_cost in algorithm.py
16 | self.weight = self._hyperparams['weight']
17 | self.cur_cond_idx = self._hyperparams['cur_cond_idx']
18 |
19 | def eval(self, sample, obj_val_only = False):
20 | """
21 | Evaluate cost function and derivatives on a sample.
22 | Args:
23 | sample: A single sample
24 | """
25 | T = sample.T
26 | Du = sample.dU
27 | Dx = sample.dX
28 |
29 | # cur_fcn = sample.agent.fcns[self.cur_cond_idx]['fcn_obj']
30 |
31 | final_l = np.zeros(T)
32 |
33 | if not obj_val_only:
34 | final_lu = np.zeros((T, Du))
35 | final_lx = np.zeros((T, Dx))
36 | final_luu = np.zeros((T, Du, Du))
37 | final_lxx = np.zeros((T, Dx, Dx))
38 | final_lux = np.zeros((T, Du, Dx))
39 |
40 | x = sample.get(CUR_LOC)
41 | _, dim = x.shape
42 |
43 | # Time step-specific weights
44 | wpm = get_ramp_multiplier(
45 | self._hyperparams['ramp_option'], T,
46 | wp_final_multiplier=self._hyperparams['wp_final_multiplier'],
47 | wp_custom=self._hyperparams['wp_custom'] if 'wp_custom' in self._hyperparams else None
48 | )
49 |
50 | if not obj_val_only:
51 | ls = np.empty((T, dim))
52 | lss = np.empty((T, dim, dim))
53 |
54 | #cur_fcn.new_sample(batch_size="all") # Get noiseless gradient
55 | for t in range(T):
56 | final_l[t] = sample.trajectory[t] # cur_fcn.evaluate(x[t,:])
57 | # if not obj_val_only:
58 | # ls[t,:] = cur_fcn.grad(x[t,:][:,None])[:,0]
59 | # lss[t,:,:] = cur_fcn.hess(x[t,:][:,None])
60 |
61 | final_l = final_l * wpm
62 |
63 | # if not obj_val_only:
64 | # ls = ls * wpm[:,None]
65 | # lss = lss * wpm[:,None,None]
66 |
67 | # Equivalent to final_lx[:,sensor_start_idx:sensor_end_idx] = ls
68 | #sample.agent.pack_data_x(final_lx, ls, data_types=[CUR_LOC])
69 | # Equivalent to final_lxx[:,sensor_start_idx:sensor_end_idx,sensor_start_idx:sensor_end_idx] = lss
70 | #sample.agent.pack_data_x(final_lxx, lss, data_types=[CUR_LOC, CUR_LOC])
71 |
72 | if obj_val_only:
73 | return (final_l,)
74 | else:
75 | return final_l, final_lx, final_lu, final_lxx, final_luu, final_lux
76 |
--------------------------------------------------------------------------------
/source/gps/algorithm/cost/cost_utils.py:
--------------------------------------------------------------------------------
1 | """ This file defines utility classes and functions for costs. """
2 | import numpy as np
3 |
4 | RAMP_CONSTANT = 1
5 | RAMP_LINEAR = 2
6 | RAMP_QUADRATIC = 3
7 | RAMP_FINAL_ONLY = 4
8 | RAMP_CUSTOM = 5
9 |
10 | def get_ramp_multiplier(ramp_option, T, wp_final_multiplier=1.0, wp_custom=None):
11 | """
12 | Return a time-varying multiplier.
13 | Returns:
14 | A (T,) float vector containing weights for each time step.
15 | """
16 | if ramp_option == RAMP_CONSTANT:
17 | wpm = np.ones(T)
18 | elif ramp_option == RAMP_LINEAR:
19 | wpm = (np.arange(T, dtype=np.float32) + 1) / T
20 | elif ramp_option == RAMP_QUADRATIC:
21 | wpm = ((np.arange(T, dtype=np.float32) + 1) / T) ** 2
22 | elif ramp_option == RAMP_FINAL_ONLY:
23 | wpm = np.zeros(T)
24 | wpm[T-1] = 1.0
25 | elif ramp_option == RAMP_CUSTOM:
26 | assert(wp_custom is not None)
27 | wpm = wp_custom
28 | else:
29 | raise ValueError('Unknown cost ramp requested!')
30 | wpm[-1] *= wp_final_multiplier
31 | return wpm
32 |
33 |
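For intuition, the first three ramp options produce the following per-timestep weights on a short horizon (a minimal sketch mirroring the code above):

```python
import numpy as np

T = 4
print(np.ones(T))                                       # RAMP_CONSTANT:  [1. 1. 1. 1.]
print((np.arange(T, dtype=np.float32) + 1) / T)         # RAMP_LINEAR:    [0.25 0.5 0.75 1.]
print(((np.arange(T, dtype=np.float32) + 1) / T) ** 2)  # RAMP_QUADRATIC: [0.0625 0.25 0.5625 1.]
```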
--------------------------------------------------------------------------------
/source/gps/algorithm/dynamics/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/source/gps/algorithm/dynamics/__init__.py
--------------------------------------------------------------------------------
/source/gps/algorithm/dynamics/config.py:
--------------------------------------------------------------------------------
1 | """ Default configuration and hyperparameter values for dynamics objects. """
2 |
3 | # DynamicsPriorGMM
4 | DYN_PRIOR_GMM = {
5 | 'min_samples_per_cluster': 20,
6 | 'max_clusters': 50,
7 | 'max_samples': 20,
8 | 'strength': 1.0,
9 | }
10 |
--------------------------------------------------------------------------------
/source/gps/algorithm/dynamics/dynamics_lr_prior.py:
--------------------------------------------------------------------------------
1 | """ This file defines linear regression with an arbitrary prior. """
2 | import numpy as np
3 |
4 | from gps.algorithm.algorithm_utils import gauss_fit_joint_prior
5 |
6 | class DynamicsLRPrior(object):
7 | """ Dynamics with linear regression, with arbitrary prior. """
8 | def __init__(self, hyperparams):
9 | self._hyperparams = hyperparams
10 |
11 | # Fitted dynamics: x_t+1 = Fm * [x_t;u_t] + fv.
12 |         # These remain None until fit() populates them.
13 |         self.Fm = None
14 |         self.fv = None
15 |         self.dyn_covar = None  # Covariance of the fitted dynamics.
16 |
17 |         # Instantiate the configured prior from its 'type' constructor
18 |         # (e.g., DynamicsPriorGMM).
19 | self.prior = \
20 | self._hyperparams['prior']['type'](self._hyperparams['prior'])
21 |
22 | def update_prior(self, samples):
23 | """ Update dynamics prior. """
24 | X = samples.get_X()
25 | U = samples.get_U()
26 | self.prior.update(X, U)
27 |
28 | def get_prior(self):
29 | """ Return the dynamics prior. """
30 | return self.prior
31 |
32 | def fit(self, X, U):
33 | """ Fit dynamics. """
34 | N, T, dX = X.shape
35 | dU = U.shape[2]
36 |
37 | if N == 1:
38 | raise ValueError("Cannot fit dynamics on 1 sample")
39 |
40 | self.Fm = np.zeros([T, dX, dX+dU])
41 | self.fv = np.zeros([T, dX])
42 | self.dyn_covar = np.zeros([T, dX, dX])
43 |
44 | it = slice(dX+dU)
45 | ip = slice(dX+dU, dX+dU+dX)
46 | # Fit dynamics with least squares regression.
47 | dwts = (1.0 / N) * np.ones(N)
48 | for t in range(T - 1):
49 | Ys = np.c_[X[:, t, :], U[:, t, :], X[:, t+1, :]]
50 | # Obtain Normal-inverse-Wishart prior.
51 | mu0, Phi, mm, n0 = self.prior.eval(dX, dU, Ys)
52 | sig_reg = np.zeros((dX+dU+dX, dX+dU+dX))
53 | sig_reg[it, it] = self._hyperparams['regularization']*np.eye(dX+dU)
54 | Fm, fv, dyn_covar = gauss_fit_joint_prior(Ys,
55 | mu0, Phi, mm, n0, dwts, dX+dU, dX, sig_reg, self._hyperparams['clipping_thresh'])
56 | self.Fm[t, :, :] = Fm
57 | self.fv[t, :] = fv
58 | # Fm * [x; u] + fv gives the predicted state
59 | self.dyn_covar[t, :, :] = dyn_covar
60 | return self.Fm, self.fv, self.dyn_covar
61 |
62 | def copy(self):
63 | """ Return a copy of the dynamics estimate. """
64 | dyn = type(self)(self._hyperparams)
65 | dyn.Fm = np.copy(self.Fm)
66 | dyn.fv = np.copy(self.fv)
67 | dyn.dyn_covar = np.copy(self.dyn_covar)
68 | return dyn
69 |
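As the comment in `fit` notes, the fitted time-varying dynamics predict the next state as `Fm[t] @ [x_t; u_t] + fv[t]`. A toy sketch of that prediction with made-up matrices:

```python
import numpy as np

dX, dU = 3, 1
Fm_t = np.random.randn(dX, dX + dU)  # fitted linear dynamics at time step t
fv_t = np.random.randn(dX)           # fitted offset at time step t

x_t = np.random.randn(dX)
u_t = np.random.randn(dU)
x_next = Fm_t.dot(np.concatenate([x_t, u_t])) + fv_t
print(x_next.shape)  # (3,)
```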
--------------------------------------------------------------------------------
/source/gps/algorithm/dynamics/dynamics_prior_gmm.py:
--------------------------------------------------------------------------------
1 | """ This file defines the GMM prior for dynamics estimation. """
2 | import copy
3 | import logging
4 |
5 | import numpy as np
6 |
7 | from gps.algorithm.dynamics.config import DYN_PRIOR_GMM
8 | from gps.utility.gmm import GMM
9 |
10 |
11 | LOGGER = logging.getLogger(__name__)
12 |
13 |
14 | class DynamicsPriorGMM(object):
15 | """
16 | A dynamics prior encoded as a GMM over [x_t, u_t, x_t+1] points.
17 | See:
18 | S. Levine*, C. Finn*, T. Darrell, P. Abbeel, "End-to-end
19 | training of Deep Visuomotor Policies", arXiv:1504.00702,
20 | Appendix A.3.
21 | """
22 | def __init__(self, hyperparams):
23 | """
24 | Hyperparameters:
25 | min_samples_per_cluster: Minimum samples per cluster.
26 | max_clusters: Maximum number of clusters to fit.
27 | max_samples: Maximum number of trajectories to use for
28 | fitting the GMM at any given time.
29 | strength: Adjusts the strength of the prior.
30 | """
31 | config = copy.deepcopy(DYN_PRIOR_GMM)
32 | config.update(hyperparams)
33 | self._hyperparams = config
34 | self.X = None
35 | self.U = None
36 | self.gmm = GMM()
37 | self._min_samp = self._hyperparams['min_samples_per_cluster']
38 | self._max_samples = self._hyperparams['max_samples']
39 | self._max_clusters = self._hyperparams['max_clusters']
40 | self._strength = self._hyperparams['strength']
41 |
42 | def initial_state(self):
43 | """ Return dynamics prior for initial time step. """
44 | # Compute mean and covariance.
45 | mu0 = np.mean(self.X[:, 0, :], axis=0)
46 | Phi = np.diag(np.var(self.X[:, 0, :], axis=0))
47 |
48 | # Factor in multiplier.
49 | n0 = self.X.shape[2] * self._strength
50 | m = self.X.shape[2] * self._strength
51 |
52 | # Multiply Phi by m (since it was normalized before).
53 | Phi = Phi * m
54 | return mu0, Phi, m, n0
55 |
56 | def update(self, X, U):
57 | """
58 | Update prior with additional data.
59 | Args:
60 | X: A N x T x dX matrix of sequential state data.
61 | U: A N x T x dU matrix of sequential control data.
62 | """
63 | # Constants.
64 | T = X.shape[1] - 1
65 |
66 | # Append data to dataset.
67 | if self.X is None:
68 | self.X = X
69 | else:
70 | self.X = np.concatenate([self.X, X], axis=0)
71 |
72 | if self.U is None:
73 | self.U = U
74 | else:
75 | self.U = np.concatenate([self.U, U], axis=0)
76 |
77 | # Remove excess samples from dataset.
78 | start = max(0, self.X.shape[0] - self._max_samples + 1)
79 | self.X = self.X[start:, :]
80 | self.U = self.U[start:, :]
81 |
82 | # Compute cluster dimensionality.
83 | Do = X.shape[2] + U.shape[2] + X.shape[2]
84 |
85 | # Create dataset.
86 | N = self.X.shape[0]
87 | xux = np.reshape(
88 | np.c_[self.X[:, :T, :], self.U[:, :T, :], self.X[:, 1:(T+1), :]],
89 | [T * N, Do]
90 | )
91 |
92 | # Choose number of clusters.
93 | K = int(max(2, min(self._max_clusters,
94 | np.floor(float(N * T) / self._min_samp))))
95 | LOGGER.debug('Generating %d clusters for dynamics GMM.', K)
96 |
97 | # Update GMM.
98 | self.gmm.update(xux, K)
99 |
100 | def eval(self, Dx, Du, pts):
101 | """
102 | Evaluate prior.
103 | Args:
104 | pts: A N x Dx+Du+Dx matrix.
105 | """
106 | # Construct query data point by rearranging entries and adding
107 | # in reference.
108 | assert pts.shape[1] == Dx + Du + Dx
109 |
110 | # Perform query and fix mean.
111 | mu0, Phi, m, n0 = self.gmm.inference(pts)
112 |
113 | # Factor in multiplier.
114 | n0 = n0 * self._strength
115 | m = m * self._strength
116 |
117 | # Multiply Phi by m (since it was normalized before).
118 | Phi *= m
119 | return mu0, Phi, m, n0
120 |
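Both this prior and the policy prior later in the codebase pick the number of GMM clusters with the same rule. A worked instance of the formula from `update` above, using the defaults from `DYN_PRIOR_GMM`:

```python
import numpy as np

N, T = 20, 50                    # trajectories x time steps
min_samp, max_clusters = 20, 50  # defaults from DYN_PRIOR_GMM
K = int(max(2, min(max_clusters, np.floor(float(N * T) / min_samp))))
print(K)  # 50 (N*T/min_samp = 50, which here coincides with the cap)
```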
--------------------------------------------------------------------------------
/source/gps/algorithm/policy/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/source/gps/algorithm/policy/__init__.py
--------------------------------------------------------------------------------
/source/gps/algorithm/policy/config.py:
--------------------------------------------------------------------------------
1 | """ Default configuration and hyperparameter values for policies. """
2 | INIT_LG = {
3 | 'init_var': 1.0,
4 | 'verbose': False
5 | }
6 |
7 | # PolicyPriorGMM
8 | POLICY_PRIOR_GMM = {
9 | 'min_samples_per_cluster': 20,
10 | 'max_clusters': 50,
11 | 'max_samples': 20,
12 | 'strength': 1.0,
13 | }
14 |
--------------------------------------------------------------------------------
/source/gps/algorithm/policy/constant_policy.py:
--------------------------------------------------------------------------------
1 | """ This file defines the linear Gaussian policy class. """
2 | import numpy as np
3 | from cma.sigma_adaptation import CMAAdaptSigmaCSA
4 | from gps.algorithm.policy.policy import Policy
5 | from gps.utility.general_utils import check_shape
6 |
7 |
8 | class ConstantPolicy(Policy):
9 | """
10 | Constant policy
11 | Important for RL learning ability check
12 | """
13 | def __init__(self, const=0.5):
14 | Policy.__init__(self)
15 | self.const = const
16 | self.adapt_sigma = CMAAdaptSigmaCSA()
17 |
18 | def act(self, x, obs, t, noise, es, f_vals):
19 | """
20 | Return an action for a state.
21 | Args:
22 | x: State vector.
23 | obs: Observation vector.
24 | t: Time step.
25 | noise: Action noise. This will be scaled by the variance.
26 | """
27 | if self.adapt_sigma is None:
28 | self.adapt_sigma = CMAAdaptSigmaCSA()
29 | self.adapt_sigma.sigma = es.sigma
30 | hsig = es.adapt_sigma.hsig(es)
31 | es.hsig = hsig
32 | es.adapt_sigma.update2(es, function_values=f_vals)
33 | u = self.const
34 | return u
35 |
36 |
37 |
--------------------------------------------------------------------------------
/source/gps/algorithm/policy/csa_policy.py:
--------------------------------------------------------------------------------
1 | """ This file defines the linear Gaussian policy class. """
2 | import numpy as np
3 | from cma.sigma_adaptation import CMAAdaptSigmaCSA
4 | from gps.algorithm.policy.policy import Policy
5 |
6 | class CSAPolicy(Policy):
7 | """
8 | Time-varying linear Gaussian policy.
9 | U = CSA(sigma, ps, chiN)+ noise, where noise ~ N(0, chol_pol_covar)
10 | """
11 | def __init__(self, T=50):
12 | Policy.__init__(self)
13 |
14 | self.teacher = 0 #np.random.choice([0,1])
15 | self.T = T
16 | self.adapt_sigma = CMAAdaptSigmaCSA()
17 |
18 | def act(self, x, obs, t, noise, es, f_vals):
19 | """
20 | Return an action for a state.
21 | Args:
22 | x: State vector.
23 | obs: Observation vector.
24 | t: Time step.
25 | noise: Action noise. This will be scaled by the variance.
26 | """
27 | #if self.adapt_sigma is None:
28 | # self.adapt_sigma = CMAAdaptSigmaCSA()
29 |
30 | #self.adapt_sigma.sigma = es.sigma
31 | u = es.sigma
32 | hsig = es.adapt_sigma.hsig(es)
33 | es.hsig = hsig
34 | #if self.teacher == 0 or t == 0 :
35 | delta = es.adapt_sigma.update2(es, function_values=f_vals)
36 | #else:
37 | # delta = self.init_sigma
38 | u *= delta
39 | #if t == 0:
40 | # self.init_sigma = delta
41 | return u
42 |
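For reference, the textbook CSA step-size update that this teacher delegates to the cma package. This is a sketch of the standard rule, not the exact internals of `CMAAdaptSigmaCSA.update2`; the gains `c_sigma` and `d_sigma` below are placeholder values (real CMA-ES derives them from the problem dimension and weights):

```python
import numpy as np

def csa_step(sigma, ps_norm, dim, c_sigma=0.3, d_sigma=1.0):
    """Textbook CSA update: grow sigma when the evolution path is longer
    than its expected length chi_N under random selection, shrink otherwise."""
    chi_n = dim**0.5 * (1 - 1.0 / (4.0 * dim) + 1.0 / (21.0 * dim**2))
    return sigma * np.exp((c_sigma / d_sigma) * (ps_norm / chi_n - 1.0))

print(csa_step(0.5, ps_norm=3.5, dim=10))  # path longer than chi_N -> sigma grows
```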
--------------------------------------------------------------------------------
/source/gps/algorithm/policy/lin_gauss_init.py:
--------------------------------------------------------------------------------
1 | """ Initializations for linear Gaussian controllers. """
2 | import copy
3 | import numpy as np
4 | import scipy as sp
5 | from gps.algorithm.policy.config import INIT_LG
6 | from gps.algorithm.policy.csa_policy import CSAPolicy
7 | from gps.algorithm.policy.lin_gauss_policy import LinearGaussianPolicy
8 | from gps.agent.lto.cmaes_world import CMAESWorld
9 |
10 | def init_cmaes_controller(hyperparams, agent):
11 |
12 | config = copy.deepcopy(INIT_LG)
13 | config.update(hyperparams)
14 |
15 | dX, dU = config['dX'], config['dU']
16 | T = config['T']
17 | cur_cond_idx = config['cur_cond_idx']
18 | history_len = agent.history_len
19 | fcn = agent.fcns[cur_cond_idx]
20 | popsize = agent.popsize
21 | if 'fcn_obj' in fcn:
22 | fcn_obj = fcn['fcn_obj']
23 | else:
24 | fcn_obj = None
25 | hpolib = False
26 | if 'hpolib' in fcn:
27 | hpolib = True
28 | benchmark = None
29 | if 'benchmark' in fcn:
30 | benchmark = fcn['benchmark']
31 |     # Create a new world to avoid changing the state of the original world.
32 | world = CMAESWorld(dim=fcn['dim'], init_loc=fcn['init_loc'], init_sigma=fcn['init_sigma'], init_popsize=popsize, history_len=history_len, fcn=fcn_obj, hpolib=hpolib, benchmark=benchmark)
33 |
34 | if config['verbose']:
35 | print("Finding Initial Linear Gaussian Controller")
36 | action_mean = []
37 | action_var = []
38 |     for i in range(25):  # average the CSA teacher's behaviour over 25 rollouts
39 | f_values=[]
40 | cur_policy = CSAPolicy(T=T)
41 |
42 | world.reset_world()
43 | world.run()
44 | for t in range(T):
45 | X_t = agent.get_vectorized_state(world.get_state(), cur_cond_idx)
46 | es = world.es
47 | f_vals = world.func_values
48 | U_t = cur_policy.act(X_t, None, t, np.zeros((dU,)), es, f_vals)
49 | world.run_next(U_t)
50 |             f_values.append(U_t)  # step sizes chosen by the CSA teacher
51 |         action_mean.append(f_values)
52 |         action_var.append(f_values)
53 | mean_actions = np.mean(action_mean, axis=0)
54 | var_actions = np.std(action_var, axis=0)
55 | np.place(var_actions, var_actions==0, config["init_var"])
56 | Kt = np.zeros((dU, dX)) # K matrix for a single time step.
57 |
58 | kt = mean_actions.reshape((T,1))
59 | #print("Mean actions: %s" % kt, flush=True)
60 |
61 | K = np.tile(Kt[None,:,:], (T, 1, 1)) # Controller gains matrix.
62 | k = kt
63 | PSig = var_actions.reshape((T, 1, 1))
64 | cholPSig = np.sqrt(var_actions).reshape((T, 1, 1))
65 | invPSig = 1./var_actions.reshape((T, 1, 1))
66 |
67 | return LinearGaussianPolicy(K, k, PSig, cholPSig, invPSig)
68 |
69 |
70 |
71 |
--------------------------------------------------------------------------------
/source/gps/algorithm/policy/lin_gauss_policy.py:
--------------------------------------------------------------------------------
1 | """ This file defines the linear Gaussian policy class. """
2 | import numpy as np
3 | from cma.sigma_adaptation import CMAAdaptSigmaCSA
4 | from gps.algorithm.policy.policy import Policy
5 | from gps.utility.general_utils import check_shape
6 |
7 |
8 | class LinearGaussianPolicy(Policy):
9 | """
10 | Time-varying linear Gaussian policy.
11 | U = K*x + k + noise, where noise ~ N(0, chol_pol_covar)
12 | """
13 | def __init__(self, K, k, pol_covar, chol_pol_covar, inv_pol_covar):
14 | Policy.__init__(self)
15 |
16 | # Assume K has the correct shape, and make sure others match.
17 | self.T = K.shape[0]
18 | self.dU = K.shape[1]
19 | self.dX = K.shape[2]
20 |
21 | check_shape(k, (self.T, self.dU))
22 | check_shape(pol_covar, (self.T, self.dU, self.dU))
23 | check_shape(chol_pol_covar, (self.T, self.dU, self.dU))
24 | check_shape(inv_pol_covar, (self.T, self.dU, self.dU))
25 |
26 | self.K = K
27 | self.k = k
28 | self.pol_covar = pol_covar
29 | self.chol_pol_covar = chol_pol_covar
30 | self.inv_pol_covar = inv_pol_covar
31 | self.adapt_sigma = CMAAdaptSigmaCSA()
32 |
33 | def act(self, x, obs, t, noise, es, f_vals):
34 | """
35 | Return an action for a state.
36 | Args:
37 | x: State vector.
38 | obs: Observation vector.
39 | t: Time step.
40 | noise: Action noise. This will be scaled by the variance.
41 | """
42 | if self.adapt_sigma is None:
43 | self.adapt_sigma = CMAAdaptSigmaCSA()
44 | self.adapt_sigma.sigma = es.sigma
45 | hsig = es.adapt_sigma.hsig(es)
46 | es.hsig = hsig
47 | es.adapt_sigma.update2(es, function_values=f_vals)
48 | u = self.K[t].dot(x) + self.k[t]
49 | u += self.chol_pol_covar[t].T.dot(noise)
50 | return np.nan_to_num(u)
51 |
52 | def fold_k(self, noise):
53 | """
54 | Fold noise into k.
55 | Args:
56 | noise: A T x Du noise vector with mean 0 and variance 1.
57 | Returns:
58 | k: A T x dU bias vector.
59 | """
60 | k = np.zeros_like(self.k)
61 | for i in range(self.T):
62 | scaled_noise = self.chol_pol_covar[i].T.dot(noise[i])
63 | k[i] = scaled_noise + self.k[i]
64 | return k
65 |
66 | def nans_like(self):
67 | """
68 | Returns:
69 | A new linear Gaussian policy object with the same dimensions
70 | but all values filled with NaNs.
71 | """
72 | policy = LinearGaussianPolicy(
73 | np.zeros_like(self.K), np.zeros_like(self.k),
74 | np.zeros_like(self.pol_covar), np.zeros_like(self.chol_pol_covar),
75 | np.zeros_like(self.inv_pol_covar)
76 | )
77 | policy.K.fill(np.nan)
78 | policy.k.fill(np.nan)
79 | policy.pol_covar.fill(np.nan)
80 | policy.chol_pol_covar.fill(np.nan)
81 | policy.inv_pol_covar.fill(np.nan)
82 | return policy
83 |
84 |
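The control law above is `u = K[t] x + k[t]` plus Cholesky-scaled noise. A self-contained toy rollout of just that law (omitting the CMA-ES bookkeeping that `act` also performs); note that `init_cmaes_controller` above uses zero gains `K`, so the controller is effectively open-loop there:

```python
import numpy as np

T, dU, dX = 5, 1, 3
K = np.zeros((T, dU, dX))                    # zero gains, as in init_cmaes_controller
k = np.linspace(0.5, 0.1, T).reshape(T, dU)  # toy open-loop action means
chol = 0.05 * np.tile(np.eye(dU), (T, 1, 1)) # small action noise

x = np.random.randn(dX)
for t in range(T):
    noise = np.random.randn(dU)
    u = K[t].dot(x) + k[t] + chol[t].T.dot(noise)
    print(t, u)
```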
--------------------------------------------------------------------------------
/source/gps/algorithm/policy/policy.py:
--------------------------------------------------------------------------------
1 | """ This file defines the base class for the policy. """
2 | import abc
3 |
4 |
5 | class Policy(object):
6 | """ Computes actions from states/observations. """
7 | __metaclass__ = abc.ABCMeta
8 |
9 | @abc.abstractmethod
10 | def act(self, es, f_vals, obs, t, noise):
11 | """
12 |         Args:
13 |             es: CMA-ES evolution strategy object.
14 |             f_vals: Function values of the current population.
15 |             obs: Observation vector.
16 |             t: Time step.
17 |             noise: A dU-dimensional noise vector.
18 |         Returns: a dU-dimensional action vector.
19 | """
20 | raise NotImplementedError("Must be implemented in subclass.")
21 |
22 | def reset(self):
23 | return
24 |
25 | # Called when done using the object - must call reset() before starting to use it again
26 | def finalize(self):
27 | return
28 |
29 | def set_meta_data(self, meta):
30 | """
31 | Set meta data for policy (e.g., domain image, multi modal observation sizes)
32 | Args:
33 | meta: meta data.
34 | """
35 | return
36 |
--------------------------------------------------------------------------------
/source/gps/algorithm/policy/policy_prior_gmm.py:
--------------------------------------------------------------------------------
1 | """ This file defines a GMM prior for policy linearization. """
2 | import copy
3 | import logging
4 |
5 | import numpy as np
6 |
7 | from gps.algorithm.policy.config import POLICY_PRIOR_GMM
8 | from gps.utility.gmm import GMM
9 | from gps.algorithm.algorithm_utils import gauss_fit_joint_prior
10 |
11 |
12 | LOGGER = logging.getLogger(__name__)
13 |
14 |
15 | class PolicyPriorGMM(object):
16 | """
17 | A policy prior encoded as a GMM over [x_t, u_t] points, where u_t is
18 | the output of the policy for the given state x_t. This prior is used
19 | when computing the linearization of the policy.
20 |
21 |     See the method AlgorithmBADMM._update_policy_fit in
22 |     python/gps/algorithm/algorithm_badmm.py of the original GPS code.
23 |
24 | Also see the GMM dynamics prior, in
25 | python/gps/algorithm/dynamics/dynamics_prior_gmm.py. This is a
26 | similar GMM prior that is used for the dynamics estimate.
27 | """
28 | def __init__(self, hyperparams):
29 | """
30 | Hyperparameters:
31 | min_samples_per_cluster: Minimum number of samples.
32 | max_clusters: Maximum number of clusters to fit.
33 | max_samples: Maximum number of trajectories to use for
34 | fitting the GMM at any given time.
35 | strength: Adjusts the strength of the prior.
36 | """
37 | config = copy.deepcopy(POLICY_PRIOR_GMM)
38 | config.update(hyperparams)
39 | self._hyperparams = config
40 | self.X = None
41 | self.obs = None
42 | self.gmm = GMM()
43 | self._min_samp = self._hyperparams['min_samples_per_cluster']
44 | self._max_samples = self._hyperparams['max_samples']
45 | self._max_clusters = self._hyperparams['max_clusters']
46 | self._strength = self._hyperparams['strength']
47 | self._init_sig_reg = self._hyperparams['init_regularization']
48 | self._subsequent_sig_reg = self._hyperparams['subsequent_regularization']
49 |
50 | def update(self, samples, policy_opt, mode='add'):
51 | """
52 | Update GMM using new samples or policy_opt.
53 | By default does not replace old samples.
54 |
55 | Args:
56 | samples: SampleList containing new samples
57 | policy_opt: PolicyOpt containing current policy
58 | """
59 |
60 | X, obs = samples.get_X(), samples.get_obs()
61 | if self.X is None or mode == 'replace':
62 | self.X = X
63 | self.obs = obs
64 | elif mode == 'add' and X.size > 0:
65 | self.X = np.concatenate([self.X, X], axis=0)
66 | self.obs = np.concatenate([self.obs, obs], axis=0)
67 | # Trim extra samples
68 | N = self.X.shape[0]
69 | if N > self._max_samples:
70 | start = N - self._max_samples
71 | self.X = self.X[start:, :, :]
72 | self.obs = self.obs[start:, :, :]
73 |
74 | # Evaluate policy at samples to get mean policy action.
75 | U = policy_opt.prob(self.obs,diag_var=True)[0]
76 | # Create the dataset
77 | N, T = self.X.shape[:2]
78 | dXU = self.X.shape[2] + U.shape[2]
79 | XU = np.reshape(np.concatenate([self.X, U], axis=2), [T * N, dXU])
80 | # Choose number of clusters.
81 | K = int(max(2, min(self._max_clusters,
82 | np.floor(float(N * T) / self._min_samp))))
83 |
84 | LOGGER.debug('Generating %d clusters for policy prior GMM.', K)
85 | self.gmm.update(XU, K)
86 |
87 | def eval(self, Ts, Ps):
88 | """ Evaluate prior. """
89 | # Construct query data point.
90 | pts = np.concatenate((Ts, Ps), axis=1)
91 | # Perform query.
92 | mu0, Phi, m, n0 = self.gmm.inference(pts)
93 | # Factor in multiplier.
94 | n0 *= self._strength
95 | m *= self._strength
96 | # Multiply Phi by m (since it was normalized before).
97 | Phi *= m
98 | return mu0, Phi, m, n0
99 |
100 | def fit(self, X, pol_mu, pol_sig):
101 | """
102 | Fit policy linearization.
103 |
104 | Args:
105 | X: Samples (N, T, dX)
106 | pol_mu: Policy means (N, T, dU)
107 | pol_sig: Policy variances, diagonal entries only (N, T, dU)
108 | """
109 | N, T, dX = X.shape
110 | dU = pol_mu.shape[2]
111 | if N == 1:
112 | raise ValueError("Cannot fit policy linearization on a single sample")
113 |
114 | # Collapse policy covariances. (This is only correct because
115 | # the policy doesn't depend on state).
116 | pol_sig = np.mean(pol_sig, axis=0)
117 |
118 | # Allocate.
119 | pol_K = np.zeros([T, dU, dX])
120 | pol_k = np.zeros([T, dU])
121 | pol_S = np.zeros([T, dU, dU])
122 |
123 | # Fit policy linearization with least squares regression.
124 | dwts = (1.0 / N) * np.ones(N)
125 | for t in range(T):
126 | Ts = X[:, t, :]
127 | Ps = pol_mu[:, t, :]
128 | Ys = np.concatenate([Ts, Ps], axis=1)
129 | # Obtain Normal-inverse-Wishart prior.
130 | mu0, Phi, mm, n0 = self.eval(Ts, Ps)
131 | sig_reg = np.zeros((dX+dU, dX+dU))
132 | # Slightly regularize on first timestep.
133 | if t == 0:
134 | sig_reg[:dX, :dX] = self._init_sig_reg*np.eye(dX)
135 | else:
136 | sig_reg[:dX, :dX] = self._subsequent_sig_reg*np.eye(dX)
137 | pol_K[t, :, :], pol_k[t, :], pol_S[t, :, :] = \
138 | gauss_fit_joint_prior(Ys,
139 | mu0, Phi, mm, n0, dwts, dX, dU, sig_reg)
140 | pol_S += pol_sig
141 | return pol_K, pol_k, pol_S
142 |
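The cluster count in update() is data-driven: at least 2 clusters, at most max_clusters, and roughly min_samples_per_cluster points per cluster. A small sketch of that rule (the two default values here are hypothetical):

    import numpy as np

    def choose_num_clusters(N, T, min_samp=20, max_clusters=50):
        # Same expression as in PolicyPriorGMM.update above.
        return int(max(2, min(max_clusters, np.floor(float(N * T) / min_samp))))

    assert choose_num_clusters(N=5, T=50) == 12    # floor(250 / 20)
    assert choose_num_clusters(N=1, T=10) == 2     # clamped from below
    assert choose_num_clusters(N=100, T=50) == 50  # capped at max_clusters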
--------------------------------------------------------------------------------
/source/gps/algorithm/policy/tf_policy.py:
--------------------------------------------------------------------------------
1 | import pickle
2 | import os
3 | import uuid
4 |
5 | import numpy as np
6 | import tensorflow as tf
7 |
8 | from gps.algorithm.policy.policy import Policy
9 |
10 |
11 | class TfPolicy(Policy):
12 | """
13 | A neural network policy implemented in TensorFlow. The network output is
14 | taken to be the mean, and Gaussian noise is added on top of it.
15 | U = net.forward(obs) + noise, where noise ~ N(0, diag(var))
16 | Args:
17 | obs_tensor: tensor representing tf observation. Used in feed dict for forward pass.
18 | act_op: tf op to execute the forward pass. Use sess.run on this op.
19 | var: Du-dimensional noise variance vector.
20 | sess: tf session.
21 | device_string: tf device string for running on either gpu or cpu.
22 | """
23 | def __init__(self, dU, obs_tensor, act_op, var, sess, device_string):
24 | Policy.__init__(self)
25 | self.dU = dU
26 | self.obs_tensor = obs_tensor
27 | self.act_op = act_op
28 | self.sess = sess
29 | self.device_string = device_string
30 | self.chol_pol_covar = np.diag(np.sqrt(var))
31 | self.scale = None # must be set from elsewhere based on observations
32 | self.bias = None
33 |
34 | def act(self, X_t, obs, t, noise, es, f_vals):
35 | """
36 | Return an action for a state.
37 | Args:
38 | X_t: State vector (unused by this policy).
39 | obs: Observation vector.
40 | t: Time step.
41 | noise: Action noise (currently unused); es, f_vals: CMA-ES state and function values used for the step-size update.
42 | """
43 |
44 | # Normalize obs.
45 | if len(obs.shape) == 1:
46 | obs = np.expand_dims(obs, axis=0)
47 | obs = obs.dot(self.scale) + self.bias
48 | with tf.device(self.device_string):
49 | action_mean = self.sess.run(self.act_op, feed_dict={self.obs_tensor: obs})
50 | #if noise is None:
51 | #u = action_mean
52 | #else:
53 | #u = action_mean + self.chol_pol_covar.T.dot(noise)
54 | #u += self.chol_pol_covar[t].T.dot(noise)
55 | u = action_mean
56 | delta = u[0]
57 | hsig = es.adapt_sigma.hsig(es)
58 | es.hsig = hsig
59 | es.adapt_sigma.update2(es, function_values=f_vals)
60 | #if delta < np.exp(-1) or delta > 1e5:
61 | # delta = 1
62 | if np.any(np.isnan(delta)):
63 | print("Action %s" % delta)
64 | action = delta
65 |
66 | return action # the DAG computations are batched by default, but we use batch size 1.
67 |
68 | def pickle_policy(self, deg_obs, deg_action, checkpoint_path, goal_state=None, should_hash=False):
69 | """
70 | We can save just the policy if we are only interested in running forward at a later point
71 | without needing a policy optimization class. Useful for debugging and deploying.
72 | """
73 | if should_hash is True:
74 | hash_str = str(uuid.uuid4())
75 | checkpoint_path += hash_str
76 | pickled_pol = {'deg_obs': deg_obs, 'deg_action': deg_action, 'chol_pol_covar': self.chol_pol_covar,
77 | 'checkpoint_path_tf': checkpoint_path + '_tf_data.ckpt', 'scale': self.scale, 'bias': self.bias,
78 | 'device_string': self.device_string, 'goal_state': goal_state}
79 | pickle.dump(pickled_pol, open(checkpoint_path + '.pkl', "wb"))
80 | saver = tf.train.Saver()
81 | saver.save(self.sess, checkpoint_path + '_tf_data.ckpt')
82 |
83 | @classmethod
84 | def load_policy(cls, policy_dict_path, tf_generator, network_config=None):
85 | """
86 | For when we only need to load a policy for the forward pass. For instance, to run on the robot from
87 | a checkpointed policy.
88 | """
89 | from tensorflow.python.framework import ops
90 | ops.reset_default_graph() # we need to destroy the default graph before re_init or checkpoint won't restore.
91 | pol_dict = pickle.load(open(policy_dict_path, "rb"))
92 | #if 'deg_obs' in network_config:
93 | # pol_dict['deg_obs'] = network_config['deg_obs']
94 | #if 'deg_action' in network_config:
95 | # pol_dict['deg_action'] = network_config['deg_action']
96 |
97 | tf_map = tf_generator(dim_input=pol_dict['deg_obs'], dim_output=pol_dict['deg_action'],
98 | batch_size=1, network_config=network_config)
99 |
100 | sess = tf.Session()
101 | init_op = tf.initialize_all_variables()
102 | sess.run(init_op)
103 | saver = tf.train.Saver()
104 | check_file = '/'.join(str.split(policy_dict_path, '/')[:-1]) + '/' + str.split(pol_dict['checkpoint_path_tf'], '/')[-1]
105 |
106 | saver.restore(sess, check_file)
107 |
108 | device_string = pol_dict['device_string']
109 |
110 | cls_init = cls(pol_dict['deg_action'], tf_map.get_input_tensor(), tf_map.get_output_op(), np.zeros((1,)),
111 | sess, device_string)
112 | cls_init.chol_pol_covar = pol_dict['chol_pol_covar']
113 | cls_init.scale = pol_dict['scale']
114 | cls_init.bias = pol_dict['bias']
115 | return cls_init
116 |
117 |
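The normalization in act() mirrors what PolicyOpt.update computes from the first batch of observations: scale is a diagonal matrix of inverse standard deviations and bias recenters the scaled observations. A minimal NumPy sketch (synthetic data, not repository code) showing the effect:

    import numpy as np

    obs = np.random.randn(1000, 4) * [1.0, 5.0, 0.1, 2.0] + [3.0, -1.0, 0.0, 7.0]
    scale = np.diag(1.0 / np.maximum(np.std(obs, axis=0), 1e-3))
    bias = -np.mean(obs.dot(scale), axis=0)

    normalized = obs.dot(scale) + bias   # same transform as in TfPolicy.act
    assert np.allclose(np.mean(normalized, axis=0), 0.0, atol=1e-8)
    assert np.allclose(np.std(normalized, axis=0), 1.0, atol=1e-6)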
--------------------------------------------------------------------------------
/source/gps/algorithm/policy_opt/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/source/gps/algorithm/policy_opt/__init__.py
--------------------------------------------------------------------------------
/source/gps/algorithm/policy_opt/config.py:
--------------------------------------------------------------------------------
1 | """ Default configuration for policy optimization. """
2 | import os
3 |
4 | POLICY_OPT = {
5 | # Initialization.
6 | 'init_var': 0.1, # Initial policy variance.
7 | 'ent_reg': 0.0, # Entropy regularizer.
8 | # Solver hyperparameters.
9 | 'iterations': 5000, # Number of optimizer iterations per policy update.
10 | 'batch_size': 25,
11 | 'lr': 0.001, # Base learning rate (by default it's fixed).
12 | 'lr_policy': 'fixed', # Learning rate policy.
13 | 'momentum': 0.9, # Momentum.
14 | 'momentum2': 0.999,
15 | 'epsilon': 1e-8,
16 | 'weight_decay': 0.005, # Weight decay.
17 | 'solver_type': 'Adam', # Solver type (e.g. 'SGD', 'Adam', etc.).
18 | # set gpu usage.
19 | 'use_gpu': 1, # Whether or not to use the GPU.
20 | 'gpu_id': 0,
21 | 'random_seed': 1
22 | }
23 |
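These defaults are consumed by overlaying the experiment's hyperparameters on a deep copy (see PolicyOpt.__init__ further down); keys the experiment does not set keep their default values. A short sketch with a hypothetical override dict:

    import copy

    hyperparams = {'iterations': 2000, 'lr': 0.0005}  # hypothetical overrides
    config = copy.deepcopy(POLICY_OPT)
    config.update(hyperparams)
    assert config['iterations'] == 2000    # overridden
    assert config['batch_size'] == 25      # default retained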
--------------------------------------------------------------------------------
/source/gps/algorithm/policy_opt/lto_model.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from gps.algorithm.policy_opt.tf_utils import TfMap
3 | import numpy as np
4 |
5 | def init_weights(shape, name=None):
6 | return tf.Variable(tf.random_normal(shape, stddev=0.01), name=name)
7 |
8 | def init_bias(shape, name=None):
9 | return tf.Variable(tf.zeros(shape, dtype='float'), name=name)
10 |
11 | def batched_matrix_vector_multiply(vector, matrix):
12 | """ computes x^T A in mini-batches. """
13 | vector_batch_as_matricies = tf.expand_dims(vector, [1])
14 | mult_result = tf.matmul(vector_batch_as_matricies, matrix)
15 | squeezed_result = tf.squeeze(mult_result, [1])
16 | return squeezed_result
17 |
18 | def get_input_layer():
19 | """produce the placeholder inputs that are used to run ops forward and backwards.
20 | net_input: usually an observation.
21 | action: mu, the ground truth actions we're trying to learn.
22 | precision: precision matrix used to compute loss."""
23 | net_input = tf.placeholder("float", [None, None], name='nn_input') # (N*T) x dO
24 | action = tf.placeholder('float', [None, None], name='action') # (N*T) x dU
25 | precision = tf.placeholder('float', [None, None, None], name='precision') # (N*T) x dU x dU
26 | return net_input, action, precision
27 |
28 | def get_loss_layer(mlp_out, action, precision, batch_size):
29 | """The loss layer used for the MLP network is obtained through this class."""
30 | scale_factor = tf.constant(2*batch_size, dtype='float')
31 | uP = batched_matrix_vector_multiply(action - mlp_out, precision)
32 | uPu = tf.reduce_sum(uP*(action - mlp_out)) # this last dot product is then summed, so we just sum it all at once.
33 | return uPu/scale_factor
34 |
35 | def fully_connected_tf_network(dim_input, dim_output, batch_size=25, network_config=None):
36 |
37 | dim_hidden = network_config['dim_hidden'] + [dim_output]
38 | n_layers = len(dim_hidden)
39 |
40 | nn_input, action, precision = get_input_layer()
41 |
42 | weights = []
43 | biases = []
44 | in_shape = dim_input
45 | for layer_step in range(0, n_layers):
46 | cur_weight = init_weights([in_shape, dim_hidden[layer_step]], name='w_' + str(layer_step))
47 | cur_bias = init_bias([dim_hidden[layer_step]], name='b_' + str(layer_step))
48 | in_shape = dim_hidden[layer_step]
49 | weights.append(cur_weight)
50 | biases.append(cur_bias)
51 |
52 | cur_top = nn_input
53 | for layer_step in range(0, n_layers):
54 | if layer_step != n_layers-1: # hidden layers use ReLU; the final layer uses ReLU6
55 | cur_top = tf.nn.relu(tf.matmul(cur_top, weights[layer_step]) + biases[layer_step])
56 | else:
57 | cur_top = tf.nn.relu6(tf.matmul(cur_top, weights[layer_step]) + biases[layer_step])
58 |
59 | mlp_applied = cur_top
60 | loss_out = get_loss_layer(mlp_out=mlp_applied, action=action, precision=precision, batch_size=batch_size)
61 |
62 | return TfMap.init_from_lists([nn_input, action, precision], [mlp_applied], [loss_out])
63 |
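get_loss_layer computes uPu / (2 * batch_size), i.e. the batch average of the precision-weighted quadratic error 0.5 * (a - mu)^T P (a - mu). A NumPy sketch (not repository code) verifying that reading:

    import numpy as np

    batch_size, dU = 25, 3
    mlp_out = np.random.randn(batch_size, dU)
    action = np.random.randn(batch_size, dU)
    precision = np.tile(np.eye(dU), (batch_size, 1, 1))

    diff = action - mlp_out
    uP = np.einsum('nu,nuv->nv', diff, precision)   # batched x^T A
    loss = np.sum(uP * diff) / (2.0 * batch_size)

    expected = np.mean([0.5 * d.dot(P).dot(d) for d, P in zip(diff, precision)])
    assert np.isclose(loss, expected)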
--------------------------------------------------------------------------------
/source/gps/algorithm/policy_opt/policy_opt.py:
--------------------------------------------------------------------------------
1 | """ This file defines policy optimization for a tensorflow policy. """
2 | import copy
3 | import logging
4 |
5 | import numpy as np
6 |
7 | # NOTE: Order of these imports matters for some reason.
8 | # Changing it can lead to segmentation faults on some machines.
9 |
10 | from gps.algorithm.policy_opt.config import POLICY_OPT
11 | import tensorflow as tf
12 |
13 | from gps.algorithm.policy.tf_policy import TfPolicy
14 | from gps.algorithm.policy_opt.tf_utils import TfSolver
15 |
16 |
17 | LOGGER = logging.getLogger(__name__)
18 |
19 | class PolicyOpt(object):
20 | """ Policy optimization using tensor flow for DAG computations/nonlinear function approximation. """
21 | def __init__(self, hyperparams, dO, dU):
22 | config = copy.deepcopy(POLICY_OPT)
23 | config.update(hyperparams)
24 |
25 | self._hyperparams = config
26 | self._dO = dO
27 | self._dU = dU
28 |
29 | tf.set_random_seed(self._hyperparams['random_seed'])
30 |
31 | self.tf_iter = 0
32 | self.batch_size = self._hyperparams['batch_size']
33 | self.device_string = "/cpu:0"
34 | if self._hyperparams['use_gpu'] == 1:
35 | self.gpu_device = self._hyperparams['gpu_id']
36 | self.device_string = "/gpu:" + str(self.gpu_device)
37 | self.act_op = None # mu_hat
38 | self.loss_scalar = None
39 | self.obs_tensor = None
40 | self.precision_tensor = None
41 | self.action_tensor = None # mu true
42 | self.solver = None
43 | self.init_network()
44 | self.init_solver()
45 | self.var = self._hyperparams['init_var'] * np.ones(dU)
46 | self.sess = tf.Session()
47 | self.policy = TfPolicy(dU, self.obs_tensor, self.act_op, np.zeros(dU), self.sess, self.device_string)
48 | init_op = tf.initialize_all_variables()
49 | self.sess.run(init_op)
50 |
51 | def init_network(self):
52 | """ Helper method to initialize the tf networks used """
53 | tf_map_generator = self._hyperparams['network_model']
54 | tf_map = tf_map_generator(dim_input=self._dO, dim_output=self._dU, batch_size=self.batch_size,
55 | network_config=self._hyperparams['network_params'])
56 | self.obs_tensor = tf_map.get_input_tensor()
57 | self.action_tensor = tf_map.get_target_output_tensor()
58 | self.precision_tensor = tf_map.get_precision_tensor()
59 | self.act_op = tf_map.get_output_op()
60 | self.loss_scalar = tf_map.get_loss_op()
61 |
62 | def init_solver(self):
63 | """ Helper method to initialize the solver. """
64 | self.solver = TfSolver(loss_scalar=self.loss_scalar,
65 | solver_name=self._hyperparams['solver_type'],
66 | base_lr=self._hyperparams['lr'],
67 | lr_policy=self._hyperparams['lr_policy'],
68 | momentum=self._hyperparams['momentum'],
69 | momentum2=self._hyperparams['momentum2'],
70 | epsilon=self._hyperparams['epsilon'],
71 | weight_decay=self._hyperparams['weight_decay'])
72 |
73 | def update(self, obs, tgt_mu, tgt_prc, tgt_wt):
74 | """
75 | Update policy.
76 | Args:
77 | obs: Numpy array of observations, N x T x dO.
78 | tgt_mu: Numpy array of mean controller outputs, N x T x dU.
79 | tgt_prc: Numpy array of precision matrices, N x T x dU x dU.
80 | tgt_wt: Numpy array of weights, N x T.
81 | Returns:
82 | A tensorflow object with updated weights.
83 | """
84 | N, T = obs.shape[:2]
85 | dU, dO = self._dU, self._dO
86 |
87 | # Renormalize weights.
88 | tgt_wt *= (float(N * T) / np.sum(tgt_wt))
89 | # Allow weights to be at most twice the robust median.
90 | mn = np.median(tgt_wt[(tgt_wt > 1e-2).nonzero()])
91 | for n in range(N):
92 | for t in range(T):
93 | tgt_wt[n, t] = min(tgt_wt[n, t], 2 * mn)
94 | # Robust median should be around one.
95 | tgt_wt /= mn
96 |
97 | # Reshape inputs.
98 | obs = np.reshape(obs, (N*T, dO))
99 | tgt_mu = np.reshape(tgt_mu, (N*T, dU))
100 | tgt_prc = np.reshape(tgt_prc, (N*T, dU, dU))
101 | tgt_wt = np.reshape(tgt_wt, (N*T, 1, 1))
102 |
103 | # Fold weights into tgt_prc.
104 | tgt_prc = tgt_wt * tgt_prc
105 |
106 | # Normalize obs, but only compute the normalization at the beginning.
107 | if self.policy.scale is None or self.policy.bias is None:
108 | # 1e-3 to avoid infs if some state dimensions don't change in the
109 | # first batch of samples
110 | self.policy.scale = np.diag(
111 | 1.0 / np.maximum(np.std(obs, axis=0), 1e-3))
112 | self.policy.bias = - np.mean(
113 | obs.dot(self.policy.scale), axis=0)
114 | obs = obs.dot(self.policy.scale) + self.policy.bias
115 |
116 | # Assuming that N*T >= self.batch_size.
117 | batches_per_epoch = np.floor(N*T / self.batch_size)
118 | idx = list(range(N*T))
119 | average_loss = 0
120 | np.random.shuffle(idx)
121 |
122 | # actual training.
123 | for i in range(self._hyperparams['iterations']):
124 | # Load in data for this batch.
125 | start_idx = int(i * self.batch_size %
126 | (batches_per_epoch * self.batch_size))
127 | idx_i = idx[start_idx:start_idx+self.batch_size]
128 | feed_dict = {self.obs_tensor: obs[idx_i],
129 | self.action_tensor: tgt_mu[idx_i],
130 | self.precision_tensor: tgt_prc[idx_i]}
131 | train_loss = self.solver(feed_dict, self.sess)
132 |
133 | average_loss += train_loss
134 | if (i+1) % 500 == 0:
135 | LOGGER.debug('tensorflow iteration %d, average loss %f',
136 | i+1, average_loss / 500)
137 | print('supervised tf loss (sum over last 500 iterations) is ' + str(average_loss))
138 | average_loss = 0
139 |
140 | # Keep track of tensorflow iterations for loading solver states.
141 | self.tf_iter += self._hyperparams['iterations']
142 |
143 | # Optimize variance.
144 |
145 | self.var = (np.sum(tgt_wt,axis=0)[:,0] - 2*N*T*self._hyperparams['ent_reg']) / np.sum(np.diagonal(tgt_prc, axis1=1, axis2=2),axis=0)
146 |
147 | self.policy.chol_pol_covar = np.diag(np.sqrt(self.var))
148 |
149 | return self.policy
150 |
151 | def prob(self, obs, diag_var = False):
152 | """
153 | Run policy forward.
154 | Args:
155 | obs: Numpy array of observations that is N x T x dO.
156 | """
157 | dU = self._dU
158 | N, T = obs.shape[:2]
159 |
160 | output = np.zeros((N, T, dU))
161 |
162 | for i in range(N):
163 | for t in range(T):
164 | # Feed in data.
165 | if self.policy.scale is not None:
166 | feed_dict = {self.obs_tensor: np.expand_dims(obs[i, t], axis=0).dot(self.policy.scale) + self.policy.bias}
167 | else:
168 | feed_dict = {self.obs_tensor: np.expand_dims(obs[i, t], axis=0)}
169 | with tf.device(self.device_string):
170 | output[i, t, :] = self.sess.run(self.act_op, feed_dict=feed_dict)
171 |
172 | if diag_var:
173 | pol_sigma = np.tile(self.var, [N, T, 1])
174 | pol_prec = np.tile(1.0 / self.var, [N, T, 1])
175 | pol_det_sigma = np.tile(np.prod(self.var), [N, T])
176 | else:
177 | pol_sigma = np.tile(np.diag(self.var), [N, T, 1, 1])
178 | pol_prec = np.tile(np.diag(1.0 / self.var), [N, T, 1, 1])
179 | pol_det_sigma = np.tile(np.prod(self.var), [N, T])
180 |
181 | return output, pol_sigma, pol_prec, pol_det_sigma
182 |
183 | def set_ent_reg(self, ent_reg):
184 | """ Set the entropy regularization. """
185 | self._hyperparams['ent_reg'] = ent_reg
186 |
187 | # For pickling.
188 | def __getstate__(self):
189 |
190 | return {
191 | 'hyperparams': self._hyperparams,
192 | 'dO': self._dO,
193 | 'dU': self._dU,
194 | 'scale': self.policy.scale,
195 | 'bias': self.policy.bias,
196 | 'tf_iter': self.tf_iter,
197 | }
198 |
199 | # For unpickling.
200 | def __setstate__(self, state):
201 | self.__init__(state['hyperparams'], state['dO'], state['dU'])
202 | self.policy.scale = state['scale']
203 | self.policy.bias = state['bias']
204 | self.tf_iter = state['tf_iter']
205 |
206 |
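The weight renormalization at the top of update() rescales tgt_wt to sum to N*T, clips each weight at twice the robust median, then divides by that median so typical weights sit near 1. A vectorized sketch with synthetic weights (the file above uses an explicit double loop for the clipping):

    import numpy as np

    N, T = 4, 5
    tgt_wt = np.abs(np.random.randn(N, T)) + 0.1
    tgt_wt *= float(N * T) / np.sum(tgt_wt)
    mn = np.median(tgt_wt[(tgt_wt > 1e-2).nonzero()])
    tgt_wt = np.minimum(tgt_wt, 2 * mn)   # clip at twice the robust median
    tgt_wt /= mn                          # robust median is now ~1
    assert tgt_wt.max() <= 2.0 + 1e-12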
--------------------------------------------------------------------------------
/source/gps/algorithm/policy_opt/tf_utils.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 | def check_list_and_convert(the_object):
4 | if isinstance(the_object, list):
5 | return the_object
6 | return [the_object]
7 |
8 |
9 | class TfMap:
10 | """ a container for inputs, outputs, and loss in a tf graph. This object exists only
11 | to make well-defined the tf inputs, outputs, and losses used in the policy_opt_tf class."""
12 |
13 | def __init__(self, input_tensor, target_output_tensor, precision_tensor, output_op, loss_op):
14 | self.input_tensor = input_tensor
15 | self.target_output_tensor = target_output_tensor
16 | self.precision_tensor = precision_tensor
17 | self.output_op = output_op
18 | self.loss_op = loss_op
19 |
20 | @classmethod
21 | def init_from_lists(cls, inputs, outputs, loss):
22 | inputs = check_list_and_convert(inputs)
23 | outputs = check_list_and_convert(outputs)
24 | loss = check_list_and_convert(loss)
25 | if len(inputs) < 3: # pad for the constructor if needed.
26 | inputs += [None]*(3 - len(inputs))
27 | return cls(inputs[0], inputs[1], inputs[2], outputs[0], loss[0])
28 |
29 | def get_input_tensor(self):
30 | return self.input_tensor
31 |
32 | def set_input_tensor(self, input_tensor):
33 | self.input_tensor = input_tensor
34 |
35 | def get_target_output_tensor(self):
36 | return self.target_output_tensor
37 |
38 | def set_target_output_tensor(self, target_output_tensor):
39 | self.target_output_tensor = target_output_tensor
40 |
41 | def get_precision_tensor(self):
42 | return self.precision_tensor
43 |
44 | def set_precision_tensor(self, precision_tensor):
45 | self.precision_tensor = precision_tensor
46 |
47 | def get_output_op(self):
48 | return self.output_op
49 |
50 | def set_output_op(self, output_op):
51 | self.output_op = output_op
52 |
53 | def get_loss_op(self):
54 | return self.loss_op
55 |
56 | def set_loss_op(self, loss_op):
57 | self.loss_op = loss_op
58 |
59 |
60 | class TfSolver:
61 | """ A container for holding solver hyperparams in tensorflow. Used to execute backwards pass. """
62 | def __init__(self, loss_scalar, solver_name='adam', base_lr=None, lr_policy=None, momentum=None, momentum2=None, epsilon=None, weight_decay=None):
63 | self.base_lr = base_lr
64 | self.lr_policy = lr_policy
65 | self.momentum = momentum
66 | self.momentum2 = momentum2
67 | self.epsilon = epsilon
68 | self.solver_name = solver_name
69 | self.loss_scalar = loss_scalar
70 | if self.lr_policy != 'fixed':
71 | raise NotImplementedError('learning rate policies other than fixed are not implemented')
72 |
73 | self.weight_decay = weight_decay
74 | if weight_decay is not None:
75 | trainable_vars = tf.trainable_variables()
76 | loss_with_reg = self.loss_scalar
77 | for var in trainable_vars:
78 | loss_with_reg += self.weight_decay*tf.nn.l2_loss(var)
79 | self.loss_scalar = loss_with_reg
80 |
81 | self.solver_op = self.get_solver_op()
82 |
83 | def get_solver_op(self):
84 | solver_string = self.solver_name.lower()
85 | if solver_string == 'adam':
86 | return tf.train.AdamOptimizer(learning_rate=self.base_lr,beta1=self.momentum,beta2=self.momentum2,epsilon=self.epsilon).minimize(self.loss_scalar)
87 | elif solver_string == 'rmsprop':
88 | return tf.train.RMSPropOptimizer(learning_rate=self.base_lr,decay=self.momentum).minimize(self.loss_scalar)
89 | elif solver_string == 'momentum':
90 | return tf.train.MomentumOptimizer(learning_rate=self.base_lr,momentum=self.momentum).minimize(self.loss_scalar)
91 | elif solver_string == 'adagrad':
92 | return tf.train.AdagradOptimizer(learning_rate=self.base_lr,initial_accumulator_value=self.momentum).minimize(self.loss_scalar)
93 | elif solver_string == 'sgd':
94 | return tf.train.GradientDescentOptimizer(learning_rate=self.base_lr).minimize(self.loss_scalar)
95 | else:
96 | raise NotImplementedError("Please select a valid optimizer.")
97 |
98 | def __call__(self, feed_dict, sess, device_string="/cpu:0", additional_tensors = None):
99 | if additional_tensors is None:
100 | with tf.device(device_string):
101 | loss = sess.run([self.loss_scalar, self.solver_op], feed_dict)
102 | return loss[0]
103 | else:
104 | with tf.device(device_string):
105 | loss = sess.run([self.loss_scalar] + additional_tensors + [self.solver_op], feed_dict)
106 | return loss[:-1]
107 |
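TfMap.init_from_lists pads missing input slots with None so callers can supply only the observation placeholder. A tiny sketch, with strings standing in for tensors:

    tf_map = TfMap.init_from_lists(inputs='obs_placeholder',
                                   outputs='act_op',
                                   loss='loss_op')
    assert tf_map.get_input_tensor() == 'obs_placeholder'
    assert tf_map.get_target_output_tensor() is None
    assert tf_map.get_precision_tensor() is None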
--------------------------------------------------------------------------------
/source/gps/algorithm/traj_opt/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/source/gps/algorithm/traj_opt/__init__.py
--------------------------------------------------------------------------------
/source/gps/algorithm/traj_opt/config.py:
--------------------------------------------------------------------------------
1 | """ Default configuration for trajectory optimization. """
2 |
3 | TRAJ_OPT = {
4 | # Dual variable updates for non-PD Q-function.
5 | 'del0': 1e-4,
6 | 'min_eta': 1e-4,
7 | 'max_eta': 1e16,
8 | }
9 |
--------------------------------------------------------------------------------
/source/gps/algorithm/traj_opt/traj_opt.py:
--------------------------------------------------------------------------------
1 | """ This file defines code for iLQG-based trajectory optimization. """
2 | import logging
3 | import copy
4 |
5 | import numpy as np
6 | from numpy.linalg import LinAlgError
7 | import scipy as sp
8 |
9 | from gps.algorithm.traj_opt.config import TRAJ_OPT
10 | from gps.algorithm.traj_opt.traj_opt_utils import traj_distr_kl, DGD_MAX_ITER
11 |
12 | LOGGER = logging.getLogger(__name__)
13 |
14 | class TrajOpt(object):
15 | """ LQR trajectory optimization """
16 | def __init__(self, hyperparams):
17 | config = copy.deepcopy(TRAJ_OPT)
18 | config.update(hyperparams)
19 |
20 | self._hyperparams = config
21 |
22 | def update(self, m, algorithm):
23 | """ Run dual gradient decent to optimize trajectories. """
24 | T = algorithm.T
25 | eta = algorithm.cur[m].eta
26 | step_mult = algorithm.cur[m].step_mult
27 | traj_info = algorithm.cur[m].traj_info
28 |
29 | prev_traj_distr = algorithm.cur[m].traj_distr
30 |
31 | # Set KL-divergence step size (epsilon).
32 | kl_step = T * algorithm.base_kl_step * step_mult
33 |
34 | # We assume at min_eta, kl_div > kl_step, opposite for max_eta.
35 | min_eta = self._hyperparams['min_eta']
36 | max_eta = self._hyperparams['max_eta']
37 |
38 | LOGGER.debug("Running DGD for trajectory %d, eta: %f", m, eta)
39 | for itr in range(DGD_MAX_ITER):
40 | LOGGER.debug("Iteration %i, bracket: (%.2e , %.2e , %.2e)",
41 | itr, min_eta, eta, max_eta)
42 |
43 | # Run fwd/bwd pass, note that eta may be updated.
44 | # NOTE: we can just ignore case when the new eta is larger.
45 | traj_distr, eta = self.backward(prev_traj_distr, traj_info,
46 | eta, algorithm, m)
47 | new_mu, new_sigma = self.forward(traj_distr, traj_info)
48 |
49 | # Compute KL divergence constraint violation.
50 | kl_div = traj_distr_kl(new_mu, new_sigma,
51 | traj_distr, prev_traj_distr)
52 | con = kl_div - kl_step
53 |
54 | # Convergence check - constraint satisfaction.
55 | if (abs(con) < 0.1*kl_step):
56 | LOGGER.debug("KL: %f / %f, converged iteration %i",
57 | kl_div, kl_step, itr)
58 | break
59 |
60 | # Choose new eta (bisect bracket or multiply by constant)
61 | if con < 0: # Eta was too big.
62 | max_eta = eta
63 | geom = np.sqrt(min_eta*max_eta) # Geometric mean.
64 | new_eta = max(geom, 0.1*max_eta)
65 | LOGGER.debug("KL: %f / %f, eta too big, new eta: %f",
66 | kl_div, kl_step, new_eta)
67 | else: # Eta was too small.
68 | min_eta = eta
69 | geom = np.sqrt(min_eta*max_eta) # Geometric mean.
70 | new_eta = min(geom, 10.0*min_eta)
71 | LOGGER.debug("KL: %f / %f, eta too small, new eta: %f",
72 | kl_div, kl_step, new_eta)
73 |
74 | # Logarithmic mean: log_mean(x,y) = (y - x)/(log(y) - log(x))
75 | eta = new_eta
76 |
77 | if kl_div > kl_step and abs(kl_div - kl_step) > 0.1*kl_step:
78 | LOGGER.warning(
79 | "Final KL divergence after DGD convergence is too high."
80 | )
81 |
82 | return traj_distr, eta
83 |
84 | def estimate_cost(self, traj_distr, traj_info):
85 | """ Compute Laplace approximation to expected cost. """
86 | # Constants.
87 | T = traj_distr.T
88 |
89 | # Perform forward pass (note that we repeat this here, because
90 | # traj_info may have different dynamics from the ones that were
91 | # used to compute the distribution already saved in traj).
92 | mu, sigma = self.forward(traj_distr, traj_info)
93 |
94 | # Compute cost.
95 | predicted_cost = np.zeros(T)
96 | for t in range(T):
97 | predicted_cost[t] = traj_info.cc[t] + 0.5 * \
98 | np.sum(sigma[t, :, :] * traj_info.Cm[t, :, :]) + 0.5 * \
99 | mu[t, :].T.dot(traj_info.Cm[t, :, :]).dot(mu[t, :]) + \
100 | mu[t, :].T.dot(traj_info.cv[t, :])
101 | return predicted_cost
102 |
103 | def forward(self, traj_distr, traj_info):
104 | """
105 | Perform LQR forward pass. Computes state-action marginals from
106 | dynamics and policy.
107 | Args:
108 | traj_distr: A linear Gaussian policy object.
109 | traj_info: A TrajectoryInfo object.
110 | Returns:
111 | mu: A T x (dX+dU) mean state-action vector.
112 | sigma: A T x (dX+dU) x (dX+dU) covariance matrix.
113 | """
114 | # Compute state-action marginals from specified conditional
115 | # parameters and current traj_info.
116 | T = traj_distr.T
117 | dU = traj_distr.dU
118 | dX = traj_distr.dX
119 |
120 | # Constants.
121 | idx_x = slice(dX)
122 |
123 | # Allocate space.
124 | sigma = np.zeros((T, dX+dU, dX+dU))
125 | mu = np.zeros((T, dX+dU))
126 |
127 | # Pull out dynamics.
128 | Fm = traj_info.dynamics.Fm
129 | fv = traj_info.dynamics.fv
130 | dyn_covar = traj_info.dynamics.dyn_covar
131 |
132 | # Set initial covariance (initial mu is always zero).
133 | sigma[0, idx_x, idx_x] = traj_info.x0sigma
134 | mu[0, idx_x] = traj_info.x0mu
135 |
136 | for t in range(T):
137 | sigma[t, :, :] = np.vstack([
138 | np.hstack([
139 | sigma[t, idx_x, idx_x],
140 | sigma[t, idx_x, idx_x].dot(traj_distr.K[t, :, :].T)
141 | ]),
142 | np.hstack([
143 | traj_distr.K[t, :, :].dot(sigma[t, idx_x, idx_x]),
144 | traj_distr.K[t, :, :].dot(sigma[t, idx_x, idx_x]).dot(
145 | traj_distr.K[t, :, :].T
146 | ) + traj_distr.pol_covar[t, :, :]
147 | ])
148 | ])
149 | mu[t, :] = np.hstack([
150 | mu[t, idx_x],
151 | traj_distr.K[t, :, :].dot(mu[t, idx_x]) + traj_distr.k[t, :]
152 | ])
153 | if t < T - 1:
154 | sigma[t+1, idx_x, idx_x] = \
155 | Fm[t, :, :].dot(sigma[t, :, :]).dot(Fm[t, :, :].T) + \
156 | dyn_covar[t, :, :]
157 | mu[t+1, idx_x] = Fm[t, :, :].dot(mu[t, :]) + fv[t, :]
158 | return mu, sigma
159 |
160 | def backward(self, prev_traj_distr, traj_info, eta, algorithm, m):
161 | """
162 | Perform LQR backward pass. This computes a new linear Gaussian
163 | policy object.
164 | Args:
165 | prev_traj_distr: A linear Gaussian policy object from
166 | previous iteration.
167 | traj_info: A TrajectoryInfo object.
168 | eta: Dual variable.
169 | algorithm: Algorithm object needed to compute costs.
170 | m: Index of the condition (trajectory) being optimized.
171 | Returns:
172 | traj_distr: A new linear Gaussian policy.
173 | new_eta: The updated dual variable. Updates happen if the
174 | Q-function is not PD.
175 | """
176 | # Constants.
177 | T = prev_traj_distr.T
178 | dU = prev_traj_distr.dU
179 | dX = prev_traj_distr.dX
180 |
181 | traj_distr = prev_traj_distr.nans_like()
182 |
183 | pol_wt = algorithm.cur[m].pol_info.pol_wt
184 |
185 | idx_x = slice(dX)
186 | idx_u = slice(dX, dX+dU)
187 |
188 | # Pull out dynamics.
189 | Fm = traj_info.dynamics.Fm
190 | fv = traj_info.dynamics.fv
191 |
192 | # Non-SPD correction terms.
193 | del_ = self._hyperparams['del0']
194 | eta0 = eta
195 |
196 | # Run dynamic programming.
197 | fail = True
198 | while fail:
199 | fail = False # Flip to true on non-symmetric PD.
200 |
201 | # Allocate.
202 | Vxx = np.zeros((T, dX, dX))
203 | Vx = np.zeros((T, dX))
204 |
205 | fCm, fcv = algorithm.compute_costs(m, eta)
206 |
207 | # Compute state-action-state function at each time step.
208 | for t in range(T - 1, -1, -1):
209 | # Add in the cost.
210 | Qtt = fCm[t, :, :] # (X+U) x (X+U)
211 | Qt = fcv[t, :] # (X+U) x 1
212 |
213 | # Add in the value function from the next time step.
214 | #if t < T - 1:
215 | # multiplier = (pol_wt[t+1] + eta)/(pol_wt[t] + eta)
216 | # Qtt = Qtt + multiplier * \
217 | # Fm[t, :, :].T.dot(Vxx[t+1, :, :]).dot(Fm[t, :, :])
218 | # Qt = Qt + multiplier * \
219 | # Fm[t, :, :].T.dot(Vx[t+1, :] +
220 | # Vxx[t+1, :, :].dot(fv[t, :]))
221 |
222 | # Symmetrize quadratic component.
223 | Qtt = 0.5 * (Qtt + Qtt.T)
224 |
225 | # Compute Cholesky decomposition of Q function action
226 | # component.
227 | try:
228 | U = sp.linalg.cholesky(Qtt[idx_u, idx_u])
229 | L = U.T
230 | except LinAlgError as e:
231 | # Error thrown when Qtt[idx_u, idx_u] is not
232 | # symmetric positive definite.
233 | LOGGER.debug('LinAlgError: %s', e)
234 | fail = True
235 | break
236 |
237 | # Store conditional covariance, inverse, and Cholesky.
238 | traj_distr.inv_pol_covar[t, :, :] = Qtt[idx_u, idx_u]
239 | traj_distr.pol_covar[t, :, :] = sp.linalg.solve_triangular(
240 | U, sp.linalg.solve_triangular(L, np.eye(dU), lower=True)
241 | )
242 | traj_distr.chol_pol_covar[t, :, :] = sp.linalg.cholesky(
243 | traj_distr.pol_covar[t, :, :]
244 | )
245 |
246 | # Compute mean terms.
247 | traj_distr.k[t, :] = -sp.linalg.solve_triangular(
248 | U, sp.linalg.solve_triangular(L, Qt[idx_u], lower=True)
249 | )
250 | traj_distr.K[t, :, :] = -sp.linalg.solve_triangular(
251 | U, sp.linalg.solve_triangular(L, Qtt[idx_u, idx_x],
252 | lower=True)
253 | )
254 |
255 | # Compute value function.
256 | Vxx[t, :, :] = Qtt[idx_x, idx_x] + \
257 | Qtt[idx_x, idx_u].dot(traj_distr.K[t, :, :])
258 | Vx[t, :] = Qt[idx_x] + Qtt[idx_x, idx_u].dot(traj_distr.k[t, :])
259 | Vxx[t, :, :] = 0.5 * (Vxx[t, :, :] + Vxx[t, :, :].T)
260 |
261 | # Increment eta on non-SPD Q-function.
262 | if fail:
263 | old_eta = eta
264 | eta = eta0 + del_
265 | LOGGER.debug('Increasing eta: %f -> %f', old_eta, eta)
266 | del_ *= 2 # Increase del_ exponentially on failure.
267 | if eta >= 1e16:
268 | if np.any(np.isnan(Fm)) or np.any(np.isnan(fv)):
269 | raise ValueError('NaNs encountered in dynamics!')
270 | raise ValueError('Failed to find PD solution even for very \
271 | large eta (check that dynamics and cost are \
272 | reasonably well conditioned)!')
273 | return traj_distr, eta
274 |
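The eta search in update() exploits that the KL divergence of the re-optimized trajectory decreases monotonically in eta: the bracket is narrowed at the geometric mean (with a multiplicative safeguard) until the constraint is met to within 10%. A toy sketch with a hypothetical monotone KL model, not the repository's actual code path:

    import numpy as np

    def kl(eta):                 # hypothetical stand-in for the LQR solve
        return 5.0 / (1.0 + eta)

    kl_step = 0.5
    min_eta, max_eta, eta = 1e-4, 1e16, 1.0
    for _ in range(50):
        con = kl(eta) - kl_step
        if abs(con) < 0.1 * kl_step:
            break
        if con < 0:              # eta too big: KL fell below the step
            max_eta = eta
            eta = max(np.sqrt(min_eta * max_eta), 0.1 * max_eta)
        else:                    # eta too small: KL still above the step
            min_eta = eta
            eta = min(np.sqrt(min_eta * max_eta), 10.0 * min_eta)
    assert abs(kl(eta) - kl_step) < 0.1 * kl_step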
--------------------------------------------------------------------------------
/source/gps/algorithm/traj_opt/traj_opt_utils.py:
--------------------------------------------------------------------------------
1 | """ This file defines utilities for trajectory optimization. """
2 | import numpy as np
3 | import scipy as sp
4 |
5 | # Constants used in TrajOpt.
6 | DGD_MAX_ITER = 50
7 |
8 | def traj_distr_kl(new_mu, new_sigma, new_traj_distr, prev_traj_distr):
9 | """
10 | Compute KL divergence between new and previous trajectory
11 | distributions.
12 | Args:
13 | new_mu: T x (dX+dU), mean of new trajectory distribution.
14 | new_sigma: T x (dX+dU) x (dX+dU), covariance of new trajectory distribution.
15 | new_traj_distr: A linear Gaussian policy object, new
16 | distribution.
17 | prev_traj_distr: A linear Gaussian policy object, previous
18 | distribution.
19 | Returns:
20 | kl_div: The KL divergence between the new and previous
21 | trajectories.
22 | """
23 | # Constants.
24 | T = new_mu.shape[0]
25 | dU = new_traj_distr.dU
26 |
27 | # Initialize vector of divergences for each time step.
28 | kl_div = np.zeros(T)
29 |
30 | # Step through trajectory.
31 | for t in range(T):
32 | # Fetch matrices and vectors from trajectory distributions.
33 | mu_t = new_mu[t, :]
34 | sigma_t = new_sigma[t, :, :]
35 | K_prev = prev_traj_distr.K[t, :, :]
36 | K_new = new_traj_distr.K[t, :, :]
37 | k_prev = prev_traj_distr.k[t, :]
38 | k_new = new_traj_distr.k[t, :]
39 | chol_prev = prev_traj_distr.chol_pol_covar[t, :, :]
40 | chol_new = new_traj_distr.chol_pol_covar[t, :, :]
41 |
42 | # Compute log determinants and precision matrices.
43 | logdet_prev = 2 * sum(np.log(np.diag(chol_prev)))
44 | logdet_new = 2 * sum(np.log(np.diag(chol_new)))
45 | prc_prev = sp.linalg.solve_triangular(
46 | chol_prev, sp.linalg.solve_triangular(chol_prev.T, np.eye(dU),
47 | lower=True)
48 | )
49 | prc_new = sp.linalg.solve_triangular(
50 | chol_new, sp.linalg.solve_triangular(chol_new.T, np.eye(dU),
51 | lower=True)
52 | )
53 |
54 | # Construct matrix, vector, and constants.
55 | M_prev = np.r_[
56 | np.c_[K_prev.T.dot(prc_prev).dot(K_prev), -K_prev.T.dot(prc_prev)],
57 | np.c_[-prc_prev.dot(K_prev), prc_prev]
58 | ]
59 | M_new = np.r_[
60 | np.c_[K_new.T.dot(prc_new).dot(K_new), -K_new.T.dot(prc_new)],
61 | np.c_[-prc_new.dot(K_new), prc_new]
62 | ]
63 | v_prev = np.r_[K_prev.T.dot(prc_prev).dot(k_prev),
64 | -prc_prev.dot(k_prev)]
65 | v_new = np.r_[K_new.T.dot(prc_new).dot(k_new), -prc_new.dot(k_new)]
66 | c_prev = 0.5 * k_prev.T.dot(prc_prev).dot(k_prev)
67 | c_new = 0.5 * k_new.T.dot(prc_new).dot(k_new)
68 |
69 | # Compute KL divergence at timestep t.
70 | kl_div[t] = max(
71 | 0,
72 | -0.5 * mu_t.T.dot(M_new - M_prev).dot(mu_t) -
73 | mu_t.T.dot(v_new - v_prev) - c_new + c_prev -
74 | 0.5 * np.sum(sigma_t * (M_new-M_prev)) - 0.5 * logdet_new +
75 | 0.5 * logdet_prev
76 | )
77 |
78 | # Add up divergences across time to get total divergence.
79 | return np.sum(kl_div)
80 |
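A hypothetical numerical check (not repository code): for state-independent policies (K = 0) the per-timestep expression above reduces to the standard closed-form KL between the action Gaussians N(k_new, S_new) and N(k_prev, S_prev):

    import numpy as np

    dX, dU = 3, 2
    k_new, k_prev = np.random.randn(dU), np.random.randn(dU)
    A, B = np.random.randn(dU, dU), np.random.randn(dU, dU)
    S_new, S_prev = A.dot(A.T) + np.eye(dU), B.dot(B.T) + np.eye(dU)
    P_new, P_prev = np.linalg.inv(S_new), np.linalg.inv(S_prev)

    # Closed-form KL(new || prev) between the two action Gaussians.
    diff = k_prev - k_new
    kl_closed = 0.5 * (np.trace(P_prev.dot(S_new)) + diff.dot(P_prev).dot(diff)
                       - dU + np.linalg.slogdet(S_prev)[1]
                       - np.linalg.slogdet(S_new)[1])

    # The same quantity via the M/v/c decomposition used in traj_distr_kl.
    mu_t = np.r_[np.random.randn(dX), k_new]
    sigma_t = np.zeros((dX + dU, dX + dU))
    sigma_t[:dX, :dX] = np.eye(dX)
    sigma_t[dX:, dX:] = S_new

    def Mvc(P, k):               # K = 0 specialization of the blocks above
        M = np.zeros((dX + dU, dX + dU))
        M[dX:, dX:] = P
        return M, np.r_[np.zeros(dX), -P.dot(k)], 0.5 * k.dot(P).dot(k)

    M_new, v_new, c_new = Mvc(P_new, k_new)
    M_prev, v_prev, c_prev = Mvc(P_prev, k_prev)
    kl_decomp = (-0.5 * mu_t.dot(M_new - M_prev).dot(mu_t)
                 - mu_t.dot(v_new - v_prev) - c_new + c_prev
                 - 0.5 * np.sum(sigma_t * (M_new - M_prev))
                 - 0.5 * np.linalg.slogdet(S_new)[1]
                 + 0.5 * np.linalg.slogdet(S_prev)[1])
    assert np.isclose(kl_closed, kl_decomp)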
--------------------------------------------------------------------------------
/source/gps/gps_test.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import imp
3 | import os
4 | import os.path
5 | import sys
6 | import argparse
7 | import time
8 | import numpy as np
9 | import random
10 |
11 | # Add gps/python to path so that imports work.
12 | sys.path.append('/'.join(str.split(__file__, '/')[:-2]))
13 | import gps as gps_globals
14 | from gps.utility.display import Display
15 | from gps.sample.sample_list import SampleList
16 | from gps.algorithm.policy.tf_policy import TfPolicy
17 | from gps.algorithm.policy_opt.lto_model import fully_connected_tf_network
18 | from gps.algorithm.policy.csa_policy import CSAPolicy
19 |
20 | class GPSMain(object):
21 | #""" Main class to run algorithms and experiments. """
22 | def __init__(self, config):
23 | """
24 | Initialize GPSMain
25 | Args:
26 | config: Hyperparameters for experiment
27 | """
28 | self._hyperparams = config
29 | self._conditions = config['common']['conditions']
30 | if 'train_conditions' in config:
31 | self._train_idx = config['train_conditions']
32 | self._test_idx = config['test_conditions']
33 | else:
34 | self._train_idx = range(self._conditions)
35 | config['train_conditions'] = config['common']['conditions']
36 | self._hyperparams=config
37 | self._test_idx = self._train_idx
38 | self._test_fncs = config['test_functions']
39 |
40 | self._data_files_dir = config['common']['data_files_dir']
41 | self.policy_path = config['policy_path']
42 | self.network_config = config['algorithm']['policy_opt']['network_params']
43 | self.agent = config['agent']['type'](config['agent'])
44 | config['common']['log_filename'] += '_test'
45 | self.disp = Display(config['common']) # For logging
46 |
47 | config['algorithm']['agent'] = self.agent
48 | self.algorithm = config['algorithm']['type'](config['algorithm'])
49 |
50 | def run(self):
51 |
52 | #itr_start = 0
53 | #guided_steps = [0.5, 0.4, 0.3, 0.2, 0.1]
54 | self.algorithm.policy_opt.policy = TfPolicy.load_policy(policy_dict_path=self.policy_path, tf_generator=fully_connected_tf_network, network_config=self.network_config)
55 |
56 | #for itr in range(itr_start, self._hyperparams['iterations']):
57 | #for m, cond in enumerate(self._train_idx):
58 | # for i in range(self._hyperparams['num_samples']):
59 | # self._take_sample(itr, cond, m, i)
60 | # print('Iteration %d' % (itr))
61 | # traj_sample_lists = [self.agent.get_samples(cond, -self._hyperparams['num_samples']) for cond in self._train_idx]
62 | # # Clear agent samples.
63 | # self.agent.clear_samples()
64 | # self.algorithm.iteration(traj_sample_lists)
65 | #
66 | # #pol_sample_lists = self._take_policy_samples(self._train_idx)
67 | #
68 | # #self._prev_traj_costs, self._prev_pol_costs = self.disp.update(itr, self.algorithm, self.agent, traj_sample_lists, pol_sample_lists)
69 | # self.algorithm.policy_opt.policy.pickle_policy(self.algorithm.policy_opt._dO, self.algorithm.policy_opt._dU, self._data_files_dir + ('policy_itr_%02d' % itr))
70 | # self._test_performance(t_length=50)
71 | #self.algorithm.policy_opt.policy = TfPolicy.load_policy(policy_dict_path=self.policy_path, tf_generator=fully_connected_tf_network, network_config=self.network_config)
72 | self._test_performance(t_length=50)
73 |
74 | #pol_sample_lists = self._take_policy_samples(self._test_idx)
75 | #self._prev_traj_costs, self._prev_pol_costs = self.disp.update(self.algorithm, self.agent, self._test_idx, pol_sample_lists)
76 |
77 | if 'on_exit' in self._hyperparams:
78 | self._hyperparams['on_exit'](self._hyperparams)
79 |
80 | def _train_performance(self, guided_steps=0, t_length=50):
81 | pol_sample_lists = self._take_policy_samples(self._train_idx, guided_steps=guided_steps,t_length=t_length)
82 | for m, cond in enumerate(self._train_idx):
83 | for i in range(self._hyperparams['num_samples']):
84 | self._take_sample(11, cond, m, i, t_length=t_length)
85 | traj_sample_lists = [self.agent.get_samples(cond, -self._hyperparams['num_samples']) for cond in self._train_idx]
86 | self.agent.clear_samples()
87 | self.disp.update(self.algorithm, self.agent,self._test_idx, self._test_fncs, pol_sample_lists, traj_sample_lists)
88 |
89 |
90 | def _test_performance(self, guided_steps=0, t_length=50):
91 | pol_sample_lists = self._take_policy_samples(self._test_idx, guided_steps=guided_steps,t_length=t_length)
92 | for m, cond in enumerate(self._test_idx):
93 | for i in range(self._hyperparams['num_samples']):
94 | self._take_sample(11, cond, m, i, t_length=t_length)
95 | traj_sample_lists = [self.agent.get_samples(cond, -self._hyperparams['num_samples']) for cond in self._test_idx]
96 | self.agent.clear_samples()
97 | self.disp.update(self.algorithm, self.agent,self._test_idx, self._test_fncs, pol_sample_lists, traj_sample_lists)
98 |
99 | def _take_sample(self, itr, cond, m, i, t_length=50):
100 |
101 | if self.algorithm.iteration_count == 0:
102 | pol = self.algorithm.cur[m].traj_distr
103 | else:
104 | if self.algorithm._hyperparams['sample_on_policy']:
105 | pol = self.algorithm.policy_opt.policy
106 | else:
107 | if np.random.rand() < 1.0:
108 | pol = self.algorithm.cur[m].traj_distr
109 | else:
110 | pol = CSAPolicy(T=self.agent.T)
111 |
112 |
113 | self.agent.sample(pol, cond, t_length=t_length)
114 |
115 | def _take_policy_samples(self, cond_list, guided_steps=0, t_length=50):
116 | pol_samples = [[] for _ in range(len(cond_list))]
117 | for cond in range(len(cond_list)):
118 | for i in range(self._hyperparams['num_samples']):
119 | pol_samples[cond].append(self.agent.sample(self.algorithm.policy_opt.policy, cond_list[cond], start_policy=self.algorithm.cur[cond].traj_distr, save=False, ltorun=True, guided_steps=guided_steps, t_length=t_length))
120 | return [SampleList(samples) for samples in pol_samples]
121 |
122 | def main():
123 | parser = argparse.ArgumentParser(description='Run the Guided Policy Search algorithm.')
124 | parser.add_argument('experiment', type=str, help='experiment name')
125 | args = parser.parse_args()
126 |
127 | exp_name = args.experiment
128 |
129 | from gps import __file__ as gps_filepath
130 | gps_filepath = os.path.abspath(gps_filepath)
131 | gps_dir = '/'.join(str.split(gps_filepath, '/')[:-3]) + '/'
132 | exp_dir = gps_dir + 'examples/' + exp_name + '/'
133 | hyperparams_file = exp_dir + 'hyperparams.py'
134 |
135 | logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)
136 |
137 | if not os.path.exists(hyperparams_file):
138 | sys.exit("Experiment '%s' does not exist.\nDid you create '%s'?" % (exp_name, hyperparams_file))
139 |
140 |
141 | # May be used by hyperparams.py to load different conditions
142 | gps_globals.phase = "TRAIN"
143 | hyperparams = imp.load_source('hyperparams', hyperparams_file)
144 |
145 | seed = hyperparams.config.get('random_seed', 0)
146 | random.seed(seed)
147 | np.random.seed(seed)
148 |
149 | gps = GPSMain(hyperparams.config)
150 | gps.run()
151 |
152 | if 'on_exit' in hyperparams.config:
153 | hyperparams.config['on_exit'](hyperparams.config)
154 |
155 |
156 | if __name__ == "__main__":
157 | main()
158 |
159 |
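The test entry point only needs a checkpointed policy and the network generator. A sketch of restoring one directly (the path and the dim_hidden value are placeholders; the .pkl and *_tf_data.ckpt files are whatever TfPolicy.pickle_policy wrote during training):

    from gps.algorithm.policy.tf_policy import TfPolicy
    from gps.algorithm.policy_opt.lto_model import fully_connected_tf_network

    network_config = {'dim_hidden': [50, 50]}   # assumed to match training
    policy = TfPolicy.load_policy(
        policy_dict_path='examples/10BBOB/data_files/policy_itr_14.pkl',
        tf_generator=fully_connected_tf_network,
        network_config=network_config)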
--------------------------------------------------------------------------------
/source/gps/gps_train.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import imp
3 | import os
4 | import os.path
5 | import sys
6 | import argparse
7 | import time
8 | import numpy as np
9 | import random
10 |
11 | # Add gps/python to path so that imports work.
12 | sys.path.append('/'.join(str.split(__file__, '/')[:-2]))
13 | import gps as gps_globals
14 | from gps.utility.display import Display
15 | from gps.sample.sample_list import SampleList
16 | from gps.algorithm.policy.tf_policy import TfPolicy
17 | from gps.algorithm.policy_opt.lto_model import fully_connected_tf_network
18 | from gps.algorithm.policy.csa_policy import CSAPolicy
19 |
20 | class GPSMain(object):
21 | #""" Main class to run algorithms and experiments. """
22 | def __init__(self, config):
23 | """
24 | Initialize GPSMain
25 | Args:
26 | config: Hyperparameters for experiment
27 | """
28 | self._hyperparams = config
29 | self._conditions = config['common']['conditions']
30 | if 'train_conditions' in config:
31 | self._train_idx = config['train_conditions']
32 | self._test_idx = config['test_conditions']
33 | else:
34 | self._train_idx = range(self._conditions)
35 | config['train_conditions'] = config['common']['conditions']
36 | self._hyperparams=config
37 | self._test_idx = self._train_idx
38 | self._test_fncs = config['test_functions']
39 |
40 | self._data_files_dir = config['common']['data_files_dir']
41 | self.policy_path = config['policy_path']
42 | self.network_config = config['algorithm']['policy_opt']['network_params']
43 | self.agent = config['agent']['type'](config['agent'])
44 | self.disp = Display(config['common']) # For logging
45 |
46 | config['algorithm']['agent'] = self.agent
47 | self.algorithm = config['algorithm']['type'](config['algorithm'])
48 |
49 | def run(self):
50 |
51 | itr_start = 0
52 | #guided_steps = [0.5, 0.4, 0.3, 0.2, 0.1]
53 | #self.algorithm.policy_opt.policy = TfPolicy.load_policy(policy_dict_path=self.policy_path, tf_generator=fully_connected_tf_network, network_config=self.network_config)
54 |
55 | for itr in range(itr_start, self._hyperparams['iterations']):
56 | for m, cond in enumerate(self._train_idx):
57 | for i in range(self._hyperparams['num_samples']):
58 | self._take_sample(itr, cond, m, i)
59 | print('Iteration %d' % (itr))
60 | traj_sample_lists = [self.agent.get_samples(cond, -self._hyperparams['num_samples']) for cond in self._train_idx]
61 | # Clear agent samples.
62 | self.agent.clear_samples()
63 | self.algorithm.iteration(traj_sample_lists)
64 |
65 | #pol_sample_lists = self._take_policy_samples(self._train_idx)
66 |
67 | #self._prev_traj_costs, self._prev_pol_costs = self.disp.update(itr, self.algorithm, self.agent, traj_sample_lists, pol_sample_lists)
68 | self.algorithm.policy_opt.policy.pickle_policy(self.algorithm.policy_opt._dO, self.algorithm.policy_opt._dU, self._data_files_dir + ('policy_itr_%02d' % itr))
69 | self._test_performance(t_length=50, iteration=itr)
70 | #self.algorithm.policy_opt.policy = TfPolicy.load_policy(policy_dict_path=self.policy_path, tf_generator=fully_connected_tf_network, network_config=self.network_config)
71 | self._test_performance(t_length=50)
72 |
73 | #pol_sample_lists = self._take_policy_samples(self._test_idx)
74 | #self._prev_traj_costs, self._prev_pol_costs = self.disp.update(self.algorithm, self.agent, self._test_idx, pol_sample_lists)
75 |
76 | if 'on_exit' in self._hyperparams:
77 | self._hyperparams['on_exit'](self._hyperparams)
78 |
79 | def _train_performance(self, guided_steps=0, t_length=50):
80 | pol_sample_lists = self._take_policy_samples(self._train_idx, guided_steps=guided_steps,t_length=t_length)
81 | for m, cond in enumerate(self._train_idx):
82 | for i in range(self._hyperparams['num_samples']):
83 | self._take_sample(11, cond, m, i, t_length=t_length)
84 | traj_sample_lists = [self.agent.get_samples(cond, -self._hyperparams['num_samples']) for cond in self._train_idx]
85 | self.agent.clear_samples()
86 | self.disp.update(self.algorithm, self.agent,self._test_idx, self._test_fncs, pol_sample_lists, traj_sample_lists)
87 |
88 |
89 | def _test_performance(self, guided_steps=0, t_length=50, iteration=15):
90 | pol_sample_lists = self._take_policy_samples(self._test_idx, guided_steps=guided_steps,t_length=t_length)
91 | for m, cond in enumerate(self._test_idx):
92 | for i in range(self._hyperparams['num_samples']):
93 | self._take_sample(11, cond, m, i, t_length=t_length)
94 | traj_sample_lists = [self.agent.get_samples(cond, -self._hyperparams['num_samples']) for cond in self._test_idx]
95 | self.agent.clear_samples()
96 | self.disp.update(self.algorithm, self.agent,self._test_idx, self._test_fncs, pol_sample_lists, traj_sample_lists, iteration=iteration)
97 |
98 | def _take_sample(self, itr, cond, m, i, t_length=50):
99 |
100 | if self.algorithm.iteration_count == 0:
101 | pol = self.algorithm.cur[m].traj_distr
102 | else:
103 | if self.algorithm._hyperparams['sample_on_policy']:
104 | pol = self.algorithm.policy_opt.policy
105 | else:
106 | if np.random.rand() < 0.7:
107 | pol = self.algorithm.cur[m].traj_distr
108 | else:
109 | pol = CSAPolicy(T=self.agent.T)
110 |
111 |
112 | self.agent.sample(pol, cond, t_length=t_length)
113 |
114 | def _take_policy_samples(self, cond_list, guided_steps=0, t_length=50):
115 | pol_samples = [[] for _ in range(len(cond_list))]
116 | for cond in range(len(cond_list)):
117 | for i in range(self._hyperparams['num_samples']):
118 | pol_samples[cond].append(self.agent.sample(self.algorithm.policy_opt.policy, cond_list[cond], start_policy=self.algorithm.cur[cond].traj_distr, save=False, ltorun=True, guided_steps=guided_steps, t_length=t_length))
119 | return [SampleList(samples) for samples in pol_samples]
120 |
121 | def main():
122 | parser = argparse.ArgumentParser(description='Run the Guided Policy Search algorithm.')
123 | parser.add_argument('experiment', type=str, help='experiment name')
124 | args = parser.parse_args()
125 |
126 | exp_name = args.experiment
127 |
128 | from gps import __file__ as gps_filepath
129 | gps_filepath = os.path.abspath(gps_filepath)
130 | gps_dir = '/'.join(str.split(gps_filepath, '/')[:-3]) + '/'
131 | exp_dir = gps_dir + 'examples/' + exp_name + '/'
132 | hyperparams_file = exp_dir + 'hyperparams.py'
133 |
134 | logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)
135 |
136 | if not os.path.exists(hyperparams_file):
137 | sys.exit("Experiment '%s' does not exist.\nDid you create '%s'?" % (exp_name, hyperparams_file))
138 |
139 | # May be used by hyperparams.py to load different conditions
140 | gps_globals.phase = "TRAIN"
141 | hyperparams = imp.load_source('hyperparams', hyperparams_file)
142 |
143 | seed = hyperparams.config.get('random_seed', 0)
144 | random.seed(seed)
145 | np.random.seed(seed)
146 |
147 | gps = GPSMain(hyperparams.config)
148 | gps.run()
149 | print("Done with sampling")
150 | if 'on_exit' in hyperparams.config:
151 | hyperparams.config['on_exit'](hyperparams.config)
152 |
153 |
154 | if __name__ == "__main__":
155 | main()
156 |
157 |
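main() resolves the positional experiment argument to examples/<name>/hyperparams.py and imports it as a module whose config dict drives the run, i.e. `python gps_train.py BentCigar`. A sketch of doing the same programmatically (assumes the repository root as the working directory):

    import imp

    hyperparams = imp.load_source('hyperparams', 'examples/BentCigar/hyperparams.py')
    gps = GPSMain(hyperparams.config)
    gps.run()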
--------------------------------------------------------------------------------
/source/gps/proto/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/source/gps/proto/__init__.py
--------------------------------------------------------------------------------
/source/gps/proto/gps_pb2.py:
--------------------------------------------------------------------------------
1 | # Generated by the protocol buffer compiler. DO NOT EDIT!
2 | # source: gps.proto
3 |
4 | import sys
5 | _b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
6 | from google.protobuf.internal import enum_type_wrapper
7 | from google.protobuf import descriptor as _descriptor
8 | from google.protobuf import message as _message
9 | from google.protobuf import reflection as _reflection
10 | from google.protobuf import symbol_database as _symbol_database
11 | # @@protoc_insertion_point(imports)
12 |
13 | _sym_db = _symbol_database.Default()
14 |
15 |
16 |
17 |
18 | DESCRIPTOR = _descriptor.FileDescriptor(
19 | name='gps.proto',
20 | package='gps',
21 | syntax='proto2',
22 | serialized_options=None,
23 | serialized_pb=_b('\n\tgps.proto\x12\x03gps\"x\n\x06Sample\x12\t\n\x01T\x18\x01 \x01(\r\x12\n\n\x02\x64X\x18\x02 \x01(\r\x12\n\n\x02\x64U\x18\x03 \x01(\r\x12\n\n\x02\x64O\x18\x04 \x01(\r\x12\r\n\x01X\x18\x05 \x03(\x02\x42\x02\x10\x01\x12\r\n\x01U\x18\x06 \x03(\x02\x42\x02\x10\x01\x12\x0f\n\x03obs\x18\x07 \x03(\x02\x42\x02\x10\x01\x12\x10\n\x04meta\x18\x08 \x03(\x02\x42\x02\x10\x01*\xc7\x01\n\nSampleType\x12\n\n\x06\x41\x43TION\x10\x00\x12\x0b\n\x07\x43UR_LOC\x10\x01\x12\x17\n\x13PAST_OBJ_VAL_DELTAS\x10\x02\x12\r\n\tCUR_SIGMA\x10\x03\x12\n\n\x06\x43UR_PS\x10\x04\x12\x13\n\x0fPAST_LOC_DELTAS\x10\x05\x12\x0e\n\nPAST_SIGMA\x10\x06\x12\x0e\n\nMULTIMODAL\x10\x07\x12\x12\n\x0eGAUSSIAN_NOISE\x10\x08\x12\x10\n\x0c\x43\x41UCHY_NOISE\x10\t\x12\x11\n\rUNIFORM_NOISE\x10\n')
24 | )
25 |
26 | _SAMPLETYPE = _descriptor.EnumDescriptor(
27 | name='SampleType',
28 | full_name='gps.SampleType',
29 | filename=None,
30 | file=DESCRIPTOR,
31 | values=[
32 | _descriptor.EnumValueDescriptor(
33 | name='ACTION', index=0, number=0,
34 | serialized_options=None,
35 | type=None),
36 | _descriptor.EnumValueDescriptor(
37 | name='CUR_LOC', index=1, number=1,
38 | serialized_options=None,
39 | type=None),
40 | _descriptor.EnumValueDescriptor(
41 | name='PAST_OBJ_VAL_DELTAS', index=2, number=2,
42 | serialized_options=None,
43 | type=None),
44 | _descriptor.EnumValueDescriptor(
45 | name='CUR_SIGMA', index=3, number=3,
46 | serialized_options=None,
47 | type=None),
48 | _descriptor.EnumValueDescriptor(
49 | name='CUR_PS', index=4, number=4,
50 | serialized_options=None,
51 | type=None),
52 | _descriptor.EnumValueDescriptor(
53 | name='PAST_LOC_DELTAS', index=5, number=5,
54 | serialized_options=None,
55 | type=None),
56 | _descriptor.EnumValueDescriptor(
57 | name='PAST_SIGMA', index=6, number=6,
58 | serialized_options=None,
59 | type=None),
60 | _descriptor.EnumValueDescriptor(
61 | name='MULTIMODAL', index=7, number=7,
62 | serialized_options=None,
63 | type=None),
64 | _descriptor.EnumValueDescriptor(
65 | name='GAUSSIAN_NOISE', index=8, number=8,
66 | serialized_options=None,
67 | type=None),
68 | _descriptor.EnumValueDescriptor(
69 | name='CAUCHY_NOISE', index=9, number=9,
70 | serialized_options=None,
71 | type=None),
72 | _descriptor.EnumValueDescriptor(
73 | name='UNIFORM_NOISE', index=10, number=10,
74 | serialized_options=None,
75 | type=None),
76 | ],
77 | containing_type=None,
78 | serialized_options=None,
79 | serialized_start=141,
80 | serialized_end=340,
81 | )
82 | _sym_db.RegisterEnumDescriptor(_SAMPLETYPE)
83 |
84 | SampleType = enum_type_wrapper.EnumTypeWrapper(_SAMPLETYPE)
85 | ACTION = 0
86 | CUR_LOC = 1
87 | PAST_OBJ_VAL_DELTAS = 2
88 | CUR_SIGMA = 3
89 | CUR_PS = 4
90 | PAST_LOC_DELTAS = 5
91 | PAST_SIGMA = 6
92 | MULTIMODAL = 7
93 | GAUSSIAN_NOISE = 8
94 | CAUCHY_NOISE = 9
95 | UNIFORM_NOISE = 10
96 |
97 |
98 |
99 | _SAMPLE = _descriptor.Descriptor(
100 | name='Sample',
101 | full_name='gps.Sample',
102 | filename=None,
103 | file=DESCRIPTOR,
104 | containing_type=None,
105 | fields=[
106 | _descriptor.FieldDescriptor(
107 | name='T', full_name='gps.Sample.T', index=0,
108 | number=1, type=13, cpp_type=3, label=1,
109 | has_default_value=False, default_value=0,
110 | message_type=None, enum_type=None, containing_type=None,
111 | is_extension=False, extension_scope=None,
112 | serialized_options=None, file=DESCRIPTOR),
113 | _descriptor.FieldDescriptor(
114 | name='dX', full_name='gps.Sample.dX', index=1,
115 | number=2, type=13, cpp_type=3, label=1,
116 | has_default_value=False, default_value=0,
117 | message_type=None, enum_type=None, containing_type=None,
118 | is_extension=False, extension_scope=None,
119 | serialized_options=None, file=DESCRIPTOR),
120 | _descriptor.FieldDescriptor(
121 | name='dU', full_name='gps.Sample.dU', index=2,
122 | number=3, type=13, cpp_type=3, label=1,
123 | has_default_value=False, default_value=0,
124 | message_type=None, enum_type=None, containing_type=None,
125 | is_extension=False, extension_scope=None,
126 | serialized_options=None, file=DESCRIPTOR),
127 | _descriptor.FieldDescriptor(
128 | name='dO', full_name='gps.Sample.dO', index=3,
129 | number=4, type=13, cpp_type=3, label=1,
130 | has_default_value=False, default_value=0,
131 | message_type=None, enum_type=None, containing_type=None,
132 | is_extension=False, extension_scope=None,
133 | serialized_options=None, file=DESCRIPTOR),
134 | _descriptor.FieldDescriptor(
135 | name='X', full_name='gps.Sample.X', index=4,
136 | number=5, type=2, cpp_type=6, label=3,
137 | has_default_value=False, default_value=[],
138 | message_type=None, enum_type=None, containing_type=None,
139 | is_extension=False, extension_scope=None,
140 | serialized_options=_b('\020\001'), file=DESCRIPTOR),
141 | _descriptor.FieldDescriptor(
142 | name='U', full_name='gps.Sample.U', index=5,
143 | number=6, type=2, cpp_type=6, label=3,
144 | has_default_value=False, default_value=[],
145 | message_type=None, enum_type=None, containing_type=None,
146 | is_extension=False, extension_scope=None,
147 | serialized_options=_b('\020\001'), file=DESCRIPTOR),
148 | _descriptor.FieldDescriptor(
149 | name='obs', full_name='gps.Sample.obs', index=6,
150 | number=7, type=2, cpp_type=6, label=3,
151 | has_default_value=False, default_value=[],
152 | message_type=None, enum_type=None, containing_type=None,
153 | is_extension=False, extension_scope=None,
154 | serialized_options=_b('\020\001'), file=DESCRIPTOR),
155 | _descriptor.FieldDescriptor(
156 | name='meta', full_name='gps.Sample.meta', index=7,
157 | number=8, type=2, cpp_type=6, label=3,
158 | has_default_value=False, default_value=[],
159 | message_type=None, enum_type=None, containing_type=None,
160 | is_extension=False, extension_scope=None,
161 | serialized_options=_b('\020\001'), file=DESCRIPTOR),
162 | ],
163 | extensions=[
164 | ],
165 | nested_types=[],
166 | enum_types=[
167 | ],
168 | serialized_options=None,
169 | is_extendable=False,
170 | syntax='proto2',
171 | extension_ranges=[],
172 | oneofs=[
173 | ],
174 | serialized_start=18,
175 | serialized_end=138,
176 | )
177 |
178 | DESCRIPTOR.message_types_by_name['Sample'] = _SAMPLE
179 | DESCRIPTOR.enum_types_by_name['SampleType'] = _SAMPLETYPE
180 | _sym_db.RegisterFileDescriptor(DESCRIPTOR)
181 |
182 | Sample = _reflection.GeneratedProtocolMessageType('Sample', (_message.Message,), dict(
183 | DESCRIPTOR = _SAMPLE,
184 | __module__ = 'gps_pb2'
185 | # @@protoc_insertion_point(class_scope:gps.Sample)
186 | ))
187 | _sym_db.RegisterMessage(Sample)
188 |
189 |
190 | _SAMPLE.fields_by_name['X']._options = None
191 | _SAMPLE.fields_by_name['U']._options = None
192 | _SAMPLE.fields_by_name['obs']._options = None
193 | _SAMPLE.fields_by_name['meta']._options = None
194 | # @@protoc_insertion_point(module_scope)
195 |
196 |
--------------------------------------------------------------------------------
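The generated module above is consumed elsewhere in the codebase mainly as a vocabulary of sensor keys: the SampleType constants are plain integers used to index per-sensor data. A minimal sketch of both uses, assuming the gps package is on the import path:

    from gps.proto import gps_pb2
    from gps.proto.gps_pb2 import ACTION, CUR_LOC, CUR_SIGMA

    print(ACTION, CUR_LOC, CUR_SIGMA)   # -> 0 1 3 (plain integer enum values)

    # The Sample message can also be built and serialized directly.
    msg = gps_pb2.Sample(T=50, dX=10, dU=1, dO=12)
    msg.X.extend([0.0] * 10)            # packed repeated float field
    payload = msg.SerializeToString()

--------------------------------------------------------------------------------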
/source/gps/sample/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/source/gps/sample/__init__.py
--------------------------------------------------------------------------------
/source/gps/sample/sample.py:
--------------------------------------------------------------------------------
1 | """ This file defines the sample class. """
2 | import copy
3 | import numpy as np
4 | from gps.proto.gps_pb2 import ACTION
5 |
6 | class Sample(object):
7 | """
8 | Class that handles the representation of a trajectory and stores a
9 | single trajectory.
10 | Note: must be serializable for easy saving, no C++ references!
11 | """
12 | def __init__(self, agent):
13 | self.agent = agent
14 |
15 | self.T = agent.T
16 | self.dX = agent.dX
17 | self.dU = agent.dU
18 | self.dO = agent.dO
19 | self.dM = agent.dM
20 |
21 | # Dictionary containing the sample data from various sensors.
22 | self._data = {}
23 |
24 | self._X = np.empty((self.T, self.dX))
25 | self._X.fill(np.nan)
26 | self._obs = np.empty((self.T, self.dO))
27 | self._obs.fill(np.nan)
28 | self._meta = np.empty(self.dM)
29 | self._meta.fill(np.nan)
30 | self.trajectory = []
31 |
32 | def set(self, sensor_name, sensor_data, t=None):
33 | """ Set trajectory data for a particular sensor. """
34 | if t is None:
35 | self._data[sensor_name] = sensor_data
36 | self._X.fill(np.nan) # Invalidate existing X.
37 | self._obs.fill(np.nan) # Invalidate existing obs.
38 | self._meta.fill(np.nan) # Invalidate existing meta data.
39 | else:
40 | if sensor_name not in self._data:
41 |                 if sensor_data.size > 1:
42 |                     self._data[sensor_name] = \
43 |                         np.empty((self.T,) + sensor_data.shape)
44 |                 else:
45 |                     # Scalar readings are stored as a T x 1 column.
46 |                     self._data[sensor_name] = \
47 |                         np.empty((self.T, 1))
48 |                 # New buffers start as NaN so timesteps that were never
49 |                 # set remain detectable.
50 |                 self._data[sensor_name].fill(np.nan)
51 | self._data[sensor_name][t, :] = sensor_data
52 | self._X[t, :].fill(np.nan)
53 | self._obs[t, :].fill(np.nan)
54 |
55 | def get(self, sensor_name, t=None):
56 | """ Get trajectory data for a particular sensor. """
57 | return (self._data[sensor_name] if t is None
58 | else self._data[sensor_name][t, :])
59 |
60 | def get_X(self, t=None):
61 | """ Get the state. Put it together if not precomputed. """
62 | X = self._X if t is None else self._X[t, :]
63 | if np.any(np.isnan(X)):
64 | for data_type in self._data:
65 | if data_type not in self.agent.x_data_types:
66 | continue
67 | data = (self._data[data_type] if t is None
68 | else self._data[data_type][t, :])
69 | self.agent.pack_data_x(X, data, data_types=[data_type])
70 | return X
71 |
72 | def get_U(self, t=None):
73 | """ Get the action. """
74 | return self._data[ACTION] if t is None else self._data[ACTION][t, :]
75 |
76 | def get_obs(self, t=None):
77 | """ Get the observation. Put it together if not precomputed. """
78 | obs = self._obs if t is None else self._obs[t, :]
79 | if np.any(np.isnan(obs)):
80 | for data_type in self._data:
81 | if data_type not in self.agent.obs_data_types:
82 | continue
83 | if data_type in self.agent.meta_data_types:
84 | continue
85 | data = (self._data[data_type] if t is None
86 | else self._data[data_type][t, :])
87 | self.agent.pack_data_obs(obs, data, data_types=[data_type])
88 | return obs
89 |
90 | def get_meta(self):
91 | """ Get the meta data. Put it together if not precomputed. """
92 | meta = self._meta
93 | if np.any(np.isnan(meta)):
94 | for data_type in self._data:
95 | if data_type not in self.agent.meta_data_types:
96 | continue
97 | data = self._data[data_type]
98 | self.agent.pack_data_meta(meta, data, data_types=[data_type])
99 | return meta
100 |
101 | def __copy__(self):
102 | cls = self.__class__
103 |         result = cls.__new__(cls)
104 | result.__dict__.update(self.__dict__)
105 | return result
106 |
107 | def __deepcopy__(self, memo):
108 | cls = self.__class__
109 |         result = cls.__new__(cls)
110 | memo[id(self)] = result
111 | for name in self.__dict__:
112 | if name != "agent": # Do not deepcopy self.agent
113 | setattr(result, name, copy.deepcopy(self.__dict__[name], memo))
114 | return result
115 |
116 | # For pickling.
117 | def __getstate__(self):
118 | state = self.__dict__.copy()
119 | state.pop('agent')
120 | return state
121 |
122 | # For unpickling.
123 | def __setstate__(self, state):
124 | self.__dict__ = state
125 | self.__dict__['agent'] = None
126 |
--------------------------------------------------------------------------------
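Sample stores one buffer per sensor key, allocated lazily on the first set() call; the agent object only supplies dimension metadata and the pack_* helpers. A usage sketch with a stub agent (the stub is hypothetical and carries just the attributes Sample.__init__ reads; real agents live under gps/agent):

    import numpy as np
    from types import SimpleNamespace

    from gps.proto.gps_pb2 import ACTION, CUR_LOC
    from gps.sample.sample import Sample

    # Stub agent with only the attributes Sample.__init__ touches.
    agent = SimpleNamespace(T=5, dX=2, dU=2, dO=2, dM=0,
                            x_data_types=[CUR_LOC],
                            obs_data_types=[CUR_LOC],
                            meta_data_types=[])
    s = Sample(agent)
    for t in range(agent.T):
        s.set(CUR_LOC, np.zeros(2), t=t)   # allocates a T x 2 buffer on first use
        s.set(ACTION, np.ones(2), t=t)
    print(s.get_U().shape)                 # (5, 2)

--------------------------------------------------------------------------------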
/source/gps/sample/sample_list.py:
--------------------------------------------------------------------------------
1 | """ This file defines the sample list wrapper and sample writers. """
2 | import pickle
3 | import logging
4 |
5 | import numpy as np
6 |
7 |
8 | LOGGER = logging.getLogger(__name__)
9 |
10 |
11 | class SampleList(object):
12 | """ Class that handles writes and reads to sample data. """
13 | def __init__(self, samples):
14 | self._samples = samples
15 |
16 | def get(self, sensor_name, idx=None):
17 |         """ Returns an N x T x dSensor numpy array for the given sensor. """
18 | if idx is None:
19 | idx = range(len(self._samples))
20 | return np.asarray([self._samples[i].get(sensor_name) for i in idx])
21 |
22 | def get_X(self, idx=None):
23 | """ Returns N x T x dX numpy array of states. """
24 | if idx is None:
25 | idx = range(len(self._samples))
26 | return np.asarray([self._samples[i].get_X() for i in idx])
27 |
28 | def get_coordwise_X(self, coord=None, idx=None):
29 |         """ Returns N x num_coords x T x coordwise_dX numpy array of features. """
30 | if idx is None:
31 | idx = range(len(self._samples))
32 | return np.asarray([self._samples[i].get_coordwise_X(coord) for i in idx])
33 |
34 | def get_U(self, idx=None):
35 | """ Returns N x T x dU numpy array of actions. """
36 | if idx is None:
37 | idx = range(len(self._samples))
38 | return np.asarray([self._samples[i].get_U() for i in idx])
39 |
40 | def get_obs(self, idx=None):
41 | """ Returns N x T x dO numpy array of features. """
42 | if idx is None:
43 | idx = range(len(self._samples))
44 | return np.asarray([self._samples[i].get_obs() for i in idx])
45 |
46 | def get_coordwise_obs(self, coord=None, idx=None):
47 |         """ Returns N x num_coords x T x coordwise_dO numpy array of features. """
48 | if idx is None:
49 | idx = range(len(self._samples))
50 | return np.asarray([self._samples[i].get_coordwise_obs(coord) for i in idx])
51 |
52 | def get_samples(self, idx=None):
53 | """ Returns N sample objects. """
54 | if idx is None:
55 | idx = range(len(self._samples))
56 | return [self._samples[i] for i in idx]
57 |
58 | def num_samples(self):
59 | """ Returns number of samples. """
60 | return len(self._samples)
61 |
62 | # Convenience methods.
63 | def __len__(self):
64 | return self.num_samples()
65 |
66 | def __getitem__(self, idx):
67 | return self.get_samples([idx])[0]
68 |
69 |
70 | class PickleSampleWriter(object):
71 | """ Pickles samples into data_file. """
72 | def __init__(self, data_file):
73 | self._data_file = data_file
74 |
75 | def write(self, samples):
76 | """ Write samples to data file. """
77 | with open(self._data_file, 'wb') as data_file:
78 |             pickle.dump(samples, data_file)
79 |
80 |
81 | class SysOutWriter(object):
82 | """ Writes notifications to sysout on sample writes. """
83 | def __init__(self):
84 | pass
85 |
86 | def write(self, samples):
87 | """ Write number of samples to sysout. """
88 | LOGGER.debug('Collected %d samples', len(samples))
89 |
--------------------------------------------------------------------------------
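SampleList is a thin stacking wrapper: each accessor maps the corresponding per-sample getter over the list and stacks the results along a new leading N axis. A minimal sketch with a hypothetical stand-in for Sample objects:

    import numpy as np
    from gps.sample.sample_list import SampleList

    class FakeSample(object):
        """Stand-in exposing just the accessor SampleList maps over."""
        def get_U(self):
            return np.zeros((5, 2))    # T x dU

    sl = SampleList([FakeSample() for _ in range(3)])
    print(len(sl))                     # 3
    print(sl.get_U().shape)            # (3, 5, 2): N x T x dU

--------------------------------------------------------------------------------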
/source/gps/utility/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/automl/LTO-CMA/6f496999ce26a7f144b42dc98a88833bde4ce39b/source/gps/utility/__init__.py
--------------------------------------------------------------------------------
/source/gps/utility/display.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import json
3 | from gps.proto.gps_pb2 import CUR_LOC, ACTION, CUR_PS, CUR_SIGMA, PAST_SIGMA, PAST_OBJ_VAL_DELTAS, PAST_LOC_DELTAS
4 | import seaborn as sns
5 | import matplotlib.pyplot as plt
6 | sns.set()
7 |
8 | from matplotlib import rcParams
9 | rcParams["font.size"] = 30
10 | rcParams['text.usetex'] = False
11 | rcParams['font.family'] = 'serif'
12 | rcParams['figure.figsize'] = (16.0, 9.0)
13 | rcParams['figure.frameon'] = True
14 | rcParams['figure.edgecolor'] = 'k'
15 | rcParams['grid.color'] = 'k'
16 | rcParams['grid.linestyle'] = ':'
17 | rcParams['grid.linewidth'] = 0.5
18 | rcParams['axes.linewidth'] = 1
19 | rcParams['axes.edgecolor'] = 'k'
20 | rcParams['axes.grid.which'] = 'both'
21 | rcParams['legend.frameon'] = True
22 | rcParams['legend.framealpha'] = 1
23 |
24 | rcParams['ytick.major.size'] = 12
25 | rcParams['ytick.major.width'] = 1.5
26 | rcParams['ytick.minor.size'] = 6
27 | rcParams['ytick.minor.width'] = 1
28 | rcParams['xtick.major.size'] = 12
29 | rcParams['xtick.major.width'] = 1.5
30 | rcParams['xtick.minor.size'] = 6
31 | rcParams['xtick.minor.width'] = 1
32 |
33 | from datetime import datetime
34 | class Display(object):
35 |
36 | def __init__(self, hyperparams):
37 | self._hyperparams = hyperparams
38 | self._log_filename = self._hyperparams['log_filename']
39 | self._plot_filename = self._hyperparams['plot_filename']
40 | self._first_update = True
41 |
42 | def _output_column_titles(self, algorithm, policy_titles=False):
43 | """
44 |         Set up iteration data column titles: iteration, average cost, and for
45 |         each condition the mean cost over samples, step size, linear Gaussian
46 | controller entropies, and initial/final KL divergences for BADMM.
47 | """
48 | condition_titles = '%3s | %8s %12s' % ('', '', '')
49 | itr_data_fields = '%3s | %8s %12s' % ('itr', 'avg_cost', 'avg_pol_cost')
50 | for m in range(algorithm.M):
51 | condition_titles += ' | %8s %9s %-7d' % ('', 'condition', m)
52 | itr_data_fields += ' | %8s %8s %8s' % (' cost ', ' step ', 'entropy ')
53 | condition_titles += ' %8s %8s %8s' % ('', '', '')
54 | itr_data_fields += ' %8s %8s %8s %s ' % ('pol_cost', 'kl_div_i', 'kl_div_f', 'samples')
55 | self.append_output_text(condition_titles)
56 | self.append_output_text(itr_data_fields)
57 |
58 | def eval(self, sample, cur_cond_idx):
59 | """
60 |         Extract locations, step sizes, and objective values from a sample.
61 | Args:
62 | sample: A single sample
63 | """
64 | T = sample.T
65 | Du = sample.dU
66 | Dx = sample.dX
67 |
68 | cur_fcn = sample.agent.fcns[cur_cond_idx]['fcn_obj']
69 |
70 | final_l = np.zeros(T)
71 |
72 | x = sample.get(CUR_LOC)
73 | sigma_ = sample.get(CUR_SIGMA)
74 | sigma = [sigma_[i][0] for i in range(sigma_.shape[0])]
75 | _, dim = x.shape
76 |
77 |
78 | for t in range(T):
79 | final_l[t] = sample.trajectory[t]
80 |
81 | return x, sigma, final_l
82 |
83 | def get_sample_data(self, sample, cur_cond_idx):
84 | """
85 |         Collect recorded CMA-ES statistics (evolution path, past step sizes and deltas) from a sample.
86 | Args:
87 | sample: A single sample
88 | """
89 | T = sample.T
90 | Du = sample.dU
91 | Dx = sample.dX
92 |
93 | cur_fcn = sample.agent.fcns[cur_cond_idx]['fcn_obj']
94 |
95 | ps_ = sample.get(CUR_PS)
96 | ps = [ps_[i][0] for i in range(ps_.shape[0])]
97 | past_sigma = sample.get(PAST_SIGMA)
98 | past_obj_val_deltas = sample.get(PAST_OBJ_VAL_DELTAS)
99 | past_loc_deltas = sample.get(PAST_LOC_DELTAS)
100 |
101 | return ps, past_sigma, past_obj_val_deltas, past_loc_deltas
102 |
103 | def _update_iteration_data(self, algorithm, test_idx, test_fcns, pol_sample_lists, traj_sample_lists, iteration=15):
104 | """
105 | Update iteration data information: iteration, average cost, and for
106 |         each condition the mean cost over samples, step size, linear Gaussian
107 | controller entropies, and initial/final KL divergences for BADMM.
108 | """
109 | #data = {}
110 | #if pol_sample_lists is not None:
111 | # pol_costs = [[np.sum(algorithm.cost[m].eval(pol_sample_lists[m][i],True)[0]) for i in range(len(pol_sample_lists[m]))]
112 | # for m in range(len(cond_idx_list))]
113 | #if traj_sample_lists is not None:
114 | # traj_costs = [[np.sum(algorithm.cost[m].eval(traj_sample_lists[m][i],True)[0]) for i in range(len(traj_sample_lists[m]))]
115 | # for m in range(len(cond_idx_list))]
116 |
117 | #data['avg_cost'] = np.mean(pol_costs)
118 | #itr_data = '%s : %12.2f' % ('avg_cost', np.mean(pol_costs))
119 | #self.append_output_text(itr_data)
120 | #else:
121 | # pol_costs = None
122 | # itr_data = '%3d | %8.2f' % (itr, avg_cost)
123 | for m,idx in enumerate(test_idx):
124 | samples = len(pol_sample_lists[m])
125 | sample = np.random.randint(samples)
126 | sample_ = 'Sample_' + str(sample)
127 | test_fcn = test_fcns[m % len(test_fcns)]
128 | #itr_data = '%s%d' % ('Sample_', i)
129 | #self.append_output_text(itr_data)
130 | pol_avg_cost, pol_std, traj_avg_cost, traj_std, pol_avg_sigma, pol_sigma_std, traj_avg_sigma, traj_sigma_std, end_values = self.get_data(pol_sample_lists[m], traj_sample_lists[m], idx)
131 | self.plot_data(pol_sample_lists[m][0], traj_sample_lists[m][0], test_fcn, pol_avg_cost, traj_avg_cost, pol_avg_sigma, traj_avg_sigma, pol_std, traj_std, pol_sigma_std, traj_sigma_std, end_values, iteration=iteration)
132 |
133 | #data[function_str][sample_] = {'obj_values': list(obj_val)}
134 | #itr_data = '%s : %s ' % ('cur_loc', x)
135 | #self.append_output_text(itr_data)
136 | #itr_data = '%s : %s ' % ('obj_values', obj_val)
137 | #self.append_output_text(itr_data)
138 | #self.append_output_text(data)
139 | return pol_avg_cost
140 |
141 | def get_data(self, pol_samples, traj_samples, cur_cond):
142 | pol_avg_obj = []
143 | pol_avg_sigma = []
144 | traj_avg_obj = []
145 | traj_avg_sigma = []
146 | end_values = []
147 | for m in range(len(pol_samples)):
148 | _,p_sigma,p_obj_val = self.eval(pol_samples[m], cur_cond)
149 | _,t_sigma,t_obj_val = self.eval(traj_samples[m], cur_cond)
150 | pol_avg_obj.append(p_obj_val)
151 | pol_avg_sigma.append(p_sigma)
152 | traj_avg_obj.append(t_obj_val)
153 | traj_avg_sigma.append(t_sigma)
154 | end_values.append(p_obj_val[-1])
155 | return np.mean(pol_avg_obj, axis=0), np.std(pol_avg_obj, axis=0), np.mean(traj_avg_obj, axis=0), np.std(traj_avg_obj, axis=0), np.mean(pol_avg_sigma, axis=0), np.std(pol_avg_sigma, axis=0), np.mean(traj_avg_sigma, axis=0), np.std(traj_avg_sigma, axis=0), end_values
156 |
157 | def plot_data(self, pol_sample, traj_sample, cur_cond, pol_costs, traj_costs, pol_sigma, traj_sigma, pol_std, traj_std, pol_sigma_std, traj_sigma_std, end_values, iteration=15):
158 | #pol_ps, pol_past_sigma, pol_past_obj_val_deltas, pol_past_loc_deltas = self.get_sample_data(pol_sample,cur_cond)
159 | #traj_ps, traj_past_sigma, traj_past_obj_val_deltas, traj_past_loc_deltas = self.get_sample_data(traj_sample, cur_cond)
160 | log_text = {}
161 | log_text['Average costs LTO'] = list(pol_costs)
162 | log_text['Average costs controller'] = list(traj_costs)
163 | log_text['End values LTO'] = list(end_values)
164 | log_text['Sigma LTO'] = list(pol_sigma)
165 | log_text['Sigma controller'] = list(traj_sigma)
166 | log_text['Std costs LTO'] = list(pol_std)
167 | log_text['Std costs controller'] = list(traj_std)
168 | log_text['Std Sigma LTO'] = list(pol_sigma_std)
169 | log_text['Std Sigma controller'] = list(traj_sigma_std)
170 |
171 | # log_text += 'Ps LTO: %s \n' % (pol_ps)
172 | # log_text += 'Ps CSA: %s \n' % (traj_ps)
173 | # log_text += 'Past Sigma LTO: %s \n' % (pol_past_sigma)
174 | # log_text += 'Past Sigma CSA: %s \n' % (traj_past_sigma)
175 | # log_text += 'Past Obj Val Deltas LTO: %s \n' % (pol_past_obj_val_deltas)
176 | # log_text += 'Past Obj Val Deltas CSA: %s \n' % (traj_past_obj_val_deltas)
177 | # log_text += 'Past Loc Deltas LTO: %s \n' % (pol_past_loc_deltas)
178 | # log_text += 'Past Loc Deltas CSA: %s \n' % (traj_past_loc_deltas)
179 | self.append_output_text(log_text, iteration, fcn_name=cur_cond)
180 |
181 |         plt.tick_params(axis='x', which='minor')
182 |         plt.title(cur_cond, fontsize=50)
183 |         plt.xlabel("iteration", fontsize=50)
184 |         plt.ylabel("objective value", fontsize=50)
185 |         # Shaded bands show +/- one standard deviation over samples.
186 |         plt.fill_between(list(range(len(pol_costs))), np.subtract(pol_costs, pol_std), np.add(pol_costs, pol_std), color=sns.xkcd_rgb["medium green"], alpha=0.5)
187 |         plt.plot(pol_costs, color=sns.xkcd_rgb["medium green"],
188 |                  linewidth=4, label='LTO')
189 |         plt.fill_between(list(range(len(traj_costs))), np.subtract(traj_costs, traj_std), np.add(traj_costs, traj_std), color=sns.xkcd_rgb["denim blue"], alpha=0.5)
190 |         plt.plot(traj_costs, color=sns.xkcd_rgb["denim blue"],
191 |                  linewidth=4, label='LG Controller')
192 |         plt.legend(loc=0, fontsize=25, ncol=2)  # after plotting, so handles exist
193 | timestamp = datetime.now()
194 |         time = timestamp.strftime('%Y-%m-%d_%H-%M-%S')  # avoid ':' and spaces in filenames
195 | method = "Objective_value"
196 | plot_file = ('%s_%s_%s_%s.pdf' % (self._plot_filename, method, cur_cond, time))
197 | plt.savefig(plot_file, bbox_inches='tight')
198 | plt.show()
199 | plt.clf()
200 |
201 |         plt.tick_params(axis='x', which='minor')
202 |         plt.title(cur_cond, fontsize=50)
203 |         plt.xlabel("iteration", fontsize=50)
204 |         plt.ylabel("Step size", fontsize=50)
205 |         # Shaded bands show +/- one standard deviation over samples.
206 |         plt.fill_between(list(range(len(pol_sigma))), np.subtract(pol_sigma, pol_sigma_std), np.add(pol_sigma, pol_sigma_std), color=sns.xkcd_rgb["medium green"], alpha=0.5)
207 |         plt.plot(pol_sigma, color=sns.xkcd_rgb["medium green"],
208 |                  linewidth=4, label='LTO')
209 |         plt.fill_between(list(range(len(traj_sigma))), np.subtract(traj_sigma, traj_sigma_std), np.add(traj_sigma, traj_sigma_std), color=sns.xkcd_rgb["denim blue"], alpha=0.5)
210 |         plt.plot(traj_sigma, color=sns.xkcd_rgb["denim blue"],
211 |                  linewidth=4, label='LG Controller')
212 |         plt.legend(loc=0, fontsize=25, ncol=2)  # after plotting, so handles exist
213 | timestamp = datetime.now()
214 |         time = timestamp.strftime('%Y-%m-%d_%H-%M-%S')  # avoid ':' and spaces in filenames
215 | method = "Step size"
216 | plot_file = ('%s_%s_%s_%s.pdf' % (self._plot_filename, method, cur_cond, time))
217 | plt.savefig(plot_file, bbox_inches='tight')
218 | plt.show()
219 | plt.clf()
220 |
221 |
222 | def update(self, algorithm, agent, test_fcns, cond_idx_list, pol_sample_lists, traj_sample_lists, iteration=15):
223 |
224 | if self._first_update:
225 | #self._output_column_titles(algorithm)
226 | self._first_update = False
227 | #costs = [np.mean(np.sum(algorithm.prev[m].cs, axis=1)) for m in range(algorithm.M)]
228 |         pol_costs = self._update_iteration_data(algorithm, cond_idx_list, test_fcns, pol_sample_lists, traj_sample_lists, iteration=iteration)  # (test_idx, test_fcns) in the order the signature expects
229 |
230 | return pol_costs
231 |
232 | def append_output_text(self, text, iteration=15, fcn_name=""):
233 | log_file = '%s_iteration%s_%s.json' % (self._log_filename, iteration, fcn_name)
234 | with open(log_file, 'a') as f:
235 | #f.write('%s \n' % (str(text)))
236 | json.dump(text, f)
237 | f.write('\n')
238 | #print(text)
239 |
240 |
--------------------------------------------------------------------------------
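Display is configured by just two hyperparameter keys, 'log_filename' and 'plot_filename', which act as filename prefixes for the JSON logs and PDF plots. A sketch of the logging path (the /tmp paths are placeholders):

    from gps.utility.display import Display

    disp = Display({'log_filename': '/tmp/lto_log',
                    'plot_filename': '/tmp/lto_plot'})
    disp.append_output_text({'Average costs LTO': [1.0, 0.5]},
                            iteration=0, fcn_name='BentCigar')
    # Appends one JSON line to /tmp/lto_log_iteration0_BentCigar.json.

--------------------------------------------------------------------------------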
/source/gps/utility/general_utils.py:
--------------------------------------------------------------------------------
1 | """ This file defines general utility functions and classes. """
2 | import numpy as np
3 |
4 | class BundleType(object):
5 | """
6 | This class bundles many fields, similar to a record or a mutable
7 | namedtuple.
8 | """
9 | def __init__(self, variables):
10 | for var, val in variables.items():
11 | object.__setattr__(self, var, val)
12 |
13 | # Freeze fields so new ones cannot be set.
14 | def __setattr__(self, key, value):
15 | if not hasattr(self, key):
16 | raise AttributeError("%r has no attribute %s" % (self, key))
17 | object.__setattr__(self, key, value)
18 |
19 | def check_shape(value, expected_shape, name=''):
20 | """
21 |     Raises a ValueError if value.shape != expected_shape.
22 | Args:
23 | value: Matrix to shape check.
24 | expected_shape: A tuple or list of integers.
25 | name: An optional name to add to the exception message.
26 | """
27 | if value.shape != tuple(expected_shape):
28 | raise ValueError('Shape mismatch %s: Expected %s, got %s' %
29 | (name, str(expected_shape), str(value.shape)))
30 |
31 |
32 | def finite_differences(func, inputs, func_output_shape=(), epsilon=1e-5):
33 | """
34 | Computes gradients via finite differences.
35 | derivative = (func(x+epsilon) - func(x-epsilon)) / (2*epsilon)
36 | Args:
37 | func: Function to compute gradient of. Inputs and outputs can be
38 | arbitrary dimension.
39 | inputs: Vector value to compute gradient at.
40 | func_output_shape: Shape of the output of func. Default is
41 | empty-tuple, which works for scalar-valued functions.
42 | epsilon: Difference to use for computing gradient.
43 | Returns:
44 | Gradient vector of each dimension of func with respect to each
45 | dimension of input.
46 | """
47 | gradient = np.zeros(inputs.shape+func_output_shape)
48 | for idx, _ in np.ndenumerate(inputs):
49 | test_input = np.copy(inputs)
50 | test_input[idx] += epsilon
51 | obj_d1 = func(test_input)
52 | assert obj_d1.shape == func_output_shape
53 | test_input = np.copy(inputs)
54 | test_input[idx] -= epsilon
55 | obj_d2 = func(test_input)
56 | assert obj_d2.shape == func_output_shape
57 | diff = (obj_d1 - obj_d2) / (2 * epsilon)
58 | gradient[idx] += diff
59 | return gradient
60 |
61 |
62 | def approx_equal(a, b, threshold=1e-5):
63 | """
64 | Return whether two numbers are equal within an absolute threshold.
65 | Returns:
66 | True if a and b are equal within threshold.
67 | """
68 | return np.all(np.abs(a - b) < threshold)
69 |
70 |
71 | def extract_condition(hyperparams, m):
72 | """
73 | Pull the relevant hyperparameters corresponding to the specified
74 | condition, and return a new hyperparameter dictionary.
75 | """
76 | return {var: val[m] if isinstance(val, list) else val
77 | for var, val in hyperparams.items()}
78 |
79 | def replicate_var(val, num_conds):
80 | return val if isinstance(val, list) else [val] * num_conds
81 |
--------------------------------------------------------------------------------
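finite_differences implements the symmetric difference quotient (f(x+eps) - f(x-eps)) / (2*eps) per input coordinate, so for a quadratic it is exact up to rounding. That makes a quick self-check cheap:

    import numpy as np
    from gps.utility.general_utils import approx_equal, finite_differences

    # The gradient of f(x) = x.x is 2x; the central difference should match.
    x = np.array([1.0, -2.0, 0.5])
    grad = finite_differences(lambda v: v.dot(v), x)
    assert approx_equal(grad, 2 * x, threshold=1e-6)

--------------------------------------------------------------------------------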
/source/gps/utility/gmm.py:
--------------------------------------------------------------------------------
1 | """ This file defines a Gaussian mixture model class. """
2 | import logging
3 |
4 | import numpy as np
5 | import scipy.linalg
6 |
7 |
8 | LOGGER = logging.getLogger(__name__)
9 |
10 |
11 | def logsum(vec, axis=0, keepdims=True):
12 | maxv = np.max(vec, axis=axis, keepdims=keepdims)
13 | maxv[maxv == -float('inf')] = 0
14 | return np.log(np.sum(np.exp(vec-maxv), axis=axis, keepdims=keepdims)) + maxv
15 |
16 |
17 | class GMM(object):
18 | """ Gaussian Mixture Model. """
19 | def __init__(self, init_sequential=False, eigreg=False, warmstart=True):
20 | self.init_sequential = init_sequential
21 | self.eigreg = eigreg
22 | self.warmstart = warmstart
23 | self.sigma = None
24 |
25 | def inference(self, pts):
26 | """
27 |         Evaluate the mixture as a prior: returns mean mu0, covariance Phi and normalized strengths m, n0.
28 | Args:
29 | pts: A N x D array of points.
30 | """
31 | # Compute posterior cluster weights.
32 | logwts = self.clusterwts(pts)
33 |
34 | # Compute posterior mean and covariance.
35 | mu0, Phi = self.moments(logwts)
36 |
37 | # Set hyperparameters.
38 | m = self.N
39 | n0 = m - 2 - mu0.shape[0]
40 |
41 | # Normalize.
42 | m = float(m) / self.N
43 | n0 = float(n0) / self.N
44 | return mu0, Phi, m, n0
45 |
46 | def estep(self, data):
47 | """
48 | Compute log observation probabilities under GMM.
49 | Args:
50 | data: A N x D array of points.
51 | Returns:
52 | logobs: A N x K array of log probabilities (for each point
53 | on each cluster).
54 | """
55 | # Constants.
56 | K = self.sigma.shape[0]
57 | Di = data.shape[1]
58 | N = data.shape[0]
59 |
60 | # Compute probabilities.
61 | data = data.T
62 | mu = self.mu[:, 0:Di].T
63 | mu_expand = np.expand_dims(np.expand_dims(mu, axis=1), axis=1)
64 | assert mu_expand.shape == (Di, 1, 1, K)
65 | # Calculate for each point distance to each cluster.
66 | data_expand = np.tile(data, [K, 1, 1, 1]).transpose([2, 3, 1, 0])
67 | diff = data_expand - np.tile(mu_expand, [1, N, 1, 1])
68 | assert diff.shape == (Di, N, 1, K)
69 | Pdiff = np.zeros_like(diff)
70 | cconst = np.zeros((1, 1, 1, K))
71 |
72 | for i in range(K):
73 | U = scipy.linalg.cholesky(self.sigma[i, :Di, :Di],
74 | check_finite=False)
75 | Pdiff[:, :, 0, i] = scipy.linalg.solve_triangular(
76 | U, scipy.linalg.solve_triangular(
77 | U.T, diff[:, :, 0, i], lower=True, check_finite=False
78 | ), check_finite=False
79 | )
80 | cconst[0, 0, 0, i] = -np.sum(np.log(np.diag(U))) - 0.5 * Di * \
81 | np.log(2 * np.pi)
82 |
83 | logobs = -0.5 * np.sum(diff * Pdiff, axis=0, keepdims=True) + cconst
84 | assert logobs.shape == (1, N, 1, K)
85 | logobs = logobs[0, :, 0, :] + self.logmass.T
86 | return logobs
87 |
88 | def moments(self, logwts):
89 | """
90 | Compute the moments of the cluster mixture with logwts.
91 | Args:
92 | logwts: A K x 1 array of log cluster probabilities.
93 | Returns:
94 | mu: A (D,) mean vector.
95 | sigma: A D x D covariance matrix.
96 | """
97 | # Exponentiate.
98 | wts = np.exp(logwts)
99 |
100 | # Compute overall mean.
101 | mu = np.sum(self.mu * wts, axis=0)
102 |
103 | # Compute overall covariance.
104 | # For some reason this version works way better than the "right"
105 | # one... could we be computing xxt wrong?
106 | diff = self.mu - np.expand_dims(mu, axis=0)
107 | diff_expand = np.expand_dims(diff, axis=1) * \
108 | np.expand_dims(diff, axis=2)
109 | wts_expand = np.expand_dims(wts, axis=2)
110 | sigma = np.sum((self.sigma + diff_expand) * wts_expand, axis=0)
111 | return mu, sigma
112 |
113 | def clusterwts(self, data):
114 | """
115 | Compute cluster weights for specified points under GMM.
116 | Args:
117 | data: An N x D array of points
118 | Returns:
119 | A K x 1 array of average cluster log probabilities.
120 | """
121 | # Compute probability of each point under each cluster.
122 | logobs = self.estep(data)
123 |
124 | # Renormalize to get cluster weights.
125 | logwts = logobs - logsum(logobs, axis=1)
126 |
127 | # Average the cluster probabilities.
128 | logwts = logsum(logwts, axis=0) - np.log(data.shape[0])
129 | return logwts.T
130 |
131 | def update(self, data, K, max_iterations=100):
132 | """
133 | Run EM to update clusters.
134 | Args:
135 | data: An N x D data matrix, where N = number of data points.
136 | K: Number of clusters to use.
137 | """
138 | # Constants.
139 | N = data.shape[0]
140 | Do = data.shape[1]
141 |
142 | LOGGER.debug('Fitting GMM with %d clusters on %d points', K, N)
143 |
144 | if (not self.warmstart or self.sigma is None or
145 | K != self.sigma.shape[0]):
146 | # Initialization.
147 | LOGGER.debug('Initializing GMM.')
148 | self.sigma = np.zeros((K, Do, Do))
149 | self.mu = np.zeros((K, Do))
150 | self.logmass = np.log(1.0 / K) * np.ones((K, 1))
151 | self.mass = (1.0 / K) * np.ones((K, 1))
152 | self.N = data.shape[0]
153 | N = self.N
154 |
155 | # Set initial cluster indices.
156 | if not self.init_sequential:
157 | cidx = np.random.randint(0, K, size=(1, N))
158 | else:
159 | raise NotImplementedError()
160 |
161 | # Initialize.
162 | for i in range(K):
163 | cluster_idx = (cidx == i)[0]
164 | mu = np.mean(data[cluster_idx, :], axis=0)
165 | diff = (data[cluster_idx, :] - mu).T
166 | sigma = (1.0 / cluster_idx.shape[0]) * (diff.dot(diff.T))
167 | self.mu[i, :] = mu
168 | self.sigma[i, :, :] = sigma + np.eye(Do) * 2e-6
169 |
170 | prevll = -float('inf')
171 | for itr in range(max_iterations):
172 | # E-step: compute cluster probabilities.
173 | logobs = self.estep(data)
174 |
175 | # Compute log-likelihood.
176 | ll = np.sum(logsum(logobs, axis=1))
177 | LOGGER.debug('GMM itr %d/%d. Log likelihood: %f',
178 | itr, max_iterations, ll)
179 | if ll < prevll:
180 | LOGGER.debug('Log-likelihood decreased! Ending on itr=%d/%d',
181 | itr, max_iterations)
182 | break
183 | if np.abs(ll-prevll) < 1e-5*prevll:
184 | LOGGER.debug('GMM converged on itr=%d/%d',
185 | itr, max_iterations)
186 | break
187 | prevll = ll
188 |
189 | # Renormalize to get cluster weights.
190 | logw = logobs - logsum(logobs, axis=1)
191 | assert logw.shape == (N, K)
192 |
193 | # Renormalize again to get weights for refitting clusters.
194 | logwn = logw - logsum(logw, axis=0)
195 | assert logwn.shape == (N, K)
196 | w = np.exp(logwn)
197 |
198 | # M-step: update clusters.
199 | # Fit cluster mass.
200 | self.logmass = logsum(logw, axis=0).T
201 | self.logmass = self.logmass - logsum(self.logmass, axis=0)
202 | assert self.logmass.shape == (K, 1)
203 | self.mass = np.exp(self.logmass)
204 | # Reboot small clusters.
205 | w[:, (self.mass < (1.0 / K) * 1e-4)[:, 0]] = 1.0 / N
206 | # Fit cluster means.
207 | w_expand = np.expand_dims(w, axis=2)
208 | data_expand = np.expand_dims(data, axis=1)
209 | self.mu = np.sum(w_expand * data_expand, axis=0)
210 | # Fit covariances.
211 | wdata = data_expand * np.sqrt(w_expand)
212 | assert wdata.shape == (N, K, Do)
213 | for i in range(K):
214 | # Compute weighted outer product.
215 | XX = wdata[:, i, :].T.dot(wdata[:, i, :])
216 | mu = self.mu[i, :]
217 | self.sigma[i, :, :] = XX - np.outer(mu, mu)
218 |
219 | if self.eigreg: # Use eigenvalue regularization.
220 | raise NotImplementedError()
221 | else: # Use quick and dirty regularization.
222 | sigma = self.sigma[i, :, :]
223 | self.sigma[i, :, :] = 0.5 * (sigma + sigma.T) + \
224 | 1e-6 * np.eye(Do)
225 |
--------------------------------------------------------------------------------
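GMM.update runs EM from a random hard assignment, and GMM.inference then summarizes the fitted mixture as prior parameters (mean mu0, covariance Phi, and normalized strengths m, n0) for the dynamics and policy priors. A toy sketch on synthetic data:

    import numpy as np
    from gps.utility.gmm import GMM

    np.random.seed(0)
    # Two well-separated 2-D blobs.
    data = np.vstack([np.random.randn(100, 2) - 3.0,
                      np.random.randn(100, 2) + 3.0])
    gmm = GMM()
    gmm.update(data, K=2)                  # EM refit
    mu0, Phi, m, n0 = gmm.inference(data)  # prior moments + strengths
    print(mu0.shape, Phi.shape, m, n0)     # -> (2,) (2, 2) 1.0 0.98

--------------------------------------------------------------------------------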