├── .gitignore
├── CMakeLists.txt
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── cmake
└── FindOrFetch.cmake
├── mjpc
├── CMakeLists.txt
├── barkour
│ ├── CMakeLists.txt
│ ├── barkour.cc
│ ├── barkour.h
│ ├── barkour_vb.xml.patch
│ ├── task_barkour.xml
│ └── world.xml
├── headless_server.cc
└── ui_server.cc
├── pyproject.toml
├── requirements.txt
├── setup.py
└── src
└── language_to_reward_2023
├── confirmation_safe_executor.py
├── conversation.py
├── platforms
├── barkour
│ ├── barkour_execution.py
│ ├── barkour_execution_test.py
│ ├── barkour_l2r_task_client.py
│ ├── barkour_l2r_task_client_test.py
│ ├── barkour_l2r_tasks.py
│ ├── barkour_l2r_tasks_test.py
│ ├── default_task_parameters.py
│ └── prompts
│ │ ├── prompt_coder_only.py
│ │ ├── prompt_low_level.py
│ │ └── prompt_thinker_coder.py
├── llm_prompt.py
├── process_code.py
├── process_code_test.py
└── task_evaluator.py
├── safe_executor.py
├── task_clients.py
├── task_configs.py
└── user_interaction.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # Distribution / packaging
7 | .Python
8 | build/
9 | develop-eggs/
10 | dist/
11 | downloads/
12 | eggs/
13 | .eggs/
14 | lib/
15 | lib64/
16 | parts/
17 | sdist/
18 | var/
19 | wheels/
20 | share/python-wheels/
21 | *.egg-info/
22 | .installed.cfg
23 | *.egg
24 | MANIFEST
25 |
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.16)
2 |
3 | # INTERPROCEDURAL_OPTIMIZATION is enforced when enabled.
4 | set(CMAKE_POLICY_DEFAULT_CMP0069 NEW)
5 | # Default to GLVND if available.
6 | set(CMAKE_POLICY_DEFAULT_CMP0072 NEW)
7 |
8 | # This line has to appear before 'PROJECT' in order to be able to disable incremental linking
9 | set(MSVC_INCREMENTAL_DEFAULT ON)
10 |
11 | project(
12 | language_to_reward_2023
13 | VERSION 0.1.0
14 | DESCRIPTION "Language to Reward: 2023 paper"
15 | HOMEPAGE_URL "https://github.com/google-deepmind/language_to_reward_2023"
16 | )
17 |
18 | enable_language(C)
19 | enable_language(CXX)
20 | if(APPLE)
21 | enable_language(OBJC)
22 | enable_language(OBJCXX)
23 | endif()
24 |
25 | list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake")
26 |
27 | include(FindOrFetch)
28 |
29 | find_package(Threads REQUIRED)
30 |
31 | set(BUILD_SHARED_LIBS_OLD ${BUILD_SHARED_LIBS})
32 | set(BUILD_SHARED_LIBS
33 | OFF
34 | CACHE INTERNAL "Build SHARED libraries"
35 | )
36 |
37 | set(L2R_MJPC_GIT_TAG
38 | c5c7ead065b7f4034ab265a13023231900dbfaa7
39 | CACHE STRING "Git revision for MuJoCo MPC."
40 | )
41 |
42 | set(L2R_MENAGERIE_GIT_TAG
43 | main
44 | CACHE STRING "Git revision for MuJoCo Menagerie."
45 | )
46 |
47 | set(MJPC_BUILD_TESTS OFF)
48 | set(MJPC_GRPC_BUILD_TESTS OFF)
49 | set(MJPC_BUILD_GRPC_SERVICE ON)
50 | set(PYMJPC_BUILD_TESTS OFF)
51 |
52 | findorfetch(
53 | USE_SYSTEM_PACKAGE
54 | OFF
55 | PACKAGE_NAME
56 | mjpc
57 | LIBRARY_NAME
58 | mjpc
59 | GIT_REPO
60 | https://github.com/google-deepmind/mujoco_mpc.git
61 | GIT_TAG
62 | ${L2R_MJPC_GIT_TAG}
63 | TARGETS
64 | libmjpc
65 | mjpc_agent_service
66 | mjpc_ui_agent_service
67 | )
68 |
69 | set(BUILD_SHARED_LIBS
70 | ${BUILD_SHARED_LIBS_OLD}
71 | CACHE BOOL "Build SHARED libraries" FORCE
72 | )
73 | unset(BUILD_SHARED_LIBS_OLD)
74 |
75 | FetchContent_Declare(
76 | menagerie
77 | GIT_REPOSITORY https://github.com/google-deepmind/mujoco_menagerie.git
78 | GIT_TAG ${L2R_MENAGERIE_GIT_TAG}
79 | )
80 |
81 | FetchContent_GetProperties(menagerie)
82 | if(NOT menagerie_POPULATED)
83 | FetchContent_Populate(menagerie)
84 | endif()
85 |
86 | add_subdirectory(mjpc)
87 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # How to Contribute
2 |
3 | ## Contributor License Agreement
4 |
5 | Contributions to this project must be accompanied by a Contributor License
6 | Agreement. You (or your employer) retain the copyright to your contribution,
7 | this simply gives us permission to use and redistribute your contributions as
8 | part of the project. Head over to to see
9 | your current agreements on file or to sign a new one.
10 |
11 | You generally only need to submit a CLA once, so if you've already submitted one
12 | (even if it was for a different project), you probably don't need to do it
13 | again.
14 |
15 | ## Code reviews
16 |
17 | All submissions, including submissions by project members, require review. We
18 | use GitHub pull requests for this purpose. Consult
19 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
20 | information on using pull requests.
21 |
22 | ## Community Guidelines
23 |
24 | This project follows [Google's Open Source Community
25 | Guidelines](https://opensource.google/conduct/).
26 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # language_to_reward_2023
2 |
3 | This repository contains code to reproduce the results in the paper
4 | "Language to Rewards for Robotic Skill Synthesis".
5 |
6 | [Project website](https://language-to-reward.github.io/)
7 | [arxiv paper](https://arxiv.org/abs/2306.08647)
8 | [video](https://www.youtube.com/watch?v=7KiKg0rdSSQ&feature=youtu.be)
9 |
10 | ## Abstract
11 | Large language models (LLMs) have demonstrated exciting progress in acquiring diverse new capabilities through in-context learning, ranging from logical reasoning to code-writing. Robotics researchers have also explored using LLMs to advance the capabilities of robotic control. However, since low-level robot actions are hardware-dependent and underrepresented in LLM training corpora, existing efforts in applying LLMs to robotics have largely treated LLMs as semantic planners or relied on human-engineered control primitives to interface with the robot. On the other hand, reward functions are shown to be flexible representations that can be optimized for control policies to achieve diverse tasks, while their semantic richness makes them suitable to be specified by LLMs.
12 |
13 | In this work, we introduce a new paradigm that harnesses this realization by utilizing LLMs to define reward parameters that can be optimized and accomplish variety of robotic tasks. Using reward as the intermediate interface generated by LLMs, we can effectively bridge the gap between high-level language instructions or corrections to low-level robot actions. Meanwhile, combining this with a real-time optimizer, MuJoCo MPC, empowers an interactive behavior creation experience where users can immediately observe the results and provide feedback to the system. To systematically evaluate the performance of our proposed method, we designed a total of 17 tasks for a simulated quadruped robot and a dexterous manipulator robot. We demonstrate that our proposed method reliably tackles 90% of the designed tasks, while a baseline using primitive skills as the interface with Code-as-policies achieves 50% of the tasks. We further validated our method on a real robot arm where complex manipulation skills such as non-prehensile pushing emerge through our interactive system.
14 |
15 | ## Installation and Usage
16 |
17 | ```sh
18 | # Create a Python venv, make sure to use Python version >= 3.9
19 | python3 --version
20 | python3 -m venv /tmp/l2r
21 | . /tmp/l2r/bin/activate
22 |
23 | # Build and install mujoco_mpc
24 | git clone https://github.com/google-deepmind/mujoco_mpc.git
25 | cd mujoco_mpc
26 | # Latest MJPC commit at the time of release. Using `main` might work too.
27 | git checkout c5c7ead065b7f4034ab265a13023231900dbfaa7
28 |
29 | # Compile mujoco_mpc from source and install, this step can take a few minutes.
30 | pip install ./python
31 |
32 | cd ..
33 |
34 | # Build and install language_to_reward_2023
35 | git clone https://github.com/google-deepmind/language_to_reward_2023.git
36 | cd language_to_reward_2023
37 |
38 | # Compile language_to_reward_2023 from source and install, this step can take a few minutes.
39 | pip install .
40 |
41 | # Run the demo
42 | python -m language_to_reward_2023.user_interaction --api_key=
43 | ```
44 |
45 | ## Notes
46 |
47 | In the published paper, we showed the robot moon walking. This was done with an older version of the MuJoCo model, which had unrealistically strong actuators. In the model used in this repository, moonwalking is not possible, regretfully.
48 |
49 |
50 | ## Citing this work
51 |
52 | ```
53 | @article{yu2023language,
54 | title={Language to Rewards for Robotic Skill Synthesis},
55 | author={Yu, Wenhao and Gileadi, Nimrod and Fu, Chuyuan and Kirmani, Sean and Lee, Kuang-Huei and Gonzalez Arenas, Montse and Lewis Chiang, Hao-Tien and Erez, Tom and Hasenclever, Leonard and Humplik, Jan and Ichter, Brian and Xiao, Ted and Xu, Peng and Zeng, Andy and Zhang, Tingnan and Heess, Nicolas and Sadigh, Dorsa and Tan, Jie and Tassa, Yuval and Xia, Fei},
56 | year={2023},
57 | journal={Conference of Robot Learning 2023},
58 | }
59 | ```
60 |
61 | ## License and disclaimer
62 |
63 | Copyright 2023 DeepMind Technologies Limited
64 |
65 | All software is licensed under the Apache License, Version 2.0 (Apache 2.0);
66 | you may not use this file except in compliance with the Apache 2.0 license.
67 | You may obtain a copy of the Apache 2.0 license at:
68 | https://www.apache.org/licenses/LICENSE-2.0
69 |
70 | All other materials are licensed under the Creative Commons Attribution 4.0
71 | International License (CC-BY). You may obtain a copy of the CC-BY license at:
72 | https://creativecommons.org/licenses/by/4.0/legalcode
73 |
74 | Unless required by applicable law or agreed to in writing, all software and
75 | materials distributed here under the Apache 2.0 or CC-BY licenses are
76 | distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
77 | either express or implied. See the licenses for the specific language governing
78 | permissions and limitations under those licenses.
79 |
80 | This is not an official Google product.
81 |
--------------------------------------------------------------------------------
/cmake/FindOrFetch.cmake:
--------------------------------------------------------------------------------
1 | #.rst:
2 | # FindOrFetch
3 | # ----------------------
4 | #
5 | # Find or fetch a package in order to satisfy target dependencies.
6 | #
7 | # FindOrFetch([USE_SYSTEM_PACKAGE [ON/OFF]]
8 | # [PACKAGE_NAME [name]]
9 | # [LIBRARY_NAME [name]]
10 | # [GIT_REPO [repo]]
11 | # [GIT_TAG [tag]]
12 | # [PATCH_COMMAND [cmd] [args]]
13 | # [TARGETS [targets]]
14 | # [EXCLUDE_FROM_ALL])
15 | #
16 | # The command has the following parameters:
17 | #
18 | # Arguments:
19 | # - ``USE_SYSTEM_PACKAGE`` one-value argument on whether to search for the
20 | # package in the system (ON) or whether to fetch the library using
21 | # FetchContent from the specified Git repository (OFF). Note that
22 | # FetchContent variables will override this behaviour.
23 | # - ``PACKAGE_NAME`` name of the system-package. Ignored if
24 | # ``USE_SYSTEM_PACKAGE`` is ``OFF``.
25 | # - ``LIBRARY_NAME`` name of the library. Ignored if
26 | # ``USE_SYSTEM_PACKAGE`` is ``ON``.
27 | # - ``GIT_REPO`` git repository to fetch the library from. Ignored if
28 | # ``USE_SYSTEM_PACKAGE`` is ``ON``.
29 | # - ``GIT_TAG`` tag reference when fetching the library from the git
30 | # repository. Ignored if ``USE_SYSTEM_PACKAGE`` is ``ON``.
31 | # - ``PATCH_COMMAND`` Specifies a custom command to patch the sources after an
32 | # update. See https://cmake.org/cmake/help/latest/module/ExternalProject.html#command:externalproject_add
33 | # for details on the parameter.
34 | # - ``TARGETS`` list of targets to be satisfied. If any of these targets are
35 | # not currently defined, this macro will attempt to either find or fetch the
36 | # package.
37 | # - ``EXCLUDE_FROM_ALL`` if specified, the targets are not added to the ``all``
38 | # metatarget.
39 | #
40 | # Note: if ``USE_SYSTEM_PACKAGE`` is ``OFF``, FetchContent will be used to
41 | # retrieve the specified targets. It is possible to specify any variable in
42 | # https://cmake.org/cmake/help/latest/module/FetchContent.html#variables to
43 | # override this macro behaviour.
44 |
45 | if(COMMAND FindOrFetch)
46 | return()
47 | endif()
48 |
49 | macro(FindOrFetch)
50 | if(NOT FetchContent)
51 | include(FetchContent)
52 | endif()
53 |
54 | # Parse arguments.
55 | set(options EXCLUDE_FROM_ALL)
56 | set(one_value_args
57 | USE_SYSTEM_PACKAGE
58 | PACKAGE_NAME
59 | LIBRARY_NAME
60 | GIT_REPO
61 | GIT_TAG
62 | )
63 | set(multi_value_args PATCH_COMMAND TARGETS)
64 | cmake_parse_arguments(
65 | _ARGS
66 | "${options}"
67 | "${one_value_args}"
68 | "${multi_value_args}"
69 | ${ARGN}
70 | )
71 |
72 | # Check if all targets are found.
73 | if(NOT _ARGS_TARGETS)
74 | message(FATAL_ERROR "mujoco::FindOrFetch: TARGETS must be specified.")
75 | endif()
76 |
77 | set(targets_found TRUE)
78 | message(CHECK_START
79 | "mujoco::FindOrFetch: checking for targets in package `${_ARGS_PACKAGE_NAME}`"
80 | )
81 | foreach(target ${_ARGS_TARGETS})
82 | if(NOT TARGET ${target})
83 | message(CHECK_FAIL "target `${target}` not defined.")
84 | set(targets_found FALSE)
85 | break()
86 | endif()
87 | endforeach()
88 |
89 | # If targets are not found, use `find_package` or `FetchContent...` to get it.
90 | if(NOT targets_found)
91 | if(${_ARGS_USE_SYSTEM_PACKAGE})
92 | message(CHECK_START
93 | "mujoco::FindOrFetch: finding `${_ARGS_PACKAGE_NAME}` in system packages..."
94 | )
95 | find_package(${_ARGS_PACKAGE_NAME} REQUIRED)
96 | message(CHECK_PASS "found")
97 | else()
98 | message(CHECK_START
99 | "mujoco::FindOrFetch: Using FetchContent to retrieve `${_ARGS_LIBRARY_NAME}`"
100 | )
101 | FetchContent_Declare(
102 | ${_ARGS_LIBRARY_NAME}
103 | GIT_REPOSITORY ${_ARGS_GIT_REPO}
104 | GIT_TAG ${_ARGS_GIT_TAG}
105 | GIT_SHALLOW FALSE
106 | PATCH_COMMAND ${_ARGS_PATCH_COMMAND}
107 | UPDATE_DISCONNECTED TRUE
108 | )
109 | if(${_ARGS_EXCLUDE_FROM_ALL})
110 | FetchContent_GetProperties(${_ARGS_LIBRARY_NAME})
111 | if(NOT ${${_ARGS_LIBRARY_NAME}_POPULATED})
112 | FetchContent_Populate(${_ARGS_LIBRARY_NAME})
113 | add_subdirectory(
114 | ${${_ARGS_LIBRARY_NAME}_SOURCE_DIR} ${${_ARGS_LIBRARY_NAME}_BINARY_DIR}
115 | EXCLUDE_FROM_ALL
116 | )
117 | endif()
118 | else()
119 | FetchContent_MakeAvailable(${_ARGS_LIBRARY_NAME})
120 | endif()
121 | message(CHECK_PASS "Done")
122 | endif()
123 | else()
124 | message(CHECK_PASS "found")
125 | endif()
126 | endmacro()
127 |
--------------------------------------------------------------------------------
/mjpc/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | add_executable(
2 | l2r_headless_server
3 | headless_server.cc
4 | barkour/barkour.cc
5 | barkour/barkour.h
6 | )
7 |
8 | target_link_libraries(
9 | l2r_headless_server
10 | absl::check
11 | absl::flags
12 | absl::flags_parse
13 | absl::log
14 | absl::status
15 | absl::strings
16 | mujoco::mujoco
17 | libmjpc
18 | mjpc_agent_service
19 | )
20 |
21 | target_include_directories(l2r_headless_server PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/..)
22 | message(AGENT_SERVICE_COMPILE_OPTIONS=${AGENT_SERVICE_COMPILE_OPTIONS})
23 | target_compile_options(l2r_headless_server PUBLIC ${AGENT_SERVICE_COMPILE_OPTIONS})
24 | target_link_options(l2r_headless_server PRIVATE ${AGENT_SERVICE_LINK_OPTIONS})
25 |
26 | add_executable(
27 | l2r_ui_server
28 | ui_server.cc
29 | barkour/barkour.cc
30 | barkour/barkour.h
31 | )
32 |
33 | target_link_libraries(
34 | l2r_ui_server
35 | absl::check
36 | absl::flags
37 | absl::flags_parse
38 | absl::log
39 | absl::status
40 | absl::strings
41 | mujoco::mujoco
42 | libmjpc
43 | mjpc_ui_agent_service
44 | )
45 |
46 | target_include_directories(l2r_ui_server PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/..)
47 | target_compile_options(l2r_ui_server PUBLIC ${AGENT_SERVICE_COMPILE_OPTIONS})
48 | target_link_options(l2r_ui_server PRIVATE ${AGENT_SERVICE_LINK_OPTIONS})
49 |
50 | add_subdirectory(barkour)
51 | add_dependencies(l2r_headless_server copy_barkour_resources)
52 | add_dependencies(l2r_ui_server copy_barkour_resources)
53 |
--------------------------------------------------------------------------------
/mjpc/barkour/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Copy the model files to the binary directory to make them available to the
2 | # built binary.
3 |
4 | # Start by pulling the Barkour vB model from MuJoCo Menagerie
5 | add_custom_target(
6 | copy_menagerie_barkour_resources ALL
7 | COMMAND ${CMAKE_COMMAND} -E copy
8 | ${menagerie_SOURCE_DIR}/google_barkour_vb/barkour_vb.xml
9 | ${CMAKE_CURRENT_BINARY_DIR}/barkour_vb.xml
10 | COMMAND ${CMAKE_COMMAND} -E copy_directory
11 | ${menagerie_SOURCE_DIR}/google_barkour_vb/assets
12 | ${CMAKE_CURRENT_BINARY_DIR}/assets
13 | # apply a patch to barkour_vb.xml
14 | COMMAND patch -o ${CMAKE_CURRENT_BINARY_DIR}/barkour_vb_modified.xml
15 | ${CMAKE_CURRENT_BINARY_DIR}/barkour_vb.xml
16 | <${CMAKE_CURRENT_SOURCE_DIR}/barkour_vb.xml.patch
17 | COMMENT "Copying Barkour vB from MuJoCo Menagerie")
18 |
19 | # Add further task XML from this repository
20 | add_custom_target(copy_barkour_resources ALL
21 | COMMAND ${CMAKE_COMMAND} -E copy
22 | ${CMAKE_CURRENT_SOURCE_DIR}/task_barkour.xml
23 | ${CMAKE_CURRENT_SOURCE_DIR}/world.xml
24 | ${CMAKE_CURRENT_BINARY_DIR}
25 | COMMENT "Copying Barkour task XML into binary directory")
26 |
27 | add_dependencies(copy_barkour_resources copy_menagerie_barkour_resources)
28 |
--------------------------------------------------------------------------------
/mjpc/barkour/barkour.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2023 DeepMind Technologies Limited
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | // ==============================================================================
15 |
16 | #include "mjpc/barkour/barkour.h"
17 |
18 | #include
19 | #include
20 | #include
21 |
22 | #include
23 | #include
24 | #include
25 | #include
26 |
27 | namespace mjpc::language2reward {
28 |
29 | namespace { // anonymous namespace for local definitions
30 |
31 | constexpr double kNominalFootFromShoulder[4][2] = {{0.05, 0.0},
32 | {-0.05, 0.0},
33 | {0.05, -0.0},
34 | {-0.05, -0.0}};
35 |
36 | // ============ reusable utilities ============
37 |
38 | void RotateVectorAroundZaxis(const double* vec, const double angle,
39 | double* rotated_vec) {
40 | rotated_vec[0] = mju_cos(angle) * vec[0] - mju_sin(angle) * vec[1];
41 | rotated_vec[1] = mju_sin(angle) * vec[0] + mju_cos(angle) * vec[1];
42 | rotated_vec[2] = vec[2];
43 | }
44 |
45 | // Unwind the roll current angle to be close to last measured roll angle
46 | // to prevent sudden jump of the measured sensor data.
47 | double UnwindAngle(const double current_roll, const double last_roll) {
48 | double vec[3] = {current_roll - 2 * mjPI, current_roll,
49 | current_roll + 2 * mjPI};
50 | double closest = vec[0];
51 | double min_diff = std::abs(last_roll - vec[0]);
52 | for (int i = 1; i < 3; i++) {
53 | double diff = std::abs(last_roll - vec[i]);
54 | if (diff < min_diff) {
55 | min_diff = diff;
56 | closest = vec[i];
57 | }
58 | }
59 | return closest;
60 | }
61 |
62 | } // namespace
63 |
64 | std::string Barkour::XmlPath() const {
65 | return absl::StrCat(
66 | mjpc::GetExecutableDir(), "/../mjpc/barkour/world.xml");
67 | }
68 |
69 | std::string Barkour::Name() const { return "Barkour"; }
70 |
71 | void Barkour::ResidualFn::GetNormalizedFootTrajectory(
72 | double duty_ratio, double gait_frequency, double normalized_phase_offset,
73 | double time, double amplitude_forward, double amplitude_vertical,
74 | double* target_pos_x, double* target_pos_z) const {
75 | double step_duration = 1.0 / fmax(gait_frequency, 0.0001);
76 | double stance_duration = step_duration * fmin(fmax(duty_ratio, 0.01), 0.99);
77 | double flight_duration = step_duration - stance_duration;
78 | double gait_start_time = normalized_phase_offset * step_duration;
79 | double in_phase_time = fmod((time - gait_start_time), step_duration);
80 | if (in_phase_time < 0.0) in_phase_time += step_duration;
81 | double normalized_phase = 0.0;
82 | bool is_flight_phase = false;
83 | if (in_phase_time < flight_duration) {
84 | normalized_phase = in_phase_time / flight_duration;
85 | is_flight_phase = true;
86 | } else {
87 | normalized_phase = (in_phase_time - flight_duration) / stance_duration;
88 | is_flight_phase = false;
89 | }
90 | if (is_flight_phase) {
91 | *target_pos_z += mju_sin(normalized_phase * mjPI) * amplitude_vertical;
92 | *target_pos_x +=
93 | mju_sin(normalized_phase * mjPI - mjPI / 2) * amplitude_forward;
94 | } else {
95 | *target_pos_x +=
96 | -mju_sin(normalized_phase * mjPI - mjPI / 2) * amplitude_forward;
97 | }
98 | }
99 |
100 | // ============ residual ============
101 | void Barkour::ResidualFn::Residual(const mjModel* model,
102 | const mjData* data,
103 | double* residual) const {
104 | // start counter
105 | int counter = 0;
106 |
107 | // common info
108 | double* torso_xmat = data->xmat + 9 * torso_body_id_;
109 | double* compos = SensorByName(model, data, "torso_subtreecom");
110 | CHECK(compos != nullptr);
111 | double pitch = atan2(-torso_xmat[6], mju_sqrt(torso_xmat[7] * torso_xmat[7] +
112 | torso_xmat[8] * torso_xmat[8]));
113 | double roll = atan2(torso_xmat[7], torso_xmat[8]);
114 |
115 | double torso_heading[2] = {torso_xmat[0], torso_xmat[3]};
116 | if (parameters_[target_body_pitch_id_] > 1.0 ||
117 | parameters_[target_body_pitch_id_] < -1.0) {
118 | int handstand = parameters_[target_body_pitch_id_] < 0.0 ? -1 : 1;
119 | torso_heading[0] = handstand * torso_xmat[2];
120 | torso_heading[1] = handstand * torso_xmat[5];
121 | }
122 |
123 | double* comvel = SensorByName(model, data, "torso_subtreelinvel");
124 | double* angvel = SensorByName(model, data, "torso_angvel");
125 | double linvel_ego[3];
126 | RotateVectorAroundZaxis(comvel,
127 | -mju_atan2(torso_heading[1], torso_heading[0]),
128 | linvel_ego); // Need double check
129 |
130 | // get foot positions
131 | double* foot_pos[kNumFoot];
132 | for (BkFoot foot : kFootAll) {
133 | foot_pos[foot] = data->site_xpos + 3 * foot_site_id_[foot];
134 | }
135 |
136 | double* shoulder_pos[kNumFoot];
137 | for (BkFoot foot : kFootAll)
138 | shoulder_pos[foot] = data->xpos + 3 * shoulder_body_id_[foot];
139 |
140 | // average foot position
141 | double avg_foot_pos[3];
142 | AverageFootPos(avg_foot_pos, foot_pos);
143 |
144 | // Residual Calculations
145 | // ---------- BodyHeight ----------
146 | // quadrupedal or bipedal height of torso
147 | double* body = SensorByName(model, data, "body");
148 | residual[counter++] =
149 | body[2] - parameters_[target_body_height_id_];
150 |
151 | // ---------- BodyXYPos ----------
152 | double* pos = SensorByName(model, data, "torso_subtreecom");
153 | CHECK(pos != nullptr);
154 | residual[counter++] = pos[0] - parameters_[goal_position_x_id_];
155 | residual[counter++] = pos[1] - parameters_[goal_position_y_id_];
156 |
157 | // ---------- Heading ----------
158 | mju_normalize(torso_heading, 2);
159 | residual[counter++] =
160 | torso_heading[0] - mju_cos(parameters_[target_body_heading_id_]);
161 | residual[counter++] =
162 | torso_heading[1] - mju_sin(parameters_[target_body_heading_id_]);
163 |
164 | // ---------- Body Roll and Pitch ----------
165 | residual[counter++] = pitch - parameters_[target_body_pitch_id_];
166 | residual[counter++] =
167 | UnwindAngle(roll, current_base_roll_) - parameters_[target_body_roll_id_];
168 | residual[counter++] = 0;
169 |
170 | // ---------- Forward Velocity ----------
171 | residual[counter++] =
172 | linvel_ego[0] - parameters_[target_forward_velocity_id_];
173 |
174 | // ---------- Sideways Velocity ---------
175 | residual[counter++] =
176 | linvel_ego[1] - parameters_[target_sideways_velocity_id_];
177 |
178 | // ---------- Upward Velocity -----------
179 | residual[counter++] = linvel_ego[2] - parameters_[target_upward_velocity_id_];
180 |
181 | // ---------- Roll Pitch Yaw Velocity ----------
182 | residual[counter++] = angvel[0] - parameters_[target_roll_velocity_id_];
183 | residual[counter++] = angvel[2] - parameters_[target_turning_velocity_id_];
184 |
185 | // ---------- Feet pose tracking -------------
186 | double foot_current_poses_yaw_aligned[4][3];
187 | double shoulder_yaw_aligned[4][3];
188 | double heading_angle = mju_atan2(torso_heading[1], torso_heading[0]);
189 |
190 | for (int foot_id = 0; foot_id < 4; foot_id++) {
191 | double shoulder_pos_local[3];
192 | mju_sub3(shoulder_pos_local, shoulder_pos[foot_id], compos);
193 | RotateVectorAroundZaxis(shoulder_pos_local, -heading_angle,
194 | shoulder_yaw_aligned[foot_id]);
195 | double foot_pos_local[3];
196 | mju_sub3(foot_pos_local, foot_pos[foot_id], compos);
197 | RotateVectorAroundZaxis(foot_pos_local, -heading_angle,
198 | foot_current_poses_yaw_aligned[foot_id]);
199 | foot_current_poses_yaw_aligned[foot_id][2] = foot_pos[foot_id][2];
200 | }
201 | for (int dim = 0; dim < 3; dim++) {
202 | for (int foot_id = 0; foot_id < 4; foot_id++) {
203 | double foot_pos_delta = parameters_[dist_from_nominal_ids_[foot_id][dim]];
204 | double foot_target_pos;
205 | if (dim < 2) {
206 | if (foot_id >= 2 && dim == 1) {
207 | foot_pos_delta *= -1;
208 | }
209 | foot_target_pos = shoulder_yaw_aligned[foot_id][dim] +
210 | kNominalFootFromShoulder[foot_id][dim] +
211 | foot_pos_delta;
212 | } else {
213 | // dim == 2
214 | foot_target_pos = foot_pos_delta;
215 | }
216 | residual[counter++] =
217 | foot_target_pos - foot_current_poses_yaw_aligned[foot_id][dim];
218 | }
219 | }
220 |
221 | // ---------- Gait ----------
222 | // Get tracking target for the feet gait patter
223 | for (int foot_id = 0; foot_id < 4; foot_id++) {
224 | double target_pos_x = shoulder_yaw_aligned[foot_id][0] +
225 | kNominalFootFromShoulder[foot_id][0] +
226 | parameters_[dist_from_nominal_ids_[foot_id][0]];
227 | double target_pos_z = parameters_[dist_from_nominal_ids_[foot_id][2]];
228 | GetNormalizedFootTrajectory(parameters_[ground_to_air_ratio_ids_[foot_id]],
229 | parameters_[stepping_frequency_ids_[foot_id]],
230 | parameters_[phase_offset_ids_[foot_id]],
231 | data->time,
232 | parameters_[amplitudes_forward_ids_[foot_id]],
233 | parameters_[amplitudes_vertical_ids_[foot_id]],
234 | &target_pos_x, &target_pos_z);
235 | residual[counter++] =
236 | target_pos_z - foot_current_poses_yaw_aligned[foot_id][2];
237 | residual[counter++] =
238 | target_pos_x - foot_current_poses_yaw_aligned[foot_id][0];
239 | }
240 |
241 | // ---------- Balance ----------
242 | double capture_point[3];
243 | double fall_time = mju_sqrt(2 * parameters_[target_body_height_id_] / 9.81);
244 | mju_addScl3(capture_point, compos, comvel, fall_time);
245 | residual[counter++] = capture_point[0] - avg_foot_pos[0];
246 | residual[counter++] = capture_point[1] - avg_foot_pos[1];
247 |
248 | // ---------- Effort ----------
249 | mju_scl(residual + counter, data->actuator_force, 2e-2, model->nu);
250 | counter += model->nu;
251 |
252 | // ---------- Posture ----------
253 | double* home = KeyQPosByName(model, data, "home");
254 | int walker_first_joint_adr = model->jnt_qposadr[walker_root_joint_id_] + 7;
255 | mju_sub(residual + counter,
256 | data->qpos + walker_first_joint_adr,
257 | home + walker_first_joint_adr, model->nu);
258 | for (BkFoot foot : kFootAll) {
259 | for (int joint = 0; joint < 3; joint++) {
260 | residual[counter + 3 * foot + joint] *= kJointPostureGain[joint];
261 | }
262 | }
263 | // loosen the "hands" in Biped mode
264 | if (parameters_[target_body_pitch_id_] < -1.0) {
265 | residual[counter + 4] *= 0.03;
266 | residual[counter + 5] *= 0.03;
267 | residual[counter + 10] *= 0.03;
268 | residual[counter + 11] *= 0.03;
269 | } else if (parameters_[target_body_pitch_id_] > 1.0) {
270 | residual[counter + 1] *= 0.03;
271 | residual[counter + 2] *= 0.03;
272 | residual[counter + 7] *= 0.03;
273 | residual[counter + 8] *= 0.03;
274 | }
275 | counter += model->nu;
276 |
277 | // PoseTracking
278 | for (int i = 0; i < 12; i++) {
279 | residual[counter + i] =
280 | parameters_[target_joint_angle_ids_[i]] -
281 | *(data->qpos + walker_first_joint_adr + i);
282 | }
283 | counter += model->nu;
284 |
285 | // ---------- Act dot -----------------
286 | // encourage actions to be similar to the previous actions.
287 | if (model->na > 0) {
288 | mju_copy(residual + counter, data->act_dot, model->na);
289 | counter += model->na;
290 | } else {
291 | for (int i=0; i < 12; i++) {
292 | residual[counter++] = 0.;
293 | }
294 | }
295 |
296 | // sensor dim sanity check
297 | CheckSensorDim(model, counter);
298 | }
299 |
300 | // ============ transition ============
301 | void Barkour::TransitionLocked(mjModel* model, mjData* data) {
302 | // Update goal position for mocap object
303 | data->mocap_pos[3 * goal_mocap_id_] =
304 | parameters[residual_.goal_position_x_id_];
305 | data->mocap_pos[3 * goal_mocap_id_ + 1] =
306 | parameters[residual_.goal_position_y_id_];
307 |
308 | // update the body roll variable
309 | double* torso_xmat = data->xmat + 9 * residual_.torso_body_id_;
310 | // xmat[7,8] are the projections of the local y and z axes onto the global
311 | // z axis, respectively.
312 | double roll_angle = atan2(torso_xmat[7], torso_xmat[8]);
313 | // This is needed to handle when the roll_angle jumps between PI and -PI,
314 | // which creates undesired discontinuity in the optimization.
315 | residual_.current_base_roll_ =
316 | UnwindAngle(roll_angle, residual_.current_base_roll_);
317 | }
318 |
319 | // ============ task-state utilities ============
320 | // save task-related ids
321 | void Barkour::ResetLocked(const mjModel* model) {
322 | const int* mocap_id = model->body_mocapid;
323 | goal_mocap_id_ = mocap_id[mj_name2id(model, mjOBJ_XBODY, "goal")];
324 | residual_.ResetLocked(model);
325 | }
326 |
327 | void Barkour::ResidualFn::ResetLocked(const mjModel* model) {
328 | // ---------- model identifiers ----------
329 | torso_body_id_ = mj_name2id(model, mjOBJ_XBODY, "torso");
330 | CHECK_GT(torso_body_id_, 0);
331 | // foot site ids
332 | int foot_index = 0;
333 | // for (const char* footname : {"FL", "HL", "FR", "HR"}) {
334 | for (const char* footname :
335 | {"foot_front_left", "foot_hind_left",
336 | "foot_front_right", "foot_hind_right"}) {
337 | int foot_id = mj_name2id(model, mjOBJ_SITE, footname);
338 | if (foot_id < 0) mju_error_s("site '%s' not found", footname);
339 | foot_site_id_[foot_index] = foot_id;
340 | foot_index++;
341 | }
342 |
343 | // shoulder ids
344 | shoulder_body_id_[kFootFL] =
345 | mj_name2id(model, mjOBJ_BODY, "upper_leg_front_left");
346 | shoulder_body_id_[kFootHL] =
347 | mj_name2id(model, mjOBJ_BODY, "upper_leg_hind_left");
348 | shoulder_body_id_[kFootFR] =
349 | mj_name2id(model, mjOBJ_BODY, "upper_leg_front_right");
350 | shoulder_body_id_[kFootHR] =
351 | mj_name2id(model, mjOBJ_BODY, "upper_leg_hind_right");
352 | CHECK_GT(shoulder_body_id_[kFootFL], 0);
353 | CHECK_GT(shoulder_body_id_[kFootHL], 0);
354 | CHECK_GT(shoulder_body_id_[kFootFR], 0);
355 | CHECK_GT(shoulder_body_id_[kFootHR], 0);
356 |
357 | // get the reward parameter_ids
358 | target_body_height_id_ = ParameterIndex(model, "TargetBodyHeight");
359 | goal_position_x_id_ = ParameterIndex(model, "GoalPositionX");
360 | goal_position_y_id_ = ParameterIndex(model, "GoalPositionY");
361 | target_body_heading_id_ = ParameterIndex(model, "TargetHeading");
362 | target_body_pitch_id_ = ParameterIndex(model, "TargetPitch");
363 | target_body_roll_id_ = ParameterIndex(model, "TargetRoll");
364 | target_forward_velocity_id_ = ParameterIndex(model, "TargetForwardVelocity");
365 | target_sideways_velocity_id_ =
366 | ParameterIndex(model, "TargetSidewaysVelocity");
367 | target_upward_velocity_id_ = ParameterIndex(model, "TargetUpwardVelocity");
368 | target_turning_velocity_id_ = ParameterIndex(model, "TargetTurningVelocity");
369 | target_roll_velocity_id_ = ParameterIndex(model, "TargetRollVelocity");
370 |
371 | balance_ids_[kFootFL] = ParameterIndex(model, "FLBalance");
372 | CHECK_GE(balance_ids_[kFootFL], 0);
373 | balance_ids_[kFootHL] = ParameterIndex(model, "HLBalance");
374 | CHECK_GE(balance_ids_[kFootHL], 0);
375 | balance_ids_[kFootFR] = ParameterIndex(model, "FRBalance");
376 | CHECK_GE(balance_ids_[kFootFR], 0);
377 | balance_ids_[kFootHR] = ParameterIndex(model, "HRBalance");
378 | CHECK_GE(balance_ids_[kFootHR], 0);
379 |
380 | dist_from_nominal_ids_[0][0] = ParameterIndex(model, "FLXDistFromNominal");
381 | dist_from_nominal_ids_[0][1] = ParameterIndex(model, "FLYDistFromNominal");
382 | dist_from_nominal_ids_[0][2] = ParameterIndex(model, "FLZDistFromNominal");
383 | stepping_frequency_ids_[0] = ParameterIndex(model, "FLSteppingFrequency");
384 | ground_to_air_ratio_ids_[0] = ParameterIndex(model, "FLGroundToAirRatio");
385 | phase_offset_ids_[0] = ParameterIndex(model, "FLPhaseOffset");
386 | amplitudes_vertical_ids_[0] = ParameterIndex(model, "FLAmplitudesVertical");
387 | amplitudes_forward_ids_[0] = ParameterIndex(model, "FLAmplitudesForward");
388 | dist_from_nominal_ids_[1][0] = ParameterIndex(model, "HLXDistFromNominal");
389 | dist_from_nominal_ids_[1][1] = ParameterIndex(model, "HLYDistFromNominal");
390 | dist_from_nominal_ids_[1][2] = ParameterIndex(model, "HLZDistFromNominal");
391 | stepping_frequency_ids_[1] = ParameterIndex(model, "HLSteppingFrequency");
392 | ground_to_air_ratio_ids_[1] = ParameterIndex(model, "HLGroundToAirRatio");
393 | phase_offset_ids_[1] = ParameterIndex(model, "HLPhaseOffset");
394 | amplitudes_vertical_ids_[1] = ParameterIndex(model, "HLAmplitudesVertical");
395 | amplitudes_forward_ids_[1] = ParameterIndex(model, "HLAmplitudesForward");
396 | dist_from_nominal_ids_[2][0] = ParameterIndex(model, "FRXDistFromNominal");
397 | dist_from_nominal_ids_[2][1] = ParameterIndex(model, "FRYDistFromNominal");
398 | dist_from_nominal_ids_[2][2] = ParameterIndex(model, "FRZDistFromNominal");
399 | stepping_frequency_ids_[2] = ParameterIndex(model, "FRSteppingFrequency");
400 | ground_to_air_ratio_ids_[2] = ParameterIndex(model, "FRGroundToAirRatio");
401 | phase_offset_ids_[2] = ParameterIndex(model, "FRPhaseOffset");
402 | amplitudes_vertical_ids_[2] = ParameterIndex(model, "FRAmplitudesVertical");
403 | amplitudes_forward_ids_[2] = ParameterIndex(model, "FRAmplitudesForward");
404 | dist_from_nominal_ids_[3][0] = ParameterIndex(model, "HRXDistFromNominal");
405 | dist_from_nominal_ids_[3][1] = ParameterIndex(model, "HRYDistFromNominal");
406 | dist_from_nominal_ids_[3][2] = ParameterIndex(model, "HRZDistFromNominal");
407 | stepping_frequency_ids_[3] = ParameterIndex(model, "HRSteppingFrequency");
408 | ground_to_air_ratio_ids_[3] = ParameterIndex(model, "HRGroundToAirRatio");
409 | phase_offset_ids_[3] = ParameterIndex(model, "HRPhaseOffset");
410 | amplitudes_vertical_ids_[3] = ParameterIndex(model, "HRAmplitudesVertical");
411 | amplitudes_forward_ids_[3] = ParameterIndex(model, "HRAmplitudesForward");
412 |
413 | for (int i = 0; i < 12; i++) {
414 | target_joint_angle_ids_[i] = ParameterIndex(model, absl::StrCat("q", i+1));
415 | }
416 |
417 | walker_root_joint_id_ = mj_name2id(model, mjOBJ_JOINT, kRootJointName);
418 | if (walker_root_joint_id_ == -1) {
419 | mju_error("root joint '%s' not found", kRootJointName);
420 | }
421 | }
422 |
423 | // compute average foot position, depending on mode
424 | void Barkour::ResidualFn::AverageFootPos(
425 | double avg_foot_pos[3], double* foot_pos[kNumFoot]) const {
426 | // we should compute the average foot position for feet that are used for
427 | // balancing.
428 | // start by using all feet
429 | double foot_weight[kNumFoot] = {};
430 | double total_weight = 0;
431 | for (int foot : kFootAll) {
432 | foot_weight[foot] = parameters_[balance_ids_[foot]];
433 | total_weight += foot_weight[foot];
434 | }
435 | if (total_weight == 0) {
436 | // if no feet are considered, just use the average
437 | for (int foot : kFootAll) {
438 | foot_weight[foot] = 1.0;
439 | }
440 | total_weight = 4.0;
441 | }
442 |
443 | mju_zero3(avg_foot_pos);
444 | for (int foot : kFootAll) {
445 | mju_addToScl3(avg_foot_pos, foot_pos[foot], foot_weight[foot]);
446 | }
447 | mju_scl3(avg_foot_pos, avg_foot_pos, 1 / total_weight);
448 | }
449 |
450 | } // namespace mjpc::language2reward
451 |
--------------------------------------------------------------------------------
/mjpc/barkour/barkour.h:
--------------------------------------------------------------------------------
1 | // Copyright 2023 DeepMind Technologies Limited
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | // ==============================================================================
15 |
16 | #ifndef THIRD_PARTY_DEEPMIND_LANGUAGE_TO_REWARD_2023_MJPC_BARKOUR_BARKOUR_H_
17 | #define THIRD_PARTY_DEEPMIND_LANGUAGE_TO_REWARD_2023_MJPC_BARKOUR_BARKOUR_H_
18 |
19 | #include
20 | #include
21 |
22 | #include
23 | #include
24 |
25 |
26 | namespace mjpc::language2reward {
27 |
28 | class Barkour : public mjpc::Task {
29 | public:
30 | std::string Name() const override;
31 | std::string XmlPath() const override;
32 | class ResidualFn : public mjpc::BaseResidualFn {
33 | public:
34 | explicit ResidualFn(const Barkour* task) :
35 | mjpc::BaseResidualFn(task) {}
36 |
37 | void Residual(const mjModel* model, const mjData* data,
38 | double* residual) const override;
39 |
40 | private:
41 | friend class Barkour;
42 | // ============ enums ============
43 |
44 | // feet
45 | enum BkFoot { kFootFL = 0, kFootHL, kFootFR, kFootHR, kNumFoot };
46 |
47 | // ============ constants ============
48 | constexpr static BkFoot kFootAll[kNumFoot] = {kFootFL, kFootHL, kFootFR,
49 | kFootHR};
50 |
51 | // posture gain factors for abduction, hip, knee
52 | constexpr static double kJointPostureGain[3] = {2, 1, 1}; // unitless
53 |
54 | constexpr static char kRootJointName[] = "torso";
55 |
56 | // ============ methods ============
57 |
58 | // compute average foot position, depending on mode
59 | void AverageFootPos(double avg_foot_pos[3],
60 | double* foot_pos[kNumFoot]) const;
61 |
62 | void GetNormalizedFootTrajectory(
63 | double duty_ratio, double gait_frequency,
64 | double normalized_phase_offset, double time,
65 | double amplitude_forward, double amplitude_vertical,
66 | double* target_pos_x, double* target_pos_z) const;
67 |
68 | void ResetLocked(const mjModel* model);
69 |
70 | // ============ constants, computed in Reset() ============
71 | int torso_body_id_ = -1;
72 |
73 | // ===== task parameters id ======
74 | int target_body_height_id_ = -1;
75 | int goal_position_x_id_ = -1;
76 | int goal_position_y_id_ = -1;
77 | int target_body_heading_id_ = -1;
78 | int target_body_pitch_id_ = -1;
79 | int target_body_roll_id_ = -1;
80 | int target_forward_velocity_id_ = -1;
81 | int target_sideways_velocity_id_ = -1;
82 | int target_upward_velocity_id_ = -1;
83 | int target_turning_velocity_id_ = -1;
84 | int target_roll_velocity_id_ = -1;
85 | int balance_ids_[4] = {-1};
86 |
87 | // ==== gait-relevant parameters id ====
88 | int dist_from_nominal_ids_[4][3];
89 | int stepping_frequency_ids_[4];
90 | int ground_to_air_ratio_ids_[4];
91 | int phase_offset_ids_[4];
92 | int amplitudes_vertical_ids_[4];
93 | int amplitudes_forward_ids_[4];
94 |
95 | // ==== joint angle parameters id ====
96 | int target_joint_angle_ids_[12];
97 |
98 | // ==== object root joint ids ====
99 | int walker_root_joint_id_ = -1;
100 |
101 | double current_base_roll_ = 0;
102 |
103 | int foot_site_id_[kNumFoot];
104 | int shoulder_body_id_[kNumFoot];
105 | };
106 |
107 | Barkour() : residual_(this) {}
108 | void TransitionLocked(mjModel* model, mjData* data) override;
109 |
110 | // call base-class Reset, save task-related ids
111 | void ResetLocked(const mjModel* model) override;
112 |
113 | protected:
114 | std::unique_ptr ResidualLocked() const override {
115 | return std::make_unique(residual_);
116 | }
117 | ResidualFn* InternalResidual() override { return &residual_; }
118 |
119 | private:
120 | ResidualFn residual_;
121 | int goal_mocap_id_ = -1;
122 | };
123 |
124 | } // namespace mjpc::language2reward
125 |
126 | #endif // THIRD_PARTY_DEEPMIND_LANGUAGE_TO_REWARD_2023_MJPC_BARKOUR_BARKOUR_H_
127 |
--------------------------------------------------------------------------------
/mjpc/barkour/barkour_vb.xml.patch:
--------------------------------------------------------------------------------
1 | diff --git a/barkour_vb_modified.xml b/barkour_vb_modified.xml
2 | --- a/barkour_vb_modified.xml
3 | +++ b/barkour_vb_modified.xml
4 | @@ -11,7 +11,8 @@
5 |
6 |
7 |
8 | -
9 | +
11 |
12 |
13 |
14 | @@ -64,6 +65,7 @@
15 |
16 |
17 |
18 | +
19 |
20 |
21 |
22 |
--------------------------------------------------------------------------------
/mjpc/barkour/task_barkour.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 |
149 |
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 |
158 |
159 |
160 |
161 |
--------------------------------------------------------------------------------
/mjpc/barkour/world.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
--------------------------------------------------------------------------------
/mjpc/headless_server.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2023 DeepMind Technologies Limited
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | // ==============================================================================
15 |
16 | #include
17 | #include
18 | #include
19 |
20 | #include
21 | #include
22 | #include
23 | #include
24 | #include "mjpc/barkour/barkour.h"
25 | // DEEPMIND INTERNAL IMPORT
26 | #include
27 | #include
28 | #include
29 | #include
30 |
31 | ABSL_FLAG(int32_t, mjpc_port, 10000, "port to listen on");
32 | ABSL_FLAG(int32_t, mjpc_workers, -1,
33 | "number of worker threads for MJPC planning. -1 means use the number "
34 | "of available hardware threads.");
35 |
36 | int main(int argc, char** argv) {
37 | absl::ParseCommandLine(argc, argv);
38 | int port = absl::GetFlag(FLAGS_mjpc_port);
39 |
40 | std::string server_address = absl::StrCat("[::]:", port);
41 |
42 | std::shared_ptr server_credentials =
43 | grpc::experimental::LocalServerCredentials(LOCAL_TCP);
44 | grpc::ServerBuilder builder;
45 | builder.AddListeningPort(server_address, server_credentials);
46 |
47 | mjpc::agent_grpc::AgentService service(
48 | {std::make_shared()},
49 | absl::GetFlag(FLAGS_mjpc_workers));
50 | builder.RegisterService(&service);
51 |
52 | std::unique_ptr server(builder.BuildAndStart());
53 | LOG(INFO) << "Server listening on " << server_address;
54 |
55 | // Keep the program running until the server shuts down.
56 | server->Wait();
57 |
58 | return 0;
59 | }
60 |
--------------------------------------------------------------------------------
/mjpc/ui_server.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2023 DeepMind Technologies Limited
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | // ==============================================================================
15 |
16 | #include
17 | #include
18 | #include
19 |
20 | #include
21 | #include
22 | #include
23 | #include
24 | #include
25 | #include
26 | #include "mjpc/barkour/barkour.h"
27 | // DEEPMIND INTERNAL IMPORT
28 | #include
29 | #include
30 | #include
31 | #include
32 | #include
33 |
34 | ABSL_FLAG(int32_t, mjpc_port, 10000, "port to listen on");
35 |
36 | int main(int argc, char** argv) {
37 | absl::ParseCommandLine(argc, argv);
38 | int port = absl::GetFlag(FLAGS_mjpc_port);
39 |
40 | std::string server_address = absl::StrCat("[::]:", port);
41 |
42 | std::shared_ptr server_credentials =
43 | grpc::experimental::LocalServerCredentials(LOCAL_TCP);
44 | grpc::ServerBuilder builder;
45 | builder.AddListeningPort(server_address, server_credentials);
46 |
47 | mjpc::MjpcApp app({
48 | std::make_shared(),
49 | });
50 | mjpc::agent_grpc::UiAgentService service(app.Sim());
51 | builder.RegisterService(&service);
52 |
53 | std::unique_ptr server(builder.BuildAndStart());
54 | LOG(INFO) << "Server listening on " << server_address;
55 |
56 | app.Start();
57 | server->Shutdown(absl::ToChronoTime(absl::Now() + absl::Seconds(1)));
58 | server->Wait();
59 |
60 | return 0;
61 | }
62 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools"]
3 | build-backend = "setuptools.build_meta"
4 |
5 | [project]
6 | name="language_to_reward_2023"
7 | version = "0.0.1"
8 | authors = [
9 | {name = "Wenhao Yu", email = "magicmelon@google.com"},
10 | {name = "Nimrod Gileadi", email = "nimrod@google.com"},
11 | {name = "Fei Xia", email = "xiafei@google.com"},
12 | ]
13 | description = "Language to Reward (2023)"
14 | readme = {file = "README.md", content-type = "text/markdown"}
15 | license = {text = "Apache License 2.0"}
16 | classifiers = [
17 | "Development Status :: 3 - Alpha",
18 | "Intended Audience :: Developers",
19 | "Intended Audience :: Science/Research",
20 | "License :: OSI Approved :: Apache Software License",
21 | "Natural Language :: English",
22 | "Programming Language :: Python :: 3",
23 | "Programming Language :: Python :: 3.8",
24 | "Programming Language :: Python :: 3.9",
25 | "Programming Language :: Python :: 3.10",
26 | "Programming Language :: Python :: 3.11",
27 | "Programming Language :: Python :: 3.12",
28 | "Topic :: Scientific/Engineering",
29 | ]
30 | requires-python = ">=3.8"
31 | dependencies = [
32 | "absl-py",
33 | "colorama",
34 | "mujoco",
35 | "openai",
36 | "termcolor",
37 | ]
38 |
39 | [project.urls]
40 | Homepage = "https://github.com/google-deepmind/language_to_reward_2023"
41 | Repository = "https://github.com/google-deepmind/language_to_reward_2023"
42 |
43 | [tool.setuptools.packages.find]
44 | where = ["src"]
45 | include = ["language_to_reward_2023*"]
46 |
47 | [tool.setuptools.package-data]
48 | language_to_reward_2023 = [
49 | "mjpc/l2r_headless_server",
50 | "mjpc/l2r_ui_server",
51 | ]
52 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | absl-py
2 | colorama==0.4.6
3 | mujoco==3.0.0
4 | openai
5 | termcolor
6 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 DeepMind Technologies Limited
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Setup script for language_to_reward_2023."""
17 |
18 | import os
19 | import pathlib
20 | import platform
21 | import shutil
22 | import subprocess
23 |
24 | import setuptools
25 | from setuptools.command import build_ext
26 |
27 |
28 | Path = pathlib.Path
29 |
30 |
31 | class CMakeExtension(setuptools.Extension):
32 | """A Python extension that has been prebuilt by CMake.
33 |
34 | We do not want distutils to handle the build process for our extensions, so
35 | so we pass an empty list to the super constructor.
36 | """
37 |
38 | def __init__(self, name):
39 | super().__init__(name, sources=[])
40 |
41 |
42 | class BuildAgentServerBinaries(build_ext.build_ext):
43 | """A Python extension that has been prebuilt by CMake.
44 |
45 | We do not want distutils to handle the build process for our extensions, so
46 | so we pass an empty list to the super constructor.
47 | """
48 |
49 | def run(self):
50 | self._configure_and_build_agent_server()
51 | self.run_command("copy_agent_server_binary")
52 | self.run_command("copy_task_assets")
53 |
54 | def _configure_and_build_agent_server(self):
55 | """Check for CMake."""
56 | cmake_command = "cmake"
57 | build_cfg = "Debug"
58 | l2r_root = Path(__file__).parent
59 | l2r_build_dir = l2r_root / "build"
60 | cmake_configure_args = [
61 | f"-DCMAKE_BUILD_TYPE:STRING={build_cfg}",
62 | "-DBUILD_TESTING:BOOL=OFF",
63 | ]
64 |
65 | if platform.system() == "Darwin" and "ARCHFLAGS" in os.environ:
66 | osx_archs = []
67 | if "-arch x86_64" in os.environ["ARCHFLAGS"]:
68 | osx_archs.append("x86_64")
69 | if "-arch arm64" in os.environ["ARCHFLAGS"]:
70 | osx_archs.append("arm64")
71 | cmake_configure_args.append(
72 | f"-DCMAKE_OSX_ARCHITECTURES={';'.join(osx_archs)}"
73 | )
74 |
75 | # TODO(nimrod): We currently configure the builds into
76 | # `mujoco_mpc/build`. This should use `self.build_{temp,lib}` instead, to
77 | # isolate the Python builds from the C++ builds.
78 | print("Configuring CMake with the following arguments:")
79 | for arg in cmake_configure_args:
80 | print(f" {arg}")
81 |
82 | subprocess.check_call(
83 | [
84 | cmake_command,
85 | *cmake_configure_args,
86 | f"-S{l2r_root.resolve()}",
87 | f"-B{l2r_build_dir.resolve()}",
88 | ],
89 | cwd=l2r_root,
90 | )
91 |
92 | print("Building `l2r_headless_server` and `l2r_ui_server` with CMake")
93 | subprocess.check_call(
94 | [
95 | cmake_command,
96 | "--build",
97 | str(l2r_build_dir.resolve()),
98 | "--target",
99 | "l2r_headless_server",
100 | "l2r_ui_server",
101 | f"-j{os.cpu_count()}",
102 | "--config",
103 | build_cfg,
104 | ],
105 | cwd=l2r_root,
106 | )
107 |
108 |
109 | class CopyAgentServerBinaryCommand(setuptools.Command):
110 | """Command to copy `l2r_{headless,ui}_server` next to task_clients.py.
111 |
112 | Assumes that the C++ binaries were built and
113 | and located in the default `build/mjpc` folder.
114 | """
115 |
116 | description = "Copy server binaries into package."
117 | user_options = []
118 |
119 | def initialize_options(self):
120 | self.build_lib = None
121 |
122 | def finalize_options(self):
123 | self.set_undefined_options("copy_task_assets", ("build_lib", "build_lib"))
124 |
125 | def run(self):
126 | self._copy_binary("l2r_headless_server")
127 | self._copy_binary("l2r_ui_server")
128 |
129 | def _copy_binary(self, binary_name):
130 | source_path = Path(f"build/mjpc/{binary_name}")
131 | if not source_path.exists():
132 | raise ValueError(
133 | f"Cannot find `{binary_name}` binary from {source_path.absolute()}. "
134 | f"Please build the `{binary_name}` C++ gRPC service."
135 | )
136 | assert self.build_lib is not None
137 | build_lib_path = Path(self.build_lib).resolve()
138 | destination_path = Path(
139 | build_lib_path, "language_to_reward_2023", "mjpc", binary_name
140 | )
141 |
142 | self.announce(f"{source_path.resolve()=}")
143 | self.announce(f"{destination_path.resolve()=}")
144 |
145 | destination_path.parent.mkdir(exist_ok=True, parents=True)
146 | shutil.copy(source_path, destination_path)
147 |
148 |
149 | class CopyTaskAssetsCommand(setuptools.Command):
150 | """Copies XML and mesh files next to the server binaries."""
151 |
152 | description = "Copy task assets over to python source."
153 | user_options = []
154 |
155 | def initialize_options(self):
156 | self.build_lib = None
157 |
158 | def finalize_options(self):
159 | self.set_undefined_options("build_py", ("build_lib", "build_lib"))
160 |
161 | def run(self):
162 | l2r_root = Path(__file__).parent
163 | l2r_build_dir = l2r_root / "build"
164 | mjpc_task_paths = [l2r_build_dir / "mjpc" / "barkour"]
165 | for task_path in mjpc_task_paths:
166 | relative_task_path = task_path.relative_to(l2r_build_dir)
167 | assert self.build_lib is not None
168 | build_lib_path = Path(self.build_lib).resolve()
169 | destination_dir_path = Path(build_lib_path, "language_to_reward_2023")
170 | self.announce(
171 | f"Copying assets {relative_task_path} from"
172 | f" {l2r_build_dir} over to {destination_dir_path}."
173 | )
174 |
175 | destination_path = destination_dir_path / relative_task_path
176 | shutil.copytree(task_path, destination_path, dirs_exist_ok=True)
177 |
178 |
179 | setuptools.setup(
180 | ext_modules=[CMakeExtension("agent_server")],
181 | cmdclass={
182 | "build_ext": BuildAgentServerBinaries,
183 | "copy_agent_server_binary": CopyAgentServerBinaryCommand,
184 | "copy_task_assets": CopyTaskAssetsCommand,
185 | },
186 | )
187 |
--------------------------------------------------------------------------------
/src/language_to_reward_2023/confirmation_safe_executor.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 DeepMind Technologies Limited
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """An alternative SafeExecutor implementation which asks for user confirmation.
17 |
18 | This should only be used in platforms where the sandbox is not available, e.g.
19 | on OS X.
20 | """
21 |
22 | import os
23 | import subprocess
24 | import sys
25 | import tempfile
26 | import termcolor
27 |
28 | from language_to_reward_2023 import safe_executor
29 |
30 |
31 | def default_interpreter() -> str:
32 | return sys.executable
33 |
34 |
35 | _SERIOUS_WARNING = (
36 | "\nYou are about to execute untrusted code.\n"
37 | "Code executed this way can perform any operation on your PC, and "
38 | "can be a security risk."
39 | '\nOnce you have reviewed the code above, type "yes" to continue.\n'
40 | )
41 |
42 | _REPEATED_WARNING = '\nAbout to execute the code above. Type "y" to continue.\n'
43 |
44 |
45 | class ConfirmationSafeExecutor(safe_executor.SafeExecutor):
46 | """An executor that asks for user confirmation before executing the code."""
47 |
48 | def __init__(self, interpreter_path=None, skip_confirmation=False):
49 | super().__init__()
50 | self._confirmed_once = False
51 | self._interpreter_path = interpreter_path or default_interpreter()
52 | self._skip_confirmation = skip_confirmation
53 |
54 | def safe_execute(self, code: str) -> str:
55 | if not self._confirmed_once:
56 | while not self._skip_confirmation:
57 | confirm = input(
58 | termcolor.colored(_SERIOUS_WARNING, "red", attrs=["bold"])
59 | )
60 | if confirm.lower() == "yes":
61 | break
62 | self._confirmed_once = True
63 | else:
64 | while not self._skip_confirmation:
65 | confirm = input(
66 | termcolor.colored(_REPEATED_WARNING, "red", attrs=["bold"])
67 | )
68 | if confirm.lower() in ("y", "yes"):
69 | break
70 | return self._execute(code)
71 |
72 | def _execute(self, code: str) -> str:
73 | f = tempfile.NamedTemporaryFile(suffix=".py", mode="w", delete=False)
74 |
75 | f.write(code)
76 | f.close()
77 | filepath = f.name
78 |
79 | # Start by compiling the code to pyc (to get compilation errors)
80 | try:
81 | subprocess.run(
82 | [self._interpreter_path, "-m", "py_compile", filepath],
83 | check=True,
84 | )
85 | except subprocess.CalledProcessError as e:
86 | raise ValueError("Failed to compile code.") from e
87 | finally:
88 | os.unlink(filepath)
89 |
90 | # py_compile should output a pyc file in the pycache directory
91 | filename = os.path.basename(filepath)
92 | directory = os.path.dirname(filepath)
93 | pycache_dir = os.path.join(directory, "__pycache__")
94 | pyc_filepath = os.path.join(pycache_dir, filename + "c")
95 | if not os.path.exists(pyc_filepath):
96 | filename = (
97 | os.path.splitext(filename)[0]
98 | + f".cpython-{sys.version_info[0]}{sys.version_info[1]}.pyc"
99 | )
100 | pyc_filepath = os.path.join(pycache_dir, filename)
101 |
102 | # Now execute the pyc file
103 | try:
104 | completed_process = subprocess.run(
105 | [self._interpreter_path, pyc_filepath],
106 | capture_output=True,
107 | check=True,
108 | )
109 | except subprocess.CalledProcessError as e:
110 | print("stdout", e.stdout)
111 | print("stderr", e.stderr)
112 | raise ValueError("Failed to run code.") from e
113 | finally:
114 | os.unlink(pyc_filepath)
115 | return completed_process.stdout.decode("utf-8")
116 |
--------------------------------------------------------------------------------
/src/language_to_reward_2023/conversation.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 DeepMind Technologies Limited
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """A class which manages a chat session using a prompt."""
17 |
18 | import time
19 | from typing import Any
20 |
21 | import openai
22 | import termcolor
23 |
24 | from language_to_reward_2023.platforms import llm_prompt
25 |
26 |
27 | def _open_ai_call_with_retry(
28 | model: str, messages: list[Any]
29 | ) -> openai.ChatCompletion:
30 | """Call OpenAI API with retry."""
31 | reset_trigger_phrases = ["CHOICE", "NUM"]
32 | success = False
33 | completion = None
34 | while not success:
35 | try:
36 | completion = openai.ChatCompletion.create(
37 | model=model,
38 | messages=messages,
39 | temperature=0.3,
40 | )
41 | success = True
42 | for reset_trigger_phrase in reset_trigger_phrases:
43 | if reset_trigger_phrase in completion.choices[0].message.content:
44 | success = False
45 | except Exception as e: # pylint: disable=broad-exception-caught
46 | print("OpenAI API call issue, re-trying..." + str(e) + "\n")
47 | time.sleep(5)
48 | return completion
49 |
50 |
51 | class Conversation:
52 | """Manages the state of a conversation with a language model."""
53 |
54 | def __init__(
55 | self,
56 | prompt_model: llm_prompt.LLMPrompt,
57 | model: str,
58 | print_responses: bool = True,
59 | ):
60 | self._prompt_model = prompt_model
61 | self._model = model
62 | self._print_responses = print_responses
63 | number_of_llms = prompt_model.num_llms
64 | self._message_queues = [[] for _ in range(number_of_llms)]
65 |
66 | # Add general prompt to the message queue.
67 | for llm_id in range(number_of_llms):
68 | message = [{"role": "user", "content": prompt_model.prompts[llm_id]}]
69 | self._message_queues[llm_id].append(message[0])
70 |
71 | def send_command(self, user_command: str) -> str:
72 | """Sends a user command to the LLMs, returns final processed response."""
73 | if user_command == "reset":
74 | self.reset()
75 | print("Resetting the conversation history.")
76 | return "reset"
77 | upstream_message = user_command + " Make sure to ignore irrelevant options."
78 | for llm_id in range(self._prompt_model.num_llms):
79 | completion = _open_ai_call_with_retry(
80 | self._model,
81 | self._message_queues[llm_id]
82 | + [{"role": "user", "content": upstream_message}],
83 | )
84 | if self._prompt_model.keep_message_history[llm_id]:
85 | self._message_queues[llm_id].append(
86 | {"role": "user", "content": upstream_message}
87 | )
88 | self._message_queues[llm_id].append(completion.choices[0].message)
89 | print(f"LLM{llm_id} queried")
90 | response = completion.choices[0].message.content
91 | if self._print_responses:
92 | print(termcolor.colored(response + "\n", "cyan", attrs=["bold"]))
93 | try:
94 | upstream_message = self._prompt_model.response_processors[llm_id](
95 | response
96 | )
97 | except Exception:
98 | if self._prompt_model.keep_message_history[llm_id]:
99 | self._message_queues[llm_id].pop(-1)
100 | self._message_queues[llm_id].pop(-1)
101 | raise
102 | return upstream_message
103 |
104 | def reset(self) -> None:
105 | for i in range(self._prompt_model.num_llms):
106 | while len(self._message_queues[i]) > 1:
107 | self._message_queues[i].pop(-1)
108 |
--------------------------------------------------------------------------------
/src/language_to_reward_2023/platforms/barkour/barkour_execution.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 DeepMind Technologies Limited
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Execute untrusted code to get task parameters for the Barkour MJPC task."""
17 |
18 | import abc
19 | import json
20 | from typing import Any, Callable, Sequence
21 |
22 | from absl import logging
23 | from mujoco_mpc import mjpc_parameters
24 |
25 | from language_to_reward_2023 import safe_executor
26 | from language_to_reward_2023.platforms.barkour import barkour_l2r_tasks
27 |
28 |
29 | class BarkourSafeExecutor(metaclass=abc.ABCMeta):
30 | """Runs untrusted code to get task parameters for the Barkour MJPC task."""
31 |
32 | def __init__(self, executor: safe_executor.SafeExecutor):
33 | self._executor = executor
34 |
35 | def execute(self, code: str) -> mjpc_parameters.MjpcParameters:
36 | """Executes untrusted code to get MJPC task parameters.
37 |
38 | See _CODE_TEMPLATE below for available functions.
39 |
40 | Arguments:
41 | code: code which edits the weights and params dicts.
42 |
43 | Raises:
44 | ValueError: the code doesn't compile or failed to run.
45 |
46 | Returns:
47 | MJPC task parameters for the Barkour robot.
48 | """
49 | return self._execute_with_template(_CODE_TEMPLATE, code)
50 |
51 | def _execute_with_template(
52 | self, template: str, code: str
53 | ) -> mjpc_parameters.MjpcParameters:
54 | """Combines untrusted code with a template and executes it in a sandbox."""
55 | combined_code = template.replace('INSERT_CODE_HERE', code)
56 | output = self._executor.safe_execute(combined_code)
57 | return _parse_output(output)
58 |
59 |
60 | def _parse_output(output: str) -> mjpc_parameters.MjpcParameters:
61 | """Parses the output from executing param generation code."""
62 | try:
63 | overwriting_weights, overwriting_params = json.loads(output)
64 | except ValueError as e:
65 | raise ValueError(f'Invalid JSON output: {output}') from e
66 |
67 | weights, params = barkour_l2r_tasks.defaults()
68 | _overwrite_entries(weights, overwriting_weights, 'weight')
69 | _overwrite_entries(params, overwriting_params, 'param')
70 | return mjpc_parameters.MjpcParameters(
71 | cost_weights=weights, task_parameters=params
72 | )
73 |
74 |
75 | def _overwrite_entries(base, overwrites, name_for_log) -> None:
76 | for key, value in overwrites.items():
77 | if key not in base:
78 | logging.warning('Invalid %s: %s', name_for_log, key)
79 | continue
80 | try:
81 | base[key] = float(value)
82 | except ValueError:
83 | logging.warning('Invalid float value for %s: %s', key, value)
84 |
85 |
86 | def _generate_function_definitions(
87 | supported_functions: Sequence[Callable[..., Any]]
88 | ) -> str:
89 | """Generates function definitions for supported functions."""
90 | definitions = []
91 | for function in supported_functions:
92 | definitions.append(f"""
93 | def {function.__name__}(*args, **kwargs):
94 | global weights, params
95 | barkour_l2r_tasks.{function.__name__}(weights, params, *args, **kwargs)
96 |
97 |
98 | """)
99 | for dummy_function_name in _DUMMY_FUNCTIONS:
100 | definitions.append(f"""
101 | def {dummy_function_name}(*args, **kwargs):
102 | pass
103 |
104 | """)
105 |
106 | return ''.join(definitions)
107 |
108 |
109 | _SUPPORTED_FUNCTIONS = (
110 | barkour_l2r_tasks.set_torso_targets,
111 | barkour_l2r_tasks.set_foot_pos_parameters,
112 | barkour_l2r_tasks.set_foot_stepping_parameters,
113 | barkour_l2r_tasks.set_target_joint_angles,
114 | barkour_l2r_tasks.head_towards,
115 | barkour_l2r_tasks.walk,
116 | )
117 |
118 | _DUMMY_FUNCTIONS = (
119 | 'reset_reward',
120 | 'execute_plan',
121 | )
122 |
123 |
124 | _CODE_TEMPLATE = r"""
125 | from language_to_reward_2023.platforms.barkour import barkour_l2r_tasks
126 |
127 | weights, params = barkour_l2r_tasks.defaults()
128 |
129 | FUNCTION_DEFINITIONS
130 |
131 | INSERT_CODE_HERE
132 |
133 | import json
134 | print(json.JSONEncoder().encode((weights, params)))
135 | """.replace(
136 | 'FUNCTION_DEFINITIONS', _generate_function_definitions(_SUPPORTED_FUNCTIONS)
137 | )
138 |
--------------------------------------------------------------------------------
/src/language_to_reward_2023/platforms/barkour/barkour_execution_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 DeepMind Technologies Limited
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | import inspect
17 |
18 | from absl.testing import absltest
19 | from absl.testing import parameterized
20 |
21 | from language_to_reward_2023 import confirmation_safe_executor
22 | from language_to_reward_2023 import sandbox_safe_executor
23 | from language_to_reward_2023.platforms.barkour import barkour_execution
24 | from language_to_reward_2023.platforms.barkour import barkour_l2r_tasks
25 |
26 |
27 | def _make_confirmation_executor():
28 | return barkour_execution.BarkourSafeExecutor(
29 | confirmation_safe_executor.ConfirmationSafeExecutor(
30 | skip_confirmation=True
31 | )
32 | )
33 |
34 |
35 | def _make_sandbox_executor():
36 | return barkour_execution.BarkourSafeExecutor(
37 | sandbox_safe_executor.SandboxSafeExecutor()
38 | )
39 |
40 |
41 | _EXECUTER_BUILDERS = [
42 | (_make_confirmation_executor,),
43 | (_make_sandbox_executor,),
44 | ]
45 |
46 |
47 | class BarkourExecutionTest(parameterized.TestCase):
48 |
49 | @parameterized.parameters(_EXECUTER_BUILDERS)
50 | def test_empty_code_returns_default_weights(self, executor_builder):
51 | executor = executor_builder()
52 | params = executor.execute('')
53 | default_cost_weights, default_task_params = barkour_l2r_tasks.defaults()
54 | self.assertEqual(default_cost_weights, params.cost_weights)
55 | self.assertEqual(default_task_params, params.task_parameters)
56 |
57 | @parameterized.parameters(_EXECUTER_BUILDERS)
58 | def test_invalid_code_fails(self, executor_builder):
59 | executor = executor_builder()
60 | code = '('
61 | self.assertRaises(ValueError, lambda: executor.execute(code))
62 |
63 | @parameterized.parameters(_EXECUTER_BUILDERS)
64 | def test_throwing_code_fails(self, executor_builder):
65 | executor = executor_builder()
66 | code = 'raise Exception()'
67 | self.assertRaises(ValueError, lambda: executor.execute(code))
68 |
69 | @parameterized.parameters(_EXECUTER_BUILDERS)
70 | def test_printing_code_fails(self, executor_builder):
71 | executor = executor_builder()
72 | code = 'print("hello")'
73 | self.assertRaises(ValueError, lambda: executor.execute(code))
74 |
75 | @parameterized.parameters(_EXECUTER_BUILDERS)
76 | def test_can_import_numpy(self, executor_builder):
77 | executor = executor_builder()
78 | params = executor.execute('import numpy as np')
79 | default_cost_weights, default_task_params = barkour_l2r_tasks.defaults()
80 | self.assertEqual(default_cost_weights, params.cost_weights)
81 | self.assertEqual(default_task_params, params.task_parameters)
82 |
83 | @parameterized.parameters(_EXECUTER_BUILDERS)
84 | def test_invalid_parameters_ignored(self, executor_builder):
85 | executor = executor_builder()
86 | _, default_task_params = barkour_l2r_tasks.defaults()
87 | params = executor.execute('params["banana"] = 0.0')
88 | self.assertEqual(default_task_params, params.task_parameters)
89 |
90 | @parameterized.parameters(_EXECUTER_BUILDERS)
91 | def test_invalid_float_ignored(self, executor_builder):
92 | executor = executor_builder()
93 | _, default_task_params = barkour_l2r_tasks.defaults()
94 | params = executor.execute('params["TargetBodyHeight"] = "asdf"')
95 | self.assertEqual(default_task_params, params.task_parameters)
96 |
97 | @parameterized.parameters(_EXECUTER_BUILDERS)
98 | def test_set_torso_targets(self, executor_builder):
99 | executor = executor_builder()
100 | code = """
101 | set_torso_targets(target_torso_height=0.5)
102 | """
103 | params = executor.execute(code)
104 | self.assertEqual(0.5, params.task_parameters['TargetBodyHeight'])
105 |
106 | def test_all_supported_functions_are_valid(self):
107 | for f in barkour_execution._SUPPORTED_FUNCTIONS:
108 | args_names = list(inspect.signature(f).parameters.keys())
109 | self.assertEqual(
110 | ['cost_weights', 'task_parameters'],
111 | args_names[:2],
112 | f"First 2 parameters of {f.__name__} should be ['cost_weights',"
113 | " 'task_parameters']",
114 | )
115 |
116 |
117 | if __name__ == '__main__':
118 | absltest.main()
119 |
--------------------------------------------------------------------------------
/src/language_to_reward_2023/platforms/barkour/barkour_l2r_task_client.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 DeepMind Technologies Limited
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Python client for interfacing with an MJPC binary running the Barkour task."""
17 |
18 | import dataclasses
19 | import pathlib
20 | from typing import Optional, Sequence
21 | import mujoco
22 | from mujoco_mpc import agent as agent_lib
23 | import numpy as np
24 | from language_to_reward_2023 import task_clients
25 | from language_to_reward_2023.platforms.barkour import barkour_l2r_tasks
26 |
27 |
28 | _MODEL_PATH = str(
29 | pathlib.Path(__file__).parent.parent.parent
30 | / "mjpc" / "barkour" / "world.xml"
31 | )
32 |
33 |
34 | @dataclasses.dataclass
35 | class BarkourState:
36 | """Current state of the Barkour robot from the MJPC binary."""
37 | # Torso state (world space)
38 | torso_position: np.ndarray
39 | torso_linear_velocity: np.ndarray
40 | torso_orientation: np.ndarray
41 | torso_angular_velocity: np.ndarray
42 |
43 | # Foot space (local space)
44 | front_left_foot_position: np.ndarray
45 | front_right_foot_position: np.ndarray
46 | front_left_foot_velocity: np.ndarray
47 | front_right_foot_velocity: np.ndarray
48 | hind_left_foot_position: np.ndarray
49 | hind_right_foot_position: np.ndarray
50 | hind_left_foot_velocity: np.ndarray
51 | hind_right_foot_velocity: np.ndarray
52 |
53 |
54 | class BarkourClient(task_clients.AgentApiTaskClient):
55 | """Client for controlling MJPC when it's running the Barkour task."""
56 |
57 | def __init__(
58 | self,
59 | ui: bool = False,
60 | agent: Optional[agent_lib.Agent] = None,
61 | test_params: Sequence[str] = (), # Do not use, test only.
62 | ):
63 | agent = agent or task_clients.create_agent(
64 | task_id='Barkour',
65 | ui=ui,
66 | real_time_speed=0.4,
67 | )
68 | model = agent.model or mujoco.MjModel.from_xml_path(_MODEL_PATH)
69 |
70 | super().__init__(
71 | agent=agent,
72 | model=model,
73 | # normally, we run at 25% real time, and planning time is 50 to 80ms,
74 | # so 13ms planning delay should be fine, but for some reason performance
75 | # is not so good in this case, so we set a planning duration such that
76 | # planning happens every step.
77 | # TODO(nimrod): change this to 13ms.
78 | planning_duration=0.005,
79 | )
80 |
81 | def reset(self):
82 | super().reset()
83 | self._reset_reward()
84 |
85 | def get_state(self) -> BarkourState:
86 | self.update_state()
87 | return BarkourState(
88 | torso_position=np.array(self._data.sensor('torso_subtreecom').data),
89 | torso_orientation=np.array(self._data.sensor('torso_subtreequat').data),
90 | torso_linear_velocity=np.array(
91 | self._data.sensor('torso_subtreelinvel').data
92 | ),
93 | torso_angular_velocity=np.array(self._data.sensor('torso_angvel').data),
94 | front_left_foot_position=np.array(self._data.sensor('FLpos').data),
95 | hind_left_foot_position=np.array(self._data.sensor('RLpos').data),
96 | front_right_foot_position=np.array(self._data.sensor('FRpos').data),
97 | hind_right_foot_position=np.array(self._data.sensor('RRpos').data),
98 | front_left_foot_velocity=np.array(self._data.sensor('FLvel').data),
99 | hind_left_foot_velocity=np.array(self._data.sensor('RLvel').data),
100 | front_right_foot_velocity=np.array(self._data.sensor('FRvel').data),
101 | hind_right_foot_velocity=np.array(self._data.sensor('RRvel').data),
102 | )
103 |
104 | def _reset_reward(self):
105 | weights, params = barkour_l2r_tasks.defaults()
106 | self._agent.set_task_parameters(params)
107 | self._agent.set_cost_weights(weights)
108 |
--------------------------------------------------------------------------------
/src/language_to_reward_2023/platforms/barkour/barkour_l2r_task_client_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 DeepMind Technologies Limited
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Tests for Barkour task client."""
17 |
18 | from absl.testing import absltest
19 |
20 | from language_to_reward_2023.platforms.barkour import barkour_l2r_task_client
21 |
22 |
23 | class BarkourTaskClientTest(absltest.TestCase):
24 |
25 | def setUp(self):
26 | super().setUp()
27 | self.client = barkour_l2r_task_client.BarkourClient()
28 |
29 | def tearDown(self):
30 | self.client.close()
31 | super().tearDown()
32 |
33 | def test_creates_agent(self):
34 | self.assertIsNotNone(self.client.agent())
35 |
36 |
37 | if __name__ == '__main__':
38 | absltest.main()
39 |
--------------------------------------------------------------------------------
/src/language_to_reward_2023/platforms/barkour/barkour_l2r_tasks.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 DeepMind Technologies Limited
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Utilities for generating task parameters for the Barkour MJPC task."""
17 |
18 | from typing import Literal
19 |
20 | from language_to_reward_2023.platforms.barkour import default_task_parameters
21 |
22 | FootName = Literal[
23 | 'front_left',
24 | 'rear_left',
25 | 'front_right',
26 | 'rear_right',
27 | 'back_left',
28 | 'back_right',
29 | ]
30 |
31 |
32 | def defaults() -> tuple[dict[str, float], dict[str, float]]:
33 | """Default cost weights and task parameters for the Barkour MJPC task."""
34 | weights, params = default_task_parameters.default_parameters_and_weights()
35 | return weights, params
36 |
37 |
38 | def set_torso_targets(
39 | cost_weights: dict[str, float],
40 | task_parameters: dict[str, float],
41 | target_torso_height: float | None = None,
42 | target_torso_pitch: float | None = None,
43 | target_torso_roll: float | None = None,
44 | target_torso_location_xy: tuple[float, float] | None = None,
45 | target_torso_velocity_xy: tuple[float, float] | None = None,
46 | target_torso_heading: float | None = None,
47 | target_turning_speed: float | None = None,
48 | ):
49 | """Updates cost_weights and task_parameters to specify torso targets.
50 |
51 | Arguments:
52 | cost_weights: a dict of weight values which will be mutated.
53 | task_parameters: a dict of task parameters which will be mutated.
54 | target_torso_height: how high the torso should be in meters.
55 | target_torso_pitch: pitch from a horizontal pose in radians. A positive
56 | number means robot is looking up.
57 | target_torso_roll: roll angle in radians, in clockwise direction
58 | target_torso_location_xy: a tuple containing target global coordinates in
59 | meters.
60 | target_torso_velocity_xy: target velocity in robot coordinates, where x is
61 | forward and y is right.
62 | target_torso_heading: desired global heading direction in radians.
63 | target_turning_speed: the desired turning speed of the torso in radians per
64 | second.
65 | """
66 | if target_torso_height is not None:
67 | task_parameters.update({
68 | 'TargetBodyHeight': target_torso_height,
69 | })
70 | cost_weights.update({
71 | 'BodyHeight': 0.5,
72 | })
73 |
74 | if target_torso_pitch is not None:
75 | task_parameters.update({
76 | 'TargetPitch': -target_torso_pitch,
77 | })
78 | cost_weights.update({
79 | 'Pitch': 0.6,
80 | })
81 |
82 | if target_torso_roll is not None:
83 | # Note: In Language2Reward paper, this was added to the current roll of the
84 | # robot.
85 | task_parameters.update({
86 | 'TargetRoll': target_torso_roll,
87 | })
88 | cost_weights.update({
89 | 'Roll': 1.0,
90 | })
91 |
92 | if target_torso_location_xy is not None:
93 | task_parameters.update({
94 | 'GoalPositionX': target_torso_location_xy[0],
95 | 'GoalPositionY': target_torso_location_xy[1],
96 | })
97 | cost_weights.update(
98 | {'BodyXYPos': 0.6, 'Forward_Velocity': 0.0, 'Turning_Velocity': 0.0}
99 | )
100 | else:
101 | cost_weights.update({'BodyXYPos': 0.0})
102 |
103 | if target_torso_velocity_xy is not None:
104 | task_parameters.update({
105 | 'TargetForwardVelocity': target_torso_velocity_xy[0],
106 | 'TargetSidewaysVelocity': target_torso_velocity_xy[1],
107 | })
108 | cost_weights.update({
109 | 'BodyXYPos': 0.0,
110 | 'Forward_Velocity': 0.1,
111 | 'Sideways_Velocity': 0.1,
112 | 'Turning_Velocity': 0.1,
113 | })
114 | else:
115 | cost_weights.update({
116 | 'Forward_Velocity': 0.0,
117 | 'Sideways_Velocity': 0.0,
118 | 'Turning_Velocity': 0.0,
119 | })
120 | if target_torso_heading is not None:
121 | task_parameters.update({'TargetHeading': target_torso_heading})
122 | cost_weights.update({'Heading': 0.6, 'Turning_Velocity': 0.0})
123 | else:
124 | cost_weights.update({'Heading': 0.0})
125 | if target_turning_speed is not None:
126 | task_parameters.update({'TargetTurningVelocity': target_turning_speed})
127 | cost_weights.update({'Heading': 0.0, 'Turning_Velocity': 0.6})
128 | else:
129 | cost_weights.update({'Turning_Velocity': 0.0})
130 |
131 |
132 | def set_foot_pos_parameters(
133 | cost_weights: dict[str, float],
134 | task_parameters: dict[str, float],
135 | foot_name: FootName,
136 | lift_height: float | None = None,
137 | extend_forward: float | None = None,
138 | move_inward: float | None = None,
139 | ):
140 | """Updates cost_weights and task_parameters to set the position of a foot."""
141 | foot_code_name = _FOOT_TO_CODENAME[foot_name]
142 | if lift_height is not None:
143 | balance = lift_height <= 0.0
144 | task_parameters.update({
145 | f'{foot_code_name}ZDistFromNominal': lift_height,
146 | f'{foot_code_name}Balance': balance,
147 | })
148 | cost_weights.update({f'{foot_code_name}_upward': 1.0})
149 |
150 | if extend_forward is not None:
151 | task_parameters.update(
152 | {f'{foot_code_name}XDistFromNominal': extend_forward}
153 | )
154 | cost_weights.update({f'{foot_code_name}_forward': 0.5})
155 |
156 | if move_inward is not None:
157 | task_parameters.update({f'{foot_code_name}YDistFromNominal': -move_inward})
158 | cost_weights.update({f'{foot_code_name}_inward': 0.5})
159 |
160 |
161 | def set_foot_stepping_parameters(
162 | cost_weights: dict[str, float],
163 | task_parameters: dict[str, float],
164 | foot_name: FootName,
165 | stepping_frequency: float,
166 | air_ratio: float,
167 | phase_offset: float,
168 | swing_up_down: float,
169 | swing_forward_back: float,
170 | should_activate: bool,
171 | ):
172 | """Updates cost_weights and task_parameters to set gait parameters."""
173 | foot_code_name = _FOOT_TO_CODENAME[foot_name]
174 | if should_activate:
175 | task_parameters.update({
176 | f'{foot_code_name}SteppingFrequency': stepping_frequency,
177 | f'{foot_code_name}GroundToAirRatio': 1 - air_ratio,
178 | f'{foot_code_name}PhaseOffset': phase_offset,
179 | f'{foot_code_name}AmplitudesVertical': swing_up_down,
180 | f'{foot_code_name}AmplitudesForward': swing_forward_back,
181 | })
182 | cost_weights.update({
183 | f'{foot_code_name}_stepping_vertical': 1.0,
184 | f'{foot_code_name}_stepping_horizontal': 0.2,
185 | })
186 | else:
187 | cost_weights.update({
188 | f'{foot_code_name}_stepping_vertical': 0.0,
189 | f'{foot_code_name}_stepping_horizontal': 0.0,
190 | })
191 |
192 |
193 | def set_target_joint_angles(
194 | cost_weights: dict[str, float],
195 | task_parameters: dict[str, float],
196 | leg_name: FootName,
197 | target_joint_angles,
198 | ):
199 | leg_id = _FOOT_TO_ID[leg_name]
200 | for i in range(3):
201 | task_parameters.update({f'q{leg_id*3+i+1}': target_joint_angles[i]})
202 | cost_weights.update({'PoseTracking': 1.0})
203 |
204 |
205 | def head_towards(
206 | cost_weights: dict[str, float],
207 | task_parameters: dict[str, float],
208 | heading_direction: float,
209 | ):
210 | """Sets a global heading direction."""
211 | task_parameters['TargetHeading'] = heading_direction
212 | cost_weights['Heading'] = 0.3
213 |
214 |
215 | def walk(
216 | cost_weights: dict[str, float],
217 | task_parameters: dict[str, float],
218 | forward_speed: float,
219 | sideways_speed: float,
220 | turning_speed: float,
221 | ):
222 | """Sets target velocities for the torso."""
223 | task_parameters.update({
224 | 'TargetForwardVelocity': forward_speed,
225 | 'TargetSidewaysVelocity': sideways_speed,
226 | 'TargetTurningVelocity': turning_speed,
227 | })
228 | cost_weights.update({
229 | 'BodyXYPos': 0.0,
230 | 'Forward_Velocity': 0.1,
231 | 'Sideways_Velocity': 0.1,
232 | 'Turning_Velocity': 0.3,
233 | })
234 |
235 |
236 | _FOOT_TO_CODENAME = {
237 | 'front_left': 'FL',
238 | 'rear_left': 'HL',
239 | 'front_right': 'FR',
240 | 'rear_right': 'HR',
241 | 'back_left': 'HL',
242 | 'back_right': 'HR',
243 | }
244 | _FOOT_TO_ID = {
245 | 'front_left': 0,
246 | 'rear_left': 1,
247 | 'front_right': 2,
248 | 'rear_right': 3,
249 | 'back_left': 1,
250 | 'back_right': 3,
251 | }
252 |
--------------------------------------------------------------------------------
/src/language_to_reward_2023/platforms/barkour/barkour_l2r_tasks_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 DeepMind Technologies Limited
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | from absl.testing import absltest
17 | from absl.testing import parameterized
18 | from mujoco_mpc import agent as mjpc_agent
19 | import numpy as np
20 |
21 | from language_to_reward_2023.platforms.barkour import barkour_l2r_task_client
22 | from language_to_reward_2023.platforms.barkour import barkour_l2r_tasks
23 |
24 | _AGENT = None
25 |
26 |
27 | def setUpModule():
28 | global _AGENT
29 | _AGENT = barkour_l2r_task_client.BarkourClient().agent()
30 |
31 |
32 | def tearDownModule():
33 | global _AGENT
34 | _AGENT.close()
35 | del _AGENT
36 |
37 |
38 | class BarkourTasksTest(parameterized.TestCase):
39 |
40 | def test_defaults(self):
41 | weights, params = barkour_l2r_tasks.defaults()
42 | self.assertEqual(0, weights["FL_forward"])
43 | self.assertEqual(0, params["GoalPositionX"])
44 | self.assertAlmostEqual(0.292, params["TargetBodyHeight"])
45 |
46 | def test_set_torso_targets_location_xy(self):
47 | """Tests that setting torso targets results in the correct behavior."""
48 | weights, params = barkour_l2r_tasks.defaults()
49 | target = (1.0, 2.0)
50 | barkour_l2r_tasks.set_torso_targets(
51 | weights, params, target_torso_location_xy=target
52 | )
53 | state = self.run_agent(
54 | _AGENT,
55 | weights,
56 | params,
57 | until=lambda state: _distance(state.qpos[:2], target) < 0.2,
58 | )
59 | self.assertLessEqual(_distance(state.qpos[:2], target), 0.2)
60 |
61 | def run_agent(
62 | self,
63 | agent: mjpc_agent.Agent,
64 | weights,
65 | params,
66 | until=lambda x: False,
67 | max_time=4.0,
68 | ):
69 | agent.reset()
70 | agent.set_cost_weights(weights)
71 | agent.set_task_parameters(params)
72 | state = agent.get_state()
73 | while state.time < max_time and not until(state):
74 | agent.planner_step()
75 | for _ in range(10):
76 | agent.step()
77 | state = agent.get_state()
78 | return state
79 |
80 |
81 | def _distance(a, b):
82 | return np.linalg.norm(np.array(a) - np.array(b))
83 |
84 |
85 | if __name__ == "__main__":
86 | absltest.main()
87 |
--------------------------------------------------------------------------------
/src/language_to_reward_2023/platforms/barkour/default_task_parameters.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 DeepMind Technologies Limited
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Parameters matching the default values in barkour_modeless.cc."""
17 |
18 |
19 | def default_parameters_and_weights() -> (
20 | tuple[dict[str, float], dict[str, float]]
21 | ):
22 | """Returns parameters matching the default values in barkour_modeless.cc."""
23 | weights = {
24 | "Balance": 0.2,
25 | "BodyHeight": 0.0,
26 | "BodyXYPos": 0.0,
27 | "Effort": 0.0,
28 | "FL_forward": 0.0,
29 | "FL_inward": 0.0,
30 | "FL_stepping_horizontal": 0.0,
31 | "FL_stepping_vertical": 0.0,
32 | "FL_upward": 0.0,
33 | "FR_forward": 0.0,
34 | "FR_inward": 0.0,
35 | "FR_stepping_horizontal": 0.0,
36 | "FR_stepping_vertical": 0.0,
37 | "FR_upward": 0.0,
38 | "Forward_Velocity": 0.0,
39 | "HL_forward": 0.0,
40 | "HL_inward": 0.0,
41 | "HL_stepping_horizontal": 0.0,
42 | "HL_stepping_vertical": 0.0,
43 | "HL_upward": 0.0,
44 | "HR_forward": 0.0,
45 | "HR_inward": 0.0,
46 | "HR_stepping_horizontal": 0.0,
47 | "HR_stepping_vertical": 0.0,
48 | "HR_upward": 0.0,
49 | "Heading": 0.0,
50 | "Pitch": 0.0,
51 | "PoseTracking": 0.0,
52 | "Posture": 0.01,
53 | "Roll": 0.0,
54 | "Roll_Velocity": 0.0,
55 | "Sideways_Velocity": 0.0,
56 | "Turning_Velocity": 0.0,
57 | "Upward_Velocity": 0.0,
58 | "act_dot": 2e-05,
59 | }
60 | params = {
61 | "FLAmplitudesForward": 0.0,
62 | "FLAmplitudesVertical": 0.0,
63 | "FLBalance": 1.0,
64 | "FLGroundToAirRatio": 0.0,
65 | "FLPhaseOffset": 0.0,
66 | "FLSteppingFrequency": 0.0,
67 | "FLXDistFromNominal": 0.0,
68 | "FLYDistFromNominal": 0.0,
69 | "FLZDistFromNominal": 0.0,
70 | "FRAmplitudesForward": 0.0,
71 | "FRAmplitudesVertical": 0.0,
72 | "FRBalance": 1.0,
73 | "FRGroundToAirRatio": 0.0,
74 | "FRPhaseOffset": 0.0,
75 | "FRSteppingFrequency": 0.0,
76 | "FRXDistFromNominal": 0.0,
77 | "FRYDistFromNominal": 0.0,
78 | "FRZDistFromNominal": 0.0,
79 | "GoalPositionX": 0.0,
80 | "GoalPositionY": 0.0,
81 | "HLAmplitudesForward": 0.0,
82 | "HLAmplitudesVertical": 0.0,
83 | "HLBalance": 1.0,
84 | "HLGroundToAirRatio": 0.0,
85 | "HLPhaseOffset": 0.0,
86 | "HLSteppingFrequency": 0.0,
87 | "HLXDistFromNominal": 0.0,
88 | "HLYDistFromNominal": 0.0,
89 | "HLZDistFromNominal": 0.0,
90 | "HRAmplitudesForward": 0.0,
91 | "HRAmplitudesVertical": 0.0,
92 | "HRBalance": 1.0,
93 | "HRGroundToAirRatio": 0.0,
94 | "HRPhaseOffset": 0.0,
95 | "HRSteppingFrequency": 0.0,
96 | "HRXDistFromNominal": 0.0,
97 | "HRYDistFromNominal": 0.0,
98 | "HRZDistFromNominal": 0.0,
99 | "TargetBodyHeight": 0.292,
100 | "TargetForwardVelocity": 0.0,
101 | "TargetHeading": 0.0,
102 | "TargetPitch": 0.0,
103 | "TargetRoll": 0.0,
104 | "TargetRollVelocity": 0.0,
105 | "TargetSidewaysVelocity": 0.0,
106 | "TargetTurningVelocity": 0.0,
107 | "TargetUpwardVelocity": 0.0,
108 | "q1": 0.0,
109 | "q10": 0.0,
110 | "q11": 0.0,
111 | "q12": 0.0,
112 | "q2": 0.0,
113 | "q3": 0.0,
114 | "q4": 0.0,
115 | "q5": 0.0,
116 | "q6": 0.0,
117 | "q7": 0.0,
118 | "q8": 0.0,
119 | "q9": 0.0,
120 | }
121 | return weights, params
122 |
--------------------------------------------------------------------------------
/src/language_to_reward_2023/platforms/barkour/prompts/prompt_coder_only.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 DeepMind Technologies Limited
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Prompt class with Reward Coder only."""
17 |
18 | from language_to_reward_2023 import safe_executor
19 | from language_to_reward_2023.platforms import llm_prompt
20 | from language_to_reward_2023.platforms import process_code
21 | from language_to_reward_2023.platforms.barkour import barkour_execution
22 | from language_to_reward_2023.platforms.barkour import barkour_l2r_task_client
23 |
24 | prompt = """
25 | We have a quadruped robot and we can use the following functions to program its behavior:
26 | ```
27 | def set_torso_targets(target_torso_height, target_torso_pitch, target_torso_roll, target_torso_location_xy, target_torso_velocity_xy, target_torso_heading, target_turning_speed)
28 | ```
29 | target_torso_height: how high the torso wants to reach. When the robot is standing on all four feet in a normal standing pose, the torso is about 0.3m high.
30 | target_torso_pitch: How much the torso should tilt up from a horizontal pose in radians. A positive number means robot is looking up, e.g. if the angle is 0.5*pi the robot will be looking upward, if the angel is 0, then robot will be looking forward.
31 | target_torso_roll: How much the torso should roll in clockwise direction in radians. Zero means the torso is flat.
32 | target_torso_velocity_xy: target torso moving velocity in local space, x is forward velocity, y is sideways velocity (positive means left).
33 | target_torso_heading: the desired direction that the robot should face towards. The value of target_torso_heading is in the range of 0 to 2*pi, where 0 and 2*pi both mean East, pi being West, etc.
34 | Remember:
35 | one of target_torso_location_xy and target_torso_velocity_xy must be None.
36 | one of target_torso_heading and target_turning_speed must be None.
37 |
38 | ```
39 | def set_foot_pos_parameters(foot_name, lift_height, extend_forward, move_inward)
40 | ```
41 | foot_name is one of ('front_left', 'back_left', 'front_right', 'back_right').
42 | lift_height: how high should the foot be lifted in the air. If is None, disable this term. If it's set to 0, the foot will touch the ground.
43 | extend_forward: how much should the foot extend forward. If is None, disable this term.
44 | move_inward: how much should the foot move inward. If is None, disable this term.
45 |
46 | ```
47 | def execute_plan(plan_duration=10)
48 | ```
49 | This function sends the parameters to the robot and execute the plan for `plan_duration` seconds, default to be 2
50 |
51 | Below are examples for generating a reward function in python for the robot given a desired task.
52 |
53 | Example 1: Make the robot go to (1, 0), facing South, and lift the back right foot.
54 | Code:
55 | set_torso_targets(0.26, 0.0, 0.0, (1, 0), None, 1.5*np.pi, None)
56 | set_foot_pos_parameters('back_right', 0.1, None, None)
57 | execute_plan(3)
58 | Explanation:
59 | The goal location should be (1, 0). South is 1.5*pi so heading should be this.
60 |
61 | Remember:
62 | 1. Always start your response with [start analysis]. Provide your analysis of the problem within 100 words, then end it with [end analysis].
63 | 2. After analysis, start your code response, format the code in code blocks. In your response all four functions above: set_torso_targets, set_foot_pos_parameters, execute_plan, should be called at least once.
64 | 3. Do not invent new functions or classes. The only allowed functions you can call are the ones listed above. Do not leave unimplemented code blocks in your response.
65 | 4. The only allowed library is numpy. Do not import or use any other library. If you use np, be sure to import numpy.
66 | 5. If you are not sure what value to use, just use your best judge. Do not use None for anything.
67 | 6. Do not calculate the position or direction of any object (except for the ones provided above). Just use a number directly based on your best guess.
68 | 7. For set_torso_targets, only the last four arguments (target_torso_location_xy, target_torso_velocity_xy, target_torso_heading, target_turning_speed) can be None. Do not set None for any other arguments.
69 | 8. Don't forget to call execute_plan at the end.
70 |
71 | """
72 |
73 | feet2id = {"front_left": 0, "rear_left": 1, "front_right": 2, "rear_right": 3}
74 |
75 |
76 | class PromptCoder(llm_prompt.LLMPrompt):
77 | """Prompt with Reward Coder only."""
78 |
79 | def __init__(
80 | self,
81 | client: barkour_l2r_task_client.BarkourClient,
82 | executor: safe_executor.SafeExecutor,
83 | ):
84 | self._agent = client.agent()
85 | self._safe_executor = barkour_execution.BarkourSafeExecutor(executor)
86 |
87 | self.name = "Language2Reward"
88 | self.num_llms = 1
89 | self.prompts = [prompt]
90 | self.keep_message_history = [True]
91 | self.response_processors = [self.process_llm_response]
92 | self.code_executor = self.execute_code
93 |
94 | def execute_code(self, code: str) -> None:
95 | print("ABOUT TO EXECUTE")
96 | print(code)
97 |
98 | mjpc_parameters = self._safe_executor.execute(code)
99 | self._agent.set_task_parameters(mjpc_parameters.task_parameters)
100 | self._agent.set_cost_weights(mjpc_parameters.cost_weights)
101 |
102 | def process_llm_response(self, response):
103 | """Process the response from coder, the output will be the python code."""
104 | return process_code.process_code_block(response)
105 |
--------------------------------------------------------------------------------
/src/language_to_reward_2023/platforms/barkour/prompts/prompt_low_level.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 DeepMind Technologies Limited
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Prompt for low level baseline."""
17 |
18 | from language_to_reward_2023 import safe_executor
19 | from language_to_reward_2023.platforms import llm_prompt
20 | from language_to_reward_2023.platforms import process_code
21 | from language_to_reward_2023.platforms.barkour import barkour_execution
22 | from language_to_reward_2023.platforms.barkour import barkour_l2r_task_client
23 |
24 | prompt = """
25 | We have a quadruped robot. It has 12 joints in total, three for each leg.
26 | We can use the following functions to control its movements:
27 |
28 | ```
29 | def set_target_joint_angles(leg_name, target_joint_angles)
30 | ```
31 | leg_name is one of ('front_left', 'back_left', 'front_right', 'back_right').
32 | target_joint_angles: a 3D vector that describes the target angle for the abduction/adduction, hip, and knee joint of the each leg.
33 |
34 | ```
35 | def walk(forward_speed, sideways_speed, turning_speed)
36 | ```
37 | forward_speed: how fast the robot should walk forward
38 | sideways_speed: how fast the robot should walk sideways
39 | turning_speed: how fast the robot should be turning (positive means turning right)
40 |
41 | ```
42 | def head_towards(heading_direction)
43 | ```
44 | heading_direction: target heading for the robot to reach, in the range of 0 to 2*pi, where 0 means East, 0.5pi means North, pi means West, and 1.5pi means South.
45 |
46 | ```
47 | def execute_plan(plan_duration=10)
48 | ```
49 | This function sends the parameters to the robot and execute the plan for `plan_duration` seconds, default to be 2
50 |
51 |
52 | Details about joint angles of each leg:
53 | abduction/adduction joint controls the upper leg to swinging inward/outward.
54 | When it's positive, legs will swing outward (swing to the right for right legs and left for left legs).
55 | When it's negative, legs will swing inward.
56 |
57 | hip joint controls the upper leg to rotate around the shoulder.
58 | When it's zero, the upper leg is parallel to the torso (hip is same height as shoulder), pointing backward.
59 | When it's positive, the upper leg rotates downward so the knee is below the shoulder. When it's 0.5pi, it's perpendicular to the torso, pointing downward.
60 | When it's negative, the upper leg rotates upward so the knee is higher than the shoulder.
61 |
62 | knee joint controls the lower leg to rotate around the knee.
63 | When it's zero, the lower leg is folded closer to the upper leg.
64 | knee joint angle can only be positive. When it's 0.5pi, the lower leg is perpendicular to the upper leg. When it's pi, the lower leg is fully streching out and parallel to the upper leg.
65 |
66 | Here are a few examples for setting the joint angles to make the robot reach a few key poses:
67 | standing on all four feet:
68 | ```
69 | set_target_joint_angles("front_left", [0, 1, 1.5])
70 | set_target_joint_angles("back_left", [0, 0.75, 1.5])
71 | set_target_joint_angles("front_right", [0, 1, 1.5])
72 | set_target_joint_angles("back_right", [0, 0.75, 1.5])
73 | execute_plan()
74 | ```
75 |
76 | sit down on the floor:
77 | ```
78 | set_target_joint_angles("front_left", [0, 0, 0])
79 | set_target_joint_angles("back_left", [0, 0, 0])
80 | set_target_joint_angles("front_right", [0, 0, 0])
81 | set_target_joint_angles("back_right", [0, 0, 0])
82 | execute_plan()
83 | ```
84 |
85 | lift front left foot:
86 | ```
87 | set_target_joint_angles("front_left", [0, 0.45, 0.35])
88 | set_target_joint_angles("back_left", [0, 1, 1.5])
89 | set_target_joint_angles("front_right", [0, 1.4, 1.5])
90 | set_target_joint_angles("back_right", [0, 1, 1.5])
91 | execute_plan()
92 | ```
93 |
94 | lift back left foot:
95 | ```
96 | set_target_joint_angles("front_left", [0, 0.5, 1.5])
97 | set_target_joint_angles("back_left", [0, 0.45, 0.35])
98 | set_target_joint_angles("front_right", [0, 0.5, 1.5])
99 | set_target_joint_angles("back_right", [0, 0.5, 1.5])
100 | execute_plan()
101 | ```
102 |
103 |
104 | Remember:
105 | 1. Always start your response with [start analysis]. Provide your analysis of the problem within 100 words, then end it with [end analysis].
106 | 2. After analysis, start your code response, format the code in code blocks.
107 | 3. Do not invent new functions or classes. The only allowed functions you can call are the ones listed above. Do not leave unimplemented code blocks in your response.
108 | 4. The only allowed library is numpy. Do not import or use any other library. If you use np, be sure to import numpy.
109 | 5. If you are not sure what value to use, just use your best judge. Do not use None for anything.
110 | 6. Do not calculate the position or direction of any object (except for the ones provided above). Just use a number directly based on your best guess.
111 | 7. Write the code as concisely as possible and try not to define additional variables.
112 | 8. If you define a new function for the skill, be sure to call it somewhere.
113 | 9. Be sure to call execute_plan at the end.
114 |
115 | """
116 |
117 | feet2id = {"front_left": 0, "rear_left": 1, "front_right": 2, "rear_right": 3}
118 |
119 |
120 | class PromptLowLevel(llm_prompt.LLMPrompt):
121 | """Prompt for low level baseline."""
122 |
123 | def __init__(
124 | self,
125 | client: barkour_l2r_task_client.BarkourClient,
126 | executor: safe_executor.SafeExecutor,
127 | ):
128 | self._agent = client.agent()
129 | self._safe_executor = barkour_execution.BarkourSafeExecutor(executor)
130 |
131 | self.name = "Language2Reward low level baseline"
132 | self.num_llms = 1
133 | self.prompts = [prompt]
134 | self.keep_message_history = [True]
135 | self.response_processors = [self.process_llm_response]
136 | self.code_executor = self.execute_code
137 |
138 | def execute_code(self, code: str) -> None:
139 | print("ABOUT TO EXECUTE")
140 | print(code)
141 |
142 | mjpc_parameters = self._safe_executor.execute(code)
143 | self._agent.set_task_parameters(mjpc_parameters.task_parameters)
144 | self._agent.set_cost_weights(mjpc_parameters.cost_weights)
145 |
146 | def process_llm_response(self, response):
147 | """Process the response from LLM, the output will be the python code."""
148 | return process_code.process_code_block(response)
149 |
--------------------------------------------------------------------------------
/src/language_to_reward_2023/platforms/barkour/prompts/prompt_thinker_coder.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 DeepMind Technologies Limited
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Prompt class with both Motion Descriptor and Reward Coder."""
17 | import re
18 |
19 | from language_to_reward_2023 import safe_executor
20 | from language_to_reward_2023.platforms import llm_prompt
21 | from language_to_reward_2023.platforms import process_code
22 | from language_to_reward_2023.platforms.barkour import barkour_execution
23 | from language_to_reward_2023.platforms.barkour import barkour_l2r_task_client
24 |
25 | prompt_thinker = """
26 | Describe the motion of a dog robot using the following form:
27 |
28 | [start of description]
29 | * This {CHOICE: [is, is not]} a new task.
30 | * The torso of the robot should roll by [NUM: 0.0] degrees towards right, the torso should pitch upward at [NUM: 0.0] degrees.
31 | * The height of the robot's CoM or torso center should be at [NUM: 0.3] meters.
32 | * The robot should {CHOICE: [face certain direction, turn at certain speed]}. If facing certain direction, it should be facing {CHOICE: [east, south, north, west]}. If turning, it should turn at [NUM: 0.0] degrees/s.
33 | * The robot should {CHOICE: [go to a certain location, move at certain speed]}. If going to certain location, it should go to (x=[NUM: 0.0], y=[NUM: 0.0]). If moving at certain speed, it should move forward at [NUM: 0.0]m/s and sideways at [NUM: 0.0]m/s (positive means left).
34 | * [optional] front_left foot lifted to [NUM: 0.0] meters high.
35 | * [optional] back_left foot lifted to [NUM: 0.0] meters high.
36 | * [optional] front_right foot lifted to [NUM: 0.0] meters high.
37 | * [optional] back_right foot lifted to [NUM: 0.0] meters high.
38 | * [optional] front_left foot extend forward by [NUM: 0.0] meters.
39 | * [optional] back_left foot extend forward by [NUM: 0.0] meters.
40 | * [optional] front_right foot extend forward by [NUM: 0.0] meters.
41 | * [optional] back_right foot extend forward by [NUM: 0.0] meters.
42 | * [optional] front_left foot shifts inward laterally by [NUM: 0.0] meters.
43 | * [optional] back_left foot shifts inward laterally by [NUM: 0.0] meters.
44 | * [optional] front_right foot shifts inward laterally by [NUM: 0.0] meters.
45 | * [optional] back_right foot shifts inward laterally by [NUM: 0.0] meters.
46 | * [optional] front_left foot steps on the ground at a frequency of [NUM: 0.0] Hz, during the stepping motion, the foot will move [NUM: 0.0] meters up and down, and [NUM: 0.0] meters forward and back, drawing a circle as if it's walking {CHOICE: forward, back}, spending [NUM: 0.0] portion of the time in the air vs gait cycle.
47 | * [optional] back_left foot steps on the ground at a frequency of [NUM: 0.0] Hz, during the stepping motion, the foot will move [NUM: 0.0] meters up and down, and [NUM: 0.0] meters forward and back, drawing a circle as if it's walking {CHOICE: forward, back}, spending [NUM: 0.0] portion of the time in the air vs gait cycle.
48 | * [optional] front_right foot steps on the ground at a frequency of [NUM: 0.0] Hz, during the stepping motion, the foot will move [NUM: 0.0] meters up and down, and [NUM: 0.0] meters forward and back, drawing a circle as if it's walking {CHOICE: forward, back}, spending [NUM: 0.0] portion of the time in the air vs gait cycle.
49 | * [optional] back_right foot steps on the ground at a frequency of [NUM: 0.0] Hz, during the stepping motion, the foot will move [NUM: 0.0] meters up and down, and [NUM: 0.0] meters forward and back, drawing a circle as if it's walking {CHOICE: forward, back}, spending [NUM: 0.0] portion of the time in the air vs gait cycle.
50 | * [optional] The phase offsets for the four legs should be front_left: [NUM: 0.0], back_left: [NUM: 0.0], front_right: [NUM: 0.0], back_right: [NUM: 0.0].
51 |
52 | [end of description]
53 |
54 | Rules:
55 | 1. If you see phrases like [NUM: default_value], replace the entire phrase with a numerical value. If you see [PNUM: default_value], replace it with a positive, non-zero numerical value.
56 | 2. If you see phrases like {CHOICE: [choice1, choice2, ...]}, it means you should replace the entire phrase with one of the choices listed. Be sure to replace all of them. If you are not sure about the value, just use your best judgement.
57 | 3. Phase offset is between [0, 1]. So if two legs' phase offset differs by 0 or 1 they are moving in synchronous. If they have phase offset difference of 0.5, they are moving opposite in the gait cycle.
58 | 4. The portion of air vs the gait cycle is between [0, 1]. So if it's 0, it means the foot will always stay on the ground, and if it's 1 it means the foot will always be in the air.
59 | 5. I will tell you a behavior/skill/task that I want the quadruped to perform and you will provide the full description of the quadruped motion, even if you may only need to change a few lines. Always start the description with [start of description] and end it with [end of description].
60 | 6. We can assume that the robot has a good low-level controller that maintains balance and stability as long as it's in a reasonable pose.
61 | 7. You can assume that the robot is capable of doing anything, even for the most challenging task.
62 | 8. The robot is about 0.3m high in CoM or torso center when it's standing on all four feet with horizontal body. It's about 0.65m high when it stand upright on two feet with vertical body. When the robot's torso/body is flat and parallel to the ground, the pitch and roll angles are both 0.
63 | 9. Holding a foot 0.0m in the air is the same as saying it should maintain contact with the ground.
64 | 10. Do not add additional descriptions not shown above. Only use the bullet points given in the template.
65 | 11. If a bullet point is marked [optional], do NOT add it unless it's absolutely needed.
66 | 12. Use as few bullet points as possible. Be concise.
67 |
68 | """
69 |
70 | prompt_coder = """
71 | We have a description of a robot's motion and we want you to turn that into the corresponding program with following functions:
72 | ```
73 | def set_torso_targets(target_torso_height, target_torso_pitch, target_torso_roll, target_torso_location_xy, target_torso_velocity_xy, target_torso_heading, target_turning_speed)
74 | ```
75 | target_torso_height: how high the torso wants to reach. When the robot is standing on all four feet in a normal standing pose, the torso is about 0.3m high.
76 | target_torso_pitch: How much the torso should tilt up from a horizontal pose in radians. A positive number means robot is looking up, e.g. if the angle is 0.5*pi the robot will be looking upward, if the angel is 0, then robot will be looking forward.
77 | target_torso_velocity_xy: target torso moving velocity in local space, x is forward velocity, y is sideways velocity (positive means left).
78 | target_torso_heading: the desired direction that the robot should face towards. The value of target_torso_heading is in the range of 0 to 2*pi, where 0 and 2*pi both mean East, pi being West, etc.
79 | target_turning_speed: the desired turning speed of the torso in radians per second.
80 | Remember:
81 | one of target_torso_location_xy and target_torso_velocity_xy must be None.
82 | one of target_torso_heading and target_turning_speed must be None.
83 | No other inputs can be None.
84 |
85 | ```
86 | def set_foot_pos_parameters(foot_name, lift_height, extend_forward, move_inward)
87 | ```
88 | foot_name is one of ('front_left', 'back_left', 'front_right', 'back_right').
89 | lift_height: how high should the foot be lifted in the air. If is None, disable this term. If it's set to 0, the foot will touch the ground.
90 | extend_forward: how much should the foot extend forward. If is None, disable this term.
91 | move_inward: how much should the foot move inward. If is None, disable this term.
92 |
93 | ```
94 | def set_foot_stepping_parameters(foot_name, stepping_frequency, air_ratio, phase_offset, swing_up_down, swing_forward_back, should_activate)
95 | ```
96 | foot_name is one of ('front_left', 'rear_left', 'front_right', 'rear_right').
97 | air_ratio (value from 0 to 1) describes how much time the foot spends in the air versus the whole gait cycle. If it's 0 the foot will always stay on ground, and if it's 1 it'll always stay in the air.
98 | phase_offset (value from 0 to 1) describes how the timing of the stepping motion differs between
99 | different feet. For example, if the phase_offset between two legs differs by 0.5, it means
100 | one leg will start the stepping motion in the middle of the stepping motion cycle of the other leg.
101 | swing_up_down is how much the foot swings vertical during the motion cycle.
102 | swing_forward_back is how much the foot swings horizontally during the motion cycle.
103 | If swing_forward_back is positive, the foot would look like it's going forward, if it's negative, the foot will look like it's going backward.
104 | If should_activate is False, the leg will not follow the stepping motion.
105 |
106 | ```
107 | def execute_plan(plan_duration=2)
108 | ```
109 | This function sends the parameters to the robot and execute the plan for `plan_duration` seconds, default to be 2
110 |
111 | Example answer code:
112 | ```
113 | import numpy as np # import numpy because we are using it below
114 |
115 | reset_reward() # This is a new task so reset reward; otherwise we don't need it
116 | set_torso_targets(0.1, np.deg2rad(5), np.deg2rad(15), (2, 3), None, None, np.deg2rad(10))
117 |
118 | set_foot_pos_parameters('front_left', 0.1, 0.1, None)
119 | set_foot_pos_parameters('back_left', None, None, 0.15)
120 | set_foot_pos_parameters('front_right', None, None, None)
121 | set_foot_pos_parameters('back_right', 0.0, 0.0, None)
122 | set_foot_stepping_parameters('front_right', 2.0, 0.5, 0.2, 0.1, -0.05, True)
123 | set_foot_stepping_parameters('back_left', 3.0, 0.7, 0.1, 0.1, 0.05, True)
124 | set_foot_stepping_parameters('front_left', 0.0, 0.0, 0.0, 0.0, 0.0, False)
125 | set_foot_stepping_parameters('back_right', 0.0, 0.0, 0.0, 0.0, 0.0, False)
126 |
127 | execute_plan(4)
128 | ```
129 |
130 | Remember:
131 | 1. Always format the code in code blocks. In your response all four functions above: set_torso_targets, set_foot_pos_parameters, execute_plan, should be called at least once.
132 | 2. Do not invent new functions or classes. The only allowed functions you can call are the ones listed above. Do not leave unimplemented code blocks in your response.
133 | 3. The only allowed library is numpy. Do not import or use any other library. If you use np, be sure to import numpy.
134 | 4. If you are not sure what value to use, just use your best judge. Do not use None for anything.
135 | 5. Do not calculate the position or direction of any object (except for the ones provided above). Just use a number directly based on your best guess.
136 | 6. For set_torso_targets, only the last four arguments (target_torso_location_xy, target_torso_velocity_xy, target_torso_heading, target_turning_speed) can be None. Do not set None for any other arguments.
137 | 7. Don't forget to call execute_plan at the end.
138 |
139 | """
140 |
141 |
142 | class PromptThinkerCoder(llm_prompt.LLMPrompt):
143 | """Prompt with both Motion Descriptor and Reward Coder."""
144 |
145 | def __init__(
146 | self,
147 | client: barkour_l2r_task_client.BarkourClient,
148 | executor: safe_executor.SafeExecutor,
149 | ):
150 | self._agent = client.agent()
151 | self._safe_executor = barkour_execution.BarkourSafeExecutor(executor)
152 |
153 | self.name = "Language2StructuredLang2Reward"
154 |
155 | self.num_llms = 2
156 | self.prompts = [prompt_thinker, prompt_coder]
157 |
158 | # The coder doesn't need to keep the history as it only serves a purpose for
159 | # translating to code
160 | self.keep_message_history = [True, False]
161 | self.response_processors = [
162 | self.process_thinker_response,
163 | self.process_coder_response,
164 | ]
165 | self.code_executor = self.execute_code
166 |
167 | # process the response from thinker, the output will be used as input to coder
168 | def process_thinker_response(self, response: str) -> str:
169 | try:
170 | motion_description = (
171 | re.split(
172 | "end of description",
173 | re.split("start of description", response, flags=re.IGNORECASE)[
174 | 1
175 | ],
176 | flags=re.IGNORECASE,
177 | )[0]
178 | .strip("[")
179 | .strip("]")
180 | .strip()
181 | .strip("```")
182 | )
183 | return motion_description
184 | except Exception as _: # pylint: disable=broad-exception-caught
185 | return response
186 |
187 | def process_coder_response(self, response):
188 | """Process the response from coder, the output will be the python code."""
189 | return process_code.process_code_block(response)
190 |
191 | def execute_code(self, code: str) -> None:
192 | print("ABOUT TO EXECUTE\n", code)
193 | mjpc_parameters = self._safe_executor.execute(code)
194 | self._agent.set_task_parameters(mjpc_parameters.task_parameters)
195 | self._agent.set_cost_weights(mjpc_parameters.cost_weights)
196 |
--------------------------------------------------------------------------------
/src/language_to_reward_2023/platforms/llm_prompt.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 DeepMind Technologies Limited
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Abstract class for LLM Prompts."""
17 | import dataclasses
18 | from typing import Any, Optional, Sequence
19 |
20 |
21 | @dataclasses.dataclass
22 | class LLMPrompt:
23 | name: str = "TaskEvaluator"
24 | num_llms: int = 0
25 | prompts: Sequence[str] = dataclasses.field(default_factory=list)
26 | keep_message_history: Sequence[bool] = dataclasses.field(default_factory=list)
27 | response_processors: Sequence[Any] = dataclasses.field(default_factory=list)
28 | code_executor: Optional[Any] = None
29 |
--------------------------------------------------------------------------------
/src/language_to_reward_2023/platforms/process_code.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 DeepMind Technologies Limited
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Utilities for processing LLM-generated code into executable code."""
17 |
18 | import re
19 |
20 |
21 | def _fix_code(code_str: str) -> str:
22 | """Fixes common mistakes in LLM-generated code."""
23 | if "np." in code_str and "import numpy as np" not in code_str:
24 | code_str = "import numpy as np\n" + code_str
25 |
26 | return code_str
27 |
28 |
29 | def process_code_block(text: str) -> str:
30 | """Extracts code blocks from the input string.
31 |
32 | Arguments:
33 | text: A string which may contain markdown code blocks.
34 |
35 | Returns:
36 | Code concatenated from the markdown code blocks, with some code fixes
37 | applied. The code is untrusted and should only be executed in a sandbox.
38 | """
39 | code_block_regex = r"```(python)?\n?([\s\S]*?)```"
40 | matches = re.findall(code_block_regex, text)
41 | if matches:
42 | code = "\n".join([m[1] for m in matches])
43 | else:
44 | # If there are no code block markers, assume the whole text is one code
45 | # block.
46 | code = text
47 | return _fix_code(code)
48 |
--------------------------------------------------------------------------------
/src/language_to_reward_2023/platforms/process_code_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 DeepMind Technologies Limited
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | from absl.testing import absltest
17 | from language_to_reward_2023.platforms import process_code
18 |
19 |
20 | class ProcessCodeTest(absltest.TestCase):
21 |
22 | def test_extract_code_block(self):
23 | block = process_code.process_code_block("""
24 | ```python
25 | banana
26 | ```
27 | """)
28 | self.assertEqual(block.strip(), "banana")
29 |
30 | def test_extract_multiple_code_blocks(self):
31 | block = process_code.process_code_block("""
32 | ```python
33 | banana
34 | ```
35 |
36 | ```
37 | apple
38 | ```
39 | """)
40 | self.assertEqual(block, "banana\n\napple\n")
41 |
42 | def test_add_numpy(self):
43 | block = process_code.process_code_block("""
44 | ```python
45 | np.zeros(3)
46 | ```
47 | """)
48 | self.assertEqual(block.strip(), "import numpy as np\nnp.zeros(3)")
49 |
50 | def test_no_block_markers(self):
51 | block = process_code.process_code_block("""np.zeros(3)
52 | """)
53 | self.assertEqual(block.strip(), "import numpy as np\nnp.zeros(3)")
54 |
55 |
56 | if __name__ == "__main__":
57 | absltest.main()
58 |
--------------------------------------------------------------------------------
/src/language_to_reward_2023/platforms/task_evaluator.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 DeepMind Technologies Limited
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Abstract base class for task evaluators."""
17 | import dataclasses
18 | from typing import Any, Sequence
19 |
20 |
21 | @dataclasses.dataclass
22 | class TaskEvaluator:
23 | name: str = "TaskEvaluator"
24 | evaluators: Sequence[Any] = dataclasses.field(default_factory=list)
25 | queries: Sequence[Any] = dataclasses.field(default_factory=list)
26 |
--------------------------------------------------------------------------------
/src/language_to_reward_2023/safe_executor.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 DeepMind Technologies Limited
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Base class for executing untrusted code."""
17 |
18 | from typing import Protocol
19 |
20 |
21 | class SafeExecutor(Protocol):
22 | """Base class for executors that run untrusted code and produce their output."""
23 |
24 | def safe_execute(self, code: str) -> str:
25 | """Executes the given Python code and returns the standard output from it.
26 |
27 | Arguments:
28 | code: code which edits the weights and params dicts.
29 |
30 | Raises:
31 | ValueError: the code doesn't compile or failed to run.
32 |
33 | Returns:
34 | The standard output from the executed code.
35 | """
36 |
--------------------------------------------------------------------------------
/src/language_to_reward_2023/task_clients.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 DeepMind Technologies Limited
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Classes that manage resources related to MJPC tasks."""
17 |
18 | import abc
19 | import pathlib
20 | from typing import Any, Callable, Mapping, Optional, Sequence
21 |
22 | import mujoco
23 | from mujoco_mpc import agent as agent_lib
24 | import numpy as np
25 |
26 | from mujoco_mpc.proto import agent_pb2
27 |
28 |
29 | DEFAULT_UI_SERVER_PATH = str(
30 | pathlib.Path(__file__).parent / "mjpc" / "l2r_ui_server"
31 | )
32 | DEFAULT_HEADLESS_SERVER_PATH = str(
33 | pathlib.Path(__file__).parent / "mjpc" / "l2r_headless_server"
34 | )
35 |
36 |
37 | class TaskClient(abc.ABC):
38 | """Base class for task clients."""
39 |
40 | @abc.abstractmethod
41 | def reset(self):
42 | """Resets the task to a starting position."""
43 |
44 | @abc.abstractmethod
45 | def model(self) -> mujoco.MjModel:
46 | """Returns an MjModel for the task."""
47 |
48 | def close(self):
49 | """Releases any resources associated with the task."""
50 |
51 | def methods(self) -> dict[str, Callable[..., Any]]:
52 | """Returns the set of methods supported by the client."""
53 | return {}
54 |
55 |
56 | def create_agent(
57 | task_id: str,
58 | ui: bool,
59 | real_time_speed: float = 1,
60 | server_binary_path: Optional[str] = None,
61 | subprocess_kwargs: Optional[Mapping[str, Any]] = None,
62 | extra_flags: Optional[Sequence[str]] = None,
63 | ) -> agent_lib.Agent:
64 | """Helper function to create an agent_lib.Agent instance."""
65 | if server_binary_path is None:
66 | if ui:
67 | server_binary_path = DEFAULT_UI_SERVER_PATH
68 | else:
69 | server_binary_path = DEFAULT_HEADLESS_SERVER_PATH
70 |
71 | if ui:
72 | flags = [
73 | '--planner_enabled',
74 | ]
75 | else:
76 | flags = []
77 |
78 | return agent_lib.Agent(
79 | task_id,
80 | server_binary_path=server_binary_path,
81 | real_time_speed=real_time_speed,
82 | extra_flags=flags + (extra_flags or []),
83 | subprocess_kwargs=subprocess_kwargs,
84 | )
85 |
86 |
87 | class AgentApiTaskClient(TaskClient):
88 | """Base class for task clients which starts the Agent Service.
89 |
90 | By default, this will be a headless client, but if ui==True, an interactive
91 | user interface will be run while executing the task. The UI version will
92 | behave differently from the headless version, because planning is async.
93 | """
94 |
95 | def __init__(
96 | self,
97 | agent: agent_lib.Agent,
98 | model: mujoco.MjModel,
99 | planning_duration: float = 0.04,
100 | real_time_speed: float = 1.0,
101 | ):
102 | self._agent = agent
103 | self._model = model
104 | self._data = mujoco.MjData(self._model)
105 |
106 | self._last_planning_time = None
107 | self._planning_duration = planning_duration
108 | self._real_time_speed = real_time_speed
109 | self._recorded = None
110 |
111 | def agent(self) -> agent_lib.Agent:
112 | return self._agent
113 |
114 | def model(self) -> mujoco.MjModel:
115 | return self._model
116 |
117 | def reset(self):
118 | self._agent.reset()
119 | self._last_planning_time = None
120 |
121 | def close(self):
122 | """Releases any resources associated with the task."""
123 | self._agent.close()
124 |
125 | def start_recording(self):
126 | """Start recording simulation states, for later rendering."""
127 | self._recorded = []
128 |
129 | def end_recording(self) -> list[agent_pb2.State] | None:
130 | """Returns a list of states recorded since the call to start_recording."""
131 | recorded = self._recorded
132 | self._recorded = None
133 | return recorded
134 |
135 | def update_state(self):
136 | """Updates self._data with latest state from the agent binary."""
137 | state = self._agent.get_state()
138 | self._data.time = state.time
139 | self._data.qpos = state.qpos
140 | self._data.qvel = state.qvel
141 | self._data.act = state.act
142 | self._data.mocap_pos = np.array(state.mocap_pos).reshape(
143 | self._data.mocap_pos.shape
144 | )
145 | self._data.mocap_quat = np.array(state.mocap_quat).reshape(
146 | self._data.mocap_quat.shape
147 | )
148 | self._data.userdata = np.array(state.userdata).reshape(
149 | self._data.userdata.shape
150 | )
151 | mujoco.mj_forward(self._model, self._data)
152 | if self._recorded is not None:
153 | self._recorded.append(state)
154 |
155 | def execute_plan(self, duration=10):
156 | """Runs MJPC for `duration` seconds while executing planner actions."""
157 | state = self._agent.get_state()
158 | start = state.time
159 | while state.time - start < duration:
160 | if self._last_planning_time is None or (
161 | state.time - self._last_planning_time
162 | >= self._planning_duration * self._real_time_speed
163 | ):
164 | self._last_planning_time = state.time
165 | # repeat planning for 5 times
166 | for _ in range(5):
167 | self._agent.planner_step()
168 | self.update_state()
169 | self._agent.set_state() # needed to call transition()
170 | self._agent.step()
171 | state = self._agent.get_state()
172 |
--------------------------------------------------------------------------------
/src/language_to_reward_2023/task_configs.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 DeepMind Technologies Limited
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Config file for evaluation and user interaction scripts."""
17 |
18 | import dataclasses
19 | from typing import Any
20 |
21 | from language_to_reward_2023 import task_clients
22 | from language_to_reward_2023.platforms.barkour import barkour_l2r_task_client
23 | from language_to_reward_2023.platforms.barkour.prompts import prompt_coder_only as bk_prompt_coder_only
24 | from language_to_reward_2023.platforms.barkour.prompts import prompt_low_level as bk_prompt_low_level
25 | from language_to_reward_2023.platforms.barkour.prompts import prompt_thinker_coder as bk_prompt_thinker_coder
26 |
27 |
28 | @dataclasses.dataclass(frozen=True)
29 | class TaskConfig:
30 | client: type[task_clients.TaskClient]
31 | prompts: dict[str, type[Any]]
32 |
33 |
34 | ALL_TASKS = {
35 | 'barkour': TaskConfig(
36 | client=barkour_l2r_task_client.BarkourClient,
37 | prompts={
38 | 'thinker_coder': bk_prompt_thinker_coder.PromptThinkerCoder,
39 | 'coder_only': bk_prompt_coder_only.PromptCoder,
40 | 'low_level': bk_prompt_low_level.PromptLowLevel,
41 | },
42 | ),
43 | }
44 |
--------------------------------------------------------------------------------
/src/language_to_reward_2023/user_interaction.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 DeepMind Technologies Limited
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """A conversational chat utility for controlling MJPC."""
17 |
18 | from typing import Any, List
19 |
20 | from absl import app
21 | from absl import flags
22 | import colorama
23 | import openai
24 | import termcolor
25 |
26 | from language_to_reward_2023 import confirmation_safe_executor
27 | from language_to_reward_2023 import conversation
28 | from language_to_reward_2023 import task_configs
29 |
30 |
31 | _API_KEY_FLAG = flags.DEFINE_string("api_key", "", "OpenAI API Key")
32 | _TASK_FLAG = flags.DEFINE_enum(
33 | "task", "barkour", list(task_configs.ALL_TASKS), "task to be used"
34 | )
35 | _PROMPT_FLAG = flags.DEFINE_string(
36 | "prompt", "thinker_coder", "prompt to be used"
37 | )
38 | MODEL = "gpt-4"
39 |
40 | colorama.init()
41 |
42 |
43 | def main(argv: List[str]) -> None:
44 | if len(argv) > 1:
45 | raise app.UsageError("Too many command-line arguments.")
46 |
47 | safe_executor = confirmation_safe_executor.ConfirmationSafeExecutor()
48 |
49 | assert _TASK_FLAG.value in task_configs.ALL_TASKS
50 |
51 | openai.api_key = _API_KEY_FLAG.value
52 | task_config = task_configs.ALL_TASKS[_TASK_FLAG.value]
53 | if _PROMPT_FLAG.value not in task_config.prompts:
54 | raise ValueError(
55 | "Invalid value for --prompt. Valid values:"
56 | f" {', '.join(task_config.prompts)}"
57 | )
58 | prompt = task_config.prompts[_PROMPT_FLAG.value]
59 | print(
60 | "Starting MJPC UI"
61 | )
62 | client_class: Any = task_config.client
63 | client = client_class(ui=True)
64 |
65 | try:
66 | # send the grpc channel to the prompt model to create stub
67 | prompt_model = prompt(
68 | client, executor=safe_executor
69 | )
70 | conv = conversation.Conversation(prompt_model, MODEL)
71 | client.reset()
72 |
73 | while True:
74 | user_command = input(termcolor.colored("User: ", "red", attrs=["bold"]))
75 | try:
76 | response = conv.send_command(user_command)
77 | except Exception as e: # pylint: disable=broad-exception-caught
78 | print("Planning failed, try something else... " + str(e) + "\n")
79 | continue
80 |
81 | # Final response should be code
82 | try:
83 | prompt_model.code_executor(response)
84 | except Exception as e: # pylint: disable=broad-exception-caught
85 | print("Execution failed, try something else... " + str(e) + "\n")
86 | finally:
87 | client.close()
88 |
89 |
90 | if __name__ == "__main__":
91 | app.run(main)
92 |
--------------------------------------------------------------------------------