├── .gitignore ├── CMakeLists.txt ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── cmake └── FindOrFetch.cmake ├── mjpc ├── CMakeLists.txt ├── barkour │ ├── CMakeLists.txt │ ├── barkour.cc │ ├── barkour.h │ ├── barkour_vb.xml.patch │ ├── task_barkour.xml │ └── world.xml ├── headless_server.cc └── ui_server.cc ├── pyproject.toml ├── requirements.txt ├── setup.py └── src └── language_to_reward_2023 ├── confirmation_safe_executor.py ├── conversation.py ├── platforms ├── barkour │ ├── barkour_execution.py │ ├── barkour_execution_test.py │ ├── barkour_l2r_task_client.py │ ├── barkour_l2r_task_client_test.py │ ├── barkour_l2r_tasks.py │ ├── barkour_l2r_tasks_test.py │ ├── default_task_parameters.py │ └── prompts │ │ ├── prompt_coder_only.py │ │ ├── prompt_low_level.py │ │ └── prompt_thinker_coder.py ├── llm_prompt.py ├── process_code.py ├── process_code_test.py └── task_evaluator.py ├── safe_executor.py ├── task_clients.py ├── task_configs.py └── user_interaction.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # Distribution / packaging 7 | .Python 8 | build/ 9 | develop-eggs/ 10 | dist/ 11 | downloads/ 12 | eggs/ 13 | .eggs/ 14 | lib/ 15 | lib64/ 16 | parts/ 17 | sdist/ 18 | var/ 19 | wheels/ 20 | share/python-wheels/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | MANIFEST 25 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.16) 2 | 3 | # INTERPROCEDURAL_OPTIMIZATION is enforced when enabled. 4 | set(CMAKE_POLICY_DEFAULT_CMP0069 NEW) 5 | # Default to GLVND if available. 6 | set(CMAKE_POLICY_DEFAULT_CMP0072 NEW) 7 | 8 | # This line has to appear before 'PROJECT' in order to be able to disable incremental linking 9 | set(MSVC_INCREMENTAL_DEFAULT ON) 10 | 11 | project( 12 | language_to_reward_2023 13 | VERSION 0.1.0 14 | DESCRIPTION "Language to Reward: 2023 paper" 15 | HOMEPAGE_URL "https://github.com/google-deepmind/language_to_reward_2023" 16 | ) 17 | 18 | enable_language(C) 19 | enable_language(CXX) 20 | if(APPLE) 21 | enable_language(OBJC) 22 | enable_language(OBJCXX) 23 | endif() 24 | 25 | list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake") 26 | 27 | include(FindOrFetch) 28 | 29 | find_package(Threads REQUIRED) 30 | 31 | set(BUILD_SHARED_LIBS_OLD ${BUILD_SHARED_LIBS}) 32 | set(BUILD_SHARED_LIBS 33 | OFF 34 | CACHE INTERNAL "Build SHARED libraries" 35 | ) 36 | 37 | set(L2R_MJPC_GIT_TAG 38 | c5c7ead065b7f4034ab265a13023231900dbfaa7 39 | CACHE STRING "Git revision for MuJoCo MPC." 40 | ) 41 | 42 | set(L2R_MENAGERIE_GIT_TAG 43 | main 44 | CACHE STRING "Git revision for MuJoCo Menagerie." 45 | ) 46 | 47 | set(MJPC_BUILD_TESTS OFF) 48 | set(MJPC_GRPC_BUILD_TESTS OFF) 49 | set(MJPC_BUILD_GRPC_SERVICE ON) 50 | set(PYMJPC_BUILD_TESTS OFF) 51 | 52 | findorfetch( 53 | USE_SYSTEM_PACKAGE 54 | OFF 55 | PACKAGE_NAME 56 | mjpc 57 | LIBRARY_NAME 58 | mjpc 59 | GIT_REPO 60 | https://github.com/google-deepmind/mujoco_mpc.git 61 | GIT_TAG 62 | ${L2R_MJPC_GIT_TAG} 63 | TARGETS 64 | libmjpc 65 | mjpc_agent_service 66 | mjpc_ui_agent_service 67 | ) 68 | 69 | set(BUILD_SHARED_LIBS 70 | ${BUILD_SHARED_LIBS_OLD} 71 | CACHE BOOL "Build SHARED libraries" FORCE 72 | ) 73 | unset(BUILD_SHARED_LIBS_OLD) 74 | 75 | FetchContent_Declare( 76 | menagerie 77 | GIT_REPOSITORY https://github.com/google-deepmind/mujoco_menagerie.git 78 | GIT_TAG ${L2R_MENAGERIE_GIT_TAG} 79 | ) 80 | 81 | FetchContent_GetProperties(menagerie) 82 | if(NOT menagerie_POPULATED) 83 | FetchContent_Populate(menagerie) 84 | endif() 85 | 86 | add_subdirectory(mjpc) 87 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | ## Contributor License Agreement 4 | 5 | Contributions to this project must be accompanied by a Contributor License 6 | Agreement. You (or your employer) retain the copyright to your contribution, 7 | this simply gives us permission to use and redistribute your contributions as 8 | part of the project. Head over to to see 9 | your current agreements on file or to sign a new one. 10 | 11 | You generally only need to submit a CLA once, so if you've already submitted one 12 | (even if it was for a different project), you probably don't need to do it 13 | again. 14 | 15 | ## Code reviews 16 | 17 | All submissions, including submissions by project members, require review. We 18 | use GitHub pull requests for this purpose. Consult 19 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 20 | information on using pull requests. 21 | 22 | ## Community Guidelines 23 | 24 | This project follows [Google's Open Source Community 25 | Guidelines](https://opensource.google/conduct/). 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # language_to_reward_2023 2 | 3 | This repository contains code to reproduce the results in the paper 4 | "Language to Rewards for Robotic Skill Synthesis". 5 | 6 | [Project website](https://language-to-reward.github.io/) 7 | [arxiv paper](https://arxiv.org/abs/2306.08647) 8 | [video](https://www.youtube.com/watch?v=7KiKg0rdSSQ&feature=youtu.be) 9 | 10 | ## Abstract 11 | Large language models (LLMs) have demonstrated exciting progress in acquiring diverse new capabilities through in-context learning, ranging from logical reasoning to code-writing. Robotics researchers have also explored using LLMs to advance the capabilities of robotic control. However, since low-level robot actions are hardware-dependent and underrepresented in LLM training corpora, existing efforts in applying LLMs to robotics have largely treated LLMs as semantic planners or relied on human-engineered control primitives to interface with the robot. On the other hand, reward functions are shown to be flexible representations that can be optimized for control policies to achieve diverse tasks, while their semantic richness makes them suitable to be specified by LLMs. 12 | 13 | In this work, we introduce a new paradigm that harnesses this realization by utilizing LLMs to define reward parameters that can be optimized and accomplish variety of robotic tasks. Using reward as the intermediate interface generated by LLMs, we can effectively bridge the gap between high-level language instructions or corrections to low-level robot actions. Meanwhile, combining this with a real-time optimizer, MuJoCo MPC, empowers an interactive behavior creation experience where users can immediately observe the results and provide feedback to the system. To systematically evaluate the performance of our proposed method, we designed a total of 17 tasks for a simulated quadruped robot and a dexterous manipulator robot. We demonstrate that our proposed method reliably tackles 90% of the designed tasks, while a baseline using primitive skills as the interface with Code-as-policies achieves 50% of the tasks. We further validated our method on a real robot arm where complex manipulation skills such as non-prehensile pushing emerge through our interactive system. 14 | 15 | ## Installation and Usage 16 | 17 | ```sh 18 | # Create a Python venv, make sure to use Python version >= 3.9 19 | python3 --version 20 | python3 -m venv /tmp/l2r 21 | . /tmp/l2r/bin/activate 22 | 23 | # Build and install mujoco_mpc 24 | git clone https://github.com/google-deepmind/mujoco_mpc.git 25 | cd mujoco_mpc 26 | # Latest MJPC commit at the time of release. Using `main` might work too. 27 | git checkout c5c7ead065b7f4034ab265a13023231900dbfaa7 28 | 29 | # Compile mujoco_mpc from source and install, this step can take a few minutes. 30 | pip install ./python 31 | 32 | cd .. 33 | 34 | # Build and install language_to_reward_2023 35 | git clone https://github.com/google-deepmind/language_to_reward_2023.git 36 | cd language_to_reward_2023 37 | 38 | # Compile language_to_reward_2023 from source and install, this step can take a few minutes. 39 | pip install . 40 | 41 | # Run the demo 42 | python -m language_to_reward_2023.user_interaction --api_key= 43 | ``` 44 | 45 | ## Notes 46 | 47 | In the published paper, we showed the robot moon walking. This was done with an older version of the MuJoCo model, which had unrealistically strong actuators. In the model used in this repository, moonwalking is not possible, regretfully. 48 | 49 | 50 | ## Citing this work 51 | 52 | ``` 53 | @article{yu2023language, 54 | title={Language to Rewards for Robotic Skill Synthesis}, 55 | author={Yu, Wenhao and Gileadi, Nimrod and Fu, Chuyuan and Kirmani, Sean and Lee, Kuang-Huei and Gonzalez Arenas, Montse and Lewis Chiang, Hao-Tien and Erez, Tom and Hasenclever, Leonard and Humplik, Jan and Ichter, Brian and Xiao, Ted and Xu, Peng and Zeng, Andy and Zhang, Tingnan and Heess, Nicolas and Sadigh, Dorsa and Tan, Jie and Tassa, Yuval and Xia, Fei}, 56 | year={2023}, 57 | journal={Conference of Robot Learning 2023}, 58 | } 59 | ``` 60 | 61 | ## License and disclaimer 62 | 63 | Copyright 2023 DeepMind Technologies Limited 64 | 65 | All software is licensed under the Apache License, Version 2.0 (Apache 2.0); 66 | you may not use this file except in compliance with the Apache 2.0 license. 67 | You may obtain a copy of the Apache 2.0 license at: 68 | https://www.apache.org/licenses/LICENSE-2.0 69 | 70 | All other materials are licensed under the Creative Commons Attribution 4.0 71 | International License (CC-BY). You may obtain a copy of the CC-BY license at: 72 | https://creativecommons.org/licenses/by/4.0/legalcode 73 | 74 | Unless required by applicable law or agreed to in writing, all software and 75 | materials distributed here under the Apache 2.0 or CC-BY licenses are 76 | distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 77 | either express or implied. See the licenses for the specific language governing 78 | permissions and limitations under those licenses. 79 | 80 | This is not an official Google product. 81 | -------------------------------------------------------------------------------- /cmake/FindOrFetch.cmake: -------------------------------------------------------------------------------- 1 | #.rst: 2 | # FindOrFetch 3 | # ---------------------- 4 | # 5 | # Find or fetch a package in order to satisfy target dependencies. 6 | # 7 | # FindOrFetch([USE_SYSTEM_PACKAGE [ON/OFF]] 8 | # [PACKAGE_NAME [name]] 9 | # [LIBRARY_NAME [name]] 10 | # [GIT_REPO [repo]] 11 | # [GIT_TAG [tag]] 12 | # [PATCH_COMMAND [cmd] [args]] 13 | # [TARGETS [targets]] 14 | # [EXCLUDE_FROM_ALL]) 15 | # 16 | # The command has the following parameters: 17 | # 18 | # Arguments: 19 | # - ``USE_SYSTEM_PACKAGE`` one-value argument on whether to search for the 20 | # package in the system (ON) or whether to fetch the library using 21 | # FetchContent from the specified Git repository (OFF). Note that 22 | # FetchContent variables will override this behaviour. 23 | # - ``PACKAGE_NAME`` name of the system-package. Ignored if 24 | # ``USE_SYSTEM_PACKAGE`` is ``OFF``. 25 | # - ``LIBRARY_NAME`` name of the library. Ignored if 26 | # ``USE_SYSTEM_PACKAGE`` is ``ON``. 27 | # - ``GIT_REPO`` git repository to fetch the library from. Ignored if 28 | # ``USE_SYSTEM_PACKAGE`` is ``ON``. 29 | # - ``GIT_TAG`` tag reference when fetching the library from the git 30 | # repository. Ignored if ``USE_SYSTEM_PACKAGE`` is ``ON``. 31 | # - ``PATCH_COMMAND`` Specifies a custom command to patch the sources after an 32 | # update. See https://cmake.org/cmake/help/latest/module/ExternalProject.html#command:externalproject_add 33 | # for details on the parameter. 34 | # - ``TARGETS`` list of targets to be satisfied. If any of these targets are 35 | # not currently defined, this macro will attempt to either find or fetch the 36 | # package. 37 | # - ``EXCLUDE_FROM_ALL`` if specified, the targets are not added to the ``all`` 38 | # metatarget. 39 | # 40 | # Note: if ``USE_SYSTEM_PACKAGE`` is ``OFF``, FetchContent will be used to 41 | # retrieve the specified targets. It is possible to specify any variable in 42 | # https://cmake.org/cmake/help/latest/module/FetchContent.html#variables to 43 | # override this macro behaviour. 44 | 45 | if(COMMAND FindOrFetch) 46 | return() 47 | endif() 48 | 49 | macro(FindOrFetch) 50 | if(NOT FetchContent) 51 | include(FetchContent) 52 | endif() 53 | 54 | # Parse arguments. 55 | set(options EXCLUDE_FROM_ALL) 56 | set(one_value_args 57 | USE_SYSTEM_PACKAGE 58 | PACKAGE_NAME 59 | LIBRARY_NAME 60 | GIT_REPO 61 | GIT_TAG 62 | ) 63 | set(multi_value_args PATCH_COMMAND TARGETS) 64 | cmake_parse_arguments( 65 | _ARGS 66 | "${options}" 67 | "${one_value_args}" 68 | "${multi_value_args}" 69 | ${ARGN} 70 | ) 71 | 72 | # Check if all targets are found. 73 | if(NOT _ARGS_TARGETS) 74 | message(FATAL_ERROR "mujoco::FindOrFetch: TARGETS must be specified.") 75 | endif() 76 | 77 | set(targets_found TRUE) 78 | message(CHECK_START 79 | "mujoco::FindOrFetch: checking for targets in package `${_ARGS_PACKAGE_NAME}`" 80 | ) 81 | foreach(target ${_ARGS_TARGETS}) 82 | if(NOT TARGET ${target}) 83 | message(CHECK_FAIL "target `${target}` not defined.") 84 | set(targets_found FALSE) 85 | break() 86 | endif() 87 | endforeach() 88 | 89 | # If targets are not found, use `find_package` or `FetchContent...` to get it. 90 | if(NOT targets_found) 91 | if(${_ARGS_USE_SYSTEM_PACKAGE}) 92 | message(CHECK_START 93 | "mujoco::FindOrFetch: finding `${_ARGS_PACKAGE_NAME}` in system packages..." 94 | ) 95 | find_package(${_ARGS_PACKAGE_NAME} REQUIRED) 96 | message(CHECK_PASS "found") 97 | else() 98 | message(CHECK_START 99 | "mujoco::FindOrFetch: Using FetchContent to retrieve `${_ARGS_LIBRARY_NAME}`" 100 | ) 101 | FetchContent_Declare( 102 | ${_ARGS_LIBRARY_NAME} 103 | GIT_REPOSITORY ${_ARGS_GIT_REPO} 104 | GIT_TAG ${_ARGS_GIT_TAG} 105 | GIT_SHALLOW FALSE 106 | PATCH_COMMAND ${_ARGS_PATCH_COMMAND} 107 | UPDATE_DISCONNECTED TRUE 108 | ) 109 | if(${_ARGS_EXCLUDE_FROM_ALL}) 110 | FetchContent_GetProperties(${_ARGS_LIBRARY_NAME}) 111 | if(NOT ${${_ARGS_LIBRARY_NAME}_POPULATED}) 112 | FetchContent_Populate(${_ARGS_LIBRARY_NAME}) 113 | add_subdirectory( 114 | ${${_ARGS_LIBRARY_NAME}_SOURCE_DIR} ${${_ARGS_LIBRARY_NAME}_BINARY_DIR} 115 | EXCLUDE_FROM_ALL 116 | ) 117 | endif() 118 | else() 119 | FetchContent_MakeAvailable(${_ARGS_LIBRARY_NAME}) 120 | endif() 121 | message(CHECK_PASS "Done") 122 | endif() 123 | else() 124 | message(CHECK_PASS "found") 125 | endif() 126 | endmacro() 127 | -------------------------------------------------------------------------------- /mjpc/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable( 2 | l2r_headless_server 3 | headless_server.cc 4 | barkour/barkour.cc 5 | barkour/barkour.h 6 | ) 7 | 8 | target_link_libraries( 9 | l2r_headless_server 10 | absl::check 11 | absl::flags 12 | absl::flags_parse 13 | absl::log 14 | absl::status 15 | absl::strings 16 | mujoco::mujoco 17 | libmjpc 18 | mjpc_agent_service 19 | ) 20 | 21 | target_include_directories(l2r_headless_server PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/..) 22 | message(AGENT_SERVICE_COMPILE_OPTIONS=${AGENT_SERVICE_COMPILE_OPTIONS}) 23 | target_compile_options(l2r_headless_server PUBLIC ${AGENT_SERVICE_COMPILE_OPTIONS}) 24 | target_link_options(l2r_headless_server PRIVATE ${AGENT_SERVICE_LINK_OPTIONS}) 25 | 26 | add_executable( 27 | l2r_ui_server 28 | ui_server.cc 29 | barkour/barkour.cc 30 | barkour/barkour.h 31 | ) 32 | 33 | target_link_libraries( 34 | l2r_ui_server 35 | absl::check 36 | absl::flags 37 | absl::flags_parse 38 | absl::log 39 | absl::status 40 | absl::strings 41 | mujoco::mujoco 42 | libmjpc 43 | mjpc_ui_agent_service 44 | ) 45 | 46 | target_include_directories(l2r_ui_server PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/..) 47 | target_compile_options(l2r_ui_server PUBLIC ${AGENT_SERVICE_COMPILE_OPTIONS}) 48 | target_link_options(l2r_ui_server PRIVATE ${AGENT_SERVICE_LINK_OPTIONS}) 49 | 50 | add_subdirectory(barkour) 51 | add_dependencies(l2r_headless_server copy_barkour_resources) 52 | add_dependencies(l2r_ui_server copy_barkour_resources) 53 | -------------------------------------------------------------------------------- /mjpc/barkour/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copy the model files to the binary directory to make them available to the 2 | # built binary. 3 | 4 | # Start by pulling the Barkour vB model from MuJoCo Menagerie 5 | add_custom_target( 6 | copy_menagerie_barkour_resources ALL 7 | COMMAND ${CMAKE_COMMAND} -E copy 8 | ${menagerie_SOURCE_DIR}/google_barkour_vb/barkour_vb.xml 9 | ${CMAKE_CURRENT_BINARY_DIR}/barkour_vb.xml 10 | COMMAND ${CMAKE_COMMAND} -E copy_directory 11 | ${menagerie_SOURCE_DIR}/google_barkour_vb/assets 12 | ${CMAKE_CURRENT_BINARY_DIR}/assets 13 | # apply a patch to barkour_vb.xml 14 | COMMAND patch -o ${CMAKE_CURRENT_BINARY_DIR}/barkour_vb_modified.xml 15 | ${CMAKE_CURRENT_BINARY_DIR}/barkour_vb.xml 16 | <${CMAKE_CURRENT_SOURCE_DIR}/barkour_vb.xml.patch 17 | COMMENT "Copying Barkour vB from MuJoCo Menagerie") 18 | 19 | # Add further task XML from this repository 20 | add_custom_target(copy_barkour_resources ALL 21 | COMMAND ${CMAKE_COMMAND} -E copy 22 | ${CMAKE_CURRENT_SOURCE_DIR}/task_barkour.xml 23 | ${CMAKE_CURRENT_SOURCE_DIR}/world.xml 24 | ${CMAKE_CURRENT_BINARY_DIR} 25 | COMMENT "Copying Barkour task XML into binary directory") 26 | 27 | add_dependencies(copy_barkour_resources copy_menagerie_barkour_resources) 28 | -------------------------------------------------------------------------------- /mjpc/barkour/barkour.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2023 DeepMind Technologies Limited 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | // ============================================================================== 15 | 16 | #include "mjpc/barkour/barkour.h" 17 | 18 | #include 19 | #include 20 | #include 21 | 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | namespace mjpc::language2reward { 28 | 29 | namespace { // anonymous namespace for local definitions 30 | 31 | constexpr double kNominalFootFromShoulder[4][2] = {{0.05, 0.0}, 32 | {-0.05, 0.0}, 33 | {0.05, -0.0}, 34 | {-0.05, -0.0}}; 35 | 36 | // ============ reusable utilities ============ 37 | 38 | void RotateVectorAroundZaxis(const double* vec, const double angle, 39 | double* rotated_vec) { 40 | rotated_vec[0] = mju_cos(angle) * vec[0] - mju_sin(angle) * vec[1]; 41 | rotated_vec[1] = mju_sin(angle) * vec[0] + mju_cos(angle) * vec[1]; 42 | rotated_vec[2] = vec[2]; 43 | } 44 | 45 | // Unwind the roll current angle to be close to last measured roll angle 46 | // to prevent sudden jump of the measured sensor data. 47 | double UnwindAngle(const double current_roll, const double last_roll) { 48 | double vec[3] = {current_roll - 2 * mjPI, current_roll, 49 | current_roll + 2 * mjPI}; 50 | double closest = vec[0]; 51 | double min_diff = std::abs(last_roll - vec[0]); 52 | for (int i = 1; i < 3; i++) { 53 | double diff = std::abs(last_roll - vec[i]); 54 | if (diff < min_diff) { 55 | min_diff = diff; 56 | closest = vec[i]; 57 | } 58 | } 59 | return closest; 60 | } 61 | 62 | } // namespace 63 | 64 | std::string Barkour::XmlPath() const { 65 | return absl::StrCat( 66 | mjpc::GetExecutableDir(), "/../mjpc/barkour/world.xml"); 67 | } 68 | 69 | std::string Barkour::Name() const { return "Barkour"; } 70 | 71 | void Barkour::ResidualFn::GetNormalizedFootTrajectory( 72 | double duty_ratio, double gait_frequency, double normalized_phase_offset, 73 | double time, double amplitude_forward, double amplitude_vertical, 74 | double* target_pos_x, double* target_pos_z) const { 75 | double step_duration = 1.0 / fmax(gait_frequency, 0.0001); 76 | double stance_duration = step_duration * fmin(fmax(duty_ratio, 0.01), 0.99); 77 | double flight_duration = step_duration - stance_duration; 78 | double gait_start_time = normalized_phase_offset * step_duration; 79 | double in_phase_time = fmod((time - gait_start_time), step_duration); 80 | if (in_phase_time < 0.0) in_phase_time += step_duration; 81 | double normalized_phase = 0.0; 82 | bool is_flight_phase = false; 83 | if (in_phase_time < flight_duration) { 84 | normalized_phase = in_phase_time / flight_duration; 85 | is_flight_phase = true; 86 | } else { 87 | normalized_phase = (in_phase_time - flight_duration) / stance_duration; 88 | is_flight_phase = false; 89 | } 90 | if (is_flight_phase) { 91 | *target_pos_z += mju_sin(normalized_phase * mjPI) * amplitude_vertical; 92 | *target_pos_x += 93 | mju_sin(normalized_phase * mjPI - mjPI / 2) * amplitude_forward; 94 | } else { 95 | *target_pos_x += 96 | -mju_sin(normalized_phase * mjPI - mjPI / 2) * amplitude_forward; 97 | } 98 | } 99 | 100 | // ============ residual ============ 101 | void Barkour::ResidualFn::Residual(const mjModel* model, 102 | const mjData* data, 103 | double* residual) const { 104 | // start counter 105 | int counter = 0; 106 | 107 | // common info 108 | double* torso_xmat = data->xmat + 9 * torso_body_id_; 109 | double* compos = SensorByName(model, data, "torso_subtreecom"); 110 | CHECK(compos != nullptr); 111 | double pitch = atan2(-torso_xmat[6], mju_sqrt(torso_xmat[7] * torso_xmat[7] + 112 | torso_xmat[8] * torso_xmat[8])); 113 | double roll = atan2(torso_xmat[7], torso_xmat[8]); 114 | 115 | double torso_heading[2] = {torso_xmat[0], torso_xmat[3]}; 116 | if (parameters_[target_body_pitch_id_] > 1.0 || 117 | parameters_[target_body_pitch_id_] < -1.0) { 118 | int handstand = parameters_[target_body_pitch_id_] < 0.0 ? -1 : 1; 119 | torso_heading[0] = handstand * torso_xmat[2]; 120 | torso_heading[1] = handstand * torso_xmat[5]; 121 | } 122 | 123 | double* comvel = SensorByName(model, data, "torso_subtreelinvel"); 124 | double* angvel = SensorByName(model, data, "torso_angvel"); 125 | double linvel_ego[3]; 126 | RotateVectorAroundZaxis(comvel, 127 | -mju_atan2(torso_heading[1], torso_heading[0]), 128 | linvel_ego); // Need double check 129 | 130 | // get foot positions 131 | double* foot_pos[kNumFoot]; 132 | for (BkFoot foot : kFootAll) { 133 | foot_pos[foot] = data->site_xpos + 3 * foot_site_id_[foot]; 134 | } 135 | 136 | double* shoulder_pos[kNumFoot]; 137 | for (BkFoot foot : kFootAll) 138 | shoulder_pos[foot] = data->xpos + 3 * shoulder_body_id_[foot]; 139 | 140 | // average foot position 141 | double avg_foot_pos[3]; 142 | AverageFootPos(avg_foot_pos, foot_pos); 143 | 144 | // Residual Calculations 145 | // ---------- BodyHeight ---------- 146 | // quadrupedal or bipedal height of torso 147 | double* body = SensorByName(model, data, "body"); 148 | residual[counter++] = 149 | body[2] - parameters_[target_body_height_id_]; 150 | 151 | // ---------- BodyXYPos ---------- 152 | double* pos = SensorByName(model, data, "torso_subtreecom"); 153 | CHECK(pos != nullptr); 154 | residual[counter++] = pos[0] - parameters_[goal_position_x_id_]; 155 | residual[counter++] = pos[1] - parameters_[goal_position_y_id_]; 156 | 157 | // ---------- Heading ---------- 158 | mju_normalize(torso_heading, 2); 159 | residual[counter++] = 160 | torso_heading[0] - mju_cos(parameters_[target_body_heading_id_]); 161 | residual[counter++] = 162 | torso_heading[1] - mju_sin(parameters_[target_body_heading_id_]); 163 | 164 | // ---------- Body Roll and Pitch ---------- 165 | residual[counter++] = pitch - parameters_[target_body_pitch_id_]; 166 | residual[counter++] = 167 | UnwindAngle(roll, current_base_roll_) - parameters_[target_body_roll_id_]; 168 | residual[counter++] = 0; 169 | 170 | // ---------- Forward Velocity ---------- 171 | residual[counter++] = 172 | linvel_ego[0] - parameters_[target_forward_velocity_id_]; 173 | 174 | // ---------- Sideways Velocity --------- 175 | residual[counter++] = 176 | linvel_ego[1] - parameters_[target_sideways_velocity_id_]; 177 | 178 | // ---------- Upward Velocity ----------- 179 | residual[counter++] = linvel_ego[2] - parameters_[target_upward_velocity_id_]; 180 | 181 | // ---------- Roll Pitch Yaw Velocity ---------- 182 | residual[counter++] = angvel[0] - parameters_[target_roll_velocity_id_]; 183 | residual[counter++] = angvel[2] - parameters_[target_turning_velocity_id_]; 184 | 185 | // ---------- Feet pose tracking ------------- 186 | double foot_current_poses_yaw_aligned[4][3]; 187 | double shoulder_yaw_aligned[4][3]; 188 | double heading_angle = mju_atan2(torso_heading[1], torso_heading[0]); 189 | 190 | for (int foot_id = 0; foot_id < 4; foot_id++) { 191 | double shoulder_pos_local[3]; 192 | mju_sub3(shoulder_pos_local, shoulder_pos[foot_id], compos); 193 | RotateVectorAroundZaxis(shoulder_pos_local, -heading_angle, 194 | shoulder_yaw_aligned[foot_id]); 195 | double foot_pos_local[3]; 196 | mju_sub3(foot_pos_local, foot_pos[foot_id], compos); 197 | RotateVectorAroundZaxis(foot_pos_local, -heading_angle, 198 | foot_current_poses_yaw_aligned[foot_id]); 199 | foot_current_poses_yaw_aligned[foot_id][2] = foot_pos[foot_id][2]; 200 | } 201 | for (int dim = 0; dim < 3; dim++) { 202 | for (int foot_id = 0; foot_id < 4; foot_id++) { 203 | double foot_pos_delta = parameters_[dist_from_nominal_ids_[foot_id][dim]]; 204 | double foot_target_pos; 205 | if (dim < 2) { 206 | if (foot_id >= 2 && dim == 1) { 207 | foot_pos_delta *= -1; 208 | } 209 | foot_target_pos = shoulder_yaw_aligned[foot_id][dim] + 210 | kNominalFootFromShoulder[foot_id][dim] + 211 | foot_pos_delta; 212 | } else { 213 | // dim == 2 214 | foot_target_pos = foot_pos_delta; 215 | } 216 | residual[counter++] = 217 | foot_target_pos - foot_current_poses_yaw_aligned[foot_id][dim]; 218 | } 219 | } 220 | 221 | // ---------- Gait ---------- 222 | // Get tracking target for the feet gait patter 223 | for (int foot_id = 0; foot_id < 4; foot_id++) { 224 | double target_pos_x = shoulder_yaw_aligned[foot_id][0] + 225 | kNominalFootFromShoulder[foot_id][0] + 226 | parameters_[dist_from_nominal_ids_[foot_id][0]]; 227 | double target_pos_z = parameters_[dist_from_nominal_ids_[foot_id][2]]; 228 | GetNormalizedFootTrajectory(parameters_[ground_to_air_ratio_ids_[foot_id]], 229 | parameters_[stepping_frequency_ids_[foot_id]], 230 | parameters_[phase_offset_ids_[foot_id]], 231 | data->time, 232 | parameters_[amplitudes_forward_ids_[foot_id]], 233 | parameters_[amplitudes_vertical_ids_[foot_id]], 234 | &target_pos_x, &target_pos_z); 235 | residual[counter++] = 236 | target_pos_z - foot_current_poses_yaw_aligned[foot_id][2]; 237 | residual[counter++] = 238 | target_pos_x - foot_current_poses_yaw_aligned[foot_id][0]; 239 | } 240 | 241 | // ---------- Balance ---------- 242 | double capture_point[3]; 243 | double fall_time = mju_sqrt(2 * parameters_[target_body_height_id_] / 9.81); 244 | mju_addScl3(capture_point, compos, comvel, fall_time); 245 | residual[counter++] = capture_point[0] - avg_foot_pos[0]; 246 | residual[counter++] = capture_point[1] - avg_foot_pos[1]; 247 | 248 | // ---------- Effort ---------- 249 | mju_scl(residual + counter, data->actuator_force, 2e-2, model->nu); 250 | counter += model->nu; 251 | 252 | // ---------- Posture ---------- 253 | double* home = KeyQPosByName(model, data, "home"); 254 | int walker_first_joint_adr = model->jnt_qposadr[walker_root_joint_id_] + 7; 255 | mju_sub(residual + counter, 256 | data->qpos + walker_first_joint_adr, 257 | home + walker_first_joint_adr, model->nu); 258 | for (BkFoot foot : kFootAll) { 259 | for (int joint = 0; joint < 3; joint++) { 260 | residual[counter + 3 * foot + joint] *= kJointPostureGain[joint]; 261 | } 262 | } 263 | // loosen the "hands" in Biped mode 264 | if (parameters_[target_body_pitch_id_] < -1.0) { 265 | residual[counter + 4] *= 0.03; 266 | residual[counter + 5] *= 0.03; 267 | residual[counter + 10] *= 0.03; 268 | residual[counter + 11] *= 0.03; 269 | } else if (parameters_[target_body_pitch_id_] > 1.0) { 270 | residual[counter + 1] *= 0.03; 271 | residual[counter + 2] *= 0.03; 272 | residual[counter + 7] *= 0.03; 273 | residual[counter + 8] *= 0.03; 274 | } 275 | counter += model->nu; 276 | 277 | // PoseTracking 278 | for (int i = 0; i < 12; i++) { 279 | residual[counter + i] = 280 | parameters_[target_joint_angle_ids_[i]] - 281 | *(data->qpos + walker_first_joint_adr + i); 282 | } 283 | counter += model->nu; 284 | 285 | // ---------- Act dot ----------------- 286 | // encourage actions to be similar to the previous actions. 287 | if (model->na > 0) { 288 | mju_copy(residual + counter, data->act_dot, model->na); 289 | counter += model->na; 290 | } else { 291 | for (int i=0; i < 12; i++) { 292 | residual[counter++] = 0.; 293 | } 294 | } 295 | 296 | // sensor dim sanity check 297 | CheckSensorDim(model, counter); 298 | } 299 | 300 | // ============ transition ============ 301 | void Barkour::TransitionLocked(mjModel* model, mjData* data) { 302 | // Update goal position for mocap object 303 | data->mocap_pos[3 * goal_mocap_id_] = 304 | parameters[residual_.goal_position_x_id_]; 305 | data->mocap_pos[3 * goal_mocap_id_ + 1] = 306 | parameters[residual_.goal_position_y_id_]; 307 | 308 | // update the body roll variable 309 | double* torso_xmat = data->xmat + 9 * residual_.torso_body_id_; 310 | // xmat[7,8] are the projections of the local y and z axes onto the global 311 | // z axis, respectively. 312 | double roll_angle = atan2(torso_xmat[7], torso_xmat[8]); 313 | // This is needed to handle when the roll_angle jumps between PI and -PI, 314 | // which creates undesired discontinuity in the optimization. 315 | residual_.current_base_roll_ = 316 | UnwindAngle(roll_angle, residual_.current_base_roll_); 317 | } 318 | 319 | // ============ task-state utilities ============ 320 | // save task-related ids 321 | void Barkour::ResetLocked(const mjModel* model) { 322 | const int* mocap_id = model->body_mocapid; 323 | goal_mocap_id_ = mocap_id[mj_name2id(model, mjOBJ_XBODY, "goal")]; 324 | residual_.ResetLocked(model); 325 | } 326 | 327 | void Barkour::ResidualFn::ResetLocked(const mjModel* model) { 328 | // ---------- model identifiers ---------- 329 | torso_body_id_ = mj_name2id(model, mjOBJ_XBODY, "torso"); 330 | CHECK_GT(torso_body_id_, 0); 331 | // foot site ids 332 | int foot_index = 0; 333 | // for (const char* footname : {"FL", "HL", "FR", "HR"}) { 334 | for (const char* footname : 335 | {"foot_front_left", "foot_hind_left", 336 | "foot_front_right", "foot_hind_right"}) { 337 | int foot_id = mj_name2id(model, mjOBJ_SITE, footname); 338 | if (foot_id < 0) mju_error_s("site '%s' not found", footname); 339 | foot_site_id_[foot_index] = foot_id; 340 | foot_index++; 341 | } 342 | 343 | // shoulder ids 344 | shoulder_body_id_[kFootFL] = 345 | mj_name2id(model, mjOBJ_BODY, "upper_leg_front_left"); 346 | shoulder_body_id_[kFootHL] = 347 | mj_name2id(model, mjOBJ_BODY, "upper_leg_hind_left"); 348 | shoulder_body_id_[kFootFR] = 349 | mj_name2id(model, mjOBJ_BODY, "upper_leg_front_right"); 350 | shoulder_body_id_[kFootHR] = 351 | mj_name2id(model, mjOBJ_BODY, "upper_leg_hind_right"); 352 | CHECK_GT(shoulder_body_id_[kFootFL], 0); 353 | CHECK_GT(shoulder_body_id_[kFootHL], 0); 354 | CHECK_GT(shoulder_body_id_[kFootFR], 0); 355 | CHECK_GT(shoulder_body_id_[kFootHR], 0); 356 | 357 | // get the reward parameter_ids 358 | target_body_height_id_ = ParameterIndex(model, "TargetBodyHeight"); 359 | goal_position_x_id_ = ParameterIndex(model, "GoalPositionX"); 360 | goal_position_y_id_ = ParameterIndex(model, "GoalPositionY"); 361 | target_body_heading_id_ = ParameterIndex(model, "TargetHeading"); 362 | target_body_pitch_id_ = ParameterIndex(model, "TargetPitch"); 363 | target_body_roll_id_ = ParameterIndex(model, "TargetRoll"); 364 | target_forward_velocity_id_ = ParameterIndex(model, "TargetForwardVelocity"); 365 | target_sideways_velocity_id_ = 366 | ParameterIndex(model, "TargetSidewaysVelocity"); 367 | target_upward_velocity_id_ = ParameterIndex(model, "TargetUpwardVelocity"); 368 | target_turning_velocity_id_ = ParameterIndex(model, "TargetTurningVelocity"); 369 | target_roll_velocity_id_ = ParameterIndex(model, "TargetRollVelocity"); 370 | 371 | balance_ids_[kFootFL] = ParameterIndex(model, "FLBalance"); 372 | CHECK_GE(balance_ids_[kFootFL], 0); 373 | balance_ids_[kFootHL] = ParameterIndex(model, "HLBalance"); 374 | CHECK_GE(balance_ids_[kFootHL], 0); 375 | balance_ids_[kFootFR] = ParameterIndex(model, "FRBalance"); 376 | CHECK_GE(balance_ids_[kFootFR], 0); 377 | balance_ids_[kFootHR] = ParameterIndex(model, "HRBalance"); 378 | CHECK_GE(balance_ids_[kFootHR], 0); 379 | 380 | dist_from_nominal_ids_[0][0] = ParameterIndex(model, "FLXDistFromNominal"); 381 | dist_from_nominal_ids_[0][1] = ParameterIndex(model, "FLYDistFromNominal"); 382 | dist_from_nominal_ids_[0][2] = ParameterIndex(model, "FLZDistFromNominal"); 383 | stepping_frequency_ids_[0] = ParameterIndex(model, "FLSteppingFrequency"); 384 | ground_to_air_ratio_ids_[0] = ParameterIndex(model, "FLGroundToAirRatio"); 385 | phase_offset_ids_[0] = ParameterIndex(model, "FLPhaseOffset"); 386 | amplitudes_vertical_ids_[0] = ParameterIndex(model, "FLAmplitudesVertical"); 387 | amplitudes_forward_ids_[0] = ParameterIndex(model, "FLAmplitudesForward"); 388 | dist_from_nominal_ids_[1][0] = ParameterIndex(model, "HLXDistFromNominal"); 389 | dist_from_nominal_ids_[1][1] = ParameterIndex(model, "HLYDistFromNominal"); 390 | dist_from_nominal_ids_[1][2] = ParameterIndex(model, "HLZDistFromNominal"); 391 | stepping_frequency_ids_[1] = ParameterIndex(model, "HLSteppingFrequency"); 392 | ground_to_air_ratio_ids_[1] = ParameterIndex(model, "HLGroundToAirRatio"); 393 | phase_offset_ids_[1] = ParameterIndex(model, "HLPhaseOffset"); 394 | amplitudes_vertical_ids_[1] = ParameterIndex(model, "HLAmplitudesVertical"); 395 | amplitudes_forward_ids_[1] = ParameterIndex(model, "HLAmplitudesForward"); 396 | dist_from_nominal_ids_[2][0] = ParameterIndex(model, "FRXDistFromNominal"); 397 | dist_from_nominal_ids_[2][1] = ParameterIndex(model, "FRYDistFromNominal"); 398 | dist_from_nominal_ids_[2][2] = ParameterIndex(model, "FRZDistFromNominal"); 399 | stepping_frequency_ids_[2] = ParameterIndex(model, "FRSteppingFrequency"); 400 | ground_to_air_ratio_ids_[2] = ParameterIndex(model, "FRGroundToAirRatio"); 401 | phase_offset_ids_[2] = ParameterIndex(model, "FRPhaseOffset"); 402 | amplitudes_vertical_ids_[2] = ParameterIndex(model, "FRAmplitudesVertical"); 403 | amplitudes_forward_ids_[2] = ParameterIndex(model, "FRAmplitudesForward"); 404 | dist_from_nominal_ids_[3][0] = ParameterIndex(model, "HRXDistFromNominal"); 405 | dist_from_nominal_ids_[3][1] = ParameterIndex(model, "HRYDistFromNominal"); 406 | dist_from_nominal_ids_[3][2] = ParameterIndex(model, "HRZDistFromNominal"); 407 | stepping_frequency_ids_[3] = ParameterIndex(model, "HRSteppingFrequency"); 408 | ground_to_air_ratio_ids_[3] = ParameterIndex(model, "HRGroundToAirRatio"); 409 | phase_offset_ids_[3] = ParameterIndex(model, "HRPhaseOffset"); 410 | amplitudes_vertical_ids_[3] = ParameterIndex(model, "HRAmplitudesVertical"); 411 | amplitudes_forward_ids_[3] = ParameterIndex(model, "HRAmplitudesForward"); 412 | 413 | for (int i = 0; i < 12; i++) { 414 | target_joint_angle_ids_[i] = ParameterIndex(model, absl::StrCat("q", i+1)); 415 | } 416 | 417 | walker_root_joint_id_ = mj_name2id(model, mjOBJ_JOINT, kRootJointName); 418 | if (walker_root_joint_id_ == -1) { 419 | mju_error("root joint '%s' not found", kRootJointName); 420 | } 421 | } 422 | 423 | // compute average foot position, depending on mode 424 | void Barkour::ResidualFn::AverageFootPos( 425 | double avg_foot_pos[3], double* foot_pos[kNumFoot]) const { 426 | // we should compute the average foot position for feet that are used for 427 | // balancing. 428 | // start by using all feet 429 | double foot_weight[kNumFoot] = {}; 430 | double total_weight = 0; 431 | for (int foot : kFootAll) { 432 | foot_weight[foot] = parameters_[balance_ids_[foot]]; 433 | total_weight += foot_weight[foot]; 434 | } 435 | if (total_weight == 0) { 436 | // if no feet are considered, just use the average 437 | for (int foot : kFootAll) { 438 | foot_weight[foot] = 1.0; 439 | } 440 | total_weight = 4.0; 441 | } 442 | 443 | mju_zero3(avg_foot_pos); 444 | for (int foot : kFootAll) { 445 | mju_addToScl3(avg_foot_pos, foot_pos[foot], foot_weight[foot]); 446 | } 447 | mju_scl3(avg_foot_pos, avg_foot_pos, 1 / total_weight); 448 | } 449 | 450 | } // namespace mjpc::language2reward 451 | -------------------------------------------------------------------------------- /mjpc/barkour/barkour.h: -------------------------------------------------------------------------------- 1 | // Copyright 2023 DeepMind Technologies Limited 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | // ============================================================================== 15 | 16 | #ifndef THIRD_PARTY_DEEPMIND_LANGUAGE_TO_REWARD_2023_MJPC_BARKOUR_BARKOUR_H_ 17 | #define THIRD_PARTY_DEEPMIND_LANGUAGE_TO_REWARD_2023_MJPC_BARKOUR_BARKOUR_H_ 18 | 19 | #include 20 | #include 21 | 22 | #include 23 | #include 24 | 25 | 26 | namespace mjpc::language2reward { 27 | 28 | class Barkour : public mjpc::Task { 29 | public: 30 | std::string Name() const override; 31 | std::string XmlPath() const override; 32 | class ResidualFn : public mjpc::BaseResidualFn { 33 | public: 34 | explicit ResidualFn(const Barkour* task) : 35 | mjpc::BaseResidualFn(task) {} 36 | 37 | void Residual(const mjModel* model, const mjData* data, 38 | double* residual) const override; 39 | 40 | private: 41 | friend class Barkour; 42 | // ============ enums ============ 43 | 44 | // feet 45 | enum BkFoot { kFootFL = 0, kFootHL, kFootFR, kFootHR, kNumFoot }; 46 | 47 | // ============ constants ============ 48 | constexpr static BkFoot kFootAll[kNumFoot] = {kFootFL, kFootHL, kFootFR, 49 | kFootHR}; 50 | 51 | // posture gain factors for abduction, hip, knee 52 | constexpr static double kJointPostureGain[3] = {2, 1, 1}; // unitless 53 | 54 | constexpr static char kRootJointName[] = "torso"; 55 | 56 | // ============ methods ============ 57 | 58 | // compute average foot position, depending on mode 59 | void AverageFootPos(double avg_foot_pos[3], 60 | double* foot_pos[kNumFoot]) const; 61 | 62 | void GetNormalizedFootTrajectory( 63 | double duty_ratio, double gait_frequency, 64 | double normalized_phase_offset, double time, 65 | double amplitude_forward, double amplitude_vertical, 66 | double* target_pos_x, double* target_pos_z) const; 67 | 68 | void ResetLocked(const mjModel* model); 69 | 70 | // ============ constants, computed in Reset() ============ 71 | int torso_body_id_ = -1; 72 | 73 | // ===== task parameters id ====== 74 | int target_body_height_id_ = -1; 75 | int goal_position_x_id_ = -1; 76 | int goal_position_y_id_ = -1; 77 | int target_body_heading_id_ = -1; 78 | int target_body_pitch_id_ = -1; 79 | int target_body_roll_id_ = -1; 80 | int target_forward_velocity_id_ = -1; 81 | int target_sideways_velocity_id_ = -1; 82 | int target_upward_velocity_id_ = -1; 83 | int target_turning_velocity_id_ = -1; 84 | int target_roll_velocity_id_ = -1; 85 | int balance_ids_[4] = {-1}; 86 | 87 | // ==== gait-relevant parameters id ==== 88 | int dist_from_nominal_ids_[4][3]; 89 | int stepping_frequency_ids_[4]; 90 | int ground_to_air_ratio_ids_[4]; 91 | int phase_offset_ids_[4]; 92 | int amplitudes_vertical_ids_[4]; 93 | int amplitudes_forward_ids_[4]; 94 | 95 | // ==== joint angle parameters id ==== 96 | int target_joint_angle_ids_[12]; 97 | 98 | // ==== object root joint ids ==== 99 | int walker_root_joint_id_ = -1; 100 | 101 | double current_base_roll_ = 0; 102 | 103 | int foot_site_id_[kNumFoot]; 104 | int shoulder_body_id_[kNumFoot]; 105 | }; 106 | 107 | Barkour() : residual_(this) {} 108 | void TransitionLocked(mjModel* model, mjData* data) override; 109 | 110 | // call base-class Reset, save task-related ids 111 | void ResetLocked(const mjModel* model) override; 112 | 113 | protected: 114 | std::unique_ptr ResidualLocked() const override { 115 | return std::make_unique(residual_); 116 | } 117 | ResidualFn* InternalResidual() override { return &residual_; } 118 | 119 | private: 120 | ResidualFn residual_; 121 | int goal_mocap_id_ = -1; 122 | }; 123 | 124 | } // namespace mjpc::language2reward 125 | 126 | #endif // THIRD_PARTY_DEEPMIND_LANGUAGE_TO_REWARD_2023_MJPC_BARKOUR_BARKOUR_H_ 127 | -------------------------------------------------------------------------------- /mjpc/barkour/barkour_vb.xml.patch: -------------------------------------------------------------------------------- 1 | diff --git a/barkour_vb_modified.xml b/barkour_vb_modified.xml 2 | --- a/barkour_vb_modified.xml 3 | +++ b/barkour_vb_modified.xml 4 | @@ -11,7 +11,8 @@ 5 | 6 | 7 | 8 | - 9 | + 11 | 12 | 13 | 14 | @@ -64,6 +65,7 @@ 15 | 16 | 17 | 18 | + 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /mjpc/barkour/task_barkour.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | -------------------------------------------------------------------------------- /mjpc/barkour/world.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 37 | -------------------------------------------------------------------------------- /mjpc/headless_server.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2023 DeepMind Technologies Limited 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | // ============================================================================== 15 | 16 | #include 17 | #include 18 | #include 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include "mjpc/barkour/barkour.h" 25 | // DEEPMIND INTERNAL IMPORT 26 | #include 27 | #include 28 | #include 29 | #include 30 | 31 | ABSL_FLAG(int32_t, mjpc_port, 10000, "port to listen on"); 32 | ABSL_FLAG(int32_t, mjpc_workers, -1, 33 | "number of worker threads for MJPC planning. -1 means use the number " 34 | "of available hardware threads."); 35 | 36 | int main(int argc, char** argv) { 37 | absl::ParseCommandLine(argc, argv); 38 | int port = absl::GetFlag(FLAGS_mjpc_port); 39 | 40 | std::string server_address = absl::StrCat("[::]:", port); 41 | 42 | std::shared_ptr server_credentials = 43 | grpc::experimental::LocalServerCredentials(LOCAL_TCP); 44 | grpc::ServerBuilder builder; 45 | builder.AddListeningPort(server_address, server_credentials); 46 | 47 | mjpc::agent_grpc::AgentService service( 48 | {std::make_shared()}, 49 | absl::GetFlag(FLAGS_mjpc_workers)); 50 | builder.RegisterService(&service); 51 | 52 | std::unique_ptr server(builder.BuildAndStart()); 53 | LOG(INFO) << "Server listening on " << server_address; 54 | 55 | // Keep the program running until the server shuts down. 56 | server->Wait(); 57 | 58 | return 0; 59 | } 60 | -------------------------------------------------------------------------------- /mjpc/ui_server.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2023 DeepMind Technologies Limited 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | // ============================================================================== 15 | 16 | #include 17 | #include 18 | #include 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include "mjpc/barkour/barkour.h" 27 | // DEEPMIND INTERNAL IMPORT 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | 34 | ABSL_FLAG(int32_t, mjpc_port, 10000, "port to listen on"); 35 | 36 | int main(int argc, char** argv) { 37 | absl::ParseCommandLine(argc, argv); 38 | int port = absl::GetFlag(FLAGS_mjpc_port); 39 | 40 | std::string server_address = absl::StrCat("[::]:", port); 41 | 42 | std::shared_ptr server_credentials = 43 | grpc::experimental::LocalServerCredentials(LOCAL_TCP); 44 | grpc::ServerBuilder builder; 45 | builder.AddListeningPort(server_address, server_credentials); 46 | 47 | mjpc::MjpcApp app({ 48 | std::make_shared(), 49 | }); 50 | mjpc::agent_grpc::UiAgentService service(app.Sim()); 51 | builder.RegisterService(&service); 52 | 53 | std::unique_ptr server(builder.BuildAndStart()); 54 | LOG(INFO) << "Server listening on " << server_address; 55 | 56 | app.Start(); 57 | server->Shutdown(absl::ToChronoTime(absl::Now() + absl::Seconds(1))); 58 | server->Wait(); 59 | 60 | return 0; 61 | } 62 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name="language_to_reward_2023" 7 | version = "0.0.1" 8 | authors = [ 9 | {name = "Wenhao Yu", email = "magicmelon@google.com"}, 10 | {name = "Nimrod Gileadi", email = "nimrod@google.com"}, 11 | {name = "Fei Xia", email = "xiafei@google.com"}, 12 | ] 13 | description = "Language to Reward (2023)" 14 | readme = {file = "README.md", content-type = "text/markdown"} 15 | license = {text = "Apache License 2.0"} 16 | classifiers = [ 17 | "Development Status :: 3 - Alpha", 18 | "Intended Audience :: Developers", 19 | "Intended Audience :: Science/Research", 20 | "License :: OSI Approved :: Apache Software License", 21 | "Natural Language :: English", 22 | "Programming Language :: Python :: 3", 23 | "Programming Language :: Python :: 3.8", 24 | "Programming Language :: Python :: 3.9", 25 | "Programming Language :: Python :: 3.10", 26 | "Programming Language :: Python :: 3.11", 27 | "Programming Language :: Python :: 3.12", 28 | "Topic :: Scientific/Engineering", 29 | ] 30 | requires-python = ">=3.8" 31 | dependencies = [ 32 | "absl-py", 33 | "colorama", 34 | "mujoco", 35 | "openai", 36 | "termcolor", 37 | ] 38 | 39 | [project.urls] 40 | Homepage = "https://github.com/google-deepmind/language_to_reward_2023" 41 | Repository = "https://github.com/google-deepmind/language_to_reward_2023" 42 | 43 | [tool.setuptools.packages.find] 44 | where = ["src"] 45 | include = ["language_to_reward_2023*"] 46 | 47 | [tool.setuptools.package-data] 48 | language_to_reward_2023 = [ 49 | "mjpc/l2r_headless_server", 50 | "mjpc/l2r_ui_server", 51 | ] 52 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py 2 | colorama==0.4.6 3 | mujoco==3.0.0 4 | openai 5 | termcolor 6 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 DeepMind Technologies Limited 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Setup script for language_to_reward_2023.""" 17 | 18 | import os 19 | import pathlib 20 | import platform 21 | import shutil 22 | import subprocess 23 | 24 | import setuptools 25 | from setuptools.command import build_ext 26 | 27 | 28 | Path = pathlib.Path 29 | 30 | 31 | class CMakeExtension(setuptools.Extension): 32 | """A Python extension that has been prebuilt by CMake. 33 | 34 | We do not want distutils to handle the build process for our extensions, so 35 | so we pass an empty list to the super constructor. 36 | """ 37 | 38 | def __init__(self, name): 39 | super().__init__(name, sources=[]) 40 | 41 | 42 | class BuildAgentServerBinaries(build_ext.build_ext): 43 | """A Python extension that has been prebuilt by CMake. 44 | 45 | We do not want distutils to handle the build process for our extensions, so 46 | so we pass an empty list to the super constructor. 47 | """ 48 | 49 | def run(self): 50 | self._configure_and_build_agent_server() 51 | self.run_command("copy_agent_server_binary") 52 | self.run_command("copy_task_assets") 53 | 54 | def _configure_and_build_agent_server(self): 55 | """Check for CMake.""" 56 | cmake_command = "cmake" 57 | build_cfg = "Debug" 58 | l2r_root = Path(__file__).parent 59 | l2r_build_dir = l2r_root / "build" 60 | cmake_configure_args = [ 61 | f"-DCMAKE_BUILD_TYPE:STRING={build_cfg}", 62 | "-DBUILD_TESTING:BOOL=OFF", 63 | ] 64 | 65 | if platform.system() == "Darwin" and "ARCHFLAGS" in os.environ: 66 | osx_archs = [] 67 | if "-arch x86_64" in os.environ["ARCHFLAGS"]: 68 | osx_archs.append("x86_64") 69 | if "-arch arm64" in os.environ["ARCHFLAGS"]: 70 | osx_archs.append("arm64") 71 | cmake_configure_args.append( 72 | f"-DCMAKE_OSX_ARCHITECTURES={';'.join(osx_archs)}" 73 | ) 74 | 75 | # TODO(nimrod): We currently configure the builds into 76 | # `mujoco_mpc/build`. This should use `self.build_{temp,lib}` instead, to 77 | # isolate the Python builds from the C++ builds. 78 | print("Configuring CMake with the following arguments:") 79 | for arg in cmake_configure_args: 80 | print(f" {arg}") 81 | 82 | subprocess.check_call( 83 | [ 84 | cmake_command, 85 | *cmake_configure_args, 86 | f"-S{l2r_root.resolve()}", 87 | f"-B{l2r_build_dir.resolve()}", 88 | ], 89 | cwd=l2r_root, 90 | ) 91 | 92 | print("Building `l2r_headless_server` and `l2r_ui_server` with CMake") 93 | subprocess.check_call( 94 | [ 95 | cmake_command, 96 | "--build", 97 | str(l2r_build_dir.resolve()), 98 | "--target", 99 | "l2r_headless_server", 100 | "l2r_ui_server", 101 | f"-j{os.cpu_count()}", 102 | "--config", 103 | build_cfg, 104 | ], 105 | cwd=l2r_root, 106 | ) 107 | 108 | 109 | class CopyAgentServerBinaryCommand(setuptools.Command): 110 | """Command to copy `l2r_{headless,ui}_server` next to task_clients.py. 111 | 112 | Assumes that the C++ binaries were built and 113 | and located in the default `build/mjpc` folder. 114 | """ 115 | 116 | description = "Copy server binaries into package." 117 | user_options = [] 118 | 119 | def initialize_options(self): 120 | self.build_lib = None 121 | 122 | def finalize_options(self): 123 | self.set_undefined_options("copy_task_assets", ("build_lib", "build_lib")) 124 | 125 | def run(self): 126 | self._copy_binary("l2r_headless_server") 127 | self._copy_binary("l2r_ui_server") 128 | 129 | def _copy_binary(self, binary_name): 130 | source_path = Path(f"build/mjpc/{binary_name}") 131 | if not source_path.exists(): 132 | raise ValueError( 133 | f"Cannot find `{binary_name}` binary from {source_path.absolute()}. " 134 | f"Please build the `{binary_name}` C++ gRPC service." 135 | ) 136 | assert self.build_lib is not None 137 | build_lib_path = Path(self.build_lib).resolve() 138 | destination_path = Path( 139 | build_lib_path, "language_to_reward_2023", "mjpc", binary_name 140 | ) 141 | 142 | self.announce(f"{source_path.resolve()=}") 143 | self.announce(f"{destination_path.resolve()=}") 144 | 145 | destination_path.parent.mkdir(exist_ok=True, parents=True) 146 | shutil.copy(source_path, destination_path) 147 | 148 | 149 | class CopyTaskAssetsCommand(setuptools.Command): 150 | """Copies XML and mesh files next to the server binaries.""" 151 | 152 | description = "Copy task assets over to python source." 153 | user_options = [] 154 | 155 | def initialize_options(self): 156 | self.build_lib = None 157 | 158 | def finalize_options(self): 159 | self.set_undefined_options("build_py", ("build_lib", "build_lib")) 160 | 161 | def run(self): 162 | l2r_root = Path(__file__).parent 163 | l2r_build_dir = l2r_root / "build" 164 | mjpc_task_paths = [l2r_build_dir / "mjpc" / "barkour"] 165 | for task_path in mjpc_task_paths: 166 | relative_task_path = task_path.relative_to(l2r_build_dir) 167 | assert self.build_lib is not None 168 | build_lib_path = Path(self.build_lib).resolve() 169 | destination_dir_path = Path(build_lib_path, "language_to_reward_2023") 170 | self.announce( 171 | f"Copying assets {relative_task_path} from" 172 | f" {l2r_build_dir} over to {destination_dir_path}." 173 | ) 174 | 175 | destination_path = destination_dir_path / relative_task_path 176 | shutil.copytree(task_path, destination_path, dirs_exist_ok=True) 177 | 178 | 179 | setuptools.setup( 180 | ext_modules=[CMakeExtension("agent_server")], 181 | cmdclass={ 182 | "build_ext": BuildAgentServerBinaries, 183 | "copy_agent_server_binary": CopyAgentServerBinaryCommand, 184 | "copy_task_assets": CopyTaskAssetsCommand, 185 | }, 186 | ) 187 | -------------------------------------------------------------------------------- /src/language_to_reward_2023/confirmation_safe_executor.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 DeepMind Technologies Limited 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """An alternative SafeExecutor implementation which asks for user confirmation. 17 | 18 | This should only be used in platforms where the sandbox is not available, e.g. 19 | on OS X. 20 | """ 21 | 22 | import os 23 | import subprocess 24 | import sys 25 | import tempfile 26 | import termcolor 27 | 28 | from language_to_reward_2023 import safe_executor 29 | 30 | 31 | def default_interpreter() -> str: 32 | return sys.executable 33 | 34 | 35 | _SERIOUS_WARNING = ( 36 | "\nYou are about to execute untrusted code.\n" 37 | "Code executed this way can perform any operation on your PC, and " 38 | "can be a security risk." 39 | '\nOnce you have reviewed the code above, type "yes" to continue.\n' 40 | ) 41 | 42 | _REPEATED_WARNING = '\nAbout to execute the code above. Type "y" to continue.\n' 43 | 44 | 45 | class ConfirmationSafeExecutor(safe_executor.SafeExecutor): 46 | """An executor that asks for user confirmation before executing the code.""" 47 | 48 | def __init__(self, interpreter_path=None, skip_confirmation=False): 49 | super().__init__() 50 | self._confirmed_once = False 51 | self._interpreter_path = interpreter_path or default_interpreter() 52 | self._skip_confirmation = skip_confirmation 53 | 54 | def safe_execute(self, code: str) -> str: 55 | if not self._confirmed_once: 56 | while not self._skip_confirmation: 57 | confirm = input( 58 | termcolor.colored(_SERIOUS_WARNING, "red", attrs=["bold"]) 59 | ) 60 | if confirm.lower() == "yes": 61 | break 62 | self._confirmed_once = True 63 | else: 64 | while not self._skip_confirmation: 65 | confirm = input( 66 | termcolor.colored(_REPEATED_WARNING, "red", attrs=["bold"]) 67 | ) 68 | if confirm.lower() in ("y", "yes"): 69 | break 70 | return self._execute(code) 71 | 72 | def _execute(self, code: str) -> str: 73 | f = tempfile.NamedTemporaryFile(suffix=".py", mode="w", delete=False) 74 | 75 | f.write(code) 76 | f.close() 77 | filepath = f.name 78 | 79 | # Start by compiling the code to pyc (to get compilation errors) 80 | try: 81 | subprocess.run( 82 | [self._interpreter_path, "-m", "py_compile", filepath], 83 | check=True, 84 | ) 85 | except subprocess.CalledProcessError as e: 86 | raise ValueError("Failed to compile code.") from e 87 | finally: 88 | os.unlink(filepath) 89 | 90 | # py_compile should output a pyc file in the pycache directory 91 | filename = os.path.basename(filepath) 92 | directory = os.path.dirname(filepath) 93 | pycache_dir = os.path.join(directory, "__pycache__") 94 | pyc_filepath = os.path.join(pycache_dir, filename + "c") 95 | if not os.path.exists(pyc_filepath): 96 | filename = ( 97 | os.path.splitext(filename)[0] 98 | + f".cpython-{sys.version_info[0]}{sys.version_info[1]}.pyc" 99 | ) 100 | pyc_filepath = os.path.join(pycache_dir, filename) 101 | 102 | # Now execute the pyc file 103 | try: 104 | completed_process = subprocess.run( 105 | [self._interpreter_path, pyc_filepath], 106 | capture_output=True, 107 | check=True, 108 | ) 109 | except subprocess.CalledProcessError as e: 110 | print("stdout", e.stdout) 111 | print("stderr", e.stderr) 112 | raise ValueError("Failed to run code.") from e 113 | finally: 114 | os.unlink(pyc_filepath) 115 | return completed_process.stdout.decode("utf-8") 116 | -------------------------------------------------------------------------------- /src/language_to_reward_2023/conversation.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 DeepMind Technologies Limited 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """A class which manages a chat session using a prompt.""" 17 | 18 | import time 19 | from typing import Any 20 | 21 | import openai 22 | import termcolor 23 | 24 | from language_to_reward_2023.platforms import llm_prompt 25 | 26 | 27 | def _open_ai_call_with_retry( 28 | model: str, messages: list[Any] 29 | ) -> openai.ChatCompletion: 30 | """Call OpenAI API with retry.""" 31 | reset_trigger_phrases = ["CHOICE", "NUM"] 32 | success = False 33 | completion = None 34 | while not success: 35 | try: 36 | completion = openai.ChatCompletion.create( 37 | model=model, 38 | messages=messages, 39 | temperature=0.3, 40 | ) 41 | success = True 42 | for reset_trigger_phrase in reset_trigger_phrases: 43 | if reset_trigger_phrase in completion.choices[0].message.content: 44 | success = False 45 | except Exception as e: # pylint: disable=broad-exception-caught 46 | print("OpenAI API call issue, re-trying..." + str(e) + "\n") 47 | time.sleep(5) 48 | return completion 49 | 50 | 51 | class Conversation: 52 | """Manages the state of a conversation with a language model.""" 53 | 54 | def __init__( 55 | self, 56 | prompt_model: llm_prompt.LLMPrompt, 57 | model: str, 58 | print_responses: bool = True, 59 | ): 60 | self._prompt_model = prompt_model 61 | self._model = model 62 | self._print_responses = print_responses 63 | number_of_llms = prompt_model.num_llms 64 | self._message_queues = [[] for _ in range(number_of_llms)] 65 | 66 | # Add general prompt to the message queue. 67 | for llm_id in range(number_of_llms): 68 | message = [{"role": "user", "content": prompt_model.prompts[llm_id]}] 69 | self._message_queues[llm_id].append(message[0]) 70 | 71 | def send_command(self, user_command: str) -> str: 72 | """Sends a user command to the LLMs, returns final processed response.""" 73 | if user_command == "reset": 74 | self.reset() 75 | print("Resetting the conversation history.") 76 | return "reset" 77 | upstream_message = user_command + " Make sure to ignore irrelevant options." 78 | for llm_id in range(self._prompt_model.num_llms): 79 | completion = _open_ai_call_with_retry( 80 | self._model, 81 | self._message_queues[llm_id] 82 | + [{"role": "user", "content": upstream_message}], 83 | ) 84 | if self._prompt_model.keep_message_history[llm_id]: 85 | self._message_queues[llm_id].append( 86 | {"role": "user", "content": upstream_message} 87 | ) 88 | self._message_queues[llm_id].append(completion.choices[0].message) 89 | print(f"LLM{llm_id} queried") 90 | response = completion.choices[0].message.content 91 | if self._print_responses: 92 | print(termcolor.colored(response + "\n", "cyan", attrs=["bold"])) 93 | try: 94 | upstream_message = self._prompt_model.response_processors[llm_id]( 95 | response 96 | ) 97 | except Exception: 98 | if self._prompt_model.keep_message_history[llm_id]: 99 | self._message_queues[llm_id].pop(-1) 100 | self._message_queues[llm_id].pop(-1) 101 | raise 102 | return upstream_message 103 | 104 | def reset(self) -> None: 105 | for i in range(self._prompt_model.num_llms): 106 | while len(self._message_queues[i]) > 1: 107 | self._message_queues[i].pop(-1) 108 | -------------------------------------------------------------------------------- /src/language_to_reward_2023/platforms/barkour/barkour_execution.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 DeepMind Technologies Limited 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Execute untrusted code to get task parameters for the Barkour MJPC task.""" 17 | 18 | import abc 19 | import json 20 | from typing import Any, Callable, Sequence 21 | 22 | from absl import logging 23 | from mujoco_mpc import mjpc_parameters 24 | 25 | from language_to_reward_2023 import safe_executor 26 | from language_to_reward_2023.platforms.barkour import barkour_l2r_tasks 27 | 28 | 29 | class BarkourSafeExecutor(metaclass=abc.ABCMeta): 30 | """Runs untrusted code to get task parameters for the Barkour MJPC task.""" 31 | 32 | def __init__(self, executor: safe_executor.SafeExecutor): 33 | self._executor = executor 34 | 35 | def execute(self, code: str) -> mjpc_parameters.MjpcParameters: 36 | """Executes untrusted code to get MJPC task parameters. 37 | 38 | See _CODE_TEMPLATE below for available functions. 39 | 40 | Arguments: 41 | code: code which edits the weights and params dicts. 42 | 43 | Raises: 44 | ValueError: the code doesn't compile or failed to run. 45 | 46 | Returns: 47 | MJPC task parameters for the Barkour robot. 48 | """ 49 | return self._execute_with_template(_CODE_TEMPLATE, code) 50 | 51 | def _execute_with_template( 52 | self, template: str, code: str 53 | ) -> mjpc_parameters.MjpcParameters: 54 | """Combines untrusted code with a template and executes it in a sandbox.""" 55 | combined_code = template.replace('INSERT_CODE_HERE', code) 56 | output = self._executor.safe_execute(combined_code) 57 | return _parse_output(output) 58 | 59 | 60 | def _parse_output(output: str) -> mjpc_parameters.MjpcParameters: 61 | """Parses the output from executing param generation code.""" 62 | try: 63 | overwriting_weights, overwriting_params = json.loads(output) 64 | except ValueError as e: 65 | raise ValueError(f'Invalid JSON output: {output}') from e 66 | 67 | weights, params = barkour_l2r_tasks.defaults() 68 | _overwrite_entries(weights, overwriting_weights, 'weight') 69 | _overwrite_entries(params, overwriting_params, 'param') 70 | return mjpc_parameters.MjpcParameters( 71 | cost_weights=weights, task_parameters=params 72 | ) 73 | 74 | 75 | def _overwrite_entries(base, overwrites, name_for_log) -> None: 76 | for key, value in overwrites.items(): 77 | if key not in base: 78 | logging.warning('Invalid %s: %s', name_for_log, key) 79 | continue 80 | try: 81 | base[key] = float(value) 82 | except ValueError: 83 | logging.warning('Invalid float value for %s: %s', key, value) 84 | 85 | 86 | def _generate_function_definitions( 87 | supported_functions: Sequence[Callable[..., Any]] 88 | ) -> str: 89 | """Generates function definitions for supported functions.""" 90 | definitions = [] 91 | for function in supported_functions: 92 | definitions.append(f""" 93 | def {function.__name__}(*args, **kwargs): 94 | global weights, params 95 | barkour_l2r_tasks.{function.__name__}(weights, params, *args, **kwargs) 96 | 97 | 98 | """) 99 | for dummy_function_name in _DUMMY_FUNCTIONS: 100 | definitions.append(f""" 101 | def {dummy_function_name}(*args, **kwargs): 102 | pass 103 | 104 | """) 105 | 106 | return ''.join(definitions) 107 | 108 | 109 | _SUPPORTED_FUNCTIONS = ( 110 | barkour_l2r_tasks.set_torso_targets, 111 | barkour_l2r_tasks.set_foot_pos_parameters, 112 | barkour_l2r_tasks.set_foot_stepping_parameters, 113 | barkour_l2r_tasks.set_target_joint_angles, 114 | barkour_l2r_tasks.head_towards, 115 | barkour_l2r_tasks.walk, 116 | ) 117 | 118 | _DUMMY_FUNCTIONS = ( 119 | 'reset_reward', 120 | 'execute_plan', 121 | ) 122 | 123 | 124 | _CODE_TEMPLATE = r""" 125 | from language_to_reward_2023.platforms.barkour import barkour_l2r_tasks 126 | 127 | weights, params = barkour_l2r_tasks.defaults() 128 | 129 | FUNCTION_DEFINITIONS 130 | 131 | INSERT_CODE_HERE 132 | 133 | import json 134 | print(json.JSONEncoder().encode((weights, params))) 135 | """.replace( 136 | 'FUNCTION_DEFINITIONS', _generate_function_definitions(_SUPPORTED_FUNCTIONS) 137 | ) 138 | -------------------------------------------------------------------------------- /src/language_to_reward_2023/platforms/barkour/barkour_execution_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 DeepMind Technologies Limited 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | import inspect 17 | 18 | from absl.testing import absltest 19 | from absl.testing import parameterized 20 | 21 | from language_to_reward_2023 import confirmation_safe_executor 22 | from language_to_reward_2023 import sandbox_safe_executor 23 | from language_to_reward_2023.platforms.barkour import barkour_execution 24 | from language_to_reward_2023.platforms.barkour import barkour_l2r_tasks 25 | 26 | 27 | def _make_confirmation_executor(): 28 | return barkour_execution.BarkourSafeExecutor( 29 | confirmation_safe_executor.ConfirmationSafeExecutor( 30 | skip_confirmation=True 31 | ) 32 | ) 33 | 34 | 35 | def _make_sandbox_executor(): 36 | return barkour_execution.BarkourSafeExecutor( 37 | sandbox_safe_executor.SandboxSafeExecutor() 38 | ) 39 | 40 | 41 | _EXECUTER_BUILDERS = [ 42 | (_make_confirmation_executor,), 43 | (_make_sandbox_executor,), 44 | ] 45 | 46 | 47 | class BarkourExecutionTest(parameterized.TestCase): 48 | 49 | @parameterized.parameters(_EXECUTER_BUILDERS) 50 | def test_empty_code_returns_default_weights(self, executor_builder): 51 | executor = executor_builder() 52 | params = executor.execute('') 53 | default_cost_weights, default_task_params = barkour_l2r_tasks.defaults() 54 | self.assertEqual(default_cost_weights, params.cost_weights) 55 | self.assertEqual(default_task_params, params.task_parameters) 56 | 57 | @parameterized.parameters(_EXECUTER_BUILDERS) 58 | def test_invalid_code_fails(self, executor_builder): 59 | executor = executor_builder() 60 | code = '(' 61 | self.assertRaises(ValueError, lambda: executor.execute(code)) 62 | 63 | @parameterized.parameters(_EXECUTER_BUILDERS) 64 | def test_throwing_code_fails(self, executor_builder): 65 | executor = executor_builder() 66 | code = 'raise Exception()' 67 | self.assertRaises(ValueError, lambda: executor.execute(code)) 68 | 69 | @parameterized.parameters(_EXECUTER_BUILDERS) 70 | def test_printing_code_fails(self, executor_builder): 71 | executor = executor_builder() 72 | code = 'print("hello")' 73 | self.assertRaises(ValueError, lambda: executor.execute(code)) 74 | 75 | @parameterized.parameters(_EXECUTER_BUILDERS) 76 | def test_can_import_numpy(self, executor_builder): 77 | executor = executor_builder() 78 | params = executor.execute('import numpy as np') 79 | default_cost_weights, default_task_params = barkour_l2r_tasks.defaults() 80 | self.assertEqual(default_cost_weights, params.cost_weights) 81 | self.assertEqual(default_task_params, params.task_parameters) 82 | 83 | @parameterized.parameters(_EXECUTER_BUILDERS) 84 | def test_invalid_parameters_ignored(self, executor_builder): 85 | executor = executor_builder() 86 | _, default_task_params = barkour_l2r_tasks.defaults() 87 | params = executor.execute('params["banana"] = 0.0') 88 | self.assertEqual(default_task_params, params.task_parameters) 89 | 90 | @parameterized.parameters(_EXECUTER_BUILDERS) 91 | def test_invalid_float_ignored(self, executor_builder): 92 | executor = executor_builder() 93 | _, default_task_params = barkour_l2r_tasks.defaults() 94 | params = executor.execute('params["TargetBodyHeight"] = "asdf"') 95 | self.assertEqual(default_task_params, params.task_parameters) 96 | 97 | @parameterized.parameters(_EXECUTER_BUILDERS) 98 | def test_set_torso_targets(self, executor_builder): 99 | executor = executor_builder() 100 | code = """ 101 | set_torso_targets(target_torso_height=0.5) 102 | """ 103 | params = executor.execute(code) 104 | self.assertEqual(0.5, params.task_parameters['TargetBodyHeight']) 105 | 106 | def test_all_supported_functions_are_valid(self): 107 | for f in barkour_execution._SUPPORTED_FUNCTIONS: 108 | args_names = list(inspect.signature(f).parameters.keys()) 109 | self.assertEqual( 110 | ['cost_weights', 'task_parameters'], 111 | args_names[:2], 112 | f"First 2 parameters of {f.__name__} should be ['cost_weights'," 113 | " 'task_parameters']", 114 | ) 115 | 116 | 117 | if __name__ == '__main__': 118 | absltest.main() 119 | -------------------------------------------------------------------------------- /src/language_to_reward_2023/platforms/barkour/barkour_l2r_task_client.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 DeepMind Technologies Limited 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Python client for interfacing with an MJPC binary running the Barkour task.""" 17 | 18 | import dataclasses 19 | import pathlib 20 | from typing import Optional, Sequence 21 | import mujoco 22 | from mujoco_mpc import agent as agent_lib 23 | import numpy as np 24 | from language_to_reward_2023 import task_clients 25 | from language_to_reward_2023.platforms.barkour import barkour_l2r_tasks 26 | 27 | 28 | _MODEL_PATH = str( 29 | pathlib.Path(__file__).parent.parent.parent 30 | / "mjpc" / "barkour" / "world.xml" 31 | ) 32 | 33 | 34 | @dataclasses.dataclass 35 | class BarkourState: 36 | """Current state of the Barkour robot from the MJPC binary.""" 37 | # Torso state (world space) 38 | torso_position: np.ndarray 39 | torso_linear_velocity: np.ndarray 40 | torso_orientation: np.ndarray 41 | torso_angular_velocity: np.ndarray 42 | 43 | # Foot space (local space) 44 | front_left_foot_position: np.ndarray 45 | front_right_foot_position: np.ndarray 46 | front_left_foot_velocity: np.ndarray 47 | front_right_foot_velocity: np.ndarray 48 | hind_left_foot_position: np.ndarray 49 | hind_right_foot_position: np.ndarray 50 | hind_left_foot_velocity: np.ndarray 51 | hind_right_foot_velocity: np.ndarray 52 | 53 | 54 | class BarkourClient(task_clients.AgentApiTaskClient): 55 | """Client for controlling MJPC when it's running the Barkour task.""" 56 | 57 | def __init__( 58 | self, 59 | ui: bool = False, 60 | agent: Optional[agent_lib.Agent] = None, 61 | test_params: Sequence[str] = (), # Do not use, test only. 62 | ): 63 | agent = agent or task_clients.create_agent( 64 | task_id='Barkour', 65 | ui=ui, 66 | real_time_speed=0.4, 67 | ) 68 | model = agent.model or mujoco.MjModel.from_xml_path(_MODEL_PATH) 69 | 70 | super().__init__( 71 | agent=agent, 72 | model=model, 73 | # normally, we run at 25% real time, and planning time is 50 to 80ms, 74 | # so 13ms planning delay should be fine, but for some reason performance 75 | # is not so good in this case, so we set a planning duration such that 76 | # planning happens every step. 77 | # TODO(nimrod): change this to 13ms. 78 | planning_duration=0.005, 79 | ) 80 | 81 | def reset(self): 82 | super().reset() 83 | self._reset_reward() 84 | 85 | def get_state(self) -> BarkourState: 86 | self.update_state() 87 | return BarkourState( 88 | torso_position=np.array(self._data.sensor('torso_subtreecom').data), 89 | torso_orientation=np.array(self._data.sensor('torso_subtreequat').data), 90 | torso_linear_velocity=np.array( 91 | self._data.sensor('torso_subtreelinvel').data 92 | ), 93 | torso_angular_velocity=np.array(self._data.sensor('torso_angvel').data), 94 | front_left_foot_position=np.array(self._data.sensor('FLpos').data), 95 | hind_left_foot_position=np.array(self._data.sensor('RLpos').data), 96 | front_right_foot_position=np.array(self._data.sensor('FRpos').data), 97 | hind_right_foot_position=np.array(self._data.sensor('RRpos').data), 98 | front_left_foot_velocity=np.array(self._data.sensor('FLvel').data), 99 | hind_left_foot_velocity=np.array(self._data.sensor('RLvel').data), 100 | front_right_foot_velocity=np.array(self._data.sensor('FRvel').data), 101 | hind_right_foot_velocity=np.array(self._data.sensor('RRvel').data), 102 | ) 103 | 104 | def _reset_reward(self): 105 | weights, params = barkour_l2r_tasks.defaults() 106 | self._agent.set_task_parameters(params) 107 | self._agent.set_cost_weights(weights) 108 | -------------------------------------------------------------------------------- /src/language_to_reward_2023/platforms/barkour/barkour_l2r_task_client_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 DeepMind Technologies Limited 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Tests for Barkour task client.""" 17 | 18 | from absl.testing import absltest 19 | 20 | from language_to_reward_2023.platforms.barkour import barkour_l2r_task_client 21 | 22 | 23 | class BarkourTaskClientTest(absltest.TestCase): 24 | 25 | def setUp(self): 26 | super().setUp() 27 | self.client = barkour_l2r_task_client.BarkourClient() 28 | 29 | def tearDown(self): 30 | self.client.close() 31 | super().tearDown() 32 | 33 | def test_creates_agent(self): 34 | self.assertIsNotNone(self.client.agent()) 35 | 36 | 37 | if __name__ == '__main__': 38 | absltest.main() 39 | -------------------------------------------------------------------------------- /src/language_to_reward_2023/platforms/barkour/barkour_l2r_tasks.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 DeepMind Technologies Limited 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Utilities for generating task parameters for the Barkour MJPC task.""" 17 | 18 | from typing import Literal 19 | 20 | from language_to_reward_2023.platforms.barkour import default_task_parameters 21 | 22 | FootName = Literal[ 23 | 'front_left', 24 | 'rear_left', 25 | 'front_right', 26 | 'rear_right', 27 | 'back_left', 28 | 'back_right', 29 | ] 30 | 31 | 32 | def defaults() -> tuple[dict[str, float], dict[str, float]]: 33 | """Default cost weights and task parameters for the Barkour MJPC task.""" 34 | weights, params = default_task_parameters.default_parameters_and_weights() 35 | return weights, params 36 | 37 | 38 | def set_torso_targets( 39 | cost_weights: dict[str, float], 40 | task_parameters: dict[str, float], 41 | target_torso_height: float | None = None, 42 | target_torso_pitch: float | None = None, 43 | target_torso_roll: float | None = None, 44 | target_torso_location_xy: tuple[float, float] | None = None, 45 | target_torso_velocity_xy: tuple[float, float] | None = None, 46 | target_torso_heading: float | None = None, 47 | target_turning_speed: float | None = None, 48 | ): 49 | """Updates cost_weights and task_parameters to specify torso targets. 50 | 51 | Arguments: 52 | cost_weights: a dict of weight values which will be mutated. 53 | task_parameters: a dict of task parameters which will be mutated. 54 | target_torso_height: how high the torso should be in meters. 55 | target_torso_pitch: pitch from a horizontal pose in radians. A positive 56 | number means robot is looking up. 57 | target_torso_roll: roll angle in radians, in clockwise direction 58 | target_torso_location_xy: a tuple containing target global coordinates in 59 | meters. 60 | target_torso_velocity_xy: target velocity in robot coordinates, where x is 61 | forward and y is right. 62 | target_torso_heading: desired global heading direction in radians. 63 | target_turning_speed: the desired turning speed of the torso in radians per 64 | second. 65 | """ 66 | if target_torso_height is not None: 67 | task_parameters.update({ 68 | 'TargetBodyHeight': target_torso_height, 69 | }) 70 | cost_weights.update({ 71 | 'BodyHeight': 0.5, 72 | }) 73 | 74 | if target_torso_pitch is not None: 75 | task_parameters.update({ 76 | 'TargetPitch': -target_torso_pitch, 77 | }) 78 | cost_weights.update({ 79 | 'Pitch': 0.6, 80 | }) 81 | 82 | if target_torso_roll is not None: 83 | # Note: In Language2Reward paper, this was added to the current roll of the 84 | # robot. 85 | task_parameters.update({ 86 | 'TargetRoll': target_torso_roll, 87 | }) 88 | cost_weights.update({ 89 | 'Roll': 1.0, 90 | }) 91 | 92 | if target_torso_location_xy is not None: 93 | task_parameters.update({ 94 | 'GoalPositionX': target_torso_location_xy[0], 95 | 'GoalPositionY': target_torso_location_xy[1], 96 | }) 97 | cost_weights.update( 98 | {'BodyXYPos': 0.6, 'Forward_Velocity': 0.0, 'Turning_Velocity': 0.0} 99 | ) 100 | else: 101 | cost_weights.update({'BodyXYPos': 0.0}) 102 | 103 | if target_torso_velocity_xy is not None: 104 | task_parameters.update({ 105 | 'TargetForwardVelocity': target_torso_velocity_xy[0], 106 | 'TargetSidewaysVelocity': target_torso_velocity_xy[1], 107 | }) 108 | cost_weights.update({ 109 | 'BodyXYPos': 0.0, 110 | 'Forward_Velocity': 0.1, 111 | 'Sideways_Velocity': 0.1, 112 | 'Turning_Velocity': 0.1, 113 | }) 114 | else: 115 | cost_weights.update({ 116 | 'Forward_Velocity': 0.0, 117 | 'Sideways_Velocity': 0.0, 118 | 'Turning_Velocity': 0.0, 119 | }) 120 | if target_torso_heading is not None: 121 | task_parameters.update({'TargetHeading': target_torso_heading}) 122 | cost_weights.update({'Heading': 0.6, 'Turning_Velocity': 0.0}) 123 | else: 124 | cost_weights.update({'Heading': 0.0}) 125 | if target_turning_speed is not None: 126 | task_parameters.update({'TargetTurningVelocity': target_turning_speed}) 127 | cost_weights.update({'Heading': 0.0, 'Turning_Velocity': 0.6}) 128 | else: 129 | cost_weights.update({'Turning_Velocity': 0.0}) 130 | 131 | 132 | def set_foot_pos_parameters( 133 | cost_weights: dict[str, float], 134 | task_parameters: dict[str, float], 135 | foot_name: FootName, 136 | lift_height: float | None = None, 137 | extend_forward: float | None = None, 138 | move_inward: float | None = None, 139 | ): 140 | """Updates cost_weights and task_parameters to set the position of a foot.""" 141 | foot_code_name = _FOOT_TO_CODENAME[foot_name] 142 | if lift_height is not None: 143 | balance = lift_height <= 0.0 144 | task_parameters.update({ 145 | f'{foot_code_name}ZDistFromNominal': lift_height, 146 | f'{foot_code_name}Balance': balance, 147 | }) 148 | cost_weights.update({f'{foot_code_name}_upward': 1.0}) 149 | 150 | if extend_forward is not None: 151 | task_parameters.update( 152 | {f'{foot_code_name}XDistFromNominal': extend_forward} 153 | ) 154 | cost_weights.update({f'{foot_code_name}_forward': 0.5}) 155 | 156 | if move_inward is not None: 157 | task_parameters.update({f'{foot_code_name}YDistFromNominal': -move_inward}) 158 | cost_weights.update({f'{foot_code_name}_inward': 0.5}) 159 | 160 | 161 | def set_foot_stepping_parameters( 162 | cost_weights: dict[str, float], 163 | task_parameters: dict[str, float], 164 | foot_name: FootName, 165 | stepping_frequency: float, 166 | air_ratio: float, 167 | phase_offset: float, 168 | swing_up_down: float, 169 | swing_forward_back: float, 170 | should_activate: bool, 171 | ): 172 | """Updates cost_weights and task_parameters to set gait parameters.""" 173 | foot_code_name = _FOOT_TO_CODENAME[foot_name] 174 | if should_activate: 175 | task_parameters.update({ 176 | f'{foot_code_name}SteppingFrequency': stepping_frequency, 177 | f'{foot_code_name}GroundToAirRatio': 1 - air_ratio, 178 | f'{foot_code_name}PhaseOffset': phase_offset, 179 | f'{foot_code_name}AmplitudesVertical': swing_up_down, 180 | f'{foot_code_name}AmplitudesForward': swing_forward_back, 181 | }) 182 | cost_weights.update({ 183 | f'{foot_code_name}_stepping_vertical': 1.0, 184 | f'{foot_code_name}_stepping_horizontal': 0.2, 185 | }) 186 | else: 187 | cost_weights.update({ 188 | f'{foot_code_name}_stepping_vertical': 0.0, 189 | f'{foot_code_name}_stepping_horizontal': 0.0, 190 | }) 191 | 192 | 193 | def set_target_joint_angles( 194 | cost_weights: dict[str, float], 195 | task_parameters: dict[str, float], 196 | leg_name: FootName, 197 | target_joint_angles, 198 | ): 199 | leg_id = _FOOT_TO_ID[leg_name] 200 | for i in range(3): 201 | task_parameters.update({f'q{leg_id*3+i+1}': target_joint_angles[i]}) 202 | cost_weights.update({'PoseTracking': 1.0}) 203 | 204 | 205 | def head_towards( 206 | cost_weights: dict[str, float], 207 | task_parameters: dict[str, float], 208 | heading_direction: float, 209 | ): 210 | """Sets a global heading direction.""" 211 | task_parameters['TargetHeading'] = heading_direction 212 | cost_weights['Heading'] = 0.3 213 | 214 | 215 | def walk( 216 | cost_weights: dict[str, float], 217 | task_parameters: dict[str, float], 218 | forward_speed: float, 219 | sideways_speed: float, 220 | turning_speed: float, 221 | ): 222 | """Sets target velocities for the torso.""" 223 | task_parameters.update({ 224 | 'TargetForwardVelocity': forward_speed, 225 | 'TargetSidewaysVelocity': sideways_speed, 226 | 'TargetTurningVelocity': turning_speed, 227 | }) 228 | cost_weights.update({ 229 | 'BodyXYPos': 0.0, 230 | 'Forward_Velocity': 0.1, 231 | 'Sideways_Velocity': 0.1, 232 | 'Turning_Velocity': 0.3, 233 | }) 234 | 235 | 236 | _FOOT_TO_CODENAME = { 237 | 'front_left': 'FL', 238 | 'rear_left': 'HL', 239 | 'front_right': 'FR', 240 | 'rear_right': 'HR', 241 | 'back_left': 'HL', 242 | 'back_right': 'HR', 243 | } 244 | _FOOT_TO_ID = { 245 | 'front_left': 0, 246 | 'rear_left': 1, 247 | 'front_right': 2, 248 | 'rear_right': 3, 249 | 'back_left': 1, 250 | 'back_right': 3, 251 | } 252 | -------------------------------------------------------------------------------- /src/language_to_reward_2023/platforms/barkour/barkour_l2r_tasks_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 DeepMind Technologies Limited 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | from absl.testing import absltest 17 | from absl.testing import parameterized 18 | from mujoco_mpc import agent as mjpc_agent 19 | import numpy as np 20 | 21 | from language_to_reward_2023.platforms.barkour import barkour_l2r_task_client 22 | from language_to_reward_2023.platforms.barkour import barkour_l2r_tasks 23 | 24 | _AGENT = None 25 | 26 | 27 | def setUpModule(): 28 | global _AGENT 29 | _AGENT = barkour_l2r_task_client.BarkourClient().agent() 30 | 31 | 32 | def tearDownModule(): 33 | global _AGENT 34 | _AGENT.close() 35 | del _AGENT 36 | 37 | 38 | class BarkourTasksTest(parameterized.TestCase): 39 | 40 | def test_defaults(self): 41 | weights, params = barkour_l2r_tasks.defaults() 42 | self.assertEqual(0, weights["FL_forward"]) 43 | self.assertEqual(0, params["GoalPositionX"]) 44 | self.assertAlmostEqual(0.292, params["TargetBodyHeight"]) 45 | 46 | def test_set_torso_targets_location_xy(self): 47 | """Tests that setting torso targets results in the correct behavior.""" 48 | weights, params = barkour_l2r_tasks.defaults() 49 | target = (1.0, 2.0) 50 | barkour_l2r_tasks.set_torso_targets( 51 | weights, params, target_torso_location_xy=target 52 | ) 53 | state = self.run_agent( 54 | _AGENT, 55 | weights, 56 | params, 57 | until=lambda state: _distance(state.qpos[:2], target) < 0.2, 58 | ) 59 | self.assertLessEqual(_distance(state.qpos[:2], target), 0.2) 60 | 61 | def run_agent( 62 | self, 63 | agent: mjpc_agent.Agent, 64 | weights, 65 | params, 66 | until=lambda x: False, 67 | max_time=4.0, 68 | ): 69 | agent.reset() 70 | agent.set_cost_weights(weights) 71 | agent.set_task_parameters(params) 72 | state = agent.get_state() 73 | while state.time < max_time and not until(state): 74 | agent.planner_step() 75 | for _ in range(10): 76 | agent.step() 77 | state = agent.get_state() 78 | return state 79 | 80 | 81 | def _distance(a, b): 82 | return np.linalg.norm(np.array(a) - np.array(b)) 83 | 84 | 85 | if __name__ == "__main__": 86 | absltest.main() 87 | -------------------------------------------------------------------------------- /src/language_to_reward_2023/platforms/barkour/default_task_parameters.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 DeepMind Technologies Limited 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Parameters matching the default values in barkour_modeless.cc.""" 17 | 18 | 19 | def default_parameters_and_weights() -> ( 20 | tuple[dict[str, float], dict[str, float]] 21 | ): 22 | """Returns parameters matching the default values in barkour_modeless.cc.""" 23 | weights = { 24 | "Balance": 0.2, 25 | "BodyHeight": 0.0, 26 | "BodyXYPos": 0.0, 27 | "Effort": 0.0, 28 | "FL_forward": 0.0, 29 | "FL_inward": 0.0, 30 | "FL_stepping_horizontal": 0.0, 31 | "FL_stepping_vertical": 0.0, 32 | "FL_upward": 0.0, 33 | "FR_forward": 0.0, 34 | "FR_inward": 0.0, 35 | "FR_stepping_horizontal": 0.0, 36 | "FR_stepping_vertical": 0.0, 37 | "FR_upward": 0.0, 38 | "Forward_Velocity": 0.0, 39 | "HL_forward": 0.0, 40 | "HL_inward": 0.0, 41 | "HL_stepping_horizontal": 0.0, 42 | "HL_stepping_vertical": 0.0, 43 | "HL_upward": 0.0, 44 | "HR_forward": 0.0, 45 | "HR_inward": 0.0, 46 | "HR_stepping_horizontal": 0.0, 47 | "HR_stepping_vertical": 0.0, 48 | "HR_upward": 0.0, 49 | "Heading": 0.0, 50 | "Pitch": 0.0, 51 | "PoseTracking": 0.0, 52 | "Posture": 0.01, 53 | "Roll": 0.0, 54 | "Roll_Velocity": 0.0, 55 | "Sideways_Velocity": 0.0, 56 | "Turning_Velocity": 0.0, 57 | "Upward_Velocity": 0.0, 58 | "act_dot": 2e-05, 59 | } 60 | params = { 61 | "FLAmplitudesForward": 0.0, 62 | "FLAmplitudesVertical": 0.0, 63 | "FLBalance": 1.0, 64 | "FLGroundToAirRatio": 0.0, 65 | "FLPhaseOffset": 0.0, 66 | "FLSteppingFrequency": 0.0, 67 | "FLXDistFromNominal": 0.0, 68 | "FLYDistFromNominal": 0.0, 69 | "FLZDistFromNominal": 0.0, 70 | "FRAmplitudesForward": 0.0, 71 | "FRAmplitudesVertical": 0.0, 72 | "FRBalance": 1.0, 73 | "FRGroundToAirRatio": 0.0, 74 | "FRPhaseOffset": 0.0, 75 | "FRSteppingFrequency": 0.0, 76 | "FRXDistFromNominal": 0.0, 77 | "FRYDistFromNominal": 0.0, 78 | "FRZDistFromNominal": 0.0, 79 | "GoalPositionX": 0.0, 80 | "GoalPositionY": 0.0, 81 | "HLAmplitudesForward": 0.0, 82 | "HLAmplitudesVertical": 0.0, 83 | "HLBalance": 1.0, 84 | "HLGroundToAirRatio": 0.0, 85 | "HLPhaseOffset": 0.0, 86 | "HLSteppingFrequency": 0.0, 87 | "HLXDistFromNominal": 0.0, 88 | "HLYDistFromNominal": 0.0, 89 | "HLZDistFromNominal": 0.0, 90 | "HRAmplitudesForward": 0.0, 91 | "HRAmplitudesVertical": 0.0, 92 | "HRBalance": 1.0, 93 | "HRGroundToAirRatio": 0.0, 94 | "HRPhaseOffset": 0.0, 95 | "HRSteppingFrequency": 0.0, 96 | "HRXDistFromNominal": 0.0, 97 | "HRYDistFromNominal": 0.0, 98 | "HRZDistFromNominal": 0.0, 99 | "TargetBodyHeight": 0.292, 100 | "TargetForwardVelocity": 0.0, 101 | "TargetHeading": 0.0, 102 | "TargetPitch": 0.0, 103 | "TargetRoll": 0.0, 104 | "TargetRollVelocity": 0.0, 105 | "TargetSidewaysVelocity": 0.0, 106 | "TargetTurningVelocity": 0.0, 107 | "TargetUpwardVelocity": 0.0, 108 | "q1": 0.0, 109 | "q10": 0.0, 110 | "q11": 0.0, 111 | "q12": 0.0, 112 | "q2": 0.0, 113 | "q3": 0.0, 114 | "q4": 0.0, 115 | "q5": 0.0, 116 | "q6": 0.0, 117 | "q7": 0.0, 118 | "q8": 0.0, 119 | "q9": 0.0, 120 | } 121 | return weights, params 122 | -------------------------------------------------------------------------------- /src/language_to_reward_2023/platforms/barkour/prompts/prompt_coder_only.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 DeepMind Technologies Limited 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Prompt class with Reward Coder only.""" 17 | 18 | from language_to_reward_2023 import safe_executor 19 | from language_to_reward_2023.platforms import llm_prompt 20 | from language_to_reward_2023.platforms import process_code 21 | from language_to_reward_2023.platforms.barkour import barkour_execution 22 | from language_to_reward_2023.platforms.barkour import barkour_l2r_task_client 23 | 24 | prompt = """ 25 | We have a quadruped robot and we can use the following functions to program its behavior: 26 | ``` 27 | def set_torso_targets(target_torso_height, target_torso_pitch, target_torso_roll, target_torso_location_xy, target_torso_velocity_xy, target_torso_heading, target_turning_speed) 28 | ``` 29 | target_torso_height: how high the torso wants to reach. When the robot is standing on all four feet in a normal standing pose, the torso is about 0.3m high. 30 | target_torso_pitch: How much the torso should tilt up from a horizontal pose in radians. A positive number means robot is looking up, e.g. if the angle is 0.5*pi the robot will be looking upward, if the angel is 0, then robot will be looking forward. 31 | target_torso_roll: How much the torso should roll in clockwise direction in radians. Zero means the torso is flat. 32 | target_torso_velocity_xy: target torso moving velocity in local space, x is forward velocity, y is sideways velocity (positive means left). 33 | target_torso_heading: the desired direction that the robot should face towards. The value of target_torso_heading is in the range of 0 to 2*pi, where 0 and 2*pi both mean East, pi being West, etc. 34 | Remember: 35 | one of target_torso_location_xy and target_torso_velocity_xy must be None. 36 | one of target_torso_heading and target_turning_speed must be None. 37 | 38 | ``` 39 | def set_foot_pos_parameters(foot_name, lift_height, extend_forward, move_inward) 40 | ``` 41 | foot_name is one of ('front_left', 'back_left', 'front_right', 'back_right'). 42 | lift_height: how high should the foot be lifted in the air. If is None, disable this term. If it's set to 0, the foot will touch the ground. 43 | extend_forward: how much should the foot extend forward. If is None, disable this term. 44 | move_inward: how much should the foot move inward. If is None, disable this term. 45 | 46 | ``` 47 | def execute_plan(plan_duration=10) 48 | ``` 49 | This function sends the parameters to the robot and execute the plan for `plan_duration` seconds, default to be 2 50 | 51 | Below are examples for generating a reward function in python for the robot given a desired task. 52 | 53 | Example 1: Make the robot go to (1, 0), facing South, and lift the back right foot. 54 | Code: 55 | set_torso_targets(0.26, 0.0, 0.0, (1, 0), None, 1.5*np.pi, None) 56 | set_foot_pos_parameters('back_right', 0.1, None, None) 57 | execute_plan(3) 58 | Explanation: 59 | The goal location should be (1, 0). South is 1.5*pi so heading should be this. 60 | 61 | Remember: 62 | 1. Always start your response with [start analysis]. Provide your analysis of the problem within 100 words, then end it with [end analysis]. 63 | 2. After analysis, start your code response, format the code in code blocks. In your response all four functions above: set_torso_targets, set_foot_pos_parameters, execute_plan, should be called at least once. 64 | 3. Do not invent new functions or classes. The only allowed functions you can call are the ones listed above. Do not leave unimplemented code blocks in your response. 65 | 4. The only allowed library is numpy. Do not import or use any other library. If you use np, be sure to import numpy. 66 | 5. If you are not sure what value to use, just use your best judge. Do not use None for anything. 67 | 6. Do not calculate the position or direction of any object (except for the ones provided above). Just use a number directly based on your best guess. 68 | 7. For set_torso_targets, only the last four arguments (target_torso_location_xy, target_torso_velocity_xy, target_torso_heading, target_turning_speed) can be None. Do not set None for any other arguments. 69 | 8. Don't forget to call execute_plan at the end. 70 | 71 | """ 72 | 73 | feet2id = {"front_left": 0, "rear_left": 1, "front_right": 2, "rear_right": 3} 74 | 75 | 76 | class PromptCoder(llm_prompt.LLMPrompt): 77 | """Prompt with Reward Coder only.""" 78 | 79 | def __init__( 80 | self, 81 | client: barkour_l2r_task_client.BarkourClient, 82 | executor: safe_executor.SafeExecutor, 83 | ): 84 | self._agent = client.agent() 85 | self._safe_executor = barkour_execution.BarkourSafeExecutor(executor) 86 | 87 | self.name = "Language2Reward" 88 | self.num_llms = 1 89 | self.prompts = [prompt] 90 | self.keep_message_history = [True] 91 | self.response_processors = [self.process_llm_response] 92 | self.code_executor = self.execute_code 93 | 94 | def execute_code(self, code: str) -> None: 95 | print("ABOUT TO EXECUTE") 96 | print(code) 97 | 98 | mjpc_parameters = self._safe_executor.execute(code) 99 | self._agent.set_task_parameters(mjpc_parameters.task_parameters) 100 | self._agent.set_cost_weights(mjpc_parameters.cost_weights) 101 | 102 | def process_llm_response(self, response): 103 | """Process the response from coder, the output will be the python code.""" 104 | return process_code.process_code_block(response) 105 | -------------------------------------------------------------------------------- /src/language_to_reward_2023/platforms/barkour/prompts/prompt_low_level.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 DeepMind Technologies Limited 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Prompt for low level baseline.""" 17 | 18 | from language_to_reward_2023 import safe_executor 19 | from language_to_reward_2023.platforms import llm_prompt 20 | from language_to_reward_2023.platforms import process_code 21 | from language_to_reward_2023.platforms.barkour import barkour_execution 22 | from language_to_reward_2023.platforms.barkour import barkour_l2r_task_client 23 | 24 | prompt = """ 25 | We have a quadruped robot. It has 12 joints in total, three for each leg. 26 | We can use the following functions to control its movements: 27 | 28 | ``` 29 | def set_target_joint_angles(leg_name, target_joint_angles) 30 | ``` 31 | leg_name is one of ('front_left', 'back_left', 'front_right', 'back_right'). 32 | target_joint_angles: a 3D vector that describes the target angle for the abduction/adduction, hip, and knee joint of the each leg. 33 | 34 | ``` 35 | def walk(forward_speed, sideways_speed, turning_speed) 36 | ``` 37 | forward_speed: how fast the robot should walk forward 38 | sideways_speed: how fast the robot should walk sideways 39 | turning_speed: how fast the robot should be turning (positive means turning right) 40 | 41 | ``` 42 | def head_towards(heading_direction) 43 | ``` 44 | heading_direction: target heading for the robot to reach, in the range of 0 to 2*pi, where 0 means East, 0.5pi means North, pi means West, and 1.5pi means South. 45 | 46 | ``` 47 | def execute_plan(plan_duration=10) 48 | ``` 49 | This function sends the parameters to the robot and execute the plan for `plan_duration` seconds, default to be 2 50 | 51 | 52 | Details about joint angles of each leg: 53 | abduction/adduction joint controls the upper leg to swinging inward/outward. 54 | When it's positive, legs will swing outward (swing to the right for right legs and left for left legs). 55 | When it's negative, legs will swing inward. 56 | 57 | hip joint controls the upper leg to rotate around the shoulder. 58 | When it's zero, the upper leg is parallel to the torso (hip is same height as shoulder), pointing backward. 59 | When it's positive, the upper leg rotates downward so the knee is below the shoulder. When it's 0.5pi, it's perpendicular to the torso, pointing downward. 60 | When it's negative, the upper leg rotates upward so the knee is higher than the shoulder. 61 | 62 | knee joint controls the lower leg to rotate around the knee. 63 | When it's zero, the lower leg is folded closer to the upper leg. 64 | knee joint angle can only be positive. When it's 0.5pi, the lower leg is perpendicular to the upper leg. When it's pi, the lower leg is fully streching out and parallel to the upper leg. 65 | 66 | Here are a few examples for setting the joint angles to make the robot reach a few key poses: 67 | standing on all four feet: 68 | ``` 69 | set_target_joint_angles("front_left", [0, 1, 1.5]) 70 | set_target_joint_angles("back_left", [0, 0.75, 1.5]) 71 | set_target_joint_angles("front_right", [0, 1, 1.5]) 72 | set_target_joint_angles("back_right", [0, 0.75, 1.5]) 73 | execute_plan() 74 | ``` 75 | 76 | sit down on the floor: 77 | ``` 78 | set_target_joint_angles("front_left", [0, 0, 0]) 79 | set_target_joint_angles("back_left", [0, 0, 0]) 80 | set_target_joint_angles("front_right", [0, 0, 0]) 81 | set_target_joint_angles("back_right", [0, 0, 0]) 82 | execute_plan() 83 | ``` 84 | 85 | lift front left foot: 86 | ``` 87 | set_target_joint_angles("front_left", [0, 0.45, 0.35]) 88 | set_target_joint_angles("back_left", [0, 1, 1.5]) 89 | set_target_joint_angles("front_right", [0, 1.4, 1.5]) 90 | set_target_joint_angles("back_right", [0, 1, 1.5]) 91 | execute_plan() 92 | ``` 93 | 94 | lift back left foot: 95 | ``` 96 | set_target_joint_angles("front_left", [0, 0.5, 1.5]) 97 | set_target_joint_angles("back_left", [0, 0.45, 0.35]) 98 | set_target_joint_angles("front_right", [0, 0.5, 1.5]) 99 | set_target_joint_angles("back_right", [0, 0.5, 1.5]) 100 | execute_plan() 101 | ``` 102 | 103 | 104 | Remember: 105 | 1. Always start your response with [start analysis]. Provide your analysis of the problem within 100 words, then end it with [end analysis]. 106 | 2. After analysis, start your code response, format the code in code blocks. 107 | 3. Do not invent new functions or classes. The only allowed functions you can call are the ones listed above. Do not leave unimplemented code blocks in your response. 108 | 4. The only allowed library is numpy. Do not import or use any other library. If you use np, be sure to import numpy. 109 | 5. If you are not sure what value to use, just use your best judge. Do not use None for anything. 110 | 6. Do not calculate the position or direction of any object (except for the ones provided above). Just use a number directly based on your best guess. 111 | 7. Write the code as concisely as possible and try not to define additional variables. 112 | 8. If you define a new function for the skill, be sure to call it somewhere. 113 | 9. Be sure to call execute_plan at the end. 114 | 115 | """ 116 | 117 | feet2id = {"front_left": 0, "rear_left": 1, "front_right": 2, "rear_right": 3} 118 | 119 | 120 | class PromptLowLevel(llm_prompt.LLMPrompt): 121 | """Prompt for low level baseline.""" 122 | 123 | def __init__( 124 | self, 125 | client: barkour_l2r_task_client.BarkourClient, 126 | executor: safe_executor.SafeExecutor, 127 | ): 128 | self._agent = client.agent() 129 | self._safe_executor = barkour_execution.BarkourSafeExecutor(executor) 130 | 131 | self.name = "Language2Reward low level baseline" 132 | self.num_llms = 1 133 | self.prompts = [prompt] 134 | self.keep_message_history = [True] 135 | self.response_processors = [self.process_llm_response] 136 | self.code_executor = self.execute_code 137 | 138 | def execute_code(self, code: str) -> None: 139 | print("ABOUT TO EXECUTE") 140 | print(code) 141 | 142 | mjpc_parameters = self._safe_executor.execute(code) 143 | self._agent.set_task_parameters(mjpc_parameters.task_parameters) 144 | self._agent.set_cost_weights(mjpc_parameters.cost_weights) 145 | 146 | def process_llm_response(self, response): 147 | """Process the response from LLM, the output will be the python code.""" 148 | return process_code.process_code_block(response) 149 | -------------------------------------------------------------------------------- /src/language_to_reward_2023/platforms/barkour/prompts/prompt_thinker_coder.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 DeepMind Technologies Limited 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Prompt class with both Motion Descriptor and Reward Coder.""" 17 | import re 18 | 19 | from language_to_reward_2023 import safe_executor 20 | from language_to_reward_2023.platforms import llm_prompt 21 | from language_to_reward_2023.platforms import process_code 22 | from language_to_reward_2023.platforms.barkour import barkour_execution 23 | from language_to_reward_2023.platforms.barkour import barkour_l2r_task_client 24 | 25 | prompt_thinker = """ 26 | Describe the motion of a dog robot using the following form: 27 | 28 | [start of description] 29 | * This {CHOICE: [is, is not]} a new task. 30 | * The torso of the robot should roll by [NUM: 0.0] degrees towards right, the torso should pitch upward at [NUM: 0.0] degrees. 31 | * The height of the robot's CoM or torso center should be at [NUM: 0.3] meters. 32 | * The robot should {CHOICE: [face certain direction, turn at certain speed]}. If facing certain direction, it should be facing {CHOICE: [east, south, north, west]}. If turning, it should turn at [NUM: 0.0] degrees/s. 33 | * The robot should {CHOICE: [go to a certain location, move at certain speed]}. If going to certain location, it should go to (x=[NUM: 0.0], y=[NUM: 0.0]). If moving at certain speed, it should move forward at [NUM: 0.0]m/s and sideways at [NUM: 0.0]m/s (positive means left). 34 | * [optional] front_left foot lifted to [NUM: 0.0] meters high. 35 | * [optional] back_left foot lifted to [NUM: 0.0] meters high. 36 | * [optional] front_right foot lifted to [NUM: 0.0] meters high. 37 | * [optional] back_right foot lifted to [NUM: 0.0] meters high. 38 | * [optional] front_left foot extend forward by [NUM: 0.0] meters. 39 | * [optional] back_left foot extend forward by [NUM: 0.0] meters. 40 | * [optional] front_right foot extend forward by [NUM: 0.0] meters. 41 | * [optional] back_right foot extend forward by [NUM: 0.0] meters. 42 | * [optional] front_left foot shifts inward laterally by [NUM: 0.0] meters. 43 | * [optional] back_left foot shifts inward laterally by [NUM: 0.0] meters. 44 | * [optional] front_right foot shifts inward laterally by [NUM: 0.0] meters. 45 | * [optional] back_right foot shifts inward laterally by [NUM: 0.0] meters. 46 | * [optional] front_left foot steps on the ground at a frequency of [NUM: 0.0] Hz, during the stepping motion, the foot will move [NUM: 0.0] meters up and down, and [NUM: 0.0] meters forward and back, drawing a circle as if it's walking {CHOICE: forward, back}, spending [NUM: 0.0] portion of the time in the air vs gait cycle. 47 | * [optional] back_left foot steps on the ground at a frequency of [NUM: 0.0] Hz, during the stepping motion, the foot will move [NUM: 0.0] meters up and down, and [NUM: 0.0] meters forward and back, drawing a circle as if it's walking {CHOICE: forward, back}, spending [NUM: 0.0] portion of the time in the air vs gait cycle. 48 | * [optional] front_right foot steps on the ground at a frequency of [NUM: 0.0] Hz, during the stepping motion, the foot will move [NUM: 0.0] meters up and down, and [NUM: 0.0] meters forward and back, drawing a circle as if it's walking {CHOICE: forward, back}, spending [NUM: 0.0] portion of the time in the air vs gait cycle. 49 | * [optional] back_right foot steps on the ground at a frequency of [NUM: 0.0] Hz, during the stepping motion, the foot will move [NUM: 0.0] meters up and down, and [NUM: 0.0] meters forward and back, drawing a circle as if it's walking {CHOICE: forward, back}, spending [NUM: 0.0] portion of the time in the air vs gait cycle. 50 | * [optional] The phase offsets for the four legs should be front_left: [NUM: 0.0], back_left: [NUM: 0.0], front_right: [NUM: 0.0], back_right: [NUM: 0.0]. 51 | 52 | [end of description] 53 | 54 | Rules: 55 | 1. If you see phrases like [NUM: default_value], replace the entire phrase with a numerical value. If you see [PNUM: default_value], replace it with a positive, non-zero numerical value. 56 | 2. If you see phrases like {CHOICE: [choice1, choice2, ...]}, it means you should replace the entire phrase with one of the choices listed. Be sure to replace all of them. If you are not sure about the value, just use your best judgement. 57 | 3. Phase offset is between [0, 1]. So if two legs' phase offset differs by 0 or 1 they are moving in synchronous. If they have phase offset difference of 0.5, they are moving opposite in the gait cycle. 58 | 4. The portion of air vs the gait cycle is between [0, 1]. So if it's 0, it means the foot will always stay on the ground, and if it's 1 it means the foot will always be in the air. 59 | 5. I will tell you a behavior/skill/task that I want the quadruped to perform and you will provide the full description of the quadruped motion, even if you may only need to change a few lines. Always start the description with [start of description] and end it with [end of description]. 60 | 6. We can assume that the robot has a good low-level controller that maintains balance and stability as long as it's in a reasonable pose. 61 | 7. You can assume that the robot is capable of doing anything, even for the most challenging task. 62 | 8. The robot is about 0.3m high in CoM or torso center when it's standing on all four feet with horizontal body. It's about 0.65m high when it stand upright on two feet with vertical body. When the robot's torso/body is flat and parallel to the ground, the pitch and roll angles are both 0. 63 | 9. Holding a foot 0.0m in the air is the same as saying it should maintain contact with the ground. 64 | 10. Do not add additional descriptions not shown above. Only use the bullet points given in the template. 65 | 11. If a bullet point is marked [optional], do NOT add it unless it's absolutely needed. 66 | 12. Use as few bullet points as possible. Be concise. 67 | 68 | """ 69 | 70 | prompt_coder = """ 71 | We have a description of a robot's motion and we want you to turn that into the corresponding program with following functions: 72 | ``` 73 | def set_torso_targets(target_torso_height, target_torso_pitch, target_torso_roll, target_torso_location_xy, target_torso_velocity_xy, target_torso_heading, target_turning_speed) 74 | ``` 75 | target_torso_height: how high the torso wants to reach. When the robot is standing on all four feet in a normal standing pose, the torso is about 0.3m high. 76 | target_torso_pitch: How much the torso should tilt up from a horizontal pose in radians. A positive number means robot is looking up, e.g. if the angle is 0.5*pi the robot will be looking upward, if the angel is 0, then robot will be looking forward. 77 | target_torso_velocity_xy: target torso moving velocity in local space, x is forward velocity, y is sideways velocity (positive means left). 78 | target_torso_heading: the desired direction that the robot should face towards. The value of target_torso_heading is in the range of 0 to 2*pi, where 0 and 2*pi both mean East, pi being West, etc. 79 | target_turning_speed: the desired turning speed of the torso in radians per second. 80 | Remember: 81 | one of target_torso_location_xy and target_torso_velocity_xy must be None. 82 | one of target_torso_heading and target_turning_speed must be None. 83 | No other inputs can be None. 84 | 85 | ``` 86 | def set_foot_pos_parameters(foot_name, lift_height, extend_forward, move_inward) 87 | ``` 88 | foot_name is one of ('front_left', 'back_left', 'front_right', 'back_right'). 89 | lift_height: how high should the foot be lifted in the air. If is None, disable this term. If it's set to 0, the foot will touch the ground. 90 | extend_forward: how much should the foot extend forward. If is None, disable this term. 91 | move_inward: how much should the foot move inward. If is None, disable this term. 92 | 93 | ``` 94 | def set_foot_stepping_parameters(foot_name, stepping_frequency, air_ratio, phase_offset, swing_up_down, swing_forward_back, should_activate) 95 | ``` 96 | foot_name is one of ('front_left', 'rear_left', 'front_right', 'rear_right'). 97 | air_ratio (value from 0 to 1) describes how much time the foot spends in the air versus the whole gait cycle. If it's 0 the foot will always stay on ground, and if it's 1 it'll always stay in the air. 98 | phase_offset (value from 0 to 1) describes how the timing of the stepping motion differs between 99 | different feet. For example, if the phase_offset between two legs differs by 0.5, it means 100 | one leg will start the stepping motion in the middle of the stepping motion cycle of the other leg. 101 | swing_up_down is how much the foot swings vertical during the motion cycle. 102 | swing_forward_back is how much the foot swings horizontally during the motion cycle. 103 | If swing_forward_back is positive, the foot would look like it's going forward, if it's negative, the foot will look like it's going backward. 104 | If should_activate is False, the leg will not follow the stepping motion. 105 | 106 | ``` 107 | def execute_plan(plan_duration=2) 108 | ``` 109 | This function sends the parameters to the robot and execute the plan for `plan_duration` seconds, default to be 2 110 | 111 | Example answer code: 112 | ``` 113 | import numpy as np # import numpy because we are using it below 114 | 115 | reset_reward() # This is a new task so reset reward; otherwise we don't need it 116 | set_torso_targets(0.1, np.deg2rad(5), np.deg2rad(15), (2, 3), None, None, np.deg2rad(10)) 117 | 118 | set_foot_pos_parameters('front_left', 0.1, 0.1, None) 119 | set_foot_pos_parameters('back_left', None, None, 0.15) 120 | set_foot_pos_parameters('front_right', None, None, None) 121 | set_foot_pos_parameters('back_right', 0.0, 0.0, None) 122 | set_foot_stepping_parameters('front_right', 2.0, 0.5, 0.2, 0.1, -0.05, True) 123 | set_foot_stepping_parameters('back_left', 3.0, 0.7, 0.1, 0.1, 0.05, True) 124 | set_foot_stepping_parameters('front_left', 0.0, 0.0, 0.0, 0.0, 0.0, False) 125 | set_foot_stepping_parameters('back_right', 0.0, 0.0, 0.0, 0.0, 0.0, False) 126 | 127 | execute_plan(4) 128 | ``` 129 | 130 | Remember: 131 | 1. Always format the code in code blocks. In your response all four functions above: set_torso_targets, set_foot_pos_parameters, execute_plan, should be called at least once. 132 | 2. Do not invent new functions or classes. The only allowed functions you can call are the ones listed above. Do not leave unimplemented code blocks in your response. 133 | 3. The only allowed library is numpy. Do not import or use any other library. If you use np, be sure to import numpy. 134 | 4. If you are not sure what value to use, just use your best judge. Do not use None for anything. 135 | 5. Do not calculate the position or direction of any object (except for the ones provided above). Just use a number directly based on your best guess. 136 | 6. For set_torso_targets, only the last four arguments (target_torso_location_xy, target_torso_velocity_xy, target_torso_heading, target_turning_speed) can be None. Do not set None for any other arguments. 137 | 7. Don't forget to call execute_plan at the end. 138 | 139 | """ 140 | 141 | 142 | class PromptThinkerCoder(llm_prompt.LLMPrompt): 143 | """Prompt with both Motion Descriptor and Reward Coder.""" 144 | 145 | def __init__( 146 | self, 147 | client: barkour_l2r_task_client.BarkourClient, 148 | executor: safe_executor.SafeExecutor, 149 | ): 150 | self._agent = client.agent() 151 | self._safe_executor = barkour_execution.BarkourSafeExecutor(executor) 152 | 153 | self.name = "Language2StructuredLang2Reward" 154 | 155 | self.num_llms = 2 156 | self.prompts = [prompt_thinker, prompt_coder] 157 | 158 | # The coder doesn't need to keep the history as it only serves a purpose for 159 | # translating to code 160 | self.keep_message_history = [True, False] 161 | self.response_processors = [ 162 | self.process_thinker_response, 163 | self.process_coder_response, 164 | ] 165 | self.code_executor = self.execute_code 166 | 167 | # process the response from thinker, the output will be used as input to coder 168 | def process_thinker_response(self, response: str) -> str: 169 | try: 170 | motion_description = ( 171 | re.split( 172 | "end of description", 173 | re.split("start of description", response, flags=re.IGNORECASE)[ 174 | 1 175 | ], 176 | flags=re.IGNORECASE, 177 | )[0] 178 | .strip("[") 179 | .strip("]") 180 | .strip() 181 | .strip("```") 182 | ) 183 | return motion_description 184 | except Exception as _: # pylint: disable=broad-exception-caught 185 | return response 186 | 187 | def process_coder_response(self, response): 188 | """Process the response from coder, the output will be the python code.""" 189 | return process_code.process_code_block(response) 190 | 191 | def execute_code(self, code: str) -> None: 192 | print("ABOUT TO EXECUTE\n", code) 193 | mjpc_parameters = self._safe_executor.execute(code) 194 | self._agent.set_task_parameters(mjpc_parameters.task_parameters) 195 | self._agent.set_cost_weights(mjpc_parameters.cost_weights) 196 | -------------------------------------------------------------------------------- /src/language_to_reward_2023/platforms/llm_prompt.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 DeepMind Technologies Limited 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Abstract class for LLM Prompts.""" 17 | import dataclasses 18 | from typing import Any, Optional, Sequence 19 | 20 | 21 | @dataclasses.dataclass 22 | class LLMPrompt: 23 | name: str = "TaskEvaluator" 24 | num_llms: int = 0 25 | prompts: Sequence[str] = dataclasses.field(default_factory=list) 26 | keep_message_history: Sequence[bool] = dataclasses.field(default_factory=list) 27 | response_processors: Sequence[Any] = dataclasses.field(default_factory=list) 28 | code_executor: Optional[Any] = None 29 | -------------------------------------------------------------------------------- /src/language_to_reward_2023/platforms/process_code.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 DeepMind Technologies Limited 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Utilities for processing LLM-generated code into executable code.""" 17 | 18 | import re 19 | 20 | 21 | def _fix_code(code_str: str) -> str: 22 | """Fixes common mistakes in LLM-generated code.""" 23 | if "np." in code_str and "import numpy as np" not in code_str: 24 | code_str = "import numpy as np\n" + code_str 25 | 26 | return code_str 27 | 28 | 29 | def process_code_block(text: str) -> str: 30 | """Extracts code blocks from the input string. 31 | 32 | Arguments: 33 | text: A string which may contain markdown code blocks. 34 | 35 | Returns: 36 | Code concatenated from the markdown code blocks, with some code fixes 37 | applied. The code is untrusted and should only be executed in a sandbox. 38 | """ 39 | code_block_regex = r"```(python)?\n?([\s\S]*?)```" 40 | matches = re.findall(code_block_regex, text) 41 | if matches: 42 | code = "\n".join([m[1] for m in matches]) 43 | else: 44 | # If there are no code block markers, assume the whole text is one code 45 | # block. 46 | code = text 47 | return _fix_code(code) 48 | -------------------------------------------------------------------------------- /src/language_to_reward_2023/platforms/process_code_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 DeepMind Technologies Limited 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | from absl.testing import absltest 17 | from language_to_reward_2023.platforms import process_code 18 | 19 | 20 | class ProcessCodeTest(absltest.TestCase): 21 | 22 | def test_extract_code_block(self): 23 | block = process_code.process_code_block(""" 24 | ```python 25 | banana 26 | ``` 27 | """) 28 | self.assertEqual(block.strip(), "banana") 29 | 30 | def test_extract_multiple_code_blocks(self): 31 | block = process_code.process_code_block(""" 32 | ```python 33 | banana 34 | ``` 35 | 36 | ``` 37 | apple 38 | ``` 39 | """) 40 | self.assertEqual(block, "banana\n\napple\n") 41 | 42 | def test_add_numpy(self): 43 | block = process_code.process_code_block(""" 44 | ```python 45 | np.zeros(3) 46 | ``` 47 | """) 48 | self.assertEqual(block.strip(), "import numpy as np\nnp.zeros(3)") 49 | 50 | def test_no_block_markers(self): 51 | block = process_code.process_code_block("""np.zeros(3) 52 | """) 53 | self.assertEqual(block.strip(), "import numpy as np\nnp.zeros(3)") 54 | 55 | 56 | if __name__ == "__main__": 57 | absltest.main() 58 | -------------------------------------------------------------------------------- /src/language_to_reward_2023/platforms/task_evaluator.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 DeepMind Technologies Limited 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Abstract base class for task evaluators.""" 17 | import dataclasses 18 | from typing import Any, Sequence 19 | 20 | 21 | @dataclasses.dataclass 22 | class TaskEvaluator: 23 | name: str = "TaskEvaluator" 24 | evaluators: Sequence[Any] = dataclasses.field(default_factory=list) 25 | queries: Sequence[Any] = dataclasses.field(default_factory=list) 26 | -------------------------------------------------------------------------------- /src/language_to_reward_2023/safe_executor.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 DeepMind Technologies Limited 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Base class for executing untrusted code.""" 17 | 18 | from typing import Protocol 19 | 20 | 21 | class SafeExecutor(Protocol): 22 | """Base class for executors that run untrusted code and produce their output.""" 23 | 24 | def safe_execute(self, code: str) -> str: 25 | """Executes the given Python code and returns the standard output from it. 26 | 27 | Arguments: 28 | code: code which edits the weights and params dicts. 29 | 30 | Raises: 31 | ValueError: the code doesn't compile or failed to run. 32 | 33 | Returns: 34 | The standard output from the executed code. 35 | """ 36 | -------------------------------------------------------------------------------- /src/language_to_reward_2023/task_clients.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 DeepMind Technologies Limited 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Classes that manage resources related to MJPC tasks.""" 17 | 18 | import abc 19 | import pathlib 20 | from typing import Any, Callable, Mapping, Optional, Sequence 21 | 22 | import mujoco 23 | from mujoco_mpc import agent as agent_lib 24 | import numpy as np 25 | 26 | from mujoco_mpc.proto import agent_pb2 27 | 28 | 29 | DEFAULT_UI_SERVER_PATH = str( 30 | pathlib.Path(__file__).parent / "mjpc" / "l2r_ui_server" 31 | ) 32 | DEFAULT_HEADLESS_SERVER_PATH = str( 33 | pathlib.Path(__file__).parent / "mjpc" / "l2r_headless_server" 34 | ) 35 | 36 | 37 | class TaskClient(abc.ABC): 38 | """Base class for task clients.""" 39 | 40 | @abc.abstractmethod 41 | def reset(self): 42 | """Resets the task to a starting position.""" 43 | 44 | @abc.abstractmethod 45 | def model(self) -> mujoco.MjModel: 46 | """Returns an MjModel for the task.""" 47 | 48 | def close(self): 49 | """Releases any resources associated with the task.""" 50 | 51 | def methods(self) -> dict[str, Callable[..., Any]]: 52 | """Returns the set of methods supported by the client.""" 53 | return {} 54 | 55 | 56 | def create_agent( 57 | task_id: str, 58 | ui: bool, 59 | real_time_speed: float = 1, 60 | server_binary_path: Optional[str] = None, 61 | subprocess_kwargs: Optional[Mapping[str, Any]] = None, 62 | extra_flags: Optional[Sequence[str]] = None, 63 | ) -> agent_lib.Agent: 64 | """Helper function to create an agent_lib.Agent instance.""" 65 | if server_binary_path is None: 66 | if ui: 67 | server_binary_path = DEFAULT_UI_SERVER_PATH 68 | else: 69 | server_binary_path = DEFAULT_HEADLESS_SERVER_PATH 70 | 71 | if ui: 72 | flags = [ 73 | '--planner_enabled', 74 | ] 75 | else: 76 | flags = [] 77 | 78 | return agent_lib.Agent( 79 | task_id, 80 | server_binary_path=server_binary_path, 81 | real_time_speed=real_time_speed, 82 | extra_flags=flags + (extra_flags or []), 83 | subprocess_kwargs=subprocess_kwargs, 84 | ) 85 | 86 | 87 | class AgentApiTaskClient(TaskClient): 88 | """Base class for task clients which starts the Agent Service. 89 | 90 | By default, this will be a headless client, but if ui==True, an interactive 91 | user interface will be run while executing the task. The UI version will 92 | behave differently from the headless version, because planning is async. 93 | """ 94 | 95 | def __init__( 96 | self, 97 | agent: agent_lib.Agent, 98 | model: mujoco.MjModel, 99 | planning_duration: float = 0.04, 100 | real_time_speed: float = 1.0, 101 | ): 102 | self._agent = agent 103 | self._model = model 104 | self._data = mujoco.MjData(self._model) 105 | 106 | self._last_planning_time = None 107 | self._planning_duration = planning_duration 108 | self._real_time_speed = real_time_speed 109 | self._recorded = None 110 | 111 | def agent(self) -> agent_lib.Agent: 112 | return self._agent 113 | 114 | def model(self) -> mujoco.MjModel: 115 | return self._model 116 | 117 | def reset(self): 118 | self._agent.reset() 119 | self._last_planning_time = None 120 | 121 | def close(self): 122 | """Releases any resources associated with the task.""" 123 | self._agent.close() 124 | 125 | def start_recording(self): 126 | """Start recording simulation states, for later rendering.""" 127 | self._recorded = [] 128 | 129 | def end_recording(self) -> list[agent_pb2.State] | None: 130 | """Returns a list of states recorded since the call to start_recording.""" 131 | recorded = self._recorded 132 | self._recorded = None 133 | return recorded 134 | 135 | def update_state(self): 136 | """Updates self._data with latest state from the agent binary.""" 137 | state = self._agent.get_state() 138 | self._data.time = state.time 139 | self._data.qpos = state.qpos 140 | self._data.qvel = state.qvel 141 | self._data.act = state.act 142 | self._data.mocap_pos = np.array(state.mocap_pos).reshape( 143 | self._data.mocap_pos.shape 144 | ) 145 | self._data.mocap_quat = np.array(state.mocap_quat).reshape( 146 | self._data.mocap_quat.shape 147 | ) 148 | self._data.userdata = np.array(state.userdata).reshape( 149 | self._data.userdata.shape 150 | ) 151 | mujoco.mj_forward(self._model, self._data) 152 | if self._recorded is not None: 153 | self._recorded.append(state) 154 | 155 | def execute_plan(self, duration=10): 156 | """Runs MJPC for `duration` seconds while executing planner actions.""" 157 | state = self._agent.get_state() 158 | start = state.time 159 | while state.time - start < duration: 160 | if self._last_planning_time is None or ( 161 | state.time - self._last_planning_time 162 | >= self._planning_duration * self._real_time_speed 163 | ): 164 | self._last_planning_time = state.time 165 | # repeat planning for 5 times 166 | for _ in range(5): 167 | self._agent.planner_step() 168 | self.update_state() 169 | self._agent.set_state() # needed to call transition() 170 | self._agent.step() 171 | state = self._agent.get_state() 172 | -------------------------------------------------------------------------------- /src/language_to_reward_2023/task_configs.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 DeepMind Technologies Limited 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Config file for evaluation and user interaction scripts.""" 17 | 18 | import dataclasses 19 | from typing import Any 20 | 21 | from language_to_reward_2023 import task_clients 22 | from language_to_reward_2023.platforms.barkour import barkour_l2r_task_client 23 | from language_to_reward_2023.platforms.barkour.prompts import prompt_coder_only as bk_prompt_coder_only 24 | from language_to_reward_2023.platforms.barkour.prompts import prompt_low_level as bk_prompt_low_level 25 | from language_to_reward_2023.platforms.barkour.prompts import prompt_thinker_coder as bk_prompt_thinker_coder 26 | 27 | 28 | @dataclasses.dataclass(frozen=True) 29 | class TaskConfig: 30 | client: type[task_clients.TaskClient] 31 | prompts: dict[str, type[Any]] 32 | 33 | 34 | ALL_TASKS = { 35 | 'barkour': TaskConfig( 36 | client=barkour_l2r_task_client.BarkourClient, 37 | prompts={ 38 | 'thinker_coder': bk_prompt_thinker_coder.PromptThinkerCoder, 39 | 'coder_only': bk_prompt_coder_only.PromptCoder, 40 | 'low_level': bk_prompt_low_level.PromptLowLevel, 41 | }, 42 | ), 43 | } 44 | -------------------------------------------------------------------------------- /src/language_to_reward_2023/user_interaction.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 DeepMind Technologies Limited 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """A conversational chat utility for controlling MJPC.""" 17 | 18 | from typing import Any, List 19 | 20 | from absl import app 21 | from absl import flags 22 | import colorama 23 | import openai 24 | import termcolor 25 | 26 | from language_to_reward_2023 import confirmation_safe_executor 27 | from language_to_reward_2023 import conversation 28 | from language_to_reward_2023 import task_configs 29 | 30 | 31 | _API_KEY_FLAG = flags.DEFINE_string("api_key", "", "OpenAI API Key") 32 | _TASK_FLAG = flags.DEFINE_enum( 33 | "task", "barkour", list(task_configs.ALL_TASKS), "task to be used" 34 | ) 35 | _PROMPT_FLAG = flags.DEFINE_string( 36 | "prompt", "thinker_coder", "prompt to be used" 37 | ) 38 | MODEL = "gpt-4" 39 | 40 | colorama.init() 41 | 42 | 43 | def main(argv: List[str]) -> None: 44 | if len(argv) > 1: 45 | raise app.UsageError("Too many command-line arguments.") 46 | 47 | safe_executor = confirmation_safe_executor.ConfirmationSafeExecutor() 48 | 49 | assert _TASK_FLAG.value in task_configs.ALL_TASKS 50 | 51 | openai.api_key = _API_KEY_FLAG.value 52 | task_config = task_configs.ALL_TASKS[_TASK_FLAG.value] 53 | if _PROMPT_FLAG.value not in task_config.prompts: 54 | raise ValueError( 55 | "Invalid value for --prompt. Valid values:" 56 | f" {', '.join(task_config.prompts)}" 57 | ) 58 | prompt = task_config.prompts[_PROMPT_FLAG.value] 59 | print( 60 | "Starting MJPC UI" 61 | ) 62 | client_class: Any = task_config.client 63 | client = client_class(ui=True) 64 | 65 | try: 66 | # send the grpc channel to the prompt model to create stub 67 | prompt_model = prompt( 68 | client, executor=safe_executor 69 | ) 70 | conv = conversation.Conversation(prompt_model, MODEL) 71 | client.reset() 72 | 73 | while True: 74 | user_command = input(termcolor.colored("User: ", "red", attrs=["bold"])) 75 | try: 76 | response = conv.send_command(user_command) 77 | except Exception as e: # pylint: disable=broad-exception-caught 78 | print("Planning failed, try something else... " + str(e) + "\n") 79 | continue 80 | 81 | # Final response should be code 82 | try: 83 | prompt_model.code_executor(response) 84 | except Exception as e: # pylint: disable=broad-exception-caught 85 | print("Execution failed, try something else... " + str(e) + "\n") 86 | finally: 87 | client.close() 88 | 89 | 90 | if __name__ == "__main__": 91 | app.run(main) 92 | --------------------------------------------------------------------------------