├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── NOTICE ├── README.md ├── build.sh ├── cloudformation ├── onCreate.sh ├── sagemaker.yaml └── solution-assistant │ ├── requirements.txt │ └── src │ └── lambda.py ├── docs ├── BattleSnake-RL-4.gif ├── Table_CompareInstances.png ├── cloudstack_snaphot.png ├── cloudstack_snapshot_dist.png ├── impala_benchmark_baseline_p3_2x.png ├── impala_benchmark_instances.png ├── launch_button.svg └── single_instance_all_envs.png └── sagemaker ├── .gitignore ├── Dockerfile ├── config └── sagemaker_config.yaml ├── source ├── common │ ├── daemon.json │ ├── docker_utils.py │ ├── env_utils.py │ ├── markdown_helper.py │ ├── misc.py │ ├── sagemaker_rl │ │ ├── README.md │ │ ├── __init__.py │ │ ├── coach_launcher.py │ │ ├── configuration_list.py │ │ ├── docker_utils.py │ │ ├── onnx_utils.py │ │ ├── ray_launcher.py │ │ ├── sage_cluster_communicator.py │ │ ├── stable_baselines_launcher.py │ │ └── tf_serving_utils.py │ └── setup.sh ├── custom │ ├── Readme.md │ ├── algorithms │ │ ├── __init__.py │ │ ├── custom_random_agent │ │ │ ├── __init__.py │ │ │ └── custom_random_agent.py │ │ ├── random_policy │ │ │ ├── __init__.py │ │ │ ├── policy.py │ │ │ ├── readme.md │ │ │ └── trainer.py │ │ └── registry.py │ ├── callbacks.py │ ├── envs │ │ ├── .gitkeep │ │ ├── __init__.py │ │ ├── framestack.py │ │ ├── procgen_env_wrapper.py │ │ └── readme.md │ ├── experiments │ │ ├── impala-baseline.yaml │ │ ├── procgen-starter-example.yaml │ │ └── random-policy.yaml │ ├── models │ │ ├── .gitkeep │ │ ├── impala_cnn_tf.py │ │ ├── impala_cnn_torch.py │ │ └── my_vision_network.py │ ├── preprocessors │ │ ├── __init__.py │ │ └── custom_preprocessor.py │ └── setup.py ├── procgen_ray_launcher.py ├── ray_experiment_builder.py ├── requirements.txt ├── rollout.py ├── train-local.py ├── train-sagemaker-distributed-cpu.py ├── train-sagemaker-distributed-gpu.py ├── train-sagemaker.py └── utils │ ├── inference.py │ └── loader.py ├── 
train-hetero-distributed.ipynb ├── train-homo-distributed-cpu.ipynb ├── train-homo-distributed-gpu.ipynb └── train.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | buildspec.yml 2 | .viperlightignore 3 | ~.DS_Store 4 | .DS_Store 5 | *~ 6 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. 
Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *master* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 
49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 60 | 61 | We may ask you to sign a [Contributor License Agreement (CLA)](http://en.wikipedia.org/wiki/Contributor_License_Agreement) for larger changes. 62 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 
23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. 
For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. 
If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. 
You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | -------------------------------------------------------------------------------- /build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ $# -lt 3 ]; then 4 | echo "Please provide the solution name as well as the base S3 bucket name and the region to run build script." 5 | echo "For example: ./build.sh trademarked-solution-name sagemaker-solutions-build us-west-2" 6 | exit 1 7 | fi 8 | 9 | # Package the solution assistant 10 | mkdir build 11 | mkdir build/solution-assistant 12 | cp -r ./cloudformation/solution-assistant ./build/ 13 | (cd ./build/solution-assistant && pip install -r requirements.txt -t ./src/site-packages) 14 | find ./build/solution-assistant -name '*.pyc' -delete 15 | (cd ./build/solution-assistant/src && zip -q -r9 ../../solution-assistant.zip *) 16 | rm -rf ./build/solution-assistant 17 | 18 | # Upload to S3 19 | s3_prefix="s3://$2-$3/$1" 20 | echo "Using S3 path: $s3_prefix" 21 | aws s3 cp --recursive sagemaker $s3_prefix/sagemaker --exclude '.*' --exclude "*~" 22 | aws s3 cp --recursive cloudformation $s3_prefix/cloudformation --exclude '.*' --exclude "*~" 23 | aws s3 cp --recursive build $s3_prefix/build 24 | aws s3 cp Readme.md $s3_prefix/ 25 | -------------------------------------------------------------------------------- /cloudformation/onCreate.sh: -------------------------------------------------------------------------------- 1 | # This file creates a symbolic link of folders in the neurips2020-progen-starter-kit to the SageMaker's one. 2 | 3 | #### Warning: This file is ran by the cloudformation template. You should not have to manually run this file. 
4 | 5 | # Remove the custom content 6 | rm -r source/custom/algorithms source/custom/envs source/custom/models source/custom/preprocessors source/custom/experiments 7 | rm source/custom/callbacks.py 8 | 9 | # Create symbolic link 10 | cd source/custom 11 | ln -s ../../neurips2020-procgen-starter-kit/algorithms algorithms 12 | ln -s ../../neurips2020-procgen-starter-kit/envs envs 13 | ln -s ../../neurips2020-procgen-starter-kit/models models 14 | ln -s ../../neurips2020-procgen-starter-kit/experiments experiments 15 | ln -s ../../neurips2020-procgen-starter-kit/preprocessors preprocessors 16 | ln -s ../../neurips2020-procgen-starter-kit/callbacks.py callbacks.py 17 | 18 | 19 | # Bug fix in framestack.py 20 | export REPLACE_STRING="\\ 21 | try:\\ 22 | from envs.procgen_env_wrapper import ProcgenEnvWrapper\\ 23 | except ModuleNotFoundError:\\ 24 | from custom.envs.procgen_env_wrapper import ProcgenEnvWrapper" 25 | 26 | sed -i "s/from envs.procgen_env_wrapper import ProcgenEnvWrapper/${REPLACE_STRING}/g" envs/framestack.py 27 | 28 | # Copy setup.py into the starter kit 29 | cp setup.py ../../neurips2020-procgen-starter-kit/ 30 | -------------------------------------------------------------------------------- /cloudformation/solution-assistant/requirements.txt: -------------------------------------------------------------------------------- 1 | crhelper 2 | -------------------------------------------------------------------------------- /cloudformation/solution-assistant/src/lambda.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"). 7 | # You may not use this file except in compliance with the License. 
8 | # A copy of the License is located at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # or in the "license" file accompanying this file. This file is distributed 13 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 14 | # express or implied. See the License for the specific language governing 15 | # permissions and limitations under the License. 16 | 17 | import boto3 18 | import sys 19 | 20 | sys.path.append('./site-packages') 21 | 22 | from crhelper import CfnResource 23 | 24 | helper = CfnResource() 25 | 26 | @helper.update 27 | @helper.create 28 | def empty_function(event, _): 29 | pass 30 | 31 | @helper.delete 32 | def on_delete(event, _): 33 | s3_resource = boto3.resource("s3") 34 | bucket_name = event["ResourceProperties"]["S3BucketName"] 35 | try: 36 | s3_resource.Bucket(bucket_name).objects.all().delete() 37 | print("Successfully deleted objects in bucket " 38 | "called '{}'".format(bucket_name)) 39 | 40 | except s3_resource.meta.client.exceptions.NoSuchBucket: 41 | print( 42 | "Could not find bucket called '{}'. 
" 43 | "Skipping delete.".format(bucket_name) 44 | ) 45 | 46 | def handler(event, context): 47 | helper(event, context) 48 | -------------------------------------------------------------------------------- /docs/BattleSnake-RL-4.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sagemaker-rl-procgen-ray/ba82c1f0bac2c985930bda7875f1950723f6d7ce/docs/BattleSnake-RL-4.gif -------------------------------------------------------------------------------- /docs/Table_CompareInstances.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sagemaker-rl-procgen-ray/ba82c1f0bac2c985930bda7875f1950723f6d7ce/docs/Table_CompareInstances.png -------------------------------------------------------------------------------- /docs/cloudstack_snaphot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sagemaker-rl-procgen-ray/ba82c1f0bac2c985930bda7875f1950723f6d7ce/docs/cloudstack_snaphot.png -------------------------------------------------------------------------------- /docs/cloudstack_snapshot_dist.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sagemaker-rl-procgen-ray/ba82c1f0bac2c985930bda7875f1950723f6d7ce/docs/cloudstack_snapshot_dist.png -------------------------------------------------------------------------------- /docs/impala_benchmark_baseline_p3_2x.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sagemaker-rl-procgen-ray/ba82c1f0bac2c985930bda7875f1950723f6d7ce/docs/impala_benchmark_baseline_p3_2x.png -------------------------------------------------------------------------------- /docs/impala_benchmark_instances.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sagemaker-rl-procgen-ray/ba82c1f0bac2c985930bda7875f1950723f6d7ce/docs/impala_benchmark_instances.png -------------------------------------------------------------------------------- /docs/launch_button.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Quick Create Stack 5 | 6 | 7 | -------------------------------------------------------------------------------- /docs/single_instance_all_envs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sagemaker-rl-procgen-ray/ba82c1f0bac2c985930bda7875f1950723f6d7ce/docs/single_instance_all_envs.png -------------------------------------------------------------------------------- /sagemaker/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 98 | __pypackages__/ 99 | 100 | # Celery stuff 101 | celerybeat-schedule 102 | celerybeat.pid 103 | 104 | # SageMath parsed files 105 | *.sage.py 106 | 107 | # Environments 108 | .env 109 | .venv 110 | env/ 111 | venv/ 112 | ENV/ 113 | env.bak/ 114 | venv.bak/ 115 | 116 | # Spyder project settings 117 | .spyderproject 118 | .spyproject 119 | 120 | # Rope project settings 121 | .ropeproject 122 | 123 | # mkdocs documentation 124 | /site 125 | 126 | # mypy 127 | .mypy_cache/ 128 | .dmypy.json 129 | dmypy.json 130 | 131 | # Pyre type checker 132 | .pyre/ 133 | 134 | # pytype static type analyzer 135 | .pytype/ 136 | 137 | # Cython debug symbols 138 | cython_debug/ 139 | 140 | -------------------------------------------------------------------------------- /sagemaker/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG CPU_OR_GPU 2 | ARG AWS_REGION 3 | ARG FRAMEWORK 4 | FROM 462105765813.dkr.ecr.${AWS_REGION}.amazonaws.com/sagemaker-rl-ray-container:ray-0.8.5-${FRAMEWORK}-${CPU_OR_GPU}-py36 5 | 6 | WORKDIR /opt/ml 7 | RUN apt-get update 8 | RUN apt-get install -y openssh-server 9 | RUN pip install --upgrade \ 10 | pip \ 11 | setuptools \ 12 | setproctitle \ 13 | lz4 \ 14 | psutil 15 | 16 | RUN pip install procgen==0.10.1 17 | RUN pip install mlflow==1.8.0 18 | 19 | # Log in to dockerized SSH daemon service 20 | RUN sed -ri 's/^#?PermitRootLogin\s+.*/PermitRootLogin yes/' /etc/ssh/sshd_config && \ 21 | sed -ri 's/^#?PubkeyAuthentication\s+.*/PubkeyAuthentication yes/' /etc/ssh/sshd_config 22 | ENV PYTHONUNBUFFERED 1 23 | 24 | ############################################ 25 | # Test Installation 26 | ############################################ 27 | # Test to verify if all required dependencies installed successfully or not. 
28 | RUN python -c "import gym;import sagemaker_containers.cli.train; import ray; from sagemaker_containers.cli.train import main" 29 | # Make things a bit easier to debug 30 | WORKDIR /opt/ml/code 31 | # Expose port 22 for SSH login 32 | EXPOSE 22 33 | CMD ["/usr/sbin/sshd", "-D"] 34 | -------------------------------------------------------------------------------- /sagemaker/config/sagemaker_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sagemaker-rl-procgen-ray/ba82c1f0bac2c985930bda7875f1950723f6d7ce/sagemaker/config/sagemaker_config.yaml -------------------------------------------------------------------------------- /sagemaker/source/common/daemon.json: -------------------------------------------------------------------------------- 1 | 2 | { 3 | "default-runtime": "nvidia", 4 | "runtimes": { 5 | "nvidia": { 6 | "path": "/usr/bin/nvidia-container-runtime", 7 | "runtimeArgs": [] 8 | } 9 | } 10 | } -------------------------------------------------------------------------------- /sagemaker/source/common/docker_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 
13 | 14 | from __future__ import absolute_import 15 | 16 | import base64 17 | import contextlib 18 | import os 19 | import time 20 | import shlex 21 | import shutil 22 | import subprocess 23 | import sys 24 | import tempfile 25 | 26 | import boto3 27 | import json 28 | 29 | IMAGE_TEMPLATE = "{account}.dkr.ecr.{region}.amazonaws.com/{image_name}:{version}" 30 | 31 | 32 | def build_and_push_docker_image(repository_name, dockerfile='Dockerfile', build_args={}): 33 | """Builds a docker image from the specified dockerfile, and pushes it to 34 | ECR. Handles things like ECR login, creating the repository. 35 | 36 | Returns the name of the created docker image in ECR 37 | """ 38 | base_image = _find_base_image_in_dockerfile(dockerfile) 39 | _ecr_login_if_needed(base_image) 40 | _build_from_dockerfile(repository_name, dockerfile, build_args) 41 | ecr_tag = push(repository_name) 42 | return ecr_tag 43 | 44 | 45 | def _build_from_dockerfile(repository_name, dockerfile='Dockerfile', build_args={}): 46 | build_cmd = ['docker', 'build', '-t', repository_name, '-f', dockerfile, '.'] 47 | for k,v in build_args.items(): 48 | build_cmd += ['--build-arg', '%s=%s' % (k,v)] 49 | 50 | print("Building docker image %s from %s" % (repository_name, dockerfile)) 51 | _execute(build_cmd) 52 | print("Done building docker image %s" % repository_name) 53 | 54 | 55 | def _find_base_image_in_dockerfile(dockerfile): 56 | dockerfile_lines = open(dockerfile).readlines() 57 | from_line = list(filter(lambda line: line.startswith("FROM "), dockerfile_lines))[0].rstrip() 58 | base_image = from_line[5:] 59 | return base_image 60 | 61 | 62 | def push(tag, aws_account=None, aws_region=None): 63 | """ 64 | Push the builded tag to ECR. 
65 | 66 | Args: 67 | tag (string): tag which you named your algo 68 | aws_account (string): aws account of the ECR repo 69 | aws_region (string): aws region where the repo is located 70 | 71 | Returns: 72 | (string): ECR repo image that was pushed 73 | """ 74 | session = boto3.Session() 75 | aws_account = aws_account or session.client("sts").get_caller_identity()['Account'] 76 | aws_region = aws_region or session.region_name 77 | try: 78 | repository_name, version = tag.split(':') 79 | except ValueError: # split failed because no : 80 | repository_name = tag 81 | version = "latest" 82 | ecr_client = session.client('ecr', region_name=aws_region) 83 | 84 | _create_ecr_repo(ecr_client, repository_name) 85 | _ecr_login(ecr_client, aws_account) 86 | ecr_tag = _push(aws_account, aws_region, tag) 87 | 88 | return ecr_tag 89 | 90 | 91 | def _push(aws_account, aws_region, tag): 92 | ecr_repo = '%s.dkr.ecr.%s.amazonaws.com' % (aws_account, aws_region) 93 | ecr_tag = '%s/%s' % (ecr_repo, tag) 94 | _execute(['docker', 'tag', tag, ecr_tag]) 95 | print("Pushing docker image to ECR repository %s/%s\n" % (ecr_repo, tag)) 96 | _execute(['docker', 'push', ecr_tag]) 97 | print("Done pushing %s" % ecr_tag) 98 | return ecr_tag 99 | 100 | 101 | def _create_ecr_repo(ecr_client, repository_name): 102 | """ 103 | Create the repo if it doesn't already exist. 
104 | """ 105 | try: 106 | ecr_client.create_repository(repositoryName=repository_name) 107 | print("Created new ECR repository: %s" % repository_name) 108 | except ecr_client.exceptions.RepositoryAlreadyExistsException: 109 | print("ECR repository already exists: %s" % repository_name) 110 | 111 | 112 | def _ecr_login(ecr_client, aws_account): 113 | auth = ecr_client.get_authorization_token(registryIds=[aws_account]) 114 | authorization_data = auth['authorizationData'][0] 115 | 116 | raw_token = base64.b64decode(authorization_data['authorizationToken']) 117 | token = raw_token.decode('utf-8').strip('AWS:') 118 | ecr_url = auth['authorizationData'][0]['proxyEndpoint'] 119 | 120 | cmd = ['docker', 'login', '-u', 'AWS', '-p', token, ecr_url] 121 | _execute(cmd, quiet=True) 122 | print("Logged into ECR") 123 | 124 | 125 | def _ecr_login_if_needed(image): 126 | ecr_client = boto3.client('ecr') 127 | 128 | # Only ECR images need login 129 | if not ('dkr.ecr' in image and 'amazonaws.com' in image): 130 | return 131 | 132 | # do we have the image? 133 | if _check_output('docker images -q %s' % image).strip(): 134 | return 135 | 136 | aws_account = image.split('.')[0] 137 | _ecr_login(ecr_client, aws_account) 138 | 139 | 140 | @contextlib.contextmanager 141 | def _tmpdir(suffix='', prefix='tmp', dir=None): # type: (str, str, str) -> None 142 | """Create a temporary directory with a context manager. The file is deleted when the context exits. 143 | 144 | The prefix, suffix, and dir arguments are the same as for mkstemp(). 145 | 146 | Args: 147 | suffix (str): If suffix is specified, the file name will end with that suffix, otherwise there will be no 148 | suffix. 149 | prefix (str): If prefix is specified, the file name will begin with that prefix; otherwise, 150 | a default prefix is used. 151 | dir (str): If dir is specified, the file will be created in that directory; otherwise, a default directory is 152 | used. 
153 | Returns: 154 | str: path to the directory 155 | """ 156 | tmp = tempfile.mkdtemp(suffix=suffix, prefix=prefix, dir=dir) 157 | yield tmp 158 | shutil.rmtree(tmp) 159 | 160 | 161 | def _execute(command, quiet=False): 162 | if not quiet: 163 | print("$ %s" % ' '.join(command)) 164 | process = subprocess.Popen(command, 165 | stdout=subprocess.PIPE, 166 | stderr=subprocess.STDOUT) 167 | try: 168 | _stream_output(process) 169 | except RuntimeError as e: 170 | # _stream_output() doesn't have the command line. We will handle the exception 171 | # which contains the exit code and append the command line to it. 172 | msg = "Failed to run: %s, %s" % (command, str(e)) 173 | raise RuntimeError(msg) 174 | 175 | 176 | def _stream_output(process): 177 | """Stream the output of a process to stdout 178 | 179 | This function takes an existing process that will be polled for output. Only stdout 180 | will be polled and sent to sys.stdout. 181 | 182 | Args: 183 | process(subprocess.Popen): a process that has been started with 184 | stdout=PIPE and stderr=STDOUT 185 | 186 | Returns (int): process exit code 187 | """ 188 | exit_code = None 189 | 190 | while exit_code is None: 191 | stdout = process.stdout.readline().decode("utf-8") 192 | sys.stdout.write(stdout) 193 | exit_code = process.poll() 194 | 195 | if exit_code != 0: 196 | raise RuntimeError("Process exited with code: %s" % exit_code) 197 | 198 | 199 | def _check_output(cmd, *popenargs, **kwargs): 200 | if isinstance(cmd, str): 201 | cmd = shlex.split(cmd) 202 | 203 | success = True 204 | try: 205 | output = subprocess.check_output(cmd, *popenargs, **kwargs) 206 | except subprocess.CalledProcessError as e: 207 | output = e.output 208 | success = False 209 | 210 | output = output.decode("utf-8") 211 | if not success: 212 | print("Command output: %s" % output) 213 | raise Exception("Failed to run %s" % ",".join(cmd)) 214 | 215 | return output 216 | 
import gym
import numpy as np
import pandas as pd
import json
from pathlib import Path

# 40 == gym.logger.ERROR: silence warning chatter from environment creation.
gym.logger.set_level(40)

class VectoredGymEnvironment():
    """
    Environment class to run multiple simulations and collect rollout data
    """
    def __init__(self, registered_gym_env, num_of_envs=1):
        self.envs_initialized = False
        self.initialized_envs = {}       # environment_id -> gym env
        self.env_states = {}             # environment_id -> latest observation
        self.env_reset_counter = {}      # environment_id -> number of resets seen
        self.num_of_envs = num_of_envs
        self.data_rows = []              # accumulated rollout rows across all envs

        self.initialize_envs(num_of_envs, registered_gym_env)

    def is_initialized(self):
        """Return True once all environments have been created and reset."""
        return self.envs_initialized

    def initialize_envs(
            self,
            num_of_envs,
            registered_gym_env):
        """Initialize multiple OpenAI gym environments.
        Each environment will start with a different random seed.

        Arguments:
            num_of_envs {int} -- Number of environments/simulations to initiate
            registered_gym_env {str} -- Environment name of the registered gym environment
        """
        print("Initializing {} environments of {}".format(num_of_envs, registered_gym_env))
        for i in range(num_of_envs):
            environment_id = "environment_" + str(i)
            environment = gym.make(registered_gym_env)
            environment = environment.unwrapped
            environment.seed(i)  # distinct seed per environment
            self.env_states[environment_id] = environment.reset()
            self.env_reset_counter[environment_id] = 0
            self.initialized_envs[environment_id] = environment
        self.envs_initialized = True
        # All envs share one observation space; remember its flat size.
        self.state_dims = len(self.env_states[environment_id])

    def get_environment_states(self):
        """Return the mapping environment_id -> latest observation."""
        return self.env_states

    def dump_environment_states(self, dir_path, file_name):
        """Dump current states of all the environments into a file
        (one JSON list per line).

        Arguments:
            dir_path {str} -- Directory path of the target file
            file_name {str} -- File name of the target file
        """
        file_path = Path(dir_path) / file_name
        with open(file_path, 'w') as outfile:
            for state in self.env_states.values():
                json.dump(list(state), outfile)
                outfile.write('\n')

    def get_environment_ids(self):
        """Return the ids of all initialized environments."""
        return list(self.initialized_envs.keys())

    def step(self, environment_id, action):
        """Advance one environment by one step and cache its new observation."""
        local_env = self.initialized_envs[environment_id]
        observation, reward, done, info = local_env.step(action)

        self.env_states[environment_id] = observation
        return observation, reward, done, info

    def reset(self, environment_id):
        """Reset one environment and cache its initial observation."""
        self.env_states[environment_id] = \
            self.initialized_envs[environment_id].reset()
        return self.env_states[environment_id]

    def reset_all_envs(self):
        """Reset every managed environment."""
        print("Resetting all the environments...")
        for i in range(self.num_of_envs):
            self.reset("environment_" + str(i))

    def close(self, environment_id):
        """Close one environment."""
        self.initialized_envs[environment_id].close()

    def render(self, environment_id):
        """Render one environment."""
        self.initialized_envs[environment_id].render()

    @staticmethod
    def _normalized_probs(action_prob):
        """Return action probabilities as a list summing to 1.

        Converts to a float array first: the original in-place division
        (`action_prob /= action_prob.sum()`) raises a casting error when the
        caller passes integer probabilities.
        """
        action_prob = np.asarray(action_prob, dtype=float)
        total = action_prob.sum()
        # normalization if sum of probs is not exactly equal to 1
        if total != 1:
            action_prob = action_prob / total
        return list(action_prob)

    def collect_rollouts_for_single_env_with_given_episodes(self, environment_id, action_prob, num_episodes):
        """Collect rollouts with given episodes from one environment

        Arguments:
            environment_id {str} -- Environment id for the environment
            action_prob {list} -- Action probabilities of the simulated policy
            num_episodes {int} -- Number of episodes to run rollouts
        """
        action_prob = self._normalized_probs(action_prob)

        for _ in range(num_episodes):
            done = False
            cumulative_rewards = 0
            while not done:
                data_item = []
                action = np.random.choice(len(action_prob), p=action_prob)
                # capture the state the action was taken in, before stepping
                cur_state_features = self.env_states[environment_id]
                _, reward, done, _ = self.step(environment_id, action)
                cumulative_rewards += reward
                # unique episode id across envs and resets
                episode_id = int(environment_id.split('_')[-1]) + \
                    self.num_of_envs * self.env_reset_counter[environment_id]
                if not done:
                    data_item.extend([action, action_prob, episode_id, reward, 0.0])
                else:
                    # only the terminal row carries the episode's total reward
                    data_item.extend([action, action_prob, episode_id, reward, cumulative_rewards])
                data_item.extend(cur_state_features)
                self.data_rows.append(data_item)

            self.reset(environment_id)
            self.env_reset_counter[environment_id] += 1

    def collect_rollouts_for_single_env_with_given_steps(self, environment_id, action_prob, num_steps):
        """Collect rollouts with given steps from one environment

        Arguments:
            environment_id {str} -- Environment id for the environment
            action_prob {list} -- Action probabilities of the simulated policy
            num_steps {int} -- Number of steps to run rollouts
        """
        action_prob = self._normalized_probs(action_prob)

        for _ in range(num_steps):
            data_item = []
            action = np.random.choice(len(action_prob), p=action_prob)
            cur_state_features = self.env_states[environment_id]
            _, reward, done, _ = self.step(environment_id, action)
            episode_id = int(environment_id.split('_')[-1]) + \
                self.num_of_envs * self.env_reset_counter[environment_id]
            data_item.extend([action, action_prob, episode_id, reward])
            data_item.extend(cur_state_features)
            self.data_rows.append(data_item)
            if done:
                self.reset(environment_id)
                self.env_reset_counter[environment_id] += 1

    def collect_rollouts_with_given_action_probs(self, num_steps=None, num_episodes=None, action_probs=None, file_name=None):
        """Collect rollouts from all the initiated environments with given action probs

        Keyword Arguments:
            num_steps {int} -- Number of steps to run rollouts (default: {None})
            num_episodes {int} -- Number of episodes to run rollouts (default: {None})
            action_probs {list} -- Action probs for the policy (default: {None})
            file_name {str} -- Batch transform output that contain predictions of probs (default: {None})

        Returns:
            [Dataframe] -- Dataframe that contains the rollout data from all envs
        """
        if file_name is not None:
            assert action_probs is None
            # The original filtered lines with `line is not ''` — an identity
            # comparison that is never a reliable emptiness test (and warns on
            # modern Python).  Use truthiness, and close the file via a
            # context manager.
            with open(file_name) as f:
                json_lines = [json.loads(line) for line in f if line.strip()]
            action_probs = []
            for line in json_lines:
                if line.get('SageMakerOutput') is not None:
                    action_probs.append(line['SageMakerOutput'].get("predictions")[0])
                else:
                    action_probs.append(line.get("predictions")[0])

        assert len(action_probs) == self.num_of_envs
        for index, environment_id in enumerate(self.get_environment_ids()):
            if num_steps is not None:
                assert num_episodes is None
                self.collect_rollouts_for_single_env_with_given_steps(
                    environment_id, action_probs[index], num_steps
                )
            else:
                assert num_episodes is not None
                self.collect_rollouts_for_single_env_with_given_episodes(
                    environment_id, action_probs[index], num_episodes
                )

        col_names = self._create_col_names()
        df = pd.DataFrame(self.data_rows, columns=col_names)

        return df

    def _create_col_names(self):
        """Create column names of dataframe that can be consumed by Coach

        Returns:
            [list] -- List of column names
        """
        col_names = ['action', 'all_action_probabilities', 'episode_id', 'reward', 'cumulative_rewards']
        col_names += ['state_feature_' + str(i) for i in range(self.state_dims)]

        return col_names
def generate_s3_write_permission_for_sagemaker_role(role):
    """Markdown steps for attaching S3 write access to *role*.

    Fix: the original told the user to attach `AmazonKinesisVideoStreamsFullAccess`
    here while the kinesis helper below said `AmazonS3FullAccess` — the two
    policy names were swapped between the two helpers.
    """
    role_name = role.split("/")[-1]
    url = "https://console.aws.amazon.com/iam/home#/roles/%s" % role_name
    text = "1. Go to IAM console to edit current SageMaker role: [%s](%s).\n" % (role_name, url)
    text += "2. Next, go to the `Permissions tab` and click on `Attach Policy.` \n"
    text += "3. Search and select `AmazonS3FullAccess` policy\n"
    return text

def generate_kinesis_create_permission_for_sagemaker_role(role):
    """Markdown steps for attaching Kinesis Video Streams access to *role*.

    Fix: policy name swapped with the S3 helper above in the original.
    """
    role_name = role.split("/")[-1]
    url = "https://console.aws.amazon.com/iam/home#/roles/%s" % role_name
    text = "1. Go to IAM console to edit current SageMaker role: [%s](%s).\n" % (role_name, url)
    text += "2. Next, go to the `Permissions tab` and click on `Attach Policy.` \n"
    text += "3. Search and select `AmazonKinesisVideoStreamsFullAccess` policy\n"
    return text

def generate_help_for_s3_endpoint_permissions(role):
    """Markdown steps allowing *role* to create VPC S3 endpoints.

    Typo fix: "premissions" -> "permissions".
    """
    role_name = role.split("/")[-1]
    url = "https://console.aws.amazon.com/iam/home#/roles/%s" % role_name
    text = ">It looks like your SageMaker role has insufficient permissions. Please do the following:\n"
    text += "1. Go to IAM console to edit current SageMaker role: [%s](%s).\n" % (role_name, url)
    text += "2. Select %s and then click on `Edit Policy`\n" % role_name
    text += "3. Select the JSON tab and add the following JSON blob to the `Statement` list:\n"
    text += """```json
            {
                "Action": [
                    "ec2:DescribeRouteTables",
                    "ec2:CreateVpcEndpoint"
                ],
                "Effect": "Allow",
                "Resource": "*"
            },```\n"""
    text += "4. Now wait for a few minutes before executing this cell again!"
    return text


def generate_help_for_robomaker_trust_relationship(role):
    """Markdown steps for adding RoboMaker to *role*'s trust relationship."""
    role_name = role.split("/")[-1]
    url = "https://console.aws.amazon.com/iam/home#/roles/%s" % role_name
    text = "1. Go to IAM console to edit current SageMaker role: [%s](%s).\n" % (role_name, url)
    text += "2. Next, go to the `Trust relationships tab` and click on `Edit Trust Relationship.` \n"
    text += "3. Replace the JSON blob with the following:\n"
    text += """```json
            {
              "Version": "2012-10-17",
              "Statement": [
                {
                  "Effect": "Allow",
                  "Principal": {
                    "Service": [
                      "sagemaker.amazonaws.com",
                      "robomaker.amazonaws.com"
                    ]
                  },
                  "Action": "sts:AssumeRole"
                }
              ]
            }```\n"""
    text += "4. Once this is complete, click on Update Trust Policy and you are done."
    return text


def generate_help_for_robomaker_all_permissions(role):
    """Markdown steps granting *role* the RoboMaker permissions this sample needs.

    Typo fix: "premissions" -> "permissions".
    """
    role_name = role.split("/")[-1]
    url = "https://console.aws.amazon.com/iam/home#/roles/%s" % role_name
    text = ">It looks like your SageMaker role has insufficient permissions. Please do the following:\n"
    text += "1. Go to IAM console to edit current SageMaker role: [%s](%s).\n" % (role_name, url)
    text += "2. Click on policy starting with `AmazonSageMaker-ExecutionPolicy` and then edit policy.\n"
    text += "3. Go to JSON tab, add the following JSON blob to the `Statement` list and save policy:\n"
    text += """```json
        {
            "Effect": "Allow",
            "Action": [
                "robomaker:CreateSimulationApplication",
                "robomaker:DescribeSimulationApplication",
                "robomaker:DeleteSimulationApplication",
                "robomaker:CreateSimulationJob",
                "robomaker:DescribeSimulationJob",
                "robomaker:CancelSimulationJob",
                "robomaker:ListSimulationApplications"
            ],
            "Resource": [
                "*"
            ]
        },
        {
            "Effect": "Allow",
            "Action": "iam:CreateServiceLinkedRole",
            "Resource": "*",
            "Condition": {
                "StringEquals": {
                    "iam:AWSServiceName": "robomaker.amazonaws.com"
                }
            }
        },
        {
            "Effect": "Allow",
            "Action": [
                "iam:PassRole"
            ],
            "Resource": "*",
            "Condition": {
                "StringEquals": {
                    "iam:PassedToService": [
                        "robomaker.amazonaws.com"
                    ]
                }
            }
        },```\n"""
    text += "4. Next, go to the `Trust relationships tab` and click on `Edit Trust Relationship.` \n"
    text += "5. Add the following JSON blob to the `Statement` list:\n"
    text += """```json
        {
          "Effect": "Allow",
          "Principal": {
            "Service": "robomaker.amazonaws.com"
          },
          "Action": "sts:AssumeRole"
        },```\n"""
    text += "6. Now wait for a few minutes before executing this cell again!"
    return text


def generate_robomaker_links(job_arns, aws_region):
    """Markdown list of console links for the given RoboMaker simulation job ARNs."""
    simulation_ids = [job_arn.split("/")[-1] for job_arn in job_arns]
    robomaker_links = []
    for simulation_id in simulation_ids:
        robomaker_link = "https://%s.console.aws.amazon.com/robomaker/home?region=%s#simulationJobs/%s" % (aws_region,
                                                                                                           aws_region,
                                                                                                           simulation_id)
        robomaker_links.append(robomaker_link)

    markdown_content = '> Click on the following links for visualization of simulation jobs on RoboMaker Console\n'
    for i in range(len(robomaker_links)):
        markdown_content += "- [Simulation %s](%s)  \n" % (i + 1, robomaker_links[i])

    markdown_content += "\nYou can click on Gazebo after you open the above link to start the simulator."
    return markdown_content


def create_s3_endpoint_manually(aws_region, default_vpc):
    """Markdown fallback instructions for creating a VPC S3 endpoint by hand."""
    url = "https://%s.console.aws.amazon.com/vpc/home?region=%s#Endpoints:sort=vpcEndpointId" % (aws_region, aws_region)
    text = ">VPC S3 endpoint creation failed. Please do the following to create an endpoint manually:\n"
    text += "1. Go to [VPC console | Endpoints](%s)\n" % url
    text += "2. Click on `Create Endpoint`. Select Service Name as `com.amazonaws.%s.s3`.\n" % (aws_region)
    text += "3. Next, select your Default VPC: `%s` and click the checkbox against the main Route Table ID\n" % (
        default_vpc)
    text += "4. Select `Full Access` in policy and click on `Create Endpoint`\n"
    text += "5. That should be it! Now wait for a few seconds before proceeding to the next cell."
    return text


def generate_help_for_administrator_policy(role):
    """Markdown steps for attaching AdministratorAccess to *role*."""
    role_name = role.split("/")[-1]
    url = "https://console.aws.amazon.com/iam/home#/roles/%s" % role_name
    text = "1. Go to IAM console to edit current SageMaker role: [%s](%s).\n" % (role_name, url)
    text += "2. Next, go to the `Permissions tab` and click on `Attach policies`. \n"
    text += "3. Check the box for `AdministratorAccess`\n"
    text += "4. Click on `Attach policy` at the bottom.\n"
    text += "5. You'll see message `Policy AdministratorAccess has been attached for the %s`. \n" % (role)
    text += "6. Once this is complete, you are all set."
    return text

def generate_help_for_experiment_manager_permissions(role):
    """Markdown steps granting *role* the permissions the experiment manager needs.

    Typo fix: "premissions" -> "permissions".
    """
    role_name = role.split("/")[-1]
    url = "https://console.aws.amazon.com/iam/home#/roles/%s" % role_name
    text = ">It looks like your SageMaker role has insufficient permissions. Please do the following:\n"
    text += "1. Go to IAM console to edit current SageMaker role: [%s](%s).\n" % (role_name, url)
    text += "2. Click on policy starting with `AmazonSageMaker-ExecutionPolicy` and then edit policy.\n"
    text += "3. Go to JSON tab, add the following JSON blob to the `Statement` list and save policy:\n"
    text += """```json
        {
            "Effect": "Allow",
            "Action": [
                "cloudformation:DescribeStacks",
                "cloudformation:ValidateTemplate",
                "cloudformation:CreateStack",
                "dynamodb:DescribeTable",
                "dynamodb:CreateTable",
                "dynamodb:DeleteTable",
                "dynamodb:PutItem",
                "dynamodb:UpdateItem",
                "dynamodb:DeleteItem",
                "dynamodb:Query",
                "dynamodb:BatchWriteItem",
                "iam:CreateRole",
                "iam:GetRole",
                "iam:PutRolePolicy",
                "iam:DeleteRolePolicy",
                "iam:DeleteRole",
                "iam:PassRole",
                "cloudwatch:PutDashboard",
                "firehose:ListDeliveryStreams",
                "firehose:DeleteDeliveryStream",
                "firehose:DescribeDeliveryStream",
                "firehose:CreateDeliveryStream",
                "athena:StartQueryExecution",
                "athena:GetQueryExecution",
                "glue:GetTable",
                "glue:DeleteTable",
                "glue:GetPartitions",
                "glue:UpdateTable",
                "glue:CreateTable",
                "glue:GetDatabase"
            ],
            "Resource": [
                "*"
            ]
        },```\n"""
    text += "4. Now wait for a few minutes before executing this cell again!"
    return text
def wait_for_s3_object(s3_bucket, key, local_dir, local_prefix='',
                       aws_account=None, aws_region=None, timeout=1200, limit=20,
                       fetch_only=None, training_job_name=None):
    """
    Keep polling s3 object until it is generated.

    Pulling down latest data to local directory with short key

    Arguments:
        s3_bucket (string): s3 bucket name
        key (string): key for s3 object
        local_dir (string): local directory path to save s3 object
        local_prefix (string): local prefix path append to the local directory
        aws_account (string): aws account of the s3 bucket
        aws_region (string): aws region where the repo is located
        timeout (int): how long to wait for the object to appear before giving up
        limit (int): maximum number of files to download
        fetch_only (lambda): a function to decide if this object should be fetched or not
        training_job_name (string): training job name to query job status

    Returns:
        A list of all downloaded files, as local filenames

    Raises:
        FileNotFoundError: if no matching object appears within `timeout` seconds
        RuntimeError: if `training_job_name` is given and that job fails while waiting
    """
    session = boto3.Session()
    aws_account = aws_account or session.client("sts").get_caller_identity()['Account']
    aws_region = aws_region or session.region_name

    s3 = session.resource('s3')
    sagemaker = session.client('sagemaker')
    bucket = s3.Bucket(s3_bucket)
    objects = []

    print("Waiting for s3://%s/%s..." % (s3_bucket, key), end='', flush=True)
    start_time = time.time()
    cnt = 0
    while len(objects) == 0:
        objects = list(bucket.objects.filter(Prefix=key))
        if fetch_only:
            objects = list(filter(fetch_only, objects))
        if objects:
            continue  # found something; the loop condition will exit
        print('.', end='', flush=True)
        time.sleep(5)
        cnt += 1
        if cnt % 80 == 0:
            print("")  # wrap the progress dots
        if time.time() > start_time + timeout:
            raise FileNotFoundError("S3 object s3://%s/%s never appeared after %d seconds" % (s3_bucket, key, timeout))
        if training_job_name:
            # Bail out early if the producing training job already failed.
            training_job_status = sagemaker.describe_training_job(TrainingJobName=training_job_name)['TrainingJobStatus']
            if training_job_status == 'Failed':
                raise RuntimeError("Training job {} failed while waiting for S3 object s3://{}/{}"
                                   .format(training_job_name, s3_bucket, key))

    print('\n', end='', flush=True)

    if len(objects) > limit:
        # Keep the most recently listed objects.
        print("Only downloading %d of %d files" % (limit, len(objects)))
        objects = objects[-limit:]

    # Make sure the destination directory exists before downloading —
    # download_file fails with a confusing error otherwise.
    os.makedirs(os.path.join(local_dir, local_prefix), exist_ok=True)

    fetched_files = []
    for obj in objects:
        print("Downloading %s" % obj.key)
        local_path = os.path.join(local_dir, local_prefix, obj.key.split('/')[-1])
        obj.Object().download_file(local_path)
        fetched_files.append(local_path)

    return fetched_files


def get_execution_role(role_name="sagemaker", aws_account=None, aws_region=None):
    """
    Create sagemaker execution role to perform sagemaker task

    Args:
        role_name (string): name of the role to be created
        aws_account (string): aws account of the ECR repo
        aws_region (string): aws region where the repo is located

    Returns:
        str: the ARN of the (possibly newly created) role
    """
    session = boto3.Session()
    # Kept for interface compatibility even though IAM is a global service.
    aws_account = aws_account or session.client("sts").get_caller_identity()['Account']
    aws_region = aws_region or session.region_name

    assume_role_policy_document = json.dumps({
        "Version": "2012-10-17",
        "Statement": [
            {
                "Effect": "Allow",
                "Principal": {
                    "Service": ["sagemaker.amazonaws.com", "robomaker.amazonaws.com"]
                },
                "Action": "sts:AssumeRole"
            }
        ]
    })

    client = session.client('iam')
    try:
        client.get_role(RoleName=role_name)
    except client.exceptions.NoSuchEntityException:
        # json.dumps already returns a str; the original's extra str() wrapper
        # was redundant and has been removed.
        client.create_role(
            RoleName=role_name,
            AssumeRolePolicyDocument=assume_role_policy_document
        )

        print("Created new sagemaker execution role: %s" % role_name)

    # Attaching is idempotent, so it is safe to run on every call.
    client.attach_role_policy(
        PolicyArn='arn:aws:iam::aws:policy/AmazonSageMakerFullAccess',
        RoleName=role_name
    )

    return client.get_role(RoleName=role_name)['Role']['Arn']
from rl_coach.agents.clipped_ppo_agent import ClippedPPOAgentParameters
from rl_coach.agents.policy_gradients_agent import PolicyGradientsAgentParameters
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
from rl_coach.graph_managers.graph_manager import ScheduleParameters
from rl_coach.base_parameters import VisualizationParameters, TaskParameters, Frameworks
from rl_coach.utils import short_dynamic_import
from rl_coach.core_types import SelectedPhaseOnlyDumpFilter, MaxDumpFilter, RunPhase
import rl_coach.core_types
from rl_coach import logger
from rl_coach.logger import screen
import argparse
import copy
import logging
import os
import sys
import shutil
import glob
import re

from .configuration_list import ConfigurationList
from rl_coach.coach import CoachLauncher

screen.set_use_colors(False)  # Simple text logging so it looks good in CloudWatch

class CoachConfigurationList(ConfigurationList):
    """Helper Object for converting CLI arguments (or SageMaker hyperparameters)
    into Coach configuration.
    """

    # Being security-paranoid and not instantiating any arbitrary string the customer passes in
    ALLOWED_TYPES = {
        'Frames': rl_coach.core_types.Frames,
        'EnvironmentSteps': rl_coach.core_types.EnvironmentSteps,
        'EnvironmentEpisodes': rl_coach.core_types.EnvironmentEpisodes,
        'TrainingSteps': rl_coach.core_types.TrainingSteps,
        'Time': rl_coach.core_types.Time,
    }


class SageMakerCoachPresetLauncher(CoachLauncher):
    """Base class for training RL tasks using RL-Coach.
    Customers subclass this to define specific kinds of workloads, overriding these methods as needed.
    """

    def __init__(self):
        super().__init__()
        # The rest of the class reads/writes `self.hyperparameters`, but the
        # original only initialised a differently named `self.hyperparams`
        # that nothing else used.  Initialise both: the correctly spelled
        # attribute for the class's own code, and the old name in case an
        # external subclass relied on it.
        self.hyperparams = None
        self.hyperparameters = None

    def get_config_args(self, parser: argparse.ArgumentParser) -> argparse.Namespace:
        """Overrides the default CLI parsing.
        Sets the configuration parameters for what a SageMaker run should do.
        Note, this does not support the "play" mode.

        Returns:
            argparse.Namespace: the fully-populated coach arguments
        """
        # first, convert the parser to a Namespace object with all default values.
        empty_arg_list = []
        args, _ = parser.parse_known_args(args=empty_arg_list)
        parser = self.sagemaker_argparser()
        sage_args, unknown = parser.parse_known_args()

        # Now fill in the args that we care about.
        sagemaker_job_name = os.environ.get("sagemaker_job_name", "sagemaker-experiment")
        args.experiment_name = logger.get_experiment_name(sagemaker_job_name)

        # Override experiment_path used for outputs
        args.experiment_path = '/opt/ml/output/intermediate'
        rl_coach.logger.experiment_path = '/opt/ml/output/intermediate'  # for gifs

        args.checkpoint_save_dir = '/opt/ml/output/data/checkpoint'
        args.checkpoint_save_secs = 10  # should avoid hardcoding
        # onnx for deployment for mxnet (not tensorflow)
        save_model = (sage_args.save_model == 1)
        backend = os.getenv('COACH_BACKEND', 'tensorflow')
        if save_model and backend == "mxnet":
            args.export_onnx_graph = True

        args.no_summary = True

        args.num_workers = sage_args.num_workers
        args.framework = Frameworks[backend]
        args.preset = sage_args.RLCOACH_PRESET
        # args.apply_stop_condition = True # uncomment for old coach behaviour

        self.hyperparameters = CoachConfigurationList()
        if len(unknown) % 2 == 1:
            raise ValueError("Odd number of command-line arguments specified. Key without value.")

        # Remaining CLI args arrive as "--name value" pairs from SageMaker.
        for i in range(0, len(unknown), 2):
            name = unknown[i]
            if name.startswith("--"):
                name = name[2:]
            else:
                raise ValueError("Unknown command-line argument %s" % name)
            val = unknown[i + 1]
            self.map_hyperparameter(name, val)

        return args

    def map_hyperparameter(self, name, value):
        """This is a good method to override where customers can specify custom shortcuts
        for hyperparameters. Default takes everything starting with "rl." and sends it
        straight to the graph manager.
        """
        if name.startswith("rl."):
            self.apply_hyperparameter(name, value)
        else:
            raise ValueError("Unknown hyperparameter %s" % name)

    def apply_hyperparameter(self, name, value):
        """Save this hyperparameter to be applied to the graph_manager object when
        it's ready.
        """
        print("Applying RL hyperparameter %s=%s" % (name, value))
        self.hyperparameters.store(name, value)

    def default_preset_name(self):
        """
        Sub-classes will typically return a single hard-coded string.
        """
        try:
            # TODO: remove this after converting all samples.
            default_preset = self.DEFAULT_PRESET
            screen.warning("Deprecated configuration of default preset. Please implement default_preset_name()")
            return default_preset
        except AttributeError:
            # No legacy DEFAULT_PRESET attribute defined; fall through.
            # (The original used a bare `except:`, which would also have
            # swallowed KeyboardInterrupt/SystemExit.)
            pass
        raise NotImplementedError("Sub-classes must specify the name of the default preset " +
                                  "for this RL problem. This will be the name of a python " +
                                  "file (without .py) that defines a graph_manager variable")

    def sagemaker_argparser(self) -> argparse.ArgumentParser:
        """
        Expose only the CLI arguments that make sense in the SageMaker context.
        """
        parser = argparse.ArgumentParser()

        # Arguably this would be cleaner if we copied the config from the base class argparser.
        parser.add_argument('-n', '--num_workers',
                            help="(int) Number of workers for multi-process based agents, e.g. A3C",
                            default=1,
                            type=int)
        parser.add_argument('-p', '--RLCOACH_PRESET',
                            help="(string) Name of the file with the RLCoach preset",
                            default=self.default_preset_name(),
                            type=str)
        parser.add_argument('--save_model',
                            help="(int) Flag to save model artifact after training finish",
                            default=0,
                            type=int)
        return parser

    def path_of_main_launcher(self):
        """
        A bit of python magic to find the path of the file that launched the current process.
        """
        try:
            launcher_file = os.path.abspath(sys.modules['__main__'].__file__)
            return os.path.dirname(launcher_file)
        except AttributeError:
            # If __main__.__file__ is missing, then we're probably in an interactive python shell
            return os.getcwd()

    def preset_from_name(self, preset_name):
        """Load the `graph_manager` variable from <launcher dir>/<preset_name>.py."""
        launcher_dir = self.path_of_main_launcher()
        print("Loading preset %s from %s" % (preset_name, launcher_dir))
        # The original computed the launcher path twice and discarded the
        # first result; compute it once.
        preset_path = os.path.join(launcher_dir, preset_name) + '.py:graph_manager'
        graph_manager = short_dynamic_import(preset_path, ignore_module_case=True)
        return graph_manager

    def get_graph_manager_from_args(self, args):
        """Build the graph manager for *args*, applying stored hyperparameters
        and propagating the chosen framework to every network."""
        # First get the graph manager for the customer-specified (or default) preset
        graph_manager = self.preset_from_name(args.preset)
        # Now override whatever config is specified in hyperparameters.
        self.hyperparameters.apply_subset(graph_manager, "rl.")
        # Set framework
        # Note: Some graph managers (e.g. HAC preset) create multiple agents
        # and the attribute is called agents_params
        if hasattr(graph_manager, 'agent_params'):
            for network_parameters in graph_manager.agent_params.network_wrappers.values():
                network_parameters.framework = args.framework
        elif hasattr(graph_manager, 'agents_params'):
            for ap in graph_manager.agents_params:
                for network_parameters in ap.network_wrappers.values():
                    network_parameters.framework = args.framework
        return graph_manager
HAC preset) create multiple agents and the attribute is called agents_params 181 | if hasattr(graph_manager, 'agent_params'): 182 | for network_parameters in graph_manager.agent_params.network_wrappers.values(): 183 | network_parameters.framework = args.framework 184 | elif hasattr(graph_manager, 'agents_params'): 185 | for ap in graph_manager.agents_params: 186 | for network_parameters in ap.network_wrappers.values(): 187 | network_parameters.framework = args.framework 188 | return graph_manager 189 | 190 | def _save_tf_model(self): 191 | ckpt_dir = '/opt/ml/output/data/checkpoint' 192 | model_dir = '/opt/ml/model' 193 | 194 | # Re-Initialize from the checkpoint so that you will have the latest models up. 195 | tf.train.init_from_checkpoint(ckpt_dir, 196 | {'main_level/agent/online/network_0/': 'main_level/agent/online/network_0'}) 197 | tf.train.init_from_checkpoint(ckpt_dir, 198 | {'main_level/agent/online/network_1/': 'main_level/agent/online/network_1'}) 199 | 200 | # Create a new session with a new tf graph. 201 | sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) 202 | sess.run(tf.global_variables_initializer()) # initialize the checkpoint. 203 | 204 | # This is the node that will accept the input. 205 | input_nodes = tf.get_default_graph().get_tensor_by_name('main_level/agent/main/online/' + \ 206 | 'network_0/observation/observation:0') 207 | # This is the node that will produce the output. 208 | output_nodes = tf.get_default_graph().get_operation_by_name('main_level/agent/main/online/' + \ 209 | 'network_1/ppo_head_0/policy') 210 | # Save the model as a servable model. 211 | tf.saved_model.simple_save(session=sess, 212 | export_dir='model', 213 | inputs={"observation": input_nodes}, 214 | outputs={"policy": output_nodes.outputs[0]}) 215 | # Move to the appropriate folder. Don't mind the directory, this just works. 216 | # rl-cart-pole is the name of the model. Remember it. 
217 | shutil.move('model/', model_dir + '/model/tf-model/00000001/') 218 | # EASE will pick it up and upload to the right path. 219 | print("Success") 220 | 221 | def _save_onnx_model(self): 222 | from .onnx_utils import fix_onnx_model 223 | ckpt_dir = '/opt/ml/output/data/checkpoint' 224 | model_dir = '/opt/ml/model' 225 | # find latest onnx file 226 | # currently done by name, expected to be changed in future release of coach. 227 | glob_pattern = os.path.join(ckpt_dir, '*.onnx') 228 | onnx_files = [file for file in glob.iglob(glob_pattern, recursive=True)] 229 | if len(onnx_files) > 0: 230 | extract_step = lambda string: int(re.search('/(\d*)_Step.*', string, re.IGNORECASE).group(1)) 231 | onnx_files.sort(key=extract_step) 232 | latest_onnx_file = onnx_files[-1] 233 | # move to model directory 234 | filepath_from = os.path.abspath(latest_onnx_file) 235 | filepath_to = os.path.join(model_dir, "model.onnx") 236 | shutil.move(filepath_from, filepath_to) 237 | fix_onnx_model(filepath_to) 238 | else: 239 | screen.warning("No ONNX files found in {}".format(ckpt_dir)) 240 | 241 | @classmethod 242 | def train_main(cls): 243 | """Entrypoint for training. 244 | Parses command-line arguments and starts training. 245 | """ 246 | trainer = cls() 247 | trainer.launch() 248 | 249 | # Create model artifact for model.tar.gz 250 | parser = trainer.sagemaker_argparser() 251 | sage_args, unknown = parser.parse_known_args() 252 | if sage_args.save_model == 1: 253 | backend = os.getenv('COACH_BACKEND', 'tensorflow') 254 | if backend == 'tensorflow': 255 | trainer._save_tf_model() 256 | if backend == 'mxnet': 257 | trainer._save_onnx_model() 258 | 259 | 260 | class SageMakerCoachLauncher(SageMakerCoachPresetLauncher): 261 | """ 262 | Older version of the launcher that doesn't use preset, but instead effectively has a single preset built in. 
263 | """ 264 | 265 | def __init__(self): 266 | super().__init__() 267 | screen.warning("DEPRECATION WARNING: Please switch to SageMakerCoachPresetLauncher") 268 | #TODO: Remove this whole class when nobody's using it any more. 269 | 270 | def define_environment(self): 271 | return NotImplementedEror("Sub-class must define environment e.g. GymVectorEnvironment(level='your_module:YourClass')") 272 | 273 | def get_graph_manager_from_args(self, args): 274 | """Returns the GraphManager object for coach to use to train by calling improve() 275 | """ 276 | # NOTE: TaskParameters are not configurable at this time. 277 | 278 | # Visualization 279 | vis_params = VisualizationParameters() 280 | self.config_visualization(vis_params) 281 | self.hyperparameters.apply_subset(vis_params, "vis_params.") 282 | 283 | # Schedule 284 | schedule_params = ScheduleParameters() 285 | self.config_schedule(schedule_params) 286 | self.hyperparameters.apply_subset(schedule_params, "schedule_params.") 287 | 288 | # Agent 289 | agent_params = self.define_agent() 290 | self.hyperparameters.apply_subset(agent_params, "agent_params.") 291 | 292 | # Environment 293 | env_params = self.define_environment() 294 | self.hyperparameters.apply_subset(env_params, "env_params.") 295 | 296 | graph_manager = BasicRLGraphManager( 297 | agent_params=agent_params, 298 | env_params=env_params, 299 | schedule_params=schedule_params, 300 | vis_params=vis_params, 301 | ) 302 | 303 | return graph_manager 304 | 305 | def config_schedule(self, schedule_params): 306 | pass 307 | 308 | def define_agent(self): 309 | raise NotImplementedError("Subclass must create define_agent() method which returns an AgentParameters object. 
e.g.\n" \ 310 | " return rl_coach.agents.dqn_agent.DQNAgentParameters()"); 311 | 312 | def config_visualization(self, vis_params): 313 | vis_params.dump_gifs = True 314 | vis_params.video_dump_methods = [SelectedPhaseOnlyDumpFilter(RunPhase.TEST), MaxDumpFilter()] 315 | vis_params.print_networks_summary = True 316 | return vis_params 317 | -------------------------------------------------------------------------------- /sagemaker/source/common/sagemaker_rl/configuration_list.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | 4 | class ConfigurationList(object): 5 | """Helper Object for converting CLI arguments (or SageMaker hyperparameters) 6 | into Coach configuration. 7 | """ 8 | 9 | def __init__(self): 10 | """Args: 11 | - arg_list [list]: list of arguments on the command-line like [key1, value1, key2, value2, ...] 12 | - prefix [str]: Prefix for every key that must be present, e.g. "--" for common command-line args 13 | """ 14 | self.hp_dict = {} 15 | 16 | def store(self, name, value): 17 | """Store a key/value hyperparameter combination 18 | """ 19 | self.hp_dict[name] = value 20 | 21 | def apply_subset(self, config_object, prefix): 22 | """Merges configured hyperparameters in the params dict into the config_object. 23 | Recognized arguments are consumed out of self.hp_dict 24 | 25 | Args: 26 | config_object (obj): will be something like a Coach TaskParameters object, where we're setting properties 27 | params (dict): comes from the command line (and thus customer-specified hyperparameters) 28 | prefix (str): string prefix for which items in params to use. (e.g. "rl.task_params.") 29 | """ 30 | # Materialize a copy of the dict as tuples so we can modify the original dict as we go. 
31 | for key, val in list(self.hp_dict.items()): 32 | if key.startswith(prefix): 33 | logging.debug("Configuring %s with %s=%s" % (prefix, key, val)) 34 | subkey = key[ len(prefix): ] 35 | msg = "%s%s=%s" % (prefix, subkey, val) 36 | try: 37 | self._set_rl_property_value(config_object, subkey, val, prefix) 38 | except: 39 | print("Failure while applying hyperparameter %s" % msg) 40 | raise 41 | del self.hp_dict[key] 42 | 43 | def _set_rl_property_value(self, obj, key, val, path=""): 44 | """Sets a property on obj to val, or to a sub-object within obj if key looks like "foo.bar" 45 | """ 46 | if key.find(".") >= 0: 47 | top_key, sub_keys = key_list = key.split(".",1) 48 | if top_key.startswith("__"): 49 | raise ValueError("Attempting to set unsafe property name %s" % top_key) 50 | if isinstance(obj,dict): 51 | sub_obj = obj[top_key] 52 | else: 53 | sub_obj = obj.__dict__[top_key] 54 | # Recurse 55 | return self._set_rl_property_value(sub_obj, sub_keys, val, "%s.%s" % (path,top_key) ) 56 | else: 57 | key, val = self._parse_type(key,val) 58 | if key.startswith("__"): 59 | raise ValueError("Attempting to set unsafe property name %s" % key) 60 | if isinstance(obj, dict): 61 | obj[key] = val 62 | else: 63 | obj.__dict__[key] = val 64 | 65 | def _autotype(self, val): 66 | """Converts string to an int or float as possible. 67 | """ 68 | if type(val) == dict: 69 | return val 70 | if type(val) == list: 71 | return val 72 | if type(val) == bool: 73 | return val 74 | try: 75 | return int(val) 76 | except ValueError: 77 | pass 78 | try: 79 | return float(val) 80 | except ValueError: 81 | pass 82 | return val 83 | 84 | # Being security-paranoid and not instantiating any arbitrary string the customer passes in 85 | ALLOWED_TYPES = {} 86 | 87 | def _parse_type(self, key, val): 88 | """Converts the val to an appropriately typed Python object. 89 | Automatically detects ints and floats when possible. 
90 | If the key takes the form "foo:bar" then it looks in ALLOWED_TYPES 91 | for an entry of bar, and instantiates one of those objects, passing 92 | val to the constructor. So if key="foo:EnvironmentSteps" then 93 | """ 94 | val = self._autotype(val) 95 | if key.find(":") > 0: 96 | key, obj_type = key.split(":", 1) 97 | cls = self.ALLOWED_TYPES.get(obj_type) 98 | if not cls: 99 | raise ValueError("Unrecognized object type %s. Allowed values are %s" % (obj_type, self.ALLOWED_TYPES.keys())) 100 | val = cls(val) 101 | return key, val 102 | -------------------------------------------------------------------------------- /sagemaker/source/common/sagemaker_rl/docker_utils.py: -------------------------------------------------------------------------------- 1 | import socket 2 | import time 3 | 4 | def get_ip_from_host(timeout=100, host_name=None): 5 | counter = 0 6 | ip_address = None 7 | 8 | if not host_name: 9 | host_name = socket.gethostname() 10 | print("Fetching IP for hostname: %s" % host_name) 11 | while counter < timeout and not ip_address: 12 | try: 13 | ip_address = socket.gethostbyname(host_name) 14 | break 15 | except Exception as e: 16 | counter += 1 17 | time.sleep(1) 18 | 19 | if counter == timeout and not ip_address: 20 | error_string = "Platform Error: Could not retrieve IP address \ 21 | for %s in past %s seconds" % (host_name, timeout) 22 | raise RuntimeError(error_string) 23 | 24 | return ip_address -------------------------------------------------------------------------------- /sagemaker/source/common/sagemaker_rl/onnx_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | ONNX Utils to support multiple output heads in agent networks, until future releases of MXNet support this. 3 | """ 4 | import onnx 5 | from onnx import helper, checker, TensorProto 6 | 7 | 8 | def get_correct_outputs(model): 9 | """ 10 | Collects the relevent outputs of the model, after identifying the type of RL Agent. 
11 | Currently supports continuous PPO, discrete PPO and DQN agents. 12 | """ 13 | graph_name = model.graph.output[0].name 14 | if "_continuousppohead" in graph_name: 15 | print("ONNX correction applied to continuous PPO agent.") 16 | return ppo_continuous_outputs(model) 17 | elif "_discreteppohead" in graph_name: 18 | print("ONNX correction applied to discrete PPO agent.") 19 | return ppo_discrete_outputs(model) 20 | elif "_qhead" in graph_name: 21 | print("ONNX correction not required for DQN agent.") 22 | return model.graph.output 23 | else: 24 | raise Exception("Can't determine the RL Agent used from the ONNX graph provided.") 25 | 26 | 27 | def make_output(node_name, shape): 28 | """ 29 | Given a node name and output shape, will construct the correct Protobuf object. 30 | """ 31 | return helper.make_tensor_value_info( 32 | name=node_name, 33 | elem_type=TensorProto.FLOAT, 34 | shape=shape 35 | ) 36 | 37 | 38 | def ppo_continuous_outputs(model): 39 | """ 40 | Collects the output nodes for continuous PPO. 
41 | """ 42 | # determine number of actions 43 | log_std_node_name = "generalmodel0_singlemodel1_scaledgradhead0_continuousppohead0_log_std" 44 | log_std_node = [i for i in model.graph.input if i.name == log_std_node_name][0] 45 | num_actions = log_std_node.type.tensor_type.shape.dim[0].dim_value 46 | # identify output nodes 47 | value_head_name = "generalmodel0_singlemodel0_scaledgradhead0_vhead0_squeeze0" 48 | value_head = make_output(value_head_name, shape=(1,)) 49 | policy_head_mean_name = "generalmodel0_singlemodel1_scaledgradhead0_continuousppohead0_dense0_fwd" 50 | policy_head_mean = make_output(policy_head_mean_name, shape=(num_actions,)) 51 | policy_head_std_name = "generalmodel0_singlemodel1_scaledgradhead0_continuousppohead0_broadcast_mul0" 52 | policy_head_std = make_output(policy_head_std_name, shape=(num_actions,)) 53 | # collect outputs 54 | output_nodes = [value_head, policy_head_mean, policy_head_std] 55 | return output_nodes 56 | 57 | 58 | def ppo_discrete_outputs(model): 59 | """ 60 | Collects the output nodes for discrete PPO. 61 | """ 62 | # determine number of actions 63 | bias_node_name = "generalmodel0_singlemodel1_scaledgradhead0_discreteppohead0_dense0_bias" 64 | bias_node = [i for i in model.graph.input if i.name == bias_node_name][0] 65 | num_actions = bias_node.type.tensor_type.shape.dim[0].dim_value 66 | # identify output nodes 67 | value_head_name = "generalmodel0_singlemodel0_scaledgradhead0_vhead0_squeeze0" 68 | value_head = make_output(value_head_name, shape=(1,)) 69 | policy_head_name = "generalmodel0_singlemodel1_scaledgradhead0_discreteppohead0_softmax0" 70 | policy_head = make_output(policy_head_name, shape=(num_actions,)) 71 | # collect outputs 72 | output_nodes = [value_head, policy_head] 73 | return output_nodes 74 | 75 | 76 | def save_model(model, output_nodes, filepath): 77 | """ 78 | Given an in memory model, will save to disk at given filepath. 
79 | """ 80 | new_graph = helper.make_graph(nodes=model.graph.node, 81 | name='new_graph', 82 | inputs=model.graph.input, 83 | outputs=output_nodes, 84 | initializer=model.graph.initializer) 85 | checker.check_graph(new_graph) 86 | new_model = helper.make_model(new_graph) 87 | with open(filepath, "wb") as file_handle: 88 | serialized = new_model.SerializeToString() 89 | file_handle.write(serialized) 90 | 91 | 92 | def fix_onnx_model(filepath): 93 | """ 94 | Applies an inplace fix to ONNX file from Coach. 95 | """ 96 | model = onnx.load_model(filepath) 97 | output_nodes = get_correct_outputs(model) 98 | save_model(model, output_nodes, filepath) 99 | -------------------------------------------------------------------------------- /sagemaker/source/common/sagemaker_rl/sage_cluster_communicator.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import os 3 | import io 4 | import json 5 | import time 6 | 7 | 8 | class SageClusterCommunicator(): 9 | def __init__(self): 10 | bucket = os.environ.get("SM_HP_S3_BUCKET", None) 11 | prefix = os.environ.get("SM_HP_S3_PREFIX", None) 12 | aws_region = os.environ.get("SM_HP_AWS_REGION", None) 13 | self.aws_region = boto3.Session().region_name if aws_region is None else aws_region 14 | if bucket is None or prefix is None: 15 | bucket, prefix = self._find_s3_output_path() 16 | self.s3_bucket = bucket 17 | self.s3_prefix = prefix + "/dist-ray" 18 | self.ip_key = "MASTER_IP.json" 19 | self.done_file_key = "CONFIG_DONE" 20 | 21 | def get_client(self): 22 | session = boto3.session.Session() 23 | return session.client('s3', region_name=self.aws_region) 24 | 25 | def _get_s3_key(self, key): 26 | return os.path.normpath(self.s3_prefix + "/config/" + key) 27 | 28 | def _required_environment_param(self, parameter_name): 29 | SM_TRAINING_ENV = json.loads(os.environ.get("SM_TRAINING_ENV")) 30 | value = SM_TRAINING_ENV.get(parameter_name, None) 31 | if not value: 32 | raise 
ValueError("Missing enrironment parameter '%s'" % parameter_name) 33 | return value 34 | 35 | def _find_s3_output_path(self): 36 | """Looks in SageMaker hyperparameters for the S3 output path. 37 | Uses SM module directory to extract the output path. 38 | Returns: 39 | tuple (bucket, prefix) 40 | """ 41 | module_dir_s3_path = self._required_environment_param("module_dir") 42 | if not module_dir_s3_path.startswith('s3://'): 43 | raise ValueError('Unexpected format for module_dir_s3_path. Expected "s3://...') 44 | bucket_prefix = module_dir_s3_path.replace("s3://", "") 45 | bucket, key = bucket_prefix.split('/', 1) 46 | prefix = "/".join(key.split("/")[:-2]) 47 | if prefix == "": 48 | # {bucket}/{job_name}/source/sourcedir.tar.gz structure not present 49 | prefix = self._required_environment_param("job_name") 50 | return (bucket, prefix) 51 | 52 | def create_s3_signal(self, signal): 53 | s3_client = self.get_client() 54 | s3_client.upload_fileobj(io.BytesIO(b''), self.s3_bucket, self._get_s3_key(signal)) 55 | 56 | def wait_for_signals(self, signals, timeout=600, sleep_time=5): 57 | if len(signals) == 0: 58 | return 59 | s3_client = self.get_client() 60 | time_elapsed = 0 61 | while True: 62 | keys_found = 0 63 | for signal in signals: 64 | response = s3_client.list_objects(Bucket=self.s3_bucket, Prefix=self._get_s3_key(signal)) 65 | if "Contents" in response: 66 | keys_found += 1 67 | if keys_found != len(signals): 68 | time.sleep(sleep_time) 69 | time_elapsed += sleep_time 70 | if time_elapsed >= timeout: 71 | raise RuntimeError( 72 | "Could not find all the signals: %s for last %s seconds" % (signals, time_elapsed)) 73 | else: 74 | print("Received all signal[s]: %s" % signals) 75 | return 76 | 77 | def write_host_config(self, ip, host_name): 78 | s3_client = self.get_client() 79 | data = {"IP": ip, "HOST_NAME": host_name} 80 | json_blob = json.dumps(data) 81 | file_handle = io.BytesIO(json_blob.encode()) 82 | file_handle_done = io.BytesIO(b'done') 83 | 
s3_client.upload_fileobj(file_handle, self.s3_bucket, self._get_s3_key(self.ip_key)) 84 | s3_client.upload_fileobj(file_handle_done, self.s3_bucket, self._get_s3_key(self.done_file_key)) 85 | 86 | def get_master_config(self): 87 | s3_client = self.get_client() 88 | self._wait_for_ip_upload() 89 | # Wait for new IP address if using spot instace 90 | time.sleep(20) 91 | try: 92 | s3_client.download_file(self.s3_bucket, self._get_s3_key(self.ip_key), 'ip.json') 93 | with open("ip.json") as f: 94 | json_obj = json.load(f) 95 | ip = json_obj["IP"] 96 | host_name = json_obj["HOST_NAME"] 97 | return ip, host_name 98 | except Exception as e: 99 | raise RuntimeError("Cannot fetch IP of redis server running in SageMaker:", e) 100 | 101 | def _wait_for_ip_upload(self, timeout=1200): 102 | s3_client = self.get_client() 103 | time_elapsed = 0 104 | while True: 105 | response = s3_client.list_objects(Bucket=self.s3_bucket, Prefix=self._get_s3_key(self.done_file_key)) 106 | if "Contents" not in response: 107 | time.sleep(1) 108 | time_elapsed += 1 109 | if time_elapsed % 5 == 0: 110 | print("Waiting for SageMaker Redis server IP... 
Time elapsed: %s seconds" % time_elapsed) 111 | if time_elapsed >= timeout: 112 | raise RuntimeError("Cannot retrieve IP of redis server running in SageMaker") 113 | else: 114 | return 115 | 116 | def download_file(self, s3_key, local_path): 117 | s3_client = self.get_client() 118 | try: 119 | s3_client.download_file(self.s3_bucket, s3_key, local_path) 120 | return True 121 | except Exception as e: 122 | return False 123 | 124 | def upload_file(self, s3_key, local_path): 125 | s3_client = self.get_client() 126 | try: 127 | s3_client.upload_file(Filename=local_path, 128 | Bucket=self.s3_bucket, 129 | Key=s3_key) 130 | return True 131 | except Exception as e: 132 | return False 133 | -------------------------------------------------------------------------------- /sagemaker/source/common/sagemaker_rl/stable_baselines_launcher.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import roboschool 3 | import os 4 | 5 | from gym.wrappers.monitoring.video_recorder import VideoRecorder 6 | from stable_baselines.ppo1 import PPO1 7 | from stable_baselines.common import set_global_seeds 8 | from stable_baselines.bench import Monitor 9 | from stable_baselines.common import tf_util 10 | from stable_baselines.common.policies import MlpPolicy 11 | from mpi4py import MPI 12 | 13 | 14 | class RewScale(gym.RewardWrapper): 15 | def __init__(self, env, scale): 16 | gym.RewardWrapper.__init__(self, env) 17 | self.scale = scale 18 | 19 | def reward(self, _reward): 20 | return _reward * self.scale 21 | 22 | 23 | class SagemakerStableBaselinesLauncher(): 24 | """ 25 | Sagemaker's Stable Baselines Launcher. 
26 | """ 27 | 28 | def __init__(self, env, output_path, model, num_timesteps): 29 | self._env = env 30 | self._output_path = output_path 31 | self._model = model 32 | self._num_timesteps = num_timesteps 33 | 34 | def _train(self): 35 | """Train the RL model 36 | """ 37 | self._model.learn(total_timesteps=self._num_timesteps) 38 | 39 | def _predict(self, model, video_path): 40 | """Run predictions on trained RL model. 41 | """ 42 | 43 | vr = VideoRecorder(env=self._env, path="{}/rl_out.mp4".format(video_path, str(MPI.COMM_WORLD.Get_rank())), 44 | enabled=True) 45 | obs = self._env.reset() 46 | for i in range(1000): 47 | action, _states = model.predict(obs) 48 | obs, rewards, dones, info = self._env.step(action) 49 | if dones: 50 | obs = self._env.reset() 51 | self._env.render(mode='rgb_array') 52 | vr.capture_frame() 53 | vr.close() 54 | self._env.close() 55 | 56 | def run(self): 57 | 58 | self._train() 59 | 60 | if MPI.COMM_WORLD.Get_rank() == 0: 61 | self._predict(self._model, self._output_path) 62 | 63 | 64 | class SagemakerStableBaselinesPPO1Launcher(SagemakerStableBaselinesLauncher): 65 | """ 66 | Sagemaker's Stable Baselines PPO1 Launcher. 
67 | """ 68 | 69 | def __init__(self, env, output_path, timesteps_per_actorbatch, 70 | clip_param, entcoeff, optim_epochs, 71 | optim_stepsize, optim_batchsize, 72 | gamma, lam, schedule, 73 | verbose, num_timesteps): 74 | print( 75 | "Initializing PPO with output_path: {} and Hyper Params [timesteps_per_actorbatch: {},clip_param: {}, " 76 | "entcoeff: {}, optim_epochs: {}, optim_stepsize: {}, optim_batchsize: {}, gamma: {}, lam: {}, " 77 | "schedule: {}, verbose: {}, num_timesteps: {}]".format(output_path, timesteps_per_actorbatch, 78 | clip_param, entcoeff, optim_epochs, 79 | optim_stepsize, optim_batchsize, 80 | gamma, lam, schedule, 81 | verbose, num_timesteps)) 82 | super().__init__(env, output_path, 83 | PPO1(policy=MlpPolicy, 84 | env=env, 85 | gamma=gamma, 86 | timesteps_per_actorbatch=timesteps_per_actorbatch, 87 | clip_param=clip_param, 88 | entcoeff=entcoeff, 89 | optim_epochs=optim_epochs, 90 | optim_stepsize=optim_stepsize, 91 | optim_batchsize=optim_batchsize, 92 | lam=lam, 93 | schedule=schedule, 94 | verbose=verbose), 95 | num_timesteps) 96 | 97 | 98 | def create_env(env_id, output_path, seed=0): 99 | rank = MPI.COMM_WORLD.Get_rank() 100 | set_global_seeds(seed + 10000 * rank) 101 | env = gym.make(env_id) 102 | env = Monitor(env, os.path.join(output_path, str(rank)), allow_early_resets=True) 103 | env.seed(seed) 104 | return env 105 | -------------------------------------------------------------------------------- /sagemaker/source/common/sagemaker_rl/tf_serving_utils.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import ray 3 | import os 4 | import re 5 | 6 | 7 | def atoi(text): 8 | return int(text) if text.isdigit() else text 9 | 10 | 11 | def natural_keys(text): 12 | return [atoi(c) for c in re.split('(\d+)', text)] 13 | 14 | 15 | def change_permissions_recursive(path, mode): 16 | for root, dirs, files in os.walk(path, topdown=False): 17 | for dir in [os.path.join(root, d) for 
d in dirs]: 18 | os.chmod(dir, mode) 19 | for file in [os.path.join(root, f) for f in files]: 20 | os.chmod(file, mode) 21 | 22 | 23 | def export_tf_serving(agent, output_dir): 24 | if ray.__version__ >= "0.8.2": 25 | agent.export_policy_model(os.path.join(output_dir, "1")) 26 | else: 27 | policy = agent.local_evaluator.policy_map["default"] 28 | input_signature = {} 29 | input_signature["observations"] = tf.saved_model.utils.build_tensor_info(policy.observations) 30 | 31 | output_signature = {} 32 | output_signature["actions"] = tf.saved_model.utils.build_tensor_info(policy.sampler) 33 | output_signature["logits"] = tf.saved_model.utils.build_tensor_info(policy.logits) 34 | 35 | signature_def = ( 36 | tf.saved_model.signature_def_utils.build_signature_def( 37 | input_signature, output_signature, 38 | tf.saved_model.signature_constants.PREDICT_METHOD_NAME)) 39 | signature_def_key = (tf.saved_model.signature_constants. 40 | DEFAULT_SERVING_SIGNATURE_DEF_KEY) 41 | signature_def_map = {signature_def_key: signature_def} 42 | 43 | with policy.sess.graph.as_default(): 44 | builder = tf.saved_model.builder.SavedModelBuilder(os.path.join(output_dir, "1")) 45 | builder.add_meta_graph_and_variables( 46 | policy.sess, [tf.saved_model.tag_constants.SERVING], 47 | signature_def_map=signature_def_map) 48 | builder.save() 49 | print("Saved TensorFlow serving model!") 50 | -------------------------------------------------------------------------------- /sagemaker/source/common/setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | sudo -n true 4 | if [ $? -eq 0 ]; then 5 | echo "The user has root access." 6 | else 7 | echo "The user does not have root access. Everything required to run the notebook is already installed and setup. We are good to go!" 8 | exit 0 9 | fi 10 | 11 | # Do we have GPU support? 12 | nvidia-smi > /dev/null 2>&1 13 | if [ $? 
-eq 0 ]; then
    # check if we have nvidia-docker
    NVIDIA_DOCKER=`rpm -qa | grep -c nvidia-docker2`
    # Quoted expansions below: an empty command substitution would otherwise
    # make `[` fail with a syntax error instead of taking the else branch.
    if [ "$NVIDIA_DOCKER" -eq 0 ]; then
        # Install nvidia-docker2
        DOCKER_VERSION=`yum list docker | tail -1 | awk '{print $2}' | head -c 2`

        if [ "$DOCKER_VERSION" -eq 17 ]; then
            DOCKER_PKG_VERSION='17.09.1ce-1.111.amzn1'
            NVIDIA_DOCKER_PKG_VERSION='2.0.3-1.docker17.09.1.ce.amzn1'
        else
            DOCKER_PKG_VERSION='18.06.1ce-3.17.amzn1'
            NVIDIA_DOCKER_PKG_VERSION='2.0.3-1.docker18.06.1.ce.amzn1'
        fi

        sudo yum -y remove docker
        sudo yum -y install docker-$DOCKER_PKG_VERSION

        sudo /etc/init.d/docker start

        curl -s -L https://nvidia.github.io/nvidia-docker/amzn1/nvidia-docker.repo | sudo tee /etc/yum.repos.d/nvidia-docker.repo
        sudo yum install -y nvidia-docker2-$NVIDIA_DOCKER_PKG_VERSION
        sudo cp daemon.json /etc/docker/daemon.json
        sudo pkill -SIGHUP dockerd
        echo "installed nvidia-docker2"
    else
        echo "nvidia-docker2 already installed. We are good to go!"
    fi
fi

# This is common for both GPU and CPU instances

# check if we have docker-compose
docker-compose version >/dev/null 2>&1
if [ $? -ne 0 ]; then
    # install docker compose
    pip install docker-compose
fi

# check if we need to configure our docker interface
SAGEMAKER_NETWORK=`docker network ls | grep -c sagemaker-local`
if [ "$SAGEMAKER_NETWORK" -eq 0 ]; then
    docker network create --driver bridge sagemaker-local
fi

# Notebook instance Docker networking fixes
RUNNING_ON_NOTEBOOK_INSTANCE=`sudo iptables -S OUTPUT -t nat | grep -c 169.254.0.2`

# Get the Docker Network CIDR and IP for the sagemaker-local docker interface.
SAGEMAKER_INTERFACE=br-`docker network ls | grep sagemaker-local | cut -d' ' -f1`
DOCKER_NET=`ip route | grep $SAGEMAKER_INTERFACE | cut -d" " -f1`
DOCKER_IP=`ip route | grep $SAGEMAKER_INTERFACE | cut -d" " -f12`

# check if both IPTables and the Route Table are OK.
IPTABLES_PATCHED=`sudo iptables -S PREROUTING -t nat | grep -c $SAGEMAKER_INTERFACE`
ROUTE_TABLE_PATCHED=`sudo ip route show table agent | grep -c $SAGEMAKER_INTERFACE`

if [ "$RUNNING_ON_NOTEBOOK_INSTANCE" -gt 0 ]; then

    if [ "$ROUTE_TABLE_PATCHED" -eq 0 ]; then
        # fix routing
        sudo ip route add $DOCKER_NET via $DOCKER_IP dev $SAGEMAKER_INTERFACE table agent
    else
        echo "SageMaker instance route table setup is ok. We are good to go."
    fi

    if [ "$IPTABLES_PATCHED" -eq 0 ]; then
        sudo iptables -t nat -A PREROUTING -i $SAGEMAKER_INTERFACE -d 169.254.169.254/32 -p tcp -m tcp --dport 80 -j DNAT --to-destination 169.254.0.2:9081
        echo "iptables for Docker setup done"
    else
        echo "SageMaker instance routing for Docker is ok. We are good to go!"
    fi
fi

# --------------------------------------------------------------------------------
# /sagemaker/source/custom/Readme.md
# --------------------------------------------------------------------------------
# Contents of this folder are presented for reference only.
import numpy as np

from ray.rllib.agents.trainer import Trainer, with_common_config
from ray.rllib.utils.annotations import override

# NOTE: adapted from
# https://github.com/ray-project/ray/blob/master/rllib/contrib/random_agent/random_agent.py

# yapf: disable
# __sphinx_doc_begin__
class CustomRandomAgent(Trainer):
    """Trainer that samples uniformly random actions and never learns."""

    _name = "CustomRandomAgent"
    _default_config = with_common_config({
        "rollouts_per_iteration": 10,
    })

    @override(Trainer)
    def _init(self, config, env_creator):
        # Build the environment once; it is reused for every rollout.
        self.env = env_creator(config["env_config"])

    @override(Trainer)
    def _train(self):
        """Run `rollouts_per_iteration` episodes and report mean return."""
        episode_returns = []
        step_count = 0
        for _ in range(self.config["rollouts_per_iteration"]):
            self.env.reset()
            episode_return = 0.0
            done = False
            while not done:
                _, r, done, _ = self.env.step(self.env.action_space.sample())
                episode_return += r
                step_count += 1
            episode_returns.append(episode_return)
        return {
            "episode_reward_mean": np.mean(episode_returns),
            "timesteps_this_iter": step_count,
        }
# __sphinx_doc_end__
# don't enable yapf after, it's buggy here


if __name__ == "__main__":
    trainer = CustomRandomAgent(
        env="CartPole-v0", config={"rollouts_per_iteration": 10})
    result = trainer.train()
    assert result["episode_reward_mean"] > 10, result
    print("Test: OK")
24 | self.w = [] 25 | 26 | def compute_actions( 27 | self, 28 | obs_batch, 29 | state_batches, 30 | prev_action_batch=None, 31 | prev_reward_batch=None, 32 | info_batch=None, 33 | episodes=None, 34 | **kwargs 35 | ): 36 | """Return the action for a batch 37 | 38 | Returns: 39 | action_batch: List of actions for the batch 40 | rnn_states: List of RNN states if any 41 | info: Additional info 42 | """ 43 | action_batch = [] 44 | rnn_states = [] 45 | info = {} 46 | for _ in obs_batch: 47 | action_batch.append(self.action_space.sample()) 48 | return action_batch, rnn_states, info 49 | 50 | def learn_on_batch(self, samples): 51 | """Fused compute gradients and apply gradients call. 52 | 53 | Either this or the combination of compute/apply grads must be 54 | implemented by subclasses. 55 | 56 | Returns: 57 | grad_info: dictionary of extra metadata from compute_gradients(). 58 | Examples: 59 | >>> batch = ev.sample() 60 | >>> ev.learn_on_batch(samples) 61 | 62 | Reference: https://github.com/ray-project/ray/blob/master/rllib/policy/policy.py#L279-L316 63 | """ 64 | # implement your learning code here 65 | return {} 66 | 67 | def get_weights(self): 68 | """Returns model weights. 69 | 70 | Returns: 71 | weights (obj): Serializable copy or view of model weights 72 | """ 73 | return {"w": self.w} 74 | 75 | def set_weights(self, weights): 76 | """Returns the current exploration information of this policy. 77 | 78 | This information depends on the policy's Exploration object. 79 | 80 | Returns: 81 | any: Serializable information on the `self.exploration` object. 
82 | """ 83 | self.w = weights["w"] 84 | -------------------------------------------------------------------------------- /sagemaker/source/custom/algorithms/random_policy/readme.md: -------------------------------------------------------------------------------- 1 | # Writing a custom policy 2 | 3 | For more information on writing custom policies, please refer https://docs.ray.io/en/master/rllib-concepts.html 4 | 5 | This directory contains the example code for implementing a custom random policy. Here, the agent never learns and outputs random actions for every observation. 6 | 7 | ## Directory structure 8 | 9 | ``` 10 | . 11 | └── algorithms # Directory containing code for custom algorithms 12 |    ├── __init__.py 13 |    ├── random_policy # Python module for random policy 14 |    │   ├── __init__.py 15 |    │   ├── policy.py # Code for random policy 16 |    │   └── trainer.py # Training wrapper for the random policy 17 |    └── registry.py 18 | ``` 19 | 20 | ## How to start? 21 | 22 | - Go through `policy.py` that has most of what you are looking for. `trainer.py` is just a training wrapper around the policy. 23 | - Once the policy is implemented, you need to register the policy with `rllib`. You can do this by adding your policy trainer class to `registry.py`. -------------------------------------------------------------------------------- /sagemaker/source/custom/algorithms/random_policy/trainer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from ray.rllib.agents.trainer_template import build_trainer 4 | from .policy import RandomPolicy 5 | 6 | DEFAULT_CONFIG = ( 7 | {} 8 | ) # Default config parameters that can be overriden by experiments YAML. 
"""
Registry of custom implemented algorithms names

Please refer to the following examples to add your custom algorithms :

- AlphaZero : https://github.com/ray-project/ray/tree/master/rllib/contrib/alpha_zero
- bandits : https://github.com/ray-project/ray/tree/master/rllib/contrib/bandits
- maddpg : https://github.com/ray-project/ray/tree/master/rllib/contrib/maddpg
- random_agent: https://github.com/ray-project/ray/tree/master/rllib/contrib/random_agent

An example integration of the random agent is shown here :
- https://github.com/AIcrowd/neurips2020-procgen-starter-kit/tree/master/algorithms/custom_random_agent
"""


def _import_custom_random_agent():
    # Lazy import: loading the registry must not pull in ray/rllib.
    from .custom_random_agent.custom_random_agent import CustomRandomAgent

    return CustomRandomAgent


def _import_random_policy():
    # Lazy import: loading the registry must not pull in ray/rllib.
    from .random_policy.trainer import RandomPolicyTrainer

    return RandomPolicyTrainer


# Maps the algorithm name used in experiment YAMLs to a zero-arg loader
# that returns the trainer class.
CUSTOM_ALGORITHMS = {
    "custom/CustomRandomAgent": _import_custom_random_agent,
    "RandomPolicy": _import_random_policy,
}
DefaultCallbacks

import numpy as np


class CustomCallbacks(DefaultCallbacks):
    """No-op callback hooks for custom metrics and postprocessing.

    Please refer to:
    https://github.com/ray-project/ray/blob/master/rllib/examples/custom_metrics_and_callbacks.py
    https://docs.ray.io/en/latest/rllib-training.html#callbacks-and-custom-metrics
    for examples on adding your custom metrics and callbacks.

    The per-hook documentation is adapted from:
    https://github.com/ray-project/ray/blob/master/rllib/agents/callbacks.py

    Every hook below is intentionally a no-op; fill in the ones you need.
    """

    def on_episode_start(self, worker: RolloutWorker, base_env: BaseEnv,
                         policies: Dict[str, Policy],
                         episode: MultiAgentEpisode, **kwargs):
        """Runs on the rollout worker before each episode starts.

        `base_env.get_unwrapped()` exposes the underlying env;
        `episode.user_data` may hold temporary per-episode state and
        `episode.custom_metrics` collects custom metrics. In single-agent
        mode `policies` maps only a single "default" policy. `kwargs` is a
        forward-compatibility placeholder.
        """
        pass

    def on_episode_step(self, worker: RolloutWorker, base_env: BaseEnv,
                        episode: MultiAgentEpisode, **kwargs):
        """Runs on each episode step.

        Same `episode.user_data` / `episode.custom_metrics` conventions as
        `on_episode_start`. `kwargs` is a forward-compatibility placeholder.
        """
        pass

    def on_episode_end(self, worker: RolloutWorker, base_env: BaseEnv,
                       policies: Dict[str, Policy],
                       episode: MultiAgentEpisode, **kwargs):
        """Runs when an episode is done.

        Same argument conventions as `on_episode_start`.
        """
        # Example of deriving a custom metric from the final observation:
        # last_obs = episode.last_observation_for()
        # episode.custom_metrics["observation_mean"] = last_obs.mean()
        pass

    def on_postprocess_trajectory(
            self, worker: RolloutWorker, episode: MultiAgentEpisode,
            agent_id: str, policy_id: str,
            policies: Dict[str, Policy], postprocessed_batch: SampleBatch,
            original_batches: Dict[str, SampleBatch], **kwargs):
        """Called immediately after a policy's postprocess_fn is called.

        Useful for extra per-policy postprocessing, including inspecting the
        trajectories of other agents in multi-agent settings.
        `postprocessed_batch` may be mutated to apply your own trajectory
        postprocessing; `original_batches` (unpostprocessed data per agent)
        must not be mutated. `kwargs` is a forward-compatibility placeholder.
        """
        pass

    def on_sample_end(self, worker: RolloutWorker, samples: SampleBatch,
                      **kwargs):
        """Called at the end of RolloutWorker.sample().

        `samples` is the batch about to be returned; it may be mutated to
        modify the generated samples. `kwargs` is a forward-compatibility
        placeholder.
        """
        pass

    def on_train_result(self, trainer, result: dict, **kwargs):
        """Called at the end of Trainable.train().

        `result` is the dict returned from trainer.train() and may be
        mutated to add additional metrics. `kwargs` is a
        forward-compatibility placeholder.
        """
        # Example: print the mean timesteps throughput for easy log grepping:
        # print("=============================================================")
        # print(" Timesteps Throughput : {} ts/sec".format(TBD))
        # print("=============================================================")
        pass
from ray.tune import registry
from procgen.env import ENV_NAMES as VALID_ENV_NAMES


class ProcgenEnvWrapper(gym.Env):
    """Gym wrapper around a procgen environment.

    Merges the caller-supplied ``config`` over procgen's documented defaults,
    builds the env via ``gym.make`` and forwards the standard gym API to it.
    """

    def __init__(self, config):
        # Defaults mirror procgen's env kwargs; see the comments for what
        # each option controls.
        self._default_config = {
            "num_levels": 0,  # Number of unique levels; 0 means unlimited.
            "env_name": "coinrun",  # Which procgen game to instantiate.
            "start_level": 0,  # Lowest level seed; with num_levels this fully specifies the level set.
            "paint_vel_info": False,  # Paint player velocity in the top-left corner (game-specific).
            "use_generated_assets": False,  # Randomly generated assets instead of human-designed ones.
            "center_agent": True,  # Center observations on the agent (override at your own risk).
            "use_sequential_levels": False,  # Chain levels into one continuous episode.
            # "easy" | "hard" | "extreme" | "memory" | "exploration"; all games
            # support "easy"/"hard". NOTE: during rollout evaluation this is
            # always overridden to "easy".
            "distribution_mode": "easy",
        }
        self.config = self._default_config
        self.config.update(config)

        # env_name is consumed here; the remaining keys are passed straight
        # through to gym.make as procgen kwargs.
        self.env_name = self.config.pop("env_name")

        assert self.env_name in VALID_ENV_NAMES, \
            f"unknown procgen env {self.env_name!r}"

        env = gym.make(f"procgen:procgen-{self.env_name}-v0", **self.config)
        self.env = env
        # Expose metadata so video-recording wrappers keep working.
        self.metadata = self.env.metadata

        self.action_space = self.env.action_space
        self.observation_space = self.env.observation_space
        self._done = True

    def reset(self):
        # procgen envs cannot be reset mid-episode; guard against it.
        assert self._done, "procgen envs cannot be early-restarted"
        return self.env.reset()

    def step(self, action):
        obs, rew, done, info = self.env.step(action)
        self._done = done
        return obs, rew, done, info

    def render(self, mode="human"):
        return self.env.render(mode=mode)

    def close(self):
        return self.env.close()

    def seed(self, seed=None):
        return self.env.seed(seed)

    def __repr__(self):
        # BUG FIX: the original returned `self.env.__repr()`, which raises
        # AttributeError (`__repr` does not exist); use the repr() protocol.
        return repr(self.env)

    @property
    def spec(self):
        return self.env.spec


# Register Env in Ray
registry.register_env(
    "procgen_env_wrapper",
    lambda config: ProcgenEnvWrapper(config)
)
12 | 13 | ### Example 14 | 15 | A simple example to use framestack will be 16 | 17 | ```python 18 | from gym.wrappers import FrameStack 19 | from ray.tune import registry 20 | 21 | from envs.procgen_env_wrapper import ProcgenEnvWrapper 22 | 23 | # Register Env in Ray 24 | registry.register_env( 25 | "stacked_procgen_env", # This should be different from procgen_env_wrapper 26 | lambda config: FrameStack(ProcgenEnvWrapper(config), 4) 27 | ) 28 | ``` 29 | 30 | You can point to `stacked_procgen_env` instead of `procgen_env_wrapper` in your 31 | experiment config file in order to use the env with the wrapper. 32 | 33 | ### Note 34 | - If you do not use `ProcgenEnvWrapper` as your base env, the 35 | rollouts will fail. 36 | - Please do not edit `procgen_env_wrapper.py` file. All the changes 37 | you make to this file will be dropped during the evaluation. 38 | -------------------------------------------------------------------------------- /sagemaker/source/custom/experiments/procgen-starter-example.yaml: -------------------------------------------------------------------------------- 1 | procgen-starter-example: 2 | env: procgen_env_wrapper # Change this at your own risk :D 3 | run: PPO 4 | # Can be replaced by any of the available agents as described at : 5 | # https://github.com/ray-project/ray/blob/master/rllib/agents/registry.py#L103 6 | # 7 | # Internally, rllib uses the terminology of Trainable, Algorithms, Agents depending 8 | # on the context in which it is used. In this repository, we will consistently 9 | # use the terminology of "Algorithms" to refer to these Trainables/Agents. 
10 | # 11 | # This can also be replaced by a custom "algorithm" 12 | # For addition of custom algorithms, 13 | # Please refer to : 14 | # https://github.com/AIcrowd/neurips2020-procgen-starter-kit/blob/master/algorithms/registry.py 15 | ################################################ 16 | # === Stop Conditions === 17 | ################################################ 18 | stop: 19 | timesteps_total: 3000 # 100 20 | 21 | ################################################ 22 | # === Settings for Checkpoints === 23 | ################################################ 24 | checkpoint_freq: 1 25 | checkpoint_at_end: True 26 | keep_checkpoints_num: 5 27 | 28 | config: 29 | ################################################ 30 | ################################################ 31 | # === Settings for the Procgen Environment === 32 | ################################################ 33 | ################################################ 34 | env_config: 35 | # Name of the procgen environment to train on # Note, that this parameter will be overriden during the evaluation by the AIcrowd evaluators. 36 | env_name: coinrun 37 | # The number of unique levels that can be generated. Set to 0 to use unlimited levels 38 | num_levels: 0 39 | # The lowest seed that will be used to generated levels. 'start_level' and 'num_levels' fully specify the set of possible levels 40 | start_level: 0 41 | # Paint player velocity info in the top left corner. Only supported by certain games. 42 | paint_vel_info: False 43 | # Use randomly generated assets in place of human designed assets 44 | use_generated_assets: False 45 | # center_agent : Determines whether observations are centered on the agent or display the full level. Override at your own risk. 46 | center_agent: True 47 | # sequential levels : When you reach the end of a level, the episode is ended and a new level is selected. 
If use_sequential_levels is set to True, reaching the end of a level does not end the episode, and the seed for the new level is derived from the current level seed. If you combine this with start_level= and num_levels=1, you can have a single linear series of levels similar to a gym-retro or ALE game. 48 | use_sequential_levels: False 49 | # What variant of the levels to use, the options are "easy", "hard", "extreme", "memory", "exploration". All games support "easy" and "hard", while other options are game-specific. The default is "hard". Switching to "easy" will reduce the number of timesteps required to solve each game and is useful for testing or when working with limited compute resources. NOTE : During the evaluation phase (rollout), this will always be overriden to "easy" 50 | distribution_mode: easy 51 | 52 | ################################################ 53 | ################################################ 54 | # === Environment Settings === 55 | ################################################ 56 | ################################################ 57 | # Discount factor of the MDP. 58 | gamma: 0.99 59 | # The default learning rate. 60 | lr: 0.0001 61 | # Number of steps after which the episode is forced to terminate. Defaults 62 | # to `env.spec.max_episode_steps` (if present) for Gym envs. 63 | horizon: null 64 | # Calculate rewards but don't reset the environment when the horizon is 65 | # hit. This allows value estimation and RNN state to span across logical 66 | # episodes denoted by horizon. This only has an effect if horizon != inf. 67 | soft_horizon: False 68 | # Don't set 'done' at the end of the episode. Note that you still need to 69 | # set this if soft_horizon=True, unless your env is actually running 70 | # forever without returning done=True. 
71 | no_done_at_end: False 72 | 73 | # Unsquash actions to the upper and lower bounds of env's action space 74 | normalize_actions: False 75 | # Whether to clip rewards prior to experience postprocessing. Setting to 76 | # None means clip for Atari only. 77 | clip_rewards: null 78 | # Whether to np.clip() actions to the action space low/high range spec. 79 | clip_actions: True 80 | # Whether to use rllib or deepmind preprocessors by default 81 | preprocessor_pref: deepmind 82 | 83 | # Whether to attempt to continue training if a worker crashes. The number 84 | # of currently healthy workers is reported as the "num_healthy_workers" 85 | # metric. 86 | ignore_worker_failures: False 87 | # Log system resource metrics to results. This requires `psutil` to be 88 | # installed for sys stats, and `gputil` for GPU metrics. 89 | # Note : The AIcrowd Evaluators will always override this to be True 90 | log_sys_usage: True 91 | 92 | # Use PyTorch (instead of tf). If using `rllib train`, this can also be 93 | # enabled with the `--torch` flag. 94 | # NOTE: Some agents may not support `torch` yet and throw an error. 95 | use_pytorch: False 96 | 97 | ################################################ 98 | ################################################ 99 | # === Settings for Model === 100 | ################################################ 101 | ################################################ 102 | model: 103 | # === Built-in options === 104 | # More information on built in Models are available here : 105 | # https://ray.readthedocs.io/en/stable/rllib-models.html#built-in-models-and-preprocessors 106 | # 107 | # Filter config. 
List of [out_channels, kernel, stride] for each filter 108 | conv_filters: 109 | - [16, [3, 3], 3] 110 | - [16, [3, 3], 1] 111 | # Nonlinearity for built-in convnet 112 | conv_activation: relu 113 | # Nonlinearity for fully connected net (tanh, relu) 114 | fcnet_activation: tanh 115 | # Number of hidden layers for fully connected net 116 | fcnet_hiddens: [128, 128] 117 | # Whether to skip the final linear layer used to resize the hidden layer 118 | # outputs to size `num_outputs`. If True, then the last hidden layer 119 | # should already match num_outputs. 120 | no_final_linear: false 121 | # Whether layers should be shared for the value function 122 | vf_share_layers: true 123 | 124 | # == LSTM == 125 | # Whether to wrap the model with a LSTM 126 | use_lstm: false 127 | # Max seq len for training the LSTM, defaults to 20 128 | max_seq_len: 20 129 | # Size of the LSTM cell 130 | lstm_cell_size: 256 131 | # Whether to feed a_{t-1}, r_{t-1} to LSTM 132 | lstm_use_prev_action_reward: false 133 | # When using modelv1 models with a modelv2 algorithm, you may have to 134 | # define the state shape here (e.g., [256, 256]). 135 | state_shape: null 136 | 137 | # === Options for custom models === 138 | # Name of a custom model to use 139 | # 140 | # Custom Models can be implemented in the models/ folder. 141 | # Please refer to : 142 | # https://github.com/AIcrowd/neurips2020-procgen-starter-kit/blob/master/models/my_vision_network.py 143 | # for an example. 144 | # 145 | # RLlib documentation on implementing custom Models is available 146 | # here : 147 | # https://ray.readthedocs.io/en/stable/rllib-models.html#custom-models-tensorflow 148 | # 149 | # Participants can also choose to implement their models 150 | # in PyTorch. 
Here is an example to implement a PyTorch based model : 151 | # https://github.com/ray-project/ray/blob/master/rllib/examples/custom_torch_policy.py 152 | # 153 | # Examples of implementing the model in Keras is also available 154 | # here : 155 | # https://github.com/ray-project/ray/blob/master/rllib/examples/custom_keras_model.py 156 | custom_model: my_vision_network 157 | # Extra options to pass to custom class 158 | custom_options: {} 159 | 160 | ################################################ 161 | ################################################ 162 | # === Settings for Rollout Worker processes === 163 | ################################################ 164 | ################################################ 165 | # Number of rollout worker actors to create for parallel sampling. Setting 166 | # this to 0 will force rollouts to be done in the trainer actor. 167 | num_workers: 1 168 | 169 | # Number of environments to evaluate vectorwise per worker. This enables 170 | # model inference batching, which can improve performance for inference 171 | # bottlenecked workloads. 172 | num_envs_per_worker: 2 173 | 174 | # Divide episodes into fragments of this many steps each during rollouts. 175 | # Sample batches of this size are collected from rollout workers and 176 | # combined into a larger batch of `train_batch_size` for learning. 177 | # 178 | # For example, given rollout_fragment_length=100 and train_batch_size=1000: 179 | # 1. RLlib collects 10 fragments of 100 steps each from rollout workers. 180 | # 2. These fragments are concatenated and we perform an epoch of SGD. 181 | # 182 | # When using multiple envs per worker, the fragment size is multiplied by 183 | # `num_envs_per_worker`. This is since we are collecting steps from 184 | # multiple envs in parallel. For example, if num_envs_per_worker=5, then 185 | # rollout workers will return experiences in chunks of 5*100 = 500 steps. 186 | # 187 | # The dataflow here can vary per algorithm. 
For example, PPO further 188 | # divides the train batch into minibatches for multi-epoch SGD. 189 | rollout_fragment_length: 200 190 | 191 | # Whether to rollout "complete_episodes" or "truncate_episodes" to 192 | # `rollout_fragment_length` length unrolls. Episode truncation guarantees 193 | # evenly sized batches, but increases variance as the reward-to-go will 194 | # need to be estimated at truncation boundaries. 195 | batch_mode: truncate_episodes 196 | 197 | ################################################ 198 | ################################################ 199 | # === Advanced Resource Settings === 200 | ################################################ 201 | ################################################ 202 | # Number of CPUs to allocate per worker. 203 | num_cpus_per_worker: 1 204 | # Number of GPUs to allocate per worker. This can be fractional. This is 205 | # usually needed only if your env itself requires a GPU (i.e., it is a 206 | # GPU-intensive video game), or model inference is unusually expensive. 207 | num_gpus_per_worker: 0.9 208 | # Number of CPUs to allocate for the trainer. Note: this only takes effect 209 | # when running in Tune. Otherwise, the trainer runs in the main program. 210 | num_cpus_for_driver: 1 211 | 212 | ################################################ 213 | ################################################ 214 | # === Settings for the Trainer process === 215 | ################################################ 216 | ################################################ 217 | # Number of GPUs to allocate to the trainer process. Note that not all 218 | # algorithms can take advantage of trainer GPUs. This can be fractional 219 | # (e.g., 0.3 GPUs). 220 | # Note : If GPUs are not available, this will be overriden by the AIcrowd evaluators to 0. 221 | num_gpus: 0 222 | # Training batch size, if applicable. Should be >= rollout_fragment_length. 
223 | # Samples batches will be concatenated together to a batch of this size, 224 | # which is then passed to SGD. 225 | train_batch_size: 200 226 | 227 | ################################################ 228 | ################################################ 229 | # === Exploration Settings === 230 | ################################################ 231 | ################################################ 232 | # Default exploration behavior, iff `explore`=None is passed into 233 | # compute_action(s). 234 | # Set to False for no exploration behavior (e.g., for evaluation). 235 | explore: True, 236 | # Provide a dict specifying the Exploration object's config. 237 | exploration_config: 238 | # The Exploration class to use. In the simplest case, this is the name 239 | # (str) of any class present in the `rllib.utils.exploration` package. 240 | # You can also provide the python class directly or the full location 241 | # of your class (e.g. "ray.rllib.utils.exploration.epsilon_greedy. 242 | # EpsilonGreedy) 243 | type: "StochasticSampling" 244 | # Can add constructor kwargs here (if any) 245 | 246 | ################################################ 247 | ################################################ 248 | # === Advanced Rollout Settings === 249 | ################################################ 250 | ################################################ 251 | # Element-wise observation filter, either "NoFilter" or "MeanStdFilter". 252 | observation_filter: "NoFilter" 253 | # Whether to synchronize the statistics of remote filters. 254 | synchronize_filters: True 255 | # Whether to LZ4 compress individual observations 256 | compress_observations: False 257 | # Minimum env steps to optimize for per train call. This value does 258 | # not affect learning, only the length of train iterations. 
259 | timesteps_per_iteration: 0 260 | # This argument, in conjunction with worker_index, sets the random seed of 261 | # each worker, so that identically configured trials will have identical 262 | # results. This makes experiments reproducible. 263 | seed: null 264 | -------------------------------------------------------------------------------- /sagemaker/source/custom/experiments/random-policy.yaml: -------------------------------------------------------------------------------- 1 | procgen-starter-example: 2 | env: procgen_env_wrapper # Change this at your own risk :D 3 | run: RandomPolicy 4 | 5 | stop: 6 | timesteps_total: 300000 7 | checkpoint_freq: 1 8 | checkpoint_at_end: True 9 | keep_checkpoints_num: 5 10 | 11 | config: 12 | env_config: 13 | env_name: coinrun 14 | num_levels: 0 15 | start_level: 0 16 | paint_vel_info: False 17 | use_generated_assets: False 18 | center_agent: True 19 | use_sequential_levels: False 20 | distribution_mode: easy 21 | 22 | model: 23 | custom_preprocessor: MyPreprocessor 24 | -------------------------------------------------------------------------------- /sagemaker/source/custom/models/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sagemaker-rl-procgen-ray/ba82c1f0bac2c985930bda7875f1950723f6d7ce/sagemaker/source/custom/models/.gitkeep -------------------------------------------------------------------------------- /sagemaker/source/custom/models/impala_cnn_tf.py: -------------------------------------------------------------------------------- 1 | from ray.rllib.models.tf.tf_modelv2 import TFModelV2 2 | from ray.rllib.utils.framework import try_import_tf 3 | from ray.rllib.models import ModelCatalog 4 | 5 | tf = try_import_tf() 6 | 7 | 8 | def conv_layer(depth, name): 9 | return tf.keras.layers.Conv2D( 10 | filters=depth, kernel_size=3, strides=1, padding="same", name=name 11 | ) 12 | 13 | 14 | def residual_block(x, depth, prefix): 15 | 
inputs = x 16 | assert inputs.get_shape()[-1].value == depth 17 | x = tf.keras.layers.ReLU()(x) 18 | x = conv_layer(depth, name=prefix + "_conv0")(x) 19 | x = tf.keras.layers.ReLU()(x) 20 | x = conv_layer(depth, name=prefix + "_conv1")(x) 21 | return x + inputs 22 | 23 | 24 | def conv_sequence(x, depth, prefix): 25 | x = conv_layer(depth, prefix + "_conv")(x) 26 | x = tf.keras.layers.MaxPool2D(pool_size=3, strides=2, padding="same")(x) 27 | x = residual_block(x, depth, prefix=prefix + "_block0") 28 | x = residual_block(x, depth, prefix=prefix + "_block1") 29 | return x 30 | 31 | 32 | class ImpalaCNN(TFModelV2): 33 | """ 34 | Network from IMPALA paper implemented in ModelV2 API. 35 | 36 | Based on https://github.com/ray-project/ray/blob/master/rllib/models/tf/visionnet_v2.py 37 | and https://github.com/openai/baselines/blob/9ee399f5b20cd70ac0a871927a6cf043b478193f/baselines/common/models.py#L28 38 | """ 39 | 40 | def __init__(self, obs_space, action_space, num_outputs, model_config, name): 41 | super().__init__(obs_space, action_space, num_outputs, model_config, name) 42 | 43 | depths = [16, 32, 32] 44 | 45 | inputs = tf.keras.layers.Input(shape=obs_space.shape, name="observations") 46 | scaled_inputs = tf.cast(inputs, tf.float32) / 255.0 47 | 48 | x = scaled_inputs 49 | for i, depth in enumerate(depths): 50 | x = conv_sequence(x, depth, prefix=f"seq{i}") 51 | 52 | x = tf.keras.layers.Flatten()(x) 53 | x = tf.keras.layers.ReLU()(x) 54 | x = tf.keras.layers.Dense(units=256, activation="relu", name="hidden")(x) 55 | logits = tf.keras.layers.Dense(units=num_outputs, name="pi")(x) 56 | value = tf.keras.layers.Dense(units=1, name="vf")(x) 57 | self.base_model = tf.keras.Model(inputs, [logits, value]) 58 | self.register_variables(self.base_model.variables) 59 | 60 | def forward(self, input_dict, state, seq_lens): 61 | # explicit cast to float32 needed in eager 62 | obs = tf.cast(input_dict["obs"], tf.float32) 63 | logits, self._value = self.base_model(obs) 64 | return 
logits, state 65 | 66 | def value_function(self): 67 | return tf.reshape(self._value, [-1]) 68 | 69 | 70 | # Register model in ModelCatalog 71 | ModelCatalog.register_custom_model("impala_cnn_tf", ImpalaCNN) 72 | -------------------------------------------------------------------------------- /sagemaker/source/custom/models/impala_cnn_torch.py: -------------------------------------------------------------------------------- 1 | from ray.rllib.models.torch.torch_modelv2 import TorchModelV2 2 | from ray.rllib.models import ModelCatalog 3 | from ray.rllib.utils.annotations import override 4 | from ray.rllib.utils import try_import_torch 5 | 6 | torch, nn = try_import_torch() 7 | 8 | 9 | class ResidualBlock(nn.Module): 10 | def __init__(self, channels): 11 | super().__init__() 12 | self.conv0 = nn.Conv2d(in_channels=channels, out_channels=channels, kernel_size=3, padding=1) 13 | self.conv1 = nn.Conv2d(in_channels=channels, out_channels=channels, kernel_size=3, padding=1) 14 | 15 | def forward(self, x): 16 | inputs = x 17 | x = nn.functional.relu(x) 18 | x = self.conv0(x) 19 | x = nn.functional.relu(x) 20 | x = self.conv1(x) 21 | return x + inputs 22 | 23 | 24 | class ConvSequence(nn.Module): 25 | def __init__(self, input_shape, out_channels): 26 | super().__init__() 27 | self._input_shape = input_shape 28 | self._out_channels = out_channels 29 | self.conv = nn.Conv2d(in_channels=self._input_shape[0], out_channels=self._out_channels, kernel_size=3, padding=1) 30 | self.res_block0 = ResidualBlock(self._out_channels) 31 | self.res_block1 = ResidualBlock(self._out_channels) 32 | 33 | def forward(self, x): 34 | x = self.conv(x) 35 | x = nn.functional.max_pool2d(x, kernel_size=3, stride=2, padding=1) 36 | x = self.res_block0(x) 37 | x = self.res_block1(x) 38 | assert x.shape[1:] == self.get_output_shape() 39 | return x 40 | 41 | def get_output_shape(self): 42 | _c, h, w = self._input_shape 43 | return (self._out_channels, (h + 1) // 2, (w + 1) // 2) 44 | 45 | 46 | class 
ImpalaCNN(TorchModelV2, nn.Module): 47 | """ 48 | Network from IMPALA paper implemented in ModelV2 API. 49 | 50 | Based on https://github.com/ray-project/ray/blob/master/rllib/models/tf/visionnet_v2.py 51 | and https://github.com/openai/baselines/blob/9ee399f5b20cd70ac0a871927a6cf043b478193f/baselines/common/models.py#L28 52 | """ 53 | 54 | def __init__(self, obs_space, action_space, num_outputs, model_config, 55 | name): 56 | TorchModelV2.__init__(self, obs_space, action_space, num_outputs, 57 | model_config, name) 58 | nn.Module.__init__(self) 59 | 60 | h, w, c = obs_space.shape 61 | shape = (c, h, w) 62 | 63 | conv_seqs = [] 64 | for out_channels in [16, 32, 32]: 65 | conv_seq = ConvSequence(shape, out_channels) 66 | shape = conv_seq.get_output_shape() 67 | conv_seqs.append(conv_seq) 68 | self.conv_seqs = nn.ModuleList(conv_seqs) 69 | self.hidden_fc = nn.Linear(in_features=shape[0] * shape[1] * shape[2], out_features=256) 70 | self.logits_fc = nn.Linear(in_features=256, out_features=num_outputs) 71 | self.value_fc = nn.Linear(in_features=256, out_features=1) 72 | 73 | @override(TorchModelV2) 74 | def forward(self, input_dict, state, seq_lens): 75 | x = input_dict["obs"].float() 76 | x = x / 255.0 # scale to 0-1 77 | x = x.permute(0, 3, 1, 2) # NHWC => NCHW 78 | for conv_seq in self.conv_seqs: 79 | x = conv_seq(x) 80 | x = torch.flatten(x, start_dim=1) 81 | x = nn.functional.relu(x) 82 | x = self.hidden_fc(x) 83 | x = nn.functional.relu(x) 84 | logits = self.logits_fc(x) 85 | value = self.value_fc(x) 86 | self._value = value.squeeze(1) 87 | return logits, state 88 | 89 | @override(TorchModelV2) 90 | def value_function(self): 91 | assert self._value is not None, "must call forward() first" 92 | return self._value 93 | 94 | ModelCatalog.register_custom_model("impala_cnn_torch", ImpalaCNN) -------------------------------------------------------------------------------- /sagemaker/source/custom/models/my_vision_network.py: 
-------------------------------------------------------------------------------- 1 | from ray.rllib.models.tf.tf_modelv2 import TFModelV2 2 | from ray.rllib.models.tf.visionnet_v1 import _get_filter_config 3 | from ray.rllib.models.tf.misc import normc_initializer 4 | from ray.rllib.utils.framework import try_import_tf 5 | 6 | tf = try_import_tf() 7 | 8 | """ 9 | NOTE : This implementation has been taken from : 10 | https://github.com/ray-project/ray/blob/master/rllib/models/tf/visionnet_v2.py 11 | 12 | to act as a reference implementation for implementing custom models. 13 | """ 14 | 15 | 16 | def get_conv_activation(model_config): 17 | if model_config.get("conv_activation") == "linear": 18 | activation = None 19 | else: 20 | activation = getattr(tf.nn, model_config.get("conv_activation")) 21 | return activation 22 | 23 | 24 | def get_fc_activation(model_config): 25 | activation = model_config.get("fcnet_activation") 26 | if activation is None: 27 | activation = tf.keras.layers.ReLU() 28 | return activation 29 | 30 | 31 | def conv_layers(x, model_config, obs_space, prefix=""): 32 | filters = model_config.get("conv_filters") 33 | if not filters: 34 | filters = _get_filter_config(obs_space.shape) 35 | 36 | activation = get_conv_activation(model_config) 37 | 38 | for i, (out_size, kernel, stride) in enumerate(filters, 1): 39 | x = tf.keras.layers.Conv2D( 40 | out_size, 41 | kernel, 42 | strides=(stride, stride), 43 | activation=activation, 44 | padding="same", 45 | data_format="channels_last", 46 | name=f"{prefix}conv{i}", 47 | )(x) 48 | return x 49 | 50 | 51 | def fc_layers(x, model_config, prefix=""): 52 | x = tf.keras.layers.Flatten()(x) 53 | activation = get_fc_activation(model_config) 54 | fc_layers_config = model_config.get("fcnet_hiddens", []) 55 | for i, dim in enumerate(fc_layers_config): 56 | x = tf.keras.layers.Dense( 57 | units=dim, activation=activation, name=f"{prefix}fc-{i}" 58 | )(x) 59 | return x 60 | 61 | 62 | def get_final_fc(x, num_outputs, 
model_config): 63 | x = tf.keras.layers.Dense(num_outputs, name="pi")(x) 64 | return x 65 | 66 | 67 | def value_layers(x, inputs, obs_space, model_config): 68 | if not model_config.get("vf_share_layers"): 69 | x = conv_layers(inputs, model_config, obs_space, prefix="vf-") 70 | x = fc_layers(x, model_config, prefix="vf-") 71 | x = tf.keras.layers.Dense(units=1, name="vf")(x) 72 | return x 73 | 74 | 75 | class MyVisionNetwork(TFModelV2): 76 | """Generic vision network implemented in ModelV2 API.""" 77 | 78 | def __init__(self, obs_space, action_space, num_outputs, model_config, name): 79 | super(MyVisionNetwork, self).__init__( 80 | obs_space, action_space, num_outputs, model_config, name 81 | ) 82 | 83 | inputs = tf.keras.layers.Input(shape=obs_space.shape, name="observations") 84 | last_layer = inputs 85 | # Build the conv layers 86 | last_layer = conv_layers(last_layer, model_config, obs_space) 87 | # Build the linear layers 88 | last_layer = fc_layers(last_layer, model_config) 89 | # Final linear layer 90 | logits = get_final_fc(last_layer, num_outputs, model_config) 91 | # Build the value layers 92 | value_out = value_layers(last_layer, inputs, obs_space, model_config) 93 | 94 | self.base_model = tf.keras.Model(inputs, [logits, value_out]) 95 | self.register_variables(self.base_model.variables) 96 | 97 | def forward(self, input_dict, state, seq_lens): 98 | # explicit cast to float32 needed in eager 99 | logits, self._value_out = self.base_model( 100 | tf.cast(input_dict["obs"], tf.float32) 101 | ) 102 | return logits, state 103 | 104 | def value_function(self): 105 | return tf.reshape(self._value_out, [-1]) -------------------------------------------------------------------------------- /sagemaker/source/custom/preprocessors/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from .custom_preprocessor import MyPreprocessorClass 4 | 5 | CUSTOM_PREPROCESSORS = {"MyPreprocessor": 
MyPreprocessorClass} 6 | -------------------------------------------------------------------------------- /sagemaker/source/custom/preprocessors/custom_preprocessor.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from ray.rllib.models.preprocessors import Preprocessor 4 | 5 | 6 | class MyPreprocessorClass(Preprocessor): 7 | """Custom preprocessing for observations 8 | 9 | Adopted from https://docs.ray.io/en/master/rllib-models.html#custom-preprocessors 10 | """ 11 | 12 | def _init_shape(self, obs_space, options): 13 | return obs_space.shape # New shape after preprocessing 14 | 15 | def transform(self, observation): 16 | # Do your custom stuff 17 | return observation 18 | -------------------------------------------------------------------------------- /sagemaker/source/custom/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | 3 | setup(name='custom_rl_estimator', 4 | version='1.0', 5 | description='Custom scripts for RLEstimator.', 6 | packages=['algorithms', 'envs', 'models', 'preprocessors'] 7 | ) 8 | -------------------------------------------------------------------------------- /sagemaker/source/procgen_ray_launcher.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). 4 | # You may not use this file except in compliance with the License. 5 | # A copy of the License is located at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # or in the "license" file accompanying this file. This file is distributed 10 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 11 | # express or implied. See the License for the specific language governing 12 | # permissions and limitations under the License. 
13 | 14 | import os 15 | import json 16 | import subprocess 17 | from shutil import copytree 18 | 19 | import gym 20 | import ray 21 | from ray.tune.registry import register_env 22 | from ray.tune import registry 23 | from ray.tune.tune import run_experiments, run, _make_scheduler 24 | from ray.tune.experiment import convert_to_experiment_list, Experiment 25 | 26 | from sagemaker_rl.ray_launcher import SageMakerRayLauncher 27 | from sagemaker_rl.tf_serving_utils import export_tf_serving, natural_keys 28 | 29 | try: 30 | from custom.callbacks import CustomCallbacks 31 | except ModuleNotFoundError: 32 | from callbacks import CustomCallbacks 33 | 34 | TERMINATION_SIGNAL = "JOB_TERMINATED" 35 | MODEL_OUTPUT_DIR = "/opt/ml/model" 36 | CHECKPOINTS_DIR = '/opt/ml/checkpoints' 37 | 38 | def custom_sync_func(source, target): 39 | """Custom rsync cmd to sync experiment artifact from remote nodes to driver node. 40 | """ 41 | sync_cmd = 'rsync -havP --inplace --stats -e "ssh -i /root/.ssh/id_rsa" {source} {target}'.format( 42 | source=source, target=target 43 | ) 44 | 45 | sync_process = subprocess.Popen(sync_cmd, shell=True) 46 | sync_process.wait() 47 | 48 | 49 | class ProcgenSageMakerRayLauncher(SageMakerRayLauncher): 50 | """Launcher class for Procgen experiments using Ray-RLLib. 51 | Customers should sub-class this, fill in the required methods, and 52 | call .train_main() to start a training process. 53 | 54 | Example:: 55 | 56 | class MyLauncher(ProcgenSageMakerRayLauncher): 57 | def register_env_creator(self): 58 | register_env( 59 | "stacked_procgen_env", # This should be different from procgen_env_wrapper 60 | lambda config: gym.wrappers.FrameStack(ProcgenEnvWrapper(config), 4) 61 | ) 62 | 63 | def get_experiment_config(self): 64 | return { 65 | "training": { 66 | "env": "procgen_env_wrapper", 67 | "run": "PPO", 68 | ... 
69 | } 70 | } 71 | 72 | if __name__ == "__main__": 73 | MyLauncher().train_main() 74 | """ 75 | def register_algorithms_and_preprocessors(self): 76 | raise NotImplementedError() 77 | 78 | def create_tf_serving_model(self, algorithm=None, env_string=None): 79 | self.register_env_creator() 80 | self.register_algorithms_and_preprocessors() 81 | if ray.__version__ >= "0.6.5": 82 | from ray.rllib.agents.registry import get_agent_class 83 | else: 84 | from ray.rllib.agents.agent import get_agent_class 85 | cls = get_agent_class(algorithm) 86 | with open(os.path.join(MODEL_OUTPUT_DIR, "params.json")) as config_json: 87 | config = json.load(config_json) 88 | use_torch = config.get("use_pytorch", False) 89 | if not use_torch: 90 | if 'callbacks' in config: 91 | callback_cls_str = config['callbacks'] # "", 92 | callback_cls = callback_cls_str.split("'")[-2].split(".")[-1] # CustomCallbacks 93 | config['callbacks'] = eval(callback_cls) 94 | print("Loaded config for TensorFlow serving.") 95 | config["monitor"] = False 96 | config["num_workers"] = 1 97 | config["num_gpus"] = 0 98 | agent = cls(env=env_string, config=config) 99 | checkpoint = os.path.join(MODEL_OUTPUT_DIR, "checkpoint") 100 | agent.restore(checkpoint) 101 | export_tf_serving(agent, MODEL_OUTPUT_DIR) 102 | 103 | def find_checkpoint_path_for_spot(self, prefix): 104 | ckpts = [] 105 | ckpts_prefix = '' 106 | for root, directories, files in os.walk(prefix): 107 | for directory in directories: 108 | if directory.startswith("checkpoint"): 109 | if not ckpts_prefix: 110 | ckpts_prefix = root 111 | ckpts.append(directory) 112 | return ckpts_prefix, ckpts 113 | 114 | def find_checkpoint_file_for_spot(self, prefix): 115 | ckpts_prefix, ckpts = self.find_checkpoint_path_for_spot(prefix) 116 | if not ckpts: 117 | return "" 118 | else: 119 | ckpts.sort(key=natural_keys) 120 | ckpt_name = ckpts[-1].replace("_", "-") 121 | return os.path.join(ckpts_prefix, ckpts[-1], ckpt_name) 122 | 123 | def launch(self): 124 | """Actual 
entry point into the class instance where everything happens. 125 | """ 126 | self.register_env_creator() 127 | self.register_algorithms_and_preprocessors() 128 | experiment_config, args, verbose = self.get_experiment_config() 129 | 130 | # All worker nodes will block at this step during training 131 | ray_cluster_config = self.ray_init_config() 132 | if not self.is_master_node: 133 | return 134 | ray_custom_cluster_config = { 135 | "object_store_memory": args.ray_object_store_memory, 136 | "memory": args.ray_memory, 137 | "redis_max_memory": args.ray_redis_max_memory, 138 | "num_cpus": args.ray_num_cpus, 139 | "num_gpus": args.ray_num_gpus 140 | } 141 | all_wokers_host_names = self.get_all_host_names()[1:] 142 | # Overwrite redis address for single instance job 143 | if len(all_wokers_host_names) == 0: 144 | ray_custom_cluster_config.update({"address": args.ray_address}) 145 | ray_cluster_config.update(ray_custom_cluster_config) 146 | 147 | # Start the driver on master node 148 | ray.init(**ray_cluster_config) 149 | 150 | # Spot instance is back 151 | if os.path.exists(CHECKPOINTS_DIR) and os.listdir(CHECKPOINTS_DIR): 152 | print("Instance is back. 
Local checkpoint path detected.") 153 | # Sample path in ckpt channel: opt/ml/checkpoints/training/PPO_procgen_/checkpoint_50/checkpoint-50 154 | checkpoint_file = self.find_checkpoint_file_for_spot(CHECKPOINTS_DIR) 155 | print("Setting checkpoint path to {}".format(checkpoint_file)) 156 | if checkpoint_file: 157 | experiment_config['training']['restore'] = checkpoint_file # Overwrite 158 | experiment_config = self.customize_experiment_config(experiment_config) 159 | experiment_config = self.set_up_checkpoint(experiment_config) 160 | experiment_config['training']['sync_to_driver'] = custom_sync_func 161 | 162 | run_experiments( 163 | experiment_config, 164 | scheduler=_make_scheduler(args), 165 | queue_trials=args.queue_trials, 166 | resume=args.resume, 167 | verbose=verbose, 168 | concurrent=True 169 | ) 170 | # If distributed job, send TERMINATION_SIGNAL to all workers. 171 | if len(all_wokers_host_names) > 0: 172 | self.sage_cluster_communicator.create_s3_signal(TERMINATION_SIGNAL) 173 | 174 | @classmethod 175 | def train_main(cls): 176 | """main function that kicks things off 177 | """ 178 | launcher = cls() 179 | launcher.launch() 180 | -------------------------------------------------------------------------------- /sagemaker/source/ray_experiment_builder.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). 4 | # You may not use this file except in compliance with the License. 5 | # A copy of the License is located at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # or in the "license" file accompanying this file. This file is distributed 10 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 11 | # express or implied. See the License for the specific language governing 12 | # permissions and limitations under the License. 
13 | 14 | import os 15 | from pathlib import Path 16 | import argparse 17 | import json 18 | import yaml 19 | 20 | from ray.tune.result import DEFAULT_RESULTS_DIR 21 | from ray.tune.resources import resources_to_json, json_to_resources 22 | 23 | try: 24 | from custom.callbacks import CustomCallbacks 25 | except ModuleNotFoundError: 26 | from callbacks import CustomCallbacks 27 | 28 | class RayExperimentBuilder: 29 | EXAMPLE_USAGE = """ 30 | Training example: 31 | python ./train.py --run DQN --env CartPole-v0 32 | 33 | Training with Config: 34 | python ./train.py -f experiments/simple-corridor-0.yaml 35 | 36 | 37 | Note that -f overrides all other trial-specific command-line options. 38 | """ 39 | def __init__(self, **kwargs): 40 | parser = self.create_parser() 41 | self.args, _ = parser.parse_known_args() 42 | 43 | if kwargs is not None: 44 | for k, v in kwargs.items(): 45 | self.args.__dict__[k] = v 46 | 47 | # Convert jsons to dicts in local mode 48 | self.args.scheduler_config = self.try_convert_json_to_dict(self.args.scheduler_config) 49 | self.args.config = self.try_convert_json_to_dict(self.args.config) 50 | self.args.stop = self.try_convert_json_to_dict(self.args.stop) 51 | 52 | def try_convert_json_to_dict(self, json_string): 53 | try: 54 | return json.loads(json_string) 55 | except TypeError: 56 | return json_string 57 | 58 | def make_parser(self, **kwargs): 59 | #TODO import method from starter-kit 60 | # Taken from https://github.com/ray-project/ray/blob/5303c3abe322cbd90f75bcf03ee1f9c3dad23aae/python/ray/tune/config_parser.py 61 | parser = argparse.ArgumentParser(**kwargs) 62 | 63 | parser.add_argument( 64 | "--run", 65 | default=None, 66 | type=str, 67 | help="The algorithm or model to train. This may refer to the name " 68 | "of a built-on algorithm (e.g. 
RLLib's DQN or PPO), or a " 69 | "user-defined trainable function or class registered in the " 70 | "tune registry.") 71 | parser.add_argument( 72 | "--stop", 73 | default="{}", 74 | help="The stopping criteria, specified in JSON. The keys may be any " 75 | "field returned by 'train()' e.g. " 76 | "'{\"time_total_s\": 600, \"training_iteration\": 100000}' to stop " 77 | "after 600 seconds or 100k iterations, whichever is reached first.") 78 | parser.add_argument( 79 | "--config", 80 | default="{}", 81 | help="Algorithm-specific configuration (e.g. env, hyperparams), " 82 | "specified in JSON.") 83 | parser.add_argument( 84 | "--resources-per-trial", 85 | default=None, 86 | type=json_to_resources, 87 | help="Override the machine resources to allocate per trial, e.g. " 88 | "'{\"cpu\": 64, \"gpu\": 8}'. Note that GPUs will not be assigned " 89 | "unless you specify them here. For RLlib, you probably want to " 90 | "leave this alone and use RLlib configs to control parallelism.") 91 | parser.add_argument( 92 | "--num-samples", 93 | default=1, 94 | type=int, 95 | help="Number of times to repeat each trial.") 96 | parser.add_argument( 97 | "--checkpoint-freq", 98 | default=0, 99 | type=int, 100 | help="How many training iterations between checkpoints. " 101 | "A value of 0 (default) disables checkpointing.") 102 | parser.add_argument( 103 | "--checkpoint-at-end", 104 | action="store_true", 105 | help="Whether to checkpoint at the end of the experiment. " 106 | "Default is False.") 107 | parser.add_argument( 108 | "--sync-on-checkpoint", 109 | action="store_true", 110 | help="Enable sync-down of trial checkpoint to guarantee " 111 | "recoverability. 
If unset, checkpoint syncing from worker " 112 | "to driver is asynchronous, so unset this only if synchronous " 113 | "checkpointing is too slow and trial restoration failures " 114 | "can be tolerated.") 115 | parser.add_argument( 116 | "--keep-checkpoints-num", 117 | default=None, 118 | type=int, 119 | help="Number of best checkpoints to keep. Others get " 120 | "deleted. Default (None) keeps all checkpoints.") 121 | parser.add_argument( 122 | "--checkpoint-score-attr", 123 | default="training_iteration", 124 | type=str, 125 | help="Specifies by which attribute to rank the best checkpoint. " 126 | "Default is increasing order. If attribute starts with min- it " 127 | "will rank attribute in decreasing order. Example: " 128 | "min-validation_loss") 129 | parser.add_argument( 130 | "--export-formats", 131 | default=None, 132 | help="List of formats that exported at the end of the experiment. " 133 | "Default is None. For RLlib, 'checkpoint' and 'model' are " 134 | "supported for TensorFlow policy graphs.") 135 | parser.add_argument( 136 | "--max-failures", 137 | default=3, 138 | type=int, 139 | help="Try to recover a trial from its last checkpoint at least this " 140 | "many times. 
Only applies if checkpointing is enabled.") 141 | parser.add_argument( 142 | "--scheduler", 143 | default="FIFO", 144 | type=str, 145 | help="FIFO (default), MedianStopping, AsyncHyperBand, " 146 | "HyperBand, or HyperOpt.") 147 | parser.add_argument( 148 | "--scheduler-config", 149 | default="{}", 150 | help="Config options to pass to the scheduler.") 151 | 152 | # Note: this currently only makes sense when running a single trial 153 | parser.add_argument( 154 | "--restore", 155 | default=None, 156 | type=str, 157 | help="If specified, restore from this checkpoint.") 158 | 159 | return parser 160 | 161 | def create_parser(self): 162 | parser = self.make_parser( 163 | formatter_class=argparse.RawDescriptionHelpFormatter, 164 | description="Train a reinforcement learning agent.", 165 | epilog=self.EXAMPLE_USAGE, 166 | ) 167 | 168 | # See also the base parser definition in ray/tune/config_parser.py 169 | parser.add_argument( 170 | "--ray-address", 171 | default=None, 172 | type=str, 173 | help="Connect to an existing Ray cluster at this address instead " 174 | "of starting a new one.") 175 | parser.add_argument( 176 | "--ray-num-cpus", 177 | default=None, 178 | type=int, 179 | help="--num-cpus to use if starting a new cluster.") 180 | parser.add_argument( 181 | "--ray-num-gpus", 182 | default=None, 183 | type=int, 184 | help="--num-gpus to use if starting a new cluster.") 185 | parser.add_argument( 186 | "--ray-num-nodes", 187 | default=None, 188 | type=int, 189 | help="Emulate multiple cluster nodes for debugging.") 190 | parser.add_argument( 191 | "--ray-redis-max-memory", 192 | default=None, 193 | type=int, 194 | help="--redis-max-memory to use if starting a new cluster.") 195 | parser.add_argument( 196 | "--ray-memory", 197 | default=None, 198 | type=int, 199 | help="--memory to use if starting a new cluster.") 200 | parser.add_argument( 201 | "--ray-object-store-memory", 202 | default=None, 203 | type=int, 204 | help="--object-store-memory to use if starting a 
new cluster.") 205 | parser.add_argument( 206 | "--experiment-name", 207 | default="default", 208 | type=str, 209 | help="Name of the subdirectory under `local_dir` to put results in.") 210 | parser.add_argument( 211 | "--local-dir", 212 | default=DEFAULT_RESULTS_DIR, 213 | type=str, 214 | help="Local dir to save training results to. Defaults to '{}'.".format( 215 | DEFAULT_RESULTS_DIR)) 216 | parser.add_argument( 217 | "--upload-dir", 218 | default="", 219 | type=str, 220 | help="Optional URI to sync training results to (e.g. s3://bucket).") 221 | parser.add_argument( 222 | "-v", action="store_true", help="Whether to use INFO level logging.") 223 | parser.add_argument( 224 | "-vv", action="store_true", help="Whether to use DEBUG level logging.") 225 | parser.add_argument( 226 | "--resume", 227 | action="store_true", 228 | help="Whether to attempt to resume previous Tune experiments.") 229 | parser.add_argument( 230 | "--torch", 231 | action="store_true", 232 | help="Whether to use PyTorch (instead of tf) as the DL framework.") 233 | parser.add_argument( 234 | "--eager", 235 | action="store_true", 236 | help="Whether to attempt to enable TF eager execution.") 237 | parser.add_argument( 238 | "--trace", 239 | action="store_true", 240 | help="Whether to attempt to enable tracing for eager mode.") 241 | parser.add_argument( 242 | "--env", default=None, type=str, help="The gym environment to use.") 243 | parser.add_argument( 244 | "--queue-trials", 245 | action="store_true", 246 | help=( 247 | "Whether to queue trials when the cluster does not currently have " 248 | "enough resources to launch one. This should be set to True when " 249 | "running on an autoscaling cluster to enable automatic scale-up.")) 250 | parser.add_argument( 251 | "-f", 252 | "--config-file", 253 | default=None, 254 | type=str, 255 | help="If specified, use config options from this file. 
Note that this " 256 | "overrides any trial-specific options set via flags above.") 257 | 258 | return parser 259 | 260 | def get_experiment_definition(self): 261 | if self.args.config_file: 262 | with open(self.args.config_file) as f: 263 | experiments = yaml.safe_load(f) 264 | exp_name_list = list(experiments.keys()) 265 | assert len(exp_name_list)==1 266 | # overwirte experiment name for SageMaker to recognize 267 | experiments['training'] = experiments.pop(exp_name_list[0]) 268 | else: 269 | experiments = { 270 | self.args.experiment_name: { # i.e. log to ~/ray_results/default 271 | "run": self.args.run, 272 | "checkpoint_freq": self.args.checkpoint_freq, 273 | "keep_checkpoints_num": self.args.keep_checkpoints_num, 274 | "checkpoint_score_attr": self.args.checkpoint_score_attr, 275 | "local_dir": self.args.local_dir, 276 | "resources_per_trial": ( 277 | self.args.resources_per_trial and 278 | resources_to_json(self.args.resources_per_trial)), 279 | "stop": self.args.stop, 280 | "config": dict(self.args.config, env=self.args.env), 281 | "restore": self.args.restore, 282 | "num_samples": self.args.num_samples, 283 | "upload_dir": self.args.upload_dir, 284 | } 285 | } 286 | 287 | verbose = 1 288 | for exp in experiments.values(): 289 | # Bazel makes it hard to find files specified in `args` (and `data`). 290 | # Look for them here. 291 | # NOTE: Some of our yaml files don't have a `config` section. 292 | if exp.get("config", {}).get("input") and \ 293 | not os.path.exists(exp["config"]["input"]): 294 | # This script runs in the ray/rllib dir. 
295 | rllib_dir = Path(__file__).parent 296 | input_file = rllib_dir.absolute().joinpath(exp["config"]["input"]) 297 | exp["config"]["input"] = str(input_file) 298 | 299 | if not exp.get("run"): 300 | raise ValueError("The following arguments are required: run") 301 | if not exp.get("env") and not exp.get("config", {}).get("env"): 302 | raise ValueError("The following arguments are required: env") 303 | 304 | if self.args.eager: 305 | exp["config"]["eager"] = True 306 | if self.args.torch: 307 | exp["config"]["use_pytorch"] = True 308 | if self.args.v: 309 | exp["config"]["log_level"] = "INFO" 310 | verbose = 2 311 | if self.args.vv: 312 | exp["config"]["log_level"] = "DEBUG" 313 | verbose = 3 314 | if self.args.trace: 315 | if not exp["config"].get("eager"): 316 | raise ValueError("Must enable --eager to enable tracing.") 317 | exp["config"]["eager_tracing"] = True 318 | 319 | ### Add Custom Callbacks 320 | exp["config"]["callbacks"] = CustomCallbacks 321 | return experiments, self.args, verbose 322 | -------------------------------------------------------------------------------- /sagemaker/source/requirements.txt: -------------------------------------------------------------------------------- 1 | ray[rllib]==0.8.5 2 | procgen==0.10.1 -------------------------------------------------------------------------------- /sagemaker/source/train-local.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). 4 | # You may not use this file except in compliance with the License. 5 | # A copy of the License is located at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # or in the "license" file accompanying this file. This file is distributed 10 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 11 | # express or implied. 
def run():
    """Launch a local Ray Tune training run using the custom vision model.

    Registers the model with RLlib's ModelCatalog, builds the experiment
    definition from CLI args / config file via RayExperimentBuilder, starts
    (or connects to) a Ray cluster, and runs the experiments.
    """
    ModelCatalog.register_custom_model("my_vision_network", MyVisionNetwork)

    model_overrides = {
        "model": {
            "custom_model": "my_vision_network",
            "conv_filters": [[16, [5, 5], 4], [32, [3, 3], 1], [256, [3, 3], 1]],
            "custom_preprocessor": None,
        }
    }

    builder = RayExperimentBuilder(**model_overrides)
    experiments, args, verbose = builder.get_experiment_definition()

    # Resource limits come straight from the parsed command-line arguments.
    ray.init(
        address=args.ray_address,
        object_store_memory=args.ray_object_store_memory,
        memory=args.ray_memory,
        redis_max_memory=args.ray_redis_max_memory,
        num_cpus=args.ray_num_cpus,
        num_gpus=args.ray_num_gpus,
    )

    run_experiments(
        experiments,
        scheduler=_make_scheduler(args),
        queue_trials=args.queue_trials,
        resume=args.resume,
        verbose=verbose,
        concurrent=True,
    )


if __name__ == "__main__":
    run()
class MyLauncher(ProcgenSageMakerRayLauncher):
    """SageMaker Ray launcher for homogeneous multi-instance CPU training
    of PPO on ProcGen environments."""

    def register_env_creator(self):
        # The registered name must differ from "procgen_env_wrapper".
        register_env(
            "stacked_procgen_env",
            lambda config: gym.wrappers.FrameStack(ProcgenEnvWrapper(config), 4),
        )

    def _get_ray_config(self):
        """Ray cluster options passed to the launcher."""
        return {
            # Leave "ray_num_cpus" and "ray_num_gpus" blank for multi-instance training
            # "ray_num_cpus": 16,
            # "ray_num_gpus": 0,
            "eager": False,
            "v": True,  # required for CloudWatch to catch the progress
        }

    def _get_rllib_config(self):
        """RLlib experiment/trainer configuration for CPU-only training."""
        return {
            "experiment_name": "training",
            "run": "PPO",
            "env": "procgen_env_wrapper",
            "stop": {
                # 'time_total_s': 60,
                'training_iteration': 4000,
            },
            "checkpoint_freq": 1,
            "config": {
                "ignore_worker_failures": True,
                "gamma": 0.999,
                "kl_coeff": 0.2,
                "lambda": 0.9,
                "lr": 0.0001,
                # Adjust based on total number of CPUs available in the cluster.
                "num_workers": 16 * 2 - 1,
                # Adjust based on number of GPUs available in a single node.
                "num_gpus": 0,
                "rollout_fragment_length": 140,
                "train_batch_size": 2048,
                "batch_mode": "truncate_episodes",
                "num_sgd_iter": 10,
                "use_pytorch": False,
                "model": {
                    # "custom_model": "my_vision_network",
                    # "conv_filters": [[16, [5, 5], 4], [32, [3, 3], 1], [256, [3, 3], 1]],
                    "custom_model": "impala_cnn_tf",
                },
                "env_config": {
                    # See https://github.com/AIcrowd/neurips2020-procgen-starter-kit/blob/master/experiments/procgen-starter-example.yaml#L34 for an explanation.
                    "env_name": "coinrun",
                    "num_levels": 0,
                    "start_level": 0,
                    "paint_vel_info": False,
                    "use_generated_assets": False,
                    "center_agent": True,
                    "use_sequential_levels": False,
                    "distribution_mode": "easy",
                },
            },
            "queue_trials": True,
            # Uncomment if you want to use a config_file
            # Note that this overrides any options set above
            # "config_file": path/to/your/config/file
        }

    def register_algorithms_and_preprocessors(self):
        """Register custom algorithms, preprocessors, and models with Ray."""
        try:
            from custom.algorithms import CUSTOM_ALGORITHMS
            from custom.preprocessors import CUSTOM_PREPROCESSORS
            from custom.models.my_vision_network import MyVisionNetwork
            from custom.models.impala_cnn_tf import ImpalaCNN
        except ModuleNotFoundError:
            # Fallback layout used when running inside the container.
            from algorithms import CUSTOM_ALGORITHMS
            from preprocessors import CUSTOM_PREPROCESSORS
            from models.my_vision_network import MyVisionNetwork
            from models.impala_cnn_tf import ImpalaCNN

        load_algorithms(CUSTOM_ALGORITHMS)
        load_preprocessors(CUSTOM_PREPROCESSORS)
        ModelCatalog.register_custom_model("my_vision_network", MyVisionNetwork)
        ModelCatalog.register_custom_model("impala_cnn_tf", ImpalaCNN)

    def get_experiment_config(self):
        """Merge Ray and RLlib configs into one experiment definition."""
        params = dict(self._get_ray_config())
        params.update(self._get_rllib_config())
        reb = RayExperimentBuilder(**params)
        return reb.get_experiment_definition()


if __name__ == "__main__":
    MyLauncher().train_main()
class MyLauncher(ProcgenSageMakerRayLauncher):
    """SageMaker Ray launcher for homogeneous multi-instance GPU training
    of PPO on ProcGen environments."""

    def register_env_creator(self):
        # The registered name must differ from "procgen_env_wrapper".
        register_env(
            "stacked_procgen_env",
            lambda config: gym.wrappers.FrameStack(ProcgenEnvWrapper(config), 4),
        )

    def _get_ray_config(self):
        """Ray cluster options passed to the launcher."""
        return {
            # Leave "ray_num_cpus" and "ray_num_gpus" blank for multi-instance training
            # "ray_num_cpus": 16,
            # "ray_num_gpus": 0,
            "eager": False,
            "v": True,  # required for CloudWatch to catch the progress
        }

    def _get_rllib_config(self):
        """RLlib experiment/trainer configuration for GPU training."""
        return {
            "experiment_name": "training",
            "run": "PPO",
            "env": "procgen_env_wrapper",
            "stop": {
                # 'time_total_s': 60,
                'training_iteration': 4000,
            },
            "checkpoint_freq": 1,
            "config": {
                "ignore_worker_failures": True,
                "gamma": 0.999,
                "kl_coeff": 0.2,
                "lambda": 0.9,
                "lr": 0.0001,
                # Adjust based on total number of CPUs available in the
                # cluster, e.g., p3.2xlarge has 8 CPUs.
                "num_workers": 8 * 2 - 1,
                # Adjust based on number of GPUs available in a single node,
                # e.g., p3.2xlarge has 1 GPU.
                "num_gpus": 0.2,
                # e.g., p3.2xlarge: (1 GPU - num_gpus) / num_workers = 0.1
                "num_gpus_per_worker": 0.1,
                "rollout_fragment_length": 140,
                "train_batch_size": 256 * (8 * 2 - 1),
                "batch_mode": "truncate_episodes",
                "num_sgd_iter": 10,
                "use_pytorch": False,
                "model": {
                    "custom_model": "impala_cnn_tf",
                },
                "env_config": {
                    # See https://github.com/AIcrowd/neurips2020-procgen-starter-kit/blob/master/experiments/procgen-starter-example.yaml#L34 for an explanation.
                    "env_name": "coinrun",
                    "num_levels": 0,
                    "start_level": 0,
                    "paint_vel_info": False,
                    "use_generated_assets": False,
                    "center_agent": True,
                    "use_sequential_levels": False,
                    "distribution_mode": "easy",
                },
            },
            "queue_trials": True,
            # Uncomment if you want to use a config_file
            # Note that this overrides any options set above
            # "config_file": path/to/your/config/file
        }

    def register_algorithms_and_preprocessors(self):
        """Register custom algorithms, preprocessors, and models with Ray."""
        try:
            from custom.algorithms import CUSTOM_ALGORITHMS
            from custom.preprocessors import CUSTOM_PREPROCESSORS
            from custom.models.my_vision_network import MyVisionNetwork
            from custom.models.impala_cnn_tf import ImpalaCNN
        except ModuleNotFoundError:
            # Fallback layout used when running inside the container.
            from algorithms import CUSTOM_ALGORITHMS
            from preprocessors import CUSTOM_PREPROCESSORS
            from models.my_vision_network import MyVisionNetwork
            from models.impala_cnn_tf import ImpalaCNN

        load_algorithms(CUSTOM_ALGORITHMS)
        load_preprocessors(CUSTOM_PREPROCESSORS)
        ModelCatalog.register_custom_model("my_vision_network", MyVisionNetwork)
        ModelCatalog.register_custom_model("impala_cnn_tf", ImpalaCNN)

    def get_experiment_config(self):
        """Merge Ray and RLlib configs into one experiment definition."""
        params = dict(self._get_ray_config())
        params.update(self._get_rllib_config())
        reb = RayExperimentBuilder(**params)
        return reb.get_experiment_definition()


if __name__ == "__main__":
    MyLauncher().train_main()
class MyLauncher(ProcgenSageMakerRayLauncher):
    """SageMaker Ray launcher for single-instance PPO training on ProcGen."""

    def register_env_creator(self):
        # The registered name must differ from "procgen_env_wrapper".
        register_env(
            "stacked_procgen_env",
            lambda config: gym.wrappers.FrameStack(ProcgenEnvWrapper(config), 4),
        )

    def _get_ray_config(self):
        """Ray cluster options passed to the launcher."""
        return {
            "ray_num_cpus": 8,  # adjust based on selected instance type
            "ray_num_gpus": 1,
            "eager": False,
            "v": True,  # required for CloudWatch to catch the progress
        }

    def _get_rllib_config(self):
        """RLlib experiment/trainer configuration for single-node training."""
        return {
            "experiment_name": "training",
            "run": "PPO",
            "env": "procgen_env_wrapper",
            "stop": {
                # 'time_total_s': 60,
                'training_iteration': 500,
            },
            "checkpoint_freq": 20,
            "checkpoint_at_end": True,
            "keep_checkpoints_num": 5,
            "queue_trials": False,
            "config": {
                # === Environment Settings ===
                "gamma": 0.999,
                "lambda": 0.95,
                "lr": 5.0e-4,
                "num_sgd_iter": 3,
                "kl_coeff": 0.0,
                "kl_target": 0.01,
                "vf_loss_coeff": 0.5,
                "entropy_coeff": 0.01,
                "clip_param": 0.2,
                "vf_clip_param": 0.2,
                "grad_clip": 0.5,
                "observation_filter": "NoFilter",
                "vf_share_layers": True,
                "soft_horizon": False,
                "no_done_at_end": False,
                "normalize_actions": False,
                "clip_actions": True,
                "ignore_worker_failures": True,
                "use_pytorch": False,
                "sgd_minibatch_size": 2048,  # 8 minibatches per epoch
                "train_batch_size": 16384,  # 2048 * 8
                # === Settings for Model ===
                "model": {
                    "custom_model": "impala_cnn_tf",
                },
                # === Settings for Rollout Worker processes ===
                # Adjust num_workers based on total number of CPUs available
                # in the cluster, e.g., p3.2xlarge has 8 CPUs.
                "num_workers": 6,
                "rollout_fragment_length": 140,
                "batch_mode": "truncate_episodes",
                # === Advanced Resource Settings ===
                "num_envs_per_worker": 12,
                "num_cpus_per_worker": 1,
                "num_cpus_for_driver": 1,
                "num_gpus_per_worker": 0.1,
                # === Settings for the Trainer process ===
                # Adjust num_gpus based on GPUs in a single node, e.g.,
                # p3.2xlarge has 1 GPU.
                "num_gpus": 0.3,
                # === Exploration Settings ===
                "explore": True,
                "exploration_config": {
                    "type": "StochasticSampling",
                },
                # === Settings for the Procgen Environment ===
                "env_config": {
                    # See https://github.com/AIcrowd/neurips2020-procgen-starter-kit/blob/master/experiments/procgen-starter-example.yaml#L34 for an explanation.
                    "env_name": "coinrun",
                    "num_levels": 0,
                    "start_level": 0,
                    "paint_vel_info": False,
                    "use_generated_assets": False,
                    "center_agent": True,
                    "use_sequential_levels": False,
                    "distribution_mode": "easy",
                },
            },
        }

    def register_algorithms_and_preprocessors(self):
        """Register custom algorithms, preprocessors, and models with Ray."""
        try:
            from custom.algorithms import CUSTOM_ALGORITHMS
            from custom.preprocessors import CUSTOM_PREPROCESSORS
            from custom.models.impala_cnn_tf import ImpalaCNN
        except ModuleNotFoundError:
            # Fallback layout used when running inside the container.
            from algorithms import CUSTOM_ALGORITHMS
            from preprocessors import CUSTOM_PREPROCESSORS
            from models.impala_cnn_tf import ImpalaCNN

        load_algorithms(CUSTOM_ALGORITHMS)
        load_preprocessors(CUSTOM_PREPROCESSORS)
        ModelCatalog.register_custom_model("impala_cnn_tf", ImpalaCNN)

    def get_experiment_config(self):
        """Merge Ray and RLlib configs into one experiment definition."""
        params = dict(self._get_ray_config())
        params.update(self._get_rllib_config())
        reb = RayExperimentBuilder(**params)
        return reb.get_experiment_definition()


if __name__ == "__main__":
    MyLauncher().train_main()
def get_latest_sagemaker_training_job(name_contains):
    """Return the name of the most recent completed SageMaker training job
    whose name contains `name_contains`.

    Raises:
        AssertionError: if no completed job matches.
    """
    sagemaker_client = boto3.client('sagemaker')
    response = sagemaker_client.list_training_jobs(
        NameContains=name_contains,
        StatusEquals='Completed'
    )
    training_jobs = response['TrainingJobSummaries']
    assert len(training_jobs) > 0, "Couldn't find any completed training jobs with '{}' in name.".format(name_contains)
    # list_training_jobs sorts by creation time, newest first, by default.
    latest_training_job = training_jobs[0]['TrainingJobName']
    return latest_training_job


def download_ray_checkpoint(checkpoint_dir, s3_bucket, latest_training_job):
    """Download the newest Ray checkpoint (and params.json) produced by a
    SageMaker training job from S3.

    Args:
        checkpoint_dir: local directory receiving the checkpoint files.
        s3_bucket: S3 bucket name the job wrote to.
        latest_training_job: training job name (prefix of the S3 key).

    Returns:
        int: the number of the checkpoint that was downloaded.

    Raises:
        RuntimeError: if no checkpoint object is found under the prefix.
    """
    checkpoint_data = "{}/{}/output/intermediate/training".format(s3_bucket, latest_training_job)
    checkpoint_bucket_key = "/".join(checkpoint_data.split("/")[1:]) + "/"

    s3 = boto3.client('s3')
    intermediate = s3.list_objects_v2(Bucket=s3_bucket, Prefix=checkpoint_bucket_key, Delimiter='//')

    last_checkpoint_num = 0
    last_checkpoint_key = None

    for content in intermediate['Contents']:
        # params.json holds the trainer config.
        # NOTE(review): written to the hard-coded "checkpoint/" directory
        # (not checkpoint_dir) because get_model_config() reads it from
        # there — confirm callers always pass checkpoint_dir="checkpoint".
        if "params.json" in content["Key"]:
            with open('checkpoint/params.json', 'wb') as data:
                s3.download_fileobj(s3_bucket, content["Key"], data)

        # Track the highest-numbered checkpoint-N object.
        checkpoint = re.search(r"checkpoint-([0-9]+)", content["Key"])
        if checkpoint is not None:
            checkpoint_num = int(checkpoint.group(1))
            if checkpoint_num > last_checkpoint_num:
                last_checkpoint_num = checkpoint_num
                last_checkpoint_key = content["Key"]

    # Fail with a clear message instead of passing Key=None to boto3.
    if last_checkpoint_key is None:
        raise RuntimeError(
            "No checkpoint files found under s3://{}/{}".format(s3_bucket, checkpoint_bucket_key))

    with open('{}/checkpoint-{}'.format(checkpoint_dir, last_checkpoint_num), 'wb') as data:
        s3.download_fileobj(s3_bucket, last_checkpoint_key, data)
    with open('{}/checkpoint-{}.tune_metadata'.format(checkpoint_dir, last_checkpoint_num), 'wb') as data:
        s3.download_fileobj(s3_bucket, last_checkpoint_key + ".tune_metadata", data)

    return last_checkpoint_num


def get_model_config():
    """Load the trainer config from checkpoint/params.json and adapt it for
    local, single-worker, CPU-only inference.

    Returns:
        dict: the adapted trainer configuration.
    """
    with open(os.path.join("checkpoint", "params.json")) as f:
        config = json.load(f)

    config["monitor"] = False
    config["num_workers"] = 1
    config["num_gpus"] = 0

    if 'callbacks' in config:
        # params.json stores the callback class as its repr string, e.g.
        # "<class '...CustomCallbacks'>"; recover the bare class name and
        # resolve it to the class object.
        # SECURITY NOTE: eval() on file contents — params.json must come
        # from a trusted training job, never from untrusted input.
        callback_cls_str = config['callbacks']
        callback_cls = callback_cls_str.split("'")[-2].split(".")[-1]  # e.g. CustomCallbacks
        config['callbacks'] = eval(callback_cls)

    return config


def rollout(agent,
            env_name,
            num_steps,
            num_episodes=0,
            saver=None,
            no_render=True,
            video_dir=None):
    """Run a trained agent in `env_name`, optionally collecting RGB frames.

    Adapted from
    https://github.com/AIcrowd/neurips2020-procgen-starter-kit/blob/master/rollout.py#L349

    Args:
        agent: trained RLlib agent (trainer) to evaluate.
        env_name: gym environment id to roll out in.
        num_steps: step budget across all episodes (0 = unlimited).
        num_episodes: episode budget (0 = unlimited).
        saver: optional RolloutSaver collecting transitions.
        no_render: when False, env.render frames are collected and returned.
        video_dir: unused here; kept for signature compatibility.

    Returns:
        list: RGB frames (empty when no_render is True).
    """
    policy_agent_mapping = default_policy_agent_mapping

    if saver is None:
        saver = RolloutSaver()

    if hasattr(agent, "workers") and isinstance(agent.workers, WorkerSet):
        # env = agent.workers.local_worker().env
        env = gym.make(env_name, render_mode="rgb_array")
        multiagent = isinstance(env, MultiAgentEnv)
        if agent.workers.local_worker().multiagent:
            policy_agent_mapping = agent.config["multiagent"][
                "policy_mapping_fn"]

        policy_map = agent.workers.local_worker().policy_map
        state_init = {p: m.get_initial_state() for p, m in policy_map.items()}
        # A non-empty initial state marks a recurrent (LSTM) policy.
        use_lstm = {p: len(s) > 0 for p, s in state_init.items()}
    else:
        env = gym.make(env_name)
        multiagent = False
        try:
            policy_map = {DEFAULT_POLICY_ID: agent.policy}
        except AttributeError:
            raise AttributeError(
                "Agent ({}) does not have a `policy` property! This is needed "
                "for performing (trained) agent rollouts.".format(agent))
        use_lstm = {DEFAULT_POLICY_ID: False}

    action_init = {
        p: _flatten_action(m.action_space.sample())
        for p, m in policy_map.items()
    }

    steps = 0
    episodes = 0
    rgb_array = []

    while keep_going(steps, num_steps, episodes, num_episodes):
        mapping_cache = {}  # in case policy_agent_mapping is stochastic
        saver.begin_rollout()
        obs = env.reset()
        agent_states = DefaultMapping(
            lambda agent_id: state_init[mapping_cache[agent_id]])
        prev_actions = DefaultMapping(
            lambda agent_id: action_init[mapping_cache[agent_id]])
        prev_rewards = collections.defaultdict(lambda: 0.)
        done = False
        reward_total = 0.0
        episode_steps = 0
        while not done and keep_going(steps, num_steps, episodes,
                                      num_episodes):
            # Normalize single-agent obs into the multi-agent dict shape.
            multi_obs = obs if multiagent else {_DUMMY_AGENT_ID: obs}
            action_dict = {}
            for agent_id, a_obs in multi_obs.items():
                if a_obs is not None:
                    policy_id = mapping_cache.setdefault(
                        agent_id, policy_agent_mapping(agent_id))
                    p_use_lstm = use_lstm[policy_id]
                    if p_use_lstm:
                        # Recurrent policy: thread hidden state through steps.
                        a_action, p_state, _ = agent.compute_action(
                            a_obs,
                            state=agent_states[agent_id],
                            prev_action=prev_actions[agent_id],
                            prev_reward=prev_rewards[agent_id],
                            policy_id=policy_id)
                        agent_states[agent_id] = p_state
                    else:
                        a_action = agent.compute_action(
                            a_obs,
                            prev_action=prev_actions[agent_id],
                            prev_reward=prev_rewards[agent_id],
                            policy_id=policy_id)
                    a_action = _flatten_action(a_action)  # tuple actions
                    action_dict[agent_id] = a_action
                    prev_actions[agent_id] = a_action
            action = action_dict

            action = action if multiagent else action[_DUMMY_AGENT_ID]
            next_obs, reward, done, info = env.step(action)
            episode_steps += 1
            if multiagent:
                for agent_id, r in reward.items():
                    prev_rewards[agent_id] = r
            else:
                prev_rewards[_DUMMY_AGENT_ID] = reward

            if multiagent:
                done = done["__all__"]
                reward_total += sum(reward.values())
            else:
                reward_total += reward
            if not no_render:
                rgb_array.append(env.render(mode='rgb_array'))
            saver.append_step(obs, action, next_obs, reward, done, info)
            steps += 1
            obs = next_obs
        saver.end_rollout()
        print("Episode #{}: reward: {} steps: {}".format(episodes, reward_total, episode_steps))
        if done:
            episodes += 1
    return rgb_array
def load_models(local_dir="."):
    """Source every Python file in `<local_dir>/models`.

    Each sourced file is expected to register its custom model(s) itself
    (e.g. via a registry call) so they become available to Ray.
    """
    for model_path in glob.glob(os.path.join(local_dir, "models", "*.py")):
        _source_file(model_path)


def load_algorithms(CUSTOM_ALGORITHMS):
    """Register the repository's custom algorithms with the Tune registry.

    `CUSTOM_ALGORITHMS` maps a trainable name to a zero-argument factory
    whose return value is registered under that name.
    """
    from ray.tune import registry

    for algo_name, algo_factory in CUSTOM_ALGORITHMS.items():
        registry.register_trainable(algo_name, algo_factory())


def load_preprocessors(CUSTOM_PREPROCESSORS):
    """Register custom preprocessors with RLlib's ModelCatalog.

    `CUSTOM_PREPROCESSORS` maps a preprocessor name to its class.
    """
    from ray.rllib.models import ModelCatalog

    for preprocessor_name, preprocessor_cls in CUSTOM_PREPROCESSORS.items():
        ModelCatalog.register_custom_preprocessor(preprocessor_name, preprocessor_cls)
"source": [ 7 | "# Amazon SageMaker Notebook for ProcGen Starter Kit with homogeneous scaling of multiple CPU instances " 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import os\n", 17 | "import time\n", 18 | "import yaml\n", 19 | "\n", 20 | "import sagemaker\n", 21 | "from sagemaker.rl import RLEstimator, RLToolkit, RLFramework\n", 22 | "import boto3\n", 23 | "\n", 24 | "from IPython.display import HTML, Markdown\n", 25 | "from source.common.docker_utils import build_and_push_docker_image\n", 26 | "from source.common.markdown_helper import generate_help_for_s3_endpoint_permissions, create_s3_endpoint_manually" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "with open(os.path.join(\"config\", \"sagemaker_config.yaml\")) as f:\n", 36 | " sagemaker_config = yaml.safe_load(f)" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "## Initialize Amazon SageMaker" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "sm_session = sagemaker.session.Session()\n", 53 | "s3_bucket = sagemaker_config[\"S3_BUCKET\"]\n", 54 | "\n", 55 | "s3_output_path = 's3://{}/'.format(s3_bucket)\n", 56 | "print(\"S3 bucket path: {}\".format(s3_output_path))" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "job_name_prefix = 'sm-ray-cpu-distributed-procgen'\n", 66 | "\n", 67 | "role = sagemaker.get_execution_role()\n", 68 | "print(role)" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "#### Note that `local_mode = True` does not work with heterogeneous scaling" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | 
"metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "instance_type = sagemaker_config[\"CPU_TRAINING_INSTANCE\"]" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "# Configure the framework you want to use\n", 92 | "\n", 93 | "Set `framework` to `\"tf\"` or `\"torch\"` for tensorflow or pytorch respectively.\n", 94 | "\n", 95 | "You will also have to edit your entry point i.e., `train-sagemaker-distributed-cpu.py` with the configuration parameter `\"use_pytorch\"` to match the framework that you have selected." 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "framework = \"tf\"" 105 | ] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "metadata": {}, 110 | "source": [ 111 | "# Train your homogeneous scaling job here" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": {}, 117 | "source": [ 118 | "### Edit the training code\n", 119 | "\n", 120 | "The training code is written in the file `train-sagemaker-distributed-cpu.py` which is uploaded in the /source directory.\n", 121 | "\n", 122 | "*Note that ray will automatically set `\"ray_num_cpus\"` and `\"ray_num_gpus\"` in `_get_ray_config`*" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "!pygmentize source/train-sagemaker-distributed-cpu.py" 132 | ] 133 | }, 134 | { 135 | "cell_type": "markdown", 136 | "metadata": {}, 137 | "source": [ 138 | "### Train the RL model using the Python SDK Script mode\n", 139 | "\n", 140 | "When using SageMaker for distributed training, you can select a GPU or CPU instance. The RLEstimator is used for training RL jobs.\n", 141 | "\n", 142 | "1. Specify the source directory where the environment, presets and training code is uploaded.\n", 143 | "2. Specify the entry point as the training code\n", 144 | "3. 
Specify the image (CPU or GPU) to be used for the training environment.\n", 145 | "4. Define the training parameters such as the instance count, job name, S3 path for output and job name.\n", 146 | "5. Define the metrics definitions that you are interested in capturing in your logs. These can also be visualized in CloudWatch and SageMaker Notebooks." 147 | ] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "metadata": {}, 152 | "source": [ 153 | "#### CPU docker image" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "metadata": { 160 | "scrolled": true 161 | }, 162 | "outputs": [], 163 | "source": [ 164 | "# Build CPU image\n", 165 | "cpu_repository_short_name = \"sagemaker-procgen-ray-%s\" % \"cpu\"\n", 166 | "docker_build_args = {\n", 167 | " 'CPU_OR_GPU': \"cpu\", \n", 168 | " 'AWS_REGION': boto3.Session().region_name,\n", 169 | " 'FRAMEWORK': framework\n", 170 | "}\n", 171 | "image_name = build_and_push_docker_image(cpu_repository_short_name, build_args=docker_build_args)\n", 172 | "print(\"Using CPU ECR image %s\" % image_name)" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": null, 178 | "metadata": {}, 179 | "outputs": [], 180 | "source": [ 181 | "metric_definitions = [\n", 182 | " {'Name': 'training_iteration', 'Regex': 'training_iteration: ([-+]?[0-9]*[.]?[0-9]+([eE][-+]?[0-9]+)?)'}, \n", 183 | " {'Name': 'episodes_total', 'Regex': 'episodes_total: ([-+]?[0-9]*[.]?[0-9]+([eE][-+]?[0-9]+)?)'}, \n", 184 | " {'Name': 'num_steps_trained', 'Regex': 'num_steps_trained: ([-+]?[0-9]*[.]?[0-9]+([eE][-+]?[0-9]+)?)'}, \n", 185 | " {'Name': 'timesteps_total', 'Regex': 'timesteps_total: ([-+]?[0-9]*[.]?[0-9]+([eE][-+]?[0-9]+)?)'},\n", 186 | " {'Name': 'training_iteration', 'Regex': 'training_iteration: ([-+]?[0-9]*[.]?[0-9]+([eE][-+]?[0-9]+)?)'},\n", 187 | "\n", 188 | " {'Name': 'episode_reward_max', 'Regex': 'episode_reward_max: ([-+]?[0-9]*[.]?[0-9]+([eE][-+]?[0-9]+)?)'}, \n", 189 | " {'Name': 
'episode_reward_mean', 'Regex': 'episode_reward_mean: ([-+]?[0-9]*[.]?[0-9]+([eE][-+]?[0-9]+)?)'}, \n", 190 | " {'Name': 'episode_reward_min', 'Regex': 'episode_reward_min: ([-+]?[0-9]*[.]?[0-9]+([eE][-+]?[0-9]+)?)'},\n", 191 | "] " 192 | ] 193 | }, 194 | { 195 | "cell_type": "markdown", 196 | "metadata": {}, 197 | "source": [ 198 | "### Ray homogeneous scaling - Specify `train_instance_count` > 1\n", 199 | "\n", 200 | "Homogeneous scaling allows us to use multiple instances of the same type.\n", 201 | "\n", 202 | "Spot instances are unused EC2 instances that could be used at up to a 90% discount compared to On-Demand prices (more information about spot instances can be found [here](https://aws.amazon.com/ec2/spot/?cards.sort-by=item.additionalFields.startDateTime&cards.sort-order=asc) and [here](https://docs.aws.amazon.com/sagemaker/latest/dg/model-managed-spot-training.html))\n", 203 | "\n", 204 | "To use spot instances, set `train_use_spot_instances = True`. To use On-Demand instances, `train_use_spot_instances = False`." 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": null, 210 | "metadata": { 211 | "scrolled": true 212 | }, 213 | "outputs": [], 214 | "source": [ 215 | "train_instance_count = 2\n", 216 | "train_use_spot_instances = False\n", 217 | "\n", 218 | "# Select which procgen environments to run in `envs_to_run`\n", 219 | "'''\n", 220 | "envs_to_run = [\"coinrun\", \"bigfish\", \"bossfight\", \"caveflyer\",\n", 221 | " \"chaser\", \"climber\", \"dodgeball\",\n", 222 | " \"fruitbot\", \"heist\", \"jumper\", \"leaper\", \"maze\",\n", 223 | " \"miner\", \"ninja\", \"plunder\", \"starpilot\"]\n", 224 | "'''\n", 225 | "\n", 226 | "envs_to_run = [\"coinrun\"]\n", 227 | "\n", 228 | "for env in envs_to_run:\n", 229 | " if train_use_spot_instances:\n", 230 | " print('*** Using spot instances ... 
')\n", 231 | " job_name = 'sm-ray-distributed-procgen-spot-' + time.strftime(\"%Y-%m-%d-%H-%M-%S\", time.gmtime()) + \"-\" + env\n", 232 | " checkpoint_s3_uri = 's3://{}/sagemaker-procgen/checkpoints/{}'.format(s3_bucket, job_name)\n", 233 | " training_params = {\"train_use_spot_instances\": True,\n", 234 | " \"train_max_run\": 3600 * 5,\n", 235 | " \"train_max_wait\": 7200 * 5,\n", 236 | " \"checkpoint_s3_uri\": checkpoint_s3_uri\n", 237 | " }\n", 238 | " hyperparameters = {\n", 239 | " \"rl.training.upload_dir\": checkpoint_s3_uri, #Necessary for syncing between spot instances\n", 240 | " \"rl.training.config.env_config.env_name\": env,\n", 241 | " }\n", 242 | " else:\n", 243 | " training_params = {\"base_job_name\": job_name_prefix + \"-\" + env}\n", 244 | " hyperparameters = {\n", 245 | " #\"rl.training.upload_dir\": s3_output_path + \"/tensorboard_sync\", # Uncomment to view tensorboard\n", 246 | " \"rl.training.config.env_config.env_name\": env,\n", 247 | " }\n", 248 | "\n", 249 | " # Defining the RLEstimator\n", 250 | " estimator = RLEstimator(entry_point=\"train-sagemaker-distributed-cpu.py\",\n", 251 | " source_dir='source',\n", 252 | " dependencies=[\"source/utils\", \"source/common/\", \"neurips2020-procgen-starter-kit/\"],\n", 253 | " image_uri=image_name,\n", 254 | " role=role,\n", 255 | " instance_type=instance_type,\n", 256 | " instance_count=train_instance_count,\n", 257 | " output_path=s3_output_path,\n", 258 | " metric_definitions=metric_definitions,\n", 259 | " hyperparameters=hyperparameters,\n", 260 | " **training_params\n", 261 | " )\n", 262 | " if train_use_spot_instances:\n", 263 | " estimator.fit(job_name=job_name, wait=False)\n", 264 | " else:\n", 265 | " estimator.fit(wait=False)\n", 266 | " \n", 267 | " print(' ')\n", 268 | " print(estimator.latest_training_job.job_name)\n", 269 | " print('type=', instance_type, 'count=', train_instance_count )\n", 270 | " print(' ')" 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | 
"execution_count": null, 276 | "metadata": {}, 277 | "outputs": [], 278 | "source": [] 279 | } 280 | ], 281 | "metadata": { 282 | "kernelspec": { 283 | "display_name": "conda_tensorflow2_p36", 284 | "language": "python", 285 | "name": "conda_tensorflow2_p36" 286 | }, 287 | "language_info": { 288 | "codemirror_mode": { 289 | "name": "ipython", 290 | "version": 3 291 | }, 292 | "file_extension": ".py", 293 | "mimetype": "text/x-python", 294 | "name": "python", 295 | "nbconvert_exporter": "python", 296 | "pygments_lexer": "ipython3", 297 | "version": "3.6.10" 298 | } 299 | }, 300 | "nbformat": 4, 301 | "nbformat_minor": 4 302 | } 303 | -------------------------------------------------------------------------------- /sagemaker/train-homo-distributed-gpu.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Amazon SageMaker Notebook for ProcGen Starter Kit with homogeneous scaling of multiple GPU instances " 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import os\n", 17 | "import time\n", 18 | "import yaml\n", 19 | "\n", 20 | "import sagemaker\n", 21 | "from sagemaker.rl import RLEstimator, RLToolkit, RLFramework\n", 22 | "import boto3\n", 23 | "\n", 24 | "from IPython.display import HTML, Markdown\n", 25 | "from source.common.docker_utils import build_and_push_docker_image\n", 26 | "from source.common.markdown_helper import generate_help_for_s3_endpoint_permissions, create_s3_endpoint_manually" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "with open(os.path.join(\"config\", \"sagemaker_config.yaml\")) as f:\n", 36 | " sagemaker_config = yaml.safe_load(f)" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "## 
Initialize Amazon SageMaker" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "sm_session = sagemaker.session.Session()\n", 53 | "s3_bucket = sagemaker_config[\"S3_BUCKET\"]\n", 54 | "\n", 55 | "s3_output_path = 's3://{}/'.format(s3_bucket)\n", 56 | "print(\"S3 bucket path: {}\".format(s3_output_path))" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "job_name_prefix = 'sm-ray-gpu-dist-procgen'\n", 66 | "\n", 67 | "role = sagemaker.get_execution_role()\n", 68 | "print(role)" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "#### Note that `local_mode = True` does not work with heterogeneous scaling" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "instance_type = sagemaker_config[\"GPU_TRAINING_INSTANCE\"]" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "# Configure the framework you want to use\n", 92 | "\n", 93 | "Set `framework` to `\"tf\"` or `\"torch\"` for tensorflow or pytorch respectively.\n", 94 | "\n", 95 | "You will also have to edit your entry point i.e., `train-sagemaker-distributed-gpu.py` with the configuration parameter `\"use_pytorch\"` to match the framework that you have selected." 
96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "framework = \"tf\"" 105 | ] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "metadata": {}, 110 | "source": [ 111 | "# Train your homogeneous scaling job here" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": {}, 117 | "source": [ 118 | "### Edit the training code\n", 119 | "\n", 120 | "The training code is written in the file `train-sagemaker-distributed-gpu.py` which is uploaded in the /source directory.\n", 121 | "\n", 122 | "*Note that ray will automatically set `\"ray_num_cpus\"` and `\"ray_num_gpus\"` in `_get_ray_config`*" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "!pygmentize source/train-sagemaker-distributed-gpu.py" 132 | ] 133 | }, 134 | { 135 | "cell_type": "markdown", 136 | "metadata": {}, 137 | "source": [ 138 | "### Train the RL model using the Python SDK Script mode\n", 139 | "\n", 140 | "When using SageMaker for distributed training, you can select a GPU or CPU instance. The RLEstimator is used for training RL jobs.\n", 141 | "\n", 142 | "1. Specify the source directory where the environment, presets, and training code are uploaded.\n", 143 | "2. Specify the entry point as the training code\n", 144 | "3. Specify the image (CPU or GPU) to be used for the training environment.\n", 145 | "4. Define the training parameters such as the instance count, job name, and S3 path for output.\n", 146 | "5. Define the metrics definitions that you are interested in capturing in your logs. These can also be visualized in CloudWatch and SageMaker Notebooks." 
147 | ] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "metadata": {}, 152 | "source": [ 153 | "#### GPU docker image" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "metadata": { 160 | "scrolled": true 161 | }, 162 | "outputs": [], 163 | "source": [ 164 | "#Build GPU image\n", 165 | "gpu_repository_short_name = \"sagemaker-procgen-ray-%s\" % \"gpu\"\n", 166 | "docker_build_args = {\n", 167 | " 'CPU_OR_GPU': \"gpu\", \n", 168 | " 'AWS_REGION': boto3.Session().region_name,\n", 169 | " 'FRAMEWORK': framework\n", 170 | "}\n", 171 | "image_name = build_and_push_docker_image(gpu_repository_short_name, build_args=docker_build_args)\n", 172 | "print(\"Using GPU ECR image %s\" % image_name)" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": null, 178 | "metadata": {}, 179 | "outputs": [], 180 | "source": [ 181 | "metric_definitions = [\n", 182 | " {'Name': 'training_iteration', 'Regex': 'training_iteration: ([-+]?[0-9]*[.]?[0-9]+([eE][-+]?[0-9]+)?)'}, \n", 183 | " {'Name': 'episodes_total', 'Regex': 'episodes_total: ([-+]?[0-9]*[.]?[0-9]+([eE][-+]?[0-9]+)?)'}, \n", 184 | " {'Name': 'num_steps_trained', 'Regex': 'num_steps_trained: ([-+]?[0-9]*[.]?[0-9]+([eE][-+]?[0-9]+)?)'}, \n", 185 | " {'Name': 'timesteps_total', 'Regex': 'timesteps_total: ([-+]?[0-9]*[.]?[0-9]+([eE][-+]?[0-9]+)?)'},\n", 186 | " {'Name': 'episode_len_mean', 'Regex': 'episode_len_mean: ([-+]?[0-9]*[.]?[0-9]+([eE][-+]?[0-9]+)?)'},\n", 187 | "\n", 188 | " {'Name': 'episode_reward_max', 'Regex': 'episode_reward_max: ([-+]?[0-9]*[.]?[0-9]+([eE][-+]?[0-9]+)?)'}, \n", 189 | " {'Name': 'episode_reward_mean', 'Regex': 'episode_reward_mean: ([-+]?[0-9]*[.]?[0-9]+([eE][-+]?[0-9]+)?)'}, \n", 190 | " {'Name': 'episode_reward_min', 'Regex': 'episode_reward_min: ([-+]?[0-9]*[.]?[0-9]+([eE][-+]?[0-9]+)?)'},\n", 191 | "] " 192 | ] 193 | }, 194 | { 195 | "cell_type": "markdown", 196 | "metadata": {}, 197 | "source": [ 198 | "### Ray 
homogeneous scaling - Specify `train_instance_count` > 1\n", 199 | "\n", 200 | "Homogeneous scaling allows us to use multiple instances of the same type.\n", 201 | "\n", 202 | "Spot instances are unused EC2 instances that could be used at 90% discount compared to On-Demand prices (more information about spot instances can be found [here](https://aws.amazon.com/ec2/spot/?cards.sort-by=item.additionalFields.startDateTime&cards.sort-order=asc) and [here](https://docs.aws.amazon.com/sagemaker/latest/dg/model-managed-spot-training.html))\n", 203 | "\n", 204 | "To use spot instances, set `train_use_spot_instances = True`. To use On-Demand instances, `train_use_spot_instances = False`." 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": null, 210 | "metadata": { 211 | "scrolled": true 212 | }, 213 | "outputs": [], 214 | "source": [ 215 | "train_instance_count = 2\n", 216 | "train_use_spot_instances = False\n", 217 | "\n", 218 | "# Select which procgen environments to run in `envs_to_run`\n", 219 | "'''\n", 220 | "envs_to_run = [\"coinrun\", \"bigfish\", \"bossfight\", \"caveflyer\",\n", 221 | " \"chaser\", \"climber\", \"dodgeball\",\n", 222 | " \"fruitbot\", \"heist\", \"jumper\", \"leaper\", \"maze\",\n", 223 | " \"miner\", \"ninja\", \"plunder\", \"starpilot\"]\n", 224 | "'''\n", 225 | "\n", 226 | "envs_to_run = [\"coinrun\"]\n", 227 | "\n", 228 | "for env in envs_to_run:\n", 229 | " if train_use_spot_instances:\n", 230 | " print('*** Using spot instances ... 
')\n", 231 | " job_name = 'sm-ray-dist-procgen-spot-' + time.strftime(\"%Y-%m-%d-%H-%M-%S\", time.gmtime()) + \"-\" + env\n", 232 | " checkpoint_s3_uri = 's3://{}/sagemaker-procgen/checkpoints/{}'.format(s3_bucket, job_name)\n", 233 | " training_params = {\"train_use_spot_instances\": True,\n", 234 | " \"train_max_run\": 3600 * 5,\n", 235 | " \"train_max_wait\": 7200 * 5,\n", 236 | " \"checkpoint_s3_uri\": checkpoint_s3_uri\n", 237 | " }\n", 238 | " hyperparameters = {\n", 239 | " \"rl.training.upload_dir\": checkpoint_s3_uri, #Necessary for syncing between spot instances\n", 240 | " \"rl.training.config.env_config.env_name\": env,\n", 241 | " }\n", 242 | " else:\n", 243 | " training_params = {\"base_job_name\": job_name_prefix + \"-\" + env}\n", 244 | " hyperparameters = {\n", 245 | " #\"rl.training.upload_dir\": s3_output_path + \"/tensorboard_sync\", # Uncomment to view tensorboard\n", 246 | " \"rl.training.config.env_config.env_name\": env,\n", 247 | " }\n", 248 | "\n", 249 | " # Defining the RLEstimator\n", 250 | " estimator = RLEstimator(entry_point=\"train-sagemaker-distributed-gpu.py\",\n", 251 | " source_dir='source',\n", 252 | " dependencies=[\"source/utils\", \"source/common/\", \"neurips2020-procgen-starter-kit/\"],\n", 253 | " image_uri=image_name,\n", 254 | " role=role,\n", 255 | " instance_type=instance_type,\n", 256 | " instance_count=train_instance_count,\n", 257 | " output_path=s3_output_path,\n", 258 | " metric_definitions=metric_definitions,\n", 259 | " hyperparameters=hyperparameters,\n", 260 | " **training_params\n", 261 | " )\n", 262 | " if train_use_spot_instances:\n", 263 | " estimator.fit(job_name=job_name, wait=False)\n", 264 | " else:\n", 265 | " estimator.fit(wait=False)\n", 266 | " \n", 267 | " print(' ')\n", 268 | " print(estimator.latest_training_job.job_name)\n", 269 | " print('type=', instance_type, 'count=', train_instance_count )\n", 270 | " print(' ')" 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": 
null, 276 | "metadata": {}, 277 | "outputs": [], 278 | "source": [] 279 | } 280 | ], 281 | "metadata": { 282 | "kernelspec": { 283 | "display_name": "conda_tensorflow2_p36", 284 | "language": "python", 285 | "name": "conda_tensorflow2_p36" 286 | }, 287 | "language_info": { 288 | "codemirror_mode": { 289 | "name": "ipython", 290 | "version": 3 291 | }, 292 | "file_extension": ".py", 293 | "mimetype": "text/x-python", 294 | "name": "python", 295 | "nbconvert_exporter": "python", 296 | "pygments_lexer": "ipython3", 297 | "version": "3.6.10" 298 | } 299 | }, 300 | "nbformat": 4, 301 | "nbformat_minor": 4 302 | } 303 | --------------------------------------------------------------------------------