├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── NOTICE ├── README.md ├── build.sh ├── cloudformation ├── onCreate.sh ├── sagemaker.yaml └── solution-assistant │ ├── requirements.txt │ └── src │ └── lambda.py ├── docs ├── BattleSnake-RL-4.gif ├── Table_CompareInstances.png ├── cloudstack_snaphot.png ├── cloudstack_snapshot_dist.png ├── impala_benchmark_baseline_p3_2x.png ├── impala_benchmark_instances.png ├── launch_button.svg └── single_instance_all_envs.png └── sagemaker ├── .gitignore ├── Dockerfile ├── config └── sagemaker_config.yaml ├── source ├── common │ ├── daemon.json │ ├── docker_utils.py │ ├── env_utils.py │ ├── markdown_helper.py │ ├── misc.py │ ├── sagemaker_rl │ │ ├── README.md │ │ ├── __init__.py │ │ ├── coach_launcher.py │ │ ├── configuration_list.py │ │ ├── docker_utils.py │ │ ├── onnx_utils.py │ │ ├── ray_launcher.py │ │ ├── sage_cluster_communicator.py │ │ ├── stable_baselines_launcher.py │ │ └── tf_serving_utils.py │ └── setup.sh ├── custom │ ├── Readme.md │ ├── algorithms │ │ ├── __init__.py │ │ ├── custom_random_agent │ │ │ ├── __init__.py │ │ │ └── custom_random_agent.py │ │ ├── random_policy │ │ │ ├── __init__.py │ │ │ ├── policy.py │ │ │ ├── readme.md │ │ │ └── trainer.py │ │ └── registry.py │ ├── callbacks.py │ ├── envs │ │ ├── .gitkeep │ │ ├── __init__.py │ │ ├── framestack.py │ │ ├── procgen_env_wrapper.py │ │ └── readme.md │ ├── experiments │ │ ├── impala-baseline.yaml │ │ ├── procgen-starter-example.yaml │ │ └── random-policy.yaml │ ├── models │ │ ├── .gitkeep │ │ ├── impala_cnn_tf.py │ │ ├── impala_cnn_torch.py │ │ └── my_vision_network.py │ ├── preprocessors │ │ ├── __init__.py │ │ └── custom_preprocessor.py │ └── setup.py ├── procgen_ray_launcher.py ├── ray_experiment_builder.py ├── requirements.txt ├── rollout.py ├── train-local.py ├── train-sagemaker-distributed-cpu.py ├── train-sagemaker-distributed-gpu.py ├── train-sagemaker.py └── utils │ ├── inference.py │ └── loader.py ├── 
train-hetero-distributed.ipynb ├── train-homo-distributed-cpu.ipynb ├── train-homo-distributed-gpu.ipynb └── train.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | buildspec.yml 2 | .viperlightignore 3 | ~.DS_Store 4 | .DS_Store 5 | *~ 6 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. 
Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *master* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 
49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 60 | 61 | We may ask you to sign a [Contributor License Agreement (CLA)](http://en.wikipedia.org/wiki/Contributor_License_Agreement) for larger changes. 62 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 
23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. 
For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. 
If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. 
You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | -------------------------------------------------------------------------------- /build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ $# -lt 3 ]; then 4 | echo "Please provide the solution name as well as the base S3 bucket name and the region to run build script." 5 | echo "For example: ./build.sh trademarked-solution-name sagemaker-solutions-build us-west-2" 6 | exit 1 7 | fi 8 | 9 | # Package the solution assistant 10 | mkdir build 11 | mkdir build/solution-assistant 12 | cp -r ./cloudformation/solution-assistant ./build/ 13 | (cd ./build/solution-assistant && pip install -r requirements.txt -t ./src/site-packages) 14 | find ./build/solution-assistant -name '*.pyc' -delete 15 | (cd ./build/solution-assistant/src && zip -q -r9 ../../solution-assistant.zip *) 16 | rm -rf ./build/solution-assistant 17 | 18 | # Upload to S3 19 | s3_prefix="s3://$2-$3/$1" 20 | echo "Using S3 path: $s3_prefix" 21 | aws s3 cp --recursive sagemaker $s3_prefix/sagemaker --exclude '.*' --exclude "*~" 22 | aws s3 cp --recursive cloudformation $s3_prefix/cloudformation --exclude '.*' --exclude "*~" 23 | aws s3 cp --recursive build $s3_prefix/build 24 | aws s3 cp Readme.md $s3_prefix/ 25 | -------------------------------------------------------------------------------- /cloudformation/onCreate.sh: -------------------------------------------------------------------------------- 1 | # This file creates a symbolic link of folders in the neurips2020-progen-starter-kit to the SageMaker's one. 2 | 3 | #### Warning: This file is ran by the cloudformation template. You should not have to manually run this file. 
4 | 5 | # Remove the custom content 6 | rm -r source/custom/algorithms source/custom/envs source/custom/models source/custom/preprocessors source/custom/experiments 7 | rm source/custom/callbacks.py 8 | 9 | # Create symbolic link 10 | cd source/custom 11 | ln -s ../../neurips2020-procgen-starter-kit/algorithms algorithms 12 | ln -s ../../neurips2020-procgen-starter-kit/envs envs 13 | ln -s ../../neurips2020-procgen-starter-kit/models models 14 | ln -s ../../neurips2020-procgen-starter-kit/experiments experiments 15 | ln -s ../../neurips2020-procgen-starter-kit/preprocessors preprocessors 16 | ln -s ../../neurips2020-procgen-starter-kit/callbacks.py callbacks.py 17 | 18 | 19 | # Bug fix in framestack.py 20 | export REPLACE_STRING="\\ 21 | try:\\ 22 | from envs.procgen_env_wrapper import ProcgenEnvWrapper\\ 23 | except ModuleNotFoundError:\\ 24 | from custom.envs.procgen_env_wrapper import ProcgenEnvWrapper" 25 | 26 | sed -i "s/from envs.procgen_env_wrapper import ProcgenEnvWrapper/${REPLACE_STRING}/g" envs/framestack.py 27 | 28 | # Copy setup.py into the starter kit 29 | cp setup.py ../../neurips2020-procgen-starter-kit/ 30 | -------------------------------------------------------------------------------- /cloudformation/solution-assistant/requirements.txt: -------------------------------------------------------------------------------- 1 | crhelper 2 | -------------------------------------------------------------------------------- /cloudformation/solution-assistant/src/lambda.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"). 7 | # You may not use this file except in compliance with the License. 
8 | # A copy of the License is located at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # or in the "license" file accompanying this file. This file is distributed 13 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 14 | # express or implied. See the License for the specific language governing 15 | # permissions and limitations under the License. 16 | 17 | import boto3 18 | import sys 19 | 20 | sys.path.append('./site-packages') 21 | 22 | from crhelper import CfnResource 23 | 24 | helper = CfnResource() 25 | 26 | @helper.update 27 | @helper.create 28 | def empty_function(event, _): 29 | pass 30 | 31 | @helper.delete 32 | def on_delete(event, _): 33 | s3_resource = boto3.resource("s3") 34 | bucket_name = event["ResourceProperties"]["S3BucketName"] 35 | try: 36 | s3_resource.Bucket(bucket_name).objects.all().delete() 37 | print("Successfully deleted objects in bucket " 38 | "called '{}'".format(bucket_name)) 39 | 40 | except s3_resource.meta.client.exceptions.NoSuchBucket: 41 | print( 42 | "Could not find bucket called '{}'. 
" 43 | "Skipping delete.".format(bucket_name) 44 | ) 45 | 46 | def handler(event, context): 47 | helper(event, context) 48 | -------------------------------------------------------------------------------- /docs/BattleSnake-RL-4.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sagemaker-rl-procgen-ray/ba82c1f0bac2c985930bda7875f1950723f6d7ce/docs/BattleSnake-RL-4.gif -------------------------------------------------------------------------------- /docs/Table_CompareInstances.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sagemaker-rl-procgen-ray/ba82c1f0bac2c985930bda7875f1950723f6d7ce/docs/Table_CompareInstances.png -------------------------------------------------------------------------------- /docs/cloudstack_snaphot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sagemaker-rl-procgen-ray/ba82c1f0bac2c985930bda7875f1950723f6d7ce/docs/cloudstack_snaphot.png -------------------------------------------------------------------------------- /docs/cloudstack_snapshot_dist.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sagemaker-rl-procgen-ray/ba82c1f0bac2c985930bda7875f1950723f6d7ce/docs/cloudstack_snapshot_dist.png -------------------------------------------------------------------------------- /docs/impala_benchmark_baseline_p3_2x.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sagemaker-rl-procgen-ray/ba82c1f0bac2c985930bda7875f1950723f6d7ce/docs/impala_benchmark_baseline_p3_2x.png -------------------------------------------------------------------------------- /docs/impala_benchmark_instances.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sagemaker-rl-procgen-ray/ba82c1f0bac2c985930bda7875f1950723f6d7ce/docs/impala_benchmark_instances.png -------------------------------------------------------------------------------- /docs/launch_button.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Quick Create Stack 5 | 6 | 7 | -------------------------------------------------------------------------------- /docs/single_instance_all_envs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sagemaker-rl-procgen-ray/ba82c1f0bac2c985930bda7875f1950723f6d7ce/docs/single_instance_all_envs.png -------------------------------------------------------------------------------- /sagemaker/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 98 | __pypackages__/ 99 | 100 | # Celery stuff 101 | celerybeat-schedule 102 | celerybeat.pid 103 | 104 | # SageMath parsed files 105 | *.sage.py 106 | 107 | # Environments 108 | .env 109 | .venv 110 | env/ 111 | venv/ 112 | ENV/ 113 | env.bak/ 114 | venv.bak/ 115 | 116 | # Spyder project settings 117 | .spyderproject 118 | .spyproject 119 | 120 | # Rope project settings 121 | .ropeproject 122 | 123 | # mkdocs documentation 124 | /site 125 | 126 | # mypy 127 | .mypy_cache/ 128 | .dmypy.json 129 | dmypy.json 130 | 131 | # Pyre type checker 132 | .pyre/ 133 | 134 | # pytype static type analyzer 135 | .pytype/ 136 | 137 | # Cython debug symbols 138 | cython_debug/ 139 | 140 | -------------------------------------------------------------------------------- /sagemaker/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG CPU_OR_GPU 2 | ARG AWS_REGION 3 | ARG FRAMEWORK 4 | FROM 462105765813.dkr.ecr.${AWS_REGION}.amazonaws.com/sagemaker-rl-ray-container:ray-0.8.5-${FRAMEWORK}-${CPU_OR_GPU}-py36 5 | 6 | WORKDIR /opt/ml 7 | RUN apt-get update 8 | RUN apt-get install -y openssh-server 9 | RUN pip install --upgrade \ 10 | pip \ 11 | setuptools \ 12 | setproctitle \ 13 | lz4 \ 14 | psutil 15 | 16 | RUN pip install procgen==0.10.1 17 | RUN pip install mlflow==1.8.0 18 | 19 | # Log in to dockerized SSH daemon service 20 | RUN sed -ri 's/^#?PermitRootLogin\s+.*/PermitRootLogin yes/' /etc/ssh/sshd_config && \ 21 | sed -ri 's/^#?PubkeyAuthentication\s+.*/PubkeyAuthentication yes/' /etc/ssh/sshd_config 22 | ENV PYTHONUNBUFFERED 1 23 | 24 | ############################################ 25 | # Test Installation 26 | ############################################ 27 | # Test to verify if all required dependencies installed successfully or not. 
28 | RUN python -c "import gym;import sagemaker_containers.cli.train; import ray; from sagemaker_containers.cli.train import main" 29 | # Make things a bit easier to debug 30 | WORKDIR /opt/ml/code 31 | # Expose port 22 for SSH login 32 | EXPOSE 22 33 | CMD ["/usr/sbin/sshd", "-D"] 34 | -------------------------------------------------------------------------------- /sagemaker/config/sagemaker_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sagemaker-rl-procgen-ray/ba82c1f0bac2c985930bda7875f1950723f6d7ce/sagemaker/config/sagemaker_config.yaml -------------------------------------------------------------------------------- /sagemaker/source/common/daemon.json: -------------------------------------------------------------------------------- 1 | 2 | { 3 | "default-runtime": "nvidia", 4 | "runtimes": { 5 | "nvidia": { 6 | "path": "/usr/bin/nvidia-container-runtime", 7 | "runtimeArgs": [] 8 | } 9 | } 10 | } -------------------------------------------------------------------------------- /sagemaker/source/common/docker_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 
13 | 14 | from __future__ import absolute_import 15 | 16 | import base64 17 | import contextlib 18 | import os 19 | import time 20 | import shlex 21 | import shutil 22 | import subprocess 23 | import sys 24 | import tempfile 25 | 26 | import boto3 27 | import json 28 | 29 | IMAGE_TEMPLATE = "{account}.dkr.ecr.{region}.amazonaws.com/{image_name}:{version}" 30 | 31 | 32 | def build_and_push_docker_image(repository_name, dockerfile='Dockerfile', build_args={}): 33 | """Builds a docker image from the specified dockerfile, and pushes it to 34 | ECR. Handles things like ECR login, creating the repository. 35 | 36 | Returns the name of the created docker image in ECR 37 | """ 38 | base_image = _find_base_image_in_dockerfile(dockerfile) 39 | _ecr_login_if_needed(base_image) 40 | _build_from_dockerfile(repository_name, dockerfile, build_args) 41 | ecr_tag = push(repository_name) 42 | return ecr_tag 43 | 44 | 45 | def _build_from_dockerfile(repository_name, dockerfile='Dockerfile', build_args={}): 46 | build_cmd = ['docker', 'build', '-t', repository_name, '-f', dockerfile, '.'] 47 | for k,v in build_args.items(): 48 | build_cmd += ['--build-arg', '%s=%s' % (k,v)] 49 | 50 | print("Building docker image %s from %s" % (repository_name, dockerfile)) 51 | _execute(build_cmd) 52 | print("Done building docker image %s" % repository_name) 53 | 54 | 55 | def _find_base_image_in_dockerfile(dockerfile): 56 | dockerfile_lines = open(dockerfile).readlines() 57 | from_line = list(filter(lambda line: line.startswith("FROM "), dockerfile_lines))[0].rstrip() 58 | base_image = from_line[5:] 59 | return base_image 60 | 61 | 62 | def push(tag, aws_account=None, aws_region=None): 63 | """ 64 | Push the builded tag to ECR. 
65 | 66 | Args: 67 | tag (string): tag which you named your algo 68 | aws_account (string): aws account of the ECR repo 69 | aws_region (string): aws region where the repo is located 70 | 71 | Returns: 72 | (string): ECR repo image that was pushed 73 | """ 74 | session = boto3.Session() 75 | aws_account = aws_account or session.client("sts").get_caller_identity()['Account'] 76 | aws_region = aws_region or session.region_name 77 | try: 78 | repository_name, version = tag.split(':') 79 | except ValueError: # split failed because no : 80 | repository_name = tag 81 | version = "latest" 82 | ecr_client = session.client('ecr', region_name=aws_region) 83 | 84 | _create_ecr_repo(ecr_client, repository_name) 85 | _ecr_login(ecr_client, aws_account) 86 | ecr_tag = _push(aws_account, aws_region, tag) 87 | 88 | return ecr_tag 89 | 90 | 91 | def _push(aws_account, aws_region, tag): 92 | ecr_repo = '%s.dkr.ecr.%s.amazonaws.com' % (aws_account, aws_region) 93 | ecr_tag = '%s/%s' % (ecr_repo, tag) 94 | _execute(['docker', 'tag', tag, ecr_tag]) 95 | print("Pushing docker image to ECR repository %s/%s\n" % (ecr_repo, tag)) 96 | _execute(['docker', 'push', ecr_tag]) 97 | print("Done pushing %s" % ecr_tag) 98 | return ecr_tag 99 | 100 | 101 | def _create_ecr_repo(ecr_client, repository_name): 102 | """ 103 | Create the repo if it doesn't already exist. 
104 | """ 105 | try: 106 | ecr_client.create_repository(repositoryName=repository_name) 107 | print("Created new ECR repository: %s" % repository_name) 108 | except ecr_client.exceptions.RepositoryAlreadyExistsException: 109 | print("ECR repository already exists: %s" % repository_name) 110 | 111 | 112 | def _ecr_login(ecr_client, aws_account): 113 | auth = ecr_client.get_authorization_token(registryIds=[aws_account]) 114 | authorization_data = auth['authorizationData'][0] 115 | 116 | raw_token = base64.b64decode(authorization_data['authorizationToken']) 117 | token = raw_token.decode('utf-8').strip('AWS:') 118 | ecr_url = auth['authorizationData'][0]['proxyEndpoint'] 119 | 120 | cmd = ['docker', 'login', '-u', 'AWS', '-p', token, ecr_url] 121 | _execute(cmd, quiet=True) 122 | print("Logged into ECR") 123 | 124 | 125 | def _ecr_login_if_needed(image): 126 | ecr_client = boto3.client('ecr') 127 | 128 | # Only ECR images need login 129 | if not ('dkr.ecr' in image and 'amazonaws.com' in image): 130 | return 131 | 132 | # do we have the image? 133 | if _check_output('docker images -q %s' % image).strip(): 134 | return 135 | 136 | aws_account = image.split('.')[0] 137 | _ecr_login(ecr_client, aws_account) 138 | 139 | 140 | @contextlib.contextmanager 141 | def _tmpdir(suffix='', prefix='tmp', dir=None): # type: (str, str, str) -> None 142 | """Create a temporary directory with a context manager. The file is deleted when the context exits. 143 | 144 | The prefix, suffix, and dir arguments are the same as for mkstemp(). 145 | 146 | Args: 147 | suffix (str): If suffix is specified, the file name will end with that suffix, otherwise there will be no 148 | suffix. 149 | prefix (str): If prefix is specified, the file name will begin with that prefix; otherwise, 150 | a default prefix is used. 151 | dir (str): If dir is specified, the file will be created in that directory; otherwise, a default directory is 152 | used. 
153 | Returns: 154 | str: path to the directory 155 | """ 156 | tmp = tempfile.mkdtemp(suffix=suffix, prefix=prefix, dir=dir) 157 | yield tmp 158 | shutil.rmtree(tmp) 159 | 160 | 161 | def _execute(command, quiet=False): 162 | if not quiet: 163 | print("$ %s" % ' '.join(command)) 164 | process = subprocess.Popen(command, 165 | stdout=subprocess.PIPE, 166 | stderr=subprocess.STDOUT) 167 | try: 168 | _stream_output(process) 169 | except RuntimeError as e: 170 | # _stream_output() doesn't have the command line. We will handle the exception 171 | # which contains the exit code and append the command line to it. 172 | msg = "Failed to run: %s, %s" % (command, str(e)) 173 | raise RuntimeError(msg) 174 | 175 | 176 | def _stream_output(process): 177 | """Stream the output of a process to stdout 178 | 179 | This function takes an existing process that will be polled for output. Only stdout 180 | will be polled and sent to sys.stdout. 181 | 182 | Args: 183 | process(subprocess.Popen): a process that has been started with 184 | stdout=PIPE and stderr=STDOUT 185 | 186 | Returns (int): process exit code 187 | """ 188 | exit_code = None 189 | 190 | while exit_code is None: 191 | stdout = process.stdout.readline().decode("utf-8") 192 | sys.stdout.write(stdout) 193 | exit_code = process.poll() 194 | 195 | if exit_code != 0: 196 | raise RuntimeError("Process exited with code: %s" % exit_code) 197 | 198 | 199 | def _check_output(cmd, *popenargs, **kwargs): 200 | if isinstance(cmd, str): 201 | cmd = shlex.split(cmd) 202 | 203 | success = True 204 | try: 205 | output = subprocess.check_output(cmd, *popenargs, **kwargs) 206 | except subprocess.CalledProcessError as e: 207 | output = e.output 208 | success = False 209 | 210 | output = output.decode("utf-8") 211 | if not success: 212 | print("Command output: %s" % output) 213 | raise Exception("Failed to run %s" % ",".join(cmd)) 214 | 215 | return output 216 | 
import gym
import numpy as np
import pandas as pd
import json
from pathlib import Path

# 40 == gym.logger.ERROR: silence warning chatter from environment creation.
gym.logger.set_level(40)

class VectoredGymEnvironment():
    """
    Environment class to run multiple simulations and collect rollout data
    """
    def __init__(self, registered_gym_env, num_of_envs=1):
        self.envs_initialized = False
        self.initialized_envs = {}       # environment_id -> gym env
        self.env_states = {}             # environment_id -> latest observation
        self.env_reset_counter = {}      # environment_id -> number of resets seen
        self.num_of_envs = num_of_envs
        self.data_rows = []              # accumulated rollout rows across all envs

        self.initialize_envs(num_of_envs, registered_gym_env)

    def is_initialized(self):
        """Return True once all environments have been created and reset."""
        return self.envs_initialized

    def initialize_envs(
            self,
            num_of_envs,
            registered_gym_env):
        """Initialize multiple OpenAI gym environments.
        Each environment will start with a different random seed.

        Arguments:
            num_of_envs {int} -- Number of environments/simulations to initiate
            registered_gym_env {str} -- Environment name of the registered gym environment
        """
        print("Initializing {} environments of {}".format(num_of_envs, registered_gym_env))
        for i in range(num_of_envs):
            environment_id = "environment_" + str(i)
            environment = gym.make(registered_gym_env)
            environment = environment.unwrapped
            environment.seed(i)  # distinct seed per environment
            self.env_states[environment_id] = environment.reset()
            self.env_reset_counter[environment_id] = 0
            self.initialized_envs[environment_id] = environment
        self.envs_initialized = True
        # All envs share one observation space; remember its flat size.
        self.state_dims = len(self.env_states[environment_id])

    def get_environment_states(self):
        """Return the mapping environment_id -> latest observation."""
        return self.env_states

    def dump_environment_states(self, dir_path, file_name):
        """Dump current states of all the environments into a file
        (one JSON list per line).

        Arguments:
            dir_path {str} -- Directory path of the target file
            file_name {str} -- File name of the target file
        """
        file_path = Path(dir_path) / file_name
        with open(file_path, 'w') as outfile:
            for state in self.env_states.values():
                json.dump(list(state), outfile)
                outfile.write('\n')

    def get_environment_ids(self):
        """Return the ids of all initialized environments."""
        return list(self.initialized_envs.keys())

    def step(self, environment_id, action):
        """Advance one environment by one step and cache its new observation."""
        local_env = self.initialized_envs[environment_id]
        observation, reward, done, info = local_env.step(action)

        self.env_states[environment_id] = observation
        return observation, reward, done, info

    def reset(self, environment_id):
        """Reset one environment and cache its initial observation."""
        self.env_states[environment_id] = \
            self.initialized_envs[environment_id].reset()
        return self.env_states[environment_id]

    def reset_all_envs(self):
        """Reset every managed environment."""
        print("Resetting all the environments...")
        for i in range(self.num_of_envs):
            self.reset("environment_" + str(i))

    def close(self, environment_id):
        """Close one environment."""
        self.initialized_envs[environment_id].close()

    def render(self, environment_id):
        """Render one environment."""
        self.initialized_envs[environment_id].render()

    @staticmethod
    def _normalized_probs(action_prob):
        """Return action probabilities as a list summing to 1.

        Converts to a float array first: the original in-place division
        (`action_prob /= action_prob.sum()`) raises a casting error when the
        caller passes integer probabilities.
        """
        action_prob = np.asarray(action_prob, dtype=float)
        total = action_prob.sum()
        # normalization if sum of probs is not exactly equal to 1
        if total != 1:
            action_prob = action_prob / total
        return list(action_prob)

    def collect_rollouts_for_single_env_with_given_episodes(self, environment_id, action_prob, num_episodes):
        """Collect rollouts with given episodes from one environment

        Arguments:
            environment_id {str} -- Environment id for the environment
            action_prob {list} -- Action probabilities of the simulated policy
            num_episodes {int} -- Number of episodes to run rollouts
        """
        action_prob = self._normalized_probs(action_prob)

        for _ in range(num_episodes):
            done = False
            cumulative_rewards = 0
            while not done:
                data_item = []
                action = np.random.choice(len(action_prob), p=action_prob)
                # capture the state the action was taken in, before stepping
                cur_state_features = self.env_states[environment_id]
                _, reward, done, _ = self.step(environment_id, action)
                cumulative_rewards += reward
                # unique episode id across envs and resets
                episode_id = int(environment_id.split('_')[-1]) + \
                    self.num_of_envs * self.env_reset_counter[environment_id]
                if not done:
                    data_item.extend([action, action_prob, episode_id, reward, 0.0])
                else:
                    # only the terminal row carries the episode's total reward
                    data_item.extend([action, action_prob, episode_id, reward, cumulative_rewards])
                data_item.extend(cur_state_features)
                self.data_rows.append(data_item)

            self.reset(environment_id)
            self.env_reset_counter[environment_id] += 1

    def collect_rollouts_for_single_env_with_given_steps(self, environment_id, action_prob, num_steps):
        """Collect rollouts with given steps from one environment

        Arguments:
            environment_id {str} -- Environment id for the environment
            action_prob {list} -- Action probabilities of the simulated policy
            num_steps {int} -- Number of steps to run rollouts
        """
        action_prob = self._normalized_probs(action_prob)

        for _ in range(num_steps):
            data_item = []
            action = np.random.choice(len(action_prob), p=action_prob)
            cur_state_features = self.env_states[environment_id]
            _, reward, done, _ = self.step(environment_id, action)
            episode_id = int(environment_id.split('_')[-1]) + \
                self.num_of_envs * self.env_reset_counter[environment_id]
            data_item.extend([action, action_prob, episode_id, reward])
            data_item.extend(cur_state_features)
            self.data_rows.append(data_item)
            if done:
                self.reset(environment_id)
                self.env_reset_counter[environment_id] += 1

    def collect_rollouts_with_given_action_probs(self, num_steps=None, num_episodes=None, action_probs=None, file_name=None):
        """Collect rollouts from all the initiated environments with given action probs

        Keyword Arguments:
            num_steps {int} -- Number of steps to run rollouts (default: {None})
            num_episodes {int} -- Number of episodes to run rollouts (default: {None})
            action_probs {list} -- Action probs for the policy (default: {None})
            file_name {str} -- Batch transform output that contain predictions of probs (default: {None})

        Returns:
            [Dataframe] -- Dataframe that contains the rollout data from all envs
        """
        if file_name is not None:
            assert action_probs is None
            # The original filtered lines with `line is not ''` — an identity
            # comparison that is never a reliable emptiness test (and warns on
            # modern Python).  Use truthiness, and close the file via a
            # context manager.
            with open(file_name) as f:
                json_lines = [json.loads(line) for line in f if line.strip()]
            action_probs = []
            for line in json_lines:
                if line.get('SageMakerOutput') is not None:
                    action_probs.append(line['SageMakerOutput'].get("predictions")[0])
                else:
                    action_probs.append(line.get("predictions")[0])

        assert len(action_probs) == self.num_of_envs
        for index, environment_id in enumerate(self.get_environment_ids()):
            if num_steps is not None:
                assert num_episodes is None
                self.collect_rollouts_for_single_env_with_given_steps(
                    environment_id, action_probs[index], num_steps
                )
            else:
                assert num_episodes is not None
                self.collect_rollouts_for_single_env_with_given_episodes(
                    environment_id, action_probs[index], num_episodes
                )

        col_names = self._create_col_names()
        df = pd.DataFrame(self.data_rows, columns=col_names)

        return df

    def _create_col_names(self):
        """Create column names of dataframe that can be consumed by Coach

        Returns:
            [list] -- List of column names
        """
        col_names = ['action', 'all_action_probabilities', 'episode_id', 'reward', 'cumulative_rewards']
        col_names += ['state_feature_' + str(i) for i in range(self.state_dims)]

        return col_names
def generate_s3_write_permission_for_sagemaker_role(role):
    """Markdown steps for attaching S3 write access to *role*.

    Fix: the original told the user to attach `AmazonKinesisVideoStreamsFullAccess`
    here while the kinesis helper below said `AmazonS3FullAccess` — the two
    policy names were swapped between the two helpers.
    """
    role_name = role.split("/")[-1]
    url = "https://console.aws.amazon.com/iam/home#/roles/%s" % role_name
    text = "1. Go to IAM console to edit current SageMaker role: [%s](%s).\n" % (role_name, url)
    text += "2. Next, go to the `Permissions tab` and click on `Attach Policy.` \n"
    text += "3. Search and select `AmazonS3FullAccess` policy\n"
    return text

def generate_kinesis_create_permission_for_sagemaker_role(role):
    """Markdown steps for attaching Kinesis Video Streams access to *role*.

    Fix: policy name swapped with the S3 helper above in the original.
    """
    role_name = role.split("/")[-1]
    url = "https://console.aws.amazon.com/iam/home#/roles/%s" % role_name
    text = "1. Go to IAM console to edit current SageMaker role: [%s](%s).\n" % (role_name, url)
    text += "2. Next, go to the `Permissions tab` and click on `Attach Policy.` \n"
    text += "3. Search and select `AmazonKinesisVideoStreamsFullAccess` policy\n"
    return text

def generate_help_for_s3_endpoint_permissions(role):
    """Markdown steps allowing *role* to create VPC S3 endpoints.

    Typo fix: "premissions" -> "permissions".
    """
    role_name = role.split("/")[-1]
    url = "https://console.aws.amazon.com/iam/home#/roles/%s" % role_name
    text = ">It looks like your SageMaker role has insufficient permissions. Please do the following:\n"
    text += "1. Go to IAM console to edit current SageMaker role: [%s](%s).\n" % (role_name, url)
    text += "2. Select %s and then click on `Edit Policy`\n" % role_name
    text += "3. Select the JSON tab and add the following JSON blob to the `Statement` list:\n"
    text += """```json
            {
                "Action": [
                    "ec2:DescribeRouteTables",
                    "ec2:CreateVpcEndpoint"
                ],
                "Effect": "Allow",
                "Resource": "*"
            },```\n"""
    text += "4. Now wait for a few minutes before executing this cell again!"
    return text


def generate_help_for_robomaker_trust_relationship(role):
    """Markdown steps for adding RoboMaker to *role*'s trust relationship."""
    role_name = role.split("/")[-1]
    url = "https://console.aws.amazon.com/iam/home#/roles/%s" % role_name
    text = "1. Go to IAM console to edit current SageMaker role: [%s](%s).\n" % (role_name, url)
    text += "2. Next, go to the `Trust relationships tab` and click on `Edit Trust Relationship.` \n"
    text += "3. Replace the JSON blob with the following:\n"
    text += """```json
            {
              "Version": "2012-10-17",
              "Statement": [
                {
                  "Effect": "Allow",
                  "Principal": {
                    "Service": [
                      "sagemaker.amazonaws.com",
                      "robomaker.amazonaws.com"
                    ]
                  },
                  "Action": "sts:AssumeRole"
                }
              ]
            }```\n"""
    text += "4. Once this is complete, click on Update Trust Policy and you are done."
    return text


def generate_help_for_robomaker_all_permissions(role):
    """Markdown steps granting *role* the RoboMaker permissions this sample needs.

    Typo fix: "premissions" -> "permissions".
    """
    role_name = role.split("/")[-1]
    url = "https://console.aws.amazon.com/iam/home#/roles/%s" % role_name
    text = ">It looks like your SageMaker role has insufficient permissions. Please do the following:\n"
    text += "1. Go to IAM console to edit current SageMaker role: [%s](%s).\n" % (role_name, url)
    text += "2. Click on policy starting with `AmazonSageMaker-ExecutionPolicy` and then edit policy.\n"
    text += "3. Go to JSON tab, add the following JSON blob to the `Statement` list and save policy:\n"
    text += """```json
        {
            "Effect": "Allow",
            "Action": [
                "robomaker:CreateSimulationApplication",
                "robomaker:DescribeSimulationApplication",
                "robomaker:DeleteSimulationApplication",
                "robomaker:CreateSimulationJob",
                "robomaker:DescribeSimulationJob",
                "robomaker:CancelSimulationJob",
                "robomaker:ListSimulationApplications"
            ],
            "Resource": [
                "*"
            ]
        },
        {
            "Effect": "Allow",
            "Action": "iam:CreateServiceLinkedRole",
            "Resource": "*",
            "Condition": {
                "StringEquals": {
                    "iam:AWSServiceName": "robomaker.amazonaws.com"
                }
            }
        },
        {
            "Effect": "Allow",
            "Action": [
                "iam:PassRole"
            ],
            "Resource": "*",
            "Condition": {
                "StringEquals": {
                    "iam:PassedToService": [
                        "robomaker.amazonaws.com"
                    ]
                }
            }
        },```\n"""
    text += "4. Next, go to the `Trust relationships tab` and click on `Edit Trust Relationship.` \n"
    text += "5. Add the following JSON blob to the `Statement` list:\n"
    text += """```json
        {
          "Effect": "Allow",
          "Principal": {
            "Service": "robomaker.amazonaws.com"
          },
          "Action": "sts:AssumeRole"
        },```\n"""
    text += "6. Now wait for a few minutes before executing this cell again!"
    return text


def generate_robomaker_links(job_arns, aws_region):
    """Markdown list of console links for the given RoboMaker simulation job ARNs."""
    simulation_ids = [job_arn.split("/")[-1] for job_arn in job_arns]
    robomaker_links = []
    for simulation_id in simulation_ids:
        robomaker_link = "https://%s.console.aws.amazon.com/robomaker/home?region=%s#simulationJobs/%s" % (aws_region,
                                                                                                           aws_region,
                                                                                                           simulation_id)
        robomaker_links.append(robomaker_link)

    markdown_content = '> Click on the following links for visualization of simulation jobs on RoboMaker Console\n'
    for i in range(len(robomaker_links)):
        markdown_content += "- [Simulation %s](%s)  \n" % (i + 1, robomaker_links[i])

    markdown_content += "\nYou can click on Gazebo after you open the above link to start the simulator."
    return markdown_content


def create_s3_endpoint_manually(aws_region, default_vpc):
    """Markdown fallback instructions for creating a VPC S3 endpoint by hand."""
    url = "https://%s.console.aws.amazon.com/vpc/home?region=%s#Endpoints:sort=vpcEndpointId" % (aws_region, aws_region)
    text = ">VPC S3 endpoint creation failed. Please do the following to create an endpoint manually:\n"
    text += "1. Go to [VPC console | Endpoints](%s)\n" % url
    text += "2. Click on `Create Endpoint`. Select Service Name as `com.amazonaws.%s.s3`.\n" % (aws_region)
    text += "3. Next, select your Default VPC: `%s` and click the checkbox against the main Route Table ID\n" % (
        default_vpc)
    text += "4. Select `Full Access` in policy and click on `Create Endpoint`\n"
    text += "5. That should be it! Now wait for a few seconds before proceeding to the next cell."
    return text


def generate_help_for_administrator_policy(role):
    """Markdown steps for attaching AdministratorAccess to *role*."""
    role_name = role.split("/")[-1]
    url = "https://console.aws.amazon.com/iam/home#/roles/%s" % role_name
    text = "1. Go to IAM console to edit current SageMaker role: [%s](%s).\n" % (role_name, url)
    text += "2. Next, go to the `Permissions tab` and click on `Attach policies`. \n"
    text += "3. Check the box for `AdministratorAccess`\n"
    text += "4. Click on `Attach policy` at the bottom.\n"
    text += "5. You'll see message `Policy AdministratorAccess has been attached for the %s`. \n" % (role)
    text += "6. Once this is complete, you are all set."
    return text

def generate_help_for_experiment_manager_permissions(role):
    """Markdown steps granting *role* the permissions the experiment manager needs.

    Typo fix: "premissions" -> "permissions".
    """
    role_name = role.split("/")[-1]
    url = "https://console.aws.amazon.com/iam/home#/roles/%s" % role_name
    text = ">It looks like your SageMaker role has insufficient permissions. Please do the following:\n"
    text += "1. Go to IAM console to edit current SageMaker role: [%s](%s).\n" % (role_name, url)
    text += "2. Click on policy starting with `AmazonSageMaker-ExecutionPolicy` and then edit policy.\n"
    text += "3. Go to JSON tab, add the following JSON blob to the `Statement` list and save policy:\n"
    text += """```json
        {
            "Effect": "Allow",
            "Action": [
                "cloudformation:DescribeStacks",
                "cloudformation:ValidateTemplate",
                "cloudformation:CreateStack",
                "dynamodb:DescribeTable",
                "dynamodb:CreateTable",
                "dynamodb:DeleteTable",
                "dynamodb:PutItem",
                "dynamodb:UpdateItem",
                "dynamodb:DeleteItem",
                "dynamodb:Query",
                "dynamodb:BatchWriteItem",
                "iam:CreateRole",
                "iam:GetRole",
                "iam:PutRolePolicy",
                "iam:DeleteRolePolicy",
                "iam:DeleteRole",
                "iam:PassRole",
                "cloudwatch:PutDashboard",
                "firehose:ListDeliveryStreams",
                "firehose:DeleteDeliveryStream",
                "firehose:DescribeDeliveryStream",
                "firehose:CreateDeliveryStream",
                "athena:StartQueryExecution",
                "athena:GetQueryExecution",
                "glue:GetTable",
                "glue:DeleteTable",
                "glue:GetPartitions",
                "glue:UpdateTable",
                "glue:CreateTable",
                "glue:GetDatabase"
            ],
            "Resource": [
                "*"
            ]
        },```\n"""
    text += "4. Now wait for a few minutes before executing this cell again!"
    return text
def wait_for_s3_object(s3_bucket, key, local_dir, local_prefix='',
                       aws_account=None, aws_region=None, timeout=1200, limit=20,
                       fetch_only=None, training_job_name=None):
    """
    Keep polling s3 object until it is generated.

    Pulling down latest data to local directory with short key

    Arguments:
        s3_bucket (string): s3 bucket name
        key (string): key for s3 object
        local_dir (string): local directory path to save s3 object
        local_prefix (string): local prefix path append to the local directory
        aws_account (string): aws account of the s3 bucket
        aws_region (string): aws region where the repo is located
        timeout (int): how long to wait for the object to appear before giving up
        limit (int): maximum number of files to download
        fetch_only (lambda): a function to decide if this object should be fetched or not
        training_job_name (string): training job name to query job status

    Returns:
        A list of all downloaded files, as local filenames

    Raises:
        FileNotFoundError: if no matching object appears within `timeout` seconds
        RuntimeError: if `training_job_name` is given and that job fails while waiting
    """
    session = boto3.Session()
    aws_account = aws_account or session.client("sts").get_caller_identity()['Account']
    aws_region = aws_region or session.region_name

    s3 = session.resource('s3')
    sagemaker = session.client('sagemaker')
    bucket = s3.Bucket(s3_bucket)
    objects = []

    print("Waiting for s3://%s/%s..." % (s3_bucket, key), end='', flush=True)
    start_time = time.time()
    cnt = 0
    while len(objects) == 0:
        objects = list(bucket.objects.filter(Prefix=key))
        if fetch_only:
            objects = list(filter(fetch_only, objects))
        if objects:
            continue  # found something; the loop condition will exit
        print('.', end='', flush=True)
        time.sleep(5)
        cnt += 1
        if cnt % 80 == 0:
            print("")  # wrap the progress dots
        if time.time() > start_time + timeout:
            raise FileNotFoundError("S3 object s3://%s/%s never appeared after %d seconds" % (s3_bucket, key, timeout))
        if training_job_name:
            # Bail out early if the producing training job already failed.
            training_job_status = sagemaker.describe_training_job(TrainingJobName=training_job_name)['TrainingJobStatus']
            if training_job_status == 'Failed':
                raise RuntimeError("Training job {} failed while waiting for S3 object s3://{}/{}"
                                   .format(training_job_name, s3_bucket, key))

    print('\n', end='', flush=True)

    if len(objects) > limit:
        # Keep the most recently listed objects.
        print("Only downloading %d of %d files" % (limit, len(objects)))
        objects = objects[-limit:]

    # Make sure the destination directory exists before downloading —
    # download_file fails with a confusing error otherwise.
    os.makedirs(os.path.join(local_dir, local_prefix), exist_ok=True)

    fetched_files = []
    for obj in objects:
        print("Downloading %s" % obj.key)
        local_path = os.path.join(local_dir, local_prefix, obj.key.split('/')[-1])
        obj.Object().download_file(local_path)
        fetched_files.append(local_path)

    return fetched_files


def get_execution_role(role_name="sagemaker", aws_account=None, aws_region=None):
    """
    Create sagemaker execution role to perform sagemaker task

    Args:
        role_name (string): name of the role to be created
        aws_account (string): aws account of the ECR repo
        aws_region (string): aws region where the repo is located

    Returns:
        str: the ARN of the (possibly newly created) role
    """
    session = boto3.Session()
    # Kept for interface compatibility even though IAM is a global service.
    aws_account = aws_account or session.client("sts").get_caller_identity()['Account']
    aws_region = aws_region or session.region_name

    assume_role_policy_document = json.dumps({
        "Version": "2012-10-17",
        "Statement": [
            {
                "Effect": "Allow",
                "Principal": {
                    "Service": ["sagemaker.amazonaws.com", "robomaker.amazonaws.com"]
                },
                "Action": "sts:AssumeRole"
            }
        ]
    })

    client = session.client('iam')
    try:
        client.get_role(RoleName=role_name)
    except client.exceptions.NoSuchEntityException:
        # json.dumps already returns a str; the original's extra str() wrapper
        # was redundant and has been removed.
        client.create_role(
            RoleName=role_name,
            AssumeRolePolicyDocument=assume_role_policy_document
        )

        print("Created new sagemaker execution role: %s" % role_name)

    # Attaching is idempotent, so it is safe to run on every call.
    client.attach_role_policy(
        PolicyArn='arn:aws:iam::aws:policy/AmazonSageMakerFullAccess',
        RoleName=role_name
    )

    return client.get_role(RoleName=role_name)['Role']['Arn']
from rl_coach.agents.clipped_ppo_agent import ClippedPPOAgentParameters
from rl_coach.agents.policy_gradients_agent import PolicyGradientsAgentParameters
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
from rl_coach.graph_managers.graph_manager import ScheduleParameters
from rl_coach.base_parameters import VisualizationParameters, TaskParameters, Frameworks
from rl_coach.utils import short_dynamic_import
from rl_coach.core_types import SelectedPhaseOnlyDumpFilter, MaxDumpFilter, RunPhase
import rl_coach.core_types
from rl_coach import logger
from rl_coach.logger import screen
import argparse
import copy
import logging
import os
import sys
import shutil
import glob
import re

from .configuration_list import ConfigurationList
from rl_coach.coach import CoachLauncher

screen.set_use_colors(False)  # Simple text logging so it looks good in CloudWatch

class CoachConfigurationList(ConfigurationList):
    """Helper Object for converting CLI arguments (or SageMaker hyperparameters)
    into Coach configuration.
    """

    # Being security-paranoid and not instantiating any arbitrary string the customer passes in
    ALLOWED_TYPES = {
        'Frames': rl_coach.core_types.Frames,
        'EnvironmentSteps': rl_coach.core_types.EnvironmentSteps,
        'EnvironmentEpisodes': rl_coach.core_types.EnvironmentEpisodes,
        'TrainingSteps': rl_coach.core_types.TrainingSteps,
        'Time': rl_coach.core_types.Time,
    }


class SageMakerCoachPresetLauncher(CoachLauncher):
    """Base class for training RL tasks using RL-Coach.
    Customers subclass this to define specific kinds of workloads, overriding these methods as needed.
    """

    def __init__(self):
        super().__init__()
        # The rest of the class reads/writes `self.hyperparameters`, but the
        # original only initialised a differently named `self.hyperparams`
        # that nothing else used.  Initialise both: the correctly spelled
        # attribute for the class's own code, and the old name in case an
        # external subclass relied on it.
        self.hyperparams = None
        self.hyperparameters = None

    def get_config_args(self, parser: argparse.ArgumentParser) -> argparse.Namespace:
        """Overrides the default CLI parsing.
        Sets the configuration parameters for what a SageMaker run should do.
        Note, this does not support the "play" mode.

        Returns:
            argparse.Namespace: the fully-populated coach arguments
        """
        # first, convert the parser to a Namespace object with all default values.
        empty_arg_list = []
        args, _ = parser.parse_known_args(args=empty_arg_list)
        parser = self.sagemaker_argparser()
        sage_args, unknown = parser.parse_known_args()

        # Now fill in the args that we care about.
        sagemaker_job_name = os.environ.get("sagemaker_job_name", "sagemaker-experiment")
        args.experiment_name = logger.get_experiment_name(sagemaker_job_name)

        # Override experiment_path used for outputs
        args.experiment_path = '/opt/ml/output/intermediate'
        rl_coach.logger.experiment_path = '/opt/ml/output/intermediate'  # for gifs

        args.checkpoint_save_dir = '/opt/ml/output/data/checkpoint'
        args.checkpoint_save_secs = 10  # should avoid hardcoding
        # onnx for deployment for mxnet (not tensorflow)
        save_model = (sage_args.save_model == 1)
        backend = os.getenv('COACH_BACKEND', 'tensorflow')
        if save_model and backend == "mxnet":
            args.export_onnx_graph = True

        args.no_summary = True

        args.num_workers = sage_args.num_workers
        args.framework = Frameworks[backend]
        args.preset = sage_args.RLCOACH_PRESET
        # args.apply_stop_condition = True # uncomment for old coach behaviour

        self.hyperparameters = CoachConfigurationList()
        if len(unknown) % 2 == 1:
            raise ValueError("Odd number of command-line arguments specified. Key without value.")

        # Remaining CLI args arrive as "--name value" pairs from SageMaker.
        for i in range(0, len(unknown), 2):
            name = unknown[i]
            if name.startswith("--"):
                name = name[2:]
            else:
                raise ValueError("Unknown command-line argument %s" % name)
            val = unknown[i + 1]
            self.map_hyperparameter(name, val)

        return args

    def map_hyperparameter(self, name, value):
        """This is a good method to override where customers can specify custom shortcuts
        for hyperparameters. Default takes everything starting with "rl." and sends it
        straight to the graph manager.
        """
        if name.startswith("rl."):
            self.apply_hyperparameter(name, value)
        else:
            raise ValueError("Unknown hyperparameter %s" % name)

    def apply_hyperparameter(self, name, value):
        """Save this hyperparameter to be applied to the graph_manager object when
        it's ready.
        """
        print("Applying RL hyperparameter %s=%s" % (name, value))
        self.hyperparameters.store(name, value)

    def default_preset_name(self):
        """
        Sub-classes will typically return a single hard-coded string.
        """
        try:
            # TODO: remove this after converting all samples.
            default_preset = self.DEFAULT_PRESET
            screen.warning("Deprecated configuration of default preset. Please implement default_preset_name()")
            return default_preset
        except AttributeError:
            # No legacy DEFAULT_PRESET attribute defined; fall through.
            # (The original used a bare `except:`, which would also have
            # swallowed KeyboardInterrupt/SystemExit.)
            pass
        raise NotImplementedError("Sub-classes must specify the name of the default preset " +
                                  "for this RL problem. This will be the name of a python " +
                                  "file (without .py) that defines a graph_manager variable")

    def sagemaker_argparser(self) -> argparse.ArgumentParser:
        """
        Expose only the CLI arguments that make sense in the SageMaker context.
        """
        parser = argparse.ArgumentParser()

        # Arguably this would be cleaner if we copied the config from the base class argparser.
        parser.add_argument('-n', '--num_workers',
                            help="(int) Number of workers for multi-process based agents, e.g. A3C",
                            default=1,
                            type=int)
        parser.add_argument('-p', '--RLCOACH_PRESET',
                            help="(string) Name of the file with the RLCoach preset",
                            default=self.default_preset_name(),
                            type=str)
        parser.add_argument('--save_model',
                            help="(int) Flag to save model artifact after training finish",
                            default=0,
                            type=int)
        return parser

    def path_of_main_launcher(self):
        """
        A bit of python magic to find the path of the file that launched the current process.
        """
        try:
            launcher_file = os.path.abspath(sys.modules['__main__'].__file__)
            return os.path.dirname(launcher_file)
        except AttributeError:
            # If __main__.__file__ is missing, then we're probably in an interactive python shell
            return os.getcwd()

    def preset_from_name(self, preset_name):
        """Load the `graph_manager` variable from <launcher dir>/<preset_name>.py."""
        launcher_dir = self.path_of_main_launcher()
        print("Loading preset %s from %s" % (preset_name, launcher_dir))
        # The original computed the launcher path twice and discarded the
        # first result; compute it once.
        preset_path = os.path.join(launcher_dir, preset_name) + '.py:graph_manager'
        graph_manager = short_dynamic_import(preset_path, ignore_module_case=True)
        return graph_manager

    def get_graph_manager_from_args(self, args):
        """Build the graph manager for *args*, applying stored hyperparameters
        and propagating the chosen framework to every network."""
        # First get the graph manager for the customer-specified (or default) preset
        graph_manager = self.preset_from_name(args.preset)
        # Now override whatever config is specified in hyperparameters.
        self.hyperparameters.apply_subset(graph_manager, "rl.")
        # Set framework
        # Note: Some graph managers (e.g. HAC preset) create multiple agents
        # and the attribute is called agents_params
        if hasattr(graph_manager, 'agent_params'):
            for network_parameters in graph_manager.agent_params.network_wrappers.values():
                network_parameters.framework = args.framework
        elif hasattr(graph_manager, 'agents_params'):
            for ap in graph_manager.agents_params:
                for network_parameters in ap.network_wrappers.values():
                    network_parameters.framework = args.framework
        return graph_manager
HAC preset) create multiple agents and the attribute is called agents_params 181 | if hasattr(graph_manager, 'agent_params'): 182 | for network_parameters in graph_manager.agent_params.network_wrappers.values(): 183 | network_parameters.framework = args.framework 184 | elif hasattr(graph_manager, 'agents_params'): 185 | for ap in graph_manager.agents_params: 186 | for network_parameters in ap.network_wrappers.values(): 187 | network_parameters.framework = args.framework 188 | return graph_manager 189 | 190 | def _save_tf_model(self): 191 | ckpt_dir = '/opt/ml/output/data/checkpoint' 192 | model_dir = '/opt/ml/model' 193 | 194 | # Re-Initialize from the checkpoint so that you will have the latest models up. 195 | tf.train.init_from_checkpoint(ckpt_dir, 196 | {'main_level/agent/online/network_0/': 'main_level/agent/online/network_0'}) 197 | tf.train.init_from_checkpoint(ckpt_dir, 198 | {'main_level/agent/online/network_1/': 'main_level/agent/online/network_1'}) 199 | 200 | # Create a new session with a new tf graph. 201 | sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) 202 | sess.run(tf.global_variables_initializer()) # initialize the checkpoint. 203 | 204 | # This is the node that will accept the input. 205 | input_nodes = tf.get_default_graph().get_tensor_by_name('main_level/agent/main/online/' + \ 206 | 'network_0/observation/observation:0') 207 | # This is the node that will produce the output. 208 | output_nodes = tf.get_default_graph().get_operation_by_name('main_level/agent/main/online/' + \ 209 | 'network_1/ppo_head_0/policy') 210 | # Save the model as a servable model. 211 | tf.saved_model.simple_save(session=sess, 212 | export_dir='model', 213 | inputs={"observation": input_nodes}, 214 | outputs={"policy": output_nodes.outputs[0]}) 215 | # Move to the appropriate folder. Don't mind the directory, this just works. 216 | # rl-cart-pole is the name of the model. Remember it. 
217 | shutil.move('model/', model_dir + '/model/tf-model/00000001/') 218 | # EASE will pick it up and upload to the right path. 219 | print("Success") 220 | 221 | def _save_onnx_model(self): 222 | from .onnx_utils import fix_onnx_model 223 | ckpt_dir = '/opt/ml/output/data/checkpoint' 224 | model_dir = '/opt/ml/model' 225 | # find latest onnx file 226 | # currently done by name, expected to be changed in future release of coach. 227 | glob_pattern = os.path.join(ckpt_dir, '*.onnx') 228 | onnx_files = [file for file in glob.iglob(glob_pattern, recursive=True)] 229 | if len(onnx_files) > 0: 230 | extract_step = lambda string: int(re.search('/(\d*)_Step.*', string, re.IGNORECASE).group(1)) 231 | onnx_files.sort(key=extract_step) 232 | latest_onnx_file = onnx_files[-1] 233 | # move to model directory 234 | filepath_from = os.path.abspath(latest_onnx_file) 235 | filepath_to = os.path.join(model_dir, "model.onnx") 236 | shutil.move(filepath_from, filepath_to) 237 | fix_onnx_model(filepath_to) 238 | else: 239 | screen.warning("No ONNX files found in {}".format(ckpt_dir)) 240 | 241 | @classmethod 242 | def train_main(cls): 243 | """Entrypoint for training. 244 | Parses command-line arguments and starts training. 245 | """ 246 | trainer = cls() 247 | trainer.launch() 248 | 249 | # Create model artifact for model.tar.gz 250 | parser = trainer.sagemaker_argparser() 251 | sage_args, unknown = parser.parse_known_args() 252 | if sage_args.save_model == 1: 253 | backend = os.getenv('COACH_BACKEND', 'tensorflow') 254 | if backend == 'tensorflow': 255 | trainer._save_tf_model() 256 | if backend == 'mxnet': 257 | trainer._save_onnx_model() 258 | 259 | 260 | class SageMakerCoachLauncher(SageMakerCoachPresetLauncher): 261 | """ 262 | Older version of the launcher that doesn't use preset, but instead effectively has a single preset built in. 
263 | """ 264 | 265 | def __init__(self): 266 | super().__init__() 267 | screen.warning("DEPRECATION WARNING: Please switch to SageMakerCoachPresetLauncher") 268 | #TODO: Remove this whole class when nobody's using it any more. 269 | 270 | def define_environment(self): 271 | return NotImplementedEror("Sub-class must define environment e.g. GymVectorEnvironment(level='your_module:YourClass')") 272 | 273 | def get_graph_manager_from_args(self, args): 274 | """Returns the GraphManager object for coach to use to train by calling improve() 275 | """ 276 | # NOTE: TaskParameters are not configurable at this time. 277 | 278 | # Visualization 279 | vis_params = VisualizationParameters() 280 | self.config_visualization(vis_params) 281 | self.hyperparameters.apply_subset(vis_params, "vis_params.") 282 | 283 | # Schedule 284 | schedule_params = ScheduleParameters() 285 | self.config_schedule(schedule_params) 286 | self.hyperparameters.apply_subset(schedule_params, "schedule_params.") 287 | 288 | # Agent 289 | agent_params = self.define_agent() 290 | self.hyperparameters.apply_subset(agent_params, "agent_params.") 291 | 292 | # Environment 293 | env_params = self.define_environment() 294 | self.hyperparameters.apply_subset(env_params, "env_params.") 295 | 296 | graph_manager = BasicRLGraphManager( 297 | agent_params=agent_params, 298 | env_params=env_params, 299 | schedule_params=schedule_params, 300 | vis_params=vis_params, 301 | ) 302 | 303 | return graph_manager 304 | 305 | def config_schedule(self, schedule_params): 306 | pass 307 | 308 | def define_agent(self): 309 | raise NotImplementedError("Subclass must create define_agent() method which returns an AgentParameters object. 
e.g.\n" \ 310 | " return rl_coach.agents.dqn_agent.DQNAgentParameters()"); 311 | 312 | def config_visualization(self, vis_params): 313 | vis_params.dump_gifs = True 314 | vis_params.video_dump_methods = [SelectedPhaseOnlyDumpFilter(RunPhase.TEST), MaxDumpFilter()] 315 | vis_params.print_networks_summary = True 316 | return vis_params 317 | -------------------------------------------------------------------------------- /sagemaker/source/common/sagemaker_rl/configuration_list.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | 4 | class ConfigurationList(object): 5 | """Helper Object for converting CLI arguments (or SageMaker hyperparameters) 6 | into Coach configuration. 7 | """ 8 | 9 | def __init__(self): 10 | """Args: 11 | - arg_list [list]: list of arguments on the command-line like [key1, value1, key2, value2, ...] 12 | - prefix [str]: Prefix for every key that must be present, e.g. "--" for common command-line args 13 | """ 14 | self.hp_dict = {} 15 | 16 | def store(self, name, value): 17 | """Store a key/value hyperparameter combination 18 | """ 19 | self.hp_dict[name] = value 20 | 21 | def apply_subset(self, config_object, prefix): 22 | """Merges configured hyperparameters in the params dict into the config_object. 23 | Recognized arguments are consumed out of self.hp_dict 24 | 25 | Args: 26 | config_object (obj): will be something like a Coach TaskParameters object, where we're setting properties 27 | params (dict): comes from the command line (and thus customer-specified hyperparameters) 28 | prefix (str): string prefix for which items in params to use. (e.g. "rl.task_params.") 29 | """ 30 | # Materialize a copy of the dict as tuples so we can modify the original dict as we go. 
31 | for key, val in list(self.hp_dict.items()): 32 | if key.startswith(prefix): 33 | logging.debug("Configuring %s with %s=%s" % (prefix, key, val)) 34 | subkey = key[ len(prefix): ] 35 | msg = "%s%s=%s" % (prefix, subkey, val) 36 | try: 37 | self._set_rl_property_value(config_object, subkey, val, prefix) 38 | except: 39 | print("Failure while applying hyperparameter %s" % msg) 40 | raise 41 | del self.hp_dict[key] 42 | 43 | def _set_rl_property_value(self, obj, key, val, path=""): 44 | """Sets a property on obj to val, or to a sub-object within obj if key looks like "foo.bar" 45 | """ 46 | if key.find(".") >= 0: 47 | top_key, sub_keys = key_list = key.split(".",1) 48 | if top_key.startswith("__"): 49 | raise ValueError("Attempting to set unsafe property name %s" % top_key) 50 | if isinstance(obj,dict): 51 | sub_obj = obj[top_key] 52 | else: 53 | sub_obj = obj.__dict__[top_key] 54 | # Recurse 55 | return self._set_rl_property_value(sub_obj, sub_keys, val, "%s.%s" % (path,top_key) ) 56 | else: 57 | key, val = self._parse_type(key,val) 58 | if key.startswith("__"): 59 | raise ValueError("Attempting to set unsafe property name %s" % key) 60 | if isinstance(obj, dict): 61 | obj[key] = val 62 | else: 63 | obj.__dict__[key] = val 64 | 65 | def _autotype(self, val): 66 | """Converts string to an int or float as possible. 67 | """ 68 | if type(val) == dict: 69 | return val 70 | if type(val) == list: 71 | return val 72 | if type(val) == bool: 73 | return val 74 | try: 75 | return int(val) 76 | except ValueError: 77 | pass 78 | try: 79 | return float(val) 80 | except ValueError: 81 | pass 82 | return val 83 | 84 | # Being security-paranoid and not instantiating any arbitrary string the customer passes in 85 | ALLOWED_TYPES = {} 86 | 87 | def _parse_type(self, key, val): 88 | """Converts the val to an appropriately typed Python object. 89 | Automatically detects ints and floats when possible. 
90 | If the key takes the form "foo:bar" then it looks in ALLOWED_TYPES 91 | for an entry of bar, and instantiates one of those objects, passing 92 | val to the constructor. So if key="foo:EnvironmentSteps" then 93 | """ 94 | val = self._autotype(val) 95 | if key.find(":") > 0: 96 | key, obj_type = key.split(":", 1) 97 | cls = self.ALLOWED_TYPES.get(obj_type) 98 | if not cls: 99 | raise ValueError("Unrecognized object type %s. Allowed values are %s" % (obj_type, self.ALLOWED_TYPES.keys())) 100 | val = cls(val) 101 | return key, val 102 | -------------------------------------------------------------------------------- /sagemaker/source/common/sagemaker_rl/docker_utils.py: -------------------------------------------------------------------------------- 1 | import socket 2 | import time 3 | 4 | def get_ip_from_host(timeout=100, host_name=None): 5 | counter = 0 6 | ip_address = None 7 | 8 | if not host_name: 9 | host_name = socket.gethostname() 10 | print("Fetching IP for hostname: %s" % host_name) 11 | while counter < timeout and not ip_address: 12 | try: 13 | ip_address = socket.gethostbyname(host_name) 14 | break 15 | except Exception as e: 16 | counter += 1 17 | time.sleep(1) 18 | 19 | if counter == timeout and not ip_address: 20 | error_string = "Platform Error: Could not retrieve IP address \ 21 | for %s in past %s seconds" % (host_name, timeout) 22 | raise RuntimeError(error_string) 23 | 24 | return ip_address -------------------------------------------------------------------------------- /sagemaker/source/common/sagemaker_rl/onnx_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | ONNX Utils to support multiple output heads in agent networks, until future releases of MXNet support this. 3 | """ 4 | import onnx 5 | from onnx import helper, checker, TensorProto 6 | 7 | 8 | def get_correct_outputs(model): 9 | """ 10 | Collects the relevent outputs of the model, after identifying the type of RL Agent. 
11 | Currently supports continuous PPO, discrete PPO and DQN agents. 12 | """ 13 | graph_name = model.graph.output[0].name 14 | if "_continuousppohead" in graph_name: 15 | print("ONNX correction applied to continuous PPO agent.") 16 | return ppo_continuous_outputs(model) 17 | elif "_discreteppohead" in graph_name: 18 | print("ONNX correction applied to discrete PPO agent.") 19 | return ppo_discrete_outputs(model) 20 | elif "_qhead" in graph_name: 21 | print("ONNX correction not required for DQN agent.") 22 | return model.graph.output 23 | else: 24 | raise Exception("Can't determine the RL Agent used from the ONNX graph provided.") 25 | 26 | 27 | def make_output(node_name, shape): 28 | """ 29 | Given a node name and output shape, will construct the correct Protobuf object. 30 | """ 31 | return helper.make_tensor_value_info( 32 | name=node_name, 33 | elem_type=TensorProto.FLOAT, 34 | shape=shape 35 | ) 36 | 37 | 38 | def ppo_continuous_outputs(model): 39 | """ 40 | Collects the output nodes for continuous PPO. 
41 | """ 42 | # determine number of actions 43 | log_std_node_name = "generalmodel0_singlemodel1_scaledgradhead0_continuousppohead0_log_std" 44 | log_std_node = [i for i in model.graph.input if i.name == log_std_node_name][0] 45 | num_actions = log_std_node.type.tensor_type.shape.dim[0].dim_value 46 | # identify output nodes 47 | value_head_name = "generalmodel0_singlemodel0_scaledgradhead0_vhead0_squeeze0" 48 | value_head = make_output(value_head_name, shape=(1,)) 49 | policy_head_mean_name = "generalmodel0_singlemodel1_scaledgradhead0_continuousppohead0_dense0_fwd" 50 | policy_head_mean = make_output(policy_head_mean_name, shape=(num_actions,)) 51 | policy_head_std_name = "generalmodel0_singlemodel1_scaledgradhead0_continuousppohead0_broadcast_mul0" 52 | policy_head_std = make_output(policy_head_std_name, shape=(num_actions,)) 53 | # collect outputs 54 | output_nodes = [value_head, policy_head_mean, policy_head_std] 55 | return output_nodes 56 | 57 | 58 | def ppo_discrete_outputs(model): 59 | """ 60 | Collects the output nodes for discrete PPO. 61 | """ 62 | # determine number of actions 63 | bias_node_name = "generalmodel0_singlemodel1_scaledgradhead0_discreteppohead0_dense0_bias" 64 | bias_node = [i for i in model.graph.input if i.name == bias_node_name][0] 65 | num_actions = bias_node.type.tensor_type.shape.dim[0].dim_value 66 | # identify output nodes 67 | value_head_name = "generalmodel0_singlemodel0_scaledgradhead0_vhead0_squeeze0" 68 | value_head = make_output(value_head_name, shape=(1,)) 69 | policy_head_name = "generalmodel0_singlemodel1_scaledgradhead0_discreteppohead0_softmax0" 70 | policy_head = make_output(policy_head_name, shape=(num_actions,)) 71 | # collect outputs 72 | output_nodes = [value_head, policy_head] 73 | return output_nodes 74 | 75 | 76 | def save_model(model, output_nodes, filepath): 77 | """ 78 | Given an in memory model, will save to disk at given filepath. 
79 | """ 80 | new_graph = helper.make_graph(nodes=model.graph.node, 81 | name='new_graph', 82 | inputs=model.graph.input, 83 | outputs=output_nodes, 84 | initializer=model.graph.initializer) 85 | checker.check_graph(new_graph) 86 | new_model = helper.make_model(new_graph) 87 | with open(filepath, "wb") as file_handle: 88 | serialized = new_model.SerializeToString() 89 | file_handle.write(serialized) 90 | 91 | 92 | def fix_onnx_model(filepath): 93 | """ 94 | Applies an inplace fix to ONNX file from Coach. 95 | """ 96 | model = onnx.load_model(filepath) 97 | output_nodes = get_correct_outputs(model) 98 | save_model(model, output_nodes, filepath) 99 | -------------------------------------------------------------------------------- /sagemaker/source/common/sagemaker_rl/sage_cluster_communicator.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import os 3 | import io 4 | import json 5 | import time 6 | 7 | 8 | class SageClusterCommunicator(): 9 | def __init__(self): 10 | bucket = os.environ.get("SM_HP_S3_BUCKET", None) 11 | prefix = os.environ.get("SM_HP_S3_PREFIX", None) 12 | aws_region = os.environ.get("SM_HP_AWS_REGION", None) 13 | self.aws_region = boto3.Session().region_name if aws_region is None else aws_region 14 | if bucket is None or prefix is None: 15 | bucket, prefix = self._find_s3_output_path() 16 | self.s3_bucket = bucket 17 | self.s3_prefix = prefix + "/dist-ray" 18 | self.ip_key = "MASTER_IP.json" 19 | self.done_file_key = "CONFIG_DONE" 20 | 21 | def get_client(self): 22 | session = boto3.session.Session() 23 | return session.client('s3', region_name=self.aws_region) 24 | 25 | def _get_s3_key(self, key): 26 | return os.path.normpath(self.s3_prefix + "/config/" + key) 27 | 28 | def _required_environment_param(self, parameter_name): 29 | SM_TRAINING_ENV = json.loads(os.environ.get("SM_TRAINING_ENV")) 30 | value = SM_TRAINING_ENV.get(parameter_name, None) 31 | if not value: 32 | raise 
ValueError("Missing enrironment parameter '%s'" % parameter_name) 33 | return value 34 | 35 | def _find_s3_output_path(self): 36 | """Looks in SageMaker hyperparameters for the S3 output path. 37 | Uses SM module directory to extract the output path. 38 | Returns: 39 | tuple (bucket, prefix) 40 | """ 41 | module_dir_s3_path = self._required_environment_param("module_dir") 42 | if not module_dir_s3_path.startswith('s3://'): 43 | raise ValueError('Unexpected format for module_dir_s3_path. Expected "s3://...') 44 | bucket_prefix = module_dir_s3_path.replace("s3://", "") 45 | bucket, key = bucket_prefix.split('/', 1) 46 | prefix = "/".join(key.split("/")[:-2]) 47 | if prefix == "": 48 | # {bucket}/{job_name}/source/sourcedir.tar.gz structure not present 49 | prefix = self._required_environment_param("job_name") 50 | return (bucket, prefix) 51 | 52 | def create_s3_signal(self, signal): 53 | s3_client = self.get_client() 54 | s3_client.upload_fileobj(io.BytesIO(b''), self.s3_bucket, self._get_s3_key(signal)) 55 | 56 | def wait_for_signals(self, signals, timeout=600, sleep_time=5): 57 | if len(signals) == 0: 58 | return 59 | s3_client = self.get_client() 60 | time_elapsed = 0 61 | while True: 62 | keys_found = 0 63 | for signal in signals: 64 | response = s3_client.list_objects(Bucket=self.s3_bucket, Prefix=self._get_s3_key(signal)) 65 | if "Contents" in response: 66 | keys_found += 1 67 | if keys_found != len(signals): 68 | time.sleep(sleep_time) 69 | time_elapsed += sleep_time 70 | if time_elapsed >= timeout: 71 | raise RuntimeError( 72 | "Could not find all the signals: %s for last %s seconds" % (signals, time_elapsed)) 73 | else: 74 | print("Received all signal[s]: %s" % signals) 75 | return 76 | 77 | def write_host_config(self, ip, host_name): 78 | s3_client = self.get_client() 79 | data = {"IP": ip, "HOST_NAME": host_name} 80 | json_blob = json.dumps(data) 81 | file_handle = io.BytesIO(json_blob.encode()) 82 | file_handle_done = io.BytesIO(b'done') 83 | 
s3_client.upload_fileobj(file_handle, self.s3_bucket, self._get_s3_key(self.ip_key)) 84 | s3_client.upload_fileobj(file_handle_done, self.s3_bucket, self._get_s3_key(self.done_file_key)) 85 | 86 | def get_master_config(self): 87 | s3_client = self.get_client() 88 | self._wait_for_ip_upload() 89 | # Wait for new IP address if using spot instace 90 | time.sleep(20) 91 | try: 92 | s3_client.download_file(self.s3_bucket, self._get_s3_key(self.ip_key), 'ip.json') 93 | with open("ip.json") as f: 94 | json_obj = json.load(f) 95 | ip = json_obj["IP"] 96 | host_name = json_obj["HOST_NAME"] 97 | return ip, host_name 98 | except Exception as e: 99 | raise RuntimeError("Cannot fetch IP of redis server running in SageMaker:", e) 100 | 101 | def _wait_for_ip_upload(self, timeout=1200): 102 | s3_client = self.get_client() 103 | time_elapsed = 0 104 | while True: 105 | response = s3_client.list_objects(Bucket=self.s3_bucket, Prefix=self._get_s3_key(self.done_file_key)) 106 | if "Contents" not in response: 107 | time.sleep(1) 108 | time_elapsed += 1 109 | if time_elapsed % 5 == 0: 110 | print("Waiting for SageMaker Redis server IP... 
Time elapsed: %s seconds" % time_elapsed) 111 | if time_elapsed >= timeout: 112 | raise RuntimeError("Cannot retrieve IP of redis server running in SageMaker") 113 | else: 114 | return 115 | 116 | def download_file(self, s3_key, local_path): 117 | s3_client = self.get_client() 118 | try: 119 | s3_client.download_file(self.s3_bucket, s3_key, local_path) 120 | return True 121 | except Exception as e: 122 | return False 123 | 124 | def upload_file(self, s3_key, local_path): 125 | s3_client = self.get_client() 126 | try: 127 | s3_client.upload_file(Filename=local_path, 128 | Bucket=self.s3_bucket, 129 | Key=s3_key) 130 | return True 131 | except Exception as e: 132 | return False 133 | -------------------------------------------------------------------------------- /sagemaker/source/common/sagemaker_rl/stable_baselines_launcher.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import roboschool 3 | import os 4 | 5 | from gym.wrappers.monitoring.video_recorder import VideoRecorder 6 | from stable_baselines.ppo1 import PPO1 7 | from stable_baselines.common import set_global_seeds 8 | from stable_baselines.bench import Monitor 9 | from stable_baselines.common import tf_util 10 | from stable_baselines.common.policies import MlpPolicy 11 | from mpi4py import MPI 12 | 13 | 14 | class RewScale(gym.RewardWrapper): 15 | def __init__(self, env, scale): 16 | gym.RewardWrapper.__init__(self, env) 17 | self.scale = scale 18 | 19 | def reward(self, _reward): 20 | return _reward * self.scale 21 | 22 | 23 | class SagemakerStableBaselinesLauncher(): 24 | """ 25 | Sagemaker's Stable Baselines Launcher. 
26 | """ 27 | 28 | def __init__(self, env, output_path, model, num_timesteps): 29 | self._env = env 30 | self._output_path = output_path 31 | self._model = model 32 | self._num_timesteps = num_timesteps 33 | 34 | def _train(self): 35 | """Train the RL model 36 | """ 37 | self._model.learn(total_timesteps=self._num_timesteps) 38 | 39 | def _predict(self, model, video_path): 40 | """Run predictions on trained RL model. 41 | """ 42 | 43 | vr = VideoRecorder(env=self._env, path="{}/rl_out.mp4".format(video_path, str(MPI.COMM_WORLD.Get_rank())), 44 | enabled=True) 45 | obs = self._env.reset() 46 | for i in range(1000): 47 | action, _states = model.predict(obs) 48 | obs, rewards, dones, info = self._env.step(action) 49 | if dones: 50 | obs = self._env.reset() 51 | self._env.render(mode='rgb_array') 52 | vr.capture_frame() 53 | vr.close() 54 | self._env.close() 55 | 56 | def run(self): 57 | 58 | self._train() 59 | 60 | if MPI.COMM_WORLD.Get_rank() == 0: 61 | self._predict(self._model, self._output_path) 62 | 63 | 64 | class SagemakerStableBaselinesPPO1Launcher(SagemakerStableBaselinesLauncher): 65 | """ 66 | Sagemaker's Stable Baselines PPO1 Launcher. 
67 | """ 68 | 69 | def __init__(self, env, output_path, timesteps_per_actorbatch, 70 | clip_param, entcoeff, optim_epochs, 71 | optim_stepsize, optim_batchsize, 72 | gamma, lam, schedule, 73 | verbose, num_timesteps): 74 | print( 75 | "Initializing PPO with output_path: {} and Hyper Params [timesteps_per_actorbatch: {},clip_param: {}, " 76 | "entcoeff: {}, optim_epochs: {}, optim_stepsize: {}, optim_batchsize: {}, gamma: {}, lam: {}, " 77 | "schedule: {}, verbose: {}, num_timesteps: {}]".format(output_path, timesteps_per_actorbatch, 78 | clip_param, entcoeff, optim_epochs, 79 | optim_stepsize, optim_batchsize, 80 | gamma, lam, schedule, 81 | verbose, num_timesteps)) 82 | super().__init__(env, output_path, 83 | PPO1(policy=MlpPolicy, 84 | env=env, 85 | gamma=gamma, 86 | timesteps_per_actorbatch=timesteps_per_actorbatch, 87 | clip_param=clip_param, 88 | entcoeff=entcoeff, 89 | optim_epochs=optim_epochs, 90 | optim_stepsize=optim_stepsize, 91 | optim_batchsize=optim_batchsize, 92 | lam=lam, 93 | schedule=schedule, 94 | verbose=verbose), 95 | num_timesteps) 96 | 97 | 98 | def create_env(env_id, output_path, seed=0): 99 | rank = MPI.COMM_WORLD.Get_rank() 100 | set_global_seeds(seed + 10000 * rank) 101 | env = gym.make(env_id) 102 | env = Monitor(env, os.path.join(output_path, str(rank)), allow_early_resets=True) 103 | env.seed(seed) 104 | return env 105 | -------------------------------------------------------------------------------- /sagemaker/source/common/sagemaker_rl/tf_serving_utils.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import ray 3 | import os 4 | import re 5 | 6 | 7 | def atoi(text): 8 | return int(text) if text.isdigit() else text 9 | 10 | 11 | def natural_keys(text): 12 | return [atoi(c) for c in re.split('(\d+)', text)] 13 | 14 | 15 | def change_permissions_recursive(path, mode): 16 | for root, dirs, files in os.walk(path, topdown=False): 17 | for dir in [os.path.join(root, d) for 
d in dirs]: 18 | os.chmod(dir, mode) 19 | for file in [os.path.join(root, f) for f in files]: 20 | os.chmod(file, mode) 21 | 22 | 23 | def export_tf_serving(agent, output_dir): 24 | if ray.__version__ >= "0.8.2": 25 | agent.export_policy_model(os.path.join(output_dir, "1")) 26 | else: 27 | policy = agent.local_evaluator.policy_map["default"] 28 | input_signature = {} 29 | input_signature["observations"] = tf.saved_model.utils.build_tensor_info(policy.observations) 30 | 31 | output_signature = {} 32 | output_signature["actions"] = tf.saved_model.utils.build_tensor_info(policy.sampler) 33 | output_signature["logits"] = tf.saved_model.utils.build_tensor_info(policy.logits) 34 | 35 | signature_def = ( 36 | tf.saved_model.signature_def_utils.build_signature_def( 37 | input_signature, output_signature, 38 | tf.saved_model.signature_constants.PREDICT_METHOD_NAME)) 39 | signature_def_key = (tf.saved_model.signature_constants. 40 | DEFAULT_SERVING_SIGNATURE_DEF_KEY) 41 | signature_def_map = {signature_def_key: signature_def} 42 | 43 | with policy.sess.graph.as_default(): 44 | builder = tf.saved_model.builder.SavedModelBuilder(os.path.join(output_dir, "1")) 45 | builder.add_meta_graph_and_variables( 46 | policy.sess, [tf.saved_model.tag_constants.SERVING], 47 | signature_def_map=signature_def_map) 48 | builder.save() 49 | print("Saved TensorFlow serving model!") 50 | -------------------------------------------------------------------------------- /sagemaker/source/common/setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | sudo -n true 4 | if [ $? -eq 0 ]; then 5 | echo "The user has root access." 6 | else 7 | echo "The user does not have root access. Everything required to run the notebook is already installed and setup. We are good to go!" 8 | exit 0 9 | fi 10 | 11 | # Do we have GPU support? 12 | nvidia-smi > /dev/null 2>&1 13 | if [ $? 
-eq 0 ]; then
    # check if we have nvidia-docker
    NVIDIA_DOCKER=`rpm -qa | grep -c nvidia-docker2`
    # Quoted expansions below: an empty command substitution would otherwise
    # make `[` fail with a syntax error instead of taking the else branch.
    if [ "$NVIDIA_DOCKER" -eq 0 ]; then
        # Install nvidia-docker2
        DOCKER_VERSION=`yum list docker | tail -1 | awk '{print $2}' | head -c 2`

        if [ "$DOCKER_VERSION" -eq 17 ]; then
            DOCKER_PKG_VERSION='17.09.1ce-1.111.amzn1'
            NVIDIA_DOCKER_PKG_VERSION='2.0.3-1.docker17.09.1.ce.amzn1'
        else
            DOCKER_PKG_VERSION='18.06.1ce-3.17.amzn1'
            NVIDIA_DOCKER_PKG_VERSION='2.0.3-1.docker18.06.1.ce.amzn1'
        fi

        sudo yum -y remove docker
        sudo yum -y install docker-$DOCKER_PKG_VERSION

        sudo /etc/init.d/docker start

        curl -s -L https://nvidia.github.io/nvidia-docker/amzn1/nvidia-docker.repo | sudo tee /etc/yum.repos.d/nvidia-docker.repo
        sudo yum install -y nvidia-docker2-$NVIDIA_DOCKER_PKG_VERSION
        sudo cp daemon.json /etc/docker/daemon.json
        sudo pkill -SIGHUP dockerd
        echo "installed nvidia-docker2"
    else
        echo "nvidia-docker2 already installed. We are good to go!"
    fi
fi

# This is common for both GPU and CPU instances

# check if we have docker-compose
docker-compose version >/dev/null 2>&1
if [ $? -ne 0 ]; then
    # install docker compose
    pip install docker-compose
fi

# check if we need to configure our docker interface
SAGEMAKER_NETWORK=`docker network ls | grep -c sagemaker-local`
if [ "$SAGEMAKER_NETWORK" -eq 0 ]; then
    docker network create --driver bridge sagemaker-local
fi

# Notebook instance Docker networking fixes
RUNNING_ON_NOTEBOOK_INSTANCE=`sudo iptables -S OUTPUT -t nat | grep -c 169.254.0.2`

# Get the Docker Network CIDR and IP for the sagemaker-local docker interface.
SAGEMAKER_INTERFACE=br-`docker network ls | grep sagemaker-local | cut -d' ' -f1`
DOCKER_NET=`ip route | grep $SAGEMAKER_INTERFACE | cut -d" " -f1`
DOCKER_IP=`ip route | grep $SAGEMAKER_INTERFACE | cut -d" " -f12`

# check if both IPTables and the Route Table are OK.
IPTABLES_PATCHED=`sudo iptables -S PREROUTING -t nat | grep -c $SAGEMAKER_INTERFACE`
ROUTE_TABLE_PATCHED=`sudo ip route show table agent | grep -c $SAGEMAKER_INTERFACE`

if [ "$RUNNING_ON_NOTEBOOK_INSTANCE" -gt 0 ]; then

    if [ "$ROUTE_TABLE_PATCHED" -eq 0 ]; then
        # fix routing
        sudo ip route add $DOCKER_NET via $DOCKER_IP dev $SAGEMAKER_INTERFACE table agent
    else
        echo "SageMaker instance route table setup is ok. We are good to go."
    fi

    if [ "$IPTABLES_PATCHED" -eq 0 ]; then
        sudo iptables -t nat -A PREROUTING -i $SAGEMAKER_INTERFACE -d 169.254.169.254/32 -p tcp -m tcp --dport 80 -j DNAT --to-destination 169.254.0.2:9081
        echo "iptables for Docker setup done"
    else
        echo "SageMaker instance routing for Docker is ok. We are good to go!"
    fi
fi

# --------------------------------------------------------------------------------
# /sagemaker/source/custom/Readme.md
# --------------------------------------------------------------------------------
# Contents of this folder are presented for reference only.
import numpy as np

from ray.rllib.agents.trainer import Trainer, with_common_config
from ray.rllib.utils.annotations import override

# NOTE: adapted from
# https://github.com/ray-project/ray/blob/master/rllib/contrib/random_agent/random_agent.py

# yapf: disable
# __sphinx_doc_begin__
class CustomRandomAgent(Trainer):
    """Trainer that samples uniformly random actions and never learns."""

    _name = "CustomRandomAgent"
    _default_config = with_common_config({
        "rollouts_per_iteration": 10,
    })

    @override(Trainer)
    def _init(self, config, env_creator):
        # Build the environment once; it is reused for every rollout.
        self.env = env_creator(config["env_config"])

    @override(Trainer)
    def _train(self):
        """Run `rollouts_per_iteration` episodes and report mean return."""
        episode_returns = []
        step_count = 0
        for _ in range(self.config["rollouts_per_iteration"]):
            self.env.reset()
            episode_return = 0.0
            done = False
            while not done:
                _, r, done, _ = self.env.step(self.env.action_space.sample())
                episode_return += r
                step_count += 1
            episode_returns.append(episode_return)
        return {
            "episode_reward_mean": np.mean(episode_returns),
            "timesteps_this_iter": step_count,
        }
# __sphinx_doc_end__
# don't enable yapf after, it's buggy here


if __name__ == "__main__":
    trainer = CustomRandomAgent(
        env="CartPole-v0", config={"rollouts_per_iteration": 10})
    result = trainer.train()
    assert result["episode_reward_mean"] > 10, result
    print("Test: OK")
24 | self.w = [] 25 | 26 | def compute_actions( 27 | self, 28 | obs_batch, 29 | state_batches, 30 | prev_action_batch=None, 31 | prev_reward_batch=None, 32 | info_batch=None, 33 | episodes=None, 34 | **kwargs 35 | ): 36 | """Return the action for a batch 37 | 38 | Returns: 39 | action_batch: List of actions for the batch 40 | rnn_states: List of RNN states if any 41 | info: Additional info 42 | """ 43 | action_batch = [] 44 | rnn_states = [] 45 | info = {} 46 | for _ in obs_batch: 47 | action_batch.append(self.action_space.sample()) 48 | return action_batch, rnn_states, info 49 | 50 | def learn_on_batch(self, samples): 51 | """Fused compute gradients and apply gradients call. 52 | 53 | Either this or the combination of compute/apply grads must be 54 | implemented by subclasses. 55 | 56 | Returns: 57 | grad_info: dictionary of extra metadata from compute_gradients(). 58 | Examples: 59 | >>> batch = ev.sample() 60 | >>> ev.learn_on_batch(samples) 61 | 62 | Reference: https://github.com/ray-project/ray/blob/master/rllib/policy/policy.py#L279-L316 63 | """ 64 | # implement your learning code here 65 | return {} 66 | 67 | def get_weights(self): 68 | """Returns model weights. 69 | 70 | Returns: 71 | weights (obj): Serializable copy or view of model weights 72 | """ 73 | return {"w": self.w} 74 | 75 | def set_weights(self, weights): 76 | """Returns the current exploration information of this policy. 77 | 78 | This information depends on the policy's Exploration object. 79 | 80 | Returns: 81 | any: Serializable information on the `self.exploration` object. 
82 | """ 83 | self.w = weights["w"] 84 | -------------------------------------------------------------------------------- /sagemaker/source/custom/algorithms/random_policy/readme.md: -------------------------------------------------------------------------------- 1 | # Writing a custom policy 2 | 3 | For more information on writing custom policies, please refer https://docs.ray.io/en/master/rllib-concepts.html 4 | 5 | This directory contains the example code for implementing a custom random policy. Here, the agent never learns and outputs random actions for every observation. 6 | 7 | ## Directory structure 8 | 9 | ``` 10 | . 11 | └── algorithms # Directory containing code for custom algorithms 12 |    ├── __init__.py 13 |    ├── random_policy # Python module for random policy 14 |    │   ├── __init__.py 15 |    │   ├── policy.py # Code for random policy 16 |    │   └── trainer.py # Training wrapper for the random policy 17 |    └── registry.py 18 | ``` 19 | 20 | ## How to start? 21 | 22 | - Go through `policy.py` that has most of what you are looking for. `trainer.py` is just a training wrapper around the policy. 23 | - Once the policy is implemented, you need to register the policy with `rllib`. You can do this by adding your policy trainer class to `registry.py`. -------------------------------------------------------------------------------- /sagemaker/source/custom/algorithms/random_policy/trainer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from ray.rllib.agents.trainer_template import build_trainer 4 | from .policy import RandomPolicy 5 | 6 | DEFAULT_CONFIG = ( 7 | {} 8 | ) # Default config parameters that can be overriden by experiments YAML. 
"""
Registry of custom implemented algorithms names

Please refer to the following examples to add your custom algorithms :

- AlphaZero : https://github.com/ray-project/ray/tree/master/rllib/contrib/alpha_zero
- bandits : https://github.com/ray-project/ray/tree/master/rllib/contrib/bandits
- maddpg : https://github.com/ray-project/ray/tree/master/rllib/contrib/maddpg
- random_agent: https://github.com/ray-project/ray/tree/master/rllib/contrib/random_agent

An example integration of the random agent is shown here :
- https://github.com/AIcrowd/neurips2020-procgen-starter-kit/tree/master/algorithms/custom_random_agent
"""


def _import_custom_random_agent():
    # Lazy import: loading the registry must not pull in ray/rllib.
    from .custom_random_agent.custom_random_agent import CustomRandomAgent

    return CustomRandomAgent


def _import_random_policy():
    # Lazy import: loading the registry must not pull in ray/rllib.
    from .random_policy.trainer import RandomPolicyTrainer

    return RandomPolicyTrainer


# Maps the algorithm name used in experiment YAMLs to a zero-arg loader
# that returns the trainer class.
CUSTOM_ALGORITHMS = {
    "custom/CustomRandomAgent": _import_custom_random_agent,
    "RandomPolicy": _import_random_policy,
}
DefaultCallbacks

import numpy as np


class CustomCallbacks(DefaultCallbacks):
    """No-op callback hooks for custom metrics and postprocessing.

    Please refer to:
    https://github.com/ray-project/ray/blob/master/rllib/examples/custom_metrics_and_callbacks.py
    https://docs.ray.io/en/latest/rllib-training.html#callbacks-and-custom-metrics
    for examples on adding your custom metrics and callbacks.

    The per-hook documentation is adapted from:
    https://github.com/ray-project/ray/blob/master/rllib/agents/callbacks.py

    Every hook below is intentionally a no-op; fill in the ones you need.
    """

    def on_episode_start(self, worker: RolloutWorker, base_env: BaseEnv,
                         policies: Dict[str, Policy],
                         episode: MultiAgentEpisode, **kwargs):
        """Runs on the rollout worker before each episode starts.

        `base_env.get_unwrapped()` exposes the underlying env;
        `episode.user_data` may hold temporary per-episode state and
        `episode.custom_metrics` collects custom metrics. In single-agent
        mode `policies` maps only a single "default" policy. `kwargs` is a
        forward-compatibility placeholder.
        """
        pass

    def on_episode_step(self, worker: RolloutWorker, base_env: BaseEnv,
                        episode: MultiAgentEpisode, **kwargs):
        """Runs on each episode step.

        Same `episode.user_data` / `episode.custom_metrics` conventions as
        `on_episode_start`. `kwargs` is a forward-compatibility placeholder.
        """
        pass

    def on_episode_end(self, worker: RolloutWorker, base_env: BaseEnv,
                       policies: Dict[str, Policy],
                       episode: MultiAgentEpisode, **kwargs):
        """Runs when an episode is done.

        Same argument conventions as `on_episode_start`.
        """
        # Example of deriving a custom metric from the final observation:
        # last_obs = episode.last_observation_for()
        # episode.custom_metrics["observation_mean"] = last_obs.mean()
        pass

    def on_postprocess_trajectory(
            self, worker: RolloutWorker, episode: MultiAgentEpisode,
            agent_id: str, policy_id: str,
            policies: Dict[str, Policy], postprocessed_batch: SampleBatch,
            original_batches: Dict[str, SampleBatch], **kwargs):
        """Called immediately after a policy's postprocess_fn is called.

        Useful for extra per-policy postprocessing, including inspecting the
        trajectories of other agents in multi-agent settings.
        `postprocessed_batch` may be mutated to apply your own trajectory
        postprocessing; `original_batches` (unpostprocessed data per agent)
        must not be mutated. `kwargs` is a forward-compatibility placeholder.
        """
        pass

    def on_sample_end(self, worker: RolloutWorker, samples: SampleBatch,
                      **kwargs):
        """Called at the end of RolloutWorker.sample().

        `samples` is the batch about to be returned; it may be mutated to
        modify the generated samples. `kwargs` is a forward-compatibility
        placeholder.
        """
        pass

    def on_train_result(self, trainer, result: dict, **kwargs):
        """Called at the end of Trainable.train().

        `result` is the dict returned from trainer.train() and may be
        mutated to add additional metrics. `kwargs` is a
        forward-compatibility placeholder.
        """
        # Example: print the mean timesteps throughput for easy log grepping:
        # print("=============================================================")
        # print(" Timesteps Throughput : {} ts/sec".format(TBD))
        # print("=============================================================")
        pass
from ray.tune import registry
from procgen.env import ENV_NAMES as VALID_ENV_NAMES


class ProcgenEnvWrapper(gym.Env):
    """Gym wrapper around a procgen environment.

    Merges the caller-supplied ``config`` over procgen's documented defaults,
    builds the env via ``gym.make`` and forwards the standard gym API to it.
    """

    def __init__(self, config):
        # Defaults mirror procgen's env kwargs; see the comments for what
        # each option controls.
        self._default_config = {
            "num_levels": 0,  # Number of unique levels; 0 means unlimited.
            "env_name": "coinrun",  # Which procgen game to instantiate.
            "start_level": 0,  # Lowest level seed; with num_levels this fully specifies the level set.
            "paint_vel_info": False,  # Paint player velocity in the top-left corner (game-specific).
            "use_generated_assets": False,  # Randomly generated assets instead of human-designed ones.
            "center_agent": True,  # Center observations on the agent (override at your own risk).
            "use_sequential_levels": False,  # Chain levels into one continuous episode.
            # "easy" | "hard" | "extreme" | "memory" | "exploration"; all games
            # support "easy"/"hard". NOTE: during rollout evaluation this is
            # always overridden to "easy".
            "distribution_mode": "easy",
        }
        self.config = self._default_config
        self.config.update(config)

        # env_name is consumed here; the remaining keys are passed straight
        # through to gym.make as procgen kwargs.
        self.env_name = self.config.pop("env_name")

        assert self.env_name in VALID_ENV_NAMES, \
            f"unknown procgen env {self.env_name!r}"

        env = gym.make(f"procgen:procgen-{self.env_name}-v0", **self.config)
        self.env = env
        # Expose metadata so video-recording wrappers keep working.
        self.metadata = self.env.metadata

        self.action_space = self.env.action_space
        self.observation_space = self.env.observation_space
        self._done = True

    def reset(self):
        # procgen envs cannot be reset mid-episode; guard against it.
        assert self._done, "procgen envs cannot be early-restarted"
        return self.env.reset()

    def step(self, action):
        obs, rew, done, info = self.env.step(action)
        self._done = done
        return obs, rew, done, info

    def render(self, mode="human"):
        return self.env.render(mode=mode)

    def close(self):
        return self.env.close()

    def seed(self, seed=None):
        return self.env.seed(seed)

    def __repr__(self):
        # BUG FIX: the original returned `self.env.__repr()`, which raises
        # AttributeError (`__repr` does not exist); use the repr() protocol.
        return repr(self.env)

    @property
    def spec(self):
        return self.env.spec


# Register Env in Ray
registry.register_env(
    "procgen_env_wrapper",
    lambda config: ProcgenEnvWrapper(config)
)
12 | 13 | ### Example 14 | 15 | A simple example to use framestack will be 16 | 17 | ```python 18 | from gym.wrappers import FrameStack 19 | from ray.tune import registry 20 | 21 | from envs.procgen_env_wrapper import ProcgenEnvWrapper 22 | 23 | # Register Env in Ray 24 | registry.register_env( 25 | "stacked_procgen_env", # This should be different from procgen_env_wrapper 26 | lambda config: FrameStack(ProcgenEnvWrapper(config), 4) 27 | ) 28 | ``` 29 | 30 | You can point to `stacked_procgen_env` instead of `procgen_env_wrapper` in your 31 | experiment config file in order to use the env with the wrapper. 32 | 33 | ### Note 34 | - If you do not use `ProcgenEnvWrapper` as your base env, the 35 | rollouts will fail. 36 | - Please do not edit `procgen_env_wrapper.py` file. All the changes 37 | you make to this file will be dropped during the evaluation. 38 | -------------------------------------------------------------------------------- /sagemaker/source/custom/experiments/procgen-starter-example.yaml: -------------------------------------------------------------------------------- 1 | procgen-starter-example: 2 | env: procgen_env_wrapper # Change this at your own risk :D 3 | run: PPO 4 | # Can be replaced by any of the available agents as described at : 5 | # https://github.com/ray-project/ray/blob/master/rllib/agents/registry.py#L103 6 | # 7 | # Internally, rllib uses the terminology of Trainable, Algorithms, Agents depending 8 | # on the context in which it is used. In this repository, we will consistently 9 | # use the terminology of "Algorithms" to refer to these Trainables/Agents. 
10 | # 11 | # This can also be replaced by a custom "algorithm" 12 | # For addition of custom algorithms, 13 | # Please refer to : 14 | # https://github.com/AIcrowd/neurips2020-procgen-starter-kit/blob/master/algorithms/registry.py 15 | ################################################ 16 | # === Stop Conditions === 17 | ################################################ 18 | stop: 19 | timesteps_total: 3000 # 100 20 | 21 | ################################################ 22 | # === Settings for Checkpoints === 23 | ################################################ 24 | checkpoint_freq: 1 25 | checkpoint_at_end: True 26 | keep_checkpoints_num: 5 27 | 28 | config: 29 | ################################################ 30 | ################################################ 31 | # === Settings for the Procgen Environment === 32 | ################################################ 33 | ################################################ 34 | env_config: 35 | # Name of the procgen environment to train on # Note, that this parameter will be overriden during the evaluation by the AIcrowd evaluators. 36 | env_name: coinrun 37 | # The number of unique levels that can be generated. Set to 0 to use unlimited levels 38 | num_levels: 0 39 | # The lowest seed that will be used to generated levels. 'start_level' and 'num_levels' fully specify the set of possible levels 40 | start_level: 0 41 | # Paint player velocity info in the top left corner. Only supported by certain games. 42 | paint_vel_info: False 43 | # Use randomly generated assets in place of human designed assets 44 | use_generated_assets: False 45 | # center_agent : Determines whether observations are centered on the agent or display the full level. Override at your own risk. 46 | center_agent: True 47 | # sequential levels : When you reach the end of a level, the episode is ended and a new level is selected. 
If use_sequential_levels is set to True, reaching the end of a level does not end the episode, and the seed for the new level is derived from the current level seed. If you combine this with start_level= and num_levels=1, you can have a single linear series of levels similar to a gym-retro or ALE game. 48 | use_sequential_levels: False 49 | # What variant of the levels to use, the options are "easy", "hard", "extreme", "memory", "exploration". All games support "easy" and "hard", while other options are game-specific. The default is "hard". Switching to "easy" will reduce the number of timesteps required to solve each game and is useful for testing or when working with limited compute resources. NOTE : During the evaluation phase (rollout), this will always be overriden to "easy" 50 | distribution_mode: easy 51 | 52 | ################################################ 53 | ################################################ 54 | # === Environment Settings === 55 | ################################################ 56 | ################################################ 57 | # Discount factor of the MDP. 58 | gamma: 0.99 59 | # The default learning rate. 60 | lr: 0.0001 61 | # Number of steps after which the episode is forced to terminate. Defaults 62 | # to `env.spec.max_episode_steps` (if present) for Gym envs. 63 | horizon: null 64 | # Calculate rewards but don't reset the environment when the horizon is 65 | # hit. This allows value estimation and RNN state to span across logical 66 | # episodes denoted by horizon. This only has an effect if horizon != inf. 67 | soft_horizon: False 68 | # Don't set 'done' at the end of the episode. Note that you still need to 69 | # set this if soft_horizon=True, unless your env is actually running 70 | # forever without returning done=True. 
71 | no_done_at_end: False 72 | 73 | # Unsquash actions to the upper and lower bounds of env's action space 74 | normalize_actions: False 75 | # Whether to clip rewards prior to experience postprocessing. Setting to 76 | # None means clip for Atari only. 77 | clip_rewards: null 78 | # Whether to np.clip() actions to the action space low/high range spec. 79 | clip_actions: True 80 | # Whether to use rllib or deepmind preprocessors by default 81 | preprocessor_pref: deepmind 82 | 83 | # Whether to attempt to continue training if a worker crashes. The number 84 | # of currently healthy workers is reported as the "num_healthy_workers" 85 | # metric. 86 | ignore_worker_failures: False 87 | # Log system resource metrics to results. This requires `psutil` to be 88 | # installed for sys stats, and `gputil` for GPU metrics. 89 | # Note : The AIcrowd Evaluators will always override this to be True 90 | log_sys_usage: True 91 | 92 | # Use PyTorch (instead of tf). If using `rllib train`, this can also be 93 | # enabled with the `--torch` flag. 94 | # NOTE: Some agents may not support `torch` yet and throw an error. 95 | use_pytorch: False 96 | 97 | ################################################ 98 | ################################################ 99 | # === Settings for Model === 100 | ################################################ 101 | ################################################ 102 | model: 103 | # === Built-in options === 104 | # More information on built in Models are available here : 105 | # https://ray.readthedocs.io/en/stable/rllib-models.html#built-in-models-and-preprocessors 106 | # 107 | # Filter config. 
List of [out_channels, kernel, stride] for each filter 108 | conv_filters: 109 | - [16, [3, 3], 3] 110 | - [16, [3, 3], 1] 111 | # Nonlinearity for built-in convnet 112 | conv_activation: relu 113 | # Nonlinearity for fully connected net (tanh, relu) 114 | fcnet_activation: tanh 115 | # Number of hidden layers for fully connected net 116 | fcnet_hiddens: [128, 128] 117 | # Whether to skip the final linear layer used to resize the hidden layer 118 | # outputs to size `num_outputs`. If True, then the last hidden layer 119 | # should already match num_outputs. 120 | no_final_linear: false 121 | # Whether layers should be shared for the value function 122 | vf_share_layers: true 123 | 124 | # == LSTM == 125 | # Whether to wrap the model with a LSTM 126 | use_lstm: false 127 | # Max seq len for training the LSTM, defaults to 20 128 | max_seq_len: 20 129 | # Size of the LSTM cell 130 | lstm_cell_size: 256 131 | # Whether to feed a_{t-1}, r_{t-1} to LSTM 132 | lstm_use_prev_action_reward: false 133 | # When using modelv1 models with a modelv2 algorithm, you may have to 134 | # define the state shape here (e.g., [256, 256]). 135 | state_shape: null 136 | 137 | # === Options for custom models === 138 | # Name of a custom model to use 139 | # 140 | # Custom Models can be implemented in the models/ folder. 141 | # Please refer to : 142 | # https://github.com/AIcrowd/neurips2020-procgen-starter-kit/blob/master/models/my_vision_network.py 143 | # for an example. 144 | # 145 | # RLlib documentation on implementing custom Models is available 146 | # here : 147 | # https://ray.readthedocs.io/en/stable/rllib-models.html#custom-models-tensorflow 148 | # 149 | # Participants can also choose to implement their models 150 | # in PyTorch. 
Here is an example to implement a PyTorch based model : 151 | # https://github.com/ray-project/ray/blob/master/rllib/examples/custom_torch_policy.py 152 | # 153 | # Examples of implementing the model in Keras is also available 154 | # here : 155 | # https://github.com/ray-project/ray/blob/master/rllib/examples/custom_keras_model.py 156 | custom_model: my_vision_network 157 | # Extra options to pass to custom class 158 | custom_options: {} 159 | 160 | ################################################ 161 | ################################################ 162 | # === Settings for Rollout Worker processes === 163 | ################################################ 164 | ################################################ 165 | # Number of rollout worker actors to create for parallel sampling. Setting 166 | # this to 0 will force rollouts to be done in the trainer actor. 167 | num_workers: 1 168 | 169 | # Number of environments to evaluate vectorwise per worker. This enables 170 | # model inference batching, which can improve performance for inference 171 | # bottlenecked workloads. 172 | num_envs_per_worker: 2 173 | 174 | # Divide episodes into fragments of this many steps each during rollouts. 175 | # Sample batches of this size are collected from rollout workers and 176 | # combined into a larger batch of `train_batch_size` for learning. 177 | # 178 | # For example, given rollout_fragment_length=100 and train_batch_size=1000: 179 | # 1. RLlib collects 10 fragments of 100 steps each from rollout workers. 180 | # 2. These fragments are concatenated and we perform an epoch of SGD. 181 | # 182 | # When using multiple envs per worker, the fragment size is multiplied by 183 | # `num_envs_per_worker`. This is since we are collecting steps from 184 | # multiple envs in parallel. For example, if num_envs_per_worker=5, then 185 | # rollout workers will return experiences in chunks of 5*100 = 500 steps. 186 | # 187 | # The dataflow here can vary per algorithm. 
For example, PPO further 188 | # divides the train batch into minibatches for multi-epoch SGD. 189 | rollout_fragment_length: 200 190 | 191 | # Whether to rollout "complete_episodes" or "truncate_episodes" to 192 | # `rollout_fragment_length` length unrolls. Episode truncation guarantees 193 | # evenly sized batches, but increases variance as the reward-to-go will 194 | # need to be estimated at truncation boundaries. 195 | batch_mode: truncate_episodes 196 | 197 | ################################################ 198 | ################################################ 199 | # === Advanced Resource Settings === 200 | ################################################ 201 | ################################################ 202 | # Number of CPUs to allocate per worker. 203 | num_cpus_per_worker: 1 204 | # Number of GPUs to allocate per worker. This can be fractional. This is 205 | # usually needed only if your env itself requires a GPU (i.e., it is a 206 | # GPU-intensive video game), or model inference is unusually expensive. 207 | num_gpus_per_worker: 0.9 208 | # Number of CPUs to allocate for the trainer. Note: this only takes effect 209 | # when running in Tune. Otherwise, the trainer runs in the main program. 210 | num_cpus_for_driver: 1 211 | 212 | ################################################ 213 | ################################################ 214 | # === Settings for the Trainer process === 215 | ################################################ 216 | ################################################ 217 | # Number of GPUs to allocate to the trainer process. Note that not all 218 | # algorithms can take advantage of trainer GPUs. This can be fractional 219 | # (e.g., 0.3 GPUs). 220 | # Note : If GPUs are not available, this will be overriden by the AIcrowd evaluators to 0. 221 | num_gpus: 0 222 | # Training batch size, if applicable. Should be >= rollout_fragment_length. 
223 | # Samples batches will be concatenated together to a batch of this size, 224 | # which is then passed to SGD. 225 | train_batch_size: 200 226 | 227 | ################################################ 228 | ################################################ 229 | # === Exploration Settings === 230 | ################################################ 231 | ################################################ 232 | # Default exploration behavior, iff `explore`=None is passed into 233 | # compute_action(s). 234 | # Set to False for no exploration behavior (e.g., for evaluation). 235 | explore: True, 236 | # Provide a dict specifying the Exploration object's config. 237 | exploration_config: 238 | # The Exploration class to use. In the simplest case, this is the name 239 | # (str) of any class present in the `rllib.utils.exploration` package. 240 | # You can also provide the python class directly or the full location 241 | # of your class (e.g. "ray.rllib.utils.exploration.epsilon_greedy. 242 | # EpsilonGreedy) 243 | type: "StochasticSampling" 244 | # Can add constructor kwargs here (if any) 245 | 246 | ################################################ 247 | ################################################ 248 | # === Advanced Rollout Settings === 249 | ################################################ 250 | ################################################ 251 | # Element-wise observation filter, either "NoFilter" or "MeanStdFilter". 252 | observation_filter: "NoFilter" 253 | # Whether to synchronize the statistics of remote filters. 254 | synchronize_filters: True 255 | # Whether to LZ4 compress individual observations 256 | compress_observations: False 257 | # Minimum env steps to optimize for per train call. This value does 258 | # not affect learning, only the length of train iterations. 
259 | timesteps_per_iteration: 0 260 | # This argument, in conjunction with worker_index, sets the random seed of 261 | # each worker, so that identically configured trials will have identical 262 | # results. This makes experiments reproducible. 263 | seed: null 264 | -------------------------------------------------------------------------------- /sagemaker/source/custom/experiments/random-policy.yaml: -------------------------------------------------------------------------------- 1 | procgen-starter-example: 2 | env: procgen_env_wrapper # Change this at your own risk :D 3 | run: RandomPolicy 4 | 5 | stop: 6 | timesteps_total: 300000 7 | checkpoint_freq: 1 8 | checkpoint_at_end: True 9 | keep_checkpoints_num: 5 10 | 11 | config: 12 | env_config: 13 | env_name: coinrun 14 | num_levels: 0 15 | start_level: 0 16 | paint_vel_info: False 17 | use_generated_assets: False 18 | center_agent: True 19 | use_sequential_levels: False 20 | distribution_mode: easy 21 | 22 | model: 23 | custom_preprocessor: MyPreprocessor 24 | -------------------------------------------------------------------------------- /sagemaker/source/custom/models/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sagemaker-rl-procgen-ray/ba82c1f0bac2c985930bda7875f1950723f6d7ce/sagemaker/source/custom/models/.gitkeep -------------------------------------------------------------------------------- /sagemaker/source/custom/models/impala_cnn_tf.py: -------------------------------------------------------------------------------- 1 | from ray.rllib.models.tf.tf_modelv2 import TFModelV2 2 | from ray.rllib.utils.framework import try_import_tf 3 | from ray.rllib.models import ModelCatalog 4 | 5 | tf = try_import_tf() 6 | 7 | 8 | def conv_layer(depth, name): 9 | return tf.keras.layers.Conv2D( 10 | filters=depth, kernel_size=3, strides=1, padding="same", name=name 11 | ) 12 | 13 | 14 | def residual_block(x, depth, prefix): 15 | 
inputs = x 16 | assert inputs.get_shape()[-1].value == depth 17 | x = tf.keras.layers.ReLU()(x) 18 | x = conv_layer(depth, name=prefix + "_conv0")(x) 19 | x = tf.keras.layers.ReLU()(x) 20 | x = conv_layer(depth, name=prefix + "_conv1")(x) 21 | return x + inputs 22 | 23 | 24 | def conv_sequence(x, depth, prefix): 25 | x = conv_layer(depth, prefix + "_conv")(x) 26 | x = tf.keras.layers.MaxPool2D(pool_size=3, strides=2, padding="same")(x) 27 | x = residual_block(x, depth, prefix=prefix + "_block0") 28 | x = residual_block(x, depth, prefix=prefix + "_block1") 29 | return x 30 | 31 | 32 | class ImpalaCNN(TFModelV2): 33 | """ 34 | Network from IMPALA paper implemented in ModelV2 API. 35 | 36 | Based on https://github.com/ray-project/ray/blob/master/rllib/models/tf/visionnet_v2.py 37 | and https://github.com/openai/baselines/blob/9ee399f5b20cd70ac0a871927a6cf043b478193f/baselines/common/models.py#L28 38 | """ 39 | 40 | def __init__(self, obs_space, action_space, num_outputs, model_config, name): 41 | super().__init__(obs_space, action_space, num_outputs, model_config, name) 42 | 43 | depths = [16, 32, 32] 44 | 45 | inputs = tf.keras.layers.Input(shape=obs_space.shape, name="observations") 46 | scaled_inputs = tf.cast(inputs, tf.float32) / 255.0 47 | 48 | x = scaled_inputs 49 | for i, depth in enumerate(depths): 50 | x = conv_sequence(x, depth, prefix=f"seq{i}") 51 | 52 | x = tf.keras.layers.Flatten()(x) 53 | x = tf.keras.layers.ReLU()(x) 54 | x = tf.keras.layers.Dense(units=256, activation="relu", name="hidden")(x) 55 | logits = tf.keras.layers.Dense(units=num_outputs, name="pi")(x) 56 | value = tf.keras.layers.Dense(units=1, name="vf")(x) 57 | self.base_model = tf.keras.Model(inputs, [logits, value]) 58 | self.register_variables(self.base_model.variables) 59 | 60 | def forward(self, input_dict, state, seq_lens): 61 | # explicit cast to float32 needed in eager 62 | obs = tf.cast(input_dict["obs"], tf.float32) 63 | logits, self._value = self.base_model(obs) 64 | return 
logits, state 65 | 66 | def value_function(self): 67 | return tf.reshape(self._value, [-1]) 68 | 69 | 70 | # Register model in ModelCatalog 71 | ModelCatalog.register_custom_model("impala_cnn_tf", ImpalaCNN) 72 | -------------------------------------------------------------------------------- /sagemaker/source/custom/models/impala_cnn_torch.py: -------------------------------------------------------------------------------- 1 | from ray.rllib.models.torch.torch_modelv2 import TorchModelV2 2 | from ray.rllib.models import ModelCatalog 3 | from ray.rllib.utils.annotations import override 4 | from ray.rllib.utils import try_import_torch 5 | 6 | torch, nn = try_import_torch() 7 | 8 | 9 | class ResidualBlock(nn.Module): 10 | def __init__(self, channels): 11 | super().__init__() 12 | self.conv0 = nn.Conv2d(in_channels=channels, out_channels=channels, kernel_size=3, padding=1) 13 | self.conv1 = nn.Conv2d(in_channels=channels, out_channels=channels, kernel_size=3, padding=1) 14 | 15 | def forward(self, x): 16 | inputs = x 17 | x = nn.functional.relu(x) 18 | x = self.conv0(x) 19 | x = nn.functional.relu(x) 20 | x = self.conv1(x) 21 | return x + inputs 22 | 23 | 24 | class ConvSequence(nn.Module): 25 | def __init__(self, input_shape, out_channels): 26 | super().__init__() 27 | self._input_shape = input_shape 28 | self._out_channels = out_channels 29 | self.conv = nn.Conv2d(in_channels=self._input_shape[0], out_channels=self._out_channels, kernel_size=3, padding=1) 30 | self.res_block0 = ResidualBlock(self._out_channels) 31 | self.res_block1 = ResidualBlock(self._out_channels) 32 | 33 | def forward(self, x): 34 | x = self.conv(x) 35 | x = nn.functional.max_pool2d(x, kernel_size=3, stride=2, padding=1) 36 | x = self.res_block0(x) 37 | x = self.res_block1(x) 38 | assert x.shape[1:] == self.get_output_shape() 39 | return x 40 | 41 | def get_output_shape(self): 42 | _c, h, w = self._input_shape 43 | return (self._out_channels, (h + 1) // 2, (w + 1) // 2) 44 | 45 | 46 | class 
ImpalaCNN(TorchModelV2, nn.Module): 47 | """ 48 | Network from IMPALA paper implemented in ModelV2 API. 49 | 50 | Based on https://github.com/ray-project/ray/blob/master/rllib/models/tf/visionnet_v2.py 51 | and https://github.com/openai/baselines/blob/9ee399f5b20cd70ac0a871927a6cf043b478193f/baselines/common/models.py#L28 52 | """ 53 | 54 | def __init__(self, obs_space, action_space, num_outputs, model_config, 55 | name): 56 | TorchModelV2.__init__(self, obs_space, action_space, num_outputs, 57 | model_config, name) 58 | nn.Module.__init__(self) 59 | 60 | h, w, c = obs_space.shape 61 | shape = (c, h, w) 62 | 63 | conv_seqs = [] 64 | for out_channels in [16, 32, 32]: 65 | conv_seq = ConvSequence(shape, out_channels) 66 | shape = conv_seq.get_output_shape() 67 | conv_seqs.append(conv_seq) 68 | self.conv_seqs = nn.ModuleList(conv_seqs) 69 | self.hidden_fc = nn.Linear(in_features=shape[0] * shape[1] * shape[2], out_features=256) 70 | self.logits_fc = nn.Linear(in_features=256, out_features=num_outputs) 71 | self.value_fc = nn.Linear(in_features=256, out_features=1) 72 | 73 | @override(TorchModelV2) 74 | def forward(self, input_dict, state, seq_lens): 75 | x = input_dict["obs"].float() 76 | x = x / 255.0 # scale to 0-1 77 | x = x.permute(0, 3, 1, 2) # NHWC => NCHW 78 | for conv_seq in self.conv_seqs: 79 | x = conv_seq(x) 80 | x = torch.flatten(x, start_dim=1) 81 | x = nn.functional.relu(x) 82 | x = self.hidden_fc(x) 83 | x = nn.functional.relu(x) 84 | logits = self.logits_fc(x) 85 | value = self.value_fc(x) 86 | self._value = value.squeeze(1) 87 | return logits, state 88 | 89 | @override(TorchModelV2) 90 | def value_function(self): 91 | assert self._value is not None, "must call forward() first" 92 | return self._value 93 | 94 | ModelCatalog.register_custom_model("impala_cnn_torch", ImpalaCNN) -------------------------------------------------------------------------------- /sagemaker/source/custom/models/my_vision_network.py: 
-------------------------------------------------------------------------------- 1 | from ray.rllib.models.tf.tf_modelv2 import TFModelV2 2 | from ray.rllib.models.tf.visionnet_v1 import _get_filter_config 3 | from ray.rllib.models.tf.misc import normc_initializer 4 | from ray.rllib.utils.framework import try_import_tf 5 | 6 | tf = try_import_tf() 7 | 8 | """ 9 | NOTE : This implementation has been taken from : 10 | https://github.com/ray-project/ray/blob/master/rllib/models/tf/visionnet_v2.py 11 | 12 | to act as a reference implementation for implementing custom models. 13 | """ 14 | 15 | 16 | def get_conv_activation(model_config): 17 | if model_config.get("conv_activation") == "linear": 18 | activation = None 19 | else: 20 | activation = getattr(tf.nn, model_config.get("conv_activation")) 21 | return activation 22 | 23 | 24 | def get_fc_activation(model_config): 25 | activation = model_config.get("fcnet_activation") 26 | if activation is None: 27 | activation = tf.keras.layers.ReLU() 28 | return activation 29 | 30 | 31 | def conv_layers(x, model_config, obs_space, prefix=""): 32 | filters = model_config.get("conv_filters") 33 | if not filters: 34 | filters = _get_filter_config(obs_space.shape) 35 | 36 | activation = get_conv_activation(model_config) 37 | 38 | for i, (out_size, kernel, stride) in enumerate(filters, 1): 39 | x = tf.keras.layers.Conv2D( 40 | out_size, 41 | kernel, 42 | strides=(stride, stride), 43 | activation=activation, 44 | padding="same", 45 | data_format="channels_last", 46 | name=f"{prefix}conv{i}", 47 | )(x) 48 | return x 49 | 50 | 51 | def fc_layers(x, model_config, prefix=""): 52 | x = tf.keras.layers.Flatten()(x) 53 | activation = get_fc_activation(model_config) 54 | fc_layers_config = model_config.get("fcnet_hiddens", []) 55 | for i, dim in enumerate(fc_layers_config): 56 | x = tf.keras.layers.Dense( 57 | units=dim, activation=activation, name=f"{prefix}fc-{i}" 58 | )(x) 59 | return x 60 | 61 | 62 | def get_final_fc(x, num_outputs, 
model_config): 63 | x = tf.keras.layers.Dense(num_outputs, name="pi")(x) 64 | return x 65 | 66 | 67 | def value_layers(x, inputs, obs_space, model_config): 68 | if not model_config.get("vf_share_layers"): 69 | x = conv_layers(inputs, model_config, obs_space, prefix="vf-") 70 | x = fc_layers(x, model_config, prefix="vf-") 71 | x = tf.keras.layers.Dense(units=1, name="vf")(x) 72 | return x 73 | 74 | 75 | class MyVisionNetwork(TFModelV2): 76 | """Generic vision network implemented in ModelV2 API.""" 77 | 78 | def __init__(self, obs_space, action_space, num_outputs, model_config, name): 79 | super(MyVisionNetwork, self).__init__( 80 | obs_space, action_space, num_outputs, model_config, name 81 | ) 82 | 83 | inputs = tf.keras.layers.Input(shape=obs_space.shape, name="observations") 84 | last_layer = inputs 85 | # Build the conv layers 86 | last_layer = conv_layers(last_layer, model_config, obs_space) 87 | # Build the linear layers 88 | last_layer = fc_layers(last_layer, model_config) 89 | # Final linear layer 90 | logits = get_final_fc(last_layer, num_outputs, model_config) 91 | # Build the value layers 92 | value_out = value_layers(last_layer, inputs, obs_space, model_config) 93 | 94 | self.base_model = tf.keras.Model(inputs, [logits, value_out]) 95 | self.register_variables(self.base_model.variables) 96 | 97 | def forward(self, input_dict, state, seq_lens): 98 | # explicit cast to float32 needed in eager 99 | logits, self._value_out = self.base_model( 100 | tf.cast(input_dict["obs"], tf.float32) 101 | ) 102 | return logits, state 103 | 104 | def value_function(self): 105 | return tf.reshape(self._value_out, [-1]) -------------------------------------------------------------------------------- /sagemaker/source/custom/preprocessors/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from .custom_preprocessor import MyPreprocessorClass 4 | 5 | CUSTOM_PREPROCESSORS = {"MyPreprocessor": 
MyPreprocessorClass} 6 | -------------------------------------------------------------------------------- /sagemaker/source/custom/preprocessors/custom_preprocessor.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from ray.rllib.models.preprocessors import Preprocessor 4 | 5 | 6 | class MyPreprocessorClass(Preprocessor): 7 | """Custom preprocessing for observations 8 | 9 | Adopted from https://docs.ray.io/en/master/rllib-models.html#custom-preprocessors 10 | """ 11 | 12 | def _init_shape(self, obs_space, options): 13 | return obs_space.shape # New shape after preprocessing 14 | 15 | def transform(self, observation): 16 | # Do your custom stuff 17 | return observation 18 | -------------------------------------------------------------------------------- /sagemaker/source/custom/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | 3 | setup(name='custom_rl_estimator', 4 | version='1.0', 5 | description='Custom scripts for RLEstimator.', 6 | packages=['algorithms', 'envs', 'models', 'preprocessors'] 7 | ) 8 | -------------------------------------------------------------------------------- /sagemaker/source/procgen_ray_launcher.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). 4 | # You may not use this file except in compliance with the License. 5 | # A copy of the License is located at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # or in the "license" file accompanying this file. This file is distributed 10 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 11 | # express or implied. See the License for the specific language governing 12 | # permissions and limitations under the License. 
13 | 14 | import os 15 | import json 16 | import subprocess 17 | from shutil import copytree 18 | 19 | import gym 20 | import ray 21 | from ray.tune.registry import register_env 22 | from ray.tune import registry 23 | from ray.tune.tune import run_experiments, run, _make_scheduler 24 | from ray.tune.experiment import convert_to_experiment_list, Experiment 25 | 26 | from sagemaker_rl.ray_launcher import SageMakerRayLauncher 27 | from sagemaker_rl.tf_serving_utils import export_tf_serving, natural_keys 28 | 29 | try: 30 | from custom.callbacks import CustomCallbacks 31 | except ModuleNotFoundError: 32 | from callbacks import CustomCallbacks 33 | 34 | TERMINATION_SIGNAL = "JOB_TERMINATED" 35 | MODEL_OUTPUT_DIR = "/opt/ml/model" 36 | CHECKPOINTS_DIR = '/opt/ml/checkpoints' 37 | 38 | def custom_sync_func(source, target): 39 | """Custom rsync cmd to sync experiment artifact from remote nodes to driver node. 40 | """ 41 | sync_cmd = 'rsync -havP --inplace --stats -e "ssh -i /root/.ssh/id_rsa" {source} {target}'.format( 42 | source=source, target=target 43 | ) 44 | 45 | sync_process = subprocess.Popen(sync_cmd, shell=True) 46 | sync_process.wait() 47 | 48 | 49 | class ProcgenSageMakerRayLauncher(SageMakerRayLauncher): 50 | """Launcher class for Procgen experiments using Ray-RLLib. 51 | Customers should sub-class this, fill in the required methods, and 52 | call .train_main() to start a training process. 53 | 54 | Example:: 55 | 56 | class MyLauncher(ProcgenSageMakerRayLauncher): 57 | def register_env_creator(self): 58 | register_env( 59 | "stacked_procgen_env", # This should be different from procgen_env_wrapper 60 | lambda config: gym.wrappers.FrameStack(ProcgenEnvWrapper(config), 4) 61 | ) 62 | 63 | def get_experiment_config(self): 64 | return { 65 | "training": { 66 | "env": "procgen_env_wrapper", 67 | "run": "PPO", 68 | ... 
69 | } 70 | } 71 | 72 | if __name__ == "__main__": 73 | MyLauncher().train_main() 74 | """ 75 | def register_algorithms_and_preprocessors(self): 76 | raise NotImplementedError() 77 | 78 | def create_tf_serving_model(self, algorithm=None, env_string=None): 79 | self.register_env_creator() 80 | self.register_algorithms_and_preprocessors() 81 | if ray.__version__ >= "0.6.5": 82 | from ray.rllib.agents.registry import get_agent_class 83 | else: 84 | from ray.rllib.agents.agent import get_agent_class 85 | cls = get_agent_class(algorithm) 86 | with open(os.path.join(MODEL_OUTPUT_DIR, "params.json")) as config_json: 87 | config = json.load(config_json) 88 | use_torch = config.get("use_pytorch", False) 89 | if not use_torch: 90 | if 'callbacks' in config: 91 | callback_cls_str = config['callbacks'] # "", 92 | callback_cls = callback_cls_str.split("'")[-2].split(".")[-1] # CustomCallbacks 93 | config['callbacks'] = eval(callback_cls) 94 | print("Loaded config for TensorFlow serving.") 95 | config["monitor"] = False 96 | config["num_workers"] = 1 97 | config["num_gpus"] = 0 98 | agent = cls(env=env_string, config=config) 99 | checkpoint = os.path.join(MODEL_OUTPUT_DIR, "checkpoint") 100 | agent.restore(checkpoint) 101 | export_tf_serving(agent, MODEL_OUTPUT_DIR) 102 | 103 | def find_checkpoint_path_for_spot(self, prefix): 104 | ckpts = [] 105 | ckpts_prefix = '' 106 | for root, directories, files in os.walk(prefix): 107 | for directory in directories: 108 | if directory.startswith("checkpoint"): 109 | if not ckpts_prefix: 110 | ckpts_prefix = root 111 | ckpts.append(directory) 112 | return ckpts_prefix, ckpts 113 | 114 | def find_checkpoint_file_for_spot(self, prefix): 115 | ckpts_prefix, ckpts = self.find_checkpoint_path_for_spot(prefix) 116 | if not ckpts: 117 | return "" 118 | else: 119 | ckpts.sort(key=natural_keys) 120 | ckpt_name = ckpts[-1].replace("_", "-") 121 | return os.path.join(ckpts_prefix, ckpts[-1], ckpt_name) 122 | 123 | def launch(self): 124 | """Actual 
entry point into the class instance where everything happens. 125 | """ 126 | self.register_env_creator() 127 | self.register_algorithms_and_preprocessors() 128 | experiment_config, args, verbose = self.get_experiment_config() 129 | 130 | # All worker nodes will block at this step during training 131 | ray_cluster_config = self.ray_init_config() 132 | if not self.is_master_node: 133 | return 134 | ray_custom_cluster_config = { 135 | "object_store_memory": args.ray_object_store_memory, 136 | "memory": args.ray_memory, 137 | "redis_max_memory": args.ray_redis_max_memory, 138 | "num_cpus": args.ray_num_cpus, 139 | "num_gpus": args.ray_num_gpus 140 | } 141 | all_wokers_host_names = self.get_all_host_names()[1:] 142 | # Overwrite redis address for single instance job 143 | if len(all_wokers_host_names) == 0: 144 | ray_custom_cluster_config.update({"address": args.ray_address}) 145 | ray_cluster_config.update(ray_custom_cluster_config) 146 | 147 | # Start the driver on master node 148 | ray.init(**ray_cluster_config) 149 | 150 | # Spot instance is back 151 | if os.path.exists(CHECKPOINTS_DIR) and os.listdir(CHECKPOINTS_DIR): 152 | print("Instance is back. 
Local checkpoint path detected.") 153 | # Sample path in ckpt channel: opt/ml/checkpoints/training/PPO_procgen_/checkpoint_50/checkpoint-50 154 | checkpoint_file = self.find_checkpoint_file_for_spot(CHECKPOINTS_DIR) 155 | print("Setting checkpoint path to {}".format(checkpoint_file)) 156 | if checkpoint_file: 157 | experiment_config['training']['restore'] = checkpoint_file # Overwrite 158 | experiment_config = self.customize_experiment_config(experiment_config) 159 | experiment_config = self.set_up_checkpoint(experiment_config) 160 | experiment_config['training']['sync_to_driver'] = custom_sync_func 161 | 162 | run_experiments( 163 | experiment_config, 164 | scheduler=_make_scheduler(args), 165 | queue_trials=args.queue_trials, 166 | resume=args.resume, 167 | verbose=verbose, 168 | concurrent=True 169 | ) 170 | # If distributed job, send TERMINATION_SIGNAL to all workers. 171 | if len(all_wokers_host_names) > 0: 172 | self.sage_cluster_communicator.create_s3_signal(TERMINATION_SIGNAL) 173 | 174 | @classmethod 175 | def train_main(cls): 176 | """main function that kicks things off 177 | """ 178 | launcher = cls() 179 | launcher.launch() 180 | -------------------------------------------------------------------------------- /sagemaker/source/ray_experiment_builder.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). 4 | # You may not use this file except in compliance with the License. 5 | # A copy of the License is located at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # or in the "license" file accompanying this file. This file is distributed 10 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 11 | # express or implied. See the License for the specific language governing 12 | # permissions and limitations under the License. 
13 | 14 | import os 15 | from pathlib import Path 16 | import argparse 17 | import json 18 | import yaml 19 | 20 | from ray.tune.result import DEFAULT_RESULTS_DIR 21 | from ray.tune.resources import resources_to_json, json_to_resources 22 | 23 | try: 24 | from custom.callbacks import CustomCallbacks 25 | except ModuleNotFoundError: 26 | from callbacks import CustomCallbacks 27 | 28 | class RayExperimentBuilder: 29 | EXAMPLE_USAGE = """ 30 | Training example: 31 | python ./train.py --run DQN --env CartPole-v0 32 | 33 | Training with Config: 34 | python ./train.py -f experiments/simple-corridor-0.yaml 35 | 36 | 37 | Note that -f overrides all other trial-specific command-line options. 38 | """ 39 | def __init__(self, **kwargs): 40 | parser = self.create_parser() 41 | self.args, _ = parser.parse_known_args() 42 | 43 | if kwargs is not None: 44 | for k, v in kwargs.items(): 45 | self.args.__dict__[k] = v 46 | 47 | # Convert jsons to dicts in local mode 48 | self.args.scheduler_config = self.try_convert_json_to_dict(self.args.scheduler_config) 49 | self.args.config = self.try_convert_json_to_dict(self.args.config) 50 | self.args.stop = self.try_convert_json_to_dict(self.args.stop) 51 | 52 | def try_convert_json_to_dict(self, json_string): 53 | try: 54 | return json.loads(json_string) 55 | except TypeError: 56 | return json_string 57 | 58 | def make_parser(self, **kwargs): 59 | #TODO import method from starter-kit 60 | # Taken from https://github.com/ray-project/ray/blob/5303c3abe322cbd90f75bcf03ee1f9c3dad23aae/python/ray/tune/config_parser.py 61 | parser = argparse.ArgumentParser(**kwargs) 62 | 63 | parser.add_argument( 64 | "--run", 65 | default=None, 66 | type=str, 67 | help="The algorithm or model to train. This may refer to the name " 68 | "of a built-on algorithm (e.g. 
RLLib's DQN or PPO), or a " 69 | "user-defined trainable function or class registered in the " 70 | "tune registry.") 71 | parser.add_argument( 72 | "--stop", 73 | default="{}", 74 | help="The stopping criteria, specified in JSON. The keys may be any " 75 | "field returned by 'train()' e.g. " 76 | "'{\"time_total_s\": 600, \"training_iteration\": 100000}' to stop " 77 | "after 600 seconds or 100k iterations, whichever is reached first.") 78 | parser.add_argument( 79 | "--config", 80 | default="{}", 81 | help="Algorithm-specific configuration (e.g. env, hyperparams), " 82 | "specified in JSON.") 83 | parser.add_argument( 84 | "--resources-per-trial", 85 | default=None, 86 | type=json_to_resources, 87 | help="Override the machine resources to allocate per trial, e.g. " 88 | "'{\"cpu\": 64, \"gpu\": 8}'. Note that GPUs will not be assigned " 89 | "unless you specify them here. For RLlib, you probably want to " 90 | "leave this alone and use RLlib configs to control parallelism.") 91 | parser.add_argument( 92 | "--num-samples", 93 | default=1, 94 | type=int, 95 | help="Number of times to repeat each trial.") 96 | parser.add_argument( 97 | "--checkpoint-freq", 98 | default=0, 99 | type=int, 100 | help="How many training iterations between checkpoints. " 101 | "A value of 0 (default) disables checkpointing.") 102 | parser.add_argument( 103 | "--checkpoint-at-end", 104 | action="store_true", 105 | help="Whether to checkpoint at the end of the experiment. " 106 | "Default is False.") 107 | parser.add_argument( 108 | "--sync-on-checkpoint", 109 | action="store_true", 110 | help="Enable sync-down of trial checkpoint to guarantee " 111 | "recoverability. 
If unset, checkpoint syncing from worker " 112 | "to driver is asynchronous, so unset this only if synchronous " 113 | "checkpointing is too slow and trial restoration failures " 114 | "can be tolerated.") 115 | parser.add_argument( 116 | "--keep-checkpoints-num", 117 | default=None, 118 | type=int, 119 | help="Number of best checkpoints to keep. Others get " 120 | "deleted. Default (None) keeps all checkpoints.") 121 | parser.add_argument( 122 | "--checkpoint-score-attr", 123 | default="training_iteration", 124 | type=str, 125 | help="Specifies by which attribute to rank the best checkpoint. " 126 | "Default is increasing order. If attribute starts with min- it " 127 | "will rank attribute in decreasing order. Example: " 128 | "min-validation_loss") 129 | parser.add_argument( 130 | "--export-formats", 131 | default=None, 132 | help="List of formats that exported at the end of the experiment. " 133 | "Default is None. For RLlib, 'checkpoint' and 'model' are " 134 | "supported for TensorFlow policy graphs.") 135 | parser.add_argument( 136 | "--max-failures", 137 | default=3, 138 | type=int, 139 | help="Try to recover a trial from its last checkpoint at least this " 140 | "many times. 
Only applies if checkpointing is enabled.") 141 | parser.add_argument( 142 | "--scheduler", 143 | default="FIFO", 144 | type=str, 145 | help="FIFO (default), MedianStopping, AsyncHyperBand, " 146 | "HyperBand, or HyperOpt.") 147 | parser.add_argument( 148 | "--scheduler-config", 149 | default="{}", 150 | help="Config options to pass to the scheduler.") 151 | 152 | # Note: this currently only makes sense when running a single trial 153 | parser.add_argument( 154 | "--restore", 155 | default=None, 156 | type=str, 157 | help="If specified, restore from this checkpoint.") 158 | 159 | return parser 160 | 161 | def create_parser(self): 162 | parser = self.make_parser( 163 | formatter_class=argparse.RawDescriptionHelpFormatter, 164 | description="Train a reinforcement learning agent.", 165 | epilog=self.EXAMPLE_USAGE, 166 | ) 167 | 168 | # See also the base parser definition in ray/tune/config_parser.py 169 | parser.add_argument( 170 | "--ray-address", 171 | default=None, 172 | type=str, 173 | help="Connect to an existing Ray cluster at this address instead " 174 | "of starting a new one.") 175 | parser.add_argument( 176 | "--ray-num-cpus", 177 | default=None, 178 | type=int, 179 | help="--num-cpus to use if starting a new cluster.") 180 | parser.add_argument( 181 | "--ray-num-gpus", 182 | default=None, 183 | type=int, 184 | help="--num-gpus to use if starting a new cluster.") 185 | parser.add_argument( 186 | "--ray-num-nodes", 187 | default=None, 188 | type=int, 189 | help="Emulate multiple cluster nodes for debugging.") 190 | parser.add_argument( 191 | "--ray-redis-max-memory", 192 | default=None, 193 | type=int, 194 | help="--redis-max-memory to use if starting a new cluster.") 195 | parser.add_argument( 196 | "--ray-memory", 197 | default=None, 198 | type=int, 199 | help="--memory to use if starting a new cluster.") 200 | parser.add_argument( 201 | "--ray-object-store-memory", 202 | default=None, 203 | type=int, 204 | help="--object-store-memory to use if starting a 
new cluster.") 205 | parser.add_argument( 206 | "--experiment-name", 207 | default="default", 208 | type=str, 209 | help="Name of the subdirectory under `local_dir` to put results in.") 210 | parser.add_argument( 211 | "--local-dir", 212 | default=DEFAULT_RESULTS_DIR, 213 | type=str, 214 | help="Local dir to save training results to. Defaults to '{}'.".format( 215 | DEFAULT_RESULTS_DIR)) 216 | parser.add_argument( 217 | "--upload-dir", 218 | default="", 219 | type=str, 220 | help="Optional URI to sync training results to (e.g. s3://bucket).") 221 | parser.add_argument( 222 | "-v", action="store_true", help="Whether to use INFO level logging.") 223 | parser.add_argument( 224 | "-vv", action="store_true", help="Whether to use DEBUG level logging.") 225 | parser.add_argument( 226 | "--resume", 227 | action="store_true", 228 | help="Whether to attempt to resume previous Tune experiments.") 229 | parser.add_argument( 230 | "--torch", 231 | action="store_true", 232 | help="Whether to use PyTorch (instead of tf) as the DL framework.") 233 | parser.add_argument( 234 | "--eager", 235 | action="store_true", 236 | help="Whether to attempt to enable TF eager execution.") 237 | parser.add_argument( 238 | "--trace", 239 | action="store_true", 240 | help="Whether to attempt to enable tracing for eager mode.") 241 | parser.add_argument( 242 | "--env", default=None, type=str, help="The gym environment to use.") 243 | parser.add_argument( 244 | "--queue-trials", 245 | action="store_true", 246 | help=( 247 | "Whether to queue trials when the cluster does not currently have " 248 | "enough resources to launch one. This should be set to True when " 249 | "running on an autoscaling cluster to enable automatic scale-up.")) 250 | parser.add_argument( 251 | "-f", 252 | "--config-file", 253 | default=None, 254 | type=str, 255 | help="If specified, use config options from this file. 
Note that this " 256 | "overrides any trial-specific options set via flags above.") 257 | 258 | return parser 259 | 260 | def get_experiment_definition(self): 261 | if self.args.config_file: 262 | with open(self.args.config_file) as f: 263 | experiments = yaml.safe_load(f) 264 | exp_name_list = list(experiments.keys()) 265 | assert len(exp_name_list)==1 266 | # overwirte experiment name for SageMaker to recognize 267 | experiments['training'] = experiments.pop(exp_name_list[0]) 268 | else: 269 | experiments = { 270 | self.args.experiment_name: { # i.e. log to ~/ray_results/default 271 | "run": self.args.run, 272 | "checkpoint_freq": self.args.checkpoint_freq, 273 | "keep_checkpoints_num": self.args.keep_checkpoints_num, 274 | "checkpoint_score_attr": self.args.checkpoint_score_attr, 275 | "local_dir": self.args.local_dir, 276 | "resources_per_trial": ( 277 | self.args.resources_per_trial and 278 | resources_to_json(self.args.resources_per_trial)), 279 | "stop": self.args.stop, 280 | "config": dict(self.args.config, env=self.args.env), 281 | "restore": self.args.restore, 282 | "num_samples": self.args.num_samples, 283 | "upload_dir": self.args.upload_dir, 284 | } 285 | } 286 | 287 | verbose = 1 288 | for exp in experiments.values(): 289 | # Bazel makes it hard to find files specified in `args` (and `data`). 290 | # Look for them here. 291 | # NOTE: Some of our yaml files don't have a `config` section. 292 | if exp.get("config", {}).get("input") and \ 293 | not os.path.exists(exp["config"]["input"]): 294 | # This script runs in the ray/rllib dir. 
295 | rllib_dir = Path(__file__).parent 296 | input_file = rllib_dir.absolute().joinpath(exp["config"]["input"]) 297 | exp["config"]["input"] = str(input_file) 298 | 299 | if not exp.get("run"): 300 | raise ValueError("The following arguments are required: run") 301 | if not exp.get("env") and not exp.get("config", {}).get("env"): 302 | raise ValueError("The following arguments are required: env") 303 | 304 | if self.args.eager: 305 | exp["config"]["eager"] = True 306 | if self.args.torch: 307 | exp["config"]["use_pytorch"] = True 308 | if self.args.v: 309 | exp["config"]["log_level"] = "INFO" 310 | verbose = 2 311 | if self.args.vv: 312 | exp["config"]["log_level"] = "DEBUG" 313 | verbose = 3 314 | if self.args.trace: 315 | if not exp["config"].get("eager"): 316 | raise ValueError("Must enable --eager to enable tracing.") 317 | exp["config"]["eager_tracing"] = True 318 | 319 | ### Add Custom Callbacks 320 | exp["config"]["callbacks"] = CustomCallbacks 321 | return experiments, self.args, verbose 322 | -------------------------------------------------------------------------------- /sagemaker/source/requirements.txt: -------------------------------------------------------------------------------- 1 | ray[rllib]==0.8.5 2 | procgen==0.10.1 -------------------------------------------------------------------------------- /sagemaker/source/train-local.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). 4 | # You may not use this file except in compliance with the License. 5 | # A copy of the License is located at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # or in the "license" file accompanying this file. This file is distributed 10 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 11 | # express or implied. 
def run():
    """Launch a local Ray Tune training run using the custom vision model.

    Registers the model with RLlib's ModelCatalog, builds the experiment
    definition from CLI args / config file via RayExperimentBuilder, starts
    (or connects to) a Ray cluster, and runs the experiments.
    """
    ModelCatalog.register_custom_model("my_vision_network", MyVisionNetwork)

    model_overrides = {
        "model": {
            "custom_model": "my_vision_network",
            "conv_filters": [[16, [5, 5], 4], [32, [3, 3], 1], [256, [3, 3], 1]],
            "custom_preprocessor": None,
        }
    }

    builder = RayExperimentBuilder(**model_overrides)
    experiments, args, verbose = builder.get_experiment_definition()

    # Resource limits come straight from the parsed command-line arguments.
    ray.init(
        address=args.ray_address,
        object_store_memory=args.ray_object_store_memory,
        memory=args.ray_memory,
        redis_max_memory=args.ray_redis_max_memory,
        num_cpus=args.ray_num_cpus,
        num_gpus=args.ray_num_gpus,
    )

    run_experiments(
        experiments,
        scheduler=_make_scheduler(args),
        queue_trials=args.queue_trials,
        resume=args.resume,
        verbose=verbose,
        concurrent=True,
    )


if __name__ == "__main__":
    run()
class MyLauncher(ProcgenSageMakerRayLauncher):
    """SageMaker Ray launcher for homogeneous multi-instance CPU training
    of PPO on ProcGen environments."""

    def register_env_creator(self):
        # The registered name must differ from "procgen_env_wrapper".
        register_env(
            "stacked_procgen_env",
            lambda config: gym.wrappers.FrameStack(ProcgenEnvWrapper(config), 4),
        )

    def _get_ray_config(self):
        """Ray cluster options passed to the launcher."""
        return {
            # Leave "ray_num_cpus" and "ray_num_gpus" blank for multi-instance training
            # "ray_num_cpus": 16,
            # "ray_num_gpus": 0,
            "eager": False,
            "v": True,  # required for CloudWatch to catch the progress
        }

    def _get_rllib_config(self):
        """RLlib experiment/trainer configuration for CPU-only training."""
        return {
            "experiment_name": "training",
            "run": "PPO",
            "env": "procgen_env_wrapper",
            "stop": {
                # 'time_total_s': 60,
                'training_iteration': 4000,
            },
            "checkpoint_freq": 1,
            "config": {
                "ignore_worker_failures": True,
                "gamma": 0.999,
                "kl_coeff": 0.2,
                "lambda": 0.9,
                "lr": 0.0001,
                # Adjust based on total number of CPUs available in the cluster.
                "num_workers": 16 * 2 - 1,
                # Adjust based on number of GPUs available in a single node.
                "num_gpus": 0,
                "rollout_fragment_length": 140,
                "train_batch_size": 2048,
                "batch_mode": "truncate_episodes",
                "num_sgd_iter": 10,
                "use_pytorch": False,
                "model": {
                    # "custom_model": "my_vision_network",
                    # "conv_filters": [[16, [5, 5], 4], [32, [3, 3], 1], [256, [3, 3], 1]],
                    "custom_model": "impala_cnn_tf",
                },
                "env_config": {
                    # See https://github.com/AIcrowd/neurips2020-procgen-starter-kit/blob/master/experiments/procgen-starter-example.yaml#L34 for an explanation.
                    "env_name": "coinrun",
                    "num_levels": 0,
                    "start_level": 0,
                    "paint_vel_info": False,
                    "use_generated_assets": False,
                    "center_agent": True,
                    "use_sequential_levels": False,
                    "distribution_mode": "easy",
                },
            },
            "queue_trials": True,
            # Uncomment if you want to use a config_file
            # Note that this overrides any options set above
            # "config_file": path/to/your/config/file
        }

    def register_algorithms_and_preprocessors(self):
        """Register custom algorithms, preprocessors, and models with Ray."""
        try:
            from custom.algorithms import CUSTOM_ALGORITHMS
            from custom.preprocessors import CUSTOM_PREPROCESSORS
            from custom.models.my_vision_network import MyVisionNetwork
            from custom.models.impala_cnn_tf import ImpalaCNN
        except ModuleNotFoundError:
            # Fallback layout used when running inside the container.
            from algorithms import CUSTOM_ALGORITHMS
            from preprocessors import CUSTOM_PREPROCESSORS
            from models.my_vision_network import MyVisionNetwork
            from models.impala_cnn_tf import ImpalaCNN

        load_algorithms(CUSTOM_ALGORITHMS)
        load_preprocessors(CUSTOM_PREPROCESSORS)
        ModelCatalog.register_custom_model("my_vision_network", MyVisionNetwork)
        ModelCatalog.register_custom_model("impala_cnn_tf", ImpalaCNN)

    def get_experiment_config(self):
        """Merge Ray and RLlib configs into one experiment definition."""
        params = dict(self._get_ray_config())
        params.update(self._get_rllib_config())
        reb = RayExperimentBuilder(**params)
        return reb.get_experiment_definition()


if __name__ == "__main__":
    MyLauncher().train_main()
class MyLauncher(ProcgenSageMakerRayLauncher):
    """SageMaker Ray launcher for homogeneous multi-instance GPU training
    of PPO on ProcGen environments."""

    def register_env_creator(self):
        # The registered name must differ from "procgen_env_wrapper".
        register_env(
            "stacked_procgen_env",
            lambda config: gym.wrappers.FrameStack(ProcgenEnvWrapper(config), 4),
        )

    def _get_ray_config(self):
        """Ray cluster options passed to the launcher."""
        return {
            # Leave "ray_num_cpus" and "ray_num_gpus" blank for multi-instance training
            # "ray_num_cpus": 16,
            # "ray_num_gpus": 0,
            "eager": False,
            "v": True,  # required for CloudWatch to catch the progress
        }

    def _get_rllib_config(self):
        """RLlib experiment/trainer configuration for GPU training."""
        return {
            "experiment_name": "training",
            "run": "PPO",
            "env": "procgen_env_wrapper",
            "stop": {
                # 'time_total_s': 60,
                'training_iteration': 4000,
            },
            "checkpoint_freq": 1,
            "config": {
                "ignore_worker_failures": True,
                "gamma": 0.999,
                "kl_coeff": 0.2,
                "lambda": 0.9,
                "lr": 0.0001,
                # Adjust based on total number of CPUs available in the
                # cluster, e.g., p3.2xlarge has 8 CPUs.
                "num_workers": 8 * 2 - 1,
                # Adjust based on number of GPUs available in a single node,
                # e.g., p3.2xlarge has 1 GPU.
                "num_gpus": 0.2,
                # e.g., p3.2xlarge: (1 GPU - num_gpus) / num_workers = 0.1
                "num_gpus_per_worker": 0.1,
                "rollout_fragment_length": 140,
                "train_batch_size": 256 * (8 * 2 - 1),
                "batch_mode": "truncate_episodes",
                "num_sgd_iter": 10,
                "use_pytorch": False,
                "model": {
                    "custom_model": "impala_cnn_tf",
                },
                "env_config": {
                    # See https://github.com/AIcrowd/neurips2020-procgen-starter-kit/blob/master/experiments/procgen-starter-example.yaml#L34 for an explanation.
                    "env_name": "coinrun",
                    "num_levels": 0,
                    "start_level": 0,
                    "paint_vel_info": False,
                    "use_generated_assets": False,
                    "center_agent": True,
                    "use_sequential_levels": False,
                    "distribution_mode": "easy",
                },
            },
            "queue_trials": True,
            # Uncomment if you want to use a config_file
            # Note that this overrides any options set above
            # "config_file": path/to/your/config/file
        }

    def register_algorithms_and_preprocessors(self):
        """Register custom algorithms, preprocessors, and models with Ray."""
        try:
            from custom.algorithms import CUSTOM_ALGORITHMS
            from custom.preprocessors import CUSTOM_PREPROCESSORS
            from custom.models.my_vision_network import MyVisionNetwork
            from custom.models.impala_cnn_tf import ImpalaCNN
        except ModuleNotFoundError:
            # Fallback layout used when running inside the container.
            from algorithms import CUSTOM_ALGORITHMS
            from preprocessors import CUSTOM_PREPROCESSORS
            from models.my_vision_network import MyVisionNetwork
            from models.impala_cnn_tf import ImpalaCNN

        load_algorithms(CUSTOM_ALGORITHMS)
        load_preprocessors(CUSTOM_PREPROCESSORS)
        ModelCatalog.register_custom_model("my_vision_network", MyVisionNetwork)
        ModelCatalog.register_custom_model("impala_cnn_tf", ImpalaCNN)

    def get_experiment_config(self):
        """Merge Ray and RLlib configs into one experiment definition."""
        params = dict(self._get_ray_config())
        params.update(self._get_rllib_config())
        reb = RayExperimentBuilder(**params)
        return reb.get_experiment_definition()


if __name__ == "__main__":
    MyLauncher().train_main()
class MyLauncher(ProcgenSageMakerRayLauncher):
    """SageMaker Ray launcher for single-instance PPO training on ProcGen."""

    def register_env_creator(self):
        # The registered name must differ from "procgen_env_wrapper".
        register_env(
            "stacked_procgen_env",
            lambda config: gym.wrappers.FrameStack(ProcgenEnvWrapper(config), 4),
        )

    def _get_ray_config(self):
        """Ray cluster options passed to the launcher."""
        return {
            "ray_num_cpus": 8,  # adjust based on selected instance type
            "ray_num_gpus": 1,
            "eager": False,
            "v": True,  # required for CloudWatch to catch the progress
        }

    def _get_rllib_config(self):
        """RLlib experiment/trainer configuration for single-node training."""
        return {
            "experiment_name": "training",
            "run": "PPO",
            "env": "procgen_env_wrapper",
            "stop": {
                # 'time_total_s': 60,
                'training_iteration': 500,
            },
            "checkpoint_freq": 20,
            "checkpoint_at_end": True,
            "keep_checkpoints_num": 5,
            "queue_trials": False,
            "config": {
                # === Environment Settings ===
                "gamma": 0.999,
                "lambda": 0.95,
                "lr": 5.0e-4,
                "num_sgd_iter": 3,
                "kl_coeff": 0.0,
                "kl_target": 0.01,
                "vf_loss_coeff": 0.5,
                "entropy_coeff": 0.01,
                "clip_param": 0.2,
                "vf_clip_param": 0.2,
                "grad_clip": 0.5,
                "observation_filter": "NoFilter",
                "vf_share_layers": True,
                "soft_horizon": False,
                "no_done_at_end": False,
                "normalize_actions": False,
                "clip_actions": True,
                "ignore_worker_failures": True,
                "use_pytorch": False,
                "sgd_minibatch_size": 2048,  # 8 minibatches per epoch
                "train_batch_size": 16384,  # 2048 * 8
                # === Settings for Model ===
                "model": {
                    "custom_model": "impala_cnn_tf",
                },
                # === Settings for Rollout Worker processes ===
                # Adjust num_workers based on total number of CPUs available
                # in the cluster, e.g., p3.2xlarge has 8 CPUs.
                "num_workers": 6,
                "rollout_fragment_length": 140,
                "batch_mode": "truncate_episodes",
                # === Advanced Resource Settings ===
                "num_envs_per_worker": 12,
                "num_cpus_per_worker": 1,
                "num_cpus_for_driver": 1,
                "num_gpus_per_worker": 0.1,
                # === Settings for the Trainer process ===
                # Adjust num_gpus based on GPUs in a single node, e.g.,
                # p3.2xlarge has 1 GPU.
                "num_gpus": 0.3,
                # === Exploration Settings ===
                "explore": True,
                "exploration_config": {
                    "type": "StochasticSampling",
                },
                # === Settings for the Procgen Environment ===
                "env_config": {
                    # See https://github.com/AIcrowd/neurips2020-procgen-starter-kit/blob/master/experiments/procgen-starter-example.yaml#L34 for an explanation.
                    "env_name": "coinrun",
                    "num_levels": 0,
                    "start_level": 0,
                    "paint_vel_info": False,
                    "use_generated_assets": False,
                    "center_agent": True,
                    "use_sequential_levels": False,
                    "distribution_mode": "easy",
                },
            },
        }

    def register_algorithms_and_preprocessors(self):
        """Register custom algorithms, preprocessors, and models with Ray."""
        try:
            from custom.algorithms import CUSTOM_ALGORITHMS
            from custom.preprocessors import CUSTOM_PREPROCESSORS
            from custom.models.impala_cnn_tf import ImpalaCNN
        except ModuleNotFoundError:
            # Fallback layout used when running inside the container.
            from algorithms import CUSTOM_ALGORITHMS
            from preprocessors import CUSTOM_PREPROCESSORS
            from models.impala_cnn_tf import ImpalaCNN

        load_algorithms(CUSTOM_ALGORITHMS)
        load_preprocessors(CUSTOM_PREPROCESSORS)
        ModelCatalog.register_custom_model("impala_cnn_tf", ImpalaCNN)

    def get_experiment_config(self):
        """Merge Ray and RLlib configs into one experiment definition."""
        params = dict(self._get_ray_config())
        params.update(self._get_rllib_config())
        reb = RayExperimentBuilder(**params)
        return reb.get_experiment_definition()


if __name__ == "__main__":
    MyLauncher().train_main()
def get_latest_sagemaker_training_job(name_contains):
    """Return the name of the most recent completed SageMaker training job
    whose name contains `name_contains`.

    Raises:
        AssertionError: if no completed job matches.
    """
    sagemaker_client = boto3.client('sagemaker')
    response = sagemaker_client.list_training_jobs(
        NameContains=name_contains,
        StatusEquals='Completed'
    )
    training_jobs = response['TrainingJobSummaries']
    assert len(training_jobs) > 0, "Couldn't find any completed training jobs with '{}' in name.".format(name_contains)
    # list_training_jobs sorts by creation time, newest first, by default.
    latest_training_job = training_jobs[0]['TrainingJobName']
    return latest_training_job


def download_ray_checkpoint(checkpoint_dir, s3_bucket, latest_training_job):
    """Download the newest Ray checkpoint (and params.json) produced by a
    SageMaker training job from S3.

    Args:
        checkpoint_dir: local directory receiving the checkpoint files.
        s3_bucket: S3 bucket name the job wrote to.
        latest_training_job: training job name (prefix of the S3 key).

    Returns:
        int: the number of the checkpoint that was downloaded.

    Raises:
        RuntimeError: if no checkpoint object is found under the prefix.
    """
    checkpoint_data = "{}/{}/output/intermediate/training".format(s3_bucket, latest_training_job)
    checkpoint_bucket_key = "/".join(checkpoint_data.split("/")[1:]) + "/"

    s3 = boto3.client('s3')
    intermediate = s3.list_objects_v2(Bucket=s3_bucket, Prefix=checkpoint_bucket_key, Delimiter='//')

    last_checkpoint_num = 0
    last_checkpoint_key = None

    for content in intermediate['Contents']:
        # params.json holds the trainer config.
        # NOTE(review): written to the hard-coded "checkpoint/" directory
        # (not checkpoint_dir) because get_model_config() reads it from
        # there — confirm callers always pass checkpoint_dir="checkpoint".
        if "params.json" in content["Key"]:
            with open('checkpoint/params.json', 'wb') as data:
                s3.download_fileobj(s3_bucket, content["Key"], data)

        # Track the highest-numbered checkpoint-N object.
        checkpoint = re.search(r"checkpoint-([0-9]+)", content["Key"])
        if checkpoint is not None:
            checkpoint_num = int(checkpoint.group(1))
            if checkpoint_num > last_checkpoint_num:
                last_checkpoint_num = checkpoint_num
                last_checkpoint_key = content["Key"]

    # Fail with a clear message instead of passing Key=None to boto3.
    if last_checkpoint_key is None:
        raise RuntimeError(
            "No checkpoint files found under s3://{}/{}".format(s3_bucket, checkpoint_bucket_key))

    with open('{}/checkpoint-{}'.format(checkpoint_dir, last_checkpoint_num), 'wb') as data:
        s3.download_fileobj(s3_bucket, last_checkpoint_key, data)
    with open('{}/checkpoint-{}.tune_metadata'.format(checkpoint_dir, last_checkpoint_num), 'wb') as data:
        s3.download_fileobj(s3_bucket, last_checkpoint_key + ".tune_metadata", data)

    return last_checkpoint_num


def get_model_config():
    """Load the trainer config from checkpoint/params.json and adapt it for
    local, single-worker, CPU-only inference.

    Returns:
        dict: the adapted trainer configuration.
    """
    with open(os.path.join("checkpoint", "params.json")) as f:
        config = json.load(f)

    config["monitor"] = False
    config["num_workers"] = 1
    config["num_gpus"] = 0

    if 'callbacks' in config:
        # params.json stores the callback class as its repr string, e.g.
        # "<class '...CustomCallbacks'>"; recover the bare class name and
        # resolve it to the class object.
        # SECURITY NOTE: eval() on file contents — params.json must come
        # from a trusted training job, never from untrusted input.
        callback_cls_str = config['callbacks']
        callback_cls = callback_cls_str.split("'")[-2].split(".")[-1]  # e.g. CustomCallbacks
        config['callbacks'] = eval(callback_cls)

    return config


def rollout(agent,
            env_name,
            num_steps,
            num_episodes=0,
            saver=None,
            no_render=True,
            video_dir=None):
    """Run a trained agent in `env_name`, optionally collecting RGB frames.

    Adapted from
    https://github.com/AIcrowd/neurips2020-procgen-starter-kit/blob/master/rollout.py#L349

    Args:
        agent: trained RLlib agent (trainer) to evaluate.
        env_name: gym environment id to roll out in.
        num_steps: step budget across all episodes (0 = unlimited).
        num_episodes: episode budget (0 = unlimited).
        saver: optional RolloutSaver collecting transitions.
        no_render: when False, env.render frames are collected and returned.
        video_dir: unused here; kept for signature compatibility.

    Returns:
        list: RGB frames (empty when no_render is True).
    """
    policy_agent_mapping = default_policy_agent_mapping

    if saver is None:
        saver = RolloutSaver()

    if hasattr(agent, "workers") and isinstance(agent.workers, WorkerSet):
        # env = agent.workers.local_worker().env
        env = gym.make(env_name, render_mode="rgb_array")
        multiagent = isinstance(env, MultiAgentEnv)
        if agent.workers.local_worker().multiagent:
            policy_agent_mapping = agent.config["multiagent"][
                "policy_mapping_fn"]

        policy_map = agent.workers.local_worker().policy_map
        state_init = {p: m.get_initial_state() for p, m in policy_map.items()}
        # A non-empty initial state marks a recurrent (LSTM) policy.
        use_lstm = {p: len(s) > 0 for p, s in state_init.items()}
    else:
        env = gym.make(env_name)
        multiagent = False
        try:
            policy_map = {DEFAULT_POLICY_ID: agent.policy}
        except AttributeError:
            raise AttributeError(
                "Agent ({}) does not have a `policy` property! This is needed "
                "for performing (trained) agent rollouts.".format(agent))
        use_lstm = {DEFAULT_POLICY_ID: False}

    action_init = {
        p: _flatten_action(m.action_space.sample())
        for p, m in policy_map.items()
    }

    steps = 0
    episodes = 0
    rgb_array = []

    while keep_going(steps, num_steps, episodes, num_episodes):
        mapping_cache = {}  # in case policy_agent_mapping is stochastic
        saver.begin_rollout()
        obs = env.reset()
        agent_states = DefaultMapping(
            lambda agent_id: state_init[mapping_cache[agent_id]])
        prev_actions = DefaultMapping(
            lambda agent_id: action_init[mapping_cache[agent_id]])
        prev_rewards = collections.defaultdict(lambda: 0.)
        done = False
        reward_total = 0.0
        episode_steps = 0
        while not done and keep_going(steps, num_steps, episodes,
                                      num_episodes):
            # Normalize single-agent obs into the multi-agent dict shape.
            multi_obs = obs if multiagent else {_DUMMY_AGENT_ID: obs}
            action_dict = {}
            for agent_id, a_obs in multi_obs.items():
                if a_obs is not None:
                    policy_id = mapping_cache.setdefault(
                        agent_id, policy_agent_mapping(agent_id))
                    p_use_lstm = use_lstm[policy_id]
                    if p_use_lstm:
                        # Recurrent policy: thread hidden state through steps.
                        a_action, p_state, _ = agent.compute_action(
                            a_obs,
                            state=agent_states[agent_id],
                            prev_action=prev_actions[agent_id],
                            prev_reward=prev_rewards[agent_id],
                            policy_id=policy_id)
                        agent_states[agent_id] = p_state
                    else:
                        a_action = agent.compute_action(
                            a_obs,
                            prev_action=prev_actions[agent_id],
                            prev_reward=prev_rewards[agent_id],
                            policy_id=policy_id)
                    a_action = _flatten_action(a_action)  # tuple actions
                    action_dict[agent_id] = a_action
                    prev_actions[agent_id] = a_action
            action = action_dict

            action = action if multiagent else action[_DUMMY_AGENT_ID]
            next_obs, reward, done, info = env.step(action)
            episode_steps += 1
            if multiagent:
                for agent_id, r in reward.items():
                    prev_rewards[agent_id] = r
            else:
                prev_rewards[_DUMMY_AGENT_ID] = reward

            if multiagent:
                done = done["__all__"]
                reward_total += sum(reward.values())
            else:
                reward_total += reward
            if not no_render:
                rgb_array.append(env.render(mode='rgb_array'))
            saver.append_step(obs, action, next_obs, reward, done, info)
            steps += 1
            obs = next_obs
        saver.end_rollout()
        print("Episode #{}: reward: {} steps: {}".format(episodes, reward_total, episode_steps))
        if done:
            episodes += 1
    return rgb_array
def load_models(local_dir="."):
    """Source every Python file in `<local_dir>/models`.

    Each sourced file is expected to register its custom model(s) itself
    (e.g. via a registry call) so they become available to Ray.
    """
    for model_path in glob.glob(os.path.join(local_dir, "models", "*.py")):
        _source_file(model_path)


def load_algorithms(CUSTOM_ALGORITHMS):
    """Register the repository's custom algorithms with the Tune registry.

    `CUSTOM_ALGORITHMS` maps a trainable name to a zero-argument factory
    whose return value is registered under that name.
    """
    from ray.tune import registry

    for algo_name, algo_factory in CUSTOM_ALGORITHMS.items():
        registry.register_trainable(algo_name, algo_factory())


def load_preprocessors(CUSTOM_PREPROCESSORS):
    """Register custom preprocessors with RLlib's ModelCatalog.

    `CUSTOM_PREPROCESSORS` maps a preprocessor name to its class.
    """
    from ray.rllib.models import ModelCatalog

    for preprocessor_name, preprocessor_cls in CUSTOM_PREPROCESSORS.items():
        ModelCatalog.register_custom_preprocessor(preprocessor_name, preprocessor_cls)
"source": [ 7 | "# Amazon SageMaker Notebook for ProcGen Starter Kit with homogeneous scaling of multiple CPU instances " 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import os\n", 17 | "import time\n", 18 | "import yaml\n", 19 | "\n", 20 | "import sagemaker\n", 21 | "from sagemaker.rl import RLEstimator, RLToolkit, RLFramework\n", 22 | "import boto3\n", 23 | "\n", 24 | "from IPython.display import HTML, Markdown\n", 25 | "from source.common.docker_utils import build_and_push_docker_image\n", 26 | "from source.common.markdown_helper import generate_help_for_s3_endpoint_permissions, create_s3_endpoint_manually" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "with open(os.path.join(\"config\", \"sagemaker_config.yaml\")) as f:\n", 36 | " sagemaker_config = yaml.safe_load(f)" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "## Initialize Amazon SageMaker" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "sm_session = sagemaker.session.Session()\n", 53 | "s3_bucket = sagemaker_config[\"S3_BUCKET\"]\n", 54 | "\n", 55 | "s3_output_path = 's3://{}/'.format(s3_bucket)\n", 56 | "print(\"S3 bucket path: {}\".format(s3_output_path))" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "job_name_prefix = 'sm-ray-cpu-distributed-procgen'\n", 66 | "\n", 67 | "role = sagemaker.get_execution_role()\n", 68 | "print(role)" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "#### Note that `local_mode = True` does not work with heterogeneous scaling" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | 
"metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "instance_type = sagemaker_config[\"CPU_TRAINING_INSTANCE\"]" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "# Configure the framework you want to use\n", 92 | "\n", 93 | "Set `framework` to `\"tf\"` or `\"torch\"` for tensorflow or pytorch respectively.\n", 94 | "\n", 95 | "You will also have to edit your entry point i.e., `train-sagemaker-distributed-cpu.py` with the configuration parameter `\"use_pytorch\"` to match the framework that you have selected." 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "framework = \"tf\"" 105 | ] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "metadata": {}, 110 | "source": [ 111 | "# Train your homogeneous scaling job here" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": {}, 117 | "source": [ 118 | "### Edit the training code\n", 119 | "\n", 120 | "The training code is written in the file `train-sagemaker-distributed-cpu.py` which is uploaded in the /source directory.\n", 121 | "\n", 122 | "*Note that ray will automatically set `\"ray_num_cpus\"` and `\"ray_num_gpus\"` in `_get_ray_config`*" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "!pygmentize source/train-sagemaker-distributed-cpu.py" 132 | ] 133 | }, 134 | { 135 | "cell_type": "markdown", 136 | "metadata": {}, 137 | "source": [ 138 | "### Train the RL model using the Python SDK Script mode\n", 139 | "\n", 140 | "When using SageMaker for distributed training, you can select a GPU or CPU instance. The RLEstimator is used for training RL jobs.\n", 141 | "\n", 142 | "1. Specify the source directory where the environment, presets and training code is uploaded.\n", 143 | "2. Specify the entry point as the training code\n", 144 | "3. 
Specify the image (CPU or GPU) to be used for the training environment.\n", 145 | "4. Define the training parameters such as the instance count, job name, S3 path for output and job name.\n", 146 | "5. Define the metrics definitions that you are interested in capturing in your logs. These can also be visualized in CloudWatch and SageMaker Notebooks." 147 | ] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "metadata": {}, 152 | "source": [ 153 | "#### CPU docker image" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "metadata": { 160 | "scrolled": true 161 | }, 162 | "outputs": [], 163 | "source": [ 164 | "# Build CPU image\n", 165 | "cpu_repository_short_name = \"sagemaker-procgen-ray-%s\" % \"cpu\"\n", 166 | "docker_build_args = {\n", 167 | " 'CPU_OR_GPU': \"cpu\", \n", 168 | " 'AWS_REGION': boto3.Session().region_name,\n", 169 | " 'FRAMEWORK': framework\n", 170 | "}\n", 171 | "image_name = build_and_push_docker_image(cpu_repository_short_name, build_args=docker_build_args)\n", 172 | "print(\"Using CPU ECR image %s\" % image_name)" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": null, 178 | "metadata": {}, 179 | "outputs": [], 180 | "source": [ 181 | "metric_definitions = [\n", 182 | " {'Name': 'training_iteration', 'Regex': 'training_iteration: ([-+]?[0-9]*[.]?[0-9]+([eE][-+]?[0-9]+)?)'}, \n", 183 | " {'Name': 'episodes_total', 'Regex': 'episodes_total: ([-+]?[0-9]*[.]?[0-9]+([eE][-+]?[0-9]+)?)'}, \n", 184 | " {'Name': 'num_steps_trained', 'Regex': 'num_steps_trained: ([-+]?[0-9]*[.]?[0-9]+([eE][-+]?[0-9]+)?)'}, \n", 185 | " {'Name': 'timesteps_total', 'Regex': 'timesteps_total: ([-+]?[0-9]*[.]?[0-9]+([eE][-+]?[0-9]+)?)'},\n", 186 | " {'Name': 'training_iteration', 'Regex': 'training_iteration: ([-+]?[0-9]*[.]?[0-9]+([eE][-+]?[0-9]+)?)'},\n", 187 | "\n", 188 | " {'Name': 'episode_reward_max', 'Regex': 'episode_reward_max: ([-+]?[0-9]*[.]?[0-9]+([eE][-+]?[0-9]+)?)'}, \n", 189 | " {'Name': 
'episode_reward_mean', 'Regex': 'episode_reward_mean: ([-+]?[0-9]*[.]?[0-9]+([eE][-+]?[0-9]+)?)'}, \n", 190 | " {'Name': 'episode_reward_min', 'Regex': 'episode_reward_min: ([-+]?[0-9]*[.]?[0-9]+([eE][-+]?[0-9]+)?)'},\n", 191 | "] " 192 | ] 193 | }, 194 | { 195 | "cell_type": "markdown", 196 | "metadata": {}, 197 | "source": [ 198 | "### Ray homogeneous scaling - Specify `train_instance_count` > 1\n", 199 | "\n", 200 | "Homogeneous scaling allows us to use multiple instances of the same type.\n", 201 | "\n", 202 | "Spot instances are unused EC2 instances that could be used at up to a 90% discount compared to On-Demand prices (more information about spot instances can be found [here](https://aws.amazon.com/ec2/spot/?cards.sort-by=item.additionalFields.startDateTime&cards.sort-order=asc) and [here](https://docs.aws.amazon.com/sagemaker/latest/dg/model-managed-spot-training.html))\n", 203 | "\n", 204 | "To use spot instances, set `train_use_spot_instances = True`. To use On-Demand instances, `train_use_spot_instances = False`." 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": null, 210 | "metadata": { 211 | "scrolled": true 212 | }, 213 | "outputs": [], 214 | "source": [ 215 | "train_instance_count = 2\n", 216 | "train_use_spot_instances = False\n", 217 | "\n", 218 | "# Select which procgen environments to run in `envs_to_run`\n", 219 | "'''\n", 220 | "envs_to_run = [\"coinrun\", \"bigfish\", \"bossfight\", \"caveflyer\",\n", 221 | " \"chaser\", \"climber\", \"dodgeball\",\n", 222 | " \"fruitbot\", \"heist\", \"jumper\", \"leaper\", \"maze\",\n", 223 | " \"miner\", \"ninja\", \"plunder\", \"starpilot\"]\n", 224 | "'''\n", 225 | "\n", 226 | "envs_to_run = [\"coinrun\"]\n", 227 | "\n", 228 | "for env in envs_to_run:\n", 229 | " if train_use_spot_instances:\n", 230 | " print('*** Using spot instances ... 
')\n", 231 | " job_name = 'sm-ray-distributed-procgen-spot-' + time.strftime(\"%Y-%m-%d-%H-%M-%S\", time.gmtime()) + \"-\" + env\n", 232 | " checkpoint_s3_uri = 's3://{}/sagemaker-procgen/checkpoints/{}'.format(s3_bucket, job_name)\n", 233 | " training_params = {\"train_use_spot_instances\": True,\n", 234 | " \"train_max_run\": 3600 * 5,\n", 235 | " \"train_max_wait\": 7200 * 5,\n", 236 | " \"checkpoint_s3_uri\": checkpoint_s3_uri\n", 237 | " }\n", 238 | " hyperparameters = {\n", 239 | " \"rl.training.upload_dir\": checkpoint_s3_uri, #Necessary for syncing between spot instances\n", 240 | " \"rl.training.config.env_config.env_name\": env,\n", 241 | " }\n", 242 | " else:\n", 243 | " training_params = {\"base_job_name\": job_name_prefix + \"-\" + env}\n", 244 | " hyperparameters = {\n", 245 | " #\"rl.training.upload_dir\": s3_output_path + \"/tensorboard_sync\", # Uncomment to view tensorboard\n", 246 | " \"rl.training.config.env_config.env_name\": env,\n", 247 | " }\n", 248 | "\n", 249 | " # Defining the RLEstimator\n", 250 | " estimator = RLEstimator(entry_point=\"train-sagemaker-distributed-cpu.py\",\n", 251 | " source_dir='source',\n", 252 | " dependencies=[\"source/utils\", \"source/common/\", \"neurips2020-procgen-starter-kit/\"],\n", 253 | " image_uri=image_name,\n", 254 | " role=role,\n", 255 | " instance_type=instance_type,\n", 256 | " instance_count=train_instance_count,\n", 257 | " output_path=s3_output_path,\n", 258 | " metric_definitions=metric_definitions,\n", 259 | " hyperparameters=hyperparameters,\n", 260 | " **training_params\n", 261 | " )\n", 262 | " if train_use_spot_instances:\n", 263 | " estimator.fit(job_name=job_name, wait=False)\n", 264 | " else:\n", 265 | " estimator.fit(wait=False)\n", 266 | " \n", 267 | " print(' ')\n", 268 | " print(estimator.latest_training_job.job_name)\n", 269 | " print('type=', instance_type, 'count=', train_instance_count )\n", 270 | " print(' ')" 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | 
"execution_count": null, 276 | "metadata": {}, 277 | "outputs": [], 278 | "source": [] 279 | } 280 | ], 281 | "metadata": { 282 | "kernelspec": { 283 | "display_name": "conda_tensorflow2_p36", 284 | "language": "python", 285 | "name": "conda_tensorflow2_p36" 286 | }, 287 | "language_info": { 288 | "codemirror_mode": { 289 | "name": "ipython", 290 | "version": 3 291 | }, 292 | "file_extension": ".py", 293 | "mimetype": "text/x-python", 294 | "name": "python", 295 | "nbconvert_exporter": "python", 296 | "pygments_lexer": "ipython3", 297 | "version": "3.6.10" 298 | } 299 | }, 300 | "nbformat": 4, 301 | "nbformat_minor": 4 302 | } 303 | -------------------------------------------------------------------------------- /sagemaker/train-homo-distributed-gpu.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Amazon SageMaker Notebook for ProcGen Starter Kit with homogeneous scaling of multiple GPU instances " 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import os\n", 17 | "import time\n", 18 | "import yaml\n", 19 | "\n", 20 | "import sagemaker\n", 21 | "from sagemaker.rl import RLEstimator, RLToolkit, RLFramework\n", 22 | "import boto3\n", 23 | "\n", 24 | "from IPython.display import HTML, Markdown\n", 25 | "from source.common.docker_utils import build_and_push_docker_image\n", 26 | "from source.common.markdown_helper import generate_help_for_s3_endpoint_permissions, create_s3_endpoint_manually" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "with open(os.path.join(\"config\", \"sagemaker_config.yaml\")) as f:\n", 36 | " sagemaker_config = yaml.safe_load(f)" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "## 
Initialize Amazon SageMaker" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "sm_session = sagemaker.session.Session()\n", 53 | "s3_bucket = sagemaker_config[\"S3_BUCKET\"]\n", 54 | "\n", 55 | "s3_output_path = 's3://{}/'.format(s3_bucket)\n", 56 | "print(\"S3 bucket path: {}\".format(s3_output_path))" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "job_name_prefix = 'sm-ray-gpu-dist-procgen'\n", 66 | "\n", 67 | "role = sagemaker.get_execution_role()\n", 68 | "print(role)" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "#### Note that `local_mode = True` does not work with heterogeneous scaling" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "instance_type = sagemaker_config[\"GPU_TRAINING_INSTANCE\"]" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "# Configure the framework you want to use\n", 92 | "\n", 93 | "Set `framework` to `\"tf\"` or `\"torch\"` for tensorflow or pytorch respectively.\n", 94 | "\n", 95 | "You will also have to edit your entry point i.e., `train-sagemaker-distributed-gpu.py` with the configuration parameter `\"use_pytorch\"` to match the framework that you have selected." 
96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "framework = \"tf\"" 105 | ] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "metadata": {}, 110 | "source": [ 111 | "# Train your homogeneous scaling job here" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": {}, 117 | "source": [ 118 | "### Edit the training code\n", 119 | "\n", 120 | "The training code is written in the file `train-sagemaker-distributed-gpu.py` which is uploaded in the /source directory.\n", 121 | "\n", 122 | "*Note that ray will automatically set `\"ray_num_cpus\"` and `\"ray_num_gpus\"` in `_get_ray_config`*" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "!pygmentize source/train-sagemaker-distributed-gpu.py" 132 | ] 133 | }, 134 | { 135 | "cell_type": "markdown", 136 | "metadata": {}, 137 | "source": [ 138 | "### Train the RL model using the Python SDK Script mode\n", 139 | "\n", 140 | "When using SageMaker for distributed training, you can select a GPU or CPU instance. The RLEstimator is used for training RL jobs.\n", 141 | "\n", 142 | "1. Specify the source directory where the environment, presets, and training code are uploaded.\n", 143 | "2. Specify the entry point as the training code\n", 144 | "3. Specify the image (CPU or GPU) to be used for the training environment.\n", 145 | "4. Define the training parameters such as the instance count, job name, and S3 path for output.\n", 146 | "5. Define the metrics definitions that you are interested in capturing in your logs. These can also be visualized in CloudWatch and SageMaker Notebooks." 
147 | ] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "metadata": {}, 152 | "source": [ 153 | "#### GPU docker image" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "metadata": { 160 | "scrolled": true 161 | }, 162 | "outputs": [], 163 | "source": [ 164 | "#Build GPU image\n", 165 | "gpu_repository_short_name = \"sagemaker-procgen-ray-%s\" % \"gpu\"\n", 166 | "docker_build_args = {\n", 167 | " 'CPU_OR_GPU': \"gpu\", \n", 168 | " 'AWS_REGION': boto3.Session().region_name,\n", 169 | " 'FRAMEWORK': framework\n", 170 | "}\n", 171 | "image_name = build_and_push_docker_image(gpu_repository_short_name, build_args=docker_build_args)\n", 172 | "print(\"Using GPU ECR image %s\" % image_name)" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": null, 178 | "metadata": {}, 179 | "outputs": [], 180 | "source": [ 181 | "metric_definitions = [\n", 182 | " {'Name': 'training_iteration', 'Regex': 'training_iteration: ([-+]?[0-9]*[.]?[0-9]+([eE][-+]?[0-9]+)?)'}, \n", 183 | " {'Name': 'episodes_total', 'Regex': 'episodes_total: ([-+]?[0-9]*[.]?[0-9]+([eE][-+]?[0-9]+)?)'}, \n", 184 | " {'Name': 'num_steps_trained', 'Regex': 'num_steps_trained: ([-+]?[0-9]*[.]?[0-9]+([eE][-+]?[0-9]+)?)'}, \n", 185 | " {'Name': 'timesteps_total', 'Regex': 'timesteps_total: ([-+]?[0-9]*[.]?[0-9]+([eE][-+]?[0-9]+)?)'},\n", 186 | " {'Name': 'episode_len_mean', 'Regex': 'episode_len_mean: ([-+]?[0-9]*[.]?[0-9]+([eE][-+]?[0-9]+)?)'},\n", 187 | "\n", 188 | " {'Name': 'episode_reward_max', 'Regex': 'episode_reward_max: ([-+]?[0-9]*[.]?[0-9]+([eE][-+]?[0-9]+)?)'}, \n", 189 | " {'Name': 'episode_reward_mean', 'Regex': 'episode_reward_mean: ([-+]?[0-9]*[.]?[0-9]+([eE][-+]?[0-9]+)?)'}, \n", 190 | " {'Name': 'episode_reward_min', 'Regex': 'episode_reward_min: ([-+]?[0-9]*[.]?[0-9]+([eE][-+]?[0-9]+)?)'},\n", 191 | "] " 192 | ] 193 | }, 194 | { 195 | "cell_type": "markdown", 196 | "metadata": {}, 197 | "source": [ 198 | "### Ray 
homogeneous scaling - Specify `train_instance_count` > 1\n", 199 | "\n", 200 | "Homogeneous scaling allows us to use multiple instances of the same type.\n", 201 | "\n", 202 | "Spot instances are unused EC2 instances that could be used at 90% discount compared to On-Demand prices (more information about spot instances can be found [here](https://aws.amazon.com/ec2/spot/?cards.sort-by=item.additionalFields.startDateTime&cards.sort-order=asc) and [here](https://docs.aws.amazon.com/sagemaker/latest/dg/model-managed-spot-training.html))\n", 203 | "\n", 204 | "To use spot instances, set `train_use_spot_instances = True`. To use On-Demand instances, `train_use_spot_instances = False`." 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": null, 210 | "metadata": { 211 | "scrolled": true 212 | }, 213 | "outputs": [], 214 | "source": [ 215 | "train_instance_count = 2\n", 216 | "train_use_spot_instances = False\n", 217 | "\n", 218 | "# Select which procgen environments to run in `envs_to_run`\n", 219 | "'''\n", 220 | "envs_to_run = [\"coinrun\", \"bigfish\", \"bossfight\", \"caveflyer\",\n", 221 | " \"chaser\", \"climber\", \"dodgeball\",\n", 222 | " \"fruitbot\", \"heist\", \"jumper\", \"leaper\", \"maze\",\n", 223 | " \"miner\", \"ninja\", \"plunder\", \"starpilot\"]\n", 224 | "'''\n", 225 | "\n", 226 | "envs_to_run = [\"coinrun\"]\n", 227 | "\n", 228 | "for env in envs_to_run:\n", 229 | " if train_use_spot_instances:\n", 230 | " print('*** Using spot instances ... 
')\n", 231 | " job_name = 'sm-ray-dist-procgen-spot-' + time.strftime(\"%Y-%m-%d-%H-%M-%S\", time.gmtime()) + \"-\" + env\n", 232 | " checkpoint_s3_uri = 's3://{}/sagemaker-procgen/checkpoints/{}'.format(s3_bucket, job_name)\n", 233 | " training_params = {\"train_use_spot_instances\": True,\n", 234 | " \"train_max_run\": 3600 * 5,\n", 235 | " \"train_max_wait\": 7200 * 5,\n", 236 | " \"checkpoint_s3_uri\": checkpoint_s3_uri\n", 237 | " }\n", 238 | " hyperparameters = {\n", 239 | " \"rl.training.upload_dir\": checkpoint_s3_uri, #Necessary for syncing between spot instances\n", 240 | " \"rl.training.config.env_config.env_name\": env,\n", 241 | " }\n", 242 | " else:\n", 243 | " training_params = {\"base_job_name\": job_name_prefix + \"-\" + env}\n", 244 | " hyperparameters = {\n", 245 | " #\"rl.training.upload_dir\": s3_output_path + \"/tensorboard_sync\", # Uncomment to view tensorboard\n", 246 | " \"rl.training.config.env_config.env_name\": env,\n", 247 | " }\n", 248 | "\n", 249 | " # Defining the RLEstimator\n", 250 | " estimator = RLEstimator(entry_point=\"train-sagemaker-distributed-gpu.py\",\n", 251 | " source_dir='source',\n", 252 | " dependencies=[\"source/utils\", \"source/common/\", \"neurips2020-procgen-starter-kit/\"],\n", 253 | " image_uri=image_name,\n", 254 | " role=role,\n", 255 | " instance_type=instance_type,\n", 256 | " instance_count=train_instance_count,\n", 257 | " output_path=s3_output_path,\n", 258 | " metric_definitions=metric_definitions,\n", 259 | " hyperparameters=hyperparameters,\n", 260 | " **training_params\n", 261 | " )\n", 262 | " if train_use_spot_instances:\n", 263 | " estimator.fit(job_name=job_name, wait=False)\n", 264 | " else:\n", 265 | " estimator.fit(wait=False)\n", 266 | " \n", 267 | " print(' ')\n", 268 | " print(estimator.latest_training_job.job_name)\n", 269 | " print('type=', instance_type, 'count=', train_instance_count )\n", 270 | " print(' ')" 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": 
null, 276 | "metadata": {}, 277 | "outputs": [], 278 | "source": [] 279 | } 280 | ], 281 | "metadata": { 282 | "kernelspec": { 283 | "display_name": "conda_tensorflow2_p36", 284 | "language": "python", 285 | "name": "conda_tensorflow2_p36" 286 | }, 287 | "language_info": { 288 | "codemirror_mode": { 289 | "name": "ipython", 290 | "version": 3 291 | }, 292 | "file_extension": ".py", 293 | "mimetype": "text/x-python", 294 | "name": "python", 295 | "nbconvert_exporter": "python", 296 | "pygments_lexer": "ipython3", 297 | "version": "3.6.10" 298 | } 299 | }, 300 | "nbformat": 4, 301 | "nbformat_minor": 4 302 | } 303 | --------------------------------------------------------------------------------