├── .DS_Store
├── .github
    └── PULL_REQUEST_TEMPLATE.md
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── README.md
└── scripts
    ├── add-pypi-repository
        └── on-start.sh
    ├── auto-stop-idle
        ├── autostop.py
        └── on-start.sh
    ├── connect-emr-cluster
        └── on-start.sh
    ├── disable-uninstall-ssm-agent
        └── on-start.sh
    ├── enable-fips-openssl-provider
        └── on-start.sh
    ├── execute-notebook-on-startup
        └── on-start.sh
    ├── export-to-pdf-enable
        ├── on-create.sh
        └── on-start.sh
    ├── install-conda-package-all-environments
        └── on-start.sh
    ├── install-conda-package-single-environment
        └── on-start.sh
    ├── install-lab-extension
        └── on-start.sh
    ├── install-nb-extension
        └── on-start.sh
    ├── install-pip-package-all-environments
        └── on-start.sh
    ├── install-pip-package-single-environment
        └── on-start.sh
    ├── install-r-package
        └── on-start.sh
    ├── install-server-extension
        └── on-start.sh
    ├── migrate-ebs-data-backup
        └── on-start.sh
    ├── migrate-ebs-data-sync
        └── on-create.sh
    ├── mount-efs-file-system
        └── on-start.sh
    ├── mount-fsx-lustre-file-system
        └── on-start.sh
    ├── notebook-history-s3
        ├── notebook-history-s3.py
        └── on-start.sh
    ├── notebook-instance-monitor
        ├── amazon-cloudwatch-agent.json
        ├── notebookapi.py
        └── on-start.sh
    ├── persistent-conda-ebs
        ├── on-create.sh
        └── on-start.sh
    ├── proxy-for-jupyter
        └── on-start.sh
    ├── publish-instance-metrics
        ├── amazon-cloudwatch-agent.json
        └── on-start.sh
    ├── set-codecommit-cross-account-access
        └── on-start.sh
    ├── set-env-variable
        └── on-start.sh
    └── set-git-config
        └── on-start.sh


/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-notebook-instance-lifecycle-config-samples/77ffb12d81a7392af0a88cd8158d936a01c955ac/.DS_Store


--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
 1 | **Issue #, if available:**
 2 | 
 3 | **Description of changes:**
 4 | 
 5 | **Testing Done**
 6 | 
 7 | - [ ] Notebook Instance created successfully with the Lifecycle Configuration
 8 | - [ ] Notebook Instance stopped and started successfully
 9 | - [ ] Documentation in the script around any network access requirements
10 | - [ ] Documentation in the script around any IAM permission requirements
11 | - [ ] CLI commands used to validate functionality on the instance
12 | - [ ] New script link and description added to README.md
13 | 
14 | ```
15 | # Provide your commands here
16 | /you/commands/here
17 | ```
18 | 
19 | 
20 | 
21 | 
22 | 
23 | By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice.
24 | 


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | ## Code of Conduct
2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
4 | opensource-codeofconduct@amazon.com with any additional questions or comments.
5 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing Guidelines
 2 | 
 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional
 4 | documentation, we greatly value feedback and contributions from our community.
 5 | 
 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary
 7 | information to effectively respond to your bug report or contribution.
 8 | 
 9 | 
10 | ## Reporting Bugs/Feature Requests
11 | 
12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features.
13 | 
14 | When filing an issue, please check [existing open](https://github.com/aws-samples/amazon-sagemaker-notebook-instance-lifecycle-configuration-samples/issues), or [recently closed](https://github.com/aws-samples/amazon-sagemaker-notebook-instance-lifecycle-configuration-samples/issues?utf8=%E2%9C%93&q=is%3Aissue%20is%3Aclosed%20), issues to make sure somebody else hasn't already
15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful:
16 | 
17 | * A reproducible test case or series of steps
18 | * The version of our code being used
19 | * Any modifications you've made relevant to the bug
20 | * Anything unusual about your environment or deployment
21 | 
22 | 
23 | ## Contributing via Pull Requests
24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that:
25 | 
26 | 1. You are working against the latest source on the *master* branch.
27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already.
28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted.
29 | 
30 | To send us a pull request, please:
31 | 
32 | 1. Fork the repository.
33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change.
34 | 3. Ensure local tests pass.
35 | 4. Commit to your fork using clear commit messages.
36 | 5. Send us a pull request, answering any default questions in the pull request interface.
37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation.
38 | 
39 | GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and
40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/).
41 | 
42 | 
43 | ## Finding contributions to work on
44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any ['help wanted'](https://github.com/aws-samples/amazon-sagemaker-notebook-instance-lifecycle-configuration-samples/labels/help%20wanted) issues is a great place to start.
45 | 
46 | 
47 | ## Code of Conduct
48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
50 | opensource-codeofconduct@amazon.com with any additional questions or comments.
51 | 
52 | 
53 | ## Security issue notifications
54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue.
55 | 
56 | 
57 | ## Licensing
58 | 
59 | See the [LICENSE](https://github.com/aws-samples/amazon-sagemaker-notebook-instance-lifecycle-configuration-samples/blob/master/LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution.
60 | 
61 | We may ask you to sign a [Contributor License Agreement (CLA)](http://en.wikipedia.org/wiki/Contributor_License_Agreement) for larger changes.
62 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of
 4 | this software and associated documentation files (the "Software"), to deal in
 5 | the Software without restriction, including without limitation the rights to
 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
 7 | the Software, and to permit persons to whom the Software is furnished to do so.
 8 | 
 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
10 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
11 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
12 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
13 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
14 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
15 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | ## SageMaker Notebook Instance Lifecycle Config Samples
  2 | 
  3 | ### Overview
  4 | 
  5 | A collection of sample scripts to customize [Amazon SageMaker Notebook Instances](https://docs.aws.amazon.com/sagemaker/latest/dg/nbi.html) using [Lifecycle Configurations](https://docs.aws.amazon.com/sagemaker/latest/dg/notebook-lifecycle-config.html)
  6 | 
  7 | Lifecycle Configurations provide a mechanism to customize Notebook Instances via shell scripts that are executed during the lifecycle of a Notebook Instance.
  8 | 
  9 | #### Sample Scripts
 10 | 
 11 | * [add-pypi-repository](scripts/add-pypi-repository) - This script adds a private PyPi repository in addition to or instead of pypi.org.
 12 | * [auto-stop-idle](scripts/auto-stop-idle) - This script stops a SageMaker notebook once it's idle for more than 1 hour. (default time)
 13 | * [connect-emr-cluster](scripts/connect-emr-cluster) - This script connects an EMR cluster to the Notebook Instance using SparkMagic.
 14 | * [disable-uninstall-ssm-agent](scripts/disable-uninstall-ssm-agent) - This script disables and uninstalls the SSM Agent at startup.
 15 | * [enable-fips-openssl-provider](scripts/enable-fips-openssl-provider) - This script enables the OpenSSL FIPS provider in each conda environment.
 16 | * [execute-notebook-on-startup](scripts/execute-notebook-on-startup) - This script executes a Notebook file on the instance during startup.
 17 | * [export-to-pdf-enable](scripts/export-to-pdf-enable) - This script enables Jupyter to export a notebook directly to PDF.
 18 | * [install-conda-package-all-environments](scripts/install-conda-package-all-environments) - This script installs a single conda package in all SageMaker conda environments, apart from the JupyterSystemEnv which is a system environment reserved for Jupyter.
 19 | * [install-conda-package-single-environment](scripts/install-conda-package-single-environment) - This script installs a single conda package in a single SageMaker conda environment.
 20 | * [install-lab-extension](scripts/install-lab-extension) - This script installs a jupyterlab extension package in SageMaker Notebook Instance.
 21 | * [install-nb-extension](scripts/install-nb-extension) - This script installs a single jupyter notebook extension package in SageMaker Notebook Instance.
 22 | * [install-pip-package-all-environments](scripts/install-pip-package-all-environments) - This script installs a single pip package in all SageMaker conda environments, apart from the JupyterSystemEnv which is a system environment reserved for Jupyter.
 23 | * [install-pip-package-single-environment](scripts/install-pip-package-single-environment) - This script installs a single pip package in a single SageMaker conda environments.
 24 | * [install-r-package](scripts/install-r-package) - This script installs a single R package in SageMaker R environment.
 25 | * [install-server-extension](scripts/install-server-extension) - This script installs a single jupyter notebook server extension package in SageMaker Notebook Instance.
 26 | * [migrate-ebs-data-backup](scripts/migrate-ebs-data-backup) - This script backs up content in `/home/ec2-user/SageMaker/` to a S3 bucket specified in a tag on the notebook instance.
 27 | * [migrate-ebs-data-sync](scripts/migrate-ebs-data-sync) - This script downloads a snapshot created by [migrate-ebs-data-backup](scripts/migrate-ebs-data-backup) to `/home/ec2-user/SageMaker/` in a new notebook instance. You specify the snapshop using tags on the notebook instance.
 28 | * [mount-efs-file-system](scripts/mount-efs-file-system) - This script mounts an EFS file system to the Notebook Instance at the ~/SageMaker/efs directory based off the DNS name.
 29 | * [mount-fsx-lustre-file-system](scripts/mount-fsx-lustre-file-system) - This script mounts an FSx for Lustre file system to the Notebook Instance at the /fsx directory based off the DNS and Mount name parameters.
 30 | * [notebook-history-s3](scripts/notebook-history-s3) - This script persists the underlying sqlite database of commands and cells executed for S3.
 31 | * [persistent-conda-ebs](scripts/persistent-conda-ebs) - This script installs a custom, persistent installation of conda on the Notebook Instance's EBS volume, and ensures that these custom environments are available as kernels in Jupyter.
 32 | * [proxy-for-jupyter](scripts/proxy-for-jupyter) - This script configures proxy settings for your Jupyter notebooks and the SageMaker Notebook Instance.
 33 | * [publish-instance-metrics](scripts/publish-instance-metrics) - This script publishes the system-level metrics from the Notebook Instance to CloudWatch.
 34 | * [set-codecommit-cross-account-access](scripts/set-codecommit-cross-account-access) - This script sets cross-account CodeCommit access, so you can work on repositories hosted in another account.
 35 | * [set-env-variable](scripts/set-env-variable) - This script gets a value from the Notebook Instance's tags and sets it as an environment variable for all processes including Jupyter.
 36 | * [set-git-config](scripts/set-git-config) - This script sets the username and email address in Git config.
 37 | 
 38 | ### Development
 39 | 
 40 | For contributors looking to develop scripts, they can be developed directly on SageMaker Notebook Instances since that is the environment that they are run with. Lifecycle Configuration scripts run as `root`, the working directory is `/`.  To simulate the execution environment, you may use
 41 | 
 42 | ```bash
 43 | sudo su
 44 | export PATH=/usr/local/sbin:/usr/local/bin:/usr/bin:/usr/sbin:/sbin:/bin
 45 | cd /
 46 | ```
 47 | 
 48 | Edit the script in a file such as `my-script-on-start.sh` and execute it as
 49 | 
 50 | ```bash
 51 | sh my-script-on-start.sh
 52 | ```
 53 | 
 54 | The directory structure followed is:
 55 | 
 56 | ```
 57 | scripts/
 58 |     my-script-name/
 59 |         on-start.sh
 60 |         on-create.sh
 61 | ```
 62 | 
 63 | ### Testing
 64 | 
 65 | To test the script end-to-end:
 66 | 
 67 | Create a [Lifecycle Configuration](https://docs.aws.amazon.com/sagemaker/latest/dg/API_CreateNotebookInstanceLifecycleConfig.html) with the script content and
 68 | a [Notebook Instance](https://docs.aws.amazon.com/sagemaker/latest/dg/API_CreateNotebookInstance.html) with the Lifecycle Configuration
 69 | 
 70 | ```bash
 71 | # If the scripts are in a directory "scripts/my-script-name/*"
 72 | SCRIPT_NAME=my-script-name
 73 | ROLE_ARN=my-role-arn
 74 | 
 75 | RESOURCE_NAME="$SCRIPT_NAME-$RANDOM"
 76 | 
 77 | # Add any script specific options such as subnet-id
 78 | aws sagemaker create-notebook-instance-lifecycle-config \
 79 |     --notebook-instance-lifecycle-config-name "$RESOURCE_NAME" \
 80 |     --on-start Content=$((cat scripts/$SCRIPT_NAME/on-start.sh || echo "")| base64) \
 81 |     --on-create Content=$((cat scripts/$SCRIPT_NAME/on-create.sh || echo "")| base64)
 82 | 
 83 | aws sagemaker create-notebook-instance \
 84 |     --notebook-instance-name "$RESOURCE_NAME" \
 85 |     --instance-type ml.t2.medium \
 86 |     --role-arn "$ROLE_ARN" \
 87 |     --lifecycle-config-name "$RESOURCE_NAME"
 88 | 
 89 | aws sagemaker wait \
 90 |     notebook-instance-in-service \
 91 |     --notebook-instance-name "$RESOURCE_NAME"
 92 | ```
 93 | 
 94 | * [Access](https://docs.aws.amazon.com/sagemaker/latest/dg/API_CreatePresignedNotebookInstanceUrl.html) the Notebook Instance and perform any validation specific to the script.
 95 | 
 96 | ```bash
 97 | aws sagemaker create-presigned-notebook-instance-url \
 98 |     --notebook-instance-name "$RESOURCE_NAME"
 99 | ```
100 | 
101 | * [Stop](https://docs.aws.amazon.com/sagemaker/latest/dg/API_StopNotebookInstance.html) and [Start](https://docs.aws.amazon.com/sagemaker/latest/dg/API_StartNotebookInstance.html) the Notebook Instance
102 | 
103 | ```bash
104 | aws sagemaker stop-notebook-instance \
105 |     --notebook-instance-name "$RESOURCE_NAME"
106 | 
107 | aws sagemaker wait \
108 |     notebook-instance-stopped \
109 |     --notebook-instance-name "$RESOURCE_NAME"
110 | 
111 | aws sagemaker start-notebook-instance \
112 |     --notebook-instance-name "$RESOURCE_NAME"
113 | 
114 | aws sagemaker wait \
115 |     notebook-instance-in-service \
116 |     --notebook-instance-name "$RESOURCE_NAME"
117 | ```
118 | 
119 | * [Access](https://docs.aws.amazon.com/sagemaker/latest/dg/API_CreatePresignedNotebookInstanceUrl.html) the Notebook Instance again and perform any validation specific to the script.
120 | 
121 | ```bash
122 | aws sagemaker create-presigned-notebook-instance-url \
123 |     --notebook-instance-name "$RESOURCE_NAME"
124 | ```
125 | 
126 | File a Pull Request following the instructions in the [Contribution Guidelines](CONTRIBUTING.md).
127 | 
128 | ## License Summary
129 | 
130 | This sample code is made available under the MIT-0 license. See the LICENSE file.
131 | 


--------------------------------------------------------------------------------
/scripts/add-pypi-repository/on-start.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -e
 4 | 
 5 | # OVERVIEW:
 6 | # This script adds an alternate PyPi repository to be used by `pip install ...`. 
 7 | # 
 8 | # You can use this script to connect to your organization's private PyPi repository. 
 9 | # There are two common reasons for using private PyPi repositories and this script is 
10 | # set up to support both.
11 | # 1. You have a repository that you want to use to host your organization's own packages
12 | #    *in addition to* the main pypi.org repository. In that case, set DISABLE_PYPI, below,
13 | #    to false and the setup will allow `pip install` to search both repositories.
14 | # 2. You have a repository that hosts a set of curated packages that are approved for use
15 | #    in your organization. This is common in organizations that have strict regulatory
16 | #    or security requirements. In this case, set DISABLE_PYPI to true and pip will be
17 | #    be configured to search *only* your private repository.
18 | #
19 | # For other requirements (like multiple repositories), feel free to edit this script to
20 | # meet your needs.
21 | #
22 | # See the pip documentation at https://pip.pypa.io/en/stable/user_guide/#config-file for 
23 | # details on how the pip configuration file works.
24 | 
25 | # The URL of your repository
26 | PYPI_URL=<Your respository url here>
27 | 
28 | # If DISABLE_PYPI is true, pip will ignore the pypi.org repository and only use the defined
29 | # repository. If it is false, pip will use the defined repository *in addition to* 
30 | # pypi.org
31 | DISABLE_PYPI=false
32 | 
33 | # If ADD_TRUST is true, tell pip to trust the specified server even if the certificate
34 | # doesn't validate.
35 | ADD_TRUST=true
36 | 
37 | if [ "$DISABLE_PYPI" == "true" ]
38 | then
39 |     extra=""
40 | else
41 |     extra="extra-"
42 | fi
43 | 
44 | if [ "$ADD_TRUST" == "true" ]
45 | then
46 |     pypi_host=$(sed 's%^[^/]*//\([^/:]*\)[:/].*$%\1%' <<< "${PYPI_URL}")
47 |     trusted_host="trusted-host = ${pypi_host}"
48 | else
49 |     trusted_host=""
50 | fi
51 | 
52 | mkdir ~ec2-user/.pip
53 | 
54 | cat > ~ec2-user/.pip/pip.conf <<END
55 | [global]
56 | ${extra}index-url = ${PYPI_URL}
57 | ${trusted_host}
58 | END
59 | 
60 | chown -R ec2-user:ec2-user ~ec2-user/.pip


--------------------------------------------------------------------------------
/scripts/auto-stop-idle/autostop.py:
--------------------------------------------------------------------------------
  1 | #     Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
  2 | #
  3 | #     Licensed under the Apache License, Version 2.0 (the "License").
  4 | #     You may not use this file except in compliance with the License.
  5 | #     A copy of the License is located at
  6 | #
  7 | #         https://aws.amazon.com/apache-2-0/
  8 | #
  9 | #     or in the "license" file accompanying this file. This file is distributed
 10 | #     on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 11 | #     express or implied. See the License for the specific language governing
 12 | #     permissions and limitations under the License.
 13 | 
 14 | import requests
 15 | from datetime import datetime
 16 | import getopt, sys
 17 | import urllib3
 18 | import boto3
 19 | import json
 20 | 
 21 | urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
 22 | 
 23 | # Usage
 24 | usageInfo = """Usage:
 25 | This scripts checks if a notebook is idle for X seconds if it does, it'll stop the notebook:
 26 | python autostop.py --time <time_in_seconds> [--port <jupyter_port>] [--ignore-connections]
 27 | Type "python autostop.py -h" for available options.
 28 | """
 29 | # Help info
 30 | helpInfo = """-t, --time
 31 |     Auto stop time in seconds
 32 | -p, --port
 33 |     jupyter port
 34 | -c --ignore-connections
 35 |     Stop notebook once idle, ignore connected users
 36 | -h, --help
 37 |     Help information
 38 | """
 39 | 
 40 | # Read in command-line parameters
 41 | idle = True
 42 | port = '8443'
 43 | ignore_connections = False
 44 | try:
 45 |     opts, args = getopt.getopt(sys.argv[1:], "ht:p:c", ["help","time=","port=","ignore-connections"])
 46 |     if len(opts) == 0:
 47 |         raise getopt.GetoptError("No input parameters!")
 48 |     for opt, arg in opts:
 49 |         if opt in ("-h", "--help"):
 50 |             print(helpInfo)
 51 |             exit(0)
 52 |         if opt in ("-t", "--time"):
 53 |             time = int(arg)
 54 |         if opt in ("-p", "--port"):
 55 |             port = str(arg)
 56 |         if opt in ("-c", "--ignore-connections"):
 57 |             ignore_connections = True
 58 | except getopt.GetoptError:
 59 |     print(usageInfo)
 60 |     exit(1)
 61 | 
 62 | # Missing configuration notification
 63 | missingConfiguration = False
 64 | if not time:
 65 |     print("Missing '-t' or '--time'")
 66 |     missingConfiguration = True
 67 | if missingConfiguration:
 68 |     exit(2)
 69 | 
 70 | 
 71 | def is_idle(last_activity):
 72 |     last_activity = datetime.strptime(last_activity,"%Y-%m-%dT%H:%M:%S.%fz")
 73 |     if (datetime.now() - last_activity).total_seconds() > time:
 74 |         print('Notebook is idle. Last activity time = ', last_activity)
 75 |         return True
 76 |     else:
 77 |         print('Notebook is not idle. Last activity time = ', last_activity)
 78 |         return False
 79 | 
 80 | 
 81 | def get_notebook_name():
 82 |     log_path = '/opt/ml/metadata/resource-metadata.json'
 83 |     with open(log_path, 'r') as logs:
 84 |         _logs = json.load(logs)
 85 |     return _logs['ResourceName']
 86 | 
 87 | # This is hitting Jupyter's sessions API: https://github.com/jupyter/jupyter/wiki/Jupyter-Notebook-Server-API#Sessions-API
 88 | response = requests.get('https://localhost:'+port+'/api/sessions', verify=False)
 89 | data = response.json()
 90 | if len(data) > 0:
 91 |     for notebook in data:
 92 |         # Idleness is defined by Jupyter
 93 |         # https://github.com/jupyter/notebook/issues/4634
 94 |         if notebook['kernel']['execution_state'] == 'idle':
 95 |             if not ignore_connections:
 96 |                 if notebook['kernel']['connections'] == 0:
 97 |                     if not is_idle(notebook['kernel']['last_activity']):
 98 |                         idle = False
 99 |                 else:
100 |                     idle = False
101 |                     print('Notebook idle state set as %s because no kernel has been detected.' % idle)
102 |             else:
103 |                 if not is_idle(notebook['kernel']['last_activity']):
104 |                     idle = False
105 |                     print('Notebook idle state set as %s since kernel connections are ignored.' % idle)
106 |         else:
107 |             print('Notebook is not idle:', notebook['kernel']['execution_state'])
108 |             idle = False
109 | else:
110 |     client = boto3.client('sagemaker')
111 |     uptime = client.describe_notebook_instance(
112 |         NotebookInstanceName=get_notebook_name()
113 |     )['LastModifiedTime']
114 |     if not is_idle(uptime.strftime("%Y-%m-%dT%H:%M:%S.%fz")):
115 |         idle = False
116 |         print('Notebook idle state set as %s since no sessions detected.' % idle)
117 | 
118 | if idle:
119 |     print('Closing idle notebook')
120 |     client = boto3.client('sagemaker')
121 |     client.stop_notebook_instance(
122 |         NotebookInstanceName=get_notebook_name()
123 |     )
124 | else:
125 |     print('Notebook not idle. Pass.')


--------------------------------------------------------------------------------
/scripts/auto-stop-idle/on-start.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -ex
 4 | 
 5 | # OVERVIEW
 6 | # This script stops a SageMaker notebook once it's idle for more than 1 hour (default time)
 7 | # You can change the idle time for stop using the environment variable below.
 8 | # If you want the notebook the stop only if no browsers are open, remove the --ignore-connections flag
 9 | #
10 | # Note that this script will fail if either condition is not met
11 | #   1. Ensure the Notebook Instance has internet connectivity to fetch the example config
12 | #   2. Ensure the Notebook Instance execution role permissions to SageMaker:StopNotebookInstance to stop the notebook 
13 | #       and SageMaker:DescribeNotebookInstance to describe the notebook.
14 | #
15 | 
16 | # PARAMETERS
17 | IDLE_TIME=3600
18 | 
19 | echo "Fetching the autostop script"
20 | wget https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-notebook-instance-lifecycle-config-samples/master/scripts/auto-stop-idle/autostop.py
21 | 
22 | 
23 | echo "Detecting Python install with boto3 install"
24 | 
25 | # Find which install has boto3 and use that to run the cron command. So will use default when available
26 | # Redirect stderr as it is unneeded
27 | CONDA_PYTHON_DIR=$(source /home/ec2-user/anaconda3/bin/activate /home/ec2-user/anaconda3/envs/JupyterSystemEnv && which python)
28 | if $CONDA_PYTHON_DIR -c "import boto3" 2>/dev/null; then
29 |     PYTHON_DIR=$CONDA_PYTHON_DIR
30 | elif /usr/bin/python -c "import boto3" 2>/dev/null; then
31 |     PYTHON_DIR='/usr/bin/python'
32 | else
33 |     # If no boto3 just quit because the script won't work
34 |     echo "No boto3 found in Python or Python3. Exiting..."
35 |     exit 1
36 | fi
37 | 
38 | echo "Found boto3 at $PYTHON_DIR"
39 | 
40 | 
41 | echo "Starting the SageMaker autostop script in cron"
42 | 
43 | (crontab -l 2>/dev/null; echo "*/5 * * * * $PYTHON_DIR $PWD/autostop.py --time $IDLE_TIME --ignore-connections >> /var/log/jupyter.log") | crontab -


--------------------------------------------------------------------------------
/scripts/connect-emr-cluster/on-start.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -e
 4 | 
 5 | # OVERVIEW
 6 | # This script connects an EMR cluster to the Notebook Instance using SparkMagic.
 7 | # 
 8 | # Note that this script will fail if the EMR cluster's master node IP address not reachable
 9 | #   1. Ensure that the EMR master node IP is resolvable from the Notebook Instance.
10 | #       - One way to accomplish this is having the Notebook Instance and the EMR cluster in the same subnet
11 | #   2. Ensure the EMR master node Security Groups provides inbound access from the Notebook Instance Security Group
12 | #       Type        - Protocol - Port - Source
13 | #       Custom TCP  - TCP      - 8998 - $NOTEBOOK_SECURITY_GROUP
14 | #   3. Ensure the Notebook Instance has internet connectivity to fetch the SparkMagic example config  
15 | #
16 | # https://aws.amazon.com/blogs/machine-learning/build-amazon-sagemaker-notebooks-backed-by-spark-in-amazon-emr/
17 | 
18 | # PARAMETERS
19 | EMR_MASTER_IP=your.emr.master.ip
20 | 
21 | cd /home/ec2-user/.sparkmagic
22 | 
23 | echo "Fetching SparkMagic example config from GitHub.."
24 | wget https://raw.githubusercontent.com/jupyter-incubator/sparkmagic/master/sparkmagic/example_config.json
25 | 
26 | echo "Replacing EMR master node IP in SparkMagic config.."
27 | sed -i -- "s/localhost/$EMR_MASTER_IP/g" example_config.json
28 | mv example_config.json config.json
29 | 
30 | echo "Sending a sample request to Livy.."
31 | curl "$EMR_MASTER_IP:8998/sessions"


--------------------------------------------------------------------------------
/scripts/disable-uninstall-ssm-agent/on-start.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | 
 4 | # OVERVIEW
 5 | # This script disables and uninstalls the SSM agent which is present by default on Notebook Instances.
 6 | # NOTE: The SSM Agent will still be enabled for the short period between the Notebook Instance initiating and the Lifecycle Configuration script executing
 7 | 
 8 | ssm_status=$(status amazon-ssm-agent)
 9 | 
10 | # Set -e after "status" so that the script doesn't fail if the SSM agent is already stopped
11 | set -e
12 | 
13 | if [[ "$ssm_status" =~ "running" ]]; 
14 | then 
15 |     echo "Stopping SSM Agent.."
16 |     stop amazon-ssm-agent
17 | fi
18 | 
19 | echo "Uninstalling SSM Agent.."
20 | yum erase amazon-ssm-agent --assumeyes


--------------------------------------------------------------------------------
/scripts/enable-fips-openssl-provider/on-start.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Iterate through each conda environment
 4 | for ENV in /home/ec2-user/anaconda3/envs/*; do
 5 |    ENV_NAME=$(basename $ENV)
 6 |    # Skip any non-directory and JupyterSystemEnv
 7 |    if [[ ! -d "$ENV" || $ENV_NAME == "JupyterSystemEnv" ]]; then
 8 |        continue
 9 |    fi
10 | 
11 |    # Construct the path to the openssl.cnf file within the environment
12 |    openssl_cnf_path="$ENV/ssl/openssl.cnf"
13 | 
14 |    # Check if the openssl.cnf file exists
15 |    if [[ -f "$openssl_cnf_path" ]]; then
16 |        # Use sed to make the required modifications
17 |        # openssl.cnf modifications are described here: https://github.com/openssl/openssl/blob/master/README-FIPS.md
18 |        sed -i.bak 's|^# \.include fipsmodule\.cnf|\.include /home/ec2-user/anaconda3/envs/'"$ENV_NAME"'/ssl/fipsmodule.cnf|' "$openssl_cnf_path"
19 |        sed -i.bak 's|^# fips = fips_sect|fips = fips_sect|' "$openssl_cnf_path"
20 |        sed -i.bak 's|^# activate = 1|activate = 1|' "$openssl_cnf_path"
21 |        sed -i.bak '/providers = provider_sect/a\
22 | alg_section = algorithm_sect' "$openssl_cnf_path"
23 |        sed -i.bak '/activate = 1/a\
24 | [algorithm_sect]\
25 | default_properties = fips=yes' "$openssl_cnf_path"
26 | 
27 |        echo "Updated openssl.cnf at $openssl_cnf_path"
28 |    else
29 |        echo "Warning: openssl.cnf file not found at $openssl_cnf_path"
30 |    fi
31 | done
32 | 


--------------------------------------------------------------------------------
/scripts/execute-notebook-on-startup/on-start.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -e
 4 | 
 5 | # OVERVIEW
 6 | # This script executes an existing Notebook file on the instance during start using nbconvert(https://github.com/jupyter/nbconvert)
 7 | 
 8 | # PARAMETERS
 9 | 
10 | ENVIRONMENT=pytorch_p36
11 | NOTEBOOK_FILE=/home/ec2-user/SageMaker/MyNotebook.ipynb
12 | 
13 | source /home/ec2-user/anaconda3/bin/activate "$ENVIRONMENT"
14 | 
15 | jupyter nbconvert "$NOTEBOOK_FILE" --ExecutePreprocessor.kernel_name=python --execute
16 | 
17 | source /home/ec2-user/anaconda3/bin/deactivate


--------------------------------------------------------------------------------
/scripts/export-to-pdf-enable/on-create.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -e
 4 | 
 5 | # OVERVIEW
 6 | # This script enables Jupyter to export a notebook directly to PDF.
 7 | # nbconvert depends on XeLaTeX and several LaTeX packages that are non-trivial to
 8 | # install because `tlmgr` is not included with the texlive packages provided by yum.
 9 | 
10 | # REQUIREMENTS
11 | # Internet access is required in order to fetch the below latex libraries from the ctan mirror.
12 | 
13 | sudo -u ec2-user -i <<EOF
14 | unset SUDO_UID
15 | 
16 | mkdir -p /home/ec2-user/SageMaker/.texmf
17 | cd /home/ec2-user/SageMaker/.texmf
18 | wget http://mirrors.ctan.org/install/macros/latex/contrib/tcolorbox.tds.zip
19 | wget http://mirrors.ctan.org/install/macros/latex/contrib/environ.tds.zip
20 | wget http://mirrors.ctan.org/install/macros/latex/contrib/etoolbox.tds.zip
21 | wget http://mirrors.ctan.org/install/macros/latex/contrib/trimspaces.tds.zip
22 | wget http://mirrors.ctan.org/macros/latex/contrib/upquote.zip
23 | unzip tcolorbox.tds.zip
24 | unzip environ.tds.zip
25 | unzip etoolbox.tds.zip
26 | unzip trimspaces.tds.zip
27 | unzip upquote.zip
28 | mv upquote tex/latex/
29 | 
30 | EOF


--------------------------------------------------------------------------------
/scripts/export-to-pdf-enable/on-start.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -e
 4 | 
 5 | # OVERVIEW
 6 | # This script enables Jupyter to export a notebook directly to PDF.
 7 | # nbconvert depends on XeLaTeX and several LaTeX packages that are non-trivial to
 8 | # install because `tlmgr` is not included with the texlive packages provided by yum.
 9 | 
10 | # REQUIREMENTS
11 | # Internet access is required in on-create.sh in order to fetch the latex libraries from the ctan mirror.
12 | 
13 | sudo yum install -y texlive*
14 | sudo -u ec2-user -i <<EOF
15 | unset SUDO_UID
16 | 
17 | ln -s /home/ec2-user/SageMaker/.texmf /home/ec2-user/texmf
18 | 
19 | EOF
20 | 


--------------------------------------------------------------------------------
/scripts/install-conda-package-all-environments/on-start.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -e
 4 | 
 5 | # OVERVIEW
 6 | # This script installs a single conda package in all SageMaker conda environments, apart from the JupyterSystemEnv
 7 | # which is a system environment reserved for Jupyter.
 8 | 
 9 | # NOTE: if the total runtime of this script exceeds 5 minutes, the Notebook Instance will fail to start up.  If you
10 | # would like to run this script in the background, then replace "sudo" with "nohup sudo -b".  This will allow the
11 | # Notebook Instance to start up while the installation happens in the background.
12 | 
13 | sudo -u ec2-user -i <<'EOF'
14 | 
15 | # PARAMETERS
16 | PACKAGE=scipy
17 | 
18 | 
19 | # Note that "base" is special environment name, include it there as well.
20 | conda install "$PACKAGE" --name base --yes
21 | 
22 | for env in /home/ec2-user/anaconda3/envs/*; do
23 |     env_name=$(basename "$env")
24 |     if [ $env_name = 'JupyterSystemEnv' ]; then
25 |         continue
26 |     fi
27 | 
28 |     conda install "$PACKAGE" --name "$env_name" --yes
29 | done
30 | 
31 | EOF
32 | 


--------------------------------------------------------------------------------
/scripts/install-conda-package-single-environment/on-start.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -e
 4 | 
 5 | # OVERVIEW
 6 | # This script installs a single conda package in a single SageMaker conda environments.
 7 | 
 8 | sudo -u ec2-user -i <<'EOF'
 9 | 
10 | # PARAMETERS
11 | PACKAGE=scipy
12 | ENVIRONMENT=python3
13 | 
14 | conda install "$PACKAGE" --name "$ENVIRONMENT" --yes
15 | 
16 | EOF
17 | 


--------------------------------------------------------------------------------
/scripts/install-lab-extension/on-start.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -e
 4 | 
 5 | # OVERVIEW
 6 | # This script installs a jupyterlab extension package in SageMaker Notebook Instance
 7 | 
 8 | sudo -u ec2-user -i <<'EOF'
 9 | 
10 | # PARAMETERS
11 | EXTENSION_NAME=@jupyterlab/git
12 | 
13 | source /home/ec2-user/anaconda3/bin/activate JupyterSystemEnv
14 | 
15 | jupyter labextension install $EXTENSION_NAME
16 | 
17 | source /home/ec2-user/anaconda3/bin/deactivate
18 | 
19 | EOF
20 | 


--------------------------------------------------------------------------------
/scripts/install-nb-extension/on-start.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -e
 4 | 
 5 | # OVERVIEW
 6 | # This script installs a single jupyter notebook extension package in SageMaker Notebook Instance
 7 | # For more details of the example extension, see https://github.com/jupyter-widgets/ipywidgets
 8 | 
 9 | sudo -u ec2-user -i <<'EOF'
10 | 
11 | # PARAMETERS
12 | PIP_PACKAGE_NAME=ipywidgets
13 | EXTENSION_NAME=widgetsnbextension
14 | 
15 | source /home/ec2-user/anaconda3/bin/activate JupyterSystemEnv
16 | 
17 | pip install $PIP_PACKAGE_NAME
18 | jupyter nbextension enable $EXTENSION_NAME --py --sys-prefix
19 | 
20 | source /home/ec2-user/anaconda3/bin/deactivate
21 | 
22 | EOF
23 | 


--------------------------------------------------------------------------------
/scripts/install-pip-package-all-environments/on-start.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -e
 4 | 
 5 | # OVERVIEW
 6 | # This script installs a single pip package in all SageMaker conda environments, apart from the JupyterSystemEnv which
 7 | # is a system environment reserved for Jupyter.
 8 | # Note this may timeout if the package installations in all environments take longer than 5 mins, consider using
 9 | # "nohup" to run this as a background process in that case.
10 | 
11 | sudo -u ec2-user -i <<'EOF'
12 | 
13 | # PARAMETERS
14 | PACKAGE=scipy
15 | 
16 | # Note that "base" is special environment name, include it there as well.
17 | for env in base /home/ec2-user/anaconda3/envs/*; do
18 |     source /home/ec2-user/anaconda3/bin/activate $(basename "$env")
19 | 
20 |     if [ $env = 'JupyterSystemEnv' ]; then
21 |         continue
22 |     fi
23 | 
24 |     pip install --upgrade "$PACKAGE"
25 | 
26 |     source /home/ec2-user/anaconda3/bin/deactivate
27 | done
28 | 
29 | EOF
30 | 


--------------------------------------------------------------------------------
/scripts/install-pip-package-single-environment/on-start.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -e
 4 | 
 5 | # OVERVIEW
 6 | # This script installs a single pip package in a single SageMaker conda environments.
 7 | 
 8 | sudo -u ec2-user -i <<'EOF'
 9 | 
10 | # PARAMETERS
11 | PACKAGE=scipy
12 | ENVIRONMENT=python3
13 | 
14 | source /home/ec2-user/anaconda3/bin/activate "$ENVIRONMENT"
15 | 
16 | pip install --upgrade "$PACKAGE"
17 | 
18 | source /home/ec2-user/anaconda3/bin/deactivate
19 | 
20 | EOF
21 | 


--------------------------------------------------------------------------------
/scripts/install-r-package/on-start.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -e
 4 | 
 5 | # OVERVIEW
 6 | # This script installs a single conda R package (bigmemory) in SageMaker R environment.
 7 | # To install an R package with conda, the package needs to be prefixed with 'r-'. For example, to install the package `shiny`, run 'conda install -c r r-shiny'.
 8 | 
 9 | sudo -u ec2-user -i <<'EOF'
10 | 
11 | source activate R
12 | 
13 | conda install -y -c r r-bigmemory 
14 | conda deactivate
15 | EOF
16 | 


--------------------------------------------------------------------------------
/scripts/install-server-extension/on-start.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -e
 4 | 
 5 | # OVERVIEW
 6 | # This script installs a single jupyter notebook server extension package in SageMaker Notebook Instance
 7 | 
 8 | sudo -u ec2-user -i <<'EOF'
 9 | 
10 | # PARAMETERS
11 | PIP_PACKAGE_NAME=jupyterlab-git
12 | EXTENSION_NAME=jupyterlab_git
13 | 
14 | source /home/ec2-user/anaconda3/bin/activate JupyterSystemEnv
15 | 
16 | pip install $PIP_PACKAGE_NAME
17 | jupyter serverextension enable $EXTENSION_NAME --py --sys-prefix
18 | 
19 | source /home/ec2-user/anaconda3/bin/deactivate
20 | 
21 | EOF
22 | 


--------------------------------------------------------------------------------
/scripts/migrate-ebs-data-backup/on-start.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -e
 3 | 
 4 | # OVERVIEW
 5 | # This script creates a snapshot of EBS volume /home/ec2-user/SageMaker/ to a S3 bucket specified by tag on the notebook instance (ebs-backup-bucket). 
 6 | #
 7 | # The snapshot can be download into a new instance using https://github.com/aws-samples/amazon-sagemaker-notebook-instance-lifecycle-config-samples/tree/master/scripts/migrate-ebs-data-sync/on-create.sh.
 8 | # 
 9 | # Note that the execution is done with nohup to bypass the startup timeout set by SageMaker Notebook instance. Depending on the size of the source /home/ec2-user/SageMaker/, it may take more than 5 minutes. You would see a text file BACKUP_COMPLETE created in /home/ec2-user/SageMaker/ and in the S3 bucket to denote the completion. You need s3:CreateBucket, s3:GetObject, s3:PutObject, and s3:ListBucket in the execution role to perform aws s3 sync.
10 | #
11 | # Note if your notebook instance is in VPC mode without a direct internet access, please create a S3 VPC Gateway endpoint (https://docs.aws.amazon.com/vpc/latest/privatelink/vpc-endpoints-s3.html) and a SageMaker API VPC interface endpoint (https://docs.aws.amazon.com/sagemaker/latest/dg/interface-vpc-endpoint.html).
12 | #
13 | # See detail instruction in https://aws.amazon.com/blogs/machine-learning/migrate-your-work-to-amazon-sagemaker-notebook-instance-with-amazon-linux-2/
14 | 
15 | cat << "EOF" > /home/ec2-user/backup.sh
16 | #!/bin/bash
17 | NOTEBOOK_ARN=$(jq '.ResourceArn' /opt/ml/metadata/resource-metadata.json --raw-output)
18 | NOTEBOOK_NAME=$(jq '.ResourceName' /opt/ml/metadata/resource-metadata.json --raw-output)
19 | BACKUP_DESTINATION=ebs-backup-bucket
20 | BUCKET=$(aws sagemaker list-tags --resource-arn $NOTEBOOK_ARN  | jq -r --arg BACKUP_DESTINATION "$BACKUP_DESTINATION" .'Tags[] | select(.Key == $BACKUP_DESTINATION).Value' --raw-output)
21 | # check if bucket exists
22 | # if not, create a bucket
23 | echo "Checking if s3://${BUCKET} exists..."
24 | aws s3api wait bucket-exists --bucket $BUCKET || (echo "s3://${BUCKET} does not exist, creating..."; aws s3 mb s3://${BUCKET})
25 | TIMESTAMP=`date +%F-%H-%M-%S`
26 | SNAPSHOT=${NOTEBOOK_NAME}_${TIMESTAMP}
27 | echo "Backup up /home/ec2-user/SageMaker/ to s3://${BUCKET}/${SNAPSHOT}/"
28 | aws s3 sync --exclude "*/lost+found/*" /home/ec2-user/SageMaker/ s3://${BUCKET}/${SNAPSHOT}/
29 | exitcode=$?
30 | echo $exitcode
31 | if [ $exitcode -eq 0 ] || [ $exitcode -eq 2 ]
32 | then
33 |     TIMESTAMP=`date +%F-%H-%M-%S`
34 |     echo "Created s3://${BUCKET}/${SNAPSHOT}/" > /home/ec2-user/SageMaker/BACKUP_COMPLETE
35 |     echo "Completed at $TIMESTAMP" >> /home/ec2-user/SageMaker/BACKUP_COMPLETE
36 |     aws s3 cp /home/ec2-user/SageMaker/BACKUP_COMPLETE s3://${BUCKET}/${SNAPSHOT}/BACKUP_COMPLETE
37 | fi
38 | EOF
39 | 
40 | chmod +x /home/ec2-user/backup.sh
41 | chown ec2-user:ec2-user /home/ec2-user/backup.sh
42 | 
43 | # nohup to bypass the notebook instance timeout at start
44 | sudo -u ec2-user nohup /home/ec2-user/backup.sh >>  /home/ec2-user/nohup.out 2>&1 & 
45 | 


--------------------------------------------------------------------------------
/scripts/migrate-ebs-data-sync/on-create.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -e
 3 | 
 4 | # OVERVIEW
 5 | # This script downloads a snapshot specified by tags (ebs-backup-bucket and backup-snapshot)on the notebook instance into /home/ec2-user/SageMaker/. 
 6 | # The snapshot can be created from an existing instace using https://github.com/aws-samples/amazon-sagemaker-notebook-instance-lifecycle-config-samples/tree/master/scripts/migrate-ebs-data-backup/on-start.sh.
 7 | # 
 8 | # Note that the execution is done with nohup to bypass the startup timeout set by SageMaker Notebook instance. Depending on the size of the source /home/ec2-user/SageMaker/, it may take more than 5 minutes. You would see a text file SYNC_COMPLETE created in /home/ec2-user/SageMaker/ to denote the completion. You need s3:GetObject, s3:PutObject, and s3:ListBucket for the S3 bucket in the execution role to perform aws s3 sync.
 9 | # 
10 | # Note if your notebook instance is in VPC mode without a direct internet access, please create a S3 VPC Gateway endpoint (https://docs.aws.amazon.com/vpc/latest/privatelink/vpc-endpoints-s3.html) and a SageMaker API VPC interface endpoint (https://docs.aws.amazon.com/sagemaker/latest/dg/interface-vpc-endpoint.html).
11 | #
12 | # See detail instruction in https://aws.amazon.com/blogs/machine-learning/migrate-your-work-to-amazon-sagemaker-notebook-instance-with-amazon-linux-2/
13 | 
14 | cat << "EOF" > /home/ec2-user/sync.sh
15 | # When creating a new AL2 notebook instance, sync from a snapshot in S3 bucket to /home/ec2-user/SageMaker/
16 | NOTEBOOK_ARN=$(jq '.ResourceArn' /opt/ml/metadata/resource-metadata.json --raw-output)
17 | NOTEBOOK_NAME=$(jq '.ResourceName' /opt/ml/metadata/resource-metadata.json --raw-output)
18 | VAR_BACKUP_SOURCE=ebs-backup-bucket
19 | BUCKET=$(aws sagemaker list-tags --resource-arn $NOTEBOOK_ARN  | jq -r --arg VAR_BACKUP_SOURCE "$VAR_BACKUP_SOURCE" .'Tags[] | select(.Key == $VAR_BACKUP_SOURCE).Value' --raw-output)
20 | VAR_SNAPSHOT=backup-snapshot
21 | SNAPSHOT=$(aws sagemaker list-tags --resource-arn $NOTEBOOK_ARN  | jq -r --arg VAR_SNAPSHOT "$VAR_SNAPSHOT" .'Tags[] | select(.Key == $VAR_SNAPSHOT).Value' --raw-output)
22 | 
23 | # check if SNAPSHOT exists, if not, proceed without sync
24 | echo "Checking if s3://${BUCKET}/${SNAPSHOT} exists..."
25 | aws s3 ls s3://${BUCKET}/${SNAPSHOT} || (echo "Snapshot s3://${BUCKET}/${SNAPSHOT} does not exist. Proceed without the sync."; exit 0)
26 | echo "Sync-ing s3://${BUCKET}/${SNAPSHOT}/ to /home/ec2-user/SageMaker/"
27 | aws s3 sync s3://${BUCKET}/${SNAPSHOT}/ /home/ec2-user/SageMaker/ 
28 | exitcode=$?
29 | echo $exitcode
30 | if [ $exitcode -eq 0 ] || [ $exitcode -eq 2 ]
31 | then
32 |     TIMESTAMP=`date +%F-%H-%M-%S`
33 |     echo "Completed at $TIMESTAMP" > /home/ec2-user/SageMaker/SYNC_COMPLETE
34 | fi
35 | EOF
36 | 
37 | chmod +x /home/ec2-user/sync.sh
38 | chown ec2-user:ec2-user /home/ec2-user/sync.sh
39 | 
40 | # nohup to bypass the notebook instance timeout at start
41 | sudo -u ec2-user nohup /home/ec2-user/sync.sh >>  /home/ec2-user/nohup.out 2>&1 &
42 |  
43 | 


--------------------------------------------------------------------------------
/scripts/mount-efs-file-system/on-start.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -e
 4 | 
 5 | # OVERVIEW
 6 | # This script mounts an EFS file system to the Notebook Instance at the ~/SageMaker/efs directory based off the DNS name.
 7 | # 
 8 | # Note that this script will fail if file system is not reachable from the Notebook Instance.
 9 | #   1. Ensure that the EFS file system DNS name is resolvable from the Notebook Instance
10 | #       - One way to accomplish this is having the Notebook Instance and the EFS file system in the same subnet
11 | #   2. Ensure the Mount Target Security Group provides inbound access from the Notebook Instance Security Group
12 | #       Type - Protocol - Port - Source
13 | #       NFS  - TCP      - 2049 - $NOTEBOOK_SECURITY_GROUP
14 | #
15 | # https://aws.amazon.com/blogs/machine-learning/mount-an-efs-file-system-to-an-amazon-sagemaker-notebook-with-lifecycle-configurations/
16 | 
17 | # PARAMETERS
18 | EFS_DNS_NAME=fs-your-fs-id.efs.your-region.amazonaws.com
19 | 
20 | mkdir -p /home/ec2-user/SageMaker/efs
21 | mount \
22 |     --type nfs \
23 |     --options nfsvers=4.1,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2 \
24 |     $EFS_DNS_NAME:/ /home/ec2-user/SageMaker/efs \
25 |     --verbose
26 | 
27 | chmod go+rw /home/ec2-user/SageMaker/efs


--------------------------------------------------------------------------------
/scripts/mount-fsx-lustre-file-system/on-start.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -e
 4 | 
 5 | # OVERVIEW
 6 | # This script mounts a FSx for Lustre file system to the Notebook Instance at the /fsx directory based off
 7 | # the DNS and Mount name parameters.
 8 | #
 9 | # This script assumes the following:
10 | #   1. There's an FSx for Lustre file system created and running
11 | #   2. The FSx for Lustre file system is accessible from the Notebook Instance
12 | #       - The Notebook Instance has to be created on the same VPN as the FSx for Lustre file system
13 | #       - The subnets and security groups have to be properly set up.  Same values for file system and notebook.
14 | #   3. Set the FSX_DNS_NAME parameter below to the DNS name of the FSx for Lustre file system.
15 | #   4. Set the FSX_MOUNT_NAME parameter below to the Mount name of the FSx for Lustre file system. It's not the name you appointed at creation.
16 | 
17 | 
18 | #sudo -u ec2-user -i <<'EOF'
19 | 
20 | # PARAMETERS
21 | FSX_DNS_NAME=fs-your-fs-id.fsx.your-region.amazonaws.com
22 | FSX_MOUNT_NAME=your-mount-name
23 | 
24 | # First, we need to install the lustre libraries
25 | # this command is dependent on current running Amazon Linux and JupyterLab versions
26 | CURR_VERSION=$(cat /etc/os-release)
27 | if [[ $CURR_VERSION == *$"http://aws.amazon.com/amazon-linux-ami/"* ]]; then
28 | 	sudo yum install -y lustre-client
29 | else
30 | 	sudo amazon-linux-extras install -y lustre
31 | fi
32 | 
33 | # Now we can create the mount point and mount the file system
34 | sudo mkdir -p /fsx
35 | 
36 | sudo mount -t lustre -o noatime,flock $FSX_DNS_NAME@tcp:/$FSX_MOUNT_NAME /fsx
37 | 
38 | # Let's make sure we have the appropriate access to the directory
39 | sudo chmod go+rw /fsx
40 | 
41 | #EOF
42 | 


--------------------------------------------------------------------------------
/scripts/notebook-history-s3/notebook-history-s3.py:
--------------------------------------------------------------------------------
 1 | #     Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 2 | #
 3 | #     Licensed under the Apache License, Version 2.0 (the "License").
 4 | #     You may not use this file except in compliance with the License.
 5 | #     A copy of the License is located at
 6 | #
 7 | #         https://aws.amazon.com/apache-2-0/
 8 | #
 9 | #     or in the "license" file accompanying this file. This file is distributed
10 | #     on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
11 | #     express or implied. See the License for the specific language governing
12 | #     permissions and limitations under the License.
13 | 
14 | import requests
15 | from datetime import datetime
16 | import getopt, sys
17 | import boto3
18 | import json
19 | import sagemaker
20 | import urllib3
21 | import logging
22 | 
23 | 
24 | urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
25 | 
26 | # Usage
27 | usageInfo = """Usage:
28 | This scripts gets the sqllite database of sessions and jupyter history and writes the spllite files to S3:
29 | python log_notebook_history. Type "python autostop.py -h" for available options.
30 | """
31 | 
32 | # Help info
33 | helpInfo = """
34 | -h, --help
35 |     Help information
36 | """
37 | logging.basicConfig(level=logging.INFO, format='%(message)s')
38 | logger = logging.getLogger()
39 | logger.addHandler(logging.FileHandler('/var/log/notebook_history_s3.log', 'a'))
40 | 
41 | # Read in command-line parameters
42 | try:
43 |     opts, args = getopt.getopt(sys.argv[1:], "h", ["help"])
44 |     for opt, arg in opts:
45 |         if opt in ("-h", "--help"):
46 |             print(helpInfo)
47 |             exit(0)
48 | except getopt.GetoptError:
49 |     print(usageInfo)
50 |     exit(1)
51 | 
52 | def get_notebook_name():
53 |     log_path = "/opt/ml/metadata/resource-metadata.json"
54 |     with open(log_path, "r") as logs:
55 |         _logs = json.load(logs)
56 |     return _logs["ResourceName"]
57 | 
58 | sagemaker_session = sagemaker.Session()
59 | s3 = boto3.client("s3")
60 | bucket = sagemaker_session.default_bucket()
61 | key = "notebooks/{}/history/{}/history.sqlite".format(get_notebook_name(), datetime.now().strftime("%Y%m%d-%H%M%S"))
62 | 
63 | logger.info("Writing history.sqlite to {}/{}".format(bucket,key))
64 | with open('/home/ec2-user/.ipython/profile_default/history.sqlite', 'rb') as data:
65 |     s3.upload_fileobj(data, bucket, key)
66 | 


--------------------------------------------------------------------------------
/scripts/notebook-history-s3/on-start.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -e
 4 | 
 5 | # OVERVIEW
 6 | # This script logs the history of a notebook server to S3 once an hour/
 7 | #
 8 | # Note that this script will fail if either condition is not met
 9 | #   1. Ensure the Notebook Instance has internet connectivity to fetch the example config
10 | #   2. Ensure the Notebook Instance execution role permissions to write a file to the Sagemaker default bucket
11 | 
12 | # DEPENDENCIES
13 | pip install sagemaker
14 | 
15 | # PARAMETERS
16 | echo "Fetching the log history script"
17 | wget https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-notebook-instance-lifecycle-config-samples/master/scripts/notebook-history-s3/notebook-history-s3.py
18 | echo "Starting the SageMaker logging script in cron"
19 | 
20 | (crontab -l 2>/dev/null; echo "0 * * * * /usr/bin/python3 $PWD/notebook-history-s3.py") | crontab -
21 | 


--------------------------------------------------------------------------------
/scripts/notebook-instance-monitor/amazon-cloudwatch-agent.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "metrics": {
 3 |     "namespace": "SageMakerNotebookInstances",
 4 |     "metrics_collected": {
 5 |       "cpu": {
 6 |         "measurement": [ "cpu_usage_idle","cpu_usage_active" ],
 7 |         "metrics_collection_interval": 60,
 8 |         "append_dimensions": { "notebook_instance_name": "MyNotebookInstance" },
 9 |         "resources": [ "*" ],
10 |         "totalcpu": true
11 |       },
12 |       "nvidia_gpu":{
13 |         "measurement": ["nvidia_smi_utilization_gpu","nvidia_smi_utilization_memory","nvidia_smi_memory_used","nvidia_smi_memory_total"],
14 |         "metrics_collection_interval": 60,
15 |         "append_dimensions":{ "notebook_instance_name": "MyNotebookInstance" },
16 |         "resources":["*"]
17 |       },
18 |       "disk": {
19 |         "measurement": [ "used_percent" ],
20 |         "metrics_collection_interval": 60,
21 |         "append_dimensions": { "notebook_instance_name": "MyNotebookInstance" },
22 |         "resources": [ "*" ]
23 |       },
24 |       "diskio": {
25 |         "measurement": [ "write_bytes","read_bytes", "writes", "reads" ],
26 |         "metrics_collection_interval": 60,
27 |         "append_dimensions": { "notebook_instance_name": "MyNotebookInstance" },
28 |         "resources": [ "*" ]
29 |       },
30 |       "mem": {
31 |         "measurement": [ "mem_used_percent" ],
32 |         "metrics_collection_interval": 60,
33 |         "append_dimensions": { "notebook_instance_name": "MyNotebookInstance" }
34 |       },
35 |       "net": {
36 |         "measurement": [ "bytes_sent", "bytes_recv", "packets_sent", "packets_recv" ],
37 |         "metrics_collection_interval": 60,
38 |         "append_dimensions": { "notebook_instance_name": "MyNotebookInstance" },
39 |         "resources": [ "*" ]
40 |       },
41 |       "swap": {
42 |         "measurement": [ "swap_used_percent" ],
43 |         "metrics_collection_interval": 60,
44 |         "append_dimensions": { "notebook_instance_name": "MyNotebookInstance" }
45 |       }
46 |     }
47 |   }
48 | }
49 | 
50 | 


--------------------------------------------------------------------------------
/scripts/notebook-instance-monitor/notebookapi.py:
--------------------------------------------------------------------------------
  1 | import requests
  2 | from datetime import datetime
  3 | import getopt, sys
  4 | import urllib3
  5 | import boto3
  6 | import json
  7 | import os
  8 | import GPUtil
  9 | import psutil
 10 | 
 11 | from io import StringIO 
 12 | 
 13 | class Capturing(list):
 14 |     def __enter__(self):
 15 |         self._stdout = sys.stdout
 16 |         sys.stdout = self._stringio = StringIO()
 17 |         return self
 18 |     def __exit__(self, *args):
 19 |         self.extend(self._stringio.getvalue().splitlines())
 20 |         del self._stringio    # free up some memory
 21 |         sys.stdout = self._stdout
 22 | 
 23 | # OVERVIEW
 24 | # This script is adapted from https://github.com/aws-samples/amazon-sagemaker-notebook-instance-lifecycle-config-samples/blob/master/scripts/auto-stop-idle/autostop.py. Modifications are made to calculate four quantities (CPU utilization, CPU memory utilization, GPU utilization, GPU memory utilization) at regular intervals defined by the cron expression of the on-start script. These aggregate values are also added as tags to the notebook instance so users can get an idea of what the utilization looks like without accessing the actual jupyter notebook. Additionally, a cloudwatch agent logs more detailed metrics for users to monitor notebook instance usage. Fianlly, an example query (commented out) is provided to use within Cost Explorer to visualize aggregate metrics. 
 25 | 
 26 | idle = True
 27 | port = '8443'
 28 | 
 29 | # Ignore if any browsers or clients are open
 30 | ignore_connections = False
 31 | 
 32 | # Threshold for deciding idle value
 33 | time_threshold = 4*60*60 # 4 hours in seconds
 34 | 
 35 | # Force shutdown if conditions are true, or just log to output
 36 | force_shutdown = False
 37 | 
 38 | urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
 39 | 
 40 | def get_notebook_name():
 41 |     log_path = '/opt/ml/metadata/resource-metadata.json'
 42 |     with open(log_path, 'r') as logs:
 43 |         _logs = json.load(logs)
 44 |     return _logs['ResourceName']
 45 | 
 46 | 
 47 | def get_notebook_resource_arn():
 48 |     log_path = '/opt/ml/metadata/resource-metadata.json'
 49 |     with open(log_path, 'r') as logs:
 50 |         _logs = json.load(logs)
 51 |     return _logs['ResourceArn']
 52 | 
 53 | 
 54 | # When is a notebook considered idle by the Notebooks API? - https://github.com/jupyter/notebook/issues/4634
 55 | 
 56 | # The way it works at present is that the kernel sends a 'busy' message when it starts executing a request, and an 'idle' message when it finishes. So it's idle if there's not code running. The 'while True' loop would leave it busy.
 57 | 
 58 | # Code execution isn't the only kind of request, though. Among other things, when you open a notebook in a tab, it will make a kernel info request, which will reset the timer.
 59 | 
 60 | 
 61 | def is_idle(last_activity):
 62 |     last_activity = datetime.strptime(last_activity,"%Y-%m-%dT%H:%M:%S.%fz")
 63 |     if (datetime.now() - last_activity).total_seconds() > time_threshold:
 64 |         print('Notebook is idle. Last activity time = ', last_activity)
 65 |         return True
 66 |     else:
 67 |         print('Notebook is not idle. Last activity time = ', last_activity)
 68 |         return False
 69 | 
 70 | 
 71 | response = requests.get('https://localhost:'+port+'/api/sessions', verify=False)
 72 | data = response.json()
 73 | print(data)
 74 | if len(data) > 0:
 75 |     
 76 |     print("Using Jupyter Notebook API since request was successful")
 77 |     for notebook in data:
 78 | 
 79 |         if notebook['kernel']['execution_state'] == 'idle':
 80 |             if not ignore_connections:
 81 |                 if notebook['kernel']['connections'] == 0:
 82 |                     if not is_idle(notebook['kernel']['last_activity']):
 83 |                         idle = False
 84 |                 else:
 85 |                     idle = False #If any connection exists, notebook is not idling
 86 |             else:
 87 |                 if not is_idle(notebook['kernel']['last_activity']):
 88 |                     idle = False #If last activity is recent, notebook is not idling
 89 |         else:
 90 |             print('Notebook is not idle:', notebook['kernel']['execution_state'])
 91 |             idle = False
 92 | else:
 93 |     print("Using SageMaker instance last modified time")
 94 |     client = boto3.client('sagemaker')
 95 |     uptime = client.describe_notebook_instance(
 96 |         NotebookInstanceName=get_notebook_name()
 97 |     )['LastModifiedTime']
 98 |     if not is_idle(uptime.strftime("%Y-%m-%dT%H:%M:%S.%fz")):
 99 |         idle = False
100 |         
101 |         
102 | #CPU, Mem and GPU utilization
103 | print(f"Utilization metrics at {datetime.now()}")
104 | 
105 | total_cpu_util = psutil.cpu_percent()
106 | total_mem_util = psutil.virtual_memory().percent
107 | print(f"CPU utilization = {total_cpu_util}%")
108 | print(f"Memory utilization = {total_mem_util}%")
109 | # Testing the GPUtil library for both GPU performance details
110 | num_gpu = 0
111 | try:
112 |     print("GPU utilization = ")
113 |     with Capturing() as output:
114 |         GPUtil.showUtilization()
115 |     
116 | 
117 |     if len(output)==1:
118 |         print("Found no GPUs")
119 |     else:
120 |         print(f"Found {len(output) -2} GPUs:") # Output is formatted, -2 is one for header and another for separator '---'
121 |         num_gpu = len(output)-2
122 |         total_gpu_util = 0
123 |         total_gpumem_util = 0
124 |         for i in range(2,len(output)):
125 |             tmp = output[i].split('|')
126 |             # print(tmp)
127 |             print(f"GPU{tmp[1]} mem = {tmp[-2]}")
128 |             print(f"GPU{tmp[1]} util = {tmp[-4]}")
129 |             total_gpu_util+=int(tmp[-4].split('%')[0])
130 |             total_gpumem_util+=int(tmp[-2].split('%')[0])
131 |         
132 |         print(f"Total GPU Mem Utilization = {total_gpumem_util}/{(len(output) -2)*100} %")
133 |         print(f"Total GPU Utilization = {total_gpu_util}/{(len(output) -2)*100} %")
134 |         
135 | 
136 | except Exception as e:
137 |     print("Did not capture GPU utilization")
138 |     print(e)
139 |     total_gpu_util = 0
140 |     total_gpumem_util = 0
141 |     
142 | 
143 | 
144 | # Updating tags
145 | client = boto3.client('sagemaker')
146 | response = client.add_tags(
147 |     ResourceArn=get_notebook_resource_arn(),
148 |     Tags=[
149 |     {
150 |         'Key': 'total_cpu_util',
151 |         'Value': str(total_cpu_util)
152 |     },
153 |     {
154 |         'Key': 'total_mem_util',
155 |         'Value': str(total_mem_util)
156 |     },
157 |     {
158 |         'Key': 'total_gpu_util',
159 |         'Value': str(total_gpu_util)
160 |     },
161 |     {
162 |         'Key': 'total_gpumem_util',
163 |         'Value': str(total_gpumem_util)
164 |     }
165 |     ])
166 |     
167 |     
168 | # Add conditions here:
169 | 
170 | shutdown = False
171 | 
172 | if not idle and num_gpu>0 and 0 < total_gpu_util < 20:
173 |     print("Recommend using a smaller GPU instance")
174 |     
175 | if idle and total_cpu_util < 10 and total_mem_util < 10 and force_shutdown:
176 |     print(f'Closing idle notebook since Jupyter Kernels idling is {idle}, total CPU utilization is {total_cpu_util} and total Memory utilization is {total_mem_util}')
177 |     client = boto3.client('sagemaker')
178 |     client.stop_notebook_instance(
179 |         NotebookInstanceName=get_notebook_name()
180 |     )
181 | 
182 | else:
183 |     print(f"Notebook is active at {datetime.now()}. Updated util metrics")
184 |     print(f'NOT closing idle notebook since Jupyter Kernels idling is {idle}, total CPU utilization is {total_cpu_util} and total Memory utilization is {total_mem_util}')
185 |     
186 | 
187 | print(json.dumps({"CPU_util":total_cpu_util, "Mem_util":total_mem_util, "GPU_util":total_gpu_util, "GPU_mem_util":total_gpumem_util}))
188 | 
189 | client = boto3.client('sagemaker')
190 | response = client.list_tags(
191 |     ResourceArn=get_notebook_resource_arn()
192 | )
193 | tags = response['Tags']
194 | tagdict = {}
195 | 
196 | for tag in tags:
197 |     tagdict[tag['Key']] = tag['Value']
198 | print("---")
199 | print(tagdict)
200 | 
201 | 
202 | try:
203 |     print("If available, log running average utilization ...")
204 |     print(float(tagdict['total_cpu_util']) + float(total_cpu_util))
205 | 
206 |     print(json.dumps({
207 |         'avg_CPU_util' : int((float(tagdict['total_cpu_util']) + float(total_cpu_util))/2.),
208 |         'avg_Mem_util' : int((float(tagdict['total_mem_util']) + float(total_mem_util))/2.),
209 |         'avg_GPU_util' : int((float(tagdict['total_gpu_util']) + float(total_gpu_util))/2.),
210 |         'avg_GPUmem_util' : int((float(tagdict['total_gpumem_util']) + float(total_gpumem_util))/2.),
211 |     }))
212 |     
213 | except Exception as e:
214 |     print('Historical values not available')
215 |     print(e)
216 |     
217 |     
218 | # In cloudwatch log insights, use the a query similar to the following:
219 | '''
220 | fields @timestamp, avg_CPU_util
221 | | filter @logStream="notebook-name/jupyter.log"
222 | | stats avg(avg_CPU_util),avg(avg_Mem_util),avg(avg_GPU_util),avg(avg_GPUmem_util),count() by bin(60s)
223 | | sort @timestamp asc 
224 | '''
225 | 
226 | 
227 | 
228 |     
229 | 


--------------------------------------------------------------------------------
/scripts/notebook-instance-monitor/on-start.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -ex
 4 | 
 5 | # OVERVIEW
 6 | # This script is adapted from https://github.com/aws-samples/amazon-sagemaker-notebook-instance-lifecycle-config-samples/blob/master/scripts/auto-stop-idle/autostop.py. Modifications are made to calculate four quantities (CPU utilization, CPU memory utilization, GPU utilization, GPU memory utilization) at regular intervals defined by the cron expression of the on-start script. These aggregate values are also added as tags to the notebook instance so users can get an idea of what the utilization looks like without accessing the actual jupyter notebook. Additionally, a cloudwatch agent logs more detailed metrics for users to monitor notebook instance usage. Fianlly, an example query (commented out) is provided to use within Cost Explorer to visualize aggregate metrics.
 7 | 
 8 | # Note that this script will fail if either condition is not met
 9 | #   1. Ensure the Notebook Instance has internet connectivity to fetch the example config
10 | #   2. Ensure the Notebook Instance execution role permissions to SageMaker:StopNotebookInstance to stop the notebook 
11 | #       and SageMaker:DescribeNotebookInstance to describe the notebook, SageMaker:AddTags
12 | #   3. Ensure that the CloudWatch agent is correctly installed and permissions match those outlined in the docs https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/create-iam-roles-for-cloudwatch-agent.html
13 | 
14 | echo "Fetching the scripts"
15 | wget https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-notebook-instance-lifecycle-config-samples/blob/master/scripts/notebook-instance-monitor/notebookapi.py
16 | wget https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-notebook-instance-lifecycle-config-samples/blob/master/scripts/notebook-instance-monitor/amazon-cloudwatch-agent.json
17 | 
18 | 
19 | echo "Detecting Python install with boto3 install"
20 | 
21 | # Find which install has boto3 and use that to run the cron command. So will use default when available
22 | # Redirect stderr as it is unneeded
23 | if /usr/bin/python3 -c "import boto3" 2>/dev/null; then
24 |     # Standard installation in any notebook instance should find both python3 and pip-3
25 |     PYTHON_DIR='/usr/bin/python3'
26 |     /usr/bin/python3 -m pip install gputil psutil --user
27 | else
28 |     # If no boto3 just quit because the script won't work
29 |     echo "No boto3 found in Python or Python3. Exiting..."
30 |     exit 1
31 | fi
32 | 
33 | echo "Found boto3 at $PYTHON_DIR"
34 | 
35 | 
36 | echo "Starting the SageMaker autostop script in cron"
37 | 
38 | # Change the following cron expression to update the frequency of running the notebookapi.py script
39 | (crontab -l 2>/dev/null; echo "*/30 * * * * $PYTHON_DIR $PWD/notebookapi.py >> /var/log/jupyter.log") | crontab -
40 | 
41 | 
42 | echo "Also turning on cloudwatch metrics through the CW agent"
43 | 
44 | NOTEBOOK_INSTANCE_NAME=$(jq '.ResourceName' \
45 |                       /opt/ml/metadata/resource-metadata.json --raw-output)
46 | 
47 | 
48 | sed -i -- "s/MyNotebookInstance/$NOTEBOOK_INSTANCE_NAME/g" amazon-cloudwatch-agent.json
49 | 
50 | echo "Starting the CloudWatch agent on the Notebook Instance."
51 | /opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a \
52 |     append-config -m ec2 -c file://$(pwd)/amazon-cloudwatch-agent.json
53 | 
54 | restart restart-cloudwatch-agent || true 
55 | systemctl restart amazon-cloudwatch-agent.service || true
56 | 
57 | rm amazon-cloudwatch-agent.json
58 | 


--------------------------------------------------------------------------------
/scripts/persistent-conda-ebs/on-create.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -e
 4 | 
 5 | # OVERVIEW
 6 | # This script installs a custom, persistent installation of conda on the Notebook Instance's EBS volume, and ensures
 7 | # that these custom environments are available as kernels in Jupyter.
 8 | # 
 9 | # The on-create script downloads and installs a custom conda installation to the EBS volume via Miniconda. Any relevant
10 | # packages can be installed here.
11 | #   1. ipykernel is installed to ensure that the custom environment can be used as a Jupyter kernel   
12 | #   2. Ensure the Notebook Instance has internet connectivity to download the Miniconda installer
13 | 
14 | 
15 | sudo -u ec2-user -i <<'EOF'
16 | unset SUDO_UID
17 | 
18 | # Install a separate conda installation via Miniconda
19 | WORKING_DIR=/home/ec2-user/SageMaker/custom-miniconda
20 | mkdir -p "$WORKING_DIR"
21 | wget https://repo.anaconda.com/miniconda/Miniconda3-4.6.14-Linux-x86_64.sh -O "$WORKING_DIR/miniconda.sh"
22 | bash "$WORKING_DIR/miniconda.sh" -b -u -p "$WORKING_DIR/miniconda" 
23 | rm -rf "$WORKING_DIR/miniconda.sh"
24 | 
25 | 
26 | # Create a custom conda environment
27 | source "$WORKING_DIR/miniconda/bin/activate"
28 | KERNEL_NAME="custom_python"
29 | PYTHON="3.6"
30 | 
31 | conda create --yes --name "$KERNEL_NAME" python="$PYTHON"
32 | conda activate "$KERNEL_NAME"
33 | 
34 | pip install --quiet ipykernel
35 | 
36 | # Customize these lines as necessary to install the required packages
37 | conda install --yes numpy
38 | pip install --quiet boto3
39 | 
40 | EOF
41 | 


--------------------------------------------------------------------------------
/scripts/persistent-conda-ebs/on-start.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -e
 4 | 
 5 | # OVERVIEW
 6 | # This script installs a custom, persistent installation of conda on the Notebook Instance's EBS volume, and ensures
 7 | # that these custom environments are available as kernels in Jupyter.
 8 | # 
 9 | # The on-start script uses the custom conda environment created in the on-create script and uses the ipykernel package
10 | # to add that as a kernel in Jupyter.
11 | #
12 | # For another example, see:
13 | # https://docs.aws.amazon.com/sagemaker/latest/dg/nbi-add-external.html#nbi-isolated-environment
14 | 
15 | sudo -u ec2-user -i <<'EOF'
16 | unset SUDO_UID
17 | 
18 | WORKING_DIR=/home/ec2-user/SageMaker/custom-miniconda
19 | source "$WORKING_DIR/miniconda/bin/activate"
20 | 
21 | for env in $WORKING_DIR/miniconda/envs/*; do
22 |     BASENAME=$(basename "$env")
23 |     source activate "$BASENAME"
24 |     python -m ipykernel install --user --name "$BASENAME" --display-name "Custom ($BASENAME)"
25 | done
26 | 
27 | # Optionally, uncomment these lines to disable SageMaker-provided Conda functionality.
28 | # echo "c.EnvironmentKernelSpecManager.use_conda_directly = False" >> /home/ec2-user/.jupyter/jupyter_notebook_config.py
29 | # rm /home/ec2-user/.condarc
30 | EOF
31 | 
32 | echo "Restarting the Jupyter server.."
33 | # restart command is dependent on current running Amazon Linux and JupyterLab
34 | CURR_VERSION=$(cat /etc/os-release)
35 | if [[ $CURR_VERSION == *$"http://aws.amazon.com/amazon-linux-ami/"* ]]; then
36 | 	sudo initctl restart jupyter-server --no-wait
37 | else
38 | 	sudo systemctl --no-block restart jupyter-server.service
39 | fi
40 | 


--------------------------------------------------------------------------------
/scripts/proxy-for-jupyter/on-start.sh:
--------------------------------------------------------------------------------
 1 | # This script configures proxy settings for your Jupyter notebooks and the SageMaker notebook instance.
 2 | # This is useful for use cases where you would like to configure your notebook instance in your custom VPC
 3 | # without direct internet access to route all traffic via a proxy server in your VPC.
 4 | 
 5 | # Please ensure that you have already configure a proxy server in your VPC.
 6 | 
 7 | #!/bin/bash
 8 |  
 9 | set -e
10 | 
11 | su - ec2-user -c "mkdir -p /home/ec2-user/.ipython/profile_default/startup/ && touch /home/ec2-user/.ipython/profile_default/startup/00-startup.py"
12 | 
13 | # Please replace proxy.local:3128 with the URL of your proxy server eg, proxy.example.com:80 or proxy.example.com:443
14 | # Please note that we are excluding S3 because we do not want this traffic to be routed over the public internet, but rather through the S3 endpoint in the VPC.
15 | 
16 | #PARAMETER
17 | SERVER=http://proxy.local:3128
18 | 
19 | echo "export http_proxy='$SERVER'" | tee -a /home/ec2-user/.profile >/dev/null
20 | echo "export https_proxy='$SERVER'" | tee -a /home/ec2-user/.profile >/dev/null
21 | echo "export no_proxy='s3.amazonaws.com,127.0.0.1,localhost'" | tee -a /home/ec2-user/.profile >/dev/null
22 | 
23 | # Now we change the terminal shell to bash
24 | echo "c.NotebookApp.terminado_settings={'shell_command': ['/bin/bash']}" | tee -a /home/ec2-user/.jupyter/jupyter_notebook_config.py >/dev/null
25 | 
26 | echo "import sys,os,os.path" | tee -a /home/ec2-user/.ipython/profile_default/startup/00-startup.py >/dev/null
27 | echo "os.environ['HTTP_PROXY']="\""$SERVER"\""" | tee -a /home/ec2-user/.ipython/profile_default/startup/00-startup.py >/dev/null
28 | echo "os.environ['HTTPS_PROXY']="\""$SERVER"\""" | tee -a /home/ec2-user/.ipython/profile_default/startup/00-startup.py >/dev/null
29 | echo "os.environ['NO_PROXY']="\""s3.amazonaws.com,127.0.0.1,localhost"\""" | tee -a /home/ec2-user/.ipython/profile_default/startup/00-startup.py >/dev/null
30 | 
31 | # Next, we reboot the system so the bash shell setting can take effect. This reboot is only required when applying proxy settings to the shell environment as well.
32 | # If only setting up Jupyter notebook proxy, you can leave this out
33 | 
34 | reboot
35 | 


--------------------------------------------------------------------------------
/scripts/publish-instance-metrics/amazon-cloudwatch-agent.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "metrics": {
 3 |     "namespace": "SageMakerNotebookInstances",
 4 |     "metrics_collected": {
 5 |       "cpu": {
 6 |         "measurement": [ "cpu_usage_idle" ],
 7 |         "metrics_collection_interval": 60,
 8 |         "append_dimensions": { "notebook_instance_name": "MyNotebookInstance" },
 9 |         "resources": [ "*" ],
10 |         "totalcpu": true
11 |       },
12 |       "disk": {
13 |         "measurement": [ "used_percent" ],
14 |         "metrics_collection_interval": 60,
15 |         "append_dimensions": { "notebook_instance_name": "MyNotebookInstance" },
16 |         "resources": [ "*" ]
17 |       },
18 |       "diskio": {
19 |         "measurement": [ "write_bytes","read_bytes", "writes", "reads" ],
20 |         "metrics_collection_interval": 60,
21 |         "append_dimensions": { "notebook_instance_name": "MyNotebookInstance" },
22 |         "resources": [ "*" ]
23 |       },
24 |       "mem": {
25 |         "measurement": [ "mem_used_percent" ],
26 |         "metrics_collection_interval": 60,
27 |         "append_dimensions": { "notebook_instance_name": "MyNotebookInstance" }
28 |       },
29 |       "net": {
30 |         "measurement": [ "bytes_sent", "bytes_recv", "packets_sent", "packets_recv" ],
31 |         "metrics_collection_interval": 60,
32 |         "append_dimensions": { "notebook_instance_name": "MyNotebookInstance" },
33 |         "resources": [ "*" ]
34 |       },
35 |       "swap": {
36 |         "measurement": [ "swap_used_percent" ],
37 |         "metrics_collection_interval": 60,
38 |         "append_dimensions": { "notebook_instance_name": "MyNotebookInstance" }
39 |       }
40 |     }
41 |   }
42 | }
43 | 
44 | 


--------------------------------------------------------------------------------
/scripts/publish-instance-metrics/on-start.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -e
 4 | 
 5 | # OVERVIEW
 6 | # This script publishes the system-level metrics from the Notebook instance to Cloudwatch.
 7 | #
 8 | # Note that this script will fail if either condition is not met
 9 | #   1. Ensure the Notebook Instance has internet connectivity to fetch the example config
10 | #   2. Ensure the Notebook Instance execution role permissions to cloudwatch:PutMetricData to publish the system-level metrics
11 | #
12 | # https://aws.amazon.com/cloudwatch/pricing/
13 | 
14 | # PARAMETERS
15 | NOTEBOOK_INSTANCE_NAME=$(jq '.ResourceName' \
16 |                       /opt/ml/metadata/resource-metadata.json --raw-output)
17 | 
18 | echo "Fetching the CloudWatch agent configuration file."
19 | wget https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-notebook-instance-lifecycle-config-samples/master/scripts/publish-instance-metrics/amazon-cloudwatch-agent.json
20 | 
21 | sed -i -- "s/MyNotebookInstance/$NOTEBOOK_INSTANCE_NAME/g" amazon-cloudwatch-agent.json
22 | 
23 | echo "Starting the CloudWatch agent on the Notebook Instance."
24 | /opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a \
25 |     append-config -m ec2 -c file://$(pwd)/amazon-cloudwatch-agent.json
26 | 
27 | CURR_VERSION=$(cat /etc/os-release)
28 | if [[ $CURR_VERSION == *$"http://aws.amazon.com/amazon-linux-ami/"* ]]; then
29 |     restart restart-cloudwatch-agent
30 | else
31 |     systemctl restart amazon-cloudwatch-agent.service
32 | fi
33 | 
34 | rm amazon-cloudwatch-agent.json
35 | 


--------------------------------------------------------------------------------
/scripts/set-codecommit-cross-account-access/on-start.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -e
 4 | 
 5 | # OVERVIEW
 6 | # This script sets cross-account CodeCommit access, so you can work on repositories hosted in another account.
 7 | # You'll need to create a role in AccountA granting repositories access to AccountB as instructed here:
 8 | # https://docs.aws.amazon.com/sagemaker/latest/dg/nbi-git-cross.html
 9 | # More information about the credential helper here:
10 | # https://docs.aws.amazon.com/codecommit/latest/userguide/setting-up-https-unixes.html#setting-up-https-unixes-credential-helper
11 | 
12 | # PARAMETERS
13 | ROLE_ARN=arn:aws:iam::CodeCommitAccount:role/CrossAccountRepositoryContributorRole
14 | REGION=us-east-1
15 | 
16 | sudo -u ec2-user -i <<EOF
17 | 
18 | cat >>/home/ec2-user/.aws/config <<-END_CAT
19 | 	[profile CrossAccountAccessProfile]
20 | 	region = $REGION
21 | 	role_arn = $ROLE_ARN
22 | 	credential_source = Ec2InstanceMetadata
23 | 	output = json
24 | END_CAT
25 | 
26 | git config --global credential.helper '!aws --profile CrossAccountAccessProfile codecommit credential-helper $@'
27 | 
28 | EOF
29 | 


--------------------------------------------------------------------------------
/scripts/set-env-variable/on-start.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -e
 4 | 
 5 | # OVERVIEW
 6 | # This script gets value from Notebook Instance's tag and sets it as environment
 7 | # variable for all process including Jupyter in SageMaker Notebook Instance
 8 | # Note that this script will fail this condition is not met
 9 | #   1. Ensure the Notebook Instance execution role has permission of SageMaker:ListTags
10 | #
11 | 
12 | #sudo -u ec2-user -i <<'EOF'
13 | 
14 | # PARAMETERS
15 | YOUR_ENV_VARIABLE_NAME=<ENV_VAR_NAME>
16 | 
17 | NOTEBOOK_ARN=$(jq '.ResourceArn' /opt/ml/metadata/resource-metadata.json --raw-output)
18 | TAG=$(aws sagemaker list-tags --resource-arn $NOTEBOOK_ARN  | jq -r --arg YOUR_ENV_VARIABLE_NAME "$YOUR_ENV_VARIABLE_NAME" .'Tags[] | select(.Key == $YOUR_ENV_VARIABLE_NAME).Value' --raw-output)
19 | touch /etc/profile.d/jupyter-env.sh
20 | echo "export $YOUR_ENV_VARIABLE_NAME=$TAG" >> /etc/profile.d/jupyter-env.sh
21 | 
22 | # restart command is dependent on current running Amazon Linux and JupyterLab
23 | CURR_VERSION=$(cat /etc/os-release)
24 | if [[ $CURR_VERSION == *$"http://aws.amazon.com/amazon-linux-ami/"* ]]; then
25 | 	sudo initctl restart jupyter-server --no-wait
26 | else
27 | 	sudo systemctl --no-block restart jupyter-server.service
28 | fi
29 | 
30 | #EOF


--------------------------------------------------------------------------------
/scripts/set-git-config/on-start.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -e
 4 | 
 5 | # OVERVIEW
 6 | # This script sets username and email address in Git config
 7 | 
 8 | # PARAMETERS
 9 | YOUR_USER_NAME="your_user_name"
10 | YOUR_EMAIL_ADDRESS="your_email_address"
11 | 
12 | sudo -u ec2-user -i <<EOF
13 | 
14 | git config --global user.name "$YOUR_USER_NAME"
15 | git config --global user.email "$YOUR_EMAIL_ADDRESS"
16 | 
17 | EOF
18 | 


--------------------------------------------------------------------------------