├── docker
├── __init__.py
├── build_artifacts
│ ├── __init__.py
│ ├── sagemaker
│ │ ├── serve
│ │ ├── __init__.py
│ │ ├── nginx.conf.template
│ │ ├── multi_model_utils.py
│ │ └── tensorflowServing.js
│ ├── dockerd-entrypoint.py
│ └── deep_learning_container.py
├── 1.11
│ ├── Dockerfile.eia
│ ├── Dockerfile.cpu
│ └── Dockerfile.gpu
├── 1.12
│ ├── Dockerfile.eia
│ ├── Dockerfile.cpu
│ └── Dockerfile.gpu
├── 1.13
│ ├── Dockerfile.eia
│ ├── Dockerfile.cpu
│ └── Dockerfile.gpu
├── 1.14
│ ├── Dockerfile.cpu
│ ├── Dockerfile.eia
│ └── Dockerfile.gpu
├── 2.0
│ ├── Dockerfile.cpu
│ ├── Dockerfile.eia
│ └── Dockerfile.gpu
├── 2.1
│ ├── Dockerfile.cpu
│ └── Dockerfile.gpu
└── 1.15
│ ├── Dockerfile.cpu
│ ├── Dockerfile.eia
│ └── Dockerfile.gpu
├── VERSION
├── test
├── resources
│ ├── inputs
│ │ ├── test-generic.json
│ │ ├── test.csv
│ │ ├── test-gcloud.jsons
│ │ └── test.json
│ ├── mme
│ │ ├── invalid_version
│ │ │ └── abcde
│ │ │ │ └── dummy.txt
│ │ ├── half_plus_three
│ │ │ ├── 00000123
│ │ │ │ ├── assets
│ │ │ │ │ └── foo.txt
│ │ │ │ ├── variables
│ │ │ │ │ ├── variables.data-00000-of-00001
│ │ │ │ │ └── variables.index
│ │ │ │ └── saved_model.pb
│ │ │ └── 00000124
│ │ │ │ ├── assets
│ │ │ │ └── foo.txt
│ │ │ │ ├── variables
│ │ │ │ ├── variables.data-00000-of-00001
│ │ │ │ └── variables.index
│ │ │ │ └── saved_model.pb
│ │ ├── half_plus_two
│ │ │ └── 00000123
│ │ │ │ ├── variables
│ │ │ │ ├── variables.data-00000-of-00001
│ │ │ │ └── variables.index
│ │ │ │ └── saved_model.pb
│ │ └── cifar
│ │ │ └── 1540855709
│ │ │ ├── saved_model.pb
│ │ │ └── variables
│ │ │ ├── variables.index
│ │ │ └── variables.data-00000-of-00001
│ ├── examples
│ │ ├── test5
│ │ │ ├── requirements.txt
│ │ │ ├── lib
│ │ │ │ └── dummy_module
│ │ │ │ │ └── __init__.py
│ │ │ └── inference.py
│ │ ├── test3
│ │ │ ├── requirements.txt
│ │ │ └── inference.py
│ │ ├── test4
│ │ │ ├── lib
│ │ │ │ └── dummy_module
│ │ │ │ │ └── __init__.py
│ │ │ └── inference.py
│ │ ├── test2
│ │ │ └── inference.py
│ │ └── test1
│ │ │ └── inference.py
│ ├── models
│ │ └── half_plus_three
│ │ │ ├── .00000111
│ │ │ └── .hidden_file
│ │ │ ├── 00000123
│ │ │ ├── assets
│ │ │ │ └── foo.txt
│ │ │ ├── variables
│ │ │ │ ├── variables.data-00000-of-00001
│ │ │ │ └── variables.index
│ │ │ └── saved_model.pb
│ │ │ └── 00000124
│ │ │ ├── assets
│ │ │ └── foo.txt
│ │ │ ├── variables
│ │ │ ├── variables.data-00000-of-00001
│ │ │ └── variables.index
│ │ │ └── saved_model.pb
│ └── mme_universal_script
│ │ ├── code
│ │ ├── requirements.txt
│ │ └── inference.py
│ │ └── half_plus_three
│ │ └── model
│ │ └── half_plus_three
│ │ ├── 00000123
│ │ ├── assets
│ │ │ └── foo.txt
│ │ ├── variables
│ │ │ ├── variables.data-00000-of-00001
│ │ │ └── variables.index
│ │ └── saved_model.pb
│ │ └── 00000124
│ │ ├── assets
│ │ └── foo.txt
│ │ ├── variables
│ │ ├── variables.data-00000-of-00001
│ │ └── variables.index
│ │ └── saved_model.pb
├── data
│ ├── tfs-model.tar.gz
│ ├── python-with-lib.tar.gz
│ ├── python-with-requirements.tar.gz
│ └── batch.csv
├── perf
│ ├── delete-endpoint.sh
│ ├── create-endpoint.sh
│ ├── create-model.sh
│ ├── ab.sh
│ ├── ec2-perftest.sh
│ ├── perftest_endpoint.py
│ └── data_generator.py
├── conftest.py
├── integration
│ ├── local
│ │ ├── conftest.py
│ │ ├── multi_model_endpoint_test_utils.py
│ │ ├── test_tfs_batching.py
│ │ ├── test_multi_tfs.py
│ │ ├── test_nginx_config.py
│ │ ├── test_pre_post_processing_mme.py
│ │ ├── test_pre_post_processing.py
│ │ └── test_multi_model_endpoint.py
│ └── sagemaker
│ │ ├── test_ei.py
│ │ ├── conftest.py
│ │ └──
test_tfs.py └── unit │ ├── test_proxy_client.py │ └── test_deep_learning_container.py ├── .jshintrc ├── .gitignore ├── NOTICE ├── branding └── icon │ └── sagemaker-banner.png ├── .github ├── PULL_REQUEST_TEMPLATE.md └── ISSUE_TEMPLATE │ ├── config.yml │ ├── documentation-request.md │ ├── feature_request.md │ └── bug_report.md ├── scripts ├── stop.sh ├── build-all.sh ├── publish-all.sh ├── start.sh ├── publish.sh ├── curl.sh ├── build.sh └── shared.sh ├── CODE_OF_CONDUCT.md ├── tox.ini ├── .pylintrc └── CONTRIBUTING.md /docker/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /VERSION: -------------------------------------------------------------------------------- 1 | 1.8.5.dev0 2 | -------------------------------------------------------------------------------- /docker/build_artifacts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/resources/inputs/test-generic.json: -------------------------------------------------------------------------------- 1 | [1.0,2.0,5.0] -------------------------------------------------------------------------------- /test/resources/mme/invalid_version/abcde/dummy.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.jshintrc: -------------------------------------------------------------------------------- 1 | { 2 | "asi": true, 3 | "esversion": 6 4 | } 5 | -------------------------------------------------------------------------------- /test/resources/examples/test5/requirements.txt: -------------------------------------------------------------------------------- 1 | Pillow>=6.2.2 -------------------------------------------------------------------------------- /test/resources/models/half_plus_three/.00000111/.hidden_file: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/resources/examples/test3/requirements.txt: -------------------------------------------------------------------------------- 1 | Pillow>=6.2.2 2 | -------------------------------------------------------------------------------- /test/resources/mme_universal_script/code/requirements.txt: -------------------------------------------------------------------------------- 1 | Pillow>=6.2.2 -------------------------------------------------------------------------------- /test/resources/mme/half_plus_three/00000123/assets/foo.txt: -------------------------------------------------------------------------------- 1 | asset-file-contents -------------------------------------------------------------------------------- /test/resources/mme/half_plus_three/00000124/assets/foo.txt: -------------------------------------------------------------------------------- 1 | asset-file-contents -------------------------------------------------------------------------------- /test/resources/examples/test4/lib/dummy_module/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = '0.1' 2 | -------------------------------------------------------------------------------- /test/resources/examples/test5/lib/dummy_module/__init__.py: 
-------------------------------------------------------------------------------- 1 | __version__ = '0.1' 2 | -------------------------------------------------------------------------------- /test/resources/models/half_plus_three/00000123/assets/foo.txt: -------------------------------------------------------------------------------- 1 | asset-file-contents -------------------------------------------------------------------------------- /test/resources/models/half_plus_three/00000124/assets/foo.txt: -------------------------------------------------------------------------------- 1 | asset-file-contents 2 | -------------------------------------------------------------------------------- /docker/build_artifacts/sagemaker/serve: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | python3 /sagemaker/serve.py 4 | -------------------------------------------------------------------------------- /test/resources/mme/half_plus_two/00000123/variables/variables.data-00000-of-00001: -------------------------------------------------------------------------------- 1 | ?@@@ -------------------------------------------------------------------------------- /test/resources/mme/half_plus_three/00000123/variables/variables.data-00000-of-00001: -------------------------------------------------------------------------------- 1 | ?@@@@ -------------------------------------------------------------------------------- /test/resources/mme/half_plus_three/00000124/variables/variables.data-00000-of-00001: -------------------------------------------------------------------------------- 1 | ?@@@@ -------------------------------------------------------------------------------- /test/resources/models/half_plus_three/00000123/variables/variables.data-00000-of-00001: -------------------------------------------------------------------------------- 1 | ?@@@@ -------------------------------------------------------------------------------- /test/resources/models/half_plus_three/00000124/variables/variables.data-00000-of-00001: -------------------------------------------------------------------------------- 1 | ?@@@@ -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | .tox/ 3 | log.txt 4 | .idea/ 5 | node_modules/ 6 | package.json 7 | package-lock.json 8 | -------------------------------------------------------------------------------- /test/resources/mme_universal_script/half_plus_three/model/half_plus_three/00000123/assets/foo.txt: -------------------------------------------------------------------------------- 1 | asset-file-contents -------------------------------------------------------------------------------- /test/resources/mme_universal_script/half_plus_three/model/half_plus_three/00000124/assets/foo.txt: -------------------------------------------------------------------------------- 1 | asset-file-contents 2 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Sagemaker TensorFlow Serving Container 2 | Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
3 | -------------------------------------------------------------------------------- /test/data/tfs-model.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/sagemaker-tensorflow-serving-container/HEAD/test/data/tfs-model.tar.gz -------------------------------------------------------------------------------- /test/resources/mme_universal_script/half_plus_three/model/half_plus_three/00000123/variables/variables.data-00000-of-00001: -------------------------------------------------------------------------------- 1 | ?@@@@ -------------------------------------------------------------------------------- /test/resources/mme_universal_script/half_plus_three/model/half_plus_three/00000124/variables/variables.data-00000-of-00001: -------------------------------------------------------------------------------- 1 | ?@@@@ -------------------------------------------------------------------------------- /test/data/python-with-lib.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/sagemaker-tensorflow-serving-container/HEAD/test/data/python-with-lib.tar.gz -------------------------------------------------------------------------------- /branding/icon/sagemaker-banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/sagemaker-tensorflow-serving-container/HEAD/branding/icon/sagemaker-banner.png -------------------------------------------------------------------------------- /test/data/python-with-requirements.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/sagemaker-tensorflow-serving-container/HEAD/test/data/python-with-requirements.tar.gz -------------------------------------------------------------------------------- /test/resources/mme/cifar/1540855709/saved_model.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/sagemaker-tensorflow-serving-container/HEAD/test/resources/mme/cifar/1540855709/saved_model.pb -------------------------------------------------------------------------------- /test/resources/mme/half_plus_two/00000123/saved_model.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/sagemaker-tensorflow-serving-container/HEAD/test/resources/mme/half_plus_two/00000123/saved_model.pb -------------------------------------------------------------------------------- /test/resources/mme/half_plus_three/00000123/saved_model.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/sagemaker-tensorflow-serving-container/HEAD/test/resources/mme/half_plus_three/00000123/saved_model.pb -------------------------------------------------------------------------------- /test/resources/mme/half_plus_three/00000124/saved_model.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/sagemaker-tensorflow-serving-container/HEAD/test/resources/mme/half_plus_three/00000124/saved_model.pb -------------------------------------------------------------------------------- /test/resources/inputs/test.csv: -------------------------------------------------------------------------------- 1 | 1.0,2.0,5.0 2 | 1.0,2.0,5.0 3 | 1.0,2.0,5.0 4 | 1.0,2.0,5.0 5 | 
1.0,2.0,5.0 6 | 1.0,2.0,5.0 7 | 1.0,2.0,5.0 8 | 1.0,2.0,5.0 9 | 1.0,2.0,5.0 10 | 1.0,2.0,5.0 11 | -------------------------------------------------------------------------------- /test/resources/mme/cifar/1540855709/variables/variables.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/sagemaker-tensorflow-serving-container/HEAD/test/resources/mme/cifar/1540855709/variables/variables.index -------------------------------------------------------------------------------- /test/resources/models/half_plus_three/00000123/saved_model.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/sagemaker-tensorflow-serving-container/HEAD/test/resources/models/half_plus_three/00000123/saved_model.pb -------------------------------------------------------------------------------- /test/resources/models/half_plus_three/00000124/saved_model.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/sagemaker-tensorflow-serving-container/HEAD/test/resources/models/half_plus_three/00000124/saved_model.pb -------------------------------------------------------------------------------- /test/data/batch.csv: -------------------------------------------------------------------------------- 1 | 1.0, 2.0, 5.0 2 | 1.0, 2.0, 5.0 3 | 1.0, 2.0, 5.0 4 | 1.0, 2.0, 5.0 5 | 1.0, 2.0, 5.0 6 | 1.0, 2.0, 5.0 7 | 1.0, 2.0, 5.0 8 | 1.0, 2.0, 5.0 9 | 1.0, 2.0, 5.0 10 | 1.0, 2.0, 5.0 11 | -------------------------------------------------------------------------------- /test/resources/mme/half_plus_two/00000123/variables/variables.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/sagemaker-tensorflow-serving-container/HEAD/test/resources/mme/half_plus_two/00000123/variables/variables.index -------------------------------------------------------------------------------- /test/resources/mme/half_plus_three/00000123/variables/variables.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/sagemaker-tensorflow-serving-container/HEAD/test/resources/mme/half_plus_three/00000123/variables/variables.index -------------------------------------------------------------------------------- /test/resources/mme/half_plus_three/00000124/variables/variables.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/sagemaker-tensorflow-serving-container/HEAD/test/resources/mme/half_plus_three/00000124/variables/variables.index -------------------------------------------------------------------------------- /test/resources/models/half_plus_three/00000123/variables/variables.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/sagemaker-tensorflow-serving-container/HEAD/test/resources/models/half_plus_three/00000123/variables/variables.index -------------------------------------------------------------------------------- /test/resources/models/half_plus_three/00000124/variables/variables.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/sagemaker-tensorflow-serving-container/HEAD/test/resources/models/half_plus_three/00000124/variables/variables.index 
-------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | *Issue #, if available:* 2 | 3 | *Description of changes:* 4 | 5 | 6 | By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 7 | -------------------------------------------------------------------------------- /test/resources/mme/cifar/1540855709/variables/variables.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/sagemaker-tensorflow-serving-container/HEAD/test/resources/mme/cifar/1540855709/variables/variables.data-00000-of-00001 -------------------------------------------------------------------------------- /test/perf/delete-endpoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | endpoint=${1-'sagemaker-tensorflow-serving-cpu-c5-xlarge'} 4 | aws sagemaker delete-endpoint --endpoint-name $endpoint 5 | aws sagemaker delete-endpoint-config --endpoint-config-name $endpoint -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | contact_links: 3 | - name: Ask a question 4 | url: https://stackoverflow.com/questions/tagged/amazon-sagemaker 5 | about: Use Stack Overflow to ask and answer questions 6 | -------------------------------------------------------------------------------- /scripts/stop.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Stop a local docker container. 
4 | 5 | set -euo pipefail 6 | 7 | source scripts/shared.sh 8 | 9 | parse_std_args "$@" 10 | 11 | docker kill $(docker ps -q --filter ancestor=$repository:$full_version-$device) 12 | -------------------------------------------------------------------------------- /test/resources/mme_universal_script/half_plus_three/model/half_plus_three/00000123/saved_model.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/sagemaker-tensorflow-serving-container/HEAD/test/resources/mme_universal_script/half_plus_three/model/half_plus_three/00000123/saved_model.pb -------------------------------------------------------------------------------- /test/resources/mme_universal_script/half_plus_three/model/half_plus_three/00000124/saved_model.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/sagemaker-tensorflow-serving-container/HEAD/test/resources/mme_universal_script/half_plus_three/model/half_plus_three/00000124/saved_model.pb -------------------------------------------------------------------------------- /test/resources/mme_universal_script/half_plus_three/model/half_plus_three/00000123/variables/variables.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/sagemaker-tensorflow-serving-container/HEAD/test/resources/mme_universal_script/half_plus_three/model/half_plus_three/00000123/variables/variables.index -------------------------------------------------------------------------------- /test/resources/mme_universal_script/half_plus_three/model/half_plus_three/00000124/variables/variables.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/sagemaker-tensorflow-serving-container/HEAD/test/resources/mme_universal_script/half_plus_three/model/half_plus_three/00000124/variables/variables.index -------------------------------------------------------------------------------- /test/resources/inputs/test-gcloud.jsons: -------------------------------------------------------------------------------- 1 | {"x": [1.0,2.0,5.0]} 2 | {"x": [1.0,2.0,5.0]} 3 | {"x": [1.0,2.0,5.0]} 4 | {"x": [1.0,2.0,5.0]} 5 | {"x": [1.0,2.0,5.0]} 6 | {"x": [1.0,2.0,5.0]} 7 | {"x": [1.0,2.0,5.0]} 8 | {"x": [1.0,2.0,5.0]} 9 | {"x": [1.0,2.0,5.0]} 10 | {"x": [1.0,2.0,5.0]} 11 | -------------------------------------------------------------------------------- /test/resources/inputs/test.json: -------------------------------------------------------------------------------- 1 | { 2 | "instances": [ 3 | [1.0,2.0,5.0], 4 | [1.0,2.0,5.0], 5 | [1.0,2.0,5.0], 6 | [1.0,2.0,5.0], 7 | [1.0,2.0,5.0], 8 | [1.0,2.0,5.0], 9 | [1.0,2.0,5.0], 10 | [1.0,2.0,5.0], 11 | [1.0,2.0,5.0], 12 | [1.0,2.0,5.0] 13 | ] 14 | } -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 
5 | -------------------------------------------------------------------------------- /scripts/build-all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Build all the docker images. 4 | 5 | set -euo pipefail 6 | 7 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 8 | 9 | ${DIR}/build.sh --version 1.14.0 --arch eia 10 | ${DIR}/build.sh --version 1.15.0 --arch cpu 11 | ${DIR}/build.sh --version 1.15.0 --arch gpu 12 | ${DIR}/build.sh --version 2.1.0 --arch cpu 13 | ${DIR}/build.sh --version 2.1.0 --arch gpu 14 | -------------------------------------------------------------------------------- /scripts/publish-all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Publish all images to your ECR account. 4 | 5 | set -euo pipefail 6 | 7 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 8 | 9 | ${DIR}/publish.sh --version 1.14.0 --arch eia 10 | ${DIR}/publish.sh --version 1.15.0 --arch cpu 11 | ${DIR}/publish.sh --version 1.15.0 --arch gpu 12 | ${DIR}/publish.sh --version 2.1.0 --arch cpu 13 | ${DIR}/publish.sh --version 2.1.0 --arch gpu 14 | -------------------------------------------------------------------------------- /test/conftest.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 13 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/documentation-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Documentation request 3 | about: Request improved documentation 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **What did you find confusing? Please describe.** 11 | A clear and concise description of what you found confusing. Ex. I tried to [...] but I didn't understand how to [...] 12 | 13 | **Describe how documentation can be improved** 14 | A clear and concise description of where documentation was lacking and how it can be improved. 15 | 16 | **Additional context** 17 | Add any other context or screenshots about the documentation request here. 18 | -------------------------------------------------------------------------------- /docker/build_artifacts/sagemaker/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. 
This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 13 | -------------------------------------------------------------------------------- /scripts/start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Start a local docker container. 4 | 5 | set -euo pipefail 6 | 7 | source scripts/shared.sh 8 | 9 | parse_std_args "$@" 10 | 11 | if [ "$arch" == 'gpu' ]; then 12 | docker_command='nvidia-docker' 13 | else 14 | docker_command='docker' 15 | fi 16 | 17 | 18 | MODEL_DIR="$(cd "test/resources/models" > /dev/null && pwd)" 19 | $docker_command run \ 20 | -v "$MODEL_DIR":/opt/ml/model:ro \ 21 | -p 8080:8080 \ 22 | -e "SAGEMAKER_TFS_NGINX_LOGLEVEL=error" \ 23 | -e "SAGEMAKER_BIND_TO_PORT=8080" \ 24 | -e "SAGEMAKER_SAFE_PORT_RANGE=9000-9999" \ 25 | $repository:$full_version-$device serve > log.txt 2>&1 & 26 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest new functionality for this toolkit 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the feature you'd like** 11 | A clear and concise description of the functionality you want. 12 | 13 | **How would this feature be used? Please describe.** 14 | A clear and concise description of the use case for this feature. Please provide an example, if possible. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /test/perf/create-endpoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | instance_type="${1:-c5.xlarge}" 4 | if [[ "$instance_type" == p* ]]; then 5 | arch='gpu' 6 | else 7 | arch='cpu' 8 | fi 9 | 10 | endpoint_name=$(echo "sagemaker-tensorflow-serving-$instance_type" | tr . -) 11 | 12 | aws sagemaker create-endpoint-config \ 13 | --endpoint-config-name $endpoint_name \ 14 | --production-variants '[{ 15 | "VariantName": "variant-name-1", 16 | "ModelName": "sagemaker-tensorflow-serving-model-'$arch'", 17 | "InitialInstanceCount": 1, 18 | "InstanceType": "ml.'$instance_type'" 19 | }]' 20 | 21 | aws sagemaker create-endpoint --endpoint-name $endpoint_name --endpoint-config-name $endpoint_name 22 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: File a report to help us reproduce and fix the problem 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To reproduce** 14 | A clear, step-by-step set of instructions to reproduce the bug. 15 | 16 | **Expected behavior** 17 | A clear and concise description of what you expected to happen. 
18 | 19 | **Screenshots or logs** 20 | If applicable, add screenshots or logs to help explain your problem. 21 | 22 | **System information** 23 | A description of your system. Please provide: 24 | - **Toolkit version**: 25 | - **Framework version**: 26 | - **Python version**: 27 | - **CPU or GPU**: 28 | - **Custom Docker image (Y/N)**: 29 | 30 | **Additional context** 31 | Add any other context about the problem here. 32 | -------------------------------------------------------------------------------- /scripts/publish.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Publish images to your ECR account. 4 | 5 | set -euo pipefail 6 | 7 | source scripts/shared.sh 8 | 9 | parse_std_args "$@" 10 | 11 | aws ecr get-login-password --region ${aws_region} \ 12 | | docker login \ 13 | --password-stdin \ 14 | --username AWS \ 15 | "${aws_account}.dkr.ecr.${aws_region}.amazonaws.com/${repository}" 16 | docker tag $repository:$full_version-$device $aws_account.dkr.ecr.$aws_region.amazonaws.com/$repository:$full_version-$device 17 | docker tag $repository:$full_version-$device $aws_account.dkr.ecr.$aws_region.amazonaws.com/$repository:$short_version-$device 18 | docker push $aws_account.dkr.ecr.$aws_region.amazonaws.com/$repository:$full_version-$device 19 | docker push $aws_account.dkr.ecr.$aws_region.amazonaws.com/$repository:$short_version-$device 20 | docker logout https://$aws_account.dkr.ecr.$aws_region.amazonaws.com 21 | -------------------------------------------------------------------------------- /docker/build_artifacts/dockerd-entrypoint.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 
13 | 14 | import os.path 15 | import subprocess 16 | import shlex 17 | import sys 18 | 19 | if not os.path.exists("/opt/ml/input/config"): 20 | subprocess.call(["python", "/usr/local/bin/deep_learning_container.py", "&>/dev/null", "&"]) 21 | 22 | subprocess.check_call(shlex.split(" ".join(sys.argv[1:]))) 23 | -------------------------------------------------------------------------------- /docker/1.11/Dockerfile.eia: -------------------------------------------------------------------------------- 1 | FROM ubuntu:16.04 2 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true 3 | 4 | ARG TFS_SHORT_VERSION 5 | 6 | # nginx + njs 7 | RUN \ 8 | apt-get update && \ 9 | apt-get -y install --no-install-recommends curl && \ 10 | curl -s http://nginx.org/keys/nginx_signing.key | apt-key add - && \ 11 | echo 'deb http://nginx.org/packages/ubuntu/ xenial nginx' >> /etc/apt/sources.list && \ 12 | apt-get update && \ 13 | apt-get -y install --no-install-recommends nginx nginx-module-njs python3 python3-pip python3-setuptools && \ 14 | apt-get clean 15 | 16 | # cython, falcon, gunicorn, tensorflow-serving 17 | RUN \ 18 | pip3 install --no-cache-dir cython falcon gunicorn gevent requests grpcio protobuf && \ 19 | pip3 install --no-dependencies --no-cache-dir tensorflow-serving-api==1.11.1 20 | 21 | COPY ./ / 22 | 23 | RUN mv amazonei_tensorflow_model_server /usr/bin/tensorflow_model_server && \ 24 | chmod +x /usr/bin/tensorflow_model_server 25 | 26 | ENV SAGEMAKER_TFS_VERSION "${TFS_SHORT_VERSION}" 27 | ENV PATH "$PATH:/sagemaker" 28 | -------------------------------------------------------------------------------- /docker/1.12/Dockerfile.eia: -------------------------------------------------------------------------------- 1 | FROM ubuntu:16.04 2 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true 3 | 4 | ARG TFS_SHORT_VERSION 5 | 6 | # nginx + njs 7 | RUN \ 8 | apt-get update && \ 9 | apt-get -y install --no-install-recommends curl && \ 10 | curl -s http://nginx.org/keys/nginx_signing.key | apt-key add - && \ 11 | echo 'deb http://nginx.org/packages/ubuntu/ xenial nginx' >> /etc/apt/sources.list && \ 12 | apt-get update && \ 13 | apt-get -y install --no-install-recommends nginx nginx-module-njs python3 python3-pip python3-setuptools && \ 14 | apt-get clean 15 | 16 | # cython, falcon, gunicorn, tensorflow-serving 17 | RUN \ 18 | pip3 install --no-cache-dir cython falcon gunicorn gevent requests grpcio protobuf && \ 19 | pip3 install --no-dependencies --no-cache-dir tensorflow-serving-api==1.12.0 20 | 21 | COPY ./ / 22 | 23 | RUN mv amazonei_tensorflow_model_server /usr/bin/tensorflow_model_server && \ 24 | chmod +x /usr/bin/tensorflow_model_server 25 | 26 | ENV SAGEMAKER_TFS_VERSION "${TFS_SHORT_VERSION}" 27 | ENV PATH "$PATH:/sagemaker" 28 | -------------------------------------------------------------------------------- /scripts/curl.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Some example curl requests to try on local docker containers. 
4 | 5 | curl -X POST --data-binary @test/resources/inputs/test.json -H 'Content-Type: application/json' -H 'X-Amzn-SageMaker-Custom-Attributes: tfs-model-name=half_plus_three' http://localhost:8080/invocations 6 | curl -X POST --data-binary @test/resources/inputs/test-gcloud.jsons -H 'Content-Type: application/json' -H 'X-Amzn-SageMaker-Custom-Attributes: tfs-model-name=half_plus_three' http://localhost:8080/invocations 7 | curl -X POST --data-binary @test/resources/inputs/test-generic.json -H 'Content-Type: application/json' -H 'X-Amzn-SageMaker-Custom-Attributes: tfs-model-name=half_plus_three' http://localhost:8080/invocations 8 | curl -X POST --data-binary @test/resources/inputs/test.csv -H 'Content-Type: text/csv' -H 'X-Amzn-SageMaker-Custom-Attributes: tfs-model-name=half_plus_three' http://localhost:8080/invocations 9 | curl -X POST --data-binary @test/resources/inputs/test-cifar.json -H 'Content-Type: application/json' -H 'X-Amzn-SageMaker-Custom-Attributes: tfs-model-name=cifar' http://localhost:8080/invocations -------------------------------------------------------------------------------- /docker/1.11/Dockerfile.cpu: -------------------------------------------------------------------------------- 1 | ARG TFS_VERSION 2 | 3 | FROM tensorflow/serving:${TFS_VERSION} as tfs 4 | FROM ubuntu:16.04 5 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true 6 | 7 | COPY --from=tfs /usr/bin/tensorflow_model_server /usr/bin/tensorflow_model_server 8 | 9 | # nginx + njs 10 | RUN \ 11 | apt-get update && \ 12 | apt-get -y install --no-install-recommends curl && \ 13 | curl -s http://nginx.org/keys/nginx_signing.key | apt-key add - && \ 14 | echo 'deb http://nginx.org/packages/ubuntu/ xenial nginx' >> /etc/apt/sources.list && \ 15 | apt-get update && \ 16 | apt-get -y install --no-install-recommends nginx nginx-module-njs python3 python3-pip python3-setuptools && \ 17 | apt-get clean 18 | 19 | # cython, falcon, gunicorn, tensorflow-serving 20 | RUN \ 21 | pip3 install --no-cache-dir cython falcon gunicorn gevent requests grpcio protobuf && \ 22 | pip3 install --no-dependencies --no-cache-dir tensorflow-serving-api==1.11.1 23 | 24 | COPY ./ / 25 | 26 | ARG TFS_SHORT_VERSION 27 | ENV SAGEMAKER_TFS_VERSION "${TFS_SHORT_VERSION}" 28 | ENV PATH "$PATH:/sagemaker" 29 | -------------------------------------------------------------------------------- /docker/1.12/Dockerfile.cpu: -------------------------------------------------------------------------------- 1 | ARG TFS_VERSION 2 | 3 | FROM tensorflow/serving:${TFS_VERSION} as tfs 4 | FROM ubuntu:16.04 5 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true 6 | 7 | COPY --from=tfs /usr/bin/tensorflow_model_server /usr/bin/tensorflow_model_server 8 | 9 | # nginx + njs 10 | RUN \ 11 | apt-get update && \ 12 | apt-get -y install --no-install-recommends curl && \ 13 | curl -s http://nginx.org/keys/nginx_signing.key | apt-key add - && \ 14 | echo 'deb http://nginx.org/packages/ubuntu/ xenial nginx' >> /etc/apt/sources.list && \ 15 | apt-get update && \ 16 | apt-get -y install --no-install-recommends nginx nginx-module-njs python3 python3-pip python3-setuptools && \ 17 | apt-get clean 18 | 19 | # cython, falcon, gunicorn, tensorflow-serving 20 | RUN \ 21 | pip3 install --no-cache-dir cython falcon gunicorn gevent requests grpcio protobuf && \ 22 | pip3 install --no-dependencies --no-cache-dir tensorflow-serving-api==1.12.0 23 | 24 | COPY ./ / 25 | 26 | ARG TFS_SHORT_VERSION 27 | ENV SAGEMAKER_TFS_VERSION "${TFS_SHORT_VERSION}" 
28 | ENV PATH "$PATH:/sagemaker" 29 | -------------------------------------------------------------------------------- /scripts/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Build the docker images. 4 | 5 | set -euo pipefail 6 | 7 | source scripts/shared.sh 8 | 9 | parse_std_args "$@" 10 | 11 | get_ei_executable 12 | 13 | echo "pulling previous image for layer cache... " 14 | aws ecr get-login-password --region ${aws_region} \ 15 | | docker login \ 16 | --password-stdin \ 17 | --username AWS \ 18 | "${aws_account}.dkr.ecr.${aws_region}.amazonaws.com/${repository}" &>/dev/null || echo 'warning: ecr login failed' 19 | docker pull $aws_account.dkr.ecr.$aws_region.amazonaws.com/$repository:$full_version-$device &>/dev/null || echo 'warning: pull failed' 20 | docker logout https://$aws_account.dkr.ecr.$aws_region.amazonaws.com &>/dev/null 21 | 22 | echo "building image... " 23 | cp -r docker/build_artifacts/* docker/$short_version/ 24 | docker build \ 25 | --cache-from $aws_account.dkr.ecr.$aws_region.amazonaws.com/$repository:$full_version-$device \ 26 | --build-arg TFS_VERSION=$full_version \ 27 | --build-arg TFS_SHORT_VERSION=$short_version \ 28 | -f docker/$short_version/Dockerfile.$arch \ 29 | -t $repository:$full_version-$device \ 30 | -t $repository:$short_version-$device \ 31 | docker/$short_version/ 32 | 33 | remove_ei_executable 34 | -------------------------------------------------------------------------------- /docker/1.13/Dockerfile.eia: -------------------------------------------------------------------------------- 1 | FROM ubuntu:16.04 2 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true 3 | 4 | ARG PIP=pip3 5 | ARG TFS_SHORT_VERSION 6 | 7 | ENV SAGEMAKER_TFS_VERSION="${TFS_SHORT_VERSION}" 8 | ENV PATH="$PATH:/sagemaker" 9 | 10 | # nginx + njs 11 | RUN apt-get update \ 12 | && apt-get -y install --no-install-recommends curl gnupg2 \ 13 | && curl -s http://nginx.org/keys/nginx_signing.key | apt-key add - \ 14 | && echo 'deb http://nginx.org/packages/ubuntu/ xenial nginx' >> /etc/apt/sources.list \ 15 | && apt-get update \ 16 | && apt-get -y install --no-install-recommends nginx nginx-module-njs python3 python3-pip python3-setuptools \ 17 | && apt-get clean \ 18 | && rm -rf /var/lib/apt/lists/* 19 | 20 | # cython, falcon, gunicorn, grpc 21 | RUN ${PIP} install --no-cache-dir \ 22 | awscli==1.16.130 \ 23 | cython==0.29.10 \ 24 | falcon==2.0.0 \ 25 | gunicorn==19.9.0 \ 26 | gevent==1.4.0 \ 27 | requests==2.21.0 \ 28 | grpcio==1.24.1 \ 29 | protobuf==3.10.0 \ 30 | # using --no-dependencies to avoid installing tensorflow binary 31 | && ${PIP} install --no-dependencies --no-cache-dir \ 32 | tensorflow-serving-api==1.13.0 33 | 34 | COPY ./ / 35 | 36 | RUN mv amazonei_tensorflow_model_server /usr/bin/tensorflow_model_server && \ 37 | chmod +x /usr/bin/tensorflow_model_server 38 | -------------------------------------------------------------------------------- /test/perf/create-model.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | arch=${1:-'cpu'} 6 | aws_region=$(aws configure get region) 7 | aws_account=$(aws --region $aws_region sts --endpoint-url https://sts.$aws_region.amazonaws.com get-caller-identity --query 'Account' --output text) 8 | 9 | # change this to match SageMaker execution role in your account 10 | sagemaker_role="arn:aws:iam::$aws_account:role/service-role/AmazonSageMaker-ExecutionRole-20180510T114550" 11 
| 12 | tar -C test/resources/models -czf /tmp/sagemaker-tensorflow-serving-model.tar.gz . 13 | aws s3 mb s3://sagemaker-$aws_region-$aws_account || true 14 | aws s3 cp /tmp/sagemaker-tensorflow-serving-model.tar.gz s3://sagemaker-$aws_region-$aws_account/sagemaker-tensorflow-serving/test-models/sagemaker-tensorflow-serving-model.tar.gz 15 | rm /tmp/sagemaker-tensorflow-serving-model.tar.gz 16 | 17 | 18 | aws sagemaker create-model \ 19 | --model-name sagemaker-tensorflow-serving-model-$arch \ 20 | --primary-container '{ 21 | "Image": "'$aws_account'.dkr.ecr.'$aws_region'.amazonaws.com/sagemaker-tensorflow-serving:1.11.1-'$arch'", 22 | "ModelDataUrl": "s3://sagemaker-'$aws_region'-'$aws_account'/sagemaker-tensorflow-serving/test-models/sagemaker-tensorflow-serving-model.tar.gz", 23 | "Environment": { 24 | "SAGEMAKER_TFS_DEFAULT_MODEL_NAME": "half_plus_three" 25 | } 26 | }' \ 27 | --execution-role-arn "$sagemaker_role" 28 | -------------------------------------------------------------------------------- /test/perf/ab.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ab -k -n 10000 -c 16 -p test/resources/inputs/test.json -T 'application/json' http://localhost:8080/tfs/v1/models/half_plus_three:predict 4 | ab -k -n 10000 -c 16 -p test/resources/inputs/test.json -T 'application/json' http://localhost:8080/invocations 5 | ab -k -n 10000 -c 16 -p test/resources/inputs/test.jsons -T 'application/json' http://localhost:8080/invocations 6 | ab -k -n 10000 -c 16 -p test/resources/inputs/test.csv -T 'text/csv' http://localhost:8080/invocations 7 | ab -k -n 10000 -c 16 -p test/resources/inputs/test-cifar.json -T 'application/json' -H 'X-Amzn-SageMaker-Custom-Attributes: tfs-model-name=cifar' http://localhost:8080/invocations 8 | 9 | # Larger payloads are generated and removed when this script exits. 10 | TEMPFILE='/tmp/perftest_data' 11 | trap 'rm -f $TEMPFILE' EXIT 12 | 13 | echo "Generating data" 14 | # Creates a 10MB file with 10000 columns per line. 15 | python test/perf/data_generator.py -c 'text/csv' -s 10000 -p 10 -u MB > $TEMPFILE || exit $? 16 | ab -k -n 10 -c 1 -p "$TEMPFILE" -T 'text/csv' http://localhost:8080/invocations 17 | 18 | python test/perf/data_generator.py -c 'application/json' -s 10000 -p 10 -u MB > $TEMPFILE || exit $? 19 | ab -k -n 10 -c 1 -p "$TEMPFILE" -T 'application/json' http://localhost:8080/invocations 20 | 21 | python test/perf/data_generator.py -c 'application/jsonlines' -s 10000 -p 10 -u MB > $TEMPFILE || exit $? 
22 | ab -k -n 10 -c 1 -p "$TEMPFILE" -T 'application/jsonlines' http://localhost:8080/invocations 23 | -------------------------------------------------------------------------------- /docker/build_artifacts/sagemaker/nginx.conf.template: -------------------------------------------------------------------------------- 1 | load_module modules/ngx_http_js_module.so; 2 | 3 | worker_processes auto; 4 | daemon off; 5 | pid /tmp/nginx.pid; 6 | error_log /dev/stderr %NGINX_LOG_LEVEL%; 7 | 8 | worker_rlimit_nofile 4096; 9 | 10 | events { 11 | worker_connections 2048; 12 | } 13 | 14 | http { 15 | include /etc/nginx/mime.types; 16 | default_type application/json; 17 | access_log /dev/stdout combined; 18 | js_import tensorflowServing.js; 19 | 20 | proxy_read_timeout %PROXY_READ_TIMEOUT%; 21 | 22 | upstream tfs_upstream { 23 | %TFS_UPSTREAM%; 24 | } 25 | 26 | upstream gunicorn_upstream { 27 | server unix:/tmp/gunicorn.sock fail_timeout=1; 28 | } 29 | 30 | server { 31 | listen %NGINX_HTTP_PORT% deferred; 32 | client_max_body_size 0; 33 | client_body_buffer_size 100m; 34 | subrequest_output_buffer_size 100m; 35 | 36 | set $tfs_version %TFS_VERSION%; 37 | set $default_tfs_model %TFS_DEFAULT_MODEL_NAME%; 38 | 39 | location /tfs { 40 | rewrite ^/tfs/(.*) /$1 break; 41 | proxy_redirect off; 42 | proxy_pass_request_headers off; 43 | proxy_set_header Content-Type 'application/json'; 44 | proxy_set_header Accept 'application/json'; 45 | proxy_pass http://tfs_upstream; 46 | } 47 | 48 | location /ping { 49 | %FORWARD_PING_REQUESTS%; 50 | } 51 | 52 | location /invocations { 53 | %FORWARD_INVOCATION_REQUESTS%; 54 | } 55 | 56 | location /models { 57 | proxy_pass http://gunicorn_upstream/models; 58 | } 59 | 60 | location / { 61 | return 404 '{"error": "Not Found"}'; 62 | } 63 | 64 | keepalive_timeout 3; 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /docker/build_artifacts/sagemaker/multi_model_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 
13 | import fcntl 14 | import signal 15 | import time 16 | from contextlib import contextmanager 17 | 18 | MODEL_CONFIG_FILE = "/sagemaker/model-config.cfg" 19 | DEFAULT_LOCK_FILE = "/sagemaker/lock-file.lock" 20 | 21 | 22 | @contextmanager 23 | def lock(path=DEFAULT_LOCK_FILE): 24 | f = open(path, "w", encoding="utf8") 25 | fd = f.fileno() 26 | fcntl.lockf(fd, fcntl.LOCK_EX) 27 | 28 | try: 29 | yield 30 | finally: 31 | time.sleep(1) 32 | fcntl.lockf(fd, fcntl.LOCK_UN) 33 | 34 | 35 | @contextmanager 36 | def timeout(seconds=60): 37 | def _raise_timeout_error(signum, frame): 38 | raise Exception(408, "Timed out after {} seconds".format(seconds)) 39 | 40 | try: 41 | signal.signal(signal.SIGALRM, _raise_timeout_error) 42 | signal.alarm(seconds) 43 | yield 44 | finally: 45 | signal.alarm(0) 46 | 47 | 48 | class MultiModelException(Exception): 49 | def __init__(self, code, msg): 50 | Exception.__init__(self, code, msg) 51 | self.code = code 52 | self.msg = msg 53 | -------------------------------------------------------------------------------- /test/integration/local/conftest.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 
13 | 14 | import pytest 15 | 16 | FRAMEWORK_LATEST_VERSION = "1.13" 17 | TFS_DOCKER_BASE_NAME = "sagemaker-tensorflow-serving" 18 | 19 | 20 | def pytest_addoption(parser): 21 | parser.addoption("--docker-base-name", default=TFS_DOCKER_BASE_NAME) 22 | parser.addoption("--framework-version", default=FRAMEWORK_LATEST_VERSION, required=True) 23 | parser.addoption("--processor", default="cpu", choices=["cpu", "gpu"]) 24 | parser.addoption("--tag") 25 | 26 | 27 | @pytest.fixture(scope="module") 28 | def docker_base_name(request): 29 | return request.config.getoption("--docker-base-name") 30 | 31 | 32 | @pytest.fixture(scope="module") 33 | def framework_version(request): 34 | return request.config.getoption("--framework-version") 35 | 36 | 37 | @pytest.fixture(scope="module") 38 | def processor(request): 39 | return request.config.getoption("--processor") 40 | 41 | 42 | @pytest.fixture(scope="module") 43 | def runtime_config(request, processor): 44 | if processor == "gpu": 45 | return "--runtime=nvidia " 46 | else: 47 | return "" 48 | 49 | 50 | @pytest.fixture(scope="module") 51 | def tag(request, framework_version, processor): 52 | image_tag = request.config.getoption("--tag") 53 | if not image_tag: 54 | image_tag = "{}-{}".format(framework_version, processor) 55 | return image_tag 56 | 57 | 58 | @pytest.fixture(autouse=True) 59 | def skip_by_device_type(request, processor): 60 | is_gpu = processor == "gpu" 61 | if (request.node.get_closest_marker("skip_gpu") and is_gpu) or \ 62 | (request.node.get_closest_marker("skip_cpu") and not is_gpu): 63 | pytest.skip("Skipping because running on \"{}\" instance".format(processor)) 64 | -------------------------------------------------------------------------------- /test/resources/examples/test2/inference.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 13 | 14 | import json 15 | from collections import namedtuple 16 | 17 | import requests 18 | 19 | Context = namedtuple('Context', 20 | 'model_name, model_version, method, rest_uri, grpc_uri, ' 21 | 'custom_attributes, request_content_type, accept_header') 22 | 23 | 24 | def handler(data, context): 25 | """Handle request. 
26 | 27 | Args: 28 | data (obj): the request data 29 | context (Context): an object containing request and configuration details 30 | 31 | Returns: 32 | (bytes, string): data to return to client, (optional) response content type 33 | """ 34 | processed_input = _process_input(data, context) 35 | response = requests.post(context.rest_uri, data=processed_input) 36 | return _process_output(response, context) 37 | 38 | 39 | def _process_input(data, context): 40 | if context.request_content_type == 'application/json': 41 | # pass through json (assumes it's correctly formed) 42 | d = data.read().decode('utf-8') 43 | return d if len(d) else '' 44 | 45 | if context.request_content_type == 'text/csv': 46 | # very simple csv handler 47 | return json.dumps({ 48 | 'instances': [float(x) for x in data.read().decode('utf-8').split(',')] 49 | }) 50 | 51 | raise ValueError('{{"error": "unsupported content type {}"}}'.format( 52 | context.request_content_type or "unknown")) 53 | 54 | 55 | def _process_output(data, context): 56 | if data.status_code != 200: 57 | raise ValueError(data.content.decode('utf-8')) 58 | 59 | response_content_type = context.accept_header 60 | prediction = data.content 61 | return prediction, response_content_type 62 | -------------------------------------------------------------------------------- /test/integration/local/multi_model_endpoint_test_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 
13 | 14 | import requests 15 | 16 | INVOCATION_URL = "http://localhost:8080/models/{}/invoke" 17 | MODELS_URL = "http://localhost:8080/models" 18 | DELETE_MODEL_URL = "http://localhost:8080/models/{}" 19 | 20 | 21 | def make_headers(content_type="application/json", method="predict", version=None): 22 | custom_attributes = "tfs-method={}".format(method) 23 | if version: 24 | custom_attributes += ",tfs-model-version={}".format(version) 25 | 26 | return { 27 | "Content-Type": content_type, 28 | "X-Amzn-SageMaker-Custom-Attributes": custom_attributes, 29 | } 30 | 31 | 32 | def make_invocation_request(data, model_name, content_type="application/json", version=None): 33 | headers = make_headers(content_type=content_type, method="predict", version=version) 34 | response = requests.post(INVOCATION_URL.format(model_name), data=data, headers=headers) 35 | return response.status_code, response.content.decode("utf-8") 36 | 37 | 38 | def make_list_model_request(): 39 | response = requests.get(MODELS_URL) 40 | return response.status_code, response.content.decode("utf-8") 41 | 42 | 43 | def make_get_model_request(model_name): 44 | response = requests.get(MODELS_URL + "/{}".format(model_name)) 45 | return response.status_code, response.content.decode("utf-8") 46 | 47 | 48 | def make_load_model_request(data, content_type="application/json"): 49 | headers = { 50 | "Content-Type": content_type 51 | } 52 | response = requests.post(MODELS_URL, data=data, headers=headers) 53 | return response.status_code, response.content.decode("utf-8") 54 | 55 | 56 | def make_unload_model_request(model_name): 57 | response = requests.delete(DELETE_MODEL_URL.format(model_name)) 58 | return response.status_code, response.content.decode("utf-8") 59 | -------------------------------------------------------------------------------- /test/resources/examples/test1/inference.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 
13 | 14 | import json 15 | from collections import namedtuple 16 | 17 | Context = namedtuple('Context', 18 | 'model_name, model_version, method, rest_uri, grpc_uri, ' 19 | 'custom_attributes, request_content_type, accept_header') 20 | 21 | 22 | def input_handler(data, context): 23 | """ Pre-process request input before it is sent to TensorFlow Serving REST API 24 | 25 | Args: 26 | data (obj): the request data, in format of dict or string 27 | context (Context): an object containing request and configuration details 28 | 29 | Returns: 30 | (dict): a JSON-serializable dict that contains request body and headers 31 | """ 32 | if context.request_content_type == 'application/json': 33 | # pass through json (assumes it's correctly formed) 34 | d = data.read().decode('utf-8') 35 | return d if len(d) else '' 36 | 37 | if context.request_content_type == 'text/csv': 38 | # very simple csv handler 39 | return json.dumps({ 40 | 'instances': [float(x) for x in data.read().decode('utf-8').split(',')] 41 | }) 42 | 43 | raise ValueError('{{"error": "unsupported content type {}"}}'.format( 44 | context.request_content_type or "unknown")) 45 | 46 | 47 | def output_handler(data, context): 48 | """Post-process TensorFlow Serving output before it is returned to the client. 49 | 50 | Args: 51 | data (obj): the TensorFlow serving response 52 | context (Context): an object containing request and configuration details 53 | 54 | Returns: 55 | (bytes, string): data to return to client, response content type 56 | """ 57 | if data.status_code != 200: 58 | raise ValueError(data.content.decode('utf-8')) 59 | 60 | response_content_type = context.accept_header 61 | prediction = data.content 62 | return prediction, response_content_type 63 | -------------------------------------------------------------------------------- /test/resources/mme_universal_script/code/inference.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 
13 | 14 | import json 15 | from collections import namedtuple 16 | 17 | import PIL 18 | 19 | Context = namedtuple('Context', 20 | 'model_name, model_version, method, rest_uri, grpc_uri, ' 21 | 'custom_attributes, request_content_type, accept_header') 22 | 23 | 24 | def input_handler(data, context): 25 | """ Pre-process request input before it is sent to TensorFlow Serving REST API 26 | 27 | Args: 28 | data (obj): the request data, in format of dict or string 29 | context (Context): an object containing request and configuration details 30 | 31 | Returns: 32 | (dict): a JSON-serializable dict that contains request body and headers 33 | """ 34 | if context.request_content_type == 'application/json': 35 | # pass through json (assumes it's correctly formed) 36 | d = data.read().decode('utf-8') 37 | return d if len(d) else '' 38 | 39 | if context.request_content_type == 'text/csv': 40 | # very simple csv handler 41 | return json.dumps({ 42 | 'instances': [float(x) for x in data.read().decode('utf-8').split(',')] 43 | }) 44 | 45 | raise ValueError('{{"error": "unsupported content type {}"}}'.format( 46 | context.request_content_type or "unknown")) 47 | 48 | 49 | def output_handler(data, context): 50 | """Post-process TensorFlow Serving output before it is returned to the client. 51 | 52 | Args: 53 | data (obj): the TensorFlow serving response 54 | context (Context): an object containing request and configuration details 55 | 56 | Returns: 57 | (bytes, string): data to return to client, response content type 58 | """ 59 | if data.status_code != 200: 60 | raise ValueError(data.content.decode('utf-8')) 61 | 62 | response_content_type = context.accept_header 63 | prediction = data.content 64 | return prediction, response_content_type 65 | -------------------------------------------------------------------------------- /test/resources/examples/test3/inference.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 13 | 14 | 15 | import json 16 | from collections import namedtuple 17 | 18 | import requests 19 | 20 | # for testing requirements.txt install and pythonpath 21 | import PIL 22 | from PIL.Image import core as _imaging 23 | 24 | Context = namedtuple('Context', 25 | 'model_name, model_version, method, rest_uri, grpc_uri, ' 26 | 'custom_attributes, request_content_type, accept_header') 27 | 28 | 29 | def handler(data, context): 30 | """Handle request. 
31 | 32 | Args: 33 | data (obj): the request data 34 | context (Context): an object containing request and configuration details 35 | 36 | Returns: 37 | (bytes, string): data to return to client, (optional) response content type 38 | """ 39 | 40 | # use the imported library 41 | print('pillow: {}\n{}'.format(PIL.__version__, dir(_imaging))) 42 | processed_input = _process_input(data, context) 43 | response = requests.post(context.rest_uri, data=processed_input) 44 | return _process_output(response, context) 45 | 46 | 47 | def _process_input(data, context): 48 | if context.request_content_type == 'application/json': 49 | # pass through json (assumes it's correctly formed) 50 | d = data.read().decode('utf-8') 51 | return d if len(d) else '' 52 | 53 | if context.request_content_type == 'text/csv': 54 | # very simple csv handler 55 | return json.dumps({ 56 | 'instances': [float(x) for x in data.read().decode('utf-8').split(',')] 57 | }) 58 | 59 | raise ValueError('{{"error": "unsupported content type {}"}}'.format( 60 | context.request_content_type or "unknown")) 61 | 62 | 63 | def _process_output(data, context): 64 | if data.status_code != 200: 65 | raise ValueError(data.content.decode('utf-8')) 66 | 67 | response_content_type = context.accept_header 68 | prediction = data.content 69 | return prediction, response_content_type 70 | -------------------------------------------------------------------------------- /test/resources/examples/test4/inference.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 13 | 14 | 15 | import json 16 | from collections import namedtuple 17 | 18 | import requests 19 | 20 | import dummy_module # for testing requirements.txt install and pythonpath 21 | 22 | Context = namedtuple('Context', 23 | 'model_name, model_version, method, rest_uri, grpc_uri, ' 24 | 'custom_attributes, request_content_type, accept_header') 25 | 26 | 27 | def handler(data, context): 28 | """Handle request. 
29 | 30 | Args: 31 | data (obj): the request data 32 | context (Context): an object containing request and configuration details 33 | 34 | Returns: 35 | (bytes, string): data to return to client, (optional) response content type 36 | """ 37 | 38 | # use the library in lib/ 39 | print(dummy_module.__version__) 40 | 41 | # ensure the requirements.txt wasn't installed 42 | try: 43 | import PIL 44 | raise Exception('pillow should not be installed') 45 | except ImportError: 46 | pass 47 | 48 | processed_input = _process_input(data, context) 49 | response = requests.post(context.rest_uri, data=processed_input) 50 | return _process_output(response, context) 51 | 52 | 53 | def _process_input(data, context): 54 | if context.request_content_type == 'application/json': 55 | # pass through json (assumes it's correctly formed) 56 | d = data.read().decode('utf-8') 57 | return d if len(d) else '' 58 | 59 | if context.request_content_type == 'text/csv': 60 | # very simple csv handler 61 | return json.dumps({ 62 | 'instances': [float(x) for x in data.read().decode('utf-8').split(',')] 63 | }) 64 | 65 | raise ValueError('{{"error": "unsupported content type {}"}}'.format( 66 | context.request_content_type or "unknown")) 67 | 68 | 69 | def _process_output(data, context): 70 | if data.status_code != 200: 71 | raise ValueError(data.content.decode('utf-8')) 72 | 73 | response_content_type = context.accept_header 74 | prediction = data.content 75 | return prediction, response_content_type 76 | -------------------------------------------------------------------------------- /test/resources/examples/test5/inference.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 13 | 14 | 15 | import json 16 | from collections import namedtuple 17 | 18 | import requests 19 | 20 | import dummy_module # for testing requirements.txt install and pythonpath 21 | 22 | Context = namedtuple('Context', 23 | 'model_name, model_version, method, rest_uri, grpc_uri, ' 24 | 'custom_attributes, request_content_type, accept_header') 25 | 26 | 27 | def handler(data, context): 28 | """Handle request. 
29 | 30 | Args: 31 | data (obj): the request data 32 | context (Context): an object containing request and configuration details 33 | 34 | Returns: 35 | (bytes, string): data to return to client, (optional) response content type 36 | """ 37 | 38 | # use the library in lib/ 39 | print(dummy_module.__version__) 40 | 41 | # ensure the requirements.txt wasn't installed 42 | try: 43 | import PIL 44 | raise Exception('pillow should not be installed') 45 | except ImportError: 46 | pass 47 | 48 | processed_input = _process_input(data, context) 49 | response = requests.post(context.rest_uri, data=processed_input) 50 | return _process_output(response, context) 51 | 52 | 53 | def _process_input(data, context): 54 | if context.request_content_type == 'application/json': 55 | # pass through json (assumes it's correctly formed) 56 | d = data.read().decode('utf-8') 57 | return d if len(d) else '' 58 | 59 | if context.request_content_type == 'text/csv': 60 | # very simple csv handler 61 | return json.dumps({ 62 | 'instances': [float(x) for x in data.read().decode('utf-8').split(',')] 63 | }) 64 | 65 | raise ValueError('{{"error": "unsupported content type {}"}}'.format( 66 | context.request_content_type or "unknown")) 67 | 68 | 69 | def _process_output(data, context): 70 | if data.status_code != 200: 71 | raise ValueError(data.content.decode('utf-8')) 72 | 73 | response_content_type = context.accept_header 74 | prediction = data.content 75 | return prediction, response_content_type 76 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | # Tox (http://tox.testrun.org/) is a tool for running tests 2 | # in multiple virtualenvs. This configuration file will run the 3 | # test suite on all supported python versions. To use it, "pip install tox" 4 | # and then run "tox" from this directory. 5 | 6 | [tox] 7 | skipsdist = True 8 | skip_missing_interpreters = False 9 | envlist = black-format,jshint,flake8,pylint,py36,py37 10 | 11 | [flake8] 12 | max-line-length = 100 13 | exclude = 14 | build/ 15 | .github/ 16 | .pytest_cache/ 17 | .git 18 | .tox 19 | tests/resources/ 20 | docker/build_artifacts/sagemaker/tensorflow/ 21 | docker/build_artifacts/sagemaker/tensorflow-2.1/ 22 | docker/build_artifacts/sagemaker/tensorflow-2.2/ 23 | 24 | max-complexity = 10 25 | ignore = 26 | E203, # whitespace before ':': Black disagrees with and explicitly violates this. 27 | FI10, 28 | FI12, 29 | FI13, 30 | FI14, 31 | FI15, 32 | FI16, 33 | FI17, 34 | FI18, # __future__ import "annotations" missing -> check only Python 3.7 compatible 35 | FI50, 36 | FI51, 37 | FI52, 38 | FI53, 39 | FI54, 40 | FI55, 41 | FI56, 42 | FI57, 43 | W503 # Ignore line break before binary operator, since Black violates this. 44 | 45 | require-code = True 46 | 47 | [testenv] 48 | # {posargs} can be passed in by additional arguments specified when invoking tox. 
49 | # Can be used to specify which tests to run, e.g.: tox -- -s 50 | basepython = python3 51 | passenv = 52 | AWS_ACCESS_KEY_ID 53 | AWS_SECRET_ACCESS_KEY 54 | AWS_SESSION_TOKEN 55 | AWS_CONTAINER_CREDENTIALS_RELATIVE_URI 56 | AWS_DEFAULT_REGION 57 | commands = 58 | python -m pytest {posargs} 59 | 60 | deps = 61 | pytest 62 | pytest-xdist 63 | boto3 64 | requests 65 | 66 | [testenv:flake8] 67 | deps = 68 | flake8 69 | commands = flake8 docker/build_artifacts/ 70 | 71 | [testenv:pylint] 72 | deps = 73 | pylint 74 | commands = 75 | python -m pylint --rcfile=.pylintrc docker/build_artifacts/ 76 | 77 | [testenv:jshint] 78 | whitelist_externals = 79 | jshint 80 | commands = 81 | jshint docker/build_artifacts/ 82 | 83 | [testenv:black-format] 84 | # Used during development (before committing) to format .py files. 85 | setenv = 86 | LC_ALL=C.UTF-8 87 | LANG=C.UTF-8 88 | deps = black 89 | commands = 90 | black -l 100 ./ 91 | 92 | [testenv:black-check] 93 | # Used by automated build steps to check that all files are properly formatted. 94 | setenv = 95 | LC_ALL=C.UTF-8 96 | LANG=C.UTF-8 97 | deps = black 98 | commands = 99 | black -l 100 --check ./ 100 | 101 | [pytest] 102 | markers = 103 | skip_gpu: skip test if running on gpu instance 104 | -------------------------------------------------------------------------------- /docker/1.11/Dockerfile.gpu: -------------------------------------------------------------------------------- 1 | ARG TFS_VERSION 2 | 3 | FROM tensorflow/serving:${TFS_VERSION}-gpu as tfs 4 | FROM nvidia/cuda:9.0-base-ubuntu16.04 5 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true 6 | 7 | COPY --from=tfs /usr/bin/tensorflow_model_server /usr/bin/tensorflow_model_server 8 | 9 | # https://github.com/tensorflow/serving/blob/1.12.0/tensorflow_serving/tools/docker/Dockerfile.gpu 10 | ENV NCCL_VERSION=2.2.13 11 | ENV CUDNN_VERSION=7.2.1.38 12 | ENV TF_TENSORRT_VERSION=4.1.2 13 | 14 | RUN \ 15 | apt-get update && apt-get install -y --no-install-recommends \ 16 | ca-certificates \ 17 | cuda-command-line-tools-9-0 \ 18 | cuda-command-line-tools-9-0 \ 19 | cuda-cublas-9-0 \ 20 | cuda-cufft-9-0 \ 21 | cuda-curand-9-0 \ 22 | cuda-cusolver-9-0 \ 23 | cuda-cusparse-9-0 \ 24 | libcudnn7=${CUDNN_VERSION}-1+cuda9.0 \ 25 | libnccl2=${NCCL_VERSION}-1+cuda9.0 \ 26 | libgomp1 && \ 27 | apt-get clean && \ 28 | rm -rf /var/lib/apt/lists/* 29 | 30 | # The 'apt-get install' of nvinfer-runtime-trt-repo-ubuntu1604-4.0.1-ga-cuda9.0 31 | # adds a new list which contains libnvinfer library, so it needs another 32 | # 'apt-get update' to retrieve that list before it can actually install the 33 | # library. 34 | # We don't install libnvinfer-dev since we don't need to build against TensorRT, 35 | # and libnvinfer4 doesn't contain libnvinfer.a static library. 
36 | RUN apt-get update && \ 37 | apt-get install --no-install-recommends \ 38 | nvinfer-runtime-trt-repo-ubuntu1604-4.0.1-ga-cuda9.0 && \ 39 | apt-get update && \ 40 | apt-get install --no-install-recommends \ 41 | libnvinfer4=${TF_TENSORRT_VERSION}-1+cuda9.0 && \ 42 | apt-get clean && \ 43 | rm -rf /var/lib/apt/lists/* && \ 44 | rm /usr/lib/x86_64-linux-gnu/libnvinfer_plugin* && \ 45 | rm /usr/lib/x86_64-linux-gnu/libnvcaffe_parser* && \ 46 | rm /usr/lib/x86_64-linux-gnu/libnvparsers* 47 | 48 | # nginx + njs 49 | RUN \ 50 | apt-get update && \ 51 | apt-get -y install --no-install-recommends curl && \ 52 | curl -s http://nginx.org/keys/nginx_signing.key | apt-key add - && \ 53 | echo 'deb http://nginx.org/packages/ubuntu/ xenial nginx' >> /etc/apt/sources.list && \ 54 | apt-get update && \ 55 | apt-get -y install --no-install-recommends nginx nginx-module-njs python3 python3-pip python3-setuptools && \ 56 | apt-get clean 57 | 58 | # cython, falcon, gunicorn, tensorflow-serving 59 | RUN \ 60 | pip3 install --no-cache-dir cython falcon gunicorn gevent requests grpcio protobuf && \ 61 | pip3 install --no-dependencies --no-cache-dir tensorflow-serving-api==1.11.1 62 | 63 | COPY ./ / 64 | 65 | ARG TFS_SHORT_VERSION 66 | ENV SAGEMAKER_TFS_VERSION "${TFS_SHORT_VERSION}" 67 | ENV PATH "$PATH:/sagemaker" 68 | -------------------------------------------------------------------------------- /docker/1.12/Dockerfile.gpu: -------------------------------------------------------------------------------- 1 | ARG TFS_VERSION 2 | 3 | FROM tensorflow/serving:${TFS_VERSION}-gpu as tfs 4 | FROM nvidia/cuda:9.0-base-ubuntu16.04 5 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true 6 | 7 | COPY --from=tfs /usr/bin/tensorflow_model_server /usr/bin/tensorflow_model_server 8 | 9 | # https://github.com/tensorflow/serving/blob/1.12.0/tensorflow_serving/tools/docker/Dockerfile.gpu 10 | ENV NCCL_VERSION=2.2.13 11 | ENV CUDNN_VERSION=7.2.1.38 12 | ENV TF_TENSORRT_VERSION=4.1.2 13 | 14 | RUN \ 15 | apt-get update && apt-get install -y --no-install-recommends \ 16 | ca-certificates \ 17 | cuda-command-line-tools-9-0 \ 18 | cuda-command-line-tools-9-0 \ 19 | cuda-cublas-9-0 \ 20 | cuda-cufft-9-0 \ 21 | cuda-curand-9-0 \ 22 | cuda-cusolver-9-0 \ 23 | cuda-cusparse-9-0 \ 24 | libcudnn7=${CUDNN_VERSION}-1+cuda9.0 \ 25 | libnccl2=${NCCL_VERSION}-1+cuda9.0 \ 26 | libgomp1 && \ 27 | apt-get clean && \ 28 | rm -rf /var/lib/apt/lists/* 29 | 30 | # The 'apt-get install' of nvinfer-runtime-trt-repo-ubuntu1604-4.0.1-ga-cuda9.0 31 | # adds a new list which contains libnvinfer library, so it needs another 32 | # 'apt-get update' to retrieve that list before it can actually install the 33 | # library. 34 | # We don't install libnvinfer-dev since we don't need to build against TensorRT, 35 | # and libnvinfer4 doesn't contain libnvinfer.a static library. 
36 | RUN apt-get update && \ 37 | apt-get install --no-install-recommends \ 38 | nvinfer-runtime-trt-repo-ubuntu1604-4.0.1-ga-cuda9.0 && \ 39 | apt-get update && \ 40 | apt-get install --no-install-recommends \ 41 | libnvinfer4=${TF_TENSORRT_VERSION}-1+cuda9.0 && \ 42 | apt-get clean && \ 43 | rm -rf /var/lib/apt/lists/* && \ 44 | rm /usr/lib/x86_64-linux-gnu/libnvinfer_plugin* && \ 45 | rm /usr/lib/x86_64-linux-gnu/libnvcaffe_parser* && \ 46 | rm /usr/lib/x86_64-linux-gnu/libnvparsers* 47 | 48 | # nginx + njs 49 | RUN \ 50 | apt-get update && \ 51 | apt-get -y install --no-install-recommends curl && \ 52 | curl -s http://nginx.org/keys/nginx_signing.key | apt-key add - && \ 53 | echo 'deb http://nginx.org/packages/ubuntu/ xenial nginx' >> /etc/apt/sources.list && \ 54 | apt-get update && \ 55 | apt-get -y install --no-install-recommends nginx nginx-module-njs python3 python3-pip python3-setuptools && \ 56 | apt-get clean 57 | 58 | # cython, falcon, gunicorn, tensorflow-serving 59 | RUN \ 60 | pip3 install --no-cache-dir cython falcon gunicorn gevent requests grpcio protobuf && \ 61 | pip3 install --no-dependencies --no-cache-dir tensorflow-serving-api==1.12.0 62 | 63 | COPY ./ / 64 | 65 | 66 | ARG TFS_SHORT_VERSION 67 | ENV SAGEMAKER_TFS_VERSION "${TFS_SHORT_VERSION}" 68 | ENV PATH "$PATH:/sagemaker" 69 | -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | [MASTER] 2 | 3 | ignore= 4 | tensorflow_serving, 5 | tensorflow-2.1, 6 | tensorflow-2.2 7 | 8 | [MESSAGES CONTROL] 9 | 10 | disable= 11 | C, # convention 12 | R, # refactor 13 | too-many-arguments, # We should fix the offending ones soon. 14 | too-many-lines, # Some files are too big, we should fix this too 15 | too-few-public-methods, 16 | too-many-instance-attributes, 17 | too-many-locals, 18 | len-as-condition, # Nice to have in the future 19 | bad-indentation, 20 | line-too-long, # We let Flake8 take care of this 21 | logging-format-interpolation, 22 | useless-object-inheritance, # We still support python2 so inheriting from object is ok 23 | invalid-name, 24 | import-error, 25 | logging-not-lazy, 26 | fixme, 27 | no-self-use, 28 | attribute-defined-outside-init, 29 | protected-access, 30 | invalid-all-object, 31 | arguments-differ, 32 | abstract-method, 33 | signature-differs, 34 | raise-missing-from 35 | 36 | [REPORTS] 37 | # Set the output format. Available formats are text, parseable, colorized, msvs 38 | # (visual studio) and html 39 | output-format=colorized 40 | 41 | # Tells whether to display a full report or only the messages 42 | # CHANGE: No report. 43 | reports=no 44 | 45 | [FORMAT] 46 | # Maximum number of characters on a single line. 47 | max-line-length=100 48 | # Maximum number of lines in a module 49 | #max-module-lines=1000 50 | # String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 tab). 51 | indent-string=' ' 52 | 53 | [BASIC] 54 | 55 | # Required attributes for module, separated by a comma 56 | #required-attributes= 57 | # List of builtins function names that should not be used, separated by a comma. 58 | # XXX: Should we ban map() & filter() for list comprehensions? 59 | # exit & quit are for the interactive interpreter shell only. 60 | # https://docs.python.org/3/library/constants.html#constants-added-by-the-site-module 61 | bad-functions= 62 | apply, 63 | exit, 64 | input, 65 | quit, 66 | 67 | [SIMILARITIES] 68 | # Minimum lines number of a similarity. 
69 | min-similarity-lines=5 70 | # Ignore comments when computing similarities. 71 | ignore-comments=yes 72 | # Ignore docstrings when computing similarities. 73 | ignore-docstrings=yes 74 | 75 | [VARIABLES] 76 | # Tells whether we should check for unused import in __init__ files. 77 | init-import=no 78 | # A regular expression matching the beginning of the name of dummy variables 79 | # (i.e. not used). 80 | dummy-variables-rgx=_|unused_ 81 | 82 | # List of additional names supposed to be defined in builtins. Remember that 83 | # you should avoid to define new builtins when possible. 84 | #additional-builtins= 85 | 86 | [LOGGING] 87 | # Apply logging string format checks to calls on these modules. 88 | logging-modules= 89 | logging 90 | 91 | [TYPECHECK] 92 | ignored-modules= 93 | distutils 94 | -------------------------------------------------------------------------------- /test/integration/local/test_tfs_batching.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 13 | 14 | import os 15 | import subprocess 16 | 17 | import pytest 18 | 19 | 20 | @pytest.fixture(scope="session", autouse=True) 21 | def volume(): 22 | try: 23 | model_dir = os.path.abspath("test/resources/models") 24 | subprocess.check_call( 25 | "docker volume create --name batching_model_volume --opt type=none " 26 | "--opt device={} --opt o=bind".format(model_dir).split()) 27 | yield model_dir 28 | finally: 29 | subprocess.check_call("docker volume rm batching_model_volume".split()) 30 | 31 | 32 | def test_run_tfs_with_batching_parameters(docker_base_name, tag, runtime_config): 33 | try: 34 | command = ( 35 | "docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080" 36 | " --mount type=volume,source=batching_model_volume,target=/opt/ml/model,readonly" 37 | " -e SAGEMAKER_TFS_ENABLE_BATCHING=true" 38 | " -e SAGEMAKER_TFS_MAX_BATCH_SIZE=16" 39 | " -e SAGEMAKER_TFS_BATCH_TIMEOUT_MICROS=500" 40 | " -e SAGEMAKER_TFS_NUM_BATCH_THREADS=100" 41 | " -e SAGEMAKER_TFS_MAX_ENQUEUED_BATCHES=1" 42 | " -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info" 43 | " -e SAGEMAKER_BIND_TO_PORT=8080" 44 | " -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999" 45 | " {}:{} serve" 46 | ).format(runtime_config, docker_base_name, tag) 47 | 48 | proc = subprocess.Popen(command.split(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT) 49 | 50 | lines_seen = { 51 | "max_batch_size { value: 16 }": 0, 52 | "batch_timeout_micros { value: 500 }": 0, 53 | "num_batch_threads { value: 100 }": 0, 54 | "max_enqueued_batches { value: 1 }": 0 55 | } 56 | 57 | for stdout_line in iter(proc.stdout.readline, ""): 58 | stdout_line = str(stdout_line) 59 | for line in lines_seen.keys(): 60 | if line in stdout_line: 61 | lines_seen[line] += 1 62 | if "Entering the event loop" in stdout_line: 63 | for value in lines_seen.values(): 64 | assert value == 1 65 | break 66 | 67 | finally: 68 | subprocess.check_call("docker rm -f 
sagemaker-tensorflow-serving-test".split()) 69 | -------------------------------------------------------------------------------- /test/integration/sagemaker/test_ei.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 13 | import os 14 | import random 15 | 16 | import pytest 17 | 18 | import util 19 | 20 | EI_SUPPORTED_REGIONS = ["us-east-1", "us-east-2", "us-west-2", 21 | "eu-west-1", "ap-northeast-1", "ap-northeast-2"] 22 | 23 | 24 | @pytest.fixture(params=os.environ["TEST_EI_VERSIONS"].split(",")) 25 | def version(request): 26 | return request.param 27 | 28 | 29 | @pytest.fixture 30 | def repo(request): 31 | return request.config.getoption("--repo") or "sagemaker-tensorflow-serving-eia" 32 | 33 | 34 | @pytest.fixture 35 | def tag(request, version): 36 | return request.config.getoption("--tag") or f"{version}-cpu" 37 | 38 | 39 | @pytest.fixture 40 | def image_uri(registry, region, repo, tag): 41 | return util.image_uri(registry, region, repo, tag) 42 | 43 | 44 | @pytest.fixture(params=os.environ["TEST_EI_INSTANCE_TYPES"].split(",")) 45 | def instance_type(request, region): 46 | return request.param 47 | 48 | 49 | @pytest.fixture(scope="module") 50 | def accelerator_type(request): 51 | return request.config.getoption("--accelerator-type") or "ml.eia1.medium" 52 | 53 | 54 | @pytest.fixture(scope="session") 55 | def model_data(region): 56 | return ("s3://sagemaker-sample-data-{}/tensorflow/model" 57 | "/resnet/resnet_50_v2_fp32_NCHW.tar.gz").format(region) 58 | 59 | 60 | @pytest.fixture 61 | def input_data(): 62 | return {"instances": [[[[random.random() for _ in range(3)] for _ in range(3)]]]} 63 | 64 | 65 | @pytest.fixture 66 | def skip_if_no_accelerator(accelerator_type): 67 | if accelerator_type is None: 68 | pytest.skip("Skipping because accelerator type was not provided") 69 | 70 | 71 | @pytest.fixture 72 | def skip_if_non_supported_ei_region(region): 73 | if region not in EI_SUPPORTED_REGIONS: 74 | pytest.skip("EI is not supported in {}".format(region)) 75 | 76 | 77 | @pytest.mark.skip_if_non_supported_ei_region() 78 | @pytest.mark.skip_if_no_accelerator() 79 | def test_invoke_endpoint(boto_session, sagemaker_client, sagemaker_runtime_client, 80 | model_name, model_data, image_uri, instance_type, accelerator_type, 81 | input_data): 82 | util.create_and_invoke_endpoint(boto_session, sagemaker_client, 83 | sagemaker_runtime_client, model_name, model_data, image_uri, 84 | instance_type, accelerator_type, input_data) 85 | -------------------------------------------------------------------------------- /docker/1.14/Dockerfile.cpu: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | 3 | LABEL maintainer="Amazon AI" 4 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true 5 | 6 | ARG PYTHON=python3 7 | ARG PIP=pip3 8 | ARG TFS_SHORT_VERSION=1.14 9 | 10 | # See 
http://bugs.python.org/issue19846 11 | ENV LANG=C.UTF-8 12 | # Python won’t try to write .pyc or .pyo files on the import of source modules 13 | ENV PYTHONDONTWRITEBYTECODE=1 14 | ENV PYTHONUNBUFFERED=1 15 | ENV SAGEMAKER_TFS_VERSION="${TFS_SHORT_VERSION}" 16 | ENV PATH="$PATH:/sagemaker" 17 | ENV LD_LIBRARY_PATH='/usr/local/lib:$LD_LIBRARY_PATH' 18 | ENV MODEL_BASE_PATH=/models 19 | # The only required piece is the model name in order to differentiate endpoints 20 | ENV MODEL_NAME=model 21 | 22 | # nginx + njs 23 | RUN apt-get update \ 24 | && apt-get -y install --no-install-recommends curl gnupg2 ca-certificates git wget vim build-essential zlib1g-dev \ 25 | && curl -s http://nginx.org/keys/nginx_signing.key | apt-key add - \ 26 | && echo 'deb http://nginx.org/packages/ubuntu/ bionic nginx' >> /etc/apt/sources.list \ 27 | && apt-get update \ 28 | && apt-get -y install --no-install-recommends nginx nginx-module-njs python3 python3-pip python3-setuptools \ 29 | && apt-get clean \ 30 | && rm -rf /var/lib/apt/lists/* 31 | 32 | RUN ${PIP} --no-cache-dir install --upgrade pip setuptools 33 | 34 | # cython, falcon, gunicorn, grpc 35 | RUN ${PIP} install --no-cache-dir \ 36 | awscli==1.16.196 \ 37 | cython==0.29.12 \ 38 | falcon==2.0.0 \ 39 | gunicorn==19.9.0 \ 40 | gevent==1.4.0 \ 41 | requests==2.22.0 \ 42 | grpcio==1.24.1 \ 43 | protobuf==3.10.0 \ 44 | # using --no-dependencies to avoid installing tensorflow binary 45 | && ${PIP} install --no-dependencies --no-cache-dir \ 46 | tensorflow-serving-api==1.14.0 47 | 48 | COPY ./ / 49 | 50 | # Some TF tools expect a "python" binary 51 | RUN ln -s $(which ${PYTHON}) /usr/local/bin/python 52 | 53 | RUN curl https://tensorflow-aws.s3-us-west-2.amazonaws.com/MKL-Libraries/libiomp5.so -o /usr/local/lib/libiomp5.so 54 | RUN curl https://tensorflow-aws.s3-us-west-2.amazonaws.com/MKL-Libraries/libmklml_intel.so -o /usr/local/lib/libmklml_intel.so 55 | 56 | RUN curl https://tensorflow-aws.s3-us-west-2.amazonaws.com/1.14/Serving/CPU-WITH-MKL/tensorflow_model_server -o tensorflow_model_server && \ 57 | chmod 555 tensorflow_model_server && cp tensorflow_model_server /usr/bin/tensorflow_model_server && \ 58 | rm -f tensorflow_model_server 59 | 60 | # Expose ports 61 | # gRPC and REST 62 | EXPOSE 8500 8501 63 | 64 | # Set where models should be stored in the container 65 | RUN mkdir -p ${MODEL_BASE_PATH} 66 | 67 | # Create a script that runs the model server so we can use environment variables 68 | # while also passing in arguments from the docker command line 69 | RUN echo '#!/bin/bash \n\n' > /usr/bin/tf_serving_entrypoint.sh \ 70 | && echo '/usr/bin/tensorflow_model_server --port=8500 --rest_api_port=8501 --model_name=${MODEL_NAME} --model_base_path=${MODEL_BASE_PATH}/${MODEL_NAME} "$@"' >> /usr/bin/tf_serving_entrypoint.sh \ 71 | && chmod +x /usr/bin/tf_serving_entrypoint.sh 72 | 73 | CMD ["/usr/bin/tf_serving_entrypoint.sh"] 74 | -------------------------------------------------------------------------------- /docker/1.13/Dockerfile.cpu: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | 3 | LABEL maintainer="Amazon AI" 4 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true 5 | 6 | ARG PYTHON=python3 7 | ARG PIP=pip3 8 | ARG TFS_SHORT_VERSION=1.13 9 | 10 | # See http://bugs.python.org/issue19846 11 | ENV LANG C.UTF-8 12 | # Python won’t try to write .pyc or .pyo files on the import of source modules 13 | ENV PYTHONDONTWRITEBYTECODE=1 14 | ENV PYTHONUNBUFFERED=1 15 | ENV 
SAGEMAKER_TFS_VERSION="${TFS_SHORT_VERSION}" 16 | ENV PATH="$PATH:/sagemaker" 17 | ENV LD_LIBRARY_PATH='/usr/local/lib:$LD_LIBRARY_PATH' 18 | ENV MODEL_BASE_PATH=/models 19 | # The only required piece is the model name in order to differentiate endpoints 20 | ENV MODEL_NAME=model 21 | 22 | # nginx + njs 23 | RUN apt-get update \ 24 | && apt-get -y install --no-install-recommends curl gnupg2 ca-certificates git wget vim build-essential zlib1g-dev \ 25 | && curl -s http://nginx.org/keys/nginx_signing.key | apt-key add - \ 26 | && echo 'deb http://nginx.org/packages/ubuntu/ bionic nginx' >> /etc/apt/sources.list \ 27 | && apt-get update \ 28 | && apt-get -y install --no-install-recommends nginx nginx-module-njs python3 python3-pip python3-setuptools \ 29 | && apt-get clean \ 30 | && rm -rf /var/lib/apt/lists/* 31 | 32 | RUN ${PIP} --no-cache-dir install --upgrade pip setuptools 33 | 34 | # cython, falcon, gunicorn, grpc 35 | RUN ${PIP} install -U --no-cache-dir \ 36 | awscli==1.16.130 \ 37 | cython==0.29.10 \ 38 | falcon==2.0.0 \ 39 | gunicorn==19.9.0 \ 40 | gevent==1.4.0 \ 41 | requests==2.21.0 \ 42 | grpcio==1.24.1 \ 43 | protobuf==3.10.0 \ 44 | # using --no-dependencies to avoid installing tensorflow binary 45 | && ${PIP} install --no-dependencies --no-cache-dir \ 46 | tensorflow-serving-api==1.13.0 47 | 48 | COPY ./ / 49 | 50 | # Some TF tools expect a "python" binary 51 | RUN ln -s $(which ${PYTHON}) /usr/local/bin/python \ 52 | && ln -s /usr/local/bin/pip3 /usr/bin/pip 53 | 54 | RUN curl https://s3-us-west-2.amazonaws.com/tensorflow-aws/1.13/Serving/CPU-WITH-MKL/libiomp5.so -o /usr/local/lib/libiomp5.so 55 | RUN curl https://s3-us-west-2.amazonaws.com/tensorflow-aws/1.13/Serving/CPU-WITH-MKL/libmklml_intel.so -o /usr/local/lib/libmklml_intel.so 56 | 57 | RUN curl https://s3-us-west-2.amazonaws.com/tensorflow-aws/1.13/Serving/CPU-WITH-MKL/tensorflow_model_server -o tensorflow_model_server \ 58 | && chmod 555 tensorflow_model_server \ 59 | && cp tensorflow_model_server /usr/bin/tensorflow_model_server \ 60 | && rm -f tensorflow_model_server 61 | 62 | # Expose ports 63 | # gRPC and REST 64 | EXPOSE 8500 8501 65 | 66 | # Set where models should be stored in the container 67 | RUN mkdir -p ${MODEL_BASE_PATH} 68 | 69 | # Create a script that runs the model server so we can use environment variables 70 | # while also passing in arguments from the docker command line 71 | RUN echo '#!/bin/bash \n\n' > /usr/bin/tf_serving_entrypoint.sh \ 72 | && echo '/usr/bin/tensorflow_model_server --port=8500 --rest_api_port=8501 --model_name=${MODEL_NAME} --model_base_path=${MODEL_BASE_PATH}/${MODEL_NAME} "$@"' >> /usr/bin/tf_serving_entrypoint.sh \ 73 | && chmod +x /usr/bin/tf_serving_entrypoint.sh 74 | 75 | CMD ["/usr/bin/tf_serving_entrypoint.sh"] 76 | -------------------------------------------------------------------------------- /scripts/shared.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Utility functions for build/test scripts. 
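#
# Typical usage from the sibling build/publish scripts (an illustrative sketch
# only -- the exact docker invocation lives in build.sh and may differ):
#
#   source "$(dirname "$0")/shared.sh"
#   parse_std_args "$@"   # sets $arch, $device, $repository, $aws_region, $full_version, $short_version
#   get_ei_executable     # no-op unless --arch eia
#   docker build -t "$repository:$full_version-$device" \
#       -f "docker/$short_version/Dockerfile.$device" .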
4 | 5 | function error() { 6 | >&2 echo $1 7 | >&2 echo "usage: $0 [--version <version>] [--arch (cpu*|gpu|eia)] [--region <region>]" 8 | exit 1 9 | } 10 | 11 | function get_default_region() { 12 | if [ -n "${AWS_DEFAULT_REGION:-}" ]; then 13 | echo "$AWS_DEFAULT_REGION" 14 | else 15 | aws configure get region 16 | fi 17 | } 18 | 19 | function get_full_version() { 20 | echo $1 | sed 's#^\([0-9][0-9]*\.[0-9][0-9]*\)$#\1.0#' 21 | } 22 | 23 | function get_short_version() { 24 | echo $1 | sed 's#\([0-9][0-9]*\.[0-9][0-9]*\)\.[0-9][0-9]*#\1#' 25 | } 26 | 27 | function get_aws_account() { 28 | aws --region $AWS_DEFAULT_REGION sts --endpoint-url https://sts.$AWS_DEFAULT_REGION.amazonaws.com get-caller-identity --query 'Account' --output text 29 | } 30 | 31 | function get_ei_executable() { 32 | [[ $arch != 'eia' ]] && return 33 | 34 | if [[ -z $(aws s3 ls 's3://amazonei-tensorflow/tensorflow-serving/v'${short_version}'/ubuntu/latest/') ]]; then 35 | echo 'ERROR: cannot find this version in S3 bucket.' 36 | exit 1 37 | fi 38 | 39 | tmpdir=$(mktemp -d) 40 | tar_file=$(aws s3 ls "s3://amazonei-tensorflow/tensorflow-serving/v${short_version}/ubuntu/latest/" | awk '{print $4}') 41 | aws s3 cp "s3://amazonei-tensorflow/tensorflow-serving/v${short_version}/ubuntu/latest/${tar_file}" "$tmpdir/$tar_file" 42 | 43 | tar -C "$tmpdir" -xf "$tmpdir/$tar_file" 44 | 45 | find "$tmpdir" -name amazonei_tensorflow_model_server -exec mv {} docker/build_artifacts/ \; 46 | rm -rf "$tmpdir" 47 | } 48 | 49 | function remove_ei_executable() { 50 | [[ $arch != 'eia' ]] && return 51 | 52 | rm docker/build_artifacts/amazonei_tensorflow_model_server 53 | } 54 | 55 | function get_device_type() { 56 | if [[ $1 = 'eia' ]]; then 57 | echo 'cpu' 58 | else 59 | echo $1 60 | fi 61 | } 62 | 63 | function parse_std_args() { 64 | # defaults 65 | arch='cpu' 66 | version='1.13.0' 67 | repository='sagemaker-tensorflow-serving' 68 | 69 | aws_region=$(get_default_region) 70 | aws_account=$(get_aws_account) 71 | 72 | while [[ $# -gt 0 ]]; do 73 | key="$1" 74 | 75 | case $key in 76 | -v|--version) 77 | version="$2" 78 | shift 79 | shift 80 | ;; 81 | -a|--arch) 82 | arch="$2" 83 | shift 84 | shift 85 | ;; 86 | -r|--region) 87 | aws_region="$2" 88 | shift 89 | shift 90 | ;; 91 | -p|--repository) 92 | repository="$2" 93 | shift 94 | shift 95 | ;; 96 | *) # unknown option 97 | error "unknown option: $1" 98 | shift 99 | ;; 100 | esac 101 | done 102 | 103 | [[ -z "${version// }" ]] && error 'missing version' 104 | [[ "$arch" =~ ^(cpu|gpu|eia)$ ]] || error "invalid arch: $arch" 105 | [[ -z "${aws_region// }" ]] && error 'missing aws region' 106 | 107 | [[ "$arch" = eia ]] && repository=$repository'-'$arch 108 | 109 | full_version=$(get_full_version $version) 110 | short_version=$(get_short_version $version) 111 | device=$(get_device_type $arch) 112 | 113 | true 114 | } 115 | -------------------------------------------------------------------------------- /docker/2.0/Dockerfile.cpu: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | 3 | LABEL maintainer="Amazon AI" 4 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true 5 | 6 | ARG PYTHON=python3 7 | ARG PIP=pip3 8 | ARG TFS_SHORT_VERSION=2.0.1 9 | ARG TFS_URL=https://tensorflow-aws.s3-us-west-2.amazonaws.com/${TFS_SHORT_VERSION}/Serving/CPU-WITH-MKL/tensorflow_model_server 10 | 11 | # See http://bugs.python.org/issue19846 12 | ENV LANG=C.UTF-8 13 | # Python won’t try to write .pyc or .pyo files on the import of source modules 14 | ENV 
PYTHONDONTWRITEBYTECODE=1 15 | ENV PYTHONUNBUFFERED=1 16 | ENV SAGEMAKER_TFS_VERSION="${TFS_SHORT_VERSION}" 17 | ENV PATH="$PATH:/sagemaker" 18 | ENV LD_LIBRARY_PATH='/usr/local/lib:$LD_LIBRARY_PATH' 19 | ENV MODEL_BASE_PATH=/models 20 | # The only required piece is the model name in order to differentiate endpoints 21 | ENV MODEL_NAME=model 22 | ENV DEBIAN_FRONTEND=noninteractive 23 | 24 | # nginx + njs 25 | RUN apt-get update \ 26 | && apt-get -y install --no-install-recommends \ 27 | curl \ 28 | gnupg2 \ 29 | ca-certificates \ 30 | git \ 31 | wget \ 32 | vim \ 33 | build-essential \ 34 | zlib1g-dev \ 35 | && curl -s http://nginx.org/keys/nginx_signing.key | apt-key add - \ 36 | && echo 'deb http://nginx.org/packages/ubuntu/ bionic nginx' >> /etc/apt/sources.list \ 37 | && apt-get update \ 38 | && apt-get -y install --no-install-recommends \ 39 | nginx \ 40 | nginx-module-njs \ 41 | python3 \ 42 | python3-pip \ 43 | python3-setuptools \ 44 | && apt-get clean \ 45 | && rm -rf /var/lib/apt/lists/* 46 | 47 | RUN ${PIP} --no-cache-dir install --upgrade pip setuptools 48 | 49 | # cython, falcon, gunicorn, grpc 50 | RUN ${PIP} install --no-cache-dir \ 51 | awscli==1.16.303 \ 52 | cython==0.29.14 \ 53 | falcon==2.0.0 \ 54 | gunicorn==20.0.4 \ 55 | gevent==1.4.0 \ 56 | requests==2.22.0 \ 57 | grpcio==1.26.0 \ 58 | protobuf==3.11.1 \ 59 | # using --no-dependencies to avoid installing tensorflow binary 60 | && ${PIP} install --no-dependencies --no-cache-dir \ 61 | tensorflow-serving-api==2.0 62 | 63 | COPY ./sagemaker /sagemaker 64 | 65 | # Some TF tools expect a "python" binary 66 | RUN ln -s $(which ${PYTHON}) /usr/local/bin/python 67 | 68 | RUN curl https://tensorflow-aws.s3-us-west-2.amazonaws.com/MKL-Libraries/libiomp5.so -o /usr/local/lib/libiomp5.so 69 | RUN curl https://tensorflow-aws.s3-us-west-2.amazonaws.com/MKL-Libraries/libmklml_intel.so -o /usr/local/lib/libmklml_intel.so 70 | 71 | RUN curl $TFS_URL -o /usr/bin/tensorflow_model_server \ 72 | && chmod 555 /usr/bin/tensorflow_model_server 73 | 74 | # Expose ports 75 | # gRPC and REST 76 | EXPOSE 8500 8501 77 | 78 | # Set where models should be stored in the container 79 | RUN mkdir -p ${MODEL_BASE_PATH} 80 | 81 | # Create a script that runs the model server so we can use environment variables 82 | # while also passing in arguments from the docker command line 83 | RUN echo '#!/bin/bash \n\n' > /usr/bin/tf_serving_entrypoint.sh \ 84 | && echo '/usr/bin/tensorflow_model_server --port=8500 --rest_api_port=8501 --model_name=${MODEL_NAME} --model_base_path=${MODEL_BASE_PATH}/${MODEL_NAME} "$@"' >> /usr/bin/tf_serving_entrypoint.sh \ 85 | && chmod +x /usr/bin/tf_serving_entrypoint.sh 86 | 87 | ADD https://raw.githubusercontent.com/aws/aws-deep-learning-containers-utils/master/deep_learning_container.py /usr/local/bin/deep_learning_container.py 88 | 89 | RUN chmod +x /usr/local/bin/deep_learning_container.py 90 | 91 | RUN curl https://aws-dlc-licenses.s3.amazonaws.com/tensorflow-2.0.1/license.txt -o /license.txt 92 | 93 | CMD ["/usr/bin/tf_serving_entrypoint.sh"] 94 | -------------------------------------------------------------------------------- /docker/2.1/Dockerfile.cpu: -------------------------------------------------------------------------------- 1 | FROM public.ecr.aws/ubuntu/ubuntu:18.04 2 | 3 | LABEL maintainer="Amazon AI" 4 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true 5 | LABEL com.amazonaws.sagemaker.capabilities.multi-models=true 6 | 7 | ARG PYTHON=python3 8 | ARG PIP=pip3 9 | ARG TFS_SHORT_VERSION=2.1 
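# The ARG values above can be overridden at image build time; for example (an
# illustrative invocation only -- the canonical build flags are assembled by
# scripts/build.sh):
#   docker build --build-arg TFS_SHORT_VERSION=2.1 \
#       -f docker/2.1/Dockerfile.cpu -t sagemaker-tensorflow-serving:2.1-cpu .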
10 | ARG TFS_URL=https://tensorflow-aws.s3-us-west-2.amazonaws.com/2.1/Serving/CPU-WITH-MKL/tensorflow_model_server 11 | 12 | # See http://bugs.python.org/issue19846 13 | ENV LANG=C.UTF-8 14 | # Python won’t try to write .pyc or .pyo files on the import of source modules 15 | ENV PYTHONDONTWRITEBYTECODE=1 16 | ENV PYTHONUNBUFFERED=1 17 | ENV SAGEMAKER_TFS_VERSION="${TFS_SHORT_VERSION}" 18 | ENV PATH="$PATH:/sagemaker" 19 | ENV LD_LIBRARY_PATH='/usr/local/lib:$LD_LIBRARY_PATH' 20 | ENV MODEL_BASE_PATH=/models 21 | # The only required piece is the model name in order to differentiate endpoints 22 | ENV MODEL_NAME=model 23 | ENV DEBIAN_FRONTEND=noninteractive 24 | 25 | # nginx + njs 26 | RUN apt-get update \ 27 | && apt-get -y install --no-install-recommends \ 28 | curl \ 29 | gnupg2 \ 30 | ca-certificates \ 31 | git \ 32 | wget \ 33 | vim \ 34 | build-essential \ 35 | zlib1g-dev \ 36 | && curl -s http://nginx.org/keys/nginx_signing.key | apt-key add - \ 37 | && echo 'deb http://nginx.org/packages/ubuntu/ bionic nginx' >> /etc/apt/sources.list \ 38 | && apt-get update \ 39 | && apt-get -y install --no-install-recommends \ 40 | nginx \ 41 | nginx-module-njs \ 42 | python3 \ 43 | python3-pip \ 44 | python3-setuptools \ 45 | && apt-get clean \ 46 | && rm -rf /var/lib/apt/lists/* 47 | 48 | RUN ${PIP} --no-cache-dir install --upgrade pip setuptools 49 | 50 | # cython, falcon, gunicorn, grpc 51 | RUN ${PIP} install --no-cache-dir \ 52 | awscli \ 53 | boto3 \ 54 | cython==0.29.14 \ 55 | falcon==2.0.0 \ 56 | gunicorn==20.0.4 \ 57 | gevent==1.4.0 \ 58 | requests==2.22.0 \ 59 | grpcio==1.27.1 \ 60 | protobuf==3.11.1 \ 61 | # using --no-dependencies to avoid installing tensorflow binary 62 | && ${PIP} install --no-dependencies --no-cache-dir \ 63 | tensorflow-serving-api==2.1.0 64 | 65 | COPY ./sagemaker /sagemaker 66 | 67 | # Some TF tools expect a "python" binary 68 | RUN ln -s $(which ${PYTHON}) /usr/local/bin/python 69 | 70 | RUN curl https://tensorflow-aws.s3-us-west-2.amazonaws.com/MKL-Libraries/libiomp5.so -o /usr/local/lib/libiomp5.so 71 | RUN curl https://tensorflow-aws.s3-us-west-2.amazonaws.com/MKL-Libraries/libmklml_intel.so -o /usr/local/lib/libmklml_intel.so 72 | 73 | RUN curl $TFS_URL -o /usr/bin/tensorflow_model_server \ 74 | && chmod 555 /usr/bin/tensorflow_model_server 75 | 76 | # Expose ports 77 | # gRPC and REST 78 | EXPOSE 8500 8501 79 | 80 | # Set where models should be stored in the container 81 | RUN mkdir -p ${MODEL_BASE_PATH} 82 | 83 | # Create a script that runs the model server so we can use environment variables 84 | # while also passing in arguments from the docker command line 85 | RUN echo '#!/bin/bash \n\n' > /usr/bin/tf_serving_entrypoint.sh \ 86 | && echo '/usr/bin/tensorflow_model_server --port=8500 --rest_api_port=8501 --model_name=${MODEL_NAME} --model_base_path=${MODEL_BASE_PATH}/${MODEL_NAME} "$@"' >> /usr/bin/tf_serving_entrypoint.sh \ 87 | && chmod +x /usr/bin/tf_serving_entrypoint.sh 88 | 89 | ADD https://raw.githubusercontent.com/aws/aws-deep-learning-containers-utils/master/deep_learning_container.py /usr/local/bin/deep_learning_container.py 90 | 91 | RUN chmod +x /usr/local/bin/deep_learning_container.py 92 | 93 | RUN curl https://aws-dlc-licenses.s3.amazonaws.com/tensorflow-2.1/license.txt -o /license.txt 94 | 95 | CMD ["/usr/bin/tf_serving_entrypoint.sh"] 96 | -------------------------------------------------------------------------------- /test/integration/local/test_multi_tfs.py: 
-------------------------------------------------------------------------------- 1 | # Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 13 | 14 | import json 15 | import os 16 | import subprocess 17 | import sys 18 | import time 19 | 20 | import pytest 21 | import requests 22 | 23 | BASE_URL = "http://localhost:8080/invocations" 24 | 25 | 26 | @pytest.fixture(scope="session", autouse=True) 27 | def volume(): 28 | try: 29 | model_dir = os.path.abspath("test/resources/models") 30 | subprocess.check_call( 31 | "docker volume create --name multi_tfs_model_volume --opt type=none " 32 | "--opt device={} --opt o=bind".format(model_dir).split()) 33 | yield model_dir 34 | finally: 35 | subprocess.check_call("docker volume rm multi_tfs_model_volume".split()) 36 | 37 | 38 | @pytest.fixture(scope="module", autouse=True, params=[True, False]) 39 | def container(request, docker_base_name, tag, runtime_config): 40 | try: 41 | if request.param: 42 | batching_config = " -e SAGEMAKER_TFS_ENABLE_BATCHING=true" 43 | else: 44 | batching_config = "" 45 | command = ( 46 | "docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080" 47 | " --mount type=volume,source=multi_tfs_model_volume,target=/opt/ml/model,readonly" 48 | " -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info" 49 | " -e SAGEMAKER_BIND_TO_PORT=8080" 50 | " -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999" 51 | " -e SAGEMAKER_TFS_INSTANCE_COUNT=2" 52 | " -e SAGEMAKER_GUNICORN_WORKERS=4" 53 | " -e SAGEMAKER_TFS_INTER_OP_PARALLELISM=1" 54 | " -e SAGEMAKER_TFS_INTRA_OP_PARALLELISM=1" 55 | " {}" 56 | " {}:{} serve" 57 | ).format(runtime_config, batching_config, docker_base_name, tag) 58 | 59 | proc = subprocess.Popen(command.split(), stdout=sys.stdout, stderr=subprocess.STDOUT) 60 | 61 | attempts = 0 62 | 63 | while attempts < 40: 64 | time.sleep(3) 65 | try: 66 | res_code = requests.get("http://localhost:8080/ping").status_code 67 | if res_code == 200: 68 | break 69 | except: 70 | attempts += 1 71 | pass 72 | 73 | yield proc.pid 74 | finally: 75 | subprocess.check_call("docker rm -f sagemaker-tensorflow-serving-test".split()) 76 | 77 | 78 | def make_request(data, content_type="application/json", method="predict", version=None): 79 | custom_attributes = "tfs-model-name=half_plus_three,tfs-method={}".format(method) 80 | if version: 81 | custom_attributes += ",tfs-model-version={}".format(version) 82 | 83 | headers = { 84 | "Content-Type": content_type, 85 | "X-Amzn-SageMaker-Custom-Attributes": custom_attributes, 86 | } 87 | response = requests.post(BASE_URL, data=data, headers=headers) 88 | return json.loads(response.content.decode("utf-8")) 89 | 90 | 91 | def test_predict(): 92 | x = { 93 | "instances": [1.0, 2.0, 5.0] 94 | } 95 | 96 | y = make_request(json.dumps(x)) 97 | assert y == {"predictions": [3.5, 4.0, 5.5]} 98 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 
Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check [existing open](https://github.com/aws/sagemaker-tfs-container/issues), or [recently closed](https://github.com/aws/sagemaker-tfs-container/issues?utf8=%E2%9C%93&q=is%3Aissue%20is%3Aclosed%20), issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *master* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any ['help wanted'](https://github.com/aws/sagemaker-tfs-container/labels/help%20wanted) issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. 
55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](https://github.com/aws/sagemaker-tfs-container/blob/master/LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 60 | 61 | We may ask you to sign a [Contributor License Agreement (CLA)](http://en.wikipedia.org/wiki/Contributor_License_Agreement) for larger changes. 62 | -------------------------------------------------------------------------------- /docker/build_artifacts/deep_learning_container.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 13 | import re 14 | import json 15 | import logging 16 | import requests 17 | 18 | 19 | def _validate_instance_id(instance_id): 20 | """ 21 | Validate instance ID 22 | """ 23 | instance_id_regex = r"^(i-\S{17})" 24 | compiled_regex = re.compile(instance_id_regex) 25 | match = compiled_regex.match(instance_id) 26 | 27 | if not match: 28 | return None 29 | 30 | return match.group(1) 31 | 32 | 33 | def _retrieve_instance_id(): 34 | """ 35 | Retrieve instance ID from instance metadata service 36 | """ 37 | instance_id = None 38 | url = "http://169.254.169.254/latest/meta-data/instance-id" 39 | response = requests_helper(url, timeout=0.1) 40 | 41 | if response is not None: 42 | instance_id = _validate_instance_id(response.text) 43 | 44 | return instance_id 45 | 46 | 47 | def _retrieve_instance_region(): 48 | """ 49 | Retrieve instance region from instance metadata service 50 | """ 51 | region = None 52 | valid_regions = [ 53 | "ap-northeast-1", 54 | "ap-northeast-2", 55 | "ap-southeast-1", 56 | "ap-southeast-2", 57 | "ap-south-1", 58 | "ca-central-1", 59 | "eu-central-1", 60 | "eu-north-1", 61 | "eu-west-1", 62 | "eu-west-2", 63 | "eu-west-3", 64 | "sa-east-1", 65 | "us-east-1", 66 | "us-east-2", 67 | "us-west-1", 68 | "us-west-2", 69 | ] 70 | 71 | url = "http://169.254.169.254/latest/dynamic/instance-identity/document" 72 | response = requests_helper(url, timeout=0.1) 73 | 74 | if response is not None: 75 | response_json = json.loads(response.text) 76 | 77 | if response_json["region"] in valid_regions: 78 | region = response_json["region"] 79 | 80 | return region 81 | 82 | 83 | def query_bucket(): 84 | """ 85 | GET request on an empty object from an Amazon S3 bucket 86 | """ 87 | response = None 88 | instance_id = _retrieve_instance_id() 89 | region = _retrieve_instance_region() 90 | 91 | if instance_id is not None and region is not None: 92 | url = ( 93 | "https://aws-deep-learning-containers-{0}.s3.{0}.amazonaws.com" 94 | "/dlc-containers.txt?x-instance-id={1}".format(region, instance_id) 95 | ) 96 | response = requests_helper(url, timeout=0.2) 97 | 98 | logging.debug("Query bucket finished: {}".format(response)) 99 | 100 | return response 101 | 102 | 103 | def requests_helper(url, timeout): 104 | response = None 105 | try: 106 | response = requests.get(url, timeout=timeout) 107 | except 
requests.exceptions.RequestException as e: 108 | logging.error("Request exception: {}".format(e)) 109 | 110 | return response 111 | 112 | 113 | def main(): 114 | """ 115 | Invoke bucket query 116 | """ 117 | # Logs are not necessary for normal run. Remove this line while debugging. 118 | logging.getLogger().disabled = True 119 | 120 | logging.basicConfig(level=logging.ERROR) 121 | query_bucket() 122 | 123 | 124 | if __name__ == "__main__": 125 | main() 126 | -------------------------------------------------------------------------------- /docker/1.15/Dockerfile.cpu: -------------------------------------------------------------------------------- 1 | FROM public.ecr.aws/ubuntu/ubuntu:18.04 2 | 3 | LABEL maintainer="Amazon AI" 4 | # Specify LABEL for inference pipelines to use SAGEMAKER_BIND_TO_PORT 5 | # https://docs.aws.amazon.com/sagemaker/latest/dg/inference-pipeline-real-time.html 6 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true 7 | LABEL com.amazonaws.sagemaker.capabilities.multi-models=true 8 | 9 | # Add arguments to achieve the version, python and url 10 | ARG PYTHON=python3 11 | ARG PIP=pip3 12 | ARG TFS_SHORT_VERSION=1.15.2 13 | ARG TF_S3_URL=https://tensorflow-aws.s3-us-west-2.amazonaws.com 14 | ARG TF_MODEL_SERVER_SOURCE=${TF_S3_URL}/${TFS_SHORT_VERSION}/Serving/CPU-WITH-MKL/tensorflow_model_server 15 | 16 | # See http://bugs.python.org/issue19846 17 | ENV LANG=C.UTF-8 18 | # Python won’t try to write .pyc or .pyo files on the import of source modules 19 | ENV PYTHONDONTWRITEBYTECODE=1 20 | ENV PYTHONUNBUFFERED=1 21 | ENV SAGEMAKER_TFS_VERSION="${TFS_SHORT_VERSION}" 22 | ENV PATH="$PATH:/sagemaker" 23 | ENV LD_LIBRARY_PATH='/usr/local/lib:$LD_LIBRARY_PATH' 24 | ENV MODEL_BASE_PATH=/models 25 | # The only required piece is the model name in order to differentiate endpoints 26 | ENV MODEL_NAME=model 27 | # To prevent user interaction when installing time zone data package 28 | ENV DEBIAN_FRONTEND=noninteractive 29 | 30 | # nginx + njs 31 | RUN apt-get update \ 32 | && apt-get -y install --no-install-recommends \ 33 | curl \ 34 | gnupg2 \ 35 | ca-certificates \ 36 | git \ 37 | wget \ 38 | vim \ 39 | && curl -s http://nginx.org/keys/nginx_signing.key | apt-key add - \ 40 | && echo 'deb http://nginx.org/packages/ubuntu/ bionic nginx' >> /etc/apt/sources.list \ 41 | && apt-get update \ 42 | && apt-get -y install --no-install-recommends \ 43 | nginx \ 44 | nginx-module-njs \ 45 | python3 \ 46 | python3-pip \ 47 | python3-setuptools \ 48 | && apt-get clean \ 49 | && rm -rf /var/lib/apt/lists/* 50 | 51 | RUN ${PIP} --no-cache-dir install --upgrade \ 52 | pip \ 53 | setuptools 54 | 55 | # cython, falcon, gunicorn, grpc 56 | RUN ${PIP} install --no-cache-dir \ 57 | awscli \ 58 | boto3 \ 59 | pyYAML==5.3.1 \ 60 | cython==0.29.12 \ 61 | falcon==2.0.0 \ 62 | gunicorn==19.9.0 \ 63 | gevent==1.4.0 \ 64 | requests==2.22.0 \ 65 | grpcio==1.24.1 \ 66 | protobuf==3.10.0 \ 67 | # using --no-dependencies to avoid installing tensorflow binary 68 | && ${PIP} install --no-dependencies --no-cache-dir \ 69 | tensorflow-serving-api==1.15.0 70 | 71 | COPY sagemaker /sagemaker 72 | 73 | WORKDIR / 74 | 75 | # Some TF tools expect a "python" binary 76 | RUN ln -s $(which ${PYTHON}) /usr/local/bin/python \ 77 | && ln -s /usr/local/bin/pip3 /usr/bin/pip 78 | 79 | RUN curl ${TF_S3_URL}/MKL-Libraries/libiomp5.so -o /usr/local/lib/libiomp5.so \ 80 | && curl ${TF_S3_URL}/MKL-Libraries/libmklml_intel.so -o /usr/local/lib/libmklml_intel.so 81 | 82 | RUN curl ${TF_MODEL_SERVER_SOURCE} -o 
/usr/bin/tensorflow_model_server \ 83 | && chmod 555 /usr/bin/tensorflow_model_server 84 | 85 | # Expose ports 86 | # gRPC and REST 87 | EXPOSE 8500 8501 88 | 89 | # Set where models should be stored in the container 90 | RUN mkdir -p ${MODEL_BASE_PATH} 91 | 92 | # Create a script that runs the model server so we can use environment variables 93 | # while also passing in arguments from the docker command line 94 | RUN echo '#!/bin/bash \n\n' > /usr/bin/tf_serving_entrypoint.sh \ 95 | && echo '/usr/bin/tensorflow_model_server --port=8500 --rest_api_port=8501 --model_name=${MODEL_NAME} --model_base_path=${MODEL_BASE_PATH}/${MODEL_NAME} "$@"' >> /usr/bin/tf_serving_entrypoint.sh \ 96 | && chmod +x /usr/bin/tf_serving_entrypoint.sh 97 | 98 | ADD https://raw.githubusercontent.com/aws/aws-deep-learning-containers-utils/master/deep_learning_container.py /usr/local/bin/deep_learning_container.py 99 | 100 | RUN chmod +x /usr/local/bin/deep_learning_container.py 101 | 102 | RUN curl https://aws-dlc-licenses.s3.amazonaws.com/tensorflow/license.txt -o /license.txt 103 | 104 | CMD ["/usr/bin/tf_serving_entrypoint.sh"] 105 | -------------------------------------------------------------------------------- /test/perf/ec2-perftest.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for i in $(seq 1 5); do python perftest_endpoint.py --count 5000 --warmup 100 --workers 4 --model sm-c5xl >> sm-perftest.log; done 4 | for i in $(seq 1 5); do python perftest_endpoint.py --count 5000 --warmup 100 --workers 8 --model sm-c5xl >> sm-perftest.log; done 5 | for i in $(seq 1 5); do python perftest_endpoint.py --count 5000 --warmup 100 --workers 16 --model sm-c5xl >> sm-perftest.log; done 6 | for i in $(seq 1 5); do python perftest_endpoint.py --count 5000 --warmup 100 --workers 32 --model sm-c5xl >> sm-perftest.log; done 7 | for i in $(seq 1 5); do python perftest_endpoint.py --count 5000 --warmup 100 --workers 4 --model sm-c518xl >> sm-perftest.log; done 8 | for i in $(seq 1 5); do python perftest_endpoint.py --count 5000 --warmup 100 --workers 8 --model sm-c518xl >> sm-perftest.log; done 9 | for i in $(seq 1 5); do python perftest_endpoint.py --count 5000 --warmup 100 --workers 16 --model sm-c518xl >> sm-perftest.log; done 10 | for i in $(seq 1 5); do python perftest_endpoint.py --count 5000 --warmup 100 --workers 32 --model sm-c518xl >> sm-perftest.log; done 11 | for i in $(seq 1 5); do python perftest_endpoint.py --count 5000 --warmup 100 --workers 64 --model sm-c518xl >> sm-perftest.log; done 12 | 13 | for i in $(seq 1 5); do python perftest_endpoint.py --count 5000 --warmup 100 --workers 4 --model tfs-c5xl >> tfs-perftest.log; done 14 | for i in $(seq 1 5); do python perftest_endpoint.py --count 5000 --warmup 100 --workers 8 --model tfs-c5xl >> tfs-perftest.log; done 15 | for i in $(seq 1 5); do python perftest_endpoint.py --count 5000 --warmup 100 --workers 16 --model tfs-c5xl >> tfs-perftest.log; done 16 | for i in $(seq 1 5); do python perftest_endpoint.py --count 5000 --warmup 100 --workers 32 --model tfs-c5xl >> tfs-perftest.log; done 17 | for i in $(seq 1 5); do python perftest_endpoint.py --count 5000 --warmup 100 --workers 4 --model tfs-c518xl >> tfs-perftest.log; done 18 | for i in $(seq 1 5); do python perftest_endpoint.py --count 5000 --warmup 100 --workers 8 --model tfs-c518xl >> tfs-perftest.log; done 19 | for i in $(seq 1 5); do python perftest_endpoint.py --count 5000 --warmup 100 --workers 16 --model tfs-c518xl >> tfs-perftest.log; done 20 
| for i in $(seq 1 5); do python perftest_endpoint.py --count 5000 --warmup 100 --workers 32 --model tfs-c518xl >> tfs-perftest.log; done 21 | for i in $(seq 1 5); do python perftest_endpoint.py --count 5000 --warmup 100 --workers 64 --model tfs-c518xl >> tfs-perftest.log; done 22 | for i in $(seq 1 5); do python perftest_endpoint.py --count 5000 --warmup 100 --workers 128 --model tfs-c518xl >> tfs-perftest.log; done 23 | 24 | 25 | for i in $(seq 1 5); do python test/perf/perftest_endpoint.py --count 5000 --warmup 100 --workers 4 --model tfs-p2xl >> tfs-perftest.log; done 26 | for i in $(seq 1 5); do python test/perf/perftest_endpoint.py --count 5000 --warmup 100 --workers 8 --model tfs-p2xl >> tfs-perftest.log; done 27 | for i in $(seq 1 5); do python test/perf/perftest_endpoint.py --count 5000 --warmup 100 --workers 16 --model tfs-p2xl >> tfs-perftest.log; done 28 | for i in $(seq 1 5); do python test/perf/perftest_endpoint.py --count 5000 --warmup 100 --workers 32 --model tfs-p2xl >> tfs-perftest.log; done 29 | for i in $(seq 1 5); do python test/perf/perftest_endpoint.py --count 5000 --warmup 100 --workers 64 --model tfs-p2xl >> tfs-perftest.log; done 30 | for i in $(seq 1 5); do python test/perf/perftest_endpoint.py --count 5000 --warmup 100 --workers 4 --model tfs-p316xl >> tfs-perftest.log; done 31 | for i in $(seq 1 5); do python test/perf/perftest_endpoint.py --count 5000 --warmup 100 --workers 8 --model tfs-p316xl >> tfs-perftest.log; done 32 | for i in $(seq 1 5); do python test/perf/perftest_endpoint.py --count 5000 --warmup 100 --workers 16 --model tfs-p316xl >> tfs-perftest.log; done 33 | for i in $(seq 1 5); do python test/perf/perftest_endpoint.py --count 5000 --warmup 100 --workers 32 --model tfs-p316xl >> tfs-perftest.log; done 34 | for i in $(seq 1 5); do python test/perf/perftest_endpoint.py --count 5000 --warmup 100 --workers 64 --model tfs-p316xl >> tfs-perftest.log; done 35 | for i in $(seq 1 5); do python test/perf/perftest_endpoint.py --count 5000 --warmup 100 --workers 128 --model tfs-p316xl >> tfs-perftest.log; done 36 | -------------------------------------------------------------------------------- /docker/1.14/Dockerfile.eia: -------------------------------------------------------------------------------- 1 | FROM public.ecr.aws/e2s1w5p1/ubuntu:16.04 2 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true 3 | 4 | ARG TFS_SHORT_VERSION=1.14 5 | ARG S3_TF_VERSION=1-14-0 6 | ARG S3_TF_EI_VERSION=1-4 7 | ARG PYTHON=python3 8 | ARG PYTHON_VERSION=3.6.6 9 | ARG HEALTH_CHECK_VERSION=1.5.3 10 | 11 | # See http://bugs.python.org/issue19846 12 | ENV LANG=C.UTF-8 13 | ENV PYTHONDONTWRITEBYTECODE=1 14 | ENV PYTHONUNBUFFERED=1 15 | ENV MODEL_BASE_PATH=/models 16 | ENV MODEL_NAME=model 17 | ENV SAGEMAKER_TFS_VERSION="${TFS_SHORT_VERSION}" 18 | ENV PATH="$PATH:/sagemaker" 19 | 20 | # nginx + njs 21 | RUN apt-get update \ 22 | && apt-get -y install --no-install-recommends \ 23 | build-essential \ 24 | ca-certificates \ 25 | curl \ 26 | git \ 27 | gnupg2 \ 28 | vim \ 29 | wget \ 30 | zlib1g-dev \ 31 | && curl -s http://nginx.org/keys/nginx_signing.key | apt-key add - \ 32 | && echo 'deb http://nginx.org/packages/ubuntu/ xenial nginx' >> /etc/apt/sources.list \ 33 | && apt-get update \ 34 | && apt-get -y install --no-install-recommends nginx wget nginx-module-njs \ 35 | && apt-get clean \ 36 | && rm -rf /var/lib/apt/lists/* 37 | 38 | RUN wget https://www.python.org/ftp/python/$PYTHON_VERSION/Python-$PYTHON_VERSION.tgz \ 39 | && tar -xvf 
Python-$PYTHON_VERSION.tgz \ 40 | && cd Python-$PYTHON_VERSION \ 41 | && ./configure \ 42 | && make \ 43 | && make install \ 44 | && apt-get update \ 45 | && apt-get install -y --no-install-recommends \ 46 | libbz2-dev \ 47 | libc6-dev \ 48 | libgdbm-dev \ 49 | libncursesw5-dev \ 50 | libreadline-gplv2-dev \ 51 | libsqlite3-dev \ 52 | libssl-dev \ 53 | tk-dev \ 54 | && rm -rf /var/lib/apt/lists/* \ 55 | && make \ 56 | && make install \ 57 | && rm -rf ../Python-$PYTHON_VERSION* \ 58 | && ln -s /usr/local/bin/pip3 /usr/bin/pip \ 59 | && ln -s $(which ${PYTHON}) /usr/local/bin/python 60 | 61 | # Some TF tools expect a "python" binary 62 | RUN pip install -U --no-cache-dir --upgrade \ 63 | pip \ 64 | setuptools 65 | 66 | # cython, falcon, gunicorn, grpc 67 | RUN pip install --no-cache-dir \ 68 | cython==0.29.13 \ 69 | falcon==2.0.0 \ 70 | gunicorn==19.9.0 \ 71 | gevent==1.4.0 \ 72 | requests==2.22.0 \ 73 | docutils==0.14 \ 74 | awscli==1.16.196 \ 75 | grpcio==1.24.1 \ 76 | protobuf==3.10.0 \ 77 | # using --no-dependencies to avoid installing tensorflow binary 78 | && pip install --no-dependencies --no-cache-dir \ 79 | tensorflow-serving-api==1.14.0 80 | 81 | COPY sagemaker /sagemaker 82 | 83 | RUN wget https://amazonei-tools.s3.amazonaws.com/v${HEALTH_CHECK_VERSION}/ei_tools_${HEALTH_CHECK_VERSION}.tar.gz -O /opt/ei_tools_${HEALTH_CHECK_VERSION}.tar.gz \ 84 | && tar -xvf /opt/ei_tools_${HEALTH_CHECK_VERSION}.tar.gz -C /opt/ \ 85 | && rm -rf /opt/ei_tools_${HEALTH_CHECK_VERSION}.tar.gz \ 86 | && chmod a+x /opt/ei_tools/bin/health_check \ 87 | && mkdir -p /opt/ei_health_check/bin \ 88 | && ln -s /opt/ei_tools/bin/health_check /opt/ei_health_check/bin/health_check \ 89 | && ln -s /opt/ei_tools/lib /opt/ei_health_check/lib 90 | 91 | # Expose ports 92 | EXPOSE 8500 8501 93 | 94 | RUN wget https://amazonei-tensorflow.s3.amazonaws.com/tensorflow-serving/v1.14/ubuntu/archive/tensorflow-serving-${S3_TF_VERSION}-ubuntu-ei-${S3_TF_EI_VERSION}.tar.gz \ 95 | -O /tmp/tensorflow-serving-${S3_TF_VERSION}-ubuntu-ei-${S3_TF_EI_VERSION}.tar.gz \ 96 | && cd /tmp \ 97 | && tar zxf tensorflow-serving-${S3_TF_VERSION}-ubuntu-ei-${S3_TF_EI_VERSION}.tar.gz \ 98 | && mv tensorflow-serving-${S3_TF_VERSION}-ubuntu-ei-${S3_TF_EI_VERSION}/amazonei_tensorflow_model_server /usr/bin/tensorflow_model_server \ 99 | && chmod +x /usr/bin/tensorflow_model_server \ 100 | && rm -rf tensorflow-serving-${S3_TF_VERSION}* 101 | 102 | # Set where models should be stored in the container 103 | RUN mkdir -p ${MODEL_BASE_PATH} 104 | 105 | RUN echo '#!/bin/bash \n\n' > /usr/bin/tf_serving_entrypoint.sh \ 106 | && echo '/usr/bin/tensorflow_model_server --port=8500 --rest_api_port=8501 --model_name=${MODEL_NAME} --model_base_path=${MODEL_BASE_PATH}/${MODEL_NAME} "$@"' >> /usr/bin/tf_serving_entrypoint.sh \ 107 | && chmod +x /usr/bin/tf_serving_entrypoint.sh 108 | 109 | CMD ["/usr/bin/tf_serving_entrypoint.sh"] 110 | -------------------------------------------------------------------------------- /docker/2.0/Dockerfile.eia: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | 3 | LABEL maintainer="Amazon AI" 4 | # Specify LABEL for inference pipelines to use SAGEMAKER_BIND_TO_PORT 5 | # https://docs.aws.amazon.com/sagemaker/latest/dg/inference-pipeline-real-time.html 6 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true 7 | 8 | # Add arguments to achieve the version, python and url 9 | ARG PYTHON=python3 10 | ARG PIP=pip3 11 | ARG HEALTH_CHECK_VERSION=1.6.3 12 | ARG 
S3_TF_EI_VERSION=1-5 13 | ARG S3_TF_VERSION=2-0-0 14 | 15 | 16 | # See http://bugs.python.org/issue19846 17 | ENV LANG=C.UTF-8 18 | # Python won’t try to write .pyc or .pyo files on the import of source modules 19 | ENV PYTHONDONTWRITEBYTECODE=1 20 | ENV PYTHONUNBUFFERED=1 21 | ENV SAGEMAKER_TFS_VERSION="${S3_TF_VERSION}" 22 | ENV PATH="$PATH:/sagemaker" 23 | ENV LD_LIBRARY_PATH='/usr/local/lib:$LD_LIBRARY_PATH' 24 | ENV MODEL_BASE_PATH=/models 25 | # The only required piece is the model name in order to differentiate endpoints 26 | ENV MODEL_NAME=model 27 | # To prevent user interaction when installing time zone data package 28 | ENV DEBIAN_FRONTEND=noninteractive 29 | 30 | # nginx + njs 31 | RUN apt-get update \ 32 | && apt-get -y install --no-install-recommends \ 33 | curl \ 34 | gnupg2 \ 35 | ca-certificates \ 36 | git \ 37 | wget \ 38 | vim \ 39 | && curl -s http://nginx.org/keys/nginx_signing.key | apt-key add - \ 40 | && echo 'deb http://nginx.org/packages/ubuntu/ bionic nginx' >> /etc/apt/sources.list \ 41 | && apt-get update \ 42 | && apt-get -y install --no-install-recommends \ 43 | nginx \ 44 | nginx-module-njs \ 45 | python3 \ 46 | python3-pip \ 47 | python3-setuptools \ 48 | && apt-get clean \ 49 | && rm -rf /var/lib/apt/lists/* 50 | 51 | RUN ${PIP} --no-cache-dir install --upgrade \ 52 | pip \ 53 | setuptools 54 | 55 | # cython, falcon, gunicorn, grpc 56 | RUN ${PIP} install --no-cache-dir \ 57 | awscli==1.18.32 \ 58 | cython==0.29.16 \ 59 | falcon==2.0.0 \ 60 | gunicorn==20.0.4 \ 61 | gevent==1.4.0 \ 62 | requests==2.23.0 \ 63 | grpcio==1.27.2 \ 64 | protobuf==3.11.3 \ 65 | # using --no-dependencies to avoid installing tensorflow binary 66 | && ${PIP} install --no-dependencies --no-cache-dir \ 67 | tensorflow-serving-api==2.0.0 68 | 69 | COPY sagemaker /sagemaker 70 | 71 | # Some TF tools expect a "python" binary 72 | RUN ln -s $(which ${PYTHON}) /usr/local/bin/python \ 73 | && ln -s /usr/local/bin/pip3 /usr/bin/pip 74 | 75 | # Get EI tools 76 | RUN wget https://amazonei-tools.s3.amazonaws.com/v${HEALTH_CHECK_VERSION}/ei_tools_${HEALTH_CHECK_VERSION}.tar.gz -O /opt/ei_tools_${HEALTH_CHECK_VERSION}.tar.gz \ 77 | && tar -xvf /opt/ei_tools_${HEALTH_CHECK_VERSION}.tar.gz -C /opt/ \ 78 | && rm -rf /opt/ei_tools_${HEALTH_CHECK_VERSION}.tar.gz \ 79 | && chmod a+x /opt/ei_tools/bin/health_check \ 80 | && mkdir -p /opt/ei_health_check/bin \ 81 | && ln -s /opt/ei_tools/bin/health_check /opt/ei_health_check/bin/health_check \ 82 | && ln -s /opt/ei_tools/lib /opt/ei_health_check/lib 83 | 84 | RUN wget https://amazonei-tensorflow.s3.amazonaws.com/tensorflow-serving/v2.0/archive/tensorflow-serving-${S3_TF_VERSION}-ei-${S3_TF_EI_VERSION}.tar.gz \ 85 | -O /tmp/tensorflow-serving-${S3_TF_VERSION}-ei-${S3_TF_EI_VERSION}.tar.gz \ 86 | && cd /tmp \ 87 | && tar zxf tensorflow-serving-${S3_TF_VERSION}-ei-${S3_TF_EI_VERSION}.tar.gz \ 88 | && mv tensorflow-serving-${S3_TF_VERSION}-ei-${S3_TF_EI_VERSION}/amazonei_tensorflow_model_server /usr/bin/tensorflow_model_server \ 89 | && chmod +x /usr/bin/tensorflow_model_server \ 90 | && rm -rf tensorflow-serving-${S3_TF_VERSION}* 91 | 92 | 93 | # Expose ports 94 | # gRPC and REST 95 | EXPOSE 8500 8501 96 | 97 | # Set where models should be stored in the container 98 | RUN mkdir -p ${MODEL_BASE_PATH} 99 | 100 | # Create a script that runs the model server so we can use environment variables 101 | # while also passing in arguments from the docker command line 102 | RUN echo '#!/bin/bash \n\n' > /usr/bin/tf_serving_entrypoint.sh \ 103 | && echo 
'/usr/bin/tensorflow_model_server --port=8500 --rest_api_port=8501 --model_name=${MODEL_NAME} --model_base_path=${MODEL_BASE_PATH}/${MODEL_NAME} "$@"' >> /usr/bin/tf_serving_entrypoint.sh \ 104 | && chmod +x /usr/bin/tf_serving_entrypoint.sh 105 | 106 | RUN curl https://aws-dlc-licenses.s3.amazonaws.com/tensorflow-2.0/license.txt -o /license.txt 107 | 108 | CMD ["/usr/bin/tf_serving_entrypoint.sh"] 109 | -------------------------------------------------------------------------------- /test/unit/test_proxy_client.py: -------------------------------------------------------------------------------- 1 | import unittest.mock as mock 2 | import pytest 3 | from tensorflow_serving.config import model_server_config_pb2 4 | 5 | from container.sagemaker import proxy_client 6 | 7 | 8 | @pytest.fixture(autouse=True) 9 | def create_sagemaker_folder(tmpdir): 10 | tmpdir.join('sagemaker').ensure(dir=True) 11 | 12 | proxy_client.MODEL_CONFIG_FILE = str(tmpdir) + proxy_client.MODEL_CONFIG_FILE 13 | proxy_client.DEFAULT_LOCK_FILE = str(tmpdir) + proxy_client.DEFAULT_LOCK_FILE 14 | 15 | 16 | def test_grpc_add_model_no_config_file(): 17 | client = proxy_client.GRPCProxyClient(port='9090') 18 | 19 | with pytest.raises(FileNotFoundError) as e: 20 | assert client.add_model('my-model', '/opt/ml/model_path') 21 | assert 'No such file or directory' in str(e.value) 22 | 23 | 24 | @mock.patch('tensorflow_serving.apis.model_management_pb2.ReloadConfigRequest') 25 | @mock.patch('grpc.insecure_channel') 26 | def test_grpc_add_model_call(channel, ReloadConfigRequest): 27 | config = 'model_config_list: {\n}\n' 28 | with open(proxy_client.MODEL_CONFIG_FILE, 'w') as f: 29 | f.write(config) 30 | 31 | client = proxy_client.GRPCProxyClient(port='9090') 32 | client.add_model('my-model', '/opt/ml/model_path') 33 | 34 | calls = [mock.call('0.0.0.0:9090'), 35 | mock.call().unary_unary('/tensorflow.serving.ModelService/GetModelStatus', 36 | request_serializer=mock.ANY, response_deserializer=mock.ANY), 37 | mock.call().unary_unary('/tensorflow.serving.ModelService/HandleReloadConfigRequest', 38 | request_serializer=mock.ANY, response_deserializer=mock.ANY), 39 | mock.call().unary_unary()(ReloadConfigRequest()) 40 | ] 41 | 42 | channel.assert_has_calls(calls) 43 | 44 | config_list = model_server_config_pb2.ModelConfigList() 45 | new_model_config = config_list.config.add() 46 | new_model_config.name = 'my-model' 47 | new_model_config.base_path = '/opt/ml/model_path' 48 | new_model_config.model_platform = 'tensorflow' 49 | 50 | model_server_config = model_server_config_pb2.ModelServerConfig() 51 | model_server_config.model_config_list.MergeFrom(config_list) 52 | 53 | ReloadConfigRequest().config.CopyFrom.assert_called_with(model_server_config) 54 | 55 | expected = 'model_config_list: {\n' 56 | expected += ' config: {\n' 57 | expected += ' name: "my-model",\n' 58 | expected += ' base_path: "/opt/ml/model_path",\n' 59 | expected += ' model_platform: "tensorflow"\n' 60 | expected += ' }\n' 61 | expected += '}\n' 62 | 63 | with open(proxy_client.MODEL_CONFIG_FILE, 'r') as file: 64 | assert file.read() == expected 65 | 66 | 67 | @mock.patch('tensorflow_serving.apis.model_management_pb2.ReloadConfigRequest') 68 | @mock.patch('grpc.insecure_channel') 69 | def test_grpc_delete_model_call(channel, ReloadConfigRequest): 70 | config = 'model_config_list: {\n' 71 | config += ' config: {\n' 72 | config += ' name: "my-model",\n' 73 | config += ' base_path: "/opt/ml/model_path",\n' 74 | config += ' model_platform: "tensorflow"\n' 75 | config += ' }\n' 
76 | config += '}\n' 77 | with open(proxy_client.MODEL_CONFIG_FILE, 'w') as f: 78 | f.write(config) 79 | 80 | client = proxy_client.GRPCProxyClient(port='9090') 81 | client.delete_model('my-model', '/opt/ml/model_path') 82 | 83 | calls = [mock.call('0.0.0.0:9090'), 84 | mock.call().unary_unary('/tensorflow.serving.ModelService/GetModelStatus', 85 | request_serializer=mock.ANY, response_deserializer=mock.ANY), 86 | mock.call().unary_unary('/tensorflow.serving.ModelService/HandleReloadConfigRequest', 87 | request_serializer=mock.ANY, response_deserializer=mock.ANY), 88 | mock.call().unary_unary()(ReloadConfigRequest()) 89 | ] 90 | 91 | channel.assert_has_calls(calls) 92 | 93 | config_list = model_server_config_pb2.ModelConfigList() 94 | model_server_config = model_server_config_pb2.ModelServerConfig() 95 | model_server_config.model_config_list.MergeFrom(config_list) 96 | 97 | ReloadConfigRequest().config.CopyFrom.assert_called_with(model_server_config) 98 | 99 | expected = 'model_config_list: {\n' 100 | expected += '}\n' 101 | 102 | with open(proxy_client.MODEL_CONFIG_FILE, 'r') as file: 103 | assert file.read() == expected 104 | -------------------------------------------------------------------------------- /docker/1.15/Dockerfile.eia: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | 3 | LABEL maintainer="Amazon AI" 4 | # Specify LABEL for inference pipelines to use SAGEMAKER_BIND_TO_PORT 5 | # https://docs.aws.amazon.com/sagemaker/latest/dg/inference-pipeline-real-time.html 6 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true 7 | 8 | # Add arguments to achieve the version, python and url 9 | ARG PYTHON=python3 10 | ARG PIP=pip3 11 | ARG HEALTH_CHECK_VERSION=1.6.3 12 | ARG S3_TF_EI_VERSION=1-5 13 | ARG S3_TF_VERSION=1-15-2 14 | #This is the serving version not TF version 15 | ARG TFS_SHORT_VERSION=1-15-0 16 | 17 | 18 | # See http://bugs.python.org/issue19846 19 | ENV LANG=C.UTF-8 20 | # Python won’t try to write .pyc or .pyo files on the import of source modules 21 | ENV PYTHONDONTWRITEBYTECODE=1 22 | ENV PYTHONUNBUFFERED=1 23 | ENV SAGEMAKER_TFS_VERSION="${TFS_SHORT_VERSION}" 24 | ENV PATH="$PATH:/sagemaker" 25 | ENV LD_LIBRARY_PATH='/usr/local/lib:$LD_LIBRARY_PATH' 26 | ENV MODEL_BASE_PATH=/models 27 | # The only required piece is the model name in order to differentiate endpoints 28 | ENV MODEL_NAME=model 29 | # To prevent user interaction when installing time zone data package 30 | ENV DEBIAN_FRONTEND=noninteractive 31 | 32 | # nginx + njs 33 | RUN apt-get update \ 34 | && apt-get -y install --no-install-recommends \ 35 | curl \ 36 | gnupg2 \ 37 | ca-certificates \ 38 | git \ 39 | wget \ 40 | vim \ 41 | && curl -s http://nginx.org/keys/nginx_signing.key | apt-key add - \ 42 | && echo 'deb http://nginx.org/packages/ubuntu/ bionic nginx' >> /etc/apt/sources.list \ 43 | && apt-get update \ 44 | && apt-get -y install --no-install-recommends \ 45 | nginx \ 46 | nginx-module-njs \ 47 | python3 \ 48 | python3-pip \ 49 | python3-setuptools \ 50 | && apt-get clean \ 51 | && rm -rf /var/lib/apt/lists/* 52 | 53 | RUN ${PIP} --no-cache-dir install --upgrade \ 54 | pip \ 55 | setuptools 56 | 57 | # cython, falcon, gunicorn, grpc 58 | RUN ${PIP} install --no-cache-dir \ 59 | awscli==1.18.32 \ 60 | cython==0.29.16 \ 61 | falcon==2.0.0 \ 62 | gunicorn==20.0.4 \ 63 | gevent==1.4.0 \ 64 | requests==2.23.0 \ 65 | grpcio==1.27.2 \ 66 | protobuf==3.11.3 \ 67 | # using --no-dependencies to avoid installing tensorflow binary 68 | && 
${PIP} install --no-dependencies --no-cache-dir \ 69 | tensorflow-serving-api==1.15.0 70 | 71 | COPY sagemaker /sagemaker 72 | 73 | # Some TF tools expect a "python" binary 74 | RUN ln -s $(which ${PYTHON}) /usr/local/bin/python \ 75 | && ln -s /usr/local/bin/pip3 /usr/bin/pip 76 | 77 | # Get EI tools 78 | RUN wget https://amazonei-tools.s3.amazonaws.com/v${HEALTH_CHECK_VERSION}/ei_tools_${HEALTH_CHECK_VERSION}.tar.gz -O /opt/ei_tools_${HEALTH_CHECK_VERSION}.tar.gz \ 79 | && tar -xvf /opt/ei_tools_${HEALTH_CHECK_VERSION}.tar.gz -C /opt/ \ 80 | && rm -rf /opt/ei_tools_${HEALTH_CHECK_VERSION}.tar.gz \ 81 | && chmod a+x /opt/ei_tools/bin/health_check \ 82 | && mkdir -p /opt/ei_health_check/bin \ 83 | && ln -s /opt/ei_tools/bin/health_check /opt/ei_health_check/bin/health_check \ 84 | && ln -s /opt/ei_tools/lib /opt/ei_health_check/lib 85 | 86 | RUN wget https://amazonei-tensorflow.s3.amazonaws.com/tensorflow-serving/v1.15/ubuntu/archive/tensorflow-serving-${S3_TF_VERSION}-ubuntu-ei-${S3_TF_EI_VERSION}.tar.gz \ 87 | -O /tmp/tensorflow-serving-${S3_TF_VERSION}-ubuntu-ei-${S3_TF_EI_VERSION}.tar.gz \ 88 | && cd /tmp \ 89 | && tar zxf tensorflow-serving-${S3_TF_VERSION}-ubuntu-ei-${S3_TF_EI_VERSION}.tar.gz \ 90 | && mv tensorflow-serving-${S3_TF_VERSION}-ubuntu-ei-${S3_TF_EI_VERSION}/amazonei_tensorflow_model_server /usr/bin/tensorflow_model_server \ 91 | && chmod +x /usr/bin/tensorflow_model_server \ 92 | && rm -rf tensorflow-serving-${S3_TF_VERSION}* 93 | 94 | 95 | # Expose ports 96 | # gRPC and REST 97 | EXPOSE 8500 8501 98 | 99 | # Set where models should be stored in the container 100 | RUN mkdir -p ${MODEL_BASE_PATH} 101 | 102 | # Create a script that runs the model server so we can use environment variables 103 | # while also passing in arguments from the docker command line 104 | RUN echo '#!/bin/bash \n\n' > /usr/bin/tf_serving_entrypoint.sh \ 105 | && echo '/usr/bin/tensorflow_model_server --port=8500 --rest_api_port=8501 --model_name=${MODEL_NAME} --model_base_path=${MODEL_BASE_PATH}/${MODEL_NAME} "$@"' >> /usr/bin/tf_serving_entrypoint.sh \ 106 | && chmod +x /usr/bin/tf_serving_entrypoint.sh 107 | 108 | RUN curl https://aws-dlc-licenses.s3.amazonaws.com/tensorflow/license.txt -o /license.txt 109 | 110 | CMD ["/usr/bin/tf_serving_entrypoint.sh"] 111 | -------------------------------------------------------------------------------- /test/integration/sagemaker/conftest.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 
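# (Editorial note) This conftest wires up the SageMaker integration tests: it registers
# the CLI options used below (--region, --registry, --repo, --versions, --instance-types,
# --accelerator-type, --tag), provides session-scoped boto3/SageMaker client fixtures,
# and auto-skips GPU tests in regions without enough p2/p3 capacity. A typical invocation
# (values are illustrative only, not prescribed anywhere in this repo) might be:
#
#   pytest test/integration/sagemaker --region us-west-2 --registry 123456789012 \
#       --repo sagemaker-tensorflow-serving --versions 1.15.2 --instance-types ml.m5.xlarge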
13 | import os 14 | import random 15 | import time 16 | 17 | import boto3 18 | import pytest 19 | 20 | # these regions have some p2 and p3 instances, but not enough for automated testing 21 | NO_P2_REGIONS = [ 22 | "ca-central-1", 23 | "eu-central-1", 24 | "eu-west-2", 25 | "us-west-1", 26 | "eu-west-3", 27 | "eu-north-1", 28 | "sa-east-1", 29 | "ap-east-1", 30 | "me-south-1" 31 | ] 32 | NO_P3_REGIONS = [ 33 | "ap-southeast-1", 34 | "ap-southeast-2", 35 | "ap-south-1", 36 | "ca-central-1", 37 | "eu-central-1", 38 | "eu-west-2", 39 | "us-west-1", 40 | "eu-west-3", 41 | "eu-north-1", 42 | "sa-east-1", 43 | "ap-east-1", 44 | "me-south-1" 45 | ] 46 | 47 | 48 | def pytest_addoption(parser): 49 | parser.addoption("--region", default="us-west-2") 50 | parser.addoption("--registry") 51 | parser.addoption("--repo") 52 | parser.addoption("--versions") 53 | parser.addoption("--instance-types") 54 | parser.addoption("--accelerator-type") 55 | parser.addoption("--tag") 56 | 57 | 58 | def pytest_configure(config): 59 | os.environ["TEST_REGION"] = config.getoption("--region") 60 | os.environ["TEST_VERSIONS"] = config.getoption("--versions") or "1.11.1,1.12.0,1.13.0" 61 | os.environ["TEST_INSTANCE_TYPES"] = (config.getoption("--instance-types") or 62 | "ml.m5.xlarge,ml.p2.xlarge") 63 | 64 | os.environ["TEST_EI_VERSIONS"] = config.getoption("--versions") or "1.11,1.12" 65 | os.environ["TEST_EI_INSTANCE_TYPES"] = (config.getoption("--instance-types") or 66 | "ml.m5.xlarge") 67 | 68 | if config.getoption("--tag"): 69 | os.environ["TEST_VERSIONS"] = config.getoption("--tag") 70 | os.environ["TEST_EI_VERSIONS"] = config.getoption("--tag") 71 | 72 | 73 | @pytest.fixture(scope="session") 74 | def region(request): 75 | return request.config.getoption("--region") 76 | 77 | 78 | @pytest.fixture(scope="session") 79 | def registry(request, region): 80 | if request.config.getoption("--registry"): 81 | return request.config.getoption("--registry") 82 | 83 | sts = boto3.client( 84 | "sts", 85 | region_name=region, 86 | endpoint_url="https://sts.{}.amazonaws.com".format(region) 87 | ) 88 | return sts.get_caller_identity()["Account"] 89 | 90 | 91 | @pytest.fixture(scope="session") 92 | def boto_session(region): 93 | return boto3.Session(region_name=region) 94 | 95 | 96 | @pytest.fixture(scope="session") 97 | def sagemaker_client(boto_session): 98 | return boto_session.client("sagemaker") 99 | 100 | 101 | @pytest.fixture(scope="session") 102 | def sagemaker_runtime_client(boto_session): 103 | return boto_session.client("runtime.sagemaker") 104 | 105 | 106 | def unique_name_from_base(base, max_length=63): 107 | unique = "%04x" % random.randrange(16 ** 4) # 4-digit hex 108 | ts = str(int(time.time())) 109 | available_length = max_length - 2 - len(ts) - len(unique) 110 | trimmed = base[:available_length] 111 | return "{}-{}-{}".format(trimmed, ts, unique) 112 | 113 | 114 | @pytest.fixture 115 | def model_name(): 116 | return unique_name_from_base("test-tfs") 117 | 118 | 119 | @pytest.fixture(autouse=True) 120 | def skip_gpu_instance_restricted_regions(region, instance_type): 121 | if (region in NO_P2_REGIONS and instance_type.startswith("ml.p2")) or \ 122 | (region in NO_P3_REGIONS and instance_type.startswith("ml.p3")): 123 | pytest.skip("Skipping GPU test in region {}".format(region)) 124 | 125 | 126 | @pytest.fixture(autouse=True) 127 | def skip_by_device_type(request, instance_type): 128 | is_gpu = instance_type[3] in ["g", "p"] 129 | if (request.node.get_closest_marker("skip_gpu") and is_gpu) or \ 130 |
(request.node.get_closest_marker("skip_cpu") and not is_gpu): 131 | pytest.skip("Skipping because running on \"{}\" instance".format(instance_type)) 132 | -------------------------------------------------------------------------------- /docker/2.0/Dockerfile.gpu: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:10.0-base-ubuntu18.04 2 | 3 | LABEL maintainer="Amazon AI" 4 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true 5 | 6 | ARG PYTHON=python3 7 | ARG PIP=pip3 8 | ARG TFS_SHORT_VERSION=2.0.1 9 | ARG TFS_URL=https://tensorflow-aws.s3-us-west-2.amazonaws.com/${TFS_SHORT_VERSION}/Serving/GPU/tensorflow_model_server 10 | 11 | ENV NCCL_VERSION=2.4.7-1+cuda10.0 12 | ENV CUDNN_VERSION=7.5.1.10-1+cuda10.0 13 | ENV TF_TENSORRT_VERSION=5.0.2 14 | 15 | # See http://bugs.python.org/issue19846 16 | ENV LANG=C.UTF-8 17 | ENV PYTHONDONTWRITEBYTECODE=1 18 | # Python won’t try to write .pyc or .pyo files on the import of source modules 19 | ENV PYTHONUNBUFFERED=1 20 | ENV SAGEMAKER_TFS_VERSION="${TFS_SHORT_VERSION}" 21 | ENV PATH="$PATH:/sagemaker" 22 | ENV MODEL_BASE_PATH=/models 23 | # The only required piece is the model name in order to differentiate endpoints 24 | ENV MODEL_NAME=model 25 | # Fix for the interactive mode during an install in step 21 26 | ENV DEBIAN_FRONTEND=noninteractive 27 | 28 | RUN apt-get update \ 29 | && apt-get install -y --no-install-recommends \ 30 | ca-certificates \ 31 | cuda-command-line-tools-10-0 \ 32 | cuda-cublas-10-0 \ 33 | cuda-cufft-10-0 \ 34 | cuda-curand-10-0 \ 35 | cuda-cusolver-10-0 \ 36 | cuda-cusparse-10-0 \ 37 | libcudnn7=${CUDNN_VERSION} \ 38 | libnccl2=${NCCL_VERSION} \ 39 | libgomp1 \ 40 | curl \ 41 | git \ 42 | wget \ 43 | vim \ 44 | build-essential \ 45 | zlib1g-dev \ 46 | python3 \ 47 | python3-pip \ 48 | python3-setuptools \ 49 | && apt-get clean \ 50 | && rm -rf /var/lib/apt/lists/* 51 | 52 | # The 'apt-get install' of nvinfer-runtime-trt-repo-ubuntu1804-4.0.1-ga-cuda10.0 53 | # adds a new list which contains libnvinfer library, so it needs another 54 | # 'apt-get update' to retrieve that list before it can actually install the 55 | # library. 56 | # We don't install libnvinfer-dev since we don't need to build against TensorRT, 57 | # and libnvinfer4 doesn't contain libnvinfer.a static library. 
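# (Editorial note) The RUN below also removes the libnvinfer_plugin*, libnvcaffe_parser*
# and libnvparsers* shared objects after installing libnvinfer5, presumably to keep the
# image small since only the core TensorRT runtime is needed at serving time. A quick,
# illustrative way to confirm the runtime made it into a built image (the tag name here
# is only an example): docker run --rm <your-image-tag> ldconfig -p | grep libnvinfer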
58 | RUN apt-get update \ 59 | && apt-get install -y --no-install-recommends nvinfer-runtime-trt-repo-ubuntu1804-${TF_TENSORRT_VERSION}-ga-cuda10.0 \ 60 | && apt-get update \ 61 | && apt-get install -y --no-install-recommends libnvinfer5=${TF_TENSORRT_VERSION}-1+cuda10.0 \ 62 | && apt-get clean \ 63 | && rm -rf /var/lib/apt/lists/* \ 64 | && rm /usr/lib/x86_64-linux-gnu/libnvinfer_plugin* \ 65 | && rm /usr/lib/x86_64-linux-gnu/libnvcaffe_parser* \ 66 | && rm /usr/lib/x86_64-linux-gnu/libnvparsers* 67 | 68 | RUN ${PIP} --no-cache-dir install --upgrade \ 69 | pip \ 70 | setuptools 71 | 72 | # Some TF tools expect a "python" binary 73 | RUN ln -s $(which ${PYTHON}) /usr/local/bin/python 74 | 75 | # nginx + njs 76 | RUN apt-get update \ 77 | && apt-get -y install --no-install-recommends \ 78 | curl \ 79 | gnupg2 \ 80 | && curl -s http://nginx.org/keys/nginx_signing.key | apt-key add - \ 81 | && echo 'deb http://nginx.org/packages/ubuntu/ bionic nginx' >> /etc/apt/sources.list \ 82 | && apt-get update \ 83 | && apt-get -y install --no-install-recommends \ 84 | nginx \ 85 | nginx-module-njs \ 86 | && apt-get clean \ 87 | && rm -rf /var/lib/apt/lists/* 88 | 89 | # cython, falcon, gunicorn, grpc 90 | RUN ${PIP} install -U --no-cache-dir \ 91 | boto3 \ 92 | awscli==1.16.303 \ 93 | cython==0.29.14 \ 94 | falcon==2.0.0 \ 95 | gunicorn==20.0.4 \ 96 | gevent==1.4.0 \ 97 | requests==2.22.0 \ 98 | grpcio==1.26.0 \ 99 | protobuf==3.11.1 \ 100 | # using --no-dependencies to avoid installing tensorflow binary 101 | && ${PIP} install --no-dependencies --no-cache-dir \ 102 | tensorflow-serving-api-gpu==2.0 103 | 104 | COPY ./sagemaker /sagemaker 105 | 106 | RUN curl $TFS_URL -o /usr/bin/tensorflow_model_server \ 107 | && chmod 555 /usr/bin/tensorflow_model_server 108 | 109 | # Expose gRPC and REST port 110 | EXPOSE 8500 8501 111 | 112 | # Set where models should be stored in the container 113 | RUN mkdir -p ${MODEL_BASE_PATH} 114 | 115 | # Create a script that runs the model server so we can use environment variables 116 | # while also passing in arguments from the docker command line 117 | RUN echo '#!/bin/bash \n\n' > /usr/bin/tf_serving_entrypoint.sh \ 118 | && echo '/usr/bin/tensorflow_model_server --port=8500 --rest_api_port=8501 --model_name=${MODEL_NAME} --model_base_path=${MODEL_BASE_PATH}/${MODEL_NAME} "$@"' >> /usr/bin/tf_serving_entrypoint.sh \ 119 | && chmod +x /usr/bin/tf_serving_entrypoint.sh 120 | 121 | ADD https://raw.githubusercontent.com/aws/aws-deep-learning-containers-utils/master/deep_learning_container.py /usr/local/bin/deep_learning_container.py 122 | 123 | RUN chmod +x /usr/local/bin/deep_learning_container.py 124 | 125 | RUN curl https://aws-dlc-licenses.s3.amazonaws.com/tensorflow-2.0.1/license.txt -o /license.txt 126 | 127 | CMD ["/usr/bin/tf_serving_entrypoint.sh"] 128 | -------------------------------------------------------------------------------- /test/integration/local/test_nginx_config.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019-2022 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. 
See the License for the specific 12 | # language governing permissions and limitations under the License. 13 | 14 | import os 15 | import subprocess 16 | 17 | import pytest 18 | 19 | 20 | @pytest.fixture(scope="session", autouse=True) 21 | def volume(): 22 | try: 23 | model_dir = os.path.abspath("test/resources/models") 24 | subprocess.check_call( 25 | "docker volume create --name nginx_model_volume --opt type=none " 26 | "--opt device={} --opt o=bind".format(model_dir).split() 27 | ) 28 | yield model_dir 29 | finally: 30 | subprocess.check_call("docker volume rm nginx_model_volume".split()) 31 | 32 | 33 | def test_run_nginx_with_default_parameters(docker_base_name, tag, runtime_config): 34 | try: 35 | command = ( 36 | "docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080" 37 | " --mount type=volume,source=nginx_model_volume,target=/opt/ml/model,readonly" 38 | " {}:{} serve" 39 | ).format(runtime_config, docker_base_name, tag) 40 | 41 | proc = subprocess.Popen(command.split(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT) 42 | 43 | lines_seen = { 44 | "error_log /dev/stderr error;": 0, 45 | "proxy_read_timeout 60;": 0, 46 | } 47 | 48 | for stdout_line in iter(proc.stdout.readline, ""): 49 | stdout_line = str(stdout_line) 50 | for line in lines_seen.keys(): 51 | if line in stdout_line: 52 | lines_seen[line] += 1 53 | if "started nginx" in stdout_line: 54 | for value in lines_seen.values(): 55 | assert value == 1 56 | break 57 | 58 | finally: 59 | subprocess.check_call("docker rm -f sagemaker-tensorflow-serving-test".split()) 60 | 61 | 62 | def test_run_nginx_with_env_var_parameters(docker_base_name, tag, runtime_config): 63 | try: 64 | command = ( 65 | "docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080" 66 | " --mount type=volume,source=nginx_model_volume,target=/opt/ml/model,readonly" 67 | " -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info" 68 | " -e SAGEMAKER_NGINX_PROXY_READ_TIMEOUT_SECONDS=63" 69 | " {}:{} serve" 70 | ).format(runtime_config, docker_base_name, tag) 71 | 72 | proc = subprocess.Popen(command.split(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT) 73 | 74 | lines_seen = { 75 | "error_log /dev/stderr info;": 0, 76 | "proxy_read_timeout 63;": 0, 77 | } 78 | 79 | for stdout_line in iter(proc.stdout.readline, ""): 80 | stdout_line = str(stdout_line) 81 | for line in lines_seen.keys(): 82 | if line in stdout_line: 83 | lines_seen[line] += 1 84 | if "started nginx" in stdout_line: 85 | for value in lines_seen.values(): 86 | assert value == 1 87 | break 88 | 89 | finally: 90 | subprocess.check_call("docker rm -f sagemaker-tensorflow-serving-test".split()) 91 | 92 | def test_run_nginx_with_higher_gunicorn_parameter(docker_base_name, tag, runtime_config): 93 | try: 94 | command = ( 95 | "docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080" 96 | " --mount type=volume,source=nginx_model_volume,target=/opt/ml/model,readonly" 97 | " -e SAGEMAKER_NGINX_PROXY_READ_TIMEOUT_SECONDS=60" 98 | " -e SAGEMAKER_GUNICORN_TIMEOUT_SECONDS=120" 99 | " {}:{} serve" 100 | ).format(runtime_config, docker_base_name, tag) 101 | 102 | proc = subprocess.Popen(command.split(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT) 103 | 104 | lines_seen = { 105 | "proxy_read_timeout 120;": 0, # When GUnicorn is higher, set timeout to match. 
106 | } 107 | 108 | for stdout_line in iter(proc.stdout.readline, ""): 109 | stdout_line = str(stdout_line) 110 | for line in lines_seen.keys(): 111 | if line in stdout_line: 112 | lines_seen[line] += 1 113 | if "started nginx" in stdout_line: 114 | for value in lines_seen.values(): 115 | assert value == 1 116 | break 117 | 118 | finally: 119 | subprocess.check_call("docker rm -f sagemaker-tensorflow-serving-test".split()) 120 | -------------------------------------------------------------------------------- /docker/1.14/Dockerfile.gpu: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:10.0-base-ubuntu16.04 2 | 3 | LABEL maintainer="Amazon AI" 4 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true 5 | 6 | # Add arguments to achieve the version, python and url 7 | # PYTHON=python for 2.7 8 | # PYTHON=python3 for 3.5, 3.6 is not available directly on 16.04 9 | ARG PYTHON=python3 10 | # PIP=pip for 2.7 11 | # PIP=pip3 for 3.5, 3.6 is not available directly on 16.04 12 | ARG PIP=pip3 13 | ARG PYTHON_VERSION=3.6.6 14 | ARG TFS_SHORT_VERSION=1.14 15 | 16 | # See http://bugs.python.org/issue19846 17 | ENV LANG=C.UTF-8 18 | ENV NCCL_VERSION=2.4.7-1+cuda10.0 19 | ENV CUDNN_VERSION=7.5.1.10-1+cuda10.0 20 | ENV TF_TENSORRT_VERSION=5.0.2 21 | ENV PYTHONDONTWRITEBYTECODE=1 22 | # Python won’t try to write .pyc or .pyo files on the import of source modules 23 | ENV PYTHONUNBUFFERED=1 24 | ENV SAGEMAKER_TFS_VERSION="${TFS_SHORT_VERSION}" 25 | ENV PATH="$PATH:/sagemaker" 26 | ENV MODEL_BASE_PATH=/models 27 | # The only required piece is the model name in order to differentiate endpoints 28 | ENV MODEL_NAME=model 29 | 30 | RUN apt-get update \ 31 | && apt-get install -y --no-install-recommends \ 32 | ca-certificates \ 33 | cuda-command-line-tools-10-0 \ 34 | cuda-cublas-10-0 \ 35 | cuda-cufft-10-0 \ 36 | cuda-curand-10-0 \ 37 | cuda-cusolver-10-0 \ 38 | cuda-cusparse-10-0 \ 39 | libcudnn7=${CUDNN_VERSION} \ 40 | libnccl2=${NCCL_VERSION} \ 41 | libgomp1 \ 42 | curl \ 43 | git \ 44 | wget \ 45 | vim \ 46 | #next two lines are needed to add python-3.6 should be removed from ubuntu-16.10 47 | build-essential \ 48 | zlib1g-dev \ 49 | && apt-get clean \ 50 | && rm -rf /var/lib/apt/lists/* 51 | 52 | # The 'apt-get install' of nvinfer-runtime-trt-repo-ubuntu1604-4.0.1-ga-cuda10.0 53 | # adds a new list which contains libnvinfer library, so it needs another 54 | # 'apt-get update' to retrieve that list before it can actually install the 55 | # library. 56 | # We don't install libnvinfer-dev since we don't need to build against TensorRT, 57 | # and libnvinfer4 doesn't contain libnvinfer.a static library. 
58 | RUN apt-get update \ 59 | && apt-get install -y --no-install-recommends nvinfer-runtime-trt-repo-ubuntu1604-${TF_TENSORRT_VERSION}-ga-cuda10.0 \ 60 | && apt-get update \ 61 | && apt-get install -y --no-install-recommends libnvinfer5=${TF_TENSORRT_VERSION}-1+cuda10.0 \ 62 | && apt-get clean \ 63 | && rm -rf /var/lib/apt/lists/* \ 64 | && rm /usr/lib/x86_64-linux-gnu/libnvinfer_plugin* \ 65 | && rm /usr/lib/x86_64-linux-gnu/libnvcaffe_parser* \ 66 | && rm /usr/lib/x86_64-linux-gnu/libnvparsers* 67 | 68 | RUN wget https://www.python.org/ftp/python/$PYTHON_VERSION/Python-$PYTHON_VERSION.tgz \ 69 | && tar -xvf Python-$PYTHON_VERSION.tgz \ 70 | && cd Python-$PYTHON_VERSION \ 71 | && ./configure \ 72 | && make \ 73 | && make install \ 74 | && apt-get update \ 75 | && apt-get install -y --no-install-recommends libreadline-gplv2-dev libncursesw5-dev libssl-dev libsqlite3-dev tk-dev libgdbm-dev libc6-dev libbz2-dev \ 76 | && make \ 77 | && make install \ 78 | && rm -rf ../Python-$PYTHON_VERSION* \ 79 | && ln -s /usr/local/bin/pip3 /usr/bin/pip \ 80 | && rm -rf /var/lib/apt/lists/* 81 | 82 | RUN ${PIP} --no-cache-dir install --upgrade pip setuptools 83 | 84 | # Some TF tools expect a "python" binary 85 | RUN ln -s $(which ${PYTHON}) /usr/local/bin/python 86 | 87 | RUN curl https://tensorflow-aws.s3-us-west-2.amazonaws.com/1.14/Serving/GPU/tensorflow_model_server --output tensorflow_model_server \ 88 | && chmod 555 tensorflow_model_server && cp tensorflow_model_server /usr/bin/tensorflow_model_server \ 89 | && rm -f tensorflow_model_server 90 | 91 | # nginx + njs 92 | RUN apt-get update \ 93 | && apt-get -y install --no-install-recommends curl gnupg2 \ 94 | && curl -s http://nginx.org/keys/nginx_signing.key | apt-key add - \ 95 | && echo 'deb http://nginx.org/packages/ubuntu/ xenial nginx' >> /etc/apt/sources.list \ 96 | && apt-get update \ 97 | && apt-get -y install --no-install-recommends nginx nginx-module-njs \ 98 | && apt-get clean \ 99 | && rm -rf /var/lib/apt/lists/* 100 | 101 | # cython, falcon, gunicorn, grpc 102 | RUN ${PIP} install -U --no-cache-dir \ 103 | boto3 \ 104 | awscli==1.16.196 \ 105 | cython==0.29.12 \ 106 | falcon==2.0.0 \ 107 | gunicorn==19.9.0 \ 108 | gevent==1.4.0 \ 109 | requests==2.22.0 \ 110 | grpcio==1.24.1 \ 111 | protobuf==3.10.0 \ 112 | # using --no-dependencies to avoid installing tensorflow binary 113 | && ${PIP} install --no-dependencies --no-cache-dir \ 114 | tensorflow-serving-api-gpu==1.14.0 115 | 116 | COPY ./ / 117 | 118 | # Expose gRPC and REST port 119 | EXPOSE 8500 8501 120 | 121 | # Set where models should be stored in the container 122 | RUN mkdir -p ${MODEL_BASE_PATH} 123 | 124 | # Create a script that runs the model server so we can use environment variables 125 | # while also passing in arguments from the docker command line 126 | RUN echo '#!/bin/bash \n\n' > /usr/bin/tf_serving_entrypoint.sh \ 127 | && echo '/usr/bin/tensorflow_model_server --port=8500 --rest_api_port=8501 --model_name=${MODEL_NAME} --model_base_path=${MODEL_BASE_PATH}/${MODEL_NAME} "$@"' >> /usr/bin/tf_serving_entrypoint.sh \ 128 | && chmod +x /usr/bin/tf_serving_entrypoint.sh 129 | 130 | CMD ["/usr/bin/tf_serving_entrypoint.sh"] 131 | -------------------------------------------------------------------------------- /docker/1.13/Dockerfile.gpu: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:10.0-base-ubuntu16.04 2 | 3 | LABEL maintainer="Amazon AI" 4 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true 5 | 6 
| # Add arguments to achieve the version, python and url 7 | # PYTHON=python for 2.7, PYTHON=python3 for 3.5, 3.6 is not available directly on 16.04 8 | ARG PYTHON=python3 9 | # PIP=pip for 2.7, PIP=pip3 for 3.5, 3.6 is not available directly on 16.04 10 | ARG PIP=pip3 11 | ARG PYTHON_VERSION=3.6.6 12 | ARG TFS_SHORT_VERSION=1.13 13 | 14 | # See http://bugs.python.org/issue19846 15 | ENV LANG C.UTF-8 16 | ENV NCCL_VERSION=2.4.7-1+cuda10.0 17 | ENV CUDNN_VERSION=7.5.1.10-1+cuda10.0 18 | ENV TF_TENSORRT_VERSION=5.0.2 19 | # Python won’t try to write .pyc or .pyo files on the import of source modules 20 | ENV PYTHONDONTWRITEBYTECODE=1 21 | ENV PYTHONUNBUFFERED=1 22 | ENV SAGEMAKER_TFS_VERSION="${TFS_SHORT_VERSION}" 23 | ENV PATH="$PATH:/sagemaker" 24 | ENV MODEL_BASE_PATH=/models 25 | # The only required piece is the model name in order to differentiate endpoints 26 | ENV MODEL_NAME=model 27 | 28 | RUN apt-get update \ 29 | && apt-get install -y --no-install-recommends \ 30 | ca-certificates \ 31 | cuda-command-line-tools-10-0 \ 32 | cuda-cublas-10-0 \ 33 | cuda-cufft-10-0 \ 34 | cuda-curand-10-0 \ 35 | cuda-cusolver-10-0 \ 36 | cuda-cusparse-10-0 \ 37 | libcudnn7=${CUDNN_VERSION} \ 38 | libnccl2=${NCCL_VERSION} \ 39 | libgomp1 \ 40 | curl \ 41 | git \ 42 | wget \ 43 | vim \ 44 | #next two lines are needed to add python-3.6 should be removed from ubuntu-16.10 45 | build-essential \ 46 | zlib1g-dev \ 47 | && apt-get clean \ 48 | && rm -rf /var/lib/apt/lists/* 49 | 50 | # The 'apt-get install' of nvinfer-runtime-trt-repo-ubuntu1604-4.0.1-ga-cuda10.0 51 | # adds a new list which contains libnvinfer library, so it needs another 52 | # 'apt-get update' to retrieve that list before it can actually install the 53 | # library. 54 | # We don't install libnvinfer-dev since we don't need to build against TensorRT, 55 | # and libnvinfer4 doesn't contain libnvinfer.a static library. 
56 | RUN apt-get update \ 57 | && apt-get install -y --no-install-recommends \ 58 | nvinfer-runtime-trt-repo-ubuntu1604-${TF_TENSORRT_VERSION}-ga-cuda10.0 \ 59 | && apt-get update \ 60 | && apt-get install -y --no-install-recommends \ 61 | libnvinfer5=${TF_TENSORRT_VERSION}-1+cuda10.0 \ 62 | && apt-get clean \ 63 | && rm -rf /var/lib/apt/lists/* \ 64 | && rm /usr/lib/x86_64-linux-gnu/libnvinfer_plugin* \ 65 | && rm /usr/lib/x86_64-linux-gnu/libnvcaffe_parser* \ 66 | && rm /usr/lib/x86_64-linux-gnu/libnvparsers* \ 67 | && rm -rf /var/lib/apt/lists/* 68 | 69 | RUN wget https://www.python.org/ftp/python/$PYTHON_VERSION/Python-$PYTHON_VERSION.tgz \ 70 | && tar -xvf Python-$PYTHON_VERSION.tgz && cd Python-$PYTHON_VERSION \ 71 | && ./configure && make && make install \ 72 | && apt-get update && apt-get install -y --no-install-recommends \ 73 | libreadline-gplv2-dev \ 74 | libncursesw5-dev \ 75 | libssl-dev \ 76 | libsqlite3-dev \ 77 | tk-dev libgdbm-dev \ 78 | libc6-dev libbz2-dev \ 79 | && rm -rf /var/lib/apt/lists/* \ 80 | && make && make install \ 81 | && rm -rf ../Python-$PYTHON_VERSION* \ 82 | && ln -s /usr/local/bin/pip3 /usr/bin/pip 83 | 84 | RUN ${PIP} --no-cache-dir install --upgrade pip setuptools 85 | 86 | # Some TF tools expect a "python" binary 87 | RUN ln -s $(which ${PYTHON}) /usr/local/bin/python 88 | 89 | RUN curl https://s3-us-west-2.amazonaws.com/tensorflow-aws/1.13/Serving/GPU/tensorflow_model_server --output tensorflow_model_server && \ 90 | chmod 555 tensorflow_model_server && cp tensorflow_model_server /usr/bin/tensorflow_model_server && \ 91 | rm -f tensorflow_model_server 92 | 93 | # nginx + njs 94 | RUN apt-get update \ 95 | && apt-get -y install --no-install-recommends curl gnupg2 \ 96 | && curl -s http://nginx.org/keys/nginx_signing.key | apt-key add - \ 97 | && echo 'deb http://nginx.org/packages/ubuntu/ xenial nginx' >> /etc/apt/sources.list \ 98 | && apt-get update \ 99 | && apt-get -y install --no-install-recommends nginx nginx-module-njs \ 100 | && apt-get clean \ 101 | && rm -rf /var/lib/apt/lists/* 102 | 103 | # cython, falcon, gunicorn, grpc 104 | RUN ${PIP} install -U --no-cache-dir \ 105 | boto3 \ 106 | awscli==1.16.130 \ 107 | cython==0.29.10 \ 108 | falcon==2.0.0 \ 109 | gunicorn==19.9.0 \ 110 | gevent==1.4.0 \ 111 | requests==2.21.0 \ 112 | grpcio==1.24.1 \ 113 | protobuf==3.10.0 \ 114 | # using --no-dependencies to avoid installing tensorflow binary 115 | && ${PIP} install --no-dependencies --no-cache-dir \ 116 | tensorflow-serving-api-gpu==1.13.0 117 | 118 | COPY ./ / 119 | 120 | # Expose gRPC and REST port 121 | EXPOSE 8500 8501 122 | 123 | # Set where models should be stored in the container 124 | RUN mkdir -p ${MODEL_BASE_PATH} 125 | 126 | # Create a script that runs the model server so we can use environment variables 127 | # while also passing in arguments from the docker command line 128 | RUN echo '#!/bin/bash \n\n' > /usr/bin/tf_serving_entrypoint.sh \ 129 | && echo '/usr/bin/tensorflow_model_server --port=8500 --rest_api_port=8501 --model_name=${MODEL_NAME} --model_base_path=${MODEL_BASE_PATH}/${MODEL_NAME} "$@"' >> /usr/bin/tf_serving_entrypoint.sh \ 130 | && chmod +x /usr/bin/tf_serving_entrypoint.sh 131 | 132 | CMD ["/usr/bin/tf_serving_entrypoint.sh"] 133 | -------------------------------------------------------------------------------- /test/integration/local/test_pre_post_processing_mme.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Amazon.com, Inc. or its affiliates. 
All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 13 | 14 | import json 15 | import os 16 | import shutil 17 | import subprocess 18 | import sys 19 | import time 20 | 21 | import pytest 22 | 23 | import requests 24 | 25 | from multi_model_endpoint_test_utils import make_load_model_request, make_headers 26 | 27 | 28 | PING_URL = "http://localhost:8080/ping" 29 | INVOCATION_URL = "http://localhost:8080/models/{}/invoke" 30 | MODEL_NAME = "half_plus_three" 31 | 32 | 33 | @pytest.fixture(scope="session", autouse=True) 34 | def volume(): 35 | try: 36 | model_dir = os.path.abspath("test/resources/mme_universal_script") 37 | subprocess.check_call( 38 | "docker volume create --name model_volume_mme --opt type=none " 39 | "--opt device={} --opt o=bind".format(model_dir).split()) 40 | yield model_dir 41 | finally: 42 | subprocess.check_call("docker volume rm model_volume_mme".split()) 43 | 44 | 45 | @pytest.fixture(scope="module", autouse=True) 46 | def container(docker_base_name, tag, runtime_config): 47 | try: 48 | command = ( 49 | "docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080" 50 | " --mount type=volume,source=model_volume_mme,target=/opt/ml/models,readonly" 51 | " -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info" 52 | " -e SAGEMAKER_BIND_TO_PORT=8080" 53 | " -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999" 54 | " -e SAGEMAKER_MULTI_MODEL=True" 55 | " {}:{} serve" 56 | ).format(runtime_config, docker_base_name, tag) 57 | 58 | proc = subprocess.Popen(command.split(), stdout=sys.stdout, stderr=subprocess.STDOUT) 59 | 60 | attempts = 0 61 | while attempts < 40: 62 | time.sleep(3) 63 | try: 64 | res_code = requests.get("http://localhost:8080/ping").status_code 65 | if res_code == 200: 66 | break 67 | except: 68 | attempts += 1 69 | pass 70 | 71 | yield proc.pid 72 | finally: 73 | subprocess.check_call("docker rm -f sagemaker-tensorflow-serving-test".split()) 74 | 75 | 76 | @pytest.fixture 77 | def model(): 78 | model_data = { 79 | "model_name": MODEL_NAME, 80 | "url": "/opt/ml/models/half_plus_three/model/half_plus_three" 81 | } 82 | make_load_model_request(json.dumps(model_data)) 83 | return MODEL_NAME 84 | 85 | 86 | @pytest.mark.skip_gpu 87 | def test_ping_service(): 88 | response = requests.get(PING_URL) 89 | assert 200 == response.status_code 90 | 91 | 92 | @pytest.mark.skip_gpu 93 | def test_predict_json(model): 94 | headers = make_headers() 95 | data = "{\"instances\": [1.0, 2.0, 5.0]}" 96 | response = requests.post(INVOCATION_URL.format(model), data=data, headers=headers).json() 97 | assert response == {"predictions": [3.5, 4.0, 5.5]} 98 | 99 | 100 | @pytest.mark.skip_gpu 101 | def test_zero_content(): 102 | headers = make_headers() 103 | x = "" 104 | response = requests.post(INVOCATION_URL.format(MODEL_NAME), data=x, headers=headers) 105 | assert 500 == response.status_code 106 | assert "document is empty" in response.text 107 | 108 | 109 | @pytest.mark.skip_gpu 110 | def test_large_input(): 111 | data_file = "test/resources/inputs/test-large.csv" 112 | 113 | with open(data_file, "r") as file: 
114 | x = file.read() 115 | headers = make_headers(content_type="text/csv") 116 | response = requests.post(INVOCATION_URL.format(MODEL_NAME), data=x, headers=headers).json() 117 | predictions = response["predictions"] 118 | assert len(predictions) == 753936 119 | 120 | 121 | @pytest.mark.skip_gpu 122 | def test_csv_input(): 123 | headers = make_headers(content_type="text/csv") 124 | data = "1.0,2.0,5.0" 125 | response = requests.post(INVOCATION_URL.format(MODEL_NAME), data=data, headers=headers).json() 126 | assert response == {"predictions": [3.5, 4.0, 5.5]} 127 | 128 | 129 | @pytest.mark.skip_gpu 130 | def test_specific_versions(): 131 | for version in ("123", "124"): 132 | headers = make_headers(content_type="text/csv", version=version) 133 | data = "1.0,2.0,5.0" 134 | response = requests.post( 135 | INVOCATION_URL.format(MODEL_NAME), data=data, headers=headers 136 | ).json() 137 | assert response == {"predictions": [3.5, 4.0, 5.5]} 138 | 139 | 140 | @pytest.mark.skip_gpu 141 | def test_unsupported_content_type(): 142 | headers = make_headers("unsupported-type", "predict") 143 | data = "aW1hZ2UgYnl0ZXM=" 144 | response = requests.post(INVOCATION_URL.format(MODEL_NAME), data=data, headers=headers) 145 | assert 500 == response.status_code 146 | assert "unsupported content type" in response.text 147 | -------------------------------------------------------------------------------- /test/integration/local/test_pre_post_processing.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 
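# (Editorial note) This module exercises the pre/post-processing path for single-model
# serving. The volume fixture below is parametrized over test/resources/examples/test1..test5;
# each example directory (inference.py plus optional requirements.txt and lib/) is copied into
# a code/ folder next to the half_plus_three model, the result is mounted at /opt/ml/model,
# and JSON/CSV payloads are posted to /invocations with headers built by make_headers, e.g.
#   Content-Type: application/json
#   X-Amzn-SageMaker-Custom-Attributes: tfs-model-name=half_plus_three,tfs-method=predict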
13 | 14 | import os 15 | import shutil 16 | import subprocess 17 | import sys 18 | import time 19 | 20 | import pytest 21 | 22 | import requests 23 | 24 | 25 | PING_URL = "http://localhost:8080/ping" 26 | INVOCATIONS_URL = "http://localhost:8080/invocations" 27 | 28 | 29 | @pytest.fixture(scope="module", autouse=True, params=["1", "2", "3", "4", "5"]) 30 | def volume(tmpdir_factory, request): 31 | try: 32 | print(str(tmpdir_factory)) 33 | model_dir = os.path.join(tmpdir_factory.mktemp("test"), "model") 34 | code_dir = os.path.join(model_dir, "code") 35 | test_example = "test/resources/examples/test{}".format(request.param) 36 | 37 | model_src_dir = "test/resources/models" 38 | shutil.copytree(model_src_dir, model_dir) 39 | shutil.copytree(test_example, code_dir) 40 | 41 | volume_name = f"model_volume_{request.param}" 42 | subprocess.check_call( 43 | "docker volume create --name {} --opt type=none " 44 | "--opt device={} --opt o=bind".format(volume_name, model_dir).split()) 45 | yield volume_name 46 | finally: 47 | subprocess.check_call(f"docker volume rm {volume_name}".split()) 48 | 49 | 50 | @pytest.fixture(scope="module", autouse=True) 51 | def container(volume, docker_base_name, tag, runtime_config): 52 | try: 53 | command = ( 54 | "docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080" 55 | " --mount type=volume,source={},target=/opt/ml/model,readonly" 56 | " -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info" 57 | " -e SAGEMAKER_BIND_TO_PORT=8080" 58 | " -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999" 59 | " {}:{} serve" 60 | ).format(runtime_config, volume, docker_base_name, tag) 61 | 62 | proc = subprocess.Popen(command.split(), stdout=sys.stdout, stderr=subprocess.STDOUT) 63 | 64 | attempts = 0 65 | while attempts < 40: 66 | time.sleep(3) 67 | try: 68 | res_code = requests.get("http://localhost:8080/ping").status_code 69 | if res_code == 200: 70 | break 71 | except: 72 | attempts += 1 73 | pass 74 | 75 | yield proc.pid 76 | finally: 77 | subprocess.check_call("docker rm -f sagemaker-tensorflow-serving-test".split()) 78 | 79 | 80 | def make_headers(content_type, method, version=None): 81 | custom_attributes = "tfs-model-name=half_plus_three,tfs-method={}".format(method) 82 | if version: 83 | custom_attributes += ",tfs-model-version={}".format(version) 84 | 85 | return { 86 | "Content-Type": content_type, 87 | "X-Amzn-SageMaker-Custom-Attributes": custom_attributes, 88 | } 89 | 90 | 91 | def test_predict_json(): 92 | headers = make_headers("application/json", "predict") 93 | data = "{\"instances\": [1.0, 2.0, 5.0]}" 94 | response = requests.post(INVOCATIONS_URL, data=data, headers=headers).json() 95 | assert response == {"predictions": [3.5, 4.0, 5.5]} 96 | 97 | 98 | def test_zero_content(): 99 | headers = make_headers("application/json", "predict") 100 | data = "" 101 | response = requests.post(INVOCATIONS_URL, data=data, headers=headers) 102 | assert 500 == response.status_code 103 | assert "document is empty" in response.text 104 | 105 | 106 | def test_large_input(): 107 | headers = make_headers("text/csv", "predict") 108 | data_file = "test/resources/inputs/test-large.csv" 109 | 110 | with open(data_file, "r") as file: 111 | large_data = file.read() 112 | response = requests.post(INVOCATIONS_URL, data=large_data, headers=headers).json() 113 | predictions = response["predictions"] 114 | assert len(predictions) == 753936 115 | 116 | 117 | def test_csv_input(): 118 | headers = make_headers("text/csv", "predict") 119 | data = "1.0,2.0,5.0" 120 | response = requests.post(INVOCATIONS_URL, 
data=data, headers=headers).json() 121 | assert response == {"predictions": [3.5, 4.0, 5.5]} 122 | 123 | 124 | def test_predict_specific_versions(): 125 | for version in ("123", "124"): 126 | headers = make_headers("application/json", "predict", version=version) 127 | data = "{\"instances\": [1.0, 2.0, 5.0]}" 128 | response = requests.post(INVOCATIONS_URL, data=data, headers=headers).json() 129 | assert response == {"predictions": [3.5, 4.0, 5.5]} 130 | 131 | 132 | def test_unsupported_content_type(): 133 | headers = make_headers("unsupported-type", "predict") 134 | data = "aW1hZ2UgYnl0ZXM=" 135 | response = requests.post(INVOCATIONS_URL, data=data, headers=headers) 136 | assert 500 == response.status_code 137 | assert "unsupported content type" in response.text 138 | 139 | 140 | def test_ping_service(): 141 | response = requests.get(PING_URL) 142 | assert 200 == response.status_code 143 | -------------------------------------------------------------------------------- /test/perf/perftest_endpoint.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 13 | 14 | import argparse 15 | import multiprocessing 16 | import sys 17 | import time 18 | 19 | import boto3 20 | 21 | 22 | class PerfTester(object): 23 | def __init__(self): 24 | self.engine = None 25 | self.count = None 26 | self.payload_kb = None 27 | self.start_time = None 28 | self.end_time = None 29 | 30 | def test_worker(self, id, args, count, test_data, error_counts): 31 | client = boto3.client('sagemaker-runtime') 32 | 33 | endpoint_name = test_data[0] 34 | data = test_data[1] 35 | for i in range(count): 36 | try: 37 | response = client.invoke_endpoint(EndpointName=endpoint_name, 38 | Body=data, 39 | ContentType='application/json', 40 | Accept='application/json', 41 | CustomAttributes='tfs-model-name=cifar') 42 | _ = response['Body'].read() 43 | except: 44 | error_counts[id] += 1 45 | 46 | def test(self, args, count, test_data): 47 | self.count = args.count * args.workers 48 | self.payload_kb = len(test_data[1]) / 1024.0 49 | 50 | manager = multiprocessing.Manager() 51 | error_counts = manager.dict() 52 | workers = [] 53 | for i in range(args.workers): 54 | error_counts[i] = 0 55 | w = multiprocessing.Process(target=self.test_worker, 56 | args=(i, args, count, test_data, error_counts)) 57 | workers.append(w) 58 | 59 | self.start_time = time.time() 60 | for w in workers: 61 | w.start() 62 | 63 | for w in workers: 64 | w.join() 65 | 66 | self.errors = sum(error_counts.values()) 67 | self.end_time = time.time() 68 | 69 | def report(self, args): 70 | elapsed = self.end_time - self.start_time 71 | report_format = '{},{},{:.3f},{:.3f},{:.3f},{:.3f},{},{},{:.3f}' 72 | report = report_format.format(args.model, 73 | args.workers, 74 | self.count / elapsed, 75 | ((elapsed / args.count) * 1000), 76 | (self.payload_kb * self.count) / elapsed / 1024, 77 | elapsed, 78 | self.count, 79 | self.errors, 80 | 
self.payload_kb) 81 | print('model,workers,r/s,ms/req,mb/s,total s,requests,errors,payload kb') 82 | print(report) 83 | 84 | def parse_args(self, args): 85 | parser = argparse.ArgumentParser('performance tester') 86 | parser.set_defaults(func=lambda x: parser.print_usage()) 87 | parser.add_argument('--count', help='number of test iterations', default=1000, type=int) 88 | parser.add_argument('--warmup', help='number of warmup iterations', default=100, type=int) 89 | parser.add_argument('--workers', help='number of model workers (and clients)', default=1, 90 | type=int) 91 | parser.add_argument('--model', help='model id', default='half_plus_three') 92 | return parser.parse_args(args) 93 | 94 | def run(self, args): 95 | args = self.parse_args(args) 96 | test_data = TEST_DATA[args.model] 97 | self.test(args, min(args.warmup, args.count), test_data) 98 | self.test(args, args.count, test_data) 99 | self.report(args) 100 | 101 | 102 | def _read_file(path): 103 | with open(path, 'rb') as f: 104 | return f.read() 105 | 106 | 107 | def _random_payload(size_in_kb): 108 | return bytes(bytearray(size_in_kb * 1024)) 109 | 110 | 111 | # add/change these to match your endpoints 112 | TEST_DATA = { 113 | 'sm-p2xl': ('sagemaker-tensorflow-2018-11-03-14-38-51-707', b'[' + _read_file('test/resources/inputs/test-cifar.json') + b']'), 114 | 'sm-p316xl': ('sagemaker-tensorflow-2018-11-03-14-38-51-706', b'[' + _read_file('test/resources/inputs/test-cifar.json') + b']'), 115 | 'tfs-p2xl': ('sagemaker-tfs-p2-xlarge', _read_file('test/resources/inputs/test-cifar.json')), 116 | 'tfs-p316xl': ('sagemaker-tfs-p3-16xlarge', _read_file('test/resources/inputs/test-cifar.json')), 117 | 'tfs-c5xl': ('sagemaker-tfs-c5-xlarge', _read_file('test/resources/inputs/test-cifar.json')), 118 | 'tfs-c518xl': ('sagemaker-tfs-c5-18xlarge', _read_file('test/resources/inputs/test-cifar.json')), 119 | 'sm-c5xl': ('sagemaker-tensorflow-cifar-c5.xlarge', b'[' + _read_file('test/resources/inputs/test-cifar.json') + b']'), 120 | 'sm-c518xl': ('sagemaker-tensorflow-cifar-c5.18xlarge', b'[' + _read_file('test/resources/inputs/test-cifar.json') + b']') 121 | } 122 | 123 | if __name__ == '__main__': 124 | PerfTester().run(sys.argv[1:]) 125 | -------------------------------------------------------------------------------- /test/perf/data_generator.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import math 3 | import random 4 | import sys 5 | 6 | _CONTENT_TYPE_CSV = 'text/csv' 7 | _CONTENT_TYPE_JSON = 'application/json' 8 | _CONTENT_TYPE_JSONLINES = 'application/jsonlines' 9 | 10 | _VALID_CONTENT_TYPES = [_CONTENT_TYPE_JSON, _CONTENT_TYPE_CSV, _CONTENT_TYPE_JSONLINES] 11 | _UNIT_FOR_PAYLOAD_SIZE = {'B': 1, 'KB': 1024, 'MB': 1048576} 12 | 13 | def generate_json(shape, payload_size): 14 | one_record = _generate_json_recursively(shape) 15 | if payload_size: 16 | per_record_size = len(str(one_record)) 17 | num_records = _get_num_records_for_json_payload(payload_size, per_record_size) 18 | records = [] 19 | for record in range(0, num_records): 20 | records.append(one_record) 21 | return str(records) 22 | else: 23 | return str(one_record) 24 | 25 | 26 | def _generate_json_recursively(shape): 27 | if len(shape) == 1: 28 | input = list(_random_input(shape[0])) 29 | return input 30 | else: 31 | inner_list = _generate_json_recursively(shape[1:]) 32 | return [inner_list for _ in range(0, shape[0])] 33 | 34 | 35 | def generate_jsonlines(shape, payload_size): 36 | one_row = 
_generate_json_recursively(shape) 37 | if payload_size: 38 | one_row_string = str(one_row) 39 | num_records = _get_num_records_for_json_payload(payload_size, len(one_row_string)) 40 | return '\n'.join([one_row_string for _ in range(0, num_records)]) 41 | else: 42 | return one_row 43 | 44 | 45 | def _get_num_records_for_json_payload(payload_size, one_record_size): 46 | return max(int(round(float(payload_size) / one_record_size)), 1) 47 | 48 | 49 | def generate_csv(shape, payload_size): 50 | # First, calculate overhead from commas. 51 | try: 52 | rows, columns = shape 53 | except ValueError: 54 | rows = 1 55 | columns = shape[0] 56 | # Override number of rows. 57 | if payload_size: 58 | # Divide by two given the assumption is that inputs are single-digit integers separate by commas and lines 59 | # end in newline characters. 60 | rows = int(math.ceil((float(payload_size) / columns / 2.0))) 61 | 62 | row = ','.join(map(lambda x: str(x), _random_input(columns))) 63 | return '\n'.join([row for _ in range(0, rows)]) 64 | 65 | 66 | def _random_input(n): 67 | for i in range(0, n): 68 | yield random.randint(0, 9) 69 | 70 | 71 | def _map_payload_size_given_unit(payload_size, unit_of_payload): 72 | return payload_size * _UNIT_FOR_PAYLOAD_SIZE[unit_of_payload] 73 | 74 | 75 | def generate_data(content_type, shape, payload_size, unit_of_payload='B'): 76 | assert unit_of_payload in _UNIT_FOR_PAYLOAD_SIZE.keys() 77 | payload_size = _map_payload_size_given_unit(payload_size, unit_of_payload) 78 | if content_type == _CONTENT_TYPE_JSONLINES: 79 | return generate_jsonlines(shape, payload_size) 80 | elif content_type == _CONTENT_TYPE_JSON: 81 | return generate_json(shape, payload_size) 82 | elif content_type == _CONTENT_TYPE_CSV: 83 | return generate_csv(shape, payload_size) 84 | else: 85 | raise ValueError('Content-type {} must be in {}'.format(content_type, _VALID_CONTENT_TYPES)) 86 | 87 | 88 | if __name__ == '__main__': 89 | parser = argparse.ArgumentParser(description='Generates test data of different shapes and content types.') 90 | parser.set_defaults(func=lambda x: parser.print_usage()) 91 | 92 | parser.add_argument('-c', '--content-type', help='Content type to generate data for.', type=str, required=True, 93 | choices=_VALID_CONTENT_TYPES) 94 | parser.add_argument('-s', '--shape', help='Shape of the output data. Behavior depends on content-type. ' 95 | 'For text/csv, "50,32" generates 50 lines with 32 columns. ' 96 | 'More than two values are invalid for CSV shape. ' 97 | 'For application/json, "32,32,3" generates a JSON object with shape. ' 98 | '32,32,3. For application/jsonlines, the shape given represents a ' 99 | 'single instance as one nested JSON list.', 100 | type=str, required=True) 101 | parser.add_argument('-p', '--payload-size', help='Approximate payload size. If supplied with text/csv, ' 102 | 'ignores the number of rows given in shape and repeats a ' 103 | 'single row until the payload is full. If supplied with ' 104 | 'application/json or application/jsonlines, repeats the ' 105 | 'payload with the given shape until the payload is full. ' 106 | 'application/json payload is always one line. ' 107 | 'Unit size defaults to the number of bytes unless --unit is set.', 108 | type=int) 109 | parser.add_argument('-u', '--unit', help='Unit for payload size. 
One of "B", "KB", or "MB" for bytes, kilobytes, ' 110 | 'and megabytes, respectively', type=str, 111 | choices=_UNIT_FOR_PAYLOAD_SIZE.keys()) 112 | 113 | args = parser.parse_args(sys.argv[1:]) 114 | parsed_shape = list(map(lambda x: int(x), args.shape.split(','))) 115 | print(generate_data(args.content_type, parsed_shape, args.payload_size, args.unit)) 116 | -------------------------------------------------------------------------------- /test/integration/sagemaker/test_tfs.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 13 | import os 14 | 15 | import pytest 16 | 17 | import util 18 | 19 | NON_P3_REGIONS = ["ap-southeast-1", "ap-southeast-2", "ap-south-1", 20 | "ca-central-1", "eu-central-1", "eu-west-2", "us-west-1"] 21 | 22 | 23 | @pytest.fixture(params=os.environ["TEST_VERSIONS"].split(",")) 24 | def version(request): 25 | return request.param 26 | 27 | 28 | @pytest.fixture(scope="session") 29 | def repo(request): 30 | return request.config.getoption("--repo") or "sagemaker-tensorflow-serving" 31 | 32 | 33 | @pytest.fixture 34 | def tag(request, version, instance_type): 35 | if request.config.getoption("--tag"): 36 | return request.config.getoption("--tag") 37 | 38 | arch = "gpu" if instance_type.startswith("ml.p") else "cpu" 39 | return f"{version}-{arch}" 40 | 41 | 42 | @pytest.fixture 43 | def image_uri(registry, region, repo, tag): 44 | return util.image_uri(registry, region, repo, tag) 45 | 46 | 47 | @pytest.fixture(params=os.environ["TEST_INSTANCE_TYPES"].split(",")) 48 | def instance_type(request, region): 49 | return request.param 50 | 51 | 52 | @pytest.fixture(scope="module") 53 | def accelerator_type(): 54 | return None 55 | 56 | 57 | @pytest.fixture(scope="session") 58 | def tfs_model(region, boto_session): 59 | return util.find_or_put_model_data(region, 60 | boto_session, 61 | "test/data/tfs-model.tar.gz") 62 | 63 | 64 | @pytest.fixture(scope='session') 65 | def python_model_with_requirements(region, boto_session): 66 | return util.find_or_put_model_data(region, 67 | boto_session, 68 | "test/data/python-with-requirements.tar.gz") 69 | 70 | 71 | @pytest.fixture(scope='session') 72 | def python_model_with_lib(region, boto_session): 73 | return util.find_or_put_model_data(region, 74 | boto_session, 75 | "test/data/python-with-lib.tar.gz") 76 | 77 | 78 | def test_tfs_model(boto_session, sagemaker_client, 79 | sagemaker_runtime_client, model_name, tfs_model, 80 | image_uri, instance_type, accelerator_type): 81 | input_data = {"instances": [1.0, 2.0, 5.0]} 82 | util.create_and_invoke_endpoint(boto_session, sagemaker_client, 83 | sagemaker_runtime_client, model_name, tfs_model, 84 | image_uri, instance_type, accelerator_type, input_data) 85 | 86 | 87 | def test_batch_transform(region, boto_session, sagemaker_client, 88 | model_name, tfs_model, image_uri, 89 | instance_type): 90 | results = util.run_batch_transform_job(region=region, 91 | 
boto_session=boto_session, 92 | model_data=tfs_model, 93 | image_uri=image_uri, 94 | model_name=model_name, 95 | sagemaker_client=sagemaker_client, 96 | instance_type=instance_type) 97 | assert len(results) == 10 98 | for r in results: 99 | assert r == [3.5, 4.0, 5.5] 100 | 101 | 102 | def test_python_model_with_requirements(boto_session, sagemaker_client, 103 | sagemaker_runtime_client, model_name, 104 | python_model_with_requirements, image_uri, instance_type, 105 | accelerator_type): 106 | 107 | if "p3" in instance_type: 108 | pytest.skip("skip for p3 instance") 109 | 110 | # the python service needs to transform this to get a valid prediction 111 | input_data = {"x": [1.0, 2.0, 5.0]} 112 | output_data = util.create_and_invoke_endpoint(boto_session, sagemaker_client, 113 | sagemaker_runtime_client, model_name, 114 | python_model_with_requirements, image_uri, 115 | instance_type, accelerator_type, input_data) 116 | 117 | # python service adds this to tfs response 118 | assert output_data["python"] is True 119 | assert output_data["pillow"] == "6.0.0" 120 | 121 | 122 | def test_python_model_with_lib(boto_session, sagemaker_client, 123 | sagemaker_runtime_client, model_name, python_model_with_lib, 124 | image_uri, instance_type, accelerator_type): 125 | 126 | if "p3" in instance_type: 127 | pytest.skip("skip for p3 instance") 128 | 129 | # the python service needs to transform this to get a valid prediction 130 | input_data = {"x": [1.0, 2.0, 5.0]} 131 | output_data = util.create_and_invoke_endpoint(boto_session, sagemaker_client, 132 | sagemaker_runtime_client, model_name, python_model_with_lib, 133 | image_uri, instance_type, accelerator_type, input_data) 134 | 135 | # python service adds this to tfs response 136 | assert output_data["python"] is True 137 | assert output_data["dummy_module"] == "0.1" 138 | -------------------------------------------------------------------------------- /docker/2.1/Dockerfile.gpu: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:10.1-base-ubuntu18.04 2 | 3 | LABEL maintainer="Amazon AI" 4 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true 5 | 6 | ARG PYTHON=python3 7 | ARG PIP=pip3 8 | ARG TFS_SHORT_VERSION=2.1 9 | ARG TFS_URL=https://tensorflow-aws.s3-us-west-2.amazonaws.com/2.1/Serving/GPU/tensorflow_model_server 10 | 11 | ENV NCCL_VERSION=2.4.7-1+cuda10.1 12 | ENV CUDNN_VERSION=7.6.2.24-1+cuda10.1 13 | ENV TF_TENSORRT_VERSION=5.0.2 14 | ENV TF_TENSORRT_LIB_VERSION=6.0.1 15 | 16 | # See http://bugs.python.org/issue19846 17 | ENV LANG=C.UTF-8 18 | ENV PYTHONDONTWRITEBYTECODE=1 19 | # Python won’t try to write .pyc or .pyo files on the import of source modules 20 | ENV PYTHONUNBUFFERED=1 21 | ENV SAGEMAKER_TFS_VERSION="${TFS_SHORT_VERSION}" 22 | ENV PATH="$PATH:/sagemaker" 23 | ENV MODEL_BASE_PATH=/models 24 | # The only required piece is the model name in order to differentiate endpoints 25 | ENV MODEL_NAME=model 26 | # Fix for the interactive mode during an install in step 21 27 | ENV DEBIAN_FRONTEND=noninteractive 28 | 29 | # https://forums.developer.nvidia.com/t/notice-cuda-linux-repository-key-rotation/212771 30 | # Fix cuda repo's GPG key. Nvidia is no longer updating the machine-learning repo. 31 | # Need to manually pull and install necessary debs to continue using these versions. 
32 | RUN rm /etc/apt/sources.list.d/cuda.list \ 33 | && apt-key del 7fa2af80 \ 34 | && apt-get update && apt-get install -y --no-install-recommends wget \ 35 | && wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-keyring_1.0-1_all.deb \ 36 | && dpkg -i cuda-keyring_1.0-1_all.deb \ 37 | && wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/libcudnn7_${CUDNN_VERSION}_amd64.deb \ 38 | && dpkg -i libcudnn7_${CUDNN_VERSION}_amd64.deb \ 39 | && wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/libnccl2_${NCCL_VERSION}_amd64.deb \ 40 | && dpkg -i libnccl2_${NCCL_VERSION}_amd64.deb \ 41 | && rm *.deb 42 | 43 | # allow unauthenticated and allow downgrades for special libcublas library 44 | RUN apt-get update \ 45 | && apt-get install -y --no-install-recommends --allow-unauthenticated --allow-downgrades\ 46 | ca-certificates \ 47 | cuda-command-line-tools-10-1 \ 48 | cuda-cufft-10-1 \ 49 | cuda-curand-10-1 \ 50 | cuda-cusolver-10-1 \ 51 | cuda-cusparse-10-1 \ 52 | #cuda-cublas-dev not available with 10-1, install libcublas instead 53 | libcublas10=10.1.0.105-1 \ 54 | libcublas-dev=10.1.0.105-1 \ 55 | libgomp1 \ 56 | curl \ 57 | git \ 58 | wget \ 59 | vim \ 60 | build-essential \ 61 | zlib1g-dev \ 62 | python3 \ 63 | python3-pip \ 64 | python3-setuptools \ 65 | && apt-get clean \ 66 | && rm -rf /var/lib/apt/lists/* 67 | 68 | RUN ${PIP} --no-cache-dir install --upgrade \ 69 | pip \ 70 | setuptools 71 | 72 | # Some TF tools expect a "python" binary 73 | RUN ln -s $(which ${PYTHON}) /usr/local/bin/python 74 | 75 | # nginx + njs 76 | RUN apt-get update \ 77 | && apt-get -y install --no-install-recommends \ 78 | curl \ 79 | gnupg2 \ 80 | && curl -s http://nginx.org/keys/nginx_signing.key | apt-key add - \ 81 | && echo 'deb http://nginx.org/packages/ubuntu/ bionic nginx' >> /etc/apt/sources.list \ 82 | && apt-get update \ 83 | && apt-get -y install --no-install-recommends \ 84 | nginx \ 85 | nginx-module-njs \ 86 | && apt-get clean \ 87 | && rm -rf /var/lib/apt/lists/* 88 | 89 | # https://forums.developer.nvidia.com/t/notice-cuda-linux-repository-key-rotation/212771 90 | # Nvidia is no longer updating the machine-learning repo. 91 | # Need to manually pull and install necessary debs to continue using these versions. 92 | # nvinfer-runtime-trt-repo doesn't have a 1804-cuda10.1 version. 
93 | RUN wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/nvinfer-runtime-trt-repo-ubuntu1804-${TF_TENSORRT_VERSION}-ga-cuda10.0_1-1_amd64.deb \ 94 | && dpkg -i nvinfer-runtime-trt-repo-ubuntu1804-${TF_TENSORRT_VERSION}-ga-cuda10.0_1-1_amd64.deb \ 95 | && wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/libnvinfer6_${TF_TENSORRT_LIB_VERSION}-1+cuda10.1_amd64.deb \ 96 | && dpkg -i libnvinfer6_${TF_TENSORRT_LIB_VERSION}-1+cuda10.1_amd64.deb \ 97 | && rm *.deb \ 98 | && rm -rf /var/lib/apt/lists/* 99 | 100 | # cython, falcon, gunicorn, grpc 101 | RUN ${PIP} install -U --no-cache-dir \ 102 | boto3 \ 103 | awscli \ 104 | cython==0.29.14 \ 105 | falcon==2.0.0 \ 106 | gunicorn==20.0.4 \ 107 | gevent==1.4.0 \ 108 | requests==2.22.0 \ 109 | grpcio==1.27.1 \ 110 | protobuf==3.11.1 \ 111 | # using --no-dependencies to avoid installing tensorflow binary 112 | && ${PIP} install --no-dependencies --no-cache-dir \ 113 | tensorflow-serving-api-gpu==2.1.0 114 | 115 | COPY ./sagemaker /sagemaker 116 | 117 | RUN curl $TFS_URL -o /usr/bin/tensorflow_model_server \ 118 | && chmod 555 /usr/bin/tensorflow_model_server 119 | 120 | # Expose gRPC and REST port 121 | EXPOSE 8500 8501 122 | 123 | # Set where models should be stored in the container 124 | RUN mkdir -p ${MODEL_BASE_PATH} 125 | 126 | # Create a script that runs the model server so we can use environment variables 127 | # while also passing in arguments from the docker command line 128 | RUN echo '#!/bin/bash \n\n' > /usr/bin/tf_serving_entrypoint.sh \ 129 | && echo '/usr/bin/tensorflow_model_server --port=8500 --rest_api_port=8501 --model_name=${MODEL_NAME} --model_base_path=${MODEL_BASE_PATH}/${MODEL_NAME} "$@"' >> /usr/bin/tf_serving_entrypoint.sh \ 130 | && chmod +x /usr/bin/tf_serving_entrypoint.sh 131 | 132 | ADD https://raw.githubusercontent.com/aws/aws-deep-learning-containers-utils/master/deep_learning_container.py /usr/local/bin/deep_learning_container.py 133 | 134 | RUN chmod +x /usr/local/bin/deep_learning_container.py 135 | 136 | RUN curl https://aws-dlc-licenses.s3.amazonaws.com/tensorflow-2.1/license.txt -o /license.txt 137 | 138 | CMD ["/usr/bin/tf_serving_entrypoint.sh"] 139 | -------------------------------------------------------------------------------- /docker/1.15/Dockerfile.gpu: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:10.0-base-ubuntu18.04 2 | 3 | LABEL maintainer="Amazon AI" 4 | # Specify LABEL for inference pipelines to use SAGEMAKER_BIND_TO_PORT 5 | # https://docs.aws.amazon.com/sagemaker/latest/dg/inference-pipeline-real-time.html 6 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true 7 | 8 | # Add arguments to achieve the version, python and url 9 | ARG PYTHON=python3 10 | ARG PIP=pip3 11 | ARG TFS_SHORT_VERSION=1.15.2 12 | ARG TF_MODEL_SERVER_SOURCE=https://tensorflow-aws.s3-us-west-2.amazonaws.com/${TFS_SHORT_VERSION}/Serving/GPU/tensorflow_model_server 13 | 14 | # See http://bugs.python.org/issue19846 15 | ENV LANG=C.UTF-8 16 | ENV NCCL_VERSION=2.4.7-1+cuda10.0 17 | ENV CUDNN_VERSION=7.5.1.10-1+cuda10.0 18 | ENV TF_TENSORRT_VERSION=5.0.2 19 | ENV TF_TENSORRT_LIB_VERSION=5.1.2 20 | ENV PYTHONDONTWRITEBYTECODE=1 21 | # Python won’t try to write .pyc or .pyo files on the import of source modules 22 | ENV PYTHONUNBUFFERED=1 23 | ENV SAGEMAKER_TFS_VERSION="${TFS_SHORT_VERSION}" 24 | ENV PATH="$PATH:/sagemaker" 25 | ENV MODEL_BASE_PATH=/models 26 | # The only required 
piece is the model name in order to differentiate endpoints 27 | ENV MODEL_NAME=model 28 | # Prevent docker build from getting stopped by request for user interaction 29 | ENV DEBIAN_FRONTEND=noninteractive 30 | 31 | # https://forums.developer.nvidia.com/t/notice-cuda-linux-repository-key-rotation/212771 32 | # Fix cuda repo's GPG key. Nvidia is no longer updating the machine-learning repo. 33 | # Need to manually pull and install necessary debs to continue using these versions. 34 | RUN rm /etc/apt/sources.list.d/cuda.list \ 35 | && apt-key del 7fa2af80 \ 36 | && apt-get update && apt-get install -y --no-install-recommends wget \ 37 | && wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-keyring_1.0-1_all.deb \ 38 | && dpkg -i cuda-keyring_1.0-1_all.deb \ 39 | && wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/libcudnn7_${CUDNN_VERSION}_amd64.deb \ 40 | && dpkg -i libcudnn7_${CUDNN_VERSION}_amd64.deb \ 41 | && wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/libnccl2_${NCCL_VERSION}_amd64.deb \ 42 | && dpkg -i libnccl2_${NCCL_VERSION}_amd64.deb \ 43 | && rm *.deb 44 | 45 | RUN apt-get update \ 46 | && apt-get install -y --no-install-recommends \ 47 | ca-certificates \ 48 | cuda-command-line-tools-10-0 \ 49 | cuda-cublas-10-0 \ 50 | cuda-cufft-10-0 \ 51 | cuda-curand-10-0 \ 52 | cuda-cusolver-10-0 \ 53 | cuda-cusparse-10-0 \ 54 | libgomp1 \ 55 | curl \ 56 | git \ 57 | wget \ 58 | vim \ 59 | python3 \ 60 | python3-pip \ 61 | python3-setuptools \ 62 | && apt-get clean \ 63 | && rm -rf /var/lib/apt/lists/* 64 | 65 | RUN ${PIP} --no-cache-dir install --upgrade \ 66 | pip \ 67 | setuptools 68 | 69 | # Some TF tools expect a "python" binary 70 | RUN ln -s $(which ${PYTHON}) /usr/local/bin/python \ 71 | && ln -s /usr/local/bin/pip3 /usr/bin/pip 72 | 73 | # nginx + njs 74 | RUN apt-get update \ 75 | && apt-get -y install --no-install-recommends \ 76 | curl \ 77 | gnupg2 \ 78 | && curl -s http://nginx.org/keys/nginx_signing.key | apt-key add - \ 79 | && echo 'deb http://nginx.org/packages/ubuntu/ bionic nginx' >> /etc/apt/sources.list \ 80 | && apt-get update \ 81 | && apt-get -y install --no-install-recommends \ 82 | nginx \ 83 | nginx-module-njs \ 84 | && apt-get clean \ 85 | && rm -rf /var/lib/apt/lists/* 86 | 87 | # cython, falcon, gunicorn, grpc 88 | RUN ${PIP} install -U --no-cache-dir \ 89 | boto3 \ 90 | awscli==1.18.34 \ 91 | pyYAML==5.3.1 \ 92 | cython==0.29.12 \ 93 | falcon==2.0.0 \ 94 | gunicorn==19.9.0 \ 95 | gevent==1.4.0 \ 96 | requests==2.22.0 \ 97 | grpcio==1.24.1 \ 98 | protobuf==3.10.0 \ 99 | # using --no-dependencies to avoid installing tensorflow binary 100 | && ${PIP} install --no-dependencies --no-cache-dir \ 101 | tensorflow-serving-api-gpu==1.15.0 102 | 103 | # https://forums.developer.nvidia.com/t/notice-cuda-linux-repository-key-rotation/212771 104 | # Fix cuda repo's GPG key. Nvidia is no longer updating the machine-learning repo. 105 | # Need to manually pull and install necessary debs to continue using these versions. 
106 | RUN wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/nvinfer-runtime-trt-repo-ubuntu1804-${TF_TENSORRT_VERSION}-ga-cuda10.0_1-1_amd64.deb \ 107 | && dpkg -i nvinfer-runtime-trt-repo-ubuntu1804-${TF_TENSORRT_VERSION}-ga-cuda10.0_1-1_amd64.deb \ 108 | && wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/libnvinfer5_${TF_TENSORRT_LIB_VERSION}-1+cuda10.0_amd64.deb \ 109 | && dpkg -i libnvinfer5_${TF_TENSORRT_LIB_VERSION}-1+cuda10.0_amd64.deb \ 110 | && rm *.deb \ 111 | && rm -rf /var/lib/apt/lists/* \ 112 | && rm /usr/lib/x86_64-linux-gnu/libnvinfer_plugin* \ 113 | && rm /usr/lib/x86_64-linux-gnu/libnvcaffe_parser* \ 114 | && rm /usr/lib/x86_64-linux-gnu/libnvparsers* 115 | 116 | COPY sagemaker /sagemaker 117 | 118 | RUN curl ${TF_MODEL_SERVER_SOURCE} -o /usr/bin/tensorflow_model_server \ 119 | && chmod 555 /usr/bin/tensorflow_model_server 120 | 121 | # Expose gRPC and REST port 122 | EXPOSE 8500 8501 123 | 124 | # Set where models should be stored in the container 125 | RUN mkdir -p ${MODEL_BASE_PATH} 126 | 127 | # Create a script that runs the model server so we can use environment variables 128 | # while also passing in arguments from the docker command line 129 | RUN echo '#!/bin/bash \n\n' > /usr/bin/tf_serving_entrypoint.sh \ 130 | && echo '/usr/bin/tensorflow_model_server --port=8500 --rest_api_port=8501 --model_name=${MODEL_NAME} --model_base_path=${MODEL_BASE_PATH}/${MODEL_NAME} "$@"' >> /usr/bin/tf_serving_entrypoint.sh \ 131 | && chmod +x /usr/bin/tf_serving_entrypoint.sh 132 | 133 | ADD https://raw.githubusercontent.com/aws/aws-deep-learning-containers-utils/master/deep_learning_container.py /usr/local/bin/deep_learning_container.py 134 | 135 | RUN chmod +x /usr/local/bin/deep_learning_container.py 136 | 137 | RUN curl https://aws-dlc-licenses.s3.amazonaws.com/tensorflow/license.txt -o /license.txt 138 | 139 | CMD ["/usr/bin/tf_serving_entrypoint.sh"] 140 | -------------------------------------------------------------------------------- /test/unit/test_deep_learning_container.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the 'License'). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the 'license' file accompanying this file. This file is 10 | # distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 
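# Unit tests for docker/build_artifacts/deep_learning_container.py: instance-id and region lookups against
# the EC2 instance-metadata endpoint and the query_bucket() telemetry call, with invalid values, HTTP errors,
# connection errors, and timeouts simulated via requests_mock.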
13 | from __future__ import absolute_import 14 | 15 | import unittest 16 | 17 | from docker.build_artifacts import deep_learning_container as deep_learning_container_to_test 18 | import pytest 19 | import requests 20 | 21 | 22 | @pytest.fixture(name='fixture_valid_instance_id') 23 | def fixture_valid_instance_id(requests_mock): 24 | return requests_mock.get('http://169.254.169.254/latest/meta-data/instance-id', 25 | text='i-123t32e11s32t1231') 26 | 27 | 28 | @pytest.fixture(name='fixture_invalid_instance_id') 29 | def fixture_invalid_instance_id(requests_mock): 30 | return requests_mock.get('http://169.254.169.254/latest/meta-data/instance-id', text='i-123') 31 | 32 | 33 | @pytest.fixture(name='fixture_none_instance_id') 34 | def fixture_none_instance_id(requests_mock): 35 | return requests_mock.get('http://169.254.169.254/latest/meta-data/instance-id', text=None) 36 | 37 | 38 | @pytest.fixture(name='fixture_invalid_region') 39 | def fixture_invalid_region(requests_mock): 40 | return requests_mock.get('http://169.254.169.254/latest/dynamic/instance-identity/document', 41 | json={'region': 'test'}) 42 | 43 | 44 | @pytest.fixture(name='fixture_valid_region') 45 | def fixture_valid_region(requests_mock): 46 | return requests_mock.get('http://169.254.169.254/latest/dynamic/instance-identity/document', 47 | json={'region': 'us-east-1'}) 48 | 49 | 50 | def test_retrieve_instance_id(fixture_valid_instance_id): 51 | result = deep_learning_container_to_test._retrieve_instance_id() 52 | assert 'i-123t32e11s32t1231' == result 53 | 54 | 55 | def test_retrieve_none_instance_id(fixture_none_instance_id): 56 | result = deep_learning_container_to_test._retrieve_instance_id() 57 | assert result is None 58 | 59 | 60 | def test_retrieve_invalid_instance_id(fixture_invalid_instance_id): 61 | result = deep_learning_container_to_test._retrieve_instance_id() 62 | assert result is None 63 | 64 | 65 | def test_retrieve_invalid_region(fixture_invalid_region): 66 | result = deep_learning_container_to_test._retrieve_instance_region() 67 | assert result is None 68 | 69 | 70 | def test_retrieve_valid_region(fixture_valid_region): 71 | result = deep_learning_container_to_test._retrieve_instance_region() 72 | assert 'us-east-1' == result 73 | 74 | 75 | def test_query_bucket(requests_mock, fixture_valid_region, fixture_valid_instance_id): 76 | fixture_valid_instance_id.return_value = 'i-123t32e11s32t1231' 77 | fixture_valid_region.return_value = 'us-east-1' 78 | requests_mock.get(('https://aws-deep-learning-containers-us-east-1.s3.us-east-1.amazonaws.com' 79 | '/dlc-containers.txt?x-instance-id=i-123t32e11s32t1231'), 80 | text='Access Denied') 81 | actual_response = deep_learning_container_to_test.query_bucket() 82 | assert 'Access Denied' == actual_response.text 83 | 84 | 85 | def test_query_bucket_region_none(fixture_invalid_region, fixture_valid_instance_id): 86 | fixture_valid_instance_id.return_value = 'i-123t32e11s32t1231' 87 | fixture_invalid_region.return_value = None 88 | actual_response = deep_learning_container_to_test.query_bucket() 89 | assert actual_response is None 90 | 91 | 92 | def test_query_bucket_instance_id_none(requests_mock, fixture_valid_region, fixture_none_instance_id): 93 | fixture_none_instance_id.return_value = None 94 | fixture_valid_region.return_value = 'us-east-1' 95 | actual_response = deep_learning_container_to_test.query_bucket() 96 | assert actual_response is None 97 | 98 | 99 | def test_query_bucket_instance_id_invalid(requests_mock, fixture_valid_region, fixture_invalid_instance_id): 
100 | fixture_invalid_instance_id.return_value = None 101 | fixture_valid_region.return_value = 'us-east-1' 102 | actual_response = deep_learning_container_to_test.query_bucket() 103 | assert actual_response is None 104 | 105 | 106 | def test_HTTP_error_on_S3(requests_mock, fixture_valid_region, fixture_valid_instance_id): 107 | fixture_valid_instance_id.return_value = 'i-123t32e11s32t1231' 108 | fixture_valid_region.return_value = 'us-east-1' 109 | query_s3_url = ('https://aws-deep-learning-containers-us-east-1.s3.us-east-1.amazonaws.com' 110 | '/dlc-containers.txt?x-instance-id=i-123t32e11s32t1231') 111 | 112 | requests_mock.get( 113 | query_s3_url, 114 | exc=requests.exceptions.HTTPError) 115 | requests_mock.side_effect = requests.exceptions.HTTPError 116 | 117 | with pytest.raises(requests.exceptions.HTTPError): 118 | actual_response = requests.get(query_s3_url) 119 | assert actual_response is None 120 | 121 | 122 | def test_connection_error_on_S3(requests_mock, fixture_valid_region, fixture_valid_instance_id): 123 | fixture_valid_instance_id.return_value = 'i-123t32e11s32t1231' 124 | fixture_valid_region.return_value = 'us-east-1' 125 | query_s3_url = ('https://aws-deep-learning-containers-us-east-1.s3.us-east-1.amazonaws.com' 126 | '/dlc-containers.txt?x-instance-id=i-123t32e11s32t1231') 127 | 128 | requests_mock.get( 129 | query_s3_url, 130 | exc=requests.exceptions.ConnectionError) 131 | 132 | with pytest.raises(requests.exceptions.ConnectionError): 133 | actual_response = requests.get( 134 | query_s3_url) 135 | 136 | assert actual_response is None 137 | 138 | 139 | def test_timeout_error_on_S3(requests_mock, fixture_valid_region, fixture_valid_instance_id): 140 | fixture_valid_instance_id.return_value = 'i-123t32e11s32t1231' 141 | fixture_valid_region.return_value = 'us-east-1' 142 | query_s3_url = ('https://aws-deep-learning-containers-us-east-1.s3.us-east-1.amazonaws.com' 143 | '/dlc-containers.txt?x-instance-id=i-123t32e11s32t1231') 144 | 145 | requests_mock.get( 146 | query_s3_url, 147 | exc=requests.Timeout) 148 | 149 | with pytest.raises(requests.exceptions.Timeout): 150 | actual_response = requests.get( 151 | query_s3_url) 152 | 153 | assert actual_response is None 154 | 155 | 156 | if __name__ == '__main__': 157 | unittest.main() 158 | -------------------------------------------------------------------------------- /docker/build_artifacts/sagemaker/tensorflowServing.js: -------------------------------------------------------------------------------- 1 | var tfs_base_uri = '/tfs/v1/models/' 2 | var custom_attributes_header = 'X-Amzn-SageMaker-Custom-Attributes' 3 | 4 | function invocations(r) { 5 | var ct = r.headersIn['Content-Type'] 6 | 7 | if ('application/json' == ct || 'application/jsonlines' == ct || 'application/jsons' == ct) { 8 | json_request(r) 9 | } else if ('text/csv' == ct) { 10 | csv_request(r) 11 | } else { 12 | return_error(r, 415, 'Unsupported Media Type: ' + (ct || 'Unknown')) 13 | } 14 | } 15 | 16 | function ping(r) { 17 | var uri = make_tfs_uri(r, false) 18 | 19 | function callback (reply) { 20 | if (reply.status == 200 && reply.responseBody.includes('"AVAILABLE"')) { 21 | r.return(200) 22 | } else { 23 | r.error('failed ping' + reply.responseBody) 24 | r.return(502) 25 | } 26 | } 27 | 28 | r.subrequest(uri, callback) 29 | } 30 | 31 | function ping_without_model(r) { 32 | // hack for TF 1.11 and MME 33 | // for TF 1.11, send an arbitrary fixed request to the default model. 
34 | // if response is 400, the model is ok (but input was bad), so return 200 35 | // for MME, the default model name is None and does not exist 36 | // also return 200 in unlikely case our request was really valid 37 | 38 | var uri = make_tfs_uri(r, true) 39 | var options = { 40 | method: 'POST', 41 | body: '{"instances": "invalid"}' 42 | } 43 | 44 | function callback (reply) { 45 | if (reply.status == 200 || reply.status == 400 || 46 | reply.responseBody.includes('Servable not found for request: Latest(None)')) { 47 | r.return(200) 48 | } else { 49 | r.error('failed ping' + reply.responseBody) 50 | r.return(502) 51 | } 52 | } 53 | 54 | r.subrequest(uri, options, callback) 55 | } 56 | 57 | function return_error(r, code, message) { 58 | if (message) { 59 | r.return(code, '{"error": "' + message + '"}') 60 | } else { 61 | r.return(code) 62 | } 63 | } 64 | 65 | function tfs_json_request(r, json) { 66 | var uri = make_tfs_uri(r, true) 67 | var options = { 68 | method: 'POST', 69 | body: json 70 | } 71 | 72 | var accept = r.headersIn.Accept 73 | function callback (reply) { 74 | var body = reply.responseBody 75 | if (reply.status == 400) { 76 | // "fix" broken json escaping in \'instances\' message 77 | body = body.replace("\\'instances\\'", "'instances'") 78 | } 79 | 80 | if (accept != undefined) { 81 | var content_types = accept.trim().replace(" ", "").split(",") 82 | if (content_types.includes('application/jsonlines') || content_types.includes('application/json')) { 83 | body = body.replace(/\n/g, '') 84 | r.headersOut['Content-Type'] = content_types[0] 85 | } 86 | } 87 | r.return(reply.status, body) 88 | } 89 | 90 | r.subrequest(uri, options, callback) 91 | 92 | } 93 | 94 | function make_tfs_uri(r, with_method) { 95 | var attributes = parse_custom_attributes(r) 96 | 97 | var uri = tfs_base_uri + attributes['tfs-model-name'] 98 | if ('tfs-model-version' in attributes) { 99 | uri += '/versions/' + attributes['tfs-model-version'] 100 | } 101 | 102 | if (with_method) { 103 | uri += ':' + (attributes['tfs-method'] || 'predict') 104 | } 105 | 106 | return uri 107 | } 108 | 109 | function parse_custom_attributes(r) { 110 | var attributes = {} 111 | var kv_pattern = /tfs-[a-z\-]+=[^,]+/g 112 | var header = r.headersIn[custom_attributes_header] 113 | if (header) { 114 | var matches = header.match(kv_pattern) 115 | if (matches) { 116 | for (var i = 0; i < matches.length; i++) { 117 | var kv = matches[i].split('=') 118 | if (kv.length === 2) { 119 | attributes[kv[0]] = kv[1] 120 | } 121 | } 122 | } 123 | } 124 | 125 | // for MME invocations, tfs-model-name is in the uri, or use default_tfs_model 126 | if (!attributes['tfs-model-name']) { 127 | var uri_pattern = /\/models\/[^,]+\/invoke/g 128 | var model_name = r.uri.match(uri_pattern) 129 | if (model_name[0]) { 130 | model_name = r.uri.replace('/models/', '').replace('/invoke', '') 131 | attributes['tfs-model-name'] = model_name 132 | } else { 133 | attributes['tfs-model-name'] = r.variables.default_tfs_model 134 | } 135 | } 136 | 137 | return attributes 138 | } 139 | 140 | function json_request(r) { 141 | var data = r.requestBody 142 | 143 | if (is_tfs_json(data)) { 144 | tfs_json_request(r, data) 145 | } else if (is_json_lines(data)) { 146 | json_lines_request(r, data) 147 | } else { 148 | generic_json_request(r, data) 149 | } 150 | } 151 | 152 | function is_tfs_json(data) { 153 | return /"(instances|inputs|examples)"\s*:/.test(data) 154 | } 155 | 156 | function is_json_lines(data) { 157 | // objects separated only by (optional) whitespace means 
jsons/json-lines 158 | return /[}\]]\s*[\[{]/.test(data) 159 | } 160 | 161 | function generic_json_request(r, data) { 162 | if (! /^\s*\[\s*\[/.test(data)) { 163 | data = '[' + data + ']' 164 | } 165 | 166 | var json = '{"instances":' + data + '}' 167 | tfs_json_request(r, json) 168 | } 169 | 170 | function json_lines_request(r, data) { 171 | var lines = data.trim().split(/\r?\n/) 172 | var builder = [] 173 | builder.push('{"instances":') 174 | if (lines.length != 1) { 175 | builder.push('[') 176 | } 177 | 178 | for (var i = 0; i < lines.length; i++) { 179 | var line = lines[i].trim() 180 | if (line) { 181 | var instance = (i == 0) ? '' : ',' 182 | instance += line 183 | builder.push(instance) 184 | } 185 | } 186 | 187 | builder.push(lines.length == 1 ? '}' : ']}') 188 | tfs_json_request(r, builder.join('')) 189 | } 190 | 191 | function csv_request(r) { 192 | var data = r.requestBody 193 | // look for initial quote or numeric-only data in 1st field 194 | var needs_quotes = data.search(/^\s*("|[\d.Ee+\-]+.*)/) != 0 195 | var lines = data.trim().split(/\r?\n/) 196 | var builder = [] 197 | builder.push('{"instances":[') 198 | 199 | for (var i = 0; i < lines.length; i++) { 200 | var line = lines[i].trim() 201 | if (line) { 202 | var line_builder = [] 203 | // Only wrap line in brackets if there are multiple columns. 204 | // If there's only one column and it has a string with a comma, 205 | // the input will be wrapped in an extra set of brackets. 206 | var has_multiple_columns = line.search(',') != -1 207 | 208 | if (has_multiple_columns) { 209 | line_builder.push('[') 210 | } 211 | 212 | if (needs_quotes) { 213 | line_builder.push('"') 214 | line_builder.push(line.replace('"', '\\"').replace(',', '","')) 215 | line_builder.push('"') 216 | } else { 217 | line_builder.push(line) 218 | } 219 | 220 | if (has_multiple_columns) { 221 | line_builder.push(']') 222 | } 223 | 224 | var json_line = line_builder.join('') 225 | builder.push(json_line) 226 | 227 | if (i != lines.length - 1) 228 | builder.push(',') 229 | } 230 | } 231 | 232 | builder.push(']}') 233 | tfs_json_request(r, builder.join('')) 234 | } 235 | 236 | export default {invocations, ping, ping_without_model, return_error, 237 | tfs_json_request, make_tfs_uri, parse_custom_attributes, 238 | json_request, is_tfs_json, is_json_lines, generic_json_request, 239 | json_lines_request, csv_request}; 240 | -------------------------------------------------------------------------------- /test/integration/local/test_multi_model_endpoint.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 
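# Local integration tests for multi-model endpoint (MME) mode: the container is started with
# SAGEMAKER_MULTI_MODEL=true, and the tests cover dynamic load/unload/list requests, per-version invocations,
# duplicate loads, and error handling for missing models and malformed load requests.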
13 | 14 | import json 15 | import os 16 | import subprocess 17 | import sys 18 | import time 19 | 20 | import pytest 21 | import requests 22 | 23 | from multi_model_endpoint_test_utils import ( 24 | make_invocation_request, 25 | make_list_model_request, 26 | make_load_model_request, 27 | make_unload_model_request, 28 | ) 29 | 30 | PING_URL = "http://localhost:8080/ping" 31 | 32 | 33 | @pytest.fixture(scope="session", autouse=True) 34 | def volume(): 35 | try: 36 | model_dir = os.path.abspath("test/resources/mme") 37 | subprocess.check_call( 38 | "docker volume create --name dynamic_endpoint_model_volume --opt type=none " 39 | "--opt device={} --opt o=bind".format(model_dir).split()) 40 | yield model_dir 41 | finally: 42 | subprocess.check_call("docker volume rm dynamic_endpoint_model_volume".split()) 43 | 44 | 45 | @pytest.fixture(scope="module", autouse=True) 46 | def container(request, docker_base_name, tag, runtime_config): 47 | try: 48 | command = ( 49 | "docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080" 50 | " --mount type=volume,source=dynamic_endpoint_model_volume,target=/opt/ml/models,readonly" 51 | " -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info" 52 | " -e SAGEMAKER_BIND_TO_PORT=8080" 53 | " -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999" 54 | " -e SAGEMAKER_MULTI_MODEL=true" 55 | " {}:{} serve" 56 | ).format(runtime_config, docker_base_name, tag) 57 | 58 | proc = subprocess.Popen(command.split(), stdout=sys.stdout, stderr=subprocess.STDOUT) 59 | 60 | attempts = 0 61 | while attempts < 40: 62 | time.sleep(3) 63 | try: 64 | res_code = requests.get("http://localhost:8080/ping").status_code 65 | if res_code == 200: 66 | break 67 | except: 68 | attempts += 1 69 | pass 70 | 71 | yield proc.pid 72 | finally: 73 | subprocess.check_call("docker rm -f sagemaker-tensorflow-serving-test".split()) 74 | 75 | 76 | @pytest.mark.skip_gpu 77 | def test_ping(): 78 | res = requests.get(PING_URL) 79 | assert res.status_code == 200 80 | 81 | 82 | @pytest.mark.skip_gpu 83 | def test_container_start_invocation_fail(): 84 | x = { 85 | "instances": [1.0, 2.0, 5.0] 86 | } 87 | code, y = make_invocation_request(json.dumps(x), "half_plus_three") 88 | y = json.loads(y) 89 | assert code == 404 90 | assert "Model half_plus_three is not loaded yet." 
in str(y) 91 | 92 | 93 | @pytest.mark.skip_gpu 94 | def test_list_models_empty(): 95 | code, res = make_list_model_request() 96 | res = json.loads(res) 97 | assert code == 200 98 | assert len(res) == 0 99 | 100 | 101 | @pytest.mark.skip_gpu 102 | def test_delete_unloaded_model(): 103 | # unloads the given model/version, no-op if not loaded 104 | model_name = "non-existing-model" 105 | code, res = make_unload_model_request(model_name) 106 | assert code == 404 107 | assert "Model {} is not loaded yet".format(model_name) in res 108 | 109 | 110 | @pytest.mark.skip_gpu 111 | def test_delete_model(): 112 | model_name = "half_plus_two" 113 | model_data = { 114 | "model_name": model_name, 115 | "url": "/opt/ml/models/half_plus_two" 116 | } 117 | code, res = make_load_model_request(json.dumps(model_data)) 118 | assert code == 200 119 | assert "Successfully loaded model {}".format(model_name) in res 120 | 121 | x = { 122 | "instances": [1.0, 2.0, 5.0] 123 | } 124 | _, y = make_invocation_request(json.dumps(x), model_name) 125 | y = json.loads(y) 126 | assert y == {"predictions": [2.5, 3.0, 4.5]} 127 | 128 | code_unload, res2 = make_unload_model_request(model_name) 129 | assert code_unload == 200 130 | 131 | code_invoke, y2 = make_invocation_request(json.dumps(x), model_name) 132 | y2 = json.loads(y2) 133 | assert code_invoke == 404 134 | assert "Model {} is not loaded yet.".format(model_name) in str(y2) 135 | 136 | 137 | @pytest.mark.skip_gpu 138 | def test_load_two_models(): 139 | model_name_1 = "half_plus_two" 140 | model_data_1 = { 141 | "model_name": model_name_1, 142 | "url": "/opt/ml/models/half_plus_two" 143 | } 144 | code1, res1 = make_load_model_request(json.dumps(model_data_1)) 145 | assert code1 == 200 146 | assert "Successfully loaded model {}".format(model_name_1) in res1 147 | 148 | # load second model 149 | model_name_2 = "half_plus_three" 150 | model_data_2 = { 151 | "model_name": model_name_2, 152 | "url": "/opt/ml/models/half_plus_three" 153 | } 154 | code2, res2 = make_load_model_request(json.dumps(model_data_2)) 155 | assert code2 == 200 156 | assert "Successfully loaded model {}".format(model_name_2) in res2 157 | 158 | # make invocation request to the first model 159 | x = { 160 | "instances": [1.0, 2.0, 5.0] 161 | } 162 | code_invoke1, y1 = make_invocation_request(json.dumps(x), model_name_1) 163 | y1 = json.loads(y1) 164 | assert code_invoke1 == 200 165 | assert y1 == {"predictions": [2.5, 3.0, 4.5]} 166 | 167 | # make invocation request to the second model 168 | for ver in ("123", "124"): 169 | code_invoke2, y2 = make_invocation_request(json.dumps(x), "half_plus_three", version=ver) 170 | y2 = json.loads(y2) 171 | assert code_invoke2 == 200 172 | assert y2 == {"predictions": [3.5, 4.0, 5.5]} 173 | 174 | code_list, res3 = make_list_model_request() 175 | res3 = json.loads(res3) 176 | assert len(res3) == 2 177 | 178 | 179 | @pytest.mark.skip_gpu 180 | def test_load_one_model_two_times(): 181 | model_name = "cifar" 182 | model_data = { 183 | "model_name": model_name, 184 | "url": "/opt/ml/models/cifar" 185 | } 186 | code_load, res = make_load_model_request(json.dumps(model_data)) 187 | assert code_load == 200 188 | assert "Successfully loaded model {}".format(model_name) in res 189 | 190 | code_load2, res2 = make_load_model_request(json.dumps(model_data)) 191 | assert code_load2 == 409 192 | assert "Model {} is already loaded".format(model_name) in res2 193 | 194 | 195 | @pytest.mark.skip_gpu 196 | def test_load_non_existing_model(): 197 | model_name = "non-existing" 198 | 
base_path = "/opt/ml/models/non-existing" 199 | model_data = { 200 | "model_name": model_name, 201 | "url": base_path 202 | } 203 | code, res = make_load_model_request(json.dumps(model_data)) 204 | assert code == 404 205 | assert "Could not find valid base path {} for servable {}".format(base_path, model_name) in str(res) 206 | 207 | 208 | @pytest.mark.skip_gpu 209 | def test_bad_model_reqeust(): 210 | bad_model_data = { 211 | "model_name": "model_name", 212 | "uri": "/opt/ml/models/non-existing" 213 | } 214 | code, _ = make_load_model_request(json.dumps(bad_model_data)) 215 | assert code == 500 216 | 217 | 218 | @pytest.mark.skip_gpu 219 | def test_invalid_model_version(): 220 | model_name = "invalid_version" 221 | base_path = "/opt/ml/models/invalid_version" 222 | invalid_model_version_data = { 223 | "model_name": model_name, 224 | "url": base_path 225 | } 226 | code, res = make_load_model_request(json.dumps(invalid_model_version_data)) 227 | assert code == 404 228 | assert "Could not find valid base path {} for servable {}".format(base_path, model_name) in str(res) 229 | --------------------------------------------------------------------------------