├── docker
├── __init__.py
├── build_artifacts
│ ├── __init__.py
│ ├── sagemaker
│ │ ├── serve
│ │ ├── __init__.py
│ │ ├── nginx.conf.template
│ │ ├── multi_model_utils.py
│ │ └── tensorflowServing.js
│ ├── dockerd-entrypoint.py
│ └── deep_learning_container.py
├── 1.11
│ ├── Dockerfile.eia
│ ├── Dockerfile.cpu
│ └── Dockerfile.gpu
├── 1.12
│ ├── Dockerfile.eia
│ ├── Dockerfile.cpu
│ └── Dockerfile.gpu
├── 1.13
│ ├── Dockerfile.eia
│ ├── Dockerfile.cpu
│ └── Dockerfile.gpu
├── 1.14
│ ├── Dockerfile.cpu
│ ├── Dockerfile.eia
│ └── Dockerfile.gpu
├── 2.0
│ ├── Dockerfile.cpu
│ ├── Dockerfile.eia
│ └── Dockerfile.gpu
├── 2.1
│ ├── Dockerfile.cpu
│ └── Dockerfile.gpu
└── 1.15
│ ├── Dockerfile.cpu
│ ├── Dockerfile.eia
│ └── Dockerfile.gpu
├── VERSION
├── test
├── resources
│ ├── inputs
│ │ ├── test-generic.json
│ │ ├── test.csv
│ │ ├── test-gcloud.jsons
│ │ └── test.json
│ ├── mme
│ │ ├── invalid_version
│ │ │ └── abcde
│ │ │ │ └── dummy.txt
│ │ ├── half_plus_three
│ │ │ ├── 00000123
│ │ │ │ ├── assets
│ │ │ │ │ └── foo.txt
│ │ │ │ ├── variables
│ │ │ │ │ ├── variables.data-00000-of-00001
│ │ │ │ │ └── variables.index
│ │ │ │ └── saved_model.pb
│ │ │ └── 00000124
│ │ │ │ ├── assets
│ │ │ │ └── foo.txt
│ │ │ │ ├── variables
│ │ │ │ ├── variables.data-00000-of-00001
│ │ │ │ └── variables.index
│ │ │ │ └── saved_model.pb
│ │ ├── half_plus_two
│ │ │ └── 00000123
│ │ │ │ ├── variables
│ │ │ │ ├── variables.data-00000-of-00001
│ │ │ │ └── variables.index
│ │ │ │ └── saved_model.pb
│ │ └── cifar
│ │ │ └── 1540855709
│ │ │ ├── saved_model.pb
│ │ │ └── variables
│ │ │ ├── variables.index
│ │ │ └── variables.data-00000-of-00001
│ ├── examples
│ │ ├── test5
│ │ │ ├── requirements.txt
│ │ │ ├── lib
│ │ │ │ └── dummy_module
│ │ │ │ │ └── __init__.py
│ │ │ └── inference.py
│ │ ├── test3
│ │ │ ├── requirements.txt
│ │ │ └── inference.py
│ │ ├── test4
│ │ │ ├── lib
│ │ │ │ └── dummy_module
│ │ │ │ │ └── __init__.py
│ │ │ └── inference.py
│ │ ├── test2
│ │ │ └── inference.py
│ │ └── test1
│ │ │ └── inference.py
│ ├── models
│ │ └── half_plus_three
│ │ │ ├── .00000111
│ │ │ └── .hidden_file
│ │ │ ├── 00000123
│ │ │ ├── assets
│ │ │ │ └── foo.txt
│ │ │ ├── variables
│ │ │ │ ├── variables.data-00000-of-00001
│ │ │ │ └── variables.index
│ │ │ └── saved_model.pb
│ │ │ └── 00000124
│ │ │ ├── assets
│ │ │ └── foo.txt
│ │ │ ├── variables
│ │ │ ├── variables.data-00000-of-00001
│ │ │ └── variables.index
│ │ │ └── saved_model.pb
│ └── mme_universal_script
│ │ ├── code
│ │ ├── requirements.txt
│ │ └── inference.py
│ │ └── half_plus_three
│ │ └── model
│ │ └── half_plus_three
│ │ ├── 00000123
│ │ ├── assets
│ │ │ └── foo.txt
│ │ ├── variables
│ │ │ ├── variables.data-00000-of-00001
│ │ │ └── variables.index
│ │ └── saved_model.pb
│ │ └── 00000124
│ │ ├── assets
│ │ └── foo.txt
│ │ ├── variables
│ │ ├── variables.data-00000-of-00001
│ │ └── variables.index
│ │ └── saved_model.pb
├── data
│ ├── tfs-model.tar.gz
│ ├── python-with-lib.tar.gz
│ ├── python-with-requirements.tar.gz
│ └── batch.csv
├── perf
│ ├── delete-endpoint.sh
│ ├── create-endpoint.sh
│ ├── create-model.sh
│ ├── ab.sh
│ ├── ec2-perftest.sh
│ ├── perftest_endpoint.py
│ └── data_generator.py
├── conftest.py
├── integration
│ ├── local
│ │ ├── conftest.py
│ │ ├── multi_model_endpoint_test_utils.py
│ │ ├── test_tfs_batching.py
│ │ ├── test_multi_tfs.py
│ │ ├── test_nginx_config.py
│ │ ├── test_pre_post_processing_mme.py
│ │ ├── test_pre_post_processing.py
│ │ └── test_multi_model_endpoint.py
│ └── sagemaker
│ │ ├── test_ei.py
│ │ ├── conftest.py
│ │ └──
test_tfs.py └── unit │ ├── test_proxy_client.py │ └── test_deep_learning_container.py ├── .jshintrc ├── .gitignore ├── NOTICE ├── branding └── icon │ └── sagemaker-banner.png ├── .github ├── PULL_REQUEST_TEMPLATE.md └── ISSUE_TEMPLATE │ ├── config.yml │ ├── documentation-request.md │ ├── feature_request.md │ └── bug_report.md ├── scripts ├── stop.sh ├── build-all.sh ├── publish-all.sh ├── start.sh ├── publish.sh ├── curl.sh ├── build.sh └── shared.sh ├── CODE_OF_CONDUCT.md ├── tox.ini ├── .pylintrc └── CONTRIBUTING.md /docker/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /VERSION: -------------------------------------------------------------------------------- 1 | 1.8.5.dev0 2 | -------------------------------------------------------------------------------- /docker/build_artifacts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/resources/inputs/test-generic.json: -------------------------------------------------------------------------------- 1 | [1.0,2.0,5.0] -------------------------------------------------------------------------------- /test/resources/mme/invalid_version/abcde/dummy.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.jshintrc: -------------------------------------------------------------------------------- 1 | { 2 | "asi": true, 3 | "esversion": 6 4 | } 5 | -------------------------------------------------------------------------------- /test/resources/examples/test5/requirements.txt: -------------------------------------------------------------------------------- 1 | Pillow>=6.2.2 -------------------------------------------------------------------------------- /test/resources/models/half_plus_three/.00000111/.hidden_file: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/resources/examples/test3/requirements.txt: -------------------------------------------------------------------------------- 1 | Pillow>=6.2.2 2 | -------------------------------------------------------------------------------- /test/resources/mme_universal_script/code/requirements.txt: -------------------------------------------------------------------------------- 1 | Pillow>=6.2.2 -------------------------------------------------------------------------------- /test/resources/mme/half_plus_three/00000123/assets/foo.txt: -------------------------------------------------------------------------------- 1 | asset-file-contents -------------------------------------------------------------------------------- /test/resources/mme/half_plus_three/00000124/assets/foo.txt: -------------------------------------------------------------------------------- 1 | asset-file-contents -------------------------------------------------------------------------------- /test/resources/examples/test4/lib/dummy_module/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = '0.1' 2 | -------------------------------------------------------------------------------- /test/resources/examples/test5/lib/dummy_module/__init__.py: 
-------------------------------------------------------------------------------- 1 | __version__ = '0.1' 2 | -------------------------------------------------------------------------------- /test/resources/models/half_plus_three/00000123/assets/foo.txt: -------------------------------------------------------------------------------- 1 | asset-file-contents -------------------------------------------------------------------------------- /test/resources/models/half_plus_three/00000124/assets/foo.txt: -------------------------------------------------------------------------------- 1 | asset-file-contents 2 | -------------------------------------------------------------------------------- /docker/build_artifacts/sagemaker/serve: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | python3 /sagemaker/serve.py 4 | -------------------------------------------------------------------------------- /test/resources/mme/half_plus_two/00000123/variables/variables.data-00000-of-00001: -------------------------------------------------------------------------------- 1 | ?@@@ -------------------------------------------------------------------------------- /test/resources/mme/half_plus_three/00000123/variables/variables.data-00000-of-00001: -------------------------------------------------------------------------------- 1 | ?@@@@ -------------------------------------------------------------------------------- /test/resources/mme/half_plus_three/00000124/variables/variables.data-00000-of-00001: -------------------------------------------------------------------------------- 1 | ?@@@@ -------------------------------------------------------------------------------- /test/resources/models/half_plus_three/00000123/variables/variables.data-00000-of-00001: -------------------------------------------------------------------------------- 1 | ?@@@@ -------------------------------------------------------------------------------- /test/resources/models/half_plus_three/00000124/variables/variables.data-00000-of-00001: -------------------------------------------------------------------------------- 1 | ?@@@@ -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | .tox/ 3 | log.txt 4 | .idea/ 5 | node_modules/ 6 | package.json 7 | package-lock.json 8 | -------------------------------------------------------------------------------- /test/resources/mme_universal_script/half_plus_three/model/half_plus_three/00000123/assets/foo.txt: -------------------------------------------------------------------------------- 1 | asset-file-contents -------------------------------------------------------------------------------- /test/resources/mme_universal_script/half_plus_three/model/half_plus_three/00000124/assets/foo.txt: -------------------------------------------------------------------------------- 1 | asset-file-contents 2 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Sagemaker TensorFlow Serving Container 2 | Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
3 | -------------------------------------------------------------------------------- /test/data/tfs-model.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/sagemaker-tensorflow-serving-container/HEAD/test/data/tfs-model.tar.gz -------------------------------------------------------------------------------- /test/resources/mme_universal_script/half_plus_three/model/half_plus_three/00000123/variables/variables.data-00000-of-00001: -------------------------------------------------------------------------------- 1 | ?@@@@ -------------------------------------------------------------------------------- /test/resources/mme_universal_script/half_plus_three/model/half_plus_three/00000124/variables/variables.data-00000-of-00001: -------------------------------------------------------------------------------- 1 | ?@@@@ -------------------------------------------------------------------------------- /test/data/python-with-lib.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/sagemaker-tensorflow-serving-container/HEAD/test/data/python-with-lib.tar.gz -------------------------------------------------------------------------------- /branding/icon/sagemaker-banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/sagemaker-tensorflow-serving-container/HEAD/branding/icon/sagemaker-banner.png -------------------------------------------------------------------------------- /test/data/python-with-requirements.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/sagemaker-tensorflow-serving-container/HEAD/test/data/python-with-requirements.tar.gz -------------------------------------------------------------------------------- /test/resources/mme/cifar/1540855709/saved_model.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/sagemaker-tensorflow-serving-container/HEAD/test/resources/mme/cifar/1540855709/saved_model.pb -------------------------------------------------------------------------------- /test/resources/mme/half_plus_two/00000123/saved_model.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/sagemaker-tensorflow-serving-container/HEAD/test/resources/mme/half_plus_two/00000123/saved_model.pb -------------------------------------------------------------------------------- /test/resources/mme/half_plus_three/00000123/saved_model.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/sagemaker-tensorflow-serving-container/HEAD/test/resources/mme/half_plus_three/00000123/saved_model.pb -------------------------------------------------------------------------------- /test/resources/mme/half_plus_three/00000124/saved_model.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/sagemaker-tensorflow-serving-container/HEAD/test/resources/mme/half_plus_three/00000124/saved_model.pb -------------------------------------------------------------------------------- /test/resources/inputs/test.csv: -------------------------------------------------------------------------------- 1 | 1.0,2.0,5.0 2 | 1.0,2.0,5.0 3 | 1.0,2.0,5.0 4 | 1.0,2.0,5.0 5 | 
1.0,2.0,5.0 6 | 1.0,2.0,5.0 7 | 1.0,2.0,5.0 8 | 1.0,2.0,5.0 9 | 1.0,2.0,5.0 10 | 1.0,2.0,5.0 11 | -------------------------------------------------------------------------------- /test/resources/mme/cifar/1540855709/variables/variables.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/sagemaker-tensorflow-serving-container/HEAD/test/resources/mme/cifar/1540855709/variables/variables.index -------------------------------------------------------------------------------- /test/resources/models/half_plus_three/00000123/saved_model.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/sagemaker-tensorflow-serving-container/HEAD/test/resources/models/half_plus_three/00000123/saved_model.pb -------------------------------------------------------------------------------- /test/resources/models/half_plus_three/00000124/saved_model.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/sagemaker-tensorflow-serving-container/HEAD/test/resources/models/half_plus_three/00000124/saved_model.pb -------------------------------------------------------------------------------- /test/data/batch.csv: -------------------------------------------------------------------------------- 1 | 1.0, 2.0, 5.0 2 | 1.0, 2.0, 5.0 3 | 1.0, 2.0, 5.0 4 | 1.0, 2.0, 5.0 5 | 1.0, 2.0, 5.0 6 | 1.0, 2.0, 5.0 7 | 1.0, 2.0, 5.0 8 | 1.0, 2.0, 5.0 9 | 1.0, 2.0, 5.0 10 | 1.0, 2.0, 5.0 11 | -------------------------------------------------------------------------------- /test/resources/mme/half_plus_two/00000123/variables/variables.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/sagemaker-tensorflow-serving-container/HEAD/test/resources/mme/half_plus_two/00000123/variables/variables.index -------------------------------------------------------------------------------- /test/resources/mme/half_plus_three/00000123/variables/variables.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/sagemaker-tensorflow-serving-container/HEAD/test/resources/mme/half_plus_three/00000123/variables/variables.index -------------------------------------------------------------------------------- /test/resources/mme/half_plus_three/00000124/variables/variables.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/sagemaker-tensorflow-serving-container/HEAD/test/resources/mme/half_plus_three/00000124/variables/variables.index -------------------------------------------------------------------------------- /test/resources/models/half_plus_three/00000123/variables/variables.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/sagemaker-tensorflow-serving-container/HEAD/test/resources/models/half_plus_three/00000123/variables/variables.index -------------------------------------------------------------------------------- /test/resources/models/half_plus_three/00000124/variables/variables.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/sagemaker-tensorflow-serving-container/HEAD/test/resources/models/half_plus_three/00000124/variables/variables.index 
-------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | *Issue #, if available:* 2 | 3 | *Description of changes:* 4 | 5 | 6 | By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 7 | -------------------------------------------------------------------------------- /test/resources/mme/cifar/1540855709/variables/variables.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/sagemaker-tensorflow-serving-container/HEAD/test/resources/mme/cifar/1540855709/variables/variables.data-00000-of-00001 -------------------------------------------------------------------------------- /test/perf/delete-endpoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | endpoint=${1-'sagemaker-tensorflow-serving-cpu-c5-xlarge'} 4 | aws sagemaker delete-endpoint --endpoint-name $endpoint 5 | aws sagemaker delete-endpoint-config --endpoint-config-name $endpoint -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | contact_links: 3 | - name: Ask a question 4 | url: https://stackoverflow.com/questions/tagged/amazon-sagemaker 5 | about: Use Stack Overflow to ask and answer questions 6 | -------------------------------------------------------------------------------- /scripts/stop.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Stop a local docker container. 
4 | 5 | set -euo pipefail 6 | 7 | source scripts/shared.sh 8 | 9 | parse_std_args "$@" 10 | 11 | docker kill $(docker ps -q --filter ancestor=$repository:$full_version-$device) 12 | -------------------------------------------------------------------------------- /test/resources/mme_universal_script/half_plus_three/model/half_plus_three/00000123/saved_model.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/sagemaker-tensorflow-serving-container/HEAD/test/resources/mme_universal_script/half_plus_three/model/half_plus_three/00000123/saved_model.pb -------------------------------------------------------------------------------- /test/resources/mme_universal_script/half_plus_three/model/half_plus_three/00000124/saved_model.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/sagemaker-tensorflow-serving-container/HEAD/test/resources/mme_universal_script/half_plus_three/model/half_plus_three/00000124/saved_model.pb -------------------------------------------------------------------------------- /test/resources/mme_universal_script/half_plus_three/model/half_plus_three/00000123/variables/variables.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/sagemaker-tensorflow-serving-container/HEAD/test/resources/mme_universal_script/half_plus_three/model/half_plus_three/00000123/variables/variables.index -------------------------------------------------------------------------------- /test/resources/mme_universal_script/half_plus_three/model/half_plus_three/00000124/variables/variables.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/sagemaker-tensorflow-serving-container/HEAD/test/resources/mme_universal_script/half_plus_three/model/half_plus_three/00000124/variables/variables.index -------------------------------------------------------------------------------- /test/resources/inputs/test-gcloud.jsons: -------------------------------------------------------------------------------- 1 | {"x": [1.0,2.0,5.0]} 2 | {"x": [1.0,2.0,5.0]} 3 | {"x": [1.0,2.0,5.0]} 4 | {"x": [1.0,2.0,5.0]} 5 | {"x": [1.0,2.0,5.0]} 6 | {"x": [1.0,2.0,5.0]} 7 | {"x": [1.0,2.0,5.0]} 8 | {"x": [1.0,2.0,5.0]} 9 | {"x": [1.0,2.0,5.0]} 10 | {"x": [1.0,2.0,5.0]} 11 | -------------------------------------------------------------------------------- /test/resources/inputs/test.json: -------------------------------------------------------------------------------- 1 | { 2 | "instances": [ 3 | [1.0,2.0,5.0], 4 | [1.0,2.0,5.0], 5 | [1.0,2.0,5.0], 6 | [1.0,2.0,5.0], 7 | [1.0,2.0,5.0], 8 | [1.0,2.0,5.0], 9 | [1.0,2.0,5.0], 10 | [1.0,2.0,5.0], 11 | [1.0,2.0,5.0], 12 | [1.0,2.0,5.0] 13 | ] 14 | } -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 
5 | -------------------------------------------------------------------------------- /scripts/build-all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Build all the docker images. 4 | 5 | set -euo pipefail 6 | 7 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 8 | 9 | ${DIR}/build.sh --version 1.14.0 --arch eia 10 | ${DIR}/build.sh --version 1.15.0 --arch cpu 11 | ${DIR}/build.sh --version 1.15.0 --arch gpu 12 | ${DIR}/build.sh --version 2.1.0 --arch cpu 13 | ${DIR}/build.sh --version 2.1.0 --arch gpu 14 | -------------------------------------------------------------------------------- /scripts/publish-all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Publish all images to your ECR account. 4 | 5 | set -euo pipefail 6 | 7 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 8 | 9 | ${DIR}/publish.sh --version 1.14.0 --arch eia 10 | ${DIR}/publish.sh --version 1.15.0 --arch cpu 11 | ${DIR}/publish.sh --version 1.15.0 --arch gpu 12 | ${DIR}/publish.sh --version 2.1.0 --arch cpu 13 | ${DIR}/publish.sh --version 2.1.0 --arch gpu 14 | -------------------------------------------------------------------------------- /test/conftest.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 13 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/documentation-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Documentation request 3 | about: Request improved documentation 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **What did you find confusing? Please describe.** 11 | A clear and concise description of what you found confusing. Ex. I tried to [...] but I didn't understand how to [...] 12 | 13 | **Describe how documentation can be improved** 14 | A clear and concise description of where documentation was lacking and how it can be improved. 15 | 16 | **Additional context** 17 | Add any other context or screenshots about the documentation request here. 18 | -------------------------------------------------------------------------------- /docker/build_artifacts/sagemaker/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. 
This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 13 | -------------------------------------------------------------------------------- /scripts/start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Start a local docker container. 4 | 5 | set -euo pipefail 6 | 7 | source scripts/shared.sh 8 | 9 | parse_std_args "$@" 10 | 11 | if [ "$arch" == 'gpu' ]; then 12 | docker_command='nvidia-docker' 13 | else 14 | docker_command='docker' 15 | fi 16 | 17 | 18 | MODEL_DIR="$(cd "test/resources/models" > /dev/null && pwd)" 19 | $docker_command run \ 20 | -v "$MODEL_DIR":/opt/ml/model:ro \ 21 | -p 8080:8080 \ 22 | -e "SAGEMAKER_TFS_NGINX_LOGLEVEL=error" \ 23 | -e "SAGEMAKER_BIND_TO_PORT=8080" \ 24 | -e "SAGEMAKER_SAFE_PORT_RANGE=9000-9999" \ 25 | $repository:$full_version-$device serve > log.txt 2>&1 & 26 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest new functionality for this toolkit 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the feature you'd like** 11 | A clear and concise description of the functionality you want. 12 | 13 | **How would this feature be used? Please describe.** 14 | A clear and concise description of the use case for this feature. Please provide an example, if possible. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /test/perf/create-endpoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | instance_type="${1:-c5.xlarge}" 4 | if [[ "$instance_type" == p* ]]; then 5 | arch='gpu' 6 | else 7 | arch='cpu' 8 | fi 9 | 10 | endpoint_name=$(echo "sagemaker-tensorflow-serving-$instance_type" | tr . -) 11 | 12 | aws sagemaker create-endpoint-config \ 13 | --endpoint-config-name $endpoint_name \ 14 | --production-variants '[{ 15 | "VariantName": "variant-name-1", 16 | "ModelName": "sagemaker-tensorflow-serving-model-'$arch'", 17 | "InitialInstanceCount": 1, 18 | "InstanceType": "ml.'$instance_type'" 19 | }]' 20 | 21 | aws sagemaker create-endpoint --endpoint-name $endpoint_name --endpoint-config-name $endpoint_name 22 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: File a report to help us reproduce and fix the problem 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To reproduce** 14 | A clear, step-by-step set of instructions to reproduce the bug. 15 | 16 | **Expected behavior** 17 | A clear and concise description of what you expected to happen. 
18 | 19 | **Screenshots or logs** 20 | If applicable, add screenshots or logs to help explain your problem. 21 | 22 | **System information** 23 | A description of your system. Please provide: 24 | - **Toolkit version**: 25 | - **Framework version**: 26 | - **Python version**: 27 | - **CPU or GPU**: 28 | - **Custom Docker image (Y/N)**: 29 | 30 | **Additional context** 31 | Add any other context about the problem here. 32 | -------------------------------------------------------------------------------- /scripts/publish.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Publish images to your ECR account. 4 | 5 | set -euo pipefail 6 | 7 | source scripts/shared.sh 8 | 9 | parse_std_args "$@" 10 | 11 | aws ecr get-login-password --region ${aws_region} \ 12 | | docker login \ 13 | --password-stdin \ 14 | --username AWS \ 15 | "${aws_account}.dkr.ecr.${aws_region}.amazonaws.com/${repository}" 16 | docker tag $repository:$full_version-$device $aws_account.dkr.ecr.$aws_region.amazonaws.com/$repository:$full_version-$device 17 | docker tag $repository:$full_version-$device $aws_account.dkr.ecr.$aws_region.amazonaws.com/$repository:$short_version-$device 18 | docker push $aws_account.dkr.ecr.$aws_region.amazonaws.com/$repository:$full_version-$device 19 | docker push $aws_account.dkr.ecr.$aws_region.amazonaws.com/$repository:$short_version-$device 20 | docker logout https://$aws_account.dkr.ecr.$aws_region.amazonaws.com 21 | -------------------------------------------------------------------------------- /docker/build_artifacts/dockerd-entrypoint.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 
13 | 14 | import os.path 15 | import subprocess 16 | import shlex 17 | import sys 18 | 19 | if not os.path.exists("/opt/ml/input/config"): 20 | subprocess.call(["python", "/usr/local/bin/deep_learning_container.py", "&>/dev/null", "&"]) 21 | 22 | subprocess.check_call(shlex.split(" ".join(sys.argv[1:]))) 23 | -------------------------------------------------------------------------------- /docker/1.11/Dockerfile.eia: -------------------------------------------------------------------------------- 1 | FROM ubuntu:16.04 2 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true 3 | 4 | ARG TFS_SHORT_VERSION 5 | 6 | # nginx + njs 7 | RUN \ 8 | apt-get update && \ 9 | apt-get -y install --no-install-recommends curl && \ 10 | curl -s http://nginx.org/keys/nginx_signing.key | apt-key add - && \ 11 | echo 'deb http://nginx.org/packages/ubuntu/ xenial nginx' >> /etc/apt/sources.list && \ 12 | apt-get update && \ 13 | apt-get -y install --no-install-recommends nginx nginx-module-njs python3 python3-pip python3-setuptools && \ 14 | apt-get clean 15 | 16 | # cython, falcon, gunicorn, tensorflow-serving 17 | RUN \ 18 | pip3 install --no-cache-dir cython falcon gunicorn gevent requests grpcio protobuf && \ 19 | pip3 install --no-dependencies --no-cache-dir tensorflow-serving-api==1.11.1 20 | 21 | COPY ./ / 22 | 23 | RUN mv amazonei_tensorflow_model_server /usr/bin/tensorflow_model_server && \ 24 | chmod +x /usr/bin/tensorflow_model_server 25 | 26 | ENV SAGEMAKER_TFS_VERSION "${TFS_SHORT_VERSION}" 27 | ENV PATH "$PATH:/sagemaker" 28 | -------------------------------------------------------------------------------- /docker/1.12/Dockerfile.eia: -------------------------------------------------------------------------------- 1 | FROM ubuntu:16.04 2 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true 3 | 4 | ARG TFS_SHORT_VERSION 5 | 6 | # nginx + njs 7 | RUN \ 8 | apt-get update && \ 9 | apt-get -y install --no-install-recommends curl && \ 10 | curl -s http://nginx.org/keys/nginx_signing.key | apt-key add - && \ 11 | echo 'deb http://nginx.org/packages/ubuntu/ xenial nginx' >> /etc/apt/sources.list && \ 12 | apt-get update && \ 13 | apt-get -y install --no-install-recommends nginx nginx-module-njs python3 python3-pip python3-setuptools && \ 14 | apt-get clean 15 | 16 | # cython, falcon, gunicorn, tensorflow-serving 17 | RUN \ 18 | pip3 install --no-cache-dir cython falcon gunicorn gevent requests grpcio protobuf && \ 19 | pip3 install --no-dependencies --no-cache-dir tensorflow-serving-api==1.12.0 20 | 21 | COPY ./ / 22 | 23 | RUN mv amazonei_tensorflow_model_server /usr/bin/tensorflow_model_server && \ 24 | chmod +x /usr/bin/tensorflow_model_server 25 | 26 | ENV SAGEMAKER_TFS_VERSION "${TFS_SHORT_VERSION}" 27 | ENV PATH "$PATH:/sagemaker" 28 | -------------------------------------------------------------------------------- /scripts/curl.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Some example curl requests to try on local docker containers. 
4 | 5 | curl -X POST --data-binary @test/resources/inputs/test.json -H 'Content-Type: application/json' -H 'X-Amzn-SageMaker-Custom-Attributes: tfs-model-name=half_plus_three' http://localhost:8080/invocations 6 | curl -X POST --data-binary @test/resources/inputs/test-gcloud.jsons -H 'Content-Type: application/json' -H 'X-Amzn-SageMaker-Custom-Attributes: tfs-model-name=half_plus_three' http://localhost:8080/invocations 7 | curl -X POST --data-binary @test/resources/inputs/test-generic.json -H 'Content-Type: application/json' -H 'X-Amzn-SageMaker-Custom-Attributes: tfs-model-name=half_plus_three' http://localhost:8080/invocations 8 | curl -X POST --data-binary @test/resources/inputs/test.csv -H 'Content-Type: text/csv' -H 'X-Amzn-SageMaker-Custom-Attributes: tfs-model-name=half_plus_three' http://localhost:8080/invocations 9 | curl -X POST --data-binary @test/resources/inputs/test-cifar.json -H 'Content-Type: application/json' -H 'X-Amzn-SageMaker-Custom-Attributes: tfs-model-name=cifar' http://localhost:8080/invocations -------------------------------------------------------------------------------- /docker/1.11/Dockerfile.cpu: -------------------------------------------------------------------------------- 1 | ARG TFS_VERSION 2 | 3 | FROM tensorflow/serving:${TFS_VERSION} as tfs 4 | FROM ubuntu:16.04 5 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true 6 | 7 | COPY --from=tfs /usr/bin/tensorflow_model_server /usr/bin/tensorflow_model_server 8 | 9 | # nginx + njs 10 | RUN \ 11 | apt-get update && \ 12 | apt-get -y install --no-install-recommends curl && \ 13 | curl -s http://nginx.org/keys/nginx_signing.key | apt-key add - && \ 14 | echo 'deb http://nginx.org/packages/ubuntu/ xenial nginx' >> /etc/apt/sources.list && \ 15 | apt-get update && \ 16 | apt-get -y install --no-install-recommends nginx nginx-module-njs python3 python3-pip python3-setuptools && \ 17 | apt-get clean 18 | 19 | # cython, falcon, gunicorn, tensorflow-serving 20 | RUN \ 21 | pip3 install --no-cache-dir cython falcon gunicorn gevent requests grpcio protobuf && \ 22 | pip3 install --no-dependencies --no-cache-dir tensorflow-serving-api==1.11.1 23 | 24 | COPY ./ / 25 | 26 | ARG TFS_SHORT_VERSION 27 | ENV SAGEMAKER_TFS_VERSION "${TFS_SHORT_VERSION}" 28 | ENV PATH "$PATH:/sagemaker" 29 | -------------------------------------------------------------------------------- /docker/1.12/Dockerfile.cpu: -------------------------------------------------------------------------------- 1 | ARG TFS_VERSION 2 | 3 | FROM tensorflow/serving:${TFS_VERSION} as tfs 4 | FROM ubuntu:16.04 5 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true 6 | 7 | COPY --from=tfs /usr/bin/tensorflow_model_server /usr/bin/tensorflow_model_server 8 | 9 | # nginx + njs 10 | RUN \ 11 | apt-get update && \ 12 | apt-get -y install --no-install-recommends curl && \ 13 | curl -s http://nginx.org/keys/nginx_signing.key | apt-key add - && \ 14 | echo 'deb http://nginx.org/packages/ubuntu/ xenial nginx' >> /etc/apt/sources.list && \ 15 | apt-get update && \ 16 | apt-get -y install --no-install-recommends nginx nginx-module-njs python3 python3-pip python3-setuptools && \ 17 | apt-get clean 18 | 19 | # cython, falcon, gunicorn, tensorflow-serving 20 | RUN \ 21 | pip3 install --no-cache-dir cython falcon gunicorn gevent requests grpcio protobuf && \ 22 | pip3 install --no-dependencies --no-cache-dir tensorflow-serving-api==1.12.0 23 | 24 | COPY ./ / 25 | 26 | ARG TFS_SHORT_VERSION 27 | ENV SAGEMAKER_TFS_VERSION "${TFS_SHORT_VERSION}" 
28 | ENV PATH "$PATH:/sagemaker" 29 | -------------------------------------------------------------------------------- /scripts/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Build the docker images. 4 | 5 | set -euo pipefail 6 | 7 | source scripts/shared.sh 8 | 9 | parse_std_args "$@" 10 | 11 | get_ei_executable 12 | 13 | echo "pulling previous image for layer cache... " 14 | aws ecr get-login-password --region ${aws_region} \ 15 | | docker login \ 16 | --password-stdin \ 17 | --username AWS \ 18 | "${aws_account}.dkr.ecr.${aws_region}.amazonaws.com/${repository}" &>/dev/null || echo 'warning: ecr login failed' 19 | docker pull $aws_account.dkr.ecr.$aws_region.amazonaws.com/$repository:$full_version-$device &>/dev/null || echo 'warning: pull failed' 20 | docker logout https://$aws_account.dkr.ecr.$aws_region.amazonaws.com &>/dev/null 21 | 22 | echo "building image... " 23 | cp -r docker/build_artifacts/* docker/$short_version/ 24 | docker build \ 25 | --cache-from $aws_account.dkr.ecr.$aws_region.amazonaws.com/$repository:$full_version-$device \ 26 | --build-arg TFS_VERSION=$full_version \ 27 | --build-arg TFS_SHORT_VERSION=$short_version \ 28 | -f docker/$short_version/Dockerfile.$arch \ 29 | -t $repository:$full_version-$device \ 30 | -t $repository:$short_version-$device \ 31 | docker/$short_version/ 32 | 33 | remove_ei_executable 34 | -------------------------------------------------------------------------------- /docker/1.13/Dockerfile.eia: -------------------------------------------------------------------------------- 1 | FROM ubuntu:16.04 2 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true 3 | 4 | ARG PIP=pip3 5 | ARG TFS_SHORT_VERSION 6 | 7 | ENV SAGEMAKER_TFS_VERSION="${TFS_SHORT_VERSION}" 8 | ENV PATH="$PATH:/sagemaker" 9 | 10 | # nginx + njs 11 | RUN apt-get update \ 12 | && apt-get -y install --no-install-recommends curl gnupg2 \ 13 | && curl -s http://nginx.org/keys/nginx_signing.key | apt-key add - \ 14 | && echo 'deb http://nginx.org/packages/ubuntu/ xenial nginx' >> /etc/apt/sources.list \ 15 | && apt-get update \ 16 | && apt-get -y install --no-install-recommends nginx nginx-module-njs python3 python3-pip python3-setuptools \ 17 | && apt-get clean \ 18 | && rm -rf /var/lib/apt/lists/* 19 | 20 | # cython, falcon, gunicorn, grpc 21 | RUN ${PIP} install --no-cache-dir \ 22 | awscli==1.16.130 \ 23 | cython==0.29.10 \ 24 | falcon==2.0.0 \ 25 | gunicorn==19.9.0 \ 26 | gevent==1.4.0 \ 27 | requests==2.21.0 \ 28 | grpcio==1.24.1 \ 29 | protobuf==3.10.0 \ 30 | # using --no-dependencies to avoid installing tensorflow binary 31 | && ${PIP} install --no-dependencies --no-cache-dir \ 32 | tensorflow-serving-api==1.13.0 33 | 34 | COPY ./ / 35 | 36 | RUN mv amazonei_tensorflow_model_server /usr/bin/tensorflow_model_server && \ 37 | chmod +x /usr/bin/tensorflow_model_server 38 | -------------------------------------------------------------------------------- /test/perf/create-model.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | arch=${1:-'cpu'} 6 | aws_region=$(aws configure get region) 7 | aws_account=$(aws --region $aws_region sts --endpoint-url https://sts.$aws_region.amazonaws.com get-caller-identity --query 'Account' --output text) 8 | 9 | # change this to match SageMaker execution role in your account 10 | sagemaker_role="arn:aws:iam::$aws_account:role/service-role/AmazonSageMaker-ExecutionRole-20180510T114550" 11 
| 12 | tar -C test/resources/models -czf /tmp/sagemaker-tensorflow-serving-model.tar.gz . 13 | aws s3 mb s3://sagemaker-$aws_region-$aws_account || true 14 | aws s3 cp /tmp/sagemaker-tensorflow-serving-model.tar.gz s3://sagemaker-$aws_region-$aws_account/sagemaker-tensorflow-serving/test-models/sagemaker-tensorflow-serving-model.tar.gz 15 | rm /tmp/sagemaker-tensorflow-serving-model.tar.gz 16 | 17 | 18 | aws sagemaker create-model \ 19 | --model-name sagemaker-tensorflow-serving-model-$arch \ 20 | --primary-container '{ 21 | "Image": "'$aws_account'.dkr.ecr.'$aws_region'.amazonaws.com/sagemaker-tensorflow-serving:1.11.1-'$arch'", 22 | "ModelDataUrl": "s3://sagemaker-'$aws_region'-'$aws_account'/sagemaker-tensorflow-serving/test-models/sagemaker-tensorflow-serving-model.tar.gz", 23 | "Environment": { 24 | "SAGEMAKER_TFS_DEFAULT_MODEL_NAME": "half_plus_three" 25 | } 26 | }' \ 27 | --execution-role-arn "$sagemaker_role" 28 | -------------------------------------------------------------------------------- /test/perf/ab.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ab -k -n 10000 -c 16 -p test/resources/inputs/test.json -T 'application/json' http://localhost:8080/tfs/v1/models/half_plus_three:predict 4 | ab -k -n 10000 -c 16 -p test/resources/inputs/test.json -T 'application/json' http://localhost:8080/invocations 5 | ab -k -n 10000 -c 16 -p test/resources/inputs/test.jsons -T 'application/json' http://localhost:8080/invocations 6 | ab -k -n 10000 -c 16 -p test/resources/inputs/test.csv -T 'text/csv' http://localhost:8080/invocations 7 | ab -k -n 10000 -c 16 -p test/resources/inputs/test-cifar.json -T 'application/json' -H 'X-Amzn-SageMaker-Custom-Attributes: tfs-model-name=cifar' http://localhost:8080/invocations 8 | 9 | # Larger payloads are generated and removed when this script exits. 10 | TEMPFILE='/tmp/perftest_data' 11 | trap 'rm -f $TEMPFILE' EXIT 12 | 13 | echo "Generating data" 14 | # Creates a 10MB file with 10000 columns per line. 15 | python test/perf/data_generator.py -c 'text/csv' -s 10000 -p 10 -u MB > $TEMPFILE || exit $? 16 | ab -k -n 10 -c 1 -p "$TEMPFILE" -T 'text/csv' http://localhost:8080/invocations 17 | 18 | python test/perf/data_generator.py -c 'application/json' -s 10000 -p 10 -u MB > $TEMPFILE || exit $? 19 | ab -k -n 10 -c 1 -p "$TEMPFILE" -T 'application/json' http://localhost:8080/invocations 20 | 21 | python test/perf/data_generator.py -c 'application/jsonlines' -s 10000 -p 10 -u MB > $TEMPFILE || exit $? 
22 | ab -k -n 10 -c 1 -p "$TEMPFILE" -T 'application/jsonlines' http://localhost:8080/invocations 23 | -------------------------------------------------------------------------------- /docker/build_artifacts/sagemaker/nginx.conf.template: -------------------------------------------------------------------------------- 1 | load_module modules/ngx_http_js_module.so; 2 | 3 | worker_processes auto; 4 | daemon off; 5 | pid /tmp/nginx.pid; 6 | error_log /dev/stderr %NGINX_LOG_LEVEL%; 7 | 8 | worker_rlimit_nofile 4096; 9 | 10 | events { 11 | worker_connections 2048; 12 | } 13 | 14 | http { 15 | include /etc/nginx/mime.types; 16 | default_type application/json; 17 | access_log /dev/stdout combined; 18 | js_import tensorflowServing.js; 19 | 20 | proxy_read_timeout %PROXY_READ_TIMEOUT%; 21 | 22 | upstream tfs_upstream { 23 | %TFS_UPSTREAM%; 24 | } 25 | 26 | upstream gunicorn_upstream { 27 | server unix:/tmp/gunicorn.sock fail_timeout=1; 28 | } 29 | 30 | server { 31 | listen %NGINX_HTTP_PORT% deferred; 32 | client_max_body_size 0; 33 | client_body_buffer_size 100m; 34 | subrequest_output_buffer_size 100m; 35 | 36 | set $tfs_version %TFS_VERSION%; 37 | set $default_tfs_model %TFS_DEFAULT_MODEL_NAME%; 38 | 39 | location /tfs { 40 | rewrite ^/tfs/(.*) /$1 break; 41 | proxy_redirect off; 42 | proxy_pass_request_headers off; 43 | proxy_set_header Content-Type 'application/json'; 44 | proxy_set_header Accept 'application/json'; 45 | proxy_pass http://tfs_upstream; 46 | } 47 | 48 | location /ping { 49 | %FORWARD_PING_REQUESTS%; 50 | } 51 | 52 | location /invocations { 53 | %FORWARD_INVOCATION_REQUESTS%; 54 | } 55 | 56 | location /models { 57 | proxy_pass http://gunicorn_upstream/models; 58 | } 59 | 60 | location / { 61 | return 404 '{"error": "Not Found"}'; 62 | } 63 | 64 | keepalive_timeout 3; 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /docker/build_artifacts/sagemaker/multi_model_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 
13 | import fcntl 14 | import signal 15 | import time 16 | from contextlib import contextmanager 17 | 18 | MODEL_CONFIG_FILE = "/sagemaker/model-config.cfg" 19 | DEFAULT_LOCK_FILE = "/sagemaker/lock-file.lock" 20 | 21 | 22 | @contextmanager 23 | def lock(path=DEFAULT_LOCK_FILE): 24 | f = open(path, "w", encoding="utf8") 25 | fd = f.fileno() 26 | fcntl.lockf(fd, fcntl.LOCK_EX) 27 | 28 | try: 29 | yield 30 | finally: 31 | time.sleep(1) 32 | fcntl.lockf(fd, fcntl.LOCK_UN) 33 | 34 | 35 | @contextmanager 36 | def timeout(seconds=60): 37 | def _raise_timeout_error(signum, frame): 38 | raise Exception(408, "Timed out after {} seconds".format(seconds)) 39 | 40 | try: 41 | signal.signal(signal.SIGALRM, _raise_timeout_error) 42 | signal.alarm(seconds) 43 | yield 44 | finally: 45 | signal.alarm(0) 46 | 47 | 48 | class MultiModelException(Exception): 49 | def __init__(self, code, msg): 50 | Exception.__init__(self, code, msg) 51 | self.code = code 52 | self.msg = msg 53 | -------------------------------------------------------------------------------- /test/integration/local/conftest.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 
13 | 14 | import pytest 15 | 16 | FRAMEWORK_LATEST_VERSION = "1.13" 17 | TFS_DOCKER_BASE_NAME = "sagemaker-tensorflow-serving" 18 | 19 | 20 | def pytest_addoption(parser): 21 | parser.addoption("--docker-base-name", default=TFS_DOCKER_BASE_NAME) 22 | parser.addoption("--framework-version", default=FRAMEWORK_LATEST_VERSION, required=True) 23 | parser.addoption("--processor", default="cpu", choices=["cpu", "gpu"]) 24 | parser.addoption("--tag") 25 | 26 | 27 | @pytest.fixture(scope="module") 28 | def docker_base_name(request): 29 | return request.config.getoption("--docker-base-name") 30 | 31 | 32 | @pytest.fixture(scope="module") 33 | def framework_version(request): 34 | return request.config.getoption("--framework-version") 35 | 36 | 37 | @pytest.fixture(scope="module") 38 | def processor(request): 39 | return request.config.getoption("--processor") 40 | 41 | 42 | @pytest.fixture(scope="module") 43 | def runtime_config(request, processor): 44 | if processor == "gpu": 45 | return "--runtime=nvidia " 46 | else: 47 | return "" 48 | 49 | 50 | @pytest.fixture(scope="module") 51 | def tag(request, framework_version, processor): 52 | image_tag = request.config.getoption("--tag") 53 | if not image_tag: 54 | image_tag = "{}-{}".format(framework_version, processor) 55 | return image_tag 56 | 57 | 58 | @pytest.fixture(autouse=True) 59 | def skip_by_device_type(request, processor): 60 | is_gpu = processor == "gpu" 61 | if (request.node.get_closest_marker("skip_gpu") and is_gpu) or \ 62 | (request.node.get_closest_marker("skip_cpu") and not is_gpu): 63 | pytest.skip("Skipping because running on \"{}\" instance".format(processor)) 64 | -------------------------------------------------------------------------------- /test/resources/examples/test2/inference.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 13 | 14 | import json 15 | from collections import namedtuple 16 | 17 | import requests 18 | 19 | Context = namedtuple('Context', 20 | 'model_name, model_version, method, rest_uri, grpc_uri, ' 21 | 'custom_attributes, request_content_type, accept_header') 22 | 23 | 24 | def handler(data, context): 25 | """Handle request. 
26 | 27 | Args: 28 | data (obj): the request data 29 | context (Context): an object containing request and configuration details 30 | 31 | Returns: 32 | (bytes, string): data to return to client, (optional) response content type 33 | """ 34 | processed_input = _process_input(data, context) 35 | response = requests.post(context.rest_uri, data=processed_input) 36 | return _process_output(response, context) 37 | 38 | 39 | def _process_input(data, context): 40 | if context.request_content_type == 'application/json': 41 | # pass through json (assumes it's correctly formed) 42 | d = data.read().decode('utf-8') 43 | return d if len(d) else '' 44 | 45 | if context.request_content_type == 'text/csv': 46 | # very simple csv handler 47 | return json.dumps({ 48 | 'instances': [float(x) for x in data.read().decode('utf-8').split(',')] 49 | }) 50 | 51 | raise ValueError('{{"error": "unsupported content type {}"}}'.format( 52 | context.request_content_type or "unknown")) 53 | 54 | 55 | def _process_output(data, context): 56 | if data.status_code != 200: 57 | raise ValueError(data.content.decode('utf-8')) 58 | 59 | response_content_type = context.accept_header 60 | prediction = data.content 61 | return prediction, response_content_type 62 | -------------------------------------------------------------------------------- /test/integration/local/multi_model_endpoint_test_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 
13 | 14 | import requests 15 | 16 | INVOCATION_URL = "http://localhost:8080/models/{}/invoke" 17 | MODELS_URL = "http://localhost:8080/models" 18 | DELETE_MODEL_URL = "http://localhost:8080/models/{}" 19 | 20 | 21 | def make_headers(content_type="application/json", method="predict", version=None): 22 | custom_attributes = "tfs-method={}".format(method) 23 | if version: 24 | custom_attributes += ",tfs-model-version={}".format(version) 25 | 26 | return { 27 | "Content-Type": content_type, 28 | "X-Amzn-SageMaker-Custom-Attributes": custom_attributes, 29 | } 30 | 31 | 32 | def make_invocation_request(data, model_name, content_type="application/json", version=None): 33 | headers = make_headers(content_type=content_type, method="predict", version=version) 34 | response = requests.post(INVOCATION_URL.format(model_name), data=data, headers=headers) 35 | return response.status_code, response.content.decode("utf-8") 36 | 37 | 38 | def make_list_model_request(): 39 | response = requests.get(MODELS_URL) 40 | return response.status_code, response.content.decode("utf-8") 41 | 42 | 43 | def make_get_model_request(model_name): 44 | response = requests.get(MODELS_URL + "/{}".format(model_name)) 45 | return response.status_code, response.content.decode("utf-8") 46 | 47 | 48 | def make_load_model_request(data, content_type="application/json"): 49 | headers = { 50 | "Content-Type": content_type 51 | } 52 | response = requests.post(MODELS_URL, data=data, headers=headers) 53 | return response.status_code, response.content.decode("utf-8") 54 | 55 | 56 | def make_unload_model_request(model_name): 57 | response = requests.delete(DELETE_MODEL_URL.format(model_name)) 58 | return response.status_code, response.content.decode("utf-8") 59 | -------------------------------------------------------------------------------- /test/resources/examples/test1/inference.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 
13 | 14 | import json 15 | from collections import namedtuple 16 | 17 | Context = namedtuple('Context', 18 | 'model_name, model_version, method, rest_uri, grpc_uri, ' 19 | 'custom_attributes, request_content_type, accept_header') 20 | 21 | 22 | def input_handler(data, context): 23 | """ Pre-process request input before it is sent to TensorFlow Serving REST API 24 | 25 | Args: 26 | data (obj): the request data, in format of dict or string 27 | context (Context): an object containing request and configuration details 28 | 29 | Returns: 30 | (dict): a JSON-serializable dict that contains request body and headers 31 | """ 32 | if context.request_content_type == 'application/json': 33 | # pass through json (assumes it's correctly formed) 34 | d = data.read().decode('utf-8') 35 | return d if len(d) else '' 36 | 37 | if context.request_content_type == 'text/csv': 38 | # very simple csv handler 39 | return json.dumps({ 40 | 'instances': [float(x) for x in data.read().decode('utf-8').split(',')] 41 | }) 42 | 43 | raise ValueError('{{"error": "unsupported content type {}"}}'.format( 44 | context.request_content_type or "unknown")) 45 | 46 | 47 | def output_handler(data, context): 48 | """Post-process TensorFlow Serving output before it is returned to the client. 49 | 50 | Args: 51 | data (obj): the TensorFlow serving response 52 | context (Context): an object containing request and configuration details 53 | 54 | Returns: 55 | (bytes, string): data to return to client, response content type 56 | """ 57 | if data.status_code != 200: 58 | raise ValueError(data.content.decode('utf-8')) 59 | 60 | response_content_type = context.accept_header 61 | prediction = data.content 62 | return prediction, response_content_type 63 | -------------------------------------------------------------------------------- /test/resources/mme_universal_script/code/inference.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 
13 | 14 | import json 15 | from collections import namedtuple 16 | 17 | import PIL 18 | 19 | Context = namedtuple('Context', 20 | 'model_name, model_version, method, rest_uri, grpc_uri, ' 21 | 'custom_attributes, request_content_type, accept_header') 22 | 23 | 24 | def input_handler(data, context): 25 | """ Pre-process request input before it is sent to TensorFlow Serving REST API 26 | 27 | Args: 28 | data (obj): the request data, in format of dict or string 29 | context (Context): an object containing request and configuration details 30 | 31 | Returns: 32 | (dict): a JSON-serializable dict that contains request body and headers 33 | """ 34 | if context.request_content_type == 'application/json': 35 | # pass through json (assumes it's correctly formed) 36 | d = data.read().decode('utf-8') 37 | return d if len(d) else '' 38 | 39 | if context.request_content_type == 'text/csv': 40 | # very simple csv handler 41 | return json.dumps({ 42 | 'instances': [float(x) for x in data.read().decode('utf-8').split(',')] 43 | }) 44 | 45 | raise ValueError('{{"error": "unsupported content type {}"}}'.format( 46 | context.request_content_type or "unknown")) 47 | 48 | 49 | def output_handler(data, context): 50 | """Post-process TensorFlow Serving output before it is returned to the client. 51 | 52 | Args: 53 | data (obj): the TensorFlow serving response 54 | context (Context): an object containing request and configuration details 55 | 56 | Returns: 57 | (bytes, string): data to return to client, response content type 58 | """ 59 | if data.status_code != 200: 60 | raise ValueError(data.content.decode('utf-8')) 61 | 62 | response_content_type = context.accept_header 63 | prediction = data.content 64 | return prediction, response_content_type 65 | -------------------------------------------------------------------------------- /test/resources/examples/test3/inference.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 13 | 14 | 15 | import json 16 | from collections import namedtuple 17 | 18 | import requests 19 | 20 | # for testing requirements.txt install and pythonpath 21 | import PIL 22 | from PIL.Image import core as _imaging 23 | 24 | Context = namedtuple('Context', 25 | 'model_name, model_version, method, rest_uri, grpc_uri, ' 26 | 'custom_attributes, request_content_type, accept_header') 27 | 28 | 29 | def handler(data, context): 30 | """Handle request. 
31 | 32 | Args: 33 | data (obj): the request data 34 | context (Context): an object containing request and configuration details 35 | 36 | Returns: 37 | (bytes, string): data to return to client, (optional) response content type 38 | """ 39 | 40 | # use the imported library 41 | print('pillow: {}\n{}'.format(PIL.__version__, dir(_imaging))) 42 | processed_input = _process_input(data, context) 43 | response = requests.post(context.rest_uri, data=processed_input) 44 | return _process_output(response, context) 45 | 46 | 47 | def _process_input(data, context): 48 | if context.request_content_type == 'application/json': 49 | # pass through json (assumes it's correctly formed) 50 | d = data.read().decode('utf-8') 51 | return d if len(d) else '' 52 | 53 | if context.request_content_type == 'text/csv': 54 | # very simple csv handler 55 | return json.dumps({ 56 | 'instances': [float(x) for x in data.read().decode('utf-8').split(',')] 57 | }) 58 | 59 | raise ValueError('{{"error": "unsupported content type {}"}}'.format( 60 | context.request_content_type or "unknown")) 61 | 62 | 63 | def _process_output(data, context): 64 | if data.status_code != 200: 65 | raise ValueError(data.content.decode('utf-8')) 66 | 67 | response_content_type = context.accept_header 68 | prediction = data.content 69 | return prediction, response_content_type 70 | -------------------------------------------------------------------------------- /test/resources/examples/test4/inference.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 13 | 14 | 15 | import json 16 | from collections import namedtuple 17 | 18 | import requests 19 | 20 | import dummy_module # for testing requirements.txt install and pythonpath 21 | 22 | Context = namedtuple('Context', 23 | 'model_name, model_version, method, rest_uri, grpc_uri, ' 24 | 'custom_attributes, request_content_type, accept_header') 25 | 26 | 27 | def handler(data, context): 28 | """Handle request. 
29 | 30 | Args: 31 | data (obj): the request data 32 | context (Context): an object containing request and configuration details 33 | 34 | Returns: 35 | (bytes, string): data to return to client, (optional) response content type 36 | """ 37 | 38 | # use the library in lib/ 39 | print(dummy_module.__version__) 40 | 41 | # ensure the requirements.txt wasn't installed 42 | try: 43 | import PIL 44 | raise Exception('pillow should not be installed') 45 | except ImportError: 46 | pass 47 | 48 | processed_input = _process_input(data, context) 49 | response = requests.post(context.rest_uri, data=processed_input) 50 | return _process_output(response, context) 51 | 52 | 53 | def _process_input(data, context): 54 | if context.request_content_type == 'application/json': 55 | # pass through json (assumes it's correctly formed) 56 | d = data.read().decode('utf-8') 57 | return d if len(d) else '' 58 | 59 | if context.request_content_type == 'text/csv': 60 | # very simple csv handler 61 | return json.dumps({ 62 | 'instances': [float(x) for x in data.read().decode('utf-8').split(',')] 63 | }) 64 | 65 | raise ValueError('{{"error": "unsupported content type {}"}}'.format( 66 | context.request_content_type or "unknown")) 67 | 68 | 69 | def _process_output(data, context): 70 | if data.status_code != 200: 71 | raise ValueError(data.content.decode('utf-8')) 72 | 73 | response_content_type = context.accept_header 74 | prediction = data.content 75 | return prediction, response_content_type 76 | -------------------------------------------------------------------------------- /test/resources/examples/test5/inference.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 13 | 14 | 15 | import json 16 | from collections import namedtuple 17 | 18 | import requests 19 | 20 | import dummy_module # for testing requirements.txt install and pythonpath 21 | 22 | Context = namedtuple('Context', 23 | 'model_name, model_version, method, rest_uri, grpc_uri, ' 24 | 'custom_attributes, request_content_type, accept_header') 25 | 26 | 27 | def handler(data, context): 28 | """Handle request. 
29 | 30 | Args: 31 | data (obj): the request data 32 | context (Context): an object containing request and configuration details 33 | 34 | Returns: 35 | (bytes, string): data to return to client, (optional) response content type 36 | """ 37 | 38 | # use the library in lib/ 39 | print(dummy_module.__version__) 40 | 41 | # ensure the requirements.txt wasn't installed 42 | try: 43 | import PIL 44 | raise Exception('pillow should not be installed') 45 | except ImportError: 46 | pass 47 | 48 | processed_input = _process_input(data, context) 49 | response = requests.post(context.rest_uri, data=processed_input) 50 | return _process_output(response, context) 51 | 52 | 53 | def _process_input(data, context): 54 | if context.request_content_type == 'application/json': 55 | # pass through json (assumes it's correctly formed) 56 | d = data.read().decode('utf-8') 57 | return d if len(d) else '' 58 | 59 | if context.request_content_type == 'text/csv': 60 | # very simple csv handler 61 | return json.dumps({ 62 | 'instances': [float(x) for x in data.read().decode('utf-8').split(',')] 63 | }) 64 | 65 | raise ValueError('{{"error": "unsupported content type {}"}}'.format( 66 | context.request_content_type or "unknown")) 67 | 68 | 69 | def _process_output(data, context): 70 | if data.status_code != 200: 71 | raise ValueError(data.content.decode('utf-8')) 72 | 73 | response_content_type = context.accept_header 74 | prediction = data.content 75 | return prediction, response_content_type 76 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | # Tox (http://tox.testrun.org/) is a tool for running tests 2 | # in multiple virtualenvs. This configuration file will run the 3 | # test suite on all supported python versions. To use it, "pip install tox" 4 | # and then run "tox" from this directory. 5 | 6 | [tox] 7 | skipsdist = True 8 | skip_missing_interpreters = False 9 | envlist = black-format,jshint,flake8,pylint,py36,py37 10 | 11 | [flake8] 12 | max-line-length = 100 13 | exclude = 14 | build/ 15 | .github/ 16 | .pytest_cache/ 17 | .git 18 | .tox 19 | tests/resources/ 20 | docker/build_artifacts/sagemaker/tensorflow/ 21 | docker/build_artifacts/sagemaker/tensorflow-2.1/ 22 | docker/build_artifacts/sagemaker/tensorflow-2.2/ 23 | 24 | max-complexity = 10 25 | ignore = 26 | E203, # whitespace before ':': Black disagrees with and explicitly violates this. 27 | FI10, 28 | FI12, 29 | FI13, 30 | FI14, 31 | FI15, 32 | FI16, 33 | FI17, 34 | FI18, # __future__ import "annotations" missing -> check only Python 3.7 compatible 35 | FI50, 36 | FI51, 37 | FI52, 38 | FI53, 39 | FI54, 40 | FI55, 41 | FI56, 42 | FI57, 43 | W503 # Ignore line break before binary operator, since Black violates this. 44 | 45 | require-code = True 46 | 47 | [testenv] 48 | # {posargs} can be passed in by additional arguments specified when invoking tox. 
49 | # Can be used to specify which tests to run, e.g.: tox -- -s 50 | basepython = python3 51 | passenv = 52 | AWS_ACCESS_KEY_ID 53 | AWS_SECRET_ACCESS_KEY 54 | AWS_SESSION_TOKEN 55 | AWS_CONTAINER_CREDENTIALS_RELATIVE_URI 56 | AWS_DEFAULT_REGION 57 | commands = 58 | python -m pytest {posargs} 59 | 60 | deps = 61 | pytest 62 | pytest-xdist 63 | boto3 64 | requests 65 | 66 | [testenv:flake8] 67 | deps = 68 | flake8 69 | commands = flake8 docker/build_artifacts/ 70 | 71 | [testenv:pylint] 72 | deps = 73 | pylint 74 | commands = 75 | python -m pylint --rcfile=.pylintrc docker/build_artifacts/ 76 | 77 | [testenv:jshint] 78 | whitelist_externals = 79 | jshint 80 | commands = 81 | jshint docker/build_artifacts/ 82 | 83 | [testenv:black-format] 84 | # Used during development (before committing) to format .py files. 85 | setenv = 86 | LC_ALL=C.UTF-8 87 | LANG=C.UTF-8 88 | deps = black 89 | commands = 90 | black -l 100 ./ 91 | 92 | [testenv:black-check] 93 | # Used by automated build steps to check that all files are properly formatted. 94 | setenv = 95 | LC_ALL=C.UTF-8 96 | LANG=C.UTF-8 97 | deps = black 98 | commands = 99 | black -l 100 --check ./ 100 | 101 | [pytest] 102 | markers = 103 | skip_gpu: skip test if running on gpu instance 104 | -------------------------------------------------------------------------------- /docker/1.11/Dockerfile.gpu: -------------------------------------------------------------------------------- 1 | ARG TFS_VERSION 2 | 3 | FROM tensorflow/serving:${TFS_VERSION}-gpu as tfs 4 | FROM nvidia/cuda:9.0-base-ubuntu16.04 5 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true 6 | 7 | COPY --from=tfs /usr/bin/tensorflow_model_server /usr/bin/tensorflow_model_server 8 | 9 | # https://github.com/tensorflow/serving/blob/1.12.0/tensorflow_serving/tools/docker/Dockerfile.gpu 10 | ENV NCCL_VERSION=2.2.13 11 | ENV CUDNN_VERSION=7.2.1.38 12 | ENV TF_TENSORRT_VERSION=4.1.2 13 | 14 | RUN \ 15 | apt-get update && apt-get install -y --no-install-recommends \ 16 | ca-certificates \ 17 | cuda-command-line-tools-9-0 \ 18 | cuda-command-line-tools-9-0 \ 19 | cuda-cublas-9-0 \ 20 | cuda-cufft-9-0 \ 21 | cuda-curand-9-0 \ 22 | cuda-cusolver-9-0 \ 23 | cuda-cusparse-9-0 \ 24 | libcudnn7=${CUDNN_VERSION}-1+cuda9.0 \ 25 | libnccl2=${NCCL_VERSION}-1+cuda9.0 \ 26 | libgomp1 && \ 27 | apt-get clean && \ 28 | rm -rf /var/lib/apt/lists/* 29 | 30 | # The 'apt-get install' of nvinfer-runtime-trt-repo-ubuntu1604-4.0.1-ga-cuda9.0 31 | # adds a new list which contains libnvinfer library, so it needs another 32 | # 'apt-get update' to retrieve that list before it can actually install the 33 | # library. 34 | # We don't install libnvinfer-dev since we don't need to build against TensorRT, 35 | # and libnvinfer4 doesn't contain libnvinfer.a static library. 
36 | RUN apt-get update && \ 37 | apt-get install --no-install-recommends \ 38 | nvinfer-runtime-trt-repo-ubuntu1604-4.0.1-ga-cuda9.0 && \ 39 | apt-get update && \ 40 | apt-get install --no-install-recommends \ 41 | libnvinfer4=${TF_TENSORRT_VERSION}-1+cuda9.0 && \ 42 | apt-get clean && \ 43 | rm -rf /var/lib/apt/lists/* && \ 44 | rm /usr/lib/x86_64-linux-gnu/libnvinfer_plugin* && \ 45 | rm /usr/lib/x86_64-linux-gnu/libnvcaffe_parser* && \ 46 | rm /usr/lib/x86_64-linux-gnu/libnvparsers* 47 | 48 | # nginx + njs 49 | RUN \ 50 | apt-get update && \ 51 | apt-get -y install --no-install-recommends curl && \ 52 | curl -s http://nginx.org/keys/nginx_signing.key | apt-key add - && \ 53 | echo 'deb http://nginx.org/packages/ubuntu/ xenial nginx' >> /etc/apt/sources.list && \ 54 | apt-get update && \ 55 | apt-get -y install --no-install-recommends nginx nginx-module-njs python3 python3-pip python3-setuptools && \ 56 | apt-get clean 57 | 58 | # cython, falcon, gunicorn, tensorflow-serving 59 | RUN \ 60 | pip3 install --no-cache-dir cython falcon gunicorn gevent requests grpcio protobuf && \ 61 | pip3 install --no-dependencies --no-cache-dir tensorflow-serving-api==1.11.1 62 | 63 | COPY ./ / 64 | 65 | ARG TFS_SHORT_VERSION 66 | ENV SAGEMAKER_TFS_VERSION "${TFS_SHORT_VERSION}" 67 | ENV PATH "$PATH:/sagemaker" 68 | -------------------------------------------------------------------------------- /docker/1.12/Dockerfile.gpu: -------------------------------------------------------------------------------- 1 | ARG TFS_VERSION 2 | 3 | FROM tensorflow/serving:${TFS_VERSION}-gpu as tfs 4 | FROM nvidia/cuda:9.0-base-ubuntu16.04 5 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true 6 | 7 | COPY --from=tfs /usr/bin/tensorflow_model_server /usr/bin/tensorflow_model_server 8 | 9 | # https://github.com/tensorflow/serving/blob/1.12.0/tensorflow_serving/tools/docker/Dockerfile.gpu 10 | ENV NCCL_VERSION=2.2.13 11 | ENV CUDNN_VERSION=7.2.1.38 12 | ENV TF_TENSORRT_VERSION=4.1.2 13 | 14 | RUN \ 15 | apt-get update && apt-get install -y --no-install-recommends \ 16 | ca-certificates \ 17 | cuda-command-line-tools-9-0 \ 18 | cuda-command-line-tools-9-0 \ 19 | cuda-cublas-9-0 \ 20 | cuda-cufft-9-0 \ 21 | cuda-curand-9-0 \ 22 | cuda-cusolver-9-0 \ 23 | cuda-cusparse-9-0 \ 24 | libcudnn7=${CUDNN_VERSION}-1+cuda9.0 \ 25 | libnccl2=${NCCL_VERSION}-1+cuda9.0 \ 26 | libgomp1 && \ 27 | apt-get clean && \ 28 | rm -rf /var/lib/apt/lists/* 29 | 30 | # The 'apt-get install' of nvinfer-runtime-trt-repo-ubuntu1604-4.0.1-ga-cuda9.0 31 | # adds a new list which contains libnvinfer library, so it needs another 32 | # 'apt-get update' to retrieve that list before it can actually install the 33 | # library. 34 | # We don't install libnvinfer-dev since we don't need to build against TensorRT, 35 | # and libnvinfer4 doesn't contain libnvinfer.a static library. 
36 | RUN apt-get update && \ 37 | apt-get install --no-install-recommends \ 38 | nvinfer-runtime-trt-repo-ubuntu1604-4.0.1-ga-cuda9.0 && \ 39 | apt-get update && \ 40 | apt-get install --no-install-recommends \ 41 | libnvinfer4=${TF_TENSORRT_VERSION}-1+cuda9.0 && \ 42 | apt-get clean && \ 43 | rm -rf /var/lib/apt/lists/* && \ 44 | rm /usr/lib/x86_64-linux-gnu/libnvinfer_plugin* && \ 45 | rm /usr/lib/x86_64-linux-gnu/libnvcaffe_parser* && \ 46 | rm /usr/lib/x86_64-linux-gnu/libnvparsers* 47 | 48 | # nginx + njs 49 | RUN \ 50 | apt-get update && \ 51 | apt-get -y install --no-install-recommends curl && \ 52 | curl -s http://nginx.org/keys/nginx_signing.key | apt-key add - && \ 53 | echo 'deb http://nginx.org/packages/ubuntu/ xenial nginx' >> /etc/apt/sources.list && \ 54 | apt-get update && \ 55 | apt-get -y install --no-install-recommends nginx nginx-module-njs python3 python3-pip python3-setuptools && \ 56 | apt-get clean 57 | 58 | # cython, falcon, gunicorn, tensorflow-serving 59 | RUN \ 60 | pip3 install --no-cache-dir cython falcon gunicorn gevent requests grpcio protobuf && \ 61 | pip3 install --no-dependencies --no-cache-dir tensorflow-serving-api==1.12.0 62 | 63 | COPY ./ / 64 | 65 | 66 | ARG TFS_SHORT_VERSION 67 | ENV SAGEMAKER_TFS_VERSION "${TFS_SHORT_VERSION}" 68 | ENV PATH "$PATH:/sagemaker" 69 | -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | [MASTER] 2 | 3 | ignore= 4 | tensorflow_serving, 5 | tensorflow-2.1, 6 | tensorflow-2.2 7 | 8 | [MESSAGES CONTROL] 9 | 10 | disable= 11 | C, # convention 12 | R, # refactor 13 | too-many-arguments, # We should fix the offending ones soon. 14 | too-many-lines, # Some files are too big, we should fix this too 15 | too-few-public-methods, 16 | too-many-instance-attributes, 17 | too-many-locals, 18 | len-as-condition, # Nice to have in the future 19 | bad-indentation, 20 | line-too-long, # We let Flake8 take care of this 21 | logging-format-interpolation, 22 | useless-object-inheritance, # We still support python2 so inheriting from object is ok 23 | invalid-name, 24 | import-error, 25 | logging-not-lazy, 26 | fixme, 27 | no-self-use, 28 | attribute-defined-outside-init, 29 | protected-access, 30 | invalid-all-object, 31 | arguments-differ, 32 | abstract-method, 33 | signature-differs, 34 | raise-missing-from 35 | 36 | [REPORTS] 37 | # Set the output format. Available formats are text, parseable, colorized, msvs 38 | # (visual studio) and html 39 | output-format=colorized 40 | 41 | # Tells whether to display a full report or only the messages 42 | # CHANGE: No report. 43 | reports=no 44 | 45 | [FORMAT] 46 | # Maximum number of characters on a single line. 47 | max-line-length=100 48 | # Maximum number of lines in a module 49 | #max-module-lines=1000 50 | # String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 tab). 51 | indent-string=' ' 52 | 53 | [BASIC] 54 | 55 | # Required attributes for module, separated by a comma 56 | #required-attributes= 57 | # List of builtins function names that should not be used, separated by a comma. 58 | # XXX: Should we ban map() & filter() for list comprehensions? 59 | # exit & quit are for the interactive interpreter shell only. 60 | # https://docs.python.org/3/library/constants.html#constants-added-by-the-site-module 61 | bad-functions= 62 | apply, 63 | exit, 64 | input, 65 | quit, 66 | 67 | [SIMILARITIES] 68 | # Minimum lines number of a similarity. 
69 | min-similarity-lines=5 70 | # Ignore comments when computing similarities. 71 | ignore-comments=yes 72 | # Ignore docstrings when computing similarities. 73 | ignore-docstrings=yes 74 | 75 | [VARIABLES] 76 | # Tells whether we should check for unused import in __init__ files. 77 | init-import=no 78 | # A regular expression matching the beginning of the name of dummy variables 79 | # (i.e. not used). 80 | dummy-variables-rgx=_|unused_ 81 | 82 | # List of additional names supposed to be defined in builtins. Remember that 83 | # you should avoid to define new builtins when possible. 84 | #additional-builtins= 85 | 86 | [LOGGING] 87 | # Apply logging string format checks to calls on these modules. 88 | logging-modules= 89 | logging 90 | 91 | [TYPECHECK] 92 | ignored-modules= 93 | distutils 94 | -------------------------------------------------------------------------------- /test/integration/local/test_tfs_batching.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 13 | 14 | import os 15 | import subprocess 16 | 17 | import pytest 18 | 19 | 20 | @pytest.fixture(scope="session", autouse=True) 21 | def volume(): 22 | try: 23 | model_dir = os.path.abspath("test/resources/models") 24 | subprocess.check_call( 25 | "docker volume create --name batching_model_volume --opt type=none " 26 | "--opt device={} --opt o=bind".format(model_dir).split()) 27 | yield model_dir 28 | finally: 29 | subprocess.check_call("docker volume rm batching_model_volume".split()) 30 | 31 | 32 | def test_run_tfs_with_batching_parameters(docker_base_name, tag, runtime_config): 33 | try: 34 | command = ( 35 | "docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080" 36 | " --mount type=volume,source=batching_model_volume,target=/opt/ml/model,readonly" 37 | " -e SAGEMAKER_TFS_ENABLE_BATCHING=true" 38 | " -e SAGEMAKER_TFS_MAX_BATCH_SIZE=16" 39 | " -e SAGEMAKER_TFS_BATCH_TIMEOUT_MICROS=500" 40 | " -e SAGEMAKER_TFS_NUM_BATCH_THREADS=100" 41 | " -e SAGEMAKER_TFS_MAX_ENQUEUED_BATCHES=1" 42 | " -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info" 43 | " -e SAGEMAKER_BIND_TO_PORT=8080" 44 | " -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999" 45 | " {}:{} serve" 46 | ).format(runtime_config, docker_base_name, tag) 47 | 48 | proc = subprocess.Popen(command.split(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT) 49 | 50 | lines_seen = { 51 | "max_batch_size { value: 16 }": 0, 52 | "batch_timeout_micros { value: 500 }": 0, 53 | "num_batch_threads { value: 100 }": 0, 54 | "max_enqueued_batches { value: 1 }": 0 55 | } 56 | 57 | for stdout_line in iter(proc.stdout.readline, ""): 58 | stdout_line = str(stdout_line) 59 | for line in lines_seen.keys(): 60 | if line in stdout_line: 61 | lines_seen[line] += 1 62 | if "Entering the event loop" in stdout_line: 63 | for value in lines_seen.values(): 64 | assert value == 1 65 | break 66 | 67 | finally: 68 | subprocess.check_call("docker rm -f 
sagemaker-tensorflow-serving-test".split()) 69 | -------------------------------------------------------------------------------- /test/integration/sagemaker/test_ei.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 13 | import os 14 | import random 15 | 16 | import pytest 17 | 18 | import util 19 | 20 | EI_SUPPORTED_REGIONS = ["us-east-1", "us-east-2", "us-west-2", 21 | "eu-west-1", "ap-northeast-1", "ap-northeast-2"] 22 | 23 | 24 | @pytest.fixture(params=os.environ["TEST_EI_VERSIONS"].split(",")) 25 | def version(request): 26 | return request.param 27 | 28 | 29 | @pytest.fixture 30 | def repo(request): 31 | return request.config.getoption("--repo") or "sagemaker-tensorflow-serving-eia" 32 | 33 | 34 | @pytest.fixture 35 | def tag(request, version): 36 | return request.config.getoption("--tag") or f"{version}-cpu" 37 | 38 | 39 | @pytest.fixture 40 | def image_uri(registry, region, repo, tag): 41 | return util.image_uri(registry, region, repo, tag) 42 | 43 | 44 | @pytest.fixture(params=os.environ["TEST_EI_INSTANCE_TYPES"].split(",")) 45 | def instance_type(request, region): 46 | return request.param 47 | 48 | 49 | @pytest.fixture(scope="module") 50 | def accelerator_type(request): 51 | return request.config.getoption("--accelerator-type") or "ml.eia1.medium" 52 | 53 | 54 | @pytest.fixture(scope="session") 55 | def model_data(region): 56 | return ("s3://sagemaker-sample-data-{}/tensorflow/model" 57 | "/resnet/resnet_50_v2_fp32_NCHW.tar.gz").format(region) 58 | 59 | 60 | @pytest.fixture 61 | def input_data(): 62 | return {"instances": [[[[random.random() for _ in range(3)] for _ in range(3)]]]} 63 | 64 | 65 | @pytest.fixture 66 | def skip_if_no_accelerator(accelerator_type): 67 | if accelerator_type is None: 68 | pytest.skip("Skipping because accelerator type was not provided") 69 | 70 | 71 | @pytest.fixture 72 | def skip_if_non_supported_ei_region(region): 73 | if region not in EI_SUPPORTED_REGIONS: 74 | pytest.skip("EI is not supported in {}".format(region)) 75 | 76 | 77 | @pytest.mark.skip_if_non_supported_ei_region() 78 | @pytest.mark.skip_if_no_accelerator() 79 | def test_invoke_endpoint(boto_session, sagemaker_client, sagemaker_runtime_client, 80 | model_name, model_data, image_uri, instance_type, accelerator_type, 81 | input_data): 82 | util.create_and_invoke_endpoint(boto_session, sagemaker_client, 83 | sagemaker_runtime_client, model_name, model_data, image_uri, 84 | instance_type, accelerator_type, input_data) 85 | -------------------------------------------------------------------------------- /docker/1.14/Dockerfile.cpu: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | 3 | LABEL maintainer="Amazon AI" 4 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true 5 | 6 | ARG PYTHON=python3 7 | ARG PIP=pip3 8 | ARG TFS_SHORT_VERSION=1.14 9 | 10 | # See 
http://bugs.python.org/issue19846 11 | ENV LANG=C.UTF-8 12 | # Python won’t try to write .pyc or .pyo files on the import of source modules 13 | ENV PYTHONDONTWRITEBYTECODE=1 14 | ENV PYTHONUNBUFFERED=1 15 | ENV SAGEMAKER_TFS_VERSION="${TFS_SHORT_VERSION}" 16 | ENV PATH="$PATH:/sagemaker" 17 | ENV LD_LIBRARY_PATH='/usr/local/lib:$LD_LIBRARY_PATH' 18 | ENV MODEL_BASE_PATH=/models 19 | # The only required piece is the model name in order to differentiate endpoints 20 | ENV MODEL_NAME=model 21 | 22 | # nginx + njs 23 | RUN apt-get update \ 24 | && apt-get -y install --no-install-recommends curl gnupg2 ca-certificates git wget vim build-essential zlib1g-dev \ 25 | && curl -s http://nginx.org/keys/nginx_signing.key | apt-key add - \ 26 | && echo 'deb http://nginx.org/packages/ubuntu/ bionic nginx' >> /etc/apt/sources.list \ 27 | && apt-get update \ 28 | && apt-get -y install --no-install-recommends nginx nginx-module-njs python3 python3-pip python3-setuptools \ 29 | && apt-get clean \ 30 | && rm -rf /var/lib/apt/lists/* 31 | 32 | RUN ${PIP} --no-cache-dir install --upgrade pip setuptools 33 | 34 | # cython, falcon, gunicorn, grpc 35 | RUN ${PIP} install --no-cache-dir \ 36 | awscli==1.16.196 \ 37 | cython==0.29.12 \ 38 | falcon==2.0.0 \ 39 | gunicorn==19.9.0 \ 40 | gevent==1.4.0 \ 41 | requests==2.22.0 \ 42 | grpcio==1.24.1 \ 43 | protobuf==3.10.0 \ 44 | # using --no-dependencies to avoid installing tensorflow binary 45 | && ${PIP} install --no-dependencies --no-cache-dir \ 46 | tensorflow-serving-api==1.14.0 47 | 48 | COPY ./ / 49 | 50 | # Some TF tools expect a "python" binary 51 | RUN ln -s $(which ${PYTHON}) /usr/local/bin/python 52 | 53 | RUN curl https://tensorflow-aws.s3-us-west-2.amazonaws.com/MKL-Libraries/libiomp5.so -o /usr/local/lib/libiomp5.so 54 | RUN curl https://tensorflow-aws.s3-us-west-2.amazonaws.com/MKL-Libraries/libmklml_intel.so -o /usr/local/lib/libmklml_intel.so 55 | 56 | RUN curl https://tensorflow-aws.s3-us-west-2.amazonaws.com/1.14/Serving/CPU-WITH-MKL/tensorflow_model_server -o tensorflow_model_server && \ 57 | chmod 555 tensorflow_model_server && cp tensorflow_model_server /usr/bin/tensorflow_model_server && \ 58 | rm -f tensorflow_model_server 59 | 60 | # Expose ports 61 | # gRPC and REST 62 | EXPOSE 8500 8501 63 | 64 | # Set where models should be stored in the container 65 | RUN mkdir -p ${MODEL_BASE_PATH} 66 | 67 | # Create a script that runs the model server so we can use environment variables 68 | # while also passing in arguments from the docker command line 69 | RUN echo '#!/bin/bash \n\n' > /usr/bin/tf_serving_entrypoint.sh \ 70 | && echo '/usr/bin/tensorflow_model_server --port=8500 --rest_api_port=8501 --model_name=${MODEL_NAME} --model_base_path=${MODEL_BASE_PATH}/${MODEL_NAME} "$@"' >> /usr/bin/tf_serving_entrypoint.sh \ 71 | && chmod +x /usr/bin/tf_serving_entrypoint.sh 72 | 73 | CMD ["/usr/bin/tf_serving_entrypoint.sh"] 74 | -------------------------------------------------------------------------------- /docker/1.13/Dockerfile.cpu: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | 3 | LABEL maintainer="Amazon AI" 4 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true 5 | 6 | ARG PYTHON=python3 7 | ARG PIP=pip3 8 | ARG TFS_SHORT_VERSION=1.13 9 | 10 | # See http://bugs.python.org/issue19846 11 | ENV LANG C.UTF-8 12 | # Python won’t try to write .pyc or .pyo files on the import of source modules 13 | ENV PYTHONDONTWRITEBYTECODE=1 14 | ENV PYTHONUNBUFFERED=1 15 | ENV 
SAGEMAKER_TFS_VERSION="${TFS_SHORT_VERSION}" 16 | ENV PATH="$PATH:/sagemaker" 17 | ENV LD_LIBRARY_PATH='/usr/local/lib:$LD_LIBRARY_PATH' 18 | ENV MODEL_BASE_PATH=/models 19 | # The only required piece is the model name in order to differentiate endpoints 20 | ENV MODEL_NAME=model 21 | 22 | # nginx + njs 23 | RUN apt-get update \ 24 | && apt-get -y install --no-install-recommends curl gnupg2 ca-certificates git wget vim build-essential zlib1g-dev \ 25 | && curl -s http://nginx.org/keys/nginx_signing.key | apt-key add - \ 26 | && echo 'deb http://nginx.org/packages/ubuntu/ bionic nginx' >> /etc/apt/sources.list \ 27 | && apt-get update \ 28 | && apt-get -y install --no-install-recommends nginx nginx-module-njs python3 python3-pip python3-setuptools \ 29 | && apt-get clean \ 30 | && rm -rf /var/lib/apt/lists/* 31 | 32 | RUN ${PIP} --no-cache-dir install --upgrade pip setuptools 33 | 34 | # cython, falcon, gunicorn, grpc 35 | RUN ${PIP} install -U --no-cache-dir \ 36 | awscli==1.16.130 \ 37 | cython==0.29.10 \ 38 | falcon==2.0.0 \ 39 | gunicorn==19.9.0 \ 40 | gevent==1.4.0 \ 41 | requests==2.21.0 \ 42 | grpcio==1.24.1 \ 43 | protobuf==3.10.0 \ 44 | # using --no-dependencies to avoid installing tensorflow binary 45 | && ${PIP} install --no-dependencies --no-cache-dir \ 46 | tensorflow-serving-api==1.13.0 47 | 48 | COPY ./ / 49 | 50 | # Some TF tools expect a "python" binary 51 | RUN ln -s $(which ${PYTHON}) /usr/local/bin/python \ 52 | && ln -s /usr/local/bin/pip3 /usr/bin/pip 53 | 54 | RUN curl https://s3-us-west-2.amazonaws.com/tensorflow-aws/1.13/Serving/CPU-WITH-MKL/libiomp5.so -o /usr/local/lib/libiomp5.so 55 | RUN curl https://s3-us-west-2.amazonaws.com/tensorflow-aws/1.13/Serving/CPU-WITH-MKL/libmklml_intel.so -o /usr/local/lib/libmklml_intel.so 56 | 57 | RUN curl https://s3-us-west-2.amazonaws.com/tensorflow-aws/1.13/Serving/CPU-WITH-MKL/tensorflow_model_server -o tensorflow_model_server \ 58 | && chmod 555 tensorflow_model_server \ 59 | && cp tensorflow_model_server /usr/bin/tensorflow_model_server \ 60 | && rm -f tensorflow_model_server 61 | 62 | # Expose ports 63 | # gRPC and REST 64 | EXPOSE 8500 8501 65 | 66 | # Set where models should be stored in the container 67 | RUN mkdir -p ${MODEL_BASE_PATH} 68 | 69 | # Create a script that runs the model server so we can use environment variables 70 | # while also passing in arguments from the docker command line 71 | RUN echo '#!/bin/bash \n\n' > /usr/bin/tf_serving_entrypoint.sh \ 72 | && echo '/usr/bin/tensorflow_model_server --port=8500 --rest_api_port=8501 --model_name=${MODEL_NAME} --model_base_path=${MODEL_BASE_PATH}/${MODEL_NAME} "$@"' >> /usr/bin/tf_serving_entrypoint.sh \ 73 | && chmod +x /usr/bin/tf_serving_entrypoint.sh 74 | 75 | CMD ["/usr/bin/tf_serving_entrypoint.sh"] 76 | -------------------------------------------------------------------------------- /scripts/shared.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Utility functions for build/test scripts. 
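#
# Typical usage from the sibling build/publish scripts (an illustrative sketch
# only -- the exact docker invocation lives in build.sh and may differ):
#
#   source "$(dirname "$0")/shared.sh"
#   parse_std_args "$@"   # sets $arch, $device, $repository, $aws_region, $full_version, $short_version
#   get_ei_executable     # no-op unless --arch eia
#   docker build -t "$repository:$full_version-$device" \
#       -f "docker/$short_version/Dockerfile.$device" .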
4 | 5 | function error() { 6 | >&2 echo $1 7 | >&2 echo "usage: $0 [--version <version>] [--arch (cpu*|gpu|eia)] [--region <region>]" 8 | exit 1 9 | } 10 | 11 | function get_default_region() { 12 | if [ -n "${AWS_DEFAULT_REGION:-}" ]; then 13 | echo "$AWS_DEFAULT_REGION" 14 | else 15 | aws configure get region 16 | fi 17 | } 18 | 19 | function get_full_version() { 20 | echo $1 | sed 's#^\([0-9][0-9]*\.[0-9][0-9]*\)$#\1.0#' 21 | } 22 | 23 | function get_short_version() { 24 | echo $1 | sed 's#\([0-9][0-9]*\.[0-9][0-9]*\)\.[0-9][0-9]*#\1#' 25 | } 26 | 27 | function get_aws_account() { 28 | aws --region $AWS_DEFAULT_REGION sts --endpoint-url https://sts.$AWS_DEFAULT_REGION.amazonaws.com get-caller-identity --query 'Account' --output text 29 | } 30 | 31 | function get_ei_executable() { 32 | [[ $arch != 'eia' ]] && return 33 | 34 | if [[ -z $(aws s3 ls 's3://amazonei-tensorflow/tensorflow-serving/v'${short_version}'/ubuntu/latest/') ]]; then 35 | echo 'ERROR: cannot find this version in S3 bucket.' 36 | exit 1 37 | fi 38 | 39 | tmpdir=$(mktemp -d) 40 | tar_file=$(aws s3 ls "s3://amazonei-tensorflow/tensorflow-serving/v${short_version}/ubuntu/latest/" | awk '{print $4}') 41 | aws s3 cp "s3://amazonei-tensorflow/tensorflow-serving/v${short_version}/ubuntu/latest/${tar_file}" "$tmpdir/$tar_file" 42 | 43 | tar -C "$tmpdir" -xf "$tmpdir/$tar_file" 44 | 45 | find "$tmpdir" -name amazonei_tensorflow_model_server -exec mv {} docker/build_artifacts/ \; 46 | rm -rf "$tmpdir" 47 | } 48 | 49 | function remove_ei_executable() { 50 | [[ $arch != 'eia' ]] && return 51 | 52 | rm docker/build_artifacts/amazonei_tensorflow_model_server 53 | } 54 | 55 | function get_device_type() { 56 | if [[ $1 = 'eia' ]]; then 57 | echo 'cpu' 58 | else 59 | echo $1 60 | fi 61 | } 62 | 63 | function parse_std_args() { 64 | # defaults 65 | arch='cpu' 66 | version='1.13.0' 67 | repository='sagemaker-tensorflow-serving' 68 | 69 | aws_region=$(get_default_region) 70 | aws_account=$(get_aws_account) 71 | 72 | while [[ $# -gt 0 ]]; do 73 | key="$1" 74 | 75 | case $key in 76 | -v|--version) 77 | version="$2" 78 | shift 79 | shift 80 | ;; 81 | -a|--arch) 82 | arch="$2" 83 | shift 84 | shift 85 | ;; 86 | -r|--region) 87 | aws_region="$2" 88 | shift 89 | shift 90 | ;; 91 | -p|--repository) 92 | repository="$2" 93 | shift 94 | shift 95 | ;; 96 | *) # unknown option 97 | error "unknown option: $1" 98 | shift 99 | ;; 100 | esac 101 | done 102 | 103 | [[ -z "${version// }" ]] && error 'missing version' 104 | [[ "$arch" =~ ^(cpu|gpu|eia)$ ]] || error "invalid arch: $arch" 105 | [[ -z "${aws_region// }" ]] && error 'missing aws region' 106 | 107 | [[ "$arch" = eia ]] && repository=$repository'-'$arch 108 | 109 | full_version=$(get_full_version $version) 110 | short_version=$(get_short_version $version) 111 | device=$(get_device_type $arch) 112 | 113 | true 114 | } 115 | -------------------------------------------------------------------------------- /docker/2.0/Dockerfile.cpu: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | 3 | LABEL maintainer="Amazon AI" 4 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true 5 | 6 | ARG PYTHON=python3 7 | ARG PIP=pip3 8 | ARG TFS_SHORT_VERSION=2.0.1 9 | ARG TFS_URL=https://tensorflow-aws.s3-us-west-2.amazonaws.com/${TFS_SHORT_VERSION}/Serving/CPU-WITH-MKL/tensorflow_model_server 10 | 11 | # See http://bugs.python.org/issue19846 12 | ENV LANG=C.UTF-8 13 | # Python won’t try to write .pyc or .pyo files on the import of source modules 14 | ENV 
PYTHONDONTWRITEBYTECODE=1 15 | ENV PYTHONUNBUFFERED=1 16 | ENV SAGEMAKER_TFS_VERSION="${TFS_SHORT_VERSION}" 17 | ENV PATH="$PATH:/sagemaker" 18 | ENV LD_LIBRARY_PATH='/usr/local/lib:$LD_LIBRARY_PATH' 19 | ENV MODEL_BASE_PATH=/models 20 | # The only required piece is the model name in order to differentiate endpoints 21 | ENV MODEL_NAME=model 22 | ENV DEBIAN_FRONTEND=noninteractive 23 | 24 | # nginx + njs 25 | RUN apt-get update \ 26 | && apt-get -y install --no-install-recommends \ 27 | curl \ 28 | gnupg2 \ 29 | ca-certificates \ 30 | git \ 31 | wget \ 32 | vim \ 33 | build-essential \ 34 | zlib1g-dev \ 35 | && curl -s http://nginx.org/keys/nginx_signing.key | apt-key add - \ 36 | && echo 'deb http://nginx.org/packages/ubuntu/ bionic nginx' >> /etc/apt/sources.list \ 37 | && apt-get update \ 38 | && apt-get -y install --no-install-recommends \ 39 | nginx \ 40 | nginx-module-njs \ 41 | python3 \ 42 | python3-pip \ 43 | python3-setuptools \ 44 | && apt-get clean \ 45 | && rm -rf /var/lib/apt/lists/* 46 | 47 | RUN ${PIP} --no-cache-dir install --upgrade pip setuptools 48 | 49 | # cython, falcon, gunicorn, grpc 50 | RUN ${PIP} install --no-cache-dir \ 51 | awscli==1.16.303 \ 52 | cython==0.29.14 \ 53 | falcon==2.0.0 \ 54 | gunicorn==20.0.4 \ 55 | gevent==1.4.0 \ 56 | requests==2.22.0 \ 57 | grpcio==1.26.0 \ 58 | protobuf==3.11.1 \ 59 | # using --no-dependencies to avoid installing tensorflow binary 60 | && ${PIP} install --no-dependencies --no-cache-dir \ 61 | tensorflow-serving-api==2.0 62 | 63 | COPY ./sagemaker /sagemaker 64 | 65 | # Some TF tools expect a "python" binary 66 | RUN ln -s $(which ${PYTHON}) /usr/local/bin/python 67 | 68 | RUN curl https://tensorflow-aws.s3-us-west-2.amazonaws.com/MKL-Libraries/libiomp5.so -o /usr/local/lib/libiomp5.so 69 | RUN curl https://tensorflow-aws.s3-us-west-2.amazonaws.com/MKL-Libraries/libmklml_intel.so -o /usr/local/lib/libmklml_intel.so 70 | 71 | RUN curl $TFS_URL -o /usr/bin/tensorflow_model_server \ 72 | && chmod 555 /usr/bin/tensorflow_model_server 73 | 74 | # Expose ports 75 | # gRPC and REST 76 | EXPOSE 8500 8501 77 | 78 | # Set where models should be stored in the container 79 | RUN mkdir -p ${MODEL_BASE_PATH} 80 | 81 | # Create a script that runs the model server so we can use environment variables 82 | # while also passing in arguments from the docker command line 83 | RUN echo '#!/bin/bash \n\n' > /usr/bin/tf_serving_entrypoint.sh \ 84 | && echo '/usr/bin/tensorflow_model_server --port=8500 --rest_api_port=8501 --model_name=${MODEL_NAME} --model_base_path=${MODEL_BASE_PATH}/${MODEL_NAME} "$@"' >> /usr/bin/tf_serving_entrypoint.sh \ 85 | && chmod +x /usr/bin/tf_serving_entrypoint.sh 86 | 87 | ADD https://raw.githubusercontent.com/aws/aws-deep-learning-containers-utils/master/deep_learning_container.py /usr/local/bin/deep_learning_container.py 88 | 89 | RUN chmod +x /usr/local/bin/deep_learning_container.py 90 | 91 | RUN curl https://aws-dlc-licenses.s3.amazonaws.com/tensorflow-2.0.1/license.txt -o /license.txt 92 | 93 | CMD ["/usr/bin/tf_serving_entrypoint.sh"] 94 | -------------------------------------------------------------------------------- /docker/2.1/Dockerfile.cpu: -------------------------------------------------------------------------------- 1 | FROM public.ecr.aws/ubuntu/ubuntu:18.04 2 | 3 | LABEL maintainer="Amazon AI" 4 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true 5 | LABEL com.amazonaws.sagemaker.capabilities.multi-models=true 6 | 7 | ARG PYTHON=python3 8 | ARG PIP=pip3 9 | ARG TFS_SHORT_VERSION=2.1 
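# The ARG values above can be overridden at image build time; for example (an
# illustrative invocation only -- the canonical build flags are assembled by
# scripts/build.sh):
#   docker build --build-arg TFS_SHORT_VERSION=2.1 \
#       -f docker/2.1/Dockerfile.cpu -t sagemaker-tensorflow-serving:2.1-cpu .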
10 | ARG TFS_URL=https://tensorflow-aws.s3-us-west-2.amazonaws.com/2.1/Serving/CPU-WITH-MKL/tensorflow_model_server 11 | 12 | # See http://bugs.python.org/issue19846 13 | ENV LANG=C.UTF-8 14 | # Python won’t try to write .pyc or .pyo files on the import of source modules 15 | ENV PYTHONDONTWRITEBYTECODE=1 16 | ENV PYTHONUNBUFFERED=1 17 | ENV SAGEMAKER_TFS_VERSION="${TFS_SHORT_VERSION}" 18 | ENV PATH="$PATH:/sagemaker" 19 | ENV LD_LIBRARY_PATH='/usr/local/lib:$LD_LIBRARY_PATH' 20 | ENV MODEL_BASE_PATH=/models 21 | # The only required piece is the model name in order to differentiate endpoints 22 | ENV MODEL_NAME=model 23 | ENV DEBIAN_FRONTEND=noninteractive 24 | 25 | # nginx + njs 26 | RUN apt-get update \ 27 | && apt-get -y install --no-install-recommends \ 28 | curl \ 29 | gnupg2 \ 30 | ca-certificates \ 31 | git \ 32 | wget \ 33 | vim \ 34 | build-essential \ 35 | zlib1g-dev \ 36 | && curl -s http://nginx.org/keys/nginx_signing.key | apt-key add - \ 37 | && echo 'deb http://nginx.org/packages/ubuntu/ bionic nginx' >> /etc/apt/sources.list \ 38 | && apt-get update \ 39 | && apt-get -y install --no-install-recommends \ 40 | nginx \ 41 | nginx-module-njs \ 42 | python3 \ 43 | python3-pip \ 44 | python3-setuptools \ 45 | && apt-get clean \ 46 | && rm -rf /var/lib/apt/lists/* 47 | 48 | RUN ${PIP} --no-cache-dir install --upgrade pip setuptools 49 | 50 | # cython, falcon, gunicorn, grpc 51 | RUN ${PIP} install --no-cache-dir \ 52 | awscli \ 53 | boto3 \ 54 | cython==0.29.14 \ 55 | falcon==2.0.0 \ 56 | gunicorn==20.0.4 \ 57 | gevent==1.4.0 \ 58 | requests==2.22.0 \ 59 | grpcio==1.27.1 \ 60 | protobuf==3.11.1 \ 61 | # using --no-dependencies to avoid installing tensorflow binary 62 | && ${PIP} install --no-dependencies --no-cache-dir \ 63 | tensorflow-serving-api==2.1.0 64 | 65 | COPY ./sagemaker /sagemaker 66 | 67 | # Some TF tools expect a "python" binary 68 | RUN ln -s $(which ${PYTHON}) /usr/local/bin/python 69 | 70 | RUN curl https://tensorflow-aws.s3-us-west-2.amazonaws.com/MKL-Libraries/libiomp5.so -o /usr/local/lib/libiomp5.so 71 | RUN curl https://tensorflow-aws.s3-us-west-2.amazonaws.com/MKL-Libraries/libmklml_intel.so -o /usr/local/lib/libmklml_intel.so 72 | 73 | RUN curl $TFS_URL -o /usr/bin/tensorflow_model_server \ 74 | && chmod 555 /usr/bin/tensorflow_model_server 75 | 76 | # Expose ports 77 | # gRPC and REST 78 | EXPOSE 8500 8501 79 | 80 | # Set where models should be stored in the container 81 | RUN mkdir -p ${MODEL_BASE_PATH} 82 | 83 | # Create a script that runs the model server so we can use environment variables 84 | # while also passing in arguments from the docker command line 85 | RUN echo '#!/bin/bash \n\n' > /usr/bin/tf_serving_entrypoint.sh \ 86 | && echo '/usr/bin/tensorflow_model_server --port=8500 --rest_api_port=8501 --model_name=${MODEL_NAME} --model_base_path=${MODEL_BASE_PATH}/${MODEL_NAME} "$@"' >> /usr/bin/tf_serving_entrypoint.sh \ 87 | && chmod +x /usr/bin/tf_serving_entrypoint.sh 88 | 89 | ADD https://raw.githubusercontent.com/aws/aws-deep-learning-containers-utils/master/deep_learning_container.py /usr/local/bin/deep_learning_container.py 90 | 91 | RUN chmod +x /usr/local/bin/deep_learning_container.py 92 | 93 | RUN curl https://aws-dlc-licenses.s3.amazonaws.com/tensorflow-2.1/license.txt -o /license.txt 94 | 95 | CMD ["/usr/bin/tf_serving_entrypoint.sh"] 96 | -------------------------------------------------------------------------------- /test/integration/local/test_multi_tfs.py: 
-------------------------------------------------------------------------------- 1 | # Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 13 | 14 | import json 15 | import os 16 | import subprocess 17 | import sys 18 | import time 19 | 20 | import pytest 21 | import requests 22 | 23 | BASE_URL = "http://localhost:8080/invocations" 24 | 25 | 26 | @pytest.fixture(scope="session", autouse=True) 27 | def volume(): 28 | try: 29 | model_dir = os.path.abspath("test/resources/models") 30 | subprocess.check_call( 31 | "docker volume create --name multi_tfs_model_volume --opt type=none " 32 | "--opt device={} --opt o=bind".format(model_dir).split()) 33 | yield model_dir 34 | finally: 35 | subprocess.check_call("docker volume rm multi_tfs_model_volume".split()) 36 | 37 | 38 | @pytest.fixture(scope="module", autouse=True, params=[True, False]) 39 | def container(request, docker_base_name, tag, runtime_config): 40 | try: 41 | if request.param: 42 | batching_config = " -e SAGEMAKER_TFS_ENABLE_BATCHING=true" 43 | else: 44 | batching_config = "" 45 | command = ( 46 | "docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080" 47 | " --mount type=volume,source=multi_tfs_model_volume,target=/opt/ml/model,readonly" 48 | " -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info" 49 | " -e SAGEMAKER_BIND_TO_PORT=8080" 50 | " -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999" 51 | " -e SAGEMAKER_TFS_INSTANCE_COUNT=2" 52 | " -e SAGEMAKER_GUNICORN_WORKERS=4" 53 | " -e SAGEMAKER_TFS_INTER_OP_PARALLELISM=1" 54 | " -e SAGEMAKER_TFS_INTRA_OP_PARALLELISM=1" 55 | " {}" 56 | " {}:{} serve" 57 | ).format(runtime_config, batching_config, docker_base_name, tag) 58 | 59 | proc = subprocess.Popen(command.split(), stdout=sys.stdout, stderr=subprocess.STDOUT) 60 | 61 | attempts = 0 62 | 63 | while attempts < 40: 64 | time.sleep(3) 65 | try: 66 | res_code = requests.get("http://localhost:8080/ping").status_code 67 | if res_code == 200: 68 | break 69 | except: 70 | attempts += 1 71 | pass 72 | 73 | yield proc.pid 74 | finally: 75 | subprocess.check_call("docker rm -f sagemaker-tensorflow-serving-test".split()) 76 | 77 | 78 | def make_request(data, content_type="application/json", method="predict", version=None): 79 | custom_attributes = "tfs-model-name=half_plus_three,tfs-method={}".format(method) 80 | if version: 81 | custom_attributes += ",tfs-model-version={}".format(version) 82 | 83 | headers = { 84 | "Content-Type": content_type, 85 | "X-Amzn-SageMaker-Custom-Attributes": custom_attributes, 86 | } 87 | response = requests.post(BASE_URL, data=data, headers=headers) 88 | return json.loads(response.content.decode("utf-8")) 89 | 90 | 91 | def test_predict(): 92 | x = { 93 | "instances": [1.0, 2.0, 5.0] 94 | } 95 | 96 | y = make_request(json.dumps(x)) 97 | assert y == {"predictions": [3.5, 4.0, 5.5]} 98 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 
Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check [existing open](https://github.com/aws/sagemaker-tfs-container/issues), or [recently closed](https://github.com/aws/sagemaker-tfs-container/issues?utf8=%E2%9C%93&q=is%3Aissue%20is%3Aclosed%20), issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *master* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any ['help wanted'](https://github.com/aws/sagemaker-tfs-container/labels/help%20wanted) issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. 
55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](https://github.com/aws/sagemaker-tfs-container/blob/master/LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 60 | 61 | We may ask you to sign a [Contributor License Agreement (CLA)](http://en.wikipedia.org/wiki/Contributor_License_Agreement) for larger changes. 62 | -------------------------------------------------------------------------------- /docker/build_artifacts/deep_learning_container.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 13 | import re 14 | import json 15 | import logging 16 | import requests 17 | 18 | 19 | def _validate_instance_id(instance_id): 20 | """ 21 | Validate instance ID 22 | """ 23 | instance_id_regex = r"^(i-\S{17})" 24 | compiled_regex = re.compile(instance_id_regex) 25 | match = compiled_regex.match(instance_id) 26 | 27 | if not match: 28 | return None 29 | 30 | return match.group(1) 31 | 32 | 33 | def _retrieve_instance_id(): 34 | """ 35 | Retrieve instance ID from instance metadata service 36 | """ 37 | instance_id = None 38 | url = "http://169.254.169.254/latest/meta-data/instance-id" 39 | response = requests_helper(url, timeout=0.1) 40 | 41 | if response is not None: 42 | instance_id = _validate_instance_id(response.text) 43 | 44 | return instance_id 45 | 46 | 47 | def _retrieve_instance_region(): 48 | """ 49 | Retrieve instance region from instance metadata service 50 | """ 51 | region = None 52 | valid_regions = [ 53 | "ap-northeast-1", 54 | "ap-northeast-2", 55 | "ap-southeast-1", 56 | "ap-southeast-2", 57 | "ap-south-1", 58 | "ca-central-1", 59 | "eu-central-1", 60 | "eu-north-1", 61 | "eu-west-1", 62 | "eu-west-2", 63 | "eu-west-3", 64 | "sa-east-1", 65 | "us-east-1", 66 | "us-east-2", 67 | "us-west-1", 68 | "us-west-2", 69 | ] 70 | 71 | url = "http://169.254.169.254/latest/dynamic/instance-identity/document" 72 | response = requests_helper(url, timeout=0.1) 73 | 74 | if response is not None: 75 | response_json = json.loads(response.text) 76 | 77 | if response_json["region"] in valid_regions: 78 | region = response_json["region"] 79 | 80 | return region 81 | 82 | 83 | def query_bucket(): 84 | """ 85 | GET request on an empty object from an Amazon S3 bucket 86 | """ 87 | response = None 88 | instance_id = _retrieve_instance_id() 89 | region = _retrieve_instance_region() 90 | 91 | if instance_id is not None and region is not None: 92 | url = ( 93 | "https://aws-deep-learning-containers-{0}.s3.{0}.amazonaws.com" 94 | "/dlc-containers.txt?x-instance-id={1}".format(region, instance_id) 95 | ) 96 | response = requests_helper(url, timeout=0.2) 97 | 98 | logging.debug("Query bucket finished: {}".format(response)) 99 | 100 | return response 101 | 102 | 103 | def requests_helper(url, timeout): 104 | response = None 105 | try: 106 | response = requests.get(url, timeout=timeout) 107 | except 
requests.exceptions.RequestException as e: 108 | logging.error("Request exception: {}".format(e)) 109 | 110 | return response 111 | 112 | 113 | def main(): 114 | """ 115 | Invoke bucket query 116 | """ 117 | # Logs are not necessary for normal run. Remove this line while debugging. 118 | logging.getLogger().disabled = True 119 | 120 | logging.basicConfig(level=logging.ERROR) 121 | query_bucket() 122 | 123 | 124 | if __name__ == "__main__": 125 | main() 126 | -------------------------------------------------------------------------------- /docker/1.15/Dockerfile.cpu: -------------------------------------------------------------------------------- 1 | FROM public.ecr.aws/ubuntu/ubuntu:18.04 2 | 3 | LABEL maintainer="Amazon AI" 4 | # Specify LABEL for inference pipelines to use SAGEMAKER_BIND_TO_PORT 5 | # https://docs.aws.amazon.com/sagemaker/latest/dg/inference-pipeline-real-time.html 6 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true 7 | LABEL com.amazonaws.sagemaker.capabilities.multi-models=true 8 | 9 | # Add arguments to achieve the version, python and url 10 | ARG PYTHON=python3 11 | ARG PIP=pip3 12 | ARG TFS_SHORT_VERSION=1.15.2 13 | ARG TF_S3_URL=https://tensorflow-aws.s3-us-west-2.amazonaws.com 14 | ARG TF_MODEL_SERVER_SOURCE=${TF_S3_URL}/${TFS_SHORT_VERSION}/Serving/CPU-WITH-MKL/tensorflow_model_server 15 | 16 | # See http://bugs.python.org/issue19846 17 | ENV LANG=C.UTF-8 18 | # Python won’t try to write .pyc or .pyo files on the import of source modules 19 | ENV PYTHONDONTWRITEBYTECODE=1 20 | ENV PYTHONUNBUFFERED=1 21 | ENV SAGEMAKER_TFS_VERSION="${TFS_SHORT_VERSION}" 22 | ENV PATH="$PATH:/sagemaker" 23 | ENV LD_LIBRARY_PATH='/usr/local/lib:$LD_LIBRARY_PATH' 24 | ENV MODEL_BASE_PATH=/models 25 | # The only required piece is the model name in order to differentiate endpoints 26 | ENV MODEL_NAME=model 27 | # To prevent user interaction when installing time zone data package 28 | ENV DEBIAN_FRONTEND=noninteractive 29 | 30 | # nginx + njs 31 | RUN apt-get update \ 32 | && apt-get -y install --no-install-recommends \ 33 | curl \ 34 | gnupg2 \ 35 | ca-certificates \ 36 | git \ 37 | wget \ 38 | vim \ 39 | && curl -s http://nginx.org/keys/nginx_signing.key | apt-key add - \ 40 | && echo 'deb http://nginx.org/packages/ubuntu/ bionic nginx' >> /etc/apt/sources.list \ 41 | && apt-get update \ 42 | && apt-get -y install --no-install-recommends \ 43 | nginx \ 44 | nginx-module-njs \ 45 | python3 \ 46 | python3-pip \ 47 | python3-setuptools \ 48 | && apt-get clean \ 49 | && rm -rf /var/lib/apt/lists/* 50 | 51 | RUN ${PIP} --no-cache-dir install --upgrade \ 52 | pip \ 53 | setuptools 54 | 55 | # cython, falcon, gunicorn, grpc 56 | RUN ${PIP} install --no-cache-dir \ 57 | awscli \ 58 | boto3 \ 59 | pyYAML==5.3.1 \ 60 | cython==0.29.12 \ 61 | falcon==2.0.0 \ 62 | gunicorn==19.9.0 \ 63 | gevent==1.4.0 \ 64 | requests==2.22.0 \ 65 | grpcio==1.24.1 \ 66 | protobuf==3.10.0 \ 67 | # using --no-dependencies to avoid installing tensorflow binary 68 | && ${PIP} install --no-dependencies --no-cache-dir \ 69 | tensorflow-serving-api==1.15.0 70 | 71 | COPY sagemaker /sagemaker 72 | 73 | WORKDIR / 74 | 75 | # Some TF tools expect a "python" binary 76 | RUN ln -s $(which ${PYTHON}) /usr/local/bin/python \ 77 | && ln -s /usr/local/bin/pip3 /usr/bin/pip 78 | 79 | RUN curl ${TF_S3_URL}/MKL-Libraries/libiomp5.so -o /usr/local/lib/libiomp5.so \ 80 | && curl ${TF_S3_URL}/MKL-Libraries/libmklml_intel.so -o /usr/local/lib/libmklml_intel.so 81 | 82 | RUN curl ${TF_MODEL_SERVER_SOURCE} -o 
/usr/bin/tensorflow_model_server \ 83 | && chmod 555 /usr/bin/tensorflow_model_server 84 | 85 | # Expose ports 86 | # gRPC and REST 87 | EXPOSE 8500 8501 88 | 89 | # Set where models should be stored in the container 90 | RUN mkdir -p ${MODEL_BASE_PATH} 91 | 92 | # Create a script that runs the model server so we can use environment variables 93 | # while also passing in arguments from the docker command line 94 | RUN echo '#!/bin/bash \n\n' > /usr/bin/tf_serving_entrypoint.sh \ 95 | && echo '/usr/bin/tensorflow_model_server --port=8500 --rest_api_port=8501 --model_name=${MODEL_NAME} --model_base_path=${MODEL_BASE_PATH}/${MODEL_NAME} "$@"' >> /usr/bin/tf_serving_entrypoint.sh \ 96 | && chmod +x /usr/bin/tf_serving_entrypoint.sh 97 | 98 | ADD https://raw.githubusercontent.com/aws/aws-deep-learning-containers-utils/master/deep_learning_container.py /usr/local/bin/deep_learning_container.py 99 | 100 | RUN chmod +x /usr/local/bin/deep_learning_container.py 101 | 102 | RUN curl https://aws-dlc-licenses.s3.amazonaws.com/tensorflow/license.txt -o /license.txt 103 | 104 | CMD ["/usr/bin/tf_serving_entrypoint.sh"] 105 | -------------------------------------------------------------------------------- /test/perf/ec2-perftest.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for i in $(seq 1 5); do python perftest_endpoint.py --count 5000 --warmup 100 --workers 4 --model sm-c5xl >> sm-perftest.log; done 4 | for i in $(seq 1 5); do python perftest_endpoint.py --count 5000 --warmup 100 --workers 8 --model sm-c5xl >> sm-perftest.log; done 5 | for i in $(seq 1 5); do python perftest_endpoint.py --count 5000 --warmup 100 --workers 16 --model sm-c5xl >> sm-perftest.log; done 6 | for i in $(seq 1 5); do python perftest_endpoint.py --count 5000 --warmup 100 --workers 32 --model sm-c5xl >> sm-perftest.log; done 7 | for i in $(seq 1 5); do python perftest_endpoint.py --count 5000 --warmup 100 --workers 4 --model sm-c518xl >> sm-perftest.log; done 8 | for i in $(seq 1 5); do python perftest_endpoint.py --count 5000 --warmup 100 --workers 8 --model sm-c518xl >> sm-perftest.log; done 9 | for i in $(seq 1 5); do python perftest_endpoint.py --count 5000 --warmup 100 --workers 16 --model sm-c518xl >> sm-perftest.log; done 10 | for i in $(seq 1 5); do python perftest_endpoint.py --count 5000 --warmup 100 --workers 32 --model sm-c518xl >> sm-perftest.log; done 11 | for i in $(seq 1 5); do python perftest_endpoint.py --count 5000 --warmup 100 --workers 64 --model sm-c518xl >> sm-perftest.log; done 12 | 13 | for i in $(seq 1 5); do python perftest_endpoint.py --count 5000 --warmup 100 --workers 4 --model tfs-c5xl >> tfs-perftest.log; done 14 | for i in $(seq 1 5); do python perftest_endpoint.py --count 5000 --warmup 100 --workers 8 --model tfs-c5xl >> tfs-perftest.log; done 15 | for i in $(seq 1 5); do python perftest_endpoint.py --count 5000 --warmup 100 --workers 16 --model tfs-c5xl >> tfs-perftest.log; done 16 | for i in $(seq 1 5); do python perftest_endpoint.py --count 5000 --warmup 100 --workers 32 --model tfs-c5xl >> tfs-perftest.log; done 17 | for i in $(seq 1 5); do python perftest_endpoint.py --count 5000 --warmup 100 --workers 4 --model tfs-c518xl >> tfs-perftest.log; done 18 | for i in $(seq 1 5); do python perftest_endpoint.py --count 5000 --warmup 100 --workers 8 --model tfs-c518xl >> tfs-perftest.log; done 19 | for i in $(seq 1 5); do python perftest_endpoint.py --count 5000 --warmup 100 --workers 16 --model tfs-c518xl >> tfs-perftest.log; done 20 
| for i in $(seq 1 5); do python perftest_endpoint.py --count 5000 --warmup 100 --workers 32 --model tfs-c518xl >> tfs-perftest.log; done 21 | for i in $(seq 1 5); do python perftest_endpoint.py --count 5000 --warmup 100 --workers 64 --model tfs-c518xl >> tfs-perftest.log; done 22 | for i in $(seq 1 5); do python perftest_endpoint.py --count 5000 --warmup 100 --workers 128 --model tfs-c518xl >> tfs-perftest.log; done 23 | 24 | 25 | for i in $(seq 1 5); do python test/perf/perftest_endpoint.py --count 5000 --warmup 100 --workers 4 --model tfs-p2xl >> tfs-perftest.log; done 26 | for i in $(seq 1 5); do python test/perf/perftest_endpoint.py --count 5000 --warmup 100 --workers 8 --model tfs-p2xl >> tfs-perftest.log; done 27 | for i in $(seq 1 5); do python test/perf/perftest_endpoint.py --count 5000 --warmup 100 --workers 16 --model tfs-p2xl >> tfs-perftest.log; done 28 | for i in $(seq 1 5); do python test/perf/perftest_endpoint.py --count 5000 --warmup 100 --workers 32 --model tfs-p2xl >> tfs-perftest.log; done 29 | for i in $(seq 1 5); do python test/perf/perftest_endpoint.py --count 5000 --warmup 100 --workers 64 --model tfs-p2xl >> tfs-perftest.log; done 30 | for i in $(seq 1 5); do python test/perf/perftest_endpoint.py --count 5000 --warmup 100 --workers 4 --model tfs-p316xl >> tfs-perftest.log; done 31 | for i in $(seq 1 5); do python test/perf/perftest_endpoint.py --count 5000 --warmup 100 --workers 8 --model tfs-p316xl >> tfs-perftest.log; done 32 | for i in $(seq 1 5); do python test/perf/perftest_endpoint.py --count 5000 --warmup 100 --workers 16 --model tfs-p316xl >> tfs-perftest.log; done 33 | for i in $(seq 1 5); do python test/perf/perftest_endpoint.py --count 5000 --warmup 100 --workers 32 --model tfs-p316xl >> tfs-perftest.log; done 34 | for i in $(seq 1 5); do python test/perf/perftest_endpoint.py --count 5000 --warmup 100 --workers 64 --model tfs-p316xl >> tfs-perftest.log; done 35 | for i in $(seq 1 5); do python test/perf/perftest_endpoint.py --count 5000 --warmup 100 --workers 128 --model tfs-p316xl >> tfs-perftest.log; done 36 | -------------------------------------------------------------------------------- /docker/1.14/Dockerfile.eia: -------------------------------------------------------------------------------- 1 | FROM public.ecr.aws/e2s1w5p1/ubuntu:16.04 2 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true 3 | 4 | ARG TFS_SHORT_VERSION=1.14 5 | ARG S3_TF_VERSION=1-14-0 6 | ARG S3_TF_EI_VERSION=1-4 7 | ARG PYTHON=python3 8 | ARG PYTHON_VERSION=3.6.6 9 | ARG HEALTH_CHECK_VERSION=1.5.3 10 | 11 | # See http://bugs.python.org/issue19846 12 | ENV LANG=C.UTF-8 13 | ENV PYTHONDONTWRITEBYTECODE=1 14 | ENV PYTHONUNBUFFERED=1 15 | ENV MODEL_BASE_PATH=/models 16 | ENV MODEL_NAME=model 17 | ENV SAGEMAKER_TFS_VERSION="${TFS_SHORT_VERSION}" 18 | ENV PATH="$PATH:/sagemaker" 19 | 20 | # nginx + njs 21 | RUN apt-get update \ 22 | && apt-get -y install --no-install-recommends \ 23 | build-essential \ 24 | ca-certificates \ 25 | curl \ 26 | git \ 27 | gnupg2 \ 28 | vim \ 29 | wget \ 30 | zlib1g-dev \ 31 | && curl -s http://nginx.org/keys/nginx_signing.key | apt-key add - \ 32 | && echo 'deb http://nginx.org/packages/ubuntu/ xenial nginx' >> /etc/apt/sources.list \ 33 | && apt-get update \ 34 | && apt-get -y install --no-install-recommends nginx wget nginx-module-njs \ 35 | && apt-get clean \ 36 | && rm -rf /var/lib/apt/lists/* 37 | 38 | RUN wget https://www.python.org/ftp/python/$PYTHON_VERSION/Python-$PYTHON_VERSION.tgz \ 39 | && tar -xvf 
Python-$PYTHON_VERSION.tgz \ 40 | && cd Python-$PYTHON_VERSION \ 41 | && ./configure \ 42 | && make \ 43 | && make install \ 44 | && apt-get update \ 45 | && apt-get install -y --no-install-recommends \ 46 | libbz2-dev \ 47 | libc6-dev \ 48 | libgdbm-dev \ 49 | libncursesw5-dev \ 50 | libreadline-gplv2-dev \ 51 | libsqlite3-dev \ 52 | libssl-dev \ 53 | tk-dev \ 54 | && rm -rf /var/lib/apt/lists/* \ 55 | && make \ 56 | && make install \ 57 | && rm -rf ../Python-$PYTHON_VERSION* \ 58 | && ln -s /usr/local/bin/pip3 /usr/bin/pip \ 59 | && ln -s $(which ${PYTHON}) /usr/local/bin/python 60 | 61 | # Some TF tools expect a "python" binary 62 | RUN pip install -U --no-cache-dir --upgrade \ 63 | pip \ 64 | setuptools 65 | 66 | # cython, falcon, gunicorn, grpc 67 | RUN pip install --no-cache-dir \ 68 | cython==0.29.13 \ 69 | falcon==2.0.0 \ 70 | gunicorn==19.9.0 \ 71 | gevent==1.4.0 \ 72 | requests==2.22.0 \ 73 | docutils==0.14 \ 74 | awscli==1.16.196 \ 75 | grpcio==1.24.1 \ 76 | protobuf==3.10.0 \ 77 | # using --no-dependencies to avoid installing tensorflow binary 78 | && pip install --no-dependencies --no-cache-dir \ 79 | tensorflow-serving-api==1.14.0 80 | 81 | COPY sagemaker /sagemaker 82 | 83 | RUN wget https://amazonei-tools.s3.amazonaws.com/v${HEALTH_CHECK_VERSION}/ei_tools_${HEALTH_CHECK_VERSION}.tar.gz -O /opt/ei_tools_${HEALTH_CHECK_VERSION}.tar.gz \ 84 | && tar -xvf /opt/ei_tools_${HEALTH_CHECK_VERSION}.tar.gz -C /opt/ \ 85 | && rm -rf /opt/ei_tools_${HEALTH_CHECK_VERSION}.tar.gz \ 86 | && chmod a+x /opt/ei_tools/bin/health_check \ 87 | && mkdir -p /opt/ei_health_check/bin \ 88 | && ln -s /opt/ei_tools/bin/health_check /opt/ei_health_check/bin/health_check \ 89 | && ln -s /opt/ei_tools/lib /opt/ei_health_check/lib 90 | 91 | # Expose ports 92 | EXPOSE 8500 8501 93 | 94 | RUN wget https://amazonei-tensorflow.s3.amazonaws.com/tensorflow-serving/v1.14/ubuntu/archive/tensorflow-serving-${S3_TF_VERSION}-ubuntu-ei-${S3_TF_EI_VERSION}.tar.gz \ 95 | -O /tmp/tensorflow-serving-${S3_TF_VERSION}-ubuntu-ei-${S3_TF_EI_VERSION}.tar.gz \ 96 | && cd /tmp \ 97 | && tar zxf tensorflow-serving-${S3_TF_VERSION}-ubuntu-ei-${S3_TF_EI_VERSION}.tar.gz \ 98 | && mv tensorflow-serving-${S3_TF_VERSION}-ubuntu-ei-${S3_TF_EI_VERSION}/amazonei_tensorflow_model_server /usr/bin/tensorflow_model_server \ 99 | && chmod +x /usr/bin/tensorflow_model_server \ 100 | && rm -rf tensorflow-serving-${S3_TF_VERSION}* 101 | 102 | # Set where models should be stored in the container 103 | RUN mkdir -p ${MODEL_BASE_PATH} 104 | 105 | RUN echo '#!/bin/bash \n\n' > /usr/bin/tf_serving_entrypoint.sh \ 106 | && echo '/usr/bin/tensorflow_model_server --port=8500 --rest_api_port=8501 --model_name=${MODEL_NAME} --model_base_path=${MODEL_BASE_PATH}/${MODEL_NAME} "$@"' >> /usr/bin/tf_serving_entrypoint.sh \ 107 | && chmod +x /usr/bin/tf_serving_entrypoint.sh 108 | 109 | CMD ["/usr/bin/tf_serving_entrypoint.sh"] 110 | -------------------------------------------------------------------------------- /docker/2.0/Dockerfile.eia: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | 3 | LABEL maintainer="Amazon AI" 4 | # Specify LABEL for inference pipelines to use SAGEMAKER_BIND_TO_PORT 5 | # https://docs.aws.amazon.com/sagemaker/latest/dg/inference-pipeline-real-time.html 6 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true 7 | 8 | # Add arguments to achieve the version, python and url 9 | ARG PYTHON=python3 10 | ARG PIP=pip3 11 | ARG HEALTH_CHECK_VERSION=1.6.3 12 | ARG 
S3_TF_EI_VERSION=1-5 13 | ARG S3_TF_VERSION=2-0-0 14 | 15 | 16 | # See http://bugs.python.org/issue19846 17 | ENV LANG=C.UTF-8 18 | # Python won’t try to write .pyc or .pyo files on the import of source modules 19 | ENV PYTHONDONTWRITEBYTECODE=1 20 | ENV PYTHONUNBUFFERED=1 21 | ENV SAGEMAKER_TFS_VERSION="${S3_TF_VERSION}" 22 | ENV PATH="$PATH:/sagemaker" 23 | ENV LD_LIBRARY_PATH='/usr/local/lib:$LD_LIBRARY_PATH' 24 | ENV MODEL_BASE_PATH=/models 25 | # The only required piece is the model name in order to differentiate endpoints 26 | ENV MODEL_NAME=model 27 | # To prevent user interaction when installing time zone data package 28 | ENV DEBIAN_FRONTEND=noninteractive 29 | 30 | # nginx + njs 31 | RUN apt-get update \ 32 | && apt-get -y install --no-install-recommends \ 33 | curl \ 34 | gnupg2 \ 35 | ca-certificates \ 36 | git \ 37 | wget \ 38 | vim \ 39 | && curl -s http://nginx.org/keys/nginx_signing.key | apt-key add - \ 40 | && echo 'deb http://nginx.org/packages/ubuntu/ bionic nginx' >> /etc/apt/sources.list \ 41 | && apt-get update \ 42 | && apt-get -y install --no-install-recommends \ 43 | nginx \ 44 | nginx-module-njs \ 45 | python3 \ 46 | python3-pip \ 47 | python3-setuptools \ 48 | && apt-get clean \ 49 | && rm -rf /var/lib/apt/lists/* 50 | 51 | RUN ${PIP} --no-cache-dir install --upgrade \ 52 | pip \ 53 | setuptools 54 | 55 | # cython, falcon, gunicorn, grpc 56 | RUN ${PIP} install --no-cache-dir \ 57 | awscli==1.18.32 \ 58 | cython==0.29.16 \ 59 | falcon==2.0.0 \ 60 | gunicorn==20.0.4 \ 61 | gevent==1.4.0 \ 62 | requests==2.23.0 \ 63 | grpcio==1.27.2 \ 64 | protobuf==3.11.3 \ 65 | # using --no-dependencies to avoid installing tensorflow binary 66 | && ${PIP} install --no-dependencies --no-cache-dir \ 67 | tensorflow-serving-api==2.0.0 68 | 69 | COPY sagemaker /sagemaker 70 | 71 | # Some TF tools expect a "python" binary 72 | RUN ln -s $(which ${PYTHON}) /usr/local/bin/python \ 73 | && ln -s /usr/local/bin/pip3 /usr/bin/pip 74 | 75 | # Get EI tools 76 | RUN wget https://amazonei-tools.s3.amazonaws.com/v${HEALTH_CHECK_VERSION}/ei_tools_${HEALTH_CHECK_VERSION}.tar.gz -O /opt/ei_tools_${HEALTH_CHECK_VERSION}.tar.gz \ 77 | && tar -xvf /opt/ei_tools_${HEALTH_CHECK_VERSION}.tar.gz -C /opt/ \ 78 | && rm -rf /opt/ei_tools_${HEALTH_CHECK_VERSION}.tar.gz \ 79 | && chmod a+x /opt/ei_tools/bin/health_check \ 80 | && mkdir -p /opt/ei_health_check/bin \ 81 | && ln -s /opt/ei_tools/bin/health_check /opt/ei_health_check/bin/health_check \ 82 | && ln -s /opt/ei_tools/lib /opt/ei_health_check/lib 83 | 84 | RUN wget https://amazonei-tensorflow.s3.amazonaws.com/tensorflow-serving/v2.0/archive/tensorflow-serving-${S3_TF_VERSION}-ei-${S3_TF_EI_VERSION}.tar.gz \ 85 | -O /tmp/tensorflow-serving-${S3_TF_VERSION}-ei-${S3_TF_EI_VERSION}.tar.gz \ 86 | && cd /tmp \ 87 | && tar zxf tensorflow-serving-${S3_TF_VERSION}-ei-${S3_TF_EI_VERSION}.tar.gz \ 88 | && mv tensorflow-serving-${S3_TF_VERSION}-ei-${S3_TF_EI_VERSION}/amazonei_tensorflow_model_server /usr/bin/tensorflow_model_server \ 89 | && chmod +x /usr/bin/tensorflow_model_server \ 90 | && rm -rf tensorflow-serving-${S3_TF_VERSION}* 91 | 92 | 93 | # Expose ports 94 | # gRPC and REST 95 | EXPOSE 8500 8501 96 | 97 | # Set where models should be stored in the container 98 | RUN mkdir -p ${MODEL_BASE_PATH} 99 | 100 | # Create a script that runs the model server so we can use environment variables 101 | # while also passing in arguments from the docker command line 102 | RUN echo '#!/bin/bash \n\n' > /usr/bin/tf_serving_entrypoint.sh \ 103 | && echo 
'/usr/bin/tensorflow_model_server --port=8500 --rest_api_port=8501 --model_name=${MODEL_NAME} --model_base_path=${MODEL_BASE_PATH}/${MODEL_NAME} "$@"' >> /usr/bin/tf_serving_entrypoint.sh \ 104 | && chmod +x /usr/bin/tf_serving_entrypoint.sh 105 | 106 | RUN curl https://aws-dlc-licenses.s3.amazonaws.com/tensorflow-2.0/license.txt -o /license.txt 107 | 108 | CMD ["/usr/bin/tf_serving_entrypoint.sh"] 109 | -------------------------------------------------------------------------------- /test/unit/test_proxy_client.py: -------------------------------------------------------------------------------- 1 | import unittest.mock as mock 2 | import pytest 3 | from tensorflow_serving.config import model_server_config_pb2 4 | 5 | from container.sagemaker import proxy_client 6 | 7 | 8 | @pytest.fixture(autouse=True) 9 | def create_sagemaker_folder(tmpdir): 10 | tmpdir.join('sagemaker').ensure(dir=True) 11 | 12 | proxy_client.MODEL_CONFIG_FILE = str(tmpdir) + proxy_client.MODEL_CONFIG_FILE 13 | proxy_client.DEFAULT_LOCK_FILE = str(tmpdir) + proxy_client.DEFAULT_LOCK_FILE 14 | 15 | 16 | def test_grpc_add_model_no_config_file(): 17 | client = proxy_client.GRPCProxyClient(port='9090') 18 | 19 | with pytest.raises(FileNotFoundError) as e: 20 | assert client.add_model('my-model', '/opt/ml/model_path') 21 | assert 'No such file or directory' in str(e.value) 22 | 23 | 24 | @mock.patch('tensorflow_serving.apis.model_management_pb2.ReloadConfigRequest') 25 | @mock.patch('grpc.insecure_channel') 26 | def test_grpc_add_model_call(channel, ReloadConfigRequest): 27 | config = 'model_config_list: {\n}\n' 28 | with open(proxy_client.MODEL_CONFIG_FILE, 'w') as f: 29 | f.write(config) 30 | 31 | client = proxy_client.GRPCProxyClient(port='9090') 32 | client.add_model('my-model', '/opt/ml/model_path') 33 | 34 | calls = [mock.call('0.0.0.0:9090'), 35 | mock.call().unary_unary('/tensorflow.serving.ModelService/GetModelStatus', 36 | request_serializer=mock.ANY, response_deserializer=mock.ANY), 37 | mock.call().unary_unary('/tensorflow.serving.ModelService/HandleReloadConfigRequest', 38 | request_serializer=mock.ANY, response_deserializer=mock.ANY), 39 | mock.call().unary_unary()(ReloadConfigRequest()) 40 | ] 41 | 42 | channel.assert_has_calls(calls) 43 | 44 | config_list = model_server_config_pb2.ModelConfigList() 45 | new_model_config = config_list.config.add() 46 | new_model_config.name = 'my-model' 47 | new_model_config.base_path = '/opt/ml/model_path' 48 | new_model_config.model_platform = 'tensorflow' 49 | 50 | model_server_config = model_server_config_pb2.ModelServerConfig() 51 | model_server_config.model_config_list.MergeFrom(config_list) 52 | 53 | ReloadConfigRequest().config.CopyFrom.assert_called_with(model_server_config) 54 | 55 | expected = 'model_config_list: {\n' 56 | expected += ' config: {\n' 57 | expected += ' name: "my-model",\n' 58 | expected += ' base_path: "/opt/ml/model_path",\n' 59 | expected += ' model_platform: "tensorflow"\n' 60 | expected += ' }\n' 61 | expected += '}\n' 62 | 63 | with open(proxy_client.MODEL_CONFIG_FILE, 'r') as file: 64 | assert file.read() == expected 65 | 66 | 67 | @mock.patch('tensorflow_serving.apis.model_management_pb2.ReloadConfigRequest') 68 | @mock.patch('grpc.insecure_channel') 69 | def test_grpc_delete_model_call(channel, ReloadConfigRequest): 70 | config = 'model_config_list: {\n' 71 | config += ' config: {\n' 72 | config += ' name: "my-model",\n' 73 | config += ' base_path: "/opt/ml/model_path",\n' 74 | config += ' model_platform: "tensorflow"\n' 75 | config += ' }\n' 
76 | config += '}\n' 77 | with open(proxy_client.MODEL_CONFIG_FILE, 'w') as f: 78 | f.write(config) 79 | 80 | client = proxy_client.GRPCProxyClient(port='9090') 81 | client.delete_model('my-model', '/opt/ml/model_path') 82 | 83 | calls = [mock.call('0.0.0.0:9090'), 84 | mock.call().unary_unary('/tensorflow.serving.ModelService/GetModelStatus', 85 | request_serializer=mock.ANY, response_deserializer=mock.ANY), 86 | mock.call().unary_unary('/tensorflow.serving.ModelService/HandleReloadConfigRequest', 87 | request_serializer=mock.ANY, response_deserializer=mock.ANY), 88 | mock.call().unary_unary()(ReloadConfigRequest()) 89 | ] 90 | 91 | channel.assert_has_calls(calls) 92 | 93 | config_list = model_server_config_pb2.ModelConfigList() 94 | model_server_config = model_server_config_pb2.ModelServerConfig() 95 | model_server_config.model_config_list.MergeFrom(config_list) 96 | 97 | ReloadConfigRequest().config.CopyFrom.assert_called_with(model_server_config) 98 | 99 | expected = 'model_config_list: {\n' 100 | expected += '}\n' 101 | 102 | with open(proxy_client.MODEL_CONFIG_FILE, 'r') as file: 103 | assert file.read() == expected 104 | -------------------------------------------------------------------------------- /docker/1.15/Dockerfile.eia: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | 3 | LABEL maintainer="Amazon AI" 4 | # Specify LABEL for inference pipelines to use SAGEMAKER_BIND_TO_PORT 5 | # https://docs.aws.amazon.com/sagemaker/latest/dg/inference-pipeline-real-time.html 6 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true 7 | 8 | # Add arguments to achieve the version, python and url 9 | ARG PYTHON=python3 10 | ARG PIP=pip3 11 | ARG HEALTH_CHECK_VERSION=1.6.3 12 | ARG S3_TF_EI_VERSION=1-5 13 | ARG S3_TF_VERSION=1-15-2 14 | #This is the serving version not TF version 15 | ARG TFS_SHORT_VERSION=1-15-0 16 | 17 | 18 | # See http://bugs.python.org/issue19846 19 | ENV LANG=C.UTF-8 20 | # Python won’t try to write .pyc or .pyo files on the import of source modules 21 | ENV PYTHONDONTWRITEBYTECODE=1 22 | ENV PYTHONUNBUFFERED=1 23 | ENV SAGEMAKER_TFS_VERSION="${TFS_SHORT_VERSION}" 24 | ENV PATH="$PATH:/sagemaker" 25 | ENV LD_LIBRARY_PATH='/usr/local/lib:$LD_LIBRARY_PATH' 26 | ENV MODEL_BASE_PATH=/models 27 | # The only required piece is the model name in order to differentiate endpoints 28 | ENV MODEL_NAME=model 29 | # To prevent user interaction when installing time zone data package 30 | ENV DEBIAN_FRONTEND=noninteractive 31 | 32 | # nginx + njs 33 | RUN apt-get update \ 34 | && apt-get -y install --no-install-recommends \ 35 | curl \ 36 | gnupg2 \ 37 | ca-certificates \ 38 | git \ 39 | wget \ 40 | vim \ 41 | && curl -s http://nginx.org/keys/nginx_signing.key | apt-key add - \ 42 | && echo 'deb http://nginx.org/packages/ubuntu/ bionic nginx' >> /etc/apt/sources.list \ 43 | && apt-get update \ 44 | && apt-get -y install --no-install-recommends \ 45 | nginx \ 46 | nginx-module-njs \ 47 | python3 \ 48 | python3-pip \ 49 | python3-setuptools \ 50 | && apt-get clean \ 51 | && rm -rf /var/lib/apt/lists/* 52 | 53 | RUN ${PIP} --no-cache-dir install --upgrade \ 54 | pip \ 55 | setuptools 56 | 57 | # cython, falcon, gunicorn, grpc 58 | RUN ${PIP} install --no-cache-dir \ 59 | awscli==1.18.32 \ 60 | cython==0.29.16 \ 61 | falcon==2.0.0 \ 62 | gunicorn==20.0.4 \ 63 | gevent==1.4.0 \ 64 | requests==2.23.0 \ 65 | grpcio==1.27.2 \ 66 | protobuf==3.11.3 \ 67 | # using --no-dependencies to avoid installing tensorflow binary 68 | && 
${PIP} install --no-dependencies --no-cache-dir \ 69 | tensorflow-serving-api==1.15.0 70 | 71 | COPY sagemaker /sagemaker 72 | 73 | # Some TF tools expect a "python" binary 74 | RUN ln -s $(which ${PYTHON}) /usr/local/bin/python \ 75 | && ln -s /usr/local/bin/pip3 /usr/bin/pip 76 | 77 | # Get EI tools 78 | RUN wget https://amazonei-tools.s3.amazonaws.com/v${HEALTH_CHECK_VERSION}/ei_tools_${HEALTH_CHECK_VERSION}.tar.gz -O /opt/ei_tools_${HEALTH_CHECK_VERSION}.tar.gz \ 79 | && tar -xvf /opt/ei_tools_${HEALTH_CHECK_VERSION}.tar.gz -C /opt/ \ 80 | && rm -rf /opt/ei_tools_${HEALTH_CHECK_VERSION}.tar.gz \ 81 | && chmod a+x /opt/ei_tools/bin/health_check \ 82 | && mkdir -p /opt/ei_health_check/bin \ 83 | && ln -s /opt/ei_tools/bin/health_check /opt/ei_health_check/bin/health_check \ 84 | && ln -s /opt/ei_tools/lib /opt/ei_health_check/lib 85 | 86 | RUN wget https://amazonei-tensorflow.s3.amazonaws.com/tensorflow-serving/v1.15/ubuntu/archive/tensorflow-serving-${S3_TF_VERSION}-ubuntu-ei-${S3_TF_EI_VERSION}.tar.gz \ 87 | -O /tmp/tensorflow-serving-${S3_TF_VERSION}-ubuntu-ei-${S3_TF_EI_VERSION}.tar.gz \ 88 | && cd /tmp \ 89 | && tar zxf tensorflow-serving-${S3_TF_VERSION}-ubuntu-ei-${S3_TF_EI_VERSION}.tar.gz \ 90 | && mv tensorflow-serving-${S3_TF_VERSION}-ubuntu-ei-${S3_TF_EI_VERSION}/amazonei_tensorflow_model_server /usr/bin/tensorflow_model_server \ 91 | && chmod +x /usr/bin/tensorflow_model_server \ 92 | && rm -rf tensorflow-serving-${S3_TF_VERSION}* 93 | 94 | 95 | # Expose ports 96 | # gRPC and REST 97 | EXPOSE 8500 8501 98 | 99 | # Set where models should be stored in the container 100 | RUN mkdir -p ${MODEL_BASE_PATH} 101 | 102 | # Create a script that runs the model server so we can use environment variables 103 | # while also passing in arguments from the docker command line 104 | RUN echo '#!/bin/bash \n\n' > /usr/bin/tf_serving_entrypoint.sh \ 105 | && echo '/usr/bin/tensorflow_model_server --port=8500 --rest_api_port=8501 --model_name=${MODEL_NAME} --model_base_path=${MODEL_BASE_PATH}/${MODEL_NAME} "$@"' >> /usr/bin/tf_serving_entrypoint.sh \ 106 | && chmod +x /usr/bin/tf_serving_entrypoint.sh 107 | 108 | RUN curl https://aws-dlc-licenses.s3.amazonaws.com/tensorflow/license.txt -o /license.txt 109 | 110 | CMD ["/usr/bin/tf_serving_entrypoint.sh"] 111 | -------------------------------------------------------------------------------- /test/integration/sagemaker/conftest.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 
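# (Editorial note) This conftest wires up the SageMaker integration tests: it registers
# the CLI options used below (--region, --registry, --repo, --versions, --instance-types,
# --accelerator-type, --tag), provides session-scoped boto3/SageMaker client fixtures,
# and auto-skips GPU tests in regions without enough p2/p3 capacity. A typical invocation
# (values are illustrative only, not prescribed anywhere in this repo) might be:
#
#   pytest test/integration/sagemaker --region us-west-2 --registry 123456789012 \
#       --repo sagemaker-tensorflow-serving --versions 1.15.2 --instance-types ml.m5.xlarge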
13 | import os 14 | import random 15 | import time 16 | 17 | import boto3 18 | import pytest 19 | 20 | # these regions have some p2 and p3 instances, but not enough for automated testing 21 | NO_P2_REGIONS = [ 22 | "ca-central-1", 23 | "eu-central-1", 24 | "eu-west-2", 25 | "us-west-1", 26 | "eu-west-3", 27 | "eu-north-1", 28 | "sa-east-1", 29 | "ap-east-1", 30 | "me-south-1" 31 | ] 32 | NO_P3_REGIONS = [ 33 | "ap-southeast-1", 34 | "ap-southeast-2", 35 | "ap-south-1", 36 | "ca-central-1", 37 | "eu-central-1", 38 | "eu-west-2", 39 | "us-west-1", 40 | "eu-west-3", 41 | "eu-north-1", 42 | "sa-east-1", 43 | "ap-east-1", 44 | "me-south-1" 45 | ] 46 | 47 | 48 | def pytest_addoption(parser): 49 | parser.addoption("--region", default="us-west-2") 50 | parser.addoption("--registry") 51 | parser.addoption("--repo") 52 | parser.addoption("--versions") 53 | parser.addoption("--instance-types") 54 | parser.addoption("--accelerator-type") 55 | parser.addoption("--tag") 56 | 57 | 58 | def pytest_configure(config): 59 | os.environ["TEST_REGION"] = config.getoption("--region") 60 | os.environ["TEST_VERSIONS"] = config.getoption("--versions") or "1.11.1,1.12.0,1.13.0" 61 | os.environ["TEST_INSTANCE_TYPES"] = (config.getoption("--instance-types") or 62 | "ml.m5.xlarge,ml.p2.xlarge") 63 | 64 | os.environ["TEST_EI_VERSIONS"] = config.getoption("--versions") or "1.11,1.12" 65 | os.environ["TEST_EI_INSTANCE_TYPES"] = (config.getoption("--instance-types") or 66 | "ml.m5.xlarge") 67 | 68 | if config.getoption("--tag"): 69 | os.environ["TEST_VERSIONS"] = config.getoption("--tag") 70 | os.environ["TEST_EI_VERSIONS"] = config.getoption("--tag") 71 | 72 | 73 | @pytest.fixture(scope="session") 74 | def region(request): 75 | return request.config.getoption("--region") 76 | 77 | 78 | @pytest.fixture(scope="session") 79 | def registry(request, region): 80 | if request.config.getoption("--registry"): 81 | return request.config.getoption("--registry") 82 | 83 | sts = boto3.client( 84 | "sts", 85 | region_name=region, 86 | endpoint_url="https://sts.{}.amazonaws.com".format(region) 87 | ) 88 | return sts.get_caller_identity()["Account"] 89 | 90 | 91 | @pytest.fixture(scope="session") 92 | def boto_session(region): 93 | return boto3.Session(region_name=region) 94 | 95 | 96 | @pytest.fixture(scope="session") 97 | def sagemaker_client(boto_session): 98 | return boto_session.client("sagemaker") 99 | 100 | 101 | @pytest.fixture(scope="session") 102 | def sagemaker_runtime_client(boto_session): 103 | return boto_session.client("runtime.sagemaker") 104 | 105 | 106 | def unique_name_from_base(base, max_length=63): 107 | unique = "%04x" % random.randrange(16 ** 4) # 4-digit hex 108 | ts = str(int(time.time())) 109 | available_length = max_length - 2 - len(ts) - len(unique) 110 | trimmed = base[:available_length] 111 | return "{}-{}-{}".format(trimmed, ts, unique) 112 | 113 | 114 | @pytest.fixture 115 | def model_name(): 116 | return unique_name_from_base("test-tfs") 117 | 118 | 119 | @pytest.fixture(autouse=True) 120 | def skip_gpu_instance_restricted_regions(region, instance_type): 121 | if (region in NO_P2_REGIONS and instance_type.startswith("ml.p2")) or \ 122 | (region in NO_P3_REGIONS and instance_type.startswith("ml.p3")): 123 | pytest.skip("Skipping GPU test in region {}".format(region)) 124 | 125 | 126 | @pytest.fixture(autouse=True) 127 | def skip_by_device_type(request, instance_type): 128 | is_gpu = instance_type[3] in ["g", "p"] 129 | if (request.node.get_closest_marker("skip_gpu") and is_gpu) or \ 130 |
(request.node.get_closest_marker("skip_cpu") and not is_gpu): 131 | pytest.skip("Skipping because running on \"{}\" instance".format(instance_type)) 132 | -------------------------------------------------------------------------------- /docker/2.0/Dockerfile.gpu: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:10.0-base-ubuntu18.04 2 | 3 | LABEL maintainer="Amazon AI" 4 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true 5 | 6 | ARG PYTHON=python3 7 | ARG PIP=pip3 8 | ARG TFS_SHORT_VERSION=2.0.1 9 | ARG TFS_URL=https://tensorflow-aws.s3-us-west-2.amazonaws.com/${TFS_SHORT_VERSION}/Serving/GPU/tensorflow_model_server 10 | 11 | ENV NCCL_VERSION=2.4.7-1+cuda10.0 12 | ENV CUDNN_VERSION=7.5.1.10-1+cuda10.0 13 | ENV TF_TENSORRT_VERSION=5.0.2 14 | 15 | # See http://bugs.python.org/issue19846 16 | ENV LANG=C.UTF-8 17 | ENV PYTHONDONTWRITEBYTECODE=1 18 | # Python won’t try to write .pyc or .pyo files on the import of source modules 19 | ENV PYTHONUNBUFFERED=1 20 | ENV SAGEMAKER_TFS_VERSION="${TFS_SHORT_VERSION}" 21 | ENV PATH="$PATH:/sagemaker" 22 | ENV MODEL_BASE_PATH=/models 23 | # The only required piece is the model name in order to differentiate endpoints 24 | ENV MODEL_NAME=model 25 | # Fix for the interactive mode during an install in step 21 26 | ENV DEBIAN_FRONTEND=noninteractive 27 | 28 | RUN apt-get update \ 29 | && apt-get install -y --no-install-recommends \ 30 | ca-certificates \ 31 | cuda-command-line-tools-10-0 \ 32 | cuda-cublas-10-0 \ 33 | cuda-cufft-10-0 \ 34 | cuda-curand-10-0 \ 35 | cuda-cusolver-10-0 \ 36 | cuda-cusparse-10-0 \ 37 | libcudnn7=${CUDNN_VERSION} \ 38 | libnccl2=${NCCL_VERSION} \ 39 | libgomp1 \ 40 | curl \ 41 | git \ 42 | wget \ 43 | vim \ 44 | build-essential \ 45 | zlib1g-dev \ 46 | python3 \ 47 | python3-pip \ 48 | python3-setuptools \ 49 | && apt-get clean \ 50 | && rm -rf /var/lib/apt/lists/* 51 | 52 | # The 'apt-get install' of nvinfer-runtime-trt-repo-ubuntu1804-4.0.1-ga-cuda10.0 53 | # adds a new list which contains libnvinfer library, so it needs another 54 | # 'apt-get update' to retrieve that list before it can actually install the 55 | # library. 56 | # We don't install libnvinfer-dev since we don't need to build against TensorRT, 57 | # and libnvinfer4 doesn't contain libnvinfer.a static library. 
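# (Editorial note) The RUN below also removes the libnvinfer_plugin*, libnvcaffe_parser*
# and libnvparsers* shared objects after installing libnvinfer5, presumably to keep the
# image small since only the core TensorRT runtime is needed at serving time. A quick,
# illustrative way to confirm the runtime made it into a built image (the tag name here
# is only an example): docker run --rm <your-image-tag> ldconfig -p | grep libnvinfer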
58 | RUN apt-get update \ 59 | && apt-get install -y --no-install-recommends nvinfer-runtime-trt-repo-ubuntu1804-${TF_TENSORRT_VERSION}-ga-cuda10.0 \ 60 | && apt-get update \ 61 | && apt-get install -y --no-install-recommends libnvinfer5=${TF_TENSORRT_VERSION}-1+cuda10.0 \ 62 | && apt-get clean \ 63 | && rm -rf /var/lib/apt/lists/* \ 64 | && rm /usr/lib/x86_64-linux-gnu/libnvinfer_plugin* \ 65 | && rm /usr/lib/x86_64-linux-gnu/libnvcaffe_parser* \ 66 | && rm /usr/lib/x86_64-linux-gnu/libnvparsers* 67 | 68 | RUN ${PIP} --no-cache-dir install --upgrade \ 69 | pip \ 70 | setuptools 71 | 72 | # Some TF tools expect a "python" binary 73 | RUN ln -s $(which ${PYTHON}) /usr/local/bin/python 74 | 75 | # nginx + njs 76 | RUN apt-get update \ 77 | && apt-get -y install --no-install-recommends \ 78 | curl \ 79 | gnupg2 \ 80 | && curl -s http://nginx.org/keys/nginx_signing.key | apt-key add - \ 81 | && echo 'deb http://nginx.org/packages/ubuntu/ bionic nginx' >> /etc/apt/sources.list \ 82 | && apt-get update \ 83 | && apt-get -y install --no-install-recommends \ 84 | nginx \ 85 | nginx-module-njs \ 86 | && apt-get clean \ 87 | && rm -rf /var/lib/apt/lists/* 88 | 89 | # cython, falcon, gunicorn, grpc 90 | RUN ${PIP} install -U --no-cache-dir \ 91 | boto3 \ 92 | awscli==1.16.303 \ 93 | cython==0.29.14 \ 94 | falcon==2.0.0 \ 95 | gunicorn==20.0.4 \ 96 | gevent==1.4.0 \ 97 | requests==2.22.0 \ 98 | grpcio==1.26.0 \ 99 | protobuf==3.11.1 \ 100 | # using --no-dependencies to avoid installing tensorflow binary 101 | && ${PIP} install --no-dependencies --no-cache-dir \ 102 | tensorflow-serving-api-gpu==2.0 103 | 104 | COPY ./sagemaker /sagemaker 105 | 106 | RUN curl $TFS_URL -o /usr/bin/tensorflow_model_server \ 107 | && chmod 555 /usr/bin/tensorflow_model_server 108 | 109 | # Expose gRPC and REST port 110 | EXPOSE 8500 8501 111 | 112 | # Set where models should be stored in the container 113 | RUN mkdir -p ${MODEL_BASE_PATH} 114 | 115 | # Create a script that runs the model server so we can use environment variables 116 | # while also passing in arguments from the docker command line 117 | RUN echo '#!/bin/bash \n\n' > /usr/bin/tf_serving_entrypoint.sh \ 118 | && echo '/usr/bin/tensorflow_model_server --port=8500 --rest_api_port=8501 --model_name=${MODEL_NAME} --model_base_path=${MODEL_BASE_PATH}/${MODEL_NAME} "$@"' >> /usr/bin/tf_serving_entrypoint.sh \ 119 | && chmod +x /usr/bin/tf_serving_entrypoint.sh 120 | 121 | ADD https://raw.githubusercontent.com/aws/aws-deep-learning-containers-utils/master/deep_learning_container.py /usr/local/bin/deep_learning_container.py 122 | 123 | RUN chmod +x /usr/local/bin/deep_learning_container.py 124 | 125 | RUN curl https://aws-dlc-licenses.s3.amazonaws.com/tensorflow-2.0.1/license.txt -o /license.txt 126 | 127 | CMD ["/usr/bin/tf_serving_entrypoint.sh"] 128 | -------------------------------------------------------------------------------- /test/integration/local/test_nginx_config.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019-2022 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. 
See the License for the specific 12 | # language governing permissions and limitations under the License. 13 | 14 | import os 15 | import subprocess 16 | 17 | import pytest 18 | 19 | 20 | @pytest.fixture(scope="session", autouse=True) 21 | def volume(): 22 | try: 23 | model_dir = os.path.abspath("test/resources/models") 24 | subprocess.check_call( 25 | "docker volume create --name nginx_model_volume --opt type=none " 26 | "--opt device={} --opt o=bind".format(model_dir).split() 27 | ) 28 | yield model_dir 29 | finally: 30 | subprocess.check_call("docker volume rm nginx_model_volume".split()) 31 | 32 | 33 | def test_run_nginx_with_default_parameters(docker_base_name, tag, runtime_config): 34 | try: 35 | command = ( 36 | "docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080" 37 | " --mount type=volume,source=nginx_model_volume,target=/opt/ml/model,readonly" 38 | " {}:{} serve" 39 | ).format(runtime_config, docker_base_name, tag) 40 | 41 | proc = subprocess.Popen(command.split(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT) 42 | 43 | lines_seen = { 44 | "error_log /dev/stderr error;": 0, 45 | "proxy_read_timeout 60;": 0, 46 | } 47 | 48 | for stdout_line in iter(proc.stdout.readline, ""): 49 | stdout_line = str(stdout_line) 50 | for line in lines_seen.keys(): 51 | if line in stdout_line: 52 | lines_seen[line] += 1 53 | if "started nginx" in stdout_line: 54 | for value in lines_seen.values(): 55 | assert value == 1 56 | break 57 | 58 | finally: 59 | subprocess.check_call("docker rm -f sagemaker-tensorflow-serving-test".split()) 60 | 61 | 62 | def test_run_nginx_with_env_var_parameters(docker_base_name, tag, runtime_config): 63 | try: 64 | command = ( 65 | "docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080" 66 | " --mount type=volume,source=nginx_model_volume,target=/opt/ml/model,readonly" 67 | " -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info" 68 | " -e SAGEMAKER_NGINX_PROXY_READ_TIMEOUT_SECONDS=63" 69 | " {}:{} serve" 70 | ).format(runtime_config, docker_base_name, tag) 71 | 72 | proc = subprocess.Popen(command.split(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT) 73 | 74 | lines_seen = { 75 | "error_log /dev/stderr info;": 0, 76 | "proxy_read_timeout 63;": 0, 77 | } 78 | 79 | for stdout_line in iter(proc.stdout.readline, ""): 80 | stdout_line = str(stdout_line) 81 | for line in lines_seen.keys(): 82 | if line in stdout_line: 83 | lines_seen[line] += 1 84 | if "started nginx" in stdout_line: 85 | for value in lines_seen.values(): 86 | assert value == 1 87 | break 88 | 89 | finally: 90 | subprocess.check_call("docker rm -f sagemaker-tensorflow-serving-test".split()) 91 | 92 | def test_run_nginx_with_higher_gunicorn_parameter(docker_base_name, tag, runtime_config): 93 | try: 94 | command = ( 95 | "docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080" 96 | " --mount type=volume,source=nginx_model_volume,target=/opt/ml/model,readonly" 97 | " -e SAGEMAKER_NGINX_PROXY_READ_TIMEOUT_SECONDS=60" 98 | " -e SAGEMAKER_GUNICORN_TIMEOUT_SECONDS=120" 99 | " {}:{} serve" 100 | ).format(runtime_config, docker_base_name, tag) 101 | 102 | proc = subprocess.Popen(command.split(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT) 103 | 104 | lines_seen = { 105 | "proxy_read_timeout 120;": 0, # When GUnicorn is higher, set timeout to match. 
106 | } 107 | 108 | for stdout_line in iter(proc.stdout.readline, ""): 109 | stdout_line = str(stdout_line) 110 | for line in lines_seen.keys(): 111 | if line in stdout_line: 112 | lines_seen[line] += 1 113 | if "started nginx" in stdout_line: 114 | for value in lines_seen.values(): 115 | assert value == 1 116 | break 117 | 118 | finally: 119 | subprocess.check_call("docker rm -f sagemaker-tensorflow-serving-test".split()) 120 | -------------------------------------------------------------------------------- /docker/1.14/Dockerfile.gpu: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:10.0-base-ubuntu16.04 2 | 3 | LABEL maintainer="Amazon AI" 4 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true 5 | 6 | # Add arguments to achieve the version, python and url 7 | # PYTHON=python for 2.7 8 | # PYTHON=python3 for 3.5, 3.6 is not available directly on 16.04 9 | ARG PYTHON=python3 10 | # PIP=pip for 2.7 11 | # PIP=pip3 for 3.5, 3.6 is not available directly on 16.04 12 | ARG PIP=pip3 13 | ARG PYTHON_VERSION=3.6.6 14 | ARG TFS_SHORT_VERSION=1.14 15 | 16 | # See http://bugs.python.org/issue19846 17 | ENV LANG=C.UTF-8 18 | ENV NCCL_VERSION=2.4.7-1+cuda10.0 19 | ENV CUDNN_VERSION=7.5.1.10-1+cuda10.0 20 | ENV TF_TENSORRT_VERSION=5.0.2 21 | ENV PYTHONDONTWRITEBYTECODE=1 22 | # Python won’t try to write .pyc or .pyo files on the import of source modules 23 | ENV PYTHONUNBUFFERED=1 24 | ENV SAGEMAKER_TFS_VERSION="${TFS_SHORT_VERSION}" 25 | ENV PATH="$PATH:/sagemaker" 26 | ENV MODEL_BASE_PATH=/models 27 | # The only required piece is the model name in order to differentiate endpoints 28 | ENV MODEL_NAME=model 29 | 30 | RUN apt-get update \ 31 | && apt-get install -y --no-install-recommends \ 32 | ca-certificates \ 33 | cuda-command-line-tools-10-0 \ 34 | cuda-cublas-10-0 \ 35 | cuda-cufft-10-0 \ 36 | cuda-curand-10-0 \ 37 | cuda-cusolver-10-0 \ 38 | cuda-cusparse-10-0 \ 39 | libcudnn7=${CUDNN_VERSION} \ 40 | libnccl2=${NCCL_VERSION} \ 41 | libgomp1 \ 42 | curl \ 43 | git \ 44 | wget \ 45 | vim \ 46 | #next two lines are needed to add python-3.6 should be removed from ubuntu-16.10 47 | build-essential \ 48 | zlib1g-dev \ 49 | && apt-get clean \ 50 | && rm -rf /var/lib/apt/lists/* 51 | 52 | # The 'apt-get install' of nvinfer-runtime-trt-repo-ubuntu1604-4.0.1-ga-cuda10.0 53 | # adds a new list which contains libnvinfer library, so it needs another 54 | # 'apt-get update' to retrieve that list before it can actually install the 55 | # library. 56 | # We don't install libnvinfer-dev since we don't need to build against TensorRT, 57 | # and libnvinfer4 doesn't contain libnvinfer.a static library. 
58 | RUN apt-get update \ 59 | && apt-get install -y --no-install-recommends nvinfer-runtime-trt-repo-ubuntu1604-${TF_TENSORRT_VERSION}-ga-cuda10.0 \ 60 | && apt-get update \ 61 | && apt-get install -y --no-install-recommends libnvinfer5=${TF_TENSORRT_VERSION}-1+cuda10.0 \ 62 | && apt-get clean \ 63 | && rm -rf /var/lib/apt/lists/* \ 64 | && rm /usr/lib/x86_64-linux-gnu/libnvinfer_plugin* \ 65 | && rm /usr/lib/x86_64-linux-gnu/libnvcaffe_parser* \ 66 | && rm /usr/lib/x86_64-linux-gnu/libnvparsers* 67 | 68 | RUN wget https://www.python.org/ftp/python/$PYTHON_VERSION/Python-$PYTHON_VERSION.tgz \ 69 | && tar -xvf Python-$PYTHON_VERSION.tgz \ 70 | && cd Python-$PYTHON_VERSION \ 71 | && ./configure \ 72 | && make \ 73 | && make install \ 74 | && apt-get update \ 75 | && apt-get install -y --no-install-recommends libreadline-gplv2-dev libncursesw5-dev libssl-dev libsqlite3-dev tk-dev libgdbm-dev libc6-dev libbz2-dev \ 76 | && make \ 77 | && make install \ 78 | && rm -rf ../Python-$PYTHON_VERSION* \ 79 | && ln -s /usr/local/bin/pip3 /usr/bin/pip \ 80 | && rm -rf /var/lib/apt/lists/* 81 | 82 | RUN ${PIP} --no-cache-dir install --upgrade pip setuptools 83 | 84 | # Some TF tools expect a "python" binary 85 | RUN ln -s $(which ${PYTHON}) /usr/local/bin/python 86 | 87 | RUN curl https://tensorflow-aws.s3-us-west-2.amazonaws.com/1.14/Serving/GPU/tensorflow_model_server --output tensorflow_model_server \ 88 | && chmod 555 tensorflow_model_server && cp tensorflow_model_server /usr/bin/tensorflow_model_server \ 89 | && rm -f tensorflow_model_server 90 | 91 | # nginx + njs 92 | RUN apt-get update \ 93 | && apt-get -y install --no-install-recommends curl gnupg2 \ 94 | && curl -s http://nginx.org/keys/nginx_signing.key | apt-key add - \ 95 | && echo 'deb http://nginx.org/packages/ubuntu/ xenial nginx' >> /etc/apt/sources.list \ 96 | && apt-get update \ 97 | && apt-get -y install --no-install-recommends nginx nginx-module-njs \ 98 | && apt-get clean \ 99 | && rm -rf /var/lib/apt/lists/* 100 | 101 | # cython, falcon, gunicorn, grpc 102 | RUN ${PIP} install -U --no-cache-dir \ 103 | boto3 \ 104 | awscli==1.16.196 \ 105 | cython==0.29.12 \ 106 | falcon==2.0.0 \ 107 | gunicorn==19.9.0 \ 108 | gevent==1.4.0 \ 109 | requests==2.22.0 \ 110 | grpcio==1.24.1 \ 111 | protobuf==3.10.0 \ 112 | # using --no-dependencies to avoid installing tensorflow binary 113 | && ${PIP} install --no-dependencies --no-cache-dir \ 114 | tensorflow-serving-api-gpu==1.14.0 115 | 116 | COPY ./ / 117 | 118 | # Expose gRPC and REST port 119 | EXPOSE 8500 8501 120 | 121 | # Set where models should be stored in the container 122 | RUN mkdir -p ${MODEL_BASE_PATH} 123 | 124 | # Create a script that runs the model server so we can use environment variables 125 | # while also passing in arguments from the docker command line 126 | RUN echo '#!/bin/bash \n\n' > /usr/bin/tf_serving_entrypoint.sh \ 127 | && echo '/usr/bin/tensorflow_model_server --port=8500 --rest_api_port=8501 --model_name=${MODEL_NAME} --model_base_path=${MODEL_BASE_PATH}/${MODEL_NAME} "$@"' >> /usr/bin/tf_serving_entrypoint.sh \ 128 | && chmod +x /usr/bin/tf_serving_entrypoint.sh 129 | 130 | CMD ["/usr/bin/tf_serving_entrypoint.sh"] 131 | -------------------------------------------------------------------------------- /docker/1.13/Dockerfile.gpu: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:10.0-base-ubuntu16.04 2 | 3 | LABEL maintainer="Amazon AI" 4 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true 5 | 6 
| # Add arguments to achieve the version, python and url 7 | # PYTHON=python for 2.7, PYTHON=python3 for 3.5, 3.6 is not available directly on 16.04 8 | ARG PYTHON=python3 9 | # PIP=pip for 2.7, PIP=pip3 for 3.5, 3.6 is not available directly on 16.04 10 | ARG PIP=pip3 11 | ARG PYTHON_VERSION=3.6.6 12 | ARG TFS_SHORT_VERSION=1.13 13 | 14 | # See http://bugs.python.org/issue19846 15 | ENV LANG C.UTF-8 16 | ENV NCCL_VERSION=2.4.7-1+cuda10.0 17 | ENV CUDNN_VERSION=7.5.1.10-1+cuda10.0 18 | ENV TF_TENSORRT_VERSION=5.0.2 19 | # Python won’t try to write .pyc or .pyo files on the import of source modules 20 | ENV PYTHONDONTWRITEBYTECODE=1 21 | ENV PYTHONUNBUFFERED=1 22 | ENV SAGEMAKER_TFS_VERSION="${TFS_SHORT_VERSION}" 23 | ENV PATH="$PATH:/sagemaker" 24 | ENV MODEL_BASE_PATH=/models 25 | # The only required piece is the model name in order to differentiate endpoints 26 | ENV MODEL_NAME=model 27 | 28 | RUN apt-get update \ 29 | && apt-get install -y --no-install-recommends \ 30 | ca-certificates \ 31 | cuda-command-line-tools-10-0 \ 32 | cuda-cublas-10-0 \ 33 | cuda-cufft-10-0 \ 34 | cuda-curand-10-0 \ 35 | cuda-cusolver-10-0 \ 36 | cuda-cusparse-10-0 \ 37 | libcudnn7=${CUDNN_VERSION} \ 38 | libnccl2=${NCCL_VERSION} \ 39 | libgomp1 \ 40 | curl \ 41 | git \ 42 | wget \ 43 | vim \ 44 | #next two lines are needed to add python-3.6 should be removed from ubuntu-16.10 45 | build-essential \ 46 | zlib1g-dev \ 47 | && apt-get clean \ 48 | && rm -rf /var/lib/apt/lists/* 49 | 50 | # The 'apt-get install' of nvinfer-runtime-trt-repo-ubuntu1604-4.0.1-ga-cuda10.0 51 | # adds a new list which contains libnvinfer library, so it needs another 52 | # 'apt-get update' to retrieve that list before it can actually install the 53 | # library. 54 | # We don't install libnvinfer-dev since we don't need to build against TensorRT, 55 | # and libnvinfer4 doesn't contain libnvinfer.a static library. 
56 | RUN apt-get update \ 57 | && apt-get install -y --no-install-recommends \ 58 | nvinfer-runtime-trt-repo-ubuntu1604-${TF_TENSORRT_VERSION}-ga-cuda10.0 \ 59 | && apt-get update \ 60 | && apt-get install -y --no-install-recommends \ 61 | libnvinfer5=${TF_TENSORRT_VERSION}-1+cuda10.0 \ 62 | && apt-get clean \ 63 | && rm -rf /var/lib/apt/lists/* \ 64 | && rm /usr/lib/x86_64-linux-gnu/libnvinfer_plugin* \ 65 | && rm /usr/lib/x86_64-linux-gnu/libnvcaffe_parser* \ 66 | && rm /usr/lib/x86_64-linux-gnu/libnvparsers* \ 67 | && rm -rf /var/lib/apt/lists/* 68 | 69 | RUN wget https://www.python.org/ftp/python/$PYTHON_VERSION/Python-$PYTHON_VERSION.tgz \ 70 | && tar -xvf Python-$PYTHON_VERSION.tgz && cd Python-$PYTHON_VERSION \ 71 | && ./configure && make && make install \ 72 | && apt-get update && apt-get install -y --no-install-recommends \ 73 | libreadline-gplv2-dev \ 74 | libncursesw5-dev \ 75 | libssl-dev \ 76 | libsqlite3-dev \ 77 | tk-dev libgdbm-dev \ 78 | libc6-dev libbz2-dev \ 79 | && rm -rf /var/lib/apt/lists/* \ 80 | && make && make install \ 81 | && rm -rf ../Python-$PYTHON_VERSION* \ 82 | && ln -s /usr/local/bin/pip3 /usr/bin/pip 83 | 84 | RUN ${PIP} --no-cache-dir install --upgrade pip setuptools 85 | 86 | # Some TF tools expect a "python" binary 87 | RUN ln -s $(which ${PYTHON}) /usr/local/bin/python 88 | 89 | RUN curl https://s3-us-west-2.amazonaws.com/tensorflow-aws/1.13/Serving/GPU/tensorflow_model_server --output tensorflow_model_server && \ 90 | chmod 555 tensorflow_model_server && cp tensorflow_model_server /usr/bin/tensorflow_model_server && \ 91 | rm -f tensorflow_model_server 92 | 93 | # nginx + njs 94 | RUN apt-get update \ 95 | && apt-get -y install --no-install-recommends curl gnupg2 \ 96 | && curl -s http://nginx.org/keys/nginx_signing.key | apt-key add - \ 97 | && echo 'deb http://nginx.org/packages/ubuntu/ xenial nginx' >> /etc/apt/sources.list \ 98 | && apt-get update \ 99 | && apt-get -y install --no-install-recommends nginx nginx-module-njs \ 100 | && apt-get clean \ 101 | && rm -rf /var/lib/apt/lists/* 102 | 103 | # cython, falcon, gunicorn, grpc 104 | RUN ${PIP} install -U --no-cache-dir \ 105 | boto3 \ 106 | awscli==1.16.130 \ 107 | cython==0.29.10 \ 108 | falcon==2.0.0 \ 109 | gunicorn==19.9.0 \ 110 | gevent==1.4.0 \ 111 | requests==2.21.0 \ 112 | grpcio==1.24.1 \ 113 | protobuf==3.10.0 \ 114 | # using --no-dependencies to avoid installing tensorflow binary 115 | && ${PIP} install --no-dependencies --no-cache-dir \ 116 | tensorflow-serving-api-gpu==1.13.0 117 | 118 | COPY ./ / 119 | 120 | # Expose gRPC and REST port 121 | EXPOSE 8500 8501 122 | 123 | # Set where models should be stored in the container 124 | RUN mkdir -p ${MODEL_BASE_PATH} 125 | 126 | # Create a script that runs the model server so we can use environment variables 127 | # while also passing in arguments from the docker command line 128 | RUN echo '#!/bin/bash \n\n' > /usr/bin/tf_serving_entrypoint.sh \ 129 | && echo '/usr/bin/tensorflow_model_server --port=8500 --rest_api_port=8501 --model_name=${MODEL_NAME} --model_base_path=${MODEL_BASE_PATH}/${MODEL_NAME} "$@"' >> /usr/bin/tf_serving_entrypoint.sh \ 130 | && chmod +x /usr/bin/tf_serving_entrypoint.sh 131 | 132 | CMD ["/usr/bin/tf_serving_entrypoint.sh"] 133 | -------------------------------------------------------------------------------- /test/integration/local/test_pre_post_processing_mme.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Amazon.com, Inc. or its affiliates. 
All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 13 | 14 | import json 15 | import os 16 | import shutil 17 | import subprocess 18 | import sys 19 | import time 20 | 21 | import pytest 22 | 23 | import requests 24 | 25 | from multi_model_endpoint_test_utils import make_load_model_request, make_headers 26 | 27 | 28 | PING_URL = "http://localhost:8080/ping" 29 | INVOCATION_URL = "http://localhost:8080/models/{}/invoke" 30 | MODEL_NAME = "half_plus_three" 31 | 32 | 33 | @pytest.fixture(scope="session", autouse=True) 34 | def volume(): 35 | try: 36 | model_dir = os.path.abspath("test/resources/mme_universal_script") 37 | subprocess.check_call( 38 | "docker volume create --name model_volume_mme --opt type=none " 39 | "--opt device={} --opt o=bind".format(model_dir).split()) 40 | yield model_dir 41 | finally: 42 | subprocess.check_call("docker volume rm model_volume_mme".split()) 43 | 44 | 45 | @pytest.fixture(scope="module", autouse=True) 46 | def container(docker_base_name, tag, runtime_config): 47 | try: 48 | command = ( 49 | "docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080" 50 | " --mount type=volume,source=model_volume_mme,target=/opt/ml/models,readonly" 51 | " -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info" 52 | " -e SAGEMAKER_BIND_TO_PORT=8080" 53 | " -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999" 54 | " -e SAGEMAKER_MULTI_MODEL=True" 55 | " {}:{} serve" 56 | ).format(runtime_config, docker_base_name, tag) 57 | 58 | proc = subprocess.Popen(command.split(), stdout=sys.stdout, stderr=subprocess.STDOUT) 59 | 60 | attempts = 0 61 | while attempts < 40: 62 | time.sleep(3) 63 | try: 64 | res_code = requests.get("http://localhost:8080/ping").status_code 65 | if res_code == 200: 66 | break 67 | except: 68 | attempts += 1 69 | pass 70 | 71 | yield proc.pid 72 | finally: 73 | subprocess.check_call("docker rm -f sagemaker-tensorflow-serving-test".split()) 74 | 75 | 76 | @pytest.fixture 77 | def model(): 78 | model_data = { 79 | "model_name": MODEL_NAME, 80 | "url": "/opt/ml/models/half_plus_three/model/half_plus_three" 81 | } 82 | make_load_model_request(json.dumps(model_data)) 83 | return MODEL_NAME 84 | 85 | 86 | @pytest.mark.skip_gpu 87 | def test_ping_service(): 88 | response = requests.get(PING_URL) 89 | assert 200 == response.status_code 90 | 91 | 92 | @pytest.mark.skip_gpu 93 | def test_predict_json(model): 94 | headers = make_headers() 95 | data = "{\"instances\": [1.0, 2.0, 5.0]}" 96 | response = requests.post(INVOCATION_URL.format(model), data=data, headers=headers).json() 97 | assert response == {"predictions": [3.5, 4.0, 5.5]} 98 | 99 | 100 | @pytest.mark.skip_gpu 101 | def test_zero_content(): 102 | headers = make_headers() 103 | x = "" 104 | response = requests.post(INVOCATION_URL.format(MODEL_NAME), data=x, headers=headers) 105 | assert 500 == response.status_code 106 | assert "document is empty" in response.text 107 | 108 | 109 | @pytest.mark.skip_gpu 110 | def test_large_input(): 111 | data_file = "test/resources/inputs/test-large.csv" 112 | 113 | with open(data_file, "r") as file: 
114 | x = file.read() 115 | headers = make_headers(content_type="text/csv") 116 | response = requests.post(INVOCATION_URL.format(MODEL_NAME), data=x, headers=headers).json() 117 | predictions = response["predictions"] 118 | assert len(predictions) == 753936 119 | 120 | 121 | @pytest.mark.skip_gpu 122 | def test_csv_input(): 123 | headers = make_headers(content_type="text/csv") 124 | data = "1.0,2.0,5.0" 125 | response = requests.post(INVOCATION_URL.format(MODEL_NAME), data=data, headers=headers).json() 126 | assert response == {"predictions": [3.5, 4.0, 5.5]} 127 | 128 | 129 | @pytest.mark.skip_gpu 130 | def test_specific_versions(): 131 | for version in ("123", "124"): 132 | headers = make_headers(content_type="text/csv", version=version) 133 | data = "1.0,2.0,5.0" 134 | response = requests.post( 135 | INVOCATION_URL.format(MODEL_NAME), data=data, headers=headers 136 | ).json() 137 | assert response == {"predictions": [3.5, 4.0, 5.5]} 138 | 139 | 140 | @pytest.mark.skip_gpu 141 | def test_unsupported_content_type(): 142 | headers = make_headers("unsupported-type", "predict") 143 | data = "aW1hZ2UgYnl0ZXM=" 144 | response = requests.post(INVOCATION_URL.format(MODEL_NAME), data=data, headers=headers) 145 | assert 500 == response.status_code 146 | assert "unsupported content type" in response.text 147 | -------------------------------------------------------------------------------- /test/integration/local/test_pre_post_processing.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 
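# (Editorial note) This module exercises the pre/post-processing path for single-model
# serving. The volume fixture below is parametrized over test/resources/examples/test1..test5;
# each example directory (inference.py plus optional requirements.txt and lib/) is copied into
# a code/ folder next to the half_plus_three model, the result is mounted at /opt/ml/model,
# and JSON/CSV payloads are posted to /invocations with headers built by make_headers, e.g.
#   Content-Type: application/json
#   X-Amzn-SageMaker-Custom-Attributes: tfs-model-name=half_plus_three,tfs-method=predict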
13 | 14 | import os 15 | import shutil 16 | import subprocess 17 | import sys 18 | import time 19 | 20 | import pytest 21 | 22 | import requests 23 | 24 | 25 | PING_URL = "http://localhost:8080/ping" 26 | INVOCATIONS_URL = "http://localhost:8080/invocations" 27 | 28 | 29 | @pytest.fixture(scope="module", autouse=True, params=["1", "2", "3", "4", "5"]) 30 | def volume(tmpdir_factory, request): 31 | try: 32 | print(str(tmpdir_factory)) 33 | model_dir = os.path.join(tmpdir_factory.mktemp("test"), "model") 34 | code_dir = os.path.join(model_dir, "code") 35 | test_example = "test/resources/examples/test{}".format(request.param) 36 | 37 | model_src_dir = "test/resources/models" 38 | shutil.copytree(model_src_dir, model_dir) 39 | shutil.copytree(test_example, code_dir) 40 | 41 | volume_name = f"model_volume_{request.param}" 42 | subprocess.check_call( 43 | "docker volume create --name {} --opt type=none " 44 | "--opt device={} --opt o=bind".format(volume_name, model_dir).split()) 45 | yield volume_name 46 | finally: 47 | subprocess.check_call(f"docker volume rm {volume_name}".split()) 48 | 49 | 50 | @pytest.fixture(scope="module", autouse=True) 51 | def container(volume, docker_base_name, tag, runtime_config): 52 | try: 53 | command = ( 54 | "docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080" 55 | " --mount type=volume,source={},target=/opt/ml/model,readonly" 56 | " -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info" 57 | " -e SAGEMAKER_BIND_TO_PORT=8080" 58 | " -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999" 59 | " {}:{} serve" 60 | ).format(runtime_config, volume, docker_base_name, tag) 61 | 62 | proc = subprocess.Popen(command.split(), stdout=sys.stdout, stderr=subprocess.STDOUT) 63 | 64 | attempts = 0 65 | while attempts < 40: 66 | time.sleep(3) 67 | try: 68 | res_code = requests.get("http://localhost:8080/ping").status_code 69 | if res_code == 200: 70 | break 71 | except: 72 | attempts += 1 73 | pass 74 | 75 | yield proc.pid 76 | finally: 77 | subprocess.check_call("docker rm -f sagemaker-tensorflow-serving-test".split()) 78 | 79 | 80 | def make_headers(content_type, method, version=None): 81 | custom_attributes = "tfs-model-name=half_plus_three,tfs-method={}".format(method) 82 | if version: 83 | custom_attributes += ",tfs-model-version={}".format(version) 84 | 85 | return { 86 | "Content-Type": content_type, 87 | "X-Amzn-SageMaker-Custom-Attributes": custom_attributes, 88 | } 89 | 90 | 91 | def test_predict_json(): 92 | headers = make_headers("application/json", "predict") 93 | data = "{\"instances\": [1.0, 2.0, 5.0]}" 94 | response = requests.post(INVOCATIONS_URL, data=data, headers=headers).json() 95 | assert response == {"predictions": [3.5, 4.0, 5.5]} 96 | 97 | 98 | def test_zero_content(): 99 | headers = make_headers("application/json", "predict") 100 | data = "" 101 | response = requests.post(INVOCATIONS_URL, data=data, headers=headers) 102 | assert 500 == response.status_code 103 | assert "document is empty" in response.text 104 | 105 | 106 | def test_large_input(): 107 | headers = make_headers("text/csv", "predict") 108 | data_file = "test/resources/inputs/test-large.csv" 109 | 110 | with open(data_file, "r") as file: 111 | large_data = file.read() 112 | response = requests.post(INVOCATIONS_URL, data=large_data, headers=headers).json() 113 | predictions = response["predictions"] 114 | assert len(predictions) == 753936 115 | 116 | 117 | def test_csv_input(): 118 | headers = make_headers("text/csv", "predict") 119 | data = "1.0,2.0,5.0" 120 | response = requests.post(INVOCATIONS_URL, 
data=data, headers=headers).json() 121 | assert response == {"predictions": [3.5, 4.0, 5.5]} 122 | 123 | 124 | def test_predict_specific_versions(): 125 | for version in ("123", "124"): 126 | headers = make_headers("application/json", "predict", version=version) 127 | data = "{\"instances\": [1.0, 2.0, 5.0]}" 128 | response = requests.post(INVOCATIONS_URL, data=data, headers=headers).json() 129 | assert response == {"predictions": [3.5, 4.0, 5.5]} 130 | 131 | 132 | def test_unsupported_content_type(): 133 | headers = make_headers("unsupported-type", "predict") 134 | data = "aW1hZ2UgYnl0ZXM=" 135 | response = requests.post(INVOCATIONS_URL, data=data, headers=headers) 136 | assert 500 == response.status_code 137 | assert "unsupported content type" in response.text 138 | 139 | 140 | def test_ping_service(): 141 | response = requests.get(PING_URL) 142 | assert 200 == response.status_code 143 | -------------------------------------------------------------------------------- /test/perf/perftest_endpoint.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 13 | 14 | import argparse 15 | import multiprocessing 16 | import sys 17 | import time 18 | 19 | import boto3 20 | 21 | 22 | class PerfTester(object): 23 | def __init__(self): 24 | self.engine = None 25 | self.count = None 26 | self.payload_kb = None 27 | self.start_time = None 28 | self.end_time = None 29 | 30 | def test_worker(self, id, args, count, test_data, error_counts): 31 | client = boto3.client('sagemaker-runtime') 32 | 33 | endpoint_name = test_data[0] 34 | data = test_data[1] 35 | for i in range(count): 36 | try: 37 | response = client.invoke_endpoint(EndpointName=endpoint_name, 38 | Body=data, 39 | ContentType='application/json', 40 | Accept='application/json', 41 | CustomAttributes='tfs-model-name=cifar') 42 | _ = response['Body'].read() 43 | except: 44 | error_counts[id] += 1 45 | 46 | def test(self, args, count, test_data): 47 | self.count = args.count * args.workers 48 | self.payload_kb = len(test_data[1]) / 1024.0 49 | 50 | manager = multiprocessing.Manager() 51 | error_counts = manager.dict() 52 | workers = [] 53 | for i in range(args.workers): 54 | error_counts[i] = 0 55 | w = multiprocessing.Process(target=self.test_worker, 56 | args=(i, args, count, test_data, error_counts)) 57 | workers.append(w) 58 | 59 | self.start_time = time.time() 60 | for w in workers: 61 | w.start() 62 | 63 | for w in workers: 64 | w.join() 65 | 66 | self.errors = sum(error_counts.values()) 67 | self.end_time = time.time() 68 | 69 | def report(self, args): 70 | elapsed = self.end_time - self.start_time 71 | report_format = '{},{},{:.3f},{:.3f},{:.3f},{:.3f},{},{},{:.3f}' 72 | report = report_format.format(args.model, 73 | args.workers, 74 | self.count / elapsed, 75 | ((elapsed / args.count) * 1000), 76 | (self.payload_kb * self.count) / elapsed / 1024, 77 | elapsed, 78 | self.count, 79 | self.errors, 80 | 
self.payload_kb) 81 | print('model,workers,r/s,ms/req,mb/s,total s,requests,errors,payload kb') 82 | print(report) 83 | 84 | def parse_args(self, args): 85 | parser = argparse.ArgumentParser('performance tester') 86 | parser.set_defaults(func=lambda x: parser.print_usage()) 87 | parser.add_argument('--count', help='number of test iterations', default=1000, type=int) 88 | parser.add_argument('--warmup', help='number of warmup iterations', default=100, type=int) 89 | parser.add_argument('--workers', help='number of model workers (and clients)', default=1, 90 | type=int) 91 | parser.add_argument('--model', help='model id', default='half_plus_three') 92 | return parser.parse_args(args) 93 | 94 | def run(self, args): 95 | args = self.parse_args(args) 96 | test_data = TEST_DATA[args.model] 97 | self.test(args, min(args.warmup, args.count), test_data) 98 | self.test(args, args.count, test_data) 99 | self.report(args) 100 | 101 | 102 | def _read_file(path): 103 | with open(path, 'rb') as f: 104 | return f.read() 105 | 106 | 107 | def _random_payload(size_in_kb): 108 | return bytes(bytearray(size_in_kb * 1024)) 109 | 110 | 111 | # add/change these to match your endpoints 112 | TEST_DATA = { 113 | 'sm-p2xl': ('sagemaker-tensorflow-2018-11-03-14-38-51-707', b'[' + _read_file('test/resources/inputs/test-cifar.json') + b']'), 114 | 'sm-p316xl': ('sagemaker-tensorflow-2018-11-03-14-38-51-706', b'[' + _read_file('test/resources/inputs/test-cifar.json') + b']'), 115 | 'tfs-p2xl': ('sagemaker-tfs-p2-xlarge', _read_file('test/resources/inputs/test-cifar.json')), 116 | 'tfs-p316xl': ('sagemaker-tfs-p3-16xlarge', _read_file('test/resources/inputs/test-cifar.json')), 117 | 'tfs-c5xl': ('sagemaker-tfs-c5-xlarge', _read_file('test/resources/inputs/test-cifar.json')), 118 | 'tfs-c518xl': ('sagemaker-tfs-c5-18xlarge', _read_file('test/resources/inputs/test-cifar.json')), 119 | 'sm-c5xl': ('sagemaker-tensorflow-cifar-c5.xlarge', b'[' + _read_file('test/resources/inputs/test-cifar.json') + b']'), 120 | 'sm-c518xl': ('sagemaker-tensorflow-cifar-c5.18xlarge', b'[' + _read_file('test/resources/inputs/test-cifar.json') + b']') 121 | } 122 | 123 | if __name__ == '__main__': 124 | PerfTester().run(sys.argv[1:]) 125 | -------------------------------------------------------------------------------- /test/perf/data_generator.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import math 3 | import random 4 | import sys 5 | 6 | _CONTENT_TYPE_CSV = 'text/csv' 7 | _CONTENT_TYPE_JSON = 'application/json' 8 | _CONTENT_TYPE_JSONLINES = 'application/jsonlines' 9 | 10 | _VALID_CONTENT_TYPES = [_CONTENT_TYPE_JSON, _CONTENT_TYPE_CSV, _CONTENT_TYPE_JSONLINES] 11 | _UNIT_FOR_PAYLOAD_SIZE = {'B': 1, 'KB': 1024, 'MB': 1048576} 12 | 13 | def generate_json(shape, payload_size): 14 | one_record = _generate_json_recursively(shape) 15 | if payload_size: 16 | per_record_size = len(str(one_record)) 17 | num_records = _get_num_records_for_json_payload(payload_size, per_record_size) 18 | records = [] 19 | for record in range(0, num_records): 20 | records.append(one_record) 21 | return str(records) 22 | else: 23 | return str(one_record) 24 | 25 | 26 | def _generate_json_recursively(shape): 27 | if len(shape) == 1: 28 | input = list(_random_input(shape[0])) 29 | return input 30 | else: 31 | inner_list = _generate_json_recursively(shape[1:]) 32 | return [inner_list for _ in range(0, shape[0])] 33 | 34 | 35 | def generate_jsonlines(shape, payload_size): 36 | one_row = 
_generate_json_recursively(shape) 37 | if payload_size: 38 | one_row_string = str(one_row) 39 | num_records = _get_num_records_for_json_payload(payload_size, len(one_row_string)) 40 | return '\n'.join([one_row_string for _ in range(0, num_records)]) 41 | else: 42 | return one_row 43 | 44 | 45 | def _get_num_records_for_json_payload(payload_size, one_record_size): 46 | return max(int(round(float(payload_size) / one_record_size)), 1) 47 | 48 | 49 | def generate_csv(shape, payload_size): 50 | # First, calculate overhead from commas. 51 | try: 52 | rows, columns = shape 53 | except ValueError: 54 | rows = 1 55 | columns = shape[0] 56 | # Override number of rows. 57 | if payload_size: 58 | # Divide by two given the assumption is that inputs are single-digit integers separate by commas and lines 59 | # end in newline characters. 60 | rows = int(math.ceil((float(payload_size) / columns / 2.0))) 61 | 62 | row = ','.join(map(lambda x: str(x), _random_input(columns))) 63 | return '\n'.join([row for _ in range(0, rows)]) 64 | 65 | 66 | def _random_input(n): 67 | for i in range(0, n): 68 | yield random.randint(0, 9) 69 | 70 | 71 | def _map_payload_size_given_unit(payload_size, unit_of_payload): 72 | return payload_size * _UNIT_FOR_PAYLOAD_SIZE[unit_of_payload] 73 | 74 | 75 | def generate_data(content_type, shape, payload_size, unit_of_payload='B'): 76 | assert unit_of_payload in _UNIT_FOR_PAYLOAD_SIZE.keys() 77 | payload_size = _map_payload_size_given_unit(payload_size, unit_of_payload) 78 | if content_type == _CONTENT_TYPE_JSONLINES: 79 | return generate_jsonlines(shape, payload_size) 80 | elif content_type == _CONTENT_TYPE_JSON: 81 | return generate_json(shape, payload_size) 82 | elif content_type == _CONTENT_TYPE_CSV: 83 | return generate_csv(shape, payload_size) 84 | else: 85 | raise ValueError('Content-type {} must be in {}'.format(content_type, _VALID_CONTENT_TYPES)) 86 | 87 | 88 | if __name__ == '__main__': 89 | parser = argparse.ArgumentParser(description='Generates test data of different shapes and content types.') 90 | parser.set_defaults(func=lambda x: parser.print_usage()) 91 | 92 | parser.add_argument('-c', '--content-type', help='Content type to generate data for.', type=str, required=True, 93 | choices=_VALID_CONTENT_TYPES) 94 | parser.add_argument('-s', '--shape', help='Shape of the output data. Behavior depends on content-type. ' 95 | 'For text/csv, "50,32" generates 50 lines with 32 columns. ' 96 | 'More than two values are invalid for CSV shape. ' 97 | 'For application/json, "32,32,3" generates a JSON object with shape. ' 98 | '32,32,3. For application/jsonlines, the shape given represents a ' 99 | 'single instance as one nested JSON list.', 100 | type=str, required=True) 101 | parser.add_argument('-p', '--payload-size', help='Approximate payload size. If supplied with text/csv, ' 102 | 'ignores the number of rows given in shape and repeats a ' 103 | 'single row until the payload is full. If supplied with ' 104 | 'application/json or application/jsonlines, repeats the ' 105 | 'payload with the given shape until the payload is full. ' 106 | 'application/json payload is always one line. ' 107 | 'Unit size defaults to the number of bytes unless --unit is set.', 108 | type=int) 109 | parser.add_argument('-u', '--unit', help='Unit for payload size. 
One of "B", "KB", or "MB" for bytes, kilobytes, ' 110 | 'and megabytes, respectively', type=str, 111 | choices=_UNIT_FOR_PAYLOAD_SIZE.keys()) 112 | 113 | args = parser.parse_args(sys.argv[1:]) 114 | parsed_shape = list(map(lambda x: int(x), args.shape.split(','))) 115 | print(generate_data(args.content_type, parsed_shape, args.payload_size, args.unit)) 116 | -------------------------------------------------------------------------------- /test/integration/sagemaker/test_tfs.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 13 | import os 14 | 15 | import pytest 16 | 17 | import util 18 | 19 | NON_P3_REGIONS = ["ap-southeast-1", "ap-southeast-2", "ap-south-1", 20 | "ca-central-1", "eu-central-1", "eu-west-2", "us-west-1"] 21 | 22 | 23 | @pytest.fixture(params=os.environ["TEST_VERSIONS"].split(",")) 24 | def version(request): 25 | return request.param 26 | 27 | 28 | @pytest.fixture(scope="session") 29 | def repo(request): 30 | return request.config.getoption("--repo") or "sagemaker-tensorflow-serving" 31 | 32 | 33 | @pytest.fixture 34 | def tag(request, version, instance_type): 35 | if request.config.getoption("--tag"): 36 | return request.config.getoption("--tag") 37 | 38 | arch = "gpu" if instance_type.startswith("ml.p") else "cpu" 39 | return f"{version}-{arch}" 40 | 41 | 42 | @pytest.fixture 43 | def image_uri(registry, region, repo, tag): 44 | return util.image_uri(registry, region, repo, tag) 45 | 46 | 47 | @pytest.fixture(params=os.environ["TEST_INSTANCE_TYPES"].split(",")) 48 | def instance_type(request, region): 49 | return request.param 50 | 51 | 52 | @pytest.fixture(scope="module") 53 | def accelerator_type(): 54 | return None 55 | 56 | 57 | @pytest.fixture(scope="session") 58 | def tfs_model(region, boto_session): 59 | return util.find_or_put_model_data(region, 60 | boto_session, 61 | "test/data/tfs-model.tar.gz") 62 | 63 | 64 | @pytest.fixture(scope='session') 65 | def python_model_with_requirements(region, boto_session): 66 | return util.find_or_put_model_data(region, 67 | boto_session, 68 | "test/data/python-with-requirements.tar.gz") 69 | 70 | 71 | @pytest.fixture(scope='session') 72 | def python_model_with_lib(region, boto_session): 73 | return util.find_or_put_model_data(region, 74 | boto_session, 75 | "test/data/python-with-lib.tar.gz") 76 | 77 | 78 | def test_tfs_model(boto_session, sagemaker_client, 79 | sagemaker_runtime_client, model_name, tfs_model, 80 | image_uri, instance_type, accelerator_type): 81 | input_data = {"instances": [1.0, 2.0, 5.0]} 82 | util.create_and_invoke_endpoint(boto_session, sagemaker_client, 83 | sagemaker_runtime_client, model_name, tfs_model, 84 | image_uri, instance_type, accelerator_type, input_data) 85 | 86 | 87 | def test_batch_transform(region, boto_session, sagemaker_client, 88 | model_name, tfs_model, image_uri, 89 | instance_type): 90 | results = util.run_batch_transform_job(region=region, 91 | 
boto_session=boto_session, 92 | model_data=tfs_model, 93 | image_uri=image_uri, 94 | model_name=model_name, 95 | sagemaker_client=sagemaker_client, 96 | instance_type=instance_type) 97 | assert len(results) == 10 98 | for r in results: 99 | assert r == [3.5, 4.0, 5.5] 100 | 101 | 102 | def test_python_model_with_requirements(boto_session, sagemaker_client, 103 | sagemaker_runtime_client, model_name, 104 | python_model_with_requirements, image_uri, instance_type, 105 | accelerator_type): 106 | 107 | if "p3" in instance_type: 108 | pytest.skip("skip for p3 instance") 109 | 110 | # the python service needs to transform this to get a valid prediction 111 | input_data = {"x": [1.0, 2.0, 5.0]} 112 | output_data = util.create_and_invoke_endpoint(boto_session, sagemaker_client, 113 | sagemaker_runtime_client, model_name, 114 | python_model_with_requirements, image_uri, 115 | instance_type, accelerator_type, input_data) 116 | 117 | # python service adds this to tfs response 118 | assert output_data["python"] is True 119 | assert output_data["pillow"] == "6.0.0" 120 | 121 | 122 | def test_python_model_with_lib(boto_session, sagemaker_client, 123 | sagemaker_runtime_client, model_name, python_model_with_lib, 124 | image_uri, instance_type, accelerator_type): 125 | 126 | if "p3" in instance_type: 127 | pytest.skip("skip for p3 instance") 128 | 129 | # the python service needs to transform this to get a valid prediction 130 | input_data = {"x": [1.0, 2.0, 5.0]} 131 | output_data = util.create_and_invoke_endpoint(boto_session, sagemaker_client, 132 | sagemaker_runtime_client, model_name, python_model_with_lib, 133 | image_uri, instance_type, accelerator_type, input_data) 134 | 135 | # python service adds this to tfs response 136 | assert output_data["python"] is True 137 | assert output_data["dummy_module"] == "0.1" 138 | -------------------------------------------------------------------------------- /docker/2.1/Dockerfile.gpu: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:10.1-base-ubuntu18.04 2 | 3 | LABEL maintainer="Amazon AI" 4 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true 5 | 6 | ARG PYTHON=python3 7 | ARG PIP=pip3 8 | ARG TFS_SHORT_VERSION=2.1 9 | ARG TFS_URL=https://tensorflow-aws.s3-us-west-2.amazonaws.com/2.1/Serving/GPU/tensorflow_model_server 10 | 11 | ENV NCCL_VERSION=2.4.7-1+cuda10.1 12 | ENV CUDNN_VERSION=7.6.2.24-1+cuda10.1 13 | ENV TF_TENSORRT_VERSION=5.0.2 14 | ENV TF_TENSORRT_LIB_VERSION=6.0.1 15 | 16 | # See http://bugs.python.org/issue19846 17 | ENV LANG=C.UTF-8 18 | ENV PYTHONDONTWRITEBYTECODE=1 19 | # Python won’t try to write .pyc or .pyo files on the import of source modules 20 | ENV PYTHONUNBUFFERED=1 21 | ENV SAGEMAKER_TFS_VERSION="${TFS_SHORT_VERSION}" 22 | ENV PATH="$PATH:/sagemaker" 23 | ENV MODEL_BASE_PATH=/models 24 | # The only required piece is the model name in order to differentiate endpoints 25 | ENV MODEL_NAME=model 26 | # Fix for the interactive mode during an install in step 21 27 | ENV DEBIAN_FRONTEND=noninteractive 28 | 29 | # https://forums.developer.nvidia.com/t/notice-cuda-linux-repository-key-rotation/212771 30 | # Fix cuda repo's GPG key. Nvidia is no longer updating the machine-learning repo. 31 | # Need to manually pull and install necessary debs to continue using these versions. 
32 | RUN rm /etc/apt/sources.list.d/cuda.list \ 33 | && apt-key del 7fa2af80 \ 34 | && apt-get update && apt-get install -y --no-install-recommends wget \ 35 | && wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-keyring_1.0-1_all.deb \ 36 | && dpkg -i cuda-keyring_1.0-1_all.deb \ 37 | && wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/libcudnn7_${CUDNN_VERSION}_amd64.deb \ 38 | && dpkg -i libcudnn7_${CUDNN_VERSION}_amd64.deb \ 39 | && wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/libnccl2_${NCCL_VERSION}_amd64.deb \ 40 | && dpkg -i libnccl2_${NCCL_VERSION}_amd64.deb \ 41 | && rm *.deb 42 | 43 | # allow unauthenticated and allow downgrades for special libcublas library 44 | RUN apt-get update \ 45 | && apt-get install -y --no-install-recommends --allow-unauthenticated --allow-downgrades\ 46 | ca-certificates \ 47 | cuda-command-line-tools-10-1 \ 48 | cuda-cufft-10-1 \ 49 | cuda-curand-10-1 \ 50 | cuda-cusolver-10-1 \ 51 | cuda-cusparse-10-1 \ 52 | #cuda-cublas-dev not available with 10-1, install libcublas instead 53 | libcublas10=10.1.0.105-1 \ 54 | libcublas-dev=10.1.0.105-1 \ 55 | libgomp1 \ 56 | curl \ 57 | git \ 58 | wget \ 59 | vim \ 60 | build-essential \ 61 | zlib1g-dev \ 62 | python3 \ 63 | python3-pip \ 64 | python3-setuptools \ 65 | && apt-get clean \ 66 | && rm -rf /var/lib/apt/lists/* 67 | 68 | RUN ${PIP} --no-cache-dir install --upgrade \ 69 | pip \ 70 | setuptools 71 | 72 | # Some TF tools expect a "python" binary 73 | RUN ln -s $(which ${PYTHON}) /usr/local/bin/python 74 | 75 | # nginx + njs 76 | RUN apt-get update \ 77 | && apt-get -y install --no-install-recommends \ 78 | curl \ 79 | gnupg2 \ 80 | && curl -s http://nginx.org/keys/nginx_signing.key | apt-key add - \ 81 | && echo 'deb http://nginx.org/packages/ubuntu/ bionic nginx' >> /etc/apt/sources.list \ 82 | && apt-get update \ 83 | && apt-get -y install --no-install-recommends \ 84 | nginx \ 85 | nginx-module-njs \ 86 | && apt-get clean \ 87 | && rm -rf /var/lib/apt/lists/* 88 | 89 | # https://forums.developer.nvidia.com/t/notice-cuda-linux-repository-key-rotation/212771 90 | # Nvidia is no longer updating the machine-learning repo. 91 | # Need to manually pull and install necessary debs to continue using these versions. 92 | # nvinfer-runtime-trt-repo doesn't have a 1804-cuda10.1 version. 
93 | RUN wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/nvinfer-runtime-trt-repo-ubuntu1804-${TF_TENSORRT_VERSION}-ga-cuda10.0_1-1_amd64.deb \ 94 | && dpkg -i nvinfer-runtime-trt-repo-ubuntu1804-${TF_TENSORRT_VERSION}-ga-cuda10.0_1-1_amd64.deb \ 95 | && wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/libnvinfer6_${TF_TENSORRT_LIB_VERSION}-1+cuda10.1_amd64.deb \ 96 | && dpkg -i libnvinfer6_${TF_TENSORRT_LIB_VERSION}-1+cuda10.1_amd64.deb \ 97 | && rm *.deb \ 98 | && rm -rf /var/lib/apt/lists/* 99 | 100 | # cython, falcon, gunicorn, grpc 101 | RUN ${PIP} install -U --no-cache-dir \ 102 | boto3 \ 103 | awscli \ 104 | cython==0.29.14 \ 105 | falcon==2.0.0 \ 106 | gunicorn==20.0.4 \ 107 | gevent==1.4.0 \ 108 | requests==2.22.0 \ 109 | grpcio==1.27.1 \ 110 | protobuf==3.11.1 \ 111 | # using --no-dependencies to avoid installing tensorflow binary 112 | && ${PIP} install --no-dependencies --no-cache-dir \ 113 | tensorflow-serving-api-gpu==2.1.0 114 | 115 | COPY ./sagemaker /sagemaker 116 | 117 | RUN curl $TFS_URL -o /usr/bin/tensorflow_model_server \ 118 | && chmod 555 /usr/bin/tensorflow_model_server 119 | 120 | # Expose gRPC and REST port 121 | EXPOSE 8500 8501 122 | 123 | # Set where models should be stored in the container 124 | RUN mkdir -p ${MODEL_BASE_PATH} 125 | 126 | # Create a script that runs the model server so we can use environment variables 127 | # while also passing in arguments from the docker command line 128 | RUN echo '#!/bin/bash \n\n' > /usr/bin/tf_serving_entrypoint.sh \ 129 | && echo '/usr/bin/tensorflow_model_server --port=8500 --rest_api_port=8501 --model_name=${MODEL_NAME} --model_base_path=${MODEL_BASE_PATH}/${MODEL_NAME} "$@"' >> /usr/bin/tf_serving_entrypoint.sh \ 130 | && chmod +x /usr/bin/tf_serving_entrypoint.sh 131 | 132 | ADD https://raw.githubusercontent.com/aws/aws-deep-learning-containers-utils/master/deep_learning_container.py /usr/local/bin/deep_learning_container.py 133 | 134 | RUN chmod +x /usr/local/bin/deep_learning_container.py 135 | 136 | RUN curl https://aws-dlc-licenses.s3.amazonaws.com/tensorflow-2.1/license.txt -o /license.txt 137 | 138 | CMD ["/usr/bin/tf_serving_entrypoint.sh"] 139 | -------------------------------------------------------------------------------- /docker/1.15/Dockerfile.gpu: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:10.0-base-ubuntu18.04 2 | 3 | LABEL maintainer="Amazon AI" 4 | # Specify LABEL for inference pipelines to use SAGEMAKER_BIND_TO_PORT 5 | # https://docs.aws.amazon.com/sagemaker/latest/dg/inference-pipeline-real-time.html 6 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true 7 | 8 | # Add arguments to achieve the version, python and url 9 | ARG PYTHON=python3 10 | ARG PIP=pip3 11 | ARG TFS_SHORT_VERSION=1.15.2 12 | ARG TF_MODEL_SERVER_SOURCE=https://tensorflow-aws.s3-us-west-2.amazonaws.com/${TFS_SHORT_VERSION}/Serving/GPU/tensorflow_model_server 13 | 14 | # See http://bugs.python.org/issue19846 15 | ENV LANG=C.UTF-8 16 | ENV NCCL_VERSION=2.4.7-1+cuda10.0 17 | ENV CUDNN_VERSION=7.5.1.10-1+cuda10.0 18 | ENV TF_TENSORRT_VERSION=5.0.2 19 | ENV TF_TENSORRT_LIB_VERSION=5.1.2 20 | ENV PYTHONDONTWRITEBYTECODE=1 21 | # Python won’t try to write .pyc or .pyo files on the import of source modules 22 | ENV PYTHONUNBUFFERED=1 23 | ENV SAGEMAKER_TFS_VERSION="${TFS_SHORT_VERSION}" 24 | ENV PATH="$PATH:/sagemaker" 25 | ENV MODEL_BASE_PATH=/models 26 | # The only required 
piece is the model name in order to differentiate endpoints 27 | ENV MODEL_NAME=model 28 | # Prevent docker build from getting stopped by request for user interaction 29 | ENV DEBIAN_FRONTEND=noninteractive 30 | 31 | # https://forums.developer.nvidia.com/t/notice-cuda-linux-repository-key-rotation/212771 32 | # Fix cuda repo's GPG key. Nvidia is no longer updating the machine-learning repo. 33 | # Need to manually pull and install necessary debs to continue using these versions. 34 | RUN rm /etc/apt/sources.list.d/cuda.list \ 35 | && apt-key del 7fa2af80 \ 36 | && apt-get update && apt-get install -y --no-install-recommends wget \ 37 | && wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-keyring_1.0-1_all.deb \ 38 | && dpkg -i cuda-keyring_1.0-1_all.deb \ 39 | && wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/libcudnn7_${CUDNN_VERSION}_amd64.deb \ 40 | && dpkg -i libcudnn7_${CUDNN_VERSION}_amd64.deb \ 41 | && wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/libnccl2_${NCCL_VERSION}_amd64.deb \ 42 | && dpkg -i libnccl2_${NCCL_VERSION}_amd64.deb \ 43 | && rm *.deb 44 | 45 | RUN apt-get update \ 46 | && apt-get install -y --no-install-recommends \ 47 | ca-certificates \ 48 | cuda-command-line-tools-10-0 \ 49 | cuda-cublas-10-0 \ 50 | cuda-cufft-10-0 \ 51 | cuda-curand-10-0 \ 52 | cuda-cusolver-10-0 \ 53 | cuda-cusparse-10-0 \ 54 | libgomp1 \ 55 | curl \ 56 | git \ 57 | wget \ 58 | vim \ 59 | python3 \ 60 | python3-pip \ 61 | python3-setuptools \ 62 | && apt-get clean \ 63 | && rm -rf /var/lib/apt/lists/* 64 | 65 | RUN ${PIP} --no-cache-dir install --upgrade \ 66 | pip \ 67 | setuptools 68 | 69 | # Some TF tools expect a "python" binary 70 | RUN ln -s $(which ${PYTHON}) /usr/local/bin/python \ 71 | && ln -s /usr/local/bin/pip3 /usr/bin/pip 72 | 73 | # nginx + njs 74 | RUN apt-get update \ 75 | && apt-get -y install --no-install-recommends \ 76 | curl \ 77 | gnupg2 \ 78 | && curl -s http://nginx.org/keys/nginx_signing.key | apt-key add - \ 79 | && echo 'deb http://nginx.org/packages/ubuntu/ bionic nginx' >> /etc/apt/sources.list \ 80 | && apt-get update \ 81 | && apt-get -y install --no-install-recommends \ 82 | nginx \ 83 | nginx-module-njs \ 84 | && apt-get clean \ 85 | && rm -rf /var/lib/apt/lists/* 86 | 87 | # cython, falcon, gunicorn, grpc 88 | RUN ${PIP} install -U --no-cache-dir \ 89 | boto3 \ 90 | awscli==1.18.34 \ 91 | pyYAML==5.3.1 \ 92 | cython==0.29.12 \ 93 | falcon==2.0.0 \ 94 | gunicorn==19.9.0 \ 95 | gevent==1.4.0 \ 96 | requests==2.22.0 \ 97 | grpcio==1.24.1 \ 98 | protobuf==3.10.0 \ 99 | # using --no-dependencies to avoid installing tensorflow binary 100 | && ${PIP} install --no-dependencies --no-cache-dir \ 101 | tensorflow-serving-api-gpu==1.15.0 102 | 103 | # https://forums.developer.nvidia.com/t/notice-cuda-linux-repository-key-rotation/212771 104 | # Fix cuda repo's GPG key. Nvidia is no longer updating the machine-learning repo. 105 | # Need to manually pull and install necessary debs to continue using these versions. 
106 | RUN wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/nvinfer-runtime-trt-repo-ubuntu1804-${TF_TENSORRT_VERSION}-ga-cuda10.0_1-1_amd64.deb \ 107 | && dpkg -i nvinfer-runtime-trt-repo-ubuntu1804-${TF_TENSORRT_VERSION}-ga-cuda10.0_1-1_amd64.deb \ 108 | && wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/libnvinfer5_${TF_TENSORRT_LIB_VERSION}-1+cuda10.0_amd64.deb \ 109 | && dpkg -i libnvinfer5_${TF_TENSORRT_LIB_VERSION}-1+cuda10.0_amd64.deb \ 110 | && rm *.deb \ 111 | && rm -rf /var/lib/apt/lists/* \ 112 | && rm /usr/lib/x86_64-linux-gnu/libnvinfer_plugin* \ 113 | && rm /usr/lib/x86_64-linux-gnu/libnvcaffe_parser* \ 114 | && rm /usr/lib/x86_64-linux-gnu/libnvparsers* 115 | 116 | COPY sagemaker /sagemaker 117 | 118 | RUN curl ${TF_MODEL_SERVER_SOURCE} -o /usr/bin/tensorflow_model_server \ 119 | && chmod 555 /usr/bin/tensorflow_model_server 120 | 121 | # Expose gRPC and REST port 122 | EXPOSE 8500 8501 123 | 124 | # Set where models should be stored in the container 125 | RUN mkdir -p ${MODEL_BASE_PATH} 126 | 127 | # Create a script that runs the model server so we can use environment variables 128 | # while also passing in arguments from the docker command line 129 | RUN echo '#!/bin/bash \n\n' > /usr/bin/tf_serving_entrypoint.sh \ 130 | && echo '/usr/bin/tensorflow_model_server --port=8500 --rest_api_port=8501 --model_name=${MODEL_NAME} --model_base_path=${MODEL_BASE_PATH}/${MODEL_NAME} "$@"' >> /usr/bin/tf_serving_entrypoint.sh \ 131 | && chmod +x /usr/bin/tf_serving_entrypoint.sh 132 | 133 | ADD https://raw.githubusercontent.com/aws/aws-deep-learning-containers-utils/master/deep_learning_container.py /usr/local/bin/deep_learning_container.py 134 | 135 | RUN chmod +x /usr/local/bin/deep_learning_container.py 136 | 137 | RUN curl https://aws-dlc-licenses.s3.amazonaws.com/tensorflow/license.txt -o /license.txt 138 | 139 | CMD ["/usr/bin/tf_serving_entrypoint.sh"] 140 | -------------------------------------------------------------------------------- /test/unit/test_deep_learning_container.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the 'License'). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the 'license' file accompanying this file. This file is 10 | # distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 
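# Unit tests for docker/build_artifacts/deep_learning_container.py: instance-id and region lookups against
# the EC2 instance-metadata endpoint and the query_bucket() telemetry call, with invalid values, HTTP errors,
# connection errors, and timeouts simulated via requests_mock.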
13 | from __future__ import absolute_import 14 | 15 | import unittest 16 | 17 | from docker.build_artifacts import deep_learning_container as deep_learning_container_to_test 18 | import pytest 19 | import requests 20 | 21 | 22 | @pytest.fixture(name='fixture_valid_instance_id') 23 | def fixture_valid_instance_id(requests_mock): 24 | return requests_mock.get('http://169.254.169.254/latest/meta-data/instance-id', 25 | text='i-123t32e11s32t1231') 26 | 27 | 28 | @pytest.fixture(name='fixture_invalid_instance_id') 29 | def fixture_invalid_instance_id(requests_mock): 30 | return requests_mock.get('http://169.254.169.254/latest/meta-data/instance-id', text='i-123') 31 | 32 | 33 | @pytest.fixture(name='fixture_none_instance_id') 34 | def fixture_none_instance_id(requests_mock): 35 | return requests_mock.get('http://169.254.169.254/latest/meta-data/instance-id', text=None) 36 | 37 | 38 | @pytest.fixture(name='fixture_invalid_region') 39 | def fixture_invalid_region(requests_mock): 40 | return requests_mock.get('http://169.254.169.254/latest/dynamic/instance-identity/document', 41 | json={'region': 'test'}) 42 | 43 | 44 | @pytest.fixture(name='fixture_valid_region') 45 | def fixture_valid_region(requests_mock): 46 | return requests_mock.get('http://169.254.169.254/latest/dynamic/instance-identity/document', 47 | json={'region': 'us-east-1'}) 48 | 49 | 50 | def test_retrieve_instance_id(fixture_valid_instance_id): 51 | result = deep_learning_container_to_test._retrieve_instance_id() 52 | assert 'i-123t32e11s32t1231' == result 53 | 54 | 55 | def test_retrieve_none_instance_id(fixture_none_instance_id): 56 | result = deep_learning_container_to_test._retrieve_instance_id() 57 | assert result is None 58 | 59 | 60 | def test_retrieve_invalid_instance_id(fixture_invalid_instance_id): 61 | result = deep_learning_container_to_test._retrieve_instance_id() 62 | assert result is None 63 | 64 | 65 | def test_retrieve_invalid_region(fixture_invalid_region): 66 | result = deep_learning_container_to_test._retrieve_instance_region() 67 | assert result is None 68 | 69 | 70 | def test_retrieve_valid_region(fixture_valid_region): 71 | result = deep_learning_container_to_test._retrieve_instance_region() 72 | assert 'us-east-1' == result 73 | 74 | 75 | def test_query_bucket(requests_mock, fixture_valid_region, fixture_valid_instance_id): 76 | fixture_valid_instance_id.return_value = 'i-123t32e11s32t1231' 77 | fixture_valid_region.return_value = 'us-east-1' 78 | requests_mock.get(('https://aws-deep-learning-containers-us-east-1.s3.us-east-1.amazonaws.com' 79 | '/dlc-containers.txt?x-instance-id=i-123t32e11s32t1231'), 80 | text='Access Denied') 81 | actual_response = deep_learning_container_to_test.query_bucket() 82 | assert 'Access Denied' == actual_response.text 83 | 84 | 85 | def test_query_bucket_region_none(fixture_invalid_region, fixture_valid_instance_id): 86 | fixture_valid_instance_id.return_value = 'i-123t32e11s32t1231' 87 | fixture_invalid_region.return_value = None 88 | actual_response = deep_learning_container_to_test.query_bucket() 89 | assert actual_response is None 90 | 91 | 92 | def test_query_bucket_instance_id_none(requests_mock, fixture_valid_region, fixture_none_instance_id): 93 | fixture_none_instance_id.return_value = None 94 | fixture_valid_region.return_value = 'us-east-1' 95 | actual_response = deep_learning_container_to_test.query_bucket() 96 | assert actual_response is None 97 | 98 | 99 | def test_query_bucket_instance_id_invalid(requests_mock, fixture_valid_region, fixture_invalid_instance_id): 
100 | fixture_invalid_instance_id.return_value = None 101 | fixture_valid_region.return_value = 'us-east-1' 102 | actual_response = deep_learning_container_to_test.query_bucket() 103 | assert actual_response is None 104 | 105 | 106 | def test_HTTP_error_on_S3(requests_mock, fixture_valid_region, fixture_valid_instance_id): 107 | fixture_valid_instance_id.return_value = 'i-123t32e11s32t1231' 108 | fixture_valid_region.return_value = 'us-east-1' 109 | query_s3_url = ('https://aws-deep-learning-containers-us-east-1.s3.us-east-1.amazonaws.com' 110 | '/dlc-containers.txt?x-instance-id=i-123t32e11s32t1231') 111 | 112 | requests_mock.get( 113 | query_s3_url, 114 | exc=requests.exceptions.HTTPError) 115 | requests_mock.side_effect = requests.exceptions.HTTPError 116 | 117 | with pytest.raises(requests.exceptions.HTTPError): 118 | actual_response = requests.get(query_s3_url) 119 | assert actual_response is None 120 | 121 | 122 | def test_connection_error_on_S3(requests_mock, fixture_valid_region, fixture_valid_instance_id): 123 | fixture_valid_instance_id.return_value = 'i-123t32e11s32t1231' 124 | fixture_valid_region.return_value = 'us-east-1' 125 | query_s3_url = ('https://aws-deep-learning-containers-us-east-1.s3.us-east-1.amazonaws.com' 126 | '/dlc-containers.txt?x-instance-id=i-123t32e11s32t1231') 127 | 128 | requests_mock.get( 129 | query_s3_url, 130 | exc=requests.exceptions.ConnectionError) 131 | 132 | with pytest.raises(requests.exceptions.ConnectionError): 133 | actual_response = requests.get( 134 | query_s3_url) 135 | 136 | assert actual_response is None 137 | 138 | 139 | def test_timeout_error_on_S3(requests_mock, fixture_valid_region, fixture_valid_instance_id): 140 | fixture_valid_instance_id.return_value = 'i-123t32e11s32t1231' 141 | fixture_valid_region.return_value = 'us-east-1' 142 | query_s3_url = ('https://aws-deep-learning-containers-us-east-1.s3.us-east-1.amazonaws.com' 143 | '/dlc-containers.txt?x-instance-id=i-123t32e11s32t1231') 144 | 145 | requests_mock.get( 146 | query_s3_url, 147 | exc=requests.Timeout) 148 | 149 | with pytest.raises(requests.exceptions.Timeout): 150 | actual_response = requests.get( 151 | query_s3_url) 152 | 153 | assert actual_response is None 154 | 155 | 156 | if __name__ == '__main__': 157 | unittest.main() 158 | -------------------------------------------------------------------------------- /docker/build_artifacts/sagemaker/tensorflowServing.js: -------------------------------------------------------------------------------- 1 | var tfs_base_uri = '/tfs/v1/models/' 2 | var custom_attributes_header = 'X-Amzn-SageMaker-Custom-Attributes' 3 | 4 | function invocations(r) { 5 | var ct = r.headersIn['Content-Type'] 6 | 7 | if ('application/json' == ct || 'application/jsonlines' == ct || 'application/jsons' == ct) { 8 | json_request(r) 9 | } else if ('text/csv' == ct) { 10 | csv_request(r) 11 | } else { 12 | return_error(r, 415, 'Unsupported Media Type: ' + (ct || 'Unknown')) 13 | } 14 | } 15 | 16 | function ping(r) { 17 | var uri = make_tfs_uri(r, false) 18 | 19 | function callback (reply) { 20 | if (reply.status == 200 && reply.responseBody.includes('"AVAILABLE"')) { 21 | r.return(200) 22 | } else { 23 | r.error('failed ping' + reply.responseBody) 24 | r.return(502) 25 | } 26 | } 27 | 28 | r.subrequest(uri, callback) 29 | } 30 | 31 | function ping_without_model(r) { 32 | // hack for TF 1.11 and MME 33 | // for TF 1.11, send an arbitrary fixed request to the default model. 
34 | // if response is 400, the model is ok (but input was bad), so return 200 35 | // for MME, the default model name is None and does not exist 36 | // also return 200 in unlikely case our request was really valid 37 | 38 | var uri = make_tfs_uri(r, true) 39 | var options = { 40 | method: 'POST', 41 | body: '{"instances": "invalid"}' 42 | } 43 | 44 | function callback (reply) { 45 | if (reply.status == 200 || reply.status == 400 || 46 | reply.responseBody.includes('Servable not found for request: Latest(None)')) { 47 | r.return(200) 48 | } else { 49 | r.error('failed ping' + reply.responseBody) 50 | r.return(502) 51 | } 52 | } 53 | 54 | r.subrequest(uri, options, callback) 55 | } 56 | 57 | function return_error(r, code, message) { 58 | if (message) { 59 | r.return(code, '{"error": "' + message + '"}') 60 | } else { 61 | r.return(code) 62 | } 63 | } 64 | 65 | function tfs_json_request(r, json) { 66 | var uri = make_tfs_uri(r, true) 67 | var options = { 68 | method: 'POST', 69 | body: json 70 | } 71 | 72 | var accept = r.headersIn.Accept 73 | function callback (reply) { 74 | var body = reply.responseBody 75 | if (reply.status == 400) { 76 | // "fix" broken json escaping in \'instances\' message 77 | body = body.replace("\\'instances\\'", "'instances'") 78 | } 79 | 80 | if (accept != undefined) { 81 | var content_types = accept.trim().replace(" ", "").split(",") 82 | if (content_types.includes('application/jsonlines') || content_types.includes('application/json')) { 83 | body = body.replace(/\n/g, '') 84 | r.headersOut['Content-Type'] = content_types[0] 85 | } 86 | } 87 | r.return(reply.status, body) 88 | } 89 | 90 | r.subrequest(uri, options, callback) 91 | 92 | } 93 | 94 | function make_tfs_uri(r, with_method) { 95 | var attributes = parse_custom_attributes(r) 96 | 97 | var uri = tfs_base_uri + attributes['tfs-model-name'] 98 | if ('tfs-model-version' in attributes) { 99 | uri += '/versions/' + attributes['tfs-model-version'] 100 | } 101 | 102 | if (with_method) { 103 | uri += ':' + (attributes['tfs-method'] || 'predict') 104 | } 105 | 106 | return uri 107 | } 108 | 109 | function parse_custom_attributes(r) { 110 | var attributes = {} 111 | var kv_pattern = /tfs-[a-z\-]+=[^,]+/g 112 | var header = r.headersIn[custom_attributes_header] 113 | if (header) { 114 | var matches = header.match(kv_pattern) 115 | if (matches) { 116 | for (var i = 0; i < matches.length; i++) { 117 | var kv = matches[i].split('=') 118 | if (kv.length === 2) { 119 | attributes[kv[0]] = kv[1] 120 | } 121 | } 122 | } 123 | } 124 | 125 | // for MME invocations, tfs-model-name is in the uri, or use default_tfs_model 126 | if (!attributes['tfs-model-name']) { 127 | var uri_pattern = /\/models\/[^,]+\/invoke/g 128 | var model_name = r.uri.match(uri_pattern) 129 | if (model_name[0]) { 130 | model_name = r.uri.replace('/models/', '').replace('/invoke', '') 131 | attributes['tfs-model-name'] = model_name 132 | } else { 133 | attributes['tfs-model-name'] = r.variables.default_tfs_model 134 | } 135 | } 136 | 137 | return attributes 138 | } 139 | 140 | function json_request(r) { 141 | var data = r.requestBody 142 | 143 | if (is_tfs_json(data)) { 144 | tfs_json_request(r, data) 145 | } else if (is_json_lines(data)) { 146 | json_lines_request(r, data) 147 | } else { 148 | generic_json_request(r, data) 149 | } 150 | } 151 | 152 | function is_tfs_json(data) { 153 | return /"(instances|inputs|examples)"\s*:/.test(data) 154 | } 155 | 156 | function is_json_lines(data) { 157 | // objects separated only by (optional) whitespace means 
jsons/json-lines 158 | return /[}\]]\s*[\[{]/.test(data) 159 | } 160 | 161 | function generic_json_request(r, data) { 162 | if (! /^\s*\[\s*\[/.test(data)) { 163 | data = '[' + data + ']' 164 | } 165 | 166 | var json = '{"instances":' + data + '}' 167 | tfs_json_request(r, json) 168 | } 169 | 170 | function json_lines_request(r, data) { 171 | var lines = data.trim().split(/\r?\n/) 172 | var builder = [] 173 | builder.push('{"instances":') 174 | if (lines.length != 1) { 175 | builder.push('[') 176 | } 177 | 178 | for (var i = 0; i < lines.length; i++) { 179 | var line = lines[i].trim() 180 | if (line) { 181 | var instance = (i == 0) ? '' : ',' 182 | instance += line 183 | builder.push(instance) 184 | } 185 | } 186 | 187 | builder.push(lines.length == 1 ? '}' : ']}') 188 | tfs_json_request(r, builder.join('')) 189 | } 190 | 191 | function csv_request(r) { 192 | var data = r.requestBody 193 | // look for initial quote or numeric-only data in 1st field 194 | var needs_quotes = data.search(/^\s*("|[\d.Ee+\-]+.*)/) != 0 195 | var lines = data.trim().split(/\r?\n/) 196 | var builder = [] 197 | builder.push('{"instances":[') 198 | 199 | for (var i = 0; i < lines.length; i++) { 200 | var line = lines[i].trim() 201 | if (line) { 202 | var line_builder = [] 203 | // Only wrap line in brackets if there are multiple columns. 204 | // If there's only one column and it has a string with a comma, 205 | // the input will be wrapped in an extra set of brackets. 206 | var has_multiple_columns = line.search(',') != -1 207 | 208 | if (has_multiple_columns) { 209 | line_builder.push('[') 210 | } 211 | 212 | if (needs_quotes) { 213 | line_builder.push('"') 214 | line_builder.push(line.replace('"', '\\"').replace(',', '","')) 215 | line_builder.push('"') 216 | } else { 217 | line_builder.push(line) 218 | } 219 | 220 | if (has_multiple_columns) { 221 | line_builder.push(']') 222 | } 223 | 224 | var json_line = line_builder.join('') 225 | builder.push(json_line) 226 | 227 | if (i != lines.length - 1) 228 | builder.push(',') 229 | } 230 | } 231 | 232 | builder.push(']}') 233 | tfs_json_request(r, builder.join('')) 234 | } 235 | 236 | export default {invocations, ping, ping_without_model, return_error, 237 | tfs_json_request, make_tfs_uri, parse_custom_attributes, 238 | json_request, is_tfs_json, is_json_lines, generic_json_request, 239 | json_lines_request, csv_request}; 240 | -------------------------------------------------------------------------------- /test/integration/local/test_multi_model_endpoint.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 
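# Local integration tests for multi-model endpoint (MME) mode: the container is started with
# SAGEMAKER_MULTI_MODEL=true, and the tests cover dynamic load/unload/list requests, per-version invocations,
# duplicate loads, and error handling for missing models and malformed load requests.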
13 | 14 | import json 15 | import os 16 | import subprocess 17 | import sys 18 | import time 19 | 20 | import pytest 21 | import requests 22 | 23 | from multi_model_endpoint_test_utils import ( 24 | make_invocation_request, 25 | make_list_model_request, 26 | make_load_model_request, 27 | make_unload_model_request, 28 | ) 29 | 30 | PING_URL = "http://localhost:8080/ping" 31 | 32 | 33 | @pytest.fixture(scope="session", autouse=True) 34 | def volume(): 35 | try: 36 | model_dir = os.path.abspath("test/resources/mme") 37 | subprocess.check_call( 38 | "docker volume create --name dynamic_endpoint_model_volume --opt type=none " 39 | "--opt device={} --opt o=bind".format(model_dir).split()) 40 | yield model_dir 41 | finally: 42 | subprocess.check_call("docker volume rm dynamic_endpoint_model_volume".split()) 43 | 44 | 45 | @pytest.fixture(scope="module", autouse=True) 46 | def container(request, docker_base_name, tag, runtime_config): 47 | try: 48 | command = ( 49 | "docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080" 50 | " --mount type=volume,source=dynamic_endpoint_model_volume,target=/opt/ml/models,readonly" 51 | " -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info" 52 | " -e SAGEMAKER_BIND_TO_PORT=8080" 53 | " -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999" 54 | " -e SAGEMAKER_MULTI_MODEL=true" 55 | " {}:{} serve" 56 | ).format(runtime_config, docker_base_name, tag) 57 | 58 | proc = subprocess.Popen(command.split(), stdout=sys.stdout, stderr=subprocess.STDOUT) 59 | 60 | attempts = 0 61 | while attempts < 40: 62 | time.sleep(3) 63 | try: 64 | res_code = requests.get("http://localhost:8080/ping").status_code 65 | if res_code == 200: 66 | break 67 | except: 68 | attempts += 1 69 | pass 70 | 71 | yield proc.pid 72 | finally: 73 | subprocess.check_call("docker rm -f sagemaker-tensorflow-serving-test".split()) 74 | 75 | 76 | @pytest.mark.skip_gpu 77 | def test_ping(): 78 | res = requests.get(PING_URL) 79 | assert res.status_code == 200 80 | 81 | 82 | @pytest.mark.skip_gpu 83 | def test_container_start_invocation_fail(): 84 | x = { 85 | "instances": [1.0, 2.0, 5.0] 86 | } 87 | code, y = make_invocation_request(json.dumps(x), "half_plus_three") 88 | y = json.loads(y) 89 | assert code == 404 90 | assert "Model half_plus_three is not loaded yet." 
in str(y) 91 | 92 | 93 | @pytest.mark.skip_gpu 94 | def test_list_models_empty(): 95 | code, res = make_list_model_request() 96 | res = json.loads(res) 97 | assert code == 200 98 | assert len(res) == 0 99 | 100 | 101 | @pytest.mark.skip_gpu 102 | def test_delete_unloaded_model(): 103 | # unloads the given model/version, no-op if not loaded 104 | model_name = "non-existing-model" 105 | code, res = make_unload_model_request(model_name) 106 | assert code == 404 107 | assert "Model {} is not loaded yet".format(model_name) in res 108 | 109 | 110 | @pytest.mark.skip_gpu 111 | def test_delete_model(): 112 | model_name = "half_plus_two" 113 | model_data = { 114 | "model_name": model_name, 115 | "url": "/opt/ml/models/half_plus_two" 116 | } 117 | code, res = make_load_model_request(json.dumps(model_data)) 118 | assert code == 200 119 | assert "Successfully loaded model {}".format(model_name) in res 120 | 121 | x = { 122 | "instances": [1.0, 2.0, 5.0] 123 | } 124 | _, y = make_invocation_request(json.dumps(x), model_name) 125 | y = json.loads(y) 126 | assert y == {"predictions": [2.5, 3.0, 4.5]} 127 | 128 | code_unload, res2 = make_unload_model_request(model_name) 129 | assert code_unload == 200 130 | 131 | code_invoke, y2 = make_invocation_request(json.dumps(x), model_name) 132 | y2 = json.loads(y2) 133 | assert code_invoke == 404 134 | assert "Model {} is not loaded yet.".format(model_name) in str(y2) 135 | 136 | 137 | @pytest.mark.skip_gpu 138 | def test_load_two_models(): 139 | model_name_1 = "half_plus_two" 140 | model_data_1 = { 141 | "model_name": model_name_1, 142 | "url": "/opt/ml/models/half_plus_two" 143 | } 144 | code1, res1 = make_load_model_request(json.dumps(model_data_1)) 145 | assert code1 == 200 146 | assert "Successfully loaded model {}".format(model_name_1) in res1 147 | 148 | # load second model 149 | model_name_2 = "half_plus_three" 150 | model_data_2 = { 151 | "model_name": model_name_2, 152 | "url": "/opt/ml/models/half_plus_three" 153 | } 154 | code2, res2 = make_load_model_request(json.dumps(model_data_2)) 155 | assert code2 == 200 156 | assert "Successfully loaded model {}".format(model_name_2) in res2 157 | 158 | # make invocation request to the first model 159 | x = { 160 | "instances": [1.0, 2.0, 5.0] 161 | } 162 | code_invoke1, y1 = make_invocation_request(json.dumps(x), model_name_1) 163 | y1 = json.loads(y1) 164 | assert code_invoke1 == 200 165 | assert y1 == {"predictions": [2.5, 3.0, 4.5]} 166 | 167 | # make invocation request to the second model 168 | for ver in ("123", "124"): 169 | code_invoke2, y2 = make_invocation_request(json.dumps(x), "half_plus_three", version=ver) 170 | y2 = json.loads(y2) 171 | assert code_invoke2 == 200 172 | assert y2 == {"predictions": [3.5, 4.0, 5.5]} 173 | 174 | code_list, res3 = make_list_model_request() 175 | res3 = json.loads(res3) 176 | assert len(res3) == 2 177 | 178 | 179 | @pytest.mark.skip_gpu 180 | def test_load_one_model_two_times(): 181 | model_name = "cifar" 182 | model_data = { 183 | "model_name": model_name, 184 | "url": "/opt/ml/models/cifar" 185 | } 186 | code_load, res = make_load_model_request(json.dumps(model_data)) 187 | assert code_load == 200 188 | assert "Successfully loaded model {}".format(model_name) in res 189 | 190 | code_load2, res2 = make_load_model_request(json.dumps(model_data)) 191 | assert code_load2 == 409 192 | assert "Model {} is already loaded".format(model_name) in res2 193 | 194 | 195 | @pytest.mark.skip_gpu 196 | def test_load_non_existing_model(): 197 | model_name = "non-existing" 198 | 
base_path = "/opt/ml/models/non-existing" 199 | model_data = { 200 | "model_name": model_name, 201 | "url": base_path 202 | } 203 | code, res = make_load_model_request(json.dumps(model_data)) 204 | assert code == 404 205 | assert "Could not find valid base path {} for servable {}".format(base_path, model_name) in str(res) 206 | 207 | 208 | @pytest.mark.skip_gpu 209 | def test_bad_model_reqeust(): 210 | bad_model_data = { 211 | "model_name": "model_name", 212 | "uri": "/opt/ml/models/non-existing" 213 | } 214 | code, _ = make_load_model_request(json.dumps(bad_model_data)) 215 | assert code == 500 216 | 217 | 218 | @pytest.mark.skip_gpu 219 | def test_invalid_model_version(): 220 | model_name = "invalid_version" 221 | base_path = "/opt/ml/models/invalid_version" 222 | invalid_model_version_data = { 223 | "model_name": model_name, 224 | "url": base_path 225 | } 226 | code, res = make_load_model_request(json.dumps(invalid_model_version_data)) 227 | assert code == 404 228 | assert "Could not find valid base path {} for servable {}".format(base_path, model_name) in str(res) 229 | --------------------------------------------------------------------------------