├── .gitignore ├── LICENSE ├── README.md ├── ei-tensorflow-resnet50.ipynb ├── gpu-tf-tensorrt-resnet50.ipynb ├── inf1-neuron-sdk-resnet50.ipynb ├── kitten.jpg └── sagemaker-tf-cpu-gpu-ei-resnet50.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints/ 2 | 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | pip-wheel-metadata/ 26 | share/python-wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .nox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | *.py,cover 53 | .hypothesis/ 54 | .pytest_cache/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | local_settings.py 63 | db.sqlite3 64 | db.sqlite3-journal 65 | 66 | # Flask stuff: 67 | instance/ 68 | .webassets-cache 69 | 70 | # Scrapy stuff: 71 | .scrapy 72 | 73 | # Sphinx documentation 74 | docs/_build/ 75 | 76 | # PyBuilder 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # IPython 83 | profile_default/ 84 | ipython_config.py 85 | 86 | # pyenv 87 | .python-version 88 | 89 | # pipenv 90 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 91 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 92 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 93 | # install all needed dependencies. 94 | #Pipfile.lock 95 | 96 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 97 | __pypackages__/ 98 | 99 | # Celery stuff 100 | celerybeat-schedule 101 | celerybeat.pid 102 | 103 | # SageMath parsed files 104 | *.sage.py 105 | 106 | # Environments 107 | .env 108 | .venv 109 | env/ 110 | venv/ 111 | ENV/ 112 | env.bak/ 113 | venv.bak/ 114 | 115 | # Spyder project settings 116 | .spyderproject 117 | .spyproject 118 | 119 | # Rope project settings 120 | .ropeproject 121 | 122 | # mkdocs documentation 123 | /site 124 | 125 | # mypy 126 | .mypy_cache/ 127 | .dmypy.json 128 | dmypy.json 129 | 130 | # Pyre type checker 131 | .pyre/ 132 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # This repository contains examples for deep learning inference deployment using AI accelerators: 2 | #### * Amazon EC2 G4 instances with NVIDIA T4 GPUs and NVIDIA TensorRT 3 | #### * Amazon EC2 Inf1 instances with AWS Inferentia and AWS Neuron SDK 4 | #### * Amazon EC2 CPU instances with Amazon Elastic Inference 5 | #### * Amazon SageMaker deployment hosting for CPUs, GPUs and AWS Inferentia -------------------------------------------------------------------------------- /ei-tensorflow-resnet50.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Amazon Elastic Inference (EI) inference on Amazon EC2 CPU instance\n", 8 | "This example demonstrates Amazon Elastic Inference with Amazon EI-enabled TensorFlow\n", 9 | "\n", 10 | "This example was tested on Amazon EC2 `c5.2xlarge` using the following AWS Deep Learning AMI: \n", 11 | "`Deep Learning AMI (Ubuntu 18.04) Version 35.0`\n", 12 | "\n", 13 | "Run this notebook using the following conda environment:\n", 14 | "`amazonei_tensorflow2_p36`\n", 15 | "\n", 16 | "Prepare your ImageNet validation TFRecord files using the following helper script:\n", 17 | "https://github.com/tensorflow/models/blob/archive/research/inception/inception/data/download_and_preprocess_imagenet.sh\n", 18 | "\n", 19 | "Save them to `/home/ubuntu/datasets/` or update the dataset location in the `get_dataset()` function" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 1, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "# !pip install matplotlib pandas" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 2, 34 | "metadata": {}, 35 | "outputs": [ 36 | { 37 | "data": { 38 | "text/plain": [ 39 | "'2.0.2'" 40 | ] 41 | }, 42 | "execution_count": 2, 43 | "metadata": {}, 44 | "output_type": "execute_result" 45 | } 46 | ], 47 | "source": [ 48 | "import tensorflow as tf\n", 49 | "from tensorflow import keras\n", 50 | "from tensorflow.keras.applications.resnet50 import ResNet50\n", 51 | "from tensorflow.keras.preprocessing import image\n", 52 | "from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions\n", 53 | "from ei_for_tf.python.predictor.ei_predictor import EIPredictor\n", 54 | "import numpy as np\n", 55 | "import pandas as pd\n", 56 | "import shutil\n", 57 | "import requests\n", 58 | "import time\n", 59 | "import json\n", 60 | "import os\n", 61 | "import boto3\n", 62 | "tf.__version__" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 3, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "# Call preprocess_input once on a dummy batch to work around https://github.com/tensorflow/tensorflow/issues/29931\n", 72 | "temp = tf.zeros([8, 224, 224, 3])\n", 73 | "_ = tf.keras.applications.resnet50.preprocess_input(temp)" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | 
"execution_count": 4, 79 | "metadata": {}, 80 | "outputs": [ 81 | { 82 | "name": "stdout", 83 | "output_type": "stream", 84 | "text": [ 85 | "[\n", 86 | " {\n", 87 | " \"acceleratorHealth\": {\n", 88 | " \"status\": \"Ok\"\n", 89 | " },\n", 90 | " \"acceleratorType\": \"eia2.large\",\n", 91 | " \"acceleratorId\": \"eia-63a6cf28f02841469c58055bff078a95\",\n", 92 | " \"availabilityZone\": \"us-west-2a\",\n", 93 | " \"attachedResource\": \"arn:aws:ec2:us-west-2:453691756499:instance/i-00487fc33ad7ef5eb\"\n", 94 | " },\n", 95 | " {\n", 96 | " \"acceleratorHealth\": {\n", 97 | " \"status\": \"Ok\"\n", 98 | " },\n", 99 | " \"acceleratorType\": \"eia2.xlarge\",\n", 100 | " \"acceleratorId\": \"eia-ef9561df7dd74b308ecefbd8b362ca69\",\n", 101 | " \"availabilityZone\": \"us-west-2a\",\n", 102 | " \"attachedResource\": \"arn:aws:ec2:us-west-2:453691756499:instance/i-00487fc33ad7ef5eb\"\n", 103 | " }\n", 104 | "]\n" 105 | ] 106 | } 107 | ], 108 | "source": [ 109 | "results = None\n", 110 | "batch_size = 8\n", 111 | "\n", 112 | "ei_client = boto3.client('elastic-inference')\n", 113 | "print(json.dumps(ei_client.describe_accelerators()['acceleratorSet'], indent=1))" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 5, 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [ 122 | "def load_save_resnet50_model(saved_model_dir = 'resnet50_saved_model'):\n", 123 | " model = ResNet50(weights='imagenet')\n", 124 | " shutil.rmtree(saved_model_dir, ignore_errors=True)\n", 125 | " model.save(saved_model_dir, include_optimizer=False, save_format='tf')" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": 6, 131 | "metadata": {}, 132 | "outputs": [], 133 | "source": [ 134 | "saved_model_dir = 'resnet50_saved_model' \n", 135 | "# load_save_resnet50_model(saved_model_dir)" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 7, 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "def deserialize_image_record(record):\n", 145 | " feature_map = {'image/encoded': tf.io.FixedLenFeature([], tf.string, ''),\n", 146 | " 'image/class/label': tf.io.FixedLenFeature([1], tf.int64, -1),\n", 147 | " 'image/class/text': tf.io.FixedLenFeature([], tf.string, '')}\n", 148 | " obj = tf.io.parse_single_example(serialized=record, features=feature_map)\n", 149 | " imgdata = obj['image/encoded']\n", 150 | " label = tf.cast(obj['image/class/label'], tf.int32) \n", 151 | " label_text = tf.cast(obj['image/class/text'], tf.string) \n", 152 | " return imgdata, label, label_text\n", 153 | "\n", 154 | "def val_preprocessing(record):\n", 155 | " imgdata, label, label_text = deserialize_image_record(record)\n", 156 | " label -= 1\n", 157 | " image = tf.io.decode_jpeg(imgdata, channels=3, \n", 158 | " fancy_upscaling=False, \n", 159 | " dct_method='INTEGER_FAST')\n", 160 | "\n", 161 | " shape = tf.shape(image)\n", 162 | " height = tf.cast(shape[0], tf.float32)\n", 163 | " width = tf.cast(shape[1], tf.float32)\n", 164 | " side = tf.cast(tf.convert_to_tensor(256, dtype=tf.int32), tf.float32)\n", 165 | "\n", 166 | " scale = tf.cond(tf.greater(height, width),\n", 167 | " lambda: side / width,\n", 168 | " lambda: side / height)\n", 169 | " \n", 170 | " new_height = tf.cast(tf.math.rint(height * scale), tf.int32)\n", 171 | " new_width = tf.cast(tf.math.rint(width * scale), tf.int32)\n", 172 | " \n", 173 | " image = tf.image.resize(image, [new_height, new_width], method='bicubic')\n", 174 | " image = tf.image.resize_with_crop_or_pad(image, 224, 224)\n", 175 | " 
\n", 176 | " image = tf.keras.applications.resnet50.preprocess_input(image)\n", 177 | " \n", 178 | " return image, label, label_text\n", 179 | "\n", 180 | "def get_dataset(batch_size, use_cache=False):\n", 181 | " data_dir = '/home/ubuntu/datasets/*'\n", 182 | " files = tf.io.gfile.glob(os.path.join(data_dir))\n", 183 | " dataset = tf.data.TFRecordDataset(files)\n", 184 | " \n", 185 | " dataset = dataset.map(map_func=val_preprocessing, num_parallel_calls=tf.data.experimental.AUTOTUNE)\n", 186 | " dataset = dataset.batch(batch_size=batch_size)\n", 187 | " dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)\n", 188 | " dataset = dataset.repeat(count=1)\n", 189 | " \n", 190 | " if use_cache:\n", 191 | " shutil.rmtree('tfdatacache', ignore_errors=True)\n", 192 | " os.mkdir('tfdatacache')\n", 193 | " dataset = dataset.cache(f'./tfdatacache/imagenet_val')\n", 194 | " \n", 195 | " return dataset" 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": 8, 201 | "metadata": {}, 202 | "outputs": [ 203 | { 204 | "name": "stdout", 205 | "output_type": "stream", 206 | "text": [ 207 | "\n", 208 | "=======================================================\n", 209 | "Benchmark results for CPU Keras, batch size: 8\n", 210 | "=======================================================\n", 211 | "\n", 212 | "Images 5000/50000. Average i/s 26.555694032421247\n", 213 | "Images 10000/50000. Average i/s 26.676666543597392\n", 214 | "Images 15000/50000. Average i/s 26.77406612095138\n", 215 | "Images 20000/50000. Average i/s 26.822275491462182\n", 216 | "Images 25000/50000. Average i/s 26.847471484622154\n", 217 | "Images 30000/50000. Average i/s 26.859330729648033\n", 218 | "Images 35000/50000. Average i/s 26.865594015573578\n", 219 | "Images 40000/50000. Average i/s 26.873174015987328\n", 220 | "Images 45000/50000. Average i/s 26.91567530151017\n" 221 | ] 222 | }, 223 | { 224 | "data": { 225 | "text/html": [ 226 | "
\n", 227 | "\n", 240 | "\n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | "
instance_typeacceleratoruser_batch_sizeaccuracyprediction_timewall_timeimages_per_sec_meanimages_per_sec_stdlatency_meanlatency_99th_percentilelatency_medianlatency_min
keras_cpu_8c5.2xlargeNA80.749561860.751864.8526.88390.502054297.721330.937296.569286.248
\n", 276 | "
" 277 | ], 278 | "text/plain": [ 279 | " instance_type accelerator user_batch_size accuracy \\\n", 280 | "keras_cpu_8 c5.2xlarge NA 8 0.74956 \n", 281 | "\n", 282 | " prediction_time wall_time images_per_sec_mean images_per_sec_std \\\n", 283 | "keras_cpu_8 1860.75 1864.85 26.8839 0.502054 \n", 284 | "\n", 285 | " latency_mean latency_99th_percentile latency_median latency_min \n", 286 | "keras_cpu_8 297.721 330.937 296.569 286.248 " 287 | ] 288 | }, 289 | "metadata": {}, 290 | "output_type": "display_data" 291 | } 292 | ], 293 | "source": [ 294 | "print('\\n=======================================================')\n", 295 | "print(f'Benchmark results for CPU Keras, batch size: {batch_size}')\n", 296 | "print('=======================================================\\n')\n", 297 | "\n", 298 | "model = tf.keras.models.load_model(saved_model_dir)\n", 299 | "display_every = 5000\n", 300 | "display_threshold = display_every\n", 301 | "\n", 302 | "pred_labels = []\n", 303 | "actual_labels = []\n", 304 | "iter_times = []\n", 305 | "\n", 306 | "# Get the tf.data.TFRecordDataset object for the ImageNet2012 validation dataset\n", 307 | "dataset = get_dataset(batch_size) \n", 308 | "\n", 309 | "walltime_start = time.time()\n", 310 | "for i, (validation_ds, batch_labels, _) in enumerate(dataset):\n", 311 | " start_time = time.time()\n", 312 | " pred_prob_keras = model(validation_ds)\n", 313 | " iter_times.append(time.time() - start_time)\n", 314 | " \n", 315 | " actual_labels.extend(label for label_list in batch_labels.numpy() for label in label_list)\n", 316 | " pred_labels.extend(list(np.argmax(pred_prob_keras, axis=1)))\n", 317 | " \n", 318 | " if i*batch_size >= display_threshold:\n", 319 | " print(f'Images {i*batch_size}/50000. Average i/s {np.mean(batch_size/np.array(iter_times[-display_every:]))}')\n", 320 | " display_threshold+=display_every\n", 321 | "\n", 322 | "iter_times = np.array(iter_times)\n", 323 | "acc_keras_gpu = np.sum(np.array(actual_labels) == np.array(pred_labels))/len(actual_labels)\n", 324 | "\n", 325 | "results = pd.DataFrame(columns = [f'keras_cpu_{batch_size}'])\n", 326 | "results.loc['instance_type'] = [requests.get('http://169.254.169.254/latest/meta-data/instance-type').text]\n", 327 | "results.loc['accelerator'] = ['NA']\n", 328 | "results.loc['user_batch_size'] = [batch_size]\n", 329 | "results.loc['accuracy'] = [acc_keras_gpu]\n", 330 | "results.loc['prediction_time'] = [np.sum(iter_times)]\n", 331 | "results.loc['wall_time'] = [time.time() - walltime_start]\n", 332 | "results.loc['images_per_sec_mean'] = [np.mean(batch_size / iter_times)]\n", 333 | "results.loc['images_per_sec_std'] = [np.std(batch_size / iter_times, ddof=1)]\n", 334 | "results.loc['latency_mean'] = [np.mean(iter_times) * 1000]\n", 335 | "results.loc['latency_99th_percentile'] = [np.percentile(iter_times, q=99, interpolation=\"lower\") * 1000]\n", 336 | "results.loc['latency_median'] = [np.median(iter_times) * 1000]\n", 337 | "results.loc['latency_min'] = [np.min(iter_times) * 1000]\n", 338 | "display(results.T)" 339 | ] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": 9, 344 | "metadata": {}, 345 | "outputs": [], 346 | "source": [ 347 | "def ei_predict_benchmark(saved_model_dir, batch_size, accelerator_id):\n", 348 | " \n", 349 | " ei_size = ei_client.describe_accelerators()['acceleratorSet'][accelerator_id]['acceleratorType']\n", 350 | "\n", 351 | " print('\\n=======================================================')\n", 352 | " print(f'Benchmark results for EI: {ei_size}, batch 
size: {batch_size}')\n", 353 | "    print('=======================================================\\n')\n", 354 | "    \n", 355 | "    eia_model = EIPredictor(saved_model_dir, \n", 356 | "                            accelerator_id=accelerator_id)\n", 357 | "\n", 358 | "    display_every = 5000\n", 359 | "    display_threshold = display_every\n", 360 | "\n", 361 | "    pred_labels = []\n", 362 | "    actual_labels = []\n", 363 | "    iter_times = []\n", 364 | "\n", 365 | "    # Get the tf.data.TFRecordDataset object for the ImageNet2012 validation dataset\n", 366 | "    dataset = get_dataset(batch_size) \n", 367 | "\n", 368 | "    walltime_start = time.time()\n", 369 | "    ipname = list(eia_model.feed_tensors.keys())[0]\n", 370 | "    resname = list(eia_model.fetch_tensors.keys())[0]\n", 371 | "\n", 372 | "    for i, (validation_ds, batch_labels, _) in enumerate(dataset):\n", 373 | "\n", 374 | "        model_feed_dict={ipname: validation_ds.numpy()}\n", 375 | "        start_time = time.time()\n", 376 | "        pred_prob = eia_model(model_feed_dict)\n", 377 | "        iter_times.append(time.time() - start_time)\n", 378 | "\n", 379 | "        actual_labels.extend(label for label_list in batch_labels.numpy() for label in label_list)\n", 380 | "        pred_labels.extend(list(np.argmax(pred_prob[resname], axis=1)))\n", 381 | "\n", 382 | "        if i*batch_size >= display_threshold:\n", 383 | "            print(f'Images {i*batch_size}/50000. Average i/s {np.mean(batch_size/np.array(iter_times[-display_every:]))}')\n", 384 | "            display_threshold+=display_every\n", 385 | "\n", 386 | "    iter_times = np.array(iter_times)\n", 387 | "    acc_ei_gpu = np.sum(np.array(actual_labels) == np.array(pred_labels))/len(actual_labels)\n", 388 | "    \n", 389 | "    results = pd.DataFrame(columns = [f'EI_{batch_size}_{ei_size}'])\n", 390 | "    results.loc['instance_type'] = [requests.get('http://169.254.169.254/latest/meta-data/instance-type').text]\n", 391 | "    results.loc['accelerator'] = [ei_size]\n", 392 | "    results.loc['user_batch_size'] = [batch_size]\n", 393 | "    results.loc['accuracy'] = [acc_ei_gpu]\n", 394 | "    results.loc['prediction_time'] = [np.sum(iter_times)]\n", 395 | "    results.loc['wall_time'] = [time.time() - walltime_start]\n", 396 | "    results.loc['images_per_sec_mean'] = [np.mean(batch_size / iter_times)]\n", 397 | "    results.loc['images_per_sec_std'] = [np.std(batch_size / iter_times, ddof=1)]\n", 398 | "    results.loc['latency_mean'] = [np.mean(iter_times) * 1000]\n", 399 | "    results.loc['latency_99th_percentile'] = [np.percentile(iter_times, q=99, interpolation=\"lower\") * 1000]\n", 400 | "    results.loc['latency_median'] = [np.median(iter_times) * 1000]\n", 401 | "    results.loc['latency_min'] = [np.min(iter_times) * 1000]\n", 402 | "    display(results.T)\n", 403 | "    \n", 404 | "    return results, iter_times" 405 | ] 406 | }, 407 | { 408 | "cell_type": "code", 409 | "execution_count": 10, 410 | "metadata": {}, 411 | "outputs": [ 412 | { 413 | "name": "stdout", 414 | "output_type": "stream", 415 | "text": [ 416 | "\n", 417 | "=======================================================\n", 418 | "Benchmark results for EI: eia2.large, batch size: 8\n", 419 | "=======================================================\n", 420 | "\n", 421 | "Using DEFAULT_SERVING_SIGNATURE_DEF_KEY .....\n", 422 | "WARNING:tensorflow:From /home/ubuntu/anaconda3/envs/amazonei_tensorflow2_p36/lib/python3.6/site-packages/ei_for_tf/python/predictor/ei_predictor.py:168: load (from tensorflow.python.saved_model.loader_impl) is deprecated and will be removed in a future version.\n", 423 | "Instructions for updating:\n", 424 | "This function will only be available through the v1 
compatibility library as tf.compat.v1.saved_model.loader.load or tf.compat.v1.saved_model.load. There will be a new function for importing SavedModels in Tensorflow 2.0.\n", 425 | "INFO:tensorflow:Restoring parameters from resnet50_saved_model/variables/variables\n", 426 | "Images 5000/50000. Average i/s 160.79150457173685\n", 427 | "Images 10000/50000. Average i/s 160.57224536199263\n", 428 | "Images 15000/50000. Average i/s 160.17887928377442\n", 429 | "Images 20000/50000. Average i/s 159.55135762825725\n", 430 | "Images 25000/50000. Average i/s 159.05273547195634\n", 431 | "Images 30000/50000. Average i/s 158.55027160196224\n", 432 | "Images 35000/50000. Average i/s 158.2158252593362\n", 433 | "Images 40000/50000. Average i/s 157.88468338480075\n", 434 | "Images 45000/50000. Average i/s 157.15614275808505\n" 435 | ] 436 | }, 437 | { 438 | "data": { 439 | "text/html": [ 440 | "
\n", 441 | "\n", 454 | "\n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | "
instance_typeacceleratoruser_batch_sizeaccuracyprediction_timewall_timeimages_per_sec_meanimages_per_sec_stdlatency_meanlatency_99th_percentilelatency_medianlatency_min
EI_8_eia2.largec5.2xlargeeia2.large80.74956321.635331.082157.2715.0455651.461655.373450.752447.5709
\n", 490 | "
" 491 | ], 492 | "text/plain": [ 493 | " instance_type accelerator user_batch_size accuracy \\\n", 494 | "EI_8_eia2.large c5.2xlarge eia2.large 8 0.74956 \n", 495 | "\n", 496 | " prediction_time wall_time images_per_sec_mean \\\n", 497 | "EI_8_eia2.large 321.635 331.082 157.271 \n", 498 | "\n", 499 | " images_per_sec_std latency_mean latency_99th_percentile \\\n", 500 | "EI_8_eia2.large 5.04556 51.4616 55.3734 \n", 501 | "\n", 502 | " latency_median latency_min \n", 503 | "EI_8_eia2.large 50.7524 47.5709 " 504 | ] 505 | }, 506 | "metadata": {}, 507 | "output_type": "display_data" 508 | }, 509 | { 510 | "data": { 511 | "text/html": [ 512 | "
\n", 513 | "\n", 526 | "\n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | "
keras_cpu_8EI_8_eia2.large
instance_typec5.2xlargec5.2xlarge
acceleratorNAeia2.large
user_batch_size88
accuracy0.749560.74956
prediction_time1860.75321.635
wall_time1864.85331.082
images_per_sec_mean26.8839157.271
images_per_sec_std0.5020545.04556
latency_mean297.72151.4616
latency_99th_percentile330.93755.3734
latency_median296.56950.7524
latency_min286.24847.5709
\n", 597 | "
" 598 | ], 599 | "text/plain": [ 600 | " keras_cpu_8 EI_8_eia2.large\n", 601 | "instance_type c5.2xlarge c5.2xlarge\n", 602 | "accelerator NA eia2.large\n", 603 | "user_batch_size 8 8\n", 604 | "accuracy 0.74956 0.74956\n", 605 | "prediction_time 1860.75 321.635\n", 606 | "wall_time 1864.85 331.082\n", 607 | "images_per_sec_mean 26.8839 157.271\n", 608 | "images_per_sec_std 0.502054 5.04556\n", 609 | "latency_mean 297.721 51.4616\n", 610 | "latency_99th_percentile 330.937 55.3734\n", 611 | "latency_median 296.569 50.7524\n", 612 | "latency_min 286.248 47.5709" 613 | ] 614 | }, 615 | "metadata": {}, 616 | "output_type": "display_data" 617 | } 618 | ], 619 | "source": [ 620 | "ei_options = [{'ei_acc_id': 0}]\n", 621 | "\n", 622 | "iter_ds = pd.DataFrame()\n", 623 | "if results is None:\n", 624 | " results = pd.DataFrame()\n", 625 | "\n", 626 | "col_name = lambda ei_acc_id: f'ei_{ei_client.describe_accelerators()[\"acceleratorSet\"][ei_acc_id][\"acceleratorType\"]}_batch_size_{batch_size}'\n", 627 | "\n", 628 | " \n", 629 | "for opt in ei_options:\n", 630 | " ei_acc_id = opt[\"ei_acc_id\"]\n", 631 | " res, iter_times = ei_predict_benchmark(saved_model_dir, batch_size, ei_acc_id)\n", 632 | " \n", 633 | " iter_ds = pd.concat([iter_ds, pd.DataFrame(iter_times, columns=[col_name(ei_acc_id)])], axis=1)\n", 634 | " results = pd.concat([results, res], axis=1)\n", 635 | " \n", 636 | "display(results)" 637 | ] 638 | } 639 | ], 640 | "metadata": { 641 | "kernelspec": { 642 | "display_name": "Environment (conda_amazonei_tensorflow2_p36)", 643 | "language": "python", 644 | "name": "conda_amazonei_tensorflow2_p36" 645 | }, 646 | "language_info": { 647 | "codemirror_mode": { 648 | "name": "ipython", 649 | "version": 3 650 | }, 651 | "file_extension": ".py", 652 | "mimetype": "text/x-python", 653 | "name": "python", 654 | "nbconvert_exporter": "python", 655 | "pygments_lexer": "ipython3", 656 | "version": "3.6.10" 657 | } 658 | }, 659 | "nbformat": 4, 660 | "nbformat_minor": 4 661 | } 662 | -------------------------------------------------------------------------------- /gpu-tf-tensorrt-resnet50.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# GPU inference with NVIDIA T4 on Amazon EC2 G4 instance\n", 8 | "This example demonstrates GPU inference with:\n", 9 | "* GPU accelerated TensorFlow/Keras\n", 10 | "* NVIDIA TensorRT optimizer and runtime" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "This example was tested on Amazon EC2 `g4dn.xlarge` using the following AWS Deep Learning AMI:\n", 18 | "`Deep Learning AMI (Ubuntu 18.04) Version 35.0`\n", 19 | "\n", 20 | "And the following NVIDIA TensorFlow Docker image: \n", 21 | "`nvcr.io/nvidia/tensorflow:20.08-tf2-py3`\n", 22 | "\n", 23 | "Create a Docker container:
\n", 24 | "`nvidia-docker run --shm-size 8g --ulimit memlock=-1 -it -v $PWD:/examples -v ~/.aws/:/.aws --network=host nvcr.io/nvidia/tensorflow:20.08-tf2-py3`\n", 25 | "\n", 26 | "Prepare your imagenet validation TFRecord files using the following helper script:\n", 27 | "https://github.com/tensorflow/models/blob/archive/research/inception/inception/data/download_and_preprocess_imagenet.sh\n", 28 | "\n", 29 | "Save it to `/examples/datasets/` or update the dataset location in the `get_dataset()` function" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 1, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "# !pip install --upgrade pip -q\n", 39 | "# !pip install matplotlib pandas -q" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 2, 45 | "metadata": {}, 46 | "outputs": [ 47 | { 48 | "name": "stdout", 49 | "output_type": "stream", 50 | "text": [ 51 | "TensorRT version: (7, 1, 3)\n", 52 | "TensorFlow version: 2.2.0\n" 53 | ] 54 | } 55 | ], 56 | "source": [ 57 | "import os\n", 58 | "import time\n", 59 | "import shutil\n", 60 | "import json\n", 61 | "import time\n", 62 | "import pandas as pd\n", 63 | "import numpy as np\n", 64 | "import requests\n", 65 | "from functools import partial\n", 66 | "\n", 67 | "import tensorflow as tf\n", 68 | "import tensorflow.keras as keras\n", 69 | "from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input\n", 70 | "from tensorflow.keras.preprocessing import image\n", 71 | "from tensorflow.python.saved_model import tag_constants, signature_constants\n", 72 | "from tensorflow.python.framework import convert_to_constants\n", 73 | "\n", 74 | "from tensorflow.compiler.tf2tensorrt.wrap_py_utils import get_linked_tensorrt_version\n", 75 | "\n", 76 | "print(f\"TensorRT version: {get_linked_tensorrt_version()}\")\n", 77 | "print(f\"TensorFlow version: {tf.__version__}\")" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 3, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "results = None\n", 87 | "batch_size = 8" 88 | ] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "metadata": {}, 93 | "source": [ 94 | "### Download Keras Resnet50 model" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 4, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "def load_save_resnet50_model(saved_model_dir = 'resnet50_saved_model'):\n", 104 | " model = ResNet50(weights='imagenet')\n", 105 | " shutil.rmtree(saved_model_dir, ignore_errors=True)\n", 106 | " model.save(saved_model_dir, include_optimizer=False, save_format='tf')" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 5, 112 | "metadata": {}, 113 | "outputs": [ 114 | { 115 | "name": "stdout", 116 | "output_type": "stream", 117 | "text": [ 118 | "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/resource_variable_ops.py:1817: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.\n", 119 | "Instructions for updating:\n", 120 | "If using Keras pass *_constraint arguments to layers.\n", 121 | "INFO:tensorflow:Assets written to: resnet50_saved_model/assets\n" 122 | ] 123 | } 124 | ], 125 | "source": [ 126 | "saved_model_dir = 'resnet50_saved_model' \n", 127 | "load_save_resnet50_model(saved_model_dir)" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "### Use 
`tf.data` to read ImageNet validation dataset" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": 6, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [ 143 | "def deserialize_image_record(record):\n", 144 | " feature_map = {'image/encoded': tf.io.FixedLenFeature([], tf.string, ''),\n", 145 | " 'image/class/label': tf.io.FixedLenFeature([1], tf.int64, -1),\n", 146 | " 'image/class/text': tf.io.FixedLenFeature([], tf.string, '')}\n", 147 | " obj = tf.io.parse_single_example(serialized=record, features=feature_map)\n", 148 | " imgdata = obj['image/encoded']\n", 149 | " label = tf.cast(obj['image/class/label'], tf.int32) \n", 150 | " label_text = tf.cast(obj['image/class/text'], tf.string) \n", 151 | " return imgdata, label, label_text\n", 152 | "\n", 153 | "def val_preprocessing(record):\n", 154 | " imgdata, label, label_text = deserialize_image_record(record)\n", 155 | " label -= 1\n", 156 | " image = tf.io.decode_jpeg(imgdata, channels=3, \n", 157 | " fancy_upscaling=False, \n", 158 | " dct_method='INTEGER_FAST')\n", 159 | "\n", 160 | " shape = tf.shape(image)\n", 161 | " height = tf.cast(shape[0], tf.float32)\n", 162 | " width = tf.cast(shape[1], tf.float32)\n", 163 | " side = tf.cast(tf.convert_to_tensor(256, dtype=tf.int32), tf.float32)\n", 164 | "\n", 165 | " scale = tf.cond(tf.greater(height, width),\n", 166 | " lambda: side / width,\n", 167 | " lambda: side / height)\n", 168 | " \n", 169 | " new_height = tf.cast(tf.math.rint(height * scale), tf.int32)\n", 170 | " new_width = tf.cast(tf.math.rint(width * scale), tf.int32)\n", 171 | " \n", 172 | " image = tf.image.resize(image, [new_height, new_width], method='bicubic')\n", 173 | " image = tf.image.resize_with_crop_or_pad(image, 224, 224)\n", 174 | " \n", 175 | " image = tf.keras.applications.resnet50.preprocess_input(image)\n", 176 | " \n", 177 | " return image, label, label_text\n", 178 | "\n", 179 | "def get_dataset(batch_size, use_cache=False):\n", 180 | " data_dir = '/examples/datasets/*'\n", 181 | " files = tf.io.gfile.glob(os.path.join(data_dir))\n", 182 | " dataset = tf.data.TFRecordDataset(files)\n", 183 | " \n", 184 | " dataset = dataset.map(map_func=val_preprocessing, num_parallel_calls=tf.data.experimental.AUTOTUNE)\n", 185 | " dataset = dataset.batch(batch_size=batch_size)\n", 186 | " dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)\n", 187 | " dataset = dataset.repeat(count=1)\n", 188 | " \n", 189 | " if use_cache:\n", 190 | " shutil.rmtree('tfdatacache', ignore_errors=True)\n", 191 | " os.mkdir('tfdatacache')\n", 192 | " dataset = dataset.cache(f'./tfdatacache/imagenet_val')\n", 193 | " \n", 194 | " return dataset" 195 | ] 196 | }, 197 | { 198 | "cell_type": "markdown", 199 | "metadata": {}, 200 | "source": [ 201 | "#### Predict using GPU + Keras" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": 7, 207 | "metadata": {}, 208 | "outputs": [ 209 | { 210 | "name": "stdout", 211 | "output_type": "stream", 212 | "text": [ 213 | "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.\n", 214 | "Images 5000/50000. Average i/s 113.52902935209637\n", 215 | "Images 10000/50000. Average i/s 114.4822357792094\n", 216 | "Images 15000/50000. Average i/s 114.17100473485702\n", 217 | "Images 20000/50000. Average i/s 114.61583324110076\n", 218 | "Images 25000/50000. Average i/s 114.84187563894713\n", 219 | "Images 30000/50000. Average i/s 115.14781546576788\n", 220 | "Images 35000/50000. 
Average i/s 115.10561798972904\n", 221 | "Images 40000/50000. Average i/s 115.20453046771337\n", 222 | "Images 45000/50000. Average i/s 115.71023295020717\n" 223 | ] 224 | }, 225 | { 226 | "data": { 227 | "text/html": [ 228 | "
\n", 229 | "\n", 242 | "\n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | "
keras_gpu_8
instance_typeg4dn.xlarge
user_batch_size8
accuracy0.74956
prediction_time440.113
wall_time443.712
images_per_sec_mean115.746
images_per_sec_std7.3476
latency_mean70.418
latency_99th_percentile84.4612
latency_median69.0285
latency_min62.314
\n", 296 | "
" 297 | ], 298 | "text/plain": [ 299 | " keras_gpu_8\n", 300 | "instance_type g4dn.xlarge\n", 301 | "user_batch_size 8\n", 302 | "accuracy 0.74956\n", 303 | "prediction_time 440.113\n", 304 | "wall_time 443.712\n", 305 | "images_per_sec_mean 115.746\n", 306 | "images_per_sec_std 7.3476\n", 307 | "latency_mean 70.418\n", 308 | "latency_99th_percentile 84.4612\n", 309 | "latency_median 69.0285\n", 310 | "latency_min 62.314" 311 | ] 312 | }, 313 | "metadata": {}, 314 | "output_type": "display_data" 315 | } 316 | ], 317 | "source": [ 318 | "model = tf.keras.models.load_model(saved_model_dir)\n", 319 | "display_every = 5000\n", 320 | "display_threshold = display_every\n", 321 | "\n", 322 | "pred_labels = []\n", 323 | "actual_labels = []\n", 324 | "iter_times = []\n", 325 | "\n", 326 | "# Get the tf.data.TFRecordDataset object for the ImageNet2012 validation dataset\n", 327 | "dataset = get_dataset(batch_size) \n", 328 | "\n", 329 | "walltime_start = time.time()\n", 330 | "for i, (validation_ds, batch_labels, _) in enumerate(dataset):\n", 331 | " start_time = time.time()\n", 332 | " pred_prob_keras = model(validation_ds)\n", 333 | " iter_times.append(time.time() - start_time)\n", 334 | " \n", 335 | " actual_labels.extend(label for label_list in batch_labels.numpy() for label in label_list)\n", 336 | " pred_labels.extend(list(np.argmax(pred_prob_keras, axis=1)))\n", 337 | " \n", 338 | " if i*batch_size >= display_threshold:\n", 339 | " print(f'Images {i*batch_size}/50000. Average i/s {np.mean(batch_size/np.array(iter_times[-display_every:]))}')\n", 340 | " display_threshold+=display_every\n", 341 | "\n", 342 | "iter_times = np.array(iter_times)\n", 343 | "acc_keras_gpu = np.sum(np.array(actual_labels) == np.array(pred_labels))/len(actual_labels)\n", 344 | "\n", 345 | "results = pd.DataFrame(columns = [f'keras_gpu_{batch_size}'])\n", 346 | "results.loc['instance_type'] = [requests.get('http://169.254.169.254/latest/meta-data/instance-type').text]\n", 347 | "results.loc['user_batch_size'] = [batch_size]\n", 348 | "results.loc['accuracy'] = [acc_keras_gpu]\n", 349 | "results.loc['prediction_time'] = [np.sum(iter_times)]\n", 350 | "results.loc['wall_time'] = [time.time() - walltime_start]\n", 351 | "results.loc['images_per_sec_mean'] = [np.mean(batch_size / iter_times)]\n", 352 | "results.loc['images_per_sec_std'] = [np.std(batch_size / iter_times, ddof=1)]\n", 353 | "results.loc['latency_mean'] = [np.mean(iter_times) * 1000]\n", 354 | "results.loc['latency_99th_percentile'] = [np.percentile(iter_times, q=99, interpolation=\"lower\") * 1000]\n", 355 | "results.loc['latency_median'] = [np.median(iter_times) * 1000]\n", 356 | "results.loc['latency_min'] = [np.min(iter_times) * 1000]\n", 357 | "display(results.T)" 358 | ] 359 | }, 360 | { 361 | "cell_type": "markdown", 362 | "metadata": {}, 363 | "source": [ 364 | "#### Predict using GPU + TensorRT" 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": 8, 370 | "metadata": {}, 371 | "outputs": [], 372 | "source": [ 373 | "def build_fn(batch_size, dataset):\n", 374 | " for i, (build_image, _, _) in enumerate(dataset):\n", 375 | " if i > 1:\n", 376 | " break\n", 377 | " yield (build_image,)\n", 378 | "\n", 379 | "def calibrate_fn(n_calib, batch_size, dataset):\n", 380 | " for i, (calib_image, _, _) in enumerate(dataset):\n", 381 | " if i > n_calib // batch_size:\n", 382 | " break\n", 383 | " yield (calib_image,)\n", 384 | "\n", 385 | "def build_tensorrt_engine(precision, batch_size, dataset):\n", 386 | " from 
tensorflow.python.compiler.tensorrt import trt_convert as trt\n", 387 | " conversion_params = trt.DEFAULT_TRT_CONVERSION_PARAMS._replace(precision_mode=precision.upper(),\n", 388 | " max_workspace_size_bytes=(1<<32),\n", 389 | " maximum_cached_engines=2)\n", 390 | " converter = trt.TrtGraphConverterV2(input_saved_model_dir='resnet50_saved_model',\n", 391 | " conversion_params=conversion_params)\n", 392 | " \n", 393 | " if precision.lower() == 'int8':\n", 394 | " n_calib=50\n", 395 | " converter.convert(calibration_input_fn=partial(calibrate_fn, n_calib, batch_size, \n", 396 | " dataset.shuffle(buffer_size=n_calib, reshuffle_each_iteration=True)))\n", 397 | " else:\n", 398 | " converter.convert()\n", 399 | " \n", 400 | " trt_compiled_model_dir = f'resnet50_trt_saved_models/resnet50_{precision}_{batch_size}'\n", 401 | " shutil.rmtree(trt_compiled_model_dir, ignore_errors=True)\n", 402 | "\n", 403 | " converter.build(input_fn=partial(build_fn, batch_size, dataset))\n", 404 | " converter.save(output_saved_model_dir=trt_compiled_model_dir)\n", 405 | " print(f'\\nOptimized for {precision} and batch size {batch_size}, directory:{trt_compiled_model_dir}\\n')\n", 406 | " return trt_compiled_model_dir" 407 | ] 408 | }, 409 | { 410 | "cell_type": "code", 411 | "execution_count": 9, 412 | "metadata": {}, 413 | "outputs": [], 414 | "source": [ 415 | "def trt_predict_benchmark(precision, batch_size, use_cache=False, display_every=100, warm_up=10):\n", 416 | "\n", 417 | " print('\\n=======================================================')\n", 418 | " print(f'Benchmark results for precision: {precision}, batch size: {batch_size}')\n", 419 | " print('=======================================================\\n')\n", 420 | " \n", 421 | " dataset = get_dataset(batch_size)\n", 422 | " \n", 423 | " # If caching is enabled, cache dataset for better i/o performance\n", 424 | " if use_cache:\n", 425 | " print('Caching dataset ...')\n", 426 | " start_time = time.time()\n", 427 | " for (img,_,_) in dataset:\n", 428 | " continue\n", 429 | " print(f'Finished caching {time.time() - start_time}')\n", 430 | " \n", 431 | " trt_compiled_model_dir = build_tensorrt_engine(precision, batch_size, dataset)\n", 432 | " saved_model_trt = tf.saved_model.load(trt_compiled_model_dir, tags=[tag_constants.SERVING])\n", 433 | " model_trt = saved_model_trt.signatures[signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY]\n", 434 | " \n", 435 | " pred_labels = []\n", 436 | " actual_labels = []\n", 437 | " iter_times = []\n", 438 | " \n", 439 | " display_every = 5000\n", 440 | " display_threshold = display_every\n", 441 | " initial_time = time.time()\n", 442 | " \n", 443 | " walltime_start = time.time()\n", 444 | " for i, (validation_ds, batch_labels, _) in enumerate(dataset):\n", 445 | " if i==0:\n", 446 | " for w in range(warm_up):\n", 447 | " _ = model_trt(validation_ds);\n", 448 | " \n", 449 | " start_time = time.time()\n", 450 | " trt_results = model_trt(validation_ds);\n", 451 | " iter_times.append(time.time() - start_time)\n", 452 | " \n", 453 | " actual_labels.extend(label for label_list in batch_labels.numpy() for label in label_list)\n", 454 | " pred_labels.extend(list(tf.argmax(trt_results['predictions'], axis=1).numpy()))\n", 455 | " if (i)*batch_size >= display_threshold:\n", 456 | " print(f'Images {(i)*batch_size}/50000. 
Average i/s {np.mean(batch_size/np.array(iter_times[-display_every:]))}')\n", 457 | " display_threshold+=display_every\n", 458 | " \n", 459 | " print(f'Wall time: {time.time() - walltime_start}')\n", 460 | "\n", 461 | " acc_trt = np.sum(np.array(actual_labels) == np.array(pred_labels))/len(actual_labels)\n", 462 | " iter_times = np.array(iter_times)\n", 463 | " \n", 464 | " results = pd.DataFrame(columns = [f'trt_{precision}_{batch_size}'])\n", 465 | " results.loc['instance_type'] = [requests.get('http://169.254.169.254/latest/meta-data/instance-type').text]\n", 466 | " results.loc['user_batch_size'] = [batch_size]\n", 467 | " results.loc['accuracy'] = [acc_trt]\n", 468 | " results.loc['prediction_time'] = [np.sum(iter_times)]\n", 469 | " results.loc['wall_time'] = [time.time() - walltime_start] \n", 470 | " results.loc['images_per_sec_mean'] = [np.mean(batch_size / iter_times)]\n", 471 | " results.loc['images_per_sec_std'] = [np.std(batch_size / iter_times, ddof=1)]\n", 472 | " results.loc['latency_mean'] = [np.mean(iter_times) * 1000]\n", 473 | " results.loc['latency_99th_percentile'] = [np.percentile(iter_times, q=99, interpolation=\"lower\") * 1000]\n", 474 | " results.loc['latency_median'] = [np.median(iter_times) * 1000]\n", 475 | " results.loc['latency_min'] = [np.min(iter_times) * 1000]\n", 476 | " display(results.T)\n", 477 | " \n", 478 | " return results, iter_times" 479 | ] 480 | }, 481 | { 482 | "cell_type": "code", 483 | "execution_count": 10, 484 | "metadata": {}, 485 | "outputs": [ 486 | { 487 | "name": "stdout", 488 | "output_type": "stream", 489 | "text": [ 490 | "Benchmark sweep combinations:\n", 491 | "{'batch_size': 8, 'precision': 'fp32'}\n", 492 | "{'batch_size': 8, 'precision': 'fp16'}\n", 493 | "{'batch_size': 8, 'precision': 'int8'}\n" 494 | ] 495 | } 496 | ], 497 | "source": [ 498 | "import itertools\n", 499 | "bench_options = {\n", 500 | " 'batch_size': [batch_size],\n", 501 | " 'precision': ['fp32', 'fp16', 'int8']\n", 502 | "}\n", 503 | "\n", 504 | "bname, bval = zip(*bench_options.items())\n", 505 | "blist = [dict(zip(bname, h)) for h in itertools.product(*bval)]\n", 506 | "\n", 507 | "print('Benchmark sweep combinations:')\n", 508 | "for b in blist:\n", 509 | " print(b)" 510 | ] 511 | }, 512 | { 513 | "cell_type": "code", 514 | "execution_count": 11, 515 | "metadata": {}, 516 | "outputs": [ 517 | { 518 | "name": "stdout", 519 | "output_type": "stream", 520 | "text": [ 521 | "\n", 522 | "=======================================================\n", 523 | "Benchmark results for precision: fp32, batch size: 8\n", 524 | "=======================================================\n", 525 | "\n", 526 | "INFO:tensorflow:Linked TensorRT version: (7, 1, 3)\n", 527 | "INFO:tensorflow:Loaded TensorRT version: (7, 1, 3)\n", 528 | "INFO:tensorflow:Assets written to: resnet50_trt_saved_models/resnet50_fp32_8/assets\n", 529 | "\n", 530 | "Optimized for fp32 and batch size 8, directory:resnet50_trt_saved_models/resnet50_fp32_8\n", 531 | "\n", 532 | "Images 5000/50000. Average i/s 1706.9338144076587\n", 533 | "Images 10000/50000. Average i/s 1709.7124824008995\n", 534 | "Images 15000/50000. Average i/s 1714.181552149894\n", 535 | "Images 20000/50000. Average i/s 1706.435347541865\n", 536 | "Images 25000/50000. Average i/s 1694.5647994188168\n", 537 | "Images 30000/50000. Average i/s 1686.1055872763206\n", 538 | "Images 35000/50000. Average i/s 1691.5992314594068\n", 539 | "Images 40000/50000. Average i/s 1690.6736552055474\n", 540 | "Images 45000/50000. 
Average i/s 1678.790774983944\n", 541 | "Wall time: 143.3079001903534\n" 542 | ] 543 | }, 544 | { 545 | "data": { 546 | "text/html": [ 547 | "
\n", 548 | "\n", 561 | "\n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | "
instance_typeuser_batch_sizeaccuracyprediction_timewall_timeimages_per_sec_meanimages_per_sec_stdlatency_meanlatency_99th_percentilelatency_medianlatency_min
trt_fp32_8g4dn.xlarge80.7495638.1336143.3271666.69960.9286.1013813.7975.910631.36304
\n", 595 | "
" 596 | ], 597 | "text/plain": [ 598 | " instance_type user_batch_size accuracy prediction_time wall_time \\\n", 599 | "trt_fp32_8 g4dn.xlarge 8 0.74956 38.1336 143.327 \n", 600 | "\n", 601 | " images_per_sec_mean images_per_sec_std latency_mean \\\n", 602 | "trt_fp32_8 1666.69 960.928 6.10138 \n", 603 | "\n", 604 | " latency_99th_percentile latency_median latency_min \n", 605 | "trt_fp32_8 13.797 5.91063 1.36304 " 606 | ] 607 | }, 608 | "metadata": {}, 609 | "output_type": "display_data" 610 | }, 611 | { 612 | "name": "stdout", 613 | "output_type": "stream", 614 | "text": [ 615 | "\n", 616 | "=======================================================\n", 617 | "Benchmark results for precision: fp16, batch size: 8\n", 618 | "=======================================================\n", 619 | "\n", 620 | "INFO:tensorflow:Linked TensorRT version: (7, 1, 3)\n", 621 | "INFO:tensorflow:Loaded TensorRT version: (7, 1, 3)\n", 622 | "WARNING:tensorflow:Unresolved object in checkpoint: (root).trt_engine_resources.TRTEngineOp_0_0._serialized_trt_resource_filename\n", 623 | "WARNING:tensorflow:A checkpoint was restored (e.g. tf.train.Checkpoint.restore or tf.keras.Model.load_weights) but not all checkpointed values were used. See above for specific issues. Use expect_partial() on the load status object, e.g. tf.train.Checkpoint.restore(...).expect_partial(), to silence these warnings, or use assert_consumed() to make the check explicit. See https://www.tensorflow.org/guide/checkpoint#loading_mechanics for details.\n", 624 | "INFO:tensorflow:Assets written to: resnet50_trt_saved_models/resnet50_fp16_8/assets\n", 625 | "\n", 626 | "Optimized for fp16 and batch size 8, directory:resnet50_trt_saved_models/resnet50_fp16_8\n", 627 | "\n", 628 | "Images 5000/50000. Average i/s 1972.9929443064034\n", 629 | "Images 10000/50000. Average i/s 1931.125588304386\n", 630 | "Images 15000/50000. Average i/s 1897.5221155505612\n", 631 | "Images 20000/50000. Average i/s 1897.409528086548\n", 632 | "Images 25000/50000. Average i/s 1903.1808092268618\n", 633 | "Images 30000/50000. Average i/s 1907.7603788948525\n", 634 | "Images 35000/50000. Average i/s 1881.8081660423734\n", 635 | "Images 40000/50000. Average i/s 1811.5798501140848\n", 636 | "Images 45000/50000. Average i/s 1725.9618582526007\n", 637 | "Wall time: 135.05888485908508\n" 638 | ] 639 | }, 640 | { 641 | "data": { 642 | "text/html": [ 643 | "
" 692 | ], 693 | "text/plain": [ 694 | " instance_type user_batch_size accuracy prediction_time wall_time \\\n", 695 | "trt_fp16_8 g4dn.xlarge 8 0.74968 38.0335 135.078 \n", 696 | "\n", 697 | " images_per_sec_mean images_per_sec_std latency_mean \\\n", 698 | "trt_fp16_8 1707.24 1016.37 6.08536 \n", 699 | "\n", 700 | " latency_99th_percentile latency_median latency_min \n", 701 | "trt_fp16_8 14.1668 5.91636 1.43266 " 702 | ] 703 | }, 704 | "metadata": {}, 705 | "output_type": "display_data" 706 | }, 707 | { 708 | "name": "stdout", 709 | "output_type": "stream", 710 | "text": [ 711 | "\n", 712 | "=======================================================\n", 713 | "Benchmark results for precision: int8, batch size: 8\n", 714 | "=======================================================\n", 715 | "\n", 716 | "INFO:tensorflow:Linked TensorRT version: (7, 1, 3)\n", 717 | "INFO:tensorflow:Loaded TensorRT version: (7, 1, 3)\n", 718 | "WARNING:tensorflow:Unresolved object in checkpoint: (root).trt_engine_resources.TRTEngineOp_1_0._serialized_trt_resource_filename\n", 719 | "WARNING:tensorflow:A checkpoint was restored (e.g. tf.train.Checkpoint.restore or tf.keras.Model.load_weights) but not all checkpointed values were used. See above for specific issues. Use expect_partial() on the load status object, e.g. tf.train.Checkpoint.restore(...).expect_partial(), to silence these warnings, or use assert_consumed() to make the check explicit. See https://www.tensorflow.org/guide/checkpoint#loading_mechanics for details.\n", 720 | "INFO:tensorflow:Assets written to: resnet50_trt_saved_models/resnet50_int8_8/assets\n", 721 | "\n", 722 | "Optimized for int8 and batch size 8, directory:resnet50_trt_saved_models/resnet50_int8_8\n", 723 | "\n", 724 | "Images 5000/50000. Average i/s 1879.6287615037268\n", 725 | "Images 10000/50000. Average i/s 1890.5233308310728\n", 726 | "Images 15000/50000. Average i/s 1904.7501508674482\n", 727 | "Images 20000/50000. Average i/s 1898.7457632383791\n", 728 | "Images 25000/50000. Average i/s 1902.8776155291969\n", 729 | "Images 30000/50000. Average i/s 1898.16488970591\n", 730 | "Images 35000/50000. Average i/s 1889.473046700565\n", 731 | "Images 40000/50000. Average i/s 1894.5937887248815\n", 732 | "Images 45000/50000. Average i/s 1893.7721136475534\n", 733 | "Wall time: 133.06834959983826\n" 734 | ] 735 | }, 736 | { 737 | "data": { 738 | "text/html": [ 739 | "
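Unlike FP32 and FP16, INT8 conversion needs representative inputs so TensorRT can choose quantization ranges. With `TrtGraphConverterV2` that is done by passing a calibration input function to `convert()`. A hedged sketch — random data stands in for the preprocessed validation images a real calibration run would feed:

```python
# Hedged sketch of INT8 calibration with TF-TRT; values are illustrative.
import numpy as np
from tensorflow.python.compiler.tensorrt import trt_convert as trt

def calibration_input_fn():
    # Yield a few representative batches; real calibration should feed
    # preprocessed validation images, not random tensors.
    for _ in range(10):
        yield (np.random.rand(8, 224, 224, 3).astype(np.float32),)

params = trt.DEFAULT_TRT_CONVERSION_PARAMS._replace(precision_mode='INT8',
                                                    use_calibration=True)
converter = trt.TrtGraphConverterV2(input_saved_model_dir='resnet50_saved_model',
                                    conversion_params=params)
converter.convert(calibration_input_fn=calibration_input_fn)
converter.save('resnet50_trt_saved_models/resnet50_int8_8')
```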
" 788 | ], 789 | "text/plain": [ 790 | " instance_type user_batch_size accuracy prediction_time wall_time \\\n", 791 | "trt_int8_8 g4dn.xlarge 8 0.74924 34.3497 133.087 \n", 792 | "\n", 793 | " images_per_sec_mean images_per_sec_std latency_mean \\\n", 794 | "trt_int8_8 1895.03 1086.22 5.49594 \n", 795 | "\n", 796 | " latency_99th_percentile latency_median latency_min \n", 797 | "trt_int8_8 12.2826 5.27298 1.44053 " 798 | ] 799 | }, 800 | "metadata": {}, 801 | "output_type": "display_data" 802 | }, 803 | { 804 | "data": { 805 | "text/html": [ 806 | "
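The sweep cell at the end of this notebook also collects the raw per-iteration times in `iter_ds`, one column per configuration. A minimal sketch, assuming `matplotlib` is installed, for eyeballing the latency distributions behind the summary table below:

```python
import matplotlib.pyplot as plt

# iter_ds holds one column of per-iteration times (seconds) per configuration,
# as assembled by the sweep cell in this notebook.
ax = (iter_ds * 1000).plot.box(figsize=(8, 4), showfliers=False)
ax.set_ylabel('latency (ms)')
ax.set_title('ResNet50 TF-TRT latency by precision, batch size 8')
plt.show()
```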
" 911 | ], 912 | "text/plain": [ 913 | " keras_gpu_8 trt_fp32_8 trt_fp16_8 trt_int8_8\n", 914 | "instance_type g4dn.xlarge g4dn.xlarge g4dn.xlarge g4dn.xlarge\n", 915 | "user_batch_size 8 8 8 8\n", 916 | "accuracy 0.74956 0.74956 0.74968 0.74924\n", 917 | "prediction_time 440.113 38.1336 38.0335 34.3497\n", 918 | "wall_time 443.712 143.327 135.078 133.087\n", 919 | "images_per_sec_mean 115.746 1666.69 1707.24 1895.03\n", 920 | "images_per_sec_std 7.3476 960.928 1016.37 1086.22\n", 921 | "latency_mean 70.418 6.10138 6.08536 5.49594\n", 922 | "latency_99th_percentile 84.4612 13.797 14.1668 12.2826\n", 923 | "latency_median 69.0285 5.91063 5.91636 5.27298\n", 924 | "latency_min 62.314 1.36304 1.43266 1.44053" 925 | ] 926 | }, 927 | "metadata": {}, 928 | "output_type": "display_data" 929 | } 930 | ], 931 | "source": [ 932 | "iter_ds = pd.DataFrame()\n", 933 | "\n", 934 | "if results is None:\n", 935 | " results = pd.DataFrame()\n", 936 | "\n", 937 | "col_name = lambda boption: f'trt_{boption[\"precision\"]}_{boption[\"batch_size\"]}'\n", 938 | "\n", 939 | "for boption in blist:\n", 940 | " res, it = trt_predict_benchmark(**boption)\n", 941 | " iter_ds = pd.concat([iter_ds, pd.DataFrame(it, columns=[col_name(boption)])], axis=1)\n", 942 | " results = pd.concat([results, res], axis=1)\n", 943 | "\n", 944 | "display(results)" 945 | ] 946 | }, 947 | { 948 | "cell_type": "code", 949 | "execution_count": null, 950 | "metadata": {}, 951 | "outputs": [], 952 | "source": [] 953 | } 954 | ], 955 | "metadata": { 956 | "instance_type": "ml.g4dn.4xlarge", 957 | "kernelspec": { 958 | "display_name": "Python 3", 959 | "language": "python", 960 | "name": "python3" 961 | }, 962 | "language_info": { 963 | "codemirror_mode": { 964 | "name": "ipython", 965 | "version": 3 966 | }, 967 | "file_extension": ".py", 968 | "mimetype": "text/x-python", 969 | "name": "python", 970 | "nbconvert_exporter": "python", 971 | "pygments_lexer": "ipython3", 972 | "version": "3.6.9" 973 | } 974 | }, 975 | "nbformat": 4, 976 | "nbformat_minor": 4 977 | } 978 | -------------------------------------------------------------------------------- /inf1-neuron-sdk-resnet50.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# AWS Inferentia inference on Amazon EC2 Inf1 instance\n", 8 | "This example demonstrates AWS Inferentia inference with TensorFlow and AWS Neuron SDK compiler and runtime\n", 9 | "\n", 10 | "This example was tested on Amazon EC2 `inf1.xlarge` the following AWS Deep Learning AMI: \n", 11 | "`Deep Learning AMI (Ubuntu 18.04) Version 35.0`\n", 12 | "\n", 13 | "Run this notebook using the following conda environment:\n", 14 | "`aws_neuron_tensorflow_p36`\n", 15 | "\n", 16 | "Prepare your imagenet validation TFRecord files using the following helper script:\n", 17 | "https://github.com/tensorflow/models/blob/archive/research/inception/inception/data/download_and_preprocess_imagenet.sh\n", 18 | "\n", 19 | "Save it to `/home/ubuntu/datasets/` or update the dataset location in the `get_dataset()` function" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 1, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "# !pip install matplotlib pandas" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 2, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "!/opt/aws/neuron/bin/neuron-cli reset\n", 38 | "import os\n", 39 | "import time\n", 40 
| "import shutil\n", 41 | "import json\n", 42 | "import requests\n", 43 | "import numpy as np\n", 44 | "import pandas as pd\n", 45 | "import tensorflow as tf\n", 46 | "import tensorflow.neuron as tfn\n", 47 | "import tensorflow.compat.v1.keras as keras\n", 48 | "from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input\n", 49 | "from tensorflow.keras.preprocessing import image\n", 50 | "from concurrent import futures\n", 51 | "from itertools import compress" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 3, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "# https://github.com/tensorflow/tensorflow/issues/29931\n", 61 | "temp = tf.zeros([8, 224, 224, 3])\n", 62 | "_ = tf.keras.applications.resnet50.preprocess_input(temp)" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "### Resnet50 FP32 saved model" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 4, 75 | "metadata": {}, 76 | "outputs": [ 77 | { 78 | "name": "stdout", 79 | "output_type": "stream", 80 | "text": [ 81 | "WARNING:tensorflow:From /home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/ops/resource_variable_ops.py:1630: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.\n", 82 | "Instructions for updating:\n", 83 | "If using Keras pass *_constraint arguments to layers.\n", 84 | "WARNING:tensorflow:From :10: simple_save (from tensorflow.python.saved_model.simple_save) is deprecated and will be removed in a future version.\n", 85 | "Instructions for updating:\n", 86 | "This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.simple_save.\n", 87 | "WARNING:tensorflow:From /home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/saved_model/signature_def_utils_impl.py:201: build_tensor_info (from tensorflow.python.saved_model.utils_impl) is deprecated and will be removed in a future version.\n", 88 | "Instructions for updating:\n", 89 | "This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.utils.build_tensor_info or tf.compat.v1.saved_model.build_tensor_info.\n", 90 | "INFO:tensorflow:Assets added to graph.\n", 91 | "INFO:tensorflow:No assets to write.\n", 92 | "INFO:tensorflow:SavedModel written to: resnet50_saved_model/saved_model.pb\n" 93 | ] 94 | } 95 | ], 96 | "source": [ 97 | "# Export SavedModel\n", 98 | "saved_model_dir = 'resnet50_saved_model'\n", 99 | "shutil.rmtree(saved_model_dir, ignore_errors=True)\n", 100 | "\n", 101 | "keras.backend.set_learning_phase(0)\n", 102 | "model = ResNet50(weights='imagenet')\n", 103 | "tf.saved_model.simple_save(session = keras.backend.get_session(),\n", 104 | " export_dir = saved_model_dir,\n", 105 | " inputs = {'input_1:0': model.inputs[0]},\n", 106 | " outputs = {'probs/Softmax:0': model.outputs[0]})" 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": {}, 112 | "source": [ 113 | "### Compile models with different batch sizes and cores" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 5, 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [ 122 | "def compile_inf1_model(saved_model_dir, inf1_model_dir, batch_size=1, num_cores=1, use_static_weights=False):\n", 123 | " print(f'-----------batch size: 
{batch_size}, num cores: {num_cores}----------')\n", 124 | " print('Compiling...')\n", 125 | " \n", 126 | " compiled_model_dir = f'resnet50_batch_{batch_size}_inf1_cores_{num_cores}'\n", 127 | " inf1_compiled_model_dir = os.path.join(inf1_model_dir, compiled_model_dir)\n", 128 | " shutil.rmtree(inf1_compiled_model_dir, ignore_errors=True)\n", 129 | "\n", 130 | " example_input = np.zeros([batch_size,224,224,3], dtype='float32')\n", 131 | "\n", 132 | " compiler_args = ['--verbose','1', '--num-neuroncores', str(num_cores)]\n", 133 | " if use_static_weights:\n", 134 | " compiler_args.append('--static-weights')\n", 135 | " \n", 136 | " start_time = time.time()\n", 137 | " compiled_res = tfn.saved_model.compile(model_dir = saved_model_dir,\n", 138 | " model_feed_dict={'input_1:0': example_input},\n", 139 | " new_model_dir = inf1_compiled_model_dir,\n", 140 | " dynamic_batch_size=True,\n", 141 | " compiler_workdir=f'./compiler-workdir/{inf1_compiled_model_dir}',\n", 142 | " compiler_args = compiler_args)\n", 143 | " print(f'Compile time: {time.time() - start_time}')\n", 144 | " \n", 145 | " compile_success = False\n", 146 | " perc_on_inf = compiled_res['OnNeuronRatio'] * 100\n", 147 | " if perc_on_inf > 50:\n", 148 | " compile_success = True\n", 149 | " \n", 150 | " print(inf1_compiled_model_dir)\n", 151 | " print(compiled_res)\n", 152 | " print('----------- Done! ----------- \\n')\n", 153 | " \n", 154 | " return compile_success" 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "### Use `tf.data` to read ImageNet validation dataset" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 6, 167 | "metadata": {}, 168 | "outputs": [], 169 | "source": [ 170 | "def deserialize_image_record(record):\n", 171 | " feature_map = {'image/encoded': tf.io.FixedLenFeature([], tf.string, ''),\n", 172 | " 'image/class/label': tf.io.FixedLenFeature([1], tf.int64, -1),\n", 173 | " 'image/class/text': tf.io.FixedLenFeature([], tf.string, '')}\n", 174 | " obj = tf.io.parse_single_example(serialized=record, features=feature_map)\n", 175 | " imgdata = obj['image/encoded']\n", 176 | " label = tf.cast(obj['image/class/label'], tf.int32) \n", 177 | " label_text = tf.cast(obj['image/class/text'], tf.string) \n", 178 | " return imgdata, label, label_text\n", 179 | "\n", 180 | "def val_preprocessing(record):\n", 181 | " imgdata, label, label_text = deserialize_image_record(record)\n", 182 | " label -= 1\n", 183 | " image = tf.io.decode_jpeg(imgdata, channels=3, \n", 184 | " fancy_upscaling=False, \n", 185 | " dct_method='INTEGER_FAST')\n", 186 | "\n", 187 | " shape = tf.shape(image)\n", 188 | " height = tf.cast(shape[0], tf.float32)\n", 189 | " width = tf.cast(shape[1], tf.float32)\n", 190 | " side = tf.cast(tf.convert_to_tensor(256, dtype=tf.int32), tf.float32)\n", 191 | "\n", 192 | " scale = tf.cond(tf.greater(height, width),\n", 193 | " lambda: side / width,\n", 194 | " lambda: side / height)\n", 195 | " \n", 196 | " new_height = tf.cast(tf.math.rint(height * scale), tf.int32)\n", 197 | " new_width = tf.cast(tf.math.rint(width * scale), tf.int32)\n", 198 | " \n", 199 | " image = tf.image.resize(image, [new_height, new_width], method='bicubic')\n", 200 | " image = tf.image.resize_with_crop_or_pad(image, 224, 224)\n", 201 | " \n", 202 | " image = tf.keras.applications.resnet50.preprocess_input(image)\n", 203 | " \n", 204 | " return image, label, label_text\n", 205 | "\n", 206 | "def get_dataset(batch_size, use_cache=False):\n", 207 | " data_dir = 
'/home/ubuntu/datasets/*'\n",
208 | "    files = tf.io.gfile.glob(os.path.join(data_dir))\n",
209 | "    dataset = tf.data.TFRecordDataset(files)\n",
210 | "    \n",
211 | "    dataset = dataset.map(map_func=val_preprocessing, num_parallel_calls=tf.data.experimental.AUTOTUNE)\n",
212 | "    dataset = dataset.batch(batch_size=batch_size)\n",
213 | "    dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)\n",
214 | "    dataset = dataset.repeat(count=1)\n",
215 | "    \n",
216 | "    if use_cache:\n",
217 | "        shutil.rmtree('tfdatacache', ignore_errors=True)\n",
218 | "        os.mkdir('tfdatacache')\n",
219 | "        dataset = dataset.cache('./tfdatacache/imagenet_val')\n",
220 | "    \n",
221 | "    return dataset"
222 | ]
223 | },
224 | {
225 | "cell_type": "markdown",
226 | "metadata": {},
227 | "source": [
228 | "## Single AWS Inferentia chip execution\n",
229 | "* Single-core compiled models with automatic data-parallel execution on up to 4 cores\n",
230 | "* Multi-core compiled models for pipeline execution"
231 | ]
232 | },
233 | {
234 | "cell_type": "code",
235 | "execution_count": 7,
236 | "metadata": {
237 | "scrolled": true
238 | },
239 | "outputs": [],
240 | "source": [
241 | "def inf1_predict_benchmark_single_threaded(neuron_saved_model_name, batch_size, user_batch_size, num_cores, use_cache=False, warm_up=10):\n",
242 | "    print(f'Running model {neuron_saved_model_name}, user_batch_size: {user_batch_size}\\n')\n",
243 | "\n",
244 | "    model_inf1 = tf.contrib.predictor.from_saved_model(neuron_saved_model_name)\n",
245 | "\n",
246 | "    iter_times = []\n",
247 | "    pred_labels = []\n",
248 | "    actual_labels = []\n",
249 | "    display_threshold = 0\n",
250 | "    # number of warm-up iterations comes from the warm_up argument\n",
251 | "\n",
252 | "    ds = get_dataset(user_batch_size, use_cache)\n",
253 | "\n",
254 | "    ds_iter = ds.make_initializable_iterator()\n",
255 | "    ds_next = ds_iter.get_next()\n",
256 | "    ds_init_op = ds_iter.initializer\n",
257 | "\n",
258 | "    with tf.Session() as sess:\n",
259 | "        if use_cache:\n",
260 | "            sess.run(ds_init_op)\n",
261 | "            print('\\nCaching dataset ...')\n",
262 | "            start_time = time.time()\n",
263 | "            try:\n",
264 | "                while True:\n",
265 | "                    (validation_ds, label, _) = sess.run(ds_next)\n",
266 | "            except tf.errors.OutOfRangeError:\n",
267 | "                pass\n",
268 | "            print(f'Caching finished: {time.time()-start_time} sec')\n",
269 | "\n",
270 | "        try:\n",
271 | "            sess.run(ds_init_op)\n",
272 | "            counter = 0\n",
273 | "            \n",
274 | "            display_every = 5000\n",
275 | "            display_threshold = display_every\n",
276 | "            \n",
277 | "            ipname = list(model_inf1.feed_tensors.keys())[0]\n",
278 | "            resname = list(model_inf1.fetch_tensors.keys())[0]\n",
279 | "            \n",
280 | "            walltime_start = time.time()\n",
281 | "\n",
282 | "            while True:\n",
283 | "                (validation_ds, batch_labels, _) = sess.run(ds_next)\n",
284 | "\n",
285 | "                model_feed_dict = {ipname: validation_ds}\n",
286 | "\n",
287 | "                if counter == 0:\n",
288 | "                    for i in range(warm_up):\n",
289 | "                        _ = model_inf1(model_feed_dict)\n",
290 | "\n",
291 | "                start_time = time.time()\n",
292 | "                inf1_results = model_inf1(model_feed_dict)\n",
293 | "                iter_times.append(time.time() - start_time)\n",
294 | "                \n",
295 | "                actual_labels.extend(label for label_list in batch_labels for label in label_list)\n",
296 | "                pred_labels.extend(list(np.argmax(inf1_results[resname], axis=1)))\n",
297 | "\n",
298 | "                if counter*user_batch_size >= display_threshold:\n",
299 | "                    print(f'Images {counter*user_batch_size}/50000. 
Average i/s {np.mean(user_batch_size/np.array(iter_times[-display_every:]))}')\n", 300 | " display_threshold+=display_every\n", 301 | "\n", 302 | " counter+=1\n", 303 | "\n", 304 | " except tf.errors.OutOfRangeError:\n", 305 | " pass\n", 306 | " \n", 307 | " acc_inf1 = np.sum(np.array(actual_labels) == np.array(pred_labels))/len(actual_labels)\n", 308 | " iter_times = np.array(iter_times)\n", 309 | " \n", 310 | " results = pd.DataFrame(columns = [f'inf1_compiled_batch_size_{batch_size}_compiled_cores_{num_cores}'])\n", 311 | " results.loc['instance_type'] = [requests.get('http://169.254.169.254/latest/meta-data/instance-type').text]\n", 312 | " results.loc['compiled_batch_size'] = [batch_size]\n", 313 | " results.loc['user_batch_size'] = [user_batch_size]\n", 314 | " results.loc['accuracy'] = [acc_inf1]\n", 315 | " results.loc['prediction_time'] = [np.sum(iter_times)]\n", 316 | " results.loc['wall_time'] = [time.time() - walltime_start]\n", 317 | " results.loc['images_per_sec_mean'] = [np.mean(user_batch_size / iter_times)]\n", 318 | " results.loc['images_per_sec_std'] = [np.std(user_batch_size / iter_times, ddof=1)]\n", 319 | " results.loc['latency_mean'] = [np.mean(iter_times) * 1000]\n", 320 | " results.loc['latency_99th_percentile'] = [np.percentile(iter_times, q=99, interpolation=\"lower\") * 1000]\n", 321 | " results.loc['latency_median'] = [np.median(iter_times) * 1000]\n", 322 | " results.loc['latency_min'] = [np.min(iter_times) * 1000]\n", 323 | " display(results.T)\n", 324 | "\n", 325 | " return results, iter_times" 326 | ] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "execution_count": 8, 331 | "metadata": {}, 332 | "outputs": [ 333 | { 334 | "name": "stdout", 335 | "output_type": "stream", 336 | "text": [ 337 | "-----------batch size: 1, num cores: 1----------\n", 338 | "Compiling...\n", 339 | "INFO:tensorflow:Restoring parameters from resnet50_saved_model/variables/variables\n", 340 | "INFO:tensorflow:Froze 320 variables.\n", 341 | "INFO:tensorflow:Converted 320 variables to const ops.\n", 342 | "INFO:tensorflow:fusing subgraph neuron_op_d6f098c01c780733 with neuron-cc; log file is at /home/ubuntu/examples/bkp/ai-accelerators-examples/compiler-workdir/resnet50_inf1_saved_models/resnet50_batch_1_inf1_cores_1/neuron_op_d6f098c01c780733/graph_def.neuron-cc.log\n", 343 | "INFO:tensorflow:Number of operations in TensorFlow session: 4647\n", 344 | "INFO:tensorflow:Number of operations after tf.neuron optimizations: 556\n", 345 | "INFO:tensorflow:Number of operations placed on Neuron runtime: 554\n", 346 | "INFO:tensorflow:No assets to save.\n", 347 | "INFO:tensorflow:No assets to write.\n", 348 | "INFO:tensorflow:SavedModel written to: resnet50_inf1_saved_models/resnet50_batch_1_inf1_cores_1/saved_model.pb\n", 349 | "INFO:tensorflow:Successfully converted resnet50_saved_model to resnet50_inf1_saved_models/resnet50_batch_1_inf1_cores_1\n", 350 | "Compile time: 57.83445167541504\n", 351 | "resnet50_inf1_saved_models/resnet50_batch_1_inf1_cores_1\n", 352 | "{'OnNeuronRatio': 0.9964028776978417}\n", 353 | "----------- Done! 
----------- \n", 354 | "\n", 355 | "-----------batch size: 5, num cores: 1----------\n", 356 | "Compiling...\n", 357 | "INFO:tensorflow:Restoring parameters from resnet50_saved_model/variables/variables\n", 358 | "INFO:tensorflow:Froze 320 variables.\n", 359 | "INFO:tensorflow:Converted 320 variables to const ops.\n", 360 | "INFO:tensorflow:fusing subgraph neuron_op_d6f098c01c780733 with neuron-cc; log file is at /home/ubuntu/examples/bkp/ai-accelerators-examples/compiler-workdir/resnet50_inf1_saved_models/resnet50_batch_5_inf1_cores_1/neuron_op_d6f098c01c780733/graph_def.neuron-cc.log\n", 361 | "INFO:tensorflow:Number of operations in TensorFlow session: 4647\n", 362 | "INFO:tensorflow:Number of operations after tf.neuron optimizations: 556\n", 363 | "INFO:tensorflow:Number of operations placed on Neuron runtime: 554\n", 364 | "INFO:tensorflow:No assets to save.\n", 365 | "INFO:tensorflow:No assets to write.\n", 366 | "INFO:tensorflow:SavedModel written to: resnet50_inf1_saved_models/resnet50_batch_5_inf1_cores_1/saved_model.pb\n", 367 | "INFO:tensorflow:Successfully converted resnet50_saved_model to resnet50_inf1_saved_models/resnet50_batch_5_inf1_cores_1\n", 368 | "Compile time: 96.24623918533325\n", 369 | "resnet50_inf1_saved_models/resnet50_batch_5_inf1_cores_1\n", 370 | "{'OnNeuronRatio': 0.9964028776978417}\n", 371 | "----------- Done! ----------- \n", 372 | "\n", 373 | "-----------batch size: 1, num cores: 4----------\n", 374 | "Compiling...\n", 375 | "INFO:tensorflow:Restoring parameters from resnet50_saved_model/variables/variables\n", 376 | "INFO:tensorflow:Froze 320 variables.\n", 377 | "INFO:tensorflow:Converted 320 variables to const ops.\n", 378 | "INFO:tensorflow:fusing subgraph neuron_op_d6f098c01c780733 with neuron-cc; log file is at /home/ubuntu/examples/bkp/ai-accelerators-examples/compiler-workdir/resnet50_inf1_saved_models/resnet50_batch_1_inf1_cores_4/neuron_op_d6f098c01c780733/graph_def.neuron-cc.log\n", 379 | "INFO:tensorflow:Number of operations in TensorFlow session: 4647\n", 380 | "INFO:tensorflow:Number of operations after tf.neuron optimizations: 556\n", 381 | "INFO:tensorflow:Number of operations placed on Neuron runtime: 554\n", 382 | "INFO:tensorflow:No assets to save.\n", 383 | "INFO:tensorflow:No assets to write.\n", 384 | "INFO:tensorflow:SavedModel written to: resnet50_inf1_saved_models/resnet50_batch_1_inf1_cores_4/saved_model.pb\n", 385 | "INFO:tensorflow:Successfully converted resnet50_saved_model to resnet50_inf1_saved_models/resnet50_batch_1_inf1_cores_4\n", 386 | "Compile time: 63.46157956123352\n", 387 | "resnet50_inf1_saved_models/resnet50_batch_1_inf1_cores_4\n", 388 | "{'OnNeuronRatio': 0.9964028776978417}\n", 389 | "----------- Done! 
----------- \n", 390 | "\n" 391 | ] 392 | }, 393 | { 394 | "data": { 395 | "text/plain": [ 396 | "True" 397 | ] 398 | }, 399 | "execution_count": 8, 400 | "metadata": {}, 401 | "output_type": "execute_result" 402 | } 403 | ], 404 | "source": [ 405 | "inf1_model_dir = 'resnet50_inf1_saved_models'\n", 406 | "saved_model_dir = 'resnet50_saved_model'\n", 407 | "\n", 408 | "compile_inf1_model(saved_model_dir, inf1_model_dir, batch_size=1, num_cores=1)\n", 409 | "compile_inf1_model(saved_model_dir, inf1_model_dir, batch_size=5, num_cores=1)\n", 410 | "compile_inf1_model(saved_model_dir, inf1_model_dir, batch_size=1, num_cores=4)" 411 | ] 412 | }, 413 | { 414 | "cell_type": "code", 415 | "execution_count": 9, 416 | "metadata": {}, 417 | "outputs": [ 418 | { 419 | "name": "stdout", 420 | "output_type": "stream", 421 | "text": [ 422 | "inf1_compiled_model_dir: resnet50_inf1_saved_models/resnet50_batch_1_inf1_cores_1\n", 423 | "Running model resnet50_inf1_saved_models/resnet50_batch_1_inf1_cores_1, user_batch_size: 10\n", 424 | "\n", 425 | "WARNING:tensorflow:\n", 426 | "The TensorFlow contrib module will not be included in TensorFlow 2.0.\n", 427 | "For more information, please see:\n", 428 | " * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md\n", 429 | " * https://github.com/tensorflow/addons\n", 430 | " * https://github.com/tensorflow/io (for I/O related ops)\n", 431 | "If you depend on functionality not listed there, please file an issue.\n", 432 | "\n", 433 | "WARNING:tensorflow:From /home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/contrib/predictor/saved_model_predictor.py:153: load (from tensorflow.python.saved_model.loader_impl) is deprecated and will be removed in a future version.\n", 434 | "Instructions for updating:\n", 435 | "This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.loader.load or tf.compat.v1.saved_model.load. There will be a new function for importing SavedModels in Tensorflow 2.0.\n", 436 | "INFO:tensorflow:Saver not created because there are no variables in the graph to restore\n", 437 | "INFO:tensorflow:The specified SavedModel has no variables; no checkpoints were restored.\n", 438 | "WARNING:tensorflow:From :14: DatasetV1.make_initializable_iterator (from tensorflow.python.data.ops.dataset_ops) is deprecated and will be removed in a future version.\n", 439 | "Instructions for updating:\n", 440 | "Use `for ... in dataset:` to iterate over a dataset. If using `tf.estimator`, return the `Dataset` object directly from your input function. As a last resort, you can use `tf.compat.v1.data.make_initializable_iterator(dataset)`.\n", 441 | "Images 5000/50000. Average i/s 511.9149005447497\n", 442 | "Images 10000/50000. Average i/s 514.1476835875276\n", 443 | "Images 15000/50000. Average i/s 511.55752611295105\n", 444 | "Images 20000/50000. Average i/s 510.6258382445502\n", 445 | "Images 25000/50000. Average i/s 510.6002877210464\n", 446 | "Images 30000/50000. Average i/s 510.33624960724376\n", 447 | "Images 35000/50000. Average i/s 510.23169540573906\n", 448 | "Images 40000/50000. Average i/s 509.62934416741103\n", 449 | "Images 45000/50000. Average i/s 509.851016680137\n" 450 | ] 451 | }, 452 | { 453 | "data": { 454 | "text/html": [ 455 | "
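The run above uses the model compiled for a single NeuronCore; the Neuron runtime can replicate such a model across the four NeuronCores of an `inf1.xlarge` for data-parallel serving, while a model compiled with `--num-neuroncores 4` runs as a pipeline across one core group. A sketch of how core grouping was typically controlled in this generation of the Neuron SDK — treat the variable name and values as assumptions taken from the Neuron documentation of that era:

```python
import os

# Illustrative only: this must be set before the Neuron runtime initializes
# (i.e., before any model is loaded in the process).
# Four groups of one core each: a 1-core model is replicated 4x (data parallel).
os.environ['NEURONCORE_GROUP_SIZES'] = '1,1,1,1'

# One group of four cores: a model compiled with '--num-neuroncores 4'
# executes as a pipeline across the group.
# os.environ['NEURONCORE_GROUP_SIZES'] = '4'
```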
" 506 | ], 507 | "text/plain": [ 508 | " instance_type compiled_batch_size \\\n", 509 | "inf1_compiled_batch_size_1_compiled_cores_1 inf1.xlarge 1 \n", 510 | "\n", 511 | " user_batch_size accuracy \\\n", 512 | "inf1_compiled_batch_size_1_compiled_cores_1 10 0.74852 \n", 513 | "\n", 514 | " prediction_time wall_time \\\n", 515 | "inf1_compiled_batch_size_1_compiled_cores_1 99.4977 146.44 \n", 516 | "\n", 517 | " images_per_sec_mean \\\n", 518 | "inf1_compiled_batch_size_1_compiled_cores_1 509.898 \n", 519 | "\n", 520 | " images_per_sec_std latency_mean \\\n", 521 | "inf1_compiled_batch_size_1_compiled_cores_1 59.3509 19.8995 \n", 522 | "\n", 523 | " latency_99th_percentile \\\n", 524 | "inf1_compiled_batch_size_1_compiled_cores_1 27.0095 \n", 525 | "\n", 526 | " latency_median latency_min \n", 527 | "inf1_compiled_batch_size_1_compiled_cores_1 19.4516 15.7409 " 528 | ] 529 | }, 530 | "metadata": {}, 531 | "output_type": "display_data" 532 | }, 533 | { 534 | "name": "stdout", 535 | "output_type": "stream", 536 | "text": [ 537 | "inf1_compiled_model_dir: resnet50_inf1_saved_models/resnet50_batch_5_inf1_cores_1\n", 538 | "Running model resnet50_inf1_saved_models/resnet50_batch_5_inf1_cores_1, user_batch_size: 50\n", 539 | "\n", 540 | "INFO:tensorflow:Saver not created because there are no variables in the graph to restore\n", 541 | "INFO:tensorflow:The specified SavedModel has no variables; no checkpoints were restored.\n", 542 | "Images 5000/50000. Average i/s 756.9368826530499\n", 543 | "Images 10000/50000. Average i/s 758.3658209888853\n", 544 | "Images 15000/50000. Average i/s 758.889930748228\n", 545 | "Images 20000/50000. Average i/s 760.4266183337617\n", 546 | "Images 25000/50000. Average i/s 760.8172446072302\n", 547 | "Images 30000/50000. Average i/s 760.2700641837997\n", 548 | "Images 35000/50000. Average i/s 759.5763119031232\n", 549 | "Images 40000/50000. Average i/s 760.0876622928208\n", 550 | "Images 45000/50000. Average i/s 760.6692409949743\n" 551 | ] 552 | }, 553 | { 554 | "data": { 555 | "text/html": [ 556 | "
" 607 | ], 608 | "text/plain": [ 609 | " instance_type compiled_batch_size \\\n", 610 | "inf1_compiled_batch_size_5_compiled_cores_1 inf1.xlarge 5 \n", 611 | "\n", 612 | " user_batch_size accuracy \\\n", 613 | "inf1_compiled_batch_size_5_compiled_cores_1 50 0.7486 \n", 614 | "\n", 615 | " prediction_time wall_time \\\n", 616 | "inf1_compiled_batch_size_5_compiled_cores_1 66.1318 143.338 \n", 617 | "\n", 618 | " images_per_sec_mean \\\n", 619 | "inf1_compiled_batch_size_5_compiled_cores_1 760.852 \n", 620 | "\n", 621 | " images_per_sec_std latency_mean \\\n", 622 | "inf1_compiled_batch_size_5_compiled_cores_1 59.7863 66.1318 \n", 623 | "\n", 624 | " latency_99th_percentile \\\n", 625 | "inf1_compiled_batch_size_5_compiled_cores_1 80.0529 \n", 626 | "\n", 627 | " latency_median latency_min \n", 628 | "inf1_compiled_batch_size_5_compiled_cores_1 65.8345 56.5889 " 629 | ] 630 | }, 631 | "metadata": {}, 632 | "output_type": "display_data" 633 | }, 634 | { 635 | "name": "stdout", 636 | "output_type": "stream", 637 | "text": [ 638 | "inf1_compiled_model_dir: resnet50_inf1_saved_models/resnet50_batch_1_inf1_cores_4\n", 639 | "Running model resnet50_inf1_saved_models/resnet50_batch_1_inf1_cores_4, user_batch_size: 10\n", 640 | "\n", 641 | "INFO:tensorflow:Saver not created because there are no variables in the graph to restore\n", 642 | "INFO:tensorflow:The specified SavedModel has no variables; no checkpoints were restored.\n", 643 | "Images 5000/50000. Average i/s 361.0220527406529\n", 644 | "Images 10000/50000. Average i/s 359.68171067062457\n", 645 | "Images 15000/50000. Average i/s 358.75999045862824\n", 646 | "Images 20000/50000. Average i/s 358.28821764290626\n", 647 | "Images 25000/50000. Average i/s 357.87710423546616\n", 648 | "Images 30000/50000. Average i/s 358.12412006408044\n", 649 | "Images 35000/50000. Average i/s 358.09136022506436\n", 650 | "Images 40000/50000. Average i/s 357.7498270103637\n", 651 | "Images 45000/50000. Average i/s 357.8152796870437\n" 652 | ] 653 | }, 654 | { 655 | "data": { 656 | "text/html": [ 657 | "
" 708 | ], 709 | "text/plain": [ 710 | " instance_type compiled_batch_size \\\n", 711 | "inf1_compiled_batch_size_1_compiled_cores_4 inf1.xlarge 1 \n", 712 | "\n", 713 | " user_batch_size accuracy \\\n", 714 | "inf1_compiled_batch_size_1_compiled_cores_4 10 0.749 \n", 715 | "\n", 716 | " prediction_time wall_time \\\n", 717 | "inf1_compiled_batch_size_1_compiled_cores_4 140.423 147.618 \n", 718 | "\n", 719 | " images_per_sec_mean \\\n", 720 | "inf1_compiled_batch_size_1_compiled_cores_4 357.974 \n", 721 | "\n", 722 | " images_per_sec_std latency_mean \\\n", 723 | "inf1_compiled_batch_size_1_compiled_cores_4 25.1203 28.0846 \n", 724 | "\n", 725 | " latency_99th_percentile \\\n", 726 | "inf1_compiled_batch_size_1_compiled_cores_4 34.502 \n", 727 | "\n", 728 | " latency_median latency_min \n", 729 | "inf1_compiled_batch_size_1_compiled_cores_4 27.5503 25.4283 " 730 | ] 731 | }, 732 | "metadata": {}, 733 | "output_type": "display_data" 734 | }, 735 | { 736 | "data": { 737 | "text/html": [ 738 | "
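Note that the benchmark above is deliberately single-threaded, so only one request is in flight at a time and replicated cores sit idle between calls. The imports cell pulls in `concurrent.futures`, which points at the data-parallel variant: keep several batches in flight and let the runtime fan them out across core groups. A hedged sketch, assuming a predictor and input tensor name obtained as in `inf1_predict_benchmark_single_threaded`:

```python
from concurrent import futures
import numpy as np

def predict_batch(feed):
    # model_inf1 and ipname as loaded in the benchmark function above (assumption).
    return model_inf1({ipname: feed})

# Submit several batches concurrently so all four NeuronCores stay busy.
batches = [np.zeros([10, 224, 224, 3], dtype='float32') for _ in range(8)]
with futures.ThreadPoolExecutor(max_workers=4) as pool:
    outputs = list(pool.map(predict_batch, batches))
```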
" 837 | ], 838 | "text/plain": [ 839 | " inf1_compiled_batch_size_1_compiled_cores_1 \\\n", 840 | "instance_type inf1.xlarge \n", 841 | "compiled_batch_size 1 \n", 842 | "user_batch_size 10 \n", 843 | "accuracy 0.74852 \n", 844 | "prediction_time 99.4977 \n", 845 | "wall_time 146.44 \n", 846 | "images_per_sec_mean 509.898 \n", 847 | "images_per_sec_std 59.3509 \n", 848 | "latency_mean 19.8995 \n", 849 | "latency_99th_percentile 27.0095 \n", 850 | "latency_median 19.4516 \n", 851 | "latency_min 15.7409 \n", 852 | "\n", 853 | " inf1_compiled_batch_size_5_compiled_cores_1 \\\n", 854 | "instance_type inf1.xlarge \n", 855 | "compiled_batch_size 5 \n", 856 | "user_batch_size 50 \n", 857 | "accuracy 0.7486 \n", 858 | "prediction_time 66.1318 \n", 859 | "wall_time 143.338 \n", 860 | "images_per_sec_mean 760.852 \n", 861 | "images_per_sec_std 59.7863 \n", 862 | "latency_mean 66.1318 \n", 863 | "latency_99th_percentile 80.0529 \n", 864 | "latency_median 65.8345 \n", 865 | "latency_min 56.5889 \n", 866 | "\n", 867 | " inf1_compiled_batch_size_1_compiled_cores_4 \n", 868 | "instance_type inf1.xlarge \n", 869 | "compiled_batch_size 1 \n", 870 | "user_batch_size 10 \n", 871 | "accuracy 0.749 \n", 872 | "prediction_time 140.423 \n", 873 | "wall_time 147.618 \n", 874 | "images_per_sec_mean 357.974 \n", 875 | "images_per_sec_std 25.1203 \n", 876 | "latency_mean 28.0846 \n", 877 | "latency_99th_percentile 34.502 \n", 878 | "latency_median 27.5503 \n", 879 | "latency_min 25.4283 " 880 | ] 881 | }, 882 | "metadata": {}, 883 | "output_type": "display_data" 884 | } 885 | ], 886 | "source": [ 887 | "inf1_model_dir = 'resnet50_inf1_saved_models'\n", 888 | "\n", 889 | "compile_options = [{'batch_size': 1, 'num_cores': 1},\n", 890 | " {'batch_size': 5, 'num_cores': 1},\n", 891 | " {'batch_size': 1, 'num_cores': 4}]\n", 892 | "\n", 893 | "iter_ds = pd.DataFrame()\n", 894 | "results = pd.DataFrame()\n", 895 | "\n", 896 | "for opt in compile_options:\n", 897 | " batch_size = opt[\"batch_size\"]\n", 898 | " num_cores = opt[\"num_cores\"]\n", 899 | " compiled_model_dir = f'resnet50_batch_{batch_size}_inf1_cores_{num_cores}'\n", 900 | " inf1_compiled_model_dir = os.path.join(inf1_model_dir, compiled_model_dir)\n", 901 | " \n", 902 | " print(f'inf1_compiled_model_dir: {inf1_compiled_model_dir}')\n", 903 | " col_name = lambda opt: f'inf1_{batch_size}_multicores_{num_cores}'\n", 904 | " \n", 905 | " res, iter_times = inf1_predict_benchmark_single_threaded(inf1_compiled_model_dir,\n", 906 | " batch_size = batch_size,\n", 907 | " user_batch_size = batch_size*10,\n", 908 | " num_cores = num_cores,\n", 909 | " use_cache=False, \n", 910 | " warm_up=10)\n", 911 | " \n", 912 | " iter_ds = pd.concat([iter_ds, pd.DataFrame(iter_times, columns=[col_name(opt)])], axis=1)\n", 913 | " results = pd.concat([results, res], axis=1)\n", 914 | " \n", 915 | "display(results)" 916 | ] 917 | }, 918 | { 919 | "cell_type": "code", 920 | "execution_count": null, 921 | "metadata": {}, 922 | "outputs": [], 923 | "source": [] 924 | } 925 | ], 926 | "metadata": { 927 | "kernelspec": { 928 | "display_name": "Environment (conda_aws_neuron_tensorflow_p36)", 929 | "language": "python", 930 | "name": "conda_aws_neuron_tensorflow_p36" 931 | }, 932 | "language_info": { 933 | "codemirror_mode": { 934 | "name": "ipython", 935 | "version": 3 936 | }, 937 | "file_extension": ".py", 938 | "mimetype": "text/x-python", 939 | "name": "python", 940 | "nbconvert_exporter": "python", 941 | "pygments_lexer": "ipython3", 942 | "version": "3.6.11" 943 | } 944 | }, 945 | 
"nbformat": 4, 946 | "nbformat_minor": 4 947 | } 948 | -------------------------------------------------------------------------------- /kitten.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shashankprasanna/ai-accelerators-examples/c8c9c828e9876161cacf3b165811f814c52f6d3a/kitten.jpg -------------------------------------------------------------------------------- /sagemaker-tf-cpu-gpu-ei-resnet50.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Amazon SageMaker inference deployment to CPUs, GPUs, and EI\n", 8 | "This example demonstrates Amazon SageMaker inference deployment using SageMaker SDK\n", 9 | "\n", 10 | "This example was tested on Amazon SageMaker Studio Notebook\n", 11 | "Run this notebook using the following Amazon SageMaker Studio conda environment:\n", 12 | "`TensorFlow 2 CPU Optimized`" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 1, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "# !pip install --upgrade pip -q\n", 22 | "# !pip install --upgrade sagemaker -q" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 2, 28 | "metadata": {}, 29 | "outputs": [ 30 | { 31 | "name": "stdout", 32 | "output_type": "stream", 33 | "text": [ 34 | "sagemaker version: 2.15.1\n", 35 | "tensorflow version: 2.1.0\n" 36 | ] 37 | } 38 | ], 39 | "source": [ 40 | "import tarfile\n", 41 | "import sagemaker\n", 42 | "import tensorflow as tf\n", 43 | "import tensorflow.keras as keras\n", 44 | "import shutil\n", 45 | "import os\n", 46 | "import time\n", 47 | "from tensorflow.keras.applications.resnet50 import ResNet50\n", 48 | "\n", 49 | "role = sagemaker.get_execution_role()\n", 50 | "sess = sagemaker.Session()\n", 51 | "region = sess.boto_region_name\n", 52 | "bucket = sess.default_bucket()\n", 53 | "print('sagemaker version: '+sagemaker.__version__)\n", 54 | "print('tensorflow version: '+tf.__version__)" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 3, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "def load_save_resnet50_model(model_path):\n", 64 | " model = ResNet50(weights='imagenet')\n", 65 | " shutil.rmtree(model_path, ignore_errors=True)\n", 66 | " model.save(model_path, include_optimizer=False, save_format='tf')\n", 67 | "\n", 68 | "saved_model_dir = 'resnet50_saved_model' \n", 69 | "model_ver = '1'\n", 70 | "model_path = os.path.join(saved_model_dir, model_ver)\n", 71 | "\n", 72 | "# load_save_resnet50_model(model_path)" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 4, 78 | "metadata": {}, 79 | "outputs": [ 80 | { 81 | "name": "stdout", 82 | "output_type": "stream", 83 | "text": [ 84 | "./\n", 85 | "./1/\n", 86 | "./1/variables/\n", 87 | "./1/variables/variables.data-00000-of-00001\n", 88 | "./1/variables/variables.index\n", 89 | "./1/saved_model.pb\n", 90 | "./1/assets/\n" 91 | ] 92 | } 93 | ], 94 | "source": [ 95 | "shutil.rmtree('model.tar.gz', ignore_errors=True)\n", 96 | "!tar cvfz model.tar.gz -C resnet50_saved_model ." 
97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 5, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "from sagemaker.tensorflow.model import TensorFlowModel, TensorFlowPredictor\n", 106 | "\n", 107 | "prefix = 'keras_models'\n", 108 | "s3_model_path = sess.upload_data(path='model.tar.gz', key_prefix=prefix)\n", 109 | "\n", 110 | "model = TensorFlowModel(model_data=s3_model_path, \n", 111 | " framework_version='1.15',\n", 112 | " role=role,\n", 113 | " predictor_cls = TensorFlowPredictor,\n", 114 | " sagemaker_session=sess)" 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "### Deploy to CPU instance" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 6, 127 | "metadata": {}, 128 | "outputs": [ 129 | { 130 | "name": "stderr", 131 | "output_type": "stream", 132 | "text": [ 133 | "update_endpoint is a no-op in sagemaker>=2.\n", 134 | "See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.\n" 135 | ] 136 | }, 137 | { 138 | "name": "stdout", 139 | "output_type": "stream", 140 | "text": [ 141 | "-------------!" 142 | ] 143 | } 144 | ], 145 | "source": [ 146 | "predictor_cpu = model.deploy(initial_instance_count=1, \n", 147 | " instance_type='ml.c5.xlarge')" 148 | ] 149 | }, 150 | { 151 | "cell_type": "markdown", 152 | "metadata": {}, 153 | "source": [ 154 | "### Deploy using EI" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": 7, 160 | "metadata": {}, 161 | "outputs": [ 162 | { 163 | "name": "stderr", 164 | "output_type": "stream", 165 | "text": [ 166 | "update_endpoint is a no-op in sagemaker>=2.\n", 167 | "See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.\n" 168 | ] 169 | }, 170 | { 171 | "name": "stdout", 172 | "output_type": "stream", 173 | "text": [ 174 | "-------------!" 175 | ] 176 | } 177 | ], 178 | "source": [ 179 | "predictor_ei = model.deploy(initial_instance_count=1, \n", 180 | " instance_type='ml.c5.xlarge',\n", 181 | " accelerator_type='ml.eia2.large')" 182 | ] 183 | }, 184 | { 185 | "cell_type": "markdown", 186 | "metadata": {}, 187 | "source": [ 188 | "### Deploy to GPU instance" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 8, 194 | "metadata": {}, 195 | "outputs": [ 196 | { 197 | "name": "stderr", 198 | "output_type": "stream", 199 | "text": [ 200 | "update_endpoint is a no-op in sagemaker>=2.\n", 201 | "See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.\n" 202 | ] 203 | }, 204 | { 205 | "name": "stdout", 206 | "output_type": "stream", 207 | "text": [ 208 | "-------------!" 
209 | ] 210 | } 211 | ], 212 | "source": [ 213 | "predictor_gpu = model.deploy(initial_instance_count=1, \n", 214 | " instance_type='ml.g4dn.xlarge')" 215 | ] 216 | }, 217 | { 218 | "cell_type": "markdown", 219 | "metadata": {}, 220 | "source": [ 221 | "### Test endpoint" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 9, 227 | "metadata": {}, 228 | "outputs": [], 229 | "source": [ 230 | "## If you have an existing endpoint, create a predictor using the endpoint name\n", 231 | "\n", 232 | "# from sagemaker.tensorflow.model import TensorFlowPredictor\n", 233 | "# predictor = TensorFlowPredictor('ENDPOINT_NAME',\n", 234 | "# sagemaker_session=sess)" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": 10, 240 | "metadata": {}, 241 | "outputs": [], 242 | "source": [ 243 | "def image_preprocess(img, reps=1):\n", 244 | " img = np.asarray(img.resize((224, 224)))\n", 245 | " img = np.stack([img]*reps)\n", 246 | " img = tf.keras.applications.resnet50.preprocess_input(img)\n", 247 | " return img" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": 11, 253 | "metadata": {}, 254 | "outputs": [], 255 | "source": [ 256 | "from PIL import Image \n", 257 | "import numpy as np\n", 258 | "import json\n", 259 | "\n", 260 | "img= Image.open('kitten.jpg')\n", 261 | "img = image_preprocess(img, 5)" 262 | ] 263 | }, 264 | { 265 | "cell_type": "markdown", 266 | "metadata": {}, 267 | "source": [ 268 | "### Invoke CPU endpoint" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": 12, 274 | "metadata": {}, 275 | "outputs": [ 276 | { 277 | "data": { 278 | "text/plain": [ 279 | "[[('n02123159', 'tiger_cat', 0.495739877),\n", 280 | " ('n02123045', 'tabby', 0.434538245),\n", 281 | " ('n02124075', 'Egyptian_cat', 0.0492461845),\n", 282 | " ('n02127052', 'lynx', 0.0143557377),\n", 283 | " ('n02128385', 'leopard', 0.00133766234)]]" 284 | ] 285 | }, 286 | "execution_count": 12, 287 | "metadata": {}, 288 | "output_type": "execute_result" 289 | } 290 | ], 291 | "source": [ 292 | "response = predictor_cpu.predict(data=img)\n", 293 | "probs = np.array(response['predictions'][0])\n", 294 | "tf.keras.applications.resnet.decode_predictions(np.expand_dims(probs, axis=0), top=5)" 295 | ] 296 | }, 297 | { 298 | "cell_type": "markdown", 299 | "metadata": {}, 300 | "source": [ 301 | "### Invoke CPU Instance + EI endpoint" 302 | ] 303 | }, 304 | { 305 | "cell_type": "code", 306 | "execution_count": 13, 307 | "metadata": {}, 308 | "outputs": [ 309 | { 310 | "data": { 311 | "text/plain": [ 312 | "[[('n02123159', 'tiger_cat', 0.495739),\n", 313 | " ('n02123045', 'tabby', 0.434539199),\n", 314 | " ('n02124075', 'Egyptian_cat', 0.0492460541),\n", 315 | " ('n02127052', 'lynx', 0.0143557545),\n", 316 | " ('n02128385', 'leopard', 0.00133766781)]]" 317 | ] 318 | }, 319 | "execution_count": 13, 320 | "metadata": {}, 321 | "output_type": "execute_result" 322 | } 323 | ], 324 | "source": [ 325 | "response = predictor_ei.predict(data=img)\n", 326 | "probs = np.array(response['predictions'][0])\n", 327 | "tf.keras.applications.resnet.decode_predictions(np.expand_dims(probs, axis=0), top=5)" 328 | ] 329 | }, 330 | { 331 | "cell_type": "markdown", 332 | "metadata": {}, 333 | "source": [ 334 | "### Invoke G4 GPU Instance with NVIDIA T4 endpoint" 335 | ] 336 | }, 337 | { 338 | "cell_type": "code", 339 | "execution_count": 14, 340 | "metadata": {}, 341 | "outputs": [ 342 | { 343 | "data": { 344 | "text/plain": [ 345 | "[[('n02123159', 'tiger_cat', 
0.495739311),\n", 346 | " ('n02123045', 'tabby', 0.434538603),\n", 347 | " ('n02124075', 'Egyptian_cat', 0.0492461771),\n", 348 | " ('n02127052', 'lynx', 0.0143557768),\n", 349 | " ('n02128385', 'leopard', 0.00133766851)]]" 350 | ] 351 | }, 352 | "execution_count": 14, 353 | "metadata": {}, 354 | "output_type": "execute_result" 355 | } 356 | ], 357 | "source": [ 358 | "response = predictor_gpu.predict(data=img)\n", 359 | "probs = np.array(response['predictions'][0])\n", 360 | "tf.keras.applications.resnet.decode_predictions(np.expand_dims(probs, axis=0), top=5)" 361 | ] 362 | } 363 | ], 364 | "metadata": { 365 | "instance_type": "ml.t3.medium", 366 | "kernelspec": { 367 | "display_name": "Python 3 (TensorFlow 2 CPU Optimized)", 368 | "language": "python", 369 | "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-west-2:236514542706:image/tensorflow-2.1-cpu-py36" 370 | }, 371 | "language_info": { 372 | "codemirror_mode": { 373 | "name": "ipython", 374 | "version": 3 375 | }, 376 | "file_extension": ".py", 377 | "mimetype": "text/x-python", 378 | "name": "python", 379 | "nbconvert_exporter": "python", 380 | "pygments_lexer": "ipython3", 381 | "version": "3.6.9" 382 | } 383 | }, 384 | "nbformat": 4, 385 | "nbformat_minor": 4 386 | } 387 | --------------------------------------------------------------------------------
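The three invocations above only confirm that the CPU, EI, and GPU endpoints return near-identical predictions. To actually compare them, time repeated `predict` calls, and delete the endpoints afterwards so the instances stop accruing charges — a minimal sketch, assuming the predictors and `img` from the cells above:

```python
import time
import numpy as np

def time_endpoint(predictor, img, iters=100):
    predictor.predict(data=img)  # warm up before timing
    latencies = []
    for _ in range(iters):
        start = time.time()
        predictor.predict(data=img)
        latencies.append((time.time() - start) * 1000)
    return np.mean(latencies), np.percentile(latencies, 99)

for name, pred in [('cpu', predictor_cpu), ('ei', predictor_ei), ('gpu', predictor_gpu)]:
    mean_ms, p99_ms = time_endpoint(pred, img)
    print(f'{name}: mean {mean_ms:.1f} ms, p99 {p99_ms:.1f} ms')

# Clean up to stop billing:
for pred in (predictor_cpu, predictor_ei, predictor_gpu):
    pred.delete_endpoint()
```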