├── .gitignore
├── LICENSE
├── README.md
├── ei-tensorflow-resnet50.ipynb
├── gpu-tf-tensorrt-resnet50.ipynb
├── inf1-neuron-sdk-resnet50.ipynb
├── kitten.jpg
└── sagemaker-tf-cpu-gpu-ei-resnet50.ipynb
/.gitignore:
--------------------------------------------------------------------------------
1 | .ipynb_checkpoints/
2 |
3 | # Byte-compiled / optimized / DLL files
4 | __pycache__/
5 | *.py[cod]
6 | *$py.class
7 |
8 | # C extensions
9 | *.so
10 |
11 | # Distribution / packaging
12 | .Python
13 | build/
14 | develop-eggs/
15 | dist/
16 | downloads/
17 | eggs/
18 | .eggs/
19 | lib/
20 | lib64/
21 | parts/
22 | sdist/
23 | var/
24 | wheels/
25 | pip-wheel-metadata/
26 | share/python-wheels/
27 | *.egg-info/
28 | .installed.cfg
29 | *.egg
30 | MANIFEST
31 |
32 | # PyInstaller
33 | # Usually these files are written by a python script from a template
34 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
35 | *.manifest
36 | *.spec
37 |
38 | # Installer logs
39 | pip-log.txt
40 | pip-delete-this-directory.txt
41 |
42 | # Unit test / coverage reports
43 | htmlcov/
44 | .tox/
45 | .nox/
46 | .coverage
47 | .coverage.*
48 | .cache
49 | nosetests.xml
50 | coverage.xml
51 | *.cover
52 | *.py,cover
53 | .hypothesis/
54 | .pytest_cache/
55 |
56 | # Translations
57 | *.mo
58 | *.pot
59 |
60 | # Django stuff:
61 | *.log
62 | local_settings.py
63 | db.sqlite3
64 | db.sqlite3-journal
65 |
66 | # Flask stuff:
67 | instance/
68 | .webassets-cache
69 |
70 | # Scrapy stuff:
71 | .scrapy
72 |
73 | # Sphinx documentation
74 | docs/_build/
75 |
76 | # PyBuilder
77 | target/
78 |
79 | # Jupyter Notebook
80 | .ipynb_checkpoints
81 |
82 | # IPython
83 | profile_default/
84 | ipython_config.py
85 |
86 | # pyenv
87 | .python-version
88 |
89 | # pipenv
90 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
91 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
92 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
93 | # install all needed dependencies.
94 | #Pipfile.lock
95 |
96 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
97 | __pypackages__/
98 |
99 | # Celery stuff
100 | celerybeat-schedule
101 | celerybeat.pid
102 |
103 | # SageMath parsed files
104 | *.sage.py
105 |
106 | # Environments
107 | .env
108 | .venv
109 | env/
110 | venv/
111 | ENV/
112 | env.bak/
113 | venv.bak/
114 |
115 | # Spyder project settings
116 | .spyderproject
117 | .spyproject
118 |
119 | # Rope project settings
120 | .ropeproject
121 |
122 | # mkdocs documentation
123 | /site
124 |
125 | # mypy
126 | .mypy_cache/
127 | .dmypy.json
128 | dmypy.json
129 |
130 | # Pyre type checker
131 | .pyre/
132 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # This repository contains examples for deep learning inference deployment using AI accelerators:
2 | * Amazon EC2 G4 instances with NVIDIA T4 GPUs and NVIDIA TensorRT
3 | * Amazon EC2 Inf1 instances with AWS Inferentia and AWS Neuron SDK
4 | * Amazon EC2 CPU instances with Amazon Elastic Inference
5 | * Amazon SageMaker deployment hosting for CPUs, GPUs and AWS Inferentia
--------------------------------------------------------------------------------
/ei-tensorflow-resnet50.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Amazon Elastic Inference (EI) inference on Amazon EC2 CPU instance\n",
8 | "This example demonstrates Amazon Elastic Inference with Amazon EI enabled TensorFlow\n",
9 | "\n",
10 | "This example was tested on Amazon EC2 `c5.2xlarge` the following AWS Deep Learning AMI: \n",
11 | "`Deep Learning AMI (Ubuntu 18.04) Version 35.0`\n",
12 | "\n",
13 | "Run this notebook using the following conda environment:\n",
14 | "`amazonei_tensorflow2_p36`\n",
15 | "\n",
16 | "Prepare your imagenet validation TFRecord files using the following helper script:\n",
17 | "https://github.com/tensorflow/models/blob/archive/research/inception/inception/data/download_and_preprocess_imagenet.sh\n",
18 | "\n",
19 | "Save it to `/home/ubuntu/datasets/` or update the dataset location in the `get_dataset()` function"
20 | ]
21 | },
22 | {
23 | "cell_type": "code",
24 | "execution_count": 1,
25 | "metadata": {},
26 | "outputs": [],
27 | "source": [
28 | "# !pip install matplotlib pandas"
29 | ]
30 | },
31 | {
32 | "cell_type": "code",
33 | "execution_count": 2,
34 | "metadata": {},
35 | "outputs": [
36 | {
37 | "data": {
38 | "text/plain": [
39 | "'2.0.2'"
40 | ]
41 | },
42 | "execution_count": 2,
43 | "metadata": {},
44 | "output_type": "execute_result"
45 | }
46 | ],
47 | "source": [
48 | "import tensorflow as tf\n",
49 | "from tensorflow import keras\n",
50 | "from tensorflow.keras.applications.resnet50 import ResNet50\n",
51 | "from tensorflow.keras.preprocessing import image\n",
52 | "from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions\n",
53 | "from ei_for_tf.python.predictor.ei_predictor import EIPredictor\n",
54 | "import numpy as np\n",
55 | "import pandas as pd\n",
56 | "import shutil\n",
57 | "import requests\n",
58 | "import time\n",
59 | "import json\n",
60 | "import os\n",
61 | "import boto3\n",
62 | "tf.__version__"
63 | ]
64 | },
65 | {
66 | "cell_type": "code",
67 | "execution_count": 3,
68 | "metadata": {},
69 | "outputs": [],
70 | "source": [
71 | "# https://github.com/tensorflow/tensorflow/issues/29931\n",
72 | "temp = tf.zeros([8, 224, 224, 3])\n",
73 | "_ = tf.keras.applications.resnet50.preprocess_input(temp)"
74 | ]
75 | },
76 | {
77 | "cell_type": "code",
78 | "execution_count": 4,
79 | "metadata": {},
80 | "outputs": [
81 | {
82 | "name": "stdout",
83 | "output_type": "stream",
84 | "text": [
85 | "[\n",
86 | " {\n",
87 | " \"acceleratorHealth\": {\n",
88 | " \"status\": \"Ok\"\n",
89 | " },\n",
90 | " \"acceleratorType\": \"eia2.large\",\n",
91 | " \"acceleratorId\": \"eia-63a6cf28f02841469c58055bff078a95\",\n",
92 | " \"availabilityZone\": \"us-west-2a\",\n",
93 | " \"attachedResource\": \"arn:aws:ec2:us-west-2:453691756499:instance/i-00487fc33ad7ef5eb\"\n",
94 | " },\n",
95 | " {\n",
96 | " \"acceleratorHealth\": {\n",
97 | " \"status\": \"Ok\"\n",
98 | " },\n",
99 | " \"acceleratorType\": \"eia2.xlarge\",\n",
100 | " \"acceleratorId\": \"eia-ef9561df7dd74b308ecefbd8b362ca69\",\n",
101 | " \"availabilityZone\": \"us-west-2a\",\n",
102 | " \"attachedResource\": \"arn:aws:ec2:us-west-2:453691756499:instance/i-00487fc33ad7ef5eb\"\n",
103 | " }\n",
104 | "]\n"
105 | ]
106 | }
107 | ],
108 | "source": [
109 | "results = None\n",
110 | "batch_size = 8\n",
111 | "\n",
112 | "ei_client = boto3.client('elastic-inference')\n",
113 | "print(json.dumps(ei_client.describe_accelerators()['acceleratorSet'], indent=1))"
114 | ]
115 | },
116 | {
117 | "cell_type": "code",
118 | "execution_count": 5,
119 | "metadata": {},
120 | "outputs": [],
121 | "source": [
122 | "def load_save_resnet50_model(saved_model_dir = 'resnet50_saved_model'):\n",
123 | " model = ResNet50(weights='imagenet')\n",
124 | " shutil.rmtree(saved_model_dir, ignore_errors=True)\n",
125 | " model.save(saved_model_dir, include_optimizer=False, save_format='tf')"
126 | ]
127 | },
128 | {
129 | "cell_type": "code",
130 | "execution_count": 6,
131 | "metadata": {},
132 | "outputs": [],
133 | "source": [
134 | "saved_model_dir = 'resnet50_saved_model' \n",
135 | "# load_save_resnet50_model(saved_model_dir)"
136 | ]
137 | },
138 | {
139 | "cell_type": "code",
140 | "execution_count": 7,
141 | "metadata": {},
142 | "outputs": [],
143 | "source": [
144 | "def deserialize_image_record(record):\n",
145 | " feature_map = {'image/encoded': tf.io.FixedLenFeature([], tf.string, ''),\n",
146 | " 'image/class/label': tf.io.FixedLenFeature([1], tf.int64, -1),\n",
147 | " 'image/class/text': tf.io.FixedLenFeature([], tf.string, '')}\n",
148 | " obj = tf.io.parse_single_example(serialized=record, features=feature_map)\n",
149 | " imgdata = obj['image/encoded']\n",
150 | " label = tf.cast(obj['image/class/label'], tf.int32) \n",
151 | " label_text = tf.cast(obj['image/class/text'], tf.string) \n",
152 | " return imgdata, label, label_text\n",
153 | "\n",
154 | "def val_preprocessing(record):\n",
155 | " imgdata, label, label_text = deserialize_image_record(record)\n",
156 | " label -= 1\n",
157 | " image = tf.io.decode_jpeg(imgdata, channels=3, \n",
158 | " fancy_upscaling=False, \n",
159 | " dct_method='INTEGER_FAST')\n",
160 | "\n",
161 | " shape = tf.shape(image)\n",
162 | " height = tf.cast(shape[0], tf.float32)\n",
163 | " width = tf.cast(shape[1], tf.float32)\n",
164 | " side = tf.cast(tf.convert_to_tensor(256, dtype=tf.int32), tf.float32)\n",
165 | "\n",
166 | " scale = tf.cond(tf.greater(height, width),\n",
167 | " lambda: side / width,\n",
168 | " lambda: side / height)\n",
169 | " \n",
170 | " new_height = tf.cast(tf.math.rint(height * scale), tf.int32)\n",
171 | " new_width = tf.cast(tf.math.rint(width * scale), tf.int32)\n",
172 | " \n",
173 | " image = tf.image.resize(image, [new_height, new_width], method='bicubic')\n",
174 | " image = tf.image.resize_with_crop_or_pad(image, 224, 224)\n",
175 | " \n",
176 | " image = tf.keras.applications.resnet50.preprocess_input(image)\n",
177 | " \n",
178 | " return image, label, label_text\n",
179 | "\n",
180 | "def get_dataset(batch_size, use_cache=False):\n",
181 | " data_dir = '/home/ubuntu/datasets/*'\n",
182 | " files = tf.io.gfile.glob(os.path.join(data_dir))\n",
183 | " dataset = tf.data.TFRecordDataset(files)\n",
184 | " \n",
185 | " dataset = dataset.map(map_func=val_preprocessing, num_parallel_calls=tf.data.experimental.AUTOTUNE)\n",
186 | " dataset = dataset.batch(batch_size=batch_size)\n",
187 | " dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)\n",
188 | " dataset = dataset.repeat(count=1)\n",
189 | " \n",
190 | " if use_cache:\n",
191 | " shutil.rmtree('tfdatacache', ignore_errors=True)\n",
192 | " os.mkdir('tfdatacache')\n",
193 | " dataset = dataset.cache(f'./tfdatacache/imagenet_val')\n",
194 | " \n",
195 | " return dataset"
196 | ]
197 | },
198 | {
199 | "cell_type": "code",
200 | "execution_count": 8,
201 | "metadata": {},
202 | "outputs": [
203 | {
204 | "name": "stdout",
205 | "output_type": "stream",
206 | "text": [
207 | "\n",
208 | "=======================================================\n",
209 | "Benchmark results for CPU Keras, batch size: 8\n",
210 | "=======================================================\n",
211 | "\n",
212 | "Images 5000/50000. Average i/s 26.555694032421247\n",
213 | "Images 10000/50000. Average i/s 26.676666543597392\n",
214 | "Images 15000/50000. Average i/s 26.77406612095138\n",
215 | "Images 20000/50000. Average i/s 26.822275491462182\n",
216 | "Images 25000/50000. Average i/s 26.847471484622154\n",
217 | "Images 30000/50000. Average i/s 26.859330729648033\n",
218 | "Images 35000/50000. Average i/s 26.865594015573578\n",
219 | "Images 40000/50000. Average i/s 26.873174015987328\n",
220 | "Images 45000/50000. Average i/s 26.91567530151017\n"
221 | ]
222 | },
223 | {
224 | "data": {
225 | "text/html": [
226 | "
\n",
227 | "\n",
240 | "
\n",
241 | " \n",
242 | " \n",
243 | " | \n",
244 | " instance_type | \n",
245 | " accelerator | \n",
246 | " user_batch_size | \n",
247 | " accuracy | \n",
248 | " prediction_time | \n",
249 | " wall_time | \n",
250 | " images_per_sec_mean | \n",
251 | " images_per_sec_std | \n",
252 | " latency_mean | \n",
253 | " latency_99th_percentile | \n",
254 | " latency_median | \n",
255 | " latency_min | \n",
256 | "
\n",
257 | " \n",
258 | " \n",
259 | " \n",
260 | " keras_cpu_8 | \n",
261 | " c5.2xlarge | \n",
262 | " NA | \n",
263 | " 8 | \n",
264 | " 0.74956 | \n",
265 | " 1860.75 | \n",
266 | " 1864.85 | \n",
267 | " 26.8839 | \n",
268 | " 0.502054 | \n",
269 | " 297.721 | \n",
270 | " 330.937 | \n",
271 | " 296.569 | \n",
272 | " 286.248 | \n",
273 | "
\n",
274 | " \n",
275 | "
\n",
276 | "
"
277 | ],
278 | "text/plain": [
279 | " instance_type accelerator user_batch_size accuracy \\\n",
280 | "keras_cpu_8 c5.2xlarge NA 8 0.74956 \n",
281 | "\n",
282 | " prediction_time wall_time images_per_sec_mean images_per_sec_std \\\n",
283 | "keras_cpu_8 1860.75 1864.85 26.8839 0.502054 \n",
284 | "\n",
285 | " latency_mean latency_99th_percentile latency_median latency_min \n",
286 | "keras_cpu_8 297.721 330.937 296.569 286.248 "
287 | ]
288 | },
289 | "metadata": {},
290 | "output_type": "display_data"
291 | }
292 | ],
293 | "source": [
294 | "print('\\n=======================================================')\n",
295 | "print(f'Benchmark results for CPU Keras, batch size: {batch_size}')\n",
296 | "print('=======================================================\\n')\n",
297 | "\n",
298 | "model = tf.keras.models.load_model(saved_model_dir)\n",
299 | "display_every = 5000\n",
300 | "display_threshold = display_every\n",
301 | "\n",
302 | "pred_labels = []\n",
303 | "actual_labels = []\n",
304 | "iter_times = []\n",
305 | "\n",
306 | "# Get the tf.data.TFRecordDataset object for the ImageNet2012 validation dataset\n",
307 | "dataset = get_dataset(batch_size) \n",
308 | "\n",
309 | "walltime_start = time.time()\n",
310 | "for i, (validation_ds, batch_labels, _) in enumerate(dataset):\n",
311 | " start_time = time.time()\n",
312 | " pred_prob_keras = model(validation_ds)\n",
313 | " iter_times.append(time.time() - start_time)\n",
314 | " \n",
315 | " actual_labels.extend(label for label_list in batch_labels.numpy() for label in label_list)\n",
316 | " pred_labels.extend(list(np.argmax(pred_prob_keras, axis=1)))\n",
317 | " \n",
318 | " if i*batch_size >= display_threshold:\n",
319 | " print(f'Images {i*batch_size}/50000. Average i/s {np.mean(batch_size/np.array(iter_times[-display_every:]))}')\n",
320 | " display_threshold+=display_every\n",
321 | "\n",
322 | "iter_times = np.array(iter_times)\n",
323 | "acc_keras_gpu = np.sum(np.array(actual_labels) == np.array(pred_labels))/len(actual_labels)\n",
324 | "\n",
325 | "results = pd.DataFrame(columns = [f'keras_cpu_{batch_size}'])\n",
326 | "results.loc['instance_type'] = [requests.get('http://169.254.169.254/latest/meta-data/instance-type').text]\n",
327 | "results.loc['accelerator'] = ['NA']\n",
328 | "results.loc['user_batch_size'] = [batch_size]\n",
329 | "results.loc['accuracy'] = [acc_keras_gpu]\n",
330 | "results.loc['prediction_time'] = [np.sum(iter_times)]\n",
331 | "results.loc['wall_time'] = [time.time() - walltime_start]\n",
332 | "results.loc['images_per_sec_mean'] = [np.mean(batch_size / iter_times)]\n",
333 | "results.loc['images_per_sec_std'] = [np.std(batch_size / iter_times, ddof=1)]\n",
334 | "results.loc['latency_mean'] = [np.mean(iter_times) * 1000]\n",
335 | "results.loc['latency_99th_percentile'] = [np.percentile(iter_times, q=99, interpolation=\"lower\") * 1000]\n",
336 | "results.loc['latency_median'] = [np.median(iter_times) * 1000]\n",
337 | "results.loc['latency_min'] = [np.min(iter_times) * 1000]\n",
338 | "display(results.T)"
339 | ]
340 | },
341 | {
342 | "cell_type": "code",
343 | "execution_count": 9,
344 | "metadata": {},
345 | "outputs": [],
346 | "source": [
347 | "def ei_predict_benchmark(saved_model_dir, batch_size, accelerator_id):\n",
348 | " \n",
349 | " ei_size = ei_client.describe_accelerators()['acceleratorSet'][accelerator_id]['acceleratorType']\n",
350 | "\n",
351 | " print('\\n=======================================================')\n",
352 | " print(f'Benchmark results for EI: {ei_size}, batch size: {batch_size}')\n",
353 | " print('=======================================================\\n')\n",
354 | " \n",
355 | " eia_model = EIPredictor(saved_model_dir, \n",
356 | " accelerator_id=1)\n",
357 | "\n",
358 | " display_every = 5000\n",
359 | " display_threshold = display_every\n",
360 | "\n",
361 | " pred_labels = []\n",
362 | " actual_labels = []\n",
363 | " iter_times = []\n",
364 | "\n",
365 | " # Get the tf.data.TFRecordDataset object for the ImageNet2012 validation dataset\n",
366 | " dataset = get_dataset(batch_size) \n",
367 | "\n",
368 | " walltime_start = time.time()\n",
369 | " ipname = list(eia_model.feed_tensors.keys())[0]\n",
370 | " resname = list(eia_model.fetch_tensors.keys())[0]\n",
371 | "\n",
372 | " for i, (validation_ds, batch_labels, _) in enumerate(dataset):\n",
373 | "\n",
374 | " model_feed_dict={'input_1': validation_ds.numpy()}\n",
375 | " start_time = time.time()\n",
376 | " pred_prob = eia_model(model_feed_dict)\n",
377 | " iter_times.append(time.time() - start_time)\n",
378 | "\n",
379 | " actual_labels.extend(label for label_list in batch_labels.numpy() for label in label_list)\n",
380 | " pred_labels.extend(list(np.argmax(pred_prob['probs'], axis=1)))\n",
381 | "\n",
382 | " if i*batch_size >= display_threshold:\n",
383 | " print(f'Images {i*batch_size}/50000. Average i/s {np.mean(batch_size/np.array(iter_times[-display_every:]))}')\n",
384 | " display_threshold+=display_every\n",
385 | "\n",
386 | " iter_times = np.array(iter_times)\n",
387 | " acc_ei_gpu = np.sum(np.array(actual_labels) == np.array(pred_labels))/len(actual_labels)\n",
388 | " \n",
389 | " results = pd.DataFrame(columns = [f'EI_{batch_size}_{ei_size}'])\n",
390 | " results.loc['instance_type'] = [requests.get('http://169.254.169.254/latest/meta-data/instance-type').text]\n",
391 | " results.loc['accelerator'] = [ei_size]\n",
392 | " results.loc['user_batch_size'] = [batch_size]\n",
393 | " results.loc['accuracy'] = [acc_ei_gpu]\n",
394 | " results.loc['prediction_time'] = [np.sum(iter_times)]\n",
395 | " results.loc['wall_time'] = [time.time() - walltime_start]\n",
396 | " results.loc['images_per_sec_mean'] = [np.mean(batch_size / iter_times)]\n",
397 | " results.loc['images_per_sec_std'] = [np.std(batch_size / iter_times, ddof=1)]\n",
398 | " results.loc['latency_mean'] = [np.mean(iter_times) * 1000]\n",
399 | " results.loc['latency_99th_percentile'] = [np.percentile(iter_times, q=99, interpolation=\"lower\") * 1000]\n",
400 | " results.loc['latency_median'] = [np.median(iter_times) * 1000]\n",
401 | " results.loc['latency_min'] = [np.min(iter_times) * 1000]\n",
402 | " display(results.T)\n",
403 | " \n",
404 | " return results, iter_times"
405 | ]
406 | },
407 | {
408 | "cell_type": "code",
409 | "execution_count": 10,
410 | "metadata": {},
411 | "outputs": [
412 | {
413 | "name": "stdout",
414 | "output_type": "stream",
415 | "text": [
416 | "\n",
417 | "=======================================================\n",
418 | "Benchmark results for EI: eia2.large, batch size: 8\n",
419 | "=======================================================\n",
420 | "\n",
421 | "Using DEFAULT_SERVING_SIGNATURE_DEF_KEY .....\n",
422 | "WARNING:tensorflow:From /home/ubuntu/anaconda3/envs/amazonei_tensorflow2_p36/lib/python3.6/site-packages/ei_for_tf/python/predictor/ei_predictor.py:168: load (from tensorflow.python.saved_model.loader_impl) is deprecated and will be removed in a future version.\n",
423 | "Instructions for updating:\n",
424 | "This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.loader.load or tf.compat.v1.saved_model.load. There will be a new function for importing SavedModels in Tensorflow 2.0.\n",
425 | "INFO:tensorflow:Restoring parameters from resnet50_saved_model/variables/variables\n",
426 | "Images 5000/50000. Average i/s 160.79150457173685\n",
427 | "Images 10000/50000. Average i/s 160.57224536199263\n",
428 | "Images 15000/50000. Average i/s 160.17887928377442\n",
429 | "Images 20000/50000. Average i/s 159.55135762825725\n",
430 | "Images 25000/50000. Average i/s 159.05273547195634\n",
431 | "Images 30000/50000. Average i/s 158.55027160196224\n",
432 | "Images 35000/50000. Average i/s 158.2158252593362\n",
433 | "Images 40000/50000. Average i/s 157.88468338480075\n",
434 | "Images 45000/50000. Average i/s 157.15614275808505\n"
435 | ]
436 | },
437 | {
438 | "data": {
439 | "text/html": [
440 | "\n",
441 | "\n",
454 | "
\n",
455 | " \n",
456 | " \n",
457 | " | \n",
458 | " instance_type | \n",
459 | " accelerator | \n",
460 | " user_batch_size | \n",
461 | " accuracy | \n",
462 | " prediction_time | \n",
463 | " wall_time | \n",
464 | " images_per_sec_mean | \n",
465 | " images_per_sec_std | \n",
466 | " latency_mean | \n",
467 | " latency_99th_percentile | \n",
468 | " latency_median | \n",
469 | " latency_min | \n",
470 | "
\n",
471 | " \n",
472 | " \n",
473 | " \n",
474 | " EI_8_eia2.large | \n",
475 | " c5.2xlarge | \n",
476 | " eia2.large | \n",
477 | " 8 | \n",
478 | " 0.74956 | \n",
479 | " 321.635 | \n",
480 | " 331.082 | \n",
481 | " 157.271 | \n",
482 | " 5.04556 | \n",
483 | " 51.4616 | \n",
484 | " 55.3734 | \n",
485 | " 50.7524 | \n",
486 | " 47.5709 | \n",
487 | "
\n",
488 | " \n",
489 | "
\n",
490 | "
"
491 | ],
492 | "text/plain": [
493 | " instance_type accelerator user_batch_size accuracy \\\n",
494 | "EI_8_eia2.large c5.2xlarge eia2.large 8 0.74956 \n",
495 | "\n",
496 | " prediction_time wall_time images_per_sec_mean \\\n",
497 | "EI_8_eia2.large 321.635 331.082 157.271 \n",
498 | "\n",
499 | " images_per_sec_std latency_mean latency_99th_percentile \\\n",
500 | "EI_8_eia2.large 5.04556 51.4616 55.3734 \n",
501 | "\n",
502 | " latency_median latency_min \n",
503 | "EI_8_eia2.large 50.7524 47.5709 "
504 | ]
505 | },
506 | "metadata": {},
507 | "output_type": "display_data"
508 | },
509 | {
510 | "data": {
511 | "text/html": [
512 | "\n",
513 | "\n",
526 | "
\n",
527 | " \n",
528 | " \n",
529 | " | \n",
530 | " keras_cpu_8 | \n",
531 | " EI_8_eia2.large | \n",
532 | "
\n",
533 | " \n",
534 | " \n",
535 | " \n",
536 | " instance_type | \n",
537 | " c5.2xlarge | \n",
538 | " c5.2xlarge | \n",
539 | "
\n",
540 | " \n",
541 | " accelerator | \n",
542 | " NA | \n",
543 | " eia2.large | \n",
544 | "
\n",
545 | " \n",
546 | " user_batch_size | \n",
547 | " 8 | \n",
548 | " 8 | \n",
549 | "
\n",
550 | " \n",
551 | " accuracy | \n",
552 | " 0.74956 | \n",
553 | " 0.74956 | \n",
554 | "
\n",
555 | " \n",
556 | " prediction_time | \n",
557 | " 1860.75 | \n",
558 | " 321.635 | \n",
559 | "
\n",
560 | " \n",
561 | " wall_time | \n",
562 | " 1864.85 | \n",
563 | " 331.082 | \n",
564 | "
\n",
565 | " \n",
566 | " images_per_sec_mean | \n",
567 | " 26.8839 | \n",
568 | " 157.271 | \n",
569 | "
\n",
570 | " \n",
571 | " images_per_sec_std | \n",
572 | " 0.502054 | \n",
573 | " 5.04556 | \n",
574 | "
\n",
575 | " \n",
576 | " latency_mean | \n",
577 | " 297.721 | \n",
578 | " 51.4616 | \n",
579 | "
\n",
580 | " \n",
581 | " latency_99th_percentile | \n",
582 | " 330.937 | \n",
583 | " 55.3734 | \n",
584 | "
\n",
585 | " \n",
586 | " latency_median | \n",
587 | " 296.569 | \n",
588 | " 50.7524 | \n",
589 | "
\n",
590 | " \n",
591 | " latency_min | \n",
592 | " 286.248 | \n",
593 | " 47.5709 | \n",
594 | "
\n",
595 | " \n",
596 | "
\n",
597 | "
"
598 | ],
599 | "text/plain": [
600 | " keras_cpu_8 EI_8_eia2.large\n",
601 | "instance_type c5.2xlarge c5.2xlarge\n",
602 | "accelerator NA eia2.large\n",
603 | "user_batch_size 8 8\n",
604 | "accuracy 0.74956 0.74956\n",
605 | "prediction_time 1860.75 321.635\n",
606 | "wall_time 1864.85 331.082\n",
607 | "images_per_sec_mean 26.8839 157.271\n",
608 | "images_per_sec_std 0.502054 5.04556\n",
609 | "latency_mean 297.721 51.4616\n",
610 | "latency_99th_percentile 330.937 55.3734\n",
611 | "latency_median 296.569 50.7524\n",
612 | "latency_min 286.248 47.5709"
613 | ]
614 | },
615 | "metadata": {},
616 | "output_type": "display_data"
617 | }
618 | ],
619 | "source": [
620 | "ei_options = [{'ei_acc_id': 0}]\n",
621 | "\n",
622 | "iter_ds = pd.DataFrame()\n",
623 | "if results is None:\n",
624 | " results = pd.DataFrame()\n",
625 | "\n",
626 | "col_name = lambda ei_acc_id: f'ei_{ei_client.describe_accelerators()[\"acceleratorSet\"][ei_acc_id][\"acceleratorType\"]}_batch_size_{batch_size}'\n",
627 | "\n",
628 | " \n",
629 | "for opt in ei_options:\n",
630 | " ei_acc_id = opt[\"ei_acc_id\"]\n",
631 | " res, iter_times = ei_predict_benchmark(saved_model_dir, batch_size, ei_acc_id)\n",
632 | " \n",
633 | " iter_ds = pd.concat([iter_ds, pd.DataFrame(iter_times, columns=[col_name(ei_acc_id)])], axis=1)\n",
634 | " results = pd.concat([results, res], axis=1)\n",
635 | " \n",
636 | "display(results)"
637 | ]
638 | }
639 | ],
640 | "metadata": {
641 | "kernelspec": {
642 | "display_name": "Environment (conda_amazonei_tensorflow2_p36)",
643 | "language": "python",
644 | "name": "conda_amazonei_tensorflow2_p36"
645 | },
646 | "language_info": {
647 | "codemirror_mode": {
648 | "name": "ipython",
649 | "version": 3
650 | },
651 | "file_extension": ".py",
652 | "mimetype": "text/x-python",
653 | "name": "python",
654 | "nbconvert_exporter": "python",
655 | "pygments_lexer": "ipython3",
656 | "version": "3.6.10"
657 | }
658 | },
659 | "nbformat": 4,
660 | "nbformat_minor": 4
661 | }
662 |
--------------------------------------------------------------------------------
/gpu-tf-tensorrt-resnet50.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# GPU inference with NVIDIA T4 on Amazon EC2 G4 instance\n",
8 | "This example demonstrates GPU inference with:\n",
9 | "* GPU accelerated TensorFlow/Keras\n",
10 | "* NVIDIA TensorRT optimizer and runtime"
11 | ]
12 | },
13 | {
14 | "cell_type": "markdown",
15 | "metadata": {},
16 | "source": [
17 | "This example was tested on Amazon EC2 `g4dn.xlarge` using the following AWS Deep Learning AMI:\n",
18 | "`Deep Learning AMI (Ubuntu 18.04) Version 35.0`\n",
19 | "\n",
20 | "And the following NVIDIA TensorFlow Docker image: \n",
21 | "`nvcr.io/nvidia/tensorflow:20.08-tf2-py3`\n",
22 | "\n",
23 | "Create a Docker container:
\n",
24 | "`nvidia-docker run --shm-size 8g --ulimit memlock=-1 -it -v $PWD:/examples -v ~/.aws/:/.aws --network=host nvcr.io/nvidia/tensorflow:20.08-tf2-py3`\n",
25 | "\n",
26 | "Prepare your imagenet validation TFRecord files using the following helper script:\n",
27 | "https://github.com/tensorflow/models/blob/archive/research/inception/inception/data/download_and_preprocess_imagenet.sh\n",
28 | "\n",
29 | "Save it to `/examples/datasets/` or update the dataset location in the `get_dataset()` function"
30 | ]
31 | },
32 | {
33 | "cell_type": "code",
34 | "execution_count": 1,
35 | "metadata": {},
36 | "outputs": [],
37 | "source": [
38 | "# !pip install --upgrade pip -q\n",
39 | "# !pip install matplotlib pandas -q"
40 | ]
41 | },
42 | {
43 | "cell_type": "code",
44 | "execution_count": 2,
45 | "metadata": {},
46 | "outputs": [
47 | {
48 | "name": "stdout",
49 | "output_type": "stream",
50 | "text": [
51 | "TensorRT version: (7, 1, 3)\n",
52 | "TensorFlow version: 2.2.0\n"
53 | ]
54 | }
55 | ],
56 | "source": [
57 | "import os\n",
58 | "import time\n",
59 | "import shutil\n",
60 | "import json\n",
61 | "import time\n",
62 | "import pandas as pd\n",
63 | "import numpy as np\n",
64 | "import requests\n",
65 | "from functools import partial\n",
66 | "\n",
67 | "import tensorflow as tf\n",
68 | "import tensorflow.keras as keras\n",
69 | "from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input\n",
70 | "from tensorflow.keras.preprocessing import image\n",
71 | "from tensorflow.python.saved_model import tag_constants, signature_constants\n",
72 | "from tensorflow.python.framework import convert_to_constants\n",
73 | "\n",
74 | "from tensorflow.compiler.tf2tensorrt.wrap_py_utils import get_linked_tensorrt_version\n",
75 | "\n",
76 | "print(f\"TensorRT version: {get_linked_tensorrt_version()}\")\n",
77 | "print(f\"TensorFlow version: {tf.__version__}\")"
78 | ]
79 | },
80 | {
81 | "cell_type": "code",
82 | "execution_count": 3,
83 | "metadata": {},
84 | "outputs": [],
85 | "source": [
86 | "results = None\n",
87 | "batch_size = 8"
88 | ]
89 | },
90 | {
91 | "cell_type": "markdown",
92 | "metadata": {},
93 | "source": [
94 | "### Download Keras Resnet50 model"
95 | ]
96 | },
97 | {
98 | "cell_type": "code",
99 | "execution_count": 4,
100 | "metadata": {},
101 | "outputs": [],
102 | "source": [
103 | "def load_save_resnet50_model(saved_model_dir = 'resnet50_saved_model'):\n",
104 | " model = ResNet50(weights='imagenet')\n",
105 | " shutil.rmtree(saved_model_dir, ignore_errors=True)\n",
106 | " model.save(saved_model_dir, include_optimizer=False, save_format='tf')"
107 | ]
108 | },
109 | {
110 | "cell_type": "code",
111 | "execution_count": 5,
112 | "metadata": {},
113 | "outputs": [
114 | {
115 | "name": "stdout",
116 | "output_type": "stream",
117 | "text": [
118 | "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/resource_variable_ops.py:1817: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.\n",
119 | "Instructions for updating:\n",
120 | "If using Keras pass *_constraint arguments to layers.\n",
121 | "INFO:tensorflow:Assets written to: resnet50_saved_model/assets\n"
122 | ]
123 | }
124 | ],
125 | "source": [
126 | "saved_model_dir = 'resnet50_saved_model' \n",
127 | "load_save_resnet50_model(saved_model_dir)"
128 | ]
129 | },
130 | {
131 | "cell_type": "markdown",
132 | "metadata": {},
133 | "source": [
134 | "### Use `tf.data` to read ImageNet validation dataset"
135 | ]
136 | },
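{
"cell_type": "markdown",
"metadata": {},
"source": [
"`val_preprocessing` below follows the standard ImageNet evaluation recipe: decode the JPEG, scale the shorter side to 256 pixels, center-crop to 224x224, and apply the ResNet50 `preprocess_input` normalization. The label is shifted down by one because the TFRecords store classes 1-1000 while the Keras model predicts indices 0-999."
]
},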
137 | {
138 | "cell_type": "code",
139 | "execution_count": 6,
140 | "metadata": {},
141 | "outputs": [],
142 | "source": [
143 | "def deserialize_image_record(record):\n",
144 | " feature_map = {'image/encoded': tf.io.FixedLenFeature([], tf.string, ''),\n",
145 | " 'image/class/label': tf.io.FixedLenFeature([1], tf.int64, -1),\n",
146 | " 'image/class/text': tf.io.FixedLenFeature([], tf.string, '')}\n",
147 | " obj = tf.io.parse_single_example(serialized=record, features=feature_map)\n",
148 | " imgdata = obj['image/encoded']\n",
149 | " label = tf.cast(obj['image/class/label'], tf.int32) \n",
150 | " label_text = tf.cast(obj['image/class/text'], tf.string) \n",
151 | " return imgdata, label, label_text\n",
152 | "\n",
153 | "def val_preprocessing(record):\n",
154 | " imgdata, label, label_text = deserialize_image_record(record)\n",
155 | " label -= 1\n",
156 | " image = tf.io.decode_jpeg(imgdata, channels=3, \n",
157 | " fancy_upscaling=False, \n",
158 | " dct_method='INTEGER_FAST')\n",
159 | "\n",
160 | " shape = tf.shape(image)\n",
161 | " height = tf.cast(shape[0], tf.float32)\n",
162 | " width = tf.cast(shape[1], tf.float32)\n",
163 | " side = tf.cast(tf.convert_to_tensor(256, dtype=tf.int32), tf.float32)\n",
164 | "\n",
165 | " scale = tf.cond(tf.greater(height, width),\n",
166 | " lambda: side / width,\n",
167 | " lambda: side / height)\n",
168 | " \n",
169 | " new_height = tf.cast(tf.math.rint(height * scale), tf.int32)\n",
170 | " new_width = tf.cast(tf.math.rint(width * scale), tf.int32)\n",
171 | " \n",
172 | " image = tf.image.resize(image, [new_height, new_width], method='bicubic')\n",
173 | " image = tf.image.resize_with_crop_or_pad(image, 224, 224)\n",
174 | " \n",
175 | " image = tf.keras.applications.resnet50.preprocess_input(image)\n",
176 | " \n",
177 | " return image, label, label_text\n",
178 | "\n",
179 | "def get_dataset(batch_size, use_cache=False):\n",
180 | " data_dir = '/examples/datasets/*'\n",
181 | " files = tf.io.gfile.glob(os.path.join(data_dir))\n",
182 | " dataset = tf.data.TFRecordDataset(files)\n",
183 | " \n",
184 | " dataset = dataset.map(map_func=val_preprocessing, num_parallel_calls=tf.data.experimental.AUTOTUNE)\n",
185 | " dataset = dataset.batch(batch_size=batch_size)\n",
186 | " dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)\n",
187 | " dataset = dataset.repeat(count=1)\n",
188 | " \n",
189 | " if use_cache:\n",
190 | " shutil.rmtree('tfdatacache', ignore_errors=True)\n",
191 | " os.mkdir('tfdatacache')\n",
192 | " dataset = dataset.cache(f'./tfdatacache/imagenet_val')\n",
193 | " \n",
194 | " return dataset"
195 | ]
196 | },
197 | {
198 | "cell_type": "markdown",
199 | "metadata": {},
200 | "source": [
201 | "#### Predict using GPU + Keras"
202 | ]
203 | },
204 | {
205 | "cell_type": "code",
206 | "execution_count": 7,
207 | "metadata": {},
208 | "outputs": [
209 | {
210 | "name": "stdout",
211 | "output_type": "stream",
212 | "text": [
213 | "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.\n",
214 | "Images 5000/50000. Average i/s 113.52902935209637\n",
215 | "Images 10000/50000. Average i/s 114.4822357792094\n",
216 | "Images 15000/50000. Average i/s 114.17100473485702\n",
217 | "Images 20000/50000. Average i/s 114.61583324110076\n",
218 | "Images 25000/50000. Average i/s 114.84187563894713\n",
219 | "Images 30000/50000. Average i/s 115.14781546576788\n",
220 | "Images 35000/50000. Average i/s 115.10561798972904\n",
221 | "Images 40000/50000. Average i/s 115.20453046771337\n",
222 | "Images 45000/50000. Average i/s 115.71023295020717\n"
223 | ]
224 | },
225 | {
226 | "data": {
227 | "text/html": [
228 | "\n",
229 | "\n",
242 | "
\n",
243 | " \n",
244 | " \n",
245 | " | \n",
246 | " keras_gpu_8 | \n",
247 | "
\n",
248 | " \n",
249 | " \n",
250 | " \n",
251 | " instance_type | \n",
252 | " g4dn.xlarge | \n",
253 | "
\n",
254 | " \n",
255 | " user_batch_size | \n",
256 | " 8 | \n",
257 | "
\n",
258 | " \n",
259 | " accuracy | \n",
260 | " 0.74956 | \n",
261 | "
\n",
262 | " \n",
263 | " prediction_time | \n",
264 | " 440.113 | \n",
265 | "
\n",
266 | " \n",
267 | " wall_time | \n",
268 | " 443.712 | \n",
269 | "
\n",
270 | " \n",
271 | " images_per_sec_mean | \n",
272 | " 115.746 | \n",
273 | "
\n",
274 | " \n",
275 | " images_per_sec_std | \n",
276 | " 7.3476 | \n",
277 | "
\n",
278 | " \n",
279 | " latency_mean | \n",
280 | " 70.418 | \n",
281 | "
\n",
282 | " \n",
283 | " latency_99th_percentile | \n",
284 | " 84.4612 | \n",
285 | "
\n",
286 | " \n",
287 | " latency_median | \n",
288 | " 69.0285 | \n",
289 | "
\n",
290 | " \n",
291 | " latency_min | \n",
292 | " 62.314 | \n",
293 | "
\n",
294 | " \n",
295 | "
\n",
296 | "
"
297 | ],
298 | "text/plain": [
299 | " keras_gpu_8\n",
300 | "instance_type g4dn.xlarge\n",
301 | "user_batch_size 8\n",
302 | "accuracy 0.74956\n",
303 | "prediction_time 440.113\n",
304 | "wall_time 443.712\n",
305 | "images_per_sec_mean 115.746\n",
306 | "images_per_sec_std 7.3476\n",
307 | "latency_mean 70.418\n",
308 | "latency_99th_percentile 84.4612\n",
309 | "latency_median 69.0285\n",
310 | "latency_min 62.314"
311 | ]
312 | },
313 | "metadata": {},
314 | "output_type": "display_data"
315 | }
316 | ],
317 | "source": [
318 | "model = tf.keras.models.load_model(saved_model_dir)\n",
319 | "display_every = 5000\n",
320 | "display_threshold = display_every\n",
321 | "\n",
322 | "pred_labels = []\n",
323 | "actual_labels = []\n",
324 | "iter_times = []\n",
325 | "\n",
326 | "# Get the tf.data.TFRecordDataset object for the ImageNet2012 validation dataset\n",
327 | "dataset = get_dataset(batch_size) \n",
328 | "\n",
329 | "walltime_start = time.time()\n",
330 | "for i, (validation_ds, batch_labels, _) in enumerate(dataset):\n",
331 | " start_time = time.time()\n",
332 | " pred_prob_keras = model(validation_ds)\n",
333 | " iter_times.append(time.time() - start_time)\n",
334 | " \n",
335 | " actual_labels.extend(label for label_list in batch_labels.numpy() for label in label_list)\n",
336 | " pred_labels.extend(list(np.argmax(pred_prob_keras, axis=1)))\n",
337 | " \n",
338 | " if i*batch_size >= display_threshold:\n",
339 | " print(f'Images {i*batch_size}/50000. Average i/s {np.mean(batch_size/np.array(iter_times[-display_every:]))}')\n",
340 | " display_threshold+=display_every\n",
341 | "\n",
342 | "iter_times = np.array(iter_times)\n",
343 | "acc_keras_gpu = np.sum(np.array(actual_labels) == np.array(pred_labels))/len(actual_labels)\n",
344 | "\n",
345 | "results = pd.DataFrame(columns = [f'keras_gpu_{batch_size}'])\n",
346 | "results.loc['instance_type'] = [requests.get('http://169.254.169.254/latest/meta-data/instance-type').text]\n",
347 | "results.loc['user_batch_size'] = [batch_size]\n",
348 | "results.loc['accuracy'] = [acc_keras_gpu]\n",
349 | "results.loc['prediction_time'] = [np.sum(iter_times)]\n",
350 | "results.loc['wall_time'] = [time.time() - walltime_start]\n",
351 | "results.loc['images_per_sec_mean'] = [np.mean(batch_size / iter_times)]\n",
352 | "results.loc['images_per_sec_std'] = [np.std(batch_size / iter_times, ddof=1)]\n",
353 | "results.loc['latency_mean'] = [np.mean(iter_times) * 1000]\n",
354 | "results.loc['latency_99th_percentile'] = [np.percentile(iter_times, q=99, interpolation=\"lower\") * 1000]\n",
355 | "results.loc['latency_median'] = [np.median(iter_times) * 1000]\n",
356 | "results.loc['latency_min'] = [np.min(iter_times) * 1000]\n",
357 | "display(results.T)"
358 | ]
359 | },
360 | {
361 | "cell_type": "markdown",
362 | "metadata": {},
363 | "source": [
364 | "#### Predict using GPU + TensorRT"
365 | ]
366 | },
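{
"cell_type": "markdown",
"metadata": {},
"source": [
"TF-TRT converts the SavedModel once per precision mode. For `int8`, the converter additionally needs a short calibration pass: `calibrate_fn` below feeds roughly `n_calib` (50) validation images through the network so TensorRT can observe activation ranges before the engine is built."
]
},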
367 | {
368 | "cell_type": "code",
369 | "execution_count": 8,
370 | "metadata": {},
371 | "outputs": [],
372 | "source": [
373 | "def build_fn(batch_size, dataset):\n",
374 | " for i, (build_image, _, _) in enumerate(dataset):\n",
375 | " if i > 1:\n",
376 | " break\n",
377 | " yield (build_image,)\n",
378 | "\n",
379 | "def calibrate_fn(n_calib, batch_size, dataset):\n",
380 | " for i, (calib_image, _, _) in enumerate(dataset):\n",
381 | " if i > n_calib // batch_size:\n",
382 | " break\n",
383 | " yield (calib_image,)\n",
384 | "\n",
385 | "def build_tensorrt_engine(precision, batch_size, dataset):\n",
386 | " from tensorflow.python.compiler.tensorrt import trt_convert as trt\n",
387 | " conversion_params = trt.DEFAULT_TRT_CONVERSION_PARAMS._replace(precision_mode=precision.upper(),\n",
388 | " max_workspace_size_bytes=(1<<32),\n",
389 | " maximum_cached_engines=2)\n",
390 | " converter = trt.TrtGraphConverterV2(input_saved_model_dir='resnet50_saved_model',\n",
391 | " conversion_params=conversion_params)\n",
392 | " \n",
393 | " if precision.lower() == 'int8':\n",
394 | " n_calib=50\n",
395 | " converter.convert(calibration_input_fn=partial(calibrate_fn, n_calib, batch_size, \n",
396 | " dataset.shuffle(buffer_size=n_calib, reshuffle_each_iteration=True)))\n",
397 | " else:\n",
398 | " converter.convert()\n",
399 | " \n",
400 | " trt_compiled_model_dir = f'resnet50_trt_saved_models/resnet50_{precision}_{batch_size}'\n",
401 | " shutil.rmtree(trt_compiled_model_dir, ignore_errors=True)\n",
402 | "\n",
403 | " converter.build(input_fn=partial(build_fn, batch_size, dataset))\n",
404 | " converter.save(output_saved_model_dir=trt_compiled_model_dir)\n",
405 | " print(f'\\nOptimized for {precision} and batch size {batch_size}, directory:{trt_compiled_model_dir}\\n')\n",
406 | " return trt_compiled_model_dir"
407 | ]
408 | },
409 | {
410 | "cell_type": "code",
411 | "execution_count": 9,
412 | "metadata": {},
413 | "outputs": [],
414 | "source": [
415 | "def trt_predict_benchmark(precision, batch_size, use_cache=False, display_every=100, warm_up=10):\n",
416 | "\n",
417 | " print('\\n=======================================================')\n",
418 | " print(f'Benchmark results for precision: {precision}, batch size: {batch_size}')\n",
419 | " print('=======================================================\\n')\n",
420 | " \n",
421 | " dataset = get_dataset(batch_size)\n",
422 | " \n",
423 | " # If caching is enabled, cache dataset for better i/o performance\n",
424 | " if use_cache:\n",
425 | " print('Caching dataset ...')\n",
426 | " start_time = time.time()\n",
427 | " for (img,_,_) in dataset:\n",
428 | " continue\n",
429 | " print(f'Finished caching {time.time() - start_time}')\n",
430 | " \n",
431 | " trt_compiled_model_dir = build_tensorrt_engine(precision, batch_size, dataset)\n",
432 | " saved_model_trt = tf.saved_model.load(trt_compiled_model_dir, tags=[tag_constants.SERVING])\n",
433 | " model_trt = saved_model_trt.signatures[signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY]\n",
434 | " \n",
435 | " pred_labels = []\n",
436 | " actual_labels = []\n",
437 | " iter_times = []\n",
438 | " \n",
439 | " display_every = 5000\n",
440 | " display_threshold = display_every\n",
441 | " initial_time = time.time()\n",
442 | " \n",
443 | " walltime_start = time.time()\n",
444 | " for i, (validation_ds, batch_labels, _) in enumerate(dataset):\n",
445 | " if i==0:\n",
446 | " for w in range(warm_up):\n",
447 | " _ = model_trt(validation_ds);\n",
448 | " \n",
449 | " start_time = time.time()\n",
450 | " trt_results = model_trt(validation_ds);\n",
451 | " iter_times.append(time.time() - start_time)\n",
452 | " \n",
453 | " actual_labels.extend(label for label_list in batch_labels.numpy() for label in label_list)\n",
454 | " pred_labels.extend(list(tf.argmax(trt_results['predictions'], axis=1).numpy()))\n",
455 | " if (i)*batch_size >= display_threshold:\n",
456 | " print(f'Images {(i)*batch_size}/50000. Average i/s {np.mean(batch_size/np.array(iter_times[-display_every:]))}')\n",
457 | " display_threshold+=display_every\n",
458 | " \n",
459 | " print(f'Wall time: {time.time() - walltime_start}')\n",
460 | "\n",
461 | " acc_trt = np.sum(np.array(actual_labels) == np.array(pred_labels))/len(actual_labels)\n",
462 | " iter_times = np.array(iter_times)\n",
463 | " \n",
464 | " results = pd.DataFrame(columns = [f'trt_{precision}_{batch_size}'])\n",
465 | " results.loc['instance_type'] = [requests.get('http://169.254.169.254/latest/meta-data/instance-type').text]\n",
466 | " results.loc['user_batch_size'] = [batch_size]\n",
467 | " results.loc['accuracy'] = [acc_trt]\n",
468 | " results.loc['prediction_time'] = [np.sum(iter_times)]\n",
469 | " results.loc['wall_time'] = [time.time() - walltime_start] \n",
470 | " results.loc['images_per_sec_mean'] = [np.mean(batch_size / iter_times)]\n",
471 | " results.loc['images_per_sec_std'] = [np.std(batch_size / iter_times, ddof=1)]\n",
472 | " results.loc['latency_mean'] = [np.mean(iter_times) * 1000]\n",
473 | " results.loc['latency_99th_percentile'] = [np.percentile(iter_times, q=99, interpolation=\"lower\") * 1000]\n",
474 | " results.loc['latency_median'] = [np.median(iter_times) * 1000]\n",
475 | " results.loc['latency_min'] = [np.min(iter_times) * 1000]\n",
476 | " display(results.T)\n",
477 | " \n",
478 | " return results, iter_times"
479 | ]
480 | },
481 | {
482 | "cell_type": "code",
483 | "execution_count": 10,
484 | "metadata": {},
485 | "outputs": [
486 | {
487 | "name": "stdout",
488 | "output_type": "stream",
489 | "text": [
490 | "Benchmark sweep combinations:\n",
491 | "{'batch_size': 8, 'precision': 'fp32'}\n",
492 | "{'batch_size': 8, 'precision': 'fp16'}\n",
493 | "{'batch_size': 8, 'precision': 'int8'}\n"
494 | ]
495 | }
496 | ],
497 | "source": [
498 | "import itertools\n",
499 | "bench_options = {\n",
500 | " 'batch_size': [batch_size],\n",
501 | " 'precision': ['fp32', 'fp16', 'int8']\n",
502 | "}\n",
503 | "\n",
504 | "bname, bval = zip(*bench_options.items())\n",
505 | "blist = [dict(zip(bname, h)) for h in itertools.product(*bval)]\n",
506 | "\n",
507 | "print('Benchmark sweep combinations:')\n",
508 | "for b in blist:\n",
509 | " print(b)"
510 | ]
511 | },
512 | {
513 | "cell_type": "code",
514 | "execution_count": 11,
515 | "metadata": {},
516 | "outputs": [
517 | {
518 | "name": "stdout",
519 | "output_type": "stream",
520 | "text": [
521 | "\n",
522 | "=======================================================\n",
523 | "Benchmark results for precision: fp32, batch size: 8\n",
524 | "=======================================================\n",
525 | "\n",
526 | "INFO:tensorflow:Linked TensorRT version: (7, 1, 3)\n",
527 | "INFO:tensorflow:Loaded TensorRT version: (7, 1, 3)\n",
528 | "INFO:tensorflow:Assets written to: resnet50_trt_saved_models/resnet50_fp32_8/assets\n",
529 | "\n",
530 | "Optimized for fp32 and batch size 8, directory:resnet50_trt_saved_models/resnet50_fp32_8\n",
531 | "\n",
532 | "Images 5000/50000. Average i/s 1706.9338144076587\n",
533 | "Images 10000/50000. Average i/s 1709.7124824008995\n",
534 | "Images 15000/50000. Average i/s 1714.181552149894\n",
535 | "Images 20000/50000. Average i/s 1706.435347541865\n",
536 | "Images 25000/50000. Average i/s 1694.5647994188168\n",
537 | "Images 30000/50000. Average i/s 1686.1055872763206\n",
538 | "Images 35000/50000. Average i/s 1691.5992314594068\n",
539 | "Images 40000/50000. Average i/s 1690.6736552055474\n",
540 | "Images 45000/50000. Average i/s 1678.790774983944\n",
541 | "Wall time: 143.3079001903534\n"
542 | ]
543 | },
544 | {
545 | "data": {
546 | "text/html": [
547 | "\n",
548 | "\n",
561 | "
\n",
562 | " \n",
563 | " \n",
564 | " | \n",
565 | " instance_type | \n",
566 | " user_batch_size | \n",
567 | " accuracy | \n",
568 | " prediction_time | \n",
569 | " wall_time | \n",
570 | " images_per_sec_mean | \n",
571 | " images_per_sec_std | \n",
572 | " latency_mean | \n",
573 | " latency_99th_percentile | \n",
574 | " latency_median | \n",
575 | " latency_min | \n",
576 | "
\n",
577 | " \n",
578 | " \n",
579 | " \n",
580 | " trt_fp32_8 | \n",
581 | " g4dn.xlarge | \n",
582 | " 8 | \n",
583 | " 0.74956 | \n",
584 | " 38.1336 | \n",
585 | " 143.327 | \n",
586 | " 1666.69 | \n",
587 | " 960.928 | \n",
588 | " 6.10138 | \n",
589 | " 13.797 | \n",
590 | " 5.91063 | \n",
591 | " 1.36304 | \n",
592 | "
\n",
593 | " \n",
594 | "
\n",
595 | "
"
596 | ],
597 | "text/plain": [
598 | " instance_type user_batch_size accuracy prediction_time wall_time \\\n",
599 | "trt_fp32_8 g4dn.xlarge 8 0.74956 38.1336 143.327 \n",
600 | "\n",
601 | " images_per_sec_mean images_per_sec_std latency_mean \\\n",
602 | "trt_fp32_8 1666.69 960.928 6.10138 \n",
603 | "\n",
604 | " latency_99th_percentile latency_median latency_min \n",
605 | "trt_fp32_8 13.797 5.91063 1.36304 "
606 | ]
607 | },
608 | "metadata": {},
609 | "output_type": "display_data"
610 | },
611 | {
612 | "name": "stdout",
613 | "output_type": "stream",
614 | "text": [
615 | "\n",
616 | "=======================================================\n",
617 | "Benchmark results for precision: fp16, batch size: 8\n",
618 | "=======================================================\n",
619 | "\n",
620 | "INFO:tensorflow:Linked TensorRT version: (7, 1, 3)\n",
621 | "INFO:tensorflow:Loaded TensorRT version: (7, 1, 3)\n",
622 | "WARNING:tensorflow:Unresolved object in checkpoint: (root).trt_engine_resources.TRTEngineOp_0_0._serialized_trt_resource_filename\n",
623 | "WARNING:tensorflow:A checkpoint was restored (e.g. tf.train.Checkpoint.restore or tf.keras.Model.load_weights) but not all checkpointed values were used. See above for specific issues. Use expect_partial() on the load status object, e.g. tf.train.Checkpoint.restore(...).expect_partial(), to silence these warnings, or use assert_consumed() to make the check explicit. See https://www.tensorflow.org/guide/checkpoint#loading_mechanics for details.\n",
624 | "INFO:tensorflow:Assets written to: resnet50_trt_saved_models/resnet50_fp16_8/assets\n",
625 | "\n",
626 | "Optimized for fp16 and batch size 8, directory:resnet50_trt_saved_models/resnet50_fp16_8\n",
627 | "\n",
628 | "Images 5000/50000. Average i/s 1972.9929443064034\n",
629 | "Images 10000/50000. Average i/s 1931.125588304386\n",
630 | "Images 15000/50000. Average i/s 1897.5221155505612\n",
631 | "Images 20000/50000. Average i/s 1897.409528086548\n",
632 | "Images 25000/50000. Average i/s 1903.1808092268618\n",
633 | "Images 30000/50000. Average i/s 1907.7603788948525\n",
634 | "Images 35000/50000. Average i/s 1881.8081660423734\n",
635 | "Images 40000/50000. Average i/s 1811.5798501140848\n",
636 | "Images 45000/50000. Average i/s 1725.9618582526007\n",
637 | "Wall time: 135.05888485908508\n"
638 | ]
639 | },
640 | {
641 | "data": {
642 | "text/html": [
643 | "\n",
644 | "\n",
657 | "
\n",
658 | " \n",
659 | " \n",
660 | " | \n",
661 | " instance_type | \n",
662 | " user_batch_size | \n",
663 | " accuracy | \n",
664 | " prediction_time | \n",
665 | " wall_time | \n",
666 | " images_per_sec_mean | \n",
667 | " images_per_sec_std | \n",
668 | " latency_mean | \n",
669 | " latency_99th_percentile | \n",
670 | " latency_median | \n",
671 | " latency_min | \n",
672 | "
\n",
673 | " \n",
674 | " \n",
675 | " \n",
676 | " trt_fp16_8 | \n",
677 | " g4dn.xlarge | \n",
678 | " 8 | \n",
679 | " 0.74968 | \n",
680 | " 38.0335 | \n",
681 | " 135.078 | \n",
682 | " 1707.24 | \n",
683 | " 1016.37 | \n",
684 | " 6.08536 | \n",
685 | " 14.1668 | \n",
686 | " 5.91636 | \n",
687 | " 1.43266 | \n",
688 | "
\n",
689 | " \n",
690 | "
\n",
691 | "
"
692 | ],
693 | "text/plain": [
694 | " instance_type user_batch_size accuracy prediction_time wall_time \\\n",
695 | "trt_fp16_8 g4dn.xlarge 8 0.74968 38.0335 135.078 \n",
696 | "\n",
697 | " images_per_sec_mean images_per_sec_std latency_mean \\\n",
698 | "trt_fp16_8 1707.24 1016.37 6.08536 \n",
699 | "\n",
700 | " latency_99th_percentile latency_median latency_min \n",
701 | "trt_fp16_8 14.1668 5.91636 1.43266 "
702 | ]
703 | },
704 | "metadata": {},
705 | "output_type": "display_data"
706 | },
707 | {
708 | "name": "stdout",
709 | "output_type": "stream",
710 | "text": [
711 | "\n",
712 | "=======================================================\n",
713 | "Benchmark results for precision: int8, batch size: 8\n",
714 | "=======================================================\n",
715 | "\n",
716 | "INFO:tensorflow:Linked TensorRT version: (7, 1, 3)\n",
717 | "INFO:tensorflow:Loaded TensorRT version: (7, 1, 3)\n",
718 | "WARNING:tensorflow:Unresolved object in checkpoint: (root).trt_engine_resources.TRTEngineOp_1_0._serialized_trt_resource_filename\n",
719 | "WARNING:tensorflow:A checkpoint was restored (e.g. tf.train.Checkpoint.restore or tf.keras.Model.load_weights) but not all checkpointed values were used. See above for specific issues. Use expect_partial() on the load status object, e.g. tf.train.Checkpoint.restore(...).expect_partial(), to silence these warnings, or use assert_consumed() to make the check explicit. See https://www.tensorflow.org/guide/checkpoint#loading_mechanics for details.\n",
720 | "INFO:tensorflow:Assets written to: resnet50_trt_saved_models/resnet50_int8_8/assets\n",
721 | "\n",
722 | "Optimized for int8 and batch size 8, directory:resnet50_trt_saved_models/resnet50_int8_8\n",
723 | "\n",
724 | "Images 5000/50000. Average i/s 1879.6287615037268\n",
725 | "Images 10000/50000. Average i/s 1890.5233308310728\n",
726 | "Images 15000/50000. Average i/s 1904.7501508674482\n",
727 | "Images 20000/50000. Average i/s 1898.7457632383791\n",
728 | "Images 25000/50000. Average i/s 1902.8776155291969\n",
729 | "Images 30000/50000. Average i/s 1898.16488970591\n",
730 | "Images 35000/50000. Average i/s 1889.473046700565\n",
731 | "Images 40000/50000. Average i/s 1894.5937887248815\n",
732 | "Images 45000/50000. Average i/s 1893.7721136475534\n",
733 | "Wall time: 133.06834959983826\n"
734 | ]
735 | },
736 | {
737 | "data": {
738 | "text/html": [
739 | "\n",
740 | "\n",
753 | "
\n",
754 | " \n",
755 | " \n",
756 | " | \n",
757 | " instance_type | \n",
758 | " user_batch_size | \n",
759 | " accuracy | \n",
760 | " prediction_time | \n",
761 | " wall_time | \n",
762 | " images_per_sec_mean | \n",
763 | " images_per_sec_std | \n",
764 | " latency_mean | \n",
765 | " latency_99th_percentile | \n",
766 | " latency_median | \n",
767 | " latency_min | \n",
768 | "
\n",
769 | " \n",
770 | " \n",
771 | " \n",
772 | " trt_int8_8 | \n",
773 | " g4dn.xlarge | \n",
774 | " 8 | \n",
775 | " 0.74924 | \n",
776 | " 34.3497 | \n",
777 | " 133.087 | \n",
778 | " 1895.03 | \n",
779 | " 1086.22 | \n",
780 | " 5.49594 | \n",
781 | " 12.2826 | \n",
782 | " 5.27298 | \n",
783 | " 1.44053 | \n",
784 | "
\n",
785 | " \n",
786 | "
\n",
787 | "
"
788 | ],
789 | "text/plain": [
790 | " instance_type user_batch_size accuracy prediction_time wall_time \\\n",
791 | "trt_int8_8 g4dn.xlarge 8 0.74924 34.3497 133.087 \n",
792 | "\n",
793 | " images_per_sec_mean images_per_sec_std latency_mean \\\n",
794 | "trt_int8_8 1895.03 1086.22 5.49594 \n",
795 | "\n",
796 | " latency_99th_percentile latency_median latency_min \n",
797 | "trt_int8_8 12.2826 5.27298 1.44053 "
798 | ]
799 | },
800 | "metadata": {},
801 | "output_type": "display_data"
802 | },
803 | {
804 | "data": {
805 | "text/html": [
806 | "\n",
807 | "\n",
820 | "
\n",
821 | " \n",
822 | " \n",
823 | " | \n",
824 | " keras_gpu_8 | \n",
825 | " trt_fp32_8 | \n",
826 | " trt_fp16_8 | \n",
827 | " trt_int8_8 | \n",
828 | "
\n",
829 | " \n",
830 | " \n",
831 | " \n",
832 | " instance_type | \n",
833 | " g4dn.xlarge | \n",
834 | " g4dn.xlarge | \n",
835 | " g4dn.xlarge | \n",
836 | " g4dn.xlarge | \n",
837 | "
\n",
838 | " \n",
839 | " user_batch_size | \n",
840 | " 8 | \n",
841 | " 8 | \n",
842 | " 8 | \n",
843 | " 8 | \n",
844 | "
\n",
845 | " \n",
846 | " accuracy | \n",
847 | " 0.74956 | \n",
848 | " 0.74956 | \n",
849 | " 0.74968 | \n",
850 | " 0.74924 | \n",
851 | "
\n",
852 | " \n",
853 | " prediction_time | \n",
854 | " 440.113 | \n",
855 | " 38.1336 | \n",
856 | " 38.0335 | \n",
857 | " 34.3497 | \n",
858 | "
\n",
859 | " \n",
860 | " wall_time | \n",
861 | " 443.712 | \n",
862 | " 143.327 | \n",
863 | " 135.078 | \n",
864 | " 133.087 | \n",
865 | "
\n",
866 | " \n",
867 | " images_per_sec_mean | \n",
868 | " 115.746 | \n",
869 | " 1666.69 | \n",
870 | " 1707.24 | \n",
871 | " 1895.03 | \n",
872 | "
\n",
873 | " \n",
874 | " images_per_sec_std | \n",
875 | " 7.3476 | \n",
876 | " 960.928 | \n",
877 | " 1016.37 | \n",
878 | " 1086.22 | \n",
879 | "
\n",
880 | " \n",
881 | " latency_mean | \n",
882 | " 70.418 | \n",
883 | " 6.10138 | \n",
884 | " 6.08536 | \n",
885 | " 5.49594 | \n",
886 | "
\n",
887 | " \n",
888 | " latency_99th_percentile | \n",
889 | " 84.4612 | \n",
890 | " 13.797 | \n",
891 | " 14.1668 | \n",
892 | " 12.2826 | \n",
893 | "
\n",
894 | " \n",
895 | " latency_median | \n",
896 | " 69.0285 | \n",
897 | " 5.91063 | \n",
898 | " 5.91636 | \n",
899 | " 5.27298 | \n",
900 | "
\n",
901 | " \n",
902 | " latency_min | \n",
903 | " 62.314 | \n",
904 | " 1.36304 | \n",
905 | " 1.43266 | \n",
906 | " 1.44053 | \n",
907 | "
\n",
908 | " \n",
909 | "
\n",
910 | "
"
911 | ],
912 | "text/plain": [
913 | " keras_gpu_8 trt_fp32_8 trt_fp16_8 trt_int8_8\n",
914 | "instance_type g4dn.xlarge g4dn.xlarge g4dn.xlarge g4dn.xlarge\n",
915 | "user_batch_size 8 8 8 8\n",
916 | "accuracy 0.74956 0.74956 0.74968 0.74924\n",
917 | "prediction_time 440.113 38.1336 38.0335 34.3497\n",
918 | "wall_time 443.712 143.327 135.078 133.087\n",
919 | "images_per_sec_mean 115.746 1666.69 1707.24 1895.03\n",
920 | "images_per_sec_std 7.3476 960.928 1016.37 1086.22\n",
921 | "latency_mean 70.418 6.10138 6.08536 5.49594\n",
922 | "latency_99th_percentile 84.4612 13.797 14.1668 12.2826\n",
923 | "latency_median 69.0285 5.91063 5.91636 5.27298\n",
924 | "latency_min 62.314 1.36304 1.43266 1.44053"
925 | ]
926 | },
927 | "metadata": {},
928 | "output_type": "display_data"
929 | }
930 | ],
931 | "source": [
932 | "iter_ds = pd.DataFrame()\n",
933 | "\n",
934 | "if results is None:\n",
935 | " results = pd.DataFrame()\n",
936 | "\n",
937 | "col_name = lambda boption: f'trt_{boption[\"precision\"]}_{boption[\"batch_size\"]}'\n",
938 | "\n",
939 | "for boption in blist:\n",
940 | " res, it = trt_predict_benchmark(**boption)\n",
941 | " iter_ds = pd.concat([iter_ds, pd.DataFrame(it, columns=[col_name(boption)])], axis=1)\n",
942 | " results = pd.concat([results, res], axis=1)\n",
943 | "\n",
944 | "display(results)"
945 | ]
946 | },
947 | {
948 | "cell_type": "code",
949 | "execution_count": null,
950 | "metadata": {},
951 | "outputs": [],
952 | "source": []
953 | }
954 | ],
955 | "metadata": {
956 | "instance_type": "ml.g4dn.4xlarge",
957 | "kernelspec": {
958 | "display_name": "Python 3",
959 | "language": "python",
960 | "name": "python3"
961 | },
962 | "language_info": {
963 | "codemirror_mode": {
964 | "name": "ipython",
965 | "version": 3
966 | },
967 | "file_extension": ".py",
968 | "mimetype": "text/x-python",
969 | "name": "python",
970 | "nbconvert_exporter": "python",
971 | "pygments_lexer": "ipython3",
972 | "version": "3.6.9"
973 | }
974 | },
975 | "nbformat": 4,
976 | "nbformat_minor": 4
977 | }
978 |
--------------------------------------------------------------------------------
/inf1-neuron-sdk-resnet50.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# AWS Inferentia inference on Amazon EC2 Inf1 instance\n",
8 | "This example demonstrates AWS Inferentia inference with TensorFlow and AWS Neuron SDK compiler and runtime\n",
9 | "\n",
10 | "This example was tested on Amazon EC2 `inf1.xlarge` the following AWS Deep Learning AMI: \n",
11 | "`Deep Learning AMI (Ubuntu 18.04) Version 35.0`\n",
12 | "\n",
13 | "Run this notebook using the following conda environment:\n",
14 | "`aws_neuron_tensorflow_p36`\n",
15 | "\n",
16 | "Prepare your imagenet validation TFRecord files using the following helper script:\n",
17 | "https://github.com/tensorflow/models/blob/archive/research/inception/inception/data/download_and_preprocess_imagenet.sh\n",
18 | "\n",
19 | "Save it to `/home/ubuntu/datasets/` or update the dataset location in the `get_dataset()` function"
20 | ]
21 | },
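  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A minimal sketch of the one-time dataset preparation, assuming the helper script linked above has been downloaded and you have ImageNet access credentials (hypothetical invocation; adjust the target directory as needed):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hypothetical one-time invocation of the helper script linked above\n",
    "# !bash download_and_preprocess_imagenet.sh /home/ubuntu/datasets"
   ]
  },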
22 | {
23 | "cell_type": "code",
24 | "execution_count": 1,
25 | "metadata": {},
26 | "outputs": [],
27 | "source": [
28 | "# !pip install matplotlib pandas"
29 | ]
30 | },
31 | {
32 | "cell_type": "code",
33 | "execution_count": 2,
34 | "metadata": {},
35 | "outputs": [],
36 | "source": [
37 | "!/opt/aws/neuron/bin/neuron-cli reset\n",
38 | "import os\n",
39 | "import time\n",
40 | "import shutil\n",
41 | "import json\n",
42 | "import requests\n",
43 | "import numpy as np\n",
44 | "import pandas as pd\n",
45 | "import tensorflow as tf\n",
46 | "import tensorflow.neuron as tfn\n",
47 | "import tensorflow.compat.v1.keras as keras\n",
48 | "from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input\n",
49 | "from tensorflow.keras.preprocessing import image\n",
50 | "from concurrent import futures\n",
51 | "from itertools import compress"
52 | ]
53 | },
54 | {
55 | "cell_type": "code",
56 | "execution_count": 3,
57 | "metadata": {},
58 | "outputs": [],
59 | "source": [
60 | "# https://github.com/tensorflow/tensorflow/issues/29931\n",
61 | "temp = tf.zeros([8, 224, 224, 3])\n",
62 | "_ = tf.keras.applications.resnet50.preprocess_input(temp)"
63 | ]
64 | },
65 | {
66 | "cell_type": "markdown",
67 | "metadata": {},
68 | "source": [
69 | "### Resnet50 FP32 saved model"
70 | ]
71 | },
72 | {
73 | "cell_type": "code",
74 | "execution_count": 4,
75 | "metadata": {},
76 | "outputs": [
77 | {
78 | "name": "stdout",
79 | "output_type": "stream",
80 | "text": [
81 | "WARNING:tensorflow:From /home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/ops/resource_variable_ops.py:1630: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.\n",
82 | "Instructions for updating:\n",
83 | "If using Keras pass *_constraint arguments to layers.\n",
84 | "WARNING:tensorflow:From :10: simple_save (from tensorflow.python.saved_model.simple_save) is deprecated and will be removed in a future version.\n",
85 | "Instructions for updating:\n",
86 | "This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.simple_save.\n",
87 | "WARNING:tensorflow:From /home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/saved_model/signature_def_utils_impl.py:201: build_tensor_info (from tensorflow.python.saved_model.utils_impl) is deprecated and will be removed in a future version.\n",
88 | "Instructions for updating:\n",
89 | "This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.utils.build_tensor_info or tf.compat.v1.saved_model.build_tensor_info.\n",
90 | "INFO:tensorflow:Assets added to graph.\n",
91 | "INFO:tensorflow:No assets to write.\n",
92 | "INFO:tensorflow:SavedModel written to: resnet50_saved_model/saved_model.pb\n"
93 | ]
94 | }
95 | ],
96 | "source": [
97 | "# Export SavedModel\n",
98 | "saved_model_dir = 'resnet50_saved_model'\n",
99 | "shutil.rmtree(saved_model_dir, ignore_errors=True)\n",
100 | "\n",
101 | "keras.backend.set_learning_phase(0)\n",
102 | "model = ResNet50(weights='imagenet')\n",
103 | "tf.saved_model.simple_save(session = keras.backend.get_session(),\n",
104 | " export_dir = saved_model_dir,\n",
105 | " inputs = {'input_1:0': model.inputs[0]},\n",
106 | " outputs = {'probs/Softmax:0': model.outputs[0]})"
107 | ]
108 | },
109 | {
110 | "cell_type": "markdown",
111 | "metadata": {},
112 | "source": [
113 | "### Compile models with different batch sizes and cores"
114 | ]
115 | },
116 | {
117 | "cell_type": "code",
118 | "execution_count": 5,
119 | "metadata": {},
120 | "outputs": [],
121 | "source": [
122 | "def compile_inf1_model(saved_model_dir, inf1_model_dir, batch_size=1, num_cores=1, use_static_weights=False):\n",
123 | " print(f'-----------batch size: {batch_size}, num cores: {num_cores}----------')\n",
124 | " print('Compiling...')\n",
125 | " \n",
126 | " compiled_model_dir = f'resnet50_batch_{batch_size}_inf1_cores_{num_cores}'\n",
127 | " inf1_compiled_model_dir = os.path.join(inf1_model_dir, compiled_model_dir)\n",
128 | " shutil.rmtree(inf1_compiled_model_dir, ignore_errors=True)\n",
129 | "\n",
130 | " example_input = np.zeros([batch_size,224,224,3], dtype='float32')\n",
131 | "\n",
132 | " compiler_args = ['--verbose','1', '--num-neuroncores', str(num_cores)]\n",
133 | " if use_static_weights:\n",
134 | " compiler_args.append('--static-weights')\n",
135 | " \n",
136 | " start_time = time.time()\n",
137 | " compiled_res = tfn.saved_model.compile(model_dir = saved_model_dir,\n",
138 | " model_feed_dict={'input_1:0': example_input},\n",
139 | " new_model_dir = inf1_compiled_model_dir,\n",
140 | " dynamic_batch_size=True,\n",
141 | " compiler_workdir=f'./compiler-workdir/{inf1_compiled_model_dir}',\n",
142 | " compiler_args = compiler_args)\n",
143 | " print(f'Compile time: {time.time() - start_time}')\n",
144 | " \n",
145 | " compile_success = False\n",
146 | " perc_on_inf = compiled_res['OnNeuronRatio'] * 100\n",
147 | " if perc_on_inf > 50:\n",
148 | " compile_success = True\n",
149 | " \n",
150 | " print(inf1_compiled_model_dir)\n",
151 | " print(compiled_res)\n",
152 | " print('----------- Done! ----------- \\n')\n",
153 | " \n",
154 | " return compile_success"
155 | ]
156 | },
157 | {
158 | "cell_type": "markdown",
159 | "metadata": {},
160 | "source": [
161 | "### Use `tf.data` to read ImageNet validation dataset"
162 | ]
163 | },
164 | {
165 | "cell_type": "code",
166 | "execution_count": 6,
167 | "metadata": {},
168 | "outputs": [],
169 | "source": [
170 | "def deserialize_image_record(record):\n",
171 | " feature_map = {'image/encoded': tf.io.FixedLenFeature([], tf.string, ''),\n",
172 | " 'image/class/label': tf.io.FixedLenFeature([1], tf.int64, -1),\n",
173 | " 'image/class/text': tf.io.FixedLenFeature([], tf.string, '')}\n",
174 | " obj = tf.io.parse_single_example(serialized=record, features=feature_map)\n",
175 | " imgdata = obj['image/encoded']\n",
176 | " label = tf.cast(obj['image/class/label'], tf.int32) \n",
177 | " label_text = tf.cast(obj['image/class/text'], tf.string) \n",
178 | " return imgdata, label, label_text\n",
179 | "\n",
180 | "def val_preprocessing(record):\n",
181 | " imgdata, label, label_text = deserialize_image_record(record)\n",
182 | " label -= 1\n",
183 | " image = tf.io.decode_jpeg(imgdata, channels=3, \n",
184 | " fancy_upscaling=False, \n",
185 | " dct_method='INTEGER_FAST')\n",
186 | "\n",
187 | " shape = tf.shape(image)\n",
188 | " height = tf.cast(shape[0], tf.float32)\n",
189 | " width = tf.cast(shape[1], tf.float32)\n",
190 | " side = tf.cast(tf.convert_to_tensor(256, dtype=tf.int32), tf.float32)\n",
191 | "\n",
192 | " scale = tf.cond(tf.greater(height, width),\n",
193 | " lambda: side / width,\n",
194 | " lambda: side / height)\n",
195 | " \n",
196 | " new_height = tf.cast(tf.math.rint(height * scale), tf.int32)\n",
197 | " new_width = tf.cast(tf.math.rint(width * scale), tf.int32)\n",
198 | " \n",
199 | " image = tf.image.resize(image, [new_height, new_width], method='bicubic')\n",
200 | " image = tf.image.resize_with_crop_or_pad(image, 224, 224)\n",
201 | " \n",
202 | " image = tf.keras.applications.resnet50.preprocess_input(image)\n",
203 | " \n",
204 | " return image, label, label_text\n",
205 | "\n",
206 | "def get_dataset(batch_size, use_cache=False):\n",
207 | " data_dir = '/home/ubuntu/datasets/*'\n",
208 | " files = tf.io.gfile.glob(os.path.join(data_dir))\n",
209 | " dataset = tf.data.TFRecordDataset(files)\n",
210 | " \n",
211 | " dataset = dataset.map(map_func=val_preprocessing, num_parallel_calls=tf.data.experimental.AUTOTUNE)\n",
212 | " dataset = dataset.batch(batch_size=batch_size)\n",
213 | " dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)\n",
214 | " dataset = dataset.repeat(count=1)\n",
215 | " \n",
216 | " if use_cache:\n",
217 | " shutil.rmtree('tfdatacache', ignore_errors=True)\n",
218 | " os.mkdir('tfdatacache')\n",
219 | " dataset = dataset.cache(f'./tfdatacache/imagenet_val')\n",
220 | " \n",
221 | " return dataset"
222 | ]
223 | },
224 | {
225 | "cell_type": "markdown",
226 | "metadata": {},
227 | "source": [
228 | "## Single AWS Inferentia chip execution\n",
229 | "* Single core compiled models with automatic data parallel model upto 4 cores\n",
230 | "* Multi-core compiled models for pipeline execution"
231 | ]
232 | },
233 | {
234 | "cell_type": "code",
235 | "execution_count": 7,
236 | "metadata": {
237 | "scrolled": true
238 | },
239 | "outputs": [],
240 | "source": [
241 | "def inf1_predict_benchmark_single_threaded(neuron_saved_model_name, batch_size, user_batch_size, num_cores, use_cache=False, warm_up=10):\n",
242 | " print(f'Running model {neuron_saved_model_name}, user_batch_size: {user_batch_size}\\n')\n",
243 | "\n",
244 | " model_inf1 = tf.contrib.predictor.from_saved_model(neuron_saved_model_name)\n",
245 | "\n",
246 | " iter_times = []\n",
247 | " pred_labels = []\n",
248 | " actual_labels = []\n",
249 | " display_threshold = 0\n",
250 | " warm_up = 10\n",
251 | "\n",
252 | " ds = get_dataset(user_batch_size, use_cache)\n",
253 | "\n",
254 | " ds_iter = ds.make_initializable_iterator()\n",
255 | " ds_next = ds_iter.get_next()\n",
256 | " ds_init_op = ds_iter.initializer\n",
257 | "\n",
258 | " with tf.Session() as sess:\n",
259 | " if use_cache:\n",
260 | " sess.run(ds_init_op)\n",
261 | " print('\\nCaching dataset ...')\n",
262 | " start_time = time.time()\n",
263 | " try:\n",
264 | " while True:\n",
265 | " (validation_ds,label,_) = sess.run(ds_next)\n",
266 | " except tf.errors.OutOfRangeError:\n",
267 | " pass\n",
268 | " print(f'Caching finished: {time.time()-start_time} sec') \n",
269 | "\n",
270 | " try:\n",
271 | " sess.run(ds_init_op)\n",
272 | " counter = 0\n",
273 | " \n",
274 | " display_every = 5000\n",
275 | " display_threshold = display_every\n",
276 | " \n",
277 | " ipname = list(model_inf1.feed_tensors.keys())[0]\n",
278 | " resname = list(model_inf1.fetch_tensors.keys())[0]\n",
279 | " \n",
280 | " walltime_start = time.time()\n",
281 | "\n",
282 | " while True:\n",
283 | " (validation_ds,batch_labels,_) = sess.run(ds_next)\n",
284 | "\n",
285 | " model_feed_dict={ipname: validation_ds}\n",
286 | "\n",
287 | " if counter == 0:\n",
288 | " for i in range(warm_up):\n",
289 | " _ = model_inf1(model_feed_dict); \n",
290 | "\n",
291 | " start_time = time.time()\n",
292 | " inf1_results = model_inf1(model_feed_dict);\n",
293 | " iter_times.append(time.time() - start_time)\n",
294 | " \n",
295 | " actual_labels.extend(label for label_list in batch_labels for label in label_list)\n",
296 | " pred_labels.extend(list(np.argmax(inf1_results[resname], axis=1)))\n",
297 | "\n",
298 | " if counter*user_batch_size >= display_threshold:\n",
299 | " print(f'Images {counter*user_batch_size}/50000. Average i/s {np.mean(user_batch_size/np.array(iter_times[-display_every:]))}')\n",
300 | " display_threshold+=display_every\n",
301 | "\n",
302 | " counter+=1\n",
303 | "\n",
304 | " except tf.errors.OutOfRangeError:\n",
305 | " pass\n",
306 | " \n",
307 | " acc_inf1 = np.sum(np.array(actual_labels) == np.array(pred_labels))/len(actual_labels)\n",
308 | " iter_times = np.array(iter_times)\n",
309 | " \n",
310 | " results = pd.DataFrame(columns = [f'inf1_compiled_batch_size_{batch_size}_compiled_cores_{num_cores}'])\n",
311 | " results.loc['instance_type'] = [requests.get('http://169.254.169.254/latest/meta-data/instance-type').text]\n",
312 | " results.loc['compiled_batch_size'] = [batch_size]\n",
313 | " results.loc['user_batch_size'] = [user_batch_size]\n",
314 | " results.loc['accuracy'] = [acc_inf1]\n",
315 | " results.loc['prediction_time'] = [np.sum(iter_times)]\n",
316 | " results.loc['wall_time'] = [time.time() - walltime_start]\n",
317 | " results.loc['images_per_sec_mean'] = [np.mean(user_batch_size / iter_times)]\n",
318 | " results.loc['images_per_sec_std'] = [np.std(user_batch_size / iter_times, ddof=1)]\n",
319 | " results.loc['latency_mean'] = [np.mean(iter_times) * 1000]\n",
320 | " results.loc['latency_99th_percentile'] = [np.percentile(iter_times, q=99, interpolation=\"lower\") * 1000]\n",
321 | " results.loc['latency_median'] = [np.median(iter_times) * 1000]\n",
322 | " results.loc['latency_min'] = [np.min(iter_times) * 1000]\n",
323 | " display(results.T)\n",
324 | "\n",
325 | " return results, iter_times"
326 | ]
327 | },
328 | {
329 | "cell_type": "code",
330 | "execution_count": 8,
331 | "metadata": {},
332 | "outputs": [
333 | {
334 | "name": "stdout",
335 | "output_type": "stream",
336 | "text": [
337 | "-----------batch size: 1, num cores: 1----------\n",
338 | "Compiling...\n",
339 | "INFO:tensorflow:Restoring parameters from resnet50_saved_model/variables/variables\n",
340 | "INFO:tensorflow:Froze 320 variables.\n",
341 | "INFO:tensorflow:Converted 320 variables to const ops.\n",
342 | "INFO:tensorflow:fusing subgraph neuron_op_d6f098c01c780733 with neuron-cc; log file is at /home/ubuntu/examples/bkp/ai-accelerators-examples/compiler-workdir/resnet50_inf1_saved_models/resnet50_batch_1_inf1_cores_1/neuron_op_d6f098c01c780733/graph_def.neuron-cc.log\n",
343 | "INFO:tensorflow:Number of operations in TensorFlow session: 4647\n",
344 | "INFO:tensorflow:Number of operations after tf.neuron optimizations: 556\n",
345 | "INFO:tensorflow:Number of operations placed on Neuron runtime: 554\n",
346 | "INFO:tensorflow:No assets to save.\n",
347 | "INFO:tensorflow:No assets to write.\n",
348 | "INFO:tensorflow:SavedModel written to: resnet50_inf1_saved_models/resnet50_batch_1_inf1_cores_1/saved_model.pb\n",
349 | "INFO:tensorflow:Successfully converted resnet50_saved_model to resnet50_inf1_saved_models/resnet50_batch_1_inf1_cores_1\n",
350 | "Compile time: 57.83445167541504\n",
351 | "resnet50_inf1_saved_models/resnet50_batch_1_inf1_cores_1\n",
352 | "{'OnNeuronRatio': 0.9964028776978417}\n",
353 | "----------- Done! ----------- \n",
354 | "\n",
355 | "-----------batch size: 5, num cores: 1----------\n",
356 | "Compiling...\n",
357 | "INFO:tensorflow:Restoring parameters from resnet50_saved_model/variables/variables\n",
358 | "INFO:tensorflow:Froze 320 variables.\n",
359 | "INFO:tensorflow:Converted 320 variables to const ops.\n",
360 | "INFO:tensorflow:fusing subgraph neuron_op_d6f098c01c780733 with neuron-cc; log file is at /home/ubuntu/examples/bkp/ai-accelerators-examples/compiler-workdir/resnet50_inf1_saved_models/resnet50_batch_5_inf1_cores_1/neuron_op_d6f098c01c780733/graph_def.neuron-cc.log\n",
361 | "INFO:tensorflow:Number of operations in TensorFlow session: 4647\n",
362 | "INFO:tensorflow:Number of operations after tf.neuron optimizations: 556\n",
363 | "INFO:tensorflow:Number of operations placed on Neuron runtime: 554\n",
364 | "INFO:tensorflow:No assets to save.\n",
365 | "INFO:tensorflow:No assets to write.\n",
366 | "INFO:tensorflow:SavedModel written to: resnet50_inf1_saved_models/resnet50_batch_5_inf1_cores_1/saved_model.pb\n",
367 | "INFO:tensorflow:Successfully converted resnet50_saved_model to resnet50_inf1_saved_models/resnet50_batch_5_inf1_cores_1\n",
368 | "Compile time: 96.24623918533325\n",
369 | "resnet50_inf1_saved_models/resnet50_batch_5_inf1_cores_1\n",
370 | "{'OnNeuronRatio': 0.9964028776978417}\n",
371 | "----------- Done! ----------- \n",
372 | "\n",
373 | "-----------batch size: 1, num cores: 4----------\n",
374 | "Compiling...\n",
375 | "INFO:tensorflow:Restoring parameters from resnet50_saved_model/variables/variables\n",
376 | "INFO:tensorflow:Froze 320 variables.\n",
377 | "INFO:tensorflow:Converted 320 variables to const ops.\n",
378 | "INFO:tensorflow:fusing subgraph neuron_op_d6f098c01c780733 with neuron-cc; log file is at /home/ubuntu/examples/bkp/ai-accelerators-examples/compiler-workdir/resnet50_inf1_saved_models/resnet50_batch_1_inf1_cores_4/neuron_op_d6f098c01c780733/graph_def.neuron-cc.log\n",
379 | "INFO:tensorflow:Number of operations in TensorFlow session: 4647\n",
380 | "INFO:tensorflow:Number of operations after tf.neuron optimizations: 556\n",
381 | "INFO:tensorflow:Number of operations placed on Neuron runtime: 554\n",
382 | "INFO:tensorflow:No assets to save.\n",
383 | "INFO:tensorflow:No assets to write.\n",
384 | "INFO:tensorflow:SavedModel written to: resnet50_inf1_saved_models/resnet50_batch_1_inf1_cores_4/saved_model.pb\n",
385 | "INFO:tensorflow:Successfully converted resnet50_saved_model to resnet50_inf1_saved_models/resnet50_batch_1_inf1_cores_4\n",
386 | "Compile time: 63.46157956123352\n",
387 | "resnet50_inf1_saved_models/resnet50_batch_1_inf1_cores_4\n",
388 | "{'OnNeuronRatio': 0.9964028776978417}\n",
389 | "----------- Done! ----------- \n",
390 | "\n"
391 | ]
392 | },
393 | {
394 | "data": {
395 | "text/plain": [
396 | "True"
397 | ]
398 | },
399 | "execution_count": 8,
400 | "metadata": {},
401 | "output_type": "execute_result"
402 | }
403 | ],
404 | "source": [
405 | "inf1_model_dir = 'resnet50_inf1_saved_models'\n",
406 | "saved_model_dir = 'resnet50_saved_model'\n",
407 | "\n",
408 | "compile_inf1_model(saved_model_dir, inf1_model_dir, batch_size=1, num_cores=1)\n",
409 | "compile_inf1_model(saved_model_dir, inf1_model_dir, batch_size=5, num_cores=1)\n",
410 | "compile_inf1_model(saved_model_dir, inf1_model_dir, batch_size=1, num_cores=4)"
411 | ]
412 | },
413 | {
414 | "cell_type": "code",
415 | "execution_count": 9,
416 | "metadata": {},
417 | "outputs": [
418 | {
419 | "name": "stdout",
420 | "output_type": "stream",
421 | "text": [
422 | "inf1_compiled_model_dir: resnet50_inf1_saved_models/resnet50_batch_1_inf1_cores_1\n",
423 | "Running model resnet50_inf1_saved_models/resnet50_batch_1_inf1_cores_1, user_batch_size: 10\n",
424 | "\n",
425 | "WARNING:tensorflow:\n",
426 | "The TensorFlow contrib module will not be included in TensorFlow 2.0.\n",
427 | "For more information, please see:\n",
428 | " * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md\n",
429 | " * https://github.com/tensorflow/addons\n",
430 | " * https://github.com/tensorflow/io (for I/O related ops)\n",
431 | "If you depend on functionality not listed there, please file an issue.\n",
432 | "\n",
433 | "WARNING:tensorflow:From /home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/contrib/predictor/saved_model_predictor.py:153: load (from tensorflow.python.saved_model.loader_impl) is deprecated and will be removed in a future version.\n",
434 | "Instructions for updating:\n",
435 | "This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.loader.load or tf.compat.v1.saved_model.load. There will be a new function for importing SavedModels in Tensorflow 2.0.\n",
436 | "INFO:tensorflow:Saver not created because there are no variables in the graph to restore\n",
437 | "INFO:tensorflow:The specified SavedModel has no variables; no checkpoints were restored.\n",
438 | "WARNING:tensorflow:From :14: DatasetV1.make_initializable_iterator (from tensorflow.python.data.ops.dataset_ops) is deprecated and will be removed in a future version.\n",
439 | "Instructions for updating:\n",
440 | "Use `for ... in dataset:` to iterate over a dataset. If using `tf.estimator`, return the `Dataset` object directly from your input function. As a last resort, you can use `tf.compat.v1.data.make_initializable_iterator(dataset)`.\n",
441 | "Images 5000/50000. Average i/s 511.9149005447497\n",
442 | "Images 10000/50000. Average i/s 514.1476835875276\n",
443 | "Images 15000/50000. Average i/s 511.55752611295105\n",
444 | "Images 20000/50000. Average i/s 510.6258382445502\n",
445 | "Images 25000/50000. Average i/s 510.6002877210464\n",
446 | "Images 30000/50000. Average i/s 510.33624960724376\n",
447 | "Images 35000/50000. Average i/s 510.23169540573906\n",
448 | "Images 40000/50000. Average i/s 509.62934416741103\n",
449 | "Images 45000/50000. Average i/s 509.851016680137\n"
450 | ]
451 | },
452 | {
453 | "data": {
454 | "text/html": [
455 | "\n",
456 | "\n",
469 | "
\n",
470 | " \n",
471 | " \n",
472 | " | \n",
473 | " instance_type | \n",
474 | " compiled_batch_size | \n",
475 | " user_batch_size | \n",
476 | " accuracy | \n",
477 | " prediction_time | \n",
478 | " wall_time | \n",
479 | " images_per_sec_mean | \n",
480 | " images_per_sec_std | \n",
481 | " latency_mean | \n",
482 | " latency_99th_percentile | \n",
483 | " latency_median | \n",
484 | " latency_min | \n",
485 | "
\n",
486 | " \n",
487 | " \n",
488 | " \n",
489 | " inf1_compiled_batch_size_1_compiled_cores_1 | \n",
490 | " inf1.xlarge | \n",
491 | " 1 | \n",
492 | " 10 | \n",
493 | " 0.74852 | \n",
494 | " 99.4977 | \n",
495 | " 146.44 | \n",
496 | " 509.898 | \n",
497 | " 59.3509 | \n",
498 | " 19.8995 | \n",
499 | " 27.0095 | \n",
500 | " 19.4516 | \n",
501 | " 15.7409 | \n",
502 | "
\n",
503 | " \n",
504 | "
\n",
505 | "
"
506 | ],
507 | "text/plain": [
508 | " instance_type compiled_batch_size \\\n",
509 | "inf1_compiled_batch_size_1_compiled_cores_1 inf1.xlarge 1 \n",
510 | "\n",
511 | " user_batch_size accuracy \\\n",
512 | "inf1_compiled_batch_size_1_compiled_cores_1 10 0.74852 \n",
513 | "\n",
514 | " prediction_time wall_time \\\n",
515 | "inf1_compiled_batch_size_1_compiled_cores_1 99.4977 146.44 \n",
516 | "\n",
517 | " images_per_sec_mean \\\n",
518 | "inf1_compiled_batch_size_1_compiled_cores_1 509.898 \n",
519 | "\n",
520 | " images_per_sec_std latency_mean \\\n",
521 | "inf1_compiled_batch_size_1_compiled_cores_1 59.3509 19.8995 \n",
522 | "\n",
523 | " latency_99th_percentile \\\n",
524 | "inf1_compiled_batch_size_1_compiled_cores_1 27.0095 \n",
525 | "\n",
526 | " latency_median latency_min \n",
527 | "inf1_compiled_batch_size_1_compiled_cores_1 19.4516 15.7409 "
528 | ]
529 | },
530 | "metadata": {},
531 | "output_type": "display_data"
532 | },
533 | {
534 | "name": "stdout",
535 | "output_type": "stream",
536 | "text": [
537 | "inf1_compiled_model_dir: resnet50_inf1_saved_models/resnet50_batch_5_inf1_cores_1\n",
538 | "Running model resnet50_inf1_saved_models/resnet50_batch_5_inf1_cores_1, user_batch_size: 50\n",
539 | "\n",
540 | "INFO:tensorflow:Saver not created because there are no variables in the graph to restore\n",
541 | "INFO:tensorflow:The specified SavedModel has no variables; no checkpoints were restored.\n",
542 | "Images 5000/50000. Average i/s 756.9368826530499\n",
543 | "Images 10000/50000. Average i/s 758.3658209888853\n",
544 | "Images 15000/50000. Average i/s 758.889930748228\n",
545 | "Images 20000/50000. Average i/s 760.4266183337617\n",
546 | "Images 25000/50000. Average i/s 760.8172446072302\n",
547 | "Images 30000/50000. Average i/s 760.2700641837997\n",
548 | "Images 35000/50000. Average i/s 759.5763119031232\n",
549 | "Images 40000/50000. Average i/s 760.0876622928208\n",
550 | "Images 45000/50000. Average i/s 760.6692409949743\n"
551 | ]
552 | },
553 | {
554 | "data": {
555 | "text/html": [
556 | "\n",
557 | "\n",
570 | "
\n",
571 | " \n",
572 | " \n",
573 | " | \n",
574 | " instance_type | \n",
575 | " compiled_batch_size | \n",
576 | " user_batch_size | \n",
577 | " accuracy | \n",
578 | " prediction_time | \n",
579 | " wall_time | \n",
580 | " images_per_sec_mean | \n",
581 | " images_per_sec_std | \n",
582 | " latency_mean | \n",
583 | " latency_99th_percentile | \n",
584 | " latency_median | \n",
585 | " latency_min | \n",
586 | "
\n",
587 | " \n",
588 | " \n",
589 | " \n",
590 | " inf1_compiled_batch_size_5_compiled_cores_1 | \n",
591 | " inf1.xlarge | \n",
592 | " 5 | \n",
593 | " 50 | \n",
594 | " 0.7486 | \n",
595 | " 66.1318 | \n",
596 | " 143.338 | \n",
597 | " 760.852 | \n",
598 | " 59.7863 | \n",
599 | " 66.1318 | \n",
600 | " 80.0529 | \n",
601 | " 65.8345 | \n",
602 | " 56.5889 | \n",
603 | "
\n",
604 | " \n",
605 | "
\n",
606 | "
"
607 | ],
608 | "text/plain": [
609 | " instance_type compiled_batch_size \\\n",
610 | "inf1_compiled_batch_size_5_compiled_cores_1 inf1.xlarge 5 \n",
611 | "\n",
612 | " user_batch_size accuracy \\\n",
613 | "inf1_compiled_batch_size_5_compiled_cores_1 50 0.7486 \n",
614 | "\n",
615 | " prediction_time wall_time \\\n",
616 | "inf1_compiled_batch_size_5_compiled_cores_1 66.1318 143.338 \n",
617 | "\n",
618 | " images_per_sec_mean \\\n",
619 | "inf1_compiled_batch_size_5_compiled_cores_1 760.852 \n",
620 | "\n",
621 | " images_per_sec_std latency_mean \\\n",
622 | "inf1_compiled_batch_size_5_compiled_cores_1 59.7863 66.1318 \n",
623 | "\n",
624 | " latency_99th_percentile \\\n",
625 | "inf1_compiled_batch_size_5_compiled_cores_1 80.0529 \n",
626 | "\n",
627 | " latency_median latency_min \n",
628 | "inf1_compiled_batch_size_5_compiled_cores_1 65.8345 56.5889 "
629 | ]
630 | },
631 | "metadata": {},
632 | "output_type": "display_data"
633 | },
634 | {
635 | "name": "stdout",
636 | "output_type": "stream",
637 | "text": [
638 | "inf1_compiled_model_dir: resnet50_inf1_saved_models/resnet50_batch_1_inf1_cores_4\n",
639 | "Running model resnet50_inf1_saved_models/resnet50_batch_1_inf1_cores_4, user_batch_size: 10\n",
640 | "\n",
641 | "INFO:tensorflow:Saver not created because there are no variables in the graph to restore\n",
642 | "INFO:tensorflow:The specified SavedModel has no variables; no checkpoints were restored.\n",
643 | "Images 5000/50000. Average i/s 361.0220527406529\n",
644 | "Images 10000/50000. Average i/s 359.68171067062457\n",
645 | "Images 15000/50000. Average i/s 358.75999045862824\n",
646 | "Images 20000/50000. Average i/s 358.28821764290626\n",
647 | "Images 25000/50000. Average i/s 357.87710423546616\n",
648 | "Images 30000/50000. Average i/s 358.12412006408044\n",
649 | "Images 35000/50000. Average i/s 358.09136022506436\n",
650 | "Images 40000/50000. Average i/s 357.7498270103637\n",
651 | "Images 45000/50000. Average i/s 357.8152796870437\n"
652 | ]
653 | },
654 | {
655 | "data": {
656 | "text/html": [
657 | "\n",
658 | "\n",
671 | "
\n",
672 | " \n",
673 | " \n",
674 | " | \n",
675 | " instance_type | \n",
676 | " compiled_batch_size | \n",
677 | " user_batch_size | \n",
678 | " accuracy | \n",
679 | " prediction_time | \n",
680 | " wall_time | \n",
681 | " images_per_sec_mean | \n",
682 | " images_per_sec_std | \n",
683 | " latency_mean | \n",
684 | " latency_99th_percentile | \n",
685 | " latency_median | \n",
686 | " latency_min | \n",
687 | "
\n",
688 | " \n",
689 | " \n",
690 | " \n",
691 | " inf1_compiled_batch_size_1_compiled_cores_4 | \n",
692 | " inf1.xlarge | \n",
693 | " 1 | \n",
694 | " 10 | \n",
695 | " 0.749 | \n",
696 | " 140.423 | \n",
697 | " 147.618 | \n",
698 | " 357.974 | \n",
699 | " 25.1203 | \n",
700 | " 28.0846 | \n",
701 | " 34.502 | \n",
702 | " 27.5503 | \n",
703 | " 25.4283 | \n",
704 | "
\n",
705 | " \n",
706 | "
\n",
707 | "
"
708 | ],
709 | "text/plain": [
710 | " instance_type compiled_batch_size \\\n",
711 | "inf1_compiled_batch_size_1_compiled_cores_4 inf1.xlarge 1 \n",
712 | "\n",
713 | " user_batch_size accuracy \\\n",
714 | "inf1_compiled_batch_size_1_compiled_cores_4 10 0.749 \n",
715 | "\n",
716 | " prediction_time wall_time \\\n",
717 | "inf1_compiled_batch_size_1_compiled_cores_4 140.423 147.618 \n",
718 | "\n",
719 | " images_per_sec_mean \\\n",
720 | "inf1_compiled_batch_size_1_compiled_cores_4 357.974 \n",
721 | "\n",
722 | " images_per_sec_std latency_mean \\\n",
723 | "inf1_compiled_batch_size_1_compiled_cores_4 25.1203 28.0846 \n",
724 | "\n",
725 | " latency_99th_percentile \\\n",
726 | "inf1_compiled_batch_size_1_compiled_cores_4 34.502 \n",
727 | "\n",
728 | " latency_median latency_min \n",
729 | "inf1_compiled_batch_size_1_compiled_cores_4 27.5503 25.4283 "
730 | ]
731 | },
732 | "metadata": {},
733 | "output_type": "display_data"
734 | },
735 | {
736 | "data": {
737 | "text/html": [
738 | "\n",
739 | "\n",
752 | "
\n",
753 | " \n",
754 | " \n",
755 | " | \n",
756 | " inf1_compiled_batch_size_1_compiled_cores_1 | \n",
757 | " inf1_compiled_batch_size_5_compiled_cores_1 | \n",
758 | " inf1_compiled_batch_size_1_compiled_cores_4 | \n",
759 | "
\n",
760 | " \n",
761 | " \n",
762 | " \n",
763 | " instance_type | \n",
764 | " inf1.xlarge | \n",
765 | " inf1.xlarge | \n",
766 | " inf1.xlarge | \n",
767 | "
\n",
768 | " \n",
769 | " compiled_batch_size | \n",
770 | " 1 | \n",
771 | " 5 | \n",
772 | " 1 | \n",
773 | "
\n",
774 | " \n",
775 | " user_batch_size | \n",
776 | " 10 | \n",
777 | " 50 | \n",
778 | " 10 | \n",
779 | "
\n",
780 | " \n",
781 | " accuracy | \n",
782 | " 0.74852 | \n",
783 | " 0.7486 | \n",
784 | " 0.749 | \n",
785 | "
\n",
786 | " \n",
787 | " prediction_time | \n",
788 | " 99.4977 | \n",
789 | " 66.1318 | \n",
790 | " 140.423 | \n",
791 | "
\n",
792 | " \n",
793 | " wall_time | \n",
794 | " 146.44 | \n",
795 | " 143.338 | \n",
796 | " 147.618 | \n",
797 | "
\n",
798 | " \n",
799 | " images_per_sec_mean | \n",
800 | " 509.898 | \n",
801 | " 760.852 | \n",
802 | " 357.974 | \n",
803 | "
\n",
804 | " \n",
805 | " images_per_sec_std | \n",
806 | " 59.3509 | \n",
807 | " 59.7863 | \n",
808 | " 25.1203 | \n",
809 | "
\n",
810 | " \n",
811 | " latency_mean | \n",
812 | " 19.8995 | \n",
813 | " 66.1318 | \n",
814 | " 28.0846 | \n",
815 | "
\n",
816 | " \n",
817 | " latency_99th_percentile | \n",
818 | " 27.0095 | \n",
819 | " 80.0529 | \n",
820 | " 34.502 | \n",
821 | "
\n",
822 | " \n",
823 | " latency_median | \n",
824 | " 19.4516 | \n",
825 | " 65.8345 | \n",
826 | " 27.5503 | \n",
827 | "
\n",
828 | " \n",
829 | " latency_min | \n",
830 | " 15.7409 | \n",
831 | " 56.5889 | \n",
832 | " 25.4283 | \n",
833 | "
\n",
834 | " \n",
835 | "
\n",
836 | "
"
837 | ],
838 | "text/plain": [
839 | " inf1_compiled_batch_size_1_compiled_cores_1 \\\n",
840 | "instance_type inf1.xlarge \n",
841 | "compiled_batch_size 1 \n",
842 | "user_batch_size 10 \n",
843 | "accuracy 0.74852 \n",
844 | "prediction_time 99.4977 \n",
845 | "wall_time 146.44 \n",
846 | "images_per_sec_mean 509.898 \n",
847 | "images_per_sec_std 59.3509 \n",
848 | "latency_mean 19.8995 \n",
849 | "latency_99th_percentile 27.0095 \n",
850 | "latency_median 19.4516 \n",
851 | "latency_min 15.7409 \n",
852 | "\n",
853 | " inf1_compiled_batch_size_5_compiled_cores_1 \\\n",
854 | "instance_type inf1.xlarge \n",
855 | "compiled_batch_size 5 \n",
856 | "user_batch_size 50 \n",
857 | "accuracy 0.7486 \n",
858 | "prediction_time 66.1318 \n",
859 | "wall_time 143.338 \n",
860 | "images_per_sec_mean 760.852 \n",
861 | "images_per_sec_std 59.7863 \n",
862 | "latency_mean 66.1318 \n",
863 | "latency_99th_percentile 80.0529 \n",
864 | "latency_median 65.8345 \n",
865 | "latency_min 56.5889 \n",
866 | "\n",
867 | " inf1_compiled_batch_size_1_compiled_cores_4 \n",
868 | "instance_type inf1.xlarge \n",
869 | "compiled_batch_size 1 \n",
870 | "user_batch_size 10 \n",
871 | "accuracy 0.749 \n",
872 | "prediction_time 140.423 \n",
873 | "wall_time 147.618 \n",
874 | "images_per_sec_mean 357.974 \n",
875 | "images_per_sec_std 25.1203 \n",
876 | "latency_mean 28.0846 \n",
877 | "latency_99th_percentile 34.502 \n",
878 | "latency_median 27.5503 \n",
879 | "latency_min 25.4283 "
880 | ]
881 | },
882 | "metadata": {},
883 | "output_type": "display_data"
884 | }
885 | ],
886 | "source": [
887 | "inf1_model_dir = 'resnet50_inf1_saved_models'\n",
888 | "\n",
889 | "compile_options = [{'batch_size': 1, 'num_cores': 1},\n",
890 | " {'batch_size': 5, 'num_cores': 1},\n",
891 | " {'batch_size': 1, 'num_cores': 4}]\n",
892 | "\n",
893 | "iter_ds = pd.DataFrame()\n",
894 | "results = pd.DataFrame()\n",
895 | "\n",
896 | "for opt in compile_options:\n",
897 | " batch_size = opt[\"batch_size\"]\n",
898 | " num_cores = opt[\"num_cores\"]\n",
899 | " compiled_model_dir = f'resnet50_batch_{batch_size}_inf1_cores_{num_cores}'\n",
900 | " inf1_compiled_model_dir = os.path.join(inf1_model_dir, compiled_model_dir)\n",
901 | " \n",
902 | " print(f'inf1_compiled_model_dir: {inf1_compiled_model_dir}')\n",
903 | " col_name = lambda opt: f'inf1_{batch_size}_multicores_{num_cores}'\n",
904 | " \n",
905 | " res, iter_times = inf1_predict_benchmark_single_threaded(inf1_compiled_model_dir,\n",
906 | " batch_size = batch_size,\n",
907 | " user_batch_size = batch_size*10,\n",
908 | " num_cores = num_cores,\n",
909 | " use_cache=False, \n",
910 | " warm_up=10)\n",
911 | " \n",
912 | " iter_ds = pd.concat([iter_ds, pd.DataFrame(iter_times, columns=[col_name(opt)])], axis=1)\n",
913 | " results = pd.concat([results, res], axis=1)\n",
914 | " \n",
915 | "display(results)"
916 | ]
917 | },
918 | {
919 | "cell_type": "code",
920 | "execution_count": null,
921 | "metadata": {},
922 | "outputs": [],
923 | "source": []
924 | }
925 | ],
926 | "metadata": {
927 | "kernelspec": {
928 | "display_name": "Environment (conda_aws_neuron_tensorflow_p36)",
929 | "language": "python",
930 | "name": "conda_aws_neuron_tensorflow_p36"
931 | },
932 | "language_info": {
933 | "codemirror_mode": {
934 | "name": "ipython",
935 | "version": 3
936 | },
937 | "file_extension": ".py",
938 | "mimetype": "text/x-python",
939 | "name": "python",
940 | "nbconvert_exporter": "python",
941 | "pygments_lexer": "ipython3",
942 | "version": "3.6.11"
943 | }
944 | },
945 | "nbformat": 4,
946 | "nbformat_minor": 4
947 | }
948 |
--------------------------------------------------------------------------------
/kitten.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shashankprasanna/ai-accelerators-examples/c8c9c828e9876161cacf3b165811f814c52f6d3a/kitten.jpg
--------------------------------------------------------------------------------
/sagemaker-tf-cpu-gpu-ei-resnet50.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Amazon SageMaker inference deployment to CPUs, GPUs, and EI\n",
8 | "This example demonstrates Amazon SageMaker inference deployment using SageMaker SDK\n",
9 | "\n",
10 | "This example was tested on Amazon SageMaker Studio Notebook\n",
11 | "Run this notebook using the following Amazon SageMaker Studio conda environment:\n",
12 | "`TensorFlow 2 CPU Optimized`"
13 | ]
14 | },
15 | {
16 | "cell_type": "code",
17 | "execution_count": 1,
18 | "metadata": {},
19 | "outputs": [],
20 | "source": [
21 | "# !pip install --upgrade pip -q\n",
22 | "# !pip install --upgrade sagemaker -q"
23 | ]
24 | },
25 | {
26 | "cell_type": "code",
27 | "execution_count": 2,
28 | "metadata": {},
29 | "outputs": [
30 | {
31 | "name": "stdout",
32 | "output_type": "stream",
33 | "text": [
34 | "sagemaker version: 2.15.1\n",
35 | "tensorflow version: 2.1.0\n"
36 | ]
37 | }
38 | ],
39 | "source": [
40 | "import tarfile\n",
41 | "import sagemaker\n",
42 | "import tensorflow as tf\n",
43 | "import tensorflow.keras as keras\n",
44 | "import shutil\n",
45 | "import os\n",
46 | "import time\n",
47 | "from tensorflow.keras.applications.resnet50 import ResNet50\n",
48 | "\n",
49 | "role = sagemaker.get_execution_role()\n",
50 | "sess = sagemaker.Session()\n",
51 | "region = sess.boto_region_name\n",
52 | "bucket = sess.default_bucket()\n",
53 | "print('sagemaker version: '+sagemaker.__version__)\n",
54 | "print('tensorflow version: '+tf.__version__)"
55 | ]
56 | },
57 | {
58 | "cell_type": "code",
59 | "execution_count": 3,
60 | "metadata": {},
61 | "outputs": [],
62 | "source": [
63 | "def load_save_resnet50_model(model_path):\n",
64 | " model = ResNet50(weights='imagenet')\n",
65 | " shutil.rmtree(model_path, ignore_errors=True)\n",
66 | " model.save(model_path, include_optimizer=False, save_format='tf')\n",
67 | "\n",
68 | "saved_model_dir = 'resnet50_saved_model' \n",
69 | "model_ver = '1'\n",
70 | "model_path = os.path.join(saved_model_dir, model_ver)\n",
71 | "\n",
72 | "# load_save_resnet50_model(model_path)"
73 | ]
74 | },
75 | {
76 | "cell_type": "code",
77 | "execution_count": 4,
78 | "metadata": {},
79 | "outputs": [
80 | {
81 | "name": "stdout",
82 | "output_type": "stream",
83 | "text": [
84 | "./\n",
85 | "./1/\n",
86 | "./1/variables/\n",
87 | "./1/variables/variables.data-00000-of-00001\n",
88 | "./1/variables/variables.index\n",
89 | "./1/saved_model.pb\n",
90 | "./1/assets/\n"
91 | ]
92 | }
93 | ],
94 | "source": [
95 | "shutil.rmtree('model.tar.gz', ignore_errors=True)\n",
96 | "!tar cvfz model.tar.gz -C resnet50_saved_model ."
97 | ]
98 | },
99 | {
100 | "cell_type": "code",
101 | "execution_count": 5,
102 | "metadata": {},
103 | "outputs": [],
104 | "source": [
105 | "from sagemaker.tensorflow.model import TensorFlowModel, TensorFlowPredictor\n",
106 | "\n",
107 | "prefix = 'keras_models'\n",
108 | "s3_model_path = sess.upload_data(path='model.tar.gz', key_prefix=prefix)\n",
109 | "\n",
110 | "model = TensorFlowModel(model_data=s3_model_path, \n",
111 | " framework_version='1.15',\n",
112 | " role=role,\n",
113 | " predictor_cls = TensorFlowPredictor,\n",
114 | " sagemaker_session=sess)"
115 | ]
116 | },
117 | {
118 | "cell_type": "markdown",
119 | "metadata": {},
120 | "source": [
121 | "### Deploy to CPU instance"
122 | ]
123 | },
124 | {
125 | "cell_type": "code",
126 | "execution_count": 6,
127 | "metadata": {},
128 | "outputs": [
129 | {
130 | "name": "stderr",
131 | "output_type": "stream",
132 | "text": [
133 | "update_endpoint is a no-op in sagemaker>=2.\n",
134 | "See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.\n"
135 | ]
136 | },
137 | {
138 | "name": "stdout",
139 | "output_type": "stream",
140 | "text": [
141 | "-------------!"
142 | ]
143 | }
144 | ],
145 | "source": [
146 | "predictor_cpu = model.deploy(initial_instance_count=1, \n",
147 | " instance_type='ml.c5.xlarge')"
148 | ]
149 | },
150 | {
151 | "cell_type": "markdown",
152 | "metadata": {},
153 | "source": [
154 | "### Deploy using EI"
155 | ]
156 | },
157 | {
158 | "cell_type": "code",
159 | "execution_count": 7,
160 | "metadata": {},
161 | "outputs": [
162 | {
163 | "name": "stderr",
164 | "output_type": "stream",
165 | "text": [
166 | "update_endpoint is a no-op in sagemaker>=2.\n",
167 | "See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.\n"
168 | ]
169 | },
170 | {
171 | "name": "stdout",
172 | "output_type": "stream",
173 | "text": [
174 | "-------------!"
175 | ]
176 | }
177 | ],
178 | "source": [
179 | "predictor_ei = model.deploy(initial_instance_count=1, \n",
180 | " instance_type='ml.c5.xlarge',\n",
181 | " accelerator_type='ml.eia2.large')"
182 | ]
183 | },
184 | {
185 | "cell_type": "markdown",
186 | "metadata": {},
187 | "source": [
188 | "### Deploy to GPU instance"
189 | ]
190 | },
191 | {
192 | "cell_type": "code",
193 | "execution_count": 8,
194 | "metadata": {},
195 | "outputs": [
196 | {
197 | "name": "stderr",
198 | "output_type": "stream",
199 | "text": [
200 | "update_endpoint is a no-op in sagemaker>=2.\n",
201 | "See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.\n"
202 | ]
203 | },
204 | {
205 | "name": "stdout",
206 | "output_type": "stream",
207 | "text": [
208 | "-------------!"
209 | ]
210 | }
211 | ],
212 | "source": [
213 | "predictor_gpu = model.deploy(initial_instance_count=1, \n",
214 | " instance_type='ml.g4dn.xlarge')"
215 | ]
216 | },
217 | {
218 | "cell_type": "markdown",
219 | "metadata": {},
220 | "source": [
221 | "### Test endpoint"
222 | ]
223 | },
224 | {
225 | "cell_type": "code",
226 | "execution_count": 9,
227 | "metadata": {},
228 | "outputs": [],
229 | "source": [
230 | "## If you have an existing endpoint, create a predictor using the endpoint name\n",
231 | "\n",
232 | "# from sagemaker.tensorflow.model import TensorFlowPredictor\n",
233 | "# predictor = TensorFlowPredictor('ENDPOINT_NAME',\n",
234 | "# sagemaker_session=sess)"
235 | ]
236 | },
237 | {
238 | "cell_type": "code",
239 | "execution_count": 10,
240 | "metadata": {},
241 | "outputs": [],
242 | "source": [
243 | "def image_preprocess(img, reps=1):\n",
244 | " img = np.asarray(img.resize((224, 224)))\n",
245 | " img = np.stack([img]*reps)\n",
246 | " img = tf.keras.applications.resnet50.preprocess_input(img)\n",
247 | " return img"
248 | ]
249 | },
250 | {
251 | "cell_type": "code",
252 | "execution_count": 11,
253 | "metadata": {},
254 | "outputs": [],
255 | "source": [
256 | "from PIL import Image \n",
257 | "import numpy as np\n",
258 | "import json\n",
259 | "\n",
260 | "img= Image.open('kitten.jpg')\n",
261 | "img = image_preprocess(img, 5)"
262 | ]
263 | },
264 | {
265 | "cell_type": "markdown",
266 | "metadata": {},
267 | "source": [
268 | "### Invoke CPU endpoint"
269 | ]
270 | },
271 | {
272 | "cell_type": "code",
273 | "execution_count": 12,
274 | "metadata": {},
275 | "outputs": [
276 | {
277 | "data": {
278 | "text/plain": [
279 | "[[('n02123159', 'tiger_cat', 0.495739877),\n",
280 | " ('n02123045', 'tabby', 0.434538245),\n",
281 | " ('n02124075', 'Egyptian_cat', 0.0492461845),\n",
282 | " ('n02127052', 'lynx', 0.0143557377),\n",
283 | " ('n02128385', 'leopard', 0.00133766234)]]"
284 | ]
285 | },
286 | "execution_count": 12,
287 | "metadata": {},
288 | "output_type": "execute_result"
289 | }
290 | ],
291 | "source": [
292 | "response = predictor_cpu.predict(data=img)\n",
293 | "probs = np.array(response['predictions'][0])\n",
294 | "tf.keras.applications.resnet.decode_predictions(np.expand_dims(probs, axis=0), top=5)"
295 | ]
296 | },
297 | {
298 | "cell_type": "markdown",
299 | "metadata": {},
300 | "source": [
301 | "### Invoke CPU Instance + EI endpoint"
302 | ]
303 | },
304 | {
305 | "cell_type": "code",
306 | "execution_count": 13,
307 | "metadata": {},
308 | "outputs": [
309 | {
310 | "data": {
311 | "text/plain": [
312 | "[[('n02123159', 'tiger_cat', 0.495739),\n",
313 | " ('n02123045', 'tabby', 0.434539199),\n",
314 | " ('n02124075', 'Egyptian_cat', 0.0492460541),\n",
315 | " ('n02127052', 'lynx', 0.0143557545),\n",
316 | " ('n02128385', 'leopard', 0.00133766781)]]"
317 | ]
318 | },
319 | "execution_count": 13,
320 | "metadata": {},
321 | "output_type": "execute_result"
322 | }
323 | ],
324 | "source": [
325 | "response = predictor_ei.predict(data=img)\n",
326 | "probs = np.array(response['predictions'][0])\n",
327 | "tf.keras.applications.resnet.decode_predictions(np.expand_dims(probs, axis=0), top=5)"
328 | ]
329 | },
330 | {
331 | "cell_type": "markdown",
332 | "metadata": {},
333 | "source": [
334 | "### Invoke G4 GPU Instance with NVIDIA T4 endpoint"
335 | ]
336 | },
337 | {
338 | "cell_type": "code",
339 | "execution_count": 14,
340 | "metadata": {},
341 | "outputs": [
342 | {
343 | "data": {
344 | "text/plain": [
345 | "[[('n02123159', 'tiger_cat', 0.495739311),\n",
346 | " ('n02123045', 'tabby', 0.434538603),\n",
347 | " ('n02124075', 'Egyptian_cat', 0.0492461771),\n",
348 | " ('n02127052', 'lynx', 0.0143557768),\n",
349 | " ('n02128385', 'leopard', 0.00133766851)]]"
350 | ]
351 | },
352 | "execution_count": 14,
353 | "metadata": {},
354 | "output_type": "execute_result"
355 | }
356 | ],
357 | "source": [
358 | "response = predictor_gpu.predict(data=img)\n",
359 | "probs = np.array(response['predictions'][0])\n",
360 | "tf.keras.applications.resnet.decode_predictions(np.expand_dims(probs, axis=0), top=5)"
361 | ]
362 | },
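  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Clean up\n",
    "A suggested final step (not part of the original run): delete the three endpoints created above so they stop incurring charges."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Delete the deployed endpoints (assumes predictor_cpu, predictor_ei, and predictor_gpu are in scope)\n",
    "for predictor in [predictor_cpu, predictor_ei, predictor_gpu]:\n",
    "    predictor.delete_endpoint()"
   ]
  }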
363 | ],
364 | "metadata": {
365 | "instance_type": "ml.t3.medium",
366 | "kernelspec": {
367 | "display_name": "Python 3 (TensorFlow 2 CPU Optimized)",
368 | "language": "python",
369 | "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-west-2:236514542706:image/tensorflow-2.1-cpu-py36"
370 | },
371 | "language_info": {
372 | "codemirror_mode": {
373 | "name": "ipython",
374 | "version": 3
375 | },
376 | "file_extension": ".py",
377 | "mimetype": "text/x-python",
378 | "name": "python",
379 | "nbconvert_exporter": "python",
380 | "pygments_lexer": "ipython3",
381 | "version": "3.6.9"
382 | }
383 | },
384 | "nbformat": 4,
385 | "nbformat_minor": 4
386 | }
387 |
--------------------------------------------------------------------------------