├── .gitignore
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── app.py
├── cdk.json
├── inference
    ├── Dockerfile
    ├── sentiment.py
    └── summarization.py
├── requirements.txt
└── serverless-hugging-face.png


/.gitignore:
--------------------------------------------------------------------------------
  1 | 
  2 | # Created by https://www.gitignore.io/api/osx,linux,python,windows,pycharm,visualstudiocode,node
  3 | # Edit at https://www.gitignore.io/?templates=osx,linux,python,windows,pycharm,visualstudiocode,node
  4 | 
  5 | ### Linux ###
  6 | *~
  7 | 
  8 | # temporary files which can be created if a process still has a handle open of a deleted file
  9 | .fuse_hidden*
 10 | 
 11 | # KDE directory preferences
 12 | .directory
 13 | 
 14 | # Linux trash folder which might appear on any partition or disk
 15 | .Trash-*
 16 | 
 17 | # .nfs files are created when an open file is removed but is still being accessed
 18 | .nfs*
 19 | 
 20 | ### Node ###
 21 | # Logs
 22 | logs
 23 | *.log
 24 | npm-debug.log*
 25 | yarn-debug.log*
 26 | yarn-error.log*
 27 | lerna-debug.log*
 28 | 
 29 | # Diagnostic reports (https://nodejs.org/api/report.html)
 30 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
 31 | 
 32 | # Runtime data
 33 | pids
 34 | *.pid
 35 | *.seed
 36 | *.pid.lock
 37 | 
 38 | # Directory for instrumented libs generated by jscoverage/JSCover
 39 | lib-cov
 40 | 
 41 | # Coverage directory used by tools like istanbul
 42 | coverage
 43 | *.lcov
 44 | 
 45 | # nyc test coverage
 46 | .nyc_output
 47 | 
 48 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
 49 | .grunt
 50 | 
 51 | # Bower dependency directory (https://bower.io/)
 52 | bower_components
 53 | 
 54 | # node-waf configuration
 55 | .lock-wscript
 56 | 
 57 | # Compiled binary addons (https://nodejs.org/api/addons.html)
 58 | build/Release
 59 | 
 60 | # Dependency directories
 61 | node_modules/
 62 | jspm_packages/
 63 | 
 64 | # TypeScript v1 declaration files
 65 | typings/
 66 | 
 67 | # TypeScript cache
 68 | *.tsbuildinfo
 69 | 
 70 | # Optional npm cache directory
 71 | .npm
 72 | 
 73 | # Optional eslint cache
 74 | .eslintcache
 75 | 
 76 | # Optional REPL history
 77 | .node_repl_history
 78 | 
 79 | # Output of 'npm pack'
 80 | *.tgz
 81 | 
 82 | # Yarn Integrity file
 83 | .yarn-integrity
 84 | 
 85 | # dotenv environment variables file
 86 | .env
 87 | .env.test
 88 | 
 89 | # parcel-bundler cache (https://parceljs.org/)
 90 | .cache
 91 | 
 92 | # next.js build output
 93 | .next
 94 | 
 95 | # nuxt.js build output
 96 | .nuxt
 97 | 
 98 | # vuepress build output
 99 | .vuepress/dist
100 | 
101 | # Serverless directories
102 | .serverless/
103 | 
104 | # FuseBox cache
105 | .fusebox/
106 | 
107 | # DynamoDB Local files
108 | .dynamodb/
109 | 
110 | ### OSX ###
111 | # General
112 | .DS_Store
113 | .AppleDouble
114 | .LSOverride
115 | 
116 | # Icon must end with two \r
117 | Icon
118 | 
119 | # Thumbnails
120 | ._*
121 | 
122 | # Files that might appear in the root of a volume
123 | .DocumentRevisions-V100
124 | .fseventsd
125 | .Spotlight-V100
126 | .TemporaryItems
127 | .Trashes
128 | .VolumeIcon.icns
129 | .com.apple.timemachine.donotpresent
130 | 
131 | # Directories potentially created on remote AFP share
132 | .AppleDB
133 | .AppleDesktop
134 | Network Trash Folder
135 | Temporary Items
136 | .apdisk
137 | 
138 | ### PyCharm ###
139 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
140 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
141 | 
142 | # User-specific stuff
143 | .idea/**/workspace.xml
144 | .idea/**/tasks.xml
145 | .idea/**/usage.statistics.xml
146 | .idea/**/dictionaries
147 | .idea/**/shelf
148 | 
149 | # Generated files
150 | .idea/**/contentModel.xml
151 | 
152 | # Sensitive or high-churn files
153 | .idea/**/dataSources/
154 | .idea/**/dataSources.ids
155 | .idea/**/dataSources.local.xml
156 | .idea/**/sqlDataSources.xml
157 | .idea/**/dynamic.xml
158 | .idea/**/uiDesigner.xml
159 | .idea/**/dbnavigator.xml
160 | 
161 | # Gradle
162 | .idea/**/gradle.xml
163 | .idea/**/libraries
164 | 
165 | # Gradle and Maven with auto-import
166 | # When using Gradle or Maven with auto-import, you should exclude module files,
167 | # since they will be recreated, and may cause churn.  Uncomment if using
168 | # auto-import.
169 | .idea/*.xml
170 | .idea/*.iml
171 | .idea
172 | # .idea/modules
173 | # *.iml
174 | # *.ipr
175 | 
176 | # CMake
177 | cmake-build-*/
178 | 
179 | # Mongo Explorer plugin
180 | .idea/**/mongoSettings.xml
181 | 
182 | # File-based project format
183 | *.iws
184 | 
185 | # IntelliJ
186 | out/
187 | 
188 | # mpeltonen/sbt-idea plugin
189 | .idea_modules/
190 | 
191 | # JIRA plugin
192 | atlassian-ide-plugin.xml
193 | 
194 | # Cursive Clojure plugin
195 | .idea/replstate.xml
196 | 
197 | # Crashlytics plugin (for Android Studio and IntelliJ)
198 | com_crashlytics_export_strings.xml
199 | crashlytics.properties
200 | crashlytics-build.properties
201 | fabric.properties
202 | 
203 | # Editor-based Rest Client
204 | .idea/httpRequests
205 | 
206 | # Android studio 3.1+ serialized cache file
207 | .idea/caches/build_file_checksums.ser
208 | 
209 | ### PyCharm Patch ###
210 | # Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721
211 | 
212 | # *.iml
213 | # modules.xml
214 | # .idea/misc.xml
215 | # *.ipr
216 | 
217 | # Sonarlint plugin
218 | .idea/sonarlint
219 | 
220 | ### Python ###
221 | # Byte-compiled / optimized / DLL files
222 | __pycache__/
223 | *.py[cod]
224 | *$py.class
225 | 
226 | # C extensions
227 | *.so
228 | 
229 | # Distribution / packaging
230 | .Python
231 | build/
232 | develop-eggs/
233 | dist/
234 | downloads/
235 | eggs/
236 | .eggs/
237 | lib64/
238 | parts/
239 | sdist/
240 | var/
241 | wheels/
242 | pip-wheel-metadata/
243 | share/python-wheels/
244 | *.egg-info/
245 | .installed.cfg
246 | *.egg
247 | MANIFEST
248 | 
249 | # PyInstaller
250 | #  Usually these files are written by a python script from a template
251 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
252 | *.manifest
253 | *.spec
254 | 
255 | # Installer logs
256 | pip-log.txt
257 | pip-delete-this-directory.txt
258 | 
259 | # Unit test / coverage reports
260 | htmlcov/
261 | .tox/
262 | .nox/
263 | .coverage
264 | .coverage.*
265 | nosetests.xml
266 | coverage.xml
267 | *.cover
268 | .hypothesis/
269 | .pytest_cache/
270 | 
271 | # Translations
272 | *.mo
273 | *.pot
274 | 
275 | # Django stuff:
276 | local_settings.py
277 | db.sqlite3
278 | db.sqlite3-journal
279 | 
280 | # Flask stuff:
281 | instance/
282 | .webassets-cache
283 | 
284 | # Scrapy stuff:
285 | .scrapy
286 | 
287 | # Sphinx documentation
288 | docs/_build/
289 | 
290 | # PyBuilder
291 | target/
292 | 
293 | # Jupyter Notebook
294 | .ipynb_checkpoints
295 | 
296 | # IPython
297 | profile_default/
298 | ipython_config.py
299 | 
300 | # pyenv
301 | .python-version
302 | 
303 | # pipenv
304 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
305 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
306 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
307 | #   install all needed dependencies.
308 | #Pipfile.lock
309 | 
310 | # celery beat schedule file
311 | celerybeat-schedule
312 | 
313 | # SageMath parsed files
314 | *.sage.py
315 | 
316 | # Environments
317 | .venv
318 | env/
319 | venv/
320 | ENV/
321 | env.bak/
322 | venv.bak/
323 | 
324 | # Spyder project settings
325 | .spyderproject
326 | .spyproject
327 | 
328 | # Rope project settings
329 | .ropeproject
330 | 
331 | # mkdocs documentation
332 | /site
333 | 
334 | # mypy
335 | .mypy_cache/
336 | .dmypy.json
337 | dmypy.json
338 | 
339 | # Pyre type checker
340 | .pyre/
341 | 
342 | ### VisualStudioCode ###
343 | .vscode
344 | 
345 | ### VisualStudioCode Patch ###
346 | # Ignore all local history of files
347 | .history
348 | 
349 | ### Windows ###
350 | # Windows thumbnail cache files
351 | Thumbs.db
352 | Thumbs.db:encryptable
353 | ehthumbs.db
354 | ehthumbs_vista.db
355 | 
356 | # Dump file
357 | *.stackdump
358 | 
359 | # Folder config file
360 | [Dd]esktop.ini
361 | 
362 | # Recycle Bin used on file shares
363 | $RECYCLE.BIN/
364 | 
365 | # Windows Installer files
366 | *.cab
367 | *.msi
368 | *.msix
369 | *.msm
370 | *.msp
371 | 
372 | # Windows shortcuts
373 | *.lnk
374 | 
375 | # End of https://www.gitignore.io/api/osx,linux,python,windows,pycharm,visualstudiocode,node
376 | 
377 | ### CDK-specific ignores ###
378 | *.swp
379 | cdk.context.json
380 | package-lock.json
381 | yarn.lock
382 | .cdk.staging
383 | cdk.out


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | ## Code of Conduct
2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
4 | opensource-codeofconduct@amazon.com with any additional questions or comments.
5 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing Guidelines
 2 | 
 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional
 4 | documentation, we greatly value feedback and contributions from our community.
 5 | 
 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary
 7 | information to effectively respond to your bug report or contribution.
 8 | 
 9 | 
10 | ## Reporting Bugs/Feature Requests
11 | 
12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features.
13 | 
14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already
15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful:
16 | 
17 | * A reproducible test case or series of steps
18 | * The version of our code being used
19 | * Any modifications you've made relevant to the bug
20 | * Anything unusual about your environment or deployment
21 | 
22 | 
23 | ## Contributing via Pull Requests
24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that:
25 | 
26 | 1. You are working against the latest source on the *main* branch.
27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already.
28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted.
29 | 
30 | To send us a pull request, please:
31 | 
32 | 1. Fork the repository.
33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change.
34 | 3. Ensure local tests pass.
35 | 4. Commit to your fork using clear commit messages.
36 | 5. Send us a pull request, answering any default questions in the pull request interface.
37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation.
38 | 
 39 | GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and
40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/).
41 | 
42 | 
43 | ## Finding contributions to work on
44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start.
45 | 
46 | 
47 | ## Code of Conduct
48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
50 | opensource-codeofconduct@amazon.com with any additional questions or comments.
51 | 
52 | 
53 | ## Security issue notifications
54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue.
55 | 
56 | 
57 | ## Licensing
58 | 
59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution.
60 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT No Attribution
 2 | 
 3 | Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 4 | SPDX-License-Identifier: MIT-0
 5 | 
 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of this
 7 | software and associated documentation files (the "Software"), to deal in the Software
 8 | without restriction, including without limitation the rights to use, copy, modify,
 9 | merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
10 | permit persons to whom the Software is furnished to do so.
11 | 
12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
13 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
14 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
15 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
16 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
17 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Zero administration inference with AWS Lambda for :hugs:
  2 | 
  3 | ### Note: This is not production code and simply meant as a demo
  4 | 
  5 | [Hugging Face](https://huggingface.co/) Transformers is a popular
  6 | open-source project that provides pre-trained, natural language
  7 | processing (NLP) models for a wide variety of use cases. Customers with
  8 | minimal machine learning experience can use pre-trained models to
  9 | enhance their applications quickly using NLP. This includes tasks such
 10 | as text classification, language translation, summarization, and
 11 | question answering - to name a few.
 12 | 
 13 | ## Overview
 14 | 
 15 | Our solution consists of an [AWS Cloud Development
 16 | Kit](https://aws.amazon.com/cdk/) (AWS CDK) script that automatically
 17 | provisions container image-based Lambda functions that perform ML
 18 | inference using pre-trained Hugging Face models. This solution also
 19 | includes [Amazon Elastic File System](https://aws.amazon.com/efs/) (EFS)
 20 | storage that is attached to the Lambda functions to cache the
 21 | pre-trained models and reduce inference latency.
 22 | 
 23 | ![Architecture diagram](serverless-hugging-face.png)
 24 | In this architectural diagram:
 25 | 1.  Serverless inference is achieved by using Lambda functions that are
 26 |     based on container image
 27 | 2.  The container image is stored in an [Amazon Elastic Container
 28 |     Registry](https://aws.amazon.com/ecr/) (ECR) repository within your
 29 |     account
 30 | 3.  Pre-trained models are automatically downloaded from Hugging Face
 31 |     the first time the function is invoked
 32 | 4.  Pre-trained models are cached within Amazon Elastic File System
 33 |     storage in order to improve inference latency
 34 | 
 35 | The solution includes Python scripts for two common NLP use cases:
 36 | -   Sentiment analysis: Identifying if a sentence indicates positive or negative sentiment. It uses a fine-tuned model on sst2, which is a [GLUE task](https://arxiv.org/abs/1804.07461).
 37 | -   Summarization: Summarizing a body of text into a shorter, representative text. It uses a [Bart](https://arxiv.org/abs/1910.13461) [model](https://huggingface.co/transformers/model_doc/bart.html) that was fine-tuned on the [CNN / Daily Mail dataset](https://github.com/abisee/cnn-dailymail).
 38 | For simplicity, both of these use cases are implemented using Hugging
 39 | Face
 40 | [pipelines](https://huggingface.co/transformers/main_classes/pipelines.html).
 41 | 
 42 | ## Prerequisites
 43 | The following is required to run this example:
 44 | -   [git](https://git-scm.com/)
 45 | -   [AWS CDK v2](https://docs.aws.amazon.com/cdk/latest/guide/getting_started.html)
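 46 | -   [Python](https://www.python.org/) 3.9 (the ```app``` command in ```cdk.json``` runs ```python3.9```; edit it if you use a different interpreter)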
 47 | -   [A virtual env](https://docs.python.org/3/library/venv.html#module-venv) (optional)
 48 | 
 49 | ## Deploying the example application
 50 | 1.  Clone the project to your development environment:
 51 | ```bash
 52 | git clone https://github.com/aws-samples/zero-administration-inference-with-aws-lambda-for-hugging-face.git
 53 | ```
 54 | 2.  Install the required dependencies:
 55 | ```bash
 56 | pip install -r requirements.txt
 57 | ```
 58 | 3.  Bootstrap the CDK. This command provisions the initial resources
 59 |     needed by the CDK to perform deployments:
 60 | ```bash
 61 | cdk bootstrap
 62 | ```
 63 | 4.  This command deploys the CDK application to its environment. During
 64 |     the deployment, the toolkit outputs progress indications:
 65 | ```bash
 66 | cdk deploy
 67 | ```
 68 | ## Understanding the code structure
 69 | The code is organized using the following structure:
 70 | ```bash
 71 | ├── inference
 72 | │   ├── Dockerfile
 73 | │   ├── sentiment.py
 74 | │   └── summarization.py
 75 | ├── app.py
 76 | └── ...
 77 | ```
 78 | 
 79 | The ```inference``` directory contains:
 80 | -   The ```Dockerfile``` used to build a custom image to be able to run PyTorch Hugging Face inference using Lambda functions
 81 | -   The Python scripts that perform the actual ML inference
 82 | 
 83 | The ```sentiment.py``` script shows how to use a Hugging Face Transformers
 84 | model:
 85 | 
 86 | ```python
 87 | import json
 88 | from transformers import pipeline
 89 | 
 90 | nlp = pipeline("sentiment-analysis")
 91 | 
 92 | def handler(event, context):
 93 |     response = {
 94 |         "statusCode": 200,
 95 |         "body": nlp(event['text'])[0]
 96 |     }
 97 |     return response
 98 | ```
 99 | For each Python script in the inference directory, the CDK generates a
100 | Lambda function backed by a container image and a Python inference
101 | script.
102 | 
103 | ## CDK script
104 | The CDK script is named ```app.py``` in the solution's repository. The
105 | beginning of the script creates a virtual private cloud (VPC).
106 | ```python
107 | vpc = ec2.Vpc(self, 'Vpc', max_azs=2)
108 | ```
109 | Next, it creates the EFS file system and an access point in EFS for the
110 | cached model:
111 | ```python
112 | fs = efs.FileSystem(self, 'FileSystem',
113 | vpc=vpc,
114 | removal_policy=RemovalPolicy.DESTROY)
115 | access_point = fs.add_access_point('MLAccessPoint',
116 | create_acl=efs.Acl(
117 | owner_gid='1001', owner_uid='1001', permissions='750'),
118 | path="/export/models",
119 | posix_user=efs.PosixUser(gid="1001", uid="1001"))
120 | ```
121 | It iterates through the Python files in the inference directory:
122 | ```python
123 | docker_folder = os.path.dirname(os.path.realpath(__file__)) + "/inference"
124 | pathlist = Path(docker_folder).rglob('*.py')
125 | for path in pathlist:
126 | ```
127 | And then creates the Lambda function that serves the inference requests:
128 | ```python
129 | base = os.path.basename(path)
130 | filename = os.path.splitext(base)[0]
131 | # Lambda Function from docker image
132 | function = lambda_.DockerImageFunction(
133 |     self, filename,
134 |     code=lambda_.DockerImageCode.from_image_asset(docker_folder,
135 |     cmd=[filename+".handler"]),
136 |     memory_size=8096,
137 |     timeout=Duration.seconds(600),
138 |     vpc=vpc,
139 |     filesystem=lambda_.FileSystem.from_efs_access_point(
140 |     access_point, '/mnt/hf_models_cache'),
141 |     environment={
142 |         "TRANSFORMERS_CACHE": "/mnt/hf_models_cache"},
143 |     )
144 | ```
145 | 
146 | ## Adding a translator 
147 | Optionally, you can add more models by adding Python scripts in the
148 | inference directory. For example, add the following code in a file
149 | called ```translate-en2fr.py```:
150 | ```python
151 | import json
152 | from transformers import pipeline
153 | 
154 | 
155 | en_fr_translator = pipeline('translation_en_to_fr')
156 | 
157 | def handler(event, context):
158 |     response = {
159 |         "statusCode": 200,
160 |         "body": en_fr_translator(event['text'])[0]
161 |     }
162 |     return response
163 | ```
164 | Then run:
165 | ```bash
166 | $ cdk synth
167 | $ cdk deploy
168 | ```
169 | This creates a new Lambda function that performs English to French translation.
170 | 
171 | ## Cleaning up
172 | After you are finished experimenting with this project, run ```cdk destroy``` to remove all of the associated infrastructure.
173 | 
174 | ## License
175 | This library is licensed under the MIT No Attribution License. See the [LICENSE](LICENSE) file.
176 | Disclaimer: Deploying the demo applications contained in this repository will potentially cause your AWS Account to be billed for services.
177 | 
178 | ## Links
179 | - [:hugs:](https://huggingface.co)
180 | - [AWS Cloud Development Kit](https://aws.amazon.com/cdk/)
181 | - [Amazon Elastic Container Registry](https://aws.amazon.com/ecr/)
182 | - [AWS Lambda](https://aws.amazon.com/lambda/)
183 | - [Amazon Elastic File System](https://aws.amazon.com/efs/)


--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 3 | SPDX-License-Identifier: MIT-0
 4 | """
 5 | 
 6 | import os
 7 | from pathlib import Path
 8 | from aws_cdk import (
 9 |     aws_lambda as lambda_,
10 |     aws_efs as efs,
11 |     aws_ec2 as ec2
12 | )
13 | from aws_cdk import App, Stack, Duration, RemovalPolicy, Tags
14 | 
15 | from constructs import Construct
16 | 
class ServerlessHuggingFaceStack(Stack):
    """CDK stack that creates one container-image Lambda function per inference script.

    The stack provisions a VPC, an EFS file system with an access point rooted
    at ``/export/models``, and then one ``DockerImageFunction`` for every
    ``*.py`` file found under the ``inference`` folder next to this file. Each
    function mounts the access point at ``/mnt/hf_models_cache`` and points
    ``TRANSFORMERS_CACHE`` at that mount.
    """

    def __init__(self, scope: Construct, id: str, **kwargs) -> None:
        """Define the VPC, the EFS model cache and the inference functions.

        Args:
            scope: Parent construct the stack is attached to.
            id: Construct id of the stack.
            **kwargs: Forwarded unchanged to ``Stack.__init__``.
        """
        super().__init__(scope, id, **kwargs)

        # EFS has to be set up inside a VPC, so the network comes first.
        network = ec2.Vpc(self, 'Vpc', max_azs=2)

        # File system that holds the cached models.
        model_store = efs.FileSystem(
            self, 'FileSystem',
            vpc=network,
            removal_policy=RemovalPolicy.DESTROY,
        )
        cache_acl = efs.Acl(owner_gid='1001', owner_uid='1001', permissions='750')
        cache_user = efs.PosixUser(gid="1001", uid="1001")
        access_point = model_store.add_access_point(
            'MLAccessPoint',
            create_acl=cache_acl,
            path="/export/models",
            posix_user=cache_user,
        )

        # One Lambda function per Python script in the docker build folder.
        # NOTE(review): rglob() also descends into sub-directories, whereas the
        # Dockerfile's `COPY *.py` only copies top-level scripts - confirm that
        # nested scripts are not expected here.
        docker_folder = f"{os.path.dirname(os.path.realpath(__file__))}/inference"
        for script in Path(docker_folder).rglob('*.py'):
            # The script name without its extension doubles as the construct id
            # and as the module part of the "<module>.handler" image command.
            handler_module = script.stem
            image = lambda_.DockerImageCode.from_image_asset(
                docker_folder,
                cmd=[f"{handler_module}.handler"],
            )
            cache_mount = lambda_.FileSystem.from_efs_access_point(
                access_point, '/mnt/hf_models_cache')
            lambda_.DockerImageFunction(
                self, handler_module,
                code=image,
                memory_size=8096,
                timeout=Duration.seconds(600),
                vpc=network,
                filesystem=cache_mount,
                environment={"TRANSFORMERS_CACHE": "/mnt/hf_models_cache"},
            )
54 | 
# Script entry point: cdk.json's "app" command runs this file directly.
app = App()

# Instantiate the single stack of this app, then add the "AwsSample" tag
# through the stack's tag scope.
stack = ServerlessHuggingFaceStack(app, "ServerlessHuggingFaceStack")
Tags.of(stack).add("AwsSample", "ServerlessHuggingFace")

# Synthesize the app; must run after every construct has been defined.
app.synth()


--------------------------------------------------------------------------------
/cdk.json:
--------------------------------------------------------------------------------
1 | {
2 |   "app": "python3.9 app.py",
3 |   "context": {
4 |       "aws-cdk:enableDiffNoFail": "true",
5 |       "@aws-cdk/core:stackRelativeExports": "true"
6 |   }
7 | }


--------------------------------------------------------------------------------
/inference/Dockerfile:
--------------------------------------------------------------------------------
# Two-stage build: the first stage assembles the function directory (handler
# scripts plus pip-installed packages), the second stage copies that directory
# into a fresh copy of the same base image.
ARG FUNCTION_DIR="/function/"

FROM huggingface/transformers-pytorch-cpu as build-image


# Include global arg in this stage of the build
ARG FUNCTION_DIR

# Install aws-lambda-cpp build dependencies
RUN apt-get update && \
  apt-get install -y \
  g++ \
  make \
  cmake \
  unzip \
  libcurl4-openssl-dev


# Create function directory
RUN mkdir -p ${FUNCTION_DIR}

# Copy handler function
# (only the *.py files at the top level of the build context match this pattern)
COPY *.py ${FUNCTION_DIR}

# Install the function's dependencies
# NOTE(review): this uninstall runs in the build stage only; the final stage
# starts again from the base image and copies just ${FUNCTION_DIR}, so it does
# not appear to affect the runtime image - confirm intent.
RUN pip uninstall --yes jupyter
# --target places the packages inside ${FUNCTION_DIR}, so they travel with the
# COPY --from=build-image below.
RUN pip install --target ${FUNCTION_DIR} awslambdaric
RUN pip install --target ${FUNCTION_DIR} sentencepiece protobuf

FROM huggingface/transformers-pytorch-cpu

# Include global arg in this stage of the build
ARG FUNCTION_DIR
# Set working directory to function root directory
WORKDIR ${FUNCTION_DIR}

# Copy in the built dependencies
COPY --from=build-image ${FUNCTION_DIR} ${FUNCTION_DIR}

# Start the awslambdaric module; CMD supplies the "<module>.handler" argument.
ENTRYPOINT [ "python3", "-m", "awslambdaric" ]

# This will get replaced by the proper handler by the CDK script
CMD [ "sentiment.handler" ]
44 | 


--------------------------------------------------------------------------------
/inference/sentiment.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 3 | SPDX-License-Identifier: MIT-0
 4 | """
 5 | 
 6 | import json
 7 | from transformers import pipeline
 8 | 
# Built once at import time, so handler() reuses the same pipeline object on
# every call instead of constructing it per request.
nlp = pipeline("sentiment-analysis")
10 | 
def handler(event, context):
    """Lambda entry point: run sentiment analysis on ``event['text']``.

    Args:
        event: Invocation payload. Expected to be a dict whose ``text`` entry
            is handed to the module-level ``nlp`` pipeline.
        context: Lambda context object; not used.

    Returns:
        A dict with ``statusCode`` 200 and, as ``body``, the first element of
        the pipeline output. If the payload is not a dict or has no ``text``
        value, a dict with ``statusCode`` 400 and an ``error`` message in
        ``body`` is returned instead.
    """
    text = event.get("text") if isinstance(event, dict) else None
    if text is None:
        # Answer malformed requests explicitly rather than letting the
        # invocation die with a bare KeyError/TypeError.
        return {
            "statusCode": 400,
            "body": {"error": "Missing required field 'text'"},
        }

    return {
        "statusCode": 200,
        "body": nlp(text)[0],
    }


--------------------------------------------------------------------------------
/inference/summarization.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 3 | SPDX-License-Identifier: MIT-0
 4 | """
 5 | 
 6 | import json
 7 | from transformers import pipeline
 8 | 
# Built once at import time, so handler() reuses the same pipeline object on
# every call instead of constructing it per request.
summarizer = pipeline("summarization")
10 | 
def handler(event, context):
    """Lambda entry point: summarize the text in ``event['article']``.

    Args:
        event: Invocation payload. Expected to be a dict whose ``article``
            entry is handed to the module-level ``summarizer`` pipeline.
        context: Lambda context object; not used.

    Returns:
        A dict with ``statusCode`` 200 and, as ``body``, the first element of
        the pipeline output. If the payload is not a dict or has no
        ``article`` value, a dict with ``statusCode`` 400 and an ``error``
        message in ``body`` is returned instead.
    """
    article = event.get("article") if isinstance(event, dict) else None
    if article is None:
        # Answer malformed requests explicitly rather than letting the
        # invocation die with a bare KeyError/TypeError.
        return {
            "statusCode": 400,
            "body": {"error": "Missing required field 'article'"},
        }

    return {
        "statusCode": 200,
        "body": summarizer(article)[0],
    }


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | aws-cdk-lib>=2.0.0
2 | constructs>=10.0.0


--------------------------------------------------------------------------------
/serverless-hugging-face.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/zero-administration-inference-with-aws-lambda-for-hugging-face/fbd730f606f40f8baebf330422ad52531af904e4/serverless-hugging-face.png


--------------------------------------------------------------------------------