├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── app.py ├── cdk.json ├── inference ├── Dockerfile ├── sentiment.py └── summarization.py ├── requirements.txt └── serverless-hugging-face.png /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/osx,linux,python,windows,pycharm,visualstudiocode,node 3 | # Edit at https://www.gitignore.io/?templates=osx,linux,python,windows,pycharm,visualstudiocode,node 4 | 5 | ### Linux ### 6 | *~ 7 | 8 | # temporary files which can be created if a process still has a handle open of a deleted file 9 | .fuse_hidden* 10 | 11 | # KDE directory preferences 12 | .directory 13 | 14 | # Linux trash folder which might appear on any partition or disk 15 | .Trash-* 16 | 17 | # .nfs files are created when an open file is removed but is still being accessed 18 | .nfs* 19 | 20 | ### Node ### 21 | # Logs 22 | logs 23 | *.log 24 | npm-debug.log* 25 | yarn-debug.log* 26 | yarn-error.log* 27 | lerna-debug.log* 28 | 29 | # Diagnostic reports (https://nodejs.org/api/report.html) 30 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json 31 | 32 | # Runtime data 33 | pids 34 | *.pid 35 | *.seed 36 | *.pid.lock 37 | 38 | # Directory for instrumented libs generated by jscoverage/JSCover 39 | lib-cov 40 | 41 | # Coverage directory used by tools like istanbul 42 | coverage 43 | *.lcov 44 | 45 | # nyc test coverage 46 | .nyc_output 47 | 48 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) 49 | .grunt 50 | 51 | # Bower dependency directory (https://bower.io/) 52 | bower_components 53 | 54 | # node-waf configuration 55 | .lock-wscript 56 | 57 | # Compiled binary addons (https://nodejs.org/api/addons.html) 58 | build/Release 59 | 60 | # Dependency directories 61 | node_modules/ 62 | jspm_packages/ 63 | 64 | # TypeScript v1 declaration files 65 | typings/ 66 | 67 | # TypeScript cache 68 | *.tsbuildinfo 69 | 70 | 
# Optional npm cache directory 71 | .npm 72 | 73 | # Optional eslint cache 74 | .eslintcache 75 | 76 | # Optional REPL history 77 | .node_repl_history 78 | 79 | # Output of 'npm pack' 80 | *.tgz 81 | 82 | # Yarn Integrity file 83 | .yarn-integrity 84 | 85 | # dotenv environment variables file 86 | .env 87 | .env.test 88 | 89 | # parcel-bundler cache (https://parceljs.org/) 90 | .cache 91 | 92 | # next.js build output 93 | .next 94 | 95 | # nuxt.js build output 96 | .nuxt 97 | 98 | # vuepress build output 99 | .vuepress/dist 100 | 101 | # Serverless directories 102 | .serverless/ 103 | 104 | # FuseBox cache 105 | .fusebox/ 106 | 107 | # DynamoDB Local files 108 | .dynamodb/ 109 | 110 | ### OSX ### 111 | # General 112 | .DS_Store 113 | .AppleDouble 114 | .LSOverride 115 | 116 | # Icon must end with two \r 117 | Icon 118 | 119 | # Thumbnails 120 | ._* 121 | 122 | # Files that might appear in the root of a volume 123 | .DocumentRevisions-V100 124 | .fseventsd 125 | .Spotlight-V100 126 | .TemporaryItems 127 | .Trashes 128 | .VolumeIcon.icns 129 | .com.apple.timemachine.donotpresent 130 | 131 | # Directories potentially created on remote AFP share 132 | .AppleDB 133 | .AppleDesktop 134 | Network Trash Folder 135 | Temporary Items 136 | .apdisk 137 | 138 | ### PyCharm ### 139 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm 140 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 141 | 142 | # User-specific stuff 143 | .idea/**/workspace.xml 144 | .idea/**/tasks.xml 145 | .idea/**/usage.statistics.xml 146 | .idea/**/dictionaries 147 | .idea/**/shelf 148 | 149 | # Generated files 150 | .idea/**/contentModel.xml 151 | 152 | # Sensitive or high-churn files 153 | .idea/**/dataSources/ 154 | .idea/**/dataSources.ids 155 | .idea/**/dataSources.local.xml 156 | .idea/**/sqlDataSources.xml 157 | .idea/**/dynamic.xml 158 | .idea/**/uiDesigner.xml 159 | .idea/**/dbnavigator.xml 160 | 161 | # 
Gradle 162 | .idea/**/gradle.xml 163 | .idea/**/libraries 164 | 165 | # Gradle and Maven with auto-import 166 | # When using Gradle or Maven with auto-import, you should exclude module files, 167 | # since they will be recreated, and may cause churn. Uncomment if using 168 | # auto-import. 169 | .idea/*.xml 170 | .idea/*.iml 171 | .idea 172 | # .idea/modules 173 | # *.iml 174 | # *.ipr 175 | 176 | # CMake 177 | cmake-build-*/ 178 | 179 | # Mongo Explorer plugin 180 | .idea/**/mongoSettings.xml 181 | 182 | # File-based project format 183 | *.iws 184 | 185 | # IntelliJ 186 | out/ 187 | 188 | # mpeltonen/sbt-idea plugin 189 | .idea_modules/ 190 | 191 | # JIRA plugin 192 | atlassian-ide-plugin.xml 193 | 194 | # Cursive Clojure plugin 195 | .idea/replstate.xml 196 | 197 | # Crashlytics plugin (for Android Studio and IntelliJ) 198 | com_crashlytics_export_strings.xml 199 | crashlytics.properties 200 | crashlytics-build.properties 201 | fabric.properties 202 | 203 | # Editor-based Rest Client 204 | .idea/httpRequests 205 | 206 | # Android studio 3.1+ serialized cache file 207 | .idea/caches/build_file_checksums.ser 208 | 209 | ### PyCharm Patch ### 210 | # Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721 211 | 212 | # *.iml 213 | # modules.xml 214 | # .idea/misc.xml 215 | # *.ipr 216 | 217 | # Sonarlint plugin 218 | .idea/sonarlint 219 | 220 | ### Python ### 221 | # Byte-compiled / optimized / DLL files 222 | __pycache__/ 223 | *.py[cod] 224 | *$py.class 225 | 226 | # C extensions 227 | *.so 228 | 229 | # Distribution / packaging 230 | .Python 231 | build/ 232 | develop-eggs/ 233 | dist/ 234 | downloads/ 235 | eggs/ 236 | .eggs/ 237 | lib64/ 238 | parts/ 239 | sdist/ 240 | var/ 241 | wheels/ 242 | pip-wheel-metadata/ 243 | share/python-wheels/ 244 | *.egg-info/ 245 | .installed.cfg 246 | *.egg 247 | MANIFEST 248 | 249 | # PyInstaller 250 | # Usually these files are written by a python script from a template 251 | # before 
PyInstaller builds the exe, so as to inject date/other infos into it. 252 | *.manifest 253 | *.spec 254 | 255 | # Installer logs 256 | pip-log.txt 257 | pip-delete-this-directory.txt 258 | 259 | # Unit test / coverage reports 260 | htmlcov/ 261 | .tox/ 262 | .nox/ 263 | .coverage 264 | .coverage.* 265 | nosetests.xml 266 | coverage.xml 267 | *.cover 268 | .hypothesis/ 269 | .pytest_cache/ 270 | 271 | # Translations 272 | *.mo 273 | *.pot 274 | 275 | # Django stuff: 276 | local_settings.py 277 | db.sqlite3 278 | db.sqlite3-journal 279 | 280 | # Flask stuff: 281 | instance/ 282 | .webassets-cache 283 | 284 | # Scrapy stuff: 285 | .scrapy 286 | 287 | # Sphinx documentation 288 | docs/_build/ 289 | 290 | # PyBuilder 291 | target/ 292 | 293 | # Jupyter Notebook 294 | .ipynb_checkpoints 295 | 296 | # IPython 297 | profile_default/ 298 | ipython_config.py 299 | 300 | # pyenv 301 | .python-version 302 | 303 | # pipenv 304 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 305 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 306 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 307 | # install all needed dependencies. 
308 | #Pipfile.lock 309 | 310 | # celery beat schedule file 311 | celerybeat-schedule 312 | 313 | # SageMath parsed files 314 | *.sage.py 315 | 316 | # Environments 317 | .venv 318 | env/ 319 | venv/ 320 | ENV/ 321 | env.bak/ 322 | venv.bak/ 323 | 324 | # Spyder project settings 325 | .spyderproject 326 | .spyproject 327 | 328 | # Rope project settings 329 | .ropeproject 330 | 331 | # mkdocs documentation 332 | /site 333 | 334 | # mypy 335 | .mypy_cache/ 336 | .dmypy.json 337 | dmypy.json 338 | 339 | # Pyre type checker 340 | .pyre/ 341 | 342 | ### VisualStudioCode ### 343 | .vscode 344 | 345 | ### VisualStudioCode Patch ### 346 | # Ignore all local history of files 347 | .history 348 | 349 | ### Windows ### 350 | # Windows thumbnail cache files 351 | Thumbs.db 352 | Thumbs.db:encryptable 353 | ehthumbs.db 354 | ehthumbs_vista.db 355 | 356 | # Dump file 357 | *.stackdump 358 | 359 | # Folder config file 360 | [Dd]esktop.ini 361 | 362 | # Recycle Bin used on file shares 363 | $RECYCLE.BIN/ 364 | 365 | # Windows Installer files 366 | *.cab 367 | *.msi 368 | *.msix 369 | *.msm 370 | *.msp 371 | 372 | # Windows shortcuts 373 | *.lnk 374 | 375 | # End of https://www.gitignore.io/api/osx,linux,python,windows,pycharm,visualstudiocode,node 376 | 377 | ### CDK-specific ignores ### 378 | *.swp 379 | cdk.context.json 380 | package-lock.json 381 | yarn.lock 382 | .cdk.staging 383 | cdk.out -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 
5 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. 
Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 60 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT No Attribution 2 | 3 | Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
4 | SPDX-License-Identifier: MIT-0 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of this 7 | software and associated documentation files (the "Software"), to deal in the Software 8 | without restriction, including without limitation the rights to use, copy, modify, 9 | merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so. 11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 13 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 14 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 15 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 16 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 17 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Zero administration inference with AWS Lambda for :hugs: 2 | 3 | ### Note: This is not production code and simply meant as a demo 4 | 5 | [Hugging Face](https://huggingface.co/) Transformers is a popular 6 | open-source project that provides pre-trained, natural language 7 | processing (NLP) models for a wide variety of use cases. Customers with 8 | minimal machine learning experience can use pre-trained models to 9 | enhance their applications quickly using NLP. This includes tasks such 10 | as text classification, language translation, summarization, and 11 | question answering - to name a few. 
12 | 13 | ## Overview 14 | 15 | Our solution consists of an [AWS Cloud Development 16 | Kit](https://aws.amazon.com/cdk/) (AWS CDK) script that automatically 17 | provisions container image-based Lambda functions that perform ML 18 | inference using pre-trained Hugging Face models. This solution also 19 | includes [Amazon Elastic File System](https://aws.amazon.com/efs/) (EFS) 20 | storage that is attached to the Lambda functions to cache the 21 | pre-trained models and reduce inference latency. 22 | 23 | ![Architecture diagram](serverless-hugging-face.png) 24 | In this architectural diagram: 25 | 1. Serverless inference is achieved by using Lambda functions that are 26 | based on container image 27 | 2. The container image is stored in an [Amazon Elastic Container 28 | Registry](https://aws.amazon.com/ecr/) (ECR) repository within your 29 | account 30 | 3. Pre-trained models are automatically downloaded from Hugging Face 31 | the first time the function is invoked 32 | 4. Pre-trained models are cached within Amazon Elastic File System 33 | storage in order to improve inference latency 34 | 35 | The solution includes Python scripts for two common NLP use cases: 36 | - Sentiment analysis: Identifying if a sentence indicates positive or negative sentiment. It uses a fine-tuned model on sst2, which is a [GLUE task](https://arxiv.org/abs/1804.07461). 37 | - Summarization: Summarizing a body of text into a shorter, representative text. It uses a [Bart](https://arxiv.org/abs/1910.13461) [model](https://huggingface.co/transformers/model_doc/bart.html) that was fine-tuned on the [CNN / Daily Mail dataset](https://github.com/abisee/cnn-dailymail). 38 | For simplicity, both of these use cases are implemented using Hugging 39 | Face 40 | [pipelines](https://huggingface.co/transformers/main_classes/pipelines.html). 
41 | 42 | ## Prerequisites 43 | The following are required to run this example: 44 | - [git](https://git-scm.com/) 45 | - [AWS CDK v2](https://docs.aws.amazon.com/cdk/latest/guide/getting_started.html) 46 | - [Python](https://www.python.org/) 3.6+ 47 | - [A virtual env](https://docs.python.org/3/library/venv.html#module-venv) (optional) 48 | 49 | ## Deploying the example application 50 | 1. Clone the project to your development environment: 51 | ```bash 52 | git clone https://github.com/aws-samples/zero-administration-inference-with-aws-lambda-for-hugging-face.git 53 | ``` 54 | 2. Install the required dependencies: 55 | ```bash 56 | pip install -r requirements.txt 57 | ``` 58 | 3. Bootstrap the CDK. This command provisions the initial resources 59 | needed by the CDK to perform deployments: 60 | ```bash 61 | cdk bootstrap 62 | ``` 63 | 4. This command deploys the CDK application to its environment. During 64 | the deployment, the toolkit outputs progress indications: 65 | ```bash 66 | cdk deploy 67 | ``` 68 | ## Understanding the code structure 69 | The code is organized using the following structure: 70 | ```bash 71 | ├── inference 72 | │ ├── Dockerfile 73 | │ ├── sentiment.py 74 | │ └── summarization.py 75 | ├── app.py 76 | └── ... 77 | ``` 78 | 79 | The ```inference``` directory contains: 80 | - The ```Dockerfile``` used to build a custom image to be able to run PyTorch Hugging Face inference using Lambda functions 81 | - The Python scripts that perform the actual ML inference 82 | 83 | The ```sentiment.py``` script shows how to use a Hugging Face Transformers 84 | model: 85 | 86 | ```python 87 | import json 88 | from transformers import pipeline 89 | 90 | nlp = pipeline("sentiment-analysis") 91 | 92 | def handler(event, context): 93 | response = { 94 | "statusCode": 200, 95 | "body": nlp(event['text'])[0] 96 | } 97 | return response 98 | ``` 99 | For each Python script in the inference directory, the CDK generates a 100 | Lambda function backed by a container image and a Python inference 101 | script. 
102 | 103 | ## CDK script 104 | The CDK script is named ```app.py``` in the solution's repository. The 105 | beginning of the script creates a virtual private cloud (VPC). 106 | ```python 107 | vpc = ec2.Vpc(self, 'Vpc', max_azs=2) 108 | ``` 109 | Next, it creates the EFS file system and an access point in EFS for the 110 | cached model: 111 | ```python 112 | fs = efs.FileSystem(self, 'FileSystem', 113 | vpc=vpc, 114 | removal_policy=RemovalPolicy.DESTROY) 115 | access_point = fs.add_access_point('MLAccessPoint', 116 | create_acl=efs.Acl( 117 | owner_gid='1001', owner_uid='1001', permissions='750'), 118 | path="/export/models", 119 | posix_user=efs.PosixUser(gid="1001", uid="1001")) 120 | ``` 121 | It iterates through the Python files in the inference directory: 122 | ```python 123 | docker_folder = os.path.dirname(os.path.realpath(__file__)) + "/inference" 124 | pathlist = Path(docker_folder).rglob('*.py') 125 | for path in pathlist: 126 | ``` 127 | And then creates the Lambda function that serves the inference requests: 128 | ```python 129 | base = os.path.basename(path) 130 | filename = os.path.splitext(base)[0] 131 | # Lambda Function from docker image 132 | function = lambda_.DockerImageFunction( 133 | self, filename, 134 | code=lambda_.DockerImageCode.from_image_asset(docker_folder, 135 | cmd=[filename+".handler"]), 136 | memory_size=8096, 137 | timeout=Duration.seconds(600), 138 | vpc=vpc, 139 | filesystem=lambda_.FileSystem.from_efs_access_point( 140 | access_point, '/mnt/hf_models_cache'), 141 | environment={ 142 | "TRANSFORMERS_CACHE": "/mnt/hf_models_cache"}, 143 | ) 144 | ``` 145 | 146 | ## Adding a translator 147 | Optionally, you can add more models by adding Python scripts in the 148 | inference directory. 
For example, add the following code in a file 149 | called ```translate-en2fr.py```: 150 | ```python 151 | import json 152 | from transformers import pipeline 153 | 154 | 155 | en_fr_translator = pipeline('translation_en_to_fr') 156 | 157 | def handler(event, context): 158 | response = { 159 | "statusCode": 200, 160 | "body": en_fr_translator(event['text'])[0] 161 | } 162 | return response 163 | ``` 164 | Then run: 165 | ```bash 166 | $ cdk synth 167 | $ cdk deploy 168 | ``` 169 | This creates a new endpoint to perform English to French translation. 170 | 171 | ## Cleaning up 172 | After you are finished experimenting with this project, run ```cdk destroy``` to remove all of the associated infrastructure. 173 | 174 | ## License 175 | This library is licensed under the MIT No Attribution License. See the [LICENSE](LICENSE) file. 176 | Disclaimer: Deploying the demo applications contained in this repository will potentially cause your AWS Account to be billed for services. 177 | 178 | ## Links 179 | - [:hugs:](https://huggingface.co) 180 | - [AWS Cloud Development Kit](https://aws.amazon.com/cdk/) 181 | - [Amazon Elastic Container Registry](https://aws.amazon.com/ecr/) 182 | - [AWS Lambda](https://aws.amazon.com/lambda/) 183 | - [Amazon Elastic File System](https://aws.amazon.com/efs/) -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
3 | SPDX-License-Identifier: MIT-0 4 | """ 5 | 6 | import os 7 | from pathlib import Path 8 | from aws_cdk import ( 9 | aws_lambda as lambda_, 10 | aws_efs as efs, 11 | aws_ec2 as ec2 12 | ) 13 | from aws_cdk import App, Stack, Duration, RemovalPolicy, Tags 14 | 15 | from constructs import Construct 16 | 17 | class ServerlessHuggingFaceStack(Stack): 18 | def __init__(self, scope: Construct, id: str, **kwargs) -> None: 19 | super().__init__(scope, id, **kwargs) 20 | 21 | # EFS needs to be setup in a VPC 22 | vpc = ec2.Vpc(self, 'Vpc', max_azs=2) 23 | 24 | # creates a file system in EFS to store cache models 25 | fs = efs.FileSystem(self, 'FileSystem', 26 | vpc=vpc, 27 | removal_policy=RemovalPolicy.DESTROY) 28 | access_point = fs.add_access_point('MLAccessPoint', 29 | create_acl=efs.Acl( 30 | owner_gid='1001', owner_uid='1001', permissions='750'), 31 | path="/export/models", 32 | posix_user=efs.PosixUser(gid="1001", uid="1001")) 33 | 34 | # %% 35 | # iterates through the Python files in the docker directory 36 | docker_folder = os.path.dirname(os.path.realpath(__file__)) + "/inference" 37 | pathlist = Path(docker_folder).rglob('*.py') 38 | for path in pathlist: 39 | base = os.path.basename(path) 40 | filename = os.path.splitext(base)[0] 41 | # Lambda Function from docker image 42 | lambda_.DockerImageFunction( 43 | self, filename, 44 | code=lambda_.DockerImageCode.from_image_asset(docker_folder, 45 | cmd=[ 46 | filename+".handler"] 47 | ), 48 | memory_size=8096, 49 | timeout=Duration.seconds(600), 50 | vpc=vpc, 51 | filesystem=lambda_.FileSystem.from_efs_access_point(access_point, '/mnt/hf_models_cache'), 52 | environment={"TRANSFORMERS_CACHE": "/mnt/hf_models_cache"}, 53 | ) 54 | 55 | app = App() 56 | 57 | stack = ServerlessHuggingFaceStack(app, "ServerlessHuggingFaceStack") 58 | Tags.of(stack).add("AwsSample", "ServerlessHuggingFace") 59 | 60 | app.synth() -------------------------------------------------------------------------------- /cdk.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "app": "python3.9 app.py", 3 | "context": { 4 | "aws-cdk:enableDiffNoFail": "true", 5 | "@aws-cdk/core:stackRelativeExports": "true" 6 | } 7 | } -------------------------------------------------------------------------------- /inference/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG FUNCTION_DIR="/function/" 2 | 3 | FROM huggingface/transformers-pytorch-cpu as build-image 4 | 5 | 6 | # Include global arg in this stage of the build 7 | ARG FUNCTION_DIR 8 | 9 | # Install aws-lambda-cpp build dependencies 10 | RUN apt-get update && \ 11 | apt-get install -y \ 12 | g++ \ 13 | make \ 14 | cmake \ 15 | unzip \ 16 | libcurl4-openssl-dev 17 | 18 | 19 | # Create function directory 20 | RUN mkdir -p ${FUNCTION_DIR} 21 | 22 | # Copy handler function 23 | COPY *.py ${FUNCTION_DIR} 24 | 25 | # Install the function's dependencies 26 | RUN pip uninstall --yes jupyter 27 | RUN pip install --target ${FUNCTION_DIR} awslambdaric 28 | RUN pip install --target ${FUNCTION_DIR} sentencepiece protobuf 29 | 30 | FROM huggingface/transformers-pytorch-cpu 31 | 32 | # Include global arg in this stage of the build 33 | ARG FUNCTION_DIR 34 | # Set working directory to function root directory 35 | WORKDIR ${FUNCTION_DIR} 36 | 37 | # Copy in the built dependencies 38 | COPY --from=build-image ${FUNCTION_DIR} ${FUNCTION_DIR} 39 | 40 | ENTRYPOINT [ "python3", "-m", "awslambdaric" ] 41 | 42 | # This will get replaced by the proper handler by the CDK script 43 | CMD [ "sentiment.handler" ] 44 | -------------------------------------------------------------------------------- /inference/sentiment.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
3 | SPDX-License-Identifier: MIT-0 4 | """ 5 | 6 | import json 7 | from transformers import pipeline 8 | 9 | nlp = pipeline("sentiment-analysis") 10 | 11 | def handler(event, context): 12 | response = { 13 | "statusCode": 200, 14 | "body": nlp(event['text'])[0] 15 | } 16 | return response -------------------------------------------------------------------------------- /inference/summarization.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | SPDX-License-Identifier: MIT-0 4 | """ 5 | 6 | import json 7 | from transformers import pipeline 8 | 9 | summarizer = pipeline("summarization") 10 | 11 | def handler(event, context): 12 | response = { 13 | "statusCode": 200, 14 | "body": summarizer(event['article'])[0] 15 | } 16 | return response -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | aws-cdk-lib>=2.0.0 2 | constructs>=10.0.0 -------------------------------------------------------------------------------- /serverless-hugging-face.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/zero-administration-inference-with-aws-lambda-for-hugging-face/fbd730f606f40f8baebf330422ad52531af904e4/serverless-hugging-face.png --------------------------------------------------------------------------------