├── .gitignore
├── Makefile
├── NOTICE
├── README.md
├── app.py
├── cdk.json
├── docs
│   └── assets
│       ├── architecture.png
│       ├── model_response_example.png
│       ├── question_answering_input_example.png
│       └── swagger_ui_docs.png
├── fastapi_model_serving
│   ├── __init__.py
│   └── fastapi_model_serving_stack.py
├── model_endpoint
│   ├── docker
│   │   └── Dockerfile
│   └── runtime
│       └── serving_api
│           ├── custom_lambda_utils
│           │   ├── __init__.py
│           │   └── scripts
│           │       ├── __init__.py
│           │       └── inference.py
│           ├── requirements.txt
│           └── serving_api.py
├── requirements.txt
├── scripts
│   ├── init-lambda-code.sh
│   ├── setup.sh
│   └── update_notice.sh
├── source.bat
└── templates
    ├── api
    │   └── api.py
    └── dummy
        └── dummy.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
*.swp
package-lock.json
__pycache__
.pytest_cache
.venv
*.egg-info

# CDK asset staging directory
.cdk.staging
cdk.out

# ignore large files
model_endpoint/docker_assets/*
model_endpoint/runtime/serving_api/custom_lambda_utils/model_artifacts/*
*.zip
*.tar.gz

# ignore macOS file system stuff
.DS_Store

--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
all: prep package_model synth deploy

prep:
	bash -e scripts/setup.sh

update_notice:
	bash -e scripts/update_notice.sh

package_model:
	. .venv/bin/activate && cd ./model_endpoint/runtime/serving_api && tar czvf ../../docker/serving_api.tar.gz custom_lambda_utils requirements.txt serving_api.py

cdk_bootstrap:
	. ./.venv/bin/activate && cdk bootstrap

synth:
	. .venv/bin/activate && cdk synth

deploy:
	. .venv/bin/activate && cdk deploy

destroy:
	. .venv/bin/activate && cdk destroy

clean:
	rm -r .venv/ cdk.out/
--------------------------------------------------------------------------------
/NOTICE:
--------------------------------------------------------------------------------
Name                Version    License
PyYAML              6.0        MIT License
anyio               3.6.1      MIT License
attrs               22.1.0     MIT License
aws-cdk-lib         2.29.1     Apache-2.0
boto3               1.24.53    Apache Software License
botocore            1.27.53    Apache Software License
cattrs              22.1.0     MIT License
certifi             2022.6.15  Mozilla Public License 2.0 (MPL 2.0)
charset-normalizer  2.1.0      MIT License
click               8.1.3      BSD License
constructs          10.1.76    Apache-2.0
distlib             0.3.5      Python Software Foundation License
docopt              0.6.2      MIT License
exceptiongroup      1.0.0rc8   MIT License
fastapi             0.79.0     MIT License
filelock            3.8.0      Public Domain
h11                 0.13.0     MIT License
httptools           0.4.0      MIT License
huggingface-hub     0.8.1      Apache Software License
idna                3.3        BSD License
jmespath            1.0.1      MIT License
joblib              1.1.0      BSD License
jsii                1.64.0     Apache Software License
mangum              0.15.1     MIT License
numpy               1.23.2     BSD License
packaging           21.3       Apache Software License; BSD License
pipenv              2022.8.19  MIT License
pipreqs             0.4.11     Apache Software License
platformdirs        2.5.2      MIT License
publication         0.0.3      MIT License
pydantic            1.9.2      MIT License
pyparsing           3.0.9      MIT License
python-dateutil     2.8.2      Apache Software License; BSD License
python-dotenv       0.20.0     BSD License
regex               2022.7.25  Apache Software License
requests            2.28.1     Apache Software License
s3transfer          0.6.0      Apache Software License
sacremoses          0.0.53     MIT License
sentencepiece       0.1.97     Apache Software License
six                 1.16.0     MIT License
sniffio             1.2.0      Apache Software License; MIT License
starlette           0.19.1     BSD License
tokenizers          0.12.1     Apache Software License
torch               1.11.0     BSD License
tqdm                4.64.0     MIT License; Mozilla Public License 2.0 (MPL 2.0)
transformers        4.21.1     Apache Software License
typeguard           2.13.3     MIT License
typing_extensions   4.3.0      Python Software Foundation License
urllib3             1.26.11    MIT License
uvicorn             0.18.2     BSD License
uvloop              0.16.0     Apache Software License; MIT License
virtualenv          20.16.3    MIT License
virtualenv-clone    0.5.7      MIT License
watchfiles          0.16.1     MIT License
websockets          10.3       BSD License
yarg                0.1.9      MIT License

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Deploy an ML serverless inference endpoint using FastAPI, AWS Lambda and AWS CDK

## Architecture
![Architecture](docs/assets/architecture.png)

## Prerequisites

- Python 3, along with virtualenv for creating and managing virtual environments in Python.
- `aws-cdk v2`, so that the aws-cdk CLI is available on your system.
- Docker, installed (and, for the deployment part, running!) on your local machine.

### Test if all necessary software is installed:

The AWS CLI is needed. Log in to your account and select the region where you want to deploy the solution.

Python 3 is needed. For this solution we use Python 3.10, the version pinned in the Dockerfile. Make sure that the Python version you use locally is consistent with the Python version specified in the Dockerfile.
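For reference, this is the base image pinned in `model_endpoint/docker/Dockerfile` at the time of writing:

```dockerfile
FROM public.ecr.aws/lambda/python:3.10-x86_64
```

Check your local Python version with: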
```shell
python3 --version
```

Check if virtualenv is installed for creating and managing virtual environments in Python. Strictly speaking, this is not a hard requirement, but it will make your life easier and makes this blog post easier to follow.
```shell
python3 -m virtualenv --version
```

Check if cdk is installed. This will be used to deploy our solution.
```shell
cdk --version
```

Check if Docker is installed. The solution makes your model accessible to your Lambda function through a Docker image, and building this image locally requires Docker.
```shell
docker --version
```
Also make sure it is up and running by running ```docker ps```.

## How to structure your FastAPI project using AWS CDK

We are using the following directory structure for our project (ignoring some boilerplate cdk code that is immaterial in the context of this blog post):

```
fastapi_model_serving
│
└───.venv
│
└───fastapi_model_serving
│   │   __init__.py
│   │   fastapi_model_serving_stack.py
│   │
│   └───model_endpoint
│       └───docker
│       │   │   Dockerfile
│       │   │   serving_api.tar.gz
│       │
│       └───runtime
│           └───serving_api
│               │   requirements.txt
│               │   serving_api.py
│               └───custom_lambda_utils
│                   └───model_artifacts
│                   │       ...
│                   └───scripts
│                           inference.py
│
└───templates
│   └───api
│   │       api.py
│   └───dummy
│           dummy.py
│
└───scripts
│   └───init-lambda-code.sh
│   └───setup.sh
│   └───update_notice.sh
│
│   app.py
│   cdk.json
│   README.md
│   requirements.txt

```

The directory follows the recommended structure of cdk projects for Python.

The most important part of this repository is the ```fastapi_model_serving``` directory. It contains the code that defines the cdk stack and the resources that are used for model serving.

`model_endpoint` directory:
- contains all the assets that make up our serverless endpoint: the Dockerfile to build the Docker image that AWS Lambda will use, the Lambda function code that uses FastAPI to handle inference requests and route them to the correct endpoint, and the artifacts of the model that we want to deploy.

Inside `model_endpoint`, we have the following structure:
- `docker` directory:
  - specifies the `Dockerfile` used to build the image for the Lambda function, with all the artifacts (Lambda function code, model artifacts, ...) placed so that they can be used without issues.
  - `serving_api.tar.gz`: a tarball that contains all the assets from the runtime folder that are necessary for building the Docker image. More on how to create this tar.gz file in the next section.
- `runtime` directory:
  - contains the code for the `serving_api` Lambda function and its dependencies, specified in the `requirements.txt` file,
  - as well as the `custom_lambda_utils` directory, which includes an `inference` script that loads the necessary model artifacts so that the model can be passed to the `serving_api` that then exposes it as an endpoint.

Besides, we have the `templates` directory, which provides a template folder structure and files where you can define your own customised code and APIs following the sample we went through above:

- `templates` directory: contains dummy code that can be used to create new lambda functions from (see the scaffolding example below):
  - `dummy` contains the code that implements the structure of an ordinary AWS Lambda function using the Python runtime,
  - `api` contains the code for an AWS Lambda function that wraps a FastAPI endpoint around an existing API Gateway.
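As a sketch of how these templates are meant to be used, the helper script `scripts/init-lambda-code.sh` copies one of them into a fresh component skeleton. The component and package names below are placeholders, not part of the repository:

```shell
# run from the repository root; creates my_component/runtime/my_api/my_api.py
# from the FastAPI template ("my_component" and "my_api" are hypothetical names)
bash scripts/init-lambda-code.sh --api my_component my_api

# omit --api to start from the plain dummy lambda template instead
bash scripts/init-lambda-code.sh my_component my_dummy
```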

## Step-by-step walk-through: Deploying the solution

NOTE: By default, the code is deployed in the eu-west-1 region. If you want to deploy to another region of your choice, change the `DEPLOYMENT_REGION` context variable in the `cdk.json` file.
Beware also of the CPU architecture: as checked in, the solution hosts the lambda on the x86_64 architecture (see the `architecture` parameter in the `fastapi_model_serving_stack.py` file and the first line of the Dockerfile inside the `model_endpoint/docker` directory). If you want to host the solution on arm64 instead, and that feature is available in your region, change both of those places accordingly.


1) First, run the following command to clone the git repository:
`git clone https://github.com/aws-samples/lambda-serverless-inference-fastapi`
Since we would like to showcase that the solution works with model artifacts that you train locally, we include a sample artifact of a DistilBERT model pretrained for question answering (from the Hugging Face model hub) in the `serving_api.tar.gz` file. Hence, downloading can take around **3 to 5 minutes**.

2) Now, let's set up the environment to recreate the blog post. This step downloads the pretrained model that will be deployed from the Hugging Face model hub into the `./model_endpoint/runtime/serving_api/custom_lambda_utils/model_artifacts` directory. It also creates a virtual environment and installs all dependencies that are needed. You only need to run this command once:
```shell
make prep
```
This command can take around **5 minutes** (depending on your internet bandwidth) because it needs to download the model artifacts.


3) The model artifacts need to be packaged inside a .tar.gz archive that is used inside the Docker image built in the cdk stack. Run this command whenever you make changes to the model artifacts or the API itself, so that your serving endpoint is always packaged in its most up-to-date version:
```shell
make package_model
```
Finally, the artifacts are all in place. Now we can move on to deploying the cdk stack to your AWS account.


4) ```FIRST TIME CDK USERS ONLY```: If this is your first time deploying an AWS CDK app into an environment (account + region combination), you will need to bootstrap the environment, i.e., prepare it for use with `CDK`. This will create a stack that includes resources that are needed for the toolkit's operation. For example, the stack includes an S3 bucket that is used to store templates and assets during the deployment process.
```shell
make cdk_bootstrap
```
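The `cdk_bootstrap` target bootstraps whatever account and region your CLI is currently configured for. As a sketch — with a placeholder account ID — you can also point `cdk bootstrap` at an explicit environment:

```shell
# bootstrap a specific account/region pair; 123456789012 is a placeholder account ID
cdk bootstrap aws://123456789012/eu-west-1
```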

5) Since we are building Docker images locally in this cdk deployment, the Docker daemon needs to be running before we can deploy the stack via the cdk CLI. To check whether the Docker daemon is running on your system, use the following command:
```shell
docker ps
```
If you don't get an error message, you should be good to deploy the solution.


6) Deploy the solution with the following command:
```shell
make deploy
```
This step can take around **5-10 minutes** due to building and pushing the Docker image.

## Running real-time inference

### Using the API documentation in the browser
After your `AWS CloudFormation` stack has been deployed successfully, go to its `Outputs` section and open the endpoint URL shown there. Now our model is accessible via the endpoint URL and ready to use for real-time inference!

1) Go to the URL to see whether you get the `"hello world"` message, and go to `{endpoint_url}/docs` to see whether you can successfully load the interactive Swagger UI page. Note that there may be some cold-start latency, so you may need to wait a moment or refresh a few times.
![Swagger UI Docs](docs/assets/swagger_ui_docs.png)


2) Once you are on the landing page of the FastAPI Swagger UI, you can execute your endpoint via the root `/` or the `/question` resource. From `/`, you can try out and execute the API and get the `"hello world"` message.
From `/question`, you can try out and execute the API and run ML inference on the model we deployed for the question answering case. Here is one example.

The question is ```What is the color of my car now?``` and the context is ```My car used to be blue but I painted red.```
![Question Answering Input Example](docs/assets/question_answering_input_example.png)

Once you click `Execute`, the model will answer the question based on the given context with a response like the one below.
![Model Response Example](docs/assets/model_response_example.png)

In the response body you can see the answer along with the confidence score the model assigns to it. You can also play around with other examples or embed the API in your existing application.

### Real-time inference via code using the `requests` module
Alternatively, you can run the inference via code. Here is one example written in Python, using the `requests` library:


```python
import requests

# optional header and payload vars
headers = {}
payload = {}

url = "https://<api_id>.execute-api.<region>.amazonaws.com/prod/question?question=\"What is the color of my car now?\"&context=\"My car used to be blue but I painted red\""

response = requests.request("GET", url, headers=headers, data=payload)

print(response.text)
```

This code snippet would output a string similar to the following:
```python
'{"score":0.6947233080863953,"start":38,"end":41,"answer":"red"}'
```

### Clean up
Inside the root directory of your repository, run the following command to clean up your created resources:
```shell
make destroy
```

### Troubleshooting

#### If you are a Mac User
Error when logging into ECR with Docker login: ```"Error saving credentials ... not implemented".```
For example: ```exited with error code 1: Error saving credentials: error storing credentials - err: exit status 1,...dial unix backend.sock: connect: connection refused```
#### Solution
Before you can use Lambda on top of Docker containers with cdk, you might have to change the ```credsStore``` parameter in your ```~/.docker/config.json``` file to ```osxkeychain```. That solves Amazon ECR login issues on a Mac.

--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
import os

import aws_cdk as cdk

from fastapi_model_serving.fastapi_model_serving_stack import FastapiModelServingStack


app = cdk.App()

FastapiModelServingStack(
    app,
    "FastapiModelServingStack",
    env=cdk.Environment(
        account=os.getenv("CDK_DEFAULT_ACCOUNT"),
        region=app.node.try_get_context("DEPLOYMENT_REGION"),
    ),
)

app.synth()

--------------------------------------------------------------------------------
/cdk.json:
--------------------------------------------------------------------------------
{
  "app": "python3 app.py",
  "watch": {
    "include": [
      "**"
    ],
    "exclude": [
      "README.md",
      "cdk*.json",
      "requirements*.txt",
      "source.bat",
      "**/__init__.py",
      "python/__pycache__",
      "tests"
    ]
  },
  "context": {
    "@aws-cdk/aws-apigateway:usagePlanKeyOrderInsensitiveId": true,
    "@aws-cdk/core:stackRelativeExports": true,
    "@aws-cdk/aws-rds:lowercaseDbIdentifier": true,
    "@aws-cdk/aws-lambda:recognizeVersionProps": true,
    "@aws-cdk/aws-lambda:recognizeLayerVersion": true,
    "@aws-cdk/aws-cloudfront:defaultSecurityPolicyTLSv1.2_2021": true,
    "@aws-cdk-containers/ecs-service-extensions:enableDefaultLogDriver": true,
    "@aws-cdk/aws-ec2:uniqueImdsv2TemplateName": true,
    "@aws-cdk/core:checkSecretUsage": true,
    "@aws-cdk/aws-iam:minimizePolicies": true,
    "@aws-cdk/core:validateSnapshotRemovalPolicy": true,
    "@aws-cdk/aws-codepipeline:crossAccountKeyAliasStackSafeResourceName": true,
    "@aws-cdk/core:target-partitions": [
      "aws",
      "aws-cn"
    ],
    "DEPLOYMENT_REGION": "eu-west-1"
  }
}

--------------------------------------------------------------------------------
/docs/assets/architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/lambda-serverless-inference-fastapi/c7440956936204e6a09a51199b3095f91e9ade57/docs/assets/architecture.png

--------------------------------------------------------------------------------
/docs/assets/model_response_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/lambda-serverless-inference-fastapi/c7440956936204e6a09a51199b3095f91e9ade57/docs/assets/model_response_example.png

--------------------------------------------------------------------------------
/docs/assets/question_answering_input_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/lambda-serverless-inference-fastapi/c7440956936204e6a09a51199b3095f91e9ade57/docs/assets/question_answering_input_example.png

--------------------------------------------------------------------------------
/docs/assets/swagger_ui_docs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/lambda-serverless-inference-fastapi/c7440956936204e6a09a51199b3095f91e9ade57/docs/assets/swagger_ui_docs.png

--------------------------------------------------------------------------------
/fastapi_model_serving/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/lambda-serverless-inference-fastapi/c7440956936204e6a09a51199b3095f91e9ade57/fastapi_model_serving/__init__.py

--------------------------------------------------------------------------------
/fastapi_model_serving/fastapi_model_serving_stack.py:
--------------------------------------------------------------------------------
import os
from aws_cdk import (
    Size,
    Duration,
    Stack,
    aws_lambda as _lambda,
    aws_apigateway as apigateway,
)
from constructs import Construct

DIR_PATH = os.path.dirname(os.path.realpath(__file__))


class FastapiModelServingStack(Stack):
    def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        fastapi_model_endpoint_lambda = _lambda.DockerImageFunction(
            self,
            "fastapi_model_serving_endpoint",
            function_name="fastapi_model_serving_endpoint_docker",
            architecture=_lambda.Architecture.X86_64,
            code=_lambda.DockerImageCode.from_image_asset(
                os.path.join(DIR_PATH, "..", "model_endpoint", "docker")
            ),
            timeout=Duration.seconds(60),
            ephemeral_storage_size=Size.mebibytes(4096),
            memory_size=2048,
        )

        apigateway.LambdaRestApi(
            self,
            "docker_model_serving_endpoint",
            handler=fastapi_model_endpoint_lambda,
            proxy=True,
        )

--------------------------------------------------------------------------------
/model_endpoint/docker/Dockerfile:
--------------------------------------------------------------------------------
FROM public.ecr.aws/lambda/python:3.10-x86_64

# Copy function code
ADD serving_api.tar.gz ${LAMBDA_TASK_ROOT}

RUN pip3 install -r requirements.txt --target ${LAMBDA_TASK_ROOT}

CMD [ "serving_api.lambda_handler" ]

--------------------------------------------------------------------------------
/model_endpoint/runtime/serving_api/custom_lambda_utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/lambda-serverless-inference-fastapi/c7440956936204e6a09a51199b3095f91e9ade57/model_endpoint/runtime/serving_api/custom_lambda_utils/__init__.py

--------------------------------------------------------------------------------
/model_endpoint/runtime/serving_api/custom_lambda_utils/scripts/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/lambda-serverless-inference-fastapi/c7440956936204e6a09a51199b3095f91e9ade57/model_endpoint/runtime/serving_api/custom_lambda_utils/scripts/__init__.py

--------------------------------------------------------------------------------
/model_endpoint/runtime/serving_api/custom_lambda_utils/scripts/inference.py:
--------------------------------------------------------------------------------
import sys
import os
import logging

logger = logging.getLogger()
logger.setLevel(logging.INFO)

try:
    sys.path.append(os.environ["LAMBDA_TASK_ROOT"])
except KeyError:
    logger.warning(
        """Environment variable "LAMBDA_TASK_ROOT" not found.
        Assuming execution outside of lambda environment."""
    )

from transformers import pipeline, AutoModelForQuestionAnswering, AutoTokenizer


DIR_PATH = os.path.dirname(os.path.realpath(__file__))
PATH_TO_MODEL_ARTIFACTS = os.path.join(DIR_PATH, "..", "model_artifacts/")


model = AutoModelForQuestionAnswering.from_pretrained(PATH_TO_MODEL_ARTIFACTS)
tokenizer = AutoTokenizer.from_pretrained(PATH_TO_MODEL_ARTIFACTS)

question_answerer = pipeline(
    task="question-answering", model=model, tokenizer=tokenizer
)

--------------------------------------------------------------------------------
/model_endpoint/runtime/serving_api/requirements.txt:
--------------------------------------------------------------------------------
fastapi==0.79.0
mangum==0.15.1
transformers[torch]==4.21.1

--------------------------------------------------------------------------------
/model_endpoint/runtime/serving_api/serving_api.py:
--------------------------------------------------------------------------------
import json
import logging
from fastapi import FastAPI
from mangum import Mangum

logger = logging.getLogger()
logger.setLevel(logging.INFO)

app = FastAPI(root_path="/prod")


@app.get("/")
async def root() -> dict:
    """
    **Dummy endpoint that returns 'hello world' example.**

    ```
    Returns:
        dict: 'hello world' message.
    ```
    """
    return {"message": "Hello World"}


@app.get("/question")
async def get_answer(question: str, context: str) -> dict:
    """
    **Endpoint implementing the question-answering logic.**

    ```
    Args:
        question (str): Question to be answered. Answer should be included in 'context'.
        context (str): Context containing information necessary to answer 'question'.
    Returns:
        dict: Dictionary containing the answer to the question along with some metadata.
    ```
    """
    # imported lazily so that the model is only loaded when this route is hit
    from custom_lambda_utils.scripts.inference import question_answerer

    return question_answerer(question=question, context=context)


def lambda_handler(event, context):
    logger.info(json.dumps(event))

    asgi_handler = Mangum(app)
    response = asgi_handler(
        event, context
    )  # Call the instance with the event arguments

    logger.info(json.dumps(response))
    return response

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
aws-cdk-lib==2.29.1
boto3==1.24.53
constructs>=10.0.0,<11.0.0
fastapi==0.79.0
mangum==0.15.1
transformers[torch]==4.21.1

--------------------------------------------------------------------------------
/scripts/init-lambda-code.sh:
--------------------------------------------------------------------------------
#!/bin/bash

set -e

PROJECT_ROOT_DIR=$PWD

# Transform long options to short ones
for arg in "$@"; do
  shift
  case "$arg" in
    '--api') set -- "$@" '-a' ;;
    '--help') set -- "$@" '-h' ;;
    *) set -- "$@" "$arg" ;;
  esac
done

# process flags preceding positional parameters
while getopts "a" flag; do
  case $flag in
    a)
      TEMPLATE_TYPE="API"
      ;;
    \?)
      echo "Invalid option: -$OPTARG" >&2
      exit 1
      ;;
  esac
done

shift "$((OPTIND-1))" # remove options already parsed by getopts from arg list

if [[ $# -lt 2 ]]; then
  echo "Illegal number of arguments."
  echo "2 positional arguments were expected."
  echo "Usage: init-lambda-code.sh [OPTIONS] COMPONENT_NAME PACKAGE_NAME [OPTIONS]"
  exit 1
fi

COMPONENT_NAME=$1
PACKAGE_NAME=$2

shift 2 # remove the two positional arguments from arg list


# process any flags after positional parameters
while getopts "a" flag; do
  case $flag in
    a)
      TEMPLATE_TYPE="API"
      ;;
    \?)
      echo "Invalid option: -$OPTARG" >&2
      exit 1
      ;;
  esac
done



if [[ -d "$COMPONENT_NAME"/runtime ]]; then
  # Control will enter here if $COMPONENT_NAME exists.
  echo "Component folder does exist already."
else
  echo "Component folder does not exist yet. Creating new component"
  mkdir -p "$COMPONENT_NAME"/"runtime"
fi


cd "$COMPONENT_NAME"/runtime
mkdir -p "$PACKAGE_NAME"


# choose what template to use for initialization of lambda
if [[ "$TEMPLATE_TYPE" == "API" ]]; then
  echo "Creating API dummy lambda."
  cp "$PROJECT_ROOT_DIR"/templates/api/api.py \
    "$PACKAGE_NAME"/"$PACKAGE_NAME".py
else
  echo "Creating default dummy lambda."
  cp "$PROJECT_ROOT_DIR"/templates/dummy/dummy.py \
    "$PACKAGE_NAME"/"$PACKAGE_NAME".py
fi

--------------------------------------------------------------------------------
/scripts/setup.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
set -e

# # make sure rust compiler is installed (needed for huggingface transformers lib)
# curl 'https://sh.rustup.rs' --tlsv1.2 -sSf | bash
# source "$HOME/.cargo/env"

# download huggingface question answering model artifacts
mkdir -p $PWD/model_endpoint/runtime/serving_api/custom_lambda_utils/model_artifacts
curl -L https://huggingface.co/distilbert-base-cased-distilled-squad/resolve/main/pytorch_model.bin -o $PWD/model_endpoint/runtime/serving_api/custom_lambda_utils/model_artifacts/pytorch_model.bin
curl https://huggingface.co/distilbert-base-cased-distilled-squad/resolve/main/config.json -o $PWD/model_endpoint/runtime/serving_api/custom_lambda_utils/model_artifacts/config.json
curl https://huggingface.co/distilbert-base-cased-distilled-squad/resolve/main/tokenizer.json -o $PWD/model_endpoint/runtime/serving_api/custom_lambda_utils/model_artifacts/tokenizer.json
curl https://huggingface.co/distilbert-base-cased-distilled-squad/resolve/main/tokenizer_config.json -o $PWD/model_endpoint/runtime/serving_api/custom_lambda_utils/model_artifacts/tokenizer_config.json
curl https://huggingface.co/distilbert-base-cased-distilled-squad/resolve/main/vocab.txt -o $PWD/model_endpoint/runtime/serving_api/custom_lambda_utils/model_artifacts/vocab.txt


# setup and activate virtual environment
python3 -m venv .venv
source .venv/bin/activate

# install dependencies inside virtual environment
pip install --upgrade pip
pip install -r requirements.txt

--------------------------------------------------------------------------------
/scripts/update_notice.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
set -e

req_files="$(find . -name requirements\*.txt)"
echo $req_files

# create and activate a throwaway virtual environment for license collection
python -m venv notice_licenses && . notice_licenses/bin/activate
pip install pip-licenses==4.0.1

for f in $req_files
do
    pip install -r ${f}
done
rm NOTICE || true
pip-licenses --output NOTICE
rm -r notice_licenses || true

--------------------------------------------------------------------------------
/source.bat:
--------------------------------------------------------------------------------
@echo off

rem The sole purpose of this script is to make the command
rem
rem     source .venv/bin/activate
rem
rem (which activates a Python virtualenv on Linux or Mac OS X) work on Windows.
rem On Windows, this command just runs this batch file (the argument is ignored).
rem
rem Now we don't need to document a Windows command for activating a virtualenv.

echo Executing .venv\Scripts\activate.bat for you
.venv\Scripts\activate.bat

--------------------------------------------------------------------------------
/templates/api/api.py:
--------------------------------------------------------------------------------
import json
import logging
from fastapi import FastAPI
from mangum import Mangum

logger = logging.getLogger()
logger.setLevel(logging.INFO)

app = FastAPI()


@app.get("/")
async def root():
    return {"message": "Hello World"}


def lambda_handler(event, context):
    logger.info(json.dumps(event))

    asgi_handler = Mangum(app)
    response = asgi_handler(
        event, context
    )  # Call the instance with the event arguments

    logger.info(json.dumps(response))
    return response

--------------------------------------------------------------------------------
/templates/dummy/dummy.py:
--------------------------------------------------------------------------------
import json
import logging

logger = logging.getLogger()
logger.setLevel(logging.INFO)


def handler(event, context):
    logger.info(event)

    return {"statusCode": 200, "body": json.dumps("Hello from Lambda!")}

--------------------------------------------------------------------------------