├── .gitignore
├── Makefile
├── NOTICE
├── README.md
├── app.py
├── cdk.json
├── docs
│   └── assets
│       ├── architecture.png
│       ├── model_response_example.png
│       ├── question_answering_input_example.png
│       └── swagger_ui_docs.png
├── fastapi_model_serving
│   ├── __init__.py
│   └── fastapi_model_serving_stack.py
├── model_endpoint
│   ├── docker
│   │   └── Dockerfile
│   └── runtime
│       └── serving_api
│           ├── custom_lambda_utils
│           │   ├── __init__.py
│           │   └── scripts
│           │       ├── __init__.py
│           │       └── inference.py
│           ├── requirements.txt
│           └── serving_api.py
├── requirements.txt
├── scripts
│   ├── init-lambda-code.sh
│   ├── setup.sh
│   └── update_notice.sh
├── source.bat
└── templates
    ├── api
    │   └── api.py
    └── dummy
        └── dummy.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
*.swp
package-lock.json
__pycache__
.pytest_cache
.venv
*.egg-info

# CDK asset staging directory
.cdk.staging
cdk.out

# ignore large files
model_endpoint/docker_assets/*
model_endpoint/runtime/serving_api/custom_lambda_utils/model_artifacts/*
*.zip
*.tar.gz

# ignore macOS file system stuff
.DS_Store

--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
all: prep package_model synth deploy

prep:
	bash -e scripts/setup.sh

update_notice:
	bash -e scripts/update_notice.sh

package_model:
	. .venv/bin/activate && cd ./model_endpoint/runtime/serving_api && tar czvf ../../docker/serving_api.tar.gz custom_lambda_utils requirements.txt serving_api.py

cdk_bootstrap:
	. ./.venv/bin/activate && cdk bootstrap

synth:
	. .venv/bin/activate && cdk synth

deploy:
	. .venv/bin/activate && cdk deploy

destroy:
	. .venv/bin/activate && cdk destroy

clean:
	rm -r .venv/ cdk.out/
--------------------------------------------------------------------------------
/NOTICE:
--------------------------------------------------------------------------------
Name                Version    License
PyYAML              6.0        MIT License
anyio               3.6.1      MIT License
attrs               22.1.0     MIT License
aws-cdk-lib         2.29.1     Apache-2.0
boto3               1.24.53    Apache Software License
botocore            1.27.53    Apache Software License
cattrs              22.1.0     MIT License
certifi             2022.6.15  Mozilla Public License 2.0 (MPL 2.0)
charset-normalizer  2.1.0      MIT License
click               8.1.3      BSD License
constructs          10.1.76    Apache-2.0
distlib             0.3.5      Python Software Foundation License
docopt              0.6.2      MIT License
exceptiongroup      1.0.0rc8   MIT License
fastapi             0.79.0     MIT License
filelock            3.8.0      Public Domain
h11                 0.13.0     MIT License
httptools           0.4.0      MIT License
huggingface-hub     0.8.1      Apache Software License
idna                3.3        BSD License
jmespath            1.0.1      MIT License
joblib              1.1.0      BSD License
jsii                1.64.0     Apache Software License
mangum              0.15.1     MIT License
numpy               1.23.2     BSD License
packaging           21.3       Apache Software License; BSD License
pipenv              2022.8.19  MIT License
pipreqs             0.4.11     Apache Software License
platformdirs        2.5.2      MIT License
publication         0.0.3      MIT License
pydantic            1.9.2      MIT License
pyparsing           3.0.9      MIT License
python-dateutil     2.8.2      Apache Software License; BSD License
python-dotenv       0.20.0     BSD License
regex               2022.7.25  Apache Software License
requests            2.28.1     Apache Software License
s3transfer          0.6.0      Apache Software License
sacremoses          0.0.53     MIT License
sentencepiece       0.1.97     Apache Software License
six                 1.16.0     MIT License
sniffio             1.2.0      Apache Software License; MIT License
starlette           0.19.1     BSD License
tokenizers          0.12.1     Apache Software License
torch               1.11.0     BSD License
tqdm                4.64.0     MIT License; Mozilla Public License 2.0 (MPL 2.0)
transformers        4.21.1     Apache Software License
typeguard           2.13.3     MIT License
typing_extensions   4.3.0      Python Software Foundation License
urllib3             1.26.11    MIT License
uvicorn             0.18.2     BSD License
uvloop              0.16.0     Apache Software License; MIT License
virtualenv          20.16.3    MIT License
virtualenv-clone    0.5.7      MIT License
watchfiles          0.16.1     MIT License
websockets          10.3       BSD License
yarg                0.1.9      MIT License

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Deploy an ML serverless inference endpoint using FastAPI, AWS Lambda and AWS CDK

## Architecture
![Architecture](docs/assets/architecture.png)

## Prerequisites

- Python 3, along with virtualenv for creating and managing virtual environments in Python.
- `aws-cdk v2`, so that the aws-cdk CLI is available on your system.
- Docker, installed (and, for the deployment part, running!) on your local machine.

### Test if all necessary software is installed:

The AWS CLI is needed. Log in to your account and select the region where you want to deploy the solution.

Python 3 is needed. For this solution we use Python 3.10, the version pinned in the Dockerfile. Make sure that the Python version you use locally is consistent with the Python version specified in the Dockerfile.
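For reference, this is the base image pinned in `model_endpoint/docker/Dockerfile` at the time of writing:

```dockerfile
FROM public.ecr.aws/lambda/python:3.10-x86_64
```

Check your local Python version with: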
```shell
python3 --version
```

Check if virtualenv is installed for creating and managing virtual environments in Python. Strictly speaking, this is not a hard requirement, but it will make your life easier and makes this blog post easier to follow.
```shell
python3 -m virtualenv --version
```

Check if cdk is installed. This will be used to deploy our solution.
```shell
cdk --version
```

Check if Docker is installed. The solution makes your model accessible to your Lambda function through a Docker image, and building this image locally requires Docker.
```shell
docker --version
```
Also make sure it is up and running by running ```docker ps```.

## How to structure your FastAPI project using AWS CDK

We are using the following directory structure for our project (ignoring some boilerplate cdk code that is immaterial in the context of this blog post):

```
fastapi_model_serving
│
└───.venv
│
└───fastapi_model_serving
│   │   __init__.py
│   │   fastapi_model_serving_stack.py
│   │
│   └───model_endpoint
│       └───docker
│       │   │   Dockerfile
│       │   │   serving_api.tar.gz
│       │
│       └───runtime
│           └───serving_api
│               │   requirements.txt
│               │   serving_api.py
│               └───custom_lambda_utils
│                   └───model_artifacts
│                   │       ...
│                   └───scripts
│                           inference.py
│
└───templates
│   └───api
│   │       api.py
│   └───dummy
│           dummy.py
│
└───scripts
│   └───init-lambda-code.sh
│   └───setup.sh
│   └───update_notice.sh
│
│   app.py
│   cdk.json
│   README.md
│   requirements.txt

```

The directory follows the recommended structure of cdk projects for Python.

The most important part of this repository is the ```fastapi_model_serving``` directory. It contains the code that defines the cdk stack and the resources that are used for model serving.

`model_endpoint` directory:
- contains all the assets that make up our serverless endpoint: the Dockerfile to build the Docker image that AWS Lambda will use, the Lambda function code that uses FastAPI to handle inference requests and route them to the correct endpoint, and the artifacts of the model that we want to deploy.

Inside `model_endpoint`, we have the following structure:
- `docker` directory:
  - specifies the `Dockerfile` used to build the image for the Lambda function, with all the artifacts (Lambda function code, model artifacts, ...) placed so that they can be used without issues.
  - `serving_api.tar.gz`: a tarball that contains all the assets from the runtime folder that are necessary for building the Docker image. More on how to create this tar.gz file in the next section.
- `runtime` directory:
  - contains the code for the `serving_api` Lambda function and its dependencies, specified in the `requirements.txt` file,
  - as well as the `custom_lambda_utils` directory, which includes an `inference` script that loads the necessary model artifacts so that the model can be passed to the `serving_api` that then exposes it as an endpoint.

Besides, we have the `templates` directory, which provides a template folder structure and files where you can define your own customised code and APIs following the sample we went through above:

- `templates` directory: contains dummy code that can be used to create new lambda functions from (see the scaffolding example below):
  - `dummy` contains the code that implements the structure of an ordinary AWS Lambda function using the Python runtime,
  - `api` contains the code for an AWS Lambda function that wraps a FastAPI endpoint around an existing API Gateway.
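As a sketch of how these templates are meant to be used, the helper script `scripts/init-lambda-code.sh` copies one of them into a fresh component skeleton. The component and package names below are placeholders, not part of the repository:

```shell
# run from the repository root; creates my_component/runtime/my_api/my_api.py
# from the FastAPI template ("my_component" and "my_api" are hypothetical names)
bash scripts/init-lambda-code.sh --api my_component my_api

# omit --api to start from the plain dummy lambda template instead
bash scripts/init-lambda-code.sh my_component my_dummy
```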

## Step-by-step walk-through: Deploying the solution

NOTE: By default, the code is deployed in the eu-west-1 region. If you want to deploy to another region of your choice, change the `DEPLOYMENT_REGION` context variable in the `cdk.json` file.
Beware also of the CPU architecture: as checked in, the solution hosts the lambda on the x86_64 architecture (see the `architecture` parameter in the `fastapi_model_serving_stack.py` file and the first line of the Dockerfile inside the `model_endpoint/docker` directory). If you want to host the solution on arm64 instead, and that feature is available in your region, change both of those places accordingly.


1) First, run the following command to clone the git repository:
`git clone https://github.com/aws-samples/lambda-serverless-inference-fastapi`
Since we would like to showcase that the solution works with model artifacts that you train locally, we include a sample artifact of a DistilBERT model pretrained for question answering (from the Hugging Face model hub) in the `serving_api.tar.gz` file. Hence, downloading can take around **3 to 5 minutes**.

2) Now, let's set up the environment to recreate the blog post. This step downloads the pretrained model that will be deployed from the Hugging Face model hub into the `./model_endpoint/runtime/serving_api/custom_lambda_utils/model_artifacts` directory. It also creates a virtual environment and installs all dependencies that are needed. You only need to run this command once:
```shell
make prep
```
This command can take around **5 minutes** (depending on your internet bandwidth) because it needs to download the model artifacts.


3) The model artifacts need to be packaged inside a .tar.gz archive that is used inside the Docker image built in the cdk stack. Run this command whenever you make changes to the model artifacts or the API itself, so that your serving endpoint is always packaged in its most up-to-date version:
```shell
make package_model
```
Finally, the artifacts are all in place. Now we can move on to deploying the cdk stack to your AWS account.


4) ```FIRST TIME CDK USERS ONLY```: If this is your first time deploying an AWS CDK app into an environment (account + region combination), you will need to bootstrap the environment, i.e., prepare it for use with `CDK`. This will create a stack that includes resources that are needed for the toolkit's operation. For example, the stack includes an S3 bucket that is used to store templates and assets during the deployment process.
```shell
make cdk_bootstrap
```
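The `cdk_bootstrap` target bootstraps whatever account and region your CLI is currently configured for. As a sketch — with a placeholder account ID — you can also point `cdk bootstrap` at an explicit environment:

```shell
# bootstrap a specific account/region pair; 123456789012 is a placeholder account ID
cdk bootstrap aws://123456789012/eu-west-1
```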

5) Since we are building Docker images locally in this cdk deployment, the Docker daemon needs to be running before we can deploy the stack via the cdk CLI. To check whether the Docker daemon is running on your system, use the following command:
```shell
docker ps
```
If you don't get an error message, you should be good to deploy the solution.


6) Deploy the solution with the following command:
```shell
make deploy
```
This step can take around **5-10 minutes** due to building and pushing the Docker image.

## Running real-time inference

### Using the API documentation in the browser
After your `AWS CloudFormation` stack has been deployed successfully, go to its `Outputs` section and open the endpoint URL shown there. Now our model is accessible via the endpoint URL and ready to use for real-time inference!

1) Go to the URL to see whether you get the `"hello world"` message, and go to `{endpoint_url}/docs` to see whether you can successfully load the interactive Swagger UI page. Note that there may be some cold-start latency, so you may need to wait a moment or refresh a few times.
![Swagger UI Docs](docs/assets/swagger_ui_docs.png)


2) Once you are on the landing page of the FastAPI Swagger UI, you can execute your endpoint via the root `/` or the `/question` resource. From `/`, you can try out and execute the API and get the `"hello world"` message.
From `/question`, you can try out and execute the API and run ML inference on the model we deployed for the question answering case. Here is one example.

The question is ```What is the color of my car now?``` and the context is ```My car used to be blue but I painted red.```
![Question Answering Input Example](docs/assets/question_answering_input_example.png)

Once you click `Execute`, the model will answer the question based on the given context with a response like the one below.
![Model Response Example](docs/assets/model_response_example.png)

In the response body you can see the answer along with the confidence score the model assigns to it. You can also play around with other examples or embed the API in your existing application.

### Real-time inference via code using the `requests` module
Alternatively, you can run the inference via code. Here is one example written in Python, using the `requests` library:


```python
import requests

# optional header and payload vars
headers = {}
payload = {}

url = "https://<api_id>.execute-api.<region>.amazonaws.com/prod/question?question=\"What is the color of my car now?\"&context=\"My car used to be blue but I painted red\""

response = requests.request("GET", url, headers=headers, data=payload)

print(response.text)
```

This code snippet would output a string similar to the following:
```python
'{"score":0.6947233080863953,"start":38,"end":41,"answer":"red"}'
```

### Clean up
Inside the root directory of your repository, run the following command to clean up your created resources:
```shell
make destroy
```

### Troubleshooting

#### If you are a Mac User
Error when logging into ECR with Docker login: ```"Error saving credentials ... not implemented".```
For example: ```exited with error code 1: Error saving credentials: error storing credentials - err: exit status 1,...dial unix backend.sock: connect: connection refused```
#### Solution
Before you can use Lambda on top of Docker containers with cdk, you might have to change the ```credsStore``` parameter in your ```~/.docker/config.json``` file to ```osxkeychain```. That solves Amazon ECR login issues on a Mac.

--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
import os

import aws_cdk as cdk

from fastapi_model_serving.fastapi_model_serving_stack import FastapiModelServingStack


app = cdk.App()

FastapiModelServingStack(
    app,
    "FastapiModelServingStack",
    env=cdk.Environment(
        account=os.getenv("CDK_DEFAULT_ACCOUNT"),
        region=app.node.try_get_context("DEPLOYMENT_REGION"),
    ),
)

app.synth()

--------------------------------------------------------------------------------
/cdk.json:
--------------------------------------------------------------------------------
{
  "app": "python3 app.py",
  "watch": {
    "include": [
      "**"
    ],
    "exclude": [
      "README.md",
      "cdk*.json",
      "requirements*.txt",
      "source.bat",
      "**/__init__.py",
      "python/__pycache__",
      "tests"
    ]
  },
  "context": {
    "@aws-cdk/aws-apigateway:usagePlanKeyOrderInsensitiveId": true,
    "@aws-cdk/core:stackRelativeExports": true,
    "@aws-cdk/aws-rds:lowercaseDbIdentifier": true,
    "@aws-cdk/aws-lambda:recognizeVersionProps": true,
    "@aws-cdk/aws-lambda:recognizeLayerVersion": true,
    "@aws-cdk/aws-cloudfront:defaultSecurityPolicyTLSv1.2_2021": true,
    "@aws-cdk-containers/ecs-service-extensions:enableDefaultLogDriver": true,
    "@aws-cdk/aws-ec2:uniqueImdsv2TemplateName": true,
    "@aws-cdk/core:checkSecretUsage": true,
    "@aws-cdk/aws-iam:minimizePolicies": true,
    "@aws-cdk/core:validateSnapshotRemovalPolicy": true,
    "@aws-cdk/aws-codepipeline:crossAccountKeyAliasStackSafeResourceName": true,
    "@aws-cdk/core:target-partitions": [
      "aws",
      "aws-cn"
    ],
    "DEPLOYMENT_REGION": "eu-west-1"
  }
}

--------------------------------------------------------------------------------
/docs/assets/architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/lambda-serverless-inference-fastapi/c7440956936204e6a09a51199b3095f91e9ade57/docs/assets/architecture.png

--------------------------------------------------------------------------------
/docs/assets/model_response_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/lambda-serverless-inference-fastapi/c7440956936204e6a09a51199b3095f91e9ade57/docs/assets/model_response_example.png

--------------------------------------------------------------------------------
/docs/assets/question_answering_input_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/lambda-serverless-inference-fastapi/c7440956936204e6a09a51199b3095f91e9ade57/docs/assets/question_answering_input_example.png

--------------------------------------------------------------------------------
/docs/assets/swagger_ui_docs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/lambda-serverless-inference-fastapi/c7440956936204e6a09a51199b3095f91e9ade57/docs/assets/swagger_ui_docs.png

--------------------------------------------------------------------------------
/fastapi_model_serving/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/lambda-serverless-inference-fastapi/c7440956936204e6a09a51199b3095f91e9ade57/fastapi_model_serving/__init__.py

--------------------------------------------------------------------------------
/fastapi_model_serving/fastapi_model_serving_stack.py:
--------------------------------------------------------------------------------
import os
from aws_cdk import (
    Size,
    Duration,
    Stack,
    aws_lambda as _lambda,
    aws_apigateway as apigateway,
)
from constructs import Construct

DIR_PATH = os.path.dirname(os.path.realpath(__file__))


class FastapiModelServingStack(Stack):
    def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        fastapi_model_endpoint_lambda = _lambda.DockerImageFunction(
            self,
            "fastapi_model_serving_endpoint",
            function_name="fastapi_model_serving_endpoint_docker",
            architecture=_lambda.Architecture.X86_64,
            code=_lambda.DockerImageCode.from_image_asset(
                os.path.join(DIR_PATH, "..", "model_endpoint", "docker")
            ),
            timeout=Duration.seconds(60),
            ephemeral_storage_size=Size.mebibytes(4096),
            memory_size=2048,
        )

        apigateway.LambdaRestApi(
            self,
            "docker_model_serving_endpoint",
            handler=fastapi_model_endpoint_lambda,
            proxy=True,
        )

--------------------------------------------------------------------------------
/model_endpoint/docker/Dockerfile:
--------------------------------------------------------------------------------
FROM public.ecr.aws/lambda/python:3.10-x86_64

# Copy function code
ADD serving_api.tar.gz ${LAMBDA_TASK_ROOT}

RUN pip3 install -r requirements.txt --target ${LAMBDA_TASK_ROOT}

CMD [ "serving_api.lambda_handler" ]

--------------------------------------------------------------------------------
/model_endpoint/runtime/serving_api/custom_lambda_utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/lambda-serverless-inference-fastapi/c7440956936204e6a09a51199b3095f91e9ade57/model_endpoint/runtime/serving_api/custom_lambda_utils/__init__.py

--------------------------------------------------------------------------------
/model_endpoint/runtime/serving_api/custom_lambda_utils/scripts/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/lambda-serverless-inference-fastapi/c7440956936204e6a09a51199b3095f91e9ade57/model_endpoint/runtime/serving_api/custom_lambda_utils/scripts/__init__.py

--------------------------------------------------------------------------------
/model_endpoint/runtime/serving_api/custom_lambda_utils/scripts/inference.py:
--------------------------------------------------------------------------------
import sys
import os
import logging

logger = logging.getLogger()
logger.setLevel(logging.INFO)

try:
    sys.path.append(os.environ["LAMBDA_TASK_ROOT"])
except KeyError:
    logger.warning(
        """Environment variable "LAMBDA_TASK_ROOT" not found.
        Assuming execution outside of lambda environment."""
    )

from transformers import pipeline, AutoModelForQuestionAnswering, AutoTokenizer


DIR_PATH = os.path.dirname(os.path.realpath(__file__))
PATH_TO_MODEL_ARTIFACTS = os.path.join(DIR_PATH, "..", "model_artifacts/")


model = AutoModelForQuestionAnswering.from_pretrained(PATH_TO_MODEL_ARTIFACTS)
tokenizer = AutoTokenizer.from_pretrained(PATH_TO_MODEL_ARTIFACTS)

question_answerer = pipeline(
    task="question-answering", model=model, tokenizer=tokenizer
)

--------------------------------------------------------------------------------
/model_endpoint/runtime/serving_api/requirements.txt:
--------------------------------------------------------------------------------
fastapi==0.79.0
mangum==0.15.1
transformers[torch]==4.21.1

--------------------------------------------------------------------------------
/model_endpoint/runtime/serving_api/serving_api.py:
--------------------------------------------------------------------------------
import json
import logging
from fastapi import FastAPI
from mangum import Mangum

logger = logging.getLogger()
logger.setLevel(logging.INFO)

app = FastAPI(root_path="/prod")


@app.get("/")
async def root() -> dict:
    """
    **Dummy endpoint that returns 'hello world' example.**

    ```
    Returns:
        dict: 'hello world' message.
    ```
    """
    return {"message": "Hello World"}


@app.get("/question")
async def get_answer(question: str, context: str) -> dict:
    """
    **Endpoint implementing the question-answering logic.**

    ```
    Args:
        question (str): Question to be answered. Answer should be included in 'context'.
        context (str): Context containing information necessary to answer 'question'.
    Returns:
        dict: Dictionary containing the answer to the question along with some metadata.
    ```
    """
    # imported lazily so that the model is only loaded when this route is hit
    from custom_lambda_utils.scripts.inference import question_answerer

    return question_answerer(question=question, context=context)


def lambda_handler(event, context):
    logger.info(json.dumps(event))

    asgi_handler = Mangum(app)
    response = asgi_handler(
        event, context
    )  # Call the instance with the event arguments

    logger.info(json.dumps(response))
    return response

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
aws-cdk-lib==2.29.1
boto3==1.24.53
constructs>=10.0.0,<11.0.0
fastapi==0.79.0
mangum==0.15.1
transformers[torch]==4.21.1

--------------------------------------------------------------------------------
/scripts/init-lambda-code.sh:
--------------------------------------------------------------------------------
#!/bin/bash

set -e

PROJECT_ROOT_DIR=$PWD

# Transform long options to short ones
for arg in "$@"; do
  shift
  case "$arg" in
    '--api') set -- "$@" '-a' ;;
    '--help') set -- "$@" '-h' ;;
    *) set -- "$@" "$arg" ;;
  esac
done

# process flags preceding positional parameters
while getopts "a" flag; do
  case $flag in
    a)
      TEMPLATE_TYPE="API"
      ;;
    \?)
      echo "Invalid option: -$OPTARG" >&2
      exit 1
      ;;
  esac
done

shift "$((OPTIND-1))" # remove options already parsed by getopts from arg list

if [[ $# -lt 2 ]]; then
  echo "Illegal number of arguments."
  echo "2 positional arguments were expected."
  echo "Usage: init-lambda-code.sh [OPTIONS] COMPONENT_NAME PACKAGE_NAME [OPTIONS]"
  exit 1
fi

COMPONENT_NAME=$1
PACKAGE_NAME=$2

shift 2 # remove the two positional arguments from arg list


# process any flags after positional parameters
while getopts "a" flag; do
  case $flag in
    a)
      TEMPLATE_TYPE="API"
      ;;
    \?)
      echo "Invalid option: -$OPTARG" >&2
      exit 1
      ;;
  esac
done



if [[ -d "$COMPONENT_NAME"/runtime ]]; then
  # Control will enter here if $COMPONENT_NAME exists.
  echo "Component folder does exist already."
else
  echo "Component folder does not exist yet. Creating new component"
  mkdir -p "$COMPONENT_NAME"/"runtime"
fi


cd "$COMPONENT_NAME"/runtime
mkdir -p "$PACKAGE_NAME"


# choose what template to use for initialization of lambda
if [[ "$TEMPLATE_TYPE" == "API" ]]; then
  echo "Creating API dummy lambda."
  cp "$PROJECT_ROOT_DIR"/templates/api/api.py \
    "$PACKAGE_NAME"/"$PACKAGE_NAME".py
else
  echo "Creating default dummy lambda."
  cp "$PROJECT_ROOT_DIR"/templates/dummy/dummy.py \
    "$PACKAGE_NAME"/"$PACKAGE_NAME".py
fi

--------------------------------------------------------------------------------
/scripts/setup.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
set -e

# # make sure rust compiler is installed (needed for huggingface transformers lib)
# curl 'https://sh.rustup.rs' --tlsv1.2 -sSf | bash
# source "$HOME/.cargo/env"

# download huggingface question answering model artifacts
mkdir -p $PWD/model_endpoint/runtime/serving_api/custom_lambda_utils/model_artifacts
curl -L https://huggingface.co/distilbert-base-cased-distilled-squad/resolve/main/pytorch_model.bin -o $PWD/model_endpoint/runtime/serving_api/custom_lambda_utils/model_artifacts/pytorch_model.bin
curl https://huggingface.co/distilbert-base-cased-distilled-squad/resolve/main/config.json -o $PWD/model_endpoint/runtime/serving_api/custom_lambda_utils/model_artifacts/config.json
curl https://huggingface.co/distilbert-base-cased-distilled-squad/resolve/main/tokenizer.json -o $PWD/model_endpoint/runtime/serving_api/custom_lambda_utils/model_artifacts/tokenizer.json
curl https://huggingface.co/distilbert-base-cased-distilled-squad/resolve/main/tokenizer_config.json -o $PWD/model_endpoint/runtime/serving_api/custom_lambda_utils/model_artifacts/tokenizer_config.json
curl https://huggingface.co/distilbert-base-cased-distilled-squad/resolve/main/vocab.txt -o $PWD/model_endpoint/runtime/serving_api/custom_lambda_utils/model_artifacts/vocab.txt


# setup and activate virtual environment
python3 -m venv .venv
source .venv/bin/activate

# install dependencies inside virtual environment
pip install --upgrade pip
pip install -r requirements.txt

--------------------------------------------------------------------------------
/scripts/update_notice.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
set -e

req_files="$(find . -name requirements\*.txt)"
echo $req_files

# create and activate a throwaway virtual environment for license collection
python -m venv notice_licenses && . notice_licenses/bin/activate
pip install pip-licenses==4.0.1

for f in $req_files
do
    pip install -r ${f}
done
rm NOTICE || true
pip-licenses --output NOTICE
rm -r notice_licenses || true

--------------------------------------------------------------------------------
/source.bat:
--------------------------------------------------------------------------------
@echo off

rem The sole purpose of this script is to make the command
rem
rem     source .venv/bin/activate
rem
rem (which activates a Python virtualenv on Linux or Mac OS X) work on Windows.
rem On Windows, this command just runs this batch file (the argument is ignored).
rem
rem Now we don't need to document a Windows command for activating a virtualenv.

echo Executing .venv\Scripts\activate.bat for you
.venv\Scripts\activate.bat

--------------------------------------------------------------------------------
/templates/api/api.py:
--------------------------------------------------------------------------------
import json
import logging
from fastapi import FastAPI
from mangum import Mangum

logger = logging.getLogger()
logger.setLevel(logging.INFO)

app = FastAPI()


@app.get("/")
async def root():
    return {"message": "Hello World"}


def lambda_handler(event, context):
    logger.info(json.dumps(event))

    asgi_handler = Mangum(app)
    response = asgi_handler(
        event, context
    )  # Call the instance with the event arguments

    logger.info(json.dumps(response))
    return response

--------------------------------------------------------------------------------
/templates/dummy/dummy.py:
--------------------------------------------------------------------------------
import json
import logging

logger = logging.getLogger()
logger.setLevel(logging.INFO)


def handler(event, context):
    logger.info(event)

    return {"statusCode": 200, "body": json.dumps("Hello from Lambda!")}

--------------------------------------------------------------------------------