├── .github
│   └── workflows
│       └── deployment.yaml
├── .gitignore
├── .kube
│   ├── README.md
│   ├── base_1
│   │   ├── deployment.yaml
│   │   ├── kustomization.yaml
│   │   └── service.yaml
│   ├── deployment.yaml
│   ├── experiment_1
│   │   ├── 2vCPU+4GB+w1
│   │   │   ├── Dockerfile
│   │   │   ├── deployment_replica_count.yaml
│   │   │   ├── kustomization.yaml
│   │   │   └── requirements.txt
│   │   ├── 2vCPU+4GB+w2
│   │   │   ├── Dockerfile
│   │   │   ├── deployment_replica_count.yaml
│   │   │   ├── kustomization.yaml
│   │   │   └── requirements.txt
│   │   ├── 2vCPU+4GB+w4
│   │   │   ├── Dockerfile
│   │   │   ├── deployment_replica_count.yaml
│   │   │   ├── kustomization.yaml
│   │   │   └── requirements.txt
│   │   ├── 4vCPU+8GB
│   │   │   ├── Dockerfile
│   │   │   ├── deployment_replica_count.yaml
│   │   │   ├── kustomization.yaml
│   │   │   └── requirements.txt
│   │   ├── 8vCPU+16GB
│   │   │   ├── Dockerfile
│   │   │   ├── deployment_replica_count.yaml
│   │   │   ├── kustomization.yaml
│   │   │   └── requirements.txt
│   │   └── 8vCPU+64GB
│   │       ├── Dockerfile
│   │       ├── deployment_replica_count.yaml
│   │       ├── kustomization.yaml
│   │       └── requirements.txt
│   ├── hpa.yaml
│   ├── kustomization.yaml
│   └── service.yaml
├── Dockerfile
├── LICENSE
├── README.md
├── api
│   ├── README.md
│   ├── cat.jpg
│   ├── imagenet_classes.txt
│   ├── main.py
│   ├── requirements.txt
│   └── utils
│       ├── __init__.py
│       └── utils.py
├── locust
│   ├── README.md
│   ├── cat.jpg
│   ├── cat_224x224.jpg
│   ├── load_test.conf
│   └── locust.py
└── notebooks
    ├── TF_Serving.ipynb
    └── TF_to_ONNX.ipynb
/.github/workflows/deployment.yaml: -------------------------------------------------------------------------------- 1 | name: Deployment 2 | 3 | env: 4 | GCP_PROJECT_ID: "fast-ai-exploration" 5 | GKE_CLUSTER: "fastapi-cluster" 6 | GKE_REGION: "us-central1" 7 | GKE_ZONE: "us-central1-a" 8 | IMAGE: "gcr.io/fast-ai-exploration/fastapi-k8s" 9 | GKE_DEPLOYMENT: "fastapi-server" 10 | 11 | on: 12 | push: 13 | branches: [ "main" ] 14 | 15 | # Allows you to run this workflow manually from the Actions tab 16 | workflow_dispatch: 17 | 18 | jobs: 19 | build: 20 | runs-on: ubuntu-latest 21 | steps: 22 | - name: Git 23 | uses: actions/checkout@v2 24 | 25 | - name: GCP auth 26 | uses: google-github-actions/auth@v0 27 | with: 28 | credentials_json: ${{ secrets.GCP_CREDENTIALS }} 29 | 30 | - name: Set up Cloud SDK 31 | uses: google-github-actions/setup-gcloud@v0 32 | 33 | - name: Docker auth 34 | run: |- 35 | gcloud --quiet auth configure-docker 36 | 37 | - name: GKE auth 38 | run: |- 39 | gcloud container clusters get-credentials "$GKE_CLUSTER" --zone "$GKE_ZONE" --project "$GCP_PROJECT_ID" 40 | 41 | - name: Check if the requested directories have any changes 42 | uses: dorny/paths-filter@v2 43 | id: changes 44 | with: 45 | filters: | 46 | src: 47 | - 'api/**' 48 | - '.github/**' 49 | - '.kube/**' 50 | - Dockerfile 51 | 52 | - name: Build and push Docker image based on the changes 53 | if: steps.changes.outputs.src == 'true' 54 | run: | 55 | docker build --tag "$IMAGE:$GITHUB_SHA" . 56 | docker tag "$IMAGE:$GITHUB_SHA" "$IMAGE:latest" 57 | docker push "$IMAGE:$GITHUB_SHA" 58 | docker push "$IMAGE:latest" 59 | 60 | - name: Set up Kustomize 61 | working-directory: .kube/ 62 | if: steps.changes.outputs.src == 'true' 63 | run: |- 64 | curl -sfLo kustomize.tar.gz https://github.com/kubernetes-sigs/kustomize/releases/download/kustomize%2Fv4.1.2/kustomize_v4.1.2_linux_amd64.tar.gz 65 | tar -zxvf kustomize.tar.gz 66 | chmod u+x ./kustomize 67 | 68 | - name: Deploy to GKE 69 | working-directory: .kube/ 70 | if: steps.changes.outputs.src == 'true' 71 | run: |- 72 | ./kustomize edit set image $IMAGE:$GITHUB_SHA 73 | ./kustomize build . 
| kubectl apply -f - 74 | kubectl rollout status deployment/$GKE_DEPLOYMENT 75 | kubectl get services -o wide 76 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # Misc 132 | .ipynb_checkpoints/ 133 | *.onnx -------------------------------------------------------------------------------- /.kube/README.md: -------------------------------------------------------------------------------- 1 | # Run Experimental Setup 2 | 3 | ## Build Docker image 4 | ```bash 5 | # from the repository root (the experiment Dockerfiles copy ./api and ./.kube/... relative to it) 6 | 7 | $ TARGET_EXPERIMENT=experiment_1/2vCPU+4GB+w1 8 | $ TAG=gcr.io/GCP_PROJECT_ID/IMG_NAME:IMG_TAG 9 | 10 | $ docker build -f .kube/$TARGET_EXPERIMENT/Dockerfile -t $TAG . 
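# (Added sketch, not from the original README): push the image so the GKE
# cluster can pull it. This assumes `gcloud auth configure-docker` has
# already been run for the gcr.io registry.
$ docker push $TAG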
11 | ``` 12 | 13 | ## Deploy on k8s cluster 14 | ```bash 15 | # under /.kube directory 16 | 17 | $ ./kustomize build $TARGET_EXPERIMENT | kubectl apply -f - 18 | ``` -------------------------------------------------------------------------------- /.kube/base_1/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | labels: 5 | app: fastapi-server 6 | name: fastapi-server 7 | spec: 8 | replicas: 1 9 | selector: 10 | matchLabels: 11 | app: fastapi-server 12 | strategy: {} 13 | template: 14 | metadata: 15 | labels: 16 | app: fastapi-server 17 | spec: 18 | containers: 19 | - image: gcr.io/gcp-ml-172005/k8s-fastapi:latest 20 | name: fastapi-k8s 21 | ports: 22 | - containerPort: 80 23 | resources: {} 24 | -------------------------------------------------------------------------------- /.kube/base_1/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | 4 | commonLabels: 5 | app: fastapi-server 6 | 7 | resources: 8 | - deployment.yaml 9 | - service.yaml -------------------------------------------------------------------------------- /.kube/base_1/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | creationTimestamp: null 5 | labels: 6 | app: fastapi-server 7 | name: fastapi-server 8 | spec: 9 | ports: 10 | - port: 80 11 | protocol: TCP 12 | targetPort: 80 13 | selector: 14 | app: fastapi-server 15 | type: LoadBalancer 16 | status: 17 | loadBalancer: {} -------------------------------------------------------------------------------- /.kube/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | creationTimestamp: null 5 | labels: 6 | app: fastapi-server 7 | name: fastapi-server 8 | spec: 9 | replicas: 1 10 | selector: 11 | matchLabels: 12 | app: fastapi-server 13 | strategy: {} 14 | template: 15 | metadata: 16 | creationTimestamp: null 17 | labels: 18 | app: fastapi-server 19 | spec: 20 | containers: 21 | - image: gcr.io/fast-ai-exploration/fastapi-k8s:latest 22 | name: fastapi-k8s 23 | ports: 24 | - containerPort: 80 25 | resources: {} -------------------------------------------------------------------------------- /.kube/experiment_1/2vCPU+4GB+w1/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8 2 | 3 | WORKDIR /app 4 | 5 | # install dependencies 6 | COPY ./.kube/experiment_1/2vCPU+4GB+w1/requirements.txt /app/requirements.txt 7 | RUN pip install --no-cache-dir --upgrade -r /app/requirements.txt 8 | 9 | # copy fastAPI app codebase 10 | COPY ./api /app 11 | 12 | # run the fastAPI app 13 | CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80"] -------------------------------------------------------------------------------- /.kube/experiment_1/2vCPU+4GB+w1/deployment_replica_count.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: fastapi-server 5 | 6 | spec: 7 | replicas: 8 8 | template: 9 | metadata: 10 | labels: 11 | app: fastapi-server 12 | spec: 13 | containers: 14 | - image: gcr.io/gcp-ml-172005/k8s-fastapi:uvicorn-w-1 15 | name: fastapi-k8s 16 | imagePullPolicy: Always 17 | ports: 18 | - 
containerPort: 80 19 | resources: {} 20 | -------------------------------------------------------------------------------- /.kube/experiment_1/2vCPU+4GB+w1/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | 4 | bases: 5 | - ../../base_1 6 | patches: 7 | - deployment_replica_count.yaml -------------------------------------------------------------------------------- /.kube/experiment_1/2vCPU+4GB+w1/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.22.2 2 | uvicorn[standard]==0.17.5 3 | Pillow==9.0.1 4 | onnxruntime==1.10.0 5 | fastapi==0.74.0 6 | python-multipart==0.0.5 7 | -------------------------------------------------------------------------------- /.kube/experiment_1/2vCPU+4GB+w2/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8 2 | 3 | WORKDIR /app 4 | 5 | # install dependencies 6 | COPY ./.kube/experiment_1/2vCPU+4GB+w2/requirements.txt /app/requirements.txt 7 | RUN pip install --no-cache-dir --upgrade -r /app/requirements.txt 8 | 9 | # copy fastAPI app codebase 10 | COPY ./api /app 11 | 12 | # run the fastAPI app 13 | CMD ["uvicorn", "main:app", "--workers", "2", "--host", "0.0.0.0", "--port", "80"] -------------------------------------------------------------------------------- /.kube/experiment_1/2vCPU+4GB+w2/deployment_replica_count.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: fastapi-server 5 | 6 | spec: 7 | replicas: 8 8 | template: 9 | metadata: 10 | labels: 11 | app: fastapi-server 12 | spec: 13 | containers: 14 | - image: gcr.io/gcp-ml-172005/k8s-fastapi:uvicorn-w-2 15 | name: fastapi-k8s 16 | imagePullPolicy: Always 17 | ports: 18 | - containerPort: 80 19 | resources: {} 20 | -------------------------------------------------------------------------------- /.kube/experiment_1/2vCPU+4GB+w2/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | 4 | bases: 5 | - ../../base_1 6 | patches: 7 | - deployment_replica_count.yaml -------------------------------------------------------------------------------- /.kube/experiment_1/2vCPU+4GB+w2/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.22.2 2 | uvicorn[standard]==0.17.5 3 | Pillow==9.0.1 4 | onnxruntime==1.10.0 5 | fastapi==0.74.0 6 | python-multipart==0.0.5 7 | -------------------------------------------------------------------------------- /.kube/experiment_1/2vCPU+4GB+w4/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8 2 | 3 | WORKDIR /app 4 | 5 | # install dependencies 6 | COPY ./.kube/experiment_1/2vCPU+4GB+w4/requirements.txt /app/requirements.txt 7 | RUN pip install --no-cache-dir --upgrade -r /app/requirements.txt 8 | 9 | # copy fastAPI app codebase 10 | COPY ./api /app 11 | 12 | # run the fastAPI app 13 | CMD ["uvicorn", "main:app", "--workers", "4", "--host", "0.0.0.0", "--port", "80"] -------------------------------------------------------------------------------- /.kube/experiment_1/2vCPU+4GB+w4/deployment_replica_count.yaml: -------------------------------------------------------------------------------- 1 | 
apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: fastapi-server 5 | 6 | spec: 7 | replicas: 8 8 | template: 9 | metadata: 10 | labels: 11 | app: fastapi-server 12 | spec: 13 | containers: 14 | - image: gcr.io/gcp-ml-172005/k8s-fastapi:uvicorn-w-4 15 | name: fastapi-k8s 16 | imagePullPolicy: Always 17 | ports: 18 | - containerPort: 80 19 | resources: {} 20 | -------------------------------------------------------------------------------- /.kube/experiment_1/2vCPU+4GB+w4/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | 4 | bases: 5 | - ../../base_1 6 | patches: 7 | - deployment_replica_count.yaml -------------------------------------------------------------------------------- /.kube/experiment_1/2vCPU+4GB+w4/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.22.2 2 | uvicorn[standard]==0.17.5 3 | Pillow==9.0.1 4 | onnxruntime==1.10.0 5 | fastapi==0.74.0 6 | python-multipart==0.0.5 7 | -------------------------------------------------------------------------------- /.kube/experiment_1/4vCPU+8GB/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8 2 | 3 | WORKDIR /app 4 | 5 | # install dependencies 6 | COPY ./.kube/experiment_1/4vCPU+8GB/requirements.txt /app/requirements.txt 7 | RUN pip install --no-cache-dir --upgrade -r /app/requirements.txt 8 | 9 | # copy fastAPI app codebase 10 | COPY ./api /app 11 | 12 | # run the fastAPI app 13 | CMD ["gunicorn", "main:app", "--workers", "9", "--worker-class", "uvicorn.workers.UvicornWorker", "--bind", "0.0.0.0:80"] -------------------------------------------------------------------------------- /.kube/experiment_1/4vCPU+8GB/deployment_replica_count.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: fastapi-server 5 | 6 | spec: 7 | replicas: 4 8 | template: 9 | metadata: 10 | labels: 11 | app: fastapi-server 12 | spec: 13 | containers: 14 | - image: gcr.io/gcp-ml-172005/k8s-fastapi:gunicorn-w-7 15 | name: fastapi-k8s 16 | imagePullPolicy: Always 17 | ports: 18 | - containerPort: 80 19 | resources: {} 20 | -------------------------------------------------------------------------------- /.kube/experiment_1/4vCPU+8GB/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | 4 | bases: 5 | - ../../base_1 6 | patches: 7 | - deployment_replica_count.yaml -------------------------------------------------------------------------------- /.kube/experiment_1/4vCPU+8GB/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.22.2 2 | uvicorn[standard]==0.17.5 3 | Pillow==9.0.1 4 | onnxruntime==1.10.0 5 | fastapi==0.74.0 6 | python-multipart==0.0.5 7 | gunicorn==20.1.0 -------------------------------------------------------------------------------- /.kube/experiment_1/8vCPU+16GB/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8 2 | 3 | WORKDIR /app 4 | 5 | # install dependencies 6 | COPY ./.kube/experiment_1/8vCPU+16GB/requirements.txt /app/requirements.txt 7 | RUN pip install --no-cache-dir --upgrade -r /app/requirements.txt 8 | 9 | # copy fastAPI app codebase 10 | COPY 
./api /app 11 | 12 | # run the fastAPI app 13 | CMD ["gunicorn", "main:app", "--workers", "17", "--worker-class", "uvicorn.workers.UvicornWorker", "--bind", "0.0.0.0:80"] -------------------------------------------------------------------------------- /.kube/experiment_1/8vCPU+16GB/deployment_replica_count.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: fastapi-server 5 | 6 | spec: 7 | replicas: 2 8 | template: 9 | metadata: 10 | labels: 11 | app: fastapi-server 12 | spec: 13 | containers: 14 | - image: gcr.io/gcp-ml-172005/k8s-fastapi:gunicorn-w-17 15 | name: fastapi-k8s 16 | imagePullPolicy: Always 17 | ports: 18 | - containerPort: 80 19 | resources: {} 20 | -------------------------------------------------------------------------------- /.kube/experiment_1/8vCPU+16GB/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | 4 | bases: 5 | - ../../base_1 6 | patches: 7 | - deployment_replica_count.yaml -------------------------------------------------------------------------------- /.kube/experiment_1/8vCPU+16GB/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.22.2 2 | uvicorn[standard]==0.17.5 3 | Pillow==9.0.1 4 | onnxruntime==1.10.0 5 | fastapi==0.74.0 6 | python-multipart==0.0.5 7 | gunicorn==20.1.0 -------------------------------------------------------------------------------- /.kube/experiment_1/8vCPU+64GB/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8 2 | 3 | WORKDIR /app 4 | 5 | # install dependencies 6 | COPY ./.kube/experiment_1/8vCPU+64GB/requirements.txt /app/requirements.txt 7 | RUN pip install --no-cache-dir --upgrade -r /app/requirements.txt 8 | 9 | # copy fastAPI app codebase 10 | COPY ./api /app 11 | 12 | # run the fastAPI app 13 | CMD ["gunicorn", "main:app", "--workers", "17", "--worker-class", "uvicorn.workers.UvicornWorker", "--bind", "0.0.0.0:80"] -------------------------------------------------------------------------------- /.kube/experiment_1/8vCPU+64GB/deployment_replica_count.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: fastapi-server 5 | 6 | spec: 7 | replicas: 2 8 | template: 9 | metadata: 10 | labels: 11 | app: fastapi-server 12 | spec: 13 | containers: 14 | - image: gcr.io/gcp-ml-172005/k8s-fastapi:gunicorn-w-17 15 | name: fastapi-k8s 16 | imagePullPolicy: Always 17 | ports: 18 | - containerPort: 80 19 | resources: {} 20 | -------------------------------------------------------------------------------- /.kube/experiment_1/8vCPU+64GB/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | 4 | bases: 5 | - ../../base_1 6 | patches: 7 | - deployment_replica_count.yaml -------------------------------------------------------------------------------- /.kube/experiment_1/8vCPU+64GB/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.22.2 2 | uvicorn[standard]==0.17.5 3 | Pillow==9.0.1 4 | onnxruntime==1.10.0 5 | fastapi==0.74.0 6 | python-multipart==0.0.5 7 | gunicorn==20.1.0 
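Side note on the experiment manifests above: every variant leaves `resources: {}` unset, so pod sizing comes entirely from the node shapes (2 vCPU + 4 GB, 4 vCPU + 8 GB, and so on). If you wanted to pin requests and limits explicitly instead, a minimal patch could look like the sketch below — the numbers are illustrative assumptions for a 2 vCPU / 4 GB node, not values taken from this repository:

```yaml
# Hypothetical resources patch (illustrative values only, not from the repo).
apiVersion: apps/v1
kind: Deployment
metadata:
  name: fastapi-server
spec:
  template:
    spec:
      containers:
        - name: fastapi-k8s
          resources:
            requests:
              cpu: "1500m"  # leave headroom for kube-system pods on a 2 vCPU node
              memory: "3Gi"
            limits:
              cpu: "2000m"
              memory: "4Gi"
```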
-------------------------------------------------------------------------------- /.kube/hpa.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: autoscaling/v1 2 | kind: HorizontalPodAutoscaler 3 | metadata: 4 | name: fastapi-server 5 | 6 | spec: 7 | scaleTargetRef: 8 | apiVersion: apps/v1 9 | kind: Deployment 10 | name: fastapi-server 11 | minReplicas: 1 12 | maxReplicas: 9 13 | targetCPUUtilizationPercentage: 80 -------------------------------------------------------------------------------- /.kube/kustomization.yaml: -------------------------------------------------------------------------------- 1 | commonLabels: 2 | app: fastapi-server 3 | resources: 4 | - deployment.yaml 5 | - hpa.yaml 6 | - service.yaml 7 | apiVersion: kustomize.config.k8s.io/v1beta1 8 | kind: Kustomization -------------------------------------------------------------------------------- /.kube/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | creationTimestamp: null 5 | labels: 6 | app: fastapi-server 7 | name: fastapi-server 8 | spec: 9 | ports: 10 | - port: 80 11 | protocol: TCP 12 | targetPort: 80 13 | selector: 14 | app: fastapi-server 15 | type: LoadBalancer 16 | status: 17 | loadBalancer: {} -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8 2 | 3 | WORKDIR /app 4 | 5 | # install dependencies 6 | COPY ./api/requirements.txt /app/requirements.txt 7 | RUN pip install --no-cache-dir --upgrade -r /app/requirements.txt 8 | 9 | # copy fastAPI app codebase 10 | COPY ./api /app 11 | 12 | # run the fastAPI app 13 | CMD ["uvicorn", "main:app", "--workers", "2", "--host", "0.0.0.0", "--port", "80"] -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2022 Sayak Paul and Chansung Park 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Deploying ML models with FastAPI, Docker, and Kubernetes 2 | 3 | *By: [Sayak Paul](https://github.com/sayakpaul) and [Chansung Park](https://github.com/deep-diver)* 4 | 5 |
6 |
7 | Figure developed by Chansung Park 8 |
9 | 10 | This project shows how to serve an ONNX-optimized image classification model as a 11 | RESTful web service with FastAPI, Docker, and Kubernetes (k8s). The idea is to first 12 | Dockerize the API and then deploy it on a k8s cluster running on [Google Kubernetes 13 | Engine (GKE)](https://cloud.google.com/kubernetes-engine). We do this integration 14 | using [GitHub Actions](https://github.com/features/actions). 15 | 16 | 👋 **Note**: Even though this project uses an image classification model, its structure and techniques can 17 | be used to serve other models as well. We also worked on a TF Serving equivalent 18 | of this project. Check it out [here](https://github.com/deep-diver/ml-deployment-k8s-tfserving). 19 | 20 | **Update July 19 2022**: This project won the [#TFCommunitySpotlight award](https://twitter.com/TensorFlow/status/1545115276152389636). 21 | 22 | ## Deploying the model as a service with k8s 23 | 24 | * We decouple the model optimization part from our API code. The optimization part is 25 | available within the `notebooks/TF_to_ONNX.ipynb` notebook. 26 | * Then we locally test the API. You can find the instructions within the `api` 27 | directory. 28 | * To deploy the API, we define our `deployment.yaml` workflow file inside `.github/workflows`. 29 | It does the following tasks: 30 | 31 | * Looks for any changes in the specified directories. If there are any changes: 32 | * Builds and pushes the latest Docker image to Google Container Registry (GCR). 33 | * Deploys the Docker container on the k8s cluster running on GKE. 34 | 35 | ## Configurations needed beforehand 36 | 37 | * Create a k8s cluster on GKE. [Here's](https://www.youtube.com/watch?v=hxpGC19PzwI) a 38 | relevant resource. We used 8 nodes (each with 2 vCPUs and 4 GB of RAM) for the cluster; a hedged `gcloud` sketch for creating a similar cluster is shown right after this list. 39 | * [Create](https://cloud.google.com/iam/docs/creating-managing-service-account-keys) a 40 | service account key (JSON) file. It's a good practice to only grant it the roles 41 | required for the project. For example, for this project, we created a fresh service 42 | account and granted it permissions for the following: Storage Admin, GKE Developer, and 43 | GCR Developer. 44 | * Create a secret named `GCP_CREDENTIALS` on your GitHub repository and copy-paste the 45 | contents of the service account key file into the secret. 46 | * Configure bucket storage related permissions for the service account: 47 | 48 | ```shell 49 | $ export PROJECT_ID= 50 | $ export ACCOUNT= 51 | 52 | $ gcloud -q projects add-iam-policy-binding ${PROJECT_ID} \ 53 | --member=serviceAccount:${ACCOUNT}@${PROJECT_ID}.iam.gserviceaccount.com \ 54 | --role roles/storage.admin 55 | 56 | $ gcloud -q projects add-iam-policy-binding ${PROJECT_ID} \ 57 | --member=serviceAccount:${ACCOUNT}@${PROJECT_ID}.iam.gserviceaccount.com \ 58 | --role roles/storage.objectAdmin 59 | 60 | $ gcloud -q projects add-iam-policy-binding ${PROJECT_ID} \ 61 | --member=serviceAccount:${ACCOUNT}@${PROJECT_ID}.iam.gserviceaccount.com \ 62 | --role roles/storage.objectCreator 63 | ``` 64 | * If you're on the `main` branch already, then upon a new push, the workflow defined 65 | in `.github/workflows/deployment.yaml` should automatically run.
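As a reference for the cluster-creation step above, a similar cluster could be spun up with a single `gcloud` command. This is a hedged sketch: the cluster name and zone mirror the `env` values in `.github/workflows/deployment.yaml`, while `e2-medium` (2 vCPUs, 4 GB RAM per node) is our assumption for the node shape, not a flag taken from this repository:

```shell
$ gcloud container clusters create fastapi-cluster \
    --zone us-central1-a \
    --num-nodes 8 \
    --machine-type e2-medium
```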
Here's what the 66 | final outputs should look like ([run link](https://github.com/sayakpaul/ml-deployment-k8s-fastapi/runs/5343002731)): 67 | 68 | ![](https://i.ibb.co/fDGFbpr/Screenshot-2022-03-01-at-12-25-42-PM.png) 69 | 70 | ## Notes 71 | 72 | * Since we use CPU-based pods within the k8s cluster, we apply ONNX optimizations, 73 | which are known to provide performance speed-ups in CPU-based environments. 74 | If you are using GPU-based pods then look into [TensorRT](https://developer.nvidia.com/tensorrt). 75 | * We use [Kustomize](https://kustomize.io) to manage the deployment on k8s. 76 | * We conducted load-testing varying the number of workers, RAM, nodes, etc. From that experiment, 77 | we found out that for our setup, 8 nodes (each having 2 vCPUs and 4 GB of RAM) work the best in terms of 78 | throughput and latency. The figure below summarizes our results: 79 | 80 | ![](https://i.ibb.co/NjFp3m9/fastapi-load-test-results.png) 81 | 82 | You can find the load-testing details under the `locust` directory. 83 | 84 | ## Querying the API endpoint 85 | 86 | From the workflow outputs, you should see something like this: 87 | 88 | ```shell 89 | NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE 90 | fastapi-server LoadBalancer xxxxxxxxxx xxxxxxxxxx 80:30768/TCP 23m 91 | kubernetes ClusterIP xxxxxxxxxx 443/TCP 160m 92 | ``` 93 | 94 | Note the `EXTERNAL-IP` corresponding to `fastapi-server` (if you have named 95 | your service that way). Then cURL it: 96 | 97 | ```shell 98 | curl -X POST -F image_file=@cat.jpg -F with_resize=True -F with_post_process=True http://{EXTERNAL-IP}:80/predict/image 99 | ``` 100 | 101 | You should get the following output (if you're using the `cat.jpg` image present 102 | in the `api` directory): 103 | 104 | ```shell 105 | "{\"Label\": \"tabby\", \"Score\": \"0.538\"}" 106 | ``` 107 | 108 | The request assumes that you have a file called `cat.jpg` present in your 109 | working directory. 110 | 111 | **Note** that if you don't see any external IP address in your GitHub Actions console log, 112 | then after successful deployment, do the following: 113 | 114 | ```sh 115 | # Authenticate to your GKE cluster. 116 | $ gcloud container clusters get-credentials ${GKE_CLUSTER} --zone ${GKE_ZONE} --project ${GCP_PROJECT_ID} 117 | $ kubectl get services -o wide 118 | ``` 119 | 120 | From there, note the external IP. 121 | 122 | ## Acknowledgements 123 | 124 | * [ML-GDE program](https://developers.google.com/programs/experts/) for providing GCP credit support. 125 | * [Hannes Hapke](https://www.linkedin.com/in/hanneshapke) for providing many insightful points for conducting load-tests. 126 | 127 | -------------------------------------------------------------------------------- /api/README.md: -------------------------------------------------------------------------------- 1 | This directory exposes the ONNX model we converted in [this notebook](https://github.com/sayakpaul/ml-deployment-k8s-fastapi/blob/main/notebooks/TF_to_ONNX.ipynb) as a REST API using [FastAPI](https://fastapi.tiangolo.com/). 
2 | 3 | ## Setup 4 | 5 | Install the dependencies: 6 | 7 | ```sh 8 | $ pip install -r requirements.txt 9 | ``` 10 | 11 | Download a test image: 12 | 13 | ```sh 14 | $ wget http://images.cocodataset.org/val2017/000000039769.jpg -O cat.jpg 15 | ``` 16 | 17 | ## Deploy locally 18 | 19 | ```sh 20 | $ uvicorn main:app --reload 21 | ``` 22 | 23 | It should show something like so: 24 | 25 | ```sh 26 | INFO: Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit) 27 | INFO: Started reloader process [79147] using statreload 28 | INFO: Started server process [79149] 29 | INFO: Waiting for application startup. 30 | INFO: Application startup complete. 31 | ``` 32 | 33 | Note the port number and run a request: 34 | 35 | ```sh 36 | $ curl -X POST -F image_file=@cat.jpg -F with_resize=True -F with_post_process=True http://localhost:8000/predict/image 37 | ``` 38 | 39 | It should output: 40 | 41 | ```sh 42 | "{\"Label\": \"tabby\", \"Score\": \"0.538\"}" 43 | ``` 44 | 45 | ### Client request code in Python 46 | 47 | ```python 48 | import requests 49 | 50 | url = "http://localhost:8000/predict/image" 51 | payload = {"with_resize": True, "with_post_process": True} 52 | files = {"image_file": open("cat.jpg", "rb")} 53 | 54 | resp = requests.post(url=url, data=payload, files=files) 55 | print(resp.json()) 56 | ``` 57 | -------------------------------------------------------------------------------- /api/cat.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sayakpaul/ml-deployment-k8s-fastapi/55b690520b943bf39dbb02a7f28fc1aba781e945/api/cat.jpg -------------------------------------------------------------------------------- /api/imagenet_classes.txt: -------------------------------------------------------------------------------- 1 | tench 2 | goldfish 3 | great white shark 4 | tiger shark 5 | hammerhead 6 | electric ray 7 | stingray 8 | cock 9 | hen 10 | ostrich 11 | brambling 12 | goldfinch 13 | house finch 14 | junco 15 | indigo bunting 16 | robin 17 | bulbul 18 | jay 19 | magpie 20 | chickadee 21 | water ouzel 22 | kite 23 | bald eagle 24 | vulture 25 | great grey owl 26 | European fire salamander 27 | common newt 28 | eft 29 | spotted salamander 30 | axolotl 31 | bullfrog 32 | tree frog 33 | tailed frog 34 | loggerhead 35 | leatherback turtle 36 | mud turtle 37 | terrapin 38 | box turtle 39 | banded gecko 40 | common iguana 41 | American chameleon 42 | whiptail 43 | agama 44 | frilled lizard 45 | alligator lizard 46 | Gila monster 47 | green lizard 48 | African chameleon 49 | Komodo dragon 50 | African crocodile 51 | American alligator 52 | triceratops 53 | thunder snake 54 | ringneck snake 55 | hognose snake 56 | green snake 57 | king snake 58 | garter snake 59 | water snake 60 | vine snake 61 | night snake 62 | boa constrictor 63 | rock python 64 | Indian cobra 65 | green mamba 66 | sea snake 67 | horned viper 68 | diamondback 69 | sidewinder 70 | trilobite 71 | harvestman 72 | scorpion 73 | black and gold garden spider 74 | barn spider 75 | garden spider 76 | black widow 77 | tarantula 78 | wolf spider 79 | tick 80 | centipede 81 | black grouse 82 | ptarmigan 83 | ruffed grouse 84 | prairie chicken 85 | peacock 86 | quail 87 | partridge 88 | African grey 89 | macaw 90 | sulphur-crested cockatoo 91 | lorikeet 92 | coucal 93 | bee eater 94 | hornbill 95 | hummingbird 96 | jacamar 97 | toucan 98 | drake 99 | red-breasted merganser 100 | goose 101 | black swan 102 | tusker 103 | echidna 104 | platypus 105 | wallaby 106 | koala 
107 | wombat 108 | jellyfish 109 | sea anemone 110 | brain coral 111 | flatworm 112 | nematode 113 | conch 114 | snail 115 | slug 116 | sea slug 117 | chiton 118 | chambered nautilus 119 | Dungeness crab 120 | rock crab 121 | fiddler crab 122 | king crab 123 | American lobster 124 | spiny lobster 125 | crayfish 126 | hermit crab 127 | isopod 128 | white stork 129 | black stork 130 | spoonbill 131 | flamingo 132 | little blue heron 133 | American egret 134 | bittern 135 | crane 136 | limpkin 137 | European gallinule 138 | American coot 139 | bustard 140 | ruddy turnstone 141 | red-backed sandpiper 142 | redshank 143 | dowitcher 144 | oystercatcher 145 | pelican 146 | king penguin 147 | albatross 148 | grey whale 149 | killer whale 150 | dugong 151 | sea lion 152 | Chihuahua 153 | Japanese spaniel 154 | Maltese dog 155 | Pekinese 156 | Shih-Tzu 157 | Blenheim spaniel 158 | papillon 159 | toy terrier 160 | Rhodesian ridgeback 161 | Afghan hound 162 | basset 163 | beagle 164 | bloodhound 165 | bluetick 166 | black-and-tan coonhound 167 | Walker hound 168 | English foxhound 169 | redbone 170 | borzoi 171 | Irish wolfhound 172 | Italian greyhound 173 | whippet 174 | Ibizan hound 175 | Norwegian elkhound 176 | otterhound 177 | Saluki 178 | Scottish deerhound 179 | Weimaraner 180 | Staffordshire bullterrier 181 | American Staffordshire terrier 182 | Bedlington terrier 183 | Border terrier 184 | Kerry blue terrier 185 | Irish terrier 186 | Norfolk terrier 187 | Norwich terrier 188 | Yorkshire terrier 189 | wire-haired fox terrier 190 | Lakeland terrier 191 | Sealyham terrier 192 | Airedale 193 | cairn 194 | Australian terrier 195 | Dandie Dinmont 196 | Boston bull 197 | miniature schnauzer 198 | giant schnauzer 199 | standard schnauzer 200 | Scotch terrier 201 | Tibetan terrier 202 | silky terrier 203 | soft-coated wheaten terrier 204 | West Highland white terrier 205 | Lhasa 206 | flat-coated retriever 207 | curly-coated retriever 208 | golden retriever 209 | Labrador retriever 210 | Chesapeake Bay retriever 211 | German short-haired pointer 212 | vizsla 213 | English setter 214 | Irish setter 215 | Gordon setter 216 | Brittany spaniel 217 | clumber 218 | English springer 219 | Welsh springer spaniel 220 | cocker spaniel 221 | Sussex spaniel 222 | Irish water spaniel 223 | kuvasz 224 | schipperke 225 | groenendael 226 | malinois 227 | briard 228 | kelpie 229 | komondor 230 | Old English sheepdog 231 | Shetland sheepdog 232 | collie 233 | Border collie 234 | Bouvier des Flandres 235 | Rottweiler 236 | German shepherd 237 | Doberman 238 | miniature pinscher 239 | Greater Swiss Mountain dog 240 | Bernese mountain dog 241 | Appenzeller 242 | EntleBucher 243 | boxer 244 | bull mastiff 245 | Tibetan mastiff 246 | French bulldog 247 | Great Dane 248 | Saint Bernard 249 | Eskimo dog 250 | malamute 251 | Siberian husky 252 | dalmatian 253 | affenpinscher 254 | basenji 255 | pug 256 | Leonberg 257 | Newfoundland 258 | Great Pyrenees 259 | Samoyed 260 | Pomeranian 261 | chow 262 | keeshond 263 | Brabancon griffon 264 | Pembroke 265 | Cardigan 266 | toy poodle 267 | miniature poodle 268 | standard poodle 269 | Mexican hairless 270 | timber wolf 271 | white wolf 272 | red wolf 273 | coyote 274 | dingo 275 | dhole 276 | African hunting dog 277 | hyena 278 | red fox 279 | kit fox 280 | Arctic fox 281 | grey fox 282 | tabby 283 | tiger cat 284 | Persian cat 285 | Siamese cat 286 | Egyptian cat 287 | cougar 288 | lynx 289 | leopard 290 | snow leopard 291 | jaguar 292 | lion 293 | tiger 294 | cheetah 295 | brown 
bear 296 | American black bear 297 | ice bear 298 | sloth bear 299 | mongoose 300 | meerkat 301 | tiger beetle 302 | ladybug 303 | ground beetle 304 | long-horned beetle 305 | leaf beetle 306 | dung beetle 307 | rhinoceros beetle 308 | weevil 309 | fly 310 | bee 311 | ant 312 | grasshopper 313 | cricket 314 | walking stick 315 | cockroach 316 | mantis 317 | cicada 318 | leafhopper 319 | lacewing 320 | dragonfly 321 | damselfly 322 | admiral 323 | ringlet 324 | monarch 325 | cabbage butterfly 326 | sulphur butterfly 327 | lycaenid 328 | starfish 329 | sea urchin 330 | sea cucumber 331 | wood rabbit 332 | hare 333 | Angora 334 | hamster 335 | porcupine 336 | fox squirrel 337 | marmot 338 | beaver 339 | guinea pig 340 | sorrel 341 | zebra 342 | hog 343 | wild boar 344 | warthog 345 | hippopotamus 346 | ox 347 | water buffalo 348 | bison 349 | ram 350 | bighorn 351 | ibex 352 | hartebeest 353 | impala 354 | gazelle 355 | Arabian camel 356 | llama 357 | weasel 358 | mink 359 | polecat 360 | black-footed ferret 361 | otter 362 | skunk 363 | badger 364 | armadillo 365 | three-toed sloth 366 | orangutan 367 | gorilla 368 | chimpanzee 369 | gibbon 370 | siamang 371 | guenon 372 | patas 373 | baboon 374 | macaque 375 | langur 376 | colobus 377 | proboscis monkey 378 | marmoset 379 | capuchin 380 | howler monkey 381 | titi 382 | spider monkey 383 | squirrel monkey 384 | Madagascar cat 385 | indri 386 | Indian elephant 387 | African elephant 388 | lesser panda 389 | giant panda 390 | barracouta 391 | eel 392 | coho 393 | rock beauty 394 | anemone fish 395 | sturgeon 396 | gar 397 | lionfish 398 | puffer 399 | abacus 400 | abaya 401 | academic gown 402 | accordion 403 | acoustic guitar 404 | aircraft carrier 405 | airliner 406 | airship 407 | altar 408 | ambulance 409 | amphibian 410 | analog clock 411 | apiary 412 | apron 413 | ashcan 414 | assault rifle 415 | backpack 416 | bakery 417 | balance beam 418 | balloon 419 | ballpoint 420 | Band Aid 421 | banjo 422 | bannister 423 | barbell 424 | barber chair 425 | barbershop 426 | barn 427 | barometer 428 | barrel 429 | barrow 430 | baseball 431 | basketball 432 | bassinet 433 | bassoon 434 | bathing cap 435 | bath towel 436 | bathtub 437 | beach wagon 438 | beacon 439 | beaker 440 | bearskin 441 | beer bottle 442 | beer glass 443 | bell cote 444 | bib 445 | bicycle-built-for-two 446 | bikini 447 | binder 448 | binoculars 449 | birdhouse 450 | boathouse 451 | bobsled 452 | bolo tie 453 | bonnet 454 | bookcase 455 | bookshop 456 | bottlecap 457 | bow 458 | bow tie 459 | brass 460 | brassiere 461 | breakwater 462 | breastplate 463 | broom 464 | bucket 465 | buckle 466 | bulletproof vest 467 | bullet train 468 | butcher shop 469 | cab 470 | caldron 471 | candle 472 | cannon 473 | canoe 474 | can opener 475 | cardigan 476 | car mirror 477 | carousel 478 | carpenter's kit 479 | carton 480 | car wheel 481 | cash machine 482 | cassette 483 | cassette player 484 | castle 485 | catamaran 486 | CD player 487 | cello 488 | cellular telephone 489 | chain 490 | chainlink fence 491 | chain mail 492 | chain saw 493 | chest 494 | chiffonier 495 | chime 496 | china cabinet 497 | Christmas stocking 498 | church 499 | cinema 500 | cleaver 501 | cliff dwelling 502 | cloak 503 | clog 504 | cocktail shaker 505 | coffee mug 506 | coffeepot 507 | coil 508 | combination lock 509 | computer keyboard 510 | confectionery 511 | container ship 512 | convertible 513 | corkscrew 514 | cornet 515 | cowboy boot 516 | cowboy hat 517 | cradle 518 | crane 519 | crash helmet 520 | crate 521 | 
crib 522 | Crock Pot 523 | croquet ball 524 | crutch 525 | cuirass 526 | dam 527 | desk 528 | desktop computer 529 | dial telephone 530 | diaper 531 | digital clock 532 | digital watch 533 | dining table 534 | dishrag 535 | dishwasher 536 | disk brake 537 | dock 538 | dogsled 539 | dome 540 | doormat 541 | drilling platform 542 | drum 543 | drumstick 544 | dumbbell 545 | Dutch oven 546 | electric fan 547 | electric guitar 548 | electric locomotive 549 | entertainment center 550 | envelope 551 | espresso maker 552 | face powder 553 | feather boa 554 | file 555 | fireboat 556 | fire engine 557 | fire screen 558 | flagpole 559 | flute 560 | folding chair 561 | football helmet 562 | forklift 563 | fountain 564 | fountain pen 565 | four-poster 566 | freight car 567 | French horn 568 | frying pan 569 | fur coat 570 | garbage truck 571 | gasmask 572 | gas pump 573 | goblet 574 | go-kart 575 | golf ball 576 | golfcart 577 | gondola 578 | gong 579 | gown 580 | grand piano 581 | greenhouse 582 | grille 583 | grocery store 584 | guillotine 585 | hair slide 586 | hair spray 587 | half track 588 | hammer 589 | hamper 590 | hand blower 591 | hand-held computer 592 | handkerchief 593 | hard disc 594 | harmonica 595 | harp 596 | harvester 597 | hatchet 598 | holster 599 | home theater 600 | honeycomb 601 | hook 602 | hoopskirt 603 | horizontal bar 604 | horse cart 605 | hourglass 606 | iPod 607 | iron 608 | jack-o'-lantern 609 | jean 610 | jeep 611 | jersey 612 | jigsaw puzzle 613 | jinrikisha 614 | joystick 615 | kimono 616 | knee pad 617 | knot 618 | lab coat 619 | ladle 620 | lampshade 621 | laptop 622 | lawn mower 623 | lens cap 624 | letter opener 625 | library 626 | lifeboat 627 | lighter 628 | limousine 629 | liner 630 | lipstick 631 | Loafer 632 | lotion 633 | loudspeaker 634 | loupe 635 | lumbermill 636 | magnetic compass 637 | mailbag 638 | mailbox 639 | maillot 640 | maillot 641 | manhole cover 642 | maraca 643 | marimba 644 | mask 645 | matchstick 646 | maypole 647 | maze 648 | measuring cup 649 | medicine chest 650 | megalith 651 | microphone 652 | microwave 653 | military uniform 654 | milk can 655 | minibus 656 | miniskirt 657 | minivan 658 | missile 659 | mitten 660 | mixing bowl 661 | mobile home 662 | Model T 663 | modem 664 | monastery 665 | monitor 666 | moped 667 | mortar 668 | mortarboard 669 | mosque 670 | mosquito net 671 | motor scooter 672 | mountain bike 673 | mountain tent 674 | mouse 675 | mousetrap 676 | moving van 677 | muzzle 678 | nail 679 | neck brace 680 | necklace 681 | nipple 682 | notebook 683 | obelisk 684 | oboe 685 | ocarina 686 | odometer 687 | oil filter 688 | organ 689 | oscilloscope 690 | overskirt 691 | oxcart 692 | oxygen mask 693 | packet 694 | paddle 695 | paddlewheel 696 | padlock 697 | paintbrush 698 | pajama 699 | palace 700 | panpipe 701 | paper towel 702 | parachute 703 | parallel bars 704 | park bench 705 | parking meter 706 | passenger car 707 | patio 708 | pay-phone 709 | pedestal 710 | pencil box 711 | pencil sharpener 712 | perfume 713 | Petri dish 714 | photocopier 715 | pick 716 | pickelhaube 717 | picket fence 718 | pickup 719 | pier 720 | piggy bank 721 | pill bottle 722 | pillow 723 | ping-pong ball 724 | pinwheel 725 | pirate 726 | pitcher 727 | plane 728 | planetarium 729 | plastic bag 730 | plate rack 731 | plow 732 | plunger 733 | Polaroid camera 734 | pole 735 | police van 736 | poncho 737 | pool table 738 | pop bottle 739 | pot 740 | potter's wheel 741 | power drill 742 | prayer rug 743 | printer 744 | prison 745 | projectile 746 | 
projector 747 | puck 748 | punching bag 749 | purse 750 | quill 751 | quilt 752 | racer 753 | racket 754 | radiator 755 | radio 756 | radio telescope 757 | rain barrel 758 | recreational vehicle 759 | reel 760 | reflex camera 761 | refrigerator 762 | remote control 763 | restaurant 764 | revolver 765 | rifle 766 | rocking chair 767 | rotisserie 768 | rubber eraser 769 | rugby ball 770 | rule 771 | running shoe 772 | safe 773 | safety pin 774 | saltshaker 775 | sandal 776 | sarong 777 | sax 778 | scabbard 779 | scale 780 | school bus 781 | schooner 782 | scoreboard 783 | screen 784 | screw 785 | screwdriver 786 | seat belt 787 | sewing machine 788 | shield 789 | shoe shop 790 | shoji 791 | shopping basket 792 | shopping cart 793 | shovel 794 | shower cap 795 | shower curtain 796 | ski 797 | ski mask 798 | sleeping bag 799 | slide rule 800 | sliding door 801 | slot 802 | snorkel 803 | snowmobile 804 | snowplow 805 | soap dispenser 806 | soccer ball 807 | sock 808 | solar dish 809 | sombrero 810 | soup bowl 811 | space bar 812 | space heater 813 | space shuttle 814 | spatula 815 | speedboat 816 | spider web 817 | spindle 818 | sports car 819 | spotlight 820 | stage 821 | steam locomotive 822 | steel arch bridge 823 | steel drum 824 | stethoscope 825 | stole 826 | stone wall 827 | stopwatch 828 | stove 829 | strainer 830 | streetcar 831 | stretcher 832 | studio couch 833 | stupa 834 | submarine 835 | suit 836 | sundial 837 | sunglass 838 | sunglasses 839 | sunscreen 840 | suspension bridge 841 | swab 842 | sweatshirt 843 | swimming trunks 844 | swing 845 | switch 846 | syringe 847 | table lamp 848 | tank 849 | tape player 850 | teapot 851 | teddy 852 | television 853 | tennis ball 854 | thatch 855 | theater curtain 856 | thimble 857 | thresher 858 | throne 859 | tile roof 860 | toaster 861 | tobacco shop 862 | toilet seat 863 | torch 864 | totem pole 865 | tow truck 866 | toyshop 867 | tractor 868 | trailer truck 869 | tray 870 | trench coat 871 | tricycle 872 | trimaran 873 | tripod 874 | triumphal arch 875 | trolleybus 876 | trombone 877 | tub 878 | turnstile 879 | typewriter keyboard 880 | umbrella 881 | unicycle 882 | upright 883 | vacuum 884 | vase 885 | vault 886 | velvet 887 | vending machine 888 | vestment 889 | viaduct 890 | violin 891 | volleyball 892 | waffle iron 893 | wall clock 894 | wallet 895 | wardrobe 896 | warplane 897 | washbasin 898 | washer 899 | water bottle 900 | water jug 901 | water tower 902 | whiskey jug 903 | whistle 904 | wig 905 | window screen 906 | window shade 907 | Windsor tie 908 | wine bottle 909 | wing 910 | wok 911 | wooden spoon 912 | wool 913 | worm fence 914 | wreck 915 | yawl 916 | yurt 917 | web site 918 | comic book 919 | crossword puzzle 920 | street sign 921 | traffic light 922 | book jacket 923 | menu 924 | plate 925 | guacamole 926 | consomme 927 | hot pot 928 | trifle 929 | ice cream 930 | ice lolly 931 | French loaf 932 | bagel 933 | pretzel 934 | cheeseburger 935 | hotdog 936 | mashed potato 937 | head cabbage 938 | broccoli 939 | cauliflower 940 | zucchini 941 | spaghetti squash 942 | acorn squash 943 | butternut squash 944 | cucumber 945 | artichoke 946 | bell pepper 947 | cardoon 948 | mushroom 949 | Granny Smith 950 | strawberry 951 | orange 952 | lemon 953 | fig 954 | pineapple 955 | banana 956 | jackfruit 957 | custard apple 958 | pomegranate 959 | hay 960 | carbonara 961 | chocolate sauce 962 | dough 963 | meat loaf 964 | pizza 965 | potpie 966 | burrito 967 | red wine 968 | espresso 969 | cup 970 | eggnog 971 | alp 972 | bubble 973 | 
cliff 974 | coral reef 975 | geyser 976 | lakeside 977 | promontory 978 | sandbar 979 | seashore 980 | valley 981 | volcano 982 | ballplayer 983 | groom 984 | scuba diver 985 | rapeseed 986 | daisy 987 | yellow lady's slipper 988 | corn 989 | acorn 990 | hip 991 | buckeye 992 | coral fungus 993 | agaric 994 | gyromitra 995 | stinkhorn 996 | earthstar 997 | hen-of-the-woods 998 | bolete 999 | ear 1000 | toilet tissue -------------------------------------------------------------------------------- /api/main.py: -------------------------------------------------------------------------------- 1 | """ 2 | Adapted from: 3 | 4 | (1) https://github.com/shanesoh/deploy-ml-fastapi-redis-docker/ 5 | (2) https://github.com/aniketmaurya/tensorflow-fastapi-starter-pack 6 | """ 7 | 8 | import urllib.request 9 | 10 | import onnxruntime as ort 11 | from fastapi import FastAPI, File, Form, HTTPException 12 | 13 | from utils import decode_predictions, get_latest_model_url, prepare_image 14 | 15 | app = FastAPI(title="ONNX image classification API") 16 | 17 | MODEL_FN = "resnet50_w_preprocessing.onnx" 18 | DEFAULT_MODEL_URL = f"https://github.com/sayakpaul/ml-deployment-k8s-fastapi/releases/download/v1.0.0/{MODEL_FN}" 19 | 20 | 21 | @app.get("/") 22 | async def home(): 23 | return "Welcome!" 24 | 25 | 26 | @app.on_event("startup") 27 | def load_modules(): 28 | model_url = get_latest_model_url() 29 | 30 | # If there's no latest ONNX model released, fall back to the default model. 31 | if model_url is not None: 32 | urllib.request.urlretrieve(model_url, MODEL_FN) 33 | else: 34 | urllib.request.urlretrieve(DEFAULT_MODEL_URL, MODEL_FN) 35 | 36 | global resnet_model_sess 37 | resnet_model_sess = ort.InferenceSession(MODEL_FN) 38 | 39 | category_filename = "imagenet_classes.txt" 40 | category_url = f"https://raw.githubusercontent.com/pytorch/hub/master/{category_filename}" 41 | urllib.request.urlretrieve(category_url, category_filename) 42 | 43 | global imagenet_categories 44 | with open(category_filename, "r") as f: 45 | imagenet_categories = [s.strip() for s in f.readlines()] 46 | 47 | 48 | @app.post("/predict/image") 49 | async def predict_api( 50 | image_file: bytes = File(...), 51 | with_resize: bool = Form(...), 52 | with_post_process: bool = Form(...), 53 | ): 54 | image = prepare_image(image_file, with_resize) 55 | 56 | if len(image.shape) != 4: 57 | raise HTTPException( 58 | status_code=400, detail="Only 3-channel RGB images are supported."
59 | ) 60 | 61 | predictions = resnet_model_sess.run(None, {"image_input": image})[0] 62 | if with_post_process: 63 | response_dict = decode_predictions(predictions, imagenet_categories) 64 | return response_dict 65 | else: 66 | return "OK" 67 |
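Given the `/predict/image` endpoint above, a minimal client sketch (not part of the repo; the host, port, and test image are assumptions) might look like this:

```python
# Hypothetical client for the /predict/image endpoint defined in api/main.py.
# Assumes the server was started locally, e.g. `uvicorn main:app --port 8000`,
# and that a 224x224 test image is available in the working directory.
import requests

with open("cat_224x224.jpg", "rb") as f:
    response = requests.post(
        "http://localhost:8000/predict/image",
        files={"image_file": f},
        # These form fields map to the bool Form(...) parameters of predict_api.
        data={"with_resize": False, "with_post_process": True},
    )
print(response.status_code, response.json())
```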
-------------------------------------------------------------------------------- /api/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.22.2 2 | uvicorn[standard]==0.17.5 3 | Pillow==9.0.1 4 | onnxruntime==1.10.0 5 | fastapi==0.74.0 6 | python-multipart==0.0.5 7 | pydantic==1.9.0 8 | PyGithub[integrations]==1.55 -------------------------------------------------------------------------------- /api/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils import decode_predictions, get_latest_model_url, prepare_image 2 | -------------------------------------------------------------------------------- /api/utils/utils.py: -------------------------------------------------------------------------------- 1 | import io 2 | import json 3 | from typing import Dict, List, Optional 4 | 5 | import numpy as np 6 | import requests 7 | from fastapi import HTTPException 8 | from github import Github 9 | from PIL import Image 10 | 11 | TARGET_IMG_WIDTH = 224 12 | TARGET_IMG_HEIGHT = 224 13 | 14 | 15 | def get_latest_model_url() -> Optional[str]: 16 | """Gets the model download URL from the latest release artifacts.""" 17 | g = Github() 18 | 19 | repo = g.get_repo("sayakpaul/ml-deployment-k8s-fastapi") 20 | latest_release = repo.get_latest_release() 21 | assets = list(latest_release.get_assets()) 22 | 23 | download_url = None 24 | 25 | for asset in assets: 26 | if "onnx" in asset.name: 27 | asset_url = asset.url 28 | r = requests.get(asset_url) 29 | response = json.loads(r.text) 30 | download_url = response["browser_download_url"] 31 | 32 | return download_url 33 | 34 | 35 | def raise_http_exception(msg): 36 | """Raises an HTTPException with status code 400.""" 37 | raise HTTPException(status_code=400, detail=msg) 38 | 39 | 40 | def prepare_image(image_file: bytes, with_resizing: bool = False) -> np.ndarray: 41 | """Prepares an image for model prediction.""" 42 | image = Image.open(io.BytesIO(image_file)) 43 | width, height = image.size 44 | 45 | if image.format not in ["JPEG", "JPG", "PNG"]: 46 | raise_http_exception("Supported formats are JPEG, JPG, and PNG.") 47 | 48 | if with_resizing: 49 | image = image.resize((TARGET_IMG_WIDTH, TARGET_IMG_HEIGHT)) 50 | else: 51 | if width != TARGET_IMG_WIDTH or height != TARGET_IMG_HEIGHT: 52 | raise_http_exception("Image size must be 224x224.") 53 | 54 | image = np.array(image).astype("float32") 55 | return np.expand_dims(image, 0) 56 | 57 | 58 | def decode_predictions( 59 | predictions: np.ndarray, imagenet_categories: List[str] 60 | ) -> Dict[str, float]: 61 | """Decodes model predictions.""" 62 | predictions = np.squeeze(predictions) 63 | pred_name = imagenet_categories[int(predictions.argmax())] 64 | response_dict = {"Label": pred_name, "Score": f"{predictions.max():.3f}"} 65 | 66 | return response_dict 67 | -------------------------------------------------------------------------------- /locust/README.md: -------------------------------------------------------------------------------- 1 | # Load Test with Locust 2 | 3 | This directory contains a Locust script for load testing. 4 | 5 | ## How to set up 6 | 7 | 1. Installation 8 | 9 | ```bash 10 | pip3 install locust 11 | ``` 12 | 13 | 2. Run 14 | 15 | ```bash 16 | # with UI 17 | $ locust 18 | 19 | OR 20 | 21 | $ locust --users NUM_OF_USERS \ 22 | --spawn-rate SPAWN_RATE \ 23 | --host HOST_ADDRESS 24 | 25 | # without UI & manual config 26 | # the report will be written to report.html 27 | $ locust --headless \ 28 | --users NUM_OF_USERS \ 29 | --spawn-rate SPAWN_RATE \ 30 | --host HOST_ADDRESS \ 31 | --html report.html 32 | 33 | # without UI & auto config 34 | $ locust --config=load_test.conf 35 | ``` 36 | 37 | ## Notes 38 | 39 | * We used an `n1-standard` VM (4vCPU + 16GB RAM) on GCP in the `us-central1` region 40 | since the nodes on GKE are also located there. 41 | * Before running the load test, don't forget to replace `<>` with the endpoint of 42 | your API in `load_test.conf`. 43 | * We prepared a 224x224 resized image beforehand (`cat_224x224.jpg`). 44 | This keeps the focus on load testing the server side by minimizing the time 45 | spent on pre- and post-processing. 46 | -------------------------------------------------------------------------------- /locust/cat.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sayakpaul/ml-deployment-k8s-fastapi/55b690520b943bf39dbb02a7f28fc1aba781e945/locust/cat.jpg -------------------------------------------------------------------------------- /locust/cat_224x224.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sayakpaul/ml-deployment-k8s-fastapi/55b690520b943bf39dbb02a7f28fc1aba781e945/locust/cat_224x224.jpg -------------------------------------------------------------------------------- /locust/load_test.conf: -------------------------------------------------------------------------------- 1 | locustfile = locust.py 2 | headless = false 3 | users = 150 4 | spawn-rate = 1 5 | run-time = 5m 6 | host = http://<> 7 | html = reports/locust_report.html 8 | csv = reports/locust_report -------------------------------------------------------------------------------- /locust/locust.py: -------------------------------------------------------------------------------- 1 | from locust import HttpUser, constant, task 2 | 3 | 4 | class ImgClassificationUser(HttpUser): 5 | wait_time = constant(1) 6 | 7 | @task 8 | def predict(self): 9 | payload = {"with_resize": False, "with_post_process": False} 10 | with open("cat_224x224.jpg", "rb") as attach: 11 | _ = self.client.post( 12 | "/predict/image", files={"image_file": attach}, data=payload 13 | ) 14 |
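One possible refinement of `locust.py` (a sketch, not part of the repo): read the image bytes once per simulated user in `on_start`, so per-request disk I/O stays out of the measured latency, in line with the Notes in the README above:

```python
# Hypothetical variant of locust.py; loads the payload once per simulated user.
from locust import HttpUser, constant, task


class PreloadedImgClassificationUser(HttpUser):
    wait_time = constant(1)

    def on_start(self):
        # Runs once when a simulated user starts.
        with open("cat_224x224.jpg", "rb") as f:
            self.image_bytes = f.read()

    @task
    def predict(self):
        payload = {"with_resize": False, "with_post_process": False}
        _ = self.client.post(
            "/predict/image",
            files={"image_file": ("cat_224x224.jpg", self.image_bytes)},
            data=payload,
        )
```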
-------------------------------------------------------------------------------- /notebooks/TF_Serving.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "6FvzT_HQXz9J" 7 | }, 8 | "source": [ 9 | "# Verifying workable TF Serving\n", 10 | "\n", 11 | "This tutorial shows:\n", 12 | "- how to run TF Serving for a custom model in a Docker container\n", 13 | "- how to request predictions via both gRPC and RestAPI calls\n", 14 | "- the prediction timing results from TF Serving\n", 15 | "\n", 16 | "This notebook was written by referencing the [official TF Serving gRPC example](https://github.com/tensorflow/serving/blob/master/tensorflow_serving/example/resnet_k8s.yaml) and the [official TF Serving RestAPI example](https://www.tensorflow.org/tfx/tutorials/serving/rest_simple)." 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": { 22 | "id": "Com8Mcu2Xz9L" 23 | }, 24 | "source": [ 25 | "### Imports" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": { 32 | "id": "b-aGIWy8c2Ht" 33 | }, 34 | "outputs": [], 35 | "source": [ 36 | "!pip install -q requests\n", 37 | "!pip install -q tensorflow-serving-api" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 2, 43 | "metadata": { 44 | "id": "6lQVylcMXz9N" 45 | }, 46 | "outputs": [], 47 | "source": [ 48 | "import os\n", 49 | "import tempfile\n", 50 | "import pandas as pd\n", 51 | "import tensorflow as tf\n", 52 | "import numpy as np\n", 53 | "import json\n", 54 | "import requests\n", 55 | "\n", 56 | "# gRPC request specific imports\n", 57 | "import grpc\n", 58 | "from tensorflow_serving.apis import predict_pb2\n", 59 | "from tensorflow_serving.apis import prediction_service_pb2_grpc" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": { 65 | "id": "GoIj2728pLyw" 66 | }, 67 | "source": [ 68 | "## Model" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": { 74 | "id": "3xmYCIWpXz9N" 75 | }, 76 | "source": [ 77 | "### Get a sample model \n", 78 | "\n", 79 | "The target model is the plain `ResNet50` trained on ImageNet." 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 3, 85 | "metadata": { 86 | "colab": { 87 | "base_uri": "https://localhost:8080/" 88 | }, 89 | "id": "VysQtJQnXz9O", 90 | "outputId": "c63abf81-65e7-48c9-9e71-d16108da2d2a" 91 | }, 92 | "outputs": [ 93 | { 94 | "name": "stdout", 95 | "output_type": "stream", 96 | "text": [ 97 | "Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels.h5\n", 98 | "102973440/102967424 [==============================] - 2s 0us/step\n", 99 | "102981632/102967424 [==============================] - 2s 0us/step\n" 100 | ] 101 | } 102 | ], 103 | "source": [ 104 | "core = tf.keras.applications.ResNet50(include_top=True, input_shape=(224, 224, 3))\n", 105 | "\n", 106 | "inputs = tf.keras.layers.Input(shape=(224, 224, 3), name=\"image_input\")\n", 107 | "preprocess = tf.keras.applications.resnet50.preprocess_input(inputs)\n", 108 | "outputs = core(preprocess, training=False)\n", 109 | "model = tf.keras.Model(inputs=[inputs], outputs=[outputs])" 110 | ] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": { 115 | "id": "p3bC--0GXz9O" 116 | }, 117 | "source": [ 118 | "### Save the model\n", 119 | "\n", 120 | "The code below saves the model under `MODEL_DIR`." 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 4, 126 | "metadata": { 127 | "colab": { 128 | "base_uri": "https://localhost:8080/" 129 | }, 130 | "id": "z9AmyovhXz9O", 131 | "outputId": "c26eadf0-e06e-45e4-a40d-e00b5343154e" 132 | }, 133 | "outputs": [ 134 | { 135 | "name": "stdout", 136 | "output_type": "stream", 137 | "text": [ 138 | "export_path = /tmp/1\n", 139 | "\n", 140 | "WARNING:tensorflow:Compiled the loaded model, but the compiled metrics have yet to be built. 
`model.compile_metrics` will be empty until you train or evaluate the model.\n", 141 | "INFO:tensorflow:Assets written to: /tmp/1/assets\n", 142 | "\n", 143 | "Saved model:\n", 144 | "total 4040\n", 145 | "drwxr-xr-x 2 root root 4096 Mar 23 07:32 assets\n", 146 | "-rw-r--r-- 1 root root 557217 Mar 23 07:32 keras_metadata.pb\n", 147 | "-rw-r--r-- 1 root root 3565545 Mar 23 07:32 saved_model.pb\n", 148 | "drwxr-xr-x 2 root root 4096 Mar 23 07:32 variables\n" 149 | ] 150 | } 151 | ], 152 | "source": [ 153 | "MODEL_DIR = tempfile.gettempdir()\n", 154 | "version = 1\n", 155 | "export_path = os.path.join(MODEL_DIR, str(version))\n", 156 | "print('export_path = {}\\n'.format(export_path))\n", 157 | "\n", 158 | "tf.keras.models.save_model(\n", 159 | " model,\n", 160 | " export_path,\n", 161 | " overwrite=True,\n", 162 | " include_optimizer=True,\n", 163 | " save_format=None,\n", 164 | " signatures=None,\n", 165 | " options=None\n", 166 | ")\n", 167 | "\n", 168 | "print('\\nSaved model:')\n", 169 | "!ls -l {export_path}" 170 | ] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "metadata": { 175 | "id": "VV7onOD2Xz9P" 176 | }, 177 | "source": [ 178 | "### Examine your saved model" 179 | ] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": { 184 | "id": "baanYnt8ohM7" 185 | }, 186 | "source": [ 187 | "TensorFlow comes with a handy `saved_model_cli` tool to investigate saved models.\n", 188 | "\n", 189 | "Notice from `signature_def['serving_default']:` \n", 190 | "- the input name is `image_input`\n", 191 | "- the output name is `resnet50`\n", 192 | "\n", 193 | "You need to know these to make requests to the TF Serving server later." 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": 5, 199 | "metadata": { 200 | "colab": { 201 | "base_uri": "https://localhost:8080/" 202 | }, 203 | "id": "Lgzz06XoXz9Q", 204 | "outputId": "c51a85f9-c6bf-4e7e-f710-2a572fde45d6" 205 | }, 206 | "outputs": [ 207 | { 208 | "name": "stdout", 209 | "output_type": "stream", 210 | "text": [ 211 | "\n", 212 | "MetaGraphDef with tag-set: 'serve' contains the following SignatureDefs:\n", 213 | "\n", 214 | "signature_def['__saved_model_init_op']:\n", 215 | " The given SavedModel SignatureDef contains the following input(s):\n", 216 | " The given SavedModel SignatureDef contains the following output(s):\n", 217 | " outputs['__saved_model_init_op'] tensor_info:\n", 218 | " dtype: DT_INVALID\n", 219 | " shape: unknown_rank\n", 220 | " name: NoOp\n", 221 | " Method name is: \n", 222 | "\n", 223 | "signature_def['serving_default']:\n", 224 | " The given SavedModel SignatureDef contains the following input(s):\n", 225 | " inputs['image_input'] tensor_info:\n", 226 | " dtype: DT_FLOAT\n", 227 | " shape: (-1, 224, 224, 3)\n", 228 | " name: serving_default_image_input:0\n", 229 | " The given SavedModel SignatureDef contains the following output(s):\n", 230 | " outputs['resnet50'] tensor_info:\n", 231 | " dtype: DT_FLOAT\n", 232 | " shape: (-1, 1000)\n", 233 | " name: StatefulPartitionedCall:0\n", 234 | " Method name is: tensorflow/serving/predict\n", 235 | "\n", 236 | "Concrete Functions:\n", 237 | " Function Name: '__call__'\n", 238 | " Option #1\n", 239 | " Callable with:\n", 240 | " Argument #1\n", 241 | " inputs: TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name='inputs')\n", 242 | " Argument #2\n", 243 | " DType: bool\n", 244 | " Value: False\n", 245 | " Argument #3\n", 246 | " DType: NoneType\n", 247 | " Value: None\n", 248 | " Option #2\n", 249 | " Callable with:\n", 250 | "
Argument #1\n", 251 | " image_input: TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name='image_input')\n", 252 | " Argument #2\n", 253 | " DType: bool\n", 254 | " Value: False\n", 255 | " Argument #3\n", 256 | " DType: NoneType\n", 257 | " Value: None\n", 258 | " Option #3\n", 259 | " Callable with:\n", 260 | " Argument #1\n", 261 | " inputs: TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name='inputs')\n", 262 | " Argument #2\n", 263 | " DType: bool\n", 264 | " Value: True\n", 265 | " Argument #3\n", 266 | " DType: NoneType\n", 267 | " Value: None\n", 268 | " Option #4\n", 269 | " Callable with:\n", 270 | " Argument #1\n", 271 | " image_input: TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name='image_input')\n", 272 | " Argument #2\n", 273 | " DType: bool\n", 274 | " Value: True\n", 275 | " Argument #3\n", 276 | " DType: NoneType\n", 277 | " Value: None\n", 278 | "\n", 279 | " Function Name: '_default_save_signature'\n", 280 | " Option #1\n", 281 | " Callable with:\n", 282 | " Argument #1\n", 283 | " image_input: TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name='image_input')\n", 284 | "\n", 285 | " Function Name: 'call_and_return_all_conditional_losses'\n", 286 | " Option #1\n", 287 | " Callable with:\n", 288 | " Argument #1\n", 289 | " image_input: TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name='image_input')\n", 290 | " Argument #2\n", 291 | " DType: bool\n", 292 | " Value: True\n", 293 | " Argument #3\n", 294 | " DType: NoneType\n", 295 | " Value: None\n", 296 | " Option #2\n", 297 | " Callable with:\n", 298 | " Argument #1\n", 299 | " image_input: TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name='image_input')\n", 300 | " Argument #2\n", 301 | " DType: bool\n", 302 | " Value: False\n", 303 | " Argument #3\n", 304 | " DType: NoneType\n", 305 | " Value: None\n", 306 | " Option #3\n", 307 | " Callable with:\n", 308 | " Argument #1\n", 309 | " inputs: TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name='inputs')\n", 310 | " Argument #2\n", 311 | " DType: bool\n", 312 | " Value: True\n", 313 | " Argument #3\n", 314 | " DType: NoneType\n", 315 | " Value: None\n", 316 | " Option #4\n", 317 | " Callable with:\n", 318 | " Argument #1\n", 319 | " inputs: TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name='inputs')\n", 320 | " Argument #2\n", 321 | " DType: bool\n", 322 | " Value: False\n", 323 | " Argument #3\n", 324 | " DType: NoneType\n", 325 | " Value: None\n" 326 | ] 327 | } 328 | ], 329 | "source": [ 330 | "!saved_model_cli show --dir {export_path} --all" 331 | ] 332 | }, 333 | { 334 | "cell_type": "markdown", 335 | "metadata": { 336 | "id": "5NBTdC7jXz9Q" 337 | }, 338 | "source": [ 339 | "## TF Serving" 340 | ] 341 | }, 342 | { 343 | "cell_type": "markdown", 344 | "metadata": { 345 | "id": "g4G_oWb_plDI" 346 | }, 347 | "source": [ 348 | "### Create dummy data\n", 349 | "\n", 350 | "The dummy data simply contains random numbers with a batch size of 32."
351 | ] 352 | }, 353 | { 354 | "cell_type": "code", 355 | "execution_count": 6, 356 | "metadata": { 357 | "colab": { 358 | "base_uri": "https://localhost:8080/" 359 | }, 360 | "id": "loMcfbfTXz9S", 361 | "outputId": "0b18aed6-af3b-4e06-b5ae-e336305b0d5e" 362 | }, 363 | "outputs": [ 364 | { 365 | "data": { 366 | "text/plain": [ 367 | "TensorShape([32, 224, 224, 3])" 368 | ] 369 | }, 370 | "execution_count": 6, 371 | "metadata": {}, 372 | "output_type": "execute_result" 373 | } 374 | ], 375 | "source": [ 376 | "dummy_inputs = tf.random.normal((32, 224, 224, 3))\n", 377 | "dummy_inputs.shape" 378 | ] 379 | }, 380 | { 381 | "cell_type": "markdown", 382 | "metadata": { 383 | "id": "ZBxKquAwpCEh" 384 | }, 385 | "source": [ 386 | "### Install TF Serving tool" 387 | ] 388 | }, 389 | { 390 | "cell_type": "code", 391 | "execution_count": null, 392 | "metadata": { 393 | "id": "mDVz8VnnXz9Q" 394 | }, 395 | "outputs": [], 396 | "source": [ 397 | "!echo \"deb http://storage.googleapis.com/tensorflow-serving-apt stable tensorflow-model-server tensorflow-model-server-universal\" | sudo tee /etc/apt/sources.list.d/tensorflow-serving.list && \\\n", 398 | "curl https://storage.googleapis.com/tensorflow-serving-apt/tensorflow-serving.release.pub.gpg | sudo apt-key add -\n", 399 | "!sudo apt update" 400 | ] 401 | }, 402 | { 403 | "cell_type": "code", 404 | "execution_count": null, 405 | "metadata": { 406 | "id": "u2HQb4q6sonS" 407 | }, 408 | "outputs": [], 409 | "source": [ 410 | "!sudo apt-get install tensorflow-model-server" 411 | ] 412 | }, 413 | { 414 | "cell_type": "markdown", 415 | "metadata": { 416 | "id": "jn9oNYM7pYcv" 417 | }, 418 | "source": [ 419 | "### Run TF Serving server" 420 | ] 421 | }, 422 | { 423 | "cell_type": "code", 424 | "execution_count": 26, 425 | "metadata": { 426 | "id": "MH-RScSvXz9R" 427 | }, 428 | "outputs": [], 429 | "source": [ 430 | "os.environ[\"MODEL_DIR\"] = MODEL_DIR" 431 | ] 432 | }, 433 | { 434 | "cell_type": "markdown", 435 | "metadata": {}, 436 | "source": [ 437 | "`tensorflow_model_server` accepts a set of options.\n", 438 | "- `--rest_api_port` exposes an additional port for the RestAPI. By default, `8500` is exposed for gRPC.\n", 439 | "- `--model_name` lets TF Serving identify which model to access. You can see this in the RestAPI's URI.\n", 440 | "- `--enable_model_warmup` \n", 441 | " - The TensorFlow runtime has components that are lazily initialized, which can cause high latency for the first request(s) sent to a model after it is loaded. To reduce the impact of lazy initialization on request latency, it's possible to trigger the initialization of the sub-systems and components at model load time by providing a sample set of inference requests along with the SavedModel. 
This process is known as \"warming up\" the model.\n", 442 | " - To trigger warmup of the model at load time, attach a warmup data file under the `assets.extra` subfolder of the SavedModel directory.\n", 443 | " - The `--enable_model_warmup` option triggers this process.\n", 444 | " - For further information, see the [official documentation](https://www.tensorflow.org/tfx/serving/saved_model_warmup?hl=en)" 445 | ] 446 | }, 447 | { 448 | "cell_type": "code", 449 | "execution_count": null, 450 | "metadata": { 451 | "id": "Mq4t5ozVXz9R" 452 | }, 453 | "outputs": [], 454 | "source": [ 455 | "!nohup tensorflow_model_server \\\n", 456 | " --rest_api_port=8501 \\\n", 457 | " --model_name=resnet_model \\\n", 458 | " --model_base_path=$MODEL_DIR >server.log 2>&1 &\n", 459 | "\n", 460 | "# --enable_model_warmup for warmup(https://www.tensorflow.org/tfx/serving/saved_model_warmup)" 461 | ] 462 | }, 463 | { 464 | "cell_type": "code", 465 | "execution_count": 28, 466 | "metadata": { 467 | "id": "PVhTO53jXz9S" 468 | }, 469 | "outputs": [], 470 | "source": [ 471 | "!cat server.log" 472 | ] 473 | }, 474 | { 475 | "cell_type": "markdown", 476 | "metadata": { 477 | "id": "ea6V73oXzs3U" 478 | }, 479 | "source": [ 480 | "Notice that two ports are exposed, listening for both RestAPI (`8501`) and gRPC (`8500`) requests." 481 | ] 482 | }, 483 | { 484 | "cell_type": "code", 485 | "execution_count": 29, 486 | "metadata": { 487 | "colab": { 488 | "base_uri": "https://localhost:8080/" 489 | }, 490 | "id": "KumZ3xB4giEa", 491 | "outputId": "4302ee0a-994f-485a-c99b-4fa394068d64" 492 | }, 493 | "outputs": [ 494 | { 495 | "name": "stdout", 496 | "output_type": "stream", 497 | "text": [ 498 | "node 7 root 21u IPv6 25789 0t0 TCP *:8080 (LISTEN)\n", 499 | "colab-fil 30 root 5u IPv4 26644 0t0 TCP *:3453 (LISTEN)\n", 500 | "colab-fil 30 root 6u IPv6 26645 0t0 TCP *:3453 (LISTEN)\n", 501 | "jupyter-n 43 root 6u IPv4 25864 0t0 TCP 172.28.0.2:9000 (LISTEN)\n", 502 | "python3 61 root 15u IPv4 27814 0t0 TCP 127.0.0.1:50215 (LISTEN)\n", 503 | "python3 61 root 18u IPv4 27818 0t0 TCP 127.0.0.1:54779 (LISTEN)\n", 504 | "python3 61 root 21u IPv4 27822 0t0 TCP 127.0.0.1:40395 (LISTEN)\n", 505 | "python3 61 root 24u IPv4 27826 0t0 TCP 127.0.0.1:60517 (LISTEN)\n", 506 | "python3 61 root 30u IPv4 27832 0t0 TCP 127.0.0.1:40255 (LISTEN)\n", 507 | "python3 61 root 43u IPv4 28831 0t0 TCP 127.0.0.1:53235 (LISTEN)\n", 508 | "python3 81 root 3u IPv4 29267 0t0 TCP 127.0.0.1:15144 (LISTEN)\n", 509 | "python3 81 root 5u IPv4 28223 0t0 TCP 127.0.0.1:42197 (LISTEN)\n", 510 | "python3 81 root 9u IPv4 28356 0t0 TCP 127.0.0.1:41627 (LISTEN)\n", 511 | "tensorflo 5933 root 5u IPv4 66554 0t0 TCP *:8500 (LISTEN)\n", 512 | "tensorflo 5933 root 12u IPv4 66559 0t0 TCP *:8501 (LISTEN)\n" 513 | ] 514 | } 515 | ], 516 | "source": [ 517 | "!sudo lsof -i -P -n | grep LISTEN" 518 | ] 519 | }, 520 | { 521 | "cell_type": "markdown", 522 | "metadata": { 523 | "id": "mvHRnTmppqn9" 524 | }, 525 | "source": [ 526 | "## RestAPI request" 527 | ] 528 | }, 529 | { 530 | "cell_type": "markdown", 531 | "metadata": { 532 | "id": "1QNMoU3qq2fN" 533 | }, 534 | "source": [ 535 | "### Convert the dummy data into JSON format" 536 | ] 537 | }, 538 | { 539 | "cell_type": "code", 540 | "execution_count": 30, 541 | "metadata": { 542 | "colab": { 543 | "base_uri": "https://localhost:8080/" 544 | }, 545 | "id": "xDw6gT7lXz9S", 546 | "outputId": "19e82468-1441-4dae-c514-9d8b78f53240" 547 | }, 548 | "outputs": [ 549 | { 550 | "name": "stdout", 551 | "output_type": "stream", 552 | "text": [ 553 | "Data: 
{\"signature_name\": \"serving_default\", \"instances\": ... 442383, 0.8007770776748657, -0.7472004890441895]]]]}\n" 554 | ] 555 | } 556 | ], 557 | "source": [ 558 | "data = json.dumps({\"signature_name\": \"serving_default\", \"instances\": dummy_inputs.numpy().tolist()})\n", 559 | "print('Data: {} ... {}'.format(data[:50], data[len(data)-52:]))" 560 | ] 561 | }, 562 | { 563 | "cell_type": "markdown", 564 | "metadata": { 565 | "id": "hzlArynZq-dF" 566 | }, 567 | "source": [ 568 | "### Make a request" 569 | ] 570 | }, 571 | { 572 | "cell_type": "code", 573 | "execution_count": 31, 574 | "metadata": { 575 | "id": "hh6vmxqnXz9T" 576 | }, 577 | "outputs": [], 578 | "source": [ 579 | "headers = {\"content-type\": \"application/json\"}" 580 | ] 581 | }, 582 | { 583 | "cell_type": "code", 584 | "execution_count": 32, 585 | "metadata": { 586 | "colab": { 587 | "base_uri": "https://localhost:8080/" 588 | }, 589 | "id": "fS_DI5QpdZTg", 590 | "outputId": "6b93ca46-1047-4b76-bd84-d21f95f9f4e9" 591 | }, 592 | "outputs": [ 593 | { 594 | "name": "stdout", 595 | "output_type": "stream", 596 | "text": [ 597 | "1 loop, best of 5: 4.11 s per loop\n" 598 | ] 599 | } 600 | ], 601 | "source": [ 602 | "%%timeit\n", 603 | "json_response = requests.post('http://localhost:8501/v1/models/resnet_model:predict', \n", 604 | " data=data, headers=headers)" 605 | ] 606 | }, 607 | { 608 | "cell_type": "markdown", 609 | "metadata": { 610 | "id": "_n8urXddrJp0" 611 | }, 612 | "source": [ 613 | "### Interpret the output" 614 | ] 615 | }, 616 | { 617 | "cell_type": "code", 618 | "execution_count": 36, 619 | "metadata": { 620 | "colab": { 621 | "base_uri": "https://localhost:8080/" 622 | }, 623 | "id": "brI4TCETXz9T", 624 | "outputId": "7a098027-fb8a-4bfd-96dc-0bf74d26af09" 625 | }, 626 | "outputs": [ 627 | { 628 | "name": "stdout", 629 | "output_type": "stream", 630 | "text": [ 631 | "Prediction class: [664 664 664 664 664 664 664 664 664 664 664 664 664 664 664 664 664 664\n", 632 | " 664 664 664 664 664 664 664 851 664 664 851 664 664 664]\n" 633 | ] 634 | } 635 | ], 636 | "source": [ 637 | "json_response = requests.post('http://localhost:8501/v1/models/resnet_model:predict', \n", 638 | " data=data, headers=headers)\n", 639 | "rest_predictions = json.loads(json_response.text)['predictions']\n", 640 | "print('Prediction class: {}'.format(np.argmax(rest_predictions, axis=-1)))" 641 | ] 642 | }, 643 | { 644 | "cell_type": "markdown", 645 | "metadata": { 646 | "id": "lwuaqGVud58k" 647 | }, 648 | "source": [ 649 | "## gRPC request" 650 | ] 651 | }, 652 | { 653 | "cell_type": "markdown", 654 | "metadata": { 655 | "id": "yr7vO8BQrP2S" 656 | }, 657 | "source": [ 658 | "### Open up gRPC channel" 659 | ] 660 | }, 661 | { 662 | "cell_type": "code", 663 | "execution_count": 37, 664 | "metadata": { 665 | "id": "Y1cxieBDfyjK" 666 | }, 667 | "outputs": [], 668 | "source": [ 669 | "channel = grpc.insecure_channel('localhost:8500')\n", 670 | "stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)" 671 | ] 672 | }, 673 | { 674 | "cell_type": "markdown", 675 | "metadata": { 676 | "id": "fS5b0VVfrTmF" 677 | }, 678 | "source": [ 679 | "### Prepare a request" 680 | ] 681 | }, 682 | { 683 | "cell_type": "code", 684 | "execution_count": 38, 685 | "metadata": { 686 | "id": "2QD8xK47emy5" 687 | }, 688 | "outputs": [], 689 | "source": [ 690 | "request = predict_pb2.PredictRequest()\n", 691 | "request.model_spec.name = 'resnet_model'\n", 692 | "request.model_spec.signature_name = 'serving_default'\n", 693 | 
"request.inputs['image_input'].CopyFrom(\n", 694 | " tf.make_tensor_proto(dummy_inputs)) #, shape=[32,224,224,3]))" 695 | ] 696 | }, 697 | { 698 | "cell_type": "markdown", 699 | "metadata": { 700 | "id": "7UztXjZGrYTf" 701 | }, 702 | "source": [ 703 | "### Make a request" 704 | ] 705 | }, 706 | { 707 | "cell_type": "code", 708 | "execution_count": 39, 709 | "metadata": { 710 | "colab": { 711 | "base_uri": "https://localhost:8080/" 712 | }, 713 | "id": "wvslcT5_f4P6", 714 | "outputId": "e4a1008a-dad5-4f8f-a0f6-f9b68aa7db67" 715 | }, 716 | "outputs": [ 717 | { 718 | "name": "stdout", 719 | "output_type": "stream", 720 | "text": [ 721 | "1 loop, best of 5: 3.63 s per loop\n" 722 | ] 723 | } 724 | ], 725 | "source": [ 726 | "%%timeit\n", 727 | "result = stub.Predict(request, 10.0) # 10 secs timeout" 728 | ] 729 | }, 730 | { 731 | "cell_type": "markdown", 732 | "metadata": { 733 | "id": "o52MnidprdCY" 734 | }, 735 | "source": [ 736 | "### Interpret the output" 737 | ] 738 | }, 739 | { 740 | "cell_type": "code", 741 | "execution_count": 40, 742 | "metadata": { 743 | "colab": { 744 | "base_uri": "https://localhost:8080/" 745 | }, 746 | "id": "TBfd4TG0f5z6", 747 | "outputId": "ffc40a16-787e-4146-9c1e-27317d10867f" 748 | }, 749 | "outputs": [ 750 | { 751 | "name": "stdout", 752 | "output_type": "stream", 753 | "text": [ 754 | "Prediction class: [664 664 664 664 664 664 664 664 664 664 664 664 664 664 664 664 664 664\n", 755 | " 664 664 664 664 664 664 664 851 664 664 851 664 664 664]\n" 756 | ] 757 | } 758 | ], 759 | "source": [ 760 | "grpc_predictions = stub.Predict(request, 10.0) # 10 secs timeout\n", 761 | "grpc_predictions = grpc_predictions.outputs['resnet50'].float_val\n", 762 | "grpc_predictions = np.array(grpc_predictions).reshape(32, -1)\n", 763 | "print('Prediction class: {}'.format(np.argmax(grpc_predictions, axis=-1)))" 764 | ] 765 | }, 766 | { 767 | "cell_type": "markdown", 768 | "metadata": { 769 | "id": "_dVHrF1ksyAc" 770 | }, 771 | "source": [ 772 | "## Compare the two results if they are identical\n", 773 | "\n", 774 | "`np.testing.assert_allclose` raises exception when the given two arrays do not match exactly." 775 | ] 776 | }, 777 | { 778 | "cell_type": "code", 779 | "execution_count": 41, 780 | "metadata": { 781 | "id": "UA4iKEcpioc8" 782 | }, 783 | "outputs": [], 784 | "source": [ 785 | "np.testing.assert_allclose(rest_predictions, grpc_predictions, atol=1e-4)" 786 | ] 787 | }, 788 | { 789 | "cell_type": "markdown", 790 | "metadata": { 791 | "id": "9-UcGFM0z65y" 792 | }, 793 | "source": [ 794 | "## Conclusion\n", 795 | "\n", 796 | "gRPC call took about 3.64 seconds while RestAPI call took about 4.11 seconds on the data of the batch size of 32. This let use conclude that gRPC call is much faster than RestAPI. \n", 797 | "\n", 798 | "Also note that this is very close performance comparing to the ONNX inference without any Server framework involved. That means we can expect TF Serving with gRPC should be faster than ONNX hosted on FastAPI server framework since FastAPI is a python framework while TF Serving is C++ implementation." 
799 | ] 800 | }, 801 | { 802 | "cell_type": "code", 803 | "execution_count": null, 804 | "metadata": { 805 | "id": "DbLQp2Do0k6H" 806 | }, 807 | "outputs": [], 808 | "source": [] 809 | } 810 | ], 811 | "metadata": { 812 | "colab": { 813 | "collapsed_sections": [], 814 | "name": "TF_Serving.ipynb", 815 | "provenance": [], 816 | "toc_visible": true 817 | }, 818 | "interpreter": { 819 | "hash": "626869861cd3ed4fdbaf755d0ab61c53ee2a93056f2b69c4f7170d3cc24dc5ea" 820 | }, 821 | "kernelspec": { 822 | "display_name": "Python 3.8.12 ('.venv': venv)", 823 | "language": "python", 824 | "name": "python3" 825 | }, 826 | "language_info": { 827 | "codemirror_mode": { 828 | "name": "ipython", 829 | "version": 3 830 | }, 831 | "file_extension": ".py", 832 | "mimetype": "text/x-python", 833 | "name": "python", 834 | "nbconvert_exporter": "python", 835 | "pygments_lexer": "ipython3", 836 | "version": "3.8.12" 837 | }, 838 | "orig_nbformat": 4 839 | }, 840 | "nbformat": 4, 841 | "nbformat_minor": 0 842 | } 843 | 
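The warmup discussion in `TF_Serving.ipynb` above stops short of creating the warmup file itself. Following the linked TF Serving warmup documentation, the record could be generated roughly as below (a sketch, not code from this repo; the export path and model name are assumptions matching the notebook):

```python
# Hypothetical warmup-record generator for the SavedModel exported above.
import tensorflow as tf
from tensorflow_serving.apis import predict_pb2, prediction_log_pb2

export_path = "/tmp/1"  # assumed export path from the notebook
warmup_dir = f"{export_path}/assets.extra"
tf.io.gfile.makedirs(warmup_dir)

# Build one representative request; TF Serving replays it at model load time.
request = predict_pb2.PredictRequest()
request.model_spec.name = "resnet_model"
request.model_spec.signature_name = "serving_default"
request.inputs["image_input"].CopyFrom(
    tf.make_tensor_proto(tf.random.normal((1, 224, 224, 3))))

log = prediction_log_pb2.PredictionLog(
    predict_log=prediction_log_pb2.PredictLog(request=request))

# The file name must be exactly `tf_serving_warmup_requests`.
with tf.io.TFRecordWriter(f"{warmup_dir}/tf_serving_warmup_requests") as writer:
    writer.write(log.SerializeToString())
```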
-------------------------------------------------------------------------------- /notebooks/TF_to_ONNX.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "view-in-github", 7 | "colab_type": "text" 8 | }, 9 | "source": [ 10 | "\"Open" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": { 16 | "id": "ahaLOgxyzACW" 17 | }, 18 | "source": [ 19 | "# Convert `tf.keras` model to ONNX\n", 20 | "\n", 21 | "This tutorial shows:\n", 22 | "- how to convert a tf.keras model to ONNX, either from a saved model file or directly from the source code. \n", 23 | "- a comparison of CPU inference time between the tf.keras model and the converted ONNX model." 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": { 29 | "id": "CmnzNRTkzaYq" 30 | }, 31 | "source": [ 32 | "## Install ONNX dependencies\n", 33 | "- `tf2onnx` provides a tool to convert a TensorFlow model to ONNX\n", 34 | "- `onnxruntime` is used to run inference on a saved ONNX model." 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": { 41 | "id": "Y7VIFntKUh0R" 42 | }, 43 | "outputs": [], 44 | "source": [ 45 | "!pip install -Uqq tf2onnx\n", 46 | "!pip install -Uqq onnxruntime" 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": { 52 | "id": "D7TJluNyz8k0" 53 | }, 54 | "source": [ 55 | "### Imports" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": { 62 | "id": "-UfszPPVf9P0" 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "import tf2onnx\n", 67 | "import pandas as pd\n", 68 | "import tensorflow as tf\n", 69 | "import numpy as np" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": { 75 | "id": "_eo3f1Zn0S3F" 76 | }, 77 | "source": [ 78 | "### Get a sample model " 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": { 85 | "id": "3R81akF_hDEL" 86 | }, 87 | "outputs": [], 88 | "source": [ 89 | "core = tf.keras.applications.ResNet50(include_top=True, input_shape=(224, 224, 3))\n", 90 | "\n", 91 | "inputs = tf.keras.layers.Input(shape=(224, 224, 3), name=\"image_input\")\n", 92 | "preprocess = tf.keras.applications.resnet50.preprocess_input(inputs)\n", 93 | "outputs = core(preprocess, training=False)\n", 94 | "model = tf.keras.Model(inputs=[inputs], outputs=[outputs])" 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "source": [ 100 | "Note that we are including the preprocessing layer in the `model` object. This will allow us to load an image from disk and run the model directly without requiring any\n", 101 | "model-specific preprocessing. This reduces training/serving skew. " 102 | ], 103 | "metadata": { 104 | "id": "W3smmoIBCFOX" 105 | } 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "metadata": { 110 | "id": "MQg5cN910Z6q" 111 | }, 112 | "source": [ 113 | "## Convert to ONNX" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "metadata": { 120 | "id": "3-friv_fMk79" 121 | }, 122 | "outputs": [], 123 | "source": [ 124 | "num_layers = len(model.layers)\n", 125 | "print(f'first layer name: {model.layers[0].name}')\n", 126 | "print(f'last layer name: {model.layers[num_layers-1].name}')" 127 | ] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": { 132 | "id": "UBGQxHHz0dGP" 133 | }, 134 | "source": [ 135 | "### Conversion\n", 136 | "\n", 137 | "`opset` in `tf2onnx.convert.from_keras` is the ONNX opset version. 
You can find the full list of which TensorFlow (TF) ops are convertible to ONNX ops [here](https://github.com/onnx/tensorflow-onnx/blob/master/support_status.md).\n", 138 | "\n", 139 | "There are two ways to convert a TensorFlow model to ONNX:\n", 140 | "- `tf2onnx.convert.from_keras` to convert programmatically\n", 141 | "- `tf2onnx.convert` CLI to convert a saved TensorFlow model" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "metadata": { 148 | "id": "_MAEoy9j0QRQ" 149 | }, 150 | "outputs": [], 151 | "source": [ 152 | "import onnx\n", 153 | "\n", 154 | "input_signature = [tf.TensorSpec([None, 224, 224, 3], tf.float32, name='image_input')]\n", 155 | "onnx_model, _ = tf2onnx.convert.from_keras(model, input_signature, opset=15)\n", 156 | "onnx.save(onnx_model, \"resnet50_w_preprocessing.onnx\")\n", 157 | "\n", 158 | "# model.save('my_model')\n", 159 | "# !python -m tf2onnx.convert --saved-model my_model --output my_model.onnx" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": { 165 | "id": "V2-aNpahQMVR" 166 | }, 167 | "source": [ 168 | "## Test TF vs ONNX model with dummy data" 169 | ] 170 | }, 171 | { 172 | "cell_type": "markdown", 173 | "metadata": { 174 | "id": "Zt5lsQoUQXOo" 175 | }, 176 | "source": [ 177 | "### Generate dummy data " 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": null, 183 | "metadata": { 184 | "id": "ceqZH2KbPznx" 185 | }, 186 | "outputs": [], 187 | "source": [ 188 | "dummy_inputs = tf.random.normal((32, 224, 224, 3))" 189 | ] 190 | }, 191 | { 192 | "cell_type": "markdown", 193 | "metadata": { 194 | "id": "M8DR47zeQZHI" 195 | }, 196 | "source": [ 197 | "### Test original TF model with dummy data" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": null, 203 | "metadata": { 204 | "id": "zL8Lw9H8QbT7" 205 | }, 206 | "outputs": [], 207 | "source": [ 208 | "%%timeit\n", 209 | "model.predict(dummy_inputs)" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": null, 215 | "metadata": { 216 | "id": "smFa5VWjTNLb" 217 | }, 218 | "outputs": [], 219 | "source": [ 220 | "tf_preds = model.predict(dummy_inputs)" 221 | ] 222 | }, 223 | { 224 | "cell_type": "markdown", 225 | "metadata": { 226 | "id": "Lqhi458k0fkM" 227 | }, 228 | "source": [ 229 | "### Test converted ONNX model with dummy data\n", 230 | "\n", 231 | "If you want to run inference on a GPU, you can do so by setting `providers=[\"CUDAExecutionProvider\"]` in `ort.InferenceSession`.\n", 232 | "\n", 233 | "The first parameter in `sess.run` is set to `None`, which means all the outputs of the model will be retrieved. 
" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": null, 239 | "metadata": { 240 | "id": "1ELVBwrn0-Cf" 241 | }, 242 | "outputs": [], 243 | "source": [ 244 | "import onnxruntime as ort\n", 245 | "import numpy as np\n", 246 | "\n", 247 | "sess = ort.InferenceSession(\"resnet50_w_preprocessing.onnx\") # providers=[\"CUDAExecutionProvider\"])\n", 248 | "np_dummy_inputs = dummy_inputs.numpy()" 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": null, 254 | "metadata": { 255 | "id": "jszhyR15SJaE" 256 | }, 257 | "outputs": [], 258 | "source": [ 259 | "%%timeit \n", 260 | "sess.run(None, {\"image_input\": np_dummy_inputs})" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": null, 266 | "metadata": { 267 | "id": "Ax6opk4ENmlK" 268 | }, 269 | "outputs": [], 270 | "source": [ 271 | "ort_preds = sess.run(None, {\"image_input\": np_dummy_inputs})" 272 | ] 273 | }, 274 | { 275 | "cell_type": "markdown", 276 | "source": [ 277 | "## Check if the TF and ONNX outputs match" 278 | ], 279 | "metadata": { 280 | "id": "jbrwQMDbBLps" 281 | } 282 | }, 283 | { 284 | "cell_type": "code", 285 | "source": [ 286 | "np.testing.assert_allclose(tf_preds, ort_preds[0], atol=1e-4)" 287 | ], 288 | "metadata": { 289 | "id": "um99Uu4FBPrY" 290 | }, 291 | "execution_count": null, 292 | "outputs": [] 293 | }, 294 | { 295 | "cell_type": "markdown", 296 | "metadata": { 297 | "id": "QPu6kdNnU8Y6" 298 | }, 299 | "source": [ 300 | "## Conclusion\n", 301 | "\n", 302 | "We did a simple experiments with dummy dataset of 32 batch size. The default behaviour of `timeit` is to measure the average of the cell execution time with 7 times of repeat ([`timeit`'s default behaviour](https://ipython.readthedocs.io/en/stable/interactive/magics.html#magic-timeit)).\n", 303 | "\n", 304 | "\n", 305 | "The ONNX model will likely always have a better inference latency than the TF model if you are using a CPU server for inference." 306 | ] 307 | } 308 | ], 309 | "metadata": { 310 | "colab": { 311 | "collapsed_sections": [], 312 | "name": "TF to ONNX.ipynb", 313 | "provenance": [], 314 | "include_colab_link": true 315 | }, 316 | "kernelspec": { 317 | "display_name": "Python 3", 318 | "name": "python3" 319 | }, 320 | "language_info": { 321 | "name": "python", 322 | "version": "3.8.10" 323 | } 324 | }, 325 | "nbformat": 4, 326 | "nbformat_minor": 0 327 | } --------------------------------------------------------------------------------