├── .github
│   └── workflows
│       └── deployment.yaml
├── .gitignore
├── .kube
│   ├── README.md
│   ├── base_1
│   │   ├── deployment.yaml
│   │   ├── kustomization.yaml
│   │   └── service.yaml
│   ├── deployment.yaml
│   ├── experiment_1
│   │   ├── 2vCPU+4GB+w1
│   │   │   ├── Dockerfile
│   │   │   ├── deployment_replica_count.yaml
│   │   │   ├── kustomization.yaml
│   │   │   └── requirements.txt
│   │   ├── 2vCPU+4GB+w2
│   │   │   ├── Dockerfile
│   │   │   ├── deployment_replica_count.yaml
│   │   │   ├── kustomization.yaml
│   │   │   └── requirements.txt
│   │   ├── 2vCPU+4GB+w4
│   │   │   ├── Dockerfile
│   │   │   ├── deployment_replica_count.yaml
│   │   │   ├── kustomization.yaml
│   │   │   └── requirements.txt
│   │   ├── 4vCPU+8GB
│   │   │   ├── Dockerfile
│   │   │   ├── deployment_replica_count.yaml
│   │   │   ├── kustomization.yaml
│   │   │   └── requirements.txt
│   │   ├── 8vCPU+16GB
│   │   │   ├── Dockerfile
│   │   │   ├── deployment_replica_count.yaml
│   │   │   ├── kustomization.yaml
│   │   │   └── requirements.txt
│   │   └── 8vCPU+64GB
│   │       ├── Dockerfile
│   │       ├── deployment_replica_count.yaml
│   │       ├── kustomization.yaml
│   │       └── requirements.txt
│   ├── hpa.yaml
│   ├── kustomization.yaml
│   └── service.yaml
├── Dockerfile
├── LICENSE
├── README.md
├── api
│   ├── README.md
│   ├── cat.jpg
│   ├── imagenet_classes.txt
│   ├── main.py
│   ├── requirements.txt
│   └── utils
│       ├── __init__.py
│       └── utils.py
├── locust
│   ├── README.md
│   ├── cat.jpg
│   ├── cat_224x224.jpg
│   ├── load_test.conf
│   └── locust.py
└── notebooks
    ├── TF_Serving.ipynb
    └── TF_to_ONNX.ipynb
/.github/workflows/deployment.yaml:
--------------------------------------------------------------------------------
1 | name: Deployment
2 |
3 | env:
4 | GCP_PROJECT_ID: "fast-ai-exploration"
5 | GKE_CLUSTER: "fastapi-cluster"
6 | GKE_REGION: "us-central1"
7 | GKE_ZONE: "us-central1-a"
8 | IMAGE: "gcr.io/fast-ai-exploration/fastapi-k8s"
9 | GKE_DEPLOYMENT: "fastapi-server"
10 |
11 | on:
12 | push:
13 | branches: [ "main" ]
14 |
15 | # Allows you to run this workflow manually from the Actions tab
16 | workflow_dispatch:
17 |
18 | jobs:
19 | build:
20 | runs-on: ubuntu-latest
21 | steps:
22 | - name: Git
23 | uses: actions/checkout@v2
24 |
25 | - name: GCP auth
26 | uses: google-github-actions/auth@v0
27 | with:
28 | credentials_json: ${{ secrets.GCP_CREDENTIALS }}
29 |
30 | - name: Set up Cloud SDK
31 | uses: google-github-actions/setup-gcloud@v0
32 |
33 | - name: Docker auth
34 | run: |-
35 | gcloud --quiet auth configure-docker
36 |
37 | - name: GKE auth
38 | run: |-
39 | gcloud container clusters get-credentials "$GKE_CLUSTER" --zone "$GKE_ZONE" --project "$GCP_PROJECT_ID"
40 |
41 | - name: Check if the requested directories have any changes
42 | uses: dorny/paths-filter@v2
43 | id: changes
44 | with:
45 | filters: |
46 | src:
47 | - 'api/**'
48 | - '.github/**'
49 | - '.kube/**'
50 | - Dockerfile
51 |
52 | - name: Build and push Docker image based on the changes
53 | if: steps.changes.outputs.src == 'true'
54 | run: |
55 | docker build --tag "$IMAGE:$GITHUB_SHA" .
56 | docker tag "$IMAGE:$GITHUB_SHA" "$IMAGE:latest"
57 | docker push "$IMAGE:$GITHUB_SHA"
58 | docker push "$IMAGE:latest"
59 |
60 | - name: Set up Kustomize
61 | working-directory: .kube/
62 | if: steps.changes.outputs.src == 'true'
63 | run: |-
64 | curl -sfLo kustomize.tar.gz https://github.com/kubernetes-sigs/kustomize/releases/download/kustomize%2Fv4.1.2/kustomize_v4.1.2_linux_amd64.tar.gz
65 | tar -zxvf kustomize.tar.gz
66 | chmod u+x ./kustomize
67 |
68 | - name: Deploy to GKE
69 | working-directory: .kube/
70 | if: steps.changes.outputs.src == 'true'
71 | run: |-
72 | ./kustomize edit set image $IMAGE:$GITHUB_SHA
73 | ./kustomize build . | kubectl apply -f -
74 | kubectl rollout status deployment/$GKE_DEPLOYMENT
75 | kubectl get services -o wide
76 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |
131 | # Misc
132 | .ipynb_checkpoints/
133 | *.onnx
--------------------------------------------------------------------------------
/.kube/README.md:
--------------------------------------------------------------------------------
1 | # Run Experimental Setup
2 |
3 | ## Build Docker image
4 | ```bash
5 | # from the repository root (the experiment Dockerfiles copy ./api and ./.kube paths)
6 |
7 | $ TARGET_EXPERIMENT=.kube/experiment_1/2vCPU+4GB+w1
8 | $ TAG=gcr.io/GCP_PROJECT_ID/IMG_NAME:IMG_TAG
9 |
10 | $ docker build -f $TARGET_EXPERIMENT/Dockerfile -t $TAG .
11 | ```
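
Then push the image so the cluster can pull it (assuming you have already run `gcloud auth configure-docker` so Docker can push to GCR, as the deployment workflow does):

```bash
$ docker push $TAG
```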
12 |
13 | ## Deploy on k8s cluster
14 | ```bash
15 | # under the /.kube directory (where the deployment workflow places the kustomize binary)
16 |
17 | $ ./kustomize build experiment_1/2vCPU+4GB+w1 | kubectl apply -f -
18 | ```
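
Once applied, you can verify the rollout and inspect the service the same way the deployment workflow does (the deployment and service are both named `fastapi-server`):

```bash
$ kubectl rollout status deployment/fastapi-server
$ kubectl get services -o wide
```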
--------------------------------------------------------------------------------
/.kube/base_1/deployment.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: apps/v1
2 | kind: Deployment
3 | metadata:
4 | labels:
5 | app: fastapi-server
6 | name: fastapi-server
7 | spec:
8 | replicas: 1
9 | selector:
10 | matchLabels:
11 | app: fastapi-server
12 | strategy: {}
13 | template:
14 | metadata:
15 | labels:
16 | app: fastapi-server
17 | spec:
18 | containers:
19 | - image: gcr.io/gcp-ml-172005/k8s-fastapi:latest
20 | name: fastapi-k8s
21 | ports:
22 | - containerPort: 80
23 | resources: {}
24 |
--------------------------------------------------------------------------------
/.kube/base_1/kustomization.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: kustomize.config.k8s.io/v1beta1
2 | kind: Kustomization
3 |
4 | commonLabels:
5 | app: fastapi-server
6 |
7 | resources:
8 | - deployment.yaml
9 | - service.yaml
--------------------------------------------------------------------------------
/.kube/base_1/service.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Service
3 | metadata:
4 | creationTimestamp: null
5 | labels:
6 | app: fastapi-server
7 | name: fastapi-server
8 | spec:
9 | ports:
10 | - port: 80
11 | protocol: TCP
12 | targetPort: 80
13 | selector:
14 | app: fastapi-server
15 | type: LoadBalancer
16 | status:
17 | loadBalancer: {}
--------------------------------------------------------------------------------
/.kube/deployment.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: apps/v1
2 | kind: Deployment
3 | metadata:
4 | creationTimestamp: null
5 | labels:
6 | app: fastapi-server
7 | name: fastapi-server
8 | spec:
9 | replicas: 1
10 | selector:
11 | matchLabels:
12 | app: fastapi-server
13 | strategy: {}
14 | template:
15 | metadata:
16 | creationTimestamp: null
17 | labels:
18 | app: fastapi-server
19 | spec:
20 | containers:
21 | - image: gcr.io/fast-ai-exploration/fastapi-k8s:latest
22 | name: fastapi-k8s
23 | ports:
24 | - containerPort: 80
25 | resources: {}
--------------------------------------------------------------------------------
/.kube/experiment_1/2vCPU+4GB+w1/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.8
2 |
3 | WORKDIR /app
4 |
5 | # install dependencies
6 | COPY ./.kube/experiment_1/2vCPU+4GB+w1/requirements.txt /app/requirements.txt
7 | RUN pip install --no-cache-dir --upgrade -r /app/requirements.txt
8 |
9 | # copy the FastAPI app codebase
10 | COPY ./api /app
11 |
12 | # run the FastAPI app with a single uvicorn worker (the default)
13 | CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80"]
--------------------------------------------------------------------------------
/.kube/experiment_1/2vCPU+4GB+w1/deployment_replica_count.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: apps/v1
2 | kind: Deployment
3 | metadata:
4 | name: fastapi-server
5 |
6 | spec:
7 | replicas: 8
8 | template:
9 | metadata:
10 | labels:
11 | app: fastapi-server
12 | spec:
13 | containers:
14 | - image: gcr.io/gcp-ml-172005/k8s-fastapi:uvicorn-w-1
15 | name: fastapi-k8s
16 | imagePullPolicy: Always
17 | ports:
18 | - containerPort: 80
19 | resources: {}
20 |
--------------------------------------------------------------------------------
/.kube/experiment_1/2vCPU+4GB+w1/kustomization.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: kustomize.config.k8s.io/v1beta1
2 | kind: Kustomization
3 |
4 | bases:
5 | - ../../base_1
6 | patches:
7 | - deployment_replica_count.yaml
--------------------------------------------------------------------------------
/.kube/experiment_1/2vCPU+4GB+w1/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy==1.22.2
2 | uvicorn[standard]==0.17.5
3 | Pillow==9.0.1
4 | onnxruntime==1.10.0
5 | fastapi==0.74.0
6 | python-multipart==0.0.5
7 |
--------------------------------------------------------------------------------
/.kube/experiment_1/2vCPU+4GB+w2/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.8
2 |
3 | WORKDIR /app
4 |
5 | # install dependencies
6 | COPY ./.kube/experiment_1/2vCPU+4GB+w2/requirements.txt /app/requirements.txt
7 | RUN pip install --no-cache-dir --upgrade -r /app/requirements.txt
8 |
9 | # copy the FastAPI app codebase
10 | COPY ./api /app
11 |
12 | # run the FastAPI app with 2 uvicorn workers
13 | CMD ["uvicorn", "main:app", "--workers", "2", "--host", "0.0.0.0", "--port", "80"]
--------------------------------------------------------------------------------
/.kube/experiment_1/2vCPU+4GB+w2/deployment_replica_count.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: apps/v1
2 | kind: Deployment
3 | metadata:
4 | name: fastapi-server
5 |
6 | spec:
7 | replicas: 8
8 | template:
9 | metadata:
10 | labels:
11 | app: fastapi-server
12 | spec:
13 | containers:
14 | - image: gcr.io/gcp-ml-172005/k8s-fastapi:uvicorn-w-2
15 | name: fastapi-k8s
16 | imagePullPolicy: Always
17 | ports:
18 | - containerPort: 80
19 | resources: {}
20 |
--------------------------------------------------------------------------------
/.kube/experiment_1/2vCPU+4GB+w2/kustomization.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: kustomize.config.k8s.io/v1beta1
2 | kind: Kustomization
3 |
4 | bases:
5 | - ../../base_1
6 | patches:
7 | - deployment_replica_count.yaml
--------------------------------------------------------------------------------
/.kube/experiment_1/2vCPU+4GB+w2/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy==1.22.2
2 | uvicorn[standard]==0.17.5
3 | Pillow==9.0.1
4 | onnxruntime==1.10.0
5 | fastapi==0.74.0
6 | python-multipart==0.0.5
7 |
--------------------------------------------------------------------------------
/.kube/experiment_1/2vCPU+4GB+w4/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.8
2 |
3 | WORKDIR /app
4 |
5 | # install dependencies
6 | COPY ./.kube/experiment_1/2vCPU+4GB+w4/requirements.txt /app/requirements.txt
7 | RUN pip install --no-cache-dir --upgrade -r /app/requirements.txt
8 |
9 | # copy the FastAPI app codebase
10 | COPY ./api /app
11 |
12 | # run the FastAPI app with 4 uvicorn workers
13 | CMD ["uvicorn", "main:app", "--workers", "4", "--host", "0.0.0.0", "--port", "80"]
--------------------------------------------------------------------------------
/.kube/experiment_1/2vCPU+4GB+w4/deployment_replica_count.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: apps/v1
2 | kind: Deployment
3 | metadata:
4 | name: fastapi-server
5 |
6 | spec:
7 | replicas: 8
8 | template:
9 | metadata:
10 | labels:
11 | app: fastapi-server
12 | spec:
13 | containers:
14 | - image: gcr.io/gcp-ml-172005/k8s-fastapi:uvicorn-w-4
15 | name: fastapi-k8s
16 | imagePullPolicy: Always
17 | ports:
18 | - containerPort: 80
19 | resources: {}
20 |
--------------------------------------------------------------------------------
/.kube/experiment_1/2vCPU+4GB+w4/kustomization.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: kustomize.config.k8s.io/v1beta1
2 | kind: Kustomization
3 |
4 | bases:
5 | - ../../base_1
6 | patches:
7 | - deployment_replica_count.yaml
--------------------------------------------------------------------------------
/.kube/experiment_1/2vCPU+4GB+w4/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy==1.22.2
2 | uvicorn[standard]==0.17.5
3 | Pillow==9.0.1
4 | onnxruntime==1.10.0
5 | fastapi==0.74.0
6 | python-multipart==0.0.5
7 |
--------------------------------------------------------------------------------
/.kube/experiment_1/4vCPU+8GB/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.8
2 |
3 | WORKDIR /app
4 |
5 | # install dependencies
6 | COPY ./.kube/experiment_1/4vCPU+8GB/requirements.txt /app/requirements.txt
7 | RUN pip install --no-cache-dir --upgrade -r /app/requirements.txt
8 |
9 | # copy the FastAPI app codebase
10 | COPY ./api /app
11 |
12 | # run the FastAPI app with 9 gunicorn workers ((2 x 4 vCPUs) + 1, the common gunicorn heuristic)
13 | CMD ["gunicorn", "main:app", "--workers", "9", "--worker-class", "uvicorn.workers.UvicornWorker", "--bind", "0.0.0.0:80"]
--------------------------------------------------------------------------------
/.kube/experiment_1/4vCPU+8GB/deployment_replica_count.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: apps/v1
2 | kind: Deployment
3 | metadata:
4 | name: fastapi-server
5 |
6 | spec:
7 | replicas: 4
8 | template:
9 | metadata:
10 | labels:
11 | app: fastapi-server
12 | spec:
13 | containers:
14 | - image: gcr.io/gcp-ml-172005/k8s-fastapi:gunicorn-w-7
15 | name: fastapi-k8s
16 | imagePullPolicy: Always
17 | ports:
18 | - containerPort: 80
19 | resources: {}
20 |
--------------------------------------------------------------------------------
/.kube/experiment_1/4vCPU+8GB/kustomization.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: kustomize.config.k8s.io/v1beta1
2 | kind: Kustomization
3 |
4 | bases:
5 | - ../../base_1
6 | patches:
7 | - deployment_replica_count.yaml
--------------------------------------------------------------------------------
/.kube/experiment_1/4vCPU+8GB/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy==1.22.2
2 | uvicorn[standard]==0.17.5
3 | Pillow==9.0.1
4 | onnxruntime==1.10.0
5 | fastapi==0.74.0
6 | python-multipart==0.0.5
7 | gunicorn==20.1.0
--------------------------------------------------------------------------------
/.kube/experiment_1/8vCPU+16GB/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.8
2 |
3 | WORKDIR /app
4 |
5 | # install dependencies
6 | COPY ./.kube/experiment_1/8vCPU+16GB/requirements.txt /app/requirements.txt
7 | RUN pip install --no-cache-dir --upgrade -r /app/requirements.txt
8 |
9 | # copy the FastAPI app codebase
10 | COPY ./api /app
11 |
12 | # run the FastAPI app with 17 gunicorn workers ((2 x 8 vCPUs) + 1, the common gunicorn heuristic)
13 | CMD ["gunicorn", "main:app", "--workers", "17", "--worker-class", "uvicorn.workers.UvicornWorker", "--bind", "0.0.0.0:80"]
--------------------------------------------------------------------------------
/.kube/experiment_1/8vCPU+16GB/deployment_replica_count.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: apps/v1
2 | kind: Deployment
3 | metadata:
4 | name: fastapi-server
5 |
6 | spec:
7 | replicas: 2
8 | template:
9 | metadata:
10 | labels:
11 | app: fastapi-server
12 | spec:
13 | containers:
14 | - image: gcr.io/gcp-ml-172005/k8s-fastapi:gunicorn-w-17
15 | name: fastapi-k8s
16 | imagePullPolicy: Always
17 | ports:
18 | - containerPort: 80
19 | resources: {}
20 |
--------------------------------------------------------------------------------
/.kube/experiment_1/8vCPU+16GB/kustomization.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: kustomize.config.k8s.io/v1beta1
2 | kind: Kustomization
3 |
4 | bases:
5 | - ../../base_1
6 | patches:
7 | - deployment_replica_count.yaml
--------------------------------------------------------------------------------
/.kube/experiment_1/8vCPU+16GB/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy==1.22.2
2 | uvicorn[standard]==0.17.5
3 | Pillow==9.0.1
4 | onnxruntime==1.10.0
5 | fastapi==0.74.0
6 | python-multipart==0.0.5
7 | gunicorn==20.1.0
--------------------------------------------------------------------------------
/.kube/experiment_1/8vCPU+64GB/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.8
2 |
3 | WORKDIR /app
4 |
5 | # install dependencies
6 | COPY ./.kube/experiment_1/8vCPU+64GB/requirements.txt /app/requirements.txt
7 | RUN pip install --no-cache-dir --upgrade -r /app/requirements.txt
8 |
9 | # copy the FastAPI app codebase
10 | COPY ./api /app
11 |
12 | # run the FastAPI app with 17 gunicorn workers ((2 x 8 vCPUs) + 1, the common gunicorn heuristic)
13 | CMD ["gunicorn", "main:app", "--workers", "17", "--worker-class", "uvicorn.workers.UvicornWorker", "--bind", "0.0.0.0:80"]
--------------------------------------------------------------------------------
/.kube/experiment_1/8vCPU+64GB/deployment_replica_count.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: apps/v1
2 | kind: Deployment
3 | metadata:
4 | name: fastapi-server
5 |
6 | spec:
7 | replicas: 2
8 | template:
9 | metadata:
10 | labels:
11 | app: fastapi-server
12 | spec:
13 | containers:
14 | - image: gcr.io/gcp-ml-172005/k8s-fastapi:gunicorn-w-17
15 | name: fastapi-k8s
16 | imagePullPolicy: Always
17 | ports:
18 | - containerPort: 80
19 | resources: {}
20 |
--------------------------------------------------------------------------------
/.kube/experiment_1/8vCPU+64GB/kustomization.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: kustomize.config.k8s.io/v1beta1
2 | kind: Kustomization
3 |
4 | bases:
5 | - ../../base_1
6 | patches:
7 | - deployment_replica_count.yaml
--------------------------------------------------------------------------------
/.kube/experiment_1/8vCPU+64GB/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy==1.22.2
2 | uvicorn[standard]==0.17.5
3 | Pillow==9.0.1
4 | onnxruntime==1.10.0
5 | fastapi==0.74.0
6 | python-multipart==0.0.5
7 | gunicorn==20.1.0
--------------------------------------------------------------------------------
/.kube/hpa.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: autoscaling/v1
2 | kind: HorizontalPodAutoscaler
3 | metadata:
4 | name: fastapi-server
5 |
6 | spec:
7 | scaleTargetRef:
8 | apiVersion: apps/v1
9 | kind: Deployment
10 | name: fastapi-server
11 | minReplicas: 1
12 | maxReplicas: 9
13 |   targetCPUUtilizationPercentage: 80  # autoscaling/v1 scales on average CPU utilization only
--------------------------------------------------------------------------------
/.kube/kustomization.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: kustomize.config.k8s.io/v1beta1
2 | kind: Kustomization
3 | commonLabels:
4 |   app: fastapi-server
5 | resources:
6 |   - deployment.yaml
7 |   - hpa.yaml
8 |   - service.yaml
--------------------------------------------------------------------------------
/.kube/service.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Service
3 | metadata:
4 | creationTimestamp: null
5 | labels:
6 | app: fastapi-server
7 | name: fastapi-server
8 | spec:
9 | ports:
10 | - port: 80
11 | protocol: TCP
12 | targetPort: 80
13 | selector:
14 | app: fastapi-server
15 | type: LoadBalancer
16 | status:
17 | loadBalancer: {}
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.8
2 |
3 | WORKDIR /app
4 |
5 | # install dependencies
6 | COPY ./api/requirements.txt /app/requirements.txt
7 | RUN pip install --no-cache-dir --upgrade -r /app/requirements.txt
8 |
9 | # copy the FastAPI app codebase
10 | COPY ./api /app
11 |
12 | # run the FastAPI app with 2 uvicorn workers
13 | CMD ["uvicorn", "main:app", "--workers", "2", "--host", "0.0.0.0", "--port", "80"]
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright 2022 Sayak Paul and Chansung Park
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Deploying ML models with FastAPI, Docker, and Kubernetes
2 |
3 | *By: [Sayak Paul](https://github.com/sayakpaul) and [Chansung Park](https://github.com/deep-diver)*
4 |
5 |
6 | *Figure developed by Chansung Park*
7 |
8 |
9 |
10 | This project shows how to serve an ONNX-optimized image classification model as a
11 | RESTful web service with FastAPI, Docker, and Kubernetes (k8s). The idea is to first
12 | Dockerize the API and then deploy it on a k8s cluster running on [Google Kubernetes
13 | Engine (GKE)](https://cloud.google.com/kubernetes-engine). We do this integration
14 | using [GitHub Actions](https://github.com/features/actions).
15 |
16 | 👋 **Note**: Even though this project uses an image classification model, its structure and techniques can
17 | be used to serve other models as well. We also worked on a TF Serving equivalent
18 | of this project. Check it out [here](https://github.com/deep-diver/ml-deployment-k8s-tfserving).
19 |
20 | **Update July 19 2022**: This project won the [#TFCommunitySpotlight award](https://twitter.com/TensorFlow/status/1545115276152389636).
21 |
22 | ## Deploying the model as a service with k8s
23 |
24 | * We decouple the model optimization part from our API code. The optimization part is
25 | available within the `notebooks/TF_to_ONNX.ipynb` notebook.
26 | * Then we locally test the API. You can find the instructions within the `api`
27 | directory.
28 | * To deploy the API, we define our `deployment.yaml` workflow file inside `.github/workflows`.
29 | It does the following tasks:
30 |
31 | * Looks for any changes in the specified directories. If there are any changes:
32 | * Builds and pushes the latest Docker image to Google Container Registry (GCR).
33 | * Deploys the Docker container on the k8s cluster running on GKE.
34 |
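Since the workflow also declares a `workflow_dispatch` trigger, you can start it manually from the Actions tab or, assuming you have the GitHub CLI installed, from a terminal:

```sh
$ gh workflow run deployment.yaml
```
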
35 | ## Configurations needed beforehand
36 |
37 | * Create a k8s cluster on GKE. [Here's](https://www.youtube.com/watch?v=hxpGC19PzwI) a
38 | relevant resource. We used 8 nodes (each with 2 vCPUs and 4 GBs of RAM) for the cluster.
39 | * [Create](https://cloud.google.com/iam/docs/creating-managing-service-account-keys) a
40 | service account key (JSON) file. It's a good practice to only grant it the roles
41 | required for the project. For example, for this project, we created a fresh service
42 | account and granted it permissions for the following: Storage Admin, GKE Developer, and
43 | GCR Developer.
44 | * Create a secret named `GCP_CREDENTIALS` on your GitHub repository and paste the
45 | contents of the service account key file into the secret.
46 | * Configure bucket storage related permissions for the service account:
47 |
48 | ```shell
49 | $ export PROJECT_ID=
50 | $ export ACCOUNT=
51 |
52 | $ gcloud -q projects add-iam-policy-binding ${PROJECT_ID} \
53 | --member=serviceAccount:${ACCOUNT}@${PROJECT_ID}.iam.gserviceaccount.com \
54 | --role roles/storage.admin
55 |
56 | $ gcloud -q projects add-iam-policy-binding ${PROJECT_ID} \
57 | --member=serviceAccount:${ACCOUNT}@${PROJECT_ID}.iam.gserviceaccount.com \
58 | --role roles/storage.objectAdmin
59 |
60 | $ gcloud -q projects add-iam-policy-binding ${PROJECT_ID} \
61 | --member=serviceAccount:${ACCOUNT}@${PROJECT_ID}.iam.gserviceaccount.com \
62 | --role roles/storage.objectCreator
63 | ```
64 | * If you're on the `main` branch already, then upon a new push, the workflow defined
65 | in `.github/workflows/deployment.yaml` should automatically run. Here's how the
66 | final outputs should look ([run link](https://github.com/sayakpaul/ml-deployment-k8s-fastapi/runs/5343002731)):
67 |
68 | 
69 |
70 | ## Notes
71 |
72 | * Since we use CPU-based pods within the k8s cluster, we apply ONNX optimizations,
73 | which are known to provide performance speed-ups in CPU-based environments.
74 | If you are using GPU-based pods then look into [TensorRT](https://developer.nvidia.com/tensorrt).
75 | * We use [Kustomize](https://kustomize.io) to manage the deployment on k8s.
76 | * We conducted load-testing varying the number of workers, RAM, nodes, etc. From that experiment,
77 | we found out that for our setup, 8 nodes each having 2 vCPUs and 4 GBs of RAM work the best in terms of
78 | throughput and latency. The figure below summarizes our results:
79 |
80 | 
81 |
82 | You can find the load-testing details under the `locust` directory; a minimal sketch of the idea follows.
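
For reference, here is a minimal, hypothetical sketch of a Locust user for this API (not the exact contents of `locust/locust.py`); the endpoint and form fields match the cURL example in the next section:

```python
from locust import HttpUser, task, between


class FastAPIUser(HttpUser):
    # Hypothetical pacing; tune to match your own load profile.
    wait_time = between(1, 2)

    @task
    def predict(self):
        # POST the same multipart form that /predict/image expects.
        with open("cat.jpg", "rb") as f:
            self.client.post(
                "/predict/image",
                data={"with_resize": True, "with_post_process": True},
                files={"image_file": f},
            )
```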
83 |
84 | ## Querying the API endpoint
85 |
86 | From the workflow outputs, you should see something like this:
87 |
88 | ```shell
89 | NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
90 | fastapi-server LoadBalancer xxxxxxxxxx xxxxxxxxxx 80:30768/TCP 23m
91 | kubernetes       ClusterIP      xxxxxxxxxx    <none>          443/TCP        160m
92 | ```
93 |
94 | Note the `EXTERNAL-IP` corresponding to `fastapi-server` (if you have named
95 | your service like so). Then cURL it:
96 |
97 | ```shell
98 | curl -X POST -F image_file=@cat.jpg -F with_resize=True -F with_post_process=True http://{EXTERNAL-IP}:80/predict/image
99 | ```
100 |
101 | You should get the following output (if you're using the `cat.jpg` image present
102 | in the `api` directory):
103 |
104 | ```shell
105 | "{\"Label\": \"tabby\", \"Score\": \"0.538\"}"
106 | ```
107 |
108 | The request assumes that you have a file called `cat.jpg` present in your
109 | working directory.
110 |
111 | **Note** that if you don't see any external IP address from your GitHub Actions console log,
112 | then after successful deployment, do the following:
113 |
114 | ```sh
115 | # Authenticate to your GKE cluster.
116 | $ gcloud container clusters get-credentials ${GKE_CLUSTER} --zone ${GKE_ZONE} --project ${GCP_PROJECT_ID}
117 | $ kubectl get services -o wide
118 | ```
119 |
120 | From there, note the external IP.
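
If you'd rather grab the IP programmatically, a `jsonpath` query against the service (named `fastapi-server` here) also works:

```sh
$ kubectl get service fastapi-server -o jsonpath='{.status.loadBalancer.ingress[0].ip}'
```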
121 |
122 | ## Acknowledgements
123 |
124 | * [ML-GDE program](https://developers.google.com/programs/experts/) for providing GCP credit support.
125 | * [Hannes Hapke](https://www.linkedin.com/in/hanneshapke) for providing many insightful points for conducting load-tests.
126 |
127 |
--------------------------------------------------------------------------------
/api/README.md:
--------------------------------------------------------------------------------
1 | This directory exposes the ONNX model we converted in [this notebook](https://github.com/sayakpaul/ml-deployment-k8s-fastapi/blob/main/notebooks/TF_to_ONNX.ipynb) as a REST API using [FastAPI](https://fastapi.tiangolo.com/).
2 |
3 | ## Setup
4 |
5 | Install the dependencies:
6 |
7 | ```sh
8 | $ pip install -r requirements.txt
9 | ```
10 |
11 | Download a test image:
12 |
13 | ```sh
14 | $ wget http://images.cocodataset.org/val2017/000000039769.jpg -O cat.jpg
15 | ```
16 |
17 | ## Deploy locally
18 |
19 | ```sh
20 | $ uvicorn main:app --reload
21 | ```
22 |
23 | It should show something like this:
24 |
25 | ```sh
26 | INFO: Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)
27 | INFO: Started reloader process [79147] using statreload
28 | INFO: Started server process [79149]
29 | INFO: Waiting for application startup.
30 | INFO: Application startup complete.
31 | ```
32 |
33 | Note the port number and run a request:
34 |
35 | ```sh
36 | $ curl -X POST -F image_file=@cat.jpg -F with_resize=True -F with_post_process=True http://localhost:8000/predict/image
37 | ```
38 |
39 | It should output:
40 |
41 | ```sh
42 | "{\"Label\": \"tabby\", \"Score\": \"0.538\"}"
43 | ```
44 |
45 | ### Client request code in Python
46 |
47 | ```python
48 | import requests
49 |
50 | url = "http://localhost:8000/predict/image"
51 | payload = {"with_resize": True, "with_post_process": True}
52 | files = {"image_file": open("cat.jpg", "rb")}
53 |
54 | resp = requests.post(url=url, data=payload, files=files)
55 | print(resp.json())
56 | ```
57 |
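Note that the endpoint returns a JSON-encoded string (as the output above shows), so you may want to decode it once more on the client side:

```python
import json

# resp.json() yields a string like '{"Label": "tabby", "Score": "0.538"}'.
prediction = json.loads(resp.json())
print(prediction["Label"], prediction["Score"])
```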
--------------------------------------------------------------------------------
/api/cat.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sayakpaul/ml-deployment-k8s-fastapi/55b690520b943bf39dbb02a7f28fc1aba781e945/api/cat.jpg
--------------------------------------------------------------------------------
/api/imagenet_classes.txt:
--------------------------------------------------------------------------------
1 | tench
2 | goldfish
3 | great white shark
4 | tiger shark
5 | hammerhead
6 | electric ray
7 | stingray
8 | cock
9 | hen
10 | ostrich
11 | brambling
12 | goldfinch
13 | house finch
14 | junco
15 | indigo bunting
16 | robin
17 | bulbul
18 | jay
19 | magpie
20 | chickadee
21 | water ouzel
22 | kite
23 | bald eagle
24 | vulture
25 | great grey owl
26 | European fire salamander
27 | common newt
28 | eft
29 | spotted salamander
30 | axolotl
31 | bullfrog
32 | tree frog
33 | tailed frog
34 | loggerhead
35 | leatherback turtle
36 | mud turtle
37 | terrapin
38 | box turtle
39 | banded gecko
40 | common iguana
41 | American chameleon
42 | whiptail
43 | agama
44 | frilled lizard
45 | alligator lizard
46 | Gila monster
47 | green lizard
48 | African chameleon
49 | Komodo dragon
50 | African crocodile
51 | American alligator
52 | triceratops
53 | thunder snake
54 | ringneck snake
55 | hognose snake
56 | green snake
57 | king snake
58 | garter snake
59 | water snake
60 | vine snake
61 | night snake
62 | boa constrictor
63 | rock python
64 | Indian cobra
65 | green mamba
66 | sea snake
67 | horned viper
68 | diamondback
69 | sidewinder
70 | trilobite
71 | harvestman
72 | scorpion
73 | black and gold garden spider
74 | barn spider
75 | garden spider
76 | black widow
77 | tarantula
78 | wolf spider
79 | tick
80 | centipede
81 | black grouse
82 | ptarmigan
83 | ruffed grouse
84 | prairie chicken
85 | peacock
86 | quail
87 | partridge
88 | African grey
89 | macaw
90 | sulphur-crested cockatoo
91 | lorikeet
92 | coucal
93 | bee eater
94 | hornbill
95 | hummingbird
96 | jacamar
97 | toucan
98 | drake
99 | red-breasted merganser
100 | goose
101 | black swan
102 | tusker
103 | echidna
104 | platypus
105 | wallaby
106 | koala
107 | wombat
108 | jellyfish
109 | sea anemone
110 | brain coral
111 | flatworm
112 | nematode
113 | conch
114 | snail
115 | slug
116 | sea slug
117 | chiton
118 | chambered nautilus
119 | Dungeness crab
120 | rock crab
121 | fiddler crab
122 | king crab
123 | American lobster
124 | spiny lobster
125 | crayfish
126 | hermit crab
127 | isopod
128 | white stork
129 | black stork
130 | spoonbill
131 | flamingo
132 | little blue heron
133 | American egret
134 | bittern
135 | crane
136 | limpkin
137 | European gallinule
138 | American coot
139 | bustard
140 | ruddy turnstone
141 | red-backed sandpiper
142 | redshank
143 | dowitcher
144 | oystercatcher
145 | pelican
146 | king penguin
147 | albatross
148 | grey whale
149 | killer whale
150 | dugong
151 | sea lion
152 | Chihuahua
153 | Japanese spaniel
154 | Maltese dog
155 | Pekinese
156 | Shih-Tzu
157 | Blenheim spaniel
158 | papillon
159 | toy terrier
160 | Rhodesian ridgeback
161 | Afghan hound
162 | basset
163 | beagle
164 | bloodhound
165 | bluetick
166 | black-and-tan coonhound
167 | Walker hound
168 | English foxhound
169 | redbone
170 | borzoi
171 | Irish wolfhound
172 | Italian greyhound
173 | whippet
174 | Ibizan hound
175 | Norwegian elkhound
176 | otterhound
177 | Saluki
178 | Scottish deerhound
179 | Weimaraner
180 | Staffordshire bullterrier
181 | American Staffordshire terrier
182 | Bedlington terrier
183 | Border terrier
184 | Kerry blue terrier
185 | Irish terrier
186 | Norfolk terrier
187 | Norwich terrier
188 | Yorkshire terrier
189 | wire-haired fox terrier
190 | Lakeland terrier
191 | Sealyham terrier
192 | Airedale
193 | cairn
194 | Australian terrier
195 | Dandie Dinmont
196 | Boston bull
197 | miniature schnauzer
198 | giant schnauzer
199 | standard schnauzer
200 | Scotch terrier
201 | Tibetan terrier
202 | silky terrier
203 | soft-coated wheaten terrier
204 | West Highland white terrier
205 | Lhasa
206 | flat-coated retriever
207 | curly-coated retriever
208 | golden retriever
209 | Labrador retriever
210 | Chesapeake Bay retriever
211 | German short-haired pointer
212 | vizsla
213 | English setter
214 | Irish setter
215 | Gordon setter
216 | Brittany spaniel
217 | clumber
218 | English springer
219 | Welsh springer spaniel
220 | cocker spaniel
221 | Sussex spaniel
222 | Irish water spaniel
223 | kuvasz
224 | schipperke
225 | groenendael
226 | malinois
227 | briard
228 | kelpie
229 | komondor
230 | Old English sheepdog
231 | Shetland sheepdog
232 | collie
233 | Border collie
234 | Bouvier des Flandres
235 | Rottweiler
236 | German shepherd
237 | Doberman
238 | miniature pinscher
239 | Greater Swiss Mountain dog
240 | Bernese mountain dog
241 | Appenzeller
242 | EntleBucher
243 | boxer
244 | bull mastiff
245 | Tibetan mastiff
246 | French bulldog
247 | Great Dane
248 | Saint Bernard
249 | Eskimo dog
250 | malamute
251 | Siberian husky
252 | dalmatian
253 | affenpinscher
254 | basenji
255 | pug
256 | Leonberg
257 | Newfoundland
258 | Great Pyrenees
259 | Samoyed
260 | Pomeranian
261 | chow
262 | keeshond
263 | Brabancon griffon
264 | Pembroke
265 | Cardigan
266 | toy poodle
267 | miniature poodle
268 | standard poodle
269 | Mexican hairless
270 | timber wolf
271 | white wolf
272 | red wolf
273 | coyote
274 | dingo
275 | dhole
276 | African hunting dog
277 | hyena
278 | red fox
279 | kit fox
280 | Arctic fox
281 | grey fox
282 | tabby
283 | tiger cat
284 | Persian cat
285 | Siamese cat
286 | Egyptian cat
287 | cougar
288 | lynx
289 | leopard
290 | snow leopard
291 | jaguar
292 | lion
293 | tiger
294 | cheetah
295 | brown bear
296 | American black bear
297 | ice bear
298 | sloth bear
299 | mongoose
300 | meerkat
301 | tiger beetle
302 | ladybug
303 | ground beetle
304 | long-horned beetle
305 | leaf beetle
306 | dung beetle
307 | rhinoceros beetle
308 | weevil
309 | fly
310 | bee
311 | ant
312 | grasshopper
313 | cricket
314 | walking stick
315 | cockroach
316 | mantis
317 | cicada
318 | leafhopper
319 | lacewing
320 | dragonfly
321 | damselfly
322 | admiral
323 | ringlet
324 | monarch
325 | cabbage butterfly
326 | sulphur butterfly
327 | lycaenid
328 | starfish
329 | sea urchin
330 | sea cucumber
331 | wood rabbit
332 | hare
333 | Angora
334 | hamster
335 | porcupine
336 | fox squirrel
337 | marmot
338 | beaver
339 | guinea pig
340 | sorrel
341 | zebra
342 | hog
343 | wild boar
344 | warthog
345 | hippopotamus
346 | ox
347 | water buffalo
348 | bison
349 | ram
350 | bighorn
351 | ibex
352 | hartebeest
353 | impala
354 | gazelle
355 | Arabian camel
356 | llama
357 | weasel
358 | mink
359 | polecat
360 | black-footed ferret
361 | otter
362 | skunk
363 | badger
364 | armadillo
365 | three-toed sloth
366 | orangutan
367 | gorilla
368 | chimpanzee
369 | gibbon
370 | siamang
371 | guenon
372 | patas
373 | baboon
374 | macaque
375 | langur
376 | colobus
377 | proboscis monkey
378 | marmoset
379 | capuchin
380 | howler monkey
381 | titi
382 | spider monkey
383 | squirrel monkey
384 | Madagascar cat
385 | indri
386 | Indian elephant
387 | African elephant
388 | lesser panda
389 | giant panda
390 | barracouta
391 | eel
392 | coho
393 | rock beauty
394 | anemone fish
395 | sturgeon
396 | gar
397 | lionfish
398 | puffer
399 | abacus
400 | abaya
401 | academic gown
402 | accordion
403 | acoustic guitar
404 | aircraft carrier
405 | airliner
406 | airship
407 | altar
408 | ambulance
409 | amphibian
410 | analog clock
411 | apiary
412 | apron
413 | ashcan
414 | assault rifle
415 | backpack
416 | bakery
417 | balance beam
418 | balloon
419 | ballpoint
420 | Band Aid
421 | banjo
422 | bannister
423 | barbell
424 | barber chair
425 | barbershop
426 | barn
427 | barometer
428 | barrel
429 | barrow
430 | baseball
431 | basketball
432 | bassinet
433 | bassoon
434 | bathing cap
435 | bath towel
436 | bathtub
437 | beach wagon
438 | beacon
439 | beaker
440 | bearskin
441 | beer bottle
442 | beer glass
443 | bell cote
444 | bib
445 | bicycle-built-for-two
446 | bikini
447 | binder
448 | binoculars
449 | birdhouse
450 | boathouse
451 | bobsled
452 | bolo tie
453 | bonnet
454 | bookcase
455 | bookshop
456 | bottlecap
457 | bow
458 | bow tie
459 | brass
460 | brassiere
461 | breakwater
462 | breastplate
463 | broom
464 | bucket
465 | buckle
466 | bulletproof vest
467 | bullet train
468 | butcher shop
469 | cab
470 | caldron
471 | candle
472 | cannon
473 | canoe
474 | can opener
475 | cardigan
476 | car mirror
477 | carousel
478 | carpenter's kit
479 | carton
480 | car wheel
481 | cash machine
482 | cassette
483 | cassette player
484 | castle
485 | catamaran
486 | CD player
487 | cello
488 | cellular telephone
489 | chain
490 | chainlink fence
491 | chain mail
492 | chain saw
493 | chest
494 | chiffonier
495 | chime
496 | china cabinet
497 | Christmas stocking
498 | church
499 | cinema
500 | cleaver
501 | cliff dwelling
502 | cloak
503 | clog
504 | cocktail shaker
505 | coffee mug
506 | coffeepot
507 | coil
508 | combination lock
509 | computer keyboard
510 | confectionery
511 | container ship
512 | convertible
513 | corkscrew
514 | cornet
515 | cowboy boot
516 | cowboy hat
517 | cradle
518 | crane
519 | crash helmet
520 | crate
521 | crib
522 | Crock Pot
523 | croquet ball
524 | crutch
525 | cuirass
526 | dam
527 | desk
528 | desktop computer
529 | dial telephone
530 | diaper
531 | digital clock
532 | digital watch
533 | dining table
534 | dishrag
535 | dishwasher
536 | disk brake
537 | dock
538 | dogsled
539 | dome
540 | doormat
541 | drilling platform
542 | drum
543 | drumstick
544 | dumbbell
545 | Dutch oven
546 | electric fan
547 | electric guitar
548 | electric locomotive
549 | entertainment center
550 | envelope
551 | espresso maker
552 | face powder
553 | feather boa
554 | file
555 | fireboat
556 | fire engine
557 | fire screen
558 | flagpole
559 | flute
560 | folding chair
561 | football helmet
562 | forklift
563 | fountain
564 | fountain pen
565 | four-poster
566 | freight car
567 | French horn
568 | frying pan
569 | fur coat
570 | garbage truck
571 | gasmask
572 | gas pump
573 | goblet
574 | go-kart
575 | golf ball
576 | golfcart
577 | gondola
578 | gong
579 | gown
580 | grand piano
581 | greenhouse
582 | grille
583 | grocery store
584 | guillotine
585 | hair slide
586 | hair spray
587 | half track
588 | hammer
589 | hamper
590 | hand blower
591 | hand-held computer
592 | handkerchief
593 | hard disc
594 | harmonica
595 | harp
596 | harvester
597 | hatchet
598 | holster
599 | home theater
600 | honeycomb
601 | hook
602 | hoopskirt
603 | horizontal bar
604 | horse cart
605 | hourglass
606 | iPod
607 | iron
608 | jack-o'-lantern
609 | jean
610 | jeep
611 | jersey
612 | jigsaw puzzle
613 | jinrikisha
614 | joystick
615 | kimono
616 | knee pad
617 | knot
618 | lab coat
619 | ladle
620 | lampshade
621 | laptop
622 | lawn mower
623 | lens cap
624 | letter opener
625 | library
626 | lifeboat
627 | lighter
628 | limousine
629 | liner
630 | lipstick
631 | Loafer
632 | lotion
633 | loudspeaker
634 | loupe
635 | lumbermill
636 | magnetic compass
637 | mailbag
638 | mailbox
639 | maillot
640 | maillot
641 | manhole cover
642 | maraca
643 | marimba
644 | mask
645 | matchstick
646 | maypole
647 | maze
648 | measuring cup
649 | medicine chest
650 | megalith
651 | microphone
652 | microwave
653 | military uniform
654 | milk can
655 | minibus
656 | miniskirt
657 | minivan
658 | missile
659 | mitten
660 | mixing bowl
661 | mobile home
662 | Model T
663 | modem
664 | monastery
665 | monitor
666 | moped
667 | mortar
668 | mortarboard
669 | mosque
670 | mosquito net
671 | motor scooter
672 | mountain bike
673 | mountain tent
674 | mouse
675 | mousetrap
676 | moving van
677 | muzzle
678 | nail
679 | neck brace
680 | necklace
681 | nipple
682 | notebook
683 | obelisk
684 | oboe
685 | ocarina
686 | odometer
687 | oil filter
688 | organ
689 | oscilloscope
690 | overskirt
691 | oxcart
692 | oxygen mask
693 | packet
694 | paddle
695 | paddlewheel
696 | padlock
697 | paintbrush
698 | pajama
699 | palace
700 | panpipe
701 | paper towel
702 | parachute
703 | parallel bars
704 | park bench
705 | parking meter
706 | passenger car
707 | patio
708 | pay-phone
709 | pedestal
710 | pencil box
711 | pencil sharpener
712 | perfume
713 | Petri dish
714 | photocopier
715 | pick
716 | pickelhaube
717 | picket fence
718 | pickup
719 | pier
720 | piggy bank
721 | pill bottle
722 | pillow
723 | ping-pong ball
724 | pinwheel
725 | pirate
726 | pitcher
727 | plane
728 | planetarium
729 | plastic bag
730 | plate rack
731 | plow
732 | plunger
733 | Polaroid camera
734 | pole
735 | police van
736 | poncho
737 | pool table
738 | pop bottle
739 | pot
740 | potter's wheel
741 | power drill
742 | prayer rug
743 | printer
744 | prison
745 | projectile
746 | projector
747 | puck
748 | punching bag
749 | purse
750 | quill
751 | quilt
752 | racer
753 | racket
754 | radiator
755 | radio
756 | radio telescope
757 | rain barrel
758 | recreational vehicle
759 | reel
760 | reflex camera
761 | refrigerator
762 | remote control
763 | restaurant
764 | revolver
765 | rifle
766 | rocking chair
767 | rotisserie
768 | rubber eraser
769 | rugby ball
770 | rule
771 | running shoe
772 | safe
773 | safety pin
774 | saltshaker
775 | sandal
776 | sarong
777 | sax
778 | scabbard
779 | scale
780 | school bus
781 | schooner
782 | scoreboard
783 | screen
784 | screw
785 | screwdriver
786 | seat belt
787 | sewing machine
788 | shield
789 | shoe shop
790 | shoji
791 | shopping basket
792 | shopping cart
793 | shovel
794 | shower cap
795 | shower curtain
796 | ski
797 | ski mask
798 | sleeping bag
799 | slide rule
800 | sliding door
801 | slot
802 | snorkel
803 | snowmobile
804 | snowplow
805 | soap dispenser
806 | soccer ball
807 | sock
808 | solar dish
809 | sombrero
810 | soup bowl
811 | space bar
812 | space heater
813 | space shuttle
814 | spatula
815 | speedboat
816 | spider web
817 | spindle
818 | sports car
819 | spotlight
820 | stage
821 | steam locomotive
822 | steel arch bridge
823 | steel drum
824 | stethoscope
825 | stole
826 | stone wall
827 | stopwatch
828 | stove
829 | strainer
830 | streetcar
831 | stretcher
832 | studio couch
833 | stupa
834 | submarine
835 | suit
836 | sundial
837 | sunglass
838 | sunglasses
839 | sunscreen
840 | suspension bridge
841 | swab
842 | sweatshirt
843 | swimming trunks
844 | swing
845 | switch
846 | syringe
847 | table lamp
848 | tank
849 | tape player
850 | teapot
851 | teddy
852 | television
853 | tennis ball
854 | thatch
855 | theater curtain
856 | thimble
857 | thresher
858 | throne
859 | tile roof
860 | toaster
861 | tobacco shop
862 | toilet seat
863 | torch
864 | totem pole
865 | tow truck
866 | toyshop
867 | tractor
868 | trailer truck
869 | tray
870 | trench coat
871 | tricycle
872 | trimaran
873 | tripod
874 | triumphal arch
875 | trolleybus
876 | trombone
877 | tub
878 | turnstile
879 | typewriter keyboard
880 | umbrella
881 | unicycle
882 | upright
883 | vacuum
884 | vase
885 | vault
886 | velvet
887 | vending machine
888 | vestment
889 | viaduct
890 | violin
891 | volleyball
892 | waffle iron
893 | wall clock
894 | wallet
895 | wardrobe
896 | warplane
897 | washbasin
898 | washer
899 | water bottle
900 | water jug
901 | water tower
902 | whiskey jug
903 | whistle
904 | wig
905 | window screen
906 | window shade
907 | Windsor tie
908 | wine bottle
909 | wing
910 | wok
911 | wooden spoon
912 | wool
913 | worm fence
914 | wreck
915 | yawl
916 | yurt
917 | web site
918 | comic book
919 | crossword puzzle
920 | street sign
921 | traffic light
922 | book jacket
923 | menu
924 | plate
925 | guacamole
926 | consomme
927 | hot pot
928 | trifle
929 | ice cream
930 | ice lolly
931 | French loaf
932 | bagel
933 | pretzel
934 | cheeseburger
935 | hotdog
936 | mashed potato
937 | head cabbage
938 | broccoli
939 | cauliflower
940 | zucchini
941 | spaghetti squash
942 | acorn squash
943 | butternut squash
944 | cucumber
945 | artichoke
946 | bell pepper
947 | cardoon
948 | mushroom
949 | Granny Smith
950 | strawberry
951 | orange
952 | lemon
953 | fig
954 | pineapple
955 | banana
956 | jackfruit
957 | custard apple
958 | pomegranate
959 | hay
960 | carbonara
961 | chocolate sauce
962 | dough
963 | meat loaf
964 | pizza
965 | potpie
966 | burrito
967 | red wine
968 | espresso
969 | cup
970 | eggnog
971 | alp
972 | bubble
973 | cliff
974 | coral reef
975 | geyser
976 | lakeside
977 | promontory
978 | sandbar
979 | seashore
980 | valley
981 | volcano
982 | ballplayer
983 | groom
984 | scuba diver
985 | rapeseed
986 | daisy
987 | yellow lady's slipper
988 | corn
989 | acorn
990 | hip
991 | buckeye
992 | coral fungus
993 | agaric
994 | gyromitra
995 | stinkhorn
996 | earthstar
997 | hen-of-the-woods
998 | bolete
999 | ear
1000 | toilet tissue
--------------------------------------------------------------------------------
/api/main.py:
--------------------------------------------------------------------------------
1 | """
2 | Adapted from:
3 |
4 | (1) https://github.com/shanesoh/deploy-ml-fastapi-redis-docker/
5 | (2) https://github.com/aniketmaurya/tensorflow-fastapi-starter-pack
6 | """
7 |
8 | import json
9 | import urllib.request
10 |
11 | import onnxruntime as ort
12 | from fastapi import FastAPI, File, Form, HTTPException
13 |
14 | from utils import decode_predictions, get_latest_model_url, prepare_image
15 |
16 | app = FastAPI(title="ONNX image classification API")
17 |
18 | MODEL_FN = "resnet50_w_preprocessing.onnx"
19 | DEFAULT_MODEL_URL = f"https://github.com/sayakpaul/ml-deployment-k8s-fastapi/releases/download/v1.0.0/{MODEL_FN}"
20 |
21 |
22 | @app.get("/")
23 | async def home():
24 | return "Welcome!"
25 |
26 |
27 | @app.on_event("startup")
28 | def load_modules():
29 | model_url = get_latest_model_url()
30 |
31 |     # If there's no latest ONNX model released, fall back to the default model.
32 | if model_url is not None:
33 | urllib.request.urlretrieve(model_url, MODEL_FN)
34 | else:
35 | urllib.request.urlretrieve(DEFAULT_MODEL_URL, MODEL_FN)
36 |
37 | global resnet_model_sess
38 | resnet_model_sess = ort.InferenceSession(MODEL_FN)
39 |
40 | category_filename = "imagenet_classes.txt"
41 | category_url = f"https://raw.githubusercontent.com/pytorch/hub/master/{category_filename}"
42 | urllib.request.urlretrieve(category_url, category_filename)
43 |
44 | global imagenet_categories
45 | with open(category_filename, "r") as f:
46 | imagenet_categories = [s.strip() for s in f.readlines()]
47 |
48 |
49 | @app.post("/predict/image")
50 | async def predict_api(
51 | image_file: bytes = File(...),
52 | with_resize: bool = Form(...),
53 | with_post_process: bool = Form(...),
54 | ):
55 | image = prepare_image(image_file, with_resize)
56 |
57 |     if image.ndim != 4 or image.shape[-1] != 3:
58 |         raise HTTPException(
59 |             status_code=400, detail="Only 3-channel RGB images are supported."
60 |         )
61 |
62 | predictions = resnet_model_sess.run(None, {"image_input": image})[0]
63 | if with_post_process:
64 | response_dict = decode_predictions(predictions, imagenet_categories)
65 | return json.dumps(response_dict)
66 | else:
67 | return "OK"
68 |
--------------------------------------------------------------------------------
/api/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy==1.22.2
2 | uvicorn[standard]==0.17.5
3 | Pillow==9.0.1
4 | onnxruntime==1.10.0
5 | fastapi==0.74.0
6 | python-multipart==0.0.5
7 | pydantic==1.9.0
8 | PyGithub[integrations]==1.55
--------------------------------------------------------------------------------
/api/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .utils import decode_predictions, get_latest_model_url, prepare_image
2 |
--------------------------------------------------------------------------------
/api/utils/utils.py:
--------------------------------------------------------------------------------
1 | import io
2 | import json
3 | from typing import Dict, List, Optional
4 |
5 | import numpy as np
6 | import requests
7 | from fastapi import HTTPException
8 | from github import Github
9 | from PIL import Image
10 |
11 | TARGET_IMG_WIDTH = 224
12 | TARGET_IMG_HEIGHT = 224
13 |
14 |
15 | def get_latest_model_url() -> Optional[str]:
16 |     """Gets the model download URL from the latest release artifacts, if any."""
17 | g = Github()
18 |
19 | repo = g.get_repo("sayakpaul/ml-deployment-k8s-fastapi")
20 | latest_release = repo.get_latest_release()
21 | assets = list(latest_release.get_assets())
22 |
23 | download_url = None
24 |
25 | for asset in assets:
26 | if "onnx" in asset.name:
27 | asset_url = asset.url
28 | r = requests.get(asset_url)
29 | response = json.loads(r.text)
30 | download_url = response["browser_download_url"]
31 |
32 | return download_url
33 |
34 |
35 | def raise_http_exception(msg: str):
36 |     """Raises an HTTPException with status code 400."""
37 |     raise HTTPException(status_code=400, detail=msg)
38 |
39 |
40 | def prepare_image(image_file: bytes, with_resizing: bool = False) -> np.ndarray:
41 | """Prepares an image for model prediction."""
42 | image = Image.open(io.BytesIO(image_file))
43 | width, height = image.size
44 |
45 | if image.format not in ["JPEG", "JPG", "PNG"]:
46 | raise_http_exception("Supported formats are JPEG, JPG, and PNG.")
47 |
48 | if with_resizing:
49 | image = image.resize((TARGET_IMG_WIDTH, TARGET_IMG_HEIGHT))
50 | else:
51 |         if width != TARGET_IMG_WIDTH or height != TARGET_IMG_HEIGHT:
52 |             raise_http_exception("Image size is not 224x224.")
53 |
54 | image = np.array(image).astype("float32")
55 | return np.expand_dims(image, 0)
56 |
57 |
58 | def decode_predictions(
59 | predictions: np.ndarray, imagenet_categories: List[str]
60 | ) -> Dict[str, float]:
61 | """Decodes model predictions."""
62 | predictions = np.squeeze(predictions)
63 | pred_name = imagenet_categories[int(predictions.argmax())]
64 | response_dict = {"Label": pred_name, "Score": f"{predictions.max():.3f}"}
65 |
66 | return response_dict
67 |
--------------------------------------------------------------------------------
/locust/README.md:
--------------------------------------------------------------------------------
1 | # Load Test with Locust
2 |
3 | This directory contains a Locust script for load testing.
4 |
5 | ## How to set up
6 |
7 | 1. Installation
8 |
9 | ```bash
10 | pip3 install locust
11 | ```
12 |
13 | 2. Run
14 |
15 | ```bash
16 | # with UI
17 | $ locust
18 |
19 | # or, with options passed up front
20 |
21 | $ locust --users NUM_OF_USERS \
22 | --spawn-rate SPAWN_RATE \
23 | --host HOST_ADDRESS
24 |
25 | # without UI & manual config
26 | # the report will be generated to report.html
27 | $ locust --headless \
28 | --users NUM_OF_USERS \
29 | --spawn-rate SPAWN_RATE \
30 | --host HOST_ADDRESS \
31 | --html report.html
32 |
33 | # without UI & auto config
34 | $ locust --config=load_test.conf
35 | ```
36 |
37 | ## Notes
38 |
39 | * We used an `n1-standard` VM (4vCPU + 16GB RAM) on GCP in the `us-central1` region,
40 |   since the nodes on GKE are also located there.
41 | * Before running the load test, don't forget to replace `<>` in `load_test.conf`
42 |   with the endpoint of your API.
43 | * We prepared a resized 224x224 image beforehand (`cat_224x224.jpg`) because we
44 |   focus on load testing the server side, keeping client-side pre- and
45 |   post-processing time to a minimum.
46 |
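47 | ## Sanity check
48 | 
49 | Before launching a full load test, you can hit the endpoint once to verify it
50 | responds. A minimal sketch (replace the hypothetical `HOST_ADDRESS`; the field
51 | names match those in `api/main.py`):
52 | 
53 | ```python
54 | import requests
55 | 
56 | # Send the pre-resized image exactly as the Locust task does.
57 | with open("cat_224x224.jpg", "rb") as f:
58 |     resp = requests.post(
59 |         "http://HOST_ADDRESS/predict/image",
60 |         files={"image_file": f},
61 |         data={"with_resize": "false", "with_post_process": "true"},
62 |     )
63 | print(resp.status_code, resp.text)
64 | ```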
--------------------------------------------------------------------------------
/locust/cat.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sayakpaul/ml-deployment-k8s-fastapi/55b690520b943bf39dbb02a7f28fc1aba781e945/locust/cat.jpg
--------------------------------------------------------------------------------
/locust/cat_224x224.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sayakpaul/ml-deployment-k8s-fastapi/55b690520b943bf39dbb02a7f28fc1aba781e945/locust/cat_224x224.jpg
--------------------------------------------------------------------------------
/locust/load_test.conf:
--------------------------------------------------------------------------------
1 | locustfile = locust.py
2 | headless = false
3 | users = 150
4 | spawn-rate = 1
5 | run-time = 5m
6 | host = http://<>
7 | html = reports/locust_report.html
8 | csv = reports/locust_report
--------------------------------------------------------------------------------
/locust/locust.py:
--------------------------------------------------------------------------------
1 | from locust import HttpUser, constant, task
2 |
3 |
4 | class ImgClassificationUser(HttpUser):
5 | wait_time = constant(1)
6 |
7 | @task
8 | def predict(self):
9 |         # Use the pre-resized 224x224 image so the server skips resizing.
10 |         with open("cat_224x224.jpg", "rb") as attach:
11 |             payload = {"with_resize": False, "with_post_process": False}
12 |             _ = self.client.post(
13 |                 "/predict/image", files={"image_file": attach}, data=payload
14 |             )
15 | 
--------------------------------------------------------------------------------
/notebooks/TF_Serving.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "id": "6FvzT_HQXz9J"
7 | },
8 | "source": [
9 | "# Verifying workable TF Serving\n",
10 | "\n",
11 | "This tutorial shows:\n",
12 | "- how to run TF Serving for a custom model in Docker container\n",
13 | "- how to request for predictions via both gRPC and RestAPI calls\n",
14 | "- the prediction timing result from TF Serving\n",
15 | "\n",
16 | "This notebook is written by referencing the [official TF Serving gRPC example](https://github.com/tensorflow/serving/blob/master/tensorflow_serving/example/resnet_k8s.yaml) and [official TF Serving RestAPI example](https://www.tensorflow.org/tfx/tutorials/serving/rest_simple)."
17 | ]
18 | },
19 | {
20 | "cell_type": "markdown",
21 | "metadata": {
22 | "id": "Com8Mcu2Xz9L"
23 | },
24 | "source": [
25 | "### Imports"
26 | ]
27 | },
28 | {
29 | "cell_type": "code",
30 | "execution_count": null,
31 | "metadata": {
32 | "id": "b-aGIWy8c2Ht"
33 | },
34 | "outputs": [],
35 | "source": [
36 | "!pip install -q requests\n",
37 | "!pip install -q tensorflow-serving-api"
38 | ]
39 | },
40 | {
41 | "cell_type": "code",
42 | "execution_count": 2,
43 | "metadata": {
44 | "id": "6lQVylcMXz9N"
45 | },
46 | "outputs": [],
47 | "source": [
48 | "import os\n",
49 | "import tempfile\n",
50 | "import pandas as pd\n",
51 | "import tensorflow as tf\n",
52 | "import numpy as np\n",
53 | "import json\n",
54 | "import requests\n",
55 | "\n",
56 | "# gRPC request specific imports\n",
57 | "import grpc\n",
58 | "from tensorflow_serving.apis import predict_pb2\n",
59 | "from tensorflow_serving.apis import prediction_service_pb2_grpc"
60 | ]
61 | },
62 | {
63 | "cell_type": "markdown",
64 | "metadata": {
65 | "id": "GoIj2728pLyw"
66 | },
67 | "source": [
68 | "## Model"
69 | ]
70 | },
71 | {
72 | "cell_type": "markdown",
73 | "metadata": {
74 | "id": "3xmYCIWpXz9N"
75 | },
76 | "source": [
77 | "### Get a sample model \n",
78 | "\n",
79 | "The target model is the plain `ResNet50` trained on ImageNet."
80 | ]
81 | },
82 | {
83 | "cell_type": "code",
84 | "execution_count": 3,
85 | "metadata": {
86 | "colab": {
87 | "base_uri": "https://localhost:8080/"
88 | },
89 | "id": "VysQtJQnXz9O",
90 | "outputId": "c63abf81-65e7-48c9-9e71-d16108da2d2a"
91 | },
92 | "outputs": [
93 | {
94 | "name": "stdout",
95 | "output_type": "stream",
96 | "text": [
97 | "Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels.h5\n",
98 | "102973440/102967424 [==============================] - 2s 0us/step\n",
99 | "102981632/102967424 [==============================] - 2s 0us/step\n"
100 | ]
101 | }
102 | ],
103 | "source": [
104 | "core = tf.keras.applications.ResNet50(include_top=True, input_shape=(224, 224, 3))\n",
105 | "\n",
106 | "inputs = tf.keras.layers.Input(shape=(224, 224, 3), name=\"image_input\")\n",
107 | "preprocess = tf.keras.applications.resnet50.preprocess_input(inputs)\n",
108 | "outputs = core(preprocess, training=False)\n",
109 | "model = tf.keras.Model(inputs=[inputs], outputs=[outputs])"
110 | ]
111 | },
112 | {
113 | "cell_type": "markdown",
114 | "metadata": {
115 | "id": "p3bC--0GXz9O"
116 | },
117 | "source": [
118 | "### Save the model\n",
119 | "\n",
120 | "Below code saves the model under `MODEL_DIR`."
121 | ]
122 | },
123 | {
124 | "cell_type": "code",
125 | "execution_count": 4,
126 | "metadata": {
127 | "colab": {
128 | "base_uri": "https://localhost:8080/"
129 | },
130 | "id": "z9AmyovhXz9O",
131 | "outputId": "c26eadf0-e06e-45e4-a40d-e00b5343154e"
132 | },
133 | "outputs": [
134 | {
135 | "name": "stdout",
136 | "output_type": "stream",
137 | "text": [
138 | "export_path = /tmp/1\n",
139 | "\n",
140 | "WARNING:tensorflow:Compiled the loaded model, but the compiled metrics have yet to be built. `model.compile_metrics` will be empty until you train or evaluate the model.\n",
141 | "INFO:tensorflow:Assets written to: /tmp/1/assets\n",
142 | "\n",
143 | "Saved model:\n",
144 | "total 4040\n",
145 | "drwxr-xr-x 2 root root 4096 Mar 23 07:32 assets\n",
146 | "-rw-r--r-- 1 root root 557217 Mar 23 07:32 keras_metadata.pb\n",
147 | "-rw-r--r-- 1 root root 3565545 Mar 23 07:32 saved_model.pb\n",
148 | "drwxr-xr-x 2 root root 4096 Mar 23 07:32 variables\n"
149 | ]
150 | }
151 | ],
152 | "source": [
153 | "MODEL_DIR = tempfile.gettempdir()\n",
154 | "version = 1\n",
155 | "export_path = os.path.join(MODEL_DIR, str(version))\n",
156 | "print('export_path = {}\\n'.format(export_path))\n",
157 | "\n",
158 | "tf.keras.models.save_model(\n",
159 | " model,\n",
160 | " export_path,\n",
161 | " overwrite=True,\n",
162 | " include_optimizer=True,\n",
163 | " save_format=None,\n",
164 | " signatures=None,\n",
165 | " options=None\n",
166 | ")\n",
167 | "\n",
168 | "print('\\nSaved model:')\n",
169 | "!ls -l {export_path}"
170 | ]
171 | },
172 | {
173 | "cell_type": "markdown",
174 | "metadata": {
175 | "id": "VV7onOD2Xz9P"
176 | },
177 | "source": [
178 | "### Examine your saved model"
179 | ]
180 | },
181 | {
182 | "cell_type": "markdown",
183 | "metadata": {
184 | "id": "baanYnt8ohM7"
185 | },
186 | "source": [
187 | "TensorFlow comes with a handy `saved_model_cli` tool to investigate saved model.\n",
188 | "\n",
189 | "Notice from `signature_def['serving_default']:` \n",
190 | "- the input name is `image_input`\n",
191 | "- the output name is `resnet50`\n",
192 | "\n",
193 | "You need to know these to make requests to the TF Serving server later"
194 | ]
195 | },
196 | {
197 | "cell_type": "code",
198 | "execution_count": 5,
199 | "metadata": {
200 | "colab": {
201 | "base_uri": "https://localhost:8080/"
202 | },
203 | "id": "Lgzz06XoXz9Q",
204 | "outputId": "c51a85f9-c6bf-4e7e-f710-2a572fde45d6"
205 | },
206 | "outputs": [
207 | {
208 | "name": "stdout",
209 | "output_type": "stream",
210 | "text": [
211 | "\n",
212 | "MetaGraphDef with tag-set: 'serve' contains the following SignatureDefs:\n",
213 | "\n",
214 | "signature_def['__saved_model_init_op']:\n",
215 | " The given SavedModel SignatureDef contains the following input(s):\n",
216 | " The given SavedModel SignatureDef contains the following output(s):\n",
217 | " outputs['__saved_model_init_op'] tensor_info:\n",
218 | " dtype: DT_INVALID\n",
219 | " shape: unknown_rank\n",
220 | " name: NoOp\n",
221 | " Method name is: \n",
222 | "\n",
223 | "signature_def['serving_default']:\n",
224 | " The given SavedModel SignatureDef contains the following input(s):\n",
225 | " inputs['image_input'] tensor_info:\n",
226 | " dtype: DT_FLOAT\n",
227 | " shape: (-1, 224, 224, 3)\n",
228 | " name: serving_default_image_input:0\n",
229 | " The given SavedModel SignatureDef contains the following output(s):\n",
230 | " outputs['resnet50'] tensor_info:\n",
231 | " dtype: DT_FLOAT\n",
232 | " shape: (-1, 1000)\n",
233 | " name: StatefulPartitionedCall:0\n",
234 | " Method name is: tensorflow/serving/predict\n",
235 | "\n",
236 | "Concrete Functions:\n",
237 | " Function Name: '__call__'\n",
238 | " Option #1\n",
239 | " Callable with:\n",
240 | " Argument #1\n",
241 | " inputs: TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name='inputs')\n",
242 | " Argument #2\n",
243 | " DType: bool\n",
244 | " Value: False\n",
245 | " Argument #3\n",
246 | " DType: NoneType\n",
247 | " Value: None\n",
248 | " Option #2\n",
249 | " Callable with:\n",
250 | " Argument #1\n",
251 | " image_input: TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name='image_input')\n",
252 | " Argument #2\n",
253 | " DType: bool\n",
254 | " Value: False\n",
255 | " Argument #3\n",
256 | " DType: NoneType\n",
257 | " Value: None\n",
258 | " Option #3\n",
259 | " Callable with:\n",
260 | " Argument #1\n",
261 | " inputs: TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name='inputs')\n",
262 | " Argument #2\n",
263 | " DType: bool\n",
264 | " Value: True\n",
265 | " Argument #3\n",
266 | " DType: NoneType\n",
267 | " Value: None\n",
268 | " Option #4\n",
269 | " Callable with:\n",
270 | " Argument #1\n",
271 | " image_input: TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name='image_input')\n",
272 | " Argument #2\n",
273 | " DType: bool\n",
274 | " Value: True\n",
275 | " Argument #3\n",
276 | " DType: NoneType\n",
277 | " Value: None\n",
278 | "\n",
279 | " Function Name: '_default_save_signature'\n",
280 | " Option #1\n",
281 | " Callable with:\n",
282 | " Argument #1\n",
283 | " image_input: TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name='image_input')\n",
284 | "\n",
285 | " Function Name: 'call_and_return_all_conditional_losses'\n",
286 | " Option #1\n",
287 | " Callable with:\n",
288 | " Argument #1\n",
289 | " image_input: TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name='image_input')\n",
290 | " Argument #2\n",
291 | " DType: bool\n",
292 | " Value: True\n",
293 | " Argument #3\n",
294 | " DType: NoneType\n",
295 | " Value: None\n",
296 | " Option #2\n",
297 | " Callable with:\n",
298 | " Argument #1\n",
299 | " image_input: TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name='image_input')\n",
300 | " Argument #2\n",
301 | " DType: bool\n",
302 | " Value: False\n",
303 | " Argument #3\n",
304 | " DType: NoneType\n",
305 | " Value: None\n",
306 | " Option #3\n",
307 | " Callable with:\n",
308 | " Argument #1\n",
309 | " inputs: TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name='inputs')\n",
310 | " Argument #2\n",
311 | " DType: bool\n",
312 | " Value: True\n",
313 | " Argument #3\n",
314 | " DType: NoneType\n",
315 | " Value: None\n",
316 | " Option #4\n",
317 | " Callable with:\n",
318 | " Argument #1\n",
319 | " inputs: TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name='inputs')\n",
320 | " Argument #2\n",
321 | " DType: bool\n",
322 | " Value: False\n",
323 | " Argument #3\n",
324 | " DType: NoneType\n",
325 | " Value: None\n"
326 | ]
327 | }
328 | ],
329 | "source": [
330 | "!saved_model_cli show --dir {export_path} --all"
331 | ]
332 | },
333 | {
334 | "cell_type": "markdown",
335 | "metadata": {
336 | "id": "5NBTdC7jXz9Q"
337 | },
338 | "source": [
339 | "## TF Serving"
340 | ]
341 | },
342 | {
343 | "cell_type": "markdown",
344 | "metadata": {
345 | "id": "g4G_oWb_plDI"
346 | },
347 | "source": [
348 | "### Create dummy data\n",
349 | "\n",
350 | "The dummy data is nothing but just contains random numbers in the batch size of 32."
351 | ]
352 | },
353 | {
354 | "cell_type": "code",
355 | "execution_count": 6,
356 | "metadata": {
357 | "colab": {
358 | "base_uri": "https://localhost:8080/"
359 | },
360 | "id": "loMcfbfTXz9S",
361 | "outputId": "0b18aed6-af3b-4e06-b5ae-e336305b0d5e"
362 | },
363 | "outputs": [
364 | {
365 | "data": {
366 | "text/plain": [
367 | "TensorShape([32, 224, 224, 3])"
368 | ]
369 | },
370 | "execution_count": 6,
371 | "metadata": {},
372 | "output_type": "execute_result"
373 | }
374 | ],
375 | "source": [
376 | "dummy_inputs = tf.random.normal((32, 224, 224, 3))\n",
377 | "dummy_inputs.shape"
378 | ]
379 | },
380 | {
381 | "cell_type": "markdown",
382 | "metadata": {
383 | "id": "ZBxKquAwpCEh"
384 | },
385 | "source": [
386 | "### Install TF Serving tool"
387 | ]
388 | },
389 | {
390 | "cell_type": "code",
391 | "execution_count": null,
392 | "metadata": {
393 | "id": "mDVz8VnnXz9Q"
394 | },
395 | "outputs": [],
396 | "source": [
397 | "!echo \"deb http://storage.googleapis.com/tensorflow-serving-apt stable tensorflow-model-server tensorflow-model-server-universal\" | sudo tee /etc/apt/sources.list.d/tensorflow-serving.list && \\\n",
398 | "curl https://storage.googleapis.com/tensorflow-serving-apt/tensorflow-serving.release.pub.gpg | sudo apt-key add -\n",
399 | "!sudo apt update"
400 | ]
401 | },
402 | {
403 | "cell_type": "code",
404 | "execution_count": null,
405 | "metadata": {
406 | "id": "u2HQb4q6sonS"
407 | },
408 | "outputs": [],
409 | "source": [
410 | "!sudo apt-get install tensorflow-model-server"
411 | ]
412 | },
413 | {
414 | "cell_type": "markdown",
415 | "metadata": {
416 | "id": "jn9oNYM7pYcv"
417 | },
418 | "source": [
419 | "### Run TF Serving server"
420 | ]
421 | },
422 | {
423 | "cell_type": "code",
424 | "execution_count": 26,
425 | "metadata": {
426 | "id": "MH-RScSvXz9R"
427 | },
428 | "outputs": [],
429 | "source": [
430 | "os.environ[\"MODEL_DIR\"] = MODEL_DIR"
431 | ]
432 | },
433 | {
434 | "cell_type": "markdown",
435 | "metadata": {},
436 | "source": [
437 | "`saved_model_cli` CLI accepts a set of options.\n",
438 | "- `--rest_api_port` exposes additional port for RestAPI. By default `8500` is exposed as gRPC.\n",
439 | "- `--model_name` lets TF Serving to identify which model to access. You can visually see this in the RestAPI's URI.\n",
440 | "- `--enable_model_warmup` \n",
441 | " - The TensorFlow runtime has components that are lazily initialized, which can cause high latency for the first request/s sent to a model after it is loaded. To reduce the impact of lazy initialization on request latency, it's possible to trigger the initialization of the sub-systems and components at model load time by providing a sample set of inference requests along with the SavedModel. This process is known as \"warming up\" the model.\n",
442 | " - To trigger warmup of the model at load time, attach a warmup data file under the assets.extra subfolder of the SavedModel directory.\n",
443 | " - `--enable_model_warmup` option triggers this process.\n",
444 | " - for further information, please look at the [official document](https://www.tensorflow.org/tfx/serving/saved_model_warmup?hl=en)"
445 | ]
446 | },
447 | {
448 | "cell_type": "code",
449 | "execution_count": null,
450 | "metadata": {
451 | "id": "Mq4t5ozVXz9R"
452 | },
453 | "outputs": [],
454 | "source": [
455 | "!nohup tensorflow_model_server \\\n",
456 | " --rest_api_port=8501 \\\n",
457 | " --model_name=resnet_model \\\n",
458 | " --model_base_path=$MODEL_DIR >server.log 2>&1 &\n",
459 | "\n",
460 | "# --enable_model_warmup for warmup(https://www.tensorflow.org/tfx/serving/saved_model_warmup)"
461 | ]
462 | },
463 | {
464 | "cell_type": "code",
465 | "execution_count": 28,
466 | "metadata": {
467 | "id": "PVhTO53jXz9S"
468 | },
469 | "outputs": [],
470 | "source": [
471 | "!cat server.log"
472 | ]
473 | },
474 | {
475 | "cell_type": "markdown",
476 | "metadata": {
477 | "id": "ea6V73oXzs3U"
478 | },
479 | "source": [
480 | "Notice that two ports are exposed for listening both RestAPI(`8501`) and gRPC(`8500`)."
481 | ]
482 | },
483 | {
484 | "cell_type": "code",
485 | "execution_count": 29,
486 | "metadata": {
487 | "colab": {
488 | "base_uri": "https://localhost:8080/"
489 | },
490 | "id": "KumZ3xB4giEa",
491 | "outputId": "4302ee0a-994f-485a-c99b-4fa394068d64"
492 | },
493 | "outputs": [
494 | {
495 | "name": "stdout",
496 | "output_type": "stream",
497 | "text": [
498 | "node 7 root 21u IPv6 25789 0t0 TCP *:8080 (LISTEN)\n",
499 | "colab-fil 30 root 5u IPv4 26644 0t0 TCP *:3453 (LISTEN)\n",
500 | "colab-fil 30 root 6u IPv6 26645 0t0 TCP *:3453 (LISTEN)\n",
501 | "jupyter-n 43 root 6u IPv4 25864 0t0 TCP 172.28.0.2:9000 (LISTEN)\n",
502 | "python3 61 root 15u IPv4 27814 0t0 TCP 127.0.0.1:50215 (LISTEN)\n",
503 | "python3 61 root 18u IPv4 27818 0t0 TCP 127.0.0.1:54779 (LISTEN)\n",
504 | "python3 61 root 21u IPv4 27822 0t0 TCP 127.0.0.1:40395 (LISTEN)\n",
505 | "python3 61 root 24u IPv4 27826 0t0 TCP 127.0.0.1:60517 (LISTEN)\n",
506 | "python3 61 root 30u IPv4 27832 0t0 TCP 127.0.0.1:40255 (LISTEN)\n",
507 | "python3 61 root 43u IPv4 28831 0t0 TCP 127.0.0.1:53235 (LISTEN)\n",
508 | "python3 81 root 3u IPv4 29267 0t0 TCP 127.0.0.1:15144 (LISTEN)\n",
509 | "python3 81 root 5u IPv4 28223 0t0 TCP 127.0.0.1:42197 (LISTEN)\n",
510 | "python3 81 root 9u IPv4 28356 0t0 TCP 127.0.0.1:41627 (LISTEN)\n",
511 | "tensorflo 5933 root 5u IPv4 66554 0t0 TCP *:8500 (LISTEN)\n",
512 | "tensorflo 5933 root 12u IPv4 66559 0t0 TCP *:8501 (LISTEN)\n"
513 | ]
514 | }
515 | ],
516 | "source": [
517 | "!sudo lsof -i -P -n | grep LISTEN"
518 | ]
519 | },
520 | {
521 | "cell_type": "markdown",
522 | "metadata": {
523 | "id": "mvHRnTmppqn9"
524 | },
525 | "source": [
526 | "## RestAPI request"
527 | ]
528 | },
529 | {
530 | "cell_type": "markdown",
531 | "metadata": {
532 | "id": "1QNMoU3qq2fN"
533 | },
534 | "source": [
535 | "### Convert dummy data in JSON format"
536 | ]
537 | },
538 | {
539 | "cell_type": "code",
540 | "execution_count": 30,
541 | "metadata": {
542 | "colab": {
543 | "base_uri": "https://localhost:8080/"
544 | },
545 | "id": "xDw6gT7lXz9S",
546 | "outputId": "19e82468-1441-4dae-c514-9d8b78f53240"
547 | },
548 | "outputs": [
549 | {
550 | "name": "stdout",
551 | "output_type": "stream",
552 | "text": [
553 | "Data: {\"signature_name\": \"serving_default\", \"instances\": ... 442383, 0.8007770776748657, -0.7472004890441895]]]]}\n"
554 | ]
555 | }
556 | ],
557 | "source": [
558 | "data = json.dumps({\"signature_name\": \"serving_default\", \"instances\": dummy_inputs.numpy().tolist()})\n",
559 | "print('Data: {} ... {}'.format(data[:50], data[len(data)-52:]))"
560 | ]
561 | },
562 | {
563 | "cell_type": "markdown",
564 | "metadata": {
565 | "id": "hzlArynZq-dF"
566 | },
567 | "source": [
568 | "### Make a request"
569 | ]
570 | },
571 | {
572 | "cell_type": "code",
573 | "execution_count": 31,
574 | "metadata": {
575 | "id": "hh6vmxqnXz9T"
576 | },
577 | "outputs": [],
578 | "source": [
579 | "headers = {\"content-type\": \"application/json\"}"
580 | ]
581 | },
582 | {
583 | "cell_type": "code",
584 | "execution_count": 32,
585 | "metadata": {
586 | "colab": {
587 | "base_uri": "https://localhost:8080/"
588 | },
589 | "id": "fS_DI5QpdZTg",
590 | "outputId": "6b93ca46-1047-4b76-bd84-d21f95f9f4e9"
591 | },
592 | "outputs": [
593 | {
594 | "name": "stdout",
595 | "output_type": "stream",
596 | "text": [
597 | "1 loop, best of 5: 4.11 s per loop\n"
598 | ]
599 | }
600 | ],
601 | "source": [
602 | "%%timeit\n",
603 | "json_response = requests.post('http://localhost:8501/v1/models/resnet_model:predict', \n",
604 | " data=data, headers=headers)"
605 | ]
606 | },
607 | {
608 | "cell_type": "markdown",
609 | "metadata": {
610 | "id": "_n8urXddrJp0"
611 | },
612 | "source": [
613 | "### Interpret the output"
614 | ]
615 | },
616 | {
617 | "cell_type": "code",
618 | "execution_count": 36,
619 | "metadata": {
620 | "colab": {
621 | "base_uri": "https://localhost:8080/"
622 | },
623 | "id": "brI4TCETXz9T",
624 | "outputId": "7a098027-fb8a-4bfd-96dc-0bf74d26af09"
625 | },
626 | "outputs": [
627 | {
628 | "name": "stdout",
629 | "output_type": "stream",
630 | "text": [
631 | "Prediction class: [664 664 664 664 664 664 664 664 664 664 664 664 664 664 664 664 664 664\n",
632 | " 664 664 664 664 664 664 664 851 664 664 851 664 664 664]\n"
633 | ]
634 | }
635 | ],
636 | "source": [
637 | "json_response = requests.post('http://localhost:8501/v1/models/resnet_model:predict', \n",
638 | " data=data, headers=headers)\n",
639 | "rest_predictions = json.loads(json_response.text)['predictions']\n",
640 | "print('Prediction class: {}'.format(np.argmax(rest_predictions, axis=-1)))"
641 | ]
642 | },
643 | {
644 | "cell_type": "markdown",
645 | "metadata": {
646 | "id": "lwuaqGVud58k"
647 | },
648 | "source": [
649 | "## gRPC request"
650 | ]
651 | },
652 | {
653 | "cell_type": "markdown",
654 | "metadata": {
655 | "id": "yr7vO8BQrP2S"
656 | },
657 | "source": [
658 | "### Open up gRPC channel"
659 | ]
660 | },
661 | {
662 | "cell_type": "code",
663 | "execution_count": 37,
664 | "metadata": {
665 | "id": "Y1cxieBDfyjK"
666 | },
667 | "outputs": [],
668 | "source": [
669 | "channel = grpc.insecure_channel('localhost:8500')\n",
670 | "stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)"
671 | ]
672 | },
673 | {
674 | "cell_type": "markdown",
675 | "metadata": {
676 | "id": "fS5b0VVfrTmF"
677 | },
678 | "source": [
679 | "### Prepare a request"
680 | ]
681 | },
682 | {
683 | "cell_type": "code",
684 | "execution_count": 38,
685 | "metadata": {
686 | "id": "2QD8xK47emy5"
687 | },
688 | "outputs": [],
689 | "source": [
690 | "request = predict_pb2.PredictRequest()\n",
691 | "request.model_spec.name = 'resnet_model'\n",
692 | "request.model_spec.signature_name = 'serving_default'\n",
693 | "request.inputs['image_input'].CopyFrom(\n",
694 | " tf.make_tensor_proto(dummy_inputs)) #, shape=[32,224,224,3]))"
695 | ]
696 | },
697 | {
698 | "cell_type": "markdown",
699 | "metadata": {
700 | "id": "7UztXjZGrYTf"
701 | },
702 | "source": [
703 | "### Make a request"
704 | ]
705 | },
706 | {
707 | "cell_type": "code",
708 | "execution_count": 39,
709 | "metadata": {
710 | "colab": {
711 | "base_uri": "https://localhost:8080/"
712 | },
713 | "id": "wvslcT5_f4P6",
714 | "outputId": "e4a1008a-dad5-4f8f-a0f6-f9b68aa7db67"
715 | },
716 | "outputs": [
717 | {
718 | "name": "stdout",
719 | "output_type": "stream",
720 | "text": [
721 | "1 loop, best of 5: 3.63 s per loop\n"
722 | ]
723 | }
724 | ],
725 | "source": [
726 | "%%timeit\n",
727 | "result = stub.Predict(request, 10.0) # 10 secs timeout"
728 | ]
729 | },
730 | {
731 | "cell_type": "markdown",
732 | "metadata": {
733 | "id": "o52MnidprdCY"
734 | },
735 | "source": [
736 | "### Interpret the output"
737 | ]
738 | },
739 | {
740 | "cell_type": "code",
741 | "execution_count": 40,
742 | "metadata": {
743 | "colab": {
744 | "base_uri": "https://localhost:8080/"
745 | },
746 | "id": "TBfd4TG0f5z6",
747 | "outputId": "ffc40a16-787e-4146-9c1e-27317d10867f"
748 | },
749 | "outputs": [
750 | {
751 | "name": "stdout",
752 | "output_type": "stream",
753 | "text": [
754 | "Prediction class: [664 664 664 664 664 664 664 664 664 664 664 664 664 664 664 664 664 664\n",
755 | " 664 664 664 664 664 664 664 851 664 664 851 664 664 664]\n"
756 | ]
757 | }
758 | ],
759 | "source": [
760 | "grpc_predictions = stub.Predict(request, 10.0) # 10 secs timeout\n",
761 | "grpc_predictions = grpc_predictions.outputs['resnet50'].float_val\n",
762 | "grpc_predictions = np.array(grpc_predictions).reshape(32, -1)\n",
763 | "print('Prediction class: {}'.format(np.argmax(grpc_predictions, axis=-1)))"
764 | ]
765 | },
766 | {
767 | "cell_type": "markdown",
768 | "metadata": {
769 | "id": "_dVHrF1ksyAc"
770 | },
771 | "source": [
772 | "## Compare the two results if they are identical\n",
773 | "\n",
774 | "`np.testing.assert_allclose` raises exception when the given two arrays do not match exactly."
775 | ]
776 | },
777 | {
778 | "cell_type": "code",
779 | "execution_count": 41,
780 | "metadata": {
781 | "id": "UA4iKEcpioc8"
782 | },
783 | "outputs": [],
784 | "source": [
785 | "np.testing.assert_allclose(rest_predictions, grpc_predictions, atol=1e-4)"
786 | ]
787 | },
788 | {
789 | "cell_type": "markdown",
790 | "metadata": {
791 | "id": "9-UcGFM0z65y"
792 | },
793 | "source": [
794 | "## Conclusion\n",
795 | "\n",
796 | "gRPC call took about 3.64 seconds while RestAPI call took about 4.11 seconds on the data of the batch size of 32. This let use conclude that gRPC call is much faster than RestAPI. \n",
797 | "\n",
798 | "Also note that this is very close performance comparing to the ONNX inference without any Server framework involved. That means we can expect TF Serving with gRPC should be faster than ONNX hosted on FastAPI server framework since FastAPI is a python framework while TF Serving is C++ implementation."
799 | ]
800 | },
801 | {
802 | "cell_type": "code",
803 | "execution_count": null,
804 | "metadata": {
805 | "id": "DbLQp2Do0k6H"
806 | },
807 | "outputs": [],
808 | "source": []
809 | }
810 | ],
811 | "metadata": {
812 | "colab": {
813 | "collapsed_sections": [],
814 | "name": "TF_Serving.ipynb",
815 | "provenance": [],
816 | "toc_visible": true
817 | },
818 | "interpreter": {
819 | "hash": "626869861cd3ed4fdbaf755d0ab61c53ee2a93056f2b69c4f7170d3cc24dc5ea"
820 | },
821 | "kernelspec": {
822 | "display_name": "Python 3.8.12 ('.venv': venv)",
823 | "language": "python",
824 | "name": "python3"
825 | },
826 | "language_info": {
827 | "codemirror_mode": {
828 | "name": "ipython",
829 | "version": 3
830 | },
831 | "file_extension": ".py",
832 | "mimetype": "text/x-python",
833 | "name": "python",
834 | "nbconvert_exporter": "python",
835 | "pygments_lexer": "ipython3",
836 | "version": "3.8.12"
837 | },
838 | "orig_nbformat": 4
839 | },
840 | "nbformat": 4,
841 | "nbformat_minor": 0
842 | }
843 |
--------------------------------------------------------------------------------
/notebooks/TF_to_ONNX.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
13 | {
14 | "cell_type": "markdown",
15 | "metadata": {
16 | "id": "ahaLOgxyzACW"
17 | },
18 | "source": [
19 | "# Convert `tf.keras` model to ONNX\n",
20 | "\n",
21 | "This tutorial shows:\n",
22 | "- how to convert tf.keras model to ONNX from the saved model file or the source code directly. \n",
23 | "- comparison of the execution time of the inference on CPU between tf.keras model and ONNX converted model."
24 | ]
25 | },
26 | {
27 | "cell_type": "markdown",
28 | "metadata": {
29 | "id": "CmnzNRTkzaYq"
30 | },
31 | "source": [
32 | "## Install ONNX dependencies\n",
33 | "- `tf2onnx` provides a tool to convert TensorFlow model to ONNX\n",
34 | "- `onnxruntime` is used to run inference on a saved ONNX model."
35 | ]
36 | },
37 | {
38 | "cell_type": "code",
39 | "execution_count": null,
40 | "metadata": {
41 | "id": "Y7VIFntKUh0R"
42 | },
43 | "outputs": [],
44 | "source": [
45 | "!pip install -Uqq tf2onnx\n",
46 | "!pip install -Uqq onnxruntime"
47 | ]
48 | },
49 | {
50 | "cell_type": "markdown",
51 | "metadata": {
52 | "id": "D7TJluNyz8k0"
53 | },
54 | "source": [
55 | "### Imports"
56 | ]
57 | },
58 | {
59 | "cell_type": "code",
60 | "execution_count": null,
61 | "metadata": {
62 | "id": "-UfszPPVf9P0"
63 | },
64 | "outputs": [],
65 | "source": [
66 | "import tf2onnx\n",
67 | "import pandas as pd\n",
68 | "import tensorflow as tf\n",
69 | "import numpy as np"
70 | ]
71 | },
72 | {
73 | "cell_type": "markdown",
74 | "metadata": {
75 | "id": "_eo3f1Zn0S3F"
76 | },
77 | "source": [
78 | "### Get a sample model "
79 | ]
80 | },
81 | {
82 | "cell_type": "code",
83 | "execution_count": null,
84 | "metadata": {
85 | "id": "3R81akF_hDEL"
86 | },
87 | "outputs": [],
88 | "source": [
89 | "core = tf.keras.applications.ResNet50(include_top=True, input_shape=(224, 224, 3))\n",
90 | "\n",
91 | "inputs = tf.keras.layers.Input(shape=(224, 224, 3), name=\"image_input\")\n",
92 | "preprocess = tf.keras.applications.resnet50.preprocess_input(inputs)\n",
93 | "outputs = core(preprocess, training=False)\n",
94 | "model = tf.keras.Model(inputs=[inputs], outputs=[outputs])"
95 | ]
96 | },
97 | {
98 | "cell_type": "markdown",
99 | "source": [
100 | "Note that we are including the preprocessing layer in the `model` object. This will allow us to load an image from disk and run the model directly without requiring any\n",
101 | "model-specific preprocessing. This reduces training/serving skew. "
102 | ],
103 | "metadata": {
104 | "id": "W3smmoIBCFOX"
105 | }
106 | },
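107 | {
108 | "cell_type": "markdown",
109 | "metadata": {},
110 | "source": [
111 | "As a quick check (a sketch assuming some local RGB image, here a hypothetical `cat.jpg`), raw pixels in `[0, 255]` can be fed straight into `model`, since `preprocess_input` now lives inside the graph:"
112 | ]
113 | },
114 | {
115 | "cell_type": "code",
116 | "execution_count": null,
117 | "metadata": {},
118 | "outputs": [],
119 | "source": [
120 | "from PIL import Image\n",
121 | "\n",
122 | "# Hypothetical local file; any RGB image works.\n",
123 | "img = Image.open(\"cat.jpg\").convert(\"RGB\").resize((224, 224))\n",
124 | "batch = np.expand_dims(np.array(img).astype(\"float32\"), 0)\n",
125 | "\n",
126 | "# No preprocess_input call needed -- it is part of the model itself.\n",
127 | "print(model.predict(batch).argmax(axis=-1))"
128 | ]
129 | },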
107 | {
108 | "cell_type": "markdown",
109 | "metadata": {
110 | "id": "MQg5cN910Z6q"
111 | },
112 | "source": [
113 | "## Convert to ONNX"
114 | ]
115 | },
116 | {
117 | "cell_type": "code",
118 | "execution_count": null,
119 | "metadata": {
120 | "id": "3-friv_fMk79"
121 | },
122 | "outputs": [],
123 | "source": [
124 | "num_layers = len(model.layers)\n",
125 | "print(f'first layer name: {model.layers[0].name}')\n",
126 | "print(f'last layer name: {model.layers[num_layers-1].name}')"
127 | ]
128 | },
129 | {
130 | "cell_type": "markdown",
131 | "metadata": {
132 | "id": "UBGQxHHz0dGP"
133 | },
134 | "source": [
135 | "### Conversion\n",
136 | "\n",
137 | "`opset` in `tf2onnx.convert.from_keras` is the ONNX Op version. You can find the full list which TensorFlow (TF) Ops are convertible to ONNX Ops [here](https://github.com/onnx/tensorflow-onnx/blob/master/support_status.md).\n",
138 | "\n",
139 | "There are two ways to convert TensorFlow model to ONNX:\n",
140 | "- `tf2onnx.convert.from_keras` to convert programatically\n",
141 | "- `tf2onnx.convert` CLI to convert a saved TensorFlow model"
142 | ]
143 | },
144 | {
145 | "cell_type": "code",
146 | "execution_count": null,
147 | "metadata": {
148 | "id": "_MAEoy9j0QRQ"
149 | },
150 | "outputs": [],
151 | "source": [
152 | "import onnx\n",
153 | "\n",
154 | "input_signature = [tf.TensorSpec([None, 224, 224, 3], tf.float32, name='image_input')]\n",
155 | "onnx_model, _ = tf2onnx.convert.from_keras(model, input_signature, opset=15)\n",
156 | "onnx.save(onnx_model, \"resnet50_w_preprocessing.onnx\")\n",
157 | "\n",
158 | "# model.save('my_model')\n",
159 | "# !python -m tf2onnx.convert --saved-model my_model --output my_model.onnx"
160 | ]
161 | },
162 | {
163 | "cell_type": "markdown",
164 | "metadata": {
165 | "id": "V2-aNpahQMVR"
166 | },
167 | "source": [
168 | "## Test TF vs ONNX model with dummy data"
169 | ]
170 | },
171 | {
172 | "cell_type": "markdown",
173 | "metadata": {
174 | "id": "Zt5lsQoUQXOo"
175 | },
176 | "source": [
177 | "### Generate dummy data "
178 | ]
179 | },
180 | {
181 | "cell_type": "code",
182 | "execution_count": null,
183 | "metadata": {
184 | "id": "ceqZH2KbPznx"
185 | },
186 | "outputs": [],
187 | "source": [
188 | "dummy_inputs = tf.random.normal((32, 224, 224, 3))"
189 | ]
190 | },
191 | {
192 | "cell_type": "markdown",
193 | "metadata": {
194 | "id": "M8DR47zeQZHI"
195 | },
196 | "source": [
197 | "### Test original TF model with dummy data"
198 | ]
199 | },
200 | {
201 | "cell_type": "code",
202 | "execution_count": null,
203 | "metadata": {
204 | "id": "zL8Lw9H8QbT7"
205 | },
206 | "outputs": [],
207 | "source": [
208 | "%%timeit\n",
209 | "model.predict(dummy_inputs)"
210 | ]
211 | },
212 | {
213 | "cell_type": "code",
214 | "execution_count": null,
215 | "metadata": {
216 | "id": "smFa5VWjTNLb"
217 | },
218 | "outputs": [],
219 | "source": [
220 | "tf_preds = model.predict(dummy_inputs)"
221 | ]
222 | },
223 | {
224 | "cell_type": "markdown",
225 | "metadata": {
226 | "id": "Lqhi458k0fkM"
227 | },
228 | "source": [
229 | "### Test converted ONNX model with dummy data\n",
230 | "\n",
231 | "If you want to inference with GPU, then you can do so by setting `providers=[\"CUDAExecutionProvider\"]` in `ort.InferenceSession`.\n",
232 | "\n",
233 | "The first parameter in `sess.run` is set to `None`, and that means all the outputs of the model will be retrieved. "
234 | ]
235 | },
236 | {
237 | "cell_type": "code",
238 | "execution_count": null,
239 | "metadata": {
240 | "id": "1ELVBwrn0-Cf"
241 | },
242 | "outputs": [],
243 | "source": [
244 | "import onnxruntime as ort\n",
245 | "import numpy as np\n",
246 | "\n",
247 | "sess = ort.InferenceSession(\"resnet50_w_preprocessing.onnx\") # providers=[\"CUDAExecutionProvider\"])\n",
248 | "np_dummy_inputs = dummy_inputs.numpy()"
249 | ]
250 | },
251 | {
252 | "cell_type": "code",
253 | "execution_count": null,
254 | "metadata": {
255 | "id": "jszhyR15SJaE"
256 | },
257 | "outputs": [],
258 | "source": [
259 | "%%timeit \n",
260 | "sess.run(None, {\"image_input\": np_dummy_inputs})"
261 | ]
262 | },
263 | {
264 | "cell_type": "code",
265 | "execution_count": null,
266 | "metadata": {
267 | "id": "Ax6opk4ENmlK"
268 | },
269 | "outputs": [],
270 | "source": [
271 | "ort_preds = sess.run(None, {\"image_input\": np_dummy_inputs})"
272 | ]
273 | },
274 | {
275 | "cell_type": "markdown",
276 | "source": [
277 | "## Check if the TF and ONNX outputs match"
278 | ],
279 | "metadata": {
280 | "id": "jbrwQMDbBLps"
281 | }
282 | },
283 | {
284 | "cell_type": "code",
285 | "source": [
286 | "np.testing.assert_allclose(tf_preds, ort_preds[0], atol=1e-4)"
287 | ],
288 | "metadata": {
289 | "id": "um99Uu4FBPrY"
290 | },
291 | "execution_count": null,
292 | "outputs": []
293 | },
294 | {
295 | "cell_type": "markdown",
296 | "metadata": {
297 | "id": "QPu6kdNnU8Y6"
298 | },
299 | "source": [
300 | "## Conclusion\n",
301 | "\n",
302 | "We did a simple experiments with dummy dataset of 32 batch size. The default behaviour of `timeit` is to measure the average of the cell execution time with 7 times of repeat ([`timeit`'s default behaviour](https://ipython.readthedocs.io/en/stable/interactive/magics.html#magic-timeit)).\n",
303 | "\n",
304 | "\n",
305 | "The ONNX model will likely always have a better inference latency than the TF model if you are using a CPU server for inference."
306 | ]
307 | }
308 | ],
309 | "metadata": {
310 | "colab": {
311 | "collapsed_sections": [],
312 | "name": "TF to ONNX.ipynb",
313 | "provenance": [],
314 | "include_colab_link": true
315 | },
316 | "kernelspec": {
317 | "display_name": "Python 3",
318 | "name": "python3"
319 | },
320 | "language_info": {
321 | "name": "python",
322 | "version": "3.8.10"
323 | }
324 | },
325 | "nbformat": 4,
326 | "nbformat_minor": 0
327 | }
--------------------------------------------------------------------------------