├── .github
│   └── workflows
│       └── deployment.yaml
├── .gitignore
├── .kube
│   ├── README.md
│   ├── base_1
│   │   ├── deployment.yaml
│   │   ├── kustomization.yaml
│   │   └── service.yaml
│   ├── deployment.yaml
│   ├── experiment_1
│   │   ├── 2vCPU+4GB+w1
│   │   │   ├── Dockerfile
│   │   │   ├── deployment_replica_count.yaml
│   │   │   ├── kustomization.yaml
│   │   │   └── requirements.txt
│   │   ├── 2vCPU+4GB+w2
│   │   │   ├── Dockerfile
│   │   │   ├── deployment_replica_count.yaml
│   │   │   ├── kustomization.yaml
│   │   │   └── requirements.txt
│   │   ├── 2vCPU+4GB+w4
│   │   │   ├── Dockerfile
│   │   │   ├── deployment_replica_count.yaml
│   │   │   ├── kustomization.yaml
│   │   │   └── requirements.txt
│   │   ├── 4vCPU+8GB
│   │   │   ├── Dockerfile
│   │   │   ├── deployment_replica_count.yaml
│   │   │   ├── kustomization.yaml
│   │   │   └── requirements.txt
│   │   ├── 8vCPU+16GB
│   │   │   ├── Dockerfile
│   │   │   ├── deployment_replica_count.yaml
│   │   │   ├── kustomization.yaml
│   │   │   └── requirements.txt
│   │   └── 8vCPU+64GB
│   │       ├── Dockerfile
│   │       ├── deployment_replica_count.yaml
│   │       ├── kustomization.yaml
│   │       └── requirements.txt
│   ├── hpa.yaml
│   ├── kustomization.yaml
│   └── service.yaml
├── Dockerfile
├── LICENSE
├── README.md
├── api
│   ├── README.md
│   ├── cat.jpg
│   ├── imagenet_classes.txt
│   ├── main.py
│   ├── requirements.txt
│   └── utils
│       ├── __init__.py
│       └── utils.py
├── locust
│   ├── README.md
│   ├── cat.jpg
│   ├── cat_224x224.jpg
│   ├── load_test.conf
│   └── locust.py
└── notebooks
    ├── TF_Serving.ipynb
    └── TF_to_ONNX.ipynb
/.github/workflows/deployment.yaml: -------------------------------------------------------------------------------- 1 | name: Deployment 2 | 3 | env: 4 | GCP_PROJECT_ID: "fast-ai-exploration" 5 | GKE_CLUSTER: "fastapi-cluster" 6 | GKE_REGION: "us-central1" 7 | GKE_ZONE: "us-central1-a" 8 | IMAGE: "gcr.io/fast-ai-exploration/fastapi-k8s" 9 | GKE_DEPLOYMENT: "fastapi-server" 10 | 11 | on: 12 | push: 13 | branches: [ "main" ] 14 | 15 | # Allows you to run this workflow manually from the Actions tab 16 | workflow_dispatch: 17 | 18 | jobs: 19 | build: 20 | runs-on: ubuntu-latest 21 | steps: 22 | - name: Git 23 | uses: actions/checkout@v2 24 | 25 | - name: GCP auth 26 | uses: google-github-actions/auth@v0 27 | with: 28 | credentials_json: ${{ secrets.GCP_CREDENTIALS }} 29 | 30 | - name: Set up Cloud SDK 31 | uses: google-github-actions/setup-gcloud@v0 32 | 33 | - name: Docker auth 34 | run: |- 35 | gcloud --quiet auth configure-docker 36 | 37 | - name: GKE auth 38 | run: |- 39 | gcloud container clusters get-credentials "$GKE_CLUSTER" --zone "$GKE_ZONE" --project "$GCP_PROJECT_ID" 40 | 41 | - name: Check if the requested directories have any changes 42 | uses: dorny/paths-filter@v2 43 | id: changes 44 | with: 45 | filters: | 46 | src: 47 | - 'api/**' 48 | - '.github/**' 49 | - '.kube/**' 50 | - Dockerfile 51 | 52 | - name: Build and push Docker image based on the changes 53 | if: steps.changes.outputs.src == 'true' 54 | run: | 55 | docker build --tag "$IMAGE:$GITHUB_SHA" . 56 | docker tag "$IMAGE:$GITHUB_SHA" "$IMAGE:latest" 57 | docker push "$IMAGE:$GITHUB_SHA" 58 | docker push "$IMAGE:latest" 59 | 60 | - name: Set up Kustomize 61 | working-directory: .kube/ 62 | if: steps.changes.outputs.src == 'true' 63 | run: |- 64 | curl -sfLo kustomize.tar.gz https://github.com/kubernetes-sigs/kustomize/releases/download/kustomize%2Fv4.1.2/kustomize_v4.1.2_linux_amd64.tar.gz 65 | tar -zxvf kustomize.tar.gz 66 | chmod u+x ./kustomize 67 | 68 | - name: Deploy to GKE 69 | working-directory: .kube/ 70 | if: steps.changes.outputs.src == 'true' 71 | run: |- 72 | ./kustomize edit set image $IMAGE:$GITHUB_SHA 73 | ./kustomize build . 
| kubectl apply -f - 74 | kubectl rollout status deployment/$GKE_DEPLOYMENT 75 | kubectl get services -o wide 76 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # Misc 132 | .ipynb_checkpoints/ 133 | *.onnx -------------------------------------------------------------------------------- /.kube/README.md: -------------------------------------------------------------------------------- 1 | # Run Experimental Setup 2 | 3 | ## Build Docker image 4 | ```bash 5 | # from the repository root (the experiment Dockerfiles copy ./api and ./.kube/... relative to it) 6 | 7 | $ TARGET_EXPERIMENT=experiment_1/2vCPU+4GB+w1 8 | $ TAG=gcr.io/GCP_PROJECT_ID/IMG_NAME:IMG_TAG 9 | 10 | $ docker build -f .kube/$TARGET_EXPERIMENT/Dockerfile -t $TAG . 
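# (Added sketch, not from the original README): push the image so the GKE
# cluster can pull it. This assumes `gcloud auth configure-docker` has
# already been run for the gcr.io registry.
$ docker push $TAG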
11 | ``` 12 | 13 | ## Deploy on k8s cluster 14 | ```bash 15 | # under /.kube directory 16 | 17 | $ ./kustomize build $TARGET_EXPERIMENT | kubectl apply -f - 18 | ``` -------------------------------------------------------------------------------- /.kube/base_1/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | labels: 5 | app: fastapi-server 6 | name: fastapi-server 7 | spec: 8 | replicas: 1 9 | selector: 10 | matchLabels: 11 | app: fastapi-server 12 | strategy: {} 13 | template: 14 | metadata: 15 | labels: 16 | app: fastapi-server 17 | spec: 18 | containers: 19 | - image: gcr.io/gcp-ml-172005/k8s-fastapi:latest 20 | name: fastapi-k8s 21 | ports: 22 | - containerPort: 80 23 | resources: {} 24 | -------------------------------------------------------------------------------- /.kube/base_1/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | 4 | commonLabels: 5 | app: fastapi-server 6 | 7 | resources: 8 | - deployment.yaml 9 | - service.yaml -------------------------------------------------------------------------------- /.kube/base_1/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | creationTimestamp: null 5 | labels: 6 | app: fastapi-server 7 | name: fastapi-server 8 | spec: 9 | ports: 10 | - port: 80 11 | protocol: TCP 12 | targetPort: 80 13 | selector: 14 | app: fastapi-server 15 | type: LoadBalancer 16 | status: 17 | loadBalancer: {} -------------------------------------------------------------------------------- /.kube/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | creationTimestamp: null 5 | labels: 6 | app: fastapi-server 7 | name: fastapi-server 8 | spec: 9 | replicas: 1 10 | selector: 11 | matchLabels: 12 | app: fastapi-server 13 | strategy: {} 14 | template: 15 | metadata: 16 | creationTimestamp: null 17 | labels: 18 | app: fastapi-server 19 | spec: 20 | containers: 21 | - image: gcr.io/fast-ai-exploration/fastapi-k8s:latest 22 | name: fastapi-k8s 23 | ports: 24 | - containerPort: 80 25 | resources: {} -------------------------------------------------------------------------------- /.kube/experiment_1/2vCPU+4GB+w1/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8 2 | 3 | WORKDIR /app 4 | 5 | # install dependencies 6 | COPY ./.kube/experiment_1/2vCPU+4GB+w1/requirements.txt /app/requirements.txt 7 | RUN pip install --no-cache-dir --upgrade -r /app/requirements.txt 8 | 9 | # copy fastAPI app codebase 10 | COPY ./api /app 11 | 12 | # run the fastAPI app 13 | CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80"] -------------------------------------------------------------------------------- /.kube/experiment_1/2vCPU+4GB+w1/deployment_replica_count.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: fastapi-server 5 | 6 | spec: 7 | replicas: 8 8 | template: 9 | metadata: 10 | labels: 11 | app: fastapi-server 12 | spec: 13 | containers: 14 | - image: gcr.io/gcp-ml-172005/k8s-fastapi:uvicorn-w-1 15 | name: fastapi-k8s 16 | imagePullPolicy: Always 17 | ports: 18 | - 
containerPort: 80 19 | resources: {} 20 | -------------------------------------------------------------------------------- /.kube/experiment_1/2vCPU+4GB+w1/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | 4 | bases: 5 | - ../../base_1 6 | patches: 7 | - deployment_replica_count.yaml -------------------------------------------------------------------------------- /.kube/experiment_1/2vCPU+4GB+w1/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.22.2 2 | uvicorn[standard]==0.17.5 3 | Pillow==9.0.1 4 | onnxruntime==1.10.0 5 | fastapi==0.74.0 6 | python-multipart==0.0.5 7 | -------------------------------------------------------------------------------- /.kube/experiment_1/2vCPU+4GB+w2/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8 2 | 3 | WORKDIR /app 4 | 5 | # install dependencies 6 | COPY ./.kube/experiment_1/2vCPU+4GB+w2/requirements.txt /app/requirements.txt 7 | RUN pip install --no-cache-dir --upgrade -r /app/requirements.txt 8 | 9 | # copy fastAPI app codebase 10 | COPY ./api /app 11 | 12 | # run the fastAPI app 13 | CMD ["uvicorn", "main:app", "--workers", "2", "--host", "0.0.0.0", "--port", "80"] -------------------------------------------------------------------------------- /.kube/experiment_1/2vCPU+4GB+w2/deployment_replica_count.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: fastapi-server 5 | 6 | spec: 7 | replicas: 8 8 | template: 9 | metadata: 10 | labels: 11 | app: fastapi-server 12 | spec: 13 | containers: 14 | - image: gcr.io/gcp-ml-172005/k8s-fastapi:uvicorn-w-2 15 | name: fastapi-k8s 16 | imagePullPolicy: Always 17 | ports: 18 | - containerPort: 80 19 | resources: {} 20 | -------------------------------------------------------------------------------- /.kube/experiment_1/2vCPU+4GB+w2/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | 4 | bases: 5 | - ../../base_1 6 | patches: 7 | - deployment_replica_count.yaml -------------------------------------------------------------------------------- /.kube/experiment_1/2vCPU+4GB+w2/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.22.2 2 | uvicorn[standard]==0.17.5 3 | Pillow==9.0.1 4 | onnxruntime==1.10.0 5 | fastapi==0.74.0 6 | python-multipart==0.0.5 7 | -------------------------------------------------------------------------------- /.kube/experiment_1/2vCPU+4GB+w4/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8 2 | 3 | WORKDIR /app 4 | 5 | # install dependencies 6 | COPY ./.kube/experiment_1/2vCPU+4GB+w4/requirements.txt /app/requirements.txt 7 | RUN pip install --no-cache-dir --upgrade -r /app/requirements.txt 8 | 9 | # copy fastAPI app codebase 10 | COPY ./api /app 11 | 12 | # run the fastAPI app 13 | CMD ["uvicorn", "main:app", "--workers", "4", "--host", "0.0.0.0", "--port", "80"] -------------------------------------------------------------------------------- /.kube/experiment_1/2vCPU+4GB+w4/deployment_replica_count.yaml: -------------------------------------------------------------------------------- 1 | 
apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: fastapi-server 5 | 6 | spec: 7 | replicas: 8 8 | template: 9 | metadata: 10 | labels: 11 | app: fastapi-server 12 | spec: 13 | containers: 14 | - image: gcr.io/gcp-ml-172005/k8s-fastapi:uvicorn-w-4 15 | name: fastapi-k8s 16 | imagePullPolicy: Always 17 | ports: 18 | - containerPort: 80 19 | resources: {} 20 | -------------------------------------------------------------------------------- /.kube/experiment_1/2vCPU+4GB+w4/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | 4 | bases: 5 | - ../../base_1 6 | patches: 7 | - deployment_replica_count.yaml -------------------------------------------------------------------------------- /.kube/experiment_1/2vCPU+4GB+w4/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.22.2 2 | uvicorn[standard]==0.17.5 3 | Pillow==9.0.1 4 | onnxruntime==1.10.0 5 | fastapi==0.74.0 6 | python-multipart==0.0.5 7 | -------------------------------------------------------------------------------- /.kube/experiment_1/4vCPU+8GB/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8 2 | 3 | WORKDIR /app 4 | 5 | # install dependencies 6 | COPY ./.kube/experiment_1/4vCPU+8GB/requirements.txt /app/requirements.txt 7 | RUN pip install --no-cache-dir --upgrade -r /app/requirements.txt 8 | 9 | # copy fastAPI app codebase 10 | COPY ./api /app 11 | 12 | # run the fastAPI app 13 | CMD ["gunicorn", "main:app", "--workers", "9", "--worker-class", "uvicorn.workers.UvicornWorker", "--bind", "0.0.0.0:80"] -------------------------------------------------------------------------------- /.kube/experiment_1/4vCPU+8GB/deployment_replica_count.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: fastapi-server 5 | 6 | spec: 7 | replicas: 4 8 | template: 9 | metadata: 10 | labels: 11 | app: fastapi-server 12 | spec: 13 | containers: 14 | - image: gcr.io/gcp-ml-172005/k8s-fastapi:gunicorn-w-7 15 | name: fastapi-k8s 16 | imagePullPolicy: Always 17 | ports: 18 | - containerPort: 80 19 | resources: {} 20 | -------------------------------------------------------------------------------- /.kube/experiment_1/4vCPU+8GB/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | 4 | bases: 5 | - ../../base_1 6 | patches: 7 | - deployment_replica_count.yaml -------------------------------------------------------------------------------- /.kube/experiment_1/4vCPU+8GB/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.22.2 2 | uvicorn[standard]==0.17.5 3 | Pillow==9.0.1 4 | onnxruntime==1.10.0 5 | fastapi==0.74.0 6 | python-multipart==0.0.5 7 | gunicorn==20.1.0 -------------------------------------------------------------------------------- /.kube/experiment_1/8vCPU+16GB/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8 2 | 3 | WORKDIR /app 4 | 5 | # install dependencies 6 | COPY ./.kube/experiment_1/8vCPU+16GB/requirements.txt /app/requirements.txt 7 | RUN pip install --no-cache-dir --upgrade -r /app/requirements.txt 8 | 9 | # copy fastAPI app codebase 10 | COPY 
./api /app 11 | 12 | # run the fastAPI app 13 | CMD ["gunicorn", "main:app", "--workers", "17", "--worker-class", "uvicorn.workers.UvicornWorker", "--bind", "0.0.0.0:80"] -------------------------------------------------------------------------------- /.kube/experiment_1/8vCPU+16GB/deployment_replica_count.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: fastapi-server 5 | 6 | spec: 7 | replicas: 2 8 | template: 9 | metadata: 10 | labels: 11 | app: fastapi-server 12 | spec: 13 | containers: 14 | - image: gcr.io/gcp-ml-172005/k8s-fastapi:gunicorn-w-17 15 | name: fastapi-k8s 16 | imagePullPolicy: Always 17 | ports: 18 | - containerPort: 80 19 | resources: {} 20 | -------------------------------------------------------------------------------- /.kube/experiment_1/8vCPU+16GB/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | 4 | bases: 5 | - ../../base_1 6 | patches: 7 | - deployment_replica_count.yaml -------------------------------------------------------------------------------- /.kube/experiment_1/8vCPU+16GB/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.22.2 2 | uvicorn[standard]==0.17.5 3 | Pillow==9.0.1 4 | onnxruntime==1.10.0 5 | fastapi==0.74.0 6 | python-multipart==0.0.5 7 | gunicorn==20.1.0 -------------------------------------------------------------------------------- /.kube/experiment_1/8vCPU+64GB/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8 2 | 3 | WORKDIR /app 4 | 5 | # install dependencies 6 | COPY ./.kube/experiment_1/8vCPU+64GB/requirements.txt /app/requirements.txt 7 | RUN pip install --no-cache-dir --upgrade -r /app/requirements.txt 8 | 9 | # copy fastAPI app codebase 10 | COPY ./api /app 11 | 12 | # run the fastAPI app 13 | CMD ["gunicorn", "main:app", "--workers", "17", "--worker-class", "uvicorn.workers.UvicornWorker", "--bind", "0.0.0.0:80"] -------------------------------------------------------------------------------- /.kube/experiment_1/8vCPU+64GB/deployment_replica_count.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: fastapi-server 5 | 6 | spec: 7 | replicas: 2 8 | template: 9 | metadata: 10 | labels: 11 | app: fastapi-server 12 | spec: 13 | containers: 14 | - image: gcr.io/gcp-ml-172005/k8s-fastapi:gunicorn-w-17 15 | name: fastapi-k8s 16 | imagePullPolicy: Always 17 | ports: 18 | - containerPort: 80 19 | resources: {} 20 | -------------------------------------------------------------------------------- /.kube/experiment_1/8vCPU+64GB/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | 4 | bases: 5 | - ../../base_1 6 | patches: 7 | - deployment_replica_count.yaml -------------------------------------------------------------------------------- /.kube/experiment_1/8vCPU+64GB/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.22.2 2 | uvicorn[standard]==0.17.5 3 | Pillow==9.0.1 4 | onnxruntime==1.10.0 5 | fastapi==0.74.0 6 | python-multipart==0.0.5 7 | gunicorn==20.1.0 
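Side note on the experiment manifests above: every variant leaves `resources: {}` unset, so pod sizing comes entirely from the node shapes (2 vCPU + 4 GB, 4 vCPU + 8 GB, and so on). If you wanted to pin requests and limits explicitly instead, a minimal patch could look like the sketch below — the numbers are illustrative assumptions for a 2 vCPU / 4 GB node, not values taken from this repository:

```yaml
# Hypothetical resources patch (illustrative values only, not from the repo).
apiVersion: apps/v1
kind: Deployment
metadata:
  name: fastapi-server
spec:
  template:
    spec:
      containers:
        - name: fastapi-k8s
          resources:
            requests:
              cpu: "1500m"  # leave headroom for kube-system pods on a 2 vCPU node
              memory: "3Gi"
            limits:
              cpu: "2000m"
              memory: "4Gi"
```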
-------------------------------------------------------------------------------- /.kube/hpa.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: autoscaling/v1 2 | kind: HorizontalPodAutoscaler 3 | metadata: 4 | name: fastapi-server 5 | 6 | spec: 7 | scaleTargetRef: 8 | apiVersion: apps/v1 9 | kind: Deployment 10 | name: fastapi-server 11 | minReplicas: 1 12 | maxReplicas: 9 13 | targetCPUUtilizationPercentage: 80 -------------------------------------------------------------------------------- /.kube/kustomization.yaml: -------------------------------------------------------------------------------- 1 | commonLabels: 2 | app: fastapi-server 3 | resources: 4 | - deployment.yaml 5 | - hpa.yaml 6 | - service.yaml 7 | apiVersion: kustomize.config.k8s.io/v1beta1 8 | kind: Kustomization -------------------------------------------------------------------------------- /.kube/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | creationTimestamp: null 5 | labels: 6 | app: fastapi-server 7 | name: fastapi-server 8 | spec: 9 | ports: 10 | - port: 80 11 | protocol: TCP 12 | targetPort: 80 13 | selector: 14 | app: fastapi-server 15 | type: LoadBalancer 16 | status: 17 | loadBalancer: {} -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8 2 | 3 | WORKDIR /app 4 | 5 | # install dependencies 6 | COPY ./api/requirements.txt /app/requirements.txt 7 | RUN pip install --no-cache-dir --upgrade -r /app/requirements.txt 8 | 9 | # copy fastAPI app codebase 10 | COPY ./api /app 11 | 12 | # run the fastAPI app 13 | CMD ["uvicorn", "main:app", "--workers", "2", "--host", "0.0.0.0", "--port", "80"] -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2022 Sayak Paul and Chansung Park 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Deploying ML models with FastAPI, Docker, and Kubernetes 2 | 3 | *By: [Sayak Paul](https://github.com/sayakpaul) and [Chansung Park](https://github.com/deep-diver)* 4 | 5 |
6 |
7 | Figure developed by Chansung Park 8 |
9 | 10 | This project shows how to serve an ONNX-optimized image classification model as a 11 | RESTful web service with FastAPI, Docker, and Kubernetes (k8s). The idea is to first 12 | Dockerize the API and then deploy it on a k8s cluster running on [Google Kubernetes 13 | Engine (GKE)](https://cloud.google.com/kubernetes-engine). We do this integration 14 | using [GitHub Actions](https://github.com/features/actions). 15 | 16 | 👋 **Note**: Even though this project uses an image classification model, its structure and techniques can 17 | be used to serve other models as well. We also worked on a TF Serving equivalent 18 | of this project. Check it out [here](https://github.com/deep-diver/ml-deployment-k8s-tfserving). 19 | 20 | **Update July 19 2022**: This project won the [#TFCommunitySpotlight award](https://twitter.com/TensorFlow/status/1545115276152389636). 21 | 22 | ## Deploying the model as a service with k8s 23 | 24 | * We decouple the model optimization part from our API code. The optimization part is 25 | available within the `notebooks/TF_to_ONNX.ipynb` notebook. 26 | * Then we locally test the API. You can find the instructions within the `api` 27 | directory. 28 | * To deploy the API, we define our `deployment.yaml` workflow file inside `.github/workflows`. 29 | It does the following tasks: 30 | 31 | * Looks for any changes in the specified directories. If there are any changes: 32 | * Builds and pushes the latest Docker image to Google Container Registry (GCR). 33 | * Deploys the Docker container on the k8s cluster running on GKE. 34 | 35 | ## Configurations needed beforehand 36 | 37 | * Create a k8s cluster on GKE. [Here's](https://www.youtube.com/watch?v=hxpGC19PzwI) a 38 | relevant resource. We used 8 nodes (each with 2 vCPUs and 4 GB of RAM) for the cluster; a hedged `gcloud` sketch for creating a similar cluster is shown right after this list. 39 | * [Create](https://cloud.google.com/iam/docs/creating-managing-service-account-keys) a 40 | service account key (JSON) file. It's a good practice to only grant it the roles 41 | required for the project. For example, for this project, we created a fresh service 42 | account and granted it permissions for the following: Storage Admin, GKE Developer, and 43 | GCR Developer. 44 | * Create a secret named `GCP_CREDENTIALS` on your GitHub repository and copy-paste the 45 | contents of the service account key file into the secret. 46 | * Configure bucket storage related permissions for the service account: 47 | 48 | ```shell 49 | $ export PROJECT_ID= 50 | $ export ACCOUNT= 51 | 52 | $ gcloud -q projects add-iam-policy-binding ${PROJECT_ID} \ 53 | --member=serviceAccount:${ACCOUNT}@${PROJECT_ID}.iam.gserviceaccount.com \ 54 | --role roles/storage.admin 55 | 56 | $ gcloud -q projects add-iam-policy-binding ${PROJECT_ID} \ 57 | --member=serviceAccount:${ACCOUNT}@${PROJECT_ID}.iam.gserviceaccount.com \ 58 | --role roles/storage.objectAdmin 59 | 60 | $ gcloud -q projects add-iam-policy-binding ${PROJECT_ID} \ 61 | --member=serviceAccount:${ACCOUNT}@${PROJECT_ID}.iam.gserviceaccount.com \ 62 | --role roles/storage.objectCreator 63 | ``` 64 | * If you're on the `main` branch already, then upon a new push, the workflow defined 65 | in `.github/workflows/deployment.yaml` should automatically run.
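As a reference for the cluster-creation step above, a similar cluster could be spun up with a single `gcloud` command. This is a hedged sketch: the cluster name and zone mirror the `env` values in `.github/workflows/deployment.yaml`, while `e2-medium` (2 vCPUs, 4 GB RAM per node) is our assumption for the node shape, not a flag taken from this repository:

```shell
$ gcloud container clusters create fastapi-cluster \
    --zone us-central1-a \
    --num-nodes 8 \
    --machine-type e2-medium
```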
Here's what the 66 | final outputs should look like ([run link](https://github.com/sayakpaul/ml-deployment-k8s-fastapi/runs/5343002731)): 67 | 68 | ![](https://i.ibb.co/fDGFbpr/Screenshot-2022-03-01-at-12-25-42-PM.png) 69 | 70 | ## Notes 71 | 72 | * Since we use CPU-based pods within the k8s cluster, we apply ONNX optimizations, 73 | which are known to provide performance speed-ups in CPU-based environments. 74 | If you are using GPU-based pods then look into [TensorRT](https://developer.nvidia.com/tensorrt). 75 | * We use [Kustomize](https://kustomize.io) to manage the deployment on k8s. 76 | * We conducted load-testing varying the number of workers, RAM, nodes, etc. From that experiment, 77 | we found out that for our setup, 8 nodes (each having 2 vCPUs and 4 GB of RAM) work the best in terms of 78 | throughput and latency. The figure below summarizes our results: 79 | 80 | ![](https://i.ibb.co/NjFp3m9/fastapi-load-test-results.png) 81 | 82 | You can find the load-testing details under the `locust` directory. 83 | 84 | ## Querying the API endpoint 85 | 86 | From the workflow outputs, you should see something like this: 87 | 88 | ```shell 89 | NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE 90 | fastapi-server LoadBalancer xxxxxxxxxx xxxxxxxxxx 80:30768/TCP 23m 91 | kubernetes ClusterIP xxxxxxxxxx 443/TCP 160m 92 | ``` 93 | 94 | Note the `EXTERNAL-IP` corresponding to `fastapi-server` (if you have named 95 | your service that way). Then cURL it: 96 | 97 | ```shell 98 | curl -X POST -F image_file=@cat.jpg -F with_resize=True -F with_post_process=True http://{EXTERNAL-IP}:80/predict/image 99 | ``` 100 | 101 | You should get the following output (if you're using the `cat.jpg` image present 102 | in the `api` directory): 103 | 104 | ```shell 105 | "{\"Label\": \"tabby\", \"Score\": \"0.538\"}" 106 | ``` 107 | 108 | The request assumes that you have a file called `cat.jpg` present in your 109 | working directory. 110 | 111 | **Note** that if you don't see any external IP address in your GitHub Actions console log, 112 | then after successful deployment, do the following: 113 | 114 | ```sh 115 | # Authenticate to your GKE cluster. 116 | $ gcloud container clusters get-credentials ${GKE_CLUSTER} --zone ${GKE_ZONE} --project ${GCP_PROJECT_ID} 117 | $ kubectl get services -o wide 118 | ``` 119 | 120 | From there, note the external IP. 121 | 122 | ## Acknowledgements 123 | 124 | * [ML-GDE program](https://developers.google.com/programs/experts/) for providing GCP credit support. 125 | * [Hannes Hapke](https://www.linkedin.com/in/hanneshapke) for providing many insightful points for conducting load-tests. 126 | 127 | -------------------------------------------------------------------------------- /api/README.md: -------------------------------------------------------------------------------- 1 | This directory exposes the ONNX model we converted in [this notebook](https://github.com/sayakpaul/ml-deployment-k8s-fastapi/blob/main/notebooks/TF_to_ONNX.ipynb) as a REST API using [FastAPI](https://fastapi.tiangolo.com/). 
2 | 3 | ## Setup 4 | 5 | Install the dependencies: 6 | 7 | ```sh 8 | $ pip install -r requirements.txt 9 | ``` 10 | 11 | Download a test image: 12 | 13 | ```sh 14 | $ wget http://images.cocodataset.org/val2017/000000039769.jpg -O cat.jpg 15 | ``` 16 | 17 | ## Deploy locally 18 | 19 | ```sh 20 | $ uvicorn main:app --reload 21 | ``` 22 | 23 | It should show something like so: 24 | 25 | ```sh 26 | INFO: Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit) 27 | INFO: Started reloader process [79147] using statreload 28 | INFO: Started server process [79149] 29 | INFO: Waiting for application startup. 30 | INFO: Application startup complete. 31 | ``` 32 | 33 | Note the port number and run a request: 34 | 35 | ```sh 36 | $ curl -X POST -F image_file=@cat.jpg -F with_resize=True -F with_post_process=True http://localhost:8000/predict/image 37 | ``` 38 | 39 | It should output: 40 | 41 | ```sh 42 | "{\"Label\": \"tabby\", \"Score\": \"0.538\"}" 43 | ``` 44 | 45 | ### Client request code in Python 46 | 47 | ```python 48 | import requests 49 | 50 | url = "http://localhost:8000/predict/image" 51 | payload = {"with_resize": True, "with_post_process": True} 52 | files = {"image_file": open("cat.jpg", "rb")} 53 | 54 | resp = requests.post(url=url, data=payload, files=files) 55 | print(resp.json()) 56 | ``` 57 | -------------------------------------------------------------------------------- /api/cat.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sayakpaul/ml-deployment-k8s-fastapi/55b690520b943bf39dbb02a7f28fc1aba781e945/api/cat.jpg -------------------------------------------------------------------------------- /api/imagenet_classes.txt: -------------------------------------------------------------------------------- 1 | tench 2 | goldfish 3 | great white shark 4 | tiger shark 5 | hammerhead 6 | electric ray 7 | stingray 8 | cock 9 | hen 10 | ostrich 11 | brambling 12 | goldfinch 13 | house finch 14 | junco 15 | indigo bunting 16 | robin 17 | bulbul 18 | jay 19 | magpie 20 | chickadee 21 | water ouzel 22 | kite 23 | bald eagle 24 | vulture 25 | great grey owl 26 | European fire salamander 27 | common newt 28 | eft 29 | spotted salamander 30 | axolotl 31 | bullfrog 32 | tree frog 33 | tailed frog 34 | loggerhead 35 | leatherback turtle 36 | mud turtle 37 | terrapin 38 | box turtle 39 | banded gecko 40 | common iguana 41 | American chameleon 42 | whiptail 43 | agama 44 | frilled lizard 45 | alligator lizard 46 | Gila monster 47 | green lizard 48 | African chameleon 49 | Komodo dragon 50 | African crocodile 51 | American alligator 52 | triceratops 53 | thunder snake 54 | ringneck snake 55 | hognose snake 56 | green snake 57 | king snake 58 | garter snake 59 | water snake 60 | vine snake 61 | night snake 62 | boa constrictor 63 | rock python 64 | Indian cobra 65 | green mamba 66 | sea snake 67 | horned viper 68 | diamondback 69 | sidewinder 70 | trilobite 71 | harvestman 72 | scorpion 73 | black and gold garden spider 74 | barn spider 75 | garden spider 76 | black widow 77 | tarantula 78 | wolf spider 79 | tick 80 | centipede 81 | black grouse 82 | ptarmigan 83 | ruffed grouse 84 | prairie chicken 85 | peacock 86 | quail 87 | partridge 88 | African grey 89 | macaw 90 | sulphur-crested cockatoo 91 | lorikeet 92 | coucal 93 | bee eater 94 | hornbill 95 | hummingbird 96 | jacamar 97 | toucan 98 | drake 99 | red-breasted merganser 100 | goose 101 | black swan 102 | tusker 103 | echidna 104 | platypus 105 | wallaby 106 | koala 
107 | wombat 108 | jellyfish 109 | sea anemone 110 | brain coral 111 | flatworm 112 | nematode 113 | conch 114 | snail 115 | slug 116 | sea slug 117 | chiton 118 | chambered nautilus 119 | Dungeness crab 120 | rock crab 121 | fiddler crab 122 | king crab 123 | American lobster 124 | spiny lobster 125 | crayfish 126 | hermit crab 127 | isopod 128 | white stork 129 | black stork 130 | spoonbill 131 | flamingo 132 | little blue heron 133 | American egret 134 | bittern 135 | crane 136 | limpkin 137 | European gallinule 138 | American coot 139 | bustard 140 | ruddy turnstone 141 | red-backed sandpiper 142 | redshank 143 | dowitcher 144 | oystercatcher 145 | pelican 146 | king penguin 147 | albatross 148 | grey whale 149 | killer whale 150 | dugong 151 | sea lion 152 | Chihuahua 153 | Japanese spaniel 154 | Maltese dog 155 | Pekinese 156 | Shih-Tzu 157 | Blenheim spaniel 158 | papillon 159 | toy terrier 160 | Rhodesian ridgeback 161 | Afghan hound 162 | basset 163 | beagle 164 | bloodhound 165 | bluetick 166 | black-and-tan coonhound 167 | Walker hound 168 | English foxhound 169 | redbone 170 | borzoi 171 | Irish wolfhound 172 | Italian greyhound 173 | whippet 174 | Ibizan hound 175 | Norwegian elkhound 176 | otterhound 177 | Saluki 178 | Scottish deerhound 179 | Weimaraner 180 | Staffordshire bullterrier 181 | American Staffordshire terrier 182 | Bedlington terrier 183 | Border terrier 184 | Kerry blue terrier 185 | Irish terrier 186 | Norfolk terrier 187 | Norwich terrier 188 | Yorkshire terrier 189 | wire-haired fox terrier 190 | Lakeland terrier 191 | Sealyham terrier 192 | Airedale 193 | cairn 194 | Australian terrier 195 | Dandie Dinmont 196 | Boston bull 197 | miniature schnauzer 198 | giant schnauzer 199 | standard schnauzer 200 | Scotch terrier 201 | Tibetan terrier 202 | silky terrier 203 | soft-coated wheaten terrier 204 | West Highland white terrier 205 | Lhasa 206 | flat-coated retriever 207 | curly-coated retriever 208 | golden retriever 209 | Labrador retriever 210 | Chesapeake Bay retriever 211 | German short-haired pointer 212 | vizsla 213 | English setter 214 | Irish setter 215 | Gordon setter 216 | Brittany spaniel 217 | clumber 218 | English springer 219 | Welsh springer spaniel 220 | cocker spaniel 221 | Sussex spaniel 222 | Irish water spaniel 223 | kuvasz 224 | schipperke 225 | groenendael 226 | malinois 227 | briard 228 | kelpie 229 | komondor 230 | Old English sheepdog 231 | Shetland sheepdog 232 | collie 233 | Border collie 234 | Bouvier des Flandres 235 | Rottweiler 236 | German shepherd 237 | Doberman 238 | miniature pinscher 239 | Greater Swiss Mountain dog 240 | Bernese mountain dog 241 | Appenzeller 242 | EntleBucher 243 | boxer 244 | bull mastiff 245 | Tibetan mastiff 246 | French bulldog 247 | Great Dane 248 | Saint Bernard 249 | Eskimo dog 250 | malamute 251 | Siberian husky 252 | dalmatian 253 | affenpinscher 254 | basenji 255 | pug 256 | Leonberg 257 | Newfoundland 258 | Great Pyrenees 259 | Samoyed 260 | Pomeranian 261 | chow 262 | keeshond 263 | Brabancon griffon 264 | Pembroke 265 | Cardigan 266 | toy poodle 267 | miniature poodle 268 | standard poodle 269 | Mexican hairless 270 | timber wolf 271 | white wolf 272 | red wolf 273 | coyote 274 | dingo 275 | dhole 276 | African hunting dog 277 | hyena 278 | red fox 279 | kit fox 280 | Arctic fox 281 | grey fox 282 | tabby 283 | tiger cat 284 | Persian cat 285 | Siamese cat 286 | Egyptian cat 287 | cougar 288 | lynx 289 | leopard 290 | snow leopard 291 | jaguar 292 | lion 293 | tiger 294 | cheetah 295 | brown 
bear 296 | American black bear 297 | ice bear 298 | sloth bear 299 | mongoose 300 | meerkat 301 | tiger beetle 302 | ladybug 303 | ground beetle 304 | long-horned beetle 305 | leaf beetle 306 | dung beetle 307 | rhinoceros beetle 308 | weevil 309 | fly 310 | bee 311 | ant 312 | grasshopper 313 | cricket 314 | walking stick 315 | cockroach 316 | mantis 317 | cicada 318 | leafhopper 319 | lacewing 320 | dragonfly 321 | damselfly 322 | admiral 323 | ringlet 324 | monarch 325 | cabbage butterfly 326 | sulphur butterfly 327 | lycaenid 328 | starfish 329 | sea urchin 330 | sea cucumber 331 | wood rabbit 332 | hare 333 | Angora 334 | hamster 335 | porcupine 336 | fox squirrel 337 | marmot 338 | beaver 339 | guinea pig 340 | sorrel 341 | zebra 342 | hog 343 | wild boar 344 | warthog 345 | hippopotamus 346 | ox 347 | water buffalo 348 | bison 349 | ram 350 | bighorn 351 | ibex 352 | hartebeest 353 | impala 354 | gazelle 355 | Arabian camel 356 | llama 357 | weasel 358 | mink 359 | polecat 360 | black-footed ferret 361 | otter 362 | skunk 363 | badger 364 | armadillo 365 | three-toed sloth 366 | orangutan 367 | gorilla 368 | chimpanzee 369 | gibbon 370 | siamang 371 | guenon 372 | patas 373 | baboon 374 | macaque 375 | langur 376 | colobus 377 | proboscis monkey 378 | marmoset 379 | capuchin 380 | howler monkey 381 | titi 382 | spider monkey 383 | squirrel monkey 384 | Madagascar cat 385 | indri 386 | Indian elephant 387 | African elephant 388 | lesser panda 389 | giant panda 390 | barracouta 391 | eel 392 | coho 393 | rock beauty 394 | anemone fish 395 | sturgeon 396 | gar 397 | lionfish 398 | puffer 399 | abacus 400 | abaya 401 | academic gown 402 | accordion 403 | acoustic guitar 404 | aircraft carrier 405 | airliner 406 | airship 407 | altar 408 | ambulance 409 | amphibian 410 | analog clock 411 | apiary 412 | apron 413 | ashcan 414 | assault rifle 415 | backpack 416 | bakery 417 | balance beam 418 | balloon 419 | ballpoint 420 | Band Aid 421 | banjo 422 | bannister 423 | barbell 424 | barber chair 425 | barbershop 426 | barn 427 | barometer 428 | barrel 429 | barrow 430 | baseball 431 | basketball 432 | bassinet 433 | bassoon 434 | bathing cap 435 | bath towel 436 | bathtub 437 | beach wagon 438 | beacon 439 | beaker 440 | bearskin 441 | beer bottle 442 | beer glass 443 | bell cote 444 | bib 445 | bicycle-built-for-two 446 | bikini 447 | binder 448 | binoculars 449 | birdhouse 450 | boathouse 451 | bobsled 452 | bolo tie 453 | bonnet 454 | bookcase 455 | bookshop 456 | bottlecap 457 | bow 458 | bow tie 459 | brass 460 | brassiere 461 | breakwater 462 | breastplate 463 | broom 464 | bucket 465 | buckle 466 | bulletproof vest 467 | bullet train 468 | butcher shop 469 | cab 470 | caldron 471 | candle 472 | cannon 473 | canoe 474 | can opener 475 | cardigan 476 | car mirror 477 | carousel 478 | carpenter's kit 479 | carton 480 | car wheel 481 | cash machine 482 | cassette 483 | cassette player 484 | castle 485 | catamaran 486 | CD player 487 | cello 488 | cellular telephone 489 | chain 490 | chainlink fence 491 | chain mail 492 | chain saw 493 | chest 494 | chiffonier 495 | chime 496 | china cabinet 497 | Christmas stocking 498 | church 499 | cinema 500 | cleaver 501 | cliff dwelling 502 | cloak 503 | clog 504 | cocktail shaker 505 | coffee mug 506 | coffeepot 507 | coil 508 | combination lock 509 | computer keyboard 510 | confectionery 511 | container ship 512 | convertible 513 | corkscrew 514 | cornet 515 | cowboy boot 516 | cowboy hat 517 | cradle 518 | crane 519 | crash helmet 520 | crate 521 | 
crib 522 | Crock Pot 523 | croquet ball 524 | crutch 525 | cuirass 526 | dam 527 | desk 528 | desktop computer 529 | dial telephone 530 | diaper 531 | digital clock 532 | digital watch 533 | dining table 534 | dishrag 535 | dishwasher 536 | disk brake 537 | dock 538 | dogsled 539 | dome 540 | doormat 541 | drilling platform 542 | drum 543 | drumstick 544 | dumbbell 545 | Dutch oven 546 | electric fan 547 | electric guitar 548 | electric locomotive 549 | entertainment center 550 | envelope 551 | espresso maker 552 | face powder 553 | feather boa 554 | file 555 | fireboat 556 | fire engine 557 | fire screen 558 | flagpole 559 | flute 560 | folding chair 561 | football helmet 562 | forklift 563 | fountain 564 | fountain pen 565 | four-poster 566 | freight car 567 | French horn 568 | frying pan 569 | fur coat 570 | garbage truck 571 | gasmask 572 | gas pump 573 | goblet 574 | go-kart 575 | golf ball 576 | golfcart 577 | gondola 578 | gong 579 | gown 580 | grand piano 581 | greenhouse 582 | grille 583 | grocery store 584 | guillotine 585 | hair slide 586 | hair spray 587 | half track 588 | hammer 589 | hamper 590 | hand blower 591 | hand-held computer 592 | handkerchief 593 | hard disc 594 | harmonica 595 | harp 596 | harvester 597 | hatchet 598 | holster 599 | home theater 600 | honeycomb 601 | hook 602 | hoopskirt 603 | horizontal bar 604 | horse cart 605 | hourglass 606 | iPod 607 | iron 608 | jack-o'-lantern 609 | jean 610 | jeep 611 | jersey 612 | jigsaw puzzle 613 | jinrikisha 614 | joystick 615 | kimono 616 | knee pad 617 | knot 618 | lab coat 619 | ladle 620 | lampshade 621 | laptop 622 | lawn mower 623 | lens cap 624 | letter opener 625 | library 626 | lifeboat 627 | lighter 628 | limousine 629 | liner 630 | lipstick 631 | Loafer 632 | lotion 633 | loudspeaker 634 | loupe 635 | lumbermill 636 | magnetic compass 637 | mailbag 638 | mailbox 639 | maillot 640 | maillot 641 | manhole cover 642 | maraca 643 | marimba 644 | mask 645 | matchstick 646 | maypole 647 | maze 648 | measuring cup 649 | medicine chest 650 | megalith 651 | microphone 652 | microwave 653 | military uniform 654 | milk can 655 | minibus 656 | miniskirt 657 | minivan 658 | missile 659 | mitten 660 | mixing bowl 661 | mobile home 662 | Model T 663 | modem 664 | monastery 665 | monitor 666 | moped 667 | mortar 668 | mortarboard 669 | mosque 670 | mosquito net 671 | motor scooter 672 | mountain bike 673 | mountain tent 674 | mouse 675 | mousetrap 676 | moving van 677 | muzzle 678 | nail 679 | neck brace 680 | necklace 681 | nipple 682 | notebook 683 | obelisk 684 | oboe 685 | ocarina 686 | odometer 687 | oil filter 688 | organ 689 | oscilloscope 690 | overskirt 691 | oxcart 692 | oxygen mask 693 | packet 694 | paddle 695 | paddlewheel 696 | padlock 697 | paintbrush 698 | pajama 699 | palace 700 | panpipe 701 | paper towel 702 | parachute 703 | parallel bars 704 | park bench 705 | parking meter 706 | passenger car 707 | patio 708 | pay-phone 709 | pedestal 710 | pencil box 711 | pencil sharpener 712 | perfume 713 | Petri dish 714 | photocopier 715 | pick 716 | pickelhaube 717 | picket fence 718 | pickup 719 | pier 720 | piggy bank 721 | pill bottle 722 | pillow 723 | ping-pong ball 724 | pinwheel 725 | pirate 726 | pitcher 727 | plane 728 | planetarium 729 | plastic bag 730 | plate rack 731 | plow 732 | plunger 733 | Polaroid camera 734 | pole 735 | police van 736 | poncho 737 | pool table 738 | pop bottle 739 | pot 740 | potter's wheel 741 | power drill 742 | prayer rug 743 | printer 744 | prison 745 | projectile 746 | 
projector 747 | puck 748 | punching bag 749 | purse 750 | quill 751 | quilt 752 | racer 753 | racket 754 | radiator 755 | radio 756 | radio telescope 757 | rain barrel 758 | recreational vehicle 759 | reel 760 | reflex camera 761 | refrigerator 762 | remote control 763 | restaurant 764 | revolver 765 | rifle 766 | rocking chair 767 | rotisserie 768 | rubber eraser 769 | rugby ball 770 | rule 771 | running shoe 772 | safe 773 | safety pin 774 | saltshaker 775 | sandal 776 | sarong 777 | sax 778 | scabbard 779 | scale 780 | school bus 781 | schooner 782 | scoreboard 783 | screen 784 | screw 785 | screwdriver 786 | seat belt 787 | sewing machine 788 | shield 789 | shoe shop 790 | shoji 791 | shopping basket 792 | shopping cart 793 | shovel 794 | shower cap 795 | shower curtain 796 | ski 797 | ski mask 798 | sleeping bag 799 | slide rule 800 | sliding door 801 | slot 802 | snorkel 803 | snowmobile 804 | snowplow 805 | soap dispenser 806 | soccer ball 807 | sock 808 | solar dish 809 | sombrero 810 | soup bowl 811 | space bar 812 | space heater 813 | space shuttle 814 | spatula 815 | speedboat 816 | spider web 817 | spindle 818 | sports car 819 | spotlight 820 | stage 821 | steam locomotive 822 | steel arch bridge 823 | steel drum 824 | stethoscope 825 | stole 826 | stone wall 827 | stopwatch 828 | stove 829 | strainer 830 | streetcar 831 | stretcher 832 | studio couch 833 | stupa 834 | submarine 835 | suit 836 | sundial 837 | sunglass 838 | sunglasses 839 | sunscreen 840 | suspension bridge 841 | swab 842 | sweatshirt 843 | swimming trunks 844 | swing 845 | switch 846 | syringe 847 | table lamp 848 | tank 849 | tape player 850 | teapot 851 | teddy 852 | television 853 | tennis ball 854 | thatch 855 | theater curtain 856 | thimble 857 | thresher 858 | throne 859 | tile roof 860 | toaster 861 | tobacco shop 862 | toilet seat 863 | torch 864 | totem pole 865 | tow truck 866 | toyshop 867 | tractor 868 | trailer truck 869 | tray 870 | trench coat 871 | tricycle 872 | trimaran 873 | tripod 874 | triumphal arch 875 | trolleybus 876 | trombone 877 | tub 878 | turnstile 879 | typewriter keyboard 880 | umbrella 881 | unicycle 882 | upright 883 | vacuum 884 | vase 885 | vault 886 | velvet 887 | vending machine 888 | vestment 889 | viaduct 890 | violin 891 | volleyball 892 | waffle iron 893 | wall clock 894 | wallet 895 | wardrobe 896 | warplane 897 | washbasin 898 | washer 899 | water bottle 900 | water jug 901 | water tower 902 | whiskey jug 903 | whistle 904 | wig 905 | window screen 906 | window shade 907 | Windsor tie 908 | wine bottle 909 | wing 910 | wok 911 | wooden spoon 912 | wool 913 | worm fence 914 | wreck 915 | yawl 916 | yurt 917 | web site 918 | comic book 919 | crossword puzzle 920 | street sign 921 | traffic light 922 | book jacket 923 | menu 924 | plate 925 | guacamole 926 | consomme 927 | hot pot 928 | trifle 929 | ice cream 930 | ice lolly 931 | French loaf 932 | bagel 933 | pretzel 934 | cheeseburger 935 | hotdog 936 | mashed potato 937 | head cabbage 938 | broccoli 939 | cauliflower 940 | zucchini 941 | spaghetti squash 942 | acorn squash 943 | butternut squash 944 | cucumber 945 | artichoke 946 | bell pepper 947 | cardoon 948 | mushroom 949 | Granny Smith 950 | strawberry 951 | orange 952 | lemon 953 | fig 954 | pineapple 955 | banana 956 | jackfruit 957 | custard apple 958 | pomegranate 959 | hay 960 | carbonara 961 | chocolate sauce 962 | dough 963 | meat loaf 964 | pizza 965 | potpie 966 | burrito 967 | red wine 968 | espresso 969 | cup 970 | eggnog 971 | alp 972 | bubble 973 | 
cliff 974 | coral reef 975 | geyser 976 | lakeside 977 | promontory 978 | sandbar 979 | seashore 980 | valley 981 | volcano 982 | ballplayer 983 | groom 984 | scuba diver 985 | rapeseed 986 | daisy 987 | yellow lady's slipper 988 | corn 989 | acorn 990 | hip 991 | buckeye 992 | coral fungus 993 | agaric 994 | gyromitra 995 | stinkhorn 996 | earthstar 997 | hen-of-the-woods 998 | bolete 999 | ear 1000 | toilet tissue -------------------------------------------------------------------------------- /api/main.py: -------------------------------------------------------------------------------- 1 | """ 2 | Adapted from: 3 | 4 | (1) https://github.com/shanesoh/deploy-ml-fastapi-redis-docker/ 5 | (2) https://github.com/aniketmaurya/tensorflow-fastapi-starter-pack 6 | """ 7 | 8 | import urllib.request 9 | 10 | import onnxruntime as ort 11 | from fastapi import FastAPI, File, Form, HTTPException 12 | 13 | from utils import decode_predictions, get_latest_model_url, prepare_image 14 | 15 | app = FastAPI(title="ONNX image classification API") 16 | 17 | MODEL_FN = "resnet50_w_preprocessing.onnx" 18 | DEFAULT_MODEL_URL = f"https://github.com/sayakpaul/ml-deployment-k8s-fastapi/releases/download/v1.0.0/{MODEL_FN}" 19 | 20 | 21 | @app.get("/") 22 | async def home(): 23 | return "Welcome!" 24 | 25 | 26 | @app.on_event("startup") 27 | def load_modules(): 28 | model_url = get_latest_model_url() 29 | 30 | # If there's no latest ONNX model released, fall back to the default model. 31 | if model_url is not None: 32 | urllib.request.urlretrieve(model_url, MODEL_FN) 33 | else: 34 | urllib.request.urlretrieve(DEFAULT_MODEL_URL, MODEL_FN) 35 | 36 | global resnet_model_sess 37 | resnet_model_sess = ort.InferenceSession(MODEL_FN) 38 | 39 | category_filename = "imagenet_classes.txt" 40 | category_url = f"https://raw.githubusercontent.com/pytorch/hub/master/{category_filename}" 41 | urllib.request.urlretrieve(category_url, category_filename) 42 | 43 | global imagenet_categories 44 | with open(category_filename, "r") as f: 45 | imagenet_categories = [s.strip() for s in f.readlines()] 46 | 47 | 48 | @app.post("/predict/image") 49 | async def predict_api( 50 | image_file: bytes = File(...), 51 | with_resize: bool = Form(...), 52 | with_post_process: bool = Form(...), 53 | ): 54 | image = prepare_image(image_file, with_resize) 55 | 56 | if len(image.shape) != 4: 57 | raise HTTPException( 58 | status_code=400, detail="Only 3-channel RGB images are supported."
59 | ) 60 | 61 | predictions = resnet_model_sess.run(None, {"image_input": image})[0] 62 | if with_post_process: 63 | response_dict = decode_predictions(predictions, imagenet_categories) 64 | return response_dict 65 | else: 66 | return "OK" 67 |
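Given the `/predict/image` endpoint above, a minimal client sketch (not part of the repo; the host, port, and test image are assumptions) might look like this:

```python
# Hypothetical client for the /predict/image endpoint defined in api/main.py.
# Assumes the server was started locally, e.g. `uvicorn main:app --port 8000`,
# and that a 224x224 test image is available in the working directory.
import requests

with open("cat_224x224.jpg", "rb") as f:
    response = requests.post(
        "http://localhost:8000/predict/image",
        files={"image_file": f},
        # These form fields map to the bool Form(...) parameters of predict_api.
        data={"with_resize": False, "with_post_process": True},
    )
print(response.status_code, response.json())
```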
-------------------------------------------------------------------------------- /api/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.22.2 2 | uvicorn[standard]==0.17.5 3 | Pillow==9.0.1 4 | onnxruntime==1.10.0 5 | fastapi==0.74.0 6 | python-multipart==0.0.5 7 | pydantic==1.9.0 8 | PyGithub[integrations]==1.55 -------------------------------------------------------------------------------- /api/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils import decode_predictions, get_latest_model_url, prepare_image 2 | -------------------------------------------------------------------------------- /api/utils/utils.py: -------------------------------------------------------------------------------- 1 | import io 2 | import json 3 | from typing import Dict, List, Optional 4 | 5 | import numpy as np 6 | import requests 7 | from fastapi import HTTPException 8 | from github import Github 9 | from PIL import Image 10 | 11 | TARGET_IMG_WIDTH = 224 12 | TARGET_IMG_HEIGHT = 224 13 | 14 | 15 | def get_latest_model_url() -> Optional[str]: 16 | """Gets the model download URL from the latest release artifacts.""" 17 | g = Github() 18 | 19 | repo = g.get_repo("sayakpaul/ml-deployment-k8s-fastapi") 20 | latest_release = repo.get_latest_release() 21 | assets = list(latest_release.get_assets()) 22 | 23 | download_url = None 24 | 25 | for asset in assets: 26 | if "onnx" in asset.name: 27 | asset_url = asset.url 28 | r = requests.get(asset_url) 29 | response = json.loads(r.text) 30 | download_url = response["browser_download_url"] 31 | 32 | return download_url 33 | 34 | 35 | def raise_http_exception(msg): 36 | """Raises an HTTPException with status code 400.""" 37 | raise HTTPException(status_code=400, detail=msg) 38 | 39 | 40 | def prepare_image(image_file: bytes, with_resizing: bool = False) -> np.ndarray: 41 | """Prepares an image for model prediction.""" 42 | image = Image.open(io.BytesIO(image_file)) 43 | width, height = image.size 44 | 45 | if image.format not in ["JPEG", "JPG", "PNG"]: 46 | raise_http_exception("Supported formats are JPEG, JPG, and PNG.") 47 | 48 | if with_resizing: 49 | image = image.resize((TARGET_IMG_WIDTH, TARGET_IMG_HEIGHT)) 50 | else: 51 | if width != TARGET_IMG_WIDTH or height != TARGET_IMG_HEIGHT: 52 | raise_http_exception("Image size must be 224x224.") 53 | 54 | image = np.array(image).astype("float32") 55 | return np.expand_dims(image, 0) 56 | 57 | 58 | def decode_predictions( 59 | predictions: np.ndarray, imagenet_categories: List[str] 60 | ) -> Dict[str, float]: 61 | """Decodes model predictions.""" 62 | predictions = np.squeeze(predictions) 63 | pred_name = imagenet_categories[int(predictions.argmax())] 64 | response_dict = {"Label": pred_name, "Score": f"{predictions.max():.3f}"} 65 | 66 | return response_dict 67 | -------------------------------------------------------------------------------- /locust/README.md: -------------------------------------------------------------------------------- 1 | # Load Test with Locust 2 | 3 | This directory contains a Locust script for load testing. 4 | 5 | ## How to set up 6 | 7 | 1. Installation 8 | 9 | ```bash 10 | pip3 install locust 11 | ``` 12 | 13 | 2. Run 14 | 15 | ```bash 16 | # with UI 17 | $ locust 18 | 19 | OR 20 | 21 | $ locust --users NUM_OF_USERS \ 22 | --spawn-rate SPAWN_RATE \ 23 | --host HOST_ADDRESS 24 | 25 | # without UI & manual config 26 | # the report will be written to report.html 27 | $ locust --headless \ 28 | --users NUM_OF_USERS \ 29 | --spawn-rate SPAWN_RATE \ 30 | --host HOST_ADDRESS \ 31 | --html report.html 32 | 33 | # without UI & auto config 34 | $ locust --config=load_test.conf 35 | ``` 36 | 37 | ## Notes 38 | 39 | * We used an `n1-standard` VM (4vCPU + 16GB RAM) on GCP in the `us-central1` region 40 | since the nodes on GKE are also located there. 41 | * Before running the load test, don't forget to replace `<>` with the endpoint of 42 | your API in `load_test.conf`. 43 | * We prepared a 224x224 resized image beforehand (`cat_224x224.jpg`). 44 | This keeps the focus on load testing the server side by minimizing the time 45 | spent on pre- and post-processing. 46 | -------------------------------------------------------------------------------- /locust/cat.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sayakpaul/ml-deployment-k8s-fastapi/55b690520b943bf39dbb02a7f28fc1aba781e945/locust/cat.jpg -------------------------------------------------------------------------------- /locust/cat_224x224.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sayakpaul/ml-deployment-k8s-fastapi/55b690520b943bf39dbb02a7f28fc1aba781e945/locust/cat_224x224.jpg -------------------------------------------------------------------------------- /locust/load_test.conf: -------------------------------------------------------------------------------- 1 | locustfile = locust.py 2 | headless = false 3 | users = 150 4 | spawn-rate = 1 5 | run-time = 5m 6 | host = http://<> 7 | html = reports/locust_report.html 8 | csv = reports/locust_report -------------------------------------------------------------------------------- /locust/locust.py: -------------------------------------------------------------------------------- 1 | from locust import HttpUser, constant, task 2 | 3 | 4 | class ImgClassificationUser(HttpUser): 5 | wait_time = constant(1) 6 | 7 | @task 8 | def predict(self): 9 | payload = {"with_resize": False, "with_post_process": False} 10 | with open("cat_224x224.jpg", "rb") as attach: 11 | _ = self.client.post( 12 | "/predict/image", files={"image_file": attach}, data=payload 13 | ) 14 |
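One possible refinement of `locust.py` (a sketch, not part of the repo): read the image bytes once per simulated user in `on_start`, so per-request disk I/O stays out of the measured latency, in line with the Notes in the README above:

```python
# Hypothetical variant of locust.py; loads the payload once per simulated user.
from locust import HttpUser, constant, task


class PreloadedImgClassificationUser(HttpUser):
    wait_time = constant(1)

    def on_start(self):
        # Runs once when a simulated user starts.
        with open("cat_224x224.jpg", "rb") as f:
            self.image_bytes = f.read()

    @task
    def predict(self):
        payload = {"with_resize": False, "with_post_process": False}
        _ = self.client.post(
            "/predict/image",
            files={"image_file": ("cat_224x224.jpg", self.image_bytes)},
            data=payload,
        )
```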
-------------------------------------------------------------------------------- /notebooks/TF_Serving.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "6FvzT_HQXz9J" 7 | }, 8 | "source": [ 9 | "# Verifying workable TF Serving\n", 10 | "\n", 11 | "This tutorial shows:\n", 12 | "- how to run TF Serving for a custom model in a Docker container\n", 13 | "- how to request predictions via both gRPC and RestAPI calls\n", 14 | "- the prediction timing results from TF Serving\n", 15 | "\n", 16 | "This notebook was written by referencing the [official TF Serving gRPC example](https://github.com/tensorflow/serving/blob/master/tensorflow_serving/example/resnet_k8s.yaml) and the [official TF Serving RestAPI example](https://www.tensorflow.org/tfx/tutorials/serving/rest_simple)." 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": { 22 | "id": "Com8Mcu2Xz9L" 23 | }, 24 | "source": [ 25 | "### Imports" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": { 32 | "id": "b-aGIWy8c2Ht" 33 | }, 34 | "outputs": [], 35 | "source": [ 36 | "!pip install -q requests\n", 37 | "!pip install -q tensorflow-serving-api" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 2, 43 | "metadata": { 44 | "id": "6lQVylcMXz9N" 45 | }, 46 | "outputs": [], 47 | "source": [ 48 | "import os\n", 49 | "import tempfile\n", 50 | "import pandas as pd\n", 51 | "import tensorflow as tf\n", 52 | "import numpy as np\n", 53 | "import json\n", 54 | "import requests\n", 55 | "\n", 56 | "# gRPC request specific imports\n", 57 | "import grpc\n", 58 | "from tensorflow_serving.apis import predict_pb2\n", 59 | "from tensorflow_serving.apis import prediction_service_pb2_grpc" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": { 65 | "id": "GoIj2728pLyw" 66 | }, 67 | "source": [ 68 | "## Model" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": { 74 | "id": "3xmYCIWpXz9N" 75 | }, 76 | "source": [ 77 | "### Get a sample model \n", 78 | "\n", 79 | "The target model is the plain `ResNet50` trained on ImageNet." 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 3, 85 | "metadata": { 86 | "colab": { 87 | "base_uri": "https://localhost:8080/" 88 | }, 89 | "id": "VysQtJQnXz9O", 90 | "outputId": "c63abf81-65e7-48c9-9e71-d16108da2d2a" 91 | }, 92 | "outputs": [ 93 | { 94 | "name": "stdout", 95 | "output_type": "stream", 96 | "text": [ 97 | "Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels.h5\n", 98 | "102973440/102967424 [==============================] - 2s 0us/step\n", 99 | "102981632/102967424 [==============================] - 2s 0us/step\n" 100 | ] 101 | } 102 | ], 103 | "source": [ 104 | "core = tf.keras.applications.ResNet50(include_top=True, input_shape=(224, 224, 3))\n", 105 | "\n", 106 | "inputs = tf.keras.layers.Input(shape=(224, 224, 3), name=\"image_input\")\n", 107 | "preprocess = tf.keras.applications.resnet50.preprocess_input(inputs)\n", 108 | "outputs = core(preprocess, training=False)\n", 109 | "model = tf.keras.Model(inputs=[inputs], outputs=[outputs])" 110 | ] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": { 115 | "id": "p3bC--0GXz9O" 116 | }, 117 | "source": [ 118 | "### Save the model\n", 119 | "\n", 120 | "The code below saves the model under `MODEL_DIR`." 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 4, 126 | "metadata": { 127 | "colab": { 128 | "base_uri": "https://localhost:8080/" 129 | }, 130 | "id": "z9AmyovhXz9O", 131 | "outputId": "c26eadf0-e06e-45e4-a40d-e00b5343154e" 132 | }, 133 | "outputs": [ 134 | { 135 | "name": "stdout", 136 | "output_type": "stream", 137 | "text": [ 138 | "export_path = /tmp/1\n", 139 | "\n", 140 | "WARNING:tensorflow:Compiled the loaded model, but the compiled metrics have yet to be built. 
`model.compile_metrics` will be empty until you train or evaluate the model.\n", 141 | "INFO:tensorflow:Assets written to: /tmp/1/assets\n", 142 | "\n", 143 | "Saved model:\n", 144 | "total 4040\n", 145 | "drwxr-xr-x 2 root root 4096 Mar 23 07:32 assets\n", 146 | "-rw-r--r-- 1 root root 557217 Mar 23 07:32 keras_metadata.pb\n", 147 | "-rw-r--r-- 1 root root 3565545 Mar 23 07:32 saved_model.pb\n", 148 | "drwxr-xr-x 2 root root 4096 Mar 23 07:32 variables\n" 149 | ] 150 | } 151 | ], 152 | "source": [ 153 | "MODEL_DIR = tempfile.gettempdir()\n", 154 | "version = 1\n", 155 | "export_path = os.path.join(MODEL_DIR, str(version))\n", 156 | "print('export_path = {}\\n'.format(export_path))\n", 157 | "\n", 158 | "tf.keras.models.save_model(\n", 159 | " model,\n", 160 | " export_path,\n", 161 | " overwrite=True,\n", 162 | " include_optimizer=True,\n", 163 | " save_format=None,\n", 164 | " signatures=None,\n", 165 | " options=None\n", 166 | ")\n", 167 | "\n", 168 | "print('\\nSaved model:')\n", 169 | "!ls -l {export_path}" 170 | ] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "metadata": { 175 | "id": "VV7onOD2Xz9P" 176 | }, 177 | "source": [ 178 | "### Examine your saved model" 179 | ] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": { 184 | "id": "baanYnt8ohM7" 185 | }, 186 | "source": [ 187 | "TensorFlow comes with a handy `saved_model_cli` tool to investigate saved models.\n", 188 | "\n", 189 | "Notice from `signature_def['serving_default']:` \n", 190 | "- the input name is `image_input`\n", 191 | "- the output name is `resnet50`\n", 192 | "\n", 193 | "You need to know these to make requests to the TF Serving server later." 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": 5, 199 | "metadata": { 200 | "colab": { 201 | "base_uri": "https://localhost:8080/" 202 | }, 203 | "id": "Lgzz06XoXz9Q", 204 | "outputId": "c51a85f9-c6bf-4e7e-f710-2a572fde45d6" 205 | }, 206 | "outputs": [ 207 | { 208 | "name": "stdout", 209 | "output_type": "stream", 210 | "text": [ 211 | "\n", 212 | "MetaGraphDef with tag-set: 'serve' contains the following SignatureDefs:\n", 213 | "\n", 214 | "signature_def['__saved_model_init_op']:\n", 215 | " The given SavedModel SignatureDef contains the following input(s):\n", 216 | " The given SavedModel SignatureDef contains the following output(s):\n", 217 | " outputs['__saved_model_init_op'] tensor_info:\n", 218 | " dtype: DT_INVALID\n", 219 | " shape: unknown_rank\n", 220 | " name: NoOp\n", 221 | " Method name is: \n", 222 | "\n", 223 | "signature_def['serving_default']:\n", 224 | " The given SavedModel SignatureDef contains the following input(s):\n", 225 | " inputs['image_input'] tensor_info:\n", 226 | " dtype: DT_FLOAT\n", 227 | " shape: (-1, 224, 224, 3)\n", 228 | " name: serving_default_image_input:0\n", 229 | " The given SavedModel SignatureDef contains the following output(s):\n", 230 | " outputs['resnet50'] tensor_info:\n", 231 | " dtype: DT_FLOAT\n", 232 | " shape: (-1, 1000)\n", 233 | " name: StatefulPartitionedCall:0\n", 234 | " Method name is: tensorflow/serving/predict\n", 235 | "\n", 236 | "Concrete Functions:\n", 237 | " Function Name: '__call__'\n", 238 | " Option #1\n", 239 | " Callable with:\n", 240 | " Argument #1\n", 241 | " inputs: TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name='inputs')\n", 242 | " Argument #2\n", 243 | " DType: bool\n", 244 | " Value: False\n", 245 | " Argument #3\n", 246 | " DType: NoneType\n", 247 | " Value: None\n", 248 | " Option #2\n", 249 | " Callable with:\n", 250 | "
Argument #1\n", 251 | " image_input: TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name='image_input')\n", 252 | " Argument #2\n", 253 | " DType: bool\n", 254 | " Value: False\n", 255 | " Argument #3\n", 256 | " DType: NoneType\n", 257 | " Value: None\n", 258 | " Option #3\n", 259 | " Callable with:\n", 260 | " Argument #1\n", 261 | " inputs: TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name='inputs')\n", 262 | " Argument #2\n", 263 | " DType: bool\n", 264 | " Value: True\n", 265 | " Argument #3\n", 266 | " DType: NoneType\n", 267 | " Value: None\n", 268 | " Option #4\n", 269 | " Callable with:\n", 270 | " Argument #1\n", 271 | " image_input: TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name='image_input')\n", 272 | " Argument #2\n", 273 | " DType: bool\n", 274 | " Value: True\n", 275 | " Argument #3\n", 276 | " DType: NoneType\n", 277 | " Value: None\n", 278 | "\n", 279 | " Function Name: '_default_save_signature'\n", 280 | " Option #1\n", 281 | " Callable with:\n", 282 | " Argument #1\n", 283 | " image_input: TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name='image_input')\n", 284 | "\n", 285 | " Function Name: 'call_and_return_all_conditional_losses'\n", 286 | " Option #1\n", 287 | " Callable with:\n", 288 | " Argument #1\n", 289 | " image_input: TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name='image_input')\n", 290 | " Argument #2\n", 291 | " DType: bool\n", 292 | " Value: True\n", 293 | " Argument #3\n", 294 | " DType: NoneType\n", 295 | " Value: None\n", 296 | " Option #2\n", 297 | " Callable with:\n", 298 | " Argument #1\n", 299 | " image_input: TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name='image_input')\n", 300 | " Argument #2\n", 301 | " DType: bool\n", 302 | " Value: False\n", 303 | " Argument #3\n", 304 | " DType: NoneType\n", 305 | " Value: None\n", 306 | " Option #3\n", 307 | " Callable with:\n", 308 | " Argument #1\n", 309 | " inputs: TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name='inputs')\n", 310 | " Argument #2\n", 311 | " DType: bool\n", 312 | " Value: True\n", 313 | " Argument #3\n", 314 | " DType: NoneType\n", 315 | " Value: None\n", 316 | " Option #4\n", 317 | " Callable with:\n", 318 | " Argument #1\n", 319 | " inputs: TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name='inputs')\n", 320 | " Argument #2\n", 321 | " DType: bool\n", 322 | " Value: False\n", 323 | " Argument #3\n", 324 | " DType: NoneType\n", 325 | " Value: None\n" 326 | ] 327 | } 328 | ], 329 | "source": [ 330 | "!saved_model_cli show --dir {export_path} --all" 331 | ] 332 | }, 333 | { 334 | "cell_type": "markdown", 335 | "metadata": { 336 | "id": "5NBTdC7jXz9Q" 337 | }, 338 | "source": [ 339 | "## TF Serving" 340 | ] 341 | }, 342 | { 343 | "cell_type": "markdown", 344 | "metadata": { 345 | "id": "g4G_oWb_plDI" 346 | }, 347 | "source": [ 348 | "### Create dummy data\n", 349 | "\n", 350 | "The dummy data simply contains random numbers with a batch size of 32."
351 | ] 352 | }, 353 | { 354 | "cell_type": "code", 355 | "execution_count": 6, 356 | "metadata": { 357 | "colab": { 358 | "base_uri": "https://localhost:8080/" 359 | }, 360 | "id": "loMcfbfTXz9S", 361 | "outputId": "0b18aed6-af3b-4e06-b5ae-e336305b0d5e" 362 | }, 363 | "outputs": [ 364 | { 365 | "data": { 366 | "text/plain": [ 367 | "TensorShape([32, 224, 224, 3])" 368 | ] 369 | }, 370 | "execution_count": 6, 371 | "metadata": {}, 372 | "output_type": "execute_result" 373 | } 374 | ], 375 | "source": [ 376 | "dummy_inputs = tf.random.normal((32, 224, 224, 3))\n", 377 | "dummy_inputs.shape" 378 | ] 379 | }, 380 | { 381 | "cell_type": "markdown", 382 | "metadata": { 383 | "id": "ZBxKquAwpCEh" 384 | }, 385 | "source": [ 386 | "### Install TF Serving tool" 387 | ] 388 | }, 389 | { 390 | "cell_type": "code", 391 | "execution_count": null, 392 | "metadata": { 393 | "id": "mDVz8VnnXz9Q" 394 | }, 395 | "outputs": [], 396 | "source": [ 397 | "!echo \"deb http://storage.googleapis.com/tensorflow-serving-apt stable tensorflow-model-server tensorflow-model-server-universal\" | sudo tee /etc/apt/sources.list.d/tensorflow-serving.list && \\\n", 398 | "curl https://storage.googleapis.com/tensorflow-serving-apt/tensorflow-serving.release.pub.gpg | sudo apt-key add -\n", 399 | "!sudo apt update" 400 | ] 401 | }, 402 | { 403 | "cell_type": "code", 404 | "execution_count": null, 405 | "metadata": { 406 | "id": "u2HQb4q6sonS" 407 | }, 408 | "outputs": [], 409 | "source": [ 410 | "!sudo apt-get install tensorflow-model-server" 411 | ] 412 | }, 413 | { 414 | "cell_type": "markdown", 415 | "metadata": { 416 | "id": "jn9oNYM7pYcv" 417 | }, 418 | "source": [ 419 | "### Run TF Serving server" 420 | ] 421 | }, 422 | { 423 | "cell_type": "code", 424 | "execution_count": 26, 425 | "metadata": { 426 | "id": "MH-RScSvXz9R" 427 | }, 428 | "outputs": [], 429 | "source": [ 430 | "os.environ[\"MODEL_DIR\"] = MODEL_DIR" 431 | ] 432 | }, 433 | { 434 | "cell_type": "markdown", 435 | "metadata": {}, 436 | "source": [ 437 | "`tensorflow_model_server` accepts a set of options.\n", 438 | "- `--rest_api_port` exposes an additional port for the RestAPI. By default, `8500` is exposed for gRPC.\n", 439 | "- `--model_name` lets TF Serving identify which model to access. You can see this in the RestAPI's URI.\n", 440 | "- `--enable_model_warmup` \n", 441 | " - The TensorFlow runtime has components that are lazily initialized, which can cause high latency for the first request(s) sent to a model after it is loaded. To reduce the impact of lazy initialization on request latency, it's possible to trigger the initialization of the sub-systems and components at model load time by providing a sample set of inference requests along with the SavedModel. 
This process is known as \"warming up\" the model.\n", 442 | " - To trigger warmup of the model at load time, attach a warmup data file under the `assets.extra` subfolder of the SavedModel directory.\n", 443 | " - The `--enable_model_warmup` option triggers this process.\n", 444 | " - For further information, see the [official documentation](https://www.tensorflow.org/tfx/serving/saved_model_warmup?hl=en)" 445 | ] 446 | }, 447 | { 448 | "cell_type": "code", 449 | "execution_count": null, 450 | "metadata": { 451 | "id": "Mq4t5ozVXz9R" 452 | }, 453 | "outputs": [], 454 | "source": [ 455 | "!nohup tensorflow_model_server \\\n", 456 | " --rest_api_port=8501 \\\n", 457 | " --model_name=resnet_model \\\n", 458 | " --model_base_path=$MODEL_DIR >server.log 2>&1 &\n", 459 | "\n", 460 | "# --enable_model_warmup for warmup(https://www.tensorflow.org/tfx/serving/saved_model_warmup)" 461 | ] 462 | }, 463 | { 464 | "cell_type": "code", 465 | "execution_count": 28, 466 | "metadata": { 467 | "id": "PVhTO53jXz9S" 468 | }, 469 | "outputs": [], 470 | "source": [ 471 | "!cat server.log" 472 | ] 473 | }, 474 | { 475 | "cell_type": "markdown", 476 | "metadata": { 477 | "id": "ea6V73oXzs3U" 478 | }, 479 | "source": [ 480 | "Notice that two ports are exposed, listening for both RestAPI (`8501`) and gRPC (`8500`) requests." 481 | ] 482 | }, 483 | { 484 | "cell_type": "code", 485 | "execution_count": 29, 486 | "metadata": { 487 | "colab": { 488 | "base_uri": "https://localhost:8080/" 489 | }, 490 | "id": "KumZ3xB4giEa", 491 | "outputId": "4302ee0a-994f-485a-c99b-4fa394068d64" 492 | }, 493 | "outputs": [ 494 | { 495 | "name": "stdout", 496 | "output_type": "stream", 497 | "text": [ 498 | "node 7 root 21u IPv6 25789 0t0 TCP *:8080 (LISTEN)\n", 499 | "colab-fil 30 root 5u IPv4 26644 0t0 TCP *:3453 (LISTEN)\n", 500 | "colab-fil 30 root 6u IPv6 26645 0t0 TCP *:3453 (LISTEN)\n", 501 | "jupyter-n 43 root 6u IPv4 25864 0t0 TCP 172.28.0.2:9000 (LISTEN)\n", 502 | "python3 61 root 15u IPv4 27814 0t0 TCP 127.0.0.1:50215 (LISTEN)\n", 503 | "python3 61 root 18u IPv4 27818 0t0 TCP 127.0.0.1:54779 (LISTEN)\n", 504 | "python3 61 root 21u IPv4 27822 0t0 TCP 127.0.0.1:40395 (LISTEN)\n", 505 | "python3 61 root 24u IPv4 27826 0t0 TCP 127.0.0.1:60517 (LISTEN)\n", 506 | "python3 61 root 30u IPv4 27832 0t0 TCP 127.0.0.1:40255 (LISTEN)\n", 507 | "python3 61 root 43u IPv4 28831 0t0 TCP 127.0.0.1:53235 (LISTEN)\n", 508 | "python3 81 root 3u IPv4 29267 0t0 TCP 127.0.0.1:15144 (LISTEN)\n", 509 | "python3 81 root 5u IPv4 28223 0t0 TCP 127.0.0.1:42197 (LISTEN)\n", 510 | "python3 81 root 9u IPv4 28356 0t0 TCP 127.0.0.1:41627 (LISTEN)\n", 511 | "tensorflo 5933 root 5u IPv4 66554 0t0 TCP *:8500 (LISTEN)\n", 512 | "tensorflo 5933 root 12u IPv4 66559 0t0 TCP *:8501 (LISTEN)\n" 513 | ] 514 | } 515 | ], 516 | "source": [ 517 | "!sudo lsof -i -P -n | grep LISTEN" 518 | ] 519 | }, 520 | { 521 | "cell_type": "markdown", 522 | "metadata": { 523 | "id": "mvHRnTmppqn9" 524 | }, 525 | "source": [ 526 | "## RestAPI request" 527 | ] 528 | }, 529 | { 530 | "cell_type": "markdown", 531 | "metadata": { 532 | "id": "1QNMoU3qq2fN" 533 | }, 534 | "source": [ 535 | "### Convert the dummy data into JSON format" 536 | ] 537 | }, 538 | { 539 | "cell_type": "code", 540 | "execution_count": 30, 541 | "metadata": { 542 | "colab": { 543 | "base_uri": "https://localhost:8080/" 544 | }, 545 | "id": "xDw6gT7lXz9S", 546 | "outputId": "19e82468-1441-4dae-c514-9d8b78f53240" 547 | }, 548 | "outputs": [ 549 | { 550 | "name": "stdout", 551 | "output_type": "stream", 552 | "text": [ 553 | "Data: 
{\"signature_name\": \"serving_default\", \"instances\": ... 442383, 0.8007770776748657, -0.7472004890441895]]]]}\n" 554 | ] 555 | } 556 | ], 557 | "source": [ 558 | "data = json.dumps({\"signature_name\": \"serving_default\", \"instances\": dummy_inputs.numpy().tolist()})\n", 559 | "print('Data: {} ... {}'.format(data[:50], data[len(data)-52:]))" 560 | ] 561 | }, 562 | { 563 | "cell_type": "markdown", 564 | "metadata": { 565 | "id": "hzlArynZq-dF" 566 | }, 567 | "source": [ 568 | "### Make a request" 569 | ] 570 | }, 571 | { 572 | "cell_type": "code", 573 | "execution_count": 31, 574 | "metadata": { 575 | "id": "hh6vmxqnXz9T" 576 | }, 577 | "outputs": [], 578 | "source": [ 579 | "headers = {\"content-type\": \"application/json\"}" 580 | ] 581 | }, 582 | { 583 | "cell_type": "code", 584 | "execution_count": 32, 585 | "metadata": { 586 | "colab": { 587 | "base_uri": "https://localhost:8080/" 588 | }, 589 | "id": "fS_DI5QpdZTg", 590 | "outputId": "6b93ca46-1047-4b76-bd84-d21f95f9f4e9" 591 | }, 592 | "outputs": [ 593 | { 594 | "name": "stdout", 595 | "output_type": "stream", 596 | "text": [ 597 | "1 loop, best of 5: 4.11 s per loop\n" 598 | ] 599 | } 600 | ], 601 | "source": [ 602 | "%%timeit\n", 603 | "json_response = requests.post('http://localhost:8501/v1/models/resnet_model:predict', \n", 604 | " data=data, headers=headers)" 605 | ] 606 | }, 607 | { 608 | "cell_type": "markdown", 609 | "metadata": { 610 | "id": "_n8urXddrJp0" 611 | }, 612 | "source": [ 613 | "### Interpret the output" 614 | ] 615 | }, 616 | { 617 | "cell_type": "code", 618 | "execution_count": 36, 619 | "metadata": { 620 | "colab": { 621 | "base_uri": "https://localhost:8080/" 622 | }, 623 | "id": "brI4TCETXz9T", 624 | "outputId": "7a098027-fb8a-4bfd-96dc-0bf74d26af09" 625 | }, 626 | "outputs": [ 627 | { 628 | "name": "stdout", 629 | "output_type": "stream", 630 | "text": [ 631 | "Prediction class: [664 664 664 664 664 664 664 664 664 664 664 664 664 664 664 664 664 664\n", 632 | " 664 664 664 664 664 664 664 851 664 664 851 664 664 664]\n" 633 | ] 634 | } 635 | ], 636 | "source": [ 637 | "json_response = requests.post('http://localhost:8501/v1/models/resnet_model:predict', \n", 638 | " data=data, headers=headers)\n", 639 | "rest_predictions = json.loads(json_response.text)['predictions']\n", 640 | "print('Prediction class: {}'.format(np.argmax(rest_predictions, axis=-1)))" 641 | ] 642 | }, 643 | { 644 | "cell_type": "markdown", 645 | "metadata": { 646 | "id": "lwuaqGVud58k" 647 | }, 648 | "source": [ 649 | "## gRPC request" 650 | ] 651 | }, 652 | { 653 | "cell_type": "markdown", 654 | "metadata": { 655 | "id": "yr7vO8BQrP2S" 656 | }, 657 | "source": [ 658 | "### Open up gRPC channel" 659 | ] 660 | }, 661 | { 662 | "cell_type": "code", 663 | "execution_count": 37, 664 | "metadata": { 665 | "id": "Y1cxieBDfyjK" 666 | }, 667 | "outputs": [], 668 | "source": [ 669 | "channel = grpc.insecure_channel('localhost:8500')\n", 670 | "stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)" 671 | ] 672 | }, 673 | { 674 | "cell_type": "markdown", 675 | "metadata": { 676 | "id": "fS5b0VVfrTmF" 677 | }, 678 | "source": [ 679 | "### Prepare a request" 680 | ] 681 | }, 682 | { 683 | "cell_type": "code", 684 | "execution_count": 38, 685 | "metadata": { 686 | "id": "2QD8xK47emy5" 687 | }, 688 | "outputs": [], 689 | "source": [ 690 | "request = predict_pb2.PredictRequest()\n", 691 | "request.model_spec.name = 'resnet_model'\n", 692 | "request.model_spec.signature_name = 'serving_default'\n", 693 | 
"request.inputs['image_input'].CopyFrom(\n", 694 | " tf.make_tensor_proto(dummy_inputs)) #, shape=[32,224,224,3]))" 695 | ] 696 | }, 697 | { 698 | "cell_type": "markdown", 699 | "metadata": { 700 | "id": "7UztXjZGrYTf" 701 | }, 702 | "source": [ 703 | "### Make a request" 704 | ] 705 | }, 706 | { 707 | "cell_type": "code", 708 | "execution_count": 39, 709 | "metadata": { 710 | "colab": { 711 | "base_uri": "https://localhost:8080/" 712 | }, 713 | "id": "wvslcT5_f4P6", 714 | "outputId": "e4a1008a-dad5-4f8f-a0f6-f9b68aa7db67" 715 | }, 716 | "outputs": [ 717 | { 718 | "name": "stdout", 719 | "output_type": "stream", 720 | "text": [ 721 | "1 loop, best of 5: 3.63 s per loop\n" 722 | ] 723 | } 724 | ], 725 | "source": [ 726 | "%%timeit\n", 727 | "result = stub.Predict(request, 10.0) # 10 secs timeout" 728 | ] 729 | }, 730 | { 731 | "cell_type": "markdown", 732 | "metadata": { 733 | "id": "o52MnidprdCY" 734 | }, 735 | "source": [ 736 | "### Interpret the output" 737 | ] 738 | }, 739 | { 740 | "cell_type": "code", 741 | "execution_count": 40, 742 | "metadata": { 743 | "colab": { 744 | "base_uri": "https://localhost:8080/" 745 | }, 746 | "id": "TBfd4TG0f5z6", 747 | "outputId": "ffc40a16-787e-4146-9c1e-27317d10867f" 748 | }, 749 | "outputs": [ 750 | { 751 | "name": "stdout", 752 | "output_type": "stream", 753 | "text": [ 754 | "Prediction class: [664 664 664 664 664 664 664 664 664 664 664 664 664 664 664 664 664 664\n", 755 | " 664 664 664 664 664 664 664 851 664 664 851 664 664 664]\n" 756 | ] 757 | } 758 | ], 759 | "source": [ 760 | "grpc_predictions = stub.Predict(request, 10.0) # 10 secs timeout\n", 761 | "grpc_predictions = grpc_predictions.outputs['resnet50'].float_val\n", 762 | "grpc_predictions = np.array(grpc_predictions).reshape(32, -1)\n", 763 | "print('Prediction class: {}'.format(np.argmax(grpc_predictions, axis=-1)))" 764 | ] 765 | }, 766 | { 767 | "cell_type": "markdown", 768 | "metadata": { 769 | "id": "_dVHrF1ksyAc" 770 | }, 771 | "source": [ 772 | "## Compare the two results if they are identical\n", 773 | "\n", 774 | "`np.testing.assert_allclose` raises exception when the given two arrays do not match exactly." 775 | ] 776 | }, 777 | { 778 | "cell_type": "code", 779 | "execution_count": 41, 780 | "metadata": { 781 | "id": "UA4iKEcpioc8" 782 | }, 783 | "outputs": [], 784 | "source": [ 785 | "np.testing.assert_allclose(rest_predictions, grpc_predictions, atol=1e-4)" 786 | ] 787 | }, 788 | { 789 | "cell_type": "markdown", 790 | "metadata": { 791 | "id": "9-UcGFM0z65y" 792 | }, 793 | "source": [ 794 | "## Conclusion\n", 795 | "\n", 796 | "gRPC call took about 3.64 seconds while RestAPI call took about 4.11 seconds on the data of the batch size of 32. This let use conclude that gRPC call is much faster than RestAPI. \n", 797 | "\n", 798 | "Also note that this is very close performance comparing to the ONNX inference without any Server framework involved. That means we can expect TF Serving with gRPC should be faster than ONNX hosted on FastAPI server framework since FastAPI is a python framework while TF Serving is C++ implementation." 
799 | ] 800 | }, 801 | { 802 | "cell_type": "code", 803 | "execution_count": null, 804 | "metadata": { 805 | "id": "DbLQp2Do0k6H" 806 | }, 807 | "outputs": [], 808 | "source": [] 809 | } 810 | ], 811 | "metadata": { 812 | "colab": { 813 | "collapsed_sections": [], 814 | "name": "TF_Serving.ipynb", 815 | "provenance": [], 816 | "toc_visible": true 817 | }, 818 | "interpreter": { 819 | "hash": "626869861cd3ed4fdbaf755d0ab61c53ee2a93056f2b69c4f7170d3cc24dc5ea" 820 | }, 821 | "kernelspec": { 822 | "display_name": "Python 3.8.12 ('.venv': venv)", 823 | "language": "python", 824 | "name": "python3" 825 | }, 826 | "language_info": { 827 | "codemirror_mode": { 828 | "name": "ipython", 829 | "version": 3 830 | }, 831 | "file_extension": ".py", 832 | "mimetype": "text/x-python", 833 | "name": "python", 834 | "nbconvert_exporter": "python", 835 | "pygments_lexer": "ipython3", 836 | "version": "3.8.12" 837 | }, 838 | "orig_nbformat": 4 839 | }, 840 | "nbformat": 4, 841 | "nbformat_minor": 0 842 | } 843 | 
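The warmup discussion in `TF_Serving.ipynb` above stops short of creating the warmup file itself. Following the linked TF Serving warmup documentation, the record could be generated roughly as below (a sketch, not code from this repo; the export path and model name are assumptions matching the notebook):

```python
# Hypothetical warmup-record generator for the SavedModel exported above.
import tensorflow as tf
from tensorflow_serving.apis import predict_pb2, prediction_log_pb2

export_path = "/tmp/1"  # assumed export path from the notebook
warmup_dir = f"{export_path}/assets.extra"
tf.io.gfile.makedirs(warmup_dir)

# Build one representative request; TF Serving replays it at model load time.
request = predict_pb2.PredictRequest()
request.model_spec.name = "resnet_model"
request.model_spec.signature_name = "serving_default"
request.inputs["image_input"].CopyFrom(
    tf.make_tensor_proto(tf.random.normal((1, 224, 224, 3))))

log = prediction_log_pb2.PredictionLog(
    predict_log=prediction_log_pb2.PredictLog(request=request))

# The file name must be exactly `tf_serving_warmup_requests`.
with tf.io.TFRecordWriter(f"{warmup_dir}/tf_serving_warmup_requests") as writer:
    writer.write(log.SerializeToString())
```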
-------------------------------------------------------------------------------- /notebooks/TF_to_ONNX.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "view-in-github", 7 | "colab_type": "text" 8 | }, 9 | "source": [ 10 | "\"Open" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": { 16 | "id": "ahaLOgxyzACW" 17 | }, 18 | "source": [ 19 | "# Convert `tf.keras` model to ONNX\n", 20 | "\n", 21 | "This tutorial shows:\n", 22 | "- how to convert a tf.keras model to ONNX, either from a saved model file or directly from the source code. \n", 23 | "- a comparison of CPU inference time between the tf.keras model and the converted ONNX model." 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": { 29 | "id": "CmnzNRTkzaYq" 30 | }, 31 | "source": [ 32 | "## Install ONNX dependencies\n", 33 | "- `tf2onnx` provides a tool to convert a TensorFlow model to ONNX\n", 34 | "- `onnxruntime` is used to run inference on a saved ONNX model." 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": { 41 | "id": "Y7VIFntKUh0R" 42 | }, 43 | "outputs": [], 44 | "source": [ 45 | "!pip install -Uqq tf2onnx\n", 46 | "!pip install -Uqq onnxruntime" 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": { 52 | "id": "D7TJluNyz8k0" 53 | }, 54 | "source": [ 55 | "### Imports" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": { 62 | "id": "-UfszPPVf9P0" 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "import tf2onnx\n", 67 | "import pandas as pd\n", 68 | "import tensorflow as tf\n", 69 | "import numpy as np" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": { 75 | "id": "_eo3f1Zn0S3F" 76 | }, 77 | "source": [ 78 | "### Get a sample model " 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": { 85 | "id": "3R81akF_hDEL" 86 | }, 87 | "outputs": [], 88 | "source": [ 89 | "core = tf.keras.applications.ResNet50(include_top=True, input_shape=(224, 224, 3))\n", 90 | "\n", 91 | "inputs = tf.keras.layers.Input(shape=(224, 224, 3), name=\"image_input\")\n", 92 | "preprocess = tf.keras.applications.resnet50.preprocess_input(inputs)\n", 93 | "outputs = core(preprocess, training=False)\n", 94 | "model = tf.keras.Model(inputs=[inputs], outputs=[outputs])" 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "source": [ 100 | "Note that we are including the preprocessing layer in the `model` object. This will allow us to load an image from disk and run the model directly without requiring any\n", 101 | "model-specific preprocessing. This reduces training/serving skew. " 102 | ], 103 | "metadata": { 104 | "id": "W3smmoIBCFOX" 105 | } 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "metadata": { 110 | "id": "MQg5cN910Z6q" 111 | }, 112 | "source": [ 113 | "## Convert to ONNX" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "metadata": { 120 | "id": "3-friv_fMk79" 121 | }, 122 | "outputs": [], 123 | "source": [ 124 | "num_layers = len(model.layers)\n", 125 | "print(f'first layer name: {model.layers[0].name}')\n", 126 | "print(f'last layer name: {model.layers[num_layers-1].name}')" 127 | ] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": { 132 | "id": "UBGQxHHz0dGP" 133 | }, 134 | "source": [ 135 | "### Conversion\n", 136 | "\n", 137 | "`opset` in `tf2onnx.convert.from_keras` is the ONNX opset version. 
You can find the full list of which TensorFlow (TF) ops are convertible to ONNX ops [here](https://github.com/onnx/tensorflow-onnx/blob/master/support_status.md).\n", 138 | "\n", 139 | "There are two ways to convert a TensorFlow model to ONNX:\n", 140 | "- `tf2onnx.convert.from_keras` to convert programmatically\n", 141 | "- `tf2onnx.convert` CLI to convert a saved TensorFlow model" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "metadata": { 148 | "id": "_MAEoy9j0QRQ" 149 | }, 150 | "outputs": [], 151 | "source": [ 152 | "import onnx\n", 153 | "\n", 154 | "input_signature = [tf.TensorSpec([None, 224, 224, 3], tf.float32, name='image_input')]\n", 155 | "onnx_model, _ = tf2onnx.convert.from_keras(model, input_signature, opset=15)\n", 156 | "onnx.save(onnx_model, \"resnet50_w_preprocessing.onnx\")\n", 157 | "\n", 158 | "# model.save('my_model')\n", 159 | "# !python -m tf2onnx.convert --saved-model my_model --output my_model.onnx" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": { 165 | "id": "V2-aNpahQMVR" 166 | }, 167 | "source": [ 168 | "## Test TF vs ONNX model with dummy data" 169 | ] 170 | }, 171 | { 172 | "cell_type": "markdown", 173 | "metadata": { 174 | "id": "Zt5lsQoUQXOo" 175 | }, 176 | "source": [ 177 | "### Generate dummy data " 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": null, 183 | "metadata": { 184 | "id": "ceqZH2KbPznx" 185 | }, 186 | "outputs": [], 187 | "source": [ 188 | "dummy_inputs = tf.random.normal((32, 224, 224, 3))" 189 | ] 190 | }, 191 | { 192 | "cell_type": "markdown", 193 | "metadata": { 194 | "id": "M8DR47zeQZHI" 195 | }, 196 | "source": [ 197 | "### Test original TF model with dummy data" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": null, 203 | "metadata": { 204 | "id": "zL8Lw9H8QbT7" 205 | }, 206 | "outputs": [], 207 | "source": [ 208 | "%%timeit\n", 209 | "model.predict(dummy_inputs)" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": null, 215 | "metadata": { 216 | "id": "smFa5VWjTNLb" 217 | }, 218 | "outputs": [], 219 | "source": [ 220 | "tf_preds = model.predict(dummy_inputs)" 221 | ] 222 | }, 223 | { 224 | "cell_type": "markdown", 225 | "metadata": { 226 | "id": "Lqhi458k0fkM" 227 | }, 228 | "source": [ 229 | "### Test converted ONNX model with dummy data\n", 230 | "\n", 231 | "If you want to run inference on a GPU, you can do so by setting `providers=[\"CUDAExecutionProvider\"]` in `ort.InferenceSession`.\n", 232 | "\n", 233 | "The first parameter in `sess.run` is set to `None`, which means all the outputs of the model will be retrieved. 
" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": null, 239 | "metadata": { 240 | "id": "1ELVBwrn0-Cf" 241 | }, 242 | "outputs": [], 243 | "source": [ 244 | "import onnxruntime as ort\n", 245 | "import numpy as np\n", 246 | "\n", 247 | "sess = ort.InferenceSession(\"resnet50_w_preprocessing.onnx\") # providers=[\"CUDAExecutionProvider\"])\n", 248 | "np_dummy_inputs = dummy_inputs.numpy()" 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": null, 254 | "metadata": { 255 | "id": "jszhyR15SJaE" 256 | }, 257 | "outputs": [], 258 | "source": [ 259 | "%%timeit \n", 260 | "sess.run(None, {\"image_input\": np_dummy_inputs})" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": null, 266 | "metadata": { 267 | "id": "Ax6opk4ENmlK" 268 | }, 269 | "outputs": [], 270 | "source": [ 271 | "ort_preds = sess.run(None, {\"image_input\": np_dummy_inputs})" 272 | ] 273 | }, 274 | { 275 | "cell_type": "markdown", 276 | "source": [ 277 | "## Check if the TF and ONNX outputs match" 278 | ], 279 | "metadata": { 280 | "id": "jbrwQMDbBLps" 281 | } 282 | }, 283 | { 284 | "cell_type": "code", 285 | "source": [ 286 | "np.testing.assert_allclose(tf_preds, ort_preds[0], atol=1e-4)" 287 | ], 288 | "metadata": { 289 | "id": "um99Uu4FBPrY" 290 | }, 291 | "execution_count": null, 292 | "outputs": [] 293 | }, 294 | { 295 | "cell_type": "markdown", 296 | "metadata": { 297 | "id": "QPu6kdNnU8Y6" 298 | }, 299 | "source": [ 300 | "## Conclusion\n", 301 | "\n", 302 | "We did a simple experiments with dummy dataset of 32 batch size. The default behaviour of `timeit` is to measure the average of the cell execution time with 7 times of repeat ([`timeit`'s default behaviour](https://ipython.readthedocs.io/en/stable/interactive/magics.html#magic-timeit)).\n", 303 | "\n", 304 | "\n", 305 | "The ONNX model will likely always have a better inference latency than the TF model if you are using a CPU server for inference." 306 | ] 307 | } 308 | ], 309 | "metadata": { 310 | "colab": { 311 | "collapsed_sections": [], 312 | "name": "TF to ONNX.ipynb", 313 | "provenance": [], 314 | "include_colab_link": true 315 | }, 316 | "kernelspec": { 317 | "display_name": "Python 3", 318 | "name": "python3" 319 | }, 320 | "language_info": { 321 | "name": "python", 322 | "version": "3.8.10" 323 | } 324 | }, 325 | "nbformat": 4, 326 | "nbformat_minor": 0 327 | } --------------------------------------------------------------------------------