├── .gitignore ├── Dockerfile ├── LICENSE ├── Makefile ├── PROJECT ├── README-zh_CN.md ├── README.md ├── assets ├── crd.yaml ├── elasticserving_operator.yaml └── sample_service.yaml ├── config ├── certmanager │ ├── certificate.yaml │ ├── kustomization.yaml │ └── kustomizeconfig.yaml ├── crd │ ├── bases │ │ └── elasticserving.paddlepaddle.org_paddleservices.yaml │ ├── kustomization.yaml │ ├── kustomizeconfig.yaml │ └── patches │ │ ├── cainjection_in_paddles.yaml │ │ └── webhook_in_paddles.yaml ├── default │ ├── kustomization.yaml │ ├── manager_auth_proxy_patch.yaml │ ├── manager_webhook_patch.yaml │ └── webhookcainjection_patch.yaml ├── manager │ ├── kustomization.yaml │ └── manager.yaml ├── prometheus │ ├── kustomization.yaml │ └── monitor.yaml ├── rbac │ ├── auth_proxy_client_clusterrole.yaml │ ├── auth_proxy_role.yaml │ ├── auth_proxy_role_binding.yaml │ ├── auth_proxy_service.yaml │ ├── kustomization.yaml │ ├── leader_election_role.yaml │ ├── leader_election_role_binding.yaml │ ├── paddle_editor_role.yaml │ ├── paddle_viewer_role.yaml │ ├── role.yaml │ └── role_binding.yaml ├── samples │ ├── elasticserving_v1_paddle.yaml │ └── kustomization.yaml ├── serving │ ├── kustomization.yaml │ └── namespace.yaml └── webhook │ ├── kustomization.yaml │ ├── kustomizeconfig.yaml │ └── service.yaml ├── docs ├── api-doc-gen │ └── gen_api_doc.sh ├── en │ ├── api_doc.md │ └── examples │ │ ├── bert.md │ │ ├── criteo_ctr.md │ │ ├── lac.md │ │ └── wide_deep.md └── zh_CN │ └── examples │ ├── bert.md │ ├── criteo_ctr.md │ ├── lac.md │ └── wide_deep.md ├── go.mod ├── go.sum ├── hack ├── boilerplate.go.txt ├── install_knative.sh └── install_knative_kourier.sh ├── main.go └── pkg ├── apis ├── doc.go └── elasticserving │ └── v1 │ ├── doc.go │ ├── groupversion_info.go │ ├── paddleservice_status.go │ ├── paddleservice_types.go │ └── zz_generated.deepcopy.go ├── constants └── constants.go └── controllers ├── doc.go └── elasticserving ├── paddleflow_serving_controller.go ├── reconcilers └── knative │ └── service_reconciler.go ├── resources └── knative │ ├── service.go │ └── service_test.go └── suite_test.go /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Binaries for programs and plugins 3 | *.exe 4 | *.exe~ 5 | *.dll 6 | *.so 7 | *.dylib 8 | bin 9 | docs/api-doc-gen 10 | 11 | # Test binary, build with `go test -c` 12 | *.test 13 | 14 | # Output of the go coverage tool, specifically when used with LiteIDE 15 | *.out 16 | 17 | # Kubernetes Generated files - skip generated files, except for vendored files 18 | 19 | !vendor/**/zz_generated.* 20 | 21 | # editor and IDE paraphernalia 22 | .idea 23 | *.swp 24 | *.swo 25 | *~ 26 | 27 | .DS_Store 28 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Build the manager binary 2 | FROM golang:1.13 as builder 3 | 4 | WORKDIR /workspace 5 | # Copy the Go Modules manifests 6 | COPY go.mod go.mod 7 | COPY go.sum go.sum 8 | # cache deps before building and copying source so that we don't need to re-download as much 9 | # and so that source changes don't invalidate our downloaded layer 10 | ENV GOPROXY="https://goproxy.cn,direct" 11 | RUN go mod download 12 | 13 | # Copy the go source 14 | COPY main.go main.go 15 | COPY pkg/ pkg/ 16 | 17 | # Build 18 | RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 GO111MODULE=on go build -a -o manager main.go 19 | 20 | # Use minideb as a minimal base 
image to package the manager binary 21 | # Refer to https://github.com/bitnami/minideb for more details 22 | FROM bitnami/minideb:stretch 23 | WORKDIR / 24 | COPY --from=builder /workspace/manager . 25 | 26 | ENTRYPOINT ["/manager"] 27 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | # Image URL to use for all building/pushing image targets 3 | IMG ?= registry.baidubce.com/paddleflow-public/paddleservice-controller:latest 4 | # Produce CRDs with field descriptions omitted (maxDescLen=0) to keep the manifests small 5 | CRD_OPTIONS ?= "crd:maxDescLen=0" 6 | 7 | # Get the currently used golang install path (in GOPATH/bin, unless GOBIN is set) 8 | ifeq (,$(shell go env GOBIN)) 9 | GOBIN=$(shell go env GOPATH)/bin 10 | else 11 | GOBIN=$(shell go env GOBIN) 12 | endif 13 | 14 | all: manager 15 | 16 | # Run tests 17 | test: generate fmt vet manifests 18 | go test ./... -coverprofile cover.out 19 | 20 | # Build manager binary 21 | manager: generate fmt vet 22 | go build -o bin/manager main.go 23 | 24 | # Run against the configured Kubernetes cluster in ~/.kube/config 25 | run: generate fmt vet manifests 26 | go run ./main.go 27 | 28 | # Install CRDs into a cluster 29 | install: assets 30 | kubectl apply -f assets/crd.yaml --validate=false 31 | 32 | # Uninstall CRDs from a cluster 33 | uninstall: 34 | kubectl delete -f assets/crd.yaml 35 | 36 | # Deploy controller in the configured Kubernetes cluster in ~/.kube/config 37 | deploy: assets 38 | kubectl apply -f assets/elasticserving_operator.yaml 39 | 40 | undeploy: 41 | kubectl delete -f assets/elasticserving_operator.yaml 42 | 43 | assets: manifests 44 | cd config/manager && kustomize edit set image controller=${IMG} 45 | kustomize build config/crd -o assets/crd.yaml 46 | kustomize build config/default -o assets/elasticserving_operator.yaml 47 | kustomize build config/serving -o assets/sample_service.yaml 48 | 49 | # Generate manifests e.g. CRD, RBAC etc. 50 | manifests: controller-gen 51 | $(CONTROLLER_GEN) $(CRD_OPTIONS) rbac:roleName=manager-role webhook paths="./..." output:crd:artifacts:config=config/crd/bases 52 | 53 | # Run go fmt against code 54 | fmt: 55 | go fmt ./... 56 | 57 | # Run go vet against code 58 | vet: 59 | go vet ./... 60 | 61 | # Generate code 62 | generate: controller-gen 63 | $(CONTROLLER_GEN) object:headerFile="hack/boilerplate.go.txt" paths="./..." 64 | 65 | # Build the docker image 66 | docker-build: test 67 | docker build . 
-t ${IMG} 68 | 69 | # Push the docker image 70 | docker-push: 71 | docker push ${IMG} 72 | 73 | update-api-doc: 74 | bash docs/api-doc-gen/gen_api_doc.sh 75 | 76 | # find or download controller-gen 77 | # download controller-gen if necessary 78 | controller-gen: 79 | ifeq (, $(shell which controller-gen)) 80 | @{ \ 81 | set -e ;\ 82 | CONTROLLER_GEN_TMP_DIR=$$(mktemp -d) ;\ 83 | cd $$CONTROLLER_GEN_TMP_DIR ;\ 84 | go mod init tmp ;\ 85 | go get sigs.k8s.io/controller-tools/cmd/controller-gen@5c0c6ae3b64bccf89fb16353880376d3ce9d9128;\ 86 | rm -rf $$CONTROLLER_GEN_TMP_DIR ;\ 87 | } 88 | CONTROLLER_GEN=$(GOBIN)/controller-gen 89 | else 90 | CONTROLLER_GEN=$(shell which controller-gen) 91 | endif 92 | -------------------------------------------------------------------------------- /PROJECT: -------------------------------------------------------------------------------- 1 | domain: paddlepaddle.org 2 | multigroup: true 3 | repo: ElasticServing 4 | resources: 5 | - group: elasticserving 6 | kind: PaddleService 7 | version: v1 8 | version: "2" 9 | -------------------------------------------------------------------------------- /README-zh_CN.md: -------------------------------------------------------------------------------- 1 | # ElasticServing 2 | 3 | [English](./README.md) | 简体中文 4 | 5 | ElasticServing 通过提供自定义资源 PaddleService,支持用户在 Kubernetes 集群上使用 TensorFlow、ONNX、PaddlePaddle 等主流框架部署模型服务。 6 | ElasticServing 构建在 [Knative Serving](https://github.com/knative/serving) 之上,其提供了自动扩缩容、容错、健康检查等功能,并且支持在异构硬件上部署服务,如 Nvidia GPU 或昆仑芯片。 7 | ElasticServing 采用的是 serverless 架构,当没有预估请求时,服务规模可以缩容到零,以节约集群资源,同时它还支持蓝绿发版等功能。 8 | 9 | ## 快速开始 10 | 11 | 本示例使用的模型服务镜像基于 [Paddle Serving CPU 版](https://github.com/PaddlePaddle/Serving/blob/v0.6.0/README_CN.md) 构建而成。 12 | 13 | 更多详细信息请查看 [Resnet50](https://github.com/PaddlePaddle/Serving/tree/v0.6.0/python/examples/imagenet) 和 [中文分词模型](https://github.com/PaddlePaddle/Serving#-pre-built-services-with-paddle-serving)。 14 | 15 | ### 前提条件 16 | - Kubernetes >= 1.18 17 | - 安装 Knative Serving 依赖的网络插件 18 | 请参考 [安装指南](https://knative.dev/v0.21-docs/install/any-kubernetes-cluster/#installing-the-serving-component) 或者执行脚本: `hack/install_knative.sh`(knative serving v0.21 with istio) / `hack/install_knative_kourier.sh`(knative serving v0.22 with kourier)。
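安装完成后,可以先用下面的命令确认 Knative Serving 组件是否就绪(示例命令,仅供参考,假设 Knative 安装在默认的 `knative-serving` 命名空间):

```bash
# 查看 Knative Serving 核心组件,所有 Pod 均应处于 Running 状态
kubectl get pods -n knative-serving

# 确认 Knative Service(ksvc)等资源类型已注册到集群
kubectl api-resources --api-group=serving.knative.dev
```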
19 | 20 | ### 安装 21 | 22 | ```bash 23 | # 下载 ElasticServing 24 | git clone https://github.com/PaddleFlow/ElasticServing.git 25 | cd ElasticServing 26 | 27 | # 安装 CRD 28 | kubectl apply -f assets/crd.yaml 29 | 30 | # 安装自定义 Controller 31 | kubectl apply -f assets/elasticserving_operator.yaml 32 | ``` 33 | 34 | ### 使用示例 35 | 36 | ```bash 37 | # 部署 paddle service 38 | kubectl apply -f assets/sample_service.yaml 39 | ``` 40 | 41 | #### 检查服务状态 42 | 43 | ```bash 44 | # 查看命名空间 paddleservice-system 下的 Service 45 | kubectl get svc -n paddleservice-system 46 | 47 | # 查看命名空间 paddleservice-system 下的 knative service 48 | kubectl get ksvc -n paddleservice-system 49 | 50 | # 查看命名空间 paddleservice-system 下的 pod 51 | kubectl get pods -n paddleservice-system 52 | 53 | # 查看 Paddle Service Pod 的日志信息 54 | kubectl logs <pod-name> -n paddleservice-system -c paddleserving 55 | 56 | ``` 57 | 58 | 本示例使用 Istio 插件作为 Knative Serving 的网络方案,您也可以使用其他的网络插件,比如 Kourier 和 Ambassador。 59 | 60 | ```bash 61 | # Find the public IP address of the gateway (make a note of the EXTERNAL-IP field in the output) 62 | kubectl get service istio-ingressgateway --namespace=istio-system 63 | # If the EXTERNAL-IP is pending, get the IP with the following command 64 | kubectl get po -l istio=ingressgateway -n istio-system -o jsonpath='{.items[0].status.hostIP}' 65 | # If you are using minikube, the public IP address of the gateway will be listed once you execute the following command (four URLs will be listed; the second one is usually the right choice) 66 | minikube service --url istio-ingressgateway -n istio-system 67 | 68 | # Get the port of the gateway 69 | kubectl -n istio-system get service istio-ingressgateway -o jsonpath='{.spec.ports[?(@.name=="http2")].nodePort}' 70 | 71 | # Find the URL of the application. The expected result may be http://paddleservice-sample.paddleservice-system.example.com 72 | kubectl get ksvc paddle-sample-service -n paddleservice-system 73 | ``` 74 | 75 | #### Resnet_50_vd 示例 76 | 编写 `sample_service.yaml` 如下: 77 | 78 | ```yaml 79 | apiVersion: v1 80 | kind: Namespace 81 | metadata: 82 | labels: 83 | istio-injection: enabled 84 | name: paddleservice-system 85 | --- 86 | apiVersion: elasticserving.paddlepaddle.org/v1 87 | kind: PaddleService 88 | metadata: 89 | name: paddleservice-sample 90 | namespace: paddleservice-system 91 | spec: 92 | canary: 93 | arg: cd Serving/python/examples/imagenet && python3 resnet50_web_service_canary.py 94 | ResNet50_vd_model cpu 9292 95 | containerImage: jinmionhaobaidu/resnetcanary 96 | port: 9292 97 | tag: latest 98 | canaryTrafficPercent: 50 99 | default: 100 | arg: cd Serving/python/examples/imagenet && python3 resnet50_web_service.py ResNet50_vd_model 101 | cpu 9292 102 | containerImage: jinmionhaobaidu/resnet 103 | port: 9292 104 | tag: latest 105 | runtimeVersion: paddleserving 106 | service: 107 | minScale: 0 108 | window: 10s 109 | ``` 110 | 111 | ```bash 112 | # Start to send data to the server. <IP> is what has been got in the first or the second command. 
113 | curl -H "host:paddleservice-sample.paddleservice-system.example.com" -H "Content-Type:application/json" -X POST -d '{"feed":[{"image": "https://paddle-serving.bj.bcebos.com/imagenet-example/daisy.jpg"}], "fetch": ["score"]}' http://<IP>:<Port>/image/prediction 114 | ``` 115 | 116 | ##### 输出的结果 117 | ``` 118 | # 期望的输出结果如下 119 | 120 | default: 121 | {"result":{"label":["daisy"],"prob":[0.9341399073600769]}} 122 | 123 | canary: 124 | {"result":{"isCanary":["true"],"label":["daisy"],"prob":[0.9341399073600769]}} 125 | ``` 126 | 127 | ### 创建你自己的 PaddleService 128 | 129 | 安装好 CRD ```kubectl apply -f assets/crd.yaml``` 和 Controller ```kubectl apply -f assets/elasticserving_operator.yaml``` 后,您可以通过编写如下所示的 Yaml 文件来创建 PaddleService。 130 | 131 | example.yaml 132 | 133 | ```yaml 134 | apiVersion: v1 135 | kind: Namespace 136 | metadata: 137 | labels: 138 | istio-injection: enabled 139 | name: paddleservice-system 140 | --- 141 | apiVersion: elasticserving.paddlepaddle.org/v1 142 | kind: PaddleService 143 | metadata: 144 | name: paddleservice-sample 145 | namespace: paddleservice-system 146 | spec: 147 | canary: 148 | arg: python3 Serving/python/examples/lac/lac_web_service_canary.py lac_model/ 149 | lac_workdir 9292 150 | containerImage: jinmionhaobaidu/pdservinglaccanary 151 | port: 9292 152 | tag: latest 153 | canaryTrafficPercent: 50 154 | default: 155 | arg: python3 Serving/python/examples/lac/lac_web_service.py lac_model/ lac_workdir 156 | 9292 157 | containerImage: jinmionhaobaidu/pdservinglac 158 | port: 9292 159 | tag: latest 160 | runtimeVersion: paddleserving 161 | service: 162 | minScale: 0 163 | maxScale: 0 164 | autoscaler: "kpa" 165 | metric: "concurrency" # scaling metric 166 | window: "60s" 167 | panicWindow: 10 # percentage of stable window 168 | target: 100 169 | targetUtilization: 70 170 | ``` 171 | 172 | 注意:上述 Yaml 文件 Spec 部分只有 `default` 是必填字段,其他字段可以为空。如果您自己的 PaddleService 不需要 `canary` 和 `canaryTrafficPercent` 字段,可以不填。 173 | 174 | 175 | 执行如下命令来创建 PaddleService: 176 | 177 | ```bash 178 | kubectl apply -f /dir/to/this/yaml/example.yaml 179 | ``` 180 | 181 | ## 更多示例 182 | 183 | - [BERT](./docs/zh_CN/examples/bert.md): 语义理解预测服务 184 | - [LAC](./docs/zh_CN/examples/lac.md): 中文分词模型 185 | - [Criteo Ctr](./docs/zh_CN/examples/criteo_ctr.md):CTR预估服务 186 | - [Wide & Deep](./docs/zh_CN/examples/wide_deep.md): Wide & Deep Pipeline 187 | 188 | ## 更多信息 189 | 190 | 关于自定义资源 PaddleService 的更多信息,请查看 [API docs](./docs/en/api_doc.md) 文档。 191 | 192 | ## License 193 | 194 | 该开源项目遵循 [Apache-2.0 license](https://github.com/PaddleFlow/ElasticServing/blob/main/LICENSE) 协议。 195 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ElasticServing 2 | 3 | English | [简体中文](./README-zh_CN.md) 4 | 5 | ElasticServing provides a Kubernetes custom resource definition (CRD) for serving machine learning (ML) models built with mainstream frameworks such as TensorFlow, ONNX, and PaddlePaddle. It encapsulates the complexity of autoscaling, fault tolerance, and health checking, and uses kustomize for configuration reconciliation. It also natively supports heterogeneous hardware such as Nvidia GPUs and Kunlun chips. With ElasticServing it's easy to scale to zero and do canary launches for ML deployments. 6 | 7 | ## Quick Start 8 | 9 | The image used in our sample service is based on [Paddle Serving Image for CPU](https://github.com/PaddlePaddle/Serving#installation). 
10 | 11 | The sample used here is [Resnet50 in ImageNet](https://github.com/PaddlePaddle/Serving/tree/v0.6.0/python/examples/imagenet) and [Chinese Word Segmentation](https://github.com/PaddlePaddle/Serving#-pre-built-services-with-paddle-serving). The preparation work is done in the docker entrypoint and can be modified via `arg`. 12 | 13 | ### Prerequisites 14 | - Kubernetes >= 1.18 15 | - Knative Serving installed with a networking layer. 16 | You can refer to the [installation guide](https://knative.dev/v0.21-docs/install/any-kubernetes-cluster/#installing-the-serving-component) or run `hack/install_knative.sh`(knative serving v0.21 with istio) / `hack/install_knative_kourier.sh`(knative serving v0.22 with kourier). 17 | 18 | ### Installation 19 | 20 | ```bash 21 | # Download ElasticServing 22 | git clone https://github.com/PaddleFlow/ElasticServing.git 23 | cd ElasticServing 24 | 25 | # Install elastic serving CRD 26 | kubectl apply -f assets/crd.yaml 27 | 28 | # Install elastic serving controller manager 29 | kubectl apply -f assets/elasticserving_operator.yaml 30 | ``` 31 | 32 | ### Run Sample 33 | 34 | ```bash 35 | # Deploy paddle service 36 | kubectl apply -f assets/sample_service.yaml 37 | ``` 38 | 39 | #### Sample Service Test 40 | 41 | ```bash 42 | # Check service in namespace paddleservice-system 43 | kubectl get svc -n paddleservice-system 44 | 45 | # Check knative service in namespace paddleservice-system 46 | kubectl get ksvc -n paddleservice-system 47 | 48 | # Check pods in namespace paddleservice-system 49 | kubectl get pods -n paddleservice-system 50 | 51 | # Check if the preparation work has been finished 52 | kubectl logs <pod-name> -n paddleservice-system -c paddleserving 53 | ``` 54 | 55 | We use Istio as the networking layer for Knative Serving. It's also fine for users to use others, e.g., Kourier, Contour, or Ambassador. 56 | 57 | ```bash 58 | # Find the public IP address of the gateway (make a note of the EXTERNAL-IP field in the output) 59 | kubectl get service istio-ingressgateway --namespace=istio-system 60 | # If the EXTERNAL-IP is pending, get the IP with the following command 61 | kubectl get po -l istio=ingressgateway -n istio-system -o jsonpath='{.items[0].status.hostIP}' 62 | # If you are using minikube, the public IP address of the gateway will be listed once you execute the following command (four URLs will be listed; the second one is usually the right choice) 63 | minikube service --url istio-ingressgateway -n istio-system 64 | 65 | # Get the port of the gateway 66 | kubectl -n istio-system get service istio-ingressgateway -o jsonpath='{.spec.ports[?(@.name=="http2")].nodePort}' 67 | 68 | # Find the URL of the application. 
The expected result may be http://paddleservice-sample.paddleservice-system.example.com 69 | kubectl get ksvc paddle-sample-service -n paddleservice-system 70 | ``` 71 | 72 | #### Resnet_50_vd sample 73 | The related `sample_service.yaml` is as follows: 74 | ```yaml 75 | apiVersion: v1 76 | kind: Namespace 77 | metadata: 78 | labels: 79 | istio-injection: enabled 80 | name: paddleservice-system 81 | --- 82 | apiVersion: elasticserving.paddlepaddle.org/v1 83 | kind: PaddleService 84 | metadata: 85 | name: paddleservice-sample 86 | namespace: paddleservice-system 87 | spec: 88 | canary: 89 | arg: cd Serving/python/examples/imagenet && python3 resnet50_web_service_canary.py 90 | ResNet50_vd_model cpu 9292 91 | containerImage: jinmionhaobaidu/resnetcanary 92 | port: 9292 93 | tag: latest 94 | canaryTrafficPercent: 50 95 | default: 96 | arg: cd Serving/python/examples/imagenet && python3 resnet50_web_service.py ResNet50_vd_model 97 | cpu 9292 98 | containerImage: jinmionhaobaidu/resnet 99 | port: 9292 100 | tag: latest 101 | runtimeVersion: paddleserving 102 | service: 103 | minScale: 0 104 | window: 10s 105 | ``` 106 | ```bash 107 | # Start to send data to the server. <IP> is what has been got in the first or the second command. 108 | curl -H "host:paddleservice-sample.paddleservice-system.example.com" -H "Content-Type:application/json" -X POST -d '{"feed":[{"image": "https://paddle-serving.bj.bcebos.com/imagenet-example/daisy.jpg"}], "fetch": ["score"]}' http://<IP>:<Port>/image/prediction 109 | ``` 110 | 111 | ##### Expected Result 112 | ``` 113 | # The expected output should be 114 | 115 | default: 116 | {"result":{"label":["daisy"],"prob":[0.9341399073600769]}} 117 | 118 | canary: 119 | {"result":{"isCanary":["true"],"label":["daisy"],"prob":[0.9341399073600769]}} 120 | ``` 121 | 122 | ### Create your own PaddleService 123 | 124 | After installing the CRD ```kubectl apply -f assets/crd.yaml``` and the controller manager ```kubectl apply -f assets/elasticserving_operator.yaml```, you can build your own PaddleService by applying a YAML file like the following one. 125 | 126 | example.yaml 127 | 128 | ```yaml 129 | apiVersion: v1 130 | kind: Namespace 131 | metadata: 132 | labels: 133 | istio-injection: enabled 134 | name: paddleservice-system 135 | --- 136 | apiVersion: elasticserving.paddlepaddle.org/v1 137 | kind: PaddleService 138 | metadata: 139 | name: paddleservice-sample 140 | namespace: paddleservice-system 141 | spec: 142 | canary: 143 | arg: python3 Serving/python/examples/lac/lac_web_service_canary.py lac_model/ 144 | lac_workdir 9292 145 | containerImage: jinmionhaobaidu/pdservinglaccanary 146 | port: 9292 147 | tag: latest 148 | canaryTrafficPercent: 50 149 | default: 150 | arg: python3 Serving/python/examples/lac/lac_web_service.py lac_model/ lac_workdir 151 | 9292 152 | containerImage: jinmionhaobaidu/pdservinglac 153 | port: 9292 154 | tag: latest 155 | runtimeVersion: paddleserving 156 | service: 157 | minScale: 0 158 | maxScale: 0 159 | autoscaler: "kpa" 160 | metric: "concurrency" # scaling metric 161 | window: "60s" 162 | panicWindow: 10 # percentage of stable window 163 | target: 100 164 | targetUtilization: 70 165 | ``` 166 | 167 | Please note that only the field `default` is required. Other fields can be empty and default values will be set. Fields `canary` and `canaryTrafficPercent` are not required if your own PaddleService doesn't need them. 
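For reference, a PaddleService can be trimmed down to just the required `default` section. The sketch below is an illustrative, untested example that reuses the sample image from this repository; the resource name `paddleservice-minimal` is hypothetical:

```yaml
apiVersion: elasticserving.paddlepaddle.org/v1
kind: PaddleService
metadata:
  name: paddleservice-minimal   # hypothetical name, for illustration only
  namespace: paddleservice-system
spec:
  default:
    arg: cd Serving/python/examples/imagenet && python3 resnet50_web_service.py ResNet50_vd_model cpu 9292
    containerImage: registry.baidubce.com/paddleflow-public/resnet-serving
    port: 9292
    tag: latest
  runtimeVersion: paddleserving
```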
168 | 169 | Execute the following command: 170 | 171 | ```bash 172 | kubectl apply -f /dir/to/this/yaml/example.yaml 173 | ``` 174 | 175 | ## More Examples 176 | 177 | - [BERT](./docs/en/examples/bert.md): Semantic Understanding Prediction 178 | - [LAC](./docs/en/examples/lac.md): Chinese Word Segmentation 179 | - [Criteo Ctr](./docs/en/examples/criteo_ctr.md): CTR Prediction Service 180 | - [Wide & Deep](./docs/en/examples/wide_deep.md): Wide & Deep Pipeline 181 | 182 | ## More Information 183 | 184 | Please refer to the [API docs](./docs/en/api_doc.md) for more information about custom resource definition. 185 | 186 | ## License 187 | 188 | This project is under the [Apache-2.0 license](https://github.com/PaddleFlow/ElasticServing/blob/main/LICENSE). 189 | -------------------------------------------------------------------------------- /assets/crd.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apiextensions.k8s.io/v1beta1 2 | kind: CustomResourceDefinition 3 | metadata: 4 | annotations: 5 | controller-gen.kubebuilder.io/version: v0.3.1-0.20200528125929-5c0c6ae3b64b 6 | creationTimestamp: null 7 | name: paddleservices.elasticserving.paddlepaddle.org 8 | spec: 9 | group: elasticserving.paddlepaddle.org 10 | names: 11 | kind: PaddleService 12 | listKind: PaddleServiceList 13 | plural: paddleservices 14 | singular: paddleservice 15 | scope: Namespaced 16 | subresources: 17 | scale: 18 | specReplicasPath: .spec.replicas 19 | statusReplicasPath: .status.replicas 20 | status: {} 21 | validation: 22 | openAPIV3Schema: 23 | properties: 24 | apiVersion: 25 | type: string 26 | kind: 27 | type: string 28 | metadata: 29 | type: object 30 | spec: 31 | properties: 32 | canary: 33 | properties: 34 | arg: 35 | type: string 36 | containerImage: 37 | type: string 38 | port: 39 | format: int32 40 | type: integer 41 | tag: 42 | type: string 43 | required: 44 | - containerImage 45 | - port 46 | - tag 47 | type: object 48 | canaryTrafficPercent: 49 | type: integer 50 | default: 51 | properties: 52 | arg: 53 | type: string 54 | containerImage: 55 | type: string 56 | port: 57 | format: int32 58 | type: integer 59 | tag: 60 | type: string 61 | required: 62 | - containerImage 63 | - port 64 | - tag 65 | type: object 66 | resources: 67 | properties: 68 | limits: 69 | additionalProperties: 70 | anyOf: 71 | - type: integer 72 | - type: string 73 | pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ 74 | x-kubernetes-int-or-string: true 75 | type: object 76 | requests: 77 | additionalProperties: 78 | anyOf: 79 | - type: integer 80 | - type: string 81 | pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ 82 | x-kubernetes-int-or-string: true 83 | type: object 84 | type: object 85 | runtimeVersion: 86 | type: string 87 | service: 88 | properties: 89 | autoscaler: 90 | enum: 91 | - kpa.autoscaling.knative.dev 92 | - hpa.autoscaling.knative.dev 93 | type: string 94 | maxScale: 95 | type: integer 96 | metric: 97 | enum: 98 | - concurrency 99 | - rps 100 | - cpu 101 | type: string 102 | minScale: 103 | type: integer 104 | panicThreshold: 105 | type: string 106 | panicWindow: 107 | type: string 108 | target: 109 | type: integer 110 | targetUtilization: 111 | type: string 112 | window: 113 | type: string 114 | type: object 115 | volumeMounts: 116 | items: 117 | properties: 118 | mountPath: 119 | type: string 120 | mountPropagation: 
121 | type: string 122 | name: 123 | type: string 124 | readOnly: 125 | type: boolean 126 | subPath: 127 | type: string 128 | subPathExpr: 129 | type: string 130 | required: 131 | - mountPath 132 | - name 133 | type: object 134 | type: array 135 | volumes: 136 | items: 137 | properties: 138 | awsElasticBlockStore: 139 | properties: 140 | fsType: 141 | type: string 142 | partition: 143 | format: int32 144 | type: integer 145 | readOnly: 146 | type: boolean 147 | volumeID: 148 | type: string 149 | required: 150 | - volumeID 151 | type: object 152 | azureDisk: 153 | properties: 154 | cachingMode: 155 | type: string 156 | diskName: 157 | type: string 158 | diskURI: 159 | type: string 160 | fsType: 161 | type: string 162 | kind: 163 | type: string 164 | readOnly: 165 | type: boolean 166 | required: 167 | - diskName 168 | - diskURI 169 | type: object 170 | azureFile: 171 | properties: 172 | readOnly: 173 | type: boolean 174 | secretName: 175 | type: string 176 | shareName: 177 | type: string 178 | required: 179 | - secretName 180 | - shareName 181 | type: object 182 | cephfs: 183 | properties: 184 | monitors: 185 | items: 186 | type: string 187 | type: array 188 | path: 189 | type: string 190 | readOnly: 191 | type: boolean 192 | secretFile: 193 | type: string 194 | secretRef: 195 | properties: 196 | name: 197 | type: string 198 | type: object 199 | user: 200 | type: string 201 | required: 202 | - monitors 203 | type: object 204 | cinder: 205 | properties: 206 | fsType: 207 | type: string 208 | readOnly: 209 | type: boolean 210 | secretRef: 211 | properties: 212 | name: 213 | type: string 214 | type: object 215 | volumeID: 216 | type: string 217 | required: 218 | - volumeID 219 | type: object 220 | configMap: 221 | properties: 222 | defaultMode: 223 | format: int32 224 | type: integer 225 | items: 226 | items: 227 | properties: 228 | key: 229 | type: string 230 | mode: 231 | format: int32 232 | type: integer 233 | path: 234 | type: string 235 | required: 236 | - key 237 | - path 238 | type: object 239 | type: array 240 | name: 241 | type: string 242 | optional: 243 | type: boolean 244 | type: object 245 | csi: 246 | properties: 247 | driver: 248 | type: string 249 | fsType: 250 | type: string 251 | nodePublishSecretRef: 252 | properties: 253 | name: 254 | type: string 255 | type: object 256 | readOnly: 257 | type: boolean 258 | volumeAttributes: 259 | additionalProperties: 260 | type: string 261 | type: object 262 | required: 263 | - driver 264 | type: object 265 | downwardAPI: 266 | properties: 267 | defaultMode: 268 | format: int32 269 | type: integer 270 | items: 271 | items: 272 | properties: 273 | fieldRef: 274 | properties: 275 | apiVersion: 276 | type: string 277 | fieldPath: 278 | type: string 279 | required: 280 | - fieldPath 281 | type: object 282 | mode: 283 | format: int32 284 | type: integer 285 | path: 286 | type: string 287 | resourceFieldRef: 288 | properties: 289 | containerName: 290 | type: string 291 | divisor: 292 | anyOf: 293 | - type: integer 294 | - type: string 295 | pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ 296 | x-kubernetes-int-or-string: true 297 | resource: 298 | type: string 299 | required: 300 | - resource 301 | type: object 302 | required: 303 | - path 304 | type: object 305 | type: array 306 | type: object 307 | emptyDir: 308 | properties: 309 | medium: 310 | type: string 311 | sizeLimit: 312 | anyOf: 313 | - type: integer 314 | - type: string 315 | pattern: 
^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ 316 | x-kubernetes-int-or-string: true 317 | type: object 318 | fc: 319 | properties: 320 | fsType: 321 | type: string 322 | lun: 323 | format: int32 324 | type: integer 325 | readOnly: 326 | type: boolean 327 | targetWWNs: 328 | items: 329 | type: string 330 | type: array 331 | wwids: 332 | items: 333 | type: string 334 | type: array 335 | type: object 336 | flexVolume: 337 | properties: 338 | driver: 339 | type: string 340 | fsType: 341 | type: string 342 | options: 343 | additionalProperties: 344 | type: string 345 | type: object 346 | readOnly: 347 | type: boolean 348 | secretRef: 349 | properties: 350 | name: 351 | type: string 352 | type: object 353 | required: 354 | - driver 355 | type: object 356 | flocker: 357 | properties: 358 | datasetName: 359 | type: string 360 | datasetUUID: 361 | type: string 362 | type: object 363 | gcePersistentDisk: 364 | properties: 365 | fsType: 366 | type: string 367 | partition: 368 | format: int32 369 | type: integer 370 | pdName: 371 | type: string 372 | readOnly: 373 | type: boolean 374 | required: 375 | - pdName 376 | type: object 377 | gitRepo: 378 | properties: 379 | directory: 380 | type: string 381 | repository: 382 | type: string 383 | revision: 384 | type: string 385 | required: 386 | - repository 387 | type: object 388 | glusterfs: 389 | properties: 390 | endpoints: 391 | type: string 392 | path: 393 | type: string 394 | readOnly: 395 | type: boolean 396 | required: 397 | - endpoints 398 | - path 399 | type: object 400 | hostPath: 401 | properties: 402 | path: 403 | type: string 404 | type: 405 | type: string 406 | required: 407 | - path 408 | type: object 409 | iscsi: 410 | properties: 411 | chapAuthDiscovery: 412 | type: boolean 413 | chapAuthSession: 414 | type: boolean 415 | fsType: 416 | type: string 417 | initiatorName: 418 | type: string 419 | iqn: 420 | type: string 421 | iscsiInterface: 422 | type: string 423 | lun: 424 | format: int32 425 | type: integer 426 | portals: 427 | items: 428 | type: string 429 | type: array 430 | readOnly: 431 | type: boolean 432 | secretRef: 433 | properties: 434 | name: 435 | type: string 436 | type: object 437 | targetPortal: 438 | type: string 439 | required: 440 | - iqn 441 | - lun 442 | - targetPortal 443 | type: object 444 | name: 445 | type: string 446 | nfs: 447 | properties: 448 | path: 449 | type: string 450 | readOnly: 451 | type: boolean 452 | server: 453 | type: string 454 | required: 455 | - path 456 | - server 457 | type: object 458 | persistentVolumeClaim: 459 | properties: 460 | claimName: 461 | type: string 462 | readOnly: 463 | type: boolean 464 | required: 465 | - claimName 466 | type: object 467 | photonPersistentDisk: 468 | properties: 469 | fsType: 470 | type: string 471 | pdID: 472 | type: string 473 | required: 474 | - pdID 475 | type: object 476 | portworxVolume: 477 | properties: 478 | fsType: 479 | type: string 480 | readOnly: 481 | type: boolean 482 | volumeID: 483 | type: string 484 | required: 485 | - volumeID 486 | type: object 487 | projected: 488 | properties: 489 | defaultMode: 490 | format: int32 491 | type: integer 492 | sources: 493 | items: 494 | properties: 495 | configMap: 496 | properties: 497 | items: 498 | items: 499 | properties: 500 | key: 501 | type: string 502 | mode: 503 | format: int32 504 | type: integer 505 | path: 506 | type: string 507 | required: 508 | - key 509 | - path 510 | type: object 511 | type: array 512 | name: 513 | type: 
string 514 | optional: 515 | type: boolean 516 | type: object 517 | downwardAPI: 518 | properties: 519 | items: 520 | items: 521 | properties: 522 | fieldRef: 523 | properties: 524 | apiVersion: 525 | type: string 526 | fieldPath: 527 | type: string 528 | required: 529 | - fieldPath 530 | type: object 531 | mode: 532 | format: int32 533 | type: integer 534 | path: 535 | type: string 536 | resourceFieldRef: 537 | properties: 538 | containerName: 539 | type: string 540 | divisor: 541 | anyOf: 542 | - type: integer 543 | - type: string 544 | pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ 545 | x-kubernetes-int-or-string: true 546 | resource: 547 | type: string 548 | required: 549 | - resource 550 | type: object 551 | required: 552 | - path 553 | type: object 554 | type: array 555 | type: object 556 | secret: 557 | properties: 558 | items: 559 | items: 560 | properties: 561 | key: 562 | type: string 563 | mode: 564 | format: int32 565 | type: integer 566 | path: 567 | type: string 568 | required: 569 | - key 570 | - path 571 | type: object 572 | type: array 573 | name: 574 | type: string 575 | optional: 576 | type: boolean 577 | type: object 578 | serviceAccountToken: 579 | properties: 580 | audience: 581 | type: string 582 | expirationSeconds: 583 | format: int64 584 | type: integer 585 | path: 586 | type: string 587 | required: 588 | - path 589 | type: object 590 | type: object 591 | type: array 592 | required: 593 | - sources 594 | type: object 595 | quobyte: 596 | properties: 597 | group: 598 | type: string 599 | readOnly: 600 | type: boolean 601 | registry: 602 | type: string 603 | tenant: 604 | type: string 605 | user: 606 | type: string 607 | volume: 608 | type: string 609 | required: 610 | - registry 611 | - volume 612 | type: object 613 | rbd: 614 | properties: 615 | fsType: 616 | type: string 617 | image: 618 | type: string 619 | keyring: 620 | type: string 621 | monitors: 622 | items: 623 | type: string 624 | type: array 625 | pool: 626 | type: string 627 | readOnly: 628 | type: boolean 629 | secretRef: 630 | properties: 631 | name: 632 | type: string 633 | type: object 634 | user: 635 | type: string 636 | required: 637 | - image 638 | - monitors 639 | type: object 640 | scaleIO: 641 | properties: 642 | fsType: 643 | type: string 644 | gateway: 645 | type: string 646 | protectionDomain: 647 | type: string 648 | readOnly: 649 | type: boolean 650 | secretRef: 651 | properties: 652 | name: 653 | type: string 654 | type: object 655 | sslEnabled: 656 | type: boolean 657 | storageMode: 658 | type: string 659 | storagePool: 660 | type: string 661 | system: 662 | type: string 663 | volumeName: 664 | type: string 665 | required: 666 | - gateway 667 | - secretRef 668 | - system 669 | type: object 670 | secret: 671 | properties: 672 | defaultMode: 673 | format: int32 674 | type: integer 675 | items: 676 | items: 677 | properties: 678 | key: 679 | type: string 680 | mode: 681 | format: int32 682 | type: integer 683 | path: 684 | type: string 685 | required: 686 | - key 687 | - path 688 | type: object 689 | type: array 690 | optional: 691 | type: boolean 692 | secretName: 693 | type: string 694 | type: object 695 | storageos: 696 | properties: 697 | fsType: 698 | type: string 699 | readOnly: 700 | type: boolean 701 | secretRef: 702 | properties: 703 | name: 704 | type: string 705 | type: object 706 | volumeName: 707 | type: string 708 | volumeNamespace: 709 | type: string 710 | type: object 711 | vsphereVolume: 712 | 
properties: 713 | fsType: 714 | type: string 715 | storagePolicyID: 716 | type: string 717 | storagePolicyName: 718 | type: string 719 | volumePath: 720 | type: string 721 | required: 722 | - volumePath 723 | type: object 724 | required: 725 | - name 726 | type: object 727 | type: array 728 | workingDir: 729 | type: string 730 | required: 731 | - default 732 | type: object 733 | status: 734 | properties: 735 | address: 736 | properties: 737 | url: 738 | type: Any 739 | type: object 740 | annotations: 741 | additionalProperties: 742 | type: string 743 | type: object 744 | canary: 745 | properties: 746 | name: 747 | type: string 748 | type: object 749 | conditions: 750 | items: 751 | properties: 752 | lastTransitionTime: 753 | type: Any 754 | message: 755 | type: string 756 | reason: 757 | type: string 758 | severity: 759 | type: string 760 | status: 761 | type: string 762 | type: 763 | type: string 764 | required: 765 | - status 766 | - type 767 | type: object 768 | type: array 769 | default: 770 | properties: 771 | name: 772 | type: string 773 | type: object 774 | observedGeneration: 775 | format: int64 776 | type: integer 777 | replicas: 778 | format: int32 779 | minimum: 0 780 | type: integer 781 | url: 782 | type: string 783 | type: object 784 | type: object 785 | version: v1 786 | versions: 787 | - name: v1 788 | served: true 789 | storage: true 790 | status: 791 | acceptedNames: 792 | kind: "" 793 | plural: "" 794 | conditions: null 795 | storedVersions: null 796 | -------------------------------------------------------------------------------- /assets/sample_service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | labels: 5 | istio-injection: enabled 6 | name: paddleservice-system 7 | --- 8 | apiVersion: elasticserving.paddlepaddle.org/v1 9 | kind: PaddleService 10 | metadata: 11 | name: paddleservice-sample 12 | namespace: paddleservice-system 13 | spec: 14 | canary: 15 | arg: cd Serving/python/examples/imagenet && python3 resnet50_web_service_canary.py 16 | ResNet50_vd_model cpu 9292 17 | containerImage: registry.baidubce.com/paddleflow-public/resnetcanary-serving 18 | port: 9292 19 | tag: latest 20 | canaryTrafficPercent: 50 21 | default: 22 | arg: cd Serving/python/examples/imagenet && python3 resnet50_web_service.py ResNet50_vd_model 23 | cpu 9292 24 | containerImage: registry.baidubce.com/paddleflow-public/resnet-serving 25 | port: 9292 26 | tag: latest 27 | runtimeVersion: paddleserving 28 | service: 29 | minScale: 0 30 | window: 10s 31 | -------------------------------------------------------------------------------- /config/certmanager/certificate.yaml: -------------------------------------------------------------------------------- 1 | # The following manifests contain a self-signed issuer CR and a certificate CR. 
2 | # More documentation can be found at https://docs.cert-manager.io 3 | # WARNING: Targets CertManager 0.11. Check https://docs.cert-manager.io/en/latest/tasks/upgrading/index.html for 4 | # breaking changes 5 | apiVersion: cert-manager.io/v1alpha2 6 | kind: Issuer 7 | metadata: 8 | name: selfsigned-issuer 9 | namespace: system 10 | spec: 11 | selfSigned: {} 12 | --- 13 | apiVersion: cert-manager.io/v1alpha2 14 | kind: Certificate 15 | metadata: 16 | name: serving-cert # this name should match the one that appears in kustomizeconfig.yaml 17 | namespace: system 18 | spec: 19 | # $(SERVICE_NAME) and $(SERVICE_NAMESPACE) will be substituted by kustomize 20 | dnsNames: 21 | - $(SERVICE_NAME).$(SERVICE_NAMESPACE).svc 22 | - $(SERVICE_NAME).$(SERVICE_NAMESPACE).svc.cluster.local 23 | issuerRef: 24 | kind: Issuer 25 | name: selfsigned-issuer 26 | secretName: webhook-server-cert # this secret will not be prefixed, since it's not managed by kustomize 27 | -------------------------------------------------------------------------------- /config/certmanager/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - certificate.yaml 3 | 4 | configurations: 5 | - kustomizeconfig.yaml 6 | -------------------------------------------------------------------------------- /config/certmanager/kustomizeconfig.yaml: -------------------------------------------------------------------------------- 1 | # This configuration is for teaching kustomize how to update name ref and var substitution 2 | nameReference: 3 | - kind: Issuer 4 | group: cert-manager.io 5 | fieldSpecs: 6 | - kind: Certificate 7 | group: cert-manager.io 8 | path: spec/issuerRef/name 9 | 10 | varReference: 11 | - kind: Certificate 12 | group: cert-manager.io 13 | path: spec/commonName 14 | - kind: Certificate 15 | group: cert-manager.io 16 | path: spec/dnsNames 17 | -------------------------------------------------------------------------------- /config/crd/kustomization.yaml: -------------------------------------------------------------------------------- 1 | # This kustomization.yaml is not intended to be run by itself, 2 | # since it depends on service name and namespace that are out of this kustomize package. 3 | # It should be run by config/default 4 | resources: 5 | - bases/elasticserving.paddlepaddle.org_paddleservices.yaml 6 | # +kubebuilder:scaffold:crdkustomizeresource 7 | 8 | patchesStrategicMerge: 9 | # [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix. 10 | # patches here are for enabling the conversion webhook for each CRD 11 | #- patches/webhook_in_paddles.yaml 12 | # +kubebuilder:scaffold:crdkustomizewebhookpatch 13 | 14 | # [CERTMANAGER] To enable webhook, uncomment all the sections with [CERTMANAGER] prefix. 15 | # patches here are for enabling the CA injection for each CRD 16 | #- patches/cainjection_in_paddles.yaml 17 | # +kubebuilder:scaffold:crdkustomizecainjectionpatch 18 | 19 | # the following config is for teaching kustomize how to do kustomization for CRDs. 
20 | configurations: 21 | - kustomizeconfig.yaml 22 | -------------------------------------------------------------------------------- /config/crd/kustomizeconfig.yaml: -------------------------------------------------------------------------------- 1 | # This file is for teaching kustomize how to substitute name and namespace reference in CRD 2 | nameReference: 3 | - kind: Service 4 | version: v1 5 | fieldSpecs: 6 | - kind: CustomResourceDefinition 7 | group: apiextensions.k8s.io 8 | path: spec/conversion/webhookClientConfig/service/name 9 | 10 | namespace: 11 | - kind: CustomResourceDefinition 12 | group: apiextensions.k8s.io 13 | path: spec/conversion/webhookClientConfig/service/namespace 14 | create: false 15 | 16 | varReference: 17 | - path: metadata/annotations 18 | -------------------------------------------------------------------------------- /config/crd/patches/cainjection_in_paddles.yaml: -------------------------------------------------------------------------------- 1 | # The following patch adds a directive for certmanager to inject CA into the CRD 2 | # CRD conversion requires k8s 1.13 or later. 3 | apiVersion: apiextensions.k8s.io/v1beta1 4 | kind: CustomResourceDefinition 5 | metadata: 6 | annotations: 7 | cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME) 8 | name: paddles.elasticserving.paddlepaddle.org 9 | -------------------------------------------------------------------------------- /config/crd/patches/webhook_in_paddles.yaml: -------------------------------------------------------------------------------- 1 | # The following patch enables conversion webhook for CRD 2 | # CRD conversion requires k8s 1.13 or later. 3 | apiVersion: apiextensions.k8s.io/v1beta1 4 | kind: CustomResourceDefinition 5 | metadata: 6 | name: paddles.elasticserving.paddlepaddle.org 7 | spec: 8 | conversion: 9 | strategy: Webhook 10 | webhookClientConfig: 11 | # this is "\n" used as a placeholder, otherwise it will be rejected by the apiserver for being blank, 12 | # but we're going to set it later using the cert-manager (or potentially a patch if not using cert-manager) 13 | caBundle: Cg== 14 | service: 15 | namespace: system 16 | name: webhook-service 17 | path: /convert 18 | -------------------------------------------------------------------------------- /config/default/kustomization.yaml: -------------------------------------------------------------------------------- 1 | # Adds namespace to all resources. 2 | namespace: elasticserving-system 3 | 4 | # Value of this field is prepended to the 5 | # names of all resources, e.g. a deployment named 6 | # "wordpress" becomes "alices-wordpress". 7 | # Note that it should also match with the prefix (text before '-') of the namespace 8 | # field above. 9 | namePrefix: elasticserving- 10 | 11 | # Labels to add to all resources and selectors. 12 | #commonLabels: 13 | # someName: someValue 14 | 15 | bases: 16 | - ../crd 17 | - ../rbac 18 | - ../manager 19 | # [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in 20 | # crd/kustomization.yaml 21 | #- ../webhook 22 | # [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'. 'WEBHOOK' components are required. 23 | #- ../certmanager 24 | # [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'. 25 | #- ../prometheus 26 | 27 | patchesStrategicMerge: 28 | # Protect the /metrics endpoint by putting it behind auth. 
29 | # If you want your controller-manager to expose the /metrics 30 | # endpoint w/o any authn/z, please comment the following line. 31 | - manager_auth_proxy_patch.yaml 32 | 33 | # [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in 34 | # crd/kustomization.yaml 35 | #- manager_webhook_patch.yaml 36 | 37 | # [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'. 38 | # Uncomment 'CERTMANAGER' sections in crd/kustomization.yaml to enable the CA injection in the admission webhooks. 39 | # 'CERTMANAGER' needs to be enabled to use ca injection 40 | #- webhookcainjection_patch.yaml 41 | 42 | # the following config is for teaching kustomize how to do var substitution 43 | vars: 44 | # [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER' prefix. 45 | #- name: CERTIFICATE_NAMESPACE # namespace of the certificate CR 46 | # objref: 47 | # kind: Certificate 48 | # group: cert-manager.io 49 | # version: v1alpha2 50 | # name: serving-cert # this name should match the one in certificate.yaml 51 | # fieldref: 52 | # fieldpath: metadata.namespace 53 | #- name: CERTIFICATE_NAME 54 | # objref: 55 | # kind: Certificate 56 | # group: cert-manager.io 57 | # version: v1alpha2 58 | # name: serving-cert # this name should match the one in certificate.yaml 59 | #- name: SERVICE_NAMESPACE # namespace of the service 60 | # objref: 61 | # kind: Service 62 | # version: v1 63 | # name: webhook-service 64 | # fieldref: 65 | # fieldpath: metadata.namespace 66 | #- name: SERVICE_NAME 67 | # objref: 68 | # kind: Service 69 | # version: v1 70 | # name: webhook-service 71 | -------------------------------------------------------------------------------- /config/default/manager_auth_proxy_patch.yaml: -------------------------------------------------------------------------------- 1 | # This patch injects a sidecar container which is an HTTP proxy for the 2 | # controller manager; it performs RBAC authorization against the Kubernetes API using SubjectAccessReviews. 
3 | apiVersion: apps/v1 4 | kind: Deployment 5 | metadata: 6 | name: controller-manager 7 | namespace: system 8 | spec: 9 | template: 10 | spec: 11 | containers: 12 | - name: kube-rbac-proxy 13 | image: registry.baidubce.com/paddleflow-public/kube-rbac-proxy:v0.5.0 14 | args: 15 | - "--secure-listen-address=0.0.0.0:8443" 16 | - "--upstream=http://127.0.0.1:8080/" 17 | - "--logtostderr=true" 18 | - "--v=10" 19 | ports: 20 | - containerPort: 8443 21 | name: https 22 | - name: manager 23 | args: 24 | - "--metrics-addr=127.0.0.1:8080" 25 | - "--enable-leader-election" 26 | -------------------------------------------------------------------------------- /config/default/manager_webhook_patch.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: controller-manager 5 | namespace: system 6 | spec: 7 | template: 8 | spec: 9 | containers: 10 | - name: manager 11 | ports: 12 | - containerPort: 9443 13 | name: webhook-server 14 | protocol: TCP 15 | volumeMounts: 16 | - mountPath: /tmp/k8s-webhook-server/serving-certs 17 | name: cert 18 | readOnly: true 19 | volumes: 20 | - name: cert 21 | secret: 22 | defaultMode: 420 23 | secretName: webhook-server-cert 24 | -------------------------------------------------------------------------------- /config/default/webhookcainjection_patch.yaml: -------------------------------------------------------------------------------- 1 | # This patch adds annotations to the admission webhook configs; 2 | # the variables $(CERTIFICATE_NAMESPACE) and $(CERTIFICATE_NAME) will be substituted by kustomize. 3 | apiVersion: admissionregistration.k8s.io/v1beta1 4 | kind: MutatingWebhookConfiguration 5 | metadata: 6 | name: mutating-webhook-configuration 7 | annotations: 8 | cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME) 9 | --- 10 | apiVersion: admissionregistration.k8s.io/v1beta1 11 | kind: ValidatingWebhookConfiguration 12 | metadata: 13 | name: validating-webhook-configuration 14 | annotations: 15 | cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME) 16 | -------------------------------------------------------------------------------- /config/manager/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - manager.yaml 3 | apiVersion: kustomize.config.k8s.io/v1beta1 4 | kind: Kustomization 5 | images: 6 | - name: controller 7 | newName: registry.baidubce.com/paddleflow-public/paddleservice-controller 8 | newTag: latest 9 | -------------------------------------------------------------------------------- /config/manager/manager.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | labels: 5 | control-plane: controller-manager 6 | name: system 7 | --- 8 | apiVersion: apps/v1 9 | kind: Deployment 10 | metadata: 11 | name: controller-manager 12 | namespace: system 13 | labels: 14 | control-plane: controller-manager 15 | spec: 16 | selector: 17 | matchLabels: 18 | control-plane: controller-manager 19 | replicas: 1 20 | template: 21 | metadata: 22 | labels: 23 | control-plane: controller-manager 24 | spec: 25 | containers: 26 | - command: 27 | - /manager 28 | args: 29 | - --enable-leader-election 30 | image: controller:latest 31 | name: manager 32 | resources: 33 | limits: 34 | cpu: 100m 35 | memory: 30Mi 36 | requests: 37 | cpu: 100m 38 | memory: 20Mi 39 | 
terminationGracePeriodSeconds: 10 40 | -------------------------------------------------------------------------------- /config/prometheus/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - monitor.yaml 3 | -------------------------------------------------------------------------------- /config/prometheus/monitor.yaml: -------------------------------------------------------------------------------- 1 | 2 | # Prometheus Monitor Service (Metrics) 3 | apiVersion: monitoring.coreos.com/v1 4 | kind: ServiceMonitor 5 | metadata: 6 | labels: 7 | control-plane: controller-manager 8 | name: controller-manager-metrics-monitor 9 | namespace: system 10 | spec: 11 | endpoints: 12 | - path: /metrics 13 | port: https 14 | selector: 15 | matchLabels: 16 | control-plane: controller-manager 17 | -------------------------------------------------------------------------------- /config/rbac/auth_proxy_client_clusterrole.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1beta1 2 | kind: ClusterRole 3 | metadata: 4 | name: metrics-reader 5 | rules: 6 | - nonResourceURLs: ["/metrics"] 7 | verbs: ["get"] 8 | -------------------------------------------------------------------------------- /config/rbac/auth_proxy_role.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRole 3 | metadata: 4 | name: proxy-role 5 | rules: 6 | - apiGroups: ["authentication.k8s.io"] 7 | resources: 8 | - tokenreviews 9 | verbs: ["create"] 10 | - apiGroups: ["authorization.k8s.io"] 11 | resources: 12 | - subjectaccessreviews 13 | verbs: ["create"] 14 | -------------------------------------------------------------------------------- /config/rbac/auth_proxy_role_binding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRoleBinding 3 | metadata: 4 | name: proxy-rolebinding 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: ClusterRole 8 | name: proxy-role 9 | subjects: 10 | - kind: ServiceAccount 11 | name: default 12 | namespace: system 13 | -------------------------------------------------------------------------------- /config/rbac/auth_proxy_service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | labels: 5 | control-plane: controller-manager 6 | name: controller-manager-metrics-service 7 | namespace: system 8 | spec: 9 | ports: 10 | - name: https 11 | port: 8443 12 | targetPort: https 13 | selector: 14 | control-plane: controller-manager 15 | -------------------------------------------------------------------------------- /config/rbac/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - role.yaml 3 | - role_binding.yaml 4 | - leader_election_role.yaml 5 | - leader_election_role_binding.yaml 6 | # Comment the following 4 lines if you want to disable 7 | # the auth proxy (https://github.com/brancz/kube-rbac-proxy) 8 | # which protects your /metrics endpoint. 
9 | - auth_proxy_service.yaml 10 | - auth_proxy_role.yaml 11 | - auth_proxy_role_binding.yaml 12 | - auth_proxy_client_clusterrole.yaml 13 | -------------------------------------------------------------------------------- /config/rbac/leader_election_role.yaml: -------------------------------------------------------------------------------- 1 | # permissions to do leader election. 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: Role 4 | metadata: 5 | name: leader-election-role 6 | rules: 7 | - apiGroups: 8 | - "" 9 | resources: 10 | - configmaps 11 | verbs: 12 | - get 13 | - list 14 | - watch 15 | - create 16 | - update 17 | - patch 18 | - delete 19 | - apiGroups: 20 | - "" 21 | resources: 22 | - configmaps/status 23 | verbs: 24 | - get 25 | - update 26 | - patch 27 | - apiGroups: 28 | - "" 29 | resources: 30 | - events 31 | verbs: 32 | - create 33 | -------------------------------------------------------------------------------- /config/rbac/leader_election_role_binding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: RoleBinding 3 | metadata: 4 | name: leader-election-rolebinding 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: Role 8 | name: leader-election-role 9 | subjects: 10 | - kind: ServiceAccount 11 | name: default 12 | namespace: system 13 | -------------------------------------------------------------------------------- /config/rbac/paddle_editor_role.yaml: -------------------------------------------------------------------------------- 1 | # permissions for end users to edit paddles. 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: ClusterRole 4 | metadata: 5 | name: paddle-editor-role 6 | rules: 7 | - apiGroups: 8 | - elasticserving.paddlepaddle.org 9 | resources: 10 | - paddles 11 | verbs: 12 | - create 13 | - delete 14 | - get 15 | - list 16 | - patch 17 | - update 18 | - watch 19 | - apiGroups: 20 | - elasticserving.paddlepaddle.org 21 | resources: 22 | - paddles/status 23 | verbs: 24 | - get 25 | -------------------------------------------------------------------------------- /config/rbac/paddle_viewer_role.yaml: -------------------------------------------------------------------------------- 1 | # permissions for end users to view paddles. 
2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: ClusterRole 4 | metadata: 5 | name: paddle-viewer-role 6 | rules: 7 | - apiGroups: 8 | - elasticserving.paddlepaddle.org 9 | resources: 10 | - paddles 11 | verbs: 12 | - get 13 | - list 14 | - watch 15 | - apiGroups: 16 | - elasticserving.paddlepaddle.org 17 | resources: 18 | - paddles/status 19 | verbs: 20 | - get 21 | -------------------------------------------------------------------------------- /config/rbac/role.yaml: -------------------------------------------------------------------------------- 1 | 2 | --- 3 | apiVersion: rbac.authorization.k8s.io/v1 4 | kind: ClusterRole 5 | metadata: 6 | creationTimestamp: null 7 | name: manager-role 8 | rules: 9 | - apiGroups: 10 | - "" 11 | resources: 12 | - configmaps 13 | verbs: 14 | - get 15 | - list 16 | - watch 17 | - apiGroups: 18 | - "" 19 | resources: 20 | - events 21 | verbs: 22 | - create 23 | - patch 24 | - apiGroups: 25 | - "" 26 | resources: 27 | - pods 28 | verbs: 29 | - '*' 30 | - apiGroups: 31 | - "" 32 | resources: 33 | - serviceaccounts 34 | verbs: 35 | - create 36 | - get 37 | - list 38 | - apiGroups: 39 | - "" 40 | resources: 41 | - services 42 | verbs: 43 | - '*' 44 | - apiGroups: 45 | - apps 46 | resources: 47 | - deployments 48 | verbs: 49 | - create 50 | - delete 51 | - get 52 | - list 53 | - update 54 | - watch 55 | - apiGroups: 56 | - elasticserving.paddlepaddle.org 57 | resources: 58 | - paddleservices 59 | verbs: 60 | - create 61 | - delete 62 | - get 63 | - list 64 | - patch 65 | - update 66 | - watch 67 | - apiGroups: 68 | - elasticserving.paddlepaddle.org 69 | resources: 70 | - paddleservices/status 71 | verbs: 72 | - get 73 | - patch 74 | - update 75 | - apiGroups: 76 | - rbac.authorization.k8s.io 77 | resources: 78 | - rolebindings 79 | verbs: 80 | - create 81 | - get 82 | - list 83 | - apiGroups: 84 | - rbac.authorization.k8s.io 85 | resources: 86 | - roles 87 | verbs: 88 | - create 89 | - get 90 | - list 91 | - apiGroups: 92 | - serving.knative.dev 93 | resources: 94 | - revisions 95 | verbs: 96 | - create 97 | - delete 98 | - get 99 | - list 100 | - patch 101 | - update 102 | - watch 103 | - apiGroups: 104 | - serving.knative.dev 105 | resources: 106 | - revisions/status 107 | verbs: 108 | - get 109 | - patch 110 | - update 111 | - apiGroups: 112 | - serving.knative.dev 113 | resources: 114 | - services 115 | verbs: 116 | - create 117 | - delete 118 | - get 119 | - list 120 | - patch 121 | - update 122 | - watch 123 | - apiGroups: 124 | - serving.knative.dev 125 | resources: 126 | - services/status 127 | verbs: 128 | - get 129 | - patch 130 | - update 131 | -------------------------------------------------------------------------------- /config/rbac/role_binding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRoleBinding 3 | metadata: 4 | name: manager-rolebinding 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: ClusterRole 8 | name: manager-role 9 | subjects: 10 | - kind: ServiceAccount 11 | name: default 12 | namespace: system 13 | -------------------------------------------------------------------------------- /config/samples/elasticserving_v1_paddle.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: elasticserving.paddlepaddle.org/v1 2 | kind: PaddleService 3 | metadata: 4 | name: sample 5 | namespace: paddleservice-system 6 | spec: 7 | # Add fields here 8 | runtimeVersion: paddleserving 
9 | default: 10 | arg: cd Serving/python/examples/imagenet && python3 resnet50_web_service.py ResNet50_vd_model cpu 9292 11 | containerImage: jinmionhaobaidu/resnet 12 | port: 9292 13 | tag: latest 14 | canary: 15 | arg: cd Serving/python/examples/imagenet && python3 resnet50_web_service_canary.py ResNet50_vd_model cpu 9292 16 | containerImage: jinmionhaobaidu/resnetcanary 17 | port: 9292 18 | tag: latest 19 | canaryTrafficPercent: 50 20 | service: 21 | minScale: 0 22 | window: "10s" 23 | -------------------------------------------------------------------------------- /config/samples/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - elasticserving_v1_paddle.yaml 3 | -------------------------------------------------------------------------------- /config/serving/kustomization.yaml: -------------------------------------------------------------------------------- 1 | namespace: paddleservice-system 2 | 3 | namePrefix: paddleservice- 4 | 5 | bases: 6 | - namespace.yaml 7 | - ../samples 8 | -------------------------------------------------------------------------------- /config/serving/namespace.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: system 5 | labels: 6 | istio-injection: enabled 7 | -------------------------------------------------------------------------------- /config/webhook/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - manifests.yaml 3 | - service.yaml 4 | 5 | configurations: 6 | - kustomizeconfig.yaml 7 | -------------------------------------------------------------------------------- /config/webhook/kustomizeconfig.yaml: -------------------------------------------------------------------------------- 1 | # the following config is for teaching kustomize where to look at when substituting vars. 2 | # It requires kustomize v2.1.0 or newer to work properly. 3 | nameReference: 4 | - kind: Service 5 | version: v1 6 | fieldSpecs: 7 | - kind: MutatingWebhookConfiguration 8 | group: admissionregistration.k8s.io 9 | path: webhooks/clientConfig/service/name 10 | - kind: ValidatingWebhookConfiguration 11 | group: admissionregistration.k8s.io 12 | path: webhooks/clientConfig/service/name 13 | 14 | namespace: 15 | - kind: MutatingWebhookConfiguration 16 | group: admissionregistration.k8s.io 17 | path: webhooks/clientConfig/service/namespace 18 | create: true 19 | - kind: ValidatingWebhookConfiguration 20 | group: admissionregistration.k8s.io 21 | path: webhooks/clientConfig/service/namespace 22 | create: true 23 | 24 | varReference: 25 | - path: metadata/annotations 26 | -------------------------------------------------------------------------------- /config/webhook/service.yaml: -------------------------------------------------------------------------------- 1 | 2 | apiVersion: v1 3 | kind: Service 4 | metadata: 5 | name: webhook-service 6 | namespace: system 7 | spec: 8 | ports: 9 | - port: 443 10 | targetPort: 9443 11 | selector: 12 | control-plane: controller-manager 13 | -------------------------------------------------------------------------------- /docs/api-doc-gen/gen_api_doc.sh: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | 3 | cmd="gen-crd-api-reference-docs"; 4 | 5 | cd $(cd "$(dirname "$0")";pwd); 6 | 7 | if [ ! 
-x "$cmd" ]; then 8 | wget https://github.com/ahmetb/gen-crd-api-reference-docs/releases/download/v0.1.5/gen-crd-api-reference-docs_darwin_amd64.tar.gz; 9 | tar xzvf gen-crd-api-reference-docs_darwin_amd64.tar.gz; 10 | rm -rf gen-crd-api-reference-docs_darwin_amd64.tar.gz; 11 | fi 12 | 13 | ./"$cmd" \ 14 | --config ./example-config.json \ 15 | --template-dir ./template \ 16 | --api-dir ../../pkg/apis/elasticserving/v1 \ 17 | --out-file ../en/api_doc.md
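Note that the script above fetches the macOS (darwin) build of `gen-crd-api-reference-docs`. On a Linux machine you would swap in the Linux tarball published alongside it; the sketch below assumes that release asset exists and that `example-config.json` and `template/` sit next to the script, since neither is shown in this tree.

```bash
# Hedged Linux variant of gen_api_doc.sh's download step (v0.1.5 asset name assumed):
wget https://github.com/ahmetb/gen-crd-api-reference-docs/releases/download/v0.1.5/gen-crd-api-reference-docs_linux_amd64.tar.gz
tar xzvf gen-crd-api-reference-docs_linux_amd64.tar.gz

# Then regenerate the API reference exactly as the script does:
./gen-crd-api-reference-docs \
  --config ./example-config.json \
  --template-dir ./template \
  --api-dir ../../pkg/apis/elasticserving/v1 \
  --out-file ../en/api_doc.md
```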

-------------------------------------------------------------------------------- /docs/en/api_doc.md: --------------------------------------------------------------------------------

Packages:

- elasticserving.paddlepaddle.org/v1

# elasticserving.paddlepaddle.org/v1

Package v1 contains PaddleService

Resource Types:

## Autoscaler (string alias)

(Appears on: ServiceSpec)

Autoscaler defines the autoscaler class

## AutoscalerMetric (string alias)

(Appears on: ServiceSpec)

AutoscalerMetric defines the metric for the autoscaler

## EndpointSpec

(Appears on: PaddleServiceSpec)

EndpointSpec defines the running containers

| Field | Description |
| --- | --- |
| `containerImage` (string) |  |
| `tag` (string) |  |
| `port` (int32) |  |
| `arg` (string) | (Optional) |

## PaddleService

PaddleService is the Schema for the paddles API

| Field | Description |
| --- | --- |
| `metadata` (Kubernetes meta/v1.ObjectMeta) | Refer to the Kubernetes API documentation for the fields of the `metadata` field. |
| `spec` (PaddleServiceSpec) | See PaddleServiceSpec below for the nested fields. |
| `status` (PaddleServiceStatus) |  |

## PaddleServiceSpec

(Appears on: PaddleService)

PaddleServiceSpec defines the desired state of PaddleService

| Field | Description |
| --- | --- |
| `runtimeVersion` (string) | Version of the service |
| `resources` (Kubernetes core/v1.ResourceRequirements) | Defaults to requests and limits of 1CPU, 2Gb MEM. |
| `default` (EndpointSpec) | DefaultTag defines the default PaddleService endpoints |
| `canary` (EndpointSpec) | (Optional) CanaryTag defines alternative PaddleService endpoints |
| `canaryTrafficPercent` (int) | (Optional) CanaryTrafficPercent defines the percentage of traffic going to the canary PaddleService endpoints |
| `service` (ServiceSpec) | (Optional) |
| `workingDir` (string) | (Optional) Container's working directory. If not specified, the container runtime's default will be used, which might be configured in the container image. Cannot be updated. |
| `volumeMounts` ([]Kubernetes core/v1.VolumeMount) | (Optional) Pod volumes to mount into the container's filesystem. Cannot be updated. |
| `volumes` ([]Kubernetes core/v1.Volume) | (Optional) List of volumes that can be mounted by containers belonging to the pod. More info: https://kubernetes.io/docs/concepts/storage/volumes |

## PaddleServiceStatus

(Appears on: PaddleService)

PaddleServiceStatus defines the observed state of PaddleService

| Field | Description |
| --- | --- |
| `Status` (knative.dev/pkg/apis/duck/v1.Status) | (Members of `Status` are embedded into this type.) |
| `url` (string) | URL of the PaddleService |
| `default` (StatusConfigurationSpec) | Statuses for the default endpoints of the PaddleService |
| `canary` (StatusConfigurationSpec) | Statuses for the canary endpoints of the PaddleService |
| `address` (knative.dev/pkg/apis/duck/v1.Addressable) | Addressable URL for eventing |
| `replicas` (int32) | (Optional) |

## ServiceSpec

(Appears on: PaddleServiceSpec)

ServiceSpec defines the configuration for the Knative Service.

| Field | Description |
| --- | --- |
| `autoscaler` (Autoscaler) | (Optional) |
| `metric` (AutoscalerMetric) | (Optional) |
| `window` (string) | (Optional) |
| `panicWindow` (string) | (Optional) |
| `panicThreshold` (string) | (Optional) |
| `minScale` (int) | (Optional) |
| `maxScale` (int) | (Optional) |
| `target` (int) | (Optional) |
| `targetUtilization` (string) | (Optional) |

## StatusConfigurationSpec

(Appears on: PaddleServiceStatus)

StatusConfigurationSpec describes the state of the configuration receiving traffic.

| Field | Description |
| --- | --- |
| `name` (string) | Latest revision name that is in ready state |

Generated with gen-crd-api-reference-docs on git commit 97fc986.
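To make the reference above concrete, here is a minimal PaddleService manifest exercising the documented fields. The image, args, `minScale` and `window` values come from the repo's sample; the `autoscaler`, `metric` and `maxScale` values are illustrative assumptions, not values taken from this document.

```bash
# Hedged sketch: apply a PaddleService that sets both EndpointSpec and ServiceSpec fields.
kubectl apply -f - <<EOF
apiVersion: elasticserving.paddlepaddle.org/v1
kind: PaddleService
metadata:
  name: sample
  namespace: paddleservice-system
spec:
  runtimeVersion: paddleserving
  default:
    containerImage: jinmionhaobaidu/resnet
    tag: latest
    port: 9292
    arg: cd Serving/python/examples/imagenet && python3 resnet50_web_service.py ResNet50_vd_model cpu 9292
  service:
    autoscaler: kpa.autoscaling.knative.dev   # assumed Knative autoscaler class value
    metric: concurrency                       # assumed AutoscalerMetric value
    minScale: 0
    maxScale: 3                               # illustrative upper bound
    window: "10s"
EOF
```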

-------------------------------------------------------------------------------- /docs/en/examples/bert.md: -------------------------------------------------------------------------------- 1 | # BERT AS Service 2 | 3 | English | [简体中文](../../zh_CN/examples/bert.md) 4 | 5 | This example uses the pre-trained BERT model to deploy a text analysis and prediction service. For more details, please refer to [Paddle Serving](https://github.com/PaddlePaddle/Serving/blob/develop/python/examples/bert/README.md) 6 | 7 | ## Build Image for Service (Optional) 8 | 9 | The test service is built on `registry.baidubce.com/paddlepaddle/serving:0.6.0-devel` and has been published to the public image registry `registry.baidubce.com/paddleflow-public/bert-serving:latest`. 10 | If you need to run the service on a GPU or another device, please refer to [Docker Images](https://github.com/PaddlePaddle/Serving/blob/v0.6.0/doc/DOCKER_IMAGES_CN.md) and build the model server image as below. 11 | 12 | 1. Download `Paddle Serving` 13 | 14 | ```bash 15 | $ wget https://github.com/PaddlePaddle/Serving/archive/refs/tags/v0.6.0.tar.gz 16 | $ tar xzvf v0.6.0.tar.gz 17 | $ mv Serving-0.6.0 Serving 18 | $ cd Serving 19 | ``` 20 | 21 | 2. Write Dockerfile 22 | 23 | ```Dockerfile 24 | FROM registry.baidubce.com/paddlepaddle/serving:0.6.0-devel 25 | 26 | WORKDIR /home 27 | 28 | COPY . /home/Serving 29 | 30 | WORKDIR /home/Serving 31 | 32 | # install dependencies 33 | RUN pip install -r python/requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple && \ 34 | pip install paddle-serving-server==0.6.0 -i https://pypi.tuna.tsinghua.edu.cn/simple && \ 35 | pip install paddle-serving-client==0.6.0 -i https://pypi.tuna.tsinghua.edu.cn/simple 36 | 37 | WORKDIR /home/Serving/python/examples/bert 38 | 39 | # download pre-trained BERT model 40 | RUN wget https://paddle-serving.bj.bcebos.com/paddle_hub_models/text/SemanticModel/bert_chinese_L-12_H-768_A-12.tar.gz && \ 41 | tar -xzf bert_chinese_L-12_H-768_A-12.tar.gz && rm -rf bert_chinese_L-12_H-768_A-12.tar.gz && \ 42 | mv bert_chinese_L-12_H-768_A-12_model bert_seq128_model && mv bert_chinese_L-12_H-768_A-12_client bert_seq128_client 43 | 44 | ENTRYPOINT ["python3", "-m", "paddle_serving_server.serve", "--model", "bert_seq128_model/", "--port", "9292"] 45 | ``` 46 | 47 | 3. Build Image 48 | 49 | ```bash 50 | docker build . -t registry.baidubce.com/paddleflow-public/bert-serving:latest 51 | ``` 52 | 53 | ## Create PaddleService 54 | 55 | 1. Prepare YAML File 56 | 57 | ```yaml 58 | # bert.yaml 59 | apiVersion: v1 60 | kind: Namespace 61 | metadata: 62 | labels: 63 | istio-injection: enabled 64 | name: paddleservice-system 65 | --- 66 | apiVersion: elasticserving.paddlepaddle.org/v1 67 | kind: PaddleService 68 | metadata: 69 | name: paddleservice-bert 70 | namespace: paddleservice-system 71 | spec: 72 | default: 73 | arg: python3 -m paddle_serving_server.serve --model bert_seq128_model/ --port 9292 74 | containerImage: registry.baidubce.com/paddleflow-public/bert-serving 75 | port: 9292 76 | tag: latest 77 | runtimeVersion: paddleserving 78 | service: 79 | minScale: 1 80 | ``` 81 | 82 | 2. Create PaddleService 83 | 84 | ```bash 85 | $ kubectl apply -f bert.yaml 86 | paddleservice.elasticserving.paddlepaddle.org/paddleservice-bert created 87 | ``` 88 | 89 | ## Check The Status of Service 90 | 91 | 1. Check the status of the BERT model service
 92 | 93 | ```bash 94 | # Check service in namespace paddleservice-system 95 | kubectl get svc -n paddleservice-system | grep paddleservice-bert 96 | 97 | # Check knative service in namespace paddleservice-system 98 | kubectl get ksvc paddleservice-bert -n paddleservice-system 99 | 100 | # Check pods in namespace paddleservice-system 101 | kubectl get pods -n paddleservice-system 102 | ``` 103 | 104 | 2. Obtain the ClusterIP 105 | ```bash 106 | $ kubectl get svc paddleservice-bert-default-private -n paddleservice-system 107 | ``` 108 | 109 | ## Test The BERT Model Service 110 | 111 | The model service supports HTTP / BRPC / GRPC clients; refer to [bert service](https://github.com/PaddlePaddle/Serving/blob/develop/python/examples/bert/README_CN.md) 112 | for the client code. Note that you need to replace the service IP address and port in the client code with the cluster-ip and port of the paddleservice-bert-default-private service mentioned above. 113 | 114 | For example, modify the code of `bert_client.py` as follows 115 | ```python 116 | fetch = ["pooled_output"] 117 | - endpoint_list = ['127.0.0.1:9292'] 118 | + endpoint_list = ['172.16.237.0:80'] 119 | client = Client() 120 | ``` 121 |
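If you prefer to script the lookup instead of copying the ClusterIP by hand, something like the following works. The jsonpath expressions are standard kubectl; the service name follows the `<paddleservice>-default-private` pattern used above.

```bash
# Resolve the private service's ClusterIP and port, then substitute them
# into endpoint_list in bert_client.py.
IP=$(kubectl get svc paddleservice-bert-default-private -n paddleservice-system \
      -o jsonpath='{.spec.clusterIP}')
PORT=$(kubectl get svc paddleservice-bert-default-private -n paddleservice-system \
      -o jsonpath='{.spec.ports[0].port}')
echo "endpoint: ${IP}:${PORT}"
```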
-------------------------------------------------------------------------------- /docs/en/examples/criteo_ctr.md: -------------------------------------------------------------------------------- 1 | # CTR Prediction Service 2 | 3 | English | [简体中文](../../zh_CN/examples/criteo_ctr.md) 4 | 5 | This example is based on the CTR prediction service trained on the Criteo dataset. For more details, please refer to [Paddle Serving](https://github.com/PaddlePaddle/Serving/blob/develop/python/examples/criteo_ctr/README.md) 6 | 7 | ## Build Image for Service (Optional) 8 | 9 | The test service is built on `registry.baidubce.com/paddlepaddle/serving:0.6.0-devel` and has been published to the public image registry `registry.baidubce.com/paddleflow-public/criteoctr-serving:latest`. 10 | If you need to run the service on a GPU or another device, please refer to [Docker Images](https://github.com/PaddlePaddle/Serving/blob/v0.6.0/doc/DOCKER_IMAGES_CN.md) and build the model server image as below. 11 | 12 | 1. Download `Paddle Serving` 13 | 14 | ```bash 15 | $ wget https://github.com/PaddlePaddle/Serving/archive/refs/tags/v0.6.0.tar.gz 16 | $ tar xzvf v0.6.0.tar.gz 17 | $ mv Serving-0.6.0 Serving 18 | $ cd Serving 19 | ``` 20 | 21 | 2. Write Dockerfile 22 | 23 | ```Dockerfile 24 | FROM registry.baidubce.com/paddlepaddle/serving:0.6.0-devel 25 | 26 | WORKDIR /home 27 | 28 | COPY . /home/Serving 29 | 30 | WORKDIR /home/Serving 31 | 32 | # install dependencies 33 | RUN pip install -r python/requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple && \ 34 | pip install paddle-serving-server==0.6.0 -i https://pypi.tuna.tsinghua.edu.cn/simple && \ 35 | pip install paddle-serving-client==0.6.0 -i https://pypi.tuna.tsinghua.edu.cn/simple 36 | 37 | WORKDIR /home/Serving/python/examples/criteo_ctr 38 | 39 | RUN wget https://paddle-serving.bj.bcebos.com/criteo_ctr_example/criteo_ctr_demo_model.tar.gz && \ 40 | tar xzf criteo_ctr_demo_model.tar.gz && rm -rf criteo_ctr_demo_model.tar.gz && \ 41 | mv models/ctr_client_conf . && mv models/ctr_serving_model . 42 | 43 | ENTRYPOINT ["python3", "-m", "paddle_serving_server.serve", "--model", "ctr_serving_model/", "--port", "9292"] 44 | ``` 45 | 46 | 3. Build Image 47 | 48 | ```bash 49 | docker build . -t registry.baidubce.com/paddleflow-public/criteoctr-serving:latest 50 | ``` 51 | 52 | ## Create PaddleService 53 | 54 | 1. Prepare YAML File 55 | 56 | ```yaml 57 | # criteoctr.yaml 58 | apiVersion: elasticserving.paddlepaddle.org/v1 59 | kind: PaddleService 60 | metadata: 61 | name: paddleservice-criteoctr 62 | namespace: paddleservice-system 63 | spec: 64 | default: 65 | arg: python3 -m paddle_serving_server.serve --model ctr_serving_model/ --port 9292 66 | containerImage: registry.baidubce.com/paddleflow-public/criteoctr-serving 67 | port: 9292 68 | tag: latest 69 | runtimeVersion: paddleserving 70 | service: 71 | minScale: 1 72 | ``` 73 | 74 | 2. Create PaddleService 75 | 76 | ```bash 77 | $ kubectl apply -f criteoctr.yaml 78 | paddleservice.elasticserving.paddlepaddle.org/paddleservice-criteoctr created 79 | ``` 80 | 81 | ## Check The Status of Service 82 | 83 | 1. Check the status of the Criteo CTR model service 84 | 85 | ```bash 86 | # Check service in namespace paddleservice-system 87 | kubectl get svc -n paddleservice-system | grep paddleservice-criteoctr 88 | 89 | # Check knative service in namespace paddleservice-system 90 | kubectl get ksvc paddleservice-criteoctr -n paddleservice-system 91 | 92 | # Check pods in namespace paddleservice-system 93 | kubectl get pods -n paddleservice-system 94 | ``` 95 | 96 | 2. Obtain the ClusterIP 97 | ```bash 98 | $ kubectl get svc paddleservice-criteoctr-default-private -n paddleservice-system 99 | ``` 100 | 101 | ## Test Model Service 102 | 103 | The model service supports HTTP / BRPC / GRPC clients; refer to [criteo ctr service](https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/criteo_ctr) 104 | for the client code. Note that you need to replace the service IP address and port in the client code with the cluster-ip and port of the paddleservice-criteoctr-default-private service mentioned above. 105 | 106 | For example, modify the code of `test_client.py` as follows 107 | ```python 108 | client.load_client_config(sys.argv[1]) 109 | - client.connect(["127.0.0.1:9292"]) 110 | + client.connect(["172.16.183.200:80"]) 111 | reader = CriteoReader(1000001) 112 | ``` 113 |
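As an alternative to editing the endpoint in the client, you can port-forward the private service for a quick local test. This is plain kubectl usage, not something the docs above prescribe; it assumes the private service selects the running pods as usual.

```bash
# Forward local port 9292 to the private service's port 80, so test_client.py
# can keep its original 127.0.0.1:9292 endpoint unchanged.
kubectl port-forward svc/paddleservice-criteoctr-default-private \
  -n paddleservice-system 9292:80
```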
-------------------------------------------------------------------------------- /docs/en/examples/lac.md: -------------------------------------------------------------------------------- 1 | # Chinese Word Segmentation 2 | 3 | English | [简体中文](../../zh_CN/examples/lac.md) 4 | 5 | This example uses the LAC Chinese word segmentation model for service deployment. For more details, please refer to [Paddle Serving](https://github.com/PaddlePaddle/Serving/blob/develop/python/examples/lac/README.md) 6 | 7 | ## Build Image for Service (Optional) 8 | 9 | The test service is built on `registry.baidubce.com/paddlepaddle/serving:0.6.0-devel` and has been published to the public image registry `registry.baidubce.com/paddleflow-public/lac-serving:latest`. 10 | If you need to run the service on a GPU or another device, please refer to [Docker Images](https://github.com/PaddlePaddle/Serving/blob/v0.6.0/doc/DOCKER_IMAGES_CN.md) and build the model server image as below. 11 | 12 | 1. Download `Paddle Serving` 13 | 14 | ```bash 15 | $ wget https://github.com/PaddlePaddle/Serving/archive/refs/tags/v0.6.0.tar.gz 16 | $ tar xzvf v0.6.0.tar.gz 17 | $ mv Serving-0.6.0 Serving 18 | $ cd Serving 19 | ``` 20 | 21 | 2. Write Dockerfile 22 | 23 | ```Dockerfile 24 | FROM registry.baidubce.com/paddlepaddle/serving:0.6.0-devel 25 | 26 | WORKDIR /home 27 | 28 | COPY . /home/Serving 29 | 30 | WORKDIR /home/Serving 31 | 32 | # install dependencies 33 | RUN pip install -r python/requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple && \ 34 | pip install paddle-serving-server==0.6.0 -i https://pypi.tuna.tsinghua.edu.cn/simple && \ 35 | pip install paddle-serving-client==0.6.0 -i https://pypi.tuna.tsinghua.edu.cn/simple 36 | 37 | WORKDIR /home/Serving/python/examples/lac 38 | 39 | RUN python3 -m paddle_serving_app.package --get_model lac && \ 40 | tar xzf lac.tar.gz && rm -rf lac.tar.gz 41 | 42 | ENTRYPOINT ["python3", "-m", "paddle_serving_server.serve", "--model", "lac_model/", "--port", "9292"] 43 | ``` 44 | 45 | 3. Build Image 46 | 47 | ```bash 48 | docker build . -t registry.baidubce.com/paddleflow-public/lac-serving:latest 49 | ``` 50 | 51 | ## Create PaddleService 52 | 53 | 1. Prepare YAML File 54 | 55 | ```yaml 56 | # lac.yaml 57 | apiVersion: elasticserving.paddlepaddle.org/v1 58 | kind: PaddleService 59 | metadata: 60 | name: paddleservice-lac 61 | namespace: paddleservice-system 62 | spec: 63 | default: 64 | arg: python3 -m paddle_serving_server.serve --model lac_model/ --port 9292 65 | containerImage: registry.baidubce.com/paddleflow-public/lac-serving 66 | port: 9292 67 | tag: latest 68 | runtimeVersion: paddleserving 69 | service: 70 | minScale: 1 71 | ``` 72 | 73 | 2. Create PaddleService 74 | 75 | ```bash 76 | $ kubectl apply -f lac.yaml 77 | paddleservice.elasticserving.paddlepaddle.org/paddleservice-lac created 78 | ``` 79 | 80 | ## Check The Status of Service 81 | 82 | 1. Check the status of the model service 83 | 84 | ```bash 85 | # Check service in namespace paddleservice-system 86 | kubectl get svc -n paddleservice-system | grep paddleservice-lac 87 | 88 | # Check knative service in namespace paddleservice-system 89 | kubectl get ksvc paddleservice-lac -n paddleservice-system 90 | 91 | # Check pods in namespace paddleservice-system 92 | kubectl get pods -n paddleservice-system 93 | ``` 94 | 95 | 2. Obtain the ClusterIP 96 | ```bash 97 | $ kubectl get svc paddleservice-lac-default-private -n paddleservice-system 98 | ``` 99 | 100 | ## Test Model Service 101 | 102 | The model service supports HTTP / BRPC / GRPC clients; refer to [lac service](https://github.com/PaddlePaddle/Serving/blob/develop/python/examples/lac/README.md) 103 | 104 | You can simply test the service with the following command 105 | ```bash 106 | # Note: replace <cluster-ip> and <port> with the cluster-ip and port of the paddleservice-lac-default-private service mentioned above. 107 | curl -H "Host: paddleservice-lac.paddleservice-system.example.com" -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "我爱北京天安门"}], "fetch":["word_seg"]}' http://<cluster-ip>:<port>/lac/prediction 108 | ``` 109 | 110 | Expected result 111 | ```bash 112 | {"result":[{"word_seg":"\u6211|\u7231|\u5317\u4eac|\u5929\u5b89\u95e8"}]} 113 | ``` 114 | -------------------------------------------------------------------------------- /docs/en/examples/wide_deep.md: -------------------------------------------------------------------------------- 1 | # Wide & Deep Pipeline 2 | 3 | This document describes how to use components such as Paddle Operator and ElasticServing to complete a pipeline for the Wide & Deep model, which includes the steps of `data preparation, model training and model serving`. 4 | The Wide & Deep model is a recommendation framework published by Google in 2016.
The model code used in this demo is from the [PaddleRec Project](https://github.com/PaddlePaddle/PaddleRec/blob/release/2.1.0/models/rank/wide_deep/README.md). 5 | 6 | ## 1. Data Preparation 7 | 8 | This demo uses the Criteo dataset provided by the [Display Advertising Challenge](https://www.kaggle.com/c/criteo-display-ad-challenge/), 9 | and we have stored the data in a public bucket of [Baidu Object Storage (BOS)](http://baidu.netnic.com.cn/doc/BOS/BOSCLI.html#BOS.20CMD): `bos://paddleflow-public.hkg.bcebos.com/criteo`. 10 | Using the cache component of [Paddle Operator](https://github.com/PaddleFlow/paddle-operator) to cache the sample data locally can speed up model training jobs. 11 | Please refer to the [quick start document](https://github.com/xiaolao/paddle-operator/blob/sampleset/docs/zh_CN/ext-get-start.md) to install Paddle Operator. 12 | In this example, we use the [JuiceFS CSI](https://github.com/juicedata/juicefs-csi-driver) plugin to store data and models; it is also part of Paddle Operator. 13 | 14 | ### Create Secret for Accessing Object Storage 15 | 16 | This example uses BOS as the storage backend. You can also use any other [object storage supported by JuiceFS](https://github.com/juicedata/juicefs/blob/main/docs/zh_cn/databases_for_metadata.md). 17 | Create a Secret as follows. The cache component of Paddle Operator needs the access-key / secret-key to access the object storage, and metaurl is the access link of the metadata storage engine. 18 | 19 | ```yaml 20 | # criteo-secret.yaml 21 | apiVersion: v1 22 | data: 23 | access-key: xxx 24 | bucket: xxx 25 | metaurl: xxx 26 | name: Y3JpdGVv 27 | secret-key: xxx 28 | storage: Ym9z 29 | kind: Secret 30 | metadata: 31 | name: criteo 32 | namespace: paddle-system 33 | type: Opaque 34 | ``` 35 | 36 | Note: each value under `data` must be base64-encoded
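For instance, the `storage` and `name` values above decode as shown below; the key and metaurl fields are placeholders (`xxx`) that you must fill in for your own bucket.

```bash
# Encode plaintext values for the Secret's data fields (-n avoids a trailing newline).
echo -n "bos"    | base64   # => Ym9z      (the storage field above)
echo -n "criteo" | base64   # => Y3JpdGVv  (the name field above)

# Decode to double-check:
echo "Ym9z" | base64 -d     # => bos
```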
 37 | 38 | ### Create SampleSet 39 | 40 | The cache component of Paddle Operator provides an abstraction of sample data sets through the SampleSet CRD. Create the following SampleSet and wait for the data to be synchronized. 41 | The nodeAffinity in the configuration can be used to specify which nodes the data should be cached to; for example, you can cache the data to nodes with GPU devices. 42 | 43 | ```yaml 44 | # criteo-sampleset.yaml 45 | apiVersion: batch.paddlepaddle.org/v1alpha1 46 | kind: SampleSet 47 | metadata: 48 | name: criteo 49 | namespace: paddle-system 50 | spec: 51 | # Partitions of cache data 52 | partitions: 1 53 | source: 54 | # Uri of sample data source 55 | uri: bos://paddleflow-public.hkg.bcebos.com/criteo 56 | secretRef: 57 | # Secret to access data source 58 | name: criteo-source 59 | secretRef: 60 | name: criteo 61 | nodeAffinity: 62 | requiredDuringSchedulingIgnoredDuringExecution: 63 | nodeSelectorTerms: 64 | - matchExpressions: 65 | - key: beta.kubernetes.io/instance-gpu 66 | operator: In 67 | values: 68 | - "true" 69 | ``` 70 | 71 | Since the sample data is 22 GiB, synchronization may take a while after the SampleSet is created; 72 | model training can start once the SampleSet's status changes to Ready. 73 | 74 | ```bash 75 | $ kubectl apply -f criteo-sampleset.yaml 76 | sampleset.batch.paddlepaddle.org/criteo created 77 | 78 | $ kubectl get sampleset criteo -n paddle-system 79 | NAME TOTAL SIZE CACHED SIZE AVAIL SPACE RUNTIME PHASE AGE 80 | criteo 22 GiB 22 GiB 9.4 GiB 1/1 Ready 2d6h 81 | ``` 82 | 83 | ### Prepare Volume for Storing Model (Optional) 84 | 85 | This step creates the PV and PVC resources used to store the models. We create the PV and PVC with [JuiceFS CSI](https://github.com/juicedata/juicefs-csi-driver/tree/master/examples/static-provisioning), 86 | and the storage backend is still BOS. You can also use other CSI plugins, such as Ceph or GlusterFS. 87 | 88 | Create PV 89 | ```yaml 90 | apiVersion: v1 91 | kind: PersistentVolume 92 | metadata: 93 | name: model-center 94 | spec: 95 | accessModes: 96 | - ReadWriteMany 97 | capacity: 98 | storage: 10Pi 99 | csi: 100 | driver: csi.juicefs.com 101 | fsType: juicefs 102 | # Secret to access remote object storage 103 | nodePublishSecretRef: 104 | name: criteo 105 | namespace: paddle-system 106 | # Bucket of object storage; the model files are stored under this path 107 | volumeHandle: model-center 108 | persistentVolumeReclaimPolicy: Retain 109 | storageClassName: model-center 110 | volumeMode: Filesystem 111 | ``` 112 | 113 | Create PVC 114 | ```yaml 115 | apiVersion: v1 116 | kind: PersistentVolumeClaim 117 | metadata: 118 | name: model-center 119 | namespace: paddle-system 120 | spec: 121 | accessModes: 122 | - ReadWriteMany 123 | resources: 124 | requests: 125 | storage: 10Pi 126 | storageClassName: model-center 127 | volumeMode: Filesystem 128 | ``` 129 | 130 | ## 2. Training Model 131 | 132 | The model training script of this example is from the [PaddleRec project](https://github.com/PaddlePaddle/PaddleRec/blob/release/2.1.0/models/rank/wide_deep/README.md), 133 | and the image is available at `registry.baidubce.com/paddleflow-public/paddlerec:2.1.0-gpu-cuda10.2-cudnn7`. 134 | This example uses the [Collective](https://fleet-x.readthedocs.io/en/latest/paddle_fleet_rst/collective/index.html) mode for training, so a GPU device is required. 135 | You can also use the [Parameter Server](https://fleet-x.readthedocs.io/en/latest/paddle_fleet_rst/paddle_on_k8s.html#wide-and-deep) mode for model training. 136 | The CPU image of PaddleRec is available at `registry.baidubce.com/paddleflow-public/paddlerec:2.1.0`. 137 | 138 | ### 1. Create ConfigMap 139 | 140 | Each model in the PaddleRec project has a config file. In this demo we use a ConfigMap to store the configuration and mount it into the containers of the PaddleJob, 141 | which makes the configuration easier to modify. Please refer to the document: [PaddleRec config.yaml Configuration Instructions](https://github.com/PaddlePaddle/PaddleRec/blob/release/2.1.0/doc/yaml.md).
 142 | 143 | ```yaml 144 | # wide_deep_config.yaml 145 | # global settings 146 | runner: 147 | train_data_dir: "/mnt/criteo/slot_train_data_full" 148 | train_reader_path: "criteo_reader" # importlib format 149 | use_gpu: True 150 | use_auc: True 151 | train_batch_size: 4096 152 | epochs: 4 153 | print_interval: 10 154 | model_save_path: "/mnt/model" 155 | test_data_dir: "/mnt/criteo/slot_test_data_full" 156 | infer_reader_path: "criteo_reader" # importlib format 157 | infer_batch_size: 4096 158 | infer_load_path: "/mnt/model" 159 | infer_start_epoch: 0 160 | infer_end_epoch: 4 161 | use_inference: True 162 | save_inference_feed_varnames: ["C1","C2","C3","C4","C5","C6","C7","C8","C9","C10","C11","C12","C13","C14","C15","C16","C17","C18","C19","C20","C21","C22","C23","C24","C25","C26","dense_input"] 163 | save_inference_fetch_varnames: ["sigmoid_0.tmp_0"] 164 | #use fleet 165 | use_fleet: True 166 | 167 | # hyper parameters of user-defined network 168 | hyper_parameters: 169 | # optimizer config 170 | optimizer: 171 | class: Adam 172 | learning_rate: 0.001 173 | strategy: async 174 | # user-defined pairs 175 | sparse_inputs_slots: 27 176 | sparse_feature_number: 1000001 177 | sparse_feature_dim: 9 178 | dense_input_dim: 13 179 | fc_sizes: [512, 256, 128, 32] 180 | distributed_embedding: 0 181 | ``` 182 | 183 | Create a ConfigMap named wide-deep-config 184 | 185 | ```bash 186 | kubectl create configmap wide-deep-config -n paddle-system --from-file=wide_deep_config.yaml 187 | ``` 188 | 189 | ### 2. Create PaddleJob 190 | 191 | PaddleJob is a custom resource in the Paddle Operator project, used to define Paddle training jobs. 192 | 193 | ```yaml 194 | # wide-deep.yaml 195 | apiVersion: batch.paddlepaddle.org/v1 196 | kind: PaddleJob 197 | metadata: 198 | name: wide-deep 199 | namespace: paddle-system 200 | spec: 201 | cleanPodPolicy: Never 202 | sampleSetRef: 203 | name: criteo 204 | mountPath: /mnt/criteo 205 | worker: 206 | replicas: 1 207 | template: 208 | spec: 209 | containers: 210 | - name: paddlerec 211 | image: registry.baidubce.com/paddleflow-public/paddlerec:2.1.0-gpu-cuda10.2-cudnn7 212 | workingDir: /home/PaddleRec/models/rank/wide_deep 213 | command: ["/bin/bash", "-c", "cp /mnt/config/wide_deep_config.yaml . && mkdir -p /mnt/model/wide-deep && python -m paddle.distributed.launch --log_dir /mnt/model/log --gpus '0,1' ../../../tools/trainer.py -m wide_deep_config.yaml"] 214 | volumeMounts: 215 | - mountPath: /dev/shm 216 | name: dshm 217 | - mountPath: /mnt/config 218 | name: config-volume 219 | - mountPath: /mnt/model 220 | name: model-volume 221 | resources: 222 | limits: 223 | nvidia.com/gpu: 2 224 | volumes: 225 | - name: dshm 226 | emptyDir: 227 | medium: Memory 228 | - name: config-volume 229 | configMap: 230 | name: wide-deep-config 231 | - name: model-volume 232 | persistentVolumeClaim: 233 | claimName: model-center 234 | ``` 235 | 236 | Create the PaddleJob and check its status 237 | 238 | ```bash 239 | $ kubectl create -f wide-deep.yaml 240 | 241 | $ kubectl get paddlejob wide-deep -n paddle-system 242 | NAME STATUS MODE WORKER AGE 243 | wide-deep Running Collective 1/1 2m 244 | ``` 245 | 
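While the job runs, you can follow the worker logs to watch training progress. The pod name below assumes the `<job>-worker-<index>` naming convention that Paddle Operator typically uses, so adjust it to whatever `kubectl get pods` actually reports.

```bash
# Find the pods created for the job, then follow one worker's log output.
kubectl get pods -n paddle-system | grep wide-deep
kubectl logs -f wide-deep-worker-0 -n paddle-system   # assumed pod name
```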
246 | ## 3. Model Serving 247 | 248 | After the PaddleJob finishes, you will find the model files stored in numeric directories, such as `0/`. During training, Paddle saves a checkpoint after each epoch, 249 | so use the model files in the folder with the largest number to deploy the service. Before deploying the model service, you need to convert the model files to the format that Paddle Serving can use. 250 | We have already placed the model files in the bucket `https://paddleflow-public.hkg.bcebos.com/models/wide-deep/wide-deep.tar.gz`. 251 | 252 | The directory structure: 253 | ``` 254 | . 255 | ├── rec_inference.pdiparams 256 | ├── rec_inference.pdmodel 257 | ├── rec_static.pdmodel 258 | ├── rec_static.pdopt 259 | └── rec_static.pdparams 260 | ``` 261 | 262 | ### 1. Create PaddleService 263 | 264 | ```yaml 265 | apiVersion: elasticserving.paddlepaddle.org/v1 266 | kind: PaddleService 267 | metadata: 268 | name: wide-deep-serving 269 | namespace: paddleservice-system 270 | spec: 271 | default: 272 | arg: wget https://paddleflow-public.hkg.bcebos.com/models/wide-deep/wide-deep.tar.gz && 273 | tar xzf wide-deep.tar.gz && rm -rf wide-deep.tar.gz && 274 | python3 -m paddle_serving_client.convert --dirname wide-deep/ --model_filename rec_inference.pdmodel --params_filename rec_inference.pdiparams && 275 | python3 -m paddle_serving_server.serve --model serving_server --port 9292 276 | containerImage: registry.baidubce.com/paddleflow-public/serving 277 | port: 9292 278 | tag: v0.6.2 279 | runtimeVersion: paddleserving 280 | service: 281 | minScale: 1 282 | ``` 283 | 284 | ### 2. Check the Service Status 285 | 286 | ```bash 287 | # Check service in namespace paddleservice-system 288 | kubectl get svc -n paddleservice-system | grep wide-deep-serving 289 | 290 | # Check knative service in namespace paddleservice-system 291 | kubectl get ksvc wide-deep-serving -n paddleservice-system 292 | 293 | # Check pods in namespace paddleservice-system 294 | kubectl get pods -n paddleservice-system 295 | 296 | # Obtain ClusterIP 297 | kubectl get svc wide-deep-serving-default-private -n paddleservice-system 298 | ``` 299 | 300 | The model service supports HTTP / BRPC / GRPC clients; refer to [Serving](https://github.com/PaddlePaddle/PaddleRec/blob/release/2.1.0/doc/serving.md) 301 | for the client code. Note that you need to replace the service IP address and port in the client code with the cluster-ip and port of the `wide-deep-serving-default-private` service mentioned above. 302 |
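As with the earlier examples, this endpoint lookup can be scripted. The jsonpath usage is standard kubectl; the service name comes from the PaddleService above.

```bash
# Print the private service's ClusterIP and port in one shot, ready to paste
# into the client code from the PaddleRec serving document.
kubectl get svc wide-deep-serving-default-private -n paddleservice-system \
  -o jsonpath='{.spec.clusterIP}{":"}{.spec.ports[0].port}{"\n"}'
```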
-------------------------------------------------------------------------------- /docs/zh_CN/examples/bert.md: -------------------------------------------------------------------------------- 1 | # BERT Semantic Understanding Service 2 | 3 | [English](../../en/examples/bert.md) | 简体中文 4 | 5 | This example deploys a text analysis and prediction service with the pre-trained BERT model. For more details on the model and code, see [Paddle Serving](https://github.com/PaddlePaddle/Serving/blob/develop/python/examples/bert/README_CN.md). 6 | 7 | ## Build the Service Image (Optional) 8 | 9 | The model service image in this example is built on `registry.baidubce.com/paddlepaddle/serving:0.6.0-devel` and has been uploaded to the publicly accessible image registry `registry.baidubce.com/paddleflow-public/bert-serving:latest`. 10 | If you need a GPU or other base image, see [Docker Images](https://github.com/PaddlePaddle/Serving/blob/v0.6.0/doc/DOCKER_IMAGES_CN.md) and build the image as follows. 11 | 12 | 1. Download the Paddle Serving source 13 | 14 | ```bash 15 | $ wget https://github.com/PaddlePaddle/Serving/archive/refs/tags/v0.6.0.tar.gz 16 | $ tar xzvf v0.6.0.tar.gz 17 | $ mv Serving-0.6.0 Serving 18 | $ cd Serving 19 | ``` 20 | 21 | 2. Write the following Dockerfile 22 | 23 | ```Dockerfile 24 | FROM registry.baidubce.com/paddlepaddle/serving:0.6.0-devel 25 | 26 | WORKDIR /home 27 | 28 | COPY . /home/Serving 29 | 30 | WORKDIR /home/Serving 31 | 32 | # install dependencies 33 | RUN pip install -r python/requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple && \ 34 | pip install paddle-serving-server==0.6.0 -i https://pypi.tuna.tsinghua.edu.cn/simple && \ 35 | pip install paddle-serving-client==0.6.0 -i https://pypi.tuna.tsinghua.edu.cn/simple 36 | 37 | WORKDIR /home/Serving/python/examples/bert 38 | 39 | # download pre-trained BERT model 40 | RUN wget https://paddle-serving.bj.bcebos.com/paddle_hub_models/text/SemanticModel/bert_chinese_L-12_H-768_A-12.tar.gz && \ 41 | tar -xzf bert_chinese_L-12_H-768_A-12.tar.gz && rm -rf bert_chinese_L-12_H-768_A-12.tar.gz && \ 42 | mv bert_chinese_L-12_H-768_A-12_model bert_seq128_model && mv bert_chinese_L-12_H-768_A-12_client bert_seq128_client 43 | 44 | ENTRYPOINT ["python3", "-m", "paddle_serving_server.serve", "--model", "bert_seq128_model/", "--port", "9292"] 45 | ``` 46 | 47 | 3. Build the image 48 | 49 | ```bash 50 | docker build . -t registry.baidubce.com/paddleflow-public/bert-serving:latest 51 | ``` 52 | 53 | ## Create PaddleService 54 | 55 | 1. Prepare the YAML file 56 | 57 | ```yaml 58 | # bert.yaml 59 | apiVersion: v1 60 | kind: Namespace 61 | metadata: 62 | labels: 63 | istio-injection: enabled 64 | name: paddleservice-system 65 | --- 66 | apiVersion: elasticserving.paddlepaddle.org/v1 67 | kind: PaddleService 68 | metadata: 69 | name: paddleservice-bert 70 | namespace: paddleservice-system 71 | spec: 72 | default: 73 | arg: python3 -m paddle_serving_server.serve --model bert_seq128_model/ --port 9292 74 | containerImage: registry.baidubce.com/paddleflow-public/bert-serving 75 | port: 9292 76 | tag: latest 77 | runtimeVersion: paddleserving 78 | service: 79 | minScale: 1 80 | ``` 81 | 82 | 2. Create the PaddleService 83 | 84 | ```bash 85 | $ kubectl apply -f bert.yaml 86 | paddleservice.elasticserving.paddlepaddle.org/paddleservice-bert created 87 | ``` 88 | 89 | ## Check the Service Status 90 | 91 | 1. You can check the service status with the following commands 92 | 93 | ```bash 94 | # Check service in namespace paddleservice-system 95 | kubectl get svc -n paddleservice-system | grep paddleservice-bert 96 | 97 | # Check knative service in namespace paddleservice-system 98 | kubectl get ksvc paddleservice-bert -n paddleservice-system 99 | 100 | # Check pods in namespace paddleservice-system 101 | kubectl get pods -n paddleservice-system 102 | ``` 103 | 104 | 2. Run the following command to obtain the ClusterIP 105 | ```bash 106 | $ kubectl get svc paddleservice-bert-default-private -n paddleservice-system 107 | ``` 108 | 109 | ## Test the BERT Model Service 110 | 111 | The model service supports HTTP / BRPC / GRPC clients. For the client code and environment setup, see the [Semantic Understanding Prediction Service](https://github.com/PaddlePaddle/Serving/blob/develop/python/examples/bert/README_CN.md) document. 112 | Note that you need to replace the service IP address and port in the client code with the cluster-ip and port of the paddleservice-bert-default-private service above. 113 | 114 | For example, modify the code of `bert_client.py` as follows 115 | 116 | ```python 117 | fetch = ["pooled_output"] 118 | - endpoint_list = ['127.0.0.1:9292'] 119 | + endpoint_list = ['172.16.237.0:80'] 120 | client = Client() 121 | ``` 122 | -------------------------------------------------------------------------------- /docs/zh_CN/examples/criteo_ctr.md: -------------------------------------------------------------------------------- 1 | # CTR Prediction Service 2 | 3 | [English](../../en/examples/criteo_ctr.md) | 简体中文 4 | 5 | This example serves a CTR prediction model trained on the Criteo dataset. For more details on the model and code, see [Paddle Serving](https://github.com/PaddlePaddle/Serving/blob/develop/python/examples/criteo_ctr/README_CN.md).
6 | 7 | ## Build the Service Image (Optional) 8 | 9 | The model service image in this example is built on `registry.baidubce.com/paddlepaddle/serving:0.6.0-devel` and has been uploaded to the publicly accessible image registry `registry.baidubce.com/paddleflow-public/criteoctr-serving:latest`. 10 | If you need a GPU or other base image, see [Docker Images](https://github.com/PaddlePaddle/Serving/blob/v0.6.0/doc/DOCKER_IMAGES_CN.md) and build the image as follows. 11 | 12 | 1. Download the Paddle Serving source 13 | 14 | ```bash 15 | $ wget https://github.com/PaddlePaddle/Serving/archive/refs/tags/v0.6.0.tar.gz 16 | $ tar xzvf v0.6.0.tar.gz 17 | $ mv Serving-0.6.0 Serving 18 | $ cd Serving 19 | ``` 20 | 21 | 2. Write the following Dockerfile 22 | 23 | ```Dockerfile 24 | FROM registry.baidubce.com/paddlepaddle/serving:0.6.0-devel 25 | 26 | WORKDIR /home 27 | 28 | COPY . /home/Serving 29 | 30 | WORKDIR /home/Serving 31 | 32 | # install dependencies 33 | RUN pip install -r python/requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple && \ 34 | pip install paddle-serving-server==0.6.0 -i https://pypi.tuna.tsinghua.edu.cn/simple && \ 35 | pip install paddle-serving-client==0.6.0 -i https://pypi.tuna.tsinghua.edu.cn/simple 36 | 37 | WORKDIR /home/Serving/python/examples/criteo_ctr 38 | 39 | RUN wget https://paddle-serving.bj.bcebos.com/criteo_ctr_example/criteo_ctr_demo_model.tar.gz && \ 40 | tar xzf criteo_ctr_demo_model.tar.gz && rm -rf criteo_ctr_demo_model.tar.gz && \ 41 | mv models/ctr_client_conf . && mv models/ctr_serving_model . 42 | 43 | ENTRYPOINT ["python3", "-m", "paddle_serving_server.serve", "--model", "ctr_serving_model/", "--port", "9292"] 44 | ``` 45 | 46 | 3. Build the image 47 | 48 | ```bash 49 | docker build . -t registry.baidubce.com/paddleflow-public/criteoctr-serving:latest 50 | ``` 51 | 52 | ## Create PaddleService 53 | 54 | 1. Prepare the YAML file 55 | 56 | ```yaml 57 | # criteoctr.yaml 58 | apiVersion: elasticserving.paddlepaddle.org/v1 59 | kind: PaddleService 60 | metadata: 61 | name: paddleservice-criteoctr 62 | namespace: paddleservice-system 63 | spec: 64 | default: 65 | arg: python3 -m paddle_serving_server.serve --model ctr_serving_model/ --port 9292 66 | containerImage: registry.baidubce.com/paddleflow-public/criteoctr-serving 67 | port: 9292 68 | tag: latest 69 | runtimeVersion: paddleserving 70 | service: 71 | minScale: 1 72 | ``` 73 | 74 | 2. Create the PaddleService 75 | 76 | ```bash 77 | $ kubectl apply -f criteoctr.yaml 78 | paddleservice.elasticserving.paddlepaddle.org/paddleservice-criteoctr created 79 | ``` 80 | 81 | ## Check the Service Status 82 | 83 | 1. You can check the service status with the following commands 84 | 85 | ```bash 86 | # Check service in namespace paddleservice-system 87 | kubectl get svc -n paddleservice-system | grep paddleservice-criteoctr 88 | 89 | # Check knative service in namespace paddleservice-system 90 | kubectl get ksvc paddleservice-criteoctr -n paddleservice-system 91 | 92 | # Check pods in namespace paddleservice-system 93 | kubectl get pods -n paddleservice-system 94 | ``` 95 | 96 | 2. Run the following command to obtain the ClusterIP
97 | ```bash
98 | $ kubectl get svc paddleservice-criteoctr-default-private -n paddleservice-system
99 | ```
100 | 
101 | ## Test the model service
102 | 
103 | The model service supports HTTP / BRPC / GRPC clients. For client code and environment setup details, see the document [CTR Prediction Service](https://github.com/PaddlePaddle/Serving/blob/develop/python/examples/criteo_ctr/README_CN.md).
104 | Note that you need to replace the service IP address and port in the client code with the cluster-ip and port of the paddleservice-criteoctr-default-private service above.
105 | 
106 | For example, modify the code in `test_client.py`
107 | 
108 | ```python
109 | client.load_client_config(sys.argv[1])
110 | - client.connect(["127.0.0.1:9292"])
111 | + client.connect(["172.16.183.200:80"])
112 | reader = CriteoReader(1000001)
113 | ```
114 | 
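115 | The snippet below condenses the same connection setup into a standalone sketch; the endpoint address is a placeholder, the client config path comes from the image built above, and prediction itself follows the upstream `test_client.py` (including its `CriteoReader` helper from the example directory).
116 | 
117 | ```python
118 | # Connection-setup sketch for the CTR service; run it from
119 | # python/examples/criteo_ctr inside the image or a source checkout.
120 | import sys
121 | from paddle_serving_client import Client
122 | 
123 | client = Client()
124 | # e.g. ctr_client_conf/serving_client_conf.prototxt extracted in the image
125 | client.load_client_config(sys.argv[1])
126 | # Replace with the cluster-ip and port of paddleservice-criteoctr-default-private.
127 | client.connect(["172.16.183.200:80"])
128 | ```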
-------------------------------------------------------------------------------- /docs/zh_CN/examples/lac.md: --------------------------------------------------------------------------------
1 | # Chinese Word Segmentation Model Service
2 | 
3 | [English](../../en/examples/lac.md) | 简体中文
4 | 
5 | This example deploys the LAC Chinese word segmentation model as a service. For more details about the model and code, see [Paddle Serving](https://github.com/PaddlePaddle/Serving/blob/develop/python/examples/lac/README_CN.md).
6 | 
7 | ## Build the service image (optional)
8 | 
9 | The model service image for this example is built on top of `registry.baidubce.com/paddlepaddle/serving:0.6.0-devel` and pushed to the publicly accessible image repository `registry.baidubce.com/paddleflow-public/lac-serving:latest`.
10 | If you need a GPU or another base image version, see the document [Docker Images](https://github.com/PaddlePaddle/Serving/blob/v0.6.0/doc/DOCKER_IMAGES_CN.md) and build the image as follows.
11 | 
12 | 1. Download the Paddle Serving code
13 | 
14 | ```bash
15 | $ wget https://github.com/PaddlePaddle/Serving/archive/refs/tags/v0.6.0.tar.gz
16 | $ tar xzvf v0.6.0.tar.gz
17 | $ mv Serving-0.6.0 Serving
18 | $ cd Serving
19 | ```
20 | 
21 | 2. Write the following Dockerfile
22 | 
23 | ```Dockerfile
24 | FROM registry.baidubce.com/paddlepaddle/serving:0.6.0-devel
25 | 
26 | WORKDIR /home
27 | 
28 | COPY . /home/Serving
29 | 
30 | WORKDIR /home/Serving
31 | 
32 | # install dependencies
33 | RUN pip install -r python/requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple && \
34 |     pip install paddle-serving-server==0.6.0 -i https://pypi.tuna.tsinghua.edu.cn/simple && \
35 |     pip install paddle-serving-client==0.6.0 -i https://pypi.tuna.tsinghua.edu.cn/simple
36 | 
37 | WORKDIR /home/Serving/python/examples/lac
38 | 
39 | RUN python3 -m paddle_serving_app.package --get_model lac && \
40 |     tar xzf lac.tar.gz && rm -rf lac.tar.gz
41 | 
42 | ENTRYPOINT ["python3", "-m", "paddle_serving_server.serve", "--model", "lac_model/", "--port", "9292"]
43 | ```
44 | 
45 | 3. Build the image
46 | 
47 | ```bash
48 | docker build . -t registry.baidubce.com/paddleflow-public/lac-serving:latest
49 | ```
50 | 
51 | ## Create the PaddleService
52 | 
53 | 1. Write the YAML file
54 | 
55 | ```yaml
56 | # lac.yaml
57 | apiVersion: elasticserving.paddlepaddle.org/v1
58 | kind: PaddleService
59 | metadata:
60 |   name: paddleservice-lac
61 |   namespace: paddleservice-system
62 | spec:
63 |   default:
64 |     arg: python3 -m paddle_serving_server.serve --model lac_model/ --port 9292
65 |     containerImage: registry.baidubce.com/paddleflow-public/lac-serving
66 |     port: 9292
67 |     tag: latest
68 |   runtimeVersion: paddleserving
69 |   service:
70 |     minScale: 1
71 | ```
72 | 
73 | 2. Create the PaddleService
74 | 
75 | ```bash
76 | $ kubectl apply -f lac.yaml
77 | paddleservice.elasticserving.paddlepaddle.org/paddleservice-lac created
78 | ```
79 | 
80 | ## Check the service status
81 | 
82 | 1. You can check the service status with the following commands
83 | 
84 | ```bash
85 | # Check service in namespace paddleservice-system
86 | kubectl get svc -n paddleservice-system | grep paddleservice-lac
87 | 
88 | # Check knative service in namespace paddleservice-system
89 | kubectl get ksvc paddleservice-lac -n paddleservice-system
90 | 
91 | # Check pods in namespace paddleservice-system
92 | kubectl get pods -n paddleservice-system
93 | ```
94 | 
95 | 2. Run the following command to get the ClusterIP
96 | ```bash
97 | $ kubectl get svc paddleservice-lac-default-private -n paddleservice-system
98 | ```
99 | 
100 | ## Test the LAC model service
101 | 
102 | The model service supports HTTP / BRPC / GRPC clients. For client code and environment setup details, see the document [Chinese Word Segmentation Model Service](https://github.com/PaddlePaddle/Serving/blob/develop/python/examples/lac/README_CN.md).
103 | 
104 | You can quickly check that the service works with the following command
105 | ```bash
106 | # Replace <IP-address> and <Port> with the cluster-ip and port of the paddleservice-lac-default-private service above.
107 | curl -H "Host: paddleservice-lac.paddleservice-system.example.com" -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "我爱北京天安门"}], "fetch":["word_seg"]}' http://<IP-address>:<Port>/lac/prediction
108 | ```
109 | 
110 | Expected result
111 | ```bash
112 | {"result":[{"word_seg":"\u6211|\u7231|\u5317\u4eac|\u5929\u5b89\u95e8"}]}
113 | ```
114 | 
-------------------------------------------------------------------------------- /docs/zh_CN/examples/wide_deep.md: --------------------------------------------------------------------------------
1 | # Wide & Deep Pipeline
2 | 
3 | This document describes how to run a full pipeline for the Wide & Deep model with Paddle Operator, ElasticServing, and other components, covering data preparation, model training, and model service deployment.
4 | Wide & Deep is a recommendation framework released by Google in 2016; the model code used in this example is provided by the [PaddleRec project](https://github.com/PaddlePaddle/PaddleRec/blob/release/2.1.0/models/rank/wide_deep/README.md).
5 | 
6 | ## I. Data Preparation
7 | 
8 | This example uses the Criteo dataset provided by the [Display Advertising Challenge](https://www.kaggle.com/c/criteo-display-ad-challenge/),
9 | which we have placed in a publicly accessible bucket on [Baidu Object Storage (BOS)](http://baidu.netnic.com.cn/doc/BOS/BOSCLI.html#BOS.20CMD): `bos://paddleflow-public.hkg.bcebos.com/criteo`.
10 | The sample-caching component of [Paddle Operator](https://github.com/PaddleFlow/paddle-operator) can cache the sample data locally in the cluster to speed up model training.
11 | For how to install the Paddle Operator sample-caching component, see the [quick start guide](https://github.com/xiaolao/paddle-operator/blob/sampleset/docs/zh_CN/ext-get-start.md).
12 | In this example we use the [JuiceFS CSI](https://github.com/juicedata/juicefs-csi-driver) driver to provide storage for data and models; it is also part of the Paddle Operator sample-caching component.
13 | 
14 | ### 1. Create the Secret required by the object storage
15 | 
16 | This example uses BOS as the storage backend; you can also use any other [object storage supported by JuiceFS](https://github.com/juicedata/juicefs/blob/main/docs/zh_cn/databases_for_metadata.md).
17 | Create the following Secret. The sample-caching component needs the access-key / secret-key it provides to access the object storage; metaurl is the access URL of the metadata storage engine.
18 | 
19 | ```yaml
20 | # criteo-secret.yaml
21 | apiVersion: v1
22 | data:
23 |   access-key: xxx
24 |   bucket: xxx
25 |   metaurl: xxx
26 |   name: Y3JpdGVv
27 |   secret-key: xxx
28 |   storage: Ym9z
29 | kind: Secret
30 | metadata:
31 |   name: criteo
32 |   namespace: paddle-system
33 | type: Opaque
34 | ```
35 | 
36 | Note: every field under `data` must be base64-encoded; for example, `echo -n "bos" | base64` yields `Ym9z`, the value of the `storage` field above.
37 | 
38 | ### 2. Create the SampleSet
39 | 
40 | The Paddle Operator sample-caching component abstracts sample datasets through the custom CRD SampleSet, which makes it easy for users to manage sample data.
41 | Create the following SampleSet and wait for the data sync to finish. The nodeAffinity field specifies which nodes the data should be cached on; for example, you can cache the data on GPU nodes.
42 | 
43 | ```yaml
44 | # criteo-sampleset.yaml
45 | apiVersion: batch.paddlepaddle.org/v1alpha1
46 | kind: SampleSet
47 | metadata:
48 |   name: criteo
49 |   namespace: paddle-system
50 | spec:
51 |   # Cache partitions; one host represents one partition
52 |   partitions: 1
53 |   source:
54 |     # Source of the sample data
55 |     uri: bos://paddleflow-public.hkg.bcebos.com/criteo
56 |     secretRef:
57 |       # Fill in the Secret created above
58 |       name: criteo
59 |   secretRef:
60 |     name: criteo
61 |   nodeAffinity:
62 |     requiredDuringSchedulingIgnoredDuringExecution:
63 |       nodeSelectorTerms:
64 |         - matchExpressions:
65 |             - key: beta.kubernetes.io/instance-gpu
66 |               operator: In
67 |               values:
68 |                 - "true"
69 | ```
70 | 
71 | Since the sample data is 22 GiB, it may take a while after the SampleSet is created. Once its phase changes to Ready, you can proceed to the model training step.
72 | 
73 | ```bash
74 | $ kubectl apply -f criteo-sampleset.yaml
75 | sampleset.batch.paddlepaddle.org/criteo created
76 | 
77 | $ kubectl get sampleset criteo -n paddle-system
78 | NAME     TOTAL SIZE   CACHED SIZE   AVAIL SPACE   RUNTIME   PHASE   AGE
79 | criteo   22 GiB       22 GiB        9.4 GiB       1/1       Ready   2d6h
80 | ```
81 | 
82 | ### 3. Prepare a Volume for storing the model (optional)
83 | 
84 | This step creates the PV and PVC objects used to store the model produced in later steps. This example uses [JuiceFS CSI static provisioning](https://github.com/juicedata/juicefs-csi-driver/tree/master/examples/static-provisioning)
85 | to create the PV and PVC; the storage backend is still BOS. You can also use the CSI driver of another storage engine, such as Ceph or GlusterFS.
86 | 
87 | Create the PV
88 | ```yaml
89 | apiVersion: v1
90 | kind: PersistentVolume
91 | metadata:
92 |   name: model-center
93 | spec:
94 |   accessModes:
95 |     - ReadWriteMany
96 |   capacity:
97 |     storage: 10Pi
98 |   csi:
99 |     driver: csi.juicefs.com
100 |     fsType: juicefs
101 |     # Secret required to access the remote object storage
102 |     nodePublishSecretRef:
103 |       name: criteo
104 |       namespace: paddle-system
105 |     # Bucket path in the object storage, used to store model files
106 |     volumeHandle: model-center
107 |   persistentVolumeReclaimPolicy: Retain
108 |   storageClassName: model-center
109 |   volumeMode: Filesystem
110 | ```
111 | 
112 | Create the PVC
113 | ```yaml
114 | apiVersion: v1
115 | kind: PersistentVolumeClaim
116 | metadata:
117 |   name: model-center
118 |   namespace: paddle-system
119 | spec:
120 |   accessModes:
121 |     - ReadWriteMany
122 |   resources:
123 |     requests:
124 |       storage: 10Pi
125 |   storageClassName: model-center
126 |   volumeMode: Filesystem
127 | ```
128 | 
129 | ## II. Model Training
130 | 
131 | The implementation of the Wide & Deep model comes from the [PaddleRec](https://github.com/PaddlePaddle/PaddleRec/blob/release/2.1.0/models/rank/wide_deep/README.md) project,
132 | and we provide an image of that project in a public registry: `registry.baidubce.com/paddleflow-public/paddlerec:2.1.0-gpu-cuda10.2-cudnn7`.
133 | This example trains in [Collective](https://fleet-x.readthedocs.io/en/latest/paddle_fleet_rst/collective/index.html) mode, which requires GPU devices.
134 | You can also train the model in [Parameter Server](https://fleet-x.readthedocs.io/en/latest/paddle_fleet_rst/paddle_on_k8s.html#wide-and-deep) mode.
135 | The CPU image of PaddleRec is available at `registry.baidubce.com/paddleflow-public/paddlerec:2.1.0`.
136 | 
137 | ### 1. Create the ConfigMap
138 | 
139 | Each model in the PaddleRec project specifies its hyperparameters and training configuration through a config file, so mounting the config file into the training container via a ConfigMap makes it convenient to modify.
140 | For the meaning of each field in the config file, see the document [PaddleRec config.yaml configuration guide](https://github.com/PaddlePaddle/PaddleRec/blob/release/2.1.0/doc/yaml.md)
141 | 
142 | ```yaml
143 | # wide_deep_config.yaml
144 | # global settings
145 | runner:
146 |   train_data_dir: "/mnt/criteo/slot_train_data_full"
147 |   train_reader_path: "criteo_reader"  # importlib format
148 |   use_gpu: True
149 |   use_auc: True
150 |   train_batch_size: 4096
151 |   epochs: 4
152 |   print_interval: 10
153 |   model_save_path: "/mnt/model"
154 |   test_data_dir: "/mnt/criteo/slot_test_data_full"
155 |   infer_reader_path: "criteo_reader"  # importlib format
156 |   infer_batch_size: 4096
157 |   infer_load_path: "/mnt/model"
158 |   infer_start_epoch: 0
159 |   infer_end_epoch: 4
160 |   use_inference: True
161 |   save_inference_feed_varnames: ["C1","C2","C3","C4","C5","C6","C7","C8","C9","C10","C11","C12","C13","C14","C15","C16","C17","C18","C19","C20","C21","C22","C23","C24","C25","C26","dense_input"]
162 |   save_inference_fetch_varnames: ["sigmoid_0.tmp_0"]
163 |   # use fleet
164 |   use_fleet: True
165 | 
166 | # hyper parameters of user-defined network
167 | hyper_parameters:
168 |   # optimizer config
169 |   optimizer:
170 |     class: Adam
171 |     learning_rate: 0.001
172 |     strategy: async
173 |   # user-defined <key, value> pairs
174 |   sparse_inputs_slots: 27
175 |   sparse_feature_number: 1000001
176 |   sparse_feature_dim: 9
177 |   dense_input_dim: 13
178 |   fc_sizes: [512, 256, 128, 32]
179 |   distributed_embedding: 0
180 | ```
181 | 
182 | Create the wide-deep-config ConfigMap
183 | 
184 | ```bash
185 | kubectl create configmap wide-deep-config -n paddle-system --from-file=wide_deep_config.yaml
186 | ```
187 | 
188 | ### 2. Create the PaddleJob
189 | 
190 | PaddleJob is a custom resource in the Paddle Operator project that defines a Paddle model training job.
191 | 
192 | ```yaml
193 | # wide-deep.yaml
194 | apiVersion: batch.paddlepaddle.org/v1
195 | kind: PaddleJob
196 | metadata:
197 |   name: wide-deep
198 |   namespace: paddle-system
199 | spec:
200 |   cleanPodPolicy: Never
201 |   # Declare the dataset (SampleSet) to use
202 |   sampleSetRef:
203 |     name: criteo
204 |     # Mount path of the dataset inside the container
205 |     mountPath: /mnt/criteo
206 |   worker:
207 |     replicas: 1
208 |     template:
209 |       spec:
210 |         containers:
211 |           - name: paddlerec
212 |             image: registry.baidubce.com/paddleflow-public/paddlerec:2.1.0-gpu-cuda10.2-cudnn7
213 |             workingDir: /home/PaddleRec/models/rank/wide_deep
214 |             command: ["/bin/bash", "-c", "cp /mnt/config/wide_deep_config.yaml . && mkdir -p /mnt/model/wide-deep && python -m paddle.distributed.launch --log_dir /mnt/model/log --gpus '0,1' ../../../tools/trainer.py -m wide_deep_config.yaml"]
215 |             volumeMounts:
216 |               # Mount host memory into the container to avoid OOM errors inside it
217 |               - mountPath: /dev/shm
218 |                 name: dshm
219 |               # Mount the ConfigMap into the container
220 |               - mountPath: /mnt/config
221 |                 name: config-volume
222 |               # Used to store the model
223 |               - mountPath: /mnt/model
224 |                 name: model-volume
225 |             resources:
226 |               limits:
227 |                 # Use two GPU devices on each host
228 |                 nvidia.com/gpu: 2
229 |         volumes:
230 |           - name: dshm
231 |             emptyDir:
232 |               medium: Memory
233 |           - name: config-volume
234 |             configMap:
235 |               name: wide-deep-config
236 |           - name: model-volume
237 |             persistentVolumeClaim:
238 |               claimName: model-center
239 | ```
240 | 
241 | Create the PaddleJob and check its status
242 | 
243 | ```bash
244 | $ kubectl create -f wide-deep.yaml
245 | 
246 | $ kubectl get paddlejob wide-deep -n paddle-system
247 | NAME        STATUS    MODE         WORKER   AGE
248 | wide-deep   Running   Collective   1/1      2m
249 | ```
250 | 
251 | ## III. Model Serving
252 | 
253 | After training finishes, numbered directories such as `0/` appear under the model storage path: during training, a model snapshot is saved for every epoch,
254 | so deploy the service with the model files from the directory with the largest number. Before a model can be deployed with Paddle Serving, the training output files must first be converted.
255 | We have placed the model files in a publicly accessible bucket: `https://paddleflow-public.hkg.bcebos.com/models/wide-deep/wide-deep.tar.gz`.
256 | 
257 | The directory structure of the extracted model files is as follows:
258 | ```
259 | .
260 | ├── rec_inference.pdiparams
261 | ├── rec_inference.pdmodel
262 | ├── rec_static.pdmodel
263 | ├── rec_static.pdopt
264 | └── rec_static.pdparams
265 | ```
266 | 
267 | ### 1. Create the PaddleService
268 | 
269 | ```yaml
270 | apiVersion: elasticserving.paddlepaddle.org/v1
271 | kind: PaddleService
272 | metadata:
273 |   name: wide-deep-serving
274 |   namespace: paddleservice-system
275 | spec:
276 |   default:
277 |     arg: wget https://paddleflow-public.hkg.bcebos.com/models/wide-deep/wide-deep.tar.gz &&
278 |       tar xzf wide-deep.tar.gz && rm -rf wide-deep.tar.gz &&
279 |       python3 -m paddle_serving_client.convert --dirname wide-deep/ --model_filename rec_inference.pdmodel --params_filename rec_inference.pdiparams &&
280 |       python3 -m paddle_serving_server.serve --model serving_server --port 9292
281 |     containerImage: registry.baidubce.com/paddleflow-public/serving
282 |     port: 9292
283 |     tag: v0.6.2
284 |   runtimeVersion: paddleserving
285 |   service:
286 |     minScale: 1
287 | ```
288 | 
289 | ### 2. Check the service status
290 | 
291 | ```bash
292 | # Check the Service in namespace paddleservice-system
293 | kubectl get svc -n paddleservice-system | grep wide-deep-serving
294 | 
295 | # Check the status of the knative service in namespace paddleservice-system
296 | kubectl get ksvc wide-deep-serving -n paddleservice-system
297 | 
298 | # List all pods in namespace paddleservice-system
299 | kubectl get pods -n paddleservice-system
300 | 
301 | # Run the following command to get the ClusterIP
302 | kubectl get svc wide-deep-serving-default-private -n paddleservice-system
303 | ```
304 | 
305 | The model service supports HTTP / BRPC / GRPC clients. For client code and environment setup details, see the document [Online Serving Deployment](https://github.com/PaddlePaddle/PaddleRec/blob/release/2.1.0/doc/serving.md).
306 | Note that you need to replace the service IP address and port in the client code with the cluster-ip and port of the wide-deep-serving-default-private service above.
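307 | 
308 | As a minimal smoke test, the sketch below sends one dummy sample over RPC, assuming the `serving_client/` directory produced by the `paddle_serving_client.convert` step is available locally. The endpoint address and the zero-valued features are placeholders; real feature IDs come from the Criteo preprocessing pipeline, and the feed/fetch names follow `wide_deep_config.yaml` above.
309 | 
310 | ```python
311 | # Smoke-test sketch for the converted Wide & Deep model.
312 | from paddle_serving_client import Client
313 | 
314 | client = Client()
315 | client.load_client_config("serving_client/serving_client_conf.prototxt")
316 | # Replace with the cluster-ip and port of wide-deep-serving-default-private.
317 | client.connect(["172.16.0.100:80"])
318 | 
319 | # One dummy sample: 26 sparse slots (C1..C26) and a 13-dim dense vector,
320 | # matching save_inference_feed_varnames in wide_deep_config.yaml.
321 | feed = {"C{}".format(i): [0] for i in range(1, 27)}
322 | feed["dense_input"] = [0.0] * 13
323 | result = client.predict(feed=feed, fetch=["sigmoid_0.tmp_0"])
324 | print(result)
325 | ```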
-------------------------------------------------------------------------------- /go.mod: --------------------------------------------------------------------------------
1 | module ElasticServing
2 | 
3 | go 1.15
4 | 
5 | require (
6 | 	github.com/go-logr/logr v0.2.0
7 | 	github.com/gogo/protobuf v1.3.1
8 | 	github.com/google/go-cmp v0.5.2
9 | 	github.com/onsi/ginkgo v1.12.1
10 | 	github.com/onsi/gomega v1.10.1
11 | 	github.com/prometheus/common v0.14.0 // indirect
12 | 	k8s.io/api v0.19.2
13 | 	k8s.io/apiextensions-apiserver v0.18.8 // indirect
14 | 	k8s.io/apimachinery v0.18.8
15 | 	k8s.io/client-go v11.0.1-0.20190805182717-6502b5e7b1b5+incompatible
16 | 	knative.dev/pkg v0.0.0-20200922164940-4bf40ad82aab
17 | 	knative.dev/serving v0.18.0
18 | 	sigs.k8s.io/controller-runtime v0.6.1
19 | )
20 | 
21 | replace (
22 | 	k8s.io/api => k8s.io/api v0.18.8
23 | 	k8s.io/apiextensions-apiserver => k8s.io/apiextensions-apiserver v0.18.8
24 | 	k8s.io/apiserver => k8s.io/apiserver v0.18.8
25 | 	k8s.io/cli-runtime => k8s.io/cli-runtime v0.18.8
26 | 	k8s.io/client-go => k8s.io/client-go v0.18.8
27 | 	k8s.io/cloud-provider => k8s.io/cloud-provider v0.18.8
28 | 	k8s.io/cluster-bootstrap => k8s.io/cluster-bootstrap v0.18.8
29 | 	k8s.io/code-generator => k8s.io/code-generator v0.18.8
30 | 	k8s.io/component-base => k8s.io/component-base v0.18.8
31 | 	k8s.io/cri-api => k8s.io/cri-api v0.18.8
32 | 	k8s.io/csi-translation-lib => k8s.io/csi-translation-lib v0.18.8
33 | 	k8s.io/klog => k8s.io/klog v1.0.0
34 | 	k8s.io/kube-aggregator => k8s.io/kube-aggregator v0.18.8
35 | 	k8s.io/kube-controller-manager => k8s.io/kube-controller-manager v0.18.8
36 | 	k8s.io/kube-proxy => k8s.io/kube-proxy v0.18.8
37 | 	k8s.io/kube-scheduler => k8s.io/kube-scheduler v0.18.8
38 | 	k8s.io/kubectl => k8s.io/kubectl v0.18.8
39 | 	k8s.io/kubelet => k8s.io/kubelet v0.18.8
40 | 	k8s.io/legacy-cloud-providers => k8s.io/legacy-cloud-providers v0.18.8
41 | 	k8s.io/metrics => k8s.io/metrics v0.18.8
42 | 	k8s.io/sample-apiserver => k8s.io/sample-apiserver v0.18.8
43 | 	k8s.io/test-infra => k8s.io/test-infra v0.0.0-20200803112140-d8aa4e063646
44 | 	k8s.io/utils => k8s.io/utils v0.0.0-20200324210504-a9aa75ae1b89
45 | 	// based on https://github.com/openshift/cluster-ingress-operator/pull/429/files#diff-33ef32bf6c23acb95f5902d7097b7a1d5128ca061167ec0716715b0b9eeaa5f6R34
46 | 	sigs.k8s.io/controller-runtime => github.com/zchee/sigs.k8s-controller-runtime v0.6.1-0.20200623114430-46812d3a0a50
47 | )
48 | 
-------------------------------------------------------------------------------- /hack/boilerplate.go.txt: --------------------------------------------------------------------------------
1 | /*
2 | 
3 | 
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 | 
8 |     http://www.apache.org/licenses/LICENSE-2.0
9 | 
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
-------------------------------------------------------------------------------- /hack/install_knative.sh: --------------------------------------------------------------------------------
1 | #!/usr/bin/bash
2 | 
3 | set -ex
4 | 
5 | # NOTE: some resources don't exist for KNATIVE_VERSION < v0.21.0
6 | export KNATIVE_VERSION=v0.21.0
7 | 
8 | # Install knative
9 | kubectl apply --filename https://github.com/knative/serving/releases/download/${KNATIVE_VERSION}/serving-crds.yaml
10 | kubectl apply --filename https://github.com/knative/serving/releases/download/${KNATIVE_VERSION}/serving-core.yaml
11 | 
12 | # Setup Istio
13 | kubectl apply --filename https://github.com/knative/net-istio/releases/download/${KNATIVE_VERSION}/istio.yaml
14 | kubectl apply --filename https://github.com/knative/net-istio/releases/download/${KNATIVE_VERSION}/net-istio.yaml
15 | 
-------------------------------------------------------------------------------- /hack/install_knative_kourier.sh: --------------------------------------------------------------------------------
1 | #!/usr/bin/bash
2 | 
3 | set -ex
4 | 
5 | export KNATIVE_VERSION=v0.22.0
6 | 
7 | # Install knative
8 | kubectl apply --filename https://github.com/knative/serving/releases/download/${KNATIVE_VERSION}/serving-crds.yaml
9 | kubectl apply --filename https://github.com/knative/serving/releases/download/${KNATIVE_VERSION}/serving-core.yaml
10 | 
11 | # Setup kourier
12 | kubectl apply --filename https://github.com/knative/net-kourier/releases/download/${KNATIVE_VERSION}/kourier.yaml
13 | kubectl patch configmap/config-network \
14 |   --namespace knative-serving \
15 |   --type merge \
16 |   --patch '{"data":{"ingress.class":"kourier.ingress.networking.knative.dev"}}'
17 | 
-------------------------------------------------------------------------------- /main.go: --------------------------------------------------------------------------------
1 | /*
2 | 
3 | 
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 | 
8 |     http://www.apache.org/licenses/LICENSE-2.0
9 | 
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */ 16 | 17 | package main 18 | 19 | import ( 20 | "flag" 21 | "os" 22 | 23 | "k8s.io/apimachinery/pkg/runtime" 24 | clientgoscheme "k8s.io/client-go/kubernetes/scheme" 25 | _ "k8s.io/client-go/plugin/pkg/client/auth/gcp" 26 | ctrl "sigs.k8s.io/controller-runtime" 27 | "sigs.k8s.io/controller-runtime/pkg/healthz" 28 | "sigs.k8s.io/controller-runtime/pkg/log/zap" 29 | 30 | elasticservingv1 "ElasticServing/pkg/apis/elasticserving/v1" 31 | controllers "ElasticServing/pkg/controllers/elasticserving" 32 | 33 | knservingv1 "knative.dev/serving/pkg/apis/serving/v1" 34 | // +kubebuilder:scaffold:imports 35 | ) 36 | 37 | var ( 38 | scheme = runtime.NewScheme() 39 | setupLog = ctrl.Log.WithName("setup") 40 | ) 41 | 42 | func init() { 43 | _ = clientgoscheme.AddToScheme(scheme) 44 | 45 | _ = elasticservingv1.AddToScheme(scheme) 46 | 47 | _ = knservingv1.AddToScheme(scheme) 48 | 49 | // +kubebuilder:scaffold:scheme 50 | } 51 | 52 | func main() { 53 | var metricsAddr string 54 | var enableLeaderElection bool 55 | flag.StringVar(&metricsAddr, "metrics-addr", ":8080", "The address the metric endpoint binds to.") 56 | flag.BoolVar(&enableLeaderElection, "enable-leader-election", false, 57 | "Enable leader election for controller manager. "+ 58 | "Enabling this will ensure there is only one active controller manager.") 59 | flag.Parse() 60 | 61 | ctrl.SetLogger(zap.New(zap.UseDevMode(true))) 62 | 63 | mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{ 64 | Scheme: scheme, 65 | MetricsBindAddress: metricsAddr, 66 | Port: 9443, 67 | LeaderElection: enableLeaderElection, 68 | LeaderElectionID: "c3643994.paddlepaddle.org", 69 | }) 70 | if err != nil { 71 | setupLog.Error(err, "unable to start manager") 72 | os.Exit(1) 73 | } 74 | 75 | if err = (&controllers.PaddleServiceReconciler{ 76 | Client: mgr.GetClient(), 77 | Log: ctrl.Log.WithName("controllers").WithName("PaddleService"), 78 | Scheme: mgr.GetScheme(), 79 | Recorder: mgr.GetEventRecorderFor("paddleflow_service-controller"), 80 | }).SetupWithManager(mgr); err != nil { 81 | setupLog.Error(err, "unable to create controller", "controller", "PaddleService") 82 | os.Exit(1) 83 | } 84 | // +kubebuilder:scaffold:builder 85 | 86 | if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil { 87 | setupLog.Error(err, "unable to set up health check") 88 | os.Exit(1) 89 | } 90 | if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil { 91 | setupLog.Error(err, "unable to set up ready check") 92 | os.Exit(1) 93 | } 94 | 95 | setupLog.Info("starting manager") 96 | if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil { 97 | setupLog.Error(err, "problem running manager") 98 | os.Exit(1) 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /pkg/apis/doc.go: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */
16 | 
17 | // +k8s:deepcopy-gen=package
18 | 
19 | package apis
20 | 
-------------------------------------------------------------------------------- /pkg/apis/elasticserving/v1/doc.go: --------------------------------------------------------------------------------
1 | /*
2 | 
3 | 
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 | 
8 |     http://www.apache.org/licenses/LICENSE-2.0
9 | 
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 | 
17 | // Package v1 contains PaddleService
18 | // +groupName=elasticserving.paddlepaddle.org
19 | // +k8s:openapi-gen=true
20 | package v1
21 | 
22 | // ###########################################################################
23 | // ########################## DO NOT EDIT This File ##########################
24 | // This file is used to generate crd api reference documents
25 | // ###########################################################################
26 | 
-------------------------------------------------------------------------------- /pkg/apis/elasticserving/v1/groupversion_info.go: --------------------------------------------------------------------------------
1 | /*
2 | 
3 | 
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 | 
8 |     http://www.apache.org/licenses/LICENSE-2.0
9 | 
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 | 
17 | // Package v1 contains API Schema definitions for the elasticserving v1 API group
18 | // +kubebuilder:object:generate=true
19 | // +groupName=elasticserving.paddlepaddle.org
20 | package v1
21 | 
22 | import (
23 | 	"k8s.io/apimachinery/pkg/runtime/schema"
24 | 	"sigs.k8s.io/controller-runtime/pkg/scheme"
25 | )
26 | 
27 | var (
28 | 	// GroupVersion is group version used to register these objects
29 | 	GroupVersion = schema.GroupVersion{Group: "elasticserving.paddlepaddle.org", Version: "v1"}
30 | 
31 | 	// SchemeBuilder is used to add go types to the GroupVersionKind scheme
32 | 	SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion}
33 | 
34 | 	// AddToScheme adds the types in this group-version to the given scheme.
35 | 	AddToScheme = SchemeBuilder.AddToScheme
36 | )
37 | 
-------------------------------------------------------------------------------- /pkg/apis/elasticserving/v1/paddleservice_status.go: --------------------------------------------------------------------------------
1 | /*
2 | 
3 | 
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 | 
8 |     http://www.apache.org/licenses/LICENSE-2.0
9 | 
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 | 
17 | package v1
18 | 
19 | import (
20 | 	"knative.dev/pkg/apis"
21 | 	knservingv1 "knative.dev/serving/pkg/apis/serving/v1"
22 | )
23 | 
24 | // ConditionType represents a Service condition value
25 | const (
26 | 	// RoutesReady is set when network configuration has completed.
27 | 	RoutesReady apis.ConditionType = "RoutesReady"
28 | 	// DefaultEndpointReady is set when default PaddleService Endpoint has reported readiness.
29 | 	DefaultEndpointReady apis.ConditionType = "DefaultEndpointReady"
30 | 	// CanaryEndpointReady is set when canary PaddleService Endpoint has reported readiness.
31 | 	CanaryEndpointReady apis.ConditionType = "CanaryEndpointReady"
32 | )
33 | 
34 | // The PaddleService Ready condition depends on the default PaddleService and route readiness conditions;
35 | // the canary readiness condition is only present when a canary is used and currently does
36 | // not affect the PaddleService readiness condition.
37 | var conditionSet = apis.NewLivingConditionSet(
38 | 	DefaultEndpointReady,
39 | 	CanaryEndpointReady,
40 | 	RoutesReady,
41 | )
42 | 
43 | var _ apis.ConditionsAccessor = (*PaddleServiceStatus)(nil)
44 | 
45 | func (ss *PaddleServiceStatus) InitializeConditions() {
46 | 	conditionSet.Manage(ss).InitializeConditions()
47 | }
48 | 
49 | // IsReady returns if the service is ready to serve the requested configuration.
50 | func (ss *PaddleServiceStatus) IsReady() bool {
51 | 	return conditionSet.Manage(ss).IsHappy()
52 | }
53 | 
54 | // GetCondition returns the condition by name.
55 | func (ss *PaddleServiceStatus) GetCondition(t apis.ConditionType) *apis.Condition {
56 | 	return conditionSet.Manage(ss).GetCondition(t)
57 | }
58 | 
59 | func (ss *PaddleServiceStatus) PropagateStatus(serviceStatus *knservingv1.ServiceStatus) {
60 | 	if serviceStatus == nil {
61 | 		return
62 | 	}
63 | 	// conditionType := DefaultEndpointReady
64 | 	statusSpec := StatusConfigurationSpec{}
65 | 	if ss.Default == nil {
66 | 		ss.Default = &statusSpec
67 | 	}
68 | 	statusSpec.Name = serviceStatus.LatestCreatedRevisionName
69 | 	// serviceCondition := serviceStatus.GetCondition(knservingv1.ServiceConditionReady)
70 | 
71 | 	// switch {
72 | 	// case serviceCondition == nil:
73 | 	// case serviceCondition.Status == v1.ConditionUnknown:
74 | 	// 	conditionSet.Manage(ss).MarkUnknown(conditionType, "serviceCondition.Reason", "string")
75 | 	// case serviceCondition.Status == v1.ConditionTrue:
76 | 	// 	conditionSet.Manage(ss).MarkTrue(conditionType)
77 | 	// case serviceCondition.Status == v1.ConditionFalse:
78 | 	// 	conditionSet.Manage(ss).MarkFalse(conditionType, serviceCondition.Reason, serviceCondition.Message)
79 | 	// }
80 | 	*ss.Default = statusSpec
81 | }
82 | 
-------------------------------------------------------------------------------- /pkg/apis/elasticserving/v1/paddleservice_types.go: --------------------------------------------------------------------------------
1 | /*
2 | 
3 | 
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 | 
8 |     http://www.apache.org/licenses/LICENSE-2.0
9 | 
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 | 
17 | package v1
18 | 
19 | import (
20 | 	corev1 "k8s.io/api/core/v1"
21 | 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
22 | 	duckv1 "knative.dev/pkg/apis/duck/v1"
23 | )
24 | 
25 | // EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN!
26 | // NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized.
27 | 
28 | // PaddleServiceSpec defines the desired state of PaddleService
29 | type PaddleServiceSpec struct {
30 | 	// INSERT ADDITIONAL SPEC FIELDS - desired state of cluster
31 | 	// Important: Run "make" to regenerate code after modifying this file
32 | 
33 | 	// Version of the service
34 | 	RuntimeVersion string `json:"runtimeVersion,omitempty"`
35 | 	// Defaults to requests and limits of 1CPU, 2Gb MEM.
36 | 	Resources corev1.ResourceRequirements `json:"resources,omitempty"`
37 | 	// Default defines the default PaddleService endpoint
38 | 	// +required
39 | 	Default *EndpointSpec `json:"default"`
40 | 	// Canary defines an alternative PaddleService endpoint
41 | 	// +optional
42 | 	Canary *EndpointSpec `json:"canary,omitempty"`
43 | 	// CanaryTrafficPercent defines the percentage of traffic going to canary PaddleService endpoints
44 | 	// +optional
45 | 	CanaryTrafficPercent *int `json:"canaryTrafficPercent,omitempty"`
46 | 	// +optional
47 | 	Service ServiceSpec `json:"service,omitempty"`
48 | 	// Container's working directory.
49 | 	// If not specified, the container runtime's default will be used, which
50 | 	// might be configured in the container image.
51 | 	// Cannot be updated.
52 | 	// +optional
53 | 	WorkingDir string `json:"workingDir,omitempty"`
54 | 	// Pod volumes to mount into the container's filesystem.
55 | 	// Cannot be updated.
56 | 	// +optional
57 | 	VolumeMounts []corev1.VolumeMount `json:"volumeMounts,omitempty"`
58 | 	// List of volumes that can be mounted by containers belonging to the pod.
59 | 	// More info: https://kubernetes.io/docs/concepts/storage/volumes
60 | 	// +optional
61 | 	Volumes []corev1.Volume `json:"volumes,omitempty"`
62 | }
63 | 
64 | // EndpointSpec defines the running containers
65 | type EndpointSpec struct {
66 | 	// +required
67 | 	ContainerImage string `json:"containerImage"`
68 | 	// +required
69 | 	Tag string `json:"tag"`
70 | 	// +required
71 | 	Port int32 `json:"port"`
72 | 	// +optional
73 | 	Argument string `json:"arg,omitempty"`
74 | }
75 | 
76 | // ServiceSpec defines the configuration for Knative Service.
77 | type ServiceSpec struct { 78 | //+optional 79 | Autoscaler Autoscaler `json:"autoscaler,omitempty"` 80 | //+optional 81 | Metric AutoscalerMetric `json:"metric,omitempty"` 82 | //+optional 83 | Window string `json:"window,omitempty"` 84 | //+optional 85 | PanicWindow string `json:"panicWindow,omitempty"` 86 | //+optional 87 | PanicThreshold string `json:"panicThreshold,omitempty"` 88 | //+optional 89 | MinScale *int `json:"minScale,omitempty"` 90 | //+optional 91 | MaxScale int `json:"maxScale,omitempty"` 92 | //+optional 93 | Target int `json:"target,omitempty"` 94 | //+optional 95 | TargetUtilization string `json:"targetUtilization,omitempty"` 96 | } 97 | 98 | // Autoscaler defines the autoscaler class 99 | //+kubebuilder:validation:Enum=kpa.autoscaling.knative.dev;hpa.autoscaling.knative.dev 100 | type Autoscaler string 101 | 102 | // AutoscalerMetric defines the metric for the autoscaler 103 | //+kubebuilder:validation:Enum=concurrency;rps;cpu 104 | type AutoscalerMetric string 105 | 106 | // PaddleServiceStatus defines the observed state of PaddleService 107 | type PaddleServiceStatus struct { 108 | // INSERT ADDITIONAL STATUS FIELD - define observed state of cluster 109 | // Important: Run "make" to regenerate code after modifying this file 110 | 111 | duckv1.Status `json:",inline"` 112 | // URL of the PaddleService 113 | URL string `json:"url,omitempty"` 114 | // Statuses for the default endpoints of the PaddleService 115 | Default *StatusConfigurationSpec `json:"default,omitempty"` 116 | // Statuses for the canary endpoints of the PaddleService 117 | Canary *StatusConfigurationSpec `json:"canary,omitempty"` 118 | // Addressable URL for eventing 119 | Address *duckv1.Addressable `json:"address,omitempty"` 120 | 121 | // +optional 122 | // +kubebuilder:validation:Minimum=0 123 | Replicas int32 `json:"replicas,omitempty"` 124 | } 125 | 126 | 127 | 128 | // PaddleService is the Schema for the paddles API 129 | // +kubebuilder:object:root=true 130 | // +kubebuilder:subresource:status 131 | // +kubebuilder:subresource:scale:specpath=.spec.replicas,statuspath=.status.replicas 132 | // +genclient 133 | type PaddleService struct { 134 | metav1.TypeMeta `json:",inline"` 135 | metav1.ObjectMeta `json:"metadata,omitempty"` 136 | 137 | Spec PaddleServiceSpec `json:"spec,omitempty"` 138 | Status PaddleServiceStatus `json:"status,omitempty"` 139 | } 140 | 141 | // +kubebuilder:object:root=true 142 | 143 | // PaddleServiceList contains a list of PaddleService 144 | type PaddleServiceList struct { 145 | metav1.TypeMeta `json:",inline"` 146 | metav1.ListMeta `json:"metadata,omitempty"` 147 | Items []PaddleService `json:"items"` 148 | } 149 | 150 | // StatusConfigurationSpec describes the state of the configuration receiving traffic. 151 | type StatusConfigurationSpec struct { 152 | // Latest revision name that is in ready state 153 | Name string `json:"name,omitempty"` 154 | } 155 | 156 | func init() { 157 | SchemeBuilder.Register(&PaddleService{}, &PaddleServiceList{}) 158 | } 159 | -------------------------------------------------------------------------------- /pkg/apis/elasticserving/v1/zz_generated.deepcopy.go: -------------------------------------------------------------------------------- 1 | // +build !ignore_autogenerated 2 | 3 | /* 4 | 5 | 6 | Licensed under the Apache License, Version 2.0 (the "License"); 7 | you may not use this file except in compliance with the License. 
8 | You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | 19 | // Code generated by controller-gen. DO NOT EDIT. 20 | 21 | package v1 22 | 23 | import ( 24 | corev1 "k8s.io/api/core/v1" 25 | runtime "k8s.io/apimachinery/pkg/runtime" 26 | duckv1 "knative.dev/pkg/apis/duck/v1" 27 | ) 28 | 29 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 30 | func (in *EndpointSpec) DeepCopyInto(out *EndpointSpec) { 31 | *out = *in 32 | } 33 | 34 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EndpointSpec. 35 | func (in *EndpointSpec) DeepCopy() *EndpointSpec { 36 | if in == nil { 37 | return nil 38 | } 39 | out := new(EndpointSpec) 40 | in.DeepCopyInto(out) 41 | return out 42 | } 43 | 44 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 45 | func (in *PaddleService) DeepCopyInto(out *PaddleService) { 46 | *out = *in 47 | out.TypeMeta = in.TypeMeta 48 | in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) 49 | in.Spec.DeepCopyInto(&out.Spec) 50 | in.Status.DeepCopyInto(&out.Status) 51 | } 52 | 53 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PaddleService. 54 | func (in *PaddleService) DeepCopy() *PaddleService { 55 | if in == nil { 56 | return nil 57 | } 58 | out := new(PaddleService) 59 | in.DeepCopyInto(out) 60 | return out 61 | } 62 | 63 | // DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 64 | func (in *PaddleService) DeepCopyObject() runtime.Object { 65 | if c := in.DeepCopy(); c != nil { 66 | return c 67 | } 68 | return nil 69 | } 70 | 71 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 72 | func (in *PaddleServiceList) DeepCopyInto(out *PaddleServiceList) { 73 | *out = *in 74 | out.TypeMeta = in.TypeMeta 75 | in.ListMeta.DeepCopyInto(&out.ListMeta) 76 | if in.Items != nil { 77 | in, out := &in.Items, &out.Items 78 | *out = make([]PaddleService, len(*in)) 79 | for i := range *in { 80 | (*in)[i].DeepCopyInto(&(*out)[i]) 81 | } 82 | } 83 | } 84 | 85 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PaddleServiceList. 86 | func (in *PaddleServiceList) DeepCopy() *PaddleServiceList { 87 | if in == nil { 88 | return nil 89 | } 90 | out := new(PaddleServiceList) 91 | in.DeepCopyInto(out) 92 | return out 93 | } 94 | 95 | // DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 96 | func (in *PaddleServiceList) DeepCopyObject() runtime.Object { 97 | if c := in.DeepCopy(); c != nil { 98 | return c 99 | } 100 | return nil 101 | } 102 | 103 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
104 | func (in *PaddleServiceSpec) DeepCopyInto(out *PaddleServiceSpec) { 105 | *out = *in 106 | in.Resources.DeepCopyInto(&out.Resources) 107 | if in.Default != nil { 108 | in, out := &in.Default, &out.Default 109 | *out = new(EndpointSpec) 110 | **out = **in 111 | } 112 | if in.Canary != nil { 113 | in, out := &in.Canary, &out.Canary 114 | *out = new(EndpointSpec) 115 | **out = **in 116 | } 117 | if in.CanaryTrafficPercent != nil { 118 | in, out := &in.CanaryTrafficPercent, &out.CanaryTrafficPercent 119 | *out = new(int) 120 | **out = **in 121 | } 122 | in.Service.DeepCopyInto(&out.Service) 123 | if in.VolumeMounts != nil { 124 | in, out := &in.VolumeMounts, &out.VolumeMounts 125 | *out = make([]corev1.VolumeMount, len(*in)) 126 | for i := range *in { 127 | (*in)[i].DeepCopyInto(&(*out)[i]) 128 | } 129 | } 130 | if in.Volumes != nil { 131 | in, out := &in.Volumes, &out.Volumes 132 | *out = make([]corev1.Volume, len(*in)) 133 | for i := range *in { 134 | (*in)[i].DeepCopyInto(&(*out)[i]) 135 | } 136 | } 137 | } 138 | 139 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PaddleServiceSpec. 140 | func (in *PaddleServiceSpec) DeepCopy() *PaddleServiceSpec { 141 | if in == nil { 142 | return nil 143 | } 144 | out := new(PaddleServiceSpec) 145 | in.DeepCopyInto(out) 146 | return out 147 | } 148 | 149 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 150 | func (in *PaddleServiceStatus) DeepCopyInto(out *PaddleServiceStatus) { 151 | *out = *in 152 | in.Status.DeepCopyInto(&out.Status) 153 | if in.Default != nil { 154 | in, out := &in.Default, &out.Default 155 | *out = new(StatusConfigurationSpec) 156 | **out = **in 157 | } 158 | if in.Canary != nil { 159 | in, out := &in.Canary, &out.Canary 160 | *out = new(StatusConfigurationSpec) 161 | **out = **in 162 | } 163 | if in.Address != nil { 164 | in, out := &in.Address, &out.Address 165 | *out = new(duckv1.Addressable) 166 | (*in).DeepCopyInto(*out) 167 | } 168 | } 169 | 170 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PaddleServiceStatus. 171 | func (in *PaddleServiceStatus) DeepCopy() *PaddleServiceStatus { 172 | if in == nil { 173 | return nil 174 | } 175 | out := new(PaddleServiceStatus) 176 | in.DeepCopyInto(out) 177 | return out 178 | } 179 | 180 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 181 | func (in *ServiceSpec) DeepCopyInto(out *ServiceSpec) { 182 | *out = *in 183 | if in.MinScale != nil { 184 | in, out := &in.MinScale, &out.MinScale 185 | *out = new(int) 186 | **out = **in 187 | } 188 | } 189 | 190 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceSpec. 191 | func (in *ServiceSpec) DeepCopy() *ServiceSpec { 192 | if in == nil { 193 | return nil 194 | } 195 | out := new(ServiceSpec) 196 | in.DeepCopyInto(out) 197 | return out 198 | } 199 | 200 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 201 | func (in *StatusConfigurationSpec) DeepCopyInto(out *StatusConfigurationSpec) { 202 | *out = *in 203 | } 204 | 205 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new StatusConfigurationSpec. 
206 | func (in *StatusConfigurationSpec) DeepCopy() *StatusConfigurationSpec {
207 | 	if in == nil {
208 | 		return nil
209 | 	}
210 | 	out := new(StatusConfigurationSpec)
211 | 	in.DeepCopyInto(out)
212 | 	return out
213 | }
214 | 
-------------------------------------------------------------------------------- /pkg/constants/constants.go: --------------------------------------------------------------------------------
1 | package constants
2 | 
3 | import (
4 | 	"knative.dev/serving/pkg/apis/autoscaling"
5 | )
6 | 
7 | // PaddleService Key
8 | const (
9 | 	PaddleService               = "paddleService"
10 | 	PaddleServiceDefaultPodName = "http1"
11 | )
12 | 
13 | // PaddleService configuration name and namespace
14 | const (
15 | 	PaddleServiceConfigName      = "paddleservice-config"
16 | 	PaddleServiceConfigNamespace = "paddleservice-system"
17 | )
18 | 
19 | // PaddleService resource defaults
20 | var (
21 | 	PaddleServiceDefaultCPU                               = "0.2"
22 | 	PaddleServiceDefaultMemory                            = "512Mi"
23 | 	PaddleServiceDefaultMinScale                          = 0 // 0 if scale-to-zero is desired
24 | 	PaddleServiceDefaultMaxScale                          = 0 // 0 means limitless
25 | 	PaddleServiceDefaultTimeout                     int64 = 300
26 | 	PaddleServiceDefaultScalingClass                      = autoscaling.KPA // kpa or hpa
27 | 	PaddleServiceDefaultScalingMetric                     = "concurrency"  // concurrency, rps or cpu (hpa required)
28 | 	PaddleServiceDefaultScalingTarget                     = 100
29 | 	PaddleServiceDefaultTargetUtilizationPercentage       = "70"
30 | 	PaddleServiceDefaultWindow                            = "60s"
31 | 	PaddleServiceDefaultPanicWindow                       = "10" // percentage of StableWindow
32 | 	PaddleServiceDefaultPanicThreshold                    = "200"
33 | 	PaddleServivceDefaultTrafficPercents                  = 50
34 | )
35 | 
36 | var (
37 | 	ReadinessInitialDelaySeconds int32 = 60
38 | 	ReadinessFailureThreshold    int32 = 3
39 | 	ReadinessPeriodSeconds       int32 = 10
40 | 	ReadinessTimeoutSeconds      int32 = 180
41 | 	SuccessThreshold             int32 = 1
42 | 	LivenessInitialDelaySeconds  int32 = 60
43 | 	LivenessFailureThreshold     int32 = 3
44 | 	LivenessPeriodSeconds        int32 = 10
45 | )
46 | 
47 | var (
48 | 	ServiceAnnotationsList = []string{
49 | 		autoscaling.MinScaleAnnotationKey,
50 | 		autoscaling.MaxScaleAnnotationKey,
51 | 		autoscaling.ClassAnnotationKey,
52 | 		autoscaling.MetricAnnotationKey,
53 | 		autoscaling.TargetAnnotationKey,
54 | 		autoscaling.TargetUtilizationPercentageKey,
55 | 		autoscaling.WindowAnnotationKey,
56 | 		autoscaling.PanicWindowPercentageAnnotationKey,
57 | 		autoscaling.PanicThresholdPercentageAnnotationKey,
58 | 		"kubectl.kubernetes.io/last-applied-configuration",
59 | 	}
60 | )
61 | 
62 | func DefaultServiceName(name string) string {
63 | 	return name + "-default"
64 | }
65 | 
66 | func CanaryServiceName(name string) string {
67 | 	return name + "-canary"
68 | }
69 | 
-------------------------------------------------------------------------------- /pkg/controllers/doc.go: --------------------------------------------------------------------------------
1 | package controllers
2 | 
-------------------------------------------------------------------------------- /pkg/controllers/elasticserving/paddleflow_serving_controller.go: --------------------------------------------------------------------------------
1 | /*
2 | 
3 | 
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package controllers 18 | 19 | import ( 20 | "context" 21 | 22 | elasticservingv1 "ElasticServing/pkg/apis/elasticserving/v1" 23 | "ElasticServing/pkg/controllers/elasticserving/reconcilers/knative" 24 | 25 | "github.com/go-logr/logr" 26 | apps "k8s.io/api/apps/v1" 27 | core "k8s.io/api/core/v1" 28 | "k8s.io/apimachinery/pkg/runtime" 29 | "k8s.io/client-go/tools/record" 30 | knservingv1 "knative.dev/serving/pkg/apis/serving/v1" 31 | ctrl "sigs.k8s.io/controller-runtime" 32 | "sigs.k8s.io/controller-runtime/pkg/client" 33 | "sigs.k8s.io/controller-runtime/pkg/reconcile" 34 | ) 35 | 36 | // PaddleServiceReconciler reconciles a PaddleService object 37 | type PaddleServiceReconciler struct { 38 | client.Client 39 | Log logr.Logger 40 | Scheme *runtime.Scheme 41 | Recorder record.EventRecorder 42 | } 43 | 44 | // +kubebuilder:rbac:groups=elasticserving.paddlepaddle.org,resources=paddleservices,verbs=get;list;watch;create;update;patch;delete 45 | // +kubebuilder:rbac:groups=elasticserving.paddlepaddle.org,resources=paddleservices/status,verbs=get;update;patch 46 | // +kubebuilder:rbac:groups=apps,resources=deployments,verbs=get;list;watch;create;update;delete 47 | // +kubebuilder:rbac:groups="",resources=events,verbs=create;patch 48 | 49 | func (r *PaddleServiceReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) { 50 | ctx := context.Background() 51 | log := r.Log.WithValues("paddlesvc", req.NamespacedName) 52 | 53 | // your logic here 54 | log.Info("reconciling paddlesvc") 55 | 56 | // Load the PaddleService by name 57 | var paddlesvc elasticservingv1.PaddleService 58 | if err := r.Get(ctx, req.NamespacedName, &paddlesvc); err != nil { 59 | log.Error(err, "unable to fetch PaddleService") 60 | // we'll ignore not-found errors, since they can't be fixed by an immediate 61 | // requeue (we'll need to wait for a new notification), and we can get them 62 | // on deleted requests. 63 | return ctrl.Result{}, client.IgnoreNotFound(err) 64 | } 65 | 66 | log.Info("Successfully fetching paddlesvc") 67 | 68 | serviceReconciler := knative.NewServiceReconciler(r.Client, r.Scheme, &paddlesvc) 69 | 70 | if err := serviceReconciler.Reconcile(&paddlesvc); err != nil { 71 | r.Log.Error(err, "Failed to finish knative reconcile") 72 | r.Recorder.Eventf(&paddlesvc, core.EventTypeWarning, "InternalError", err.Error()) 73 | return reconcile.Result{}, err 74 | } 75 | 76 | // Update status 77 | if err := r.Status().Update(ctx, &paddlesvc); err != nil { 78 | r.Recorder.Eventf(&paddlesvc, core.EventTypeWarning, "InternalError", err.Error()) 79 | return ctrl.Result{}, err 80 | } 81 | 82 | log.Info("resource status synced") 83 | 84 | return ctrl.Result{}, nil 85 | } 86 | 87 | func (r *PaddleServiceReconciler) SetupWithManager(mgr ctrl.Manager) error { 88 | 89 | return ctrl.NewControllerManagedBy(mgr). 90 | For(&elasticservingv1.PaddleService{}). 91 | Owns(&apps.Deployment{}). 92 | Owns(&knservingv1.Service{}). 
93 | Complete(r) 94 | } 95 | -------------------------------------------------------------------------------- /pkg/controllers/elasticserving/reconcilers/knative/service_reconciler.go: -------------------------------------------------------------------------------- 1 | package knative 2 | 3 | import ( 4 | "ElasticServing/pkg/constants" 5 | "ElasticServing/pkg/controllers/elasticserving/resources/knative" 6 | "context" 7 | "time" 8 | 9 | "k8s.io/apimachinery/pkg/api/equality" 10 | "k8s.io/apimachinery/pkg/api/errors" 11 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 12 | "k8s.io/apimachinery/pkg/runtime" 13 | "k8s.io/apimachinery/pkg/types" 14 | knservingv1 "knative.dev/serving/pkg/apis/serving/v1" 15 | "sigs.k8s.io/controller-runtime/pkg/client" 16 | "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" 17 | logf "sigs.k8s.io/controller-runtime/pkg/log" 18 | 19 | elasticservingv1 "ElasticServing/pkg/apis/elasticserving/v1" 20 | ) 21 | 22 | var log = logf.Log.WithName("ServiceReconciler") 23 | 24 | type ServiceReconciler struct { 25 | client client.Client 26 | scheme *runtime.Scheme 27 | serviceBuilder *knative.ServiceBuilder 28 | } 29 | 30 | func NewServiceReconciler(client client.Client, scheme *runtime.Scheme, paddlesvc *elasticservingv1.PaddleService) *ServiceReconciler { 31 | return &ServiceReconciler{ 32 | client: client, 33 | scheme: scheme, 34 | serviceBuilder: knative.NewServiceBuilder(paddlesvc), 35 | } 36 | } 37 | 38 | // +kubebuilder:rbac:groups=serving.knative.dev,resources=services,verbs=get;list;watch;create;update;patch;delete 39 | // +kubebuilder:rbac:groups=serving.knative.dev,resources=services/status,verbs=get;update;patch 40 | // +kubebuilder:rbac:groups=serving.knative.dev,resources=revisions,verbs=get;list;watch;create;update;patch;delete 41 | // +kubebuilder:rbac:groups=serving.knative.dev,resources=revisions/status,verbs=get;update;patch 42 | // +kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=roles,verbs=get;list;create; 43 | // +kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=rolebindings,verbs=get;list;create; 44 | // +kubebuilder:rbac:groups="",resources=serviceaccounts,verbs=get;list;create; 45 | // +kubebuilder:rbac:groups="",resources=services,verbs=* 46 | // +kubebuilder:rbac:groups="",resources=pods,verbs=* 47 | // +kubebuilder:rbac:groups="",resources=events,verbs=create;patch 48 | // +kubebuilder:rbac:groups="",resources=configmaps,verbs=get;list;watch 49 | // +kubebuilder:rbac:groups=elasticserving.paddlepaddle.org,resources=paddleservices,verbs=get;list;watch;create;update;patch;delete 50 | // +kubebuilder:rbac:groups=elasticserving.paddlepaddle.org,resources=paddleservices/status,verbs=get;update;patch 51 | 52 | func (r *ServiceReconciler) Reconcile(paddlesvc *elasticservingv1.PaddleService) error { 53 | var service *knservingv1.Service 54 | var serviceWithCanary *knservingv1.Service 55 | var err error 56 | serviceName := paddlesvc.Name 57 | service, err = r.serviceBuilder.CreateService(serviceName, paddlesvc, false) 58 | if err != nil { 59 | return err 60 | } 61 | 62 | if service == nil { 63 | if err = r.finalizeService(serviceName, paddlesvc.Namespace); err != nil { 64 | return err 65 | } 66 | // TODO: Modify status 67 | // paddlesvc.Status.PropagateStatus(nil) 68 | return nil 69 | } 70 | 71 | if _, err := r.reconcileDefaultEndpoint(paddlesvc, service); err != nil { 72 | return err 73 | } else { 74 | // TODO: Modify status 75 | // paddlesvc.Status.PropagateStatus(status) 76 | } 77 | 78 | serviceWithCanary, err = 
r.serviceBuilder.CreateService(serviceName, paddlesvc, true) 79 | if err != nil { 80 | return err 81 | } 82 | if serviceWithCanary == nil { 83 | if err = r.finalizeCanaryEndpoint(serviceName, paddlesvc.Namespace, service.Spec); err != nil { 84 | return err 85 | } 86 | return nil 87 | } 88 | 89 | if _, err := r.reconcileCanaryEndpoint(paddlesvc, serviceWithCanary, service.Spec); err != nil { 90 | return err 91 | } else { 92 | // TODO: Modify status 93 | // paddlesvc.Status.PropagateStatus(status) 94 | } 95 | 96 | return nil 97 | } 98 | 99 | func (r *ServiceReconciler) finalizeService(serviceName, namespace string) error { 100 | existing := &knservingv1.Service{} 101 | if err := r.client.Get(context.TODO(), types.NamespacedName{Name: serviceName, Namespace: namespace}, existing); err != nil { 102 | if !errors.IsNotFound(err) { 103 | return err 104 | } 105 | } else { 106 | log.Info("Deleting Knative Service", "namespace", namespace, "name", serviceName) 107 | if err := r.client.Delete(context.TODO(), existing, client.PropagationPolicy(metav1.DeletePropagationBackground)); err != nil { 108 | if !errors.IsNotFound(err) { 109 | return err 110 | } 111 | } 112 | } 113 | return nil 114 | } 115 | 116 | func (r *ServiceReconciler) finalizeCanaryEndpoint(serviceName, namespace string, serviceSpec knservingv1.ServiceSpec) error { 117 | existing := &knservingv1.Service{} 118 | existingRevision := &knservingv1.Revision{} 119 | canaryServiceName := constants.CanaryServiceName(serviceName) 120 | if err := r.client.Get(context.TODO(), types.NamespacedName{Name: canaryServiceName, Namespace: namespace}, existingRevision); err != nil { 121 | if !errors.IsNotFound(err) { 122 | return err 123 | } 124 | } else { 125 | if err := r.client.Get(context.TODO(), types.NamespacedName{Name: serviceName, Namespace: namespace}, existing); err != nil { 126 | return err 127 | } 128 | 129 | existing.Spec = serviceSpec 130 | if err := r.client.Update(context.TODO(), existing); err != nil { 131 | return err 132 | } 133 | 134 | log.Info("Deleting Knative Canary Endpoint", "namespace", namespace, "name", serviceName) 135 | if err := r.client.Delete(context.TODO(), existingRevision, client.PropagationPolicy(metav1.DeletePropagationBackground)); err != nil { 136 | if !errors.IsNotFound(err) { 137 | return err 138 | } 139 | } 140 | } 141 | return nil 142 | } 143 | 144 | func (r *ServiceReconciler) reconcileDefaultEndpoint(paddlesvc *elasticservingv1.PaddleService, desired *knservingv1.Service) (*knservingv1.ServiceStatus, error) { 145 | // Set Paddlesvc as owner of desired service 146 | if err := controllerutil.SetControllerReference(paddlesvc, desired, r.scheme); err != nil { 147 | return nil, err 148 | } 149 | 150 | // Create service if does not exist 151 | existing := &knservingv1.Service{} 152 | err := r.client.Get(context.TODO(), types.NamespacedName{Name: desired.Name, Namespace: desired.Namespace}, existing) 153 | if err != nil { 154 | if errors.IsNotFound(err) { 155 | log.Info("Creating Knative Default Endpoint", "namespace", desired.Namespace, "name", desired.Name) 156 | err = r.client.Create(context.TODO(), desired) 157 | if err != nil { 158 | return nil, err 159 | } 160 | for { 161 | err = r.client.Get(context.TODO(), types.NamespacedName{Name: desired.Name, Namespace: desired.Namespace}, existing) 162 | if err == nil || !errors.IsNotFound(err) { 163 | break 164 | } 165 | time.Sleep(100 * time.Millisecond) 166 | } 167 | if err != nil { 168 | return nil, err 169 | } 170 | return &existing.Status, nil 171 | } 172 | return 
212 | func (r *ServiceReconciler) reconcileCanaryEndpoint(paddlesvc *elasticservingv1.PaddleService, desired *knservingv1.Service, serviceSpec knservingv1.ServiceSpec) (*knservingv1.ServiceStatus, error) {
213 | // Set the PaddleService as owner of the desired Knative Service
214 | if err := controllerutil.SetControllerReference(paddlesvc, desired, r.scheme); err != nil {
215 | return nil, err
216 | }
217 |
218 | existingRevision := &knservingv1.Revision{}
219 | existing := &knservingv1.Service{}
220 |
221 | // Create the canary revision if it does not exist
222 | err := r.client.Get(context.TODO(), types.NamespacedName{Name: constants.CanaryServiceName(desired.Name), Namespace: desired.Namespace}, existingRevision)
223 | if err != nil {
224 | if errors.IsNotFound(err) {
225 | log.Info("Creating Canary Revision", "namespace", desired.Namespace, "name", desired.Name)
226 | err = r.client.Get(context.TODO(), types.NamespacedName{Name: desired.Name, Namespace: desired.Namespace}, existing)
227 | if err != nil {
228 | return &desired.Status, err
229 | }
230 |
231 | if knativeSpecSemanticEquals(desired.Spec, existing.Spec) {
232 | return &existing.Status, nil
233 | }
234 | existing.Spec = desired.Spec
235 |
236 | err = r.client.Update(context.TODO(), existing)
237 | if err != nil {
238 | return nil, err
239 | }
240 | return &existing.Status, nil
241 | }
242 | return nil, err
243 | }
244 |
245 | err = r.client.Get(context.TODO(), types.NamespacedName{Name: constants.CanaryServiceName(desired.Name), Namespace: desired.Namespace}, existingRevision) // re-read the canary revision before comparing
246 | if err != nil {
247 | return nil, err
248 | }
249 |
250 | desiredRevision, err := r.serviceBuilder.CreateRevision(constants.CanaryServiceName(desired.Name), paddlesvc, true)
251 | if err != nil {
252 | return nil, err
253 | }
254 |
255 | err = r.client.Get(context.TODO(), types.NamespacedName{Name: desired.Name, Namespace: desired.Namespace}, existing)
256 | if err != nil {
257 | return &desired.Status, err
258 | }
259 |
260 | if knativeRevisionSemanticEquals(desiredRevision, existingRevision) &&
261 | knativeServiceTrafficSemanticEquals(desired, existing) {
262 | log.Info("No revision differences found")
263 | return &existing.Status, nil
264 | }
265 |
266 | // The update is a two-step process:
267 | // 1. Delete the canary endpoint (revision).
268 | // 2. Update the Knative Service so its template matches desired.Spec.
269 | err = r.finalizeCanaryEndpoint(paddlesvc.Name, paddlesvc.Namespace, serviceSpec)
270 | if err != nil {
271 | return nil, err
272 | }
273 |
274 | existing.Spec = desired.Spec
275 |
276 | err = r.client.Update(context.TODO(), existing)
277 | if err != nil {
278 | return nil, err
279 | }
280 | return &existing.Status, nil
281 | }
282 |
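// Note (not part of the original source): the helpers below compare desired vs.
// live state with equality.Semantic.DeepDerivative, which treats fields left at
// their zero value in the first argument as "don't care". A hypothetical
// standalone illustration of that behavior:
//
//	type probe struct {
//		Path          string
//		PeriodSeconds int32
//	}
//	desired := probe{Path: "/healthz"} // PeriodSeconds deliberately unset
//	existing := probe{Path: "/healthz", PeriodSeconds: 10}
//	equality.Semantic.DeepDerivative(desired, existing)               // true: zero fields are ignored
//	equality.Semantic.DeepDerivative(probe{Path: "/ready"}, existing) // false: an explicit difference
//
// This is why a sparsely populated desired spec does not trigger spurious updates.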
283 | func knativeSpecSemanticEquals(desired, existing interface{}) bool {
284 | return equality.Semantic.DeepDerivative(desired, existing)
285 | }
286 |
287 | func knativeServiceTrafficSemanticEquals(desired, existing *knservingv1.Service) bool {
288 | return equality.Semantic.DeepDerivative(desired.Spec.RouteSpec, existing.Spec.RouteSpec)
289 | }
290 |
291 | func knativeRevisionSemanticEquals(desired, existing *knservingv1.Revision) bool {
292 | return equality.Semantic.DeepDerivative(desired.ObjectMeta.Annotations, existing.ObjectMeta.Annotations) &&
293 | equality.Semantic.DeepDerivative(desired.Spec, existing.Spec)
294 | }
295 |
--------------------------------------------------------------------------------
/pkg/controllers/elasticserving/resources/knative/service.go:
--------------------------------------------------------------------------------
1 | package knative
2 |
3 | import (
4 | "fmt"
5 | "strconv"
6 |
7 | "ElasticServing/pkg/constants"
8 |
9 | core "k8s.io/api/core/v1"
10 | "k8s.io/apimachinery/pkg/api/resource"
11 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
12 | "k8s.io/apimachinery/pkg/util/intstr"
13 | "knative.dev/serving/pkg/apis/autoscaling"
14 | knservingv1 "knative.dev/serving/pkg/apis/serving/v1"
15 |
16 | elasticservingv1 "ElasticServing/pkg/apis/elasticserving/v1"
17 | )
18 |
19 | type EndpointConfig struct { // image, port, and launch argument for one serving endpoint
20 | Image string `json:"image,omitempty"`
21 | Port int32 `json:"port,omitempty"`
22 | Argument string `json:"arg,omitempty"`
23 | }
24 |
25 | type ServiceBuilder struct {
26 | defaultEndpointConfig *EndpointConfig
27 | canaryEndpointConfig *EndpointConfig
28 | }
29 |
30 | func NewServiceBuilder(paddlesvc *elasticservingv1.PaddleService) *ServiceBuilder {
31 | defaultEndpointConfig := &EndpointConfig{}
32 | defaultEndpointConfig.Image = paddlesvc.Spec.Default.ContainerImage + ":" + paddlesvc.Spec.Default.Tag
33 | defaultEndpointConfig.Port = paddlesvc.Spec.Default.Port
34 | defaultEndpointConfig.Argument = paddlesvc.Spec.Default.Argument
35 | if paddlesvc.Spec.Canary == nil {
36 | return &ServiceBuilder{
37 | defaultEndpointConfig: defaultEndpointConfig,
38 | canaryEndpointConfig: nil,
39 | }
40 | } else {
41 | canaryEndpointConfig := &EndpointConfig{}
42 | canaryEndpointConfig.Image = paddlesvc.Spec.Canary.ContainerImage + ":" + paddlesvc.Spec.Canary.Tag
43 | canaryEndpointConfig.Port = paddlesvc.Spec.Canary.Port
44 | canaryEndpointConfig.Argument = paddlesvc.Spec.Canary.Argument
45 | return &ServiceBuilder{
46 | defaultEndpointConfig: defaultEndpointConfig,
47 | canaryEndpointConfig: canaryEndpointConfig,
48 | }
49 | }
50 | }
51 |
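// A hypothetical usage sketch (not part of the original source): building both
// endpoints for a PaddleService that declares a canary. The variable names are
// illustrative only.
//
//	builder := NewServiceBuilder(paddlesvc) // paddlesvc.Spec.Canary != nil
//	defaultSvc, _ := builder.CreateService("paddlesvc", paddlesvc, false)
//	canarySvc, _ := builder.CreateService("paddlesvc", paddlesvc, true)
//	// canarySvc carries the canary image/port/arg plus a traffic split; with no
//	// canary configured, the canary call would return (nil, nil) instead.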
52 | func (r *ServiceBuilder) CreateService(serviceName string, paddlesvc *elasticservingv1.PaddleService, isCanary bool) (*knservingv1.Service, error) {
53 | arg := r.defaultEndpointConfig.Argument
54 | containerImage := r.defaultEndpointConfig.Image
55 | containerPort := r.defaultEndpointConfig.Port
56 |
57 | if isCanary && r.canaryEndpointConfig == nil {
58 | return nil, nil // no canary configured, so there is nothing to build
59 | } else if isCanary {
60 | arg = r.canaryEndpointConfig.Argument
61 | containerImage = r.canaryEndpointConfig.Image
62 | containerPort = r.canaryEndpointConfig.Port
63 | }
64 |
65 | metadata := paddlesvc.ObjectMeta
66 | paddlesvcSpec := paddlesvc.Spec
67 |
68 | resources, err := r.buildResources(metadata, paddlesvcSpec)
69 | if err != nil {
70 | return nil, err
71 | }
72 |
73 | annotations, err := r.buildAnnotations(metadata, paddlesvcSpec)
74 | if err != nil {
75 | return nil, err
76 | }
77 | concurrency := int64(paddlesvcSpec.Service.Target)
78 |
79 | command := []string{"/bin/bash", "-c"}
80 | args := []string{
81 | arg,
82 | }
83 |
84 | revisionName := constants.DefaultServiceName(serviceName)
85 | if isCanary {
86 | revisionName = constants.CanaryServiceName(serviceName)
87 | }
88 |
89 | // Volumes declared on the PaddleService spec
90 | volumes := paddlesvc.Spec.Volumes
91 | // VolumeMounts declared on the PaddleService spec
92 | volumeMounts := paddlesvc.Spec.VolumeMounts
93 |
94 | service := &knservingv1.Service{
95 | ObjectMeta: metav1.ObjectMeta{
96 | Name: serviceName,
97 | Namespace: paddlesvc.Namespace,
98 | Labels: paddlesvc.Labels,
99 | },
100 | Spec: knservingv1.ServiceSpec{
101 | ConfigurationSpec: knservingv1.ConfigurationSpec{
102 | Template: knservingv1.RevisionTemplateSpec{
103 | ObjectMeta: metav1.ObjectMeta{
104 | Name: revisionName,
105 | Labels: map[string]string{
106 | "PaddleService": paddlesvc.Name,
107 | },
108 | Annotations: annotations,
109 | },
110 | Spec: knservingv1.RevisionSpec{
111 | TimeoutSeconds: &constants.PaddleServiceDefaultTimeout,
112 | ContainerConcurrency: &concurrency,
113 | PodSpec: core.PodSpec{
114 | Volumes: volumes,
115 | Containers: []core.Container{
116 | {
117 | ImagePullPolicy: core.PullAlways,
118 | Name: paddlesvc.Spec.RuntimeVersion,
119 | Image: containerImage,
120 | Ports: []core.ContainerPort{
121 | {ContainerPort: containerPort,
122 | Name: constants.PaddleServiceDefaultPodName,
123 | Protocol: core.ProtocolTCP,
124 | },
125 | },
126 | Command: command,
127 | Args: args,
128 | ReadinessProbe: &core.Probe{
129 | SuccessThreshold: constants.SuccessThreshold,
130 | InitialDelaySeconds: constants.ReadinessInitialDelaySeconds,
131 | TimeoutSeconds: constants.ReadinessTimeoutSeconds,
132 | FailureThreshold: constants.ReadinessFailureThreshold,
133 | PeriodSeconds: constants.ReadinessPeriodSeconds,
134 | Handler: core.Handler{
135 | TCPSocket: &core.TCPSocketAction{
136 | Port: intstr.FromInt(0),
137 | },
138 | },
139 | },
140 | LivenessProbe: &core.Probe{
141 | InitialDelaySeconds: constants.LivenessInitialDelaySeconds,
142 | FailureThreshold: constants.LivenessFailureThreshold,
143 | PeriodSeconds: constants.LivenessPeriodSeconds,
144 | Handler: core.Handler{
145 | TCPSocket: &core.TCPSocketAction{
146 | Port: intstr.FromInt(0),
147 | },
148 | },
149 | },
150 | Resources: resources,
151 | VolumeMounts: volumeMounts,
152 | },
153 | },
154 | },
155 | },
156 | },
157 | },
158 | },
159 | }
160 | if isCanary {
161 | r.AddTrafficRoute(serviceName, paddlesvc, service)
162 | }
163 | return service, nil
164 | }
165 |
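// Note (not part of the original source): the revision names used above come
// from pkg/constants. Judging from the expectations in service_test.go further
// down, they resolve to "<name>-default" and "<name>-canary"; a hypothetical
// restatement of that convention, for reference only:
//
//	func DefaultServiceName(name string) string { return name + "-default" }
//	func CanaryServiceName(name string) string  { return name + "-canary" }
//
//	DefaultServiceName("paddlesvc") // "paddlesvc-default"
//	CanaryServiceName("paddlesvc")  // "paddlesvc-canary"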
166 | func (r *ServiceBuilder) AddTrafficRoute(serviceName string, paddlesvc *elasticservingv1.PaddleService, service *knservingv1.Service) {
167 | canaryTrafficPercent := constants.PaddleServivceDefaultTrafficPercents
168 | setLastRevision := false // pin traffic to the named revisions below
169 | if paddlesvc.Spec.CanaryTrafficPercent != nil {
170 | canaryTrafficPercent = *paddlesvc.Spec.CanaryTrafficPercent
171 | }
172 |
173 | defaultPercent := int64(100 - canaryTrafficPercent)
174 | canaryPercent := int64(canaryTrafficPercent)
175 | defaultTraffic := knservingv1.TrafficTarget{
176 | RevisionName: constants.DefaultServiceName(serviceName),
177 | LatestRevision: &setLastRevision,
178 | Percent: &defaultPercent,
179 | }
180 | canaryTraffic := knservingv1.TrafficTarget{
181 | RevisionName: constants.CanaryServiceName(serviceName),
182 | LatestRevision: &setLastRevision,
183 | Percent: &canaryPercent,
184 | }
185 | traffic := []knservingv1.TrafficTarget{
186 | defaultTraffic,
187 | canaryTraffic,
188 | }
189 |
190 | service.Spec.RouteSpec.Traffic = traffic
191 | }
192 |
193 | func (r *ServiceBuilder) CreateRevision(serviceName string, paddlesvc *elasticservingv1.PaddleService, isCanary bool) (*knservingv1.Revision, error) {
194 | arg := r.defaultEndpointConfig.Argument
195 | containerImage := r.defaultEndpointConfig.Image
196 | containerPort := r.defaultEndpointConfig.Port
197 | if isCanary && r.canaryEndpointConfig != nil { // guard against a nil canary config, mirroring CreateService
198 | arg = r.canaryEndpointConfig.Argument
199 | containerImage = r.canaryEndpointConfig.Image
200 | containerPort = r.canaryEndpointConfig.Port
201 | }
202 | metadata := paddlesvc.ObjectMeta
203 | paddlesvcSpec := paddlesvc.Spec
204 | resources, err := r.buildResources(metadata, paddlesvcSpec)
205 | if err != nil {
206 | return nil, err
207 | }
208 |
209 | annotations, err := r.buildAnnotations(metadata, paddlesvcSpec)
210 | if err != nil {
211 | return nil, err
212 | }
213 | concurrency := int64(paddlesvcSpec.Service.Target)
214 |
215 | command := []string{"/bin/bash", "-c"}
216 | args := []string{
217 | arg,
218 | }
219 |
220 | // Volumes declared on the PaddleService spec
221 | volumes := paddlesvc.Spec.Volumes
222 | // VolumeMounts declared on the PaddleService spec
223 | volumeMounts := paddlesvc.Spec.VolumeMounts
224 |
225 | revision := knservingv1.Revision{
226 | ObjectMeta: metav1.ObjectMeta{
227 | Name: serviceName,
228 | Namespace: paddlesvc.Namespace,
229 | Labels: paddlesvc.Labels,
230 | Annotations: annotations,
231 | },
232 | Spec: knservingv1.RevisionSpec{
233 | TimeoutSeconds: &constants.PaddleServiceDefaultTimeout,
234 | ContainerConcurrency: &concurrency,
235 | PodSpec: core.PodSpec{
236 | Volumes: volumes,
237 | Containers: []core.Container{
238 | {
239 | ImagePullPolicy: core.PullAlways,
240 | Name: paddlesvc.Spec.RuntimeVersion,
241 | Image: containerImage,
242 | Ports: []core.ContainerPort{
243 | {ContainerPort: containerPort,
244 | Name: constants.PaddleServiceDefaultPodName,
245 | Protocol: core.ProtocolTCP,
246 | },
247 | },
248 | Command: command,
249 | Args: args,
250 | ReadinessProbe: &core.Probe{
251 | SuccessThreshold: constants.SuccessThreshold,
252 | InitialDelaySeconds: constants.ReadinessInitialDelaySeconds,
253 | TimeoutSeconds: constants.ReadinessTimeoutSeconds,
254 | FailureThreshold: constants.ReadinessFailureThreshold,
255 | PeriodSeconds: constants.ReadinessPeriodSeconds,
256 | Handler: core.Handler{
257 | TCPSocket: &core.TCPSocketAction{
258 | Port: intstr.FromInt(0),
259 | },
260 | },
261 | },
262 | LivenessProbe: &core.Probe{
263 | InitialDelaySeconds: constants.LivenessInitialDelaySeconds,
264 | FailureThreshold: constants.LivenessFailureThreshold,
265 | PeriodSeconds: constants.LivenessPeriodSeconds,
266 | Handler: core.Handler{
267 | TCPSocket: &core.TCPSocketAction{
268 | Port: intstr.FromInt(0),
269 | },
270 | },
271 | },
272 | Resources: resources,
273 | VolumeMounts: volumeMounts,
274 | },
275 | },
276 | },
277 | },
278 | }
279 |
280 | return &revision, nil
281 | }
282 |
283 | func (r *ServiceBuilder) buildAnnotations(metadata metav1.ObjectMeta, paddlesvcSpec elasticservingv1.PaddleServiceSpec) (map[string]string, error) {
284 | annotations := make(map[string]string)
285 |
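// Note (not part of the original source): each block below writes one
// autoscaling.knative.dev annotation, falling back to the defaults in
// pkg/constants when the corresponding spec field is empty. For a fully empty
// Service spec the result matches the fixture in service_test.go below:
//
//	autoscaling.knative.dev/class:                       kpa.autoscaling.knative.dev
//	autoscaling.knative.dev/metric:                      concurrency
//	autoscaling.knative.dev/target:                      100
//	autoscaling.knative.dev/targetUtilizationPercentage: 70
//	autoscaling.knative.dev/window:                      60s
//	autoscaling.knative.dev/panicWindowPercentage:       10
//	autoscaling.knative.dev/panicThresholdPercentage:    200
//	autoscaling.knative.dev/minScale:                    0
//	autoscaling.knative.dev/maxScale:                    0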
| 286 | // Autoscaler 287 | if paddlesvcSpec.Service.Autoscaler == "" { 288 | annotations[autoscaling.ClassAnnotationKey] = constants.PaddleServiceDefaultScalingClass 289 | } else { 290 | annotations[autoscaling.ClassAnnotationKey] = string(paddlesvcSpec.Service.Autoscaler) 291 | } 292 | 293 | // Metric 294 | if paddlesvcSpec.Service.Metric == "" { 295 | annotations[autoscaling.MetricAnnotationKey] = constants.PaddleServiceDefaultScalingMetric 296 | } else { 297 | annotations[autoscaling.MetricAnnotationKey] = string(paddlesvcSpec.Service.Metric) 298 | } 299 | 300 | // Target 301 | if paddlesvcSpec.Service.Target == 0 { 302 | annotations[autoscaling.TargetAnnotationKey] = fmt.Sprint(constants.PaddleServiceDefaultScalingTarget) 303 | } else { 304 | annotations[autoscaling.TargetAnnotationKey] = strconv.Itoa(paddlesvcSpec.Service.Target) 305 | } 306 | 307 | // Target utilization 308 | if paddlesvcSpec.Service.TargetUtilization == "" { 309 | annotations[autoscaling.TargetUtilizationPercentageKey] = constants.PaddleServiceDefaultTargetUtilizationPercentage 310 | } else { 311 | annotations[autoscaling.TargetUtilizationPercentageKey] = paddlesvcSpec.Service.TargetUtilization 312 | } 313 | 314 | // Window 315 | if paddlesvcSpec.Service.Window == "" { 316 | annotations[autoscaling.WindowAnnotationKey] = constants.PaddleServiceDefaultWindow 317 | } else { 318 | annotations[autoscaling.WindowAnnotationKey] = paddlesvcSpec.Service.Window 319 | } 320 | 321 | // Panic window 322 | if paddlesvcSpec.Service.PanicWindow == "" { 323 | annotations[autoscaling.PanicWindowPercentageAnnotationKey] = constants.PaddleServiceDefaultPanicWindow 324 | } else { 325 | annotations[autoscaling.PanicWindowPercentageAnnotationKey] = paddlesvcSpec.Service.PanicWindow 326 | } 327 | 328 | // Panic threshold 329 | if paddlesvcSpec.Service.PanicThreshold == "" { 330 | annotations[autoscaling.PanicThresholdPercentageAnnotationKey] = constants.PaddleServiceDefaultPanicThreshold 331 | } else { 332 | annotations[autoscaling.PanicThresholdPercentageAnnotationKey] = paddlesvcSpec.Service.PanicThreshold 333 | } 334 | 335 | // Min replicas 336 | if paddlesvcSpec.Service.MinScale == nil { 337 | annotations[autoscaling.MinScaleAnnotationKey] = fmt.Sprint(constants.PaddleServiceDefaultMinScale) 338 | } else { 339 | annotations[autoscaling.MinScaleAnnotationKey] = strconv.Itoa(*paddlesvcSpec.Service.MinScale) 340 | } 341 | 342 | // Max replicas 343 | if paddlesvcSpec.Service.MaxScale == 0 { 344 | annotations[autoscaling.MaxScaleAnnotationKey] = fmt.Sprint(constants.PaddleServiceDefaultMaxScale) 345 | } else { 346 | annotations[autoscaling.MaxScaleAnnotationKey] = strconv.Itoa(paddlesvcSpec.Service.MaxScale) 347 | } 348 | 349 | return annotations, nil 350 | } 351 | 352 | func (r *ServiceBuilder) buildResources(metadata metav1.ObjectMeta, paddlesvcSpec elasticservingv1.PaddleServiceSpec) (core.ResourceRequirements, error) { 353 | defaultResources := core.ResourceList{ 354 | core.ResourceCPU: resource.MustParse(constants.PaddleServiceDefaultCPU), 355 | core.ResourceMemory: resource.MustParse(constants.PaddleServiceDefaultMemory), 356 | } 357 | 358 | if paddlesvcSpec.Resources.Requests == nil { 359 | paddlesvcSpec.Resources.Requests = defaultResources 360 | } else { 361 | for name, value := range defaultResources { 362 | if _, ok := paddlesvcSpec.Resources.Requests[name]; !ok { 363 | paddlesvcSpec.Resources.Requests[name] = value 364 | } 365 | } 366 | } 367 | 368 | if paddlesvcSpec.Resources.Limits == nil { 369 | 
paddlesvcSpec.Resources.Limits = defaultResources 370 | } else { 371 | for name, value := range defaultResources { 372 | if _, ok := paddlesvcSpec.Resources.Limits[name]; !ok { 373 | paddlesvcSpec.Resources.Limits[name] = value 374 | } 375 | } 376 | } 377 | 378 | return paddlesvcSpec.Resources, nil 379 | } 380 | -------------------------------------------------------------------------------- /pkg/controllers/elasticserving/resources/knative/service_test.go: -------------------------------------------------------------------------------- 1 | package knative 2 | 3 | import ( 4 | elasticservingv1 "ElasticServing/pkg/apis/elasticserving/v1" 5 | "ElasticServing/pkg/constants" 6 | "testing" 7 | 8 | "github.com/google/go-cmp/cmp" 9 | core "k8s.io/api/core/v1" 10 | "k8s.io/apimachinery/pkg/api/resource" 11 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 12 | "k8s.io/apimachinery/pkg/util/intstr" 13 | knservingv1 "knative.dev/serving/pkg/apis/serving/v1" 14 | ) 15 | 16 | const ( 17 | image = "hub.baidubce.com/paddlepaddle/serving" 18 | port = 9292 19 | tag = "latest" 20 | actualTestServiceName = "paddlesvc" 21 | paddleServiceDefaultCPU = "0.1" 22 | paddleServiceDefaultMemory = "128Mi" 23 | paddleServiceName = "paddlesvc" 24 | paddleServiceNamespace = "default" 25 | runtimeVersion = "latest" 26 | ) 27 | 28 | var ( 29 | command = []string{"/bin/bash", "-c"} 30 | args = []string{""} 31 | containerConcurrency = int64(0) 32 | timeoutSeconds = int64(300) 33 | 34 | readinessInitialDelaySeconds = 60 35 | readinessFailureThreshold = 3 36 | readinessPeriodSeconds = 10 37 | readinessTimeoutSeconds = 180 38 | livenessInitialDelaySeconds = 60 39 | livenessFailureThreshold = 3 40 | livenessPeriodSeconds = 10 41 | 42 | defaultTrafficPercent int64 = 50 43 | 44 | setLastRevision bool = false 45 | ) 46 | 47 | var defaultResources = core.ResourceList{ 48 | core.ResourceCPU: resource.MustParse(paddleServiceDefaultCPU), 49 | core.ResourceMemory: resource.MustParse(paddleServiceDefaultMemory), 50 | } 51 | 52 | var annotations = map[string]string{ 53 | "autoscaling.knative.dev/class": "kpa.autoscaling.knative.dev", 54 | "autoscaling.knative.dev/maxScale": "0", 55 | "autoscaling.knative.dev/metric": "concurrency", 56 | "autoscaling.knative.dev/minScale": "0", 57 | "autoscaling.knative.dev/panicThresholdPercentage": "200", 58 | "autoscaling.knative.dev/panicWindowPercentage": "10", 59 | "autoscaling.knative.dev/target": "100", 60 | "autoscaling.knative.dev/targetUtilizationPercentage": "70", 61 | "autoscaling.knative.dev/window": "60s", 62 | } 63 | 64 | var paddlesvc = elasticservingv1.PaddleService{ 65 | ObjectMeta: metav1.ObjectMeta{ 66 | Name: paddleServiceName, 67 | Namespace: paddleServiceNamespace, 68 | }, 69 | Spec: elasticservingv1.PaddleServiceSpec{ 70 | RuntimeVersion: runtimeVersion, 71 | Resources: core.ResourceRequirements{ 72 | Requests: defaultResources, 73 | Limits: defaultResources, 74 | }, 75 | Default: &elasticservingv1.EndpointSpec{ 76 | ContainerImage: image, 77 | Tag: tag, 78 | Port: port, 79 | }, 80 | }, 81 | } 82 | 83 | var paddlesvcCanaryWithSameConfig = elasticservingv1.PaddleService{ 84 | ObjectMeta: metav1.ObjectMeta{ 85 | Name: paddleServiceName, 86 | Namespace: paddleServiceNamespace, 87 | }, 88 | Spec: elasticservingv1.PaddleServiceSpec{ 89 | RuntimeVersion: runtimeVersion, 90 | Resources: core.ResourceRequirements{ 91 | Requests: defaultResources, 92 | Limits: defaultResources, 93 | }, 94 | Default: &elasticservingv1.EndpointSpec{ 95 | ContainerImage: image, 96 | Tag: tag, 97 | Port: port, 
98 | }, 99 | Canary: &elasticservingv1.EndpointSpec{ 100 | ContainerImage: image, 101 | Tag: tag, 102 | Port: port, 103 | }, 104 | }, 105 | } 106 | 107 | var defaultService = &knservingv1.Service{ 108 | ObjectMeta: metav1.ObjectMeta{ 109 | Name: paddlesvc.Name, 110 | Namespace: paddleServiceNamespace, 111 | }, 112 | Spec: knservingv1.ServiceSpec{ 113 | ConfigurationSpec: knservingv1.ConfigurationSpec{ 114 | Template: knservingv1.RevisionTemplateSpec{ 115 | ObjectMeta: metav1.ObjectMeta{ 116 | Name: paddlesvc.Name + "-default", 117 | Labels: map[string]string{ 118 | "PaddleService": paddlesvc.Name, 119 | }, 120 | Annotations: annotations, 121 | }, 122 | Spec: knservingv1.RevisionSpec{ 123 | ContainerConcurrency: &containerConcurrency, 124 | TimeoutSeconds: &timeoutSeconds, 125 | PodSpec: core.PodSpec{ 126 | Containers: []core.Container{ 127 | { 128 | ImagePullPolicy: core.PullAlways, 129 | Name: paddlesvc.Spec.RuntimeVersion, 130 | Image: image + ":" + tag, 131 | Ports: []core.ContainerPort{ 132 | {ContainerPort: port, Name: "http1", Protocol: "TCP"}, 133 | }, 134 | Command: command, 135 | Args: args, 136 | ReadinessProbe: &core.Probe{ 137 | InitialDelaySeconds: int32(readinessInitialDelaySeconds), 138 | FailureThreshold: int32(readinessFailureThreshold), 139 | PeriodSeconds: int32(readinessPeriodSeconds), 140 | TimeoutSeconds: int32(readinessTimeoutSeconds), 141 | SuccessThreshold: int32(1), 142 | Handler: core.Handler{ 143 | TCPSocket: &core.TCPSocketAction{ 144 | Port: intstr.FromInt(0), 145 | }, 146 | }, 147 | }, 148 | LivenessProbe: &core.Probe{ 149 | InitialDelaySeconds: int32(livenessInitialDelaySeconds), 150 | FailureThreshold: int32(livenessFailureThreshold), 151 | PeriodSeconds: int32(livenessPeriodSeconds), 152 | Handler: core.Handler{ 153 | TCPSocket: &core.TCPSocketAction{ 154 | Port: intstr.FromInt(0), 155 | }, 156 | }, 157 | }, 158 | Resources: paddlesvc.Spec.Resources, 159 | }, 160 | }, 161 | }, 162 | }, 163 | }, 164 | }, 165 | }, 166 | } 167 | 168 | var canaryServiceWithSameConfig = &knservingv1.Service{ 169 | ObjectMeta: metav1.ObjectMeta{ 170 | Name: paddlesvc.Name, 171 | Namespace: paddleServiceNamespace, 172 | }, 173 | Spec: knservingv1.ServiceSpec{ 174 | ConfigurationSpec: knservingv1.ConfigurationSpec{ 175 | Template: knservingv1.RevisionTemplateSpec{ 176 | ObjectMeta: metav1.ObjectMeta{ 177 | Name: paddlesvc.Name + "-canary", 178 | Labels: map[string]string{ 179 | "PaddleService": paddlesvc.Name, 180 | }, 181 | Annotations: annotations, 182 | }, 183 | Spec: knservingv1.RevisionSpec{ 184 | ContainerConcurrency: &containerConcurrency, 185 | TimeoutSeconds: &timeoutSeconds, 186 | PodSpec: core.PodSpec{ 187 | Containers: []core.Container{ 188 | { 189 | ImagePullPolicy: core.PullAlways, 190 | Name: paddlesvc.Spec.RuntimeVersion, 191 | Image: image + ":" + tag, 192 | Ports: []core.ContainerPort{ 193 | {ContainerPort: port, Name: "http1", Protocol: "TCP"}, 194 | }, 195 | Command: command, 196 | Args: args, 197 | ReadinessProbe: &core.Probe{ 198 | InitialDelaySeconds: int32(readinessInitialDelaySeconds), 199 | FailureThreshold: int32(readinessFailureThreshold), 200 | PeriodSeconds: int32(readinessPeriodSeconds), 201 | TimeoutSeconds: int32(readinessTimeoutSeconds), 202 | SuccessThreshold: int32(1), 203 | Handler: core.Handler{ 204 | TCPSocket: &core.TCPSocketAction{ 205 | Port: intstr.FromInt(0), 206 | }, 207 | }, 208 | }, 209 | LivenessProbe: &core.Probe{ 210 | InitialDelaySeconds: int32(livenessInitialDelaySeconds), 211 | FailureThreshold: int32(livenessFailureThreshold), 
212 | PeriodSeconds: int32(livenessPeriodSeconds),
213 | Handler: core.Handler{
214 | TCPSocket: &core.TCPSocketAction{
215 | Port: intstr.FromInt(0),
216 | },
217 | },
218 | },
219 | Resources: paddlesvc.Spec.Resources,
220 | },
221 | },
222 | },
223 | },
224 | },
225 | },
226 | RouteSpec: knservingv1.RouteSpec{
227 | Traffic: []knservingv1.TrafficTarget{
228 | {
229 | RevisionName: paddlesvc.Name + "-default",
230 | LatestRevision: &setLastRevision,
231 | Percent: &defaultTrafficPercent,
232 | },
233 | {
234 | RevisionName: paddlesvc.Name + "-canary",
235 | LatestRevision: &setLastRevision,
236 | Percent: &defaultTrafficPercent,
237 | },
238 | },
239 | },
240 | },
241 | }
242 |
243 | func TestDefaultPaddleServiceToKnativeService(t *testing.T) {
244 | scenarios := map[string]struct {
245 | paddleService elasticservingv1.PaddleService
246 | expectedDefault *knservingv1.Service
247 | }{
248 | "Default Test": {
249 | paddleService: paddlesvc,
250 | expectedDefault: defaultService,
251 | },
252 | }
253 | serviceBuilder := NewServiceBuilder(&paddlesvc)
254 |
255 | for name, scenario := range scenarios {
256 | actualDefaultService, err := serviceBuilder.CreateService(actualTestServiceName, &paddlesvc, false)
257 | if err != nil {
258 | t.Errorf("Test %q unexpected error %s", name, err.Error())
259 | }
260 | if diff := cmp.Diff(scenario.expectedDefault, actualDefaultService); diff != "" {
261 | t.Errorf("Test %q unexpected default service (-want +got): %v", name, diff)
262 | }
263 | }
264 | }
265 |
266 | func TestCanaryPaddleServiceToKnativeService(t *testing.T) {
267 | scenarios := map[string]struct {
268 | paddleService elasticservingv1.PaddleService
269 | expectedCanary *knservingv1.Service
270 | }{
271 | "Canary Test": {
272 | paddleService: paddlesvc,
273 | expectedCanary: canaryServiceWithSameConfig,
274 | },
275 | }
276 | serviceBuilder := NewServiceBuilder(&paddlesvcCanaryWithSameConfig)
277 |
278 | for name, scenario := range scenarios {
279 | actualCanaryService, err := serviceBuilder.CreateService(actualTestServiceName, &paddlesvcCanaryWithSameConfig, true)
280 | if err != nil {
281 | t.Errorf("Test %q unexpected error %s", name, err.Error())
282 | }
283 | if diff := cmp.Diff(scenario.expectedCanary, actualCanaryService); diff != "" {
284 | t.Errorf("Test %q unexpected canary service (-want +got): %v", name, diff)
285 | }
286 | }
287 | }
288 |
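// Aside (not part of the original source): both tests above follow Go's
// table-driven pattern: a map of named scenarios, one builder under test, and
// cmp.Diff for a readable field-by-field failure report. A minimal standalone
// sketch of the same shape, with a hypothetical function under test:
//
//	scenarios := map[string]struct {
//		in, want int
//	}{
//		"zero": {in: 0, want: 0},
//		"one":  {in: 1, want: 2},
//	}
//	for name, s := range scenarios {
//		if diff := cmp.Diff(s.want, double(s.in)); diff != "" {
//			t.Errorf("Test %q mismatch (-want +got): %v", name, diff)
//		}
//	}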
289 | var defaultEndpoint = &knservingv1.Revision{
290 | ObjectMeta: metav1.ObjectMeta{
291 | Name: paddlesvc.Name,
292 | Namespace: paddlesvc.Namespace,
293 | Labels: paddlesvc.Labels,
294 | Annotations: annotations,
295 | },
296 | Spec: knservingv1.RevisionSpec{
297 | TimeoutSeconds: &constants.PaddleServiceDefaultTimeout,
298 | ContainerConcurrency: &containerConcurrency,
299 | PodSpec: core.PodSpec{
300 | Containers: []core.Container{
301 | {
302 | ImagePullPolicy: core.PullAlways,
303 | Name: paddlesvc.Spec.RuntimeVersion,
304 | Image: image + ":" + tag,
305 | Ports: []core.ContainerPort{
306 | {ContainerPort: port,
307 | Name: constants.PaddleServiceDefaultPodName,
308 | Protocol: core.ProtocolTCP,
309 | },
310 | },
311 | Command: command,
312 | Args: args,
313 | ReadinessProbe: &core.Probe{
314 | SuccessThreshold: constants.SuccessThreshold,
315 | InitialDelaySeconds: constants.ReadinessInitialDelaySeconds,
316 | TimeoutSeconds: constants.ReadinessTimeoutSeconds,
317 | FailureThreshold: constants.ReadinessFailureThreshold,
318 | PeriodSeconds: constants.ReadinessPeriodSeconds,
319 | Handler: core.Handler{
320 | TCPSocket: &core.TCPSocketAction{
321 | Port: intstr.FromInt(0),
322 | },
323 | },
324 | },
325 | LivenessProbe: &core.Probe{
326 | InitialDelaySeconds: constants.LivenessInitialDelaySeconds,
327 | FailureThreshold: constants.LivenessFailureThreshold,
328 | PeriodSeconds: constants.LivenessPeriodSeconds,
329 | Handler: core.Handler{
330 | TCPSocket: &core.TCPSocketAction{
331 | Port: intstr.FromInt(0),
332 | },
333 | },
334 | },
335 | Resources: paddlesvc.Spec.Resources,
336 | },
337 | },
338 | },
339 | },
340 | }
341 |
342 | func TestPaddleEndpointToKnativeRevision(t *testing.T) {
343 | scenarios := map[string]struct {
344 | paddleService elasticservingv1.PaddleService
345 | expectedRevision *knservingv1.Revision
346 | }{
347 | "Default Test": {
348 | paddleService: paddlesvc,
349 | expectedRevision: defaultEndpoint,
350 | },
351 | }
352 | serviceBuilder := NewServiceBuilder(&paddlesvc)
353 |
354 | for name, scenario := range scenarios {
355 | actualDefaultEndpoint, err := serviceBuilder.CreateRevision(actualTestServiceName, &paddlesvc, false)
356 | if err != nil {
357 | t.Errorf("Test %q unexpected error %s", name, err.Error())
358 | }
359 | if diff := cmp.Diff(scenario.expectedRevision, actualDefaultEndpoint); diff != "" {
360 | t.Errorf("Test %q unexpected revision (-want +got): %v", name, diff)
361 | }
362 | }
363 | }
364 |
--------------------------------------------------------------------------------
/pkg/controllers/elasticserving/suite_test.go:
--------------------------------------------------------------------------------
1 | /*
2 |
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 |
17 | package controllers
18 |
19 | import (
20 | "context"
21 | "math/rand"
22 | "path/filepath"
23 | "testing"
24 | "time"
25 |
26 | . "github.com/onsi/ginkgo"
27 | . "github.com/onsi/gomega"
28 |
29 | "github.com/gogo/protobuf/proto"
30 | "github.com/onsi/gomega/gexec"
31 | core "k8s.io/api/core/v1"
32 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
33 | "k8s.io/client-go/kubernetes/scheme"
34 | clientgoscheme "k8s.io/client-go/kubernetes/scheme"
35 | "k8s.io/client-go/rest"
36 | ctrl "sigs.k8s.io/controller-runtime"
37 | "sigs.k8s.io/controller-runtime/pkg/client"
38 | "sigs.k8s.io/controller-runtime/pkg/envtest"
39 | "sigs.k8s.io/controller-runtime/pkg/envtest/printer"
40 | logf "sigs.k8s.io/controller-runtime/pkg/log"
41 | "sigs.k8s.io/controller-runtime/pkg/log/zap"
42 |
43 | elasticservingv1 "ElasticServing/pkg/apis/elasticserving/v1"
44 | // +kubebuilder:scaffold:imports
45 | )
46 |
47 | // These tests use Ginkgo (BDD-style Go testing framework). Refer to
48 | // http://onsi.github.io/ginkgo/ to learn more about Ginkgo.
49 |
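// Note (not part of the original source): the suite below bootstraps
// controller-runtime's envtest with UseExistingCluster set, so the specs run
// against a real cluster. A hypothetical minimal variant that instead starts a
// local etcd + kube-apiserver (no cluster required) would simply drop that flag:
//
//	testEnv = &envtest.Environment{
//		CRDDirectoryPaths: []string{filepath.Join("../../..", "config", "crd", "bases")},
//	}
//	cfg, err := testEnv.Start() // launches the local control-plane binaries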
50 | var cfg *rest.Config
51 | var k8sClient client.Client
52 | var testEnv *envtest.Environment
53 |
54 | func TestAPIs(t *testing.T) {
55 | RegisterFailHandler(Fail)
56 |
57 | RunSpecsWithDefaultAndCustomReporters(t,
58 | "Controller Suite",
59 | []Reporter{printer.NewlineReporter{}})
60 | }
61 |
62 | var _ = BeforeSuite(func(done Done) {
63 | logf.SetLogger(zap.LoggerTo(GinkgoWriter, true))
64 |
65 | By("bootstrapping test environment")
66 | testEnv = &envtest.Environment{
67 | CRDDirectoryPaths: []string{filepath.Join("../../..", "config", "crd", "bases")},
68 | UseExistingCluster: proto.Bool(true),
69 | }
70 |
71 | var err error
72 | cfg, err = testEnv.Start()
73 | Expect(err).ToNot(HaveOccurred())
74 | Expect(cfg).ToNot(BeNil())
75 |
76 | err = clientgoscheme.AddToScheme(scheme.Scheme)
77 | Expect(err).NotTo(HaveOccurred())
78 |
79 | err = elasticservingv1.AddToScheme(scheme.Scheme)
80 | Expect(err).NotTo(HaveOccurred())
81 |
82 |
83 | // +kubebuilder:scaffold:scheme
84 |
85 | k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme})
86 | Expect(err).ToNot(HaveOccurred())
87 | Expect(k8sClient).ToNot(BeNil())
88 |
89 | close(done)
90 | }, 60)
91 |
92 | var _ = AfterSuite(func() {
93 | By("tearing down the test environment")
94 | gexec.KillAndWait(5 * time.Second)
95 | err := testEnv.Stop()
96 | Expect(err).ToNot(HaveOccurred())
97 | })
98 |
99 | // SetupNs sets up a testing environment.
100 | // This includes:
101 | // * creating a Namespace to be used during the test
102 | // * starting the 'PaddleService Reconciler'
103 | // * stopping the 'PaddleService Reconciler' after the test ends
104 | // Call this function at the start of each of your tests.
105 | func SetupNs(ctx context.Context) *core.Namespace {
106 | ns := &core.Namespace{}
107 | var stopCh chan struct{}
108 | BeforeEach(func() {
109 | stopCh = make(chan struct{})
110 |
111 | *ns = core.Namespace{
112 | ObjectMeta: metav1.ObjectMeta{Name: "testns-" + randStringRunes(5)},
113 | }
114 |
115 | err := k8sClient.Create(ctx, ns)
116 | Expect(err).NotTo(HaveOccurred(), "failed to create test namespace")
117 |
118 | mgr, err := ctrl.NewManager(cfg, ctrl.Options{Scheme: scheme.Scheme})
119 | Expect(err).NotTo(HaveOccurred(), "failed to create manager")
120 |
121 | controller := &PaddleServiceReconciler{
122 | Client: mgr.GetClient(),
123 | Log: logf.Log,
124 | Scheme: mgr.GetScheme(),
125 | Recorder: mgr.GetEventRecorderFor("paddlesvc-controller"),
126 | }
127 | err = controller.SetupWithManager(mgr)
128 | Expect(err).NotTo(HaveOccurred(), "failed to setup controller")
129 |
130 | go func() {
131 | err := mgr.Start(stopCh)
132 | Expect(err).NotTo(HaveOccurred(), "failed to start manager")
133 | }()
134 | })
135 |
136 | AfterEach(func() {
137 |
138 | k8sClient.Delete(ctx, ns) // best-effort cleanup; the namespace is test-scoped
139 | close(stopCh)
140 |
141 | })
142 | return ns
143 | }
144 |
145 | func init() {
146 | rand.Seed(time.Now().UnixNano())
147 | }
148 |
149 | var letterRunes = []rune("abcdefghijklmnopqrstuvwxyz1234567890")
150 |
151 | func randStringRunes(n int) string {
152 | b := make([]rune, n)
153 | for i := range b {
154 | b[i] = letterRunes[rand.Intn(len(letterRunes))]
155 | }
156 | return string(b)
157 | }
158 |
--------------------------------------------------------------------------------
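Appendix (not part of the original source): a minimal sketch of how a Ginkgo spec could consume SetupNs above, following its doc comment. The Describe text and PaddleService field values are illustrative and mirror the fixtures in service_test.go.

package controllers

import (
	"context"

	. "github.com/onsi/ginkgo"
	. "github.com/onsi/gomega"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	elasticservingv1 "ElasticServing/pkg/apis/elasticserving/v1"
)

var _ = Describe("PaddleService controller", func() {
	ctx := context.Background()
	ns := SetupNs(ctx) // fresh namespace plus a running reconciler per spec

	It("accepts a PaddleService", func() {
		paddlesvc := &elasticservingv1.PaddleService{
			ObjectMeta: metav1.ObjectMeta{Name: "paddlesvc", Namespace: ns.Name},
			Spec: elasticservingv1.PaddleServiceSpec{
				RuntimeVersion: "latest",
				Default: &elasticservingv1.EndpointSpec{
					ContainerImage: "hub.baidubce.com/paddlepaddle/serving",
					Tag:            "latest",
					Port:           9292,
				},
			},
		}
		// k8sClient is the suite-level client initialized in BeforeSuite above.
		Expect(k8sClient.Create(ctx, paddlesvc)).To(Succeed())
	})
})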