├── Stable-Diffusion-Vertex ├── pipeline │ ├── images │ │ ├── .gitignore │ │ ├── one-stage.png │ │ ├── architecture.png │ │ └── Vertex-AI-Screenshot.png │ ├── requirements.txt │ ├── Dockerfile_kohya │ ├── pipeline_conf.yml │ └── README.md ├── images │ ├── FileStore.png │ ├── hpo_status.png │ ├── workbench_status.png │ ├── workbench_executor.png │ ├── custom_training_status.png │ ├── workbench_custom_image.png │ └── workbench_image_status.png ├── Diffusers │ ├── metadata.jsonl │ ├── vertex-config.yaml │ ├── cloud-build-config.yaml │ ├── vertex-config-nfs.yaml │ ├── Dockerfile │ └── cloud-cli.sh ├── Kohya-lora │ ├── vertex-config.yaml │ ├── cloud-build-config-kohya.yaml │ ├── vertex-config-nfs.yaml │ ├── cloud-cli.sh │ └── Dockerfile_kohya ├── hpo │ ├── kohya-lora │ │ ├── cloud-cli.sh │ │ ├── cloud-build-hpo.yaml │ │ ├── vertex-ai-config-hpo.yaml │ │ ├── train_network.patch │ │ └── Dockerfile │ └── diffusers │ │ ├── cloud-cli.sh │ │ ├── cloud-build-diffusers.yaml │ │ ├── vertex-config-diffusers.yaml │ │ ├── train_dreambooth.patch │ │ └── Dockerfile └── Workbench │ └── diffusers_nbexecutor.ipynb ├── Stable-Diffusion-UI-GKE ├── .DS_Store ├── images │ ├── sd-webui-gke.png │ └── sd-webui-external-gke.png ├── templates │ ├── nfs_pvc.yaml │ ├── nfs_pv.yaml │ ├── service.yaml │ ├── gcs_pvc.yaml │ ├── gcs_pv.yaml │ ├── hpa-timeshare.yaml │ ├── hpa.yaml │ ├── deployment.yaml │ ├── deployment-timeshare.yaml │ └── deployment-gcs.yaml └── docker │ ├── Dockerfile-miniconda │ ├── Dockerfile │ └── config.json ├── Stable-Diffusion-UI-Agones ├── .DS_Store ├── agones │ ├── fail_allocation_result.json │ ├── allocation_result.json │ ├── nfs_pv.yaml │ ├── nfs_pvc.yaml │ ├── agones_install.sh │ ├── gcs_pvc.yaml │ ├── gcs_pv.yaml │ ├── fleet.yaml │ ├── fleet_autoscale.yaml │ ├── fleet_pvc.yaml │ ├── fleet_gcs.yaml │ └── values.yaml ├── nginx │ ├── coffee-clock.jpg │ ├── Dockerfile │ ├── deployment.yaml │ ├── nginx.conf │ ├── default.conf │ └── sd.lua ├── images │ ├── sd-agones-arch.png │ 
└── sd-webui-agones.png ├── sd-webui │ ├── extensions │ │ └── simple-files │ │ │ ├── README.md │ │ │ └── CONTRIBUTING.md │ ├── start.sh │ ├── user-watch.py │ ├── Dockerfile │ └── config.json ├── cloud-function │ ├── requirements.txt │ └── main.py ├── optimizated-init │ ├── attach-disk │ │ ├── Dockerfile │ │ └── attach-disk.sh │ ├── README.MD │ ├── deployment-init-disk.yaml │ └── daemonset.yaml ├── ingress-iap │ ├── managed-cert.yaml │ ├── backendconfig.yaml │ ├── service.yaml │ └── ingress.yaml └── agones-sidecar │ ├── go.mod │ ├── Dockerfile │ └── go.sum ├── .gitignore ├── terraform-provision-infra ├── modules │ ├── nonagones │ │ ├── cloud-build │ │ │ ├── outputs.tf │ │ │ ├── variables.tf │ │ │ └── main.tf │ │ ├── k8s-res │ │ │ └── variables.tf │ │ └── gcp-res │ │ │ ├── outputs.tf │ │ │ ├── variables.tf │ │ │ └── main.tf │ └── agones │ │ ├── cloud-build │ │ ├── outputs.tf │ │ ├── variables.tf │ │ └── main.tf │ │ ├── helm-agones │ │ ├── variables.tf │ │ └── main.tf │ │ ├── k8s-res │ │ └── variables.tf │ │ └── gcp-res │ │ ├── variables.tf │ │ └── outputs.tf ├── main.tf ├── README_zh.md └── README.md ├── CONTRIBUTING.md └── README_cn.md /Stable-Diffusion-Vertex/pipeline/images/.gitignore: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Stable-Diffusion-UI-GKE/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/stable-diffusion-on-gcp/HEAD/Stable-Diffusion-UI-GKE/.DS_Store -------------------------------------------------------------------------------- /Stable-Diffusion-Vertex/pipeline/requirements.txt: -------------------------------------------------------------------------------- 1 | kfp==1.8.20 2 | google-cloud-aiplatform==1.24.1 3 | google_cloud_pipeline_components==1.0.42 4 | 
-------------------------------------------------------------------------------- /Stable-Diffusion-UI-Agones/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/stable-diffusion-on-gcp/HEAD/Stable-Diffusion-UI-Agones/.DS_Store -------------------------------------------------------------------------------- /Stable-Diffusion-Vertex/images/FileStore.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/stable-diffusion-on-gcp/HEAD/Stable-Diffusion-Vertex/images/FileStore.png -------------------------------------------------------------------------------- /Stable-Diffusion-UI-GKE/images/sd-webui-gke.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/stable-diffusion-on-gcp/HEAD/Stable-Diffusion-UI-GKE/images/sd-webui-gke.png -------------------------------------------------------------------------------- /Stable-Diffusion-Vertex/images/hpo_status.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/stable-diffusion-on-gcp/HEAD/Stable-Diffusion-Vertex/images/hpo_status.png -------------------------------------------------------------------------------- /Stable-Diffusion-UI-Agones/agones/fail_allocation_result.json: -------------------------------------------------------------------------------- 1 | { 2 | "code": 8, 3 | "message": "there is no available GameServer to allocate", 4 | "details": [] 5 | } -------------------------------------------------------------------------------- /Stable-Diffusion-UI-Agones/nginx/coffee-clock.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/GoogleCloudPlatform/stable-diffusion-on-gcp/HEAD/Stable-Diffusion-UI-Agones/nginx/coffee-clock.jpg -------------------------------------------------------------------------------- /Stable-Diffusion-UI-Agones/images/sd-agones-arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/stable-diffusion-on-gcp/HEAD/Stable-Diffusion-UI-Agones/images/sd-agones-arch.png -------------------------------------------------------------------------------- /Stable-Diffusion-Vertex/images/workbench_status.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/stable-diffusion-on-gcp/HEAD/Stable-Diffusion-Vertex/images/workbench_status.png -------------------------------------------------------------------------------- /Stable-Diffusion-UI-Agones/images/sd-webui-agones.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/stable-diffusion-on-gcp/HEAD/Stable-Diffusion-UI-Agones/images/sd-webui-agones.png -------------------------------------------------------------------------------- /Stable-Diffusion-Vertex/images/workbench_executor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/stable-diffusion-on-gcp/HEAD/Stable-Diffusion-Vertex/images/workbench_executor.png -------------------------------------------------------------------------------- /Stable-Diffusion-Vertex/pipeline/images/one-stage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/stable-diffusion-on-gcp/HEAD/Stable-Diffusion-Vertex/pipeline/images/one-stage.png -------------------------------------------------------------------------------- 
/Stable-Diffusion-UI-GKE/images/sd-webui-external-gke.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/stable-diffusion-on-gcp/HEAD/Stable-Diffusion-UI-GKE/images/sd-webui-external-gke.png -------------------------------------------------------------------------------- /Stable-Diffusion-Vertex/images/custom_training_status.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/stable-diffusion-on-gcp/HEAD/Stable-Diffusion-Vertex/images/custom_training_status.png -------------------------------------------------------------------------------- /Stable-Diffusion-Vertex/images/workbench_custom_image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/stable-diffusion-on-gcp/HEAD/Stable-Diffusion-Vertex/images/workbench_custom_image.png -------------------------------------------------------------------------------- /Stable-Diffusion-Vertex/images/workbench_image_status.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/stable-diffusion-on-gcp/HEAD/Stable-Diffusion-Vertex/images/workbench_image_status.png -------------------------------------------------------------------------------- /Stable-Diffusion-Vertex/pipeline/images/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/stable-diffusion-on-gcp/HEAD/Stable-Diffusion-Vertex/pipeline/images/architecture.png -------------------------------------------------------------------------------- /Stable-Diffusion-Vertex/pipeline/images/Vertex-AI-Screenshot.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/GoogleCloudPlatform/stable-diffusion-on-gcp/HEAD/Stable-Diffusion-Vertex/pipeline/images/Vertex-AI-Screenshot.png -------------------------------------------------------------------------------- /Stable-Diffusion-UI-Agones/sd-webui/extensions/simple-files/README.md: -------------------------------------------------------------------------------- 1 | # Simple File Manager for Stable Diffusion WebUI 2 | 3 | This extension provides users the ability to browse file system and download/upload files. 4 | 5 | ## Install 6 | 7 | Put this directory under `stable-diffusion-webui/extensions`. 8 | -------------------------------------------------------------------------------- /Stable-Diffusion-UI-Agones/agones/allocation_result.json: -------------------------------------------------------------------------------- 1 | { 2 | "gameServerName": "simple-game-server-l6pxm-dfwfq", 3 | "ports": [ 4 | { 5 | "name": "default", 6 | "port": 7665 7 | }, 8 | { 9 | "name": "sd", 10 | "port": 7741 11 | } 12 | ], 13 | "address": "34.69.206.234", 14 | "nodeName": "gke-sdnginx-default-pool-c5d35e3a-lhr1", 15 | "source": "local" 16 | } -------------------------------------------------------------------------------- /Stable-Diffusion-Vertex/Diffusers/metadata.jsonl: -------------------------------------------------------------------------------- 1 | {"file_name": "alvan-nee-9M0tSjb-cpA-unsplash.jpeg", "text": "a photo of sks dog"} 2 | {"file_name": "alvan-nee-bQaAJCbNq3g-unsplash.jpeg", "text": "a photo of sks dog"} 3 | {"file_name": "alvan-nee-brFsZ7qszSY-unsplash.jpeg", "text": "a photo of sks dog"} 4 | {"file_name": "alvan-nee-eoqnr8ikwFE-unsplash.jpeg", "text": "a photo of sks dog"} 5 | {"file_name": "alvan-nee-Id1DBHv4fbg-unsplash.jpeg", "text": "a photo of sks dog"} 6 | -------------------------------------------------------------------------------- /Stable-Diffusion-UI-Agones/cloud-function/requirements.txt: 
-------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | redis -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build-.json 2 | script.sh 3 | Stable-Diffusion-UI-Novel/model.ckpt 4 | Stable-Diffusion-UI-Novel/sd_dreambooth_extension 5 | Training/** 6 | !Training/*.py 7 | !Training/*.sh 8 | !Training/*.ipynb 9 | !Training/*.md 10 | !Training/*.yaml 11 | !Training/Dockerfile 12 | Preprocess/** 13 | !Preprocess/*.py 14 | !Preprocess/*.sh 15 | !Preprocess/*.ipynb 16 | !Preprocess/*.md 17 | !Preprocess/*.yaml 18 | workdir/** 19 | 20 | # Local .terraform directories 21 | **/.terraform/* 22 | *.terraform.* 23 | 24 | # .tfstate files 25 | *.tfstate 26 | *.tfstate.* 27 | 28 | # terraform version lock files 29 | terraform-provision-infra/.terraform.lock.hcl 30 | terraform-provision-infra/kubernetes-sample/.terraform.lock.hcl 31 | 32 | # others 33 | .vscode/ 34 | -------------------------------------------------------------------------------- /Stable-Diffusion-UI-Agones/optimizated-init/attach-disk/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you 
may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | FROM gcr.io/google.com/cloudsdktool/cloud-sdk:slim 16 | 17 | COPY attach-disk.sh /attach-disk.sh 18 | -------------------------------------------------------------------------------- /Stable-Diffusion-UI-Agones/ingress-iap/managed-cert.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | apiVersion: networking.gke.io/v1 15 | kind: ManagedCertificate 16 | metadata: 17 | name: managed-cert 18 | spec: 19 | domains: 20 | - DOMAIN_NAME1 -------------------------------------------------------------------------------- /terraform-provision-infra/modules/nonagones/cloud-build/outputs.tf: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | output "webui_image" { 16 | value = "${var.artifact_registry}/${var.sd_webui_image.tag}" 17 | description = "webui image url" 18 | } -------------------------------------------------------------------------------- /Stable-Diffusion-UI-Agones/sd-webui/start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2023 Google LLC All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | python3 user-watch.py & 17 | python3 webui.py --listen --xformers --opt-sdp-attention --enable-insecure-extension-access --no-half-vae --disable-safe-unpickle --api -------------------------------------------------------------------------------- /Stable-Diffusion-UI-Agones/agones/nfs_pv.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | apiVersion: v1 15 | kind: PersistentVolume 16 | metadata: 17 | name: filestore-nfs-pv 18 | spec: 19 | capacity: 20 | storage: 1Ti 21 | accessModes: 22 | - ReadWriteMany 23 | nfs: 24 | path: /vol1 25 | server: -------------------------------------------------------------------------------- /Stable-Diffusion-UI-Agones/agones-sidecar/go.mod: -------------------------------------------------------------------------------- 1 | module agones.dev/agones/examples/simple-game-server 2 | 3 | go 1.19 4 | 5 | require agones.dev/agones v1.31.0 6 | 7 | require ( 8 | github.com/golang/protobuf v1.5.3 // indirect 9 | github.com/grpc-ecosystem/grpc-gateway/v2 v2.15.2 // indirect 10 | github.com/pkg/errors v0.9.1 // indirect 11 | golang.org/x/net v0.17.0 // indirect 12 | golang.org/x/sys v0.13.0 // indirect 13 | golang.org/x/text v0.13.0 // indirect 14 | google.golang.org/genproto v0.0.0-20230822172742-b8732ec3820d // indirect 15 | google.golang.org/genproto/googleapis/api v0.0.0-20230822172742-b8732ec3820d // indirect 16 | google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d // indirect 17 | google.golang.org/grpc v1.59.0 // indirect 18 | google.golang.org/protobuf v1.31.0 // indirect 19 | ) 20 | -------------------------------------------------------------------------------- /Stable-Diffusion-UI-Agones/agones/nfs_pvc.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | kind: PersistentVolumeClaim 15 | apiVersion: v1 16 | metadata: 17 | name: vol1 18 | spec: 19 | accessModes: 20 | - ReadWriteMany 21 | storageClassName: "" 22 | volumeName: filestore-nfs-pv 23 | resources: 24 | requests: 25 | storage: 1Ti 26 | -------------------------------------------------------------------------------- /Stable-Diffusion-UI-Agones/ingress-iap/backendconfig.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | apiVersion: cloud.google.com/v1 15 | kind: BackendConfig 16 | metadata: 17 | name: config-default 18 | namespace: default 19 | spec: 20 | timeoutSec: 900 21 | iap: 22 | enabled: true 23 | oauthclientCredentials: 24 | secretName: iap-secret -------------------------------------------------------------------------------- /Stable-Diffusion-UI-Agones/agones/agones_install.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # Provision agones component 15 | helm repo add agones https://agones.dev/chart/stable 16 | 17 | helm repo update 18 | 19 | helm install sd-agones-release --namespace agones-system -f values.yaml agones/agones 20 | 21 | 22 | # Install Fleet and Fleet scale policy 23 | 24 | -------------------------------------------------------------------------------- /Stable-Diffusion-UI-GKE/templates/nfs_pvc.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | kind: PersistentVolumeClaim 16 | apiVersion: v1 17 | metadata: 18 | name: $fileshare_name # e.g. 
sdpvc 19 | spec: 20 | accessModes: 21 | - ReadWriteMany 22 | storageClassName: "" 23 | volumeName: filestore-nfs-pv 24 | resources: 25 | requests: 26 | storage: 1Ti 27 | -------------------------------------------------------------------------------- /Stable-Diffusion-Vertex/Kohya-lora/vertex-config.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | workerPoolSpecs: 16 | machineSpec: 17 | machineType: n1-standard-8 18 | acceleratorType: NVIDIA_TESLA_T4 19 | acceleratorCount: 2 20 | replicaCount: 1 21 | containerSpec: 22 | imageUri: us-central1-docker.pkg.dev/PROJECT_ID/ARTIFACT_REGISTRY_ID/sd-training:kohya -------------------------------------------------------------------------------- /Stable-Diffusion-Vertex/Diffusers/vertex-config.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | workerPoolSpecs: 16 | machineSpec: 17 | machineType: n1-standard-8 18 | acceleratorType: NVIDIA_TESLA_V100 19 | acceleratorCount: 1 20 | replicaCount: 1 21 | containerSpec: 22 | imageUri: us-central1-docker.pkg.dev/PROJECT_ID/ARTIFACT_REGISTRY_ID/sd-training:diffusers -------------------------------------------------------------------------------- /Stable-Diffusion-UI-GKE/templates/nfs_pv.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | apiVersion: v1 16 | kind: PersistentVolume 17 | metadata: 18 | name: filestore-nfs-pv 19 | spec: 20 | capacity: 21 | storage: 1Ti 22 | accessModes: 23 | - ReadWriteMany 24 | nfs: 25 | path: $fileshare_path # e.g. /sdpvc 26 | server: $filestore_instance_ip # e.g. 
172.168.1.1 27 | -------------------------------------------------------------------------------- /Stable-Diffusion-UI-GKE/templates/service.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | apiVersion: v1 16 | kind: Service 17 | metadata: 18 | name: stable-diffusion-service 19 | labels: 20 | app: stable-diffusion 21 | spec: 22 | ports: 23 | - protocol: TCP 24 | port: 7860 25 | targetPort: 7860 26 | selector: 27 | app: stable-diffusion 28 | type: LoadBalancer -------------------------------------------------------------------------------- /Stable-Diffusion-UI-Agones/agones/gcs_pvc.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | apiVersion: v1 16 | kind: PersistentVolumeClaim 17 | metadata: 18 | name: gcs-fuse-csi-static-pvc 19 | namespace: default 20 | spec: 21 | accessModes: 22 | - ReadWriteMany 23 | resources: 24 | requests: 25 | storage: 5Ti 26 | volumeName: gcs-fuse-csi-pv 27 | storageClassName: example-storage-class -------------------------------------------------------------------------------- /Stable-Diffusion-UI-GKE/templates/gcs_pvc.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | apiVersion: v1 16 | kind: PersistentVolumeClaim 17 | metadata: 18 | name: gcs-fuse-csi-static-pvc 19 | namespace: default 20 | spec: 21 | accessModes: 22 | - ReadWriteMany 23 | resources: 24 | requests: 25 | storage: 5Ti 26 | volumeName: gcs-fuse-csi-pv 27 | storageClassName: example-storage-class -------------------------------------------------------------------------------- /Stable-Diffusion-Vertex/hpo/kohya-lora/cloud-cli.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # cloud build image 16 | gcloud builds submit --config cloud-build-hpo.yaml . 17 | 18 | # create hp-tuning job 19 | gcloud ai hp-tuning-jobs create \ 20 | --region=us-central1 \ 21 | --display-name=sd-kohya-hpo \ 22 | --max-trial-count=5 \ 23 | --parallel-trial-count=2 \ 24 | --config=vertex-ai-config-hpo.yaml -------------------------------------------------------------------------------- /Stable-Diffusion-Vertex/hpo/diffusers/cloud-cli.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # cloud build image 16 | gcloud builds submit --config cloud-build-diffusers.yaml . 
17 | 18 | #creat hp-tuning job 19 | gcloud ai hp-tuning-jobs create \ 20 | --region=us-central1 \ 21 | --display-name=sd-diffusers-hpo \ 22 | --max-trial-count=5 \ 23 | --parallel-trial-count=2 \ 24 | --config=vertex-config-diffusers.yaml -------------------------------------------------------------------------------- /Stable-Diffusion-Vertex/Diffusers/cloud-build-config.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | steps: 16 | - name: 'gcr.io/cloud-builders/docker' 17 | args: [ 'build', '-t', 'us-central1-docker.pkg.dev/PROJECT_ID/ARTIFACT_REGISTRY_ID/sd-training:diffusers', '.' ] 18 | - name: 'gcr.io/cloud-builders/docker' 19 | args: ['push', 'us-central1-docker.pkg.dev/PROJECT_ID/ARTIFACT_REGISTRY_ID/sd-training:diffusers'] 20 | options: 21 | machineType: 'N1_HIGHCPU_8' 22 | diskSizeGb: '200' -------------------------------------------------------------------------------- /Stable-Diffusion-UI-Agones/nginx/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | FROM openresty/openresty:1.21.4.1-0-focal 15 | 16 | RUN opm get ledgetech/lua-resty-http 17 | RUN opm get openresty/lua-resty-redis 18 | RUN opm get openresty/lua-resty-dns 19 | 20 | COPY nginx.conf /usr/local/openresty/nginx/conf/nginx.conf 21 | COPY default.conf /etc/nginx/conf.d/default.conf 22 | COPY sd.lua /usr/local/openresty/nginx/sd.lua 23 | COPY coffee-clock.jpg /usr/local/openresty/nginx/html/images/coffee-clock.jpg -------------------------------------------------------------------------------- /terraform-provision-infra/modules/nonagones/cloud-build/variables.tf: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | variable "sd_webui_image" { 16 | description = "Stable Diffusion WebUI image: local Docker build context path and image tag."
17 | type = object({ 18 | path = string 19 | tag = string 20 | }) 21 | default = { 22 | path = "../Stable-Diffusion-UI-GKE/docker/" 23 | tag = "sd-webui:tf" 24 | } 25 | } 26 | variable "artifact_registry" { 27 | type = string 28 | description = "artifact registry URL." 29 | } -------------------------------------------------------------------------------- /Stable-Diffusion-Vertex/Kohya-lora/cloud-build-config-kohya.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | steps: 16 | - name: 'gcr.io/cloud-builders/docker' 17 | args: [ 'build', '-f', 'Dockerfile_kohya', '-t', 'us-central1-docker.pkg.dev/PROJECT_ID/ARTIFACT_REGISTRY_ID/sd-training:kohya', '.' ] 18 | - name: 'gcr.io/cloud-builders/docker' 19 | args: ['push', 'us-central1-docker.pkg.dev/PROJECT_ID/ARTIFACT_REGISTRY_ID/sd-training:kohya'] 20 | options: 21 | machineType: 'N1_HIGHCPU_8' 22 | diskSizeGb: '200' -------------------------------------------------------------------------------- /Stable-Diffusion-Vertex/hpo/kohya-lora/cloud-build-hpo.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | steps: 16 | - name: 'gcr.io/cloud-builders/docker' 17 | args: [ 'build', '-f', 'Dockerfile', '-t', 'us-central1-docker.pkg.dev/PROJECT_ID/ARTIFACT_REGISTRY_NAME/sd-training:kohya-hpo', '.' ] 18 | - name: 'gcr.io/cloud-builders/docker' 19 | args: ['push', 'us-central1-docker.pkg.dev/PROJECT_ID/ARTIFACT_REGISTRY_NAME/sd-training:kohya-hpo'] 20 | options: 21 | machineType: 'N1_HIGHCPU_8' 22 | diskSizeGb: '200' -------------------------------------------------------------------------------- /Stable-Diffusion-UI-Agones/agones/gcs_pv.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | apiVersion: v1 16 | kind: PersistentVolume 17 | metadata: 18 | name: gcs-fuse-csi-pv 19 | spec: 20 | accessModes: 21 | - ReadWriteMany 22 | capacity: 23 | storage: 5Ti 24 | storageClassName: example-storage-class 25 | claimRef: 26 | namespace: default 27 | name: gcs-fuse-csi-static-pvc 28 | mountOptions: 29 | - implicit-dirs 30 | csi: 31 | driver: gcsfuse.csi.storage.gke.io 32 | volumeHandle: $gcs_bucket_name -------------------------------------------------------------------------------- /Stable-Diffusion-UI-GKE/templates/gcs_pv.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | apiVersion: v1 16 | kind: PersistentVolume 17 | metadata: 18 | name: gcs-fuse-csi-pv 19 | spec: 20 | accessModes: 21 | - ReadWriteMany 22 | capacity: 23 | storage: 5Ti 24 | storageClassName: example-storage-class 25 | claimRef: 26 | namespace: default 27 | name: gcs-fuse-csi-static-pvc 28 | mountOptions: 29 | - implicit-dirs 30 | csi: 31 | driver: gcsfuse.csi.storage.gke.io 32 | volumeHandle: $gcs_bucket_name -------------------------------------------------------------------------------- /Stable-Diffusion-Vertex/hpo/diffusers/cloud-build-diffusers.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | steps: 16 | - name: 'gcr.io/cloud-builders/docker' 17 | args: [ 'build', '-f', 'Dockerfile', '-t', 'us-central1-docker.pkg.dev/PROJECT_ID/ARTIFACT_REGISTRY_NAME/sd-training:diffusers-hpo', '.' 
] 18 | - name: 'gcr.io/cloud-builders/docker' 19 | args: ['push', 'us-central1-docker.pkg.dev/PROJECT_ID/ARTIFACT_REGISTRY_NAME/sd-training:diffusers-hpo'] 20 | options: 21 | machineType: 'N1_HIGHCPU_8' 22 | diskSizeGb: '200' 23 | -------------------------------------------------------------------------------- /terraform-provision-infra/modules/agones/cloud-build/outputs.tf: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | output "webui_image" { 16 | value = "${var.artifact_registry}/${var.sd_webui_image.tag}" 17 | description = "webui image url" 18 | } 19 | output "nginx_image" { 20 | value = "${var.artifact_registry}/${var.nginx_image.tag}" 21 | description = "nginx image url" 22 | } 23 | output "game_server_image" { 24 | value = "${var.artifact_registry}/${var.game_server_image.tag}" 25 | description = "game server image url" 26 | } -------------------------------------------------------------------------------- /Stable-Diffusion-Vertex/Kohya-lora/vertex-config-nfs.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | network: projects/PROJECT_NUMBER/global/networks/VPC_NETWORK_NAME 16 | workerPoolSpecs: 17 | machineSpec: 18 | machineType: n1-standard-8 19 | acceleratorType: NVIDIA_TESLA_V100 20 | acceleratorCount: 1 21 | replicaCount: 1 22 | containerSpec: 23 | imageUri: us-central1-docker.pkg.dev/PROJECT_ID/ARTIFACT_REPOSITORY/sd-training:kohya 24 | nfsMounts: 25 | - server: NFS_IP_ADDRESS 26 | path: /vol1 27 | mountPoint: /mnt/nfs/model_repo -------------------------------------------------------------------------------- /terraform-provision-infra/modules/nonagones/cloud-build/main.tf: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | terraform { 16 | required_providers { 17 | null = { 18 | source = "hashicorp/null" 19 | version = "3.2.1" 20 | } 21 | } 22 | } 23 | resource "null_resource" "build_webui_image" { 24 | provisioner "local-exec" { 25 | command = "gcloud builds submit --machine-type=e2-highcpu-32 --disk-size=100 --region=us-central1 -t ${var.artifact_registry}/${var.sd_webui_image.tag}" 26 | working_dir = var.sd_webui_image.path 27 | } 28 | } -------------------------------------------------------------------------------- /Stable-Diffusion-Vertex/Diffusers/vertex-config-nfs.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | network: projects/PROJECT_NUMBER/global/networks/VPC_NETWORK_NAME 16 | workerPoolSpecs: 17 | machineSpec: 18 | machineType: n1-standard-8 19 | acceleratorType: NVIDIA_TESLA_V100 20 | acceleratorCount: 1 21 | replicaCount: 1 22 | containerSpec: 23 | imageUri: us-central1-docker.pkg.dev/PROJECT_ID/ARTIFACT_REPOSITORY/sd-training:diffusers 24 | nfsMounts: 25 | - server: NFS_IP_ADDRESS 26 | path: /vol1 27 | mountPoint: /mnt/nfs/model_repo -------------------------------------------------------------------------------- /Stable-Diffusion-UI-Agones/ingress-iap/service.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | apiVersion: v1 15 | kind: Service 16 | metadata: 17 | name: stable-diffusion-nginx-service 18 | annotations: 19 | cloud.google.com/neg: '{"ingress": true}' # Creates a NEG after an Ingress is created 20 | beta.cloud.google.com/backend-config: '{"default": "config-default"}' 21 | labels: 22 | app: stable-diffusion-nginx 23 | spec: 24 | ports: 25 | - protocol: TCP 26 | port: 8080 27 | targetPort: 8080 28 | selector: 29 | app: stable-diffusion-nginx 30 | type: ClusterIP -------------------------------------------------------------------------------- /Stable-Diffusion-UI-Agones/ingress-iap/ingress.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | apiVersion: networking.k8s.io/v1 15 | kind: Ingress 16 | metadata: 17 | name: sd-agones-ingress 18 | annotations: 19 | kubernetes.io/ingress.global-static-ip-name: sd-agones 20 | networking.gke.io/managed-certificates: managed-cert 21 | kubernetes.io/ingress.class: "gce" 22 | spec: 23 | defaultBackend: 24 | service: 25 | name: stable-diffusion-nginx-service # Name of the Service targeted by the Ingress 26 | port: 27 | number: 8080 # Should match the port used by the Service -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We'd love to accept your patches and contributions to this project. There are 4 | just a few small guidelines you need to follow. 5 | 6 | ## Contributor License Agreement 7 | 8 | Contributions to this project must be accompanied by a Contributor License 9 | Agreement. You (or your employer) retain the copyright to your contribution; 10 | this simply gives us permission to use and redistribute your contributions as 11 | part of the project. Head over to <https://cla.developers.google.com/> to see 12 | your current agreements on file or to sign a new one. 13 | 14 | You generally only need to submit a CLA once, so if you've already submitted one 15 | (even if it was for a different project), you probably don't need to do it 16 | again. 17 | 18 | ## Code reviews 19 | 20 | All submissions, including submissions by project members, require review. We 21 | use GitHub pull requests for this purpose. Consult 22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 23 | information on using pull requests. 24 | 25 | ## Community Guidelines 26 | 27 | This project follows [Google's Open Source Community 28 | Guidelines](https://opensource.google/conduct/). 
29 | -------------------------------------------------------------------------------- /Stable-Diffusion-UI-Agones/sd-webui/extensions/simple-files/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We'd love to accept your patches and contributions to this project. There are 4 | just a few small guidelines you need to follow. 5 | 6 | ## Contributor License Agreement 7 | 8 | Contributions to this project must be accompanied by a Contributor License 9 | Agreement. You (or your employer) retain the copyright to your contribution; 10 | this simply gives us permission to use and redistribute your contributions as 11 | part of the project. Head over to <https://cla.developers.google.com/> to see 12 | your current agreements on file or to sign a new one. 13 | 14 | You generally only need to submit a CLA once, so if you've already submitted one 15 | (even if it was for a different project), you probably don't need to do it 16 | again. 17 | 18 | ## Code reviews 19 | 20 | All submissions, including submissions by project members, require review. We 21 | use GitHub pull requests for this purpose. Consult 22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 23 | information on using pull requests. 24 | 25 | ## Community Guidelines 26 | 27 | This project follows [Google's Open Source Community 28 | Guidelines](https://opensource.google/conduct/). 29 | -------------------------------------------------------------------------------- /terraform-provision-infra/modules/agones/helm-agones/variables.tf: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | variable "gke_cluster_name" { 16 | description = "GKE Cluster Name." 17 | type = string 18 | } 19 | variable "gke_cluster_location" { 20 | description = "GKE Cluster Location" 21 | type = string 22 | } 23 | variable "project_id" { 24 | description = "GCP project id" 25 | type = string 26 | } 27 | variable "agones_version" { 28 | description = "Agones Version" 29 | type = string 30 | } 31 | variable "gke_cluster_nodepool" { 32 | description = "GKE Nodepool Name for agones chart" 33 | type = string 34 | default = "default-pool" 35 | } 36 | -------------------------------------------------------------------------------- /Stable-Diffusion-UI-Agones/agones-sidecar/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # build 16 | FROM golang:1.19.1 as builder 17 | WORKDIR /go/src 18 | COPY . 
agones.dev/agones/examples/simple-game-server 19 | 20 | WORKDIR /go/src/agones.dev/agones/examples/simple-game-server 21 | RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -o server . 22 | 23 | # final image 24 | FROM alpine:3.14 25 | 26 | RUN adduser -D -u 1000 server 27 | COPY --from=builder /go/src/agones.dev/agones/examples/simple-game-server/server /home/server/server 28 | RUN chown -R server /home/server && \ 29 | chmod o+x /home/server/server 30 | 31 | USER 1000 32 | ENTRYPOINT ["/home/server/server"] -------------------------------------------------------------------------------- /Stable-Diffusion-UI-Agones/nginx/deployment.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | apiVersion: apps/v1 15 | kind: Deployment 16 | metadata: 17 | name: stable-diffusion-nginx-deployment 18 | labels: 19 | app: stable-diffusion-nginx 20 | spec: 21 | replicas: 2 22 | selector: 23 | matchLabels: 24 | app: stable-diffusion-nginx 25 | template: 26 | metadata: 27 | labels: 28 | app: stable-diffusion-nginx 29 | spec: 30 | containers: 31 | - name: stable-diffusion-nginx 32 | image: <REGION>-docker.pkg.dev/<PROJECT_ID>/<REPOSITORY>/sd-nginx:0.1 # replace the placeholders with your Artifact Registry region, project id and repository 33 | ports: 34 | - containerPort: 8080 35 | nodeSelector: 36 | cloud.google.com/gke-nodepool: default-pool -------------------------------------------------------------------------------- /Stable-Diffusion-UI-GKE/templates/hpa-timeshare.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | apiVersion: autoscaling/v2 16 | kind: HorizontalPodAutoscaler 17 | metadata: 18 | name: stable-diffusion-hpa 19 | spec: 20 | minReplicas: 1 21 | maxReplicas: 4 22 | metrics: 23 | - type: External 24 | external: 25 | metric: 26 | name: kubernetes.io|node|accelerator|duty_cycle 27 | selector: 28 | matchLabels: 29 | resource.labels.cluster_name: <CLUSTER_NAME> # replace with the name of your GKE cluster 30 | target: 31 | type: AverageValue 32 | averageValue: 80 33 | scaleTargetRef: 34 | apiVersion: apps/v1 35 | kind: Deployment 36 | name: "stable-diffusion-deployment" 37 | -------------------------------------------------------------------------------- /Stable-Diffusion-UI-GKE/templates/hpa.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | apiVersion: autoscaling/v2 16 | kind: HorizontalPodAutoscaler 17 | metadata: 18 | name: stable-diffusion-hpa 19 | spec: 20 | minReplicas: 1 21 | maxReplicas: 30 22 | metrics: 23 | - type: External 24 | external: 25 | metric: 26 | name: kubernetes.io|container|accelerator|duty_cycle 27 | selector: 28 | matchLabels: 29 | resource.labels.namespace_name: default # replace with namespace for HPA 30 | target: 31 | type: AverageValue 32 | averageValue: 80 33 | scaleTargetRef: 34 | apiVersion: apps/v1 35 | kind: Deployment 36 | name: "stable-diffusion-deployment" -------------------------------------------------------------------------------- /Stable-Diffusion-UI-Agones/nginx/nginx.conf: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | user root root; 15 | worker_processes auto; 16 | 17 | events { 18 | worker_connections 1024; 19 | } 20 | 21 | http { 22 | log_format main '$remote_addr - $remote_user [$time_local] "$request" ' 23 | '$status $body_bytes_sent "$http_referer" ' 24 | '"$http_user_agent" "$http_x_forwarded_for"'; 25 | 26 | access_log /usr/local/openresty/nginx/logs/access.log main; 27 | error_log /usr/local/openresty/nginx/logs/error.log error; 28 | client_max_body_size 0; 29 | 30 | map $http_upgrade $connection_upgrade { 31 | default upgrade; 32 | '' close; 33 | } 34 | 35 | include /etc/nginx/conf.d/default.conf; 36 | } 37 | -------------------------------------------------------------------------------- /terraform-provision-infra/modules/nonagones/k8s-res/variables.tf: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | variable "gke_cluster_name" { 16 | description = "GKE Cluster Name." 
17 | type = string 18 | } 19 | variable "gke_cluster_location" { 20 | description = "GKE Cluster Location" 21 | type = string 22 | } 23 | variable "project_id" { 24 | description = "GCP project id" 25 | type = string 26 | } 27 | variable "google_filestore_reserved_ip_range" { 28 | description = "Google Filestore instance reserved IP range" 29 | type = string 30 | } 31 | variable "gke_cluster_nodepool" { 32 | description = "GKE node pool name" 33 | type = string 34 | } 35 | variable "webui_image_url" { 36 | description = "Stable diffusion webui Image url" 37 | type = string 38 | } -------------------------------------------------------------------------------- /terraform-provision-infra/modules/nonagones/gcp-res/outputs.tf: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | output "kubernetes_cluster_name" { 16 | value = google_container_cluster.gke.name 17 | description = "GKE Cluster Name" 18 | } 19 | output "google_filestore_reserved_ip_range" { 20 | value = google_filestore_instance.instance.networks[0].ip_addresses[0] 21 | description = "google_filestore_instance reserved_ip_range" 22 | } 23 | output "gpu_nodepool_name" { 24 | value = google_container_node_pool.separately_gpu_nodepool.name 25 | description = "gpu node pool name" 26 | } 27 | output "artifactregistry_url" { 28 | value = "${var.region}-docker.pkg.dev/${var.project_id}/${google_artifact_registry_repository.sd_repo.name}" 29 | description = "artifactregistry_url" 30 | } -------------------------------------------------------------------------------- /Stable-Diffusion-UI-Agones/nginx/default.conf: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
# Front-end server: serves static files under /images/ and proxies everything
# else to an upstream chosen per request.
server {
    listen 8080;
    root /usr/local/openresty/nginx/html;
    # Static content, resolved against the server-level root above.
    location /images/ {
    }
    # Default route. $target starts empty and is used as the proxy upstream.
    # NOTE(review): sd.lua presumably assigns $target during the access phase - confirm.
    location / {
        resolver kube-dns.kube-system.svc.cluster.local; # use the GKE built-in kube-dns server
        set $target '';
        access_by_lua_file "sd.lua";
        proxy_pass http://$target;
    }
    # Same routing as "/", plus HTTP/1.1 and Upgrade/Connection headers so the
    # connection can be upgraded (e.g. to WebSocket) by the upstream.
    # NOTE(review): $connection_upgrade is not defined in this file; it must be
    # provided elsewhere (typically a map on $http_upgrade in nginx.conf) - confirm.
    location /queue/join {
        resolver kube-dns.kube-system.svc.cluster.local; # use the GKE built-in kube-dns server
        set $target '';
        access_by_lua_file "sd.lua";
        proxy_pass http://$target;
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection $connection_upgrade;
        proxy_set_header Host $host;
    }
}

-------------------------------------------------------------------------------- /Stable-Diffusion-Vertex/hpo/kohya-lora/vertex-ai-config-hpo.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
# Hyperparameter tuning job spec for Kohya LoRA training.
studySpec:
  metrics:
  # Must match the metric tag the training code reports to hypertune.
  - metricId: avr_loss
    goal: MINIMIZE
  parameters:
  # Learning rate searched on a log scale between 1e-5 and 1e-4.
  - parameterId: lr
    scaleType: UNIT_LOG_SCALE
    doubleValueSpec:
      minValue: 0.00001
      maxValue: 0.0001
  measurementSelectionType: BEST_MEASUREMENT
trialJobSpec:
  workerPoolSpecs:
    - machineSpec:
        machineType: n1-standard-8
        acceleratorType: NVIDIA_TESLA_V100
        acceleratorCount: 1
      replicaCount: 1
      containerSpec:
        # Replace PROJECT_ID, ARTIFACT_REGISTRY_NAME, YOUR_BUCKET_NAME and the folder/prompt placeholders.
        imageUri: us-central1-docker.pkg.dev/PROJECT_ID/ARTIFACT_REGISTRY_NAME/sd-training:kohya-hpo
        args: ["--method","kohya_lora","--model_name","CompVis/stable-diffusion-v1-4","--input_storage","/gcs/YOUR_BUCKET_NAME/IMAGE_FOLDER","--output_storage","/gcs/YOUR_BUCKET_NAME/OUTPUT_FOLDER","--display_name","PROMPT_NAME","--hpo","y"]
        command: ["python3","train_kohya.py"]

-------------------------------------------------------------------------------- /terraform-provision-infra/modules/agones/cloud-build/variables.tf: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.

# Each *_image variable pairs a local build-context directory (path) with the
# image name:tag (tag) that is appended to the artifact registry URL.

variable "nginx_image" {
  description = "Build context path and image tag for the nginx image."
  type = object({
    path = string
    tag  = string
  })
  default = {
    path = "../Stable-Diffusion-UI-Agones/nginx/"
    tag  = "nginx:tf"
  }
}

variable "sd_webui_image" {
  description = "Build context path and image tag for the Stable Diffusion webui image."
  type = object({
    path = string
    tag  = string
  })
  default = {
    path = "../Stable-Diffusion-UI-Agones/sd-webui/"
    tag  = "sd-webui:tf"
  }
}

variable "game_server_image" {
  description = "Build context path and image tag for the game server (agones-sidecar) image."
  type = object({
    path = string
    tag  = string
  })
  default = {
    path = "../Stable-Diffusion-UI-Agones/agones-sidecar/"
    tag  = "game-server:tf"
  }
}

# Registry prefix the image tags above are pushed under.
variable "artifact_registry" {
  type        = string
  description = "artifact registry URL."
}
-------------------------------------------------------------------------------- /Stable-Diffusion-Vertex/hpo/diffusers/vertex-config-diffusers.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
# Hyperparameter tuning job spec for Diffusers DreamBooth training.
studySpec:
  metrics:
  # Must match the metric tag the training code reports to hypertune.
  - metricId: loss
    goal: MINIMIZE
  parameters:
  # Learning rate searched on a log scale between 1e-5 and 1e-4.
  - parameterId: lr
    scaleType: UNIT_LOG_SCALE
    doubleValueSpec:
      minValue: 0.00001
      maxValue: 0.0001
  measurementSelectionType: BEST_MEASUREMENT
trialJobSpec:
  workerPoolSpecs:
    - machineSpec:
        machineType: n1-standard-8
        acceleratorType: NVIDIA_TESLA_V100
        acceleratorCount: 1
      replicaCount: 1
      containerSpec:
        # Replace PROJECT_ID, ARTIFACT_REGISTRY_NAME, YOUR_BUCKET_NAME, FOLDER, PROMPT and CLASS PROMPT.
        imageUri: us-central1-docker.pkg.dev/PROJECT_ID/ARTIFACT_REGISTRY_NAME/sd-training:diffusers-hpo
        args: ["--method","diffuser_dreambooth","--model_name","runwayml/stable-diffusion-v1-5","--input_storage","/gcs/YOUR_BUCKET_NAME/FOLDER","--output_storage","/gcs/YOUR_BUCKET_NAME/FOLDER","--prompt","PROMPT","--class_prompt","CLASS PROMPT","--num_class_images","50","--max_train_steps","100","--text_encoder","True","--set_grads_to_none","True","--hpo","y"]
        command: ["python3","train.py"]
-------------------------------------------------------------------------------- /Stable-Diffusion-UI-Agones/agones/fleet.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
# Agones Fleet: each GameServer pod runs the simple-game-server container next
# to the Stable Diffusion WebUI container.
apiVersion: "agones.dev/v1"
kind: Fleet
metadata:
  name: sd-agones-fleet
spec:
  replicas: 1
  template:
    spec:
      # Names which of the two containers below is the game server container.
      container: simple-game-server
      ports:
      - name: default
        container: simple-game-server
        containerPort: 7654
      # WebUI port, exposed over TCP.
      - name: sd
        container: stable-diffusion-webui
        containerPort: 7860
        protocol: TCP
      template:
        spec:
          containers:
          - name: simple-game-server
            image: us-docker.pkg.dev/agones-images/examples/simple-game-server:0.14
            resources:
              requests:
                memory: "64Mi"
                cpu: "20m"
              limits:
                memory: "64Mi"
                cpu: "20m"
          - name: stable-diffusion-webui
            # NOTE(review): project and repository segments are empty in this path - fill in before applying.
            image: us-central1-docker.pkg.dev///sd-webui:0.1
            resources:
              limits:
                nvidia.com/gpu: "1"
-------------------------------------------------------------------------------- /Stable-Diffusion-Vertex/pipeline/Dockerfile_kohya: --------------------------------------------------------------------------------
# Training image for Kohya LoRA: GPU base image + lora-scripts + pinned Python dependencies.
FROM gcr.io/deeplearning-platform-release/base-gpu.py310

# Refreshes the package index only; no apt packages are installed in this file.
RUN apt-get update

WORKDIR /root

# Clone the lora-scripts repository with its submodules (default branch, not pinned to a commit).
RUN git clone --recurse-submodules https://github.com/Akegarasu/lora-scripts

# Pinned Python dependencies, one image layer per package.
RUN pip install -U accelerate==0.18.0
RUN pip install -U xformers==0.0.18
RUN pip install -U safetensors==0.3.0
RUN pip install -U tqdm==4.65.0
RUN pip install -U ftfy==6.1.1
RUN pip install -U loralib==0.1.1
RUN pip install -U evaluate==0.4.0
RUN pip install -U psutil==5.9.4
RUN pip install -U PyYAML==6.0
RUN pip install -U packaging==23.0
RUN pip install -U transformers==4.27.4
RUN pip install -U bitsandbytes==0.35.0
RUN pip install -U diffusers[torch]==0.10.2
RUN pip install -U albumentations==1.3.0
RUN pip install -U opencv-python-headless==4.7.0.72
RUN pip install -U einops==0.6.0
RUN pip install -U pytorch-lightning==2.0.1
RUN pip install -U gradio==3.25.0
RUN pip install -U altair==4.2.2
RUN pip install -U easygui==0.98.3
RUN pip install -U toml==0.10.2
RUN pip install -U voluptuous==0.13.1
RUN pip install -U timm==0.6.13
RUN pip install -U fairscale==0.4.13
RUN pip install -U huggingface-hub==0.13.4
RUN pip install -U tensorflow==2.10.1 tensorboard==2.10.1
RUN pip install -U lion-pytorch==0.0.7
RUN pip install -U lycoris-lora==0.1.4

# Install the CUDA 11.8 builds of torch/torchvision last, from the PyTorch wheel index.
RUN pip install torch==2.0.0+cu118 torchvision==0.15.1+cu118 --extra-index-url https://download.pytorch.org/whl/cu118

# Copies the trainer code to the docker image.
COPY train_kohya.py /root/train_kohya.py

# The entry point is intentionally left disabled.
# NOTE(review): the command is presumably supplied by the job that runs this image - confirm.
#ENTRYPOINT ["python3", "-m", "train_kohya"]
-------------------------------------------------------------------------------- /Stable-Diffusion-Vertex/hpo/diffusers/train_dreambooth.patch: -------------------------------------------------------------------------------- 1 | diff --git a/examples/dreambooth/train_dreambooth.py b/examples/dreambooth/train_dreambooth.py 2 | index 52694660..c66dd99f 100644 3 | --- a/examples/dreambooth/train_dreambooth.py 4 | +++ b/examples/dreambooth/train_dreambooth.py 5 | @@ -44,7 +44,8 @@ from diffusers import AutoencoderKL, DDPMScheduler, DiffusionPipeline, UNet2DCon 6 | from diffusers.optimization import get_scheduler 7 | from diffusers.utils import check_min_version 8 | from diffusers.utils.import_utils import is_xformers_available 9 | - 10 | +import hypertune 11 | +hpt = hypertune.HyperTune() 12 | 13 | # Will error if the minimal version of diffusers is not installed. Remove at your own risks.
14 | check_min_version("0.14.0") 15 | @@ -340,6 +341,7 @@ def parse_args(input_args=None): 16 | " https://pytorch.org/docs/stable/generated/torch.optim.Optimizer.zero_grad.html" 17 | ), 18 | ) 19 | + parser.add_argument("--hpo", type=str, default="n", help="hyperparameter tuning") 20 | 21 | if input_args is not None: 22 | args = parser.parse_args(input_args) 23 | @@ -924,6 +926,11 @@ def main(args): 24 | logs = {"loss": loss.detach().item(), "lr": lr_scheduler.get_last_lr()[0]} 25 | progress_bar.set_postfix(**logs) 26 | accelerator.log(logs, step=global_step) 27 | + if args.hpo == "y": 28 | + hpt.report_hyperparameter_tuning_metric( 29 | + hyperparameter_metric_tag='loss', 30 | + metric_value=loss.detach().item(), 31 | + global_step=global_step) 32 | 33 | if global_step >= args.max_train_steps: 34 | break 35 | @@ -948,3 +955,4 @@ def main(args): 36 | if __name__ == "__main__": 37 | args = parse_args() 38 | main(args) 39 | + 40 | -------------------------------------------------------------------------------- /Stable-Diffusion-Vertex/pipeline/pipeline_conf.yml: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
# Pipeline configuration template: keys with no value must be filled in before use.
# NOTE(review): the nesting below is reconstructed from the key names - confirm
# against the code that loads this file.
pipelineName:
buildImage:
# NFS mount settings.
NFS:
  enable:
  server:
  path:
  mountPoint:
# Target GCP project and location.
gcpSpecs:
  project_number:
  project_id:
  region:
  pipeline_bucket:
# Vertex AI custom job settings.
vertexSpecs:
  # NOTE(review): the project segment is empty in this path - fill in before use.
  network: projects//global/networks/default
  workerPoolSpecs:
    machineSpec:
      machineType:
      acceleratorType:
      acceleratorCount:
    replicaCount:
    containerSpec:
      imageUri:
# Training arguments; numeric values are quoted, i.e. passed as strings.
modelSpecs:
  method: kohya_lora
  model_name:
  input_storage:
  output_storage:
  display_name:
  lr: "0.0001"
  unet_lr: "0.00005"
  text_encoder_lr: "0.0001"
  lr_scheduler: cosine_with_restarts
  network_dim: "128"
  network_alpha: "64"
  batch_size: "1"
  save_every_n_epochs: "1"
  max_train_epochs: "10"

-------------------------------------------------------------------------------- /terraform-provision-infra/modules/agones/cloud-build/main.tf: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
terraform {
  required_providers {
    null = {
      source  = "hashicorp/null"
      version = "3.2.1"
    }
  }
}

# Each resource below shells out from the machine running Terraform; the
# provisioner runs once, when the null_resource is created.

# Submit the Stable Diffusion WebUI build context to Cloud Build and tag the
# result under the artifact registry.
resource "null_resource" "build_webui_image" {
  provisioner "local-exec" {
    command     = "gcloud builds submit --machine-type=e2-highcpu-32 --disk-size=100 --region=us-central1 -t ${var.artifact_registry}/${var.sd_webui_image.tag}"
    working_dir = var.sd_webui_image.path
  }
}

# Submit the game server build context to Cloud Build.
resource "null_resource" "build_game_server_image" {
  provisioner "local-exec" {
    command     = "gcloud builds submit --machine-type=e2-highcpu-32 --disk-size=100 --region=us-central1 -t ${var.artifact_registry}/${var.game_server_image.tag}"
    working_dir = var.game_server_image.path
  }
}

# Rewrite the literal ${REDIS_HOST} placeholder in sd.lua in place
# ("$$" is the Terraform escape for a literal "${").
resource "null_resource" "modify_nginx_image" {
  provisioner "local-exec" {
    command     = "sed -i 's/$${REDIS_HOST}/redis.private.domain/g' sd.lua"
    working_dir = var.nginx_image.path
  }
}

# Submit the nginx build context to Cloud Build.
resource "null_resource" "build_nginx_image" {
  # The build context must contain the patched sd.lua. Without this explicit
  # dependency Terraform sees no relationship between the two resources and may
  # run them in parallel, uploading sd.lua before sed has rewritten it.
  depends_on = [null_resource.modify_nginx_image]

  provisioner "local-exec" {
    command     = "gcloud builds submit --machine-type=e2-highcpu-32 --disk-size=100 --region=us-central1 -t ${var.artifact_registry}/${var.nginx_image.tag}"
    working_dir = var.nginx_image.path
  }
}
-------------------------------------------------------------------------------- /Stable-Diffusion-UI-GKE/templates/deployment.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.

# Deployment template for the Stable Diffusion WebUI on GKE.
# $fileshare_name and $image_url are placeholders to substitute before applying.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: stable-diffusion-deployment
  labels:
    app: stable-diffusion
spec:
  replicas: 1
  selector:
    matchLabels:
      app: stable-diffusion
  template:
    metadata:
      labels:
        app: stable-diffusion
    spec:
      volumes:
        - name: stable-diffusion-storage
          persistentVolumeClaim:
            claimName: $fileshare_name # replace with fileshare name
      containers:
      - name: stable-diffusion-webui
        image: $image_url # e.g. us-central1-docker.pkg.dev///sd-webui:0.1
        resources:
          limits:
            cpu: 2
            memory: 15Gi
            nvidia.com/gpu: 1
        ports:
          - containerPort: 7860
        # The single claim is mounted three times, each at a different subPath.
        volumeMounts:
          - mountPath: "/stable-diffusion-webui/models/Stable-diffusion"
            name: stable-diffusion-storage
            subPath: models/Stable-diffusion/sd15
          - mountPath: "/stable-diffusion-webui/outputs"
            name: stable-diffusion-storage
            subPath: outputs
          - mountPath: "/stable-diffusion-webui/models/ControlNet"
            name: stable-diffusion-storage
            subPath: models/ControlNet
-------------------------------------------------------------------------------- /Stable-Diffusion-Vertex/Kohya-lora/cloud-cli.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.

# Build the training image with Cloud Build.
gcloud builds submit --config cloud-build-config-kohya.yaml .

# Create a Vertex AI custom training job.
# Argument format:
#   --model_name: Hugging Face repo id, or "/gcs/bucket_name/model_folder". Only models downloaded
#       from Hugging Face in the standard diffusers format have been tested. Safetensors has not been tested.
#   --input_storage: /gcs/bucket_name/input_image_folder
#       Put the images in a subfolder named <repeat count per image>_<prompt name>, e.g. 10_aki.
#       You can also put a caption.txt file in the folder.
#   --output_storage: /gcs/bucket_name/output_folder
#   --display_name: prompt name
# input_storage, output_storage and display_name are required; the other arguments are optional.
gcloud ai custom-jobs create \
  --region=us-central1 \
  --display-name=sd-kohya \
  --config=vertex-config-nfs.yaml \
  --args="--method=kohya_lora,--model_name=CompVis/stable-diffusion-v1-4,--input_storage=/gcs/sd/input_dog_kohya,--output_storage=/gcs/sd/kohya_output,--display_name=sks_dog,--save_nfs=True" \
  --command="python3,train_kohya.py"

# Only copy the models already in GCS to Filestore (no training arguments are passed).
gcloud ai custom-jobs create \
  --region=us-central1 \
  --display-name=sd-kohya \
  --config=vertex-config-nfs.yaml \
  --args="--output_storage=/gcs/sd/kohya_output,--save_nfs_only=True" \
  --command="python3,train_kohya.py"
-------------------------------------------------------------------------------- /terraform-provision-infra/modules/agones/k8s-res/variables.tf: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.

# OAuth client credentials.
variable "oauth_client_id" {
  description = "OAuth Client ID."
  type        = string
}

variable "oauth_client_secret" {
  description = "OAuth Client Secret."
  type        = string
}

variable "sd_webui_domain" {
  description = "you owned sub domain for stable diffusion webui access."
  type        = string
}

variable "gke_cluster_name" {
  description = "GKE Cluster Name."
  type        = string
}

variable "gke_cluster_location" {
  description = "GKE Cluster Location"
  type        = string
}

variable "project_id" {
  description = "GCP project id"
  type        = string
}

# Filestore address information handed to this module.
variable "google_filestore_reserved_ip_range" {
  description = "Google Filestore reserved IP range"
  type        = string
}

# Node pool of the GKE cluster that this module refers to.
variable "gke_cluster_nodepool" {
  description = "GKE cluster node pool name"
  type        = string
  default     = "default-pool"
}

variable "nginx_image_url" {
  description = "Nginx Image url"
  type        = string
}

variable "webui_image_url" {
  description = "Stable diffusion webui Image url"
  type        = string
}

# Defaults to the public Agones example simple-game-server image.
variable "game_server_image_url" {
  description = "Agones game server Image url"
  type        = string
  default     = "us-docker.pkg.dev/agones-images/examples/simple-game-server:0.14"
}

variable "webui_address_name" {
  description = "Name of the address resource used for the stable diffusion webui"
  type        = string
}
--------------------------------------------------------------------------------
/terraform-provision-infra/modules/nonagones/gcp-res/variables.tf: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.

# Input variable definitions
variable "project_id" {
  description = "Project ID of the cloud resource."
  type        = string
}

variable "region" {
  description = "Region to set for gcp resource deploy."
  type        = string
}

variable "filestore_zone" {
  description = "Zone to set for filestore nfs server, should be same zone with gke node."
  type        = string
}

variable "cluster_location" {
  description = "gke cluster location choose a zone or region."
  type        = string
}

variable "node_machine_type" {
  description = "gke node machine type."
  type        = string
  default     = "custom-12-49152-ext"
}

variable "accelerator_type" {
  description = "Get available accelerator_type from gcloud compute accelerator-types list --format='csv(zone,name)' "
  type        = string
  default     = "nvidia-tesla-t4"
}

# Node count; defaults to a single node.
variable "gke_num_nodes" {
  description = "Number of GKE nodes."
  type        = number
  default     = 1
}

# Google Cloud APIs listed as required by the project.
variable "gcp_service_list" {
  description = "The list of apis necessary for the project"
  type        = list(string)
  default = [
    "artifactregistry.googleapis.com",
    "cloudbuild.googleapis.com",
    "compute.googleapis.com",
    "container.googleapis.com",
    "file.googleapis.com",
    "networkmanagement.googleapis.com"
  ]
}

-------------------------------------------------------------------------------- /Stable-Diffusion-UI-GKE/templates/deployment-timeshare.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.

# Deployment template for the Stable Diffusion WebUI on GKE nodes with GPU time-sharing.
# $fileshare_name and $image_url are placeholders to substitute before applying.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: stable-diffusion-deployment
  labels:
    app: stable-diffusion
spec:
  replicas: 1
  selector:
    matchLabels:
      app: stable-diffusion
  template:
    metadata:
      labels:
        app: stable-diffusion
    spec:
      # Schedule only onto nodes labelled for time-sharing with at most 2 clients per GPU.
      nodeSelector:
        cloud.google.com/gke-gpu-sharing-strategy: time-sharing
        cloud.google.com/gke-max-shared-clients-per-gpu: "2"
      volumes:
        - name: stable-diffusion-storage
          persistentVolumeClaim:
            claimName: $fileshare_name # replace with fileshare name
      containers:
      - name: stable-diffusion-webui
        image: $image_url # e.g. us-central1-docker.pkg.dev///sd-webui:0.1
        resources:
          limits:
            cpu: 2
            memory: 15Gi
            nvidia.com/gpu: 1
        ports:
          - containerPort: 7860
        # The single claim is mounted three times, each at a different subPath.
        volumeMounts:
          - mountPath: "/stable-diffusion-webui/models/Stable-diffusion"
            name: stable-diffusion-storage
            subPath: models/Stable-diffusion/sd15
          - mountPath: "/stable-diffusion-webui/outputs"
            name: stable-diffusion-storage
            subPath: outputs
          - mountPath: "/stable-diffusion-webui/models/ControlNet"
            name: stable-diffusion-storage
            subPath: models/ControlNet
-------------------------------------------------------------------------------- /Stable-Diffusion-Vertex/hpo/kohya-lora/train_network.patch: -------------------------------------------------------------------------------- 1 | diff --git a/train_network.py b/train_network.py 2 | index 5c4d5ad..7bf60fa 100644 3 | --- a/train_network.py 4 | +++ b/train_network.py 5 | @@ -27,12 +27,18 @@ from library.config_util import ( 6 | import library.huggingface_util as huggingface_util 7 | import library.custom_train_functions as custom_train_functions 8 | from library.custom_train_functions import apply_snr_weight, get_weighted_text_embeddings 9 | - 10 | +import hypertune 11 | +hpt = hypertune.HyperTune() 12 | 13 | # TODO 他のスクリプトと共通化する 14 | -def generate_step_logs(args: argparse.Namespace, current_loss, avr_loss, lr_scheduler): 15 | +def generate_step_logs(args: argparse.Namespace, current_loss, avr_loss, lr_scheduler, step): 16 | logs = {"loss/current": current_loss, "loss/average": avr_loss} 17 | - 18 | + if args.hpo == "y": 19 | + hpt.report_hyperparameter_tuning_metric( 20 | + hyperparameter_metric_tag='avr_loss', 21 | + metric_value=avr_loss, 22 | + global_step=step) 23 | + 24 | lrs = lr_scheduler.get_last_lr() 25 | 26 | if args.network_train_text_encoder_only or len(lrs) <= 2: # not block lr (or single block) 27 | @@ -686,7 +692,7 @@ def train(args): 28 | progress_bar.set_postfix(**logs) 29 | 30 | if
args.logging_dir is not None: 31 | - logs = generate_step_logs(args, current_loss, avr_loss, lr_scheduler) 32 | + logs = generate_step_logs(args, current_loss, avr_loss, lr_scheduler, global_step) 33 | accelerator.log(logs, step=global_step) 34 | 35 | if global_step >= args.max_train_steps: 36 | @@ -780,7 +786,9 @@ def setup_parser() -> argparse.ArgumentParser: 37 | parser.add_argument( 38 | "--training_comment", type=str, default=None, help="arbitrary comment string stored in metadata / メタデータに記録する任意のコメント文字列" 39 | ) 40 | - 41 | + parser.add_argument( 42 | + "--hpo", type=str, default="y", help="if using hyper parameter tuning" 43 | + ) 44 | return parser 45 | 46 | 47 | @@ -791,3 +799,4 @@ if __name__ == "__main__": 48 | args = train_util.read_config_from_file(args, parser) 49 | 50 | train(args) 51 | + 52 | -------------------------------------------------------------------------------- /Stable-Diffusion-UI-Agones/agones/fleet_autoscale.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
# Buffer-based autoscaler for the sd-agones-fleet Fleet: keeps 1 ready instance
# spare, within 1..20 replicas, re-evaluated every 30 seconds.
apiVersion: "autoscaling.agones.dev/v1"
kind: FleetAutoscaler
# FleetAutoscaler Metadata
# https://v1-24.docs.kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#objectmeta-v1-meta
metadata:
  name: fleet-autoscaler-policy
spec:
  # The name of the fleet to attach to and control. Must be an existing Fleet in the same namespace
  # as this FleetAutoscaler
  fleetName: sd-agones-fleet
  # The autoscaling policy
  policy:
    # Type of the policy. This file uses the Buffer policy.
    type: Buffer
    # Parameters of the buffer policy
    buffer:
      # Size of a buffer of "ready" game server instances.
      # The FleetAutoscaler will scale the fleet up and down trying to maintain this buffer
      # as instances are being allocated or terminated.
      # It can be specified either as an absolute number (e.g. 5) or as a percentage (e.g. 5%).
      bufferSize: 1
      # Minimum fleet size to be set by this FleetAutoscaler.
      # If not specified, the actual minimum fleet size will be bufferSize.
      minReplicas: 1
      # Maximum fleet size that can be set by this FleetAutoscaler.
      # Required.
      maxReplicas: 20
  # [Stage:Beta]
  # [FeatureFlag:CustomFasSyncInterval]
  # The autoscaling sync strategy
  sync:
    # Type of the sync. This file uses FixedInterval.
    type: FixedInterval
    # Parameters of the fixedInterval sync
    fixedInterval:
      # The time in seconds between each autoscaling evaluation
      seconds: 30
-------------------------------------------------------------------------------- /Stable-Diffusion-UI-Agones/optimizated-init/README.MD: -------------------------------------------------------------------------------- 1 | 2 | # User Guide 3 | 4 | * Set Environment Variables 5 | ``` 6 | SOURCE_PROJECT_ID=flius-vpc-2 7 | IMAGE_NAME=sd-image 8 | PROJECT_ID=project-kangwe-poc 9 | CLUSTER_NAME=dpv2-test 10 | ``` 11 | * Copy Image to your project 12 | ``` 13 | gcloud compute images create sd-image \ 14 | --source-image=projects/${SOURCE_PROJECT_ID}/global/images/${IMAGE_NAME} \ 15 | --project=${PROJECT_ID} 16 | ``` 17 | 40 | * Build init container image to attach disks based on image 41 | ``` 42 | cd attach-disk 43 | gcloud builds submit
--tag=us-central1-docker.pkg.dev/${PROJECT_ID}/stable-diffusion-repo/attach-disk-image 44 | ``` 45 | * Install the DaemonSet. First update the values in daemonset.yaml as described below. 46 | - Update the image repository to the one you specified in the previous step. 47 | ``` 48 | initContainers: 49 | - name: init-disk 50 | image: us-central1-docker.pkg.dev/flius-vpc-2/stable-diffusion-repo/attach-disk-image 51 | 52 | kubectl apply -f daemonset.yaml 53 | ``` 54 | * Install the Stable Diffusion deployment 55 | ``` 56 | kubectl apply -f deployment-init-disk.yaml 57 | ``` 58 | -------------------------------------------------------------------------------- /Stable-Diffusion-UI-Agones/optimizated-init/deployment-init-disk.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | apiVersion: apps/v1 15 | kind: Deployment 16 | metadata: 17 | name: stable-diffusion-deployment-final 18 | labels: 19 | app: stable-diffusion 20 | spec: 21 | replicas: 1 22 | selector: 23 | matchLabels: 24 | app: stable-diffusion 25 | template: 26 | metadata: 27 | labels: 28 | app: stable-diffusion 29 | spec: 30 | serviceAccountName: workload-identity-ksa 31 | nodeSelector: 32 | iam.gke.io/gke-metadata-server-enabled: "true" 33 | volumes: 34 | - name: runtime-lib 35 | hostPath: 36 | path: /var/lib/runtime-lib 37 | containers: 38 | - name: stable-diffusion-webui 39 | image: {region}-docker.pkg.dev/{project-id}/stable-diffusion-repo/sd-webui-final:0.1 40 | imagePullPolicy: Always 41 | command: ["/bin/bash", "-c"] 42 | args: 43 | - source /runtime-lib/bin/activate; 44 | cp /user-watch.py /runtime-lib/stable-diffusion-webui/user-watch.py; 45 | cp /start.sh /runtime-lib/stable-diffusion-webui/start.sh; 46 | cd /runtime-lib/stable-diffusion-webui; 47 | python3 launch.py --listen --xformers --enable-insecure-extension-access --no-gradio-queue 48 | volumeMounts: 49 | - mountPath: "/runtime-lib" 50 | name: runtime-lib 51 | resources: 52 | limits: 53 | nvidia.com/gpu: 1 54 | ports: 55 | - containerPort: 7860 56 | env: 57 | - name: MY_NODE_NAME 58 | valueFrom: 59 | fieldRef: 60 | fieldPath: spec.nodeName 61 | # securityContext: 62 | # privileged: true 63 | -------------------------------------------------------------------------------- /Stable-Diffusion-Vertex/Diffusers/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | FROM gcr.io/deeplearning-platform-release/base-gpu.py310 16 | 17 | RUN apt-get update 18 | 19 | WORKDIR /root 20 | 21 | #install sd libraries 22 | RUN git clone -b v0.14.0 https://github.com/huggingface/diffusers.git 23 | WORKDIR diffusers 24 | RUN git checkout f20c8f5a1aba27f5972cad50516f18ba516e4d9e 25 | WORKDIR /root 26 | RUN pip install /root/diffusers 27 | 28 | RUN git clone https://github.com/huggingface/peft.git 29 | RUN pip install /root/peft 30 | RUN git clone https://huggingface.co/spaces/smangrul/peft-lora-sd-dreambooth 31 | 32 | #install libraries 33 | #RUN pip install -U xformers safetensors tqdm ftfy loralib evaluate psutil pyyaml packaging bitsandbytes==0.35.0 datasets 34 | RUN pip install xformers==0.0.18 35 | RUN pip install safetensors==0.3.0 36 | RUN pip install tqdm==4.65.0 37 | RUN pip install ftfy==6.1.1 38 | RUN pip install loralib==0.1.1 39 | RUN pip install evaluate==0.4.0 40 | RUN pip install psutil==5.9.4 41 | RUN pip install pyYAML==6.0 42 | RUN pip install packaging==21.3 43 | RUN pip install datasets==2.11.0 44 | RUN pip install bitsandbytes==0.35.0 45 | RUN pip install transformers==4.27.4 46 | RUN pip install accelerate==0.18.0 47 | RUN pip install Jinja2==3.1.2 48 | RUN pip install cloudml-hypertune==0.1.0.dev6 49 | 50 | #install pytorch 51 | RUN pip install torch==2.0.0+cu118 torchvision==0.15.1+cu118 --extra-index-url https://download.pytorch.org/whl/cu118 52 | 53 | # Installs gcp libraries 54 | RUN pip install google-cloud-aiplatform 55 | RUN pip install google-cloud-storage 56 | 
57 | # Copies the trainer code to the docker image. 58 | COPY train.py /root/train.py 59 | 60 | # Sets up the entry point to invoke the trainer. 61 | #ENTRYPOINT ["python3", "-m", "train"] 62 | 63 | -------------------------------------------------------------------------------- /Stable-Diffusion-Vertex/pipeline/README.md: -------------------------------------------------------------------------------- 1 | # Stable Diffusion Model Fine-tuning on Vertex AI Pipeline Quick Start 2 | 3 | This guide gives a simple example of how to orchestrate an end-to-end ML workflow by using Vertex AI pipeline. 4 | 5 | As the picture shows, in this example we can use a one-stage Vertex AI pipeline to train a stable diffusion Lora model on the Kohya environment, and sync the Lora model files automatically to GCS or Cloud Filestore. In this one-stage method, we need to manually push a Kohya Docker image to the specified Docker registry. 6 | ![one-stage](./images/one-stage.png) 7 | 8 | If the Kohya Docker image hasn’t been uploaded before, we can also choose to use a two-stage Vertex AI pipeline: the first stage of the pipeline builds a Kohya Docker image and automatically pushes it to the Google-managed Docker registry (Artifact Registry), and the second stage starts a Vertex AI training job, which also automatically syncs the trained Lora file to GCS or Cloud Filestore. 9 | 10 | ![two-stage](./images/architecture.png) 11 | 12 | Whether to choose the one-stage or two-stage pipeline can be configured by setting the docker_build parameter in pipeline_conf.yml. 13 | 14 | ## Start Vertex AI pipeline job 15 | Starting the Vertex AI pipeline is straightforward; just follow the steps below. 16 | 17 | 1. Clone the code repository to your local environment. We suggest using Cloud Shell; if you choose a different terminal, make sure the Google Cloud SDK is installed and configured. 
18 | ``` 19 | cd gcp-stable-diffusion-build-deploy/Stable-Diffusion-Vertex/pipeline 20 | ``` 21 | 2. Install Python dependencies 22 | ``` 23 | pip install -r requirements.txt 24 | ``` 25 | 3. Configure pipeline configuration settings by editing pipeline_conf.yml 26 | ``` 27 | vi pipeline_conf.yml 28 | ``` 29 | 4. Run pipeline.py to start the Vertex AI training job. 30 | ``` 31 | python3 pipeline.py 32 | ``` 33 | By running the pipeline.py file, the pipeline script will be compiled automatically and the pipeline job will be started in the background. The pipeline job duration depends on the number of images in the dataset and on model hyperparameter settings such as batch size and epochs. 34 | 35 | Pipeline job status can be displayed by visiting the GCP console; more detailed logs can be found by clicking the icon of each step. 36 | 37 | ![Vertex-AI-Screenshot](./images/Vertex-AI-Screenshot.png) 38 | 39 | 40 | -------------------------------------------------------------------------------- /Stable-Diffusion-UI-Agones/agones/fleet_pvc.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | apiVersion: "agones.dev/v1" 16 | kind: Fleet 17 | metadata: 18 | name: sd-agones-fleet 19 | spec: 20 | replicas: 1 21 | template: 22 | spec: 23 | container: simple-game-server 24 | ports: 25 | - name: default 26 | container: simple-game-server 27 | containerPort: 7654 28 | - name: sd 29 | container: stable-diffusion-webui 30 | containerPort: 7860 31 | protocol: TCP 32 | template: 33 | spec: 34 | containers: 35 | - name: simple-game-server 36 | # image: us-docker.pkg.dev/agones-images/examples/simple-game-server:0.14 37 | image: -docker.pkg.dev///sd-agones-sidecar:0.1 38 | resources: 39 | requests: 40 | memory: "64Mi" 41 | cpu: "20m" 42 | limits: 43 | memory: "64Mi" 44 | cpu: "20m" 45 | - name: stable-diffusion-webui 46 | image: -docker.pkg.dev///sd-webui:0.1 47 | command: ["/bin/sh", "start.sh"] 48 | # https://github.com/AUTOMATIC1111/stable-diffusion-webui/discussions/6722 49 | env: 50 | - name: SAFETENSORS_FAST_GPU 51 | value: "1" 52 | - name: LD_PRELOAD 53 | value: "/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4" 54 | volumeMounts: 55 | - name: stable-diffusion-storage 56 | mountPath: /sd_dir 57 | resources: 58 | limits: 59 | nvidia.com/gpu: "1" 60 | volumes: 61 | - name: stable-diffusion-storage 62 | persistentVolumeClaim: 63 | claimName: vol1 -------------------------------------------------------------------------------- /Stable-Diffusion-Vertex/hpo/diffusers/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | FROM gcr.io/deeplearning-platform-release/base-gpu.py310 16 | 17 | RUN apt-get update 18 | 19 | WORKDIR /root 20 | 21 | #install sd libraries 22 | RUN git clone -b v0.14.0 https://github.com/huggingface/diffusers.git 23 | WORKDIR diffusers 24 | RUN git checkout f20c8f5a1aba27f5972cad50516f18ba516e4d9e 25 | WORKDIR /root 26 | RUN pip install /root/diffusers 27 | 28 | RUN git clone https://github.com/huggingface/peft.git 29 | RUN pip install /root/peft 30 | RUN git clone https://huggingface.co/spaces/smangrul/peft-lora-sd-dreambooth 31 | 32 | #install libraries 33 | #RUN pip install -U xformers safetensors tqdm ftfy loralib evaluate psutil pyyaml packaging bitsandbytes==0.35.0 datasets 34 | RUN pip install xformers==0.0.18 35 | RUN pip install safetensors==0.3.0 36 | RUN pip install tqdm==4.65.0 37 | RUN pip install ftfy==6.1.1 38 | RUN pip install loralib==0.1.1 39 | RUN pip install evaluate==0.4.0 40 | RUN pip install psutil==5.9.4 41 | RUN pip install pyYAML==6.0 42 | RUN pip install packaging==21.3 43 | RUN pip install datasets==2.11.0 44 | RUN pip install bitsandbytes==0.35.0 45 | RUN pip install transformers==4.27.4 46 | RUN pip install accelerate==0.18.0 47 | RUN pip install Jinja2==3.1.2 48 | RUN pip install cloudml-hypertune==0.1.0.dev6 49 | #install pytorch 50 | RUN pip install torch==2.0.0+cu118 torchvision==0.15.1+cu118 --extra-index-url https://download.pytorch.org/whl/cu118 51 | 52 | # Installs gcp libraries 53 | RUN pip install google-cloud-aiplatform 54 | RUN pip install google-cloud-storage 55 | 56 | # 
Copies the trainer code to the docker image. 57 | COPY train_diffusers.py /root/train.py 58 | COPY train_dreambooth.patch /tmp/train_dreambooth.patch 59 | RUN cd /root/diffusers/; patch -p1 < /tmp/train_dreambooth.patch 60 | 61 | # Sets up the entry point to invoke the trainer. 62 | #ENTRYPOINT ["python3", "-m", "train"] 63 | 64 | 65 | -------------------------------------------------------------------------------- /Stable-Diffusion-Vertex/Kohya-lora/Dockerfile_kohya: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | FROM gcr.io/deeplearning-platform-release/base-gpu.py310 16 | 17 | RUN apt-get update 18 | 19 | WORKDIR /root 20 | 21 | # install sd libraries: pin lora-scripts to a fixed commit, then re-sync the submodules to the revisions that commit records 22 | RUN git clone --recurse-submodules https://github.com/Akegarasu/lora-scripts 23 | WORKDIR lora-scripts 24 | RUN git checkout 1a483a0fd9aa1257b0bb4cfe5314054564675121 25 | RUN git submodule update 26 | WORKDIR /root 27 | #install libraries 28 | RUN pip install accelerate==0.18.0 29 | RUN pip install xformers==0.0.18 30 | RUN pip install safetensors==0.3.0 31 | RUN pip install tqdm==4.65.0 32 | RUN pip install ftfy==6.1.1 33 | RUN pip install loralib==0.1.1 34 | RUN pip install evaluate==0.4.0 35 | RUN pip install psutil==5.9.4 36 | RUN pip install PyYAML==6.0 37 | RUN pip install packaging==23.0 38 | RUN pip install transformers==4.27.4 39 | RUN pip install bitsandbytes==0.35.0 40 | RUN pip install diffusers[torch]==0.10.2 41 | RUN pip install albumentations==1.3.0 42 | RUN pip install opencv-python-headless==4.7.0.72 43 | RUN pip install einops==0.6.0 44 | RUN pip install pytorch-lightning==2.0.1 45 | RUN pip install gradio==3.25.0 46 | RUN pip install altair==4.2.2 47 | RUN pip install easygui==0.98.3 48 | RUN pip install toml==0.10.2 49 | RUN pip install voluptuous==0.13.1 50 | RUN pip install timm==0.6.13 51 | RUN pip install fairscale==0.4.13 52 | RUN pip install huggingface-hub==0.13.4 53 | RUN pip install tensorflow==2.10.1 tensorboard==2.10.1 54 | RUN pip install lion-pytorch==0.0.7 55 | RUN pip install lycoris-lora==0.1.4 56 | RUN pip install cloudml-hypertune==0.1.0.dev6 57 | #install pytorch 58 | RUN pip install torch==2.0.0+cu118 torchvision==0.15.1+cu118 --extra-index-url https://download.pytorch.org/whl/cu118 59 | 60 | # Copies the trainer code to the docker image. 61 | COPY train_kohya.py /root/train_kohya.py 62 | 63 | # Sets up the entry point to invoke the trainer. 
64 | #ENTRYPOINT ["python3", "-m", "train_kohya"] 65 | -------------------------------------------------------------------------------- /Stable-Diffusion-UI-Agones/optimizated-init/attach-disk/attach-disk.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2023 Google LLC All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # Attaches a free sd-lib-disk* persistent disk to the node $MY_NODE_NAME; if none can be attached, creates a new disk from $IMAGE_NAME and attaches that. 16 | echo $MY_NODE_NAME 17 | echo $IMAGE_NAME 18 | ZONE=$(gcloud compute instances list --filter="name:$MY_NODE_NAME" --format="value(zone)") 19 | echo $ZONE 20 | # Nothing to do when the instance description already mentions an sd-lib-disk- disk. 21 | attached=$(gcloud compute instances describe $MY_NODE_NAME --zone=$ZONE | grep sd-lib-disk-) 22 | if [ "$attached" != "" ]; 23 | then 24 | echo "gke node $MY_NODE_NAME already attached a disk." 
25 | exit 0 26 | fi; 27 | # flag becomes 1 once an existing disk has been attached to this node. 28 | flag=0 29 | 30 | export PROJECT_ID=$(gcloud config get-value project) 31 | export disks=$(gcloud compute disks list --zones=$ZONE --format="value(name)" --filter="name ~ ^sd-lib-disk") 32 | echo $PROJECT_ID 33 | echo $disks 34 | disks_arr=($disks) 35 | echo $disks_arr 36 | disks_arr=($(shuf -e "${disks_arr[@]}")) 37 | # Candidates are tried in shuffled order (presumably so nodes starting together do not all race for the same disk). 38 | for i in "${disks_arr[@]}" 39 | do 40 | echo $i 41 | disk=$(gcloud compute disks describe $i --zone=$ZONE --format="value(users)") 42 | echo $disk 43 | if [ "$disk" = "" ]; 44 | then 45 | echo "Disk $i is Free" 46 | gcloud compute instances attach-disk ${MY_NODE_NAME} --disk=projects/$PROJECT_ID/zones/$ZONE/disks/$i --zone=$ZONE 47 | return=$? 48 | echo $return 49 | if [ "$return" -eq 0 ]; 50 | then 51 | flag=1 52 | break 53 | else 54 | echo "disk is free, but booked by another node" 55 | sleep 5 56 | continue 57 | fi; 58 | else 59 | echo "Disk $i is Busy" 60 | fi; 61 | done 62 | # No existing disk was attached: create a new one named with a full date-time stamp (%d included) and attach it. 63 | if [ $flag = 0 ]; 64 | then 65 | NOW=$(date +"%Y%m%d%H%M%S") 66 | #gcloud compute disks create sd-lib-disk-$NOW --type=pd-balanced --size=30GB --zone=$ZONE --source-snapshot=projects/$PROJECT_ID/global/snapshots/$SNAPSHOT_NAME 67 | gcloud compute disks create sd-lib-disk-$NOW --type=pd-balanced --size=30GB --zone=$ZONE --image=$IMAGE_NAME 68 | gcloud compute instances attach-disk ${MY_NODE_NAME} --disk=projects/$PROJECT_ID/zones/$ZONE/disks/sd-lib-disk-$NOW --zone=$ZONE 69 | fi; 70 | -------------------------------------------------------------------------------- /Stable-Diffusion-UI-Agones/optimizated-init/daemonset.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | apiVersion: apps/v1 15 | kind: DaemonSet 16 | metadata: 17 | name: sd-ds-init-disk 18 | labels: 19 | app: sd-ds-init-disk 20 | spec: 21 | selector: 22 | matchLabels: 23 | app: sd-ds-init-disk 24 | template: 25 | metadata: 26 | labels: 27 | app: sd-ds-init-disk 28 | spec: 29 | hostPID: true 30 | tolerations: 31 | - operator: "Exists" 32 | affinity: 33 | nodeAffinity: 34 | requiredDuringSchedulingIgnoredDuringExecution: 35 | nodeSelectorTerms: 36 | - matchExpressions: 37 | - key: cloud.google.com/gke-accelerator 38 | operator: Exists 39 | # serviceAccountName: workload-identity-ksa 40 | nodeSelector: 41 | iam.gke.io/gke-metadata-server-enabled: "true" 42 | initContainers: 43 | - name: init-disk 44 | image: {region}-docker.pkg.dev/{project-id}/stable-diffusion-repo/attach-disk-image 45 | imagePullPolicy: Always 46 | command: ["bash", "-c"] 47 | args: 48 | - /attach-disk.sh 49 | env: 50 | - name: MY_NODE_NAME 51 | valueFrom: 52 | fieldRef: 53 | fieldPath: spec.nodeName 54 | - name: IMAGE_NAME 55 | value: sd-image 56 | containers: 57 | - name: sd-ds-init-disk 58 | image: gcr.io/google-containers/startup-script:v2 59 | imagePullPolicy: Always 60 | securityContext: 61 | privileged: true 62 | env: 63 | - name: STARTUP_SCRIPT 64 | value: | 65 | #!/bin/bash 66 | set -euo pipefail 67 | 68 | if [ ! 
-f /var/lib/runtime-lib/added-disk.txt ] 69 | then 70 | mkdir -p /var/lib/runtime-lib 71 | mount -o discard,defaults /dev/sdb /var/lib/runtime-lib 72 | sleep 10 73 | touch /var/lib/runtime-lib/added-disk.txt 74 | fi 75 | -------------------------------------------------------------------------------- /Stable-Diffusion-Vertex/hpo/kohya-lora/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | FROM gcr.io/deeplearning-platform-release/base-gpu.py310 16 | 17 | RUN apt-get update 18 | 19 | WORKDIR /root 20 | 21 | #install sd libraries 22 | RUN git clone --recurse-submodules https://github.com/Akegarasu/lora-scripts 23 | WORKDIR lora-scripts 24 | RUN git checkout 1a483a0fd9aa1257b0bb4cfe5314054564675121 25 | RUN git submodule update 26 | 27 | WORKDIR /root 28 | #install libraries 29 | RUN pip install accelerate==0.18.0 30 | RUN pip install xformers==0.0.18 31 | RUN pip install safetensors==0.3.0 32 | RUN pip install tqdm==4.65.0 33 | RUN pip install ftfy==6.1.1 34 | RUN pip install loralib==0.1.1 35 | RUN pip install evaluate==0.4.0 36 | RUN pip install psutil==5.9.4 37 | RUN pip install PyYAML==6.0 38 | RUN pip install packaging==23.0 39 | RUN pip install transformers==4.27.4 40 | RUN pip install bitsandbytes==0.35.0 41 | RUN pip install diffusers[torch]==0.10.2 42 | RUN pip install albumentations==1.3.0 43 | RUN pip install opencv-python-headless==4.7.0.72 44 | RUN pip install einops==0.6.0 45 | RUN pip install pytorch-lightning==2.0.1 46 | RUN pip install gradio==3.25.0 47 | RUN pip install altair==4.2.2 48 | RUN pip install easygui==0.98.3 49 | RUN pip install toml==0.10.2 50 | RUN pip install voluptuous==0.13.1 51 | RUN pip install timm==0.6.13 52 | RUN pip install fairscale==0.4.13 53 | RUN pip install huggingface-hub==0.13.4 54 | RUN pip install tensorflow==2.10.1 tensorboard==2.10.1 55 | RUN pip install lion-pytorch==0.0.7 56 | RUN pip install lycoris-lora==0.1.4 57 | RUN pip install cloudml-hypertune==0.1.0.dev6 58 | #install pytorch 59 | RUN pip install torch==2.0.0+cu118 torchvision==0.15.1+cu118 --extra-index-url https://download.pytorch.org/whl/cu118 60 | 61 | # Copies the trainer code to the docker image. 
62 | COPY train_kohya.py /root/train_kohya.py 63 | COPY train_network.patch /tmp/train_network.patch 64 | RUN cd /root/lora-scripts/sd-scripts/; patch -p1 < /tmp/train_network.patch 65 | # Sets up the entry point to invoke the trainer. 66 | #ENTRYPOINT ["python3", "-m", "train_kohya"] 67 | -------------------------------------------------------------------------------- /Stable-Diffusion-Vertex/Diffusers/cloud-cli.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # cloud build image 16 | gcloud builds submit --config cloud-build-config.yaml . 17 | 18 | # create vertex ai customer training job 19 | # args format: 20 | # --model_name: Huggingface repo id, or "/gcs/bucket_name/model_folder". I only test the models downloaded from HF, with standard diffusers format. Safetensors has not been test. 
21 | # --input_storage: /gcs/bucket_name/input_image_folder 22 | # for dreambooth: just put images in the image folder 23 | # for text-to-image: put images and metadata.jsonl in the image folder 24 | # --output_storage: /gcs/bucket_name/output_folder 25 | # --prompt: a photo of XXX 26 | # --set_grads_to_none: for training dreambooth on T4 27 | # input_storage, output_storage, and prompt are required arguments 28 | gcloud ai custom-jobs create \ 29 | --region=us-central1 \ 30 | --display-name=sd-diffuser-t2i-1v100 \ 31 | --config=vertex-config-nfs.yaml \ 32 | --args="--method=diffuser_text_to_image,--model_name=CompVis/stable-diffusion-v1-4,--input_storage=/gcs/sd/input_dog_t2i,--output_storage=/gcs/sd/diffusers_t2i_output,--resolution=512,--batch_size=1,--lr=1e-4,--use_8bit=True,--max_train_steps=100" \ 33 | --command="python3,train.py" 34 | 35 | gcloud ai custom-jobs create \ 36 | --region=us-central1 \ 37 | --display-name=sd-diffuser-dreambooth-1v100 \ 38 | --config=vertex-config-nfs.yaml \ 39 | --args="--method=diffuser_dreambooth,--model_name=runwayml/stable-diffusion-v1-5,--input_storage=/gcs/sd/dog_image,--output_storage=/gcs/sd/diffusers_db_output,--prompt=a photo of sks dog,--class_prompt=a photo of dog,--num_class_images=50,--lr=1e-4,--use_8bit=True,--max_train_steps=100,--text_encoder=True,--set_grads_to_none=True" \ 40 | --command="python3,train.py" 41 | 42 | # only save the models in GCS to Filestore 43 | gcloud ai custom-jobs create \ 44 | --region=us-central1 \ 45 | --display-name=sd-diffusers \ 46 | --config=vertex-config-nfs.yaml \ 47 | --args="--output_storage=/gcs/sd/diffusers_output,--save_nfs_only=True" \ 48 | --command="python3,train.py" 49 | -------------------------------------------------------------------------------- /terraform-provision-infra/modules/agones/gcp-res/variables.tf: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, 
Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # Input variable definitions 16 | variable "project_id" { 17 | description = "Project ID of the cloud resource." 18 | type = string 19 | } 20 | 21 | variable "region" { 22 | description = "Region to set for gcp resource deploy." 23 | type = string 24 | } 25 | variable "filestore_zone" { 26 | description = "Zone to set for filestore nfs server, should be same zone with gke node." 27 | type = string 28 | } 29 | 30 | variable "cluster_location" { 31 | description = "gke cluster location; choose a zone or region." 32 | type = string 33 | } 34 | variable "node_machine_type" { 35 | description = "gke node machine type." 36 | type = string 37 | default = "custom-12-49152-ext" 38 | } 39 | 40 | variable "accelerator_type" { 41 | description = "Get available accelerator_type from gcloud compute accelerator-types list --format='csv(zone,name)' " 42 | type = string 43 | default = "nvidia-tesla-t4" 44 | } 45 | variable "gke_num_nodes" { 46 | description = "Number of gke nodes." 47 | type = number 48 | default = 1 49 | } 50 | variable "skip_build_image" { 51 | description = "Set to true to skip building images." 
52 | type = bool 53 | default = false 54 | } 55 | variable "gcp_service_list" { 56 | description = "The list of apis necessary for the project" 57 | type = list(string) 58 | default = [ 59 | "artifactregistry.googleapis.com", 60 | "cloudbuild.googleapis.com", 61 | "compute.googleapis.com", 62 | "container.googleapis.com", 63 | "file.googleapis.com", 64 | "networkmanagement.googleapis.com", 65 | "memcache.googleapis.com", 66 | "cloudfunctions.googleapis.com", 67 | "cloudscheduler.googleapis.com", 68 | "iap.googleapis.com", 69 | "dns.googleapis.com", 70 | "redis.googleapis.com", 71 | "vpcaccess.googleapis.com" 72 | ] 73 | } 74 | variable "webui_dockerfile_path" { 75 | description = "Dockerfile path for webui fleet" 76 | type = string 77 | default = "" 78 | } 79 | variable "cloudfunctions_source_code_path" { 80 | description = "Source code path for the cloud function" 81 | type = string 82 | default = "../Stable-Diffusion-UI-Agones/cloud-function/" 83 | } 84 | -------------------------------------------------------------------------------- /Stable-Diffusion-UI-Agones/agones/fleet_gcs.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | apiVersion: "agones.dev/v1" 16 | kind: Fleet 17 | metadata: 18 | name: sd-agones-fleet 19 | spec: 20 | replicas: 1 21 | template: 22 | spec: 23 | container: simple-game-server 24 | ports: 25 | - name: default 26 | container: simple-game-server 27 | containerPort: 7654 28 | - name: sd 29 | container: stable-diffusion-webui 30 | containerPort: 7860 31 | protocol: TCP 32 | template: 33 | metadata: 34 | annotations: 35 | gke-gcsfuse/volumes: "true" 36 | gke-gcsfuse/cpu-limit: 500m 37 | gke-gcsfuse/memory-limit: 100Mi 38 | gke-gcsfuse/ephemeral-storage-limit: 50Gi 39 | spec: 40 | # serviceAccountName: gke-gcs-fuse 41 | containers: 42 | - name: simple-game-server 43 | image: us-central1-docker.pkg.dev///sd-agones-sidecar:0.1 44 | # image: us-docker.pkg.dev/agones-images/examples/simple-game-server:0.14 45 | resources: 46 | requests: 47 | memory: "64Mi" 48 | cpu: "20m" 49 | limits: 50 | memory: "64Mi" 51 | cpu: "20m" 52 | - name: stable-diffusion-webui 53 | image: us-central1-docker.pkg.dev///sd-webui:0.1 54 | # image: us-central1-docker.pkg.dev/dave-selfstudy01/hzchen-repo/sd-webui-sdxl:0.1 55 | imagePullPolicy: Always 56 | command: ["/bin/sh", "start.sh"] 57 | # https://github.com/AUTOMATIC1111/stable-diffusion-webui/discussions/6722 58 | env: 59 | - name: SAFETENSORS_FAST_GPU 60 | value: "1" 61 | - name: LD_PRELOAD 62 | value: "/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4" 63 | volumeMounts: 64 | - name: stable-diffusion-storage 65 | mountPath: /sd_dir 66 | resources: 67 | limits: 68 | nvidia.com/gpu: "1" 69 | volumes: 70 | - name: stable-diffusion-storage 71 | persistentVolumeClaim: 72 | claimName: gcs-fuse-csi-static-pvc 73 | -------------------------------------------------------------------------------- /terraform-provision-infra/modules/agones/helm-agones/main.tf: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the 
"License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | terraform { 16 | required_providers { 17 | google = { 18 | source = "hashicorp/google" 19 | version = "4.63.1" 20 | } 21 | helm = { 22 | source = "hashicorp/helm" 23 | version = "~> 2.3" 24 | } 25 | } 26 | } 27 | data "google_client_config" "default" {} 28 | 29 | data "google_container_cluster" "my_cluster" { 30 | name = var.gke_cluster_name 31 | location = var.gke_cluster_location 32 | project = var.project_id 33 | } 34 | 35 | provider "kubernetes" { 36 | host = "https://${data.google_container_cluster.my_cluster.endpoint}" 37 | token = data.google_client_config.default.access_token 38 | cluster_ca_certificate = base64decode( 39 | data.google_container_cluster.my_cluster.master_auth[0].cluster_ca_certificate, 40 | ) 41 | experiments { 42 | manifest_resource = true 43 | } 44 | } 45 | 46 | provider "helm" { 47 | kubernetes { 48 | host = "https://${data.google_container_cluster.my_cluster.endpoint}" 49 | token = data.google_client_config.default.access_token 50 | cluster_ca_certificate = base64decode( 51 | data.google_container_cluster.my_cluster.master_auth[0].cluster_ca_certificate, 52 | ) 53 | } 54 | } 55 | 56 | resource "helm_release" "agones" { 57 | name = "agones" 58 | repository = "https://agones.dev/chart/stable" 59 | chart = "agones" 60 | force_update = true 61 | namespace = "agones-system" 62 | create_namespace = true 63 | version = var.agones_version 64 | values = [ 65 | file("../Stable-Diffusion-UI-Agones/agones/values.yaml") 
66 | ] 67 | set { 68 | name = "agones.controller.nodeSelector.cloud\\.google\\.com/gke-nodepool" 69 | value = var.gke_cluster_nodepool 70 | type = "string" 71 | } 72 | set { 73 | name = "agones.ping.nodeSelector.cloud\\.google\\.com/gke-nodepool" 74 | value = var.gke_cluster_nodepool 75 | type = "string" 76 | } 77 | set { 78 | name = "agones.allocator.nodeSelector.cloud\\.google\\.com/gke-nodepool" 79 | value = var.gke_cluster_nodepool 80 | type = "string" 81 | } 82 | set { 83 | name = "agones.extensions.nodeSelector.cloud\\.google\\.com/gke-nodepool" 84 | value = var.gke_cluster_nodepool 85 | type = "string" 86 | } 87 | } -------------------------------------------------------------------------------- /Stable-Diffusion-UI-GKE/templates/deployment-gcs.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | apiVersion: apps/v1 16 | kind: Deployment 17 | metadata: 18 | name: stable-diffusion-deployment 19 | labels: 20 | app: stable-diffusion 21 | spec: 22 | replicas: 1 23 | selector: 24 | matchLabels: 25 | app: stable-diffusion 26 | template: 27 | metadata: 28 | annotations: 29 | gke-gcsfuse/volumes: "true" 30 | gke-gcsfuse/cpu-limit: 500m 31 | gke-gcsfuse/memory-limit: 100Mi 32 | gke-gcsfuse/ephemeral-storage-limit: 50Gi 33 | labels: 34 | app: stable-diffusion 35 | spec: 36 | # nodeSelector: 37 | # cloud.google.com/gke-gpu-sharing-strategy: time-sharing 38 | # cloud.google.com/gke-max-shared-clients-per-gpu: "2" 39 | affinity: 40 | nodeAffinity: 41 | preferredDuringSchedulingIgnoredDuringExecution: 42 | - weight: 1 43 | preference: 44 | matchExpressions: 45 | - key: cloud.google.com/gke-spot 46 | operator: In 47 | values: 48 | - "true" 49 | volumes: 50 | - name: stable-diffusion-storage 51 | persistentVolumeClaim: 52 | claimName: gcs-fuse-csi-static-pvc 53 | containers: 54 | - name: stable-diffusion-webui 55 | image: $image_url # e.g. 
us-central1-docker.pkg.dev///sd-webui:0.1 56 | imagePullPolicy: Always 57 | env: 58 | - name: SAFETENSORS_FAST_GPU 59 | value: "1" 60 | - name: LD_PRELOAD 61 | value: "/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4" 62 | resources: 63 | limits: 64 | cpu: "1000m" 65 | memory: "15Gi" 66 | nvidia.com/gpu: "1" 67 | ports: 68 | - containerPort: 7860 69 | # Make sure subPath folders are already in the root dir of the bucket 70 | # If you want to mount specific path of the bucket, use --only-dir 71 | # https://cloud.google.com/kubernetes-engine/docs/how-to/persistent-volumes/cloud-storage-fuse-csi-driver#mounting-flags 72 | volumeMounts: 73 | - mountPath: "/stable-diffusion-webui/models/" 74 | name: stable-diffusion-storage 75 | subPath: models/ 76 | - mountPath: "/stable-diffusion-webui/embeddings/" 77 | name: stable-diffusion-storage 78 | subPath: embeddings/ 79 | # Follow this for service account setup, 80 | # https://github.com/GoogleCloudPlatform/gcs-fuse-csi-driver/blob/main/docs/authentication.md 81 | serviceAccountName: $k8s_serviceaccount -------------------------------------------------------------------------------- /Stable-Diffusion-UI-Agones/agones-sidecar/go.sum: -------------------------------------------------------------------------------- 1 | agones.dev/agones v1.31.0 h1:eAwBfOCq8E6iQdFeD7peY7Ekc5rez7Vk4TaMrGzRWO8= 2 | agones.dev/agones v1.31.0/go.mod h1:aoYrmyrlj4Bs0VxWoOjJTUJNGsEWcEp6ZvweIZ7esr0= 3 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 4 | github.com/golang/glog v1.1.2 h1:DVjP2PbBOzHyzA+dn3WhHIq4NdVu3Q+pvivFICf/7fo= 5 | github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= 6 | github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= 7 | github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= 8 | github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 9 | github.com/google/go-cmp 
v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= 10 | github.com/grpc-ecosystem/grpc-gateway/v2 v2.15.2 h1:gDLXvp5S9izjldquuoAhDzccbskOL6tDC5jMSyx3zxE= 11 | github.com/grpc-ecosystem/grpc-gateway/v2 v2.15.2/go.mod h1:7pdNwVWBBHGiCxa9lAszqCJMbfTISJ7oMftp8+UGV08= 12 | github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= 13 | github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 14 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 15 | github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= 16 | golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM= 17 | golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE= 18 | golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE= 19 | golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 20 | golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k= 21 | golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= 22 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 23 | google.golang.org/genproto v0.0.0-20230822172742-b8732ec3820d h1:VBu5YqKPv6XiJ199exd8Br+Aetz+o08F+PLMnwJQHAY= 24 | google.golang.org/genproto v0.0.0-20230822172742-b8732ec3820d/go.mod h1:yZTlhN0tQnXo3h00fuXNCxJdLdIdnVFVBaRJ5LWBbw4= 25 | google.golang.org/genproto/googleapis/api v0.0.0-20230822172742-b8732ec3820d h1:DoPTO70H+bcDXcd39vOqb2viZxgqeBeSGtZ55yZU4/Q= 26 | google.golang.org/genproto/googleapis/api v0.0.0-20230822172742-b8732ec3820d/go.mod h1:KjSP20unUpOx5kyQUFa7k4OJg0qeJ7DEZflGDu2p6Bk= 27 | google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d h1:uvYuEyMHKNt+lT4K3bN6fGswmK8qSvcreM3BwjDh+y4= 28 | google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d/go.mod h1:+Bk1OCOj40wS2hwAMA+aCW9ypzm63QTBBHp6lQ3p+9M= 29 | 
google.golang.org/grpc v1.59.0 h1:Z5Iec2pjwb+LEOqzpB2MR12/eKFhDPhuqW91O+4bwUk= 30 | google.golang.org/grpc v1.59.0/go.mod h1:aUPDwccQo6OTjy7Hct4AfBPD1GptF4fyUjIkQ9YtF98= 31 | google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= 32 | google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= 33 | google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8= 34 | google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= 35 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 36 | -------------------------------------------------------------------------------- /terraform-provision-infra/modules/agones/gcp-res/outputs.tf: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | output "cluster_type" { 16 | value = var.cluster_location == var.region ? 
"regional" : "zonal" 17 | description = "GKE cluster type: regional when cluster_location equals the region, otherwise zonal" 18 | } 19 | output "region" { 20 | value = var.region 21 | description = "GCloud Region" 22 | } 23 | output "gke_location" { 24 | value = var.cluster_location 25 | description = "gke location" 26 | } 27 | output "project_id" { 28 | value = var.project_id 29 | description = "GCloud Project ID" 30 | } 31 | 32 | output "kubernetes_cluster_name" { 33 | value = google_container_cluster.gke.name 34 | description = "GKE Cluster Name" 35 | } 36 | 37 | output "kubernetes_cluster_host" { 38 | value = google_container_cluster.gke.endpoint 39 | description = "GKE Cluster Host" 40 | } 41 | output "google_filestore_reserved_ip_range" { 42 | value = google_filestore_instance.instance.networks[0].ip_addresses[0] 43 | description = "first IP address of the google_filestore_instance's first network (an address, not a CIDR range)" 44 | } 45 | output "gcloud_artifacts_repositories_auth_cmd" { 46 | value = "gcloud auth configure-docker ${var.region}-docker.pkg.dev" 47 | description = "repositories login gcloud example" 48 | } 49 | output "cloud_build_image_cmd_sample" { 50 | value = "gcloud builds submit --machine-type=e2-highcpu-32 --disk-size=100 --region=${var.region} -t ${var.region}-docker.pkg.dev/${var.project_id}/${google_artifact_registry_repository.sd_repo.name}/sd-webui:TAG" 51 | description = "cloud build sample command" 52 | } 53 | output "google_redis_instance_host" { 54 | value = google_redis_instance.cache.host 55 | description = "redis host" 56 | } 57 | output "gcs_function_archive_bucket" { 58 | value = google_storage_bucket.bucket.name 59 | description = "cloud function source archive bucket" 60 | } 61 | output "gcs_function_archive_object" { 62 | value = google_storage_bucket_object.archive.name 63 | description = "cloud function source archive object name" 64 | } 65 | output "webui_address_name" { 66 | value = google_compute_global_address.webui_addr.name 67 | description = "webui global static ip name" 68 | } 69 | output "webui_address" { 70 | value =
google_compute_global_address.webui_addr.address 71 | description = "webui global static ip address" 72 | } 73 | output "redis_private_domain" { 74 | value = google_dns_record_set.redis_a.name 75 | description = "redis private domain" 76 | } 77 | output "gpu_nodepool_name" { 78 | value = google_container_node_pool.gpu_nodepool.name 79 | description = "gpu node pool name" 80 | } 81 | output "artifactregistry_url" { 82 | value = "${var.region}-docker.pkg.dev/${var.project_id}/${google_artifact_registry_repository.sd_repo.name}" 83 | description = "artifactregistry_url" 84 | } -------------------------------------------------------------------------------- /Stable-Diffusion-UI-Agones/sd-webui/user-watch.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
import json
import shutil
import requests
import time
import os
from pathlib import Path


def _relink_dir(source, link_path):
    """Make ``link_path`` a symlink to the directory ``source``.

    Whatever currently sits at ``link_path`` is removed first. A symlink or a
    regular file is unlinked; a real directory is deleted recursively.
    """
    # os.path.isdir() follows symlinks while shutil.rmtree() refuses to operate
    # on one, so an existing link has to be detected before the isdir() check.
    if os.path.islink(link_path) or os.path.isfile(link_path):
        os.remove(link_path)
    elif os.path.isdir(link_path):
        shutil.rmtree(link_path)
    os.symlink(source, link_path, target_is_directory = True)


sdk_http_port = os.environ['AGONES_SDK_HTTP_PORT']
mount_dir = '/sd_dir'

# Replace the image's own models/embeddings folders with links into the mount.
_relink_dir(os.path.join(mount_dir, 'models'), '/stable-diffusion-webui/models')
_relink_dir(os.path.join(mount_dir, 'embeddings'), '/stable-diffusion-webui/embeddings')

url = 'http://localhost:' + sdk_http_port + '/watch/gameserver'

# NOTE(review): presumably gives the Agones SDK sidecar time to start - confirm.
time.sleep(30)

# The watch endpoint is consumed as a stream with one JSON document per line.
# The context manager closes the connection once the user folders are set up.
with requests.get(url, stream=True) as r:
    if r.encoding is None:
        r.encoding = 'utf-8'

    for line in r.iter_lines(decode_unicode=True):
        if not line:
            continue
        response = json.loads(line)
        # Tolerate an update that carries no labels at all instead of raising
        # KeyError; keep waiting until a "user" label shows up.
        labels = response['result']['object_meta'].get('labels') or {}
        if "user" not in labels:
            continue

        userid = labels['user']
        print(userid)

        # setup folders here
        user_outputs = os.path.join(mount_dir, userid, 'outputs')
        user_inputs = os.path.join(mount_dir, userid, 'inputs')
        Path(user_outputs).mkdir(parents=True, exist_ok=True)
        Path(user_inputs).mkdir(parents=True, exist_ok=True)

        _relink_dir(user_outputs, '/stable-diffusion-webui/outputs')
        _relink_dir(user_inputs, '/stable-diffusion-webui/inputs')
        # if os.path.isdir('/stable-diffusion-webui/models'):
        #     shutil.rmtree('/stable-diffusion-webui/models')
        # os.symlink(os.path.join(mount_dir, 'models'), '/stable-diffusion-webui/models', target_is_directory = True)

        # webui config files
        # Path(os.path.join(mount_dir, userid, 'ui-configs')).mkdir(parents=True, exist_ok=True)

        # config_files_to_link = ['ui-config.json', 'config.json']

        # for file in config_files_to_link:
        #     file_origin = os.path.join('/stable-diffusion-webui', file)
        #     file_dest = os.path.join(mount_dir, userid, 'ui-configs', file)
        #     if os.path.isfile(file_origin) and not os.path.isfile(file_dest):
        #         shutil.copy2(file_origin, file_dest)
        #     if os.path.isfile(file_dest):
        #         os.remove(file_origin)
        #         os.symlink(file_dest, file_origin, target_is_directory = False)

        # The per-user folders are linked; stop watching.
        break

# --------------------------------------------------------------------------------
# /Stable-Diffusion-UI-Agones/cloud-function/main.py:
# --------------------------------------------------------------------------------
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# `from flask import escape` was dropped: nothing in this file used it, and the
# name is no longer importable from flask in Flask 3.x.
import functions_framework
import redis
import time
import os
import socket


def _send_exit(udp_ip, udp_port, message, attempts=3, timeout=0.5):
    """Send ``message`` over UDP to ``udp_ip:udp_port`` until it is echoed back.

    Args:
        udp_ip: destination host.
        udp_port: destination port (string or int; converted with ``int``).
        message: text to send; a reply containing it counts as acknowledgement.
        attempts: maximum number of sends.
        timeout: seconds to wait for a reply after each send.

    Returns:
        True when a reply containing ``message`` was received, otherwise False.

    Raises:
        ValueError: if ``udp_port`` is not numeric.
        OSError: on socket errors other than a receive timeout.
    """
    # The context manager closes the socket on every path, including errors.
    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as sock:
        sock.settimeout(timeout)
        for _ in range(attempts):
            sock.sendto(bytes(message, "utf-8"), (udp_ip, int(udp_port)))
            try:
                data, _address = sock.recvfrom(1024)
            except socket.timeout:
                print("timeout to close runtime on {}:{}! please check your firewall config!".format(udp_ip, udp_port))
                continue
            if message in data.decode('utf-8'):
                print("successed to close runtime on {}:{}!".format(udp_ip, udp_port))
                return True
    return False


@functions_framework.http
def redis_http(request):
    """HTTP-triggered sweep that asks idle runtimes to exit and forgets them.

    Every key returned by SCAN is read as a hash. When its ``lastaccess`` field
    is at least TIME_INTERVAL seconds old, "EXIT" is sent over UDP to the
    ``host:port`` stored in its ``port`` field and the key is deleted.

    Environment:
        REDIS_HOST: Redis host. The default "127.0.0.1" is treated as
            "not configured" and aborts the call.
        TIME_INTERVAL: idle threshold in seconds (default 900).

    Args:
        request: the incoming HTTP request; it is not inspected.

    Returns:
        A plain-text status string: "success tracking!" after a full sweep, or
        the message describing the first failure.
    """
    redis_host = os.getenv("REDIS_HOST", "127.0.0.1")
    time_interval = int(os.getenv("TIME_INTERVAL", 900))

    if redis_host == "127.0.0.1":
        print("please correct your redis_host setting!")
        return "please correct your redis_host setting!"

    client = redis.StrictRedis(host=redis_host)

    MESSAGE = "EXIT"

    # SCAN starts at cursor 0 and is finished when the server hands 0 back.
    cursor = 0
    while True:
        try:
            cursor, keys = client.scan(cursor=cursor)
        except Exception as e:
            print(e)
            print("please check your redis connection setting!")
            return "please check your redis connection setting!"

        for key in keys:
            result = client.hgetall(key)
            if b'lastaccess' not in result:
                # Not a record this sweep understands; skip it rather than
                # aborting the whole sweep with KeyError.
                print("skip key {}: no lastaccess field".format(key))
                continue

            last_access = int(result[b'lastaccess'].decode('utf-8'))
            current_time = int(time.time())
            if current_time - last_access < time_interval:
                continue

            # Bound before the try so the failure message below is always
            # well-defined and never shows values left over from another key.
            udp_ip, udp_port = "unknown", "unknown"
            try:
                host_info = result[b'port'].decode('utf-8').split(":")
                udp_ip = host_info[0]
                udp_port = host_info[1]
                # The key is removed below whether or not the runtime answered;
                # only an exception aborts the sweep.
                _send_exit(udp_ip, udp_port, MESSAGE)
            except Exception as e:
                print(e)
                print("failed to close runtime on {}:{}!".format(udp_ip, udp_port))
                return "failed to close runtime on {}:{}!".format(udp_ip, udp_port)

            try:
                client.delete(key)
            except Exception as e:
                print(e)
                print("failed to clear key {}!".format(key))
                return "failed to clear key {}!".format(key)

        if cursor == 0:
            break

    return "success tracking!"
82 | -------------------------------------------------------------------------------- /terraform-provision-infra/main.tf: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | locals { 17 | project_id = "PROJECT_ID" 18 | oauth_client_id = "OAUTH_CLIENT_ID" 19 | oauth_client_secret = "OAUTH_CLIENT_SECRET" 20 | sd_webui_domain = "YOUR_OWNED_CUSTOM_DOMAIN_OR_SUBDOMAIN" 21 | region = "us-central1" 22 | filestore_zone = "us-central1-f" # Filestore location must be same region or zone with gke 23 | cluster_location = "us-central1-f" # GKE Cluster location 24 | node_machine_type = "custom-12-49152-ext" 25 | accelerator_type = "nvidia-tesla-t4" # Available accelerator_type from gcloud compute accelerator-types list --format='csv(zone,name)' 26 | gke_num_nodes = 1 27 | } 28 | 29 | #[Agones version] 30 | 31 | module "agones_gcp_res" { 32 | source = "./modules/agones/gcp-res" 33 | project_id = local.project_id 34 | region = local.region 35 | filestore_zone = local.filestore_zone 36 | cluster_location = local.cluster_location 37 | node_machine_type = local.node_machine_type 38 | accelerator_type = local.accelerator_type 39 | gke_num_nodes = local.gke_num_nodes 40 | cloudfunctions_source_code_path = "../Stable-Diffusion-UI-Agones/cloud-function/" 41 | } 42 | 43 | module "agones_build_image" { 44 | source = 
"./modules/agones/cloud-build" 45 | artifact_registry = module.agones_gcp_res.artifactregistry_url 46 | } 47 | 48 | module "helm_agones" { 49 | source = "./modules/agones/helm-agones" 50 | project_id = local.project_id 51 | gke_cluster_name = module.agones_gcp_res.kubernetes_cluster_name 52 | gke_cluster_location = module.agones_gcp_res.gke_location 53 | agones_version = "1.32.0" 54 | } 55 | 56 | module "agones_k8s_res" { 57 | source = "./modules/agones/k8s-res" 58 | project_id = local.project_id 59 | oauth_client_id = local.oauth_client_id 60 | oauth_client_secret = local.oauth_client_secret 61 | sd_webui_domain = local.sd_webui_domain 62 | gke_cluster_name = module.agones_gcp_res.kubernetes_cluster_name 63 | gke_cluster_location = module.agones_gcp_res.gke_location 64 | google_filestore_reserved_ip_range = module.agones_gcp_res.google_filestore_reserved_ip_range 65 | webui_address_name = module.agones_gcp_res.webui_address_name 66 | nginx_image_url = module.agones_build_image.nginx_image 67 | webui_image_url = module.agones_build_image.webui_image 68 | game_server_image_url = module.agones_build_image.game_server_image 69 | } 70 | 71 | output "webui_ingress_address" { 72 | value = module.agones_gcp_res.webui_address 73 | description = "webui ip address for ingress" 74 | } 75 | 76 | #[Agones version]# 77 | 78 | 79 | #[GKE version start]# 80 | 81 | #module "nonagones_gcp_res" { 82 | # source = "./modules/nonagones/gcp-res" 83 | # project_id = local.project_id 84 | # region = local.region 85 | # filestore_zone = local.filestore_zone 86 | # cluster_location = local.cluster_location 87 | # node_machine_type = local.node_machine_type 88 | # accelerator_type = local.accelerator_type 89 | # gke_num_nodes = local.gke_num_nodes 90 | #} 91 | # 92 | #module "nonagones_build_image" { 93 | # source = "./modules/nonagones/cloud-build" 94 | # artifact_registry = module.nonagones_gcp_res.artifactregistry_url 95 | #} 96 | # 97 | #module "nonagones_k8s_res" { 98 | # source = 
"./modules/nonagones/k8s-res" 99 | # project_id = local.project_id 100 | # gke_cluster_name = module.nonagones_gcp_res.kubernetes_cluster_name 101 | # gke_cluster_location = local.cluster_location 102 | # gke_cluster_nodepool = module.nonagones_gcp_res.gpu_nodepool_name 103 | # google_filestore_reserved_ip_range = module.nonagones_gcp_res.google_filestore_reserved_ip_range 104 | # webui_image_url = module.nonagones_build_image.webui_image 105 | #} 106 | 107 | #[GKE version ] -------------------------------------------------------------------------------- /Stable-Diffusion-UI-Agones/nginx/sd.lua: -------------------------------------------------------------------------------- 1 | -- Copyright 2023 Google LLC 2 | -- 3 | -- Licensed under the Apache License, Version 2.0 (the "License"); 4 | -- you may not use this file except in compliance with the License. 5 | -- You may obtain a copy of the License at 6 | -- 7 | -- http://www.apache.org/licenses/LICENSE-2.0 8 | -- 9 | -- Unless required by applicable law or agreed to in writing, software 10 | -- distributed under the License is distributed on an "AS IS" BASIS, 11 | -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | -- See the License for the specific language governing permissions and 13 | -- limitations under the License. 
14 | local headers = ngx.req.get_headers() 15 | local key = headers["x-goog-authenticated-user-email"] 16 | -- print(key) 17 | 18 | if not key then 19 | if headers["user-agent"] == "GoogleHC/1.0" then 20 | ngx.log(ngx.INFO, "health check success!") 21 | ngx.say("health check success!") 22 | return ngx.exit(200) 23 | end 24 | ngx.log(ngx.ERR, "no iap user identity found") 25 | ngx.status = 400 26 | ngx.say("fail to fetch user identity!") 27 | return ngx.exit(400) 28 | end 29 | 30 | local redis = require "resty.redis" 31 | local red = redis:new() 32 | 33 | red:set_timeout(1000) -- 1 second 34 | local ok, err = red:connect("${REDIS_HOST}", 6379) 35 | if not ok then 36 | ngx.log(ngx.ERR, "failed to connect to redis: ", err) 37 | ngx.status = 500 38 | ngx.say("failed to connect to redis!") 39 | return ngx.exit(500) 40 | end 41 | 42 | local secs = ngx.time() 43 | 44 | local lookup_res, err = red:hget(key, "target") 45 | print(lookup_res) 46 | 47 | if lookup_res == ngx.null then 48 | local http = require "resty.http" 49 | local httpc = http.new() 50 | ngx.log(ngx.INFO, [[{"namespace": "default", "metadata": {"labels": {"user": "]] .. key .. [["}}}]]) 51 | local sub_key = string.gsub(key, ":", ".") 52 | local final_uid = string.gsub(sub_key, "@", ".") 53 | local res, err = httpc:request_uri( 54 | "http://agones-allocator.agones-system.svc.cluster.local:443/gameserverallocation", 55 | { 56 | method = "POST", 57 | body = [[{"namespace": "default", "metadata": {"labels": {"user": "]] .. final_uid .. [["}}}]], 58 | } 59 | ) 60 | if not res then ngx.log(ngx.ERR, "failed to request gameserver allocation: ", err) ngx.status = 502 ngx.say("failed to request gameserver allocation!") return ngx.exit(502) end -- request_uri returns nil, err on failure; res must not be indexed then 61 | local cjson = require "cjson" 62 | local resp_data = cjson.decode(res.body) 63 | local host = resp_data["address"] 64 | if host == nil then 65 | ngx.header.content_type = "text/html" 66 | ngx.say([[

      Too many users, Please try later! We are cooking for you!

Take a cup of coffea]]) 67 | return 68 | end 69 | 70 | local sd_port = resp_data["ports"][2]["port"] 71 | local gs_port = resp_data["ports"][1]["port"] 72 | 73 | if string.match(host, "internal") ~= nil then 74 | local resolver = require "resty.dns.resolver" 75 | local dns = "169.254.169.254" 76 | 77 | local r, err = resolver:new{ 78 | nameservers = {dns}, 79 | retrans = 3, -- 3 retransmissions on receive timeout 80 | timeout = 1000, -- 1 sec 81 | } 82 | 83 | if not r then 84 | ngx.log(ngx.ERR, "failed to instantiate the resolver!") 85 | ngx.status = 400 86 | ngx.say("failed to instantiate the resolver!") 87 | return ngx.exit(400) 88 | end 89 | 90 | local answers, err = r:query(host) 91 | if not answers then 92 | ngx.log(ngx.ERR, "failed to query the DNS server!") 93 | ngx.status = 400 94 | ngx.say("failed to query the DNS server!") 95 | return ngx.exit(400) 96 | end 97 | 98 | if answers.errcode then 99 | ngx.log(ngx.ERR, "dns server returned error code!") 100 | ngx.status = 400 101 | ngx.say("dns server returned error code!") 102 | return ngx.exit(400) 103 | end 104 | 105 | for i, ans in ipairs(answers) do 106 | if ans.address then 107 | ngx.log(ngx.INFO, ans.address) 108 | host = ans.address 109 | end 110 | end 111 | end 112 | 113 | ngx.var.target = host .. ":" .. sd_port 114 | ngx.log(ngx.INFO, "set redis ", ngx.var.target) 115 | -- print("set redis ", ngx.var.target) 116 | 117 | ok, err = red:hset(key, "target", ngx.var.target, "port", host .. ":" .. 
gs_port, "lastaccess", secs) 118 | if not ok then 119 | -- print("fail to set redis key") 120 | ngx.log(ngx.ERR, "failed to hset: ", err) 121 | ngx.say("failed to hset: ", err) 122 | return 123 | end 124 | else 125 | ngx.var.target = lookup_res 126 | ok, err = red:hset(key, "lastaccess", secs) 127 | if not ok then 128 | -- print("fail to set redis key") 129 | ngx.log(ngx.ERR, "failed to hset: ", err) 130 | ngx.say("failed to hset: ", err) 131 | return 132 | end 133 | end 134 | -------------------------------------------------------------------------------- /Stable-Diffusion-UI-GKE/docker/Dockerfile-miniconda: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:11.8.0-runtime-ubuntu22.04 2 | 3 | RUN set -ex && \ 4 | apt update && \ 5 | apt install -y wget git python3 python3-venv python3-pip libglib2.0-0 pkg-config libcairo2-dev libxext6 libtcmalloc-minimal4 && \ 6 | rm -rf /var/lib/apt/lists/* 7 | 8 | ENV PATH="/root/miniconda3/bin:${PATH}" 9 | ARG PATH="/root/miniconda3/bin:${PATH}" 10 | RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \ 11 | && mkdir /root/.conda \ 12 | && bash Miniconda3-latest-Linux-x86_64.sh -b \ 13 | && rm -f Miniconda3-latest-Linux-x86_64.sh \ 14 | && echo "Running $(conda --version)" && \ 15 | conda init bash && \ 16 | . /root/.bashrc && \ 17 | conda update conda && \ 18 | conda install -y mamba -c conda-forge && \ 19 | conda create -n sd-webui && \ 20 | conda activate sd-webui && \ 21 | conda install python=3.10 pip 22 | 23 | RUN echo 'conda activate sd-webui' >> /root/.bashrc 24 | 25 | RUN . 
/root/.bashrc \ 26 | && python3 -m pip install torch torchvision --index-url https://download.pytorch.org/whl/cu118 --prefer-binary \ 27 | && python3 -m pip install git+https://github.com/TencentARC/GFPGAN.git@8d2447a2d918f8eba5a4a01463fd48e45126a379 --prefer-binary \ 28 | && python3 -m pip install git+https://github.com/openai/CLIP.git@d50d76daa670286dd6cacf3bcd80b5e4823fc8e1 --prefer-binary \ 29 | && python3 -m pip install git+https://github.com/mlfoundations/open_clip.git@bb6e834e9c70d9c27d0dc3ecedeebeaeb1ffad6b --prefer-binary \ 30 | && python3 -m pip install --pre xformers --prefer-binary \ 31 | && python3 -m pip install pyngrok --prefer-binary \ 32 | && git clone https://github.com/AUTOMATIC1111/stable-diffusion-webui.git /stable-diffusion-webui && git -C /stable-diffusion-webui checkout 89f9faa63388756314e8a1d96cf86bf5e0663045 \ 33 | && git clone https://github.com/Stability-AI/stablediffusion.git /stable-diffusion-webui/repositories/stable-diffusion-stability-ai \ 34 | && git -C /stable-diffusion-webui/repositories/stable-diffusion-stability-ai checkout cf1d67a6fd5ea1aa600c4df58e5b47da45f6bdbf \ 35 | && git clone https://github.com/CompVis/taming-transformers.git /stable-diffusion-webui/repositories/taming-transformers \ 36 | && git -C /stable-diffusion-webui/repositories/taming-transformers checkout 24268930bf1dce879235a7fddd0b2355b84d7ea6 \ 37 | && git clone https://github.com/crowsonkb/k-diffusion.git /stable-diffusion-webui/repositories/k-diffusion \ 38 | && git -C /stable-diffusion-webui/repositories/k-diffusion checkout c9fe758757e022f05ca5a53fa8fac28889e4f1cf \ 39 | && git clone https://github.com/sczhou/CodeFormer.git /stable-diffusion-webui/repositories/CodeFormer \ 40 | && git -C /stable-diffusion-webui/repositories/CodeFormer checkout c5b4593074ba6214284d6acd5f1719b6c5d739af \ 41 | && git clone https://github.com/salesforce/BLIP.git /stable-diffusion-webui/repositories/BLIP \ 42 | && git -C /stable-diffusion-webui/repositories/BLIP checkout 48211a1594f1321b00f14c9f7a5b4813144b2fb9 \ 43 | && python3 -m pip
install -r /stable-diffusion-webui/repositories/CodeFormer/requirements.txt --prefer-binary \ 44 | && python3 -m pip install -r /stable-diffusion-webui/requirements_versions.txt --prefer-binary 45 | 46 | RUN set -ex && cd stable-diffusion-webui \ 47 | && git clone https://github.com/kohya-ss/sd-webui-additional-networks.git extensions/sd-webui-additional-networks \ 48 | && git clone https://github.com/hnmr293/sd-webui-cutoff.git extensions/sd-webui-cutoff \ 49 | && git clone https://github.com/toriato/stable-diffusion-webui-wd14-tagger.git extensions/stable-diffusion-webui-wd14-tagger \ 50 | && git clone https://github.com/DominikDoom/a1111-sd-webui-tagcomplete.git extensions/a1111-sd-webui-tagcomplete \ 51 | && git clone https://github.com/Mikubill/sd-webui-controlnet.git extensions/sd-webui-controlnet \ 52 | && git clone https://github.com/zanllp/sd-webui-infinite-image-browsing.git extensions/sd-webui-infinite-image-browsing && git -C extensions/sd-webui-infinite-image-browsing checkout 4720b15126c3ee05e97d50108050d1178e50639b \ 53 | && git clone https://github.com/aria1th/Hypernetwork-MonkeyPatch-Extension.git extensions/Hypernetwork-MonkeyPatch-Extension \ 54 | && git clone https://github.com/butaixianran/Stable-Diffusion-Webui-Civitai-Helper.git extensions/Stable-Diffusion-Webui-Civitai-Helper \ 55 | && git clone https://github.com/ilian6806/stable-diffusion-webui-state.git extensions/stable-diffusion-webui-state \ 56 | && git clone https://github.com/vladmandic/sd-extension-system-info extensions/sd-extension-system-info \ 57 | && git clone https://github.com/ArtVentureX/sd-webui-agent-scheduler.git extensions/agent-scheduler 58 | 59 | RUN .
/root/.bashrc \ 60 | && python3 -m pip install -r /stable-diffusion-webui/extensions/sd-webui-controlnet/requirements.txt --prefer-binary \ 61 | && python3 -m pip install sqlalchemy --prefer-binary 62 | 63 | # COPY extensions/ /stable-diffusion-webui/extensions/ 64 | # COPY user-watch.py /stable-diffusion-webui/user-watch.py 65 | # COPY start.sh /stable-diffusion-webui/start.sh 66 | COPY *.json /stable-diffusion-webui/ 67 | 68 | ENV SAFETENSORS_FAST_GPU=1 69 | ENV LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4" 70 | 71 | EXPOSE 7860 72 | 73 | WORKDIR /stable-diffusion-webui/ 74 | CMD ["conda", "run", "--no-capture-output", "-n", "sd-webui", "python3", "launch.py", "--listen", "--xformers", "--medvram", "--enable-insecure-extension-access"] -------------------------------------------------------------------------------- /Stable-Diffusion-Vertex/Workbench/diffusers_nbexecutor.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "644ad25f", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "# Copyright 2022 Google LLC\n", 11 | "#\n", 12 | "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", 13 | "# you may not use this file except in compliance with the License.\n", 14 | "# You may obtain a copy of the License at\n", 15 | "#\n", 16 | "# https://www.apache.org/licenses/LICENSE-2.0\n", 17 | "#\n", 18 | "# Unless required by applicable law or agreed to in writing, software\n", 19 | "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", 20 | "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", 21 | "# See the License for the specific language governing permissions and\n", 22 | "# limitations under the License."
23 | ] 24 | }, 25 | { 26 | "attachments": {}, 27 | "cell_type": "markdown", 28 | "id": "7abfd671", 29 | "metadata": {}, 30 | "source": [ 31 | "## Write training code here and Click \"Execute\" for a workbench execute job\n", 32 | "- Use custom container built in Cloud Build and stored in Artifact Registry\n", 33 | "- Cloud Build command: gcloud builds submit --config cloud-build.yaml .\n", 34 | "- input and output directory can be /gcs/bucket_name/folder for Cloud Storage path" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "id": "2243ab9f-d7db-4db5-836a-154d9616a628", 41 | "metadata": { 42 | "id": "2243ab9f-d7db-4db5-836a-154d9616a628" 43 | }, 44 | "outputs": [], 45 | "source": [ 46 | "MODEL_NAME=\"runwayml/stable-diffusion-v1-5\"\n", 47 | "INSTANCE_DIR=\"/gcs/bucket_name/input_dog\"\n", 48 | "OUTPUT_DIR=\"/gcs/bucket_name/dog_lora_output\"\n", 49 | "\n", 50 | "! accelerate launch ./diffusers/examples/dreambooth/train_dreambooth_lora.py \\\n", 51 | " --pretrained_model_name_or_path=$MODEL_NAME \\\n", 52 | " --instance_data_dir=$INSTANCE_DIR \\\n", 53 | " --output_dir=$OUTPUT_DIR \\\n", 54 | " --instance_prompt=\"a photo of sks dog\" \\\n", 55 | " --resolution=512 \\\n", 56 | " --train_batch_size=1 \\\n", 57 | " --use_8bit_adam \\\n", 58 | " --mixed_precision=\"fp16\" \\\n", 59 | " --gradient_accumulation_steps=1 \\\n", 60 | " --learning_rate=1e-4 \\\n", 61 | " --lr_scheduler=\"constant\" \\\n", 62 | " --lr_warmup_steps=0 \\\n", 63 | " --max_train_steps=500" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "id": "O25rkc78ggqL", 69 | "metadata": { 70 | "id": "O25rkc78ggqL" 71 | }, 72 | "source": [ 73 | "Convert the lora .bin file to safetensor file, for used in WebUI" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "id": "d52e7698-122a-4864-ad8c-55d4562c2a94", 80 | "metadata": { 81 | "id": "d52e7698-122a-4864-ad8c-55d4562c2a94" 82 | }, 83 | "outputs": [], 84 | "source": [ 85 | "import 
os;\n", 86 | "import re;\n", 87 | "import torch;\n", 88 | "from safetensors.torch import save_file;\n", 89 | "\n", 90 | "newDict = dict();\n", 91 | "checkpoint = torch.load(OUTPUT_DIR + '/pytorch_lora_weights.bin');\n", 92 | "for idx, key in enumerate(checkpoint):\n", 93 | " newKey = re.sub('\\.processor\\.', '_', key);\n", 94 | " newKey = re.sub('mid_block\\.', 'mid_block_', newKey);\n", 95 | " newKey = re.sub('_lora.up.', '.lora_up.', newKey);\n", 96 | " newKey = re.sub('_lora.down.', '.lora_down.', newKey);\n", 97 | " newKey = re.sub('\\.(\\d+)\\.', '_\\\\1_', newKey);\n", 98 | " newKey = re.sub('to_out', 'to_out_0', newKey);\n", 99 | " newKey = 'lora_unet_'+newKey;\n", 100 | "\n", 101 | " newDict[newKey] = checkpoint[key];\n", 102 | "\n", 103 | "newLoraName = 'pytorch_lora_weights.safetensors';\n", 104 | "print(\"Saving \" + newLoraName);\n", 105 | "save_file(newDict, OUTPUT_DIR + '/' + newLoraName);" 106 | ] 107 | }, 108 | { 109 | "attachments": {}, 110 | "cell_type": "markdown", 111 | "id": "bd0880a8", 112 | "metadata": {}, 113 | "source": [ 114 | "***It's supported to configure NFS using Executor*" 115 | ] 116 | } 117 | ], 118 | "metadata": { 119 | "colab": { 120 | "provenance": [] 121 | }, 122 | "gpuClass": "standard", 123 | "kernelspec": { 124 | "display_name": "Pytorch (Local)", 125 | "language": "python", 126 | "name": "local-pytorch" 127 | }, 128 | "language_info": { 129 | "codemirror_mode": { 130 | "name": "ipython", 131 | "version": 3 132 | }, 133 | "file_extension": ".py", 134 | "mimetype": "text/x-python", 135 | "name": "python", 136 | "nbconvert_exporter": "python", 137 | "pygments_lexer": "ipython3", 138 | "version": "3.7.12" 139 | } 140 | }, 141 | "nbformat": 4, 142 | "nbformat_minor": 5 143 | } 144 | -------------------------------------------------------------------------------- /Stable-Diffusion-UI-GKE/docker/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # 
Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | FROM nvidia/cuda:11.8.0-runtime-ubuntu22.04 16 | # FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 17 | 18 | RUN set -ex && \ 19 | apt update && \ 20 | apt install -y wget git python3 python3-venv python3-pip libglib2.0-0 pkg-config libcairo2-dev ffmpeg libsm6 libxext6 libtcmalloc-minimal4 && \ 21 | rm -rf /var/lib/apt/lists/* 22 | 23 | RUN python3 -m pip install torch==2.0.1 torchvision==0.15.2 --extra-index-url https://download.pytorch.org/whl/cu118 --prefer-binary 24 | RUN python3 -m pip install git+https://github.com/TencentARC/GFPGAN.git@8d2447a2d918f8eba5a4a01463fd48e45126a379 --prefer-binary 25 | RUN python3 -m pip install git+https://github.com/openai/CLIP.git@d50d76daa670286dd6cacf3bcd80b5e4823fc8e1 --prefer-binary 26 | RUN python3 -m pip install git+https://github.com/mlfoundations/open_clip.git@bb6e834e9c70d9c27d0dc3ecedeebeaeb1ffad6b --prefer-binary 27 | RUN python3 -m pip install xformers==0.0.20 --prefer-binary 28 | RUN python3 -m pip install pyngrok --prefer-binary 29 | RUN git clone https://github.com/AUTOMATIC1111/stable-diffusion-webui.git 30 | RUN git -C /stable-diffusion-webui reset --hard 5ef669de080814067961f28357256e8fe27544f4 31 | RUN git clone https://github.com/Stability-AI/stablediffusion.git /stable-diffusion-webui/repositories/stable-diffusion-stability-ai 32 | RUN git -C /stable-diffusion-webui/repositories/stable-diffusion-stability-ai reset --hard 
cf1d67a6fd5ea1aa600c4df58e5b47da45f6bdbf 33 | RUN git clone https://github.com/Stability-AI/generative-models.git /stable-diffusion-webui/repositories/generative-models 34 | RUN git -C /stable-diffusion-webui/repositories/generative-models reset --hard 45c443b316737a4ab6e40413d7794a7f5657c19f 35 | RUN git clone https://github.com/CompVis/taming-transformers.git /stable-diffusion-webui/repositories/taming-transformers 36 | RUN git -C /stable-diffusion-webui/repositories/taming-transformers reset --hard 24268930bf1dce879235a7fddd0b2355b84d7ea6 37 | RUN git clone https://github.com/crowsonkb/k-diffusion.git /stable-diffusion-webui/repositories/k-diffusion 38 | RUN git -C /stable-diffusion-webui/repositories/k-diffusion reset --hard ab527a9a6d347f364e3d185ba6d714e22d80cb3c 39 | RUN git clone https://github.com/sczhou/CodeFormer.git /stable-diffusion-webui/repositories/CodeFormer 40 | RUN git -C /stable-diffusion-webui/repositories/CodeFormer reset --hard c5b4593074ba6214284d6acd5f1719b6c5d739af 41 | RUN git clone https://github.com/salesforce/BLIP.git /stable-diffusion-webui/repositories/BLIP 42 | RUN git -C /stable-diffusion-webui/repositories/BLIP reset --hard 48211a1594f1321b00f14c9f7a5b4813144b2fb9 43 | RUN python3 -m pip install -r /stable-diffusion-webui/repositories/CodeFormer/requirements.txt --prefer-binary 44 | RUN python3 -m pip install -r /stable-diffusion-webui/requirements_versions.txt --prefer-binary 45 | RUN python3 -m pip install sqlalchemy --prefer-binary 46 | 47 | RUN set -ex && cd stable-diffusion-webui \ 48 | && git clone https://github.com/kohya-ss/sd-webui-additional-networks.git extensions/sd-webui-additional-networks \ 49 | && git clone https://github.com/hnmr293/sd-webui-cutoff.git extensions/sd-webui-cutoff \ 50 | && git clone https://github.com/DominikDoom/a1111-sd-webui-tagcomplete.git extensions/a1111-sd-webui-tagcomplete \ 51 | && git clone https://github.com/Mikubill/sd-webui-controlnet.git extensions/sd-webui-controlnet \ 52 | && git 
clone https://github.com/zanllp/sd-webui-infinite-image-browsing.git extensions/sd-webui-infinite-image-browsing \ 53 | && git -C /stable-diffusion-webui/extensions/sd-webui-infinite-image-browsing reset --hard 4720b15126c3ee05e97d50108050d1178e50639b \ 54 | && git clone https://github.com/aria1th/Hypernetwork-MonkeyPatch-Extension.git extensions/Hypernetwork-MonkeyPatch-Extension \ 55 | && git clone https://github.com/butaixianran/Stable-Diffusion-Webui-Civitai-Helper.git extensions/Stable-Diffusion-Webui-Civitai-Helper \ 56 | && git clone https://github.com/ilian6806/stable-diffusion-webui-state.git extensions/stable-diffusion-webui-state \ 57 | && git clone https://github.com/vladmandic/sd-extension-system-info extensions/sd-extension-system-info \ 58 | && git clone https://github.com/ArtVentureX/sd-webui-agent-scheduler.git extensions/agent-scheduler 59 | # && git clone https://github.com/continue-revolution/sd-webui-segment-anything extensions/sd-webui-segment-anything 60 | 61 | RUN python3 -m pip install -r /stable-diffusion-webui/extensions/sd-webui-controlnet/requirements.txt --prefer-binary 62 | # RUN python3 -m pip install -r /stable-diffusion-webui/extensions/sd-webui-segment-anything/requirements.txt --prefer-binary 63 | 64 | ENV SAFETENSORS_FAST_GPU=1 65 | ENV LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4" 66 | 67 | EXPOSE 7860 68 | 69 | #COPY extensions/ /stable-diffusion-webui/extensions/ 70 | #COPY user-watch.py /stable-diffusion-webui/user-watch.py 71 | #COPY start.sh /stable-diffusion-webui/start.sh 72 | COPY *.json /stable-diffusion-webui/ 73 | 74 | WORKDIR /stable-diffusion-webui/ 75 | CMD ["python3", "webui.py", "--listen", "--opt-sdp-attention", "--enable-insecure-extension-access", "--api"] 76 | -------------------------------------------------------------------------------- /Stable-Diffusion-UI-Agones/sd-webui/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2023 
Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | FROM nvidia/cuda:11.8.0-runtime-ubuntu22.04 16 | # FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 17 | 18 | RUN set -ex && \ 19 | apt update && \ 20 | apt install -y wget git python3 python3-venv python3-pip libglib2.0-0 pkg-config libcairo2-dev ffmpeg libsm6 libxext6 libtcmalloc-minimal4 && \ 21 | rm -rf /var/lib/apt/lists/* 22 | 23 | RUN python3 -m pip install torch==2.0.1 torchvision==0.15.2 --extra-index-url https://download.pytorch.org/whl/cu118 --prefer-binary 24 | RUN python3 -m pip install git+https://github.com/TencentARC/GFPGAN.git@8d2447a2d918f8eba5a4a01463fd48e45126a379 --prefer-binary 25 | RUN python3 -m pip install git+https://github.com/openai/CLIP.git@d50d76daa670286dd6cacf3bcd80b5e4823fc8e1 --prefer-binary 26 | RUN python3 -m pip install git+https://github.com/mlfoundations/open_clip.git@bb6e834e9c70d9c27d0dc3ecedeebeaeb1ffad6b --prefer-binary 27 | RUN python3 -m pip install xformers==0.0.20 --prefer-binary 28 | RUN python3 -m pip install pyngrok --prefer-binary 29 | RUN git clone https://github.com/AUTOMATIC1111/stable-diffusion-webui.git 30 | RUN git -C /stable-diffusion-webui reset --hard 5ef669de080814067961f28357256e8fe27544f4 31 | RUN git clone https://github.com/Stability-AI/stablediffusion.git /stable-diffusion-webui/repositories/stable-diffusion-stability-ai 32 | RUN git -C /stable-diffusion-webui/repositories/stable-diffusion-stability-ai 
reset --hard cf1d67a6fd5ea1aa600c4df58e5b47da45f6bdbf 33 | RUN git clone https://github.com/Stability-AI/generative-models.git /stable-diffusion-webui/repositories/generative-models 34 | RUN git -C /stable-diffusion-webui/repositories/generative-models reset --hard 45c443b316737a4ab6e40413d7794a7f5657c19f 35 | RUN git clone https://github.com/CompVis/taming-transformers.git /stable-diffusion-webui/repositories/taming-transformers 36 | RUN git -C /stable-diffusion-webui/repositories/taming-transformers reset --hard 24268930bf1dce879235a7fddd0b2355b84d7ea6 37 | RUN git clone https://github.com/crowsonkb/k-diffusion.git /stable-diffusion-webui/repositories/k-diffusion 38 | RUN git -C /stable-diffusion-webui/repositories/k-diffusion reset --hard ab527a9a6d347f364e3d185ba6d714e22d80cb3c 39 | RUN git clone https://github.com/sczhou/CodeFormer.git /stable-diffusion-webui/repositories/CodeFormer 40 | RUN git -C /stable-diffusion-webui/repositories/CodeFormer reset --hard c5b4593074ba6214284d6acd5f1719b6c5d739af 41 | RUN git clone https://github.com/salesforce/BLIP.git /stable-diffusion-webui/repositories/BLIP 42 | RUN git -C /stable-diffusion-webui/repositories/BLIP reset --hard 48211a1594f1321b00f14c9f7a5b4813144b2fb9 43 | RUN python3 -m pip install -r /stable-diffusion-webui/repositories/CodeFormer/requirements.txt --prefer-binary 44 | RUN python3 -m pip install -r /stable-diffusion-webui/requirements_versions.txt --prefer-binary 45 | RUN python3 -m pip install sqlalchemy --prefer-binary 46 | 47 | RUN set -ex && cd stable-diffusion-webui \ 48 | && git clone https://github.com/kohya-ss/sd-webui-additional-networks.git extensions/sd-webui-additional-networks \ 49 | && git clone https://github.com/hnmr293/sd-webui-cutoff.git extensions/sd-webui-cutoff \ 50 | && git clone https://github.com/DominikDoom/a1111-sd-webui-tagcomplete.git extensions/a1111-sd-webui-tagcomplete \ 51 | && git clone https://github.com/Mikubill/sd-webui-controlnet.git extensions/sd-webui-controlnet \ 
52 | && git clone https://github.com/zanllp/sd-webui-infinite-image-browsing.git extensions/sd-webui-infinite-image-browsing \ 53 | && git -C /stable-diffusion-webui/extensions/sd-webui-infinite-image-browsing reset --hard 4720b15126c3ee05e97d50108050d1178e50639b \ 54 | && git clone https://github.com/aria1th/Hypernetwork-MonkeyPatch-Extension.git extensions/Hypernetwork-MonkeyPatch-Extension \ 55 | && git clone https://github.com/butaixianran/Stable-Diffusion-Webui-Civitai-Helper.git extensions/Stable-Diffusion-Webui-Civitai-Helper \ 56 | && git clone https://github.com/ilian6806/stable-diffusion-webui-state.git extensions/stable-diffusion-webui-state \ 57 | && git clone https://github.com/vladmandic/sd-extension-system-info extensions/sd-extension-system-info \ 58 | && git clone https://github.com/ArtVentureX/sd-webui-agent-scheduler.git extensions/agent-scheduler 59 | # && git clone https://github.com/continue-revolution/sd-webui-segment-anything extensions/sd-webui-segment-anything 60 | 61 | RUN python3 -m pip install -r /stable-diffusion-webui/extensions/sd-webui-controlnet/requirements.txt --prefer-binary 62 | # RUN python3 -m pip install -r /stable-diffusion-webui/extensions/sd-webui-segment-anything/requirements.txt --prefer-binary 63 | 64 | ENV SAFETENSORS_FAST_GPU=1 65 | ENV LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4" 66 | 67 | EXPOSE 7860 68 | 69 | COPY extensions/ /stable-diffusion-webui/extensions/ 70 | COPY user-watch.py /stable-diffusion-webui/user-watch.py 71 | COPY start.sh /stable-diffusion-webui/start.sh 72 | COPY *.json /stable-diffusion-webui/ 73 | 74 | WORKDIR /stable-diffusion-webui/ 75 | CMD ["python3", "webui.py", "--listen", "--opt-sdp-attention", "--enable-insecure-extension-access", "--api"] 76 | -------------------------------------------------------------------------------- /terraform-provision-infra/README_zh.md: -------------------------------------------------------------------------------- 1 | 
[英文部署文档](./README.md) 2 | # Terraform部署指南 3 | 4 | 我们提供两个版本的部署指南 Agones版本与GKE版本 5 | 6 | ### 准备工作 7 | 确保你已经安装 [google-cloud-sdk](https://cloud.google.com/sdk/docs/install#linux)、[kubectl](https://cloud.google.com/sdk/docs/components) 和 gke-gcloud-auth-plugin 8 | 9 | 确保你已经完成google-cloud-sdk设置 10 | 11 | 安装和设置的示例命令如下: 12 | ```bash 13 | #安装google-cloud-sdk 14 | curl -O https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-cli-429.0.0-linux-arm.tar.gz 15 | tar -xf google-cloud-cli-429.0.0-linux-arm.tar.gz 16 | ./google-cloud-sdk/install.sh 17 | #安装 kubectl 和 gke-gcloud-auth-plugin 插件 18 | gcloud components install kubectl 19 | gcloud components install gke-gcloud-auth-plugin 20 | #设置 gcloud 和 gcloud application-default 认证 21 | gcloud auth application-default login 22 | gcloud auth login 23 | gcloud config set project PROJECT_ID 24 | 25 | ``` 26 | 27 | ## Agones 版本 28 | ### 01 设置权限 29 | 30 | 确保你使用的账号有以下权限: 31 | 32 | - ROLE: roles/artifactregistry.admin 33 | 34 | - ROLE: roles/compute.admin 35 | 36 | - ROLE: roles/compute.instanceAdmin.v1 37 | 38 | - ROLE: roles/compute.networkAdmin 39 | 40 | - ROLE: roles/container.admin 41 | 42 | - ROLE: roles/file.editor 43 | 44 | 为了避免权限问题导致创建资源失败,建议使用 **roles/editor** 或者 **roles/owner** 角色部署资源 45 | 46 | ### 02 参考[链接](https://cloud.google.com/iap/docs/enabling-kubernetes-howto#oauth-configure)完成IAP设置并创建OAuth Client 并在你的域名解析服务中创建一条A记录指向负载均衡的公网IP(可以在Terraform输出或者控制台中找到IP地址) 47 | 主要步骤如下: 48 | 1. 完成OAuth consent screen设置 49 | 2. 创建 OAuth 凭据(**注意:记录一下 client id 与 secret**,后续在local变量中会使用) 50 | 3. 
更新 OAuth 重定向URL 51 | 52 | 53 | ### 03 替换相应的本地变量 [值为大写的内容必须替换] ,取消[Agone verion]代码块的注释,同时注释掉[GKE version]代码块 54 | 55 | 编辑main.tf文件替换变量为你的项目相关内容 56 | - 如果你选择区域集群,替换cluster_location变量为区域代码 57 | - 如果你单可用区集群,替换cluster_location变量为可用区代码 58 | 59 | 下面的示例为使用T4 GPU的单可用区集群,集群位于us-central1-f可用区 60 | 61 | ```bash 62 | locals { 63 | project_id = "PROJECT_ID" 64 | oauth_client_id = "OAUTH_CLIENT_ID" 65 | oauth_client_secret = "OAUTH_CLIENT_SECRET" 66 | sd_webui_domain = "YOUR_OWNED_CUSTOM_DOMAIN_OR_SUBDOMAIN" 67 | region = "us-central1" 68 | filestore_zone = "us-central1-f" # Filestore location must be same region or zone with gke 69 | cluster_location = "us-central1-f" # GKE Cluster location 70 | node_machine_type = "custom-12-49152-ext" 71 | accelerator_type = "nvidia-tesla-t4" # Available accelerator_type from gcloud compute accelerator-types list --format='csv(zone,name)' 72 | gke_num_nodes = 1 73 | } 74 | 75 | ``` 76 | ### 04 创建所有子模块资源 (包括 agones_gcp_res,agones_build_image,helm_agones,agones_k8s_res) 77 | 78 | ```bash 79 | # 切换至代码目录 80 | cd gcp-stable-diffusion-build-deploy/terraform-provision-infra/ 81 | 82 | # 初始化 83 | terraform init 84 | 85 | # 部署资源 86 | terraform apply --auto-approve -target="module.agones_gcp_res";terraform apply --auto-approve -target="module.agones_build_image";terraform apply --auto-approve -target="module.helm_agones";terraform apply --auto-approve -target="module.agones_k8s_res" 87 | 88 | 89 | # 销毁资源 90 | terraform destroy --auto-approve -target="module.agones_k8s_res";terraform destroy --auto-approve -target="module.helm_agones";terraform destroy --auto-approve -target="module.agones_gcp_res" 91 | ``` 92 | 93 | ### 05 设置域名解析和授权用户访问负载均衡 94 | * 在你的域名解析服务中创建一条 A 记录指向 webui_address 的公网IP (sdwebui.example.com - > xxx.xxx.xxx.xxx) 95 | * 在IAP中授权用户 IAP-secured Web App User 权限,以便用户通过子域名访问webui服务 96 | * 等待负载均衡的证书生效后,通过子域名访问你的webui界面(例如https://sdwebui.example.com) 97 | 98 | ## GKE版本 99 | 100 | ### 01 设置权限 101 | 102 | 确保你使用的账号有以下权限: 103 | 104 | - ROLE: 
roles/artifactregistry.admin 105 | 106 | - ROLE: roles/compute.admin 107 | 108 | - ROLE: roles/compute.instanceAdmin.v1 109 | 110 | - ROLE: roles/compute.networkAdmin 111 | 112 | - ROLE: roles/container.admin 113 | 114 | - ROLE: roles/file.editor 115 | 116 | 为了避免权限问题导致创建资源失败,建议使用 **roles/editor** 或者 **roles/owner** 角色部署资源 117 | 118 | ### 02 替换相应的本地变量 [变量oauth_client_id,oauth_client_secret,sd_webui_domain在GKE版本中未使用,可以不用修改或替换] ,注释掉[Agone verion]代码块,同时取消掉[GKE version]代码块的注释 119 | 120 | 编辑main.tf文件替换变量为你的项目相关内容 121 | - 如果你选择区域集群,替换cluster_location变量为区域代码 122 | - 如果你单可用区集群,替换cluster_location变量为可用区代码 123 | 124 | 下面的示例为使用T4 GPU的单可用区集群,集群位于us-central1-f可用区 125 | 126 | ```bash 127 | locals { 128 | project_id = "PROJECT_ID" 129 | oauth_client_id = "OAUTH_CLIENT_ID" 130 | oauth_client_secret = "OAUTH_CLIENT_SECRET" 131 | sd_webui_domain = "YOUR_OWNED_CUSTOM_DOMAIN_OR_SUBDOMAIN" 132 | region = "us-central1" 133 | filestore_zone = "us-central1-f" # Filestore location must be same region or zone with gke 134 | cluster_location = "us-central1-f" # GKE Cluster location 135 | node_machine_type = "custom-12-49152-ext" 136 | accelerator_type = "nvidia-tesla-t4" # Available accelerator_type from gcloud compute accelerator-types list --format='csv(zone,name)' 137 | gke_num_nodes = 1 138 | } 139 | 140 | ``` 141 | ### 03 创建所有子模块(包括 nonagones_gcp_res,nonagones_build_image,nonagones_k8s_res) 142 | 143 | ```bash 144 | # 切换至代码目录 145 | cd gcp-stable-diffusion-build-deploy/terraform-provision-infra/ 146 | 147 | # 初始化 148 | terraform init 149 | 150 | # 创建资源 151 | terraform apply --auto-approve -target="module.nonagones_gcp_res";terraform apply --auto-approve -target="module.nonagones_build_image";terraform apply --auto-approve -target="module.nonagones_k8s_res" 152 | 153 | 154 | # 销毁资源 155 | terraform destroy --auto-approve -target="module.nonagones_k8s_res"; terraform destroy --auto-approve -target="module.nonagones_gcp_res" 156 | ``` 157 | ## 代码贡献 158 | 159 | 欢迎提交Pull 
Request,遇到问题是可以通过提Issue来讨论 160 | 161 | 注意提交Pull Request前确保代码运行正确 162 | 163 | ## License 164 | 165 | [MIT](https://choosealicense.com/licenses/mit/) 166 | -------------------------------------------------------------------------------- /README_cn.md: -------------------------------------------------------------------------------- 1 | # Stable Diffusion WebUI on Google Cloud 快速入门指南 2 | 3 | 本指南为您提供了在 Google Cloud 项目中部署 Stable Diffusion WebUI 解决方案的步骤。 4 | 5 | ## 语言 6 | [English](./README.md) 7 | 8 | | 文件夹 | 说明 | 9 | |------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| 10 | | [Stable-Diffusion-UI-Agones](./Stable-Diffusion-UI-Agones/README_cn.md) | 使用 Agones 托管 Stable Diffusion WebUI 的所有 YAML 文件和 Dockerfile 的演示。 | 11 | | [Stable-Diffusion-UI-GKE](./Stable-Diffusion-UI-GKE/README.md) | 使用 GKE 托管 Stable Diffusion WebUI 的所有 YAML 文件和 Dockerfile 的演示。 | 12 | | [Stable-Diffusion-Vertex](./Stable-Diffusion-Vertex/README_cn.md) | DreamBooth & Lora 在 Vertex AI 上训练的参考代码 | 13 | | [terraform-provision-infra](./terraform-provision-infra/README_zh.md) | 用于创建演示环境的 Terraform 脚本和资源。 | 14 | | [examples](./examples) | 工作目录的示例文件夹 | 15 | 16 | 17 | ## 介绍 18 | 该项目演示了如何有效地托管流行的 AUTOMATIC1111 Web 界面 [Stable-Diffusion-WebUI](https://github.com/AUTOMATIC1111/stable-diffusion-webui)。 19 | 这是为了演示目的,在投入生产之前,您可能需要根据您的需要进行最少的修改。 但是,它也可以直接用作内部项目。 20 | 21 | 22 | 项目和产品包括: 23 | * [GKE](https://cloud.google.com/kubernetes-engine) 用于托管 Stable Diffusion 并将 GPU 硬件附加到 Kubernetes 集群中的节点。 24 | * [Filestore](https://cloud.google.com/filestore) 用于保存模型和输出文件。 25 | * [Vertex AI](https://cloud.google.com/vertex-ai) 用于训练和微调模型。 26 | * [Cloud Build](https://cloud.google.com/build) 用于构建容器镜像和持续集成。 27 | * [GKE](https://cloud.google.com/kubernetes-engine) 
运行 [Agones](https://agones.dev/) 的标准集群,用于隔离不同用户的运行时和实现弹性伸缩。 28 | * [Stable-Diffusion](https://huggingface.co/runwayml/stable-diffusion-v1-5) 用于从文本生成图像。 29 | * [Webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui):Stable-Diffusion的浏览器界面。 30 | 31 | ## 架构 32 | ![Agones](Stable-Diffusion-UI-Agones/images/sd-webui-agones.png) 33 | * 适用于绝大多数使用场景,每个用户有专用pod+gpu,与在您自己的工作站上运行的体验(几乎)相同。 34 | * 架构: GKE + Agones + Spot(可选) + GPU(可选分时) + Vertex AI 补充 Dreambooth/Lora 训练 35 | * 使用[Cloud identity-aware proxy](https://cloud.google.com/iap) 登录和验证谷歌账户 36 | * 演示中使用 nginx+lua 实现作为前端 UI 与 Agones 交互 37 | * 使用 Agones 代替 HPA 进行资源分配和释放 38 | * 在 WebUI 上运行推理、训练和所有其他功能和插件 39 | * 使用 Vertex AI 来辅助 Dreambooth/Lora 的训练 40 | * 没有针对 AUTOMATIC1111 webui 的侵入式更改,易于升级或使用 Dockerfile 安装插件(extensions) 41 | 42 | ![GKE](Stable-Diffusion-UI-GKE/images/sd-webui-gke.png) 43 | * 推荐作为 SaaS 平台,由于使用sd-webui,因此适合内部使用 44 | * 架构 GKE + GPU(可选分时) + Spot(可选) + HPA + Vertex AI 补充 Dreambooth/Lora训练 45 | * 多用户不冲突,一个机型一个部署,使用不同挂载点区分机型 46 | * 使用具有 GPU 指标的 HPA 进行扩展 47 | * 适合做WebUI上的推理,但不适合训练,因训练时会独占整个GPU设备 48 | * 使用 Vertex AI 来做 Dreambooth/Lora 的训练 49 | * 没有针对 AUTOMATIC1111 webui 的侵入式更改,易于升级或使用 Dockerfile 安装插件(extensions) 50 | 51 | ![As an external Saas platform](Stable-Diffusion-UI-GKE/images/sd-webui-external-gke.png) 52 | * 可用作对外 Saas 服务的架构 53 | * 您需要构建自己的前端和后端(推荐),前后端之间通过队列服务做解耦 54 | * 构建自己的后端推理流水线可以让后端的功能改造更灵活,以及更多的性能优化空间(如使用TensorRT) 55 | * sd-webui 现在也支持[API 模式](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/API). 56 | 57 | ## 常问问题 58 | ### 它是否支持多用户/会话? 59 | 60 | 对于 [Stable-Diffusion-UI-Agones](./Stable-Diffusion-UI-Agones/README.md),它本质上支持多用户/会话,因为它为每个登录用户分配了一个专用的 pod。 61 | 对于 [Stable-Diffusion-UI-GKE](./Stable-Diffusion-UI-GKE/README.md),由于AUTOMATIC1111的Stable Diffusion WebUI暂时不支持多用户/会话,可以参考https://github.com/AUTOMATIC1111/stable-diffusion-webui/issues/7970。为了支持多用户,我们为每个模型创建一个部署(deployment)。 62 | 63 | ### 关于 NFS 上的文件结构? 
64 | 对于 [Stable-Diffusion-UI-Agones](./Stable-Diffusion-UI-Agones/README.md),在演示中我们使用 [init script](./Stable-Diffusion-UI-Agones/sd-webui/user-watch.py) 为每个用户初始化文件夹。 65 | 您可以自定义初始化脚本以满足您的需要,并且有一个[参考](./examples/sd-webui/user-watch.py)。 66 | 67 | 对于[Stable-Diffusion-UI-GKE](./Stable-Diffusion-UI-GKE/README.md),我们不为每个模型构建一个容器镜像,而是使用一个容器镜像和Filestore的共享存储,并适当地编排我们的文件和文件夹以便每个服务都挂载到特定的文件夹目录。 68 | 请参考 deployment_*.yaml 参考。 69 | 70 | 您的 Filestore 文件共享中的文件夹结构可能如下所示,您可能需要根据需要进行调整: 71 | ``` 72 | /models/Stable-diffusion # <--- 这是 Stable Diffusion WebUI 寻找模型的地方 73 | |-- nai 74 | | |-- nai.ckpt 75 | | |-- nai.vae.pt 76 | | `-- nai.yaml 77 | |-- sd15 78 | | `-- v1-5-pruned-emaonly.safetensors 79 | 80 | /inputs/ # <--- 用于训练图像,仅在从 UI 运行训练作业时使用它 (sd_dreammbooth_extension) 81 | |-- alvan-nee-clipped 82 | | |-- alvan-nee-9M0tSjb-cpA-unsplash_cropped.jpeg 83 | | |-- alvan-nee-Id1DBHv4fbg-unsplash_cropped.jpeg 84 | | |-- alvan-nee-bQaAJCbNq3g-unsplash_cropped.jpeg 85 | | |-- alvan-nee-brFsZ7qszSY-unsplash_cropped.jpeg 86 | | `-- alvan-nee-eoqnr8ikwFE-unsplash_cropped.jpeg 87 | 88 | /outputs/ # <--- 用于生成的图像 89 | |-- img2img-grid 90 | | `-- 2023-03-14 91 | | |-- grid-0000.png 92 | | `-- grid-0001.png 93 | |-- img2img-images 94 | | `-- 2023-03-14 95 | | |-- 00000-425382929.png 96 | | |-- 00001-631481262.png 97 | | |-- 00002-1301840995.png 98 | ``` 99 | ### 如何上传文件? 100 | 我们做了一个示范[脚本](./Stable-Diffusion-UI-Agones/sd-webui/extensions/stable-diffusion-webui-udload/scripts/udload.py) 以插件的形式实现文件上传。 101 | 除此之外,浏览和下载图片(https://github.com/zanllp/sd-webui-infinite-image-browsing),下载模型(https://github.com/butaixianran/Stable-Diffusion-Webui-Civitai-Helper)等都可以借助插件的方式实现。 102 | 103 | ### 下班后如何释放资源? 104 | HPA & Agones 只允许至少一个副本,为此你必须手动缩放到 0 或删除资源。 105 | 例如 对于 GKE, 106 | ``` 107 | kubectl scale --replicas=0 deployment stable-diffusion-deployment 108 | ``` 109 | 对于Agones, 110 | ``` 111 | kubectl delete fleet sd-agones-fleet 112 | ``` 113 | ### 如何保持sd-webui settings里的设置? 114 | 有两种方式 115 | 1. 
设置config.json/ui-config.json的golden copy并打包到容器中 116 | 需要设置的项往往集中在其中几项目(e.g. UI中开启VAE选择、设置CLIP Skip、设置multi-controlnet等),且不需要频繁修改。 117 | 该方式实施简单,因此作为推荐选项 118 | 119 | 2. 使用另外一种部署方式(跳转到该[分支](https://github.com/nonokangwei/Stable-Diffusion-on-GCP/tree/Stable-Diffusion-on-GCP-X)) 120 | 该分支可以做到为各个pod独立初始化各自的环境,包括持久化各自的config.json/ui-config.json,但不支持设置buffer size,资源都需要按需初始化,以及额外的部署步骤。 121 | -------------------------------------------------------------------------------- /Stable-Diffusion-UI-GKE/docker/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "samples_save": true, 3 | "samples_format": "png", 4 | "samples_filename_pattern": "", 5 | "save_images_add_number": true, 6 | "grid_save": true, 7 | "grid_format": "png", 8 | "grid_extended_filename": false, 9 | "grid_only_if_multiple": true, 10 | "grid_prevent_empty_spots": false, 11 | "n_rows": -1, 12 | "enable_pnginfo": true, 13 | "save_txt": false, 14 | "save_images_before_face_restoration": false, 15 | "save_images_before_highres_fix": false, 16 | "save_images_before_color_correction": false, 17 | "save_mask": false, 18 | "save_mask_composite": false, 19 | "jpeg_quality": 80, 20 | "webp_lossless": false, 21 | "export_for_4chan": true, 22 | "img_downscale_threshold": 4.0, 23 | "target_side_length": 4000, 24 | "img_max_size_mp": 200, 25 | "use_original_name_batch": true, 26 | "use_upscaler_name_as_suffix": false, 27 | "save_selected_only": true, 28 | "save_init_img": false, 29 | "temp_dir": "", 30 | "clean_temp_dir_at_start": false, 31 | "outdir_samples": "", 32 | "outdir_txt2img_samples": "outputs/txt2img-images", 33 | "outdir_img2img_samples": "outputs/img2img-images", 34 | "outdir_extras_samples": "outputs/extras-images", 35 | "outdir_grids": "", 36 | "outdir_txt2img_grids": "outputs/txt2img-grids", 37 | "outdir_img2img_grids": "outputs/img2img-grids", 38 | "outdir_save": "log/images", 39 | "outdir_init_images": "outputs/init-images", 40 | "save_to_dirs": true, 41 | 
"grid_save_to_dirs": true, 42 | "use_save_to_dirs_for_ui": false, 43 | "directories_filename_pattern": "[date]", 44 | "directories_max_prompt_words": 8, 45 | "ESRGAN_tile": 192, 46 | "ESRGAN_tile_overlap": 8, 47 | "realesrgan_enabled_models": [ 48 | "R-ESRGAN 4x+", 49 | "R-ESRGAN 4x+ Anime6B" 50 | ], 51 | "upscaler_for_img2img": null, 52 | "SCUNET_tile": 256, 53 | "SCUNET_tile_overlap": 8, 54 | "face_restoration_model": "CodeFormer", 55 | "code_former_weight": 0.5, 56 | "face_restoration_unload": false, 57 | "show_warnings": false, 58 | "memmon_poll_rate": 8, 59 | "samples_log_stdout": false, 60 | "multiple_tqdm": true, 61 | "print_hypernet_extra": false, 62 | "unload_models_when_training": false, 63 | "pin_memory": false, 64 | "save_optimizer_state": false, 65 | "save_training_settings_to_txt": true, 66 | "dataset_filename_word_regex": "", 67 | "dataset_filename_join_string": " ", 68 | "training_image_repeats_per_epoch": 1, 69 | "training_write_csv_every": 500, 70 | "training_xattention_optimizations": false, 71 | "training_enable_tensorboard": false, 72 | "training_tensorboard_save_images": false, 73 | "training_tensorboard_flush_every": 120, 74 | "sd_model_checkpoint": null, 75 | "sd_checkpoint_cache": 0, 76 | "sd_vae_checkpoint_cache": 0, 77 | "sd_vae": "Automatic", 78 | "sd_vae_as_default": true, 79 | "inpainting_mask_weight": 1.0, 80 | "initial_noise_multiplier": 1.0, 81 | "img2img_color_correction": false, 82 | "img2img_fix_steps": false, 83 | "img2img_background_color": "#ffffff", 84 | "enable_quantization": false, 85 | "enable_emphasis": true, 86 | "enable_batch_seeds": true, 87 | "comma_padding_backtrack": 20, 88 | "CLIP_stop_at_last_layers": 1, 89 | "upcast_attn": false, 90 | "randn_source": "GPU", 91 | "use_old_emphasis_implementation": false, 92 | "use_old_karras_scheduler_sigmas": false, 93 | "no_dpmpp_sde_batch_determinism": false, 94 | "use_old_hires_fix_width_height": false, 95 | "dont_fix_second_order_samplers_schedule": false, 96 | 
"interrogate_keep_models_in_memory": false, 97 | "interrogate_return_ranks": false, 98 | "interrogate_clip_num_beams": 1, 99 | "interrogate_clip_min_length": 24, 100 | "interrogate_clip_max_length": 48, 101 | "interrogate_clip_dict_limit": 1500, 102 | "interrogate_clip_skip_categories": [], 103 | "interrogate_deepbooru_score_threshold": 0.5, 104 | "deepbooru_sort_alpha": true, 105 | "deepbooru_use_spaces": false, 106 | "deepbooru_escape": true, 107 | "deepbooru_filter_tags": "", 108 | "extra_networks_default_view": "cards", 109 | "extra_networks_default_multiplier": 1.0, 110 | "extra_networks_card_width": 0, 111 | "extra_networks_card_height": 0, 112 | "extra_networks_add_text_separator": " ", 113 | "sd_hypernetwork": "None", 114 | "return_grid": true, 115 | "return_mask": false, 116 | "return_mask_composite": false, 117 | "do_not_show_images": false, 118 | "add_model_hash_to_info": true, 119 | "add_model_name_to_info": true, 120 | "disable_weights_auto_swap": true, 121 | "send_seed": true, 122 | "send_size": true, 123 | "font": "", 124 | "js_modal_lightbox": true, 125 | "js_modal_lightbox_initially_zoomed": true, 126 | "show_progress_in_title": true, 127 | "samplers_in_dropdown": true, 128 | "dimensions_and_batch_together": true, 129 | "keyedit_precision_attention": 0.1, 130 | "keyedit_precision_extra": 0.05, 131 | "keyedit_delimiters": ".,\\/!?%^*;:{}=`~()", 132 | "quicksettings": "sd_model_checkpoint", 133 | "quicksettings_list": [ 134 | "sd_model_checkpoint", 135 | "sd_vae", 136 | "CLIP_stop_at_last_layers" 137 | ], 138 | "hidden_tabs": [], 139 | "ui_reorder": "inpaint, sampler, checkboxes, hires_fix, dimensions, cfg, seed, batch, override_settings, scripts", 140 | "ui_extra_networks_tab_reorder": "", 141 | "localization": "None", 142 | "gradio_theme": "Default", 143 | "show_progressbar": true, 144 | "live_previews_enable": true, 145 | "show_progress_grid": true, 146 | "show_progress_every_n_steps": 10, 147 | "show_progress_type": "Approx NN", 148 | 
"live_preview_content": "Prompt", 149 | "live_preview_refresh_period": 1000, 150 | "hide_samplers": [], 151 | "eta_ddim": 0.0, 152 | "eta_ancestral": 1.0, 153 | "ddim_discretize": "uniform", 154 | "s_churn": 0.0, 155 | "s_min_uncond": 0, 156 | "s_tmin": 0.0, 157 | "s_noise": 1.0, 158 | "eta_noise_seed_delta": 0, 159 | "always_discard_next_to_last_sigma": false, 160 | "uni_pc_variant": "bh1", 161 | "uni_pc_skip_type": "time_uniform", 162 | "uni_pc_order": 3, 163 | "uni_pc_lower_order_final": true, 164 | "postprocessing_enable_in_main_ui": [], 165 | "postprocessing_operation_order": [], 166 | "upscaling_max_images_in_cache": 5, 167 | "disabled_extensions": [], 168 | "disable_all_extensions": "none", 169 | "restore_config_state_file": "", 170 | "sd_checkpoint_hash": "", 171 | "control_net_max_models_num": 3 172 | } -------------------------------------------------------------------------------- /Stable-Diffusion-UI-Agones/sd-webui/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "samples_save": true, 3 | "samples_format": "png", 4 | "samples_filename_pattern": "", 5 | "save_images_add_number": true, 6 | "grid_save": true, 7 | "grid_format": "png", 8 | "grid_extended_filename": false, 9 | "grid_only_if_multiple": true, 10 | "grid_prevent_empty_spots": false, 11 | "n_rows": -1, 12 | "enable_pnginfo": true, 13 | "save_txt": false, 14 | "save_images_before_face_restoration": false, 15 | "save_images_before_highres_fix": false, 16 | "save_images_before_color_correction": false, 17 | "save_mask": false, 18 | "save_mask_composite": false, 19 | "jpeg_quality": 80, 20 | "webp_lossless": false, 21 | "export_for_4chan": true, 22 | "img_downscale_threshold": 4.0, 23 | "target_side_length": 4000, 24 | "img_max_size_mp": 200, 25 | "use_original_name_batch": true, 26 | "use_upscaler_name_as_suffix": false, 27 | "save_selected_only": true, 28 | "save_init_img": false, 29 | "temp_dir": "", 30 | "clean_temp_dir_at_start": false, 31 
| "outdir_samples": "", 32 | "outdir_txt2img_samples": "outputs/txt2img-images", 33 | "outdir_img2img_samples": "outputs/img2img-images", 34 | "outdir_extras_samples": "outputs/extras-images", 35 | "outdir_grids": "", 36 | "outdir_txt2img_grids": "outputs/txt2img-grids", 37 | "outdir_img2img_grids": "outputs/img2img-grids", 38 | "outdir_save": "log/images", 39 | "outdir_init_images": "outputs/init-images", 40 | "save_to_dirs": true, 41 | "grid_save_to_dirs": true, 42 | "use_save_to_dirs_for_ui": false, 43 | "directories_filename_pattern": "[date]", 44 | "directories_max_prompt_words": 8, 45 | "ESRGAN_tile": 192, 46 | "ESRGAN_tile_overlap": 8, 47 | "realesrgan_enabled_models": [ 48 | "R-ESRGAN 4x+", 49 | "R-ESRGAN 4x+ Anime6B" 50 | ], 51 | "upscaler_for_img2img": null, 52 | "SCUNET_tile": 256, 53 | "SCUNET_tile_overlap": 8, 54 | "face_restoration_model": "CodeFormer", 55 | "code_former_weight": 0.5, 56 | "face_restoration_unload": false, 57 | "show_warnings": false, 58 | "memmon_poll_rate": 8, 59 | "samples_log_stdout": false, 60 | "multiple_tqdm": true, 61 | "print_hypernet_extra": false, 62 | "unload_models_when_training": false, 63 | "pin_memory": false, 64 | "save_optimizer_state": false, 65 | "save_training_settings_to_txt": true, 66 | "dataset_filename_word_regex": "", 67 | "dataset_filename_join_string": " ", 68 | "training_image_repeats_per_epoch": 1, 69 | "training_write_csv_every": 500, 70 | "training_xattention_optimizations": false, 71 | "training_enable_tensorboard": false, 72 | "training_tensorboard_save_images": false, 73 | "training_tensorboard_flush_every": 120, 74 | "sd_model_checkpoint": null, 75 | "sd_checkpoint_cache": 0, 76 | "sd_vae_checkpoint_cache": 0, 77 | "sd_vae": "Automatic", 78 | "sd_vae_as_default": true, 79 | "inpainting_mask_weight": 1.0, 80 | "initial_noise_multiplier": 1.0, 81 | "img2img_color_correction": false, 82 | "img2img_fix_steps": false, 83 | "img2img_background_color": "#ffffff", 84 | "enable_quantization": false, 85 | 
"enable_emphasis": true, 86 | "enable_batch_seeds": true, 87 | "comma_padding_backtrack": 20, 88 | "CLIP_stop_at_last_layers": 1, 89 | "upcast_attn": false, 90 | "randn_source": "GPU", 91 | "use_old_emphasis_implementation": false, 92 | "use_old_karras_scheduler_sigmas": false, 93 | "no_dpmpp_sde_batch_determinism": false, 94 | "use_old_hires_fix_width_height": false, 95 | "dont_fix_second_order_samplers_schedule": false, 96 | "interrogate_keep_models_in_memory": false, 97 | "interrogate_return_ranks": false, 98 | "interrogate_clip_num_beams": 1, 99 | "interrogate_clip_min_length": 24, 100 | "interrogate_clip_max_length": 48, 101 | "interrogate_clip_dict_limit": 1500, 102 | "interrogate_clip_skip_categories": [], 103 | "interrogate_deepbooru_score_threshold": 0.5, 104 | "deepbooru_sort_alpha": true, 105 | "deepbooru_use_spaces": false, 106 | "deepbooru_escape": true, 107 | "deepbooru_filter_tags": "", 108 | "extra_networks_default_view": "cards", 109 | "extra_networks_default_multiplier": 1.0, 110 | "extra_networks_card_width": 0, 111 | "extra_networks_card_height": 0, 112 | "extra_networks_add_text_separator": " ", 113 | "sd_hypernetwork": "None", 114 | "return_grid": true, 115 | "return_mask": false, 116 | "return_mask_composite": false, 117 | "do_not_show_images": false, 118 | "add_model_hash_to_info": true, 119 | "add_model_name_to_info": true, 120 | "disable_weights_auto_swap": true, 121 | "send_seed": true, 122 | "send_size": true, 123 | "font": "", 124 | "js_modal_lightbox": true, 125 | "js_modal_lightbox_initially_zoomed": true, 126 | "show_progress_in_title": true, 127 | "samplers_in_dropdown": true, 128 | "dimensions_and_batch_together": true, 129 | "keyedit_precision_attention": 0.1, 130 | "keyedit_precision_extra": 0.05, 131 | "keyedit_delimiters": ".,\\/!?%^*;:{}=`~()", 132 | "quicksettings": "sd_model_checkpoint", 133 | "quicksettings_list": [ 134 | "sd_model_checkpoint", 135 | "sd_vae", 136 | "CLIP_stop_at_last_layers" 137 | ], 138 | "hidden_tabs": 
[], 139 | "ui_reorder": "inpaint, sampler, checkboxes, hires_fix, dimensions, cfg, seed, batch, override_settings, scripts", 140 | "ui_extra_networks_tab_reorder": "", 141 | "localization": "None", 142 | "gradio_theme": "Default", 143 | "show_progressbar": true, 144 | "live_previews_enable": true, 145 | "show_progress_grid": true, 146 | "show_progress_every_n_steps": 10, 147 | "show_progress_type": "Approx NN", 148 | "live_preview_content": "Prompt", 149 | "live_preview_refresh_period": 1000, 150 | "hide_samplers": [], 151 | "eta_ddim": 0.0, 152 | "eta_ancestral": 1.0, 153 | "ddim_discretize": "uniform", 154 | "s_churn": 0.0, 155 | "s_min_uncond": 0, 156 | "s_tmin": 0.0, 157 | "s_noise": 1.0, 158 | "eta_noise_seed_delta": 0, 159 | "always_discard_next_to_last_sigma": false, 160 | "uni_pc_variant": "bh1", 161 | "uni_pc_skip_type": "time_uniform", 162 | "uni_pc_order": 3, 163 | "uni_pc_lower_order_final": true, 164 | "postprocessing_enable_in_main_ui": [], 165 | "postprocessing_operation_order": [], 166 | "upscaling_max_images_in_cache": 5, 167 | "disabled_extensions": [], 168 | "disable_all_extensions": "none", 169 | "restore_config_state_file": "", 170 | "sd_checkpoint_hash": "", 171 | "control_net_max_models_num": 3 172 | } -------------------------------------------------------------------------------- /terraform-provision-infra/README.md: -------------------------------------------------------------------------------- 1 | [Chinese Version Guide](./README_zh.md) 2 | # Infrastructure and kubernetes resource deploy guide 3 | 4 | We offer two deployment versions of Stable Diffusion Web UI on GKE 5 | 6 | ### Before you begin 7 | Make sure that you have installed [google-cloud-sdk](https://cloud.google.com/sdk/docs/install#linux), [kubectl](https://cloud.google.com/sdk/docs/components) and gke-gcloud-auth-plugin 8 | Make sure that you have finished the google-cloud-sdk setup 9 | Example commands are as follows: 10 | ```bash 11 | #install google cloud sdk 12 | curl -O
https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-cli-429.0.0-linux-arm.tar.gz 13 | tar -xf google-cloud-cli-429.0.0-linux-arm.tar.gz 14 | ./google-cloud-sdk/install.sh 15 | #install kubectl and gke-gcloud-auth-plugin 16 | gcloud components install kubectl 17 | gcloud components install gke-gcloud-auth-plugin 18 | #login gcloud and gcloud application-default 19 | gcloud auth application-default login 20 | gcloud auth login 21 | gcloud config set project PROJECT_ID 22 | 23 | ``` 24 | 25 | ## Agones Version 26 | ### 01 Set up permissions 27 | 28 | Make sure that you have the necessary permissions on your user account: 29 | 30 | - ROLE: roles/artifactregistry.admin 31 | 32 | - ROLE: roles/compute.admin 33 | 34 | - ROLE: roles/compute.instanceAdmin.v1 35 | 36 | - ROLE: roles/compute.networkAdmin 37 | 38 | - ROLE: roles/container.admin 39 | 40 | - ROLE: roles/file.editor 41 | 42 | **roles/editor or roles/owner** is preferred 43 | 44 | ### 02 Manual steps: configure IAP (refer to [Link](https://cloud.google.com/iap/docs/enabling-kubernetes-howto#oauth-configure)) and create a DNS A record pointing to the reserved IP (from terraform output webui_ingress_address) 45 | The main steps are as follows: 46 | 1. Configure the OAuth consent screen 47 | 2. Create OAuth credentials (**IMPORTANT:** please make note of the client ID and secret) 48 | 3. Update the OAuth client's Authorized redirect URIs 49 | 4. Create an A record pointing to webui_address in your DNS provider (sdwebui.example.com -> xxx.xxx.xxx.xxx) 50 | 5. (After the kubernetes resources have been created) Grant the IAP-secured Web App User permission to users 51 | 52 | 53 | ### 03 Replace parameters [UPPER CASE PARAMETERS MUST BE REPLACED], keep the [Agones version] code block uncommented and the [GKE version] code block commented out 54 | 55 | Edit main.tf and replace the locals parameters with your project's values.
56 | - If you choose a regional cluster, replace the location parameter with the region code 57 | - If you choose a zonal cluster, replace the location parameter with the zone code 58 | 59 | The following example is a us-central1-f zonal cluster with an NVIDIA T4 accelerator node 60 | 61 | ```bash 62 | locals { 63 | project_id = "PROJECT_ID" 64 | oauth_client_id = "OAUTH_CLIENT_ID" 65 | oauth_client_secret = "OAUTH_CLIENT_SECRET" 66 | sd_webui_domain = "YOUR_OWNED_CUSTOM_DOMAIN_OR_SUBDOMAIN" 67 | region = "us-central1" 68 | filestore_zone = "us-central1-f" # Filestore location must be same region or zone with gke 69 | cluster_location = "us-central1-f" # GKE Cluster location 70 | node_machine_type = "custom-12-49152-ext" 71 | accelerator_type = "nvidia-tesla-t4" # Available accelerator_type from gcloud compute accelerator-types list --format='csv(zone,name)' 72 | gke_num_nodes = 1 73 | } 74 | 75 | ``` 76 | ### 04 Provision all submodules (including agones_gcp_res, agones_build_image, helm_agones, agones_k8s_res) 77 | 78 | ```bash 79 | # switch to work directory 80 | cd gcp-stable-diffusion-build-deploy/terraform-provision-infra/ 81 | 82 | # init terraform 83 | terraform init 84 | 85 | # deploy Infrastructure 86 | terraform apply --auto-approve -target="module.agones_gcp_res";terraform apply --auto-approve -target="module.agones_build_image";terraform apply --auto-approve -target="module.helm_agones";terraform apply --auto-approve -target="module.agones_k8s_res" 87 | 88 | 89 | # destroy Infrastructure 90 | terraform destroy --auto-approve -target="module.agones_k8s_res";terraform destroy --auto-approve -target="module.helm_agones";terraform destroy --auto-approve -target="module.agones_gcp_res" 91 | ``` 92 | 93 | ### 05 Grant Permission and access web ui 94 | * Go back to Step 02, item 5, and grant the IAP-secured Web App User permission 95 | * Access the web UI via your domain or subdomain 96 | 97 | ## Non Agones Version 98 | 99 | ### 01 Set up permissions 100 | 101 | Make sure that you have the necessary
permissions on your user account: 102 | 103 | - ROLE: roles/artifactregistry.admin 104 | 105 | - ROLE: roles/compute.admin 106 | 107 | - ROLE: roles/compute.instanceAdmin.v1 108 | 109 | - ROLE: roles/compute.networkAdmin 110 | 111 | - ROLE: roles/container.admin 112 | 113 | - ROLE: roles/file.editor 114 | 115 | **roles/editor or roles/owner** is preferred 116 | 117 | ### 02 Replace parameters [UPPER CASE PARAMETERS MUST BE REPLACED], comment out the [Agones version] code block and uncomment the [GKE version] code block 118 | 119 | Edit main.tf and replace the locals parameters with your project's values. 120 | - If you choose a regional cluster, replace the location parameter with the region code 121 | - If you choose a zonal cluster, replace the location parameter with the zone code 122 | 123 | The following example is a us-central1-f zonal cluster with an NVIDIA T4 accelerator node 124 | 125 | ```bash 126 | locals { 127 | project_id = "PROJECT_ID" 128 | region = "us-central1" 129 | filestore_zone = "us-central1-f" # Filestore location must be same region or zone with gke 130 | cluster_location = "us-central1-f" # GKE Cluster location 131 | node_machine_type = "custom-12-49152-ext" 132 | accelerator_type = "nvidia-tesla-t4" # Available accelerator_type from gcloud compute accelerator-types list --format='csv(zone,name)' 133 | gke_num_nodes = 1 134 | } 135 | 136 | ``` 137 | ### 03 Provision all submodules (including nonagones_gcp_res, nonagones_build_image, nonagones_k8s_res) 138 | 139 | ```bash 140 | # switch to work directory 141 | cd gcp-stable-diffusion-build-deploy/terraform-provision-infra/ 142 | 143 | # init terraform 144 | terraform init 145 | 146 | # Provision resource 147 | terraform apply --auto-approve -target="module.nonagones_gcp_res";terraform apply --auto-approve -target="module.nonagones_build_image";terraform apply --auto-approve -target="module.nonagones_k8s_res" 148 | 149 | 150 | # destroy resource 151 | terraform destroy --auto-approve -target="module.nonagones_k8s_res"; terraform destroy
--auto-approve -target="module.nonagones_gcp_res" 152 | ``` 153 | ## Contributing 154 | 155 | Pull requests are welcome. For major changes, please open an issue first 156 | to discuss what you would like to change. 157 | 158 | Please make sure to update tests as appropriate. 159 | 160 | ## License 161 | 162 | [MIT](https://choosealicense.com/licenses/mit/) 163 | -------------------------------------------------------------------------------- /terraform-provision-infra/modules/nonagones/gcp-res/main.tf: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | terraform { 16 | required_providers { 17 | google = { 18 | source = "hashicorp/google" 19 | version = "4.63.1" 20 | } 21 | random = { 22 | source = "hashicorp/random" 23 | version = "3.5.1" 24 | } 25 | } 26 | } 27 | provider "google" { 28 | project = var.project_id 29 | region = var.region 30 | } 31 | resource "random_id" "tf_subfix" { 32 | byte_length = 4 33 | } 34 | # Enable related service 35 | resource "google_project_service" "gcp_services" { 36 | for_each = toset(var.gcp_service_list) 37 | project = var.project_id 38 | service = each.key 39 | disable_dependent_services = false 40 | disable_on_destroy = false 41 | } 42 | # VPC 43 | resource "google_compute_network" "vpc" { 44 | project = var.project_id 45 | name = "tf-gen-vpc-${random_id.tf_subfix.hex}" 46 | auto_create_subnetworks = "false" 47 | depends_on = [google_project_service.gcp_services] 48 | } 49 | 50 | # Subnet 51 | resource "google_compute_subnetwork" "subnet" { 52 | name = "tf-gen-subnet-${random_id.tf_subfix.hex}" 53 | region = var.region 54 | network = google_compute_network.vpc.name 55 | ip_cidr_range = "10.0.0.0/16" 56 | } 57 | 58 | # Cloud Router 59 | resource "google_compute_router" "router" { 60 | name = "tf-gen-router-${var.region}-${random_id.tf_subfix.hex}" 61 | region = google_compute_subnetwork.subnet.region 62 | network = google_compute_network.vpc.id 63 | } 64 | # NAT IP 65 | resource "google_compute_address" "address" { 66 | count = 2 67 | name = "tf-gen-nat-${random_id.tf_subfix.hex}-ip-${count.index}" 68 | region = google_compute_subnetwork.subnet.region 69 | depends_on = [google_project_service.gcp_services] 70 | } 71 | 72 | # NAT Gateway 73 | resource "google_compute_router_nat" "nat" { 74 | name = "tf-gen-${var.region}-nat-gw-${random_id.tf_subfix.hex}" 75 | router = google_compute_router.router.name 76 | region = google_compute_router.router.region 77 | nat_ip_allocate_option = "MANUAL_ONLY" 78 | nat_ips = google_compute_address.address.*.self_link 79 | 
source_subnetwork_ip_ranges_to_nat = "ALL_SUBNETWORKS_ALL_IP_RANGES" 80 | } 81 | 82 | # GKE cluster 83 | resource "google_container_cluster" "gke" { 84 | name = "tf-gen-cluster-${random_id.tf_subfix.hex}" 85 | location = var.cluster_location # must be the same location the node pool below uses to find this cluster 86 | remove_default_node_pool = false 87 | enable_shielded_nodes = true 88 | initial_node_count = 1 89 | network = google_compute_network.vpc.name 90 | subnetwork = google_compute_subnetwork.subnet.name 91 | private_cluster_config { 92 | enable_private_nodes = true 93 | master_ipv4_cidr_block = "192.168.1.0/28" 94 | } 95 | ip_allocation_policy { 96 | } 97 | monitoring_config { 98 | enable_components = ["SYSTEM_COMPONENTS", "APISERVER", "SCHEDULER", "CONTROLLER_MANAGER"] 99 | managed_prometheus { enabled = true } 100 | } 101 | logging_config { 102 | enable_components = ["SYSTEM_COMPONENTS", "WORKLOADS", "APISERVER", "SCHEDULER", "CONTROLLER_MANAGER"] 103 | } 104 | release_channel { 105 | channel = "STABLE" 106 | } 107 | maintenance_policy { 108 | daily_maintenance_window { 109 | start_time = "03:00" 110 | } 111 | } 112 | addons_config { 113 | http_load_balancing { 114 | disabled = false 115 | } 116 | horizontal_pod_autoscaling { 117 | disabled = false 118 | } 119 | gcp_filestore_csi_driver_config { 120 | enabled = true 121 | } 122 | gce_persistent_disk_csi_driver_config { 123 | enabled = true 124 | } 125 | dns_cache_config { 126 | enabled = true 127 | } 128 | } 129 | node_config { 130 | shielded_instance_config { 131 | enable_secure_boot = true 132 | enable_integrity_monitoring = true 133 | } 134 | } 135 | lifecycle { 136 | ignore_changes = all 137 | } 138 | } 139 | 140 | # Separately Managed Node Pool 141 | resource "google_container_node_pool" "separately_gpu_nodepool" { 142 | name = "${var.accelerator_type}-nodepool" 143 | location = var.cluster_location 144 | cluster = google_container_cluster.gke.name 145 | autoscaling { 146 | min_node_count = 1 147 | max_node_count = 10 148 | } 149 | node_count = var.gke_num_nodes 150 |
node_config { 151 | oauth_scopes = [ 152 | "https://www.googleapis.com/auth/cloud-platform" 153 | ] 154 | 155 | labels = { 156 | Terraform = "true" 157 | Environment = "dev" 158 | } 159 | 160 | spot = true 161 | machine_type = var.node_machine_type 162 | image_type = "COS_CONTAINERD" 163 | gcfs_config { 164 | enabled = true 165 | } 166 | guest_accelerator { 167 | type = var.accelerator_type 168 | count = 1 169 | gpu_sharing_config { 170 | gpu_sharing_strategy = "TIME_SHARING" 171 | max_shared_clients_per_gpu = 2 172 | } 173 | } 174 | disk_type = "pd-balanced" 175 | disk_size_gb = 100 176 | 177 | tags = ["gpu-node", "gke-sd"] 178 | metadata = { 179 | disable-legacy-endpoints = "true" 180 | } 181 | shielded_instance_config { 182 | enable_secure_boot = true 183 | enable_integrity_monitoring = true 184 | } 185 | } 186 | lifecycle { 187 | ignore_changes = all 188 | } 189 | } 190 | # Filestore 191 | resource "google_filestore_instance" "instance" { 192 | name = "nfs-store-${random_id.tf_subfix.hex}" 193 | location = var.filestore_zone 194 | tier = "BASIC_HDD" 195 | file_shares { 196 | capacity_gb = 1024 197 | name = "vol1" 198 | } 199 | networks { 200 | network = google_compute_network.vpc.name 201 | modes = ["MODE_IPV4"] 202 | } 203 | } 204 | #Artifact Registry 205 | resource "google_artifact_registry_repository" "sd_repo" { 206 | location = var.region 207 | repository_id = "${random_id.tf_subfix.hex}-stable-diffusion-repository" 208 | description = "stable diffusion repository" 209 | format = "DOCKER" 210 | depends_on = [google_project_service.gcp_services] 211 | } -------------------------------------------------------------------------------- /Stable-Diffusion-UI-Agones/agones/values.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google LLC All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # Declare variables to be passed into your templates. 16 | 17 | agones: 18 | featureGates: "" 19 | metrics: 20 | prometheusEnabled: true 21 | prometheusServiceDiscovery: true 22 | stackdriverEnabled: false 23 | stackdriverProjectID: "" 24 | stackdriverLabels: "" 25 | serviceMonitor: 26 | enabled: false 27 | interval: 30s 28 | rbacEnabled: true 29 | registerServiceAccounts: true 30 | registerWebhooks: true 31 | registerApiService: true 32 | crds: 33 | install: true 34 | cleanupOnDelete: true 35 | serviceaccount: 36 | allocator: 37 | name: agones-allocator 38 | annotations: {} 39 | controller: 40 | name: agones-controller 41 | annotations: {} 42 | sdk: 43 | name: agones-sdk 44 | annotations: {} 45 | createPriorityClass: true 46 | priorityClassName: agones-system 47 | cloudProduct: "auto" 48 | controller: &controllerValues 49 | resources: {} 50 | # requests: 51 | # cpu: 1 52 | # memory: 256Mi 53 | nodeSelector: 54 | cloud.google.com/gke-nodepool: default-pool 55 | annotations: {} 56 | tolerations: 57 | - key: "agones.dev/agones-system" 58 | operator: "Equal" 59 | value: "true" 60 | effect: "NoExecute" 61 | affinity: 62 | nodeAffinity: 63 | preferredDuringSchedulingIgnoredDuringExecution: 64 | - weight: 1 65 | preference: 66 | matchExpressions: 67 | - key: agones.dev/agones-system 68 | operator: Exists 69 | generateTLS: true 70 | tlsCert: "" 71 | tlsKey: "" 72 | disableSecret: false 73 | allocationApiService: 74 | annotations: {} 75 | disableCaBundle: false 76 | validatingWebhook: 77 | annotations: {} 78 | 
disableCaBundle: false 79 | mutatingWebhook: 80 | annotations: {} 81 | disableCaBundle: false 82 | customCertSecretPath: {} 83 | safeToEvict: false 84 | persistentLogs: true 85 | persistentLogsSizeLimitMB: 10000 86 | logLevel: info 87 | numWorkers: 100 88 | apiServerQPS: 400 89 | apiServerQPSBurst: 500 90 | http: 91 | port: 8080 92 | healthCheck: 93 | initialDelaySeconds: 3 94 | periodSeconds: 3 95 | failureThreshold: 3 96 | timeoutSeconds: 1 97 | allocationBatchWaitTime: 500ms 98 | extensions: 99 | <<: *controllerValues 100 | pdb: 101 | minAvailable: 1 102 | replicas: 2 103 | ping: 104 | install: true 105 | pdb: 106 | enabled: false 107 | minAvailable: 1 108 | updateStrategy: {} 109 | resources: {} 110 | # requests: 111 | # cpu: 1 112 | # memory: 256Mi 113 | nodeSelector: 114 | cloud.google.com/gke-nodepool: default-pool 115 | annotations: {} 116 | tolerations: 117 | - key: "agones.dev/agones-system" 118 | operator: "Equal" 119 | value: "true" 120 | effect: "NoExecute" 121 | affinity: 122 | nodeAffinity: 123 | preferredDuringSchedulingIgnoredDuringExecution: 124 | - weight: 1 125 | preference: 126 | matchExpressions: 127 | - key: agones.dev/agones-system 128 | operator: Exists 129 | replicas: 2 130 | http: 131 | expose: true 132 | response: ok 133 | port: 80 134 | serviceType: LoadBalancer 135 | loadBalancerIP: "" 136 | loadBalancerSourceRanges: [] 137 | annotations: {} 138 | udp: 139 | expose: true 140 | rateLimit: 20 141 | port: 50000 142 | serviceType: LoadBalancer 143 | loadBalancerIP: "" 144 | loadBalancerSourceRanges: [] 145 | annotations: {} 146 | healthCheck: 147 | initialDelaySeconds: 3 148 | periodSeconds: 3 149 | failureThreshold: 3 150 | timeoutSeconds: 1 151 | allocator: 152 | install: true 153 | pdb: 154 | enabled: false 155 | minAvailable: 1 156 | updateStrategy: {} 157 | apiServerQPS: 400 158 | apiServerQPSBurst: 500 159 | logLevel: info 160 | annotations: {} 161 | resources: {} 162 | # requests: 163 | # cpu: 1 164 | # memory: 256Mi 165 | 
nodeSelector: 166 | cloud.google.com/gke-nodepool: default-pool 167 | healthCheck: 168 | initialDelaySeconds: 3 169 | periodSeconds: 3 170 | failureThreshold: 3 171 | timeoutSeconds: 1 172 | tolerations: 173 | - key: "agones.dev/agones-system" 174 | operator: "Equal" 175 | value: "true" 176 | effect: "NoExecute" 177 | affinity: 178 | nodeAffinity: 179 | preferredDuringSchedulingIgnoredDuringExecution: 180 | - weight: 1 181 | preference: 182 | matchExpressions: 183 | - key: agones.dev/agones-system 184 | operator: Exists 185 | replicas: 3 186 | service: 187 | name: agones-allocator 188 | serviceType: LoadBalancer 189 | loadBalancerIP: "" 190 | loadBalancerSourceRanges: [] 191 | annotations: {} 192 | http: 193 | enabled: true 194 | port: 443 195 | portName: https 196 | targetPort: 8443 197 | nodePort: 0 # nodePort will be used if the serviceType is set to NodePort 198 | grpc: 199 | enabled: true 200 | port: 443 201 | portName: grpc 202 | targetPort: 8443 203 | nodePort: 0 # nodePort will be used if the serviceType is set to NodePort 204 | serviceMetrics: 205 | name: agones-allocator-metrics-service 206 | annotations: {} 207 | http: 208 | enabled: true 209 | port: 8080 210 | portName: http 211 | disableSecretCreation: false 212 | generateTLS: true 213 | tlsCert: "" 214 | tlsKey: "" 215 | generateClientTLS: true 216 | clientCAs: {} 217 | disableMTLS: true 218 | disableTLS: true 219 | remoteAllocationTimeout: 10s 220 | totalRemoteAllocationTimeout: 30s 221 | allocationBatchWaitTime: 500ms 222 | image: 223 | registry: us-docker.pkg.dev/agones-images/release 224 | tag: 1.30.0 225 | controller: 226 | name: agones-controller 227 | pullPolicy: IfNotPresent 228 | # extensions settings ignored unless `SplitControllerAndExtensions` feature gate is enabled 229 | extensions: 230 | name: agones-extensions 231 | pullPolicy: IfNotPresent 232 | sdk: 233 | name: agones-sdk 234 | cpuRequest: 30m 235 | cpuLimit: 0 236 | memoryRequest: 0 237 | memoryLimit: 0 238 | alwaysPull: false 239 | 
ping: 240 | name: agones-ping 241 | pullPolicy: IfNotPresent 242 | allocator: 243 | name: agones-allocator 244 | pullPolicy: IfNotPresent 245 | 246 | gameservers: 247 | namespaces: 248 | - default 249 | minPort: 7000 250 | maxPort: 8000 251 | podPreserveUnknownFields: false 252 | 253 | helm: 254 | installTests: false --------------------------------------------------------------------------------