├── .DS_Store ├── .gitattributes ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── cns.json ├── install-guides ├── NGC_Registry_Guide_v1.0.md ├── RHEL-8-10_Server_x86-arm64.md ├── Ubuntu-22-04_Server_Developer-x86-arm64_v13.0.md ├── Ubuntu-22-04_Server_Developer-x86-arm64_v13.1.md ├── Ubuntu-22-04_Server_Developer-x86-arm64_v13.2.md ├── Ubuntu-22-04_Server_Developer-x86-arm64_v13.3.md ├── Ubuntu-22-04_Server_Developer-x86-arm64_v14.0.md ├── Ubuntu-22-04_Server_Developer-x86-arm64_v14.1.md ├── Ubuntu-22-04_Server_Developer-x86-arm64_v15.0.md ├── Ubuntu-22-04_Server_x86-arm64_v13.0.md ├── Ubuntu-22-04_Server_x86-arm64_v13.1.md ├── Ubuntu-22-04_Server_x86-arm64_v13.2.md ├── Ubuntu-22-04_Server_x86-arm64_v13.3.md ├── Ubuntu-22-04_Server_x86-arm64_v14.0.md ├── Ubuntu-22-04_Server_x86-arm64_v14.1.md ├── Ubuntu-24-04_Server_x86-arm64_v15.0.md ├── readme.md └── screenshots │ ├── AWS_Add_Storage.png │ ├── AWS_Add_Tags.png │ ├── AWS_Choose_AMI.png │ ├── AWS_Choose_Instance_type.png │ ├── AWS_Choose_Instance_type1.png │ ├── AWS_Configure_Instance_details.png │ ├── AWS_Configure_Security_Group.png │ ├── AWS_Launch_instance.png │ ├── AWS_Review_Instance_Config.png │ ├── Deepstream.png │ ├── ngc_app_catalog.png │ ├── ngc_check_email.png │ ├── ngc_create_account.png │ ├── ngc_create_account_2.png │ ├── ngc_deepstream_app.png │ ├── ngc_generate_API.png │ ├── ngc_helm_chart.png │ ├── ngc_org_name.png │ ├── ngc_set_password.png │ ├── ngc_team_name.png │ └── ngc_verify_email.png ├── playbooks ├── add-ons.yaml ├── cns-installation.yaml ├── cns-uninstall.yaml ├── cns-upgrade.yaml ├── cns-validation.yaml ├── cns_cc_bios.yaml ├── cns_values.yaml ├── cns_values_13.0.yaml ├── cns_values_13.1.yaml ├── cns_values_13.2.yaml ├── cns_values_13.3.yaml ├── cns_values_14.0.yaml ├── cns_values_14.1.yaml ├── cns_values_15.0.yaml ├── cns_version.yaml ├── files │ ├── aws_credentials │ ├── conf_compu_snp_install.sh │ ├── config.toml │ ├── csp_install.yaml │ ├── csp_uninstall.yaml │ ├── csp_values.yaml │ ├── fluent-values.yaml │ ├── grafana.yaml │ ├── gridd.conf │ ├── kube-prometheus-stack.values │ ├── mellanox-test.yaml │ ├── network-operator-value.yaml │ ├── network-operator-values.yaml │ ├── networkdefinition.yaml │ ├── nic-cluster-policy.yaml │ ├── nvidia-vgpu-driver.yaml │ ├── redfish.py │ └── resourcequota.yaml ├── gpu_operator.yaml ├── guides │ └── Cloud_Guide.md ├── hosts ├── k8s-install.yaml ├── microk8s.yaml ├── nvidia-docker.yaml ├── nvidia-driver.yaml ├── older_versions │ ├── Ubuntu_Server_v1.1.md │ ├── Ubuntu_Server_v1.2.md │ ├── Ubuntu_Server_v1.3.md │ ├── Ubuntu_Server_v2.0.md │ ├── Ubuntu_Server_v3.0.md │ ├── Ubuntu_Server_v3.1.md │ ├── Ubuntu_Server_v4.0.md │ ├── Ubuntu_Server_v4.1.md │ ├── Ubuntu_Server_v4.2.md │ ├── cnc-installation.yaml │ ├── cnc_values_5.2.yaml │ ├── cnc_values_6.0.yaml │ ├── cnc_values_6.1.yaml │ ├── cnc_values_6.2.yaml │ ├── cnc_values_6.3.yaml │ ├── cnc_values_6.4.yaml │ ├── cnc_values_7.0.yaml │ ├── cnc_values_7.1.yaml │ ├── cnc_values_7.2.yaml │ ├── cnc_values_7.3.yaml │ ├── cnc_values_7.4.yaml │ ├── cnc_values_7.5.yaml │ ├── cns_values_10.0.yaml │ ├── cns_values_10.1.yaml │ ├── cns_values_10.2.yaml │ ├── cns_values_10.3.yaml │ ├── cns_values_10.4.yaml │ ├── cns_values_10.5.yaml │ ├── cns_values_11.0.yaml │ ├── cns_values_11.1.yaml │ ├── cns_values_11.2.yaml │ ├── cns_values_11.3.yaml │ ├── cns_values_12.0.yaml │ ├── cns_values_12.1.yaml │ ├── cns_values_12.2.yaml │ ├── cns_values_12.3.yaml │ ├── cns_values_6.4.yaml │ ├── cns_values_8.0.yaml │ ├── cns_values_8.1.yaml │ ├── 
cns_values_8.2.yaml │ ├── cns_values_8.3.yaml │ ├── cns_values_8.4.yaml │ ├── cns_values_8.5.yaml │ ├── cns_values_9.0.yaml │ ├── cns_values_9.1.yaml │ ├── cns_values_9.2.yaml │ ├── cns_values_9.3.yaml │ ├── cns_values_9.4.yaml │ ├── prerequisites.yaml │ └── readme.md ├── operators-install.yaml ├── operators-upgrade.yaml ├── prerequisites.yaml ├── readme.md ├── setup.sh └── templates │ ├── kubeadm-init-config.template │ ├── kubeadm-join.template │ └── metal-lb.template └── troubleshooting ├── README.md ├── dns.md └── driver-container-logs.png /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/cloud-native-stack/ab595ce308627d8ec0e372c34e765795e52bf367/.DS_Store -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | ** linguist-detectable=true 2 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contribute to the NVIDIA Cloud Native Core 2 | 3 | Want to hack on the NVIDIA Cloud Native Core Project? Awesome! 4 | We only require you to sign your work; the section below describes how. 5 | 6 | ## Sign your work 7 | 8 | The sign-off is a simple line at the end of the explanation for the patch. Your 9 | signature certifies that you wrote the patch or otherwise have the right to pass 10 | it on as an open-source patch. The rules are pretty simple: if you can certify 11 | the below (from [developercertificate.org](http://developercertificate.org/)): 12 | 13 | ``` 14 | Developer Certificate of Origin 15 | Version 1.1 16 | 17 | Copyright (C) 2004, 2006 The Linux Foundation and its contributors. 18 | 1 Letterman Drive 19 | Suite D4700 20 | San Francisco, CA, 94129 21 | 22 | Everyone is permitted to copy and distribute verbatim copies of this 23 | license document, but changing it is not allowed. 24 | 25 | Developer's Certificate of Origin 1.1 26 | 27 | By making a contribution to this project, I certify that: 28 | 29 | (a) The contribution was created in whole or in part by me and I 30 | have the right to submit it under the open source license 31 | indicated in the file; or 32 | 33 | (b) The contribution is based upon previous work that, to the best 34 | of my knowledge, is covered under an appropriate open source 35 | license and I have the right under that license to submit that 36 | work with modifications, whether created in whole or in part 37 | by me, under the same open source license (unless I am 38 | permitted to submit under a different license), as indicated 39 | in the file; or 40 | 41 | (c) The contribution was provided directly to me by some other 42 | person who certified (a), (b) or (c) and I have not modified 43 | it. 44 | 45 | (d) I understand and agree that this project and the contribution 46 | are public and that a record of the contribution (including all 47 | personal information I submit with it, including my sign-off) is 48 | maintained indefinitely and may be redistributed consistent with 49 | this project or the open source license(s) involved. 50 | ``` 51 | 52 | Then you just add a line to every git commit message: 53 | 54 | Signed-off-by: Joe Smith 55 | 56 | Use your real name (sorry, no pseudonyms or anonymous contributions.)
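For example, a signed commit might look like this (hypothetical author shown for illustration):

```bash
# One-time setup: git uses these values to build the Signed-off-by trailer
git config user.name "Joe Smith"
git config user.email "joe.smith@example.com"

# -s (--signoff) appends the trailer to the commit message for you
git commit -s -m "Fix typo in the Ubuntu 22.04 install guide"

# The recorded commit message then ends with:
#   Signed-off-by: Joe Smith <joe.smith@example.com>
```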
57 | 58 | If you set your `user.name` and `user.email` git configs, you can sign your 59 | commit automatically with `git commit -s`. 60 | 61 | -------------------------------------------------------------------------------- /install-guides/readme.md: -------------------------------------------------------------------------------- 1 | ## NVIDIA Cloud Native Stack Install Guides 2 | 3 | The following NVIDIA Cloud Native Stack Install Guides are available. 4 | ### Ubuntu Systems 5 | - [Ubuntu 22.04 Server x86 & arm64 v13.0](https://github.com/NVIDIA/cloud-native-stack/blob/master/install-guides/Ubuntu-22-04_Server_x86-arm64_v13.0.md) 6 | - [Ubuntu 22.04 Server x86 & arm64 v13.1](https://github.com/NVIDIA/cloud-native-stack/blob/master/install-guides/Ubuntu-22-04_Server_x86-arm64_v13.1.md) 7 | - [Ubuntu 22.04 Server x86 & arm64 v13.2](https://github.com/NVIDIA/cloud-native-stack/blob/master/install-guides/Ubuntu-22-04_Server_x86-arm64_v13.2.md) 8 | - [Ubuntu 22.04 Server x86 & arm64 v13.3](https://github.com/NVIDIA/cloud-native-stack/blob/master/install-guides/Ubuntu-22-04_Server_x86-arm64_v13.3.md) 9 | - [Ubuntu 22.04 Server x86 & arm64 v14.0](https://github.com/NVIDIA/cloud-native-stack/blob/master/install-guides/Ubuntu-22-04_Server_x86-arm64_v14.0.md) 10 | - [Ubuntu 22.04 Server x86 & arm64 v14.1](https://github.com/NVIDIA/cloud-native-stack/blob/master/install-guides/Ubuntu-22-04_Server_x86-arm64_v14.1.md) 11 | - [Ubuntu 24.04 Server x86 & arm64 v15.0](https://github.com/NVIDIA/cloud-native-stack/blob/master/install-guides/Ubuntu-24-04_Server_x86-arm64_v15.0.md) 12 | 13 | ### Red Hat Enterprise Linux (RHEL) Systems 14 | - [RHEL 8.10 Server x86 & arm64](https://github.com/NVIDIA/cloud-native-stack/blob/master/install-guides/RHEL-8-10_Server_x86-arm64.md) 15 | 16 | ### Ubuntu Server for Developers 17 | - [Ubuntu 22.04 Server Developer x86 & arm64 v13.0](https://github.com/NVIDIA/cloud-native-stack/blob/master/install-guides/Ubuntu-22-04_Server_Developer-x86-arm64_v13.0.md) 18 | - [Ubuntu 22.04 Server Developer x86 & arm64 v13.1](https://github.com/NVIDIA/cloud-native-stack/blob/master/install-guides/Ubuntu-22-04_Server_Developer-x86-arm64_v13.1.md) 19 | - [Ubuntu 22.04 Server Developer x86 & arm64 v13.2](https://github.com/NVIDIA/cloud-native-stack/blob/master/install-guides/Ubuntu-22-04_Server_Developer-x86-arm64_v13.2.md) 20 | - [Ubuntu 22.04 Server Developer x86 & arm64 v13.3](https://github.com/NVIDIA/cloud-native-stack/blob/master/install-guides/Ubuntu-22-04_Server_Developer-x86-arm64_v13.3.md) 21 | - [Ubuntu 22.04 Server Developer x86 & arm64 v14.0](https://github.com/NVIDIA/cloud-native-stack/blob/master/install-guides/Ubuntu-22-04_Server_Developer-x86-arm64_v14.0.md) 22 | - [Ubuntu 22.04 Server Developer x86 & arm64 v14.1](https://github.com/NVIDIA/cloud-native-stack/blob/master/install-guides/Ubuntu-22-04_Server_Developer-x86-arm64_v14.1.md) 23 | - [Ubuntu 24.04 Server Developer x86 & arm64 v15.0](https://github.com/NVIDIA/cloud-native-stack/blob/master/install-guides/Ubuntu-22-04_Server_Developer-x86-arm64_v15.0.md) 24 | 25 | `NOTE` 26 | A list of older NVIDIA Cloud Native Stack versions (formerly known as Cloud Native Core) can be found [here](https://github.com/NVIDIA/cloud-native-stack/blob/master/install-guides/older_versions/readme.md) 27 | -------------------------------------------------------------------------------- /install-guides/screenshots/AWS_Add_Storage.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/cloud-native-stack/ab595ce308627d8ec0e372c34e765795e52bf367/install-guides/screenshots/AWS_Add_Storage.png -------------------------------------------------------------------------------- /install-guides/screenshots/AWS_Add_Tags.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/cloud-native-stack/ab595ce308627d8ec0e372c34e765795e52bf367/install-guides/screenshots/AWS_Add_Tags.png -------------------------------------------------------------------------------- /install-guides/screenshots/AWS_Choose_AMI.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/cloud-native-stack/ab595ce308627d8ec0e372c34e765795e52bf367/install-guides/screenshots/AWS_Choose_AMI.png -------------------------------------------------------------------------------- /install-guides/screenshots/AWS_Choose_Instance_type.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/cloud-native-stack/ab595ce308627d8ec0e372c34e765795e52bf367/install-guides/screenshots/AWS_Choose_Instance_type.png -------------------------------------------------------------------------------- /install-guides/screenshots/AWS_Choose_Instance_type1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/cloud-native-stack/ab595ce308627d8ec0e372c34e765795e52bf367/install-guides/screenshots/AWS_Choose_Instance_type1.png -------------------------------------------------------------------------------- /install-guides/screenshots/AWS_Configure_Instance_details.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/cloud-native-stack/ab595ce308627d8ec0e372c34e765795e52bf367/install-guides/screenshots/AWS_Configure_Instance_details.png -------------------------------------------------------------------------------- /install-guides/screenshots/AWS_Configure_Security_Group.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/cloud-native-stack/ab595ce308627d8ec0e372c34e765795e52bf367/install-guides/screenshots/AWS_Configure_Security_Group.png -------------------------------------------------------------------------------- /install-guides/screenshots/AWS_Launch_instance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/cloud-native-stack/ab595ce308627d8ec0e372c34e765795e52bf367/install-guides/screenshots/AWS_Launch_instance.png -------------------------------------------------------------------------------- /install-guides/screenshots/AWS_Review_Instance_Config.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/cloud-native-stack/ab595ce308627d8ec0e372c34e765795e52bf367/install-guides/screenshots/AWS_Review_Instance_Config.png -------------------------------------------------------------------------------- /install-guides/screenshots/Deepstream.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/cloud-native-stack/ab595ce308627d8ec0e372c34e765795e52bf367/install-guides/screenshots/Deepstream.png 
-------------------------------------------------------------------------------- /install-guides/screenshots/ngc_app_catalog.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/cloud-native-stack/ab595ce308627d8ec0e372c34e765795e52bf367/install-guides/screenshots/ngc_app_catalog.png -------------------------------------------------------------------------------- /install-guides/screenshots/ngc_check_email.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/cloud-native-stack/ab595ce308627d8ec0e372c34e765795e52bf367/install-guides/screenshots/ngc_check_email.png -------------------------------------------------------------------------------- /install-guides/screenshots/ngc_create_account.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/cloud-native-stack/ab595ce308627d8ec0e372c34e765795e52bf367/install-guides/screenshots/ngc_create_account.png -------------------------------------------------------------------------------- /install-guides/screenshots/ngc_create_account_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/cloud-native-stack/ab595ce308627d8ec0e372c34e765795e52bf367/install-guides/screenshots/ngc_create_account_2.png -------------------------------------------------------------------------------- /install-guides/screenshots/ngc_deepstream_app.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/cloud-native-stack/ab595ce308627d8ec0e372c34e765795e52bf367/install-guides/screenshots/ngc_deepstream_app.png -------------------------------------------------------------------------------- /install-guides/screenshots/ngc_generate_API.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/cloud-native-stack/ab595ce308627d8ec0e372c34e765795e52bf367/install-guides/screenshots/ngc_generate_API.png -------------------------------------------------------------------------------- /install-guides/screenshots/ngc_helm_chart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/cloud-native-stack/ab595ce308627d8ec0e372c34e765795e52bf367/install-guides/screenshots/ngc_helm_chart.png -------------------------------------------------------------------------------- /install-guides/screenshots/ngc_org_name.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/cloud-native-stack/ab595ce308627d8ec0e372c34e765795e52bf367/install-guides/screenshots/ngc_org_name.png -------------------------------------------------------------------------------- /install-guides/screenshots/ngc_set_password.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/cloud-native-stack/ab595ce308627d8ec0e372c34e765795e52bf367/install-guides/screenshots/ngc_set_password.png -------------------------------------------------------------------------------- /install-guides/screenshots/ngc_team_name.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/NVIDIA/cloud-native-stack/ab595ce308627d8ec0e372c34e765795e52bf367/install-guides/screenshots/ngc_team_name.png -------------------------------------------------------------------------------- /install-guides/screenshots/ngc_verify_email.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/cloud-native-stack/ab595ce308627d8ec0e372c34e765795e52bf367/install-guides/screenshots/ngc_verify_email.png -------------------------------------------------------------------------------- /playbooks/cns_values.yaml: -------------------------------------------------------------------------------- 1 | cns_version: 14.1 2 | 3 | ## MicroK8s cluster 4 | microk8s: no 5 | ## Kubernetes Install with Kubeadm 6 | install_k8s: yes 7 | 8 | ## Components Versions 9 | # Container Runtime options are containerd, cri-o, cri-dockerd 10 | container_runtime: "containerd" 11 | containerd_version: "2.0.3" 12 | runc_version: "1.2.6" 13 | cni_plugins_version: "1.6.2" 14 | containerd_max_concurrent_downloads: "5" 15 | nvidia_container_toolkit_version: "1.17.5" 16 | crio_version: "1.31.6" 17 | cri_dockerd_version: "0.3.16" 18 | k8s_version: "1.31.6" 19 | calico_version: "3.29.3" 20 | flannel_version: "0.25.6" 21 | helm_version: "3.17.2" 22 | gpu_operator_version: "25.3.0" 23 | network_operator_version: "25.1.0" 24 | nim_operator_version: "1.0.1" 25 | local_path_provisioner: "0.0.31" 26 | nfs_provisioner: "4.0.18" 27 | metallb_version: "0.14.9" 28 | kserve_version: "0.15" 29 | prometheus_stack: "70.3.0" 30 | prometheus_adapter: "4.13.0" 31 | grafana_operator: "v5.17.0" 32 | elastic_stack: "8.17.4" 33 | lws_version: "0.5.1" 34 | 35 | # GPU Operator Values 36 | enable_gpu_operator: yes 37 | confidential_computing: no 38 | gpu_driver_version: "570.124.06" 39 | use_open_kernel_module: no 40 | enable_mig: no 41 | mig_profile: all-disabled 42 | mig_strategy: single 43 | # To use GDS, use_open_kernel_module needs to be enabled 44 | enable_gds: no 45 | #Secure Boot for only Ubuntu 46 | enable_secure_boot: no 47 | enable_vgpu: no 48 | vgpu_license_server: "" 49 | # URL of Helm repo to be added. If using NGC get this from the fetch command in the console 50 | helm_repository: "https://helm.ngc.nvidia.com/nvidia" 51 | # Name of the helm chart to be deployed 52 | gpu_operator_helm_chart: nvidia/gpu-operator 53 | ## This is most likely GPU Operator Driver Registry 54 | gpu_operator_driver_registry: "nvcr.io/nvidia" 55 | 56 | # NGC Values 57 | ## If using a private/protected registry. NGC API Key. 
Leave blank for public registries 58 | ngc_registry_password: "" 59 | ## This is most likely an NGC email 60 | ngc_registry_email: "" 61 | ngc_registry_username: "$oauthtoken" 62 | 63 | # Network Operator Values 64 | ## If the Network Operator is yes then make sure enable_rdma is also set to yes 65 | enable_network_operator: no 66 | ## Enable RDMA yes for NVIDIA Certification 67 | enable_rdma: no 68 | ## Enable for MLNX-OFED Driver Deployment 69 | deploy_ofed: no 70 | 71 | # Proxy Configuration 72 | proxy: no 73 | http_proxy: "" 74 | https_proxy: "" 75 | 76 | # Cloud Native Stack for Developers Values 77 | ## Enable for Cloud Native Stack Developers 78 | cns_docker: no 79 | ## Enable For Cloud Native Stack Developers with TRD Driver 80 | cns_nvidia_driver: no 81 | nvidia_driver_mig: no 82 | 83 | ## Kubernetes resources 84 | k8s_apt_key: "https://pkgs.k8s.io/core:/stable:/v1.31/deb/Release.key" 85 | k8s_gpg_key: "https://pkgs.k8s.io/core:/stable:/v1.31/rpm/repodata/repomd.xml.key" 86 | k8s_apt_ring: "/etc/apt/keyrings/kubernetes-apt-keyring.gpg" 87 | k8s_registry: "registry.k8s.io" 88 | 89 | # Install NVIDIA NIM Operator 90 | enable_nim_operator: yes 91 | 92 | # LeaderWorkerSet https://github.com/kubernetes-sigs/lws/tree/main 93 | lws: no 94 | 95 | # Local Path Provisioner and NFS Provisioner as Storage option 96 | storage: no 97 | 98 | # Monitoring Stack Prometheus/Grafana with GPU Metrics and Elastic Logging stack 99 | monitoring: no 100 | 101 | # Enable Kserve on Cloud Native Stack with Istio and Cert-Manager 102 | kserve: no 103 | 104 | # Install MetalLB 105 | loadbalancer: no 106 | # Example input loadbalancer_ip: "10.78.17.85/32" 107 | loadbalancer_ip: "" 108 | 109 | ## Cloud Native Stack Validation 110 | cns_validation: no 111 | 112 | # BMC Details for Confidential Computing 113 | bmc_ip: 114 | bmc_username: 115 | bmc_password: 116 | 117 | # CSP values 118 | ## AWS EKS values 119 | aws_region: us-east-2 120 | aws_cluster_name: cns-cluster-1 121 | aws_gpu_instance_type: g4dn.2xlarge 122 | 123 | ## Google Cloud GKE Values 124 | #https://cloud.google.com/resource-manager/docs/creating-managing-projects#identifying_projects 125 | gke_project_id: 126 | #https://cloud.google.com/compute/docs/regions-zones#available 127 | gke_region: us-west1 128 | gke_node_zones: ["us-west1-b"] 129 | gke_cluster_name: gke-cluster-1 130 | 131 | ## Azure AKS Values 132 | aks_cluster_name: aks-cluster-1 133 | #https://azure.microsoft.com/en-us/explore/global-infrastructure/geographies/#geographies 134 | aks_cluster_location: "West US 2" 135 | #https://learn.microsoft.com/en-us/partner-center/marketplace/find-tenant-object-id 136 | azure_object_id: [""] -------------------------------------------------------------------------------- /playbooks/cns_values_13.0.yaml: -------------------------------------------------------------------------------- 1 | cns_version: 13.0 2 | 3 | ## MicroK8s cluster 4 | microk8s: no 5 | ## Kubernetes Install with Kubeadm 6 | install_k8s: yes 7 | 8 | ## Components Versions 9 | # Container Runtime options are containerd, cri-o, cri-dockerd 10 | container_runtime: "containerd" 11 | containerd_version: "1.7.16" 12 | runc_version: "1.1.12" 13 | cni_plugins_version: "1.4.1" 14 | containerd_max_concurrent_downloads: "5" 15 | nvidia_container_toolkit_version: "1.15.0" 16 | crio_version: "1.30.0" 17 | cri_dockerd_version: "0.3.13" 18 | k8s_version: "1.30.0" 19 | calico_version: "3.27.3" 20 | flannel_version: "0.25.1" 21 | helm_version: "3.14.4" 22 | gpu_operator_version: "24.6.2" 23 |
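# Usage sketch (assumption: the repo's setup.sh wrapper is the entry point): a values
# file like this one is consumed by the Ansible playbooks rather than read by hand,
# e.g. from the playbooks directory:
#   bash setup.sh install
# which runs cns-installation.yaml against the hosts inventory using the pins above.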
network_operator_version: "24.1.1" 24 | nim_operator_version: "1.0.0" 25 | local_path_provisioner: "0.0.26" 26 | nfs_provisioner: "4.0.18" 27 | metallb_version: "0.14.5" 28 | kserve_version: "0.13" 29 | prometheus_stack: "67.5.0" 30 | prometheus_adapter: "4.11.0" 31 | grafana_operator: "v5.15.1" 32 | elastic_stack: "8.14.1" 33 | lws_version: "0.4.0" 34 | 35 | # GPU Operator Values 36 | enable_gpu_operator: yes 37 | confidential_computing: no 38 | gpu_driver_version: "550.54.15" 39 | use_open_kernel_module: no 40 | enable_mig: no 41 | mig_profile: all-disabled 42 | mig_strategy: single 43 | # To use GDS, use_open_kernel_module needs to be enabled 44 | enable_gds: no 45 | #Secure Boot for only Ubuntu 46 | enable_secure_boot: no 47 | enable_cdi: no 48 | enable_vgpu: no 49 | vgpu_license_server: "" 50 | # URL of Helm repo to be added. If using NGC get this from the fetch command in the console 51 | helm_repository: https://helm.ngc.nvidia.com/nvidia 52 | # Name of the helm chart to be deployed 53 | gpu_operator_helm_chart: nvidia/gpu-operator 54 | ## This is most likely GPU Operator Driver Registry 55 | gpu_operator_driver_registry: "nvcr.io/nvidia" 56 | 57 | # NGC Values 58 | ## If using a private/protected registry. NGC API Key. Leave blank for public registries 59 | ngc_registry_password: "" 60 | ## This is most likely an NGC email 61 | ngc_registry_email: "" 62 | ngc_registry_username: "$oauthtoken" 63 | 64 | # Network Operator Values 65 | ## If the Network Operator is yes then make sure enable_rdma as well yesgit br 66 | enable_network_operator: no 67 | ## Enable RDMA yes for NVIDIA Certification 68 | enable_rdma: no 69 | ## Enable for MLNX-OFED Driver Deployment 70 | deploy_ofed: no 71 | 72 | # Prxoy Configuration 73 | proxy: no 74 | http_proxy: "" 75 | https_proxy: "" 76 | 77 | # Cloud Native Stack for Developers Values 78 | ## Enable for Cloud Native Stack Developers 79 | cns_docker: no 80 | ## Enable For Cloud Native Stack Developers with TRD Driver 81 | cns_nvidia_driver: no 82 | nvidia_driver_mig: no 83 | 84 | ## Kubernetes resources 85 | k8s_apt_key: "https://pkgs.k8s.io/core:/stable:/v1.30/deb/Release.key" 86 | k8s_gpg_key: "https://pkgs.k8s.io/core:/stable:/v1.30/rpm/repodata/repomd.xml.key" 87 | k8s_apt_ring: "/etc/apt/keyrings/kubernetes-apt-keyring.gpg" 88 | k8s_registry: "registry.k8s.io" 89 | 90 | # Install NVIDIA NIM Operator 91 | enable_nim_operator: no 92 | 93 | # LeaderWorkerSet https://github.com/kubernetes-sigs/lws/tree/main 94 | lws: no 95 | 96 | # Local Path Provisioner and NFS Provisoner as Storage option 97 | storage: no 98 | 99 | # Monitoring Stack Prometheus/Grafana with GPU Metrics and Elastic Logging stack 100 | monitoring: no 101 | 102 | # Enable Kserve on Cloud Native Stack with Istio and Cert-Manager 103 | kserve: no 104 | 105 | # Install MetalLB 106 | loadbalancer: no 107 | # Example input loadbalancer_ip: "10.117.20.50/32", it could be node/host IP 108 | loadbalancer_ip: "" 109 | 110 | ## Cloud Native Stack Validation 111 | cns_validation: no 112 | 113 | # BMC Details for Confidential Computing 114 | bmc_ip: 115 | bmc_username: 116 | bmc_password: 117 | 118 | # CSP values 119 | ## AWS EKS values 120 | aws_region: us-east-2 121 | aws_cluster_name: cns-cluster-1 122 | aws_gpu_instance_type: g4dn.2xlarge 123 | 124 | ## Google Cloud GKE Values 125 | #https://cloud.google.com/resource-manager/docs/creating-managing-projects#identifying_projects 126 | gke_project_id: 127 | #https://cloud.google.com/compute/docs/regions-zones#available 128 | gke_region: us-west1 
129 | gke_node_zones: ["us-west1-b"] 130 | gke_cluster_name: gke-cluster-1 131 | 132 | ## Azure AKS Values 133 | aks_cluster_name: aks-cluster-1 134 | #https://azure.microsoft.com/en-us/explore/global-infrastructure/geographies/#geographies 135 | aks_cluster_location: "West US 2" 136 | #https://learn.microsoft.com/en-us/partner-center/marketplace/find-tenant-object-id 137 | azure_object_id: [""] -------------------------------------------------------------------------------- /playbooks/cns_values_13.1.yaml: -------------------------------------------------------------------------------- 1 | cns_version: 13.1 2 | 3 | ## MicroK8s cluster 4 | microk8s: no 5 | ## Kubernetes Install with Kubeadm 6 | install_k8s: yes 7 | 8 | ## Components Versions 9 | # Container Runtime options are containerd, cri-o, cri-dockerd 10 | container_runtime: "containerd" 11 | containerd_version: "1.7.20" 12 | runc_version: "1.1.13" 13 | cni_plugins_version: "1.5.1" 14 | containerd_max_concurrent_downloads: "5" 15 | nvidia_container_toolkit_version: "1.16.2" 16 | crio_version: "1.30.2" 17 | cri_dockerd_version: "0.3.15" 18 | k8s_version: "1.30.2" 19 | calico_version: "3.27.4" 20 | flannel_version: "0.25.5" 21 | helm_version: "3.15.3" 22 | gpu_operator_version: "24.6.2" 23 | network_operator_version: "24.4.1" 24 | nim_operator_version: "1.0.0" 25 | local_path_provisioner: "0.0.26" 26 | nfs_provisioner: "4.0.18" 27 | metallb_version: "0.14.5" 28 | kserve_version: "0.13" 29 | prometheus_stack: "67.5.0" 30 | prometheus_adapter: "4.11.0" 31 | grafana_operator: "v5.15.1" 32 | elastic_stack: "8.14.1" 33 | lws_version: "0.4.0" 34 | 35 | # GPU Operator Values 36 | enable_gpu_operator: yes 37 | confidential_computing: no 38 | gpu_driver_version: "550.90.07" 39 | use_open_kernel_module: no 40 | enable_mig: no 41 | mig_profile: all-disabled 42 | mig_strategy: single 43 | # To use GDS, use_open_kernel_module needs to be enabled 44 | enable_gds: no 45 | #Secure Boot for only Ubuntu 46 | enable_secure_boot: no 47 | enable_cdi: no 48 | enable_vgpu: no 49 | vgpu_license_server: "" 50 | # URL of Helm repo to be added. If using NGC get this from the fetch command in the console 51 | helm_repository: https://helm.ngc.nvidia.com/nvidia 52 | # Name of the helm chart to be deployed 53 | gpu_operator_helm_chart: nvidia/gpu-operator 54 | ## This is most likely GPU Operator Driver Registry 55 | gpu_operator_driver_registry: "nvcr.io/nvidia" 56 | 57 | # NGC Values 58 | ## If using a private/protected registry. NGC API Key. 
Leave blank for public registries 59 | ngc_registry_password: "" 60 | ## This is most likely an NGC email 61 | ngc_registry_email: "" 62 | ngc_registry_username: "$oauthtoken" 63 | 64 | # Network Operator Values 65 | ## If the Network Operator is yes then make sure enable_rdma is also set to yes 66 | enable_network_operator: no 67 | ## Enable RDMA yes for NVIDIA Certification 68 | enable_rdma: no 69 | ## Enable for MLNX-OFED Driver Deployment 70 | deploy_ofed: no 71 | 72 | # Proxy Configuration 73 | proxy: no 74 | http_proxy: "" 75 | https_proxy: "" 76 | 77 | # Cloud Native Stack for Developers Values 78 | ## Enable for Cloud Native Stack Developers 79 | cns_docker: no 80 | ## Enable For Cloud Native Stack Developers with TRD Driver 81 | cns_nvidia_driver: no 82 | nvidia_driver_mig: no 83 | 84 | ## Kubernetes resources 85 | k8s_apt_key: "https://pkgs.k8s.io/core:/stable:/v1.30/deb/Release.key" 86 | k8s_gpg_key: "https://pkgs.k8s.io/core:/stable:/v1.30/rpm/repodata/repomd.xml.key" 87 | k8s_apt_ring: "/etc/apt/keyrings/kubernetes-apt-keyring.gpg" 88 | k8s_registry: "registry.k8s.io" 89 | 90 | # Install NVIDIA NIM Operator 91 | enable_nim_operator: no 92 | 93 | # LeaderWorkerSet https://github.com/kubernetes-sigs/lws/tree/main 94 | lws: no 95 | 96 | # Local Path Provisioner and NFS Provisioner as Storage option 97 | storage: no 98 | 99 | # Monitoring Stack Prometheus/Grafana with GPU Metrics and Elastic Logging stack 100 | monitoring: no 101 | 102 | # Enable Kserve on Cloud Native Stack with Istio and Cert-Manager 103 | kserve: no 104 | 105 | # Install MetalLB 106 | loadbalancer: no 107 | # Example input loadbalancer_ip: "10.117.20.50/32", it could be node/host IP 108 | loadbalancer_ip: "" 109 | 110 | ## Cloud Native Stack Validation 111 | cns_validation: no 112 | 113 | # BMC Details for Confidential Computing 114 | bmc_ip: 115 | bmc_username: 116 | bmc_password: 117 | 118 | # CSP values 119 | ## AWS EKS values 120 | aws_region: us-east-2 121 | aws_cluster_name: cns-cluster-1 122 | aws_gpu_instance_type: g4dn.2xlarge 123 | 124 | ## Google Cloud GKE Values 125 | #https://cloud.google.com/resource-manager/docs/creating-managing-projects#identifying_projects 126 | gke_project_id: 127 | #https://cloud.google.com/compute/docs/regions-zones#available 128 | gke_region: us-west1 129 | gke_node_zones: ["us-west1-b"] 130 | gke_cluster_name: gke-cluster-1 131 | 132 | ## Azure AKS Values 133 | aks_cluster_name: aks-cluster-1 134 | #https://azure.microsoft.com/en-us/explore/global-infrastructure/geographies/#geographies 135 | aks_cluster_location: "West US 2" 136 | #https://learn.microsoft.com/en-us/partner-center/marketplace/find-tenant-object-id 137 | azure_object_id: [""] -------------------------------------------------------------------------------- /playbooks/cns_values_13.2.yaml: -------------------------------------------------------------------------------- 1 | cns_version: 13.2 2 | 3 | ## MicroK8s cluster 4 | microk8s: no 5 | ## Kubernetes Install with Kubeadm 6 | install_k8s: yes 7 | 8 | ## Components Versions 9 | # Container Runtime options are containerd, cri-o, cri-dockerd 10 | container_runtime: "containerd" 11 | containerd_version: "1.7.23" 12 | runc_version: "1.1.14" 13 | cni_plugins_version: "1.5.1" 14 | containerd_max_concurrent_downloads: "5" 15 | nvidia_container_toolkit_version: "1.17.4" 16 | crio_version: "1.30.6" 17 | cri_dockerd_version: "0.3.15" 18 | k8s_version: "1.30.6" 19 | calico_version: "3.28.2" 20 | flannel_version: "0.25.6" 21 | helm_version: "3.16.2" 22 | gpu_operator_version:
"24.9.2" 23 | network_operator_version: "24.10.1" 24 | nim_operator_version: "1.0.0" 25 | local_path_provisioner: "0.0.30" 26 | nfs_provisioner: "4.0.18" 27 | metallb_version: "0.14.8" 28 | kserve_version: "0.14" 29 | prometheus_stack: "67.5.0" 30 | prometheus_adapter: "4.11.0" 31 | grafana_operator: "v5.15.1" 32 | elastic_stack: "8.15.3" 33 | lws_version: "0.4.0" 34 | 35 | # GPU Operator Values 36 | enable_gpu_operator: yes 37 | confidential_computing: no 38 | gpu_driver_version: "570.86.15" 39 | use_open_kernel_module: no 40 | enable_mig: no 41 | mig_profile: all-disabled 42 | mig_strategy: single 43 | # To use GDS, use_open_kernel_module needs to be enabled 44 | enable_gds: no 45 | #Secure Boot for only Ubuntu 46 | enable_secure_boot: no 47 | enable_cdi: no 48 | enable_vgpu: no 49 | vgpu_license_server: "" 50 | # URL of Helm repo to be added. If using NGC get this from the fetch command in the console 51 | helm_repository: https://helm.ngc.nvidia.com/nvidia 52 | # Name of the helm chart to be deployed 53 | gpu_operator_helm_chart: nvidia/gpu-operator 54 | ## This is most likely GPU Operator Driver Registry 55 | gpu_operator_driver_registry: "nvcr.io/nvidia" 56 | 57 | # NGC Values 58 | ## If using a private/protected registry. NGC API Key. Leave blank for public registries 59 | ngc_registry_password: "" 60 | ## This is most likely an NGC email 61 | ngc_registry_email: "" 62 | ngc_registry_username: "$oauthtoken" 63 | 64 | # Network Operator Values 65 | ## If the Network Operator is yes then make sure enable_rdma as well yes 66 | enable_network_operator: no 67 | ## Enable RDMA yes for NVIDIA Certification 68 | enable_rdma: no 69 | ## Enable for MLNX-OFED Driver Deployment 70 | deploy_ofed: no 71 | 72 | # Prxoy Configuration 73 | proxy: no 74 | http_proxy: "" 75 | https_proxy: "" 76 | 77 | # Cloud Native Stack for Developers Values 78 | ## Enable for Cloud Native Stack Developers 79 | cns_docker: no 80 | ## Enable For Cloud Native Stack Developers with TRD Driver 81 | cns_nvidia_driver: no 82 | nvidia_driver_mig: no 83 | 84 | ## Kubernetes resources 85 | k8s_apt_key: "https://pkgs.k8s.io/core:/stable:/v1.30/deb/Release.key" 86 | k8s_gpg_key: "https://pkgs.k8s.io/core:/stable:/v1.30/rpm/repodata/repomd.xml.key" 87 | k8s_apt_ring: "/etc/apt/keyrings/kubernetes-apt-keyring.gpg" 88 | k8s_registry: "registry.k8s.io" 89 | 90 | # Install NVIDIA NIM Operator 91 | enable_nim_operator: no 92 | 93 | # LeaderWorkerSet https://github.com/kubernetes-sigs/lws/tree/main 94 | lws: no 95 | 96 | # Local Path Provisioner and NFS Provisoner as Storage option 97 | storage: no 98 | 99 | # Monitoring Stack Prometheus/Grafana with GPU Metrics and Elastic Logging stack 100 | monitoring: no 101 | 102 | # Enable Kserve on Cloud Native Stack with Istio and Cert-Manager 103 | kserve: no 104 | 105 | # Install MetalLB 106 | loadbalancer: no 107 | # Example input loadbalancer_ip: "10.117.20.50/32", it could be node/host IP 108 | loadbalancer_ip: "" 109 | 110 | ## Cloud Native Stack Validation 111 | cns_validation: no 112 | 113 | # BMC Details for Confidential Computing 114 | bmc_ip: 115 | bmc_username: 116 | bmc_password: 117 | 118 | # CSP values 119 | ## AWS EKS values 120 | aws_region: us-east-2 121 | aws_cluster_name: cns-cluster-1 122 | aws_gpu_instance_type: g4dn.2xlarge 123 | 124 | ## Google Cloud GKE Values 125 | #https://cloud.google.com/resource-manager/docs/creating-managing-projects#identifying_projects 126 | gke_project_id: 127 | #https://cloud.google.com/compute/docs/regions-zones#available 128 | gke_region: 
us-west1 129 | gke_node_zones: ["us-west1-b"] 130 | gke_cluster_name: gke-cluster-1 131 | 132 | ## Azure AKS Values 133 | aks_cluster_name: aks-cluster-1 134 | #https://azure.microsoft.com/en-us/explore/global-infrastructure/geographies/#geographies 135 | aks_cluster_location: "West US 2" 136 | #https://learn.microsoft.com/en-us/partner-center/marketplace/find-tenant-object-id 137 | azure_object_id: [""] -------------------------------------------------------------------------------- /playbooks/cns_values_13.3.yaml: -------------------------------------------------------------------------------- 1 | cns_version: 13.3 2 | 3 | ## MicroK8s cluster 4 | microk8s: no 5 | ## Kubernetes Install with Kubeadm 6 | install_k8s: yes 7 | 8 | ## Components Versions 9 | # Container Runtime options are containerd, cri-o, cri-dockerd 10 | container_runtime: "containerd" 11 | containerd_version: "1.7.27" 12 | runc_version: "1.2.6" 13 | cni_plugins_version: "1.6.2" 14 | containerd_max_concurrent_downloads: "5" 15 | nvidia_container_toolkit_version: "1.17.5" 16 | crio_version: "1.30.10" 17 | cri_dockerd_version: "0.3.16" 18 | k8s_version: "1.30.10" 19 | calico_version: "3.29.3" 20 | flannel_version: "0.25.6" 21 | helm_version: "3.17.2" 22 | gpu_operator_version: "25.3.0" 23 | network_operator_version: "25.1.0" 24 | nim_operator_version: "1.0.1" 25 | local_path_provisioner: "0.0.31" 26 | nfs_provisioner: "4.0.18" 27 | metallb_version: "0.14.9" 28 | kserve_version: "0.15" 29 | prometheus_stack: "70.3.0" 30 | prometheus_adapter: "4.13.0" 31 | grafana_operator: "v5.17.0" 32 | elastic_stack: "8.17.4" 33 | lws_version: "0.5.1" 34 | 35 | # GPU Operator Values 36 | enable_gpu_operator: yes 37 | confidential_computing: no 38 | gpu_driver_version: "570.124.06" 39 | use_open_kernel_module: no 40 | enable_mig: no 41 | mig_profile: all-disabled 42 | mig_strategy: single 43 | # To use GDS, use_open_kernel_module needs to be enabled 44 | enable_gds: no 45 | #Secure Boot for only Ubuntu 46 | enable_secure_boot: no 47 | enable_cdi: no 48 | enable_vgpu: no 49 | vgpu_license_server: "" 50 | # URL of Helm repo to be added. If using NGC get this from the fetch command in the console 51 | helm_repository: "https://helm.ngc.nvidia.com/nvidia" 52 | # Name of the helm chart to be deployed 53 | gpu_operator_helm_chart: nvidia/gpu-operator 54 | ## This is most likely GPU Operator Driver Registry 55 | gpu_operator_driver_registry: "nvcr.io/nvidia" 56 | 57 | # NGC Values 58 | ## If using a private/protected registry. NGC API Key. 
Leave blank for public registries 59 | ngc_registry_password: "" 60 | ## This is most likely an NGC email 61 | ngc_registry_email: "" 62 | ngc_registry_username: "$oauthtoken" 63 | 64 | # Network Operator Values 65 | ## If the Network Operator is yes then make sure enable_rdma is also set to yes 66 | enable_network_operator: no 67 | ## Enable RDMA yes for NVIDIA Certification 68 | enable_rdma: no 69 | ## Enable for MLNX-OFED Driver Deployment 70 | deploy_ofed: no 71 | 72 | # Proxy Configuration 73 | proxy: no 74 | http_proxy: "" 75 | https_proxy: "" 76 | 77 | # Cloud Native Stack for Developers Values 78 | ## Enable for Cloud Native Stack Developers 79 | cns_docker: no 80 | ## Enable For Cloud Native Stack Developers with TRD Driver 81 | cns_nvidia_driver: no 82 | nvidia_driver_mig: no 83 | 84 | ## Kubernetes resources 85 | k8s_apt_key: "https://pkgs.k8s.io/core:/stable:/v1.30/deb/Release.key" 86 | k8s_gpg_key: "https://pkgs.k8s.io/core:/stable:/v1.30/rpm/repodata/repomd.xml.key" 87 | k8s_apt_ring: "/etc/apt/keyrings/kubernetes-apt-keyring.gpg" 88 | k8s_registry: "registry.k8s.io" 89 | 90 | # Install NVIDIA NIM Operator 91 | enable_nim_operator: no 92 | 93 | # LeaderWorkerSet https://github.com/kubernetes-sigs/lws/tree/main 94 | lws: no 95 | 96 | # Local Path Provisioner and NFS Provisioner as Storage option 97 | storage: no 98 | 99 | # Monitoring Stack Prometheus/Grafana with GPU Metrics and Elastic Logging stack 100 | monitoring: no 101 | 102 | # Enable Kserve on Cloud Native Stack with Istio and Cert-Manager 103 | kserve: no 104 | 105 | # Install MetalLB 106 | loadbalancer: no 107 | # Example input loadbalancer_ip: "10.78.17.85/32" 108 | loadbalancer_ip: "" 109 | 110 | ## Cloud Native Stack Validation 111 | cns_validation: no 112 | 113 | # BMC Details for Confidential Computing 114 | bmc_ip: 115 | bmc_username: 116 | bmc_password: 117 | 118 | # CSP values 119 | ## AWS EKS values 120 | aws_region: us-east-2 121 | aws_cluster_name: cns-cluster-1 122 | aws_gpu_instance_type: g4dn.2xlarge 123 | 124 | ## Google Cloud GKE Values 125 | #https://cloud.google.com/resource-manager/docs/creating-managing-projects#identifying_projects 126 | gke_project_id: 127 | #https://cloud.google.com/compute/docs/regions-zones#available 128 | gke_region: us-west1 129 | gke_node_zones: ["us-west1-b"] 130 | gke_cluster_name: gke-cluster-1 131 | 132 | ## Azure AKS Values 133 | aks_cluster_name: aks-cluster-1 134 | #https://azure.microsoft.com/en-us/explore/global-infrastructure/geographies/#geographies 135 | aks_cluster_location: "West US 2" 136 | #https://learn.microsoft.com/en-us/partner-center/marketplace/find-tenant-object-id 137 | azure_object_id: [""] -------------------------------------------------------------------------------- /playbooks/cns_values_14.0.yaml: -------------------------------------------------------------------------------- 1 | cns_version: 14.0 2 | 3 | ## MicroK8s cluster 4 | microk8s: no 5 | ## Kubernetes Install with Kubeadm 6 | install_k8s: yes 7 | 8 | ## Components Versions 9 | # Container Runtime options are containerd, cri-o, cri-dockerd 10 | container_runtime: "containerd" 11 | containerd_version: "1.7.23" 12 | runc_version: "1.1.14" 13 | cni_plugins_version: "1.5.1" 14 | containerd_max_concurrent_downloads: "5" 15 | nvidia_container_toolkit_version: "1.17.4" 16 | crio_version: "1.31.2" 17 | cri_dockerd_version: "0.3.15" 18 | k8s_version: "1.31.2" 19 | calico_version: "3.28.2" 20 | flannel_version: "0.25.6" 21 | helm_version: "3.16.2" 22 | gpu_operator_version: "24.9.2" 23 |
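# Illustration of what a chart pin such as gpu_operator_version resolves to if the
# chart is installed manually with Helm (a sketch; the playbooks perform the
# equivalent step automatically using helm_repository below):
#   helm repo add nvidia https://helm.ngc.nvidia.com/nvidia && helm repo update
#   helm install gpu-operator nvidia/gpu-operator -n gpu-operator --create-namespace --version v24.9.2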
network_operator_version: "24.10.1" 24 | nim_operator_version: "1.0.0" 25 | local_path_provisioner: "0.0.30" 26 | nfs_provisioner: "4.0.18" 27 | metallb_version: "0.14.8" 28 | kserve_version: "0.14" 29 | prometheus_stack: "67.5.0" 30 | prometheus_adapter: "4.11.0" 31 | grafana_operator: "v5.15.1" 32 | elastic_stack: "8.15.3" 33 | lws_version: "0.4.0" 34 | 35 | # GPU Operator Values 36 | enable_gpu_operator: yes 37 | confidential_computing: no 38 | gpu_driver_version: "570.86.15" 39 | use_open_kernel_module: no 40 | enable_mig: no 41 | mig_profile: all-disabled 42 | mig_strategy: single 43 | # To use GDS, use_open_kernel_module needs to be enabled 44 | enable_gds: no 45 | #Secure Boot for only Ubuntu 46 | enable_secure_boot: no 47 | enable_cdi: no 48 | enable_vgpu: no 49 | vgpu_license_server: "" 50 | # URL of Helm repo to be added. If using NGC get this from the fetch command in the console 51 | helm_repository: https://helm.ngc.nvidia.com/nvidia 52 | # Name of the helm chart to be deployed 53 | gpu_operator_helm_chart: nvidia/gpu-operator 54 | ## This is most likely GPU Operator Driver Registry 55 | gpu_operator_driver_registry: "nvcr.io/nvidia" 56 | 57 | # NGC Values 58 | ## If using a private/protected registry. NGC API Key. Leave blank for public registries 59 | ngc_registry_password: "" 60 | ## This is most likely an NGC email 61 | ngc_registry_email: "" 62 | ngc_registry_username: "$oauthtoken" 63 | 64 | # Network Operator Values 65 | ## If the Network Operator is yes then make sure enable_rdma as well yes 66 | enable_network_operator: no 67 | ## Enable RDMA yes for NVIDIA Certification 68 | enable_rdma: no 69 | ## Enable for MLNX-OFED Driver Deployment 70 | deploy_ofed: no 71 | 72 | # Prxoy Configuration 73 | proxy: no 74 | http_proxy: "" 75 | https_proxy: "" 76 | 77 | # Cloud Native Stack for Developers Values 78 | ## Enable for Cloud Native Stack Developers 79 | cns_docker: no 80 | ## Enable For Cloud Native Stack Developers with TRD Driver 81 | cns_nvidia_driver: no 82 | nvidia_driver_mig: no 83 | 84 | ## Kubernetes resources 85 | k8s_apt_key: "https://pkgs.k8s.io/core:/stable:/v1.31/deb/Release.key" 86 | k8s_gpg_key: "https://pkgs.k8s.io/core:/stable:/v1.31/rpm/repodata/repomd.xml.key" 87 | k8s_apt_ring: "/etc/apt/keyrings/kubernetes-apt-keyring.gpg" 88 | k8s_registry: "registry.k8s.io" 89 | 90 | # Install NVIDIA NIM Operator 91 | enable_nim_operator: no 92 | 93 | # LeaderWorkerSet https://github.com/kubernetes-sigs/lws/tree/main 94 | lws: no 95 | 96 | # Local Path Provisioner and NFS Provisoner as Storage option 97 | storage: no 98 | 99 | # Monitoring Stack Prometheus/Grafana with GPU Metrics and Elastic Logging stack 100 | monitoring: no 101 | 102 | # Enable Kserve on Cloud Native Stack with Istio and Cert-Manager 103 | kserve: no 104 | 105 | # Install MetalLB 106 | loadbalancer: no 107 | # Example input loadbalancer_ip: "10.117.20.50/32", it could be node/host IP 108 | loadbalancer_ip: "" 109 | 110 | ## Cloud Native Stack Validation 111 | cns_validation: no 112 | 113 | # BMC Details for Confidential Computing 114 | bmc_ip: 115 | bmc_username: 116 | bmc_password: 117 | 118 | # CSP values 119 | ## AWS EKS values 120 | aws_region: us-east-2 121 | aws_cluster_name: cns-cluster-1 122 | aws_gpu_instance_type: g4dn.2xlarge 123 | 124 | ## Google Cloud GKE Values 125 | #https://cloud.google.com/resource-manager/docs/creating-managing-projects#identifying_projects 126 | gke_project_id: 127 | #https://cloud.google.com/compute/docs/regions-zones#available 128 | gke_region: us-west1 129 | 
gke_node_zones: ["us-west1-b"] 130 | gke_cluster_name: gke-cluster-1 131 | 132 | ## Azure AKS Values 133 | aks_cluster_name: aks-cluster-1 134 | #https://azure.microsoft.com/en-us/explore/global-infrastructure/geographies/#geographies 135 | aks_cluster_location: "West US 2" 136 | #https://learn.microsoft.com/en-us/partner-center/marketplace/find-tenant-object-id 137 | azure_object_id: [""] -------------------------------------------------------------------------------- /playbooks/cns_values_14.1.yaml: -------------------------------------------------------------------------------- 1 | cns_version: 14.1 2 | 3 | ## MicroK8s cluster 4 | microk8s: no 5 | ## Kubernetes Install with Kubeadm 6 | install_k8s: yes 7 | 8 | ## Components Versions 9 | # Container Runtime options are containerd, cri-o, cri-dockerd 10 | container_runtime: "containerd" 11 | containerd_version: "2.0.3" 12 | runc_version: "1.2.6" 13 | cni_plugins_version: "1.6.2" 14 | containerd_max_concurrent_downloads: "5" 15 | nvidia_container_toolkit_version: "1.17.5" 16 | crio_version: "1.31.5" 17 | cri_dockerd_version: "0.3.16" 18 | k8s_version: "1.31.6" 19 | calico_version: "3.29.3" 20 | flannel_version: "0.25.6" 21 | helm_version: "3.17.2" 22 | gpu_operator_version: "25.3.0" 23 | network_operator_version: "25.1.0" 24 | nim_operator_version: "1.0.1" 25 | local_path_provisioner: "0.0.31" 26 | nfs_provisioner: "4.0.18" 27 | metallb_version: "0.14.9" 28 | kserve_version: "0.15" 29 | prometheus_stack: "70.3.0" 30 | prometheus_adapter: "4.13.0" 31 | grafana_operator: "v5.17.0" 32 | elastic_stack: "8.17.4" 33 | lws_version: "0.5.1" 34 | 35 | # GPU Operator Values 36 | enable_gpu_operator: yes 37 | confidential_computing: no 38 | gpu_driver_version: "570.124.06" 39 | use_open_kernel_module: no 40 | enable_mig: no 41 | mig_profile: all-disabled 42 | mig_strategy: single 43 | # To use GDS, use_open_kernel_module needs to be enabled 44 | enable_gds: no 45 | #Secure Boot for only Ubuntu 46 | enable_secure_boot: no 47 | enable_cdi: no 48 | enable_vgpu: no 49 | vgpu_license_server: "" 50 | # URL of Helm repo to be added. If using NGC get this from the fetch command in the console 51 | helm_repository: "https://helm.ngc.nvidia.com/nvidia" 52 | # Name of the helm chart to be deployed 53 | gpu_operator_helm_chart: nvidia/gpu-operator 54 | ## This is most likely GPU Operator Driver Registry 55 | gpu_operator_driver_registry: "nvcr.io/nvidia" 56 | 57 | # NGC Values 58 | ## If using a private/protected registry. NGC API Key. 
Leave blank for public registries 59 | ngc_registry_password: "" 60 | ## This is most likely an NGC email 61 | ngc_registry_email: "" 62 | ngc_registry_username: "$oauthtoken" 63 | 64 | # Network Operator Values 65 | ## If the Network Operator is yes then make sure enable_rdma is also set to yes 66 | enable_network_operator: no 67 | ## Enable RDMA yes for NVIDIA Certification 68 | enable_rdma: no 69 | ## Enable for MLNX-OFED Driver Deployment 70 | deploy_ofed: no 71 | 72 | # Proxy Configuration 73 | proxy: no 74 | http_proxy: "" 75 | https_proxy: "" 76 | 77 | # Cloud Native Stack for Developers Values 78 | ## Enable for Cloud Native Stack Developers 79 | cns_docker: no 80 | ## Enable For Cloud Native Stack Developers with TRD Driver 81 | cns_nvidia_driver: no 82 | nvidia_driver_mig: no 83 | 84 | ## Kubernetes resources 85 | k8s_apt_key: "https://pkgs.k8s.io/core:/stable:/v1.31/deb/Release.key" 86 | k8s_gpg_key: "https://pkgs.k8s.io/core:/stable:/v1.31/rpm/repodata/repomd.xml.key" 87 | k8s_apt_ring: "/etc/apt/keyrings/kubernetes-apt-keyring.gpg" 88 | k8s_registry: "registry.k8s.io" 89 | 90 | # Install NVIDIA NIM Operator 91 | enable_nim_operator: no 92 | 93 | # LeaderWorkerSet https://github.com/kubernetes-sigs/lws/tree/main 94 | lws: no 95 | 96 | # Local Path Provisioner and NFS Provisioner as Storage option 97 | storage: no 98 | 99 | # Monitoring Stack Prometheus/Grafana with GPU Metrics and Elastic Logging stack 100 | monitoring: no 101 | 102 | # Enable Kserve on Cloud Native Stack with Istio and Cert-Manager 103 | kserve: no 104 | 105 | # Install MetalLB 106 | loadbalancer: no 107 | # Example input loadbalancer_ip: "10.78.17.85/32" 108 | loadbalancer_ip: "" 109 | 110 | ## Cloud Native Stack Validation 111 | cns_validation: no 112 | 113 | # BMC Details for Confidential Computing 114 | bmc_ip: 115 | bmc_username: 116 | bmc_password: 117 | 118 | # CSP values 119 | ## AWS EKS values 120 | aws_region: us-east-2 121 | aws_cluster_name: cns-cluster-1 122 | aws_gpu_instance_type: g4dn.2xlarge 123 | 124 | ## Google Cloud GKE Values 125 | #https://cloud.google.com/resource-manager/docs/creating-managing-projects#identifying_projects 126 | gke_project_id: 127 | #https://cloud.google.com/compute/docs/regions-zones#available 128 | gke_region: us-west1 129 | gke_node_zones: ["us-west1-b"] 130 | gke_cluster_name: gke-cluster-1 131 | 132 | ## Azure AKS Values 133 | aks_cluster_name: aks-cluster-1 134 | #https://azure.microsoft.com/en-us/explore/global-infrastructure/geographies/#geographies 135 | aks_cluster_location: "West US 2" 136 | #https://learn.microsoft.com/en-us/partner-center/marketplace/find-tenant-object-id 137 | azure_object_id: [""] -------------------------------------------------------------------------------- /playbooks/cns_values_15.0.yaml: -------------------------------------------------------------------------------- 1 | cns_version: 15.0 2 | 3 | ## MicroK8s cluster 4 | microk8s: no 5 | ## Kubernetes Install with Kubeadm 6 | install_k8s: yes 7 | 8 | ## Components Versions 9 | # Container Runtime options are containerd, cri-o, cri-dockerd 10 | container_runtime: "containerd" 11 | containerd_version: "2.0.3" 12 | runc_version: "1.2.6" 13 | cni_plugins_version: "1.6.2" 14 | containerd_max_concurrent_downloads: "5" 15 | nvidia_container_toolkit_version: "1.17.5" 16 | crio_version: "1.32.1" 17 | cri_dockerd_version: "0.3.16" 18 | k8s_version: "1.32.2" 19 | calico_version: "3.29.3" 20 | flannel_version: "0.25.6" 21 | helm_version: "3.17.2" 22 | gpu_operator_version: "25.3.0" 23 |
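# Note: k8s_version above and the k8s_apt_key/k8s_gpg_key URLs further down must track
# the same Kubernetes minor release (v1.32 here), because pkgs.k8s.io publishes a
# separate package repository per minor version.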
network_operator_version: "25.1.0" 24 | nim_operator_version: "1.0.1" 25 | local_path_provisioner: "0.0.31" 26 | nfs_provisioner: "4.0.18" 27 | metallb_version: "0.14.9" 28 | kserve_version: "0.15" 29 | prometheus_stack: "70.3.0" 30 | prometheus_adapter: "4.13.0" 31 | grafana_operator: "v5.17.0" 32 | elastic_stack: "8.17.4" 33 | lws_version: "0.5.1" 34 | 35 | # GPU Operator Values 36 | enable_gpu_operator: yes 37 | confidential_computing: no 38 | gpu_driver_version: "570.124.06" 39 | use_open_kernel_module: no 40 | enable_mig: no 41 | mig_profile: all-disabled 42 | mig_strategy: single 43 | # To use GDS, use_open_kernel_module needs to be enabled 44 | enable_gds: no 45 | #Secure Boot for only Ubuntu 46 | enable_secure_boot: no 47 | enable_cdi: no 48 | enable_vgpu: no 49 | vgpu_license_server: "" 50 | # URL of Helm repo to be added. If using NGC get this from the fetch command in the console 51 | helm_repository: "https://helm.ngc.nvidia.com/nvidia" 52 | # Name of the helm chart to be deployed 53 | gpu_operator_helm_chart: nvidia/gpu-operator 54 | ## This is most likely GPU Operator Driver Registry 55 | gpu_operator_driver_registry: "nvcr.io/nvidia" 56 | 57 | # NGC Values 58 | ## If using a private/protected registry. NGC API Key. Leave blank for public registries 59 | ngc_registry_password: "" 60 | ## This is most likely an NGC email 61 | ngc_registry_email: "" 62 | ngc_registry_username: "$oauthtoken" 63 | 64 | # Network Operator Values 65 | ## If the Network Operator is yes then make sure enable_rdma as well yes 66 | enable_network_operator: no 67 | ## Enable RDMA yes for NVIDIA Certification 68 | enable_rdma: no 69 | ## Enable for MLNX-OFED Driver Deployment 70 | deploy_ofed: no 71 | 72 | # Prxoy Configuration 73 | proxy: no 74 | http_proxy: "" 75 | https_proxy: "" 76 | 77 | # Cloud Native Stack for Developers Values 78 | ## Enable for Cloud Native Stack Developers 79 | cns_docker: no 80 | ## Enable For Cloud Native Stack Developers with TRD Driver 81 | cns_nvidia_driver: no 82 | nvidia_driver_mig: no 83 | 84 | ## Kubernetes resources 85 | k8s_apt_key: "https://pkgs.k8s.io/core:/stable:/v1.32/deb/Release.key" 86 | k8s_gpg_key: "https://pkgs.k8s.io/core:/stable:/v1.32/rpm/repodata/repomd.xml.key" 87 | k8s_apt_ring: "/etc/apt/keyrings/kubernetes-apt-keyring.gpg" 88 | k8s_registry: "registry.k8s.io" 89 | 90 | # Install NVIDIA NIM Operator 91 | enable_nim_operator: no 92 | 93 | # LeaderWorkerSet https://github.com/kubernetes-sigs/lws/tree/main 94 | lws: no 95 | 96 | # Local Path Provisioner and NFS Provisoner as Storage option 97 | storage: no 98 | 99 | # Monitoring Stack Prometheus/Grafana with GPU Metrics and Elastic Logging stack 100 | monitoring: no 101 | 102 | # Enable Kserve on Cloud Native Stack with Istio and Cert-Manager 103 | kserve: no 104 | 105 | # Install MetalLB 106 | loadbalancer: no 107 | # Example input loadbalancer_ip: "10.78.17.85/32" 108 | loadbalancer_ip: "" 109 | 110 | ## Cloud Native Stack Validation 111 | cns_validation: no 112 | 113 | # BMC Details for Confidential Computing 114 | bmc_ip: 115 | bmc_username: 116 | bmc_password: 117 | 118 | # CSP values 119 | ## AWS EKS values 120 | aws_region: us-east-2 121 | aws_cluster_name: cns-cluster-1 122 | aws_gpu_instance_type: g4dn.2xlarge 123 | 124 | ## Google Cloud GKE Values 125 | #https://cloud.google.com/resource-manager/docs/creating-managing-projects#identifying_projects 126 | gke_project_id: 127 | #https://cloud.google.com/compute/docs/regions-zones#available 128 | gke_region: us-west1 129 | gke_node_zones: 
["us-west1-b"] 130 | gke_cluster_name: gke-cluster-1 131 | 132 | ## Azure AKS Values 133 | aks_cluster_name: aks-cluster-1 134 | #https://azure.microsoft.com/en-us/explore/global-infrastructure/geographies/#geographies 135 | aks_cluster_location: "West US 2" 136 | #https://learn.microsoft.com/en-us/partner-center/marketplace/find-tenant-object-id 137 | azure_object_id: [""] -------------------------------------------------------------------------------- /playbooks/cns_version.yaml: -------------------------------------------------------------------------------- 1 | cns_version: 14.1 -------------------------------------------------------------------------------- /playbooks/files/aws_credentials: -------------------------------------------------------------------------------- 1 | #[default] 2 | #aws_access_key_id = 3 | #aws_secret_access_key = -------------------------------------------------------------------------------- /playbooks/files/conf_compu_snp_install.sh: -------------------------------------------------------------------------------- 1 | echo "========================================================================================================================" 2 | echo " Build the Host Kernel for SNP " 3 | echo "========================================================================================================================" 4 | 5 | sudo apt update >/dev/null 2>&1 6 | sudo apt upgrade -y >/dev/null 2>&1 7 | sudo apt install -y ninja-build iasl nasm flex bison openssl dkms autoconf zlib1g-dev python3-pip libncurses-dev libssl-dev libelf-dev libudev-dev libpci-dev libiberty-dev libtool libsdl-console libsdl-console-dev libpango1.0-dev libjpeg8-dev libpixman-1-dev libcairo2-dev libgif-dev libglib2.0-dev >/dev/null 2>&1 8 | sudo pip3 install numpy flex bison >/dev/null 2>&1 9 | echo 10 | echo "========================================================================================================================" 11 | echo "Download the AMDSEV Pacakge" 12 | echo "========================================================================================================================" 13 | if [[ ! 
-------------------------------------------------------------------------------- /playbooks/files/csp_uninstall.yaml: -------------------------------------------------------------------------------- 1 | - hosts: localhost 2 | gather_facts: yes 3 | vars_files: 4 | - csp_values.yaml 5 | - cnc_values.yaml 6 | environment: 7 | PATH: "{{ ansible_env.PATH }}:{{lookup('pipe', 'pwd')}}/google-cloud-sdk/bin/" 8 | tasks: 9 | 10 | - name: Uninstall Helm Charts on NVIDIA Cloud Native Stack 11 | ignore_errors: true 12 | async: 120 13 | shell: | 14 | count=$(helm ls -A | egrep 'gpu-operator|network-operator' | grep -v NAME | wc -l) 15 | if [[ $count > 0 ]]; then 16 | for name in `helm ls -A | awk '{print $1}' | grep -v NAME` 17 | do 18 | for namespace in `helm ls -A | grep $name | awk '{print $2}' | grep -v NAMESPACE` 19 | do 20 | helm del $name -n $namespace --wait 21 | pods=$(kubectl get pods -n $namespace | grep -v NAME | wc -l) 22 | while [
$pods != 0 ] 23 | do 24 | sleep 10 25 | pods=$(kubectl get pods -n $namespace | grep -v NAME | wc -l) 26 | done 27 | done 28 | done 29 | fi 30 | args: 31 | executable: /bin/bash 32 | 33 | - name: Delete GKE Cluster 34 | shell: "gcloud beta container clusters delete {{ gke_cluster_name }} --zone {{ gke_zone }} --quiet" 35 | when: "installon_gke == true" 36 | 37 | - name: delete the GKE binaries 38 | shell: "rm -rf {{lookup('pipe', 'pwd')}}/google-cloud-sdk* {{ ansible_user_dir }}/.config/gcloud" 39 | when: "installon_gke == true" 40 | ignore_errors: true 41 | 42 | - name: delete gcloud path on Linux 43 | when: "installon_gke == true and ansible_system == 'Linux'" 44 | lineinfile: 45 | path: "{{ ansible_user_dir }}/.bashrc" 46 | regexp: '^PATH=(.*)/google-cloud-sdk/bin/(.*)' 47 | backrefs: yes 48 | state: absent 49 | 50 | - name: delete gcloud path on Darwin 51 | when: "installon_gke == true and ansible_system == 'Darwin'" 52 | lineinfile: 53 | path: "{{ ansible_user_dir }}/.zshrc" 54 | regexp: '^PATH=(.*)/google-cloud-sdk/bin/(.*)' 55 | backrefs: yes 56 | state: absent 57 | 58 | 59 | - name: Delete EKS cluster 60 | when: installon_eks == true 61 | shell: "eksctl delete cluster -f {{lookup('pipe', 'pwd')}}/eks_cluster_config.yaml" 62 | 63 | - name: Delete the EKS details 64 | shell: "rm -rf {{ ansible_user_dir }}/.aws" 65 | when: installon_eks == true 66 | 67 | - name: delete AKS cluster 68 | when: installon_aks == true 69 | shell: "az aks delete -n {{ aks_cluster_name }} -g {{ azure_resource_group }} --yes" 70 | 71 | - name: Delete the AKS details 72 | shell: "rm -rf {{ ansible_user_dir }}/.azure" 73 | when: installon_aks == true -------------------------------------------------------------------------------- /playbooks/files/csp_values.yaml: -------------------------------------------------------------------------------- 1 | ## Google cloud values 2 | installon_gke: no 3 | gke_cluster_name: gke-gpu-cluster 4 | #https://cloud.google.com/resource-manager/docs/creating-managing-projects#identifying_projects 5 | gke_project_id: 6 | # https://cloud.google.com/compute/docs/regions-zones#available 7 | gke_zone: us-west1-a 8 | gke_version: "1.25" 9 | #https://console.cloud.google.com/networking/networks/ 10 | gke_network: default 11 | 12 | ##TODO 13 | # https://developers.google.com/identity/protocols/oauth2/service-account 14 | #cred_file: 15 | 16 | ## AWS values 17 | installon_eks: no 18 | eks_cluster_name: eks-gpu 19 | #https://cloud-images.ubuntu.com/aws-eks/ 20 | eks_ami: ami-000ec9ff4552093c1 21 | eks_version: "1.25" 22 | eks_region: us-west-1 23 | instance_type: g4dn.xlarge 24 | 25 | ## Azure values 26 | installon_aks: no 27 | aks_cluster_name: aks-gpu-cluster 28 | #https://learn.microsoft.com/en-us/azure/azure-portal/get-subscription-tenant-id 29 | azure_account_name: 30 | #https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/manage-resource-groups-portal#list-resource-groups 31 | azure_resource_group: 32 | azure_location: eastus 33 | az_k8s_version: "1.25.6" 34 | ## TODO 35 | # https://learn.microsoft.com/en-us/cli/azure/create-an-azure-service-principal-azure-cli#1-create-a-service-principal 36 | #azure_sp_appId: 37 | #azure_sp_password: 38 | #azure_tenant: -------------------------------------------------------------------------------- /playbooks/files/fluent-values.yaml: -------------------------------------------------------------------------------- 1 | config: 2 | ## https://docs.fluentbit.io/manual/pipeline/outputs 3 | outputs: | 4 | [OUTPUT] 5 | Name es 6 | 
Match * 7 | tls On 8 | tls.verify Off 9 | Host cns-es-http 10 | Port 9200 11 | HTTP_User elastic 12 | HTTP_Passwd cns-stack 13 | Replace_Dots On 14 | Logstash_Format On 15 | Logstash_Prefix logs-fluent-bit 16 | Retry_Limit False 17 | Suppress_Type_Name On -------------------------------------------------------------------------------- /playbooks/files/gridd.conf: -------------------------------------------------------------------------------- 1 | # See the official documentation for more details: https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/getting-started.html 2 | # Description: Set License Server Address 3 | # Data type: string 4 | # Format: "<address>" 5 | ServerAddress= 6 |
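For example (a hypothetical license server address; any IP or FQDN of a reachable license server works here, and 192.0.2.10 is a documentation placeholder):

```sh
# Illustrative only -- substitute your own license server
ServerAddress=192.0.2.10
```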
" 5 | ServerAddress= 6 | -------------------------------------------------------------------------------- /playbooks/files/kube-prometheus-stack.values: -------------------------------------------------------------------------------- 1 | grafana: 2 | enabled: false 3 | prometheus: 4 | service: 5 | type: NodePort 6 | prometheusSpec: 7 | serviceMonitorSelectorNilUsesHelmValues: false 8 | additionalScrapeConfigs: 9 | - job_name: gpu-metrics 10 | scrape_interval: 1s 11 | metrics_path: /metrics 12 | scheme: http 13 | kubernetes_sd_configs: 14 | - role: endpoints 15 | namespaces: 16 | names: 17 | - nvidia-gpu-operator 18 | - default 19 | relabel_configs: 20 | - source_labels: [__meta_kubernetes_pod_node_name] 21 | action: replace 22 | target_label: kubernetes_node -------------------------------------------------------------------------------- /playbooks/files/mellanox-test.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: rdma-test-pod-1 5 | #namespace: nvidia-network-operator-resources 6 | annotations: 7 | k8s.v1.cni.cncf.io/networks: rdma-net-ipam 8 | # If a network with static IPAM is used replace network annotation with the below. 9 | #k8s.v1.cni.cncf.io/networks: '[ 10 | # { "name": "rmda-net", 11 | # "ips": ["192.168.111.101/24"], 12 | # "gateway": ["192.168.111.1"] 13 | # } 14 | #]' 15 | spec: 16 | restartPolicy: OnFailure 17 | containers: 18 | - image: mellanox/rping-test 19 | name: rdma-test-ctr 20 | securityContext: 21 | capabilities: 22 | add: [ "IPC_LOCK" ] 23 | resources: 24 | limits: 25 | rdma/rdma_shared_device_a: 1 26 | requests: 27 | rdma/rdma_shared_device_a: 1 28 | command: 29 | - sh 30 | - -c 31 | - | 32 | ls -l /dev/infiniband /sys/class/net 33 | sleep infinity 34 | --- 35 | apiVersion: v1 36 | kind: Pod 37 | metadata: 38 | name: rdma-test-pod-2 39 | #namespace: nvidia-network-operator-resources 40 | annotations: 41 | k8s.v1.cni.cncf.io/networks: rdma-net-ipam 42 | # If a network with static IPAM is used replace network annotation with the below. 
43 | #k8s.v1.cni.cncf.io/networks: '[ 44 | # { "name": "rdma-net", 45 | # "ips": ["192.168.111.101/24"], 46 | # "gateway": ["192.168.111.1"] 47 | # } 48 | #]' 49 | spec: 50 | restartPolicy: OnFailure 51 | containers: 52 | - image: mellanox/rping-test 53 | name: rdma-test-ctr 54 | securityContext: 55 | capabilities: 56 | add: [ "IPC_LOCK" ] 57 | resources: 58 | limits: 59 | rdma/rdma_shared_device_a: 1 60 | requests: 61 | rdma/rdma_shared_device_a: 1 62 | command: 63 | - sh 64 | - -c 65 | - | 66 | ls -l /dev/infiniband /sys/class/net 67 | sleep infinity 68 | -------------------------------------------------------------------------------- /playbooks/files/network-operator-value.yaml: -------------------------------------------------------------------------------- 1 | nfd: 2 | enabled: true 3 | deployNodeFeatureRules: true -------------------------------------------------------------------------------- /playbooks/files/network-operator-values.yaml: -------------------------------------------------------------------------------- 1 | deployCR: true 2 | ofedDriver: 3 | deploy: true 4 | rdmaSharedDevicePlugin: 5 | deploy: true 6 | resources: 7 | - name: rdma_shared_device_a 8 | vendors: [15b3] 9 | ifNames: [ens160f0, ens160f1] 10 | -------------------------------------------------------------------------------- /playbooks/files/networkdefinition.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: k8s.cni.cncf.io/v1 2 | kind: NetworkAttachmentDefinition 3 | metadata: 4 | annotations: 5 | k8s.v1.cni.cncf.io/resourceName: rdma/rdma_shared_device_a 6 | name: rdma-net-ipam 7 | #namespace: nvidia-network-operator-resources 8 | spec: 9 | config: |- 10 | { 11 | "cniVersion": "0.3.1", 12 | "name": "rdma-net-ipam", 13 | "plugins": [ 14 | { 15 | "ipam": { 16 | "datastore": "kubernetes", 17 | "kubernetes": { 18 | "kubeconfig": "/etc/cni/net.d/whereabouts.d/whereabouts.kubeconfig" 19 | }, 20 | "log_file": "/tmp/whereabouts.log", 21 | "log_level": "debug", 22 | "range": "192.168.111.0/24", 23 | "type": "whereabouts" 24 | }, 25 | "type": "macvlan", 26 | "master": "ens160f0" 27 | }, 28 | { 29 | "mtu": 1500, 30 | "type": "tuning" 31 | } 32 | ] 33 | } 34 | -------------------------------------------------------------------------------- /playbooks/files/nic-cluster-policy.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: mellanox.com/v1alpha1 2 | kind: NicClusterPolicy 3 | metadata: 4 | name: nic-cluster-policy 5 | spec: 6 | {% if deploy_ofed %} 7 | ofedDriver: 8 | readinessProbe: 9 | initialDelaySeconds: 10 10 | periodSeconds: 30 11 | forcePrecompiled: false 12 | terminationGracePeriodSeconds: 300 13 | livenessProbe: 14 | initialDelaySeconds: 30 15 | periodSeconds: 30 16 | env: 17 | - name: UNLOAD_STORAGE_MODULES 18 | value: 'true' 19 | upgradePolicy: 20 | autoUpgrade: true 21 | drain: 22 | deleteEmptyDir: true 23 | enable: true 24 | force: true 25 | timeoutSeconds: 300 26 | podSelector: '' 27 | maxParallelUpgrades: 1 28 | safeLoad: false 29 | waitForCompletion: 30 | timeoutSeconds: 0 31 | startupProbe: 32 | initialDelaySeconds: 10 33 | periodSeconds: 20 34 | image: doca-driver 35 | repository: nvcr.io/nvidia/mellanox 36 | version: 25.01-0.6.0.0-0 37 | {% endif %} 38 | rdmaSharedDevicePlugin: 39 | image: k8s-rdma-shared-dev-plugin 40 | repository: ghcr.io/mellanox 41 | version: v1.5.2 42 | # The config below directly propagates to k8s-rdma-shared-device-plugin configuration.
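# (Added note, an assumption about typical tooling rather than part of this
# file: `ls /sys/class/net` lists candidate netdevices, and with MLNX-OFED
# installed, `ibdev2netdev` maps RDMA devices to the netdevice names expected
# in "ifNames" below.)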
43 | # Replace 'devices' with your (RDMA capable) netdevice name. 44 | config: | 45 | { 46 | "configList": [ 47 | { 48 | "resourceName": "rdma_shared_device_a", 49 | "rdmaHcaMax": 63, 50 | "selectors": { 51 | "vendors": ["15b3"], 52 | "ifNames": ["ens2f0"] 53 | } 54 | } 55 | ] 56 | } 57 | secondaryNetwork: 58 | cniPlugins: 59 | image: plugins 60 | repository: ghcr.io/k8snetworkplumbingwg 61 | version: v1.5.0 62 | imagePullSecrets: [] 63 | multus: 64 | image: multus-cni 65 | repository: ghcr.io/k8snetworkplumbingwg 66 | version: v4.1.0 67 | imagePullSecrets: [] 68 | ipamPlugin: 69 | image: whereabouts 70 | repository: ghcr.io/k8snetworkplumbingwg 71 | version: v0.7.0 72 | imagePullSecrets: [] 73 | -------------------------------------------------------------------------------- /playbooks/files/nvidia-vgpu-driver.yaml: -------------------------------------------------------------------------------- 1 | - hosts: all 2 | become: true 3 | become_method: sudo 4 | vars_files: 5 | - cns_values.yaml 6 | tasks: 7 | 8 | - name: Install Dependencies 9 | ansible.builtin.apt: 10 | name: 11 | - apt-transport-https 12 | - ca-certificates 13 | - lsb-release 14 | - gnupg 15 | - apt-utils 16 | - aptitude 17 | - unzip 18 | - pkg-config 19 | - build-essential 20 | state: latest 21 | update_cache: true 22 | 23 | - name: Check NVIDIA Driver Modules are loaded 24 | shell: "lsmod | grep -i nvidia" 25 | register: nvidia_mod 26 | no_log: True 27 | failed_when: false 28 | 29 | - name: Install NVIDIA vGPU Driver 30 | become: true 31 | when: nvidia_mod.rc == 1 32 | block: 33 | 34 | - name: Download the NGC CLI 35 | get_url: 36 | url: https://ngc.nvidia.com/downloads/ngccli_linux.zip 37 | dest: /tmp/ 38 | 39 | - name: Unzip the NGC CLI 40 | unarchive: 41 | src: /tmp/ngccli_linux.zip 42 | remote_src: true 43 | dest: /usr/local/bin/ 44 | 45 | - name: Update the NGC CLI permissions 46 | file: 47 | path: /usr/local/bin/ngc 48 | mode: 0775 49 | 50 | - name: NGC Config Set 51 | shell: export NGC_CLI_API_KEY={{ ngc_api_key }} && ngc config set --format_type=ascii --org=nvaie --team=no-team 52 | 53 | - name: Download the NVIDIA vGPU Driver from NGC 54 | shell: export NGC_CLI_API_KEY={{ ngc_api_key }} && ngc registry resource download-version "nvaie/vgpu_guest_driver_2_0:510.47.03" 55 | 56 | - name: Update vGPU Driver Permissions 57 | file: 58 | path: ./vgpu_guest_driver_2_0_v510.47.03/NVIDIA-Linux-x86_64-510.47.03-grid.run 59 | mode: '0777' 60 | 61 | - name: Install the vGPU Driver 62 | shell: sh ./vgpu_guest_driver_2_0_v510.47.03/NVIDIA-Linux-x86_64-510.47.03-grid.run -s 63 | 64 | - name: Download the vGPU token 65 | shell: export NGC_CLI_API_KEY={{ ngc_api_key }} && ngc registry resource download-version "nvlp-aienterprise/licensetoken:1" 66 | 67 | - name: Clear NGC Config 68 | shell: ngc config clear 69 | 70 | - name: Clear vGPU files 71 | file: 72 | path: ./vgpu_guest_driver_2_0_v510.47.03/NVIDIA-Linux-x86_64-510.47.03-grid.run 73 | state: absent 74 | 75 | - name: Copy the vGPU client configuration tokens 76 | ignore_errors: true 77 | copy: 78 | src: "{{ item }}" 79 | dest: /etc/nvidia/ClientConfigToken/ 80 | with_fileglob: "{{lookup('pipe', 'pwd')}}/licensetoken_v1/client_configuration_token*.tok" 81 | 82 | - name: Clear the vGPU token 83 | file: 84 | path: ./licensetoken_v1 85 | state: absent 86 | 87 | - name: Create a gridd.conf for vGPU 88 | copy: 89 | dest: /etc/nvidia/gridd.conf 90 | content: | 91 | FeatureType=1 92 | 93 | - name: Reboot the system 94 | when: nvidia_mod.rc == 1 95 | reboot: 96 | reboot_timeout: 900 97 |
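# (Added note, not part of the original playbook: once the node is back up and
# nvidia-gridd restarts below, licensing can be spot-checked on the node with
# `nvidia-smi -q | grep -i license`, assuming the grid driver loaded cleanly.)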
98 | - name: Restart vGPU gridd service 99 | when: nvidia_mod.rc == 1 100 | service: 101 | name: nvidia-gridd 102 | state: restarted 103 | -------------------------------------------------------------------------------- /playbooks/files/resourcequota.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ResourceQuota 3 | metadata: 4 | name: gpu-operator-quota 5 | namespace: gpu-operator 6 | spec: 7 | hard: 8 | pods: 100 9 | scopeSelector: 10 | matchExpressions: 11 | - operator: In 12 | scopeName: PriorityClass 13 | values: 14 | - system-node-critical 15 | - system-cluster-critical -------------------------------------------------------------------------------- /playbooks/guides/Cloud_Guide.md: -------------------------------------------------------------------------------- 1 | # Cloud Guide for NVIDIA Cloud Native Stack 2 | 3 | This page describes the steps required to use the Ansible playbooks 4 | 5 | ## Supported cloud environments 6 | 7 | - EKS (Elastic Kubernetes Service) 8 | - GKE (Google Kubernetes Engine) 9 | - AKS (Azure Kubernetes Service) - In Progress 10 | 11 | ## Prerequisites 12 | 13 | - For EKS on AWS 14 | - [AWS IAM role](https://docs.aws.amazon.com/eks/latest/userguide/service_IAM_role.html) to create an EKS cluster 15 | - Update the AWS key values in the `aws_credentials` file 16 | - For GKE on Google Cloud 17 | - Kubernetes Engine Admin and Kubernetes Engine Cluster Admin roles 18 | 19 | 20 | ## Using the Ansible playbooks 21 | This section describes how to use the Ansible playbooks. 22 | 23 | ### Clone the git repository 24 | 25 | Run the below commands to clone the NVIDIA Cloud Native Stack Ansible playbooks. 26 | 27 | ``` 28 | git clone https://github.com/NVIDIA/cloud-native-stack.git 29 | cd cloud-native-stack/playbooks 30 | ``` 31 | 32 | ### Installation 33 | 34 | Edit `csp_values.yaml` and update the required information 35 | 36 | ``` 37 | nano csp_values.yaml 38 | ``` 39 | 40 | If you want to customize any predefined component versions or any other parameters, modify the respective CNS version values file like below and trigger the installation. 41 | 42 | Example: 43 | ``` 44 | $ nano csp_values.yaml 45 | 46 | ## Google cloud values 47 | installon_gke: no 48 | gke_cluster_name: gke-gpu-cluster 49 | #https://cloud.google.com/resource-manager/docs/creating-managing-projects#identifying_projects 50 | gke_project_id: 51 | # https://cloud.google.com/compute/docs/regions-zones#available 52 | gke_zone: us-west1-a 53 | gke_version: "1.25" 54 | #https://console.cloud.google.com/networking/networks/ 55 | gke_network: default 56 | 57 | ##TODO 58 | # https://developers.google.com/identity/protocols/oauth2/service-account 59 | #cred_file: 60 | 61 | ## AWS values 62 | installon_eks: no 63 | eks_cluster_name: eks-gpu 64 | #https://cloud-images.ubuntu.com/aws-eks/ 65 | eks_ami: ami-000ec9ff4552093c1 66 | eks_version: "1.25" 67 | eks_region: us-west-1 68 | instance_type: g4dn.xlarge 69 | 70 | ## Azure values 71 | installon_aks: no 72 | aks_cluster_name: aks-gpu-cluster 73 | #https://learn.microsoft.com/en-us/azure/azure-portal/get-subscription-tenant-id 74 | azure_account_name: 75 | #https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/manage-resource-groups-portal#list-resource-groups 76 | azure_resource_group: 77 | azure_location: eastus 78 | az_k8s_version: "1.25.6" 79 | ## TODO 80 | # https://learn.microsoft.com/en-us/cli/azure/create-an-azure-service-principal-azure-cli#1-create-a-service-principal 81 | #azure_sp_appId: 82 | #azure_sp_password: 83 | #azure_tenant: 84 | 85 | ``` 86 | `NOTE:` Creating the cloud Kubernetes cluster can take a while. 87 | 88 | Install NVIDIA Cloud Native Stack by running the below command. "Skipping" in the Ansible output indicates that the Kubernetes cluster is already up and running. 89 | ``` 90 | bash setup.sh install 91 | ``` 92 |
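To make the edit above concrete: a minimal sketch that targets only EKS flips `installon_eks` and leaves the other providers off (the cluster name, AMI, version, region, and instance type below are the file's existing example values, not recommendations):

```
## AWS values
installon_eks: yes
eks_cluster_name: eks-gpu
eks_ami: ami-000ec9ff4552093c1
eks_version: "1.25"
eks_region: us-west-1
instance_type: g4dn.xlarge
```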
-------------------------------------------------------------------------------- /playbooks/hosts: -------------------------------------------------------------------------------- 1 | [master] 2 | #localhost ansible_ssh_user=nvidia ansible_ssh_pass=nvidiapass ansible_sudo_pass=nvidiapass ansible_ssh_common_args='-o StrictHostKeyChecking=no' 3 | [nodes] -------------------------------------------------------------------------------- /playbooks/older_versions/Ubuntu_Server_v1.2.md: -------------------------------------------------------------------------------- 1 |

# Cloud Native Core Ubuntu Server (x86-64) v1.2

2 | 3 | This page describes the steps required to use Ansible to install the Cloud Native Core. 4 | 5 | The final Cloud Native Core will include: 6 | 7 | - Ubuntu 18.04.3 LTS 8 | - Ansible 2.9.9 9 | - Docker CE 19.03.1 10 | - Kubernetes version 1.15.3 11 | - Helm 3.1.0 12 | - NVIDIA GPU Operator 1.1.7 13 | - NV containerized driver: 440.64.00 14 | - NV container toolkit: 1.0.2 15 | - NV K8S device plug-in: 1.0.0-beta6 16 | - Data Center GPU Manager (DCGM): 1.7.2 17 | 18 | ### Release Notes 19 | 20 | - Added section: "Installing the Ubuntu Operating System" 21 | 22 | ### The following Ansible Playbooks are available 23 | 24 | - [Install Cloud Native Core](https://github.com/NVIDIA/cloud-native-core/blob/master/playbooks/cnc-installation.yaml) 25 | 26 | - [Validate Cloud Native Core ](https://github.com/NVIDIA/cloud-native-core/blob/master/playbooks/cnc-validation.yaml) 27 | 28 | - [Uninstall Cloud Native Core](https://github.com/NVIDIA/cloud-native-core/blob/master/playbooks/cnc-uninstall.yaml) 29 | 30 | 31 | ## Prerequisites 32 | 33 | - You have a NGC-Ready for Edge Server. 34 | - You have Ubuntu Server 18.04.3 LTS installed. 35 | - You will perform a clean install. 36 | - The server has internet connectivity. 37 | 38 | To determine if your system is NGC-Ready for Edge Servers, please review the list of validated systems on the NGC-Ready Systems documentation page: https://docs.nvidia.com/ngc/ngc-ready-systems/index.html 39 | 40 | Please note that the Cloud Native Core is only validated on Intel based NGC-Ready systems with the default kernel (not HWE). Using an AMD EPYC 2nd generation (ROME) NGC-Ready server is not validated yet and will require the HWE kernel and manually disabling nouveau. 41 | 42 | ### Installing the Ubuntu Operating System 43 | These instructions require having Ubuntu Server LTS 18.04.3 on your NGC-Ready system. The Ubuntu Server can be downloaded from http://cdimage.ubuntu.com/releases/bionic/release/. 44 | 45 | Disabling nouveau (not validated and only required with Ubuntu 18.04.3 LTS HWE Kernel): 46 | 47 | ``` 48 | $ sudo nano /etc/modprobe.d/blacklist-nouveau.conf 49 | ``` 50 | 51 | Insert the following: 52 | 53 | ``` 54 | blacklist nouveau 55 | options nouveau modeset=0 56 | ``` 57 | 58 | Regenerate the kernel initramfs: 59 | 60 | ``` 61 | $ sudo update-initramfs -u 62 | ``` 63 | 64 | And reboot your system: 65 | 66 | ``` 67 | $ sudo reboot 68 | ``` 69 | 70 | For more information on installing Ubuntu server please reference the [Ubuntu Server Installation Guide](https://ubuntu.com/tutorials/tutorial-install-ubuntu-server#1-overview). 71 | 72 | ## Using the Ansible playbooks 73 | This section describes how to use the ansible playbooks. 74 | 75 | ### Clone the git repository 76 | 77 | Run the below commands to add the Cloud Native Core ansible playbooks. 
78 | 79 | ``` 80 | $ git clone https://github.com/NVIDIA/cloud-native-core.git 81 | $ cd cloud-native-core/playbooks 82 | ``` 83 | Update the hosts file in playbooks directory with master and worker nodes(if you have) IP's with username and password like below 84 | 85 | ``` 86 | $ sudo nano hosts 87 | 88 | [master] 89 | 10.110.16.178 ansible_ssh_user=nvidia ansible_ssh_pass=nvidipass ansible_sudo_pass=nvidiapass ansible_ssh_common_args='-o StrictHostKeyChecking=no' 90 | [node] 91 | 10.110.16.179 ansible_ssh_user=nvidia ansible_ssh_pass=nvidiapass ansible_sudo_pass=nvidiapass ansible_ssh_common_args='-o StrictHostKeyChecking=no' 92 | ``` 93 | 94 | ## Available Cloud Native Core Version 95 | 96 | Update the Cloud Native Core version as per below, currently supported versions are 97 | 98 | - [1.2](https://github.com/NVIDIA/cloud-native-core/blob/master/playbooks/Ubuntu_Server_v1.2.md) 99 | 100 | ``` 101 | sudo nano cnc_version.yaml 102 | 103 | cnc_version: 1.2 104 | 105 | ``` 106 | 107 | ### Installation 108 | 109 | Install the Cloud Native Core by running the below command. "Skipping" in the ansible output refers to the Kubernetes cluster is up and running. 110 | 111 | ``` 112 | $ sudo bash setup.sh install 113 | ``` 114 | 115 | ### Validation 116 | 117 | Run the below command to check if the installed versions are match with predefined versions of the Cloud Native Core. Here' "Ignored" tasks refer to failed and "Changed/Ok" tasks refer to success. 118 | 119 | Run the validation playbook after 5 minutes once completing the Cloud Native Core installation. Depends on your internet speed, you need to wait more time. 120 | 121 | ``` 122 | $ sudo bash setup.sh validate 123 | ``` 124 | 125 | ### Uninstall 126 | 127 | Run the below command to uninstall the Cloud Native Core. Taks being "ignored" refers to no kubernetes cluster being available. 128 | 129 | ``` 130 | $ sudo bash setup.sh uninstall 131 | ``` 132 | -------------------------------------------------------------------------------- /playbooks/older_versions/Ubuntu_Server_v1.3.md: -------------------------------------------------------------------------------- 1 |

# Cloud Native Core Ubuntu Server (x86-64) v1.3

2 | 3 | This page describes the steps required to use Ansible to install the Cloud Native Core. 4 | 5 | The final Cloud Native Core will include: 6 | 7 | - Ubuntu 20.04.1 LTS 8 | - Docker CE 19.03.12 9 | - Kubernetes version 1.15.3 10 | - Helm 3.3.3 11 | - NVIDIA GPU Operator 1.3.0 12 | - NV containerized driver: 450.80.02 13 | - NV container toolkit: 1.3.0 14 | - NV K8S device plug-in: 0.7.0 15 | - Data Center GPU Manager (DCGM): 2.1.0 16 | - Node Feature Discovery: 0.6.0 17 | 18 | ### Release Notes 19 | 20 | - Added section: "Installing the Ubuntu Operating System" 21 | 22 | ### The following Ansible Playbooks are available 23 | 24 | - [Install Cloud Native Core](https://github.com/NVIDIA/cloud-native-core/blob/master/playbooks/cnc-installation.yaml) 25 | 26 | - [Validate Cloud Native Core ](https://github.com/NVIDIA/cloud-native-core/blob/master/playbooks/cnc-validation.yaml) 27 | 28 | - [Uninstall Cloud Native Core](https://github.com/NVIDIA/cloud-native-core/blob/master/playbooks/cnc-uninstall.yaml) 29 | 30 | ## Prerequisites 31 | 32 | - You have an NGC-Ready for Edge Server. 33 | - You will perform a clean install. 34 | - The server has internet connectivity. 35 | 36 | To determine if your system is NGC-Ready for Edge Servers, please review the list of validated systems on the NGC-Ready Systems documentation page: https://docs.nvidia.com/ngc/ngc-ready-systems/index.html 37 | 38 | Please note that the Cloud Native Core is only validated on Intel based NGC-Ready systems with the default kernel (not HWE). Using an AMD EPYC 2nd generation (ROME) NGC-Ready server is not validated yet and will require the HWE kernel and manually disabling nouveau. 39 | 40 | ### Installing the Ubuntu Operating System 41 | These instructions require having Ubuntu Server LTS 20.04.1 on your NGC-Ready system. The Ubuntu Server can be downloaded from http://cdimage.ubuntu.com/releases/20.04.1/release/. 42 | 43 | Disabling nouveau (not validated and only required with Ubuntu 20.04.1 LTS HWE Kernel): 44 | 45 | ``` 46 | $ sudo nano /etc/modprobe.d/blacklist-nouveau.conf 47 | ``` 48 | 49 | Insert the following: 50 | 51 | ``` 52 | blacklist nouveau 53 | options nouveau modeset=0 54 | ``` 55 | 56 | Regenerate the kernel initramfs: 57 | 58 | ``` 59 | $ sudo update-initramfs -u 60 | ``` 61 | 62 | And reboot your system: 63 | 64 | ``` 65 | $ sudo reboot 66 | ``` 67 | 68 | For more information on installing Ubuntu server please reference the [Ubuntu Server Installation Guide](https://ubuntu.com/tutorials/tutorial-install-ubuntu-server#1-overview). 69 | 70 | ## Using the Ansible playbooks 71 | This section describes how to use the Ansible playbooks. 72 | 73 | ### Clone the git repository 74 | 75 | Run the below commands to clone the Cloud Native Core Ansible playbooks.
76 | 77 | ``` 78 | $ git clone https://github.com/NVIDIA/cloud-native-core.git 79 | $ cd cloud-native-core/playbooks 80 | ``` 81 | 82 | Update the hosts file in the playbooks directory with the master and worker node (if any) IPs, along with username and password, like below 83 | 84 | ``` 85 | $ sudo nano hosts 86 | 87 | [master] 88 | 10.110.16.178 ansible_ssh_user=nvidia ansible_ssh_pass=nvidiapass ansible_sudo_pass=nvidiapass ansible_ssh_common_args='-o StrictHostKeyChecking=no' 89 | [node] 90 | 10.110.16.179 ansible_ssh_user=nvidia ansible_ssh_pass=nvidiapass ansible_sudo_pass=nvidiapass ansible_ssh_common_args='-o StrictHostKeyChecking=no' 91 | ``` 92 | 93 | ## Available Cloud Native Core Versions 94 | 95 | Update the Cloud Native Core version as per below; currently supported versions are 96 | 97 | - [1.2](https://github.com/NVIDIA/cloud-native-core/blob/master/playbooks/Ubuntu_Server_v1.2.md) 98 | - [1.3](https://github.com/NVIDIA/cloud-native-core/blob/master/playbooks/Ubuntu_Server_v1.3.md) 99 | - [2.0](https://github.com/NVIDIA/cloud-native-core/blob/master/playbooks/Ubuntu_Server_v2.0.md) 100 | - [3.0](https://github.com/NVIDIA/cloud-native-core/blob/master/playbooks/Ubuntu_Server_v3.0.md) 101 | 102 | ``` 103 | sudo nano cnc_version.yaml 104 | 105 | cnc_version: 1.3 106 | 107 | ``` 108 | 109 | ### Installation 110 | 111 | Install the Cloud Native Core stack by running the below command. "Skipping" in the Ansible output indicates that the Kubernetes cluster is already up and running. 112 | 113 | ``` 114 | $ bash setup.sh install 115 | ``` 116 | 117 | ### Validation 118 | 119 | Run the below command to check whether the installed versions match the predefined versions of the Cloud Native Core. Here, "Ignored" tasks indicate failure and "Changed/Ok" tasks indicate success. 120 | 121 | Run the validation playbook about 5 minutes after completing the Cloud Native Core installation. Depending on your internet speed, you may need to wait longer. 122 | 123 | ``` 124 | $ bash setup.sh validate 125 | ``` 126 | 127 | ### Uninstall 128 | 129 | Run the below command to uninstall the Cloud Native Core. Tasks being "ignored" indicate that no Kubernetes cluster is available. 130 | 131 | ``` 132 | $ bash setup.sh uninstall 133 | ``` 134 | -------------------------------------------------------------------------------- /playbooks/older_versions/Ubuntu_Server_v2.0.md: -------------------------------------------------------------------------------- 1 |

# Cloud Native Core Ubuntu Server (x86-64) v2.0

2 | 3 | This page describes the steps required to use Ansible to install the Cloud Native Core. 4 | 5 | The final Cloud Native Core will include: 6 | 7 | - Ubuntu 18.04.3 LTS 8 | - Ansible 2.9.9 9 | - Docker CE 19.03.5 10 | - Kubernetes version 1.17.5 11 | - Helm 3.1.0 12 | - NVIDIA GPU Operator 1.1.7 13 | - NV containerized driver: 440.64.00 14 | - NV container toolkit: 1.0.2 15 | - NV K8S device plug-in: 1.0.0-beta6 16 | - Data Center GPU Manager (DCGM): 1.7.2 17 | 18 | ### Release Notes 19 | 20 | - Added section: "Installing the Ubuntu Operating System" 21 | 22 | ### The following Ansible Playbooks are available 23 | 24 | - [Install Cloud Native Core](https://github.com/NVIDIA/cloud-native-core/blob/master/playbooks/cnc-installation.yaml) 25 | 26 | - [Validate Cloud Native Core ](https://github.com/NVIDIA/cloud-native-core/blob/master/playbooks/cnc-validation.yaml) 27 | 28 | - [Uninstall Cloud Native Core](https://github.com/NVIDIA/cloud-native-core/blob/master/playbooks/cnc-uninstall.yaml) 29 | 30 | ## Prerequisites 31 | 32 | - You have a NGC-Ready for Edge Server. 33 | - You have Ubuntu Server 18.04.3 LTS installed. 34 | - You will perform a clean install. 35 | - The server has internet connectivity. 36 | 37 | To determine if your system is NGC-Ready for Edge Servers, please review the list of validated systems on the NGC-Ready Systems documentation page: https://docs.nvidia.com/ngc/ngc-ready-systems/index.html 38 | 39 | Please note that the Cloud Native Core is only validated on Intel based NGC-Ready systems with the default kernel (not HWE). Using an AMD EPYC 2nd generation (ROME) NGC-Ready server is not validated yet and will require the HWE kernel and manually disabling nouveau. 40 | 41 | ### Installing the Ubuntu Operating System 42 | These instructions require having Ubuntu Server LTS 18.04.3 on your NGC-Ready system. The Ubuntu Server can be downloaded from http://cdimage.ubuntu.com/releases/bionic/release/. 43 | 44 | Disabling nouveau (not validated and only required with Ubuntu 18.04.3 LTS HWE Kernel): 45 | 46 | ``` 47 | $ sudo nano /etc/modprobe.d/blacklist-nouveau.conf 48 | ``` 49 | 50 | Insert the following: 51 | 52 | ``` 53 | blacklist nouveau 54 | options nouveau modeset=0 55 | ``` 56 | 57 | Regenerate the kernel initramfs: 58 | 59 | ``` 60 | $ sudo update-initramfs -u 61 | ``` 62 | 63 | And reboot your system: 64 | 65 | ``` 66 | $ sudo reboot 67 | ``` 68 | 69 | For more information on installing Ubuntu server please reference the [Ubuntu Server Installation Guide](https://ubuntu.com/tutorials/tutorial-install-ubuntu-server#1-overview). 70 | 71 | ## Using the Ansible playbooks 72 | This section describes how to use the ansible playbooks. 73 | 74 | ### Clone the git repository 75 | 76 | Run the below commands to clone the Cloud Native Core ansible playbooks. 
77 | 78 | ``` 79 | $ git clone https://github.com/NVIDIA/cloud-native-core.git 80 | $ cd cloud-native-core/playbooks 81 | ``` 82 | Update the hosts file in playbooks directory with master and worker nodes(if you have) IP's with username and password like below 83 | 84 | ``` 85 | $ sudo nano hosts 86 | 87 | [master] 88 | 10.110.16.178 ansible_ssh_user=nvidia ansible_ssh_pass=nvidipass ansible_sudo_pass=nvidiapass ansible_ssh_common_args='-o StrictHostKeyChecking=no' 89 | [node] 90 | 10.110.16.179 ansible_ssh_user=nvidia ansible_ssh_pass=nvidiapass ansible_sudo_pass=nvidiapass ansible_ssh_common_args='-o StrictHostKeyChecking=no' 91 | ``` 92 | 93 | ## Available Cloud Native Core Versions 94 | 95 | Update Cloud Native Core Version as per below, currently supported versions are 96 | 97 | - [1.2](https://github.com/NVIDIA/cloud-native-core/blob/master/playbooks/Ubuntu_Server_v1.2.md) 98 | - [2.0](https://github.com/NVIDIA/cloud-native-core/blob/master/playbooks/Ubuntu_Server_v2.0.md) 99 | - [3.0](https://github.com/NVIDIA/cloud-native-core/blob/master/playbooks/Ubuntu_Server_v3.0.md) 100 | 101 | ``` 102 | sudo nano cnc_version.yaml 103 | 104 | cnc_version: 2.0 105 | 106 | ``` 107 | 108 | ### Installation 109 | 110 | Install the Cloud Native Core stack by running the below command. "Skipping" in the ansible output refers to the Kubernetes cluster is up and running. 111 | 112 | ``` 113 | $ sudo bash setup.sh install 114 | ``` 115 | 116 | ### Validation 117 | 118 | Run the below command to check if the installed versions are match with predefined versions of the Cloud Native Core. Here' "Ignored" tasks refer to failed and "Changed/Ok" tasks refer to success. 119 | 120 | Run the validation playbook after 5 minutes once completing the Cloud Native Core Installation. Depends on your internet speed, you need to wait more time. 121 | 122 | ``` 123 | $ sudo bash setup.sh validate 124 | ``` 125 | 126 | ### Uninstall 127 | 128 | Run the below command to uninstall the Cloud Native Core. Taks being "ignored" refers to no kubernetes cluster being available. 129 | 130 | ``` 131 | $ sudo bash setup.sh uninstall 132 | ``` 133 | -------------------------------------------------------------------------------- /playbooks/older_versions/Ubuntu_Server_v3.1.md: -------------------------------------------------------------------------------- 1 |

# Cloud Native Core Ubuntu Server (x86-64) v3.1

2 | 3 | This page describes the steps required to use Ansible to install the Cloud Native Core. 4 | 5 | The final Cloud Native Core will include: 6 | 7 | - Ubuntu 20.04.2 LTS 8 | - Docker CE 19.03.13 9 | - Kubernetes version 1.18.14 10 | - Helm 3.3.3 11 | - NVIDIA GPU Operator 1.6.2 12 | - NV containerized driver: 460.32.03 13 | - NV container toolkit: 1.4.7 14 | - NV K8S device plug-in: 0.8.2 15 | - Data Center GPU Manager (DCGM): 2.2.0 16 | - Node Feature Discovery: 0.6.0 17 | - GPU Feature Discovery: 0.4.1 18 | 19 | ### Release Notes 20 | 21 | - Added support for Multi Node Kubernetes Cluster 22 | 23 | ### The following Ansible Playbooks are available 24 | 25 | - [Install Cloud Native Core](https://github.com/NVIDIA/cloud-native-core/blob/master/playbooks/cnc-installation.yaml) 26 | 27 | - [Validate Cloud Native Core ](https://github.com/NVIDIA/cloud-native-core/blob/master/playbooks/cnc-validation.yaml) 28 | 29 | - [Uninstall Cloud Native Core](https://github.com/NVIDIA/cloud-native-core/blob/master/playbooks/cnc-uninstall.yaml) 30 | 31 | ## Prerequisites 32 | 33 | - You have an NGC-Ready for Edge Server. 34 | - You will perform a clean install. 35 | - The server has internet connectivity. 36 | 37 | To determine if your system is NGC-Ready for Edge Servers, please review the list of validated systems on the NGC-Ready Systems documentation page: https://docs.nvidia.com/ngc/ngc-ready-systems/index.html 38 | 39 | Please note that the Cloud Native Core is only validated on Intel based NGC-Ready systems with the default kernel (not HWE). Using an AMD EPYC 2nd generation (ROME) NGC-Ready server is not validated yet and will require the HWE kernel and manually disabling nouveau. 40 | 41 | ### Installing the Ubuntu Operating System 42 | These instructions require having Ubuntu Server LTS 20.04.1 on your NGC-Ready system. The Ubuntu Server can be downloaded from http://cdimage.ubuntu.com/releases/20.04.1/release/. 43 | 44 | Disabling nouveau (not validated and only required with Ubuntu 20.04.1 LTS HWE Kernel): 45 | 46 | ``` 47 | $ sudo nano /etc/modprobe.d/blacklist-nouveau.conf 48 | ``` 49 | 50 | Insert the following: 51 | 52 | ``` 53 | blacklist nouveau 54 | options nouveau modeset=0 55 | ``` 56 | 57 | Regenerate the kernel initramfs: 58 | 59 | ``` 60 | $ sudo update-initramfs -u 61 | ``` 62 | 63 | And reboot your system: 64 | 65 | ``` 66 | $ sudo reboot 67 | ``` 68 | 69 | For more information on installing Ubuntu server please reference the [Ubuntu Server Installation Guide](https://ubuntu.com/tutorials/tutorial-install-ubuntu-server#1-overview). 70 | 71 | ## Using the Ansible playbooks 72 | This section describes how to use the Ansible playbooks. 73 | 74 | ### Clone the git repository 75 | 76 | Run the below commands to clone the Cloud Native Core Ansible playbooks.
77 | 78 | ``` 79 | $ git clone https://github.com/NVIDIA/cloud-native-core.git 80 | $ cd cloud-native-core/playbooks 81 | ``` 82 | 83 | Update the hosts file in the playbooks directory with the master and worker node (if any) IPs, along with username and password, like below 84 | 85 | ``` 86 | $ sudo nano hosts 87 | 88 | [master] 89 | 10.110.16.178 ansible_ssh_user=nvidia ansible_ssh_pass=nvidiapass ansible_sudo_pass=nvidiapass ansible_ssh_common_args='-o StrictHostKeyChecking=no' 90 | [node] 91 | 10.110.16.179 ansible_ssh_user=nvidia ansible_ssh_pass=nvidiapass ansible_sudo_pass=nvidiapass ansible_ssh_common_args='-o StrictHostKeyChecking=no' 92 | ``` 93 | 94 | 95 | ## Available Cloud Native Core Versions 96 | 97 | Update the Cloud Native Core version as per below; currently supported versions are 98 | 99 | - [1.2](https://github.com/NVIDIA/cloud-native-core/blob/master/playbooks/Ubuntu_Server_v1.2.md) 100 | - [2.0](https://github.com/NVIDIA/cloud-native-core/blob/master/playbooks/Ubuntu_Server_v2.0.md) 101 | - [3.1](https://github.com/NVIDIA/cloud-native-core/blob/master/playbooks/Ubuntu_Server_v3.1.md) 102 | 103 | ``` 104 | sudo nano cnc_version.yaml 105 | 106 | cnc_version: 3.1 107 | 108 | ``` 109 | 110 | ### Installation 111 | 112 | Install the Cloud Native Core stack by running the below command. "Skipping" in the Ansible output indicates that the Kubernetes cluster is already up and running. 113 | 114 | ``` 115 | $ bash setup.sh install 116 | ``` 117 | 118 | ### Validation 119 | 120 | Run the below command to check whether the installed versions match the predefined versions of the Cloud Native Core. Here, "Ignored" tasks indicate failure and "Changed/Ok" tasks indicate success. 121 | 122 | Run the validation playbook about 5 minutes after completing the Cloud Native Core installation. Depending on your internet speed, you may need to wait longer. 123 | 124 | ``` 125 | $ bash setup.sh validate 126 | ``` 127 | 128 | ### Uninstall 129 | 130 | Run the below command to uninstall the Cloud Native Core. Tasks being "ignored" indicate that no Kubernetes cluster is available. 131 | 132 | ``` 133 | $ bash setup.sh uninstall 134 | ``` 135 | -------------------------------------------------------------------------------- /playbooks/older_versions/cnc_values_5.2.yaml: -------------------------------------------------------------------------------- 1 | cnc_version: 5.2 2 | 3 | # GPU Operator Values 4 | enable_gpu_operator: yes 5 | gpu_driver_version: "510.47.03" 6 | enable_mig: no 7 | mig_profile: all-disabled 8 | mig_strategy: single 9 | enable_gds: no 10 | enable_secure_boot: no 11 | enable_vgpu: no 12 | vgpu_license_server: "" 13 | 14 | ## NGC Values 15 | # URL of Helm repo to be added. If using NGC get this from the fetch command in the console 16 | helm_repository: https://helm.ngc.nvidia.com/nvidia 17 | # Name of the helm chart to be deployed 18 | gpu_operator_helm_chart: nvidia/gpu-operator 19 | ## If using a private/protected registry. NGC API Key.
Leave blank for public registries 20 | gpu_operator_registry_password: "" 21 | ## This is most likely an NGC email 22 | gpu_operator_registry_email: "" 23 | 24 | ## This is most likely GPU Operator Driver Registry 25 | gpu_operator_driver_registry: "nvcr.io/nvidia" 26 | gpu_operator_registry_username: "$oauthtoken" 27 | 28 | cnc_validation: no 29 | 30 | # Network Operator Values 31 | ## If the Network Operator is yes then make sure enable_rdma as well yes 32 | enable_network_operator: no 33 | ## Enable RDMA yes for NVIDIA Certification 34 | enable_rdma: no 35 | 36 | # Proxy Configuration 37 | proxy: no 38 | http_proxy: "" 39 | https_proxy: "" 40 | 41 | # Cloud Native Stack for Developers Values 42 | ## Enable for Cloud Native Stack Developers 43 | cnc_docker: no 44 | ## Enable For Cloud Native Stack Developers with TRD Driver 45 | cnc_nvidia_driver: no 46 | 47 | ## Kubernetes apt resources 48 | k8s_apt_key: "https://packages.cloud.google.com/apt/doc/apt-key.gpg" 49 | k8s_apt_repository: " https://apt.kubernetes.io/ kubernetes-xenial main" 50 | k8s_apt_ring: "/etc/apt/keyrings/kubernetes-archive-keyring.gpg" 51 | k8s_registry: "k8s.gcr.io" 52 | -------------------------------------------------------------------------------- /playbooks/older_versions/cnc_values_6.0.yaml: -------------------------------------------------------------------------------- 1 | cnc_version: 6.0 2 | 3 | ## Components Versions 4 | # Container Runtime options are containerd, cri-o 5 | container_runtime: "containerd" 6 | containerd_version: "1.6.0" 7 | k8s_version: "1.23.3" 8 | helm_version: "3.8.0" 9 | gpu_operator_version: "1.9.1" 10 | network_operator_version: "1.1.0" 11 | 12 | # GPU Operator Values 13 | enable_gpu_operator: yes 14 | gpu_driver_version: "510.47.03" 15 | enable_mig: no 16 | mig_profile: all-disabled 17 | mig_strategy: single 18 | enable_gds: no 19 | enable_secure_boot: no 20 | enable_vgpu: no 21 | vgpu_license_server: "" 22 | # URL of Helm repo to be added. If using NGC get this from the fetch command in the console 23 | helm_repository: https://helm.ngc.nvidia.com/nvidia 24 | # Name of the helm chart to be deployed 25 | gpu_operator_helm_chart: nvidia/gpu-operator 26 | ## If using a private/protected registry. NGC API Key.
Leave blank for public registries 27 | gpu_operator_registry_password: "" 28 | ## This is most likely an NGC email 29 | gpu_operator_registry_email: "" 30 | ## This is most likely GPU Operator Driver Registry 31 | gpu_operator_driver_registry: "nvcr.io/nvidia" 32 | gpu_operator_registry_username: "$oauthtoken" 33 | 34 | # Network Operator Values 35 | ## If the Network Operator is yes then make sure enable_rdma as well yes 36 | enable_network_operator: no 37 | ## Enable RDMA yes for NVIDIA Certification 38 | enable_rdma: no 39 | 40 | # Proxy Configuration 41 | proxy: no 42 | http_proxy: "" 43 | https_proxy: "" 44 | 45 | # Cloud Native Stack for Developers Values 46 | ## Enable for Cloud Native Stack Developers 47 | cnc_docker: no 48 | ## Enable For Cloud Native Stack Developers with TRD Driver 49 | cnc_nvidia_driver: no 50 | 51 | ## Kubernetes apt resources 52 | k8s_apt_key: "https://packages.cloud.google.com/apt/doc/apt-key.gpg" 53 | k8s_apt_repository: " https://apt.kubernetes.io/ kubernetes-xenial main" 54 | k8s_apt_ring: "/etc/apt/keyrings/kubernetes-archive-keyring.gpg" 55 | k8s_registry: "k8s.gcr.io" 56 | 57 | ## Cloud Native Stack Validation 58 | cnc_validation: no -------------------------------------------------------------------------------- /playbooks/older_versions/cnc_values_6.1.yaml: -------------------------------------------------------------------------------- 1 | cnc_version: 6.1 2 | 3 | ## Components Versions 4 | # Container Runtime options are containerd, cri-o 5 | container_runtime: "containerd" 6 | containerd_version: "1.6.2" 7 | k8s_version: "1.23.5" 8 | helm_version: "3.8.1" 9 | gpu_operator_version: "1.10.1" 10 | network_operator_version: "1.1.0" 11 | 12 | # GPU Operator Values 13 | enable_gpu_operator: yes 14 | gpu_driver_version: "510.47.03" 15 | enable_mig: no 16 | mig_profile: all-disabled 17 | mig_strategy: single 18 | enable_gds: no 19 | enable_secure_boot: no 20 | enable_vgpu: no 21 | vgpu_license_server: "" 22 | # URL of Helm repo to be added. If using NGC get this from the fetch command in the console 23 | helm_repository: https://helm.ngc.nvidia.com/nvidia 24 | # Name of the helm chart to be deployed 25 | gpu_operator_helm_chart: nvidia/gpu-operator 26 | ## If using a private/protected registry. NGC API Key.
Leave blank for public registries 27 | gpu_operator_registry_password: "" 28 | ## This is most likely an NGC email 29 | gpu_operator_registry_email: "" 30 | ## This is most likely GPU Operator Driver Registry 31 | gpu_operator_driver_registry: "nvcr.io/nvidia" 32 | gpu_operator_registry_username: "$oauthtoken" 33 | 34 | # Network Operator Values 35 | ## If the Network Operator is yes then make sure enable_rdma as well yes 36 | enable_network_operator: no 37 | ## Enable RDMA yes for NVIDIA Certification 38 | enable_rdma: no 39 | 40 | # Proxy Configuration 41 | proxy: no 42 | http_proxy: "" 43 | https_proxy: "" 44 | 45 | # Cloud Native Stack for Developers Values 46 | ## Enable for Cloud Native Stack Developers 47 | cnc_docker: no 48 | ## Enable For Cloud Native Stack Developers with TRD Driver 49 | cnc_nvidia_driver: no 50 | 51 | ## Kubernetes apt resources 52 | k8s_apt_key: "https://packages.cloud.google.com/apt/doc/apt-key.gpg" 53 | k8s_apt_repository: " https://apt.kubernetes.io/ kubernetes-xenial main" 54 | k8s_apt_ring: "/etc/apt/keyrings/kubernetes-archive-keyring.gpg" 55 | k8s_registry: "k8s.gcr.io" 56 | 57 | ## Cloud Native Stack Validation 58 | cnc_validation: no -------------------------------------------------------------------------------- /playbooks/older_versions/cnc_values_6.2.yaml: -------------------------------------------------------------------------------- 1 | cnc_version: 6.2 2 | 3 | ## Components Versions 4 | # Container Runtime options are containerd, cri-o 5 | container_runtime: "containerd" 6 | containerd_version: "1.6.5" 7 | k8s_version: "1.23.8" 8 | helm_version: "3.8.2" 9 | gpu_operator_version: "1.11.0" 10 | network_operator_version: "1.2.0" 11 | 12 | # GPU Operator Values 13 | enable_gpu_operator: yes 14 | gpu_driver_version: "515.48.07" 15 | enable_mig: no 16 | mig_profile: all-disabled 17 | mig_strategy: single 18 | enable_gds: no 19 | enable_secure_boot: no 20 | enable_vgpu: no 21 | vgpu_license_server: "" 22 | # URL of Helm repo to be added. If using NGC get this from the fetch command in the console 23 | helm_repository: https://helm.ngc.nvidia.com/nvidia 24 | # Name of the helm chart to be deployed 25 | gpu_operator_helm_chart: nvidia/gpu-operator 26 | ## If using a private/protected registry. NGC API Key.
Leave blank for public registries 27 | gpu_operator_registry_password: "" 28 | ## This is most likely an NGC email 29 | gpu_operator_registry_email: "" 30 | ## This is most likely GPU Operator Driver Registry 31 | gpu_operator_driver_registry: "nvcr.io/nvidia" 32 | gpu_operator_registry_username: "$oauthtoken" 33 | 34 | # Network Operator Values 35 | ## If the Network Operator is yes then make sure enable_rdma as well yes 36 | enable_network_operator: no 37 | ## Enable RDMA yes for NVIDIA Certification 38 | enable_rdma: no 39 | 40 | # Proxy Configuration 41 | proxy: no 42 | http_proxy: "" 43 | https_proxy: "" 44 | 45 | # Cloud Native Stack for Developers Values 46 | ## Enable for Cloud Native Stack Developers 47 | cnc_docker: no 48 | ## Enable For Cloud Native Stack Developers with TRD Driver 49 | cnc_nvidia_driver: no 50 | 51 | ## Kubernetes apt resources 52 | k8s_apt_key: "https://packages.cloud.google.com/apt/doc/apt-key.gpg" 53 | k8s_apt_repository: " https://apt.kubernetes.io/ kubernetes-xenial main" 54 | k8s_apt_ring: "/etc/apt/keyrings/kubernetes-archive-keyring.gpg" 55 | k8s_registry: "k8s.gcr.io" 56 | 57 | ## Cloud Native Stack Validation 58 | cnc_validation: no -------------------------------------------------------------------------------- /playbooks/older_versions/cnc_values_6.3.yaml: -------------------------------------------------------------------------------- 1 | cnc_version: 6.3 2 | 3 | ## Components Versions 4 | # Container Runtime options are containerd, cri-o 5 | container_runtime: "containerd" 6 | containerd_version: "1.6.8" 7 | k8s_version: "1.23.12" 8 | helm_version: "3.10.0" 9 | gpu_operator_version: "22.9.0" 10 | network_operator_version: "1.3.0" 11 | 12 | # GPU Operator Values 13 | enable_gpu_operator: yes 14 | gpu_driver_version: "520.61.07" 15 | enable_mig: no 16 | mig_profile: all-disabled 17 | mig_strategy: single 18 | enable_gds: no 19 | enable_secure_boot: no 20 | enable_vgpu: no 21 | vgpu_license_server: "" 22 | # URL of Helm repo to be added. If using NGC get this from the fetch command in the console 23 | helm_repository: https://helm.ngc.nvidia.com/nvidia 24 | # Name of the helm chart to be deployed 25 | gpu_operator_helm_chart: nvidia/gpu-operator 26 | ## If using a private/protected registry. NGC API Key.
Leave blank for public registries 27 | gpu_operator_registry_password: "" 28 | ## This is most likely an NGC email 29 | gpu_operator_registry_email: "" 30 | ## This is most likely GPU Operator Driver Registry 31 | gpu_operator_driver_registry: "nvcr.io/nvidia" 32 | gpu_operator_registry_username: "$oauthtoken" 33 | 34 | # Network Operator Values 35 | ## If the Network Operator is yes then make sure enable_rdma as well yes 36 | enable_network_operator: no 37 | ## Enable RDMA yes for NVIDIA Certification 38 | enable_rdma: no 39 | 40 | # Proxy Configuration 41 | proxy: no 42 | http_proxy: "" 43 | https_proxy: "" 44 | 45 | # Cloud Native Stack for Developers Values 46 | ## Enable for Cloud Native Stack Developers 47 | cnc_docker: no 48 | ## Enable For Cloud Native Stack Developers with TRD Driver 49 | cnc_nvidia_driver: no 50 | 51 | ## Kubernetes apt resources 52 | k8s_apt_key: "https://packages.cloud.google.com/apt/doc/apt-key.gpg" 53 | k8s_apt_repository: " https://apt.kubernetes.io/ kubernetes-xenial main" 54 | k8s_apt_ring: "/etc/apt/keyrings/kubernetes-archive-keyring.gpg" 55 | k8s_registry: "k8s.gcr.io" 56 | 57 | ## Cloud Native Stack Validation 58 | cnc_validation: no -------------------------------------------------------------------------------- /playbooks/older_versions/cnc_values_6.4.yaml: -------------------------------------------------------------------------------- 1 | cnc_version: 6.4 2 | 3 | ## Components Versions 4 | # Container Runtime options are containerd, cri-o 5 | container_runtime: "containerd" 6 | containerd_version: "1.6.10" 7 | crio_version: "1.23.5" 8 | k8s_version: "1.23.14" 9 | calico_version: "3.24.1" 10 | flannel_version: "0.20.0" 11 | helm_version: "3.10.2" 12 | gpu_operator_version: "22.9.1" 13 | network_operator_version: "1.4.0" 14 | 15 | # GPU Operator Values 16 | enable_gpu_operator: yes 17 | gpu_driver_version: "525.60.13" 18 | enable_mig: no 19 | mig_profile: all-disabled 20 | mig_strategy: single 21 | enable_gds: no 22 | enable_secure_boot: no 23 | enable_vgpu: no 24 | vgpu_license_server: "" 25 | # URL of Helm repo to be added. If using NGC get this from the fetch command in the console 26 | helm_repository: https://helm.ngc.nvidia.com/nvidia 27 | # Name of the helm chart to be deployed 28 | gpu_operator_helm_chart: nvidia/gpu-operator 29 | ## If using a private/protected registry. NGC API Key.
Leave blank for public registries 30 | gpu_operator_registry_password: "" 31 | ## This is most likely an NGC email 32 | gpu_operator_registry_email: "" 33 | ## This is most likely GPU Operator Driver Registry 34 | gpu_operator_driver_registry: "nvcr.io/nvidia" 35 | gpu_operator_registry_username: "$oauthtoken" 36 | 37 | # Network Operator Values 38 | ## If the Network Operator is yes then make sure enable_rdma as well yes 39 | enable_network_operator: no 40 | ## Enable RDMA yes for NVIDIA Certification 41 | enable_rdma: no 42 | 43 | # Proxy Configuration 44 | proxy: no 45 | http_proxy: "" 46 | https_proxy: "" 47 | 48 | # Cloud Native Stack for Developers Values 49 | ## Enable for Cloud Native Stack Developers 50 | cnc_docker: no 51 | ## Enable For Cloud Native Stack Developers with TRD Driver 52 | cnc_nvidia_driver: no 53 | 54 | ## Kubernetes apt resources 55 | k8s_apt_key: "https://packages.cloud.google.com/apt/doc/apt-key.gpg" 56 | k8s_apt_repository: " https://apt.kubernetes.io/ kubernetes-xenial main" 57 | k8s_apt_ring: "/etc/apt/keyrings/kubernetes-archive-keyring.gpg" 58 | k8s_registry: "k8s.gcr.io" 59 | 60 | ## Cloud Native Stack Validation 61 | cnc_validation: no -------------------------------------------------------------------------------- /playbooks/older_versions/cnc_values_7.0.yaml: -------------------------------------------------------------------------------- 1 | cnc_version: 7.0 2 | 3 | ## Components Versions 4 | # Container Runtime options are containerd, cri-o 5 | container_runtime: "containerd" 6 | containerd_version: "1.6.6" 7 | crio_version: "1.24.5" 8 | k8s_version: "1.24.2" 9 | calico_version: "3.23" 10 | flannel_version: "0.19.2" 11 | helm_version: "3.9.0" 12 | gpu_operator_version: "1.11.0" 13 | network_operator_version: "1.3.0" 14 | local_path_provisioner: "0.0.24" 15 | 16 | # GPU Operator Values 17 | enable_gpu_operator: yes 18 | gpu_driver_version: "515.48.07" 19 | enable_mig: no 20 | mig_profile: all-disabled 21 | mig_strategy: single 22 | enable_gds: no 23 | enable_secure_boot: no 24 | enable_vgpu: no 25 | vgpu_license_server: "" 26 | # URL of Helm repo to be added. If using NGC get this from the fetch command in the console 27 | helm_repository: https://helm.ngc.nvidia.com/nvidia 28 | # Name of the helm chart to be deployed 29 | gpu_operator_helm_chart: nvidia/gpu-operator 30 | ## If using a private/protected registry. NGC API Key.
Leave blank for public registries 31 | gpu_operator_registry_password: "" 32 | ## This is most likely an NGC email 33 | gpu_operator_registry_email: "" 34 | ## This is most likely GPU Operator Driver Registry 35 | gpu_operator_driver_registry: "nvcr.io/nvidia" 36 | gpu_operator_registry_username: "$oauthtoken" 37 | 38 | # Network Operator Values 39 | ## If the Network Operator is yes then make sure enable_rdma as well yes 40 | enable_network_operator: no 41 | ## Enable RDMA yes for NVIDIA Certification 42 | enable_rdma: no 43 | 44 | # Proxy Configuration 45 | proxy: no 46 | http_proxy: "" 47 | https_proxy: "" 48 | 49 | # Cloud Native Stack for Developers Values 50 | ## Enable for Cloud Native Stack Developers 51 | cnc_docker: no 52 | ## Enable For Cloud Native Stack Developers with TRD Driver 53 | cnc_nvidia_driver: no 54 | 55 | ## Kubernetes resources 56 | k8s_apt_key: "https://packages.cloud.google.com/apt/doc/apt-key.gpg" 57 | k8s_apt_repository: " https://apt.kubernetes.io/ kubernetes-xenial main" 58 | k8s_apt_ring: "/etc/apt/keyrings/kubernetes-archive-keyring.gpg" 59 | k8s_registry: "k8s.gcr.io" 60 | 61 | # Local Path Provisioner as Storage option 62 | storage: no 63 | 64 | ## Cloud Native Stack Validation 65 | cnc_validation: no -------------------------------------------------------------------------------- /playbooks/older_versions/cnc_values_7.1.yaml: -------------------------------------------------------------------------------- 1 | cnc_version: 7.1 2 | 3 | ## Components Versions 4 | # Container Runtime options are containerd, cri-o 5 | container_runtime: "containerd" 6 | containerd_version: "1.6.8" 7 | crio_version: "1.24.5" 8 | k8s_version: "1.24.6" 9 | calico_version: "3.24.1" 10 | flannel_version: "0.19.2" 11 | helm_version: "3.10.0" 12 | gpu_operator_version: "22.9.0" 13 | network_operator_version: "1.3.0" 14 | local_path_provisioner: "0.0.24" 15 | 16 | # GPU Operator Values 17 | enable_gpu_operator: yes 18 | gpu_driver_version: "520.61.07" 19 | enable_mig: no 20 | mig_profile: all-disabled 21 | mig_strategy: single 22 | enable_gds: no 23 | enable_secure_boot: no 24 | enable_vgpu: no 25 | vgpu_license_server: "" 26 | # URL of Helm repo to be added. If using NGC get this from the fetch command in the console 27 | helm_repository: https://helm.ngc.nvidia.com/nvidia 28 | # Name of the helm chart to be deployed 29 | gpu_operator_helm_chart: nvidia/gpu-operator 30 | ## If using a private/protected registry. NGC API Key.
Leave blank for public registries 31 | gpu_operator_registry_password: "" 32 | ## This is most likely an NGC email 33 | gpu_operator_registry_email: "" 34 | ## This is most likely GPU Operator Driver Registry 35 | gpu_operator_driver_registry: "nvcr.io/nvidia" 36 | gpu_operator_registry_username: "$oauthtoken" 37 | 38 | # Network Operator Values 39 | ## If enable_network_operator is yes, make sure enable_rdma is set to yes as well 40 | enable_network_operator: no 41 | ## Set enable_rdma to yes for NVIDIA Certification 42 | enable_rdma: no 43 | 44 | # Proxy Configuration 45 | proxy: no 46 | http_proxy: "" 47 | https_proxy: "" 48 | 49 | # Cloud Native Stack for Developers Values 50 | ## Enable for Cloud Native Stack Developers 51 | cnc_docker: no 52 | ## Enable for Cloud Native Stack Developers with TRD Driver 53 | cnc_nvidia_driver: no 54 | 55 | ## Kubernetes resources 56 | k8s_apt_key: "https://packages.cloud.google.com/apt/doc/apt-key.gpg" 57 | k8s_apt_repository: " https://apt.kubernetes.io/ kubernetes-xenial main" 58 | k8s_apt_ring: "/etc/apt/keyrings/kubernetes-archive-keyring.gpg" 59 | k8s_registry: "k8s.gcr.io" 60 | 61 | # Local Path Provisioner as a Storage option 62 | storage: no 63 | 64 | ## Cloud Native Stack Validation 65 | cnc_validation: no -------------------------------------------------------------------------------- /playbooks/older_versions/cnc_values_7.2.yaml: -------------------------------------------------------------------------------- 1 | cnc_version: 7.2 2 | 3 | ## Components Versions 4 | # Container Runtime options are containerd, cri-o 5 | container_runtime: "containerd" 6 | containerd_version: "1.6.10" 7 | crio_version: "1.24.5" 8 | k8s_version: "1.24.8" 9 | calico_version: "3.24.5" 10 | flannel_version: "0.20.0" 11 | helm_version: "3.10.2" 12 | gpu_operator_version: "22.9.1" 13 | network_operator_version: "1.4.0" 14 | local_path_provisioner: "0.0.24" 15 | 16 | # GPU Operator Values 17 | enable_gpu_operator: yes 18 | gpu_driver_version: "525.60.13" 19 | enable_mig: no 20 | mig_profile: all-disabled 21 | mig_strategy: single 22 | enable_gds: no 23 | enable_secure_boot: no 24 | enable_vgpu: no 25 | vgpu_license_server: "" 26 | 27 | ## NGC Values 28 | # URL of Helm repo to be added. If using NGC, get this from the fetch command in the console 29 | helm_repository: https://helm.ngc.nvidia.com/nvidia 30 | # Name of the helm chart to be deployed 31 | gpu_operator_helm_chart: nvidia/gpu-operator 32 | ## If using a private/protected registry, set the NGC API Key here.
Leave blank for public registries 33 | gpu_operator_registry_password: "" 34 | ## This is most likely an NGC email 35 | gpu_operator_registry_email: "" 36 | 37 | ## This is most likely GPU Operator Driver Registry 38 | gpu_operator_driver_registry: "nvcr.io/nvidia" 39 | gpu_operator_registry_username: "$oauthtoken" 40 | 41 | # Network Operator Values 42 | ## If enable_network_operator is yes, make sure enable_rdma is set to yes as well 43 | enable_network_operator: no 44 | ## Set enable_rdma to yes for NVIDIA Certification 45 | enable_rdma: no 46 | 47 | # Proxy Configuration 48 | proxy: no 49 | http_proxy: "" 50 | https_proxy: "" 51 | 52 | # Cloud Native Stack for Developers Values 53 | ## Enable for Cloud Native Stack Developers 54 | cnc_docker: no 55 | ## Enable for Cloud Native Stack Developers with TRD Driver 56 | cnc_nvidia_driver: no 57 | 58 | ## Kubernetes resources 59 | k8s_apt_key: "https://packages.cloud.google.com/apt/doc/apt-key.gpg" 60 | k8s_apt_repository: " https://apt.kubernetes.io/ kubernetes-xenial main" 61 | k8s_apt_ring: "/etc/apt/keyrings/kubernetes-archive-keyring.gpg" 62 | k8s_registry: "k8s.gcr.io" 63 | 64 | # Local Path Provisioner as a Storage option 65 | storage: no 66 | 67 | ## Cloud Native Stack Validation 68 | cnc_validation: no -------------------------------------------------------------------------------- /playbooks/older_versions/cnc_values_7.3.yaml: -------------------------------------------------------------------------------- 1 | cnc_version: 7.3 2 | 3 | ## Components Versions 4 | # Container Runtime options are containerd, cri-o 5 | container_runtime: "containerd" 6 | containerd_version: "1.6.16" 7 | crio_version: "1.24.5" 8 | k8s_version: "1.24.10" 9 | calico_version: "3.25.0" 10 | flannel_version: "0.20.2" 11 | helm_version: "3.11.0" 12 | gpu_operator_version: "22.9.2" 13 | network_operator_version: "1.4.0" 14 | local_path_provisioner: "0.0.24" 15 | 16 | # GPU Operator Values 17 | enable_gpu_operator: yes 18 | gpu_driver_version: "525.85.12" 19 | enable_mig: no 20 | mig_profile: all-disabled 21 | mig_strategy: single 22 | enable_gds: no 23 | enable_secure_boot: no 24 | enable_vgpu: no 25 | vgpu_license_server: "" 26 | # URL of Helm repo to be added. If using NGC, get this from the fetch command in the console 27 | helm_repository: https://helm.ngc.nvidia.com/nvidia 28 | # Name of the helm chart to be deployed 29 | gpu_operator_helm_chart: nvidia/gpu-operator 30 | ## If using a private/protected registry, set the NGC API Key here.
Leave blank for public registries 31 | gpu_operator_registry_password: "" 32 | ## This is most likely an NGC email 33 | gpu_operator_registry_email: "" 34 | ## This is most likely GPU Operator Driver Registry 35 | gpu_operator_driver_registry: "nvcr.io/nvidia" 36 | gpu_operator_registry_username: "$oauthtoken" 37 | 38 | # Network Operator Values 39 | ## If enable_network_operator is yes, make sure enable_rdma is set to yes as well 40 | enable_network_operator: no 41 | ## Set enable_rdma to yes for NVIDIA Certification 42 | enable_rdma: no 43 | 44 | # Proxy Configuration 45 | proxy: no 46 | http_proxy: "" 47 | https_proxy: "" 48 | 49 | # Cloud Native Stack for Developers Values 50 | ## Enable for Cloud Native Stack Developers 51 | cnc_docker: no 52 | ## Enable for Cloud Native Stack Developers with TRD Driver 53 | cnc_nvidia_driver: no 54 | 55 | ## Kubernetes resources 56 | k8s_apt_key: "https://packages.cloud.google.com/apt/doc/apt-key.gpg" 57 | k8s_apt_repository: " https://apt.kubernetes.io/ kubernetes-xenial main" 58 | k8s_apt_ring: "/etc/apt/keyrings/kubernetes-archive-keyring.gpg" 59 | k8s_registry: "registry.k8s.io" 60 | 61 | # Local Path Provisioner as a Storage option 62 | storage: no 63 | 64 | ## Cloud Native Stack Validation 65 | cnc_validation: no -------------------------------------------------------------------------------- /playbooks/older_versions/cnc_values_7.4.yaml: -------------------------------------------------------------------------------- 1 | cnc_version: 7.4 2 | 3 | ## Components Versions 4 | # Container Runtime options are containerd, cri-o 5 | container_runtime: "containerd" 6 | containerd_version: "1.7.0" 7 | crio_version: "1.24.5" 8 | k8s_version: "1.24.12" 9 | calico_version: "3.25.1" 10 | flannel_version: "0.21.4" 11 | helm_version: "3.11.3" 12 | gpu_operator_version: "23.3.2" 13 | network_operator_version: "23.4.0" 14 | local_path_provisioner: "0.0.24" 15 | 16 | # GPU Operator Values 17 | enable_gpu_operator: yes 18 | gpu_driver_version: "525.105.17" 19 | enable_mig: no 20 | mig_profile: all-disabled 21 | mig_strategy: single 22 | enable_gds: no 23 | enable_secure_boot: no 24 | enable_vgpu: no 25 | vgpu_license_server: "" 26 | # URL of Helm repo to be added. If using NGC, get this from the fetch command in the console 27 | helm_repository: https://helm.ngc.nvidia.com/nvidia 28 | # Name of the helm chart to be deployed 29 | gpu_operator_helm_chart: nvidia/gpu-operator 30 | ## If using a private/protected registry, set the NGC API Key here.
Leave blank for public registries 31 | gpu_operator_registry_password: "" 32 | ## This is most likely an NGC email 33 | gpu_operator_registry_email: "" 34 | ## This is most likely GPU Operator Driver Registry 35 | gpu_operator_driver_registry: "nvcr.io/nvidia" 36 | gpu_operator_registry_username: "$oauthtoken" 37 | 38 | # Network Operator Values 39 | ## If enable_network_operator is yes, make sure enable_rdma is set to yes as well 40 | enable_network_operator: no 41 | ## Set enable_rdma to yes for NVIDIA Certification 42 | enable_rdma: no 43 | 44 | # Proxy Configuration 45 | proxy: no 46 | http_proxy: "" 47 | https_proxy: "" 48 | 49 | # Cloud Native Stack for Developers Values 50 | ## Enable for Cloud Native Stack Developers 51 | cnc_docker: no 52 | ## Enable for Cloud Native Stack Developers with TRD Driver 53 | cnc_nvidia_driver: no 54 | 55 | ## Kubernetes resources 56 | k8s_apt_key: "https://packages.cloud.google.com/apt/doc/apt-key.gpg" 57 | k8s_apt_repository: " https://apt.kubernetes.io/ kubernetes-xenial main" 58 | k8s_apt_ring: "/etc/apt/keyrings/kubernetes-archive-keyring.gpg" 59 | k8s_registry: "registry.k8s.io" 60 | 61 | # Local Path Provisioner as a Storage option 62 | storage: no 63 | 64 | ## Cloud Native Stack Validation 65 | cnc_validation: no -------------------------------------------------------------------------------- /playbooks/older_versions/cnc_values_7.5.yaml: -------------------------------------------------------------------------------- 1 | cnc_version: 7.5 2 | 3 | ## Components Versions 4 | # Container Runtime options are containerd, cri-o 5 | container_runtime: "containerd" 6 | containerd_version: "1.7.2" 7 | crio_version: "1.24.6" 8 | k8s_version: "1.24.14" 9 | calico_version: "3.26.1" 10 | flannel_version: "0.22.0" 11 | helm_version: "3.12.1" 12 | gpu_operator_version: "23.3.2" 13 | network_operator_version: "23.5.0" 14 | local_path_provisioner: "0.0.24" 15 | 16 | # GPU Operator Values 17 | enable_gpu_operator: yes 18 | gpu_driver_version: "535.54.03" 19 | enable_mig: no 20 | mig_profile: all-disabled 21 | mig_strategy: single 22 | enable_gds: no 23 | enable_secure_boot: no 24 | enable_vgpu: no 25 | vgpu_license_server: "" 26 | # URL of Helm repo to be added. If using NGC, get this from the fetch command in the console 27 | helm_repository: https://helm.ngc.nvidia.com/nvidia 28 | # Name of the helm chart to be deployed 29 | gpu_operator_helm_chart: nvidia/gpu-operator 30 | ## If using a private/protected registry, set the NGC API Key here.
Leave blank for public registries 31 | gpu_operator_registry_password: "" 32 | ## This is most likely an NGC email 33 | gpu_operator_registry_email: "" 34 | ## This is most likely GPU Operator Driver Registry 35 | gpu_operator_driver_registry: "nvcr.io/nvidia" 36 | gpu_operator_registry_username: "$oauthtoken" 37 | 38 | # Network Operator Values 39 | ## If enable_network_operator is yes, make sure enable_rdma is set to yes as well 40 | enable_network_operator: no 41 | ## Set enable_rdma to yes for NVIDIA Certification 42 | enable_rdma: no 43 | 44 | # Proxy Configuration 45 | proxy: no 46 | http_proxy: "" 47 | https_proxy: "" 48 | 49 | # Cloud Native Stack for Developers Values 50 | ## Enable for Cloud Native Stack Developers 51 | cnc_docker: no 52 | ## Enable for Cloud Native Stack Developers with TRD Driver 53 | cnc_nvidia_driver: no 54 | 55 | ## Kubernetes resources 56 | k8s_apt_key: "https://packages.cloud.google.com/apt/doc/apt-key.gpg" 57 | k8s_apt_repository: " https://apt.kubernetes.io/ kubernetes-xenial main" 58 | k8s_apt_ring: "/etc/apt/keyrings/kubernetes-archive-keyring.gpg" 59 | k8s_registry: "registry.k8s.io" 60 | 61 | # Local Path Provisioner as a Storage option 62 | storage: no 63 | 64 | ## Cloud Native Stack Validation 65 | cnc_validation: no -------------------------------------------------------------------------------- /playbooks/older_versions/cns_values_10.0.yaml: -------------------------------------------------------------------------------- 1 | cns_version: 10.0 2 | 3 | ## MicroK8s cluster 4 | microk8s: no 5 | ## Kubernetes Install with Kubeadm 6 | install_k8s: yes 7 | 8 | ## Components Versions 9 | # Container Runtime options are containerd, cri-o, cri-dockerd 10 | container_runtime: "containerd" 11 | containerd_version: "1.7.0" 12 | runc_version: "1.1.11" 13 | cni_plugins_version: "1.4.0" 14 | containerd_max_concurrent_downloads: "5" 15 | nvidia_container_toolkit_version: "1.13.0" 16 | crio_version: "1.27.0" 17 | cri_dockerd_version: "0.3.4" 18 | k8s_version: "1.27.0" 19 | calico_version: "3.25.1" 20 | flannel_version: "0.21.4" 21 | helm_version: "3.11.3" 22 | gpu_operator_version: "23.3.2" 23 | network_operator_version: "23.4.0" 24 | local_path_provisioner: "0.0.24" 25 | nfs_provisioner: "4.0.18" 26 | metallb_version: "0.14.5" 27 | kserve_version: "0.13" 28 | prometheus_stack: "61.3.0" 29 | elastic_stack: "8.14.1" 30 | 31 | # GPU Operator Values 32 | enable_gpu_operator: yes 33 | confidential_computing: no 34 | gpu_driver_version: "525.105.17" 35 | use_open_kernel_module: no 36 | enable_mig: no 37 | mig_profile: all-disabled 38 | mig_strategy: single 39 | enable_gds: no 40 | # Secure Boot for Ubuntu only 41 | enable_secure_boot: no 42 | enable_vgpu: no 43 | vgpu_license_server: "" 44 | # URL of Helm repo to be added. If using NGC, get this from the fetch command in the console 45 | helm_repository: https://helm.ngc.nvidia.com/nvidia 46 | # Name of the helm chart to be deployed 47 | gpu_operator_helm_chart: nvidia/gpu-operator 48 | ## If using a private/protected registry, set the NGC API Key here.
Leave blank for public registries 49 | gpu_operator_registry_password: "" 50 | ## This is most likely an NGC email 51 | gpu_operator_registry_email: "" 52 | ## This is most likely GPU Operator Driver Registry 53 | gpu_operator_driver_registry: "nvcr.io/nvidia" 54 | gpu_operator_registry_username: "$oauthtoken" 55 | 56 | # Network Operator Values 57 | ## If enable_network_operator is yes, make sure enable_rdma is set to yes as well 58 | enable_network_operator: no 59 | ## Set enable_rdma to yes for NVIDIA Certification 60 | enable_rdma: no 61 | 62 | # Proxy Configuration 63 | proxy: no 64 | http_proxy: "" 65 | https_proxy: "" 66 | 67 | # Cloud Native Stack for Developers Values 68 | ## Enable for Cloud Native Stack Developers 69 | cns_docker: no 70 | ## Enable for Cloud Native Stack Developers with TRD Driver 71 | cns_nvidia_driver: no 72 | 73 | ## Kubernetes resources 74 | k8s_apt_key: "https://pkgs.k8s.io/core:/stable:/v1.27/deb/Release.key" 75 | k8s_gpg_key: "https://pkgs.k8s.io/core:/stable:/v1.27/rpm/repodata/repomd.xml.key" 76 | k8s_apt_ring: "/etc/apt/keyrings/kubernetes-apt-keyring.gpg" 77 | k8s_registry: "registry.k8s.io" 78 | 79 | # Local Path Provisioner and NFS Provisioner as Storage options 80 | storage: no 81 | 82 | # Monitoring Stack Prometheus/Grafana with GPU Metrics and Elastic Logging stack 83 | monitoring: no 84 | 85 | # Enable Kserve on Cloud Native Stack with Istio and Cert-Manager 86 | kserve: no 87 | 88 | # Install MetalLB 89 | loadbalancer: no 90 | # Example input: loadbalancer_ip: "10.117.20.50/32"; this can be the node/host IP 91 | loadbalancer_ip: "" 92 | 93 | ## Cloud Native Stack Validation 94 | cns_validation: no 95 | 96 | # BMC Details for Confidential Computing 97 | bmc_ip: 98 | bmc_username: 99 | bmc_password: 100 | 101 | # CSP values 102 | ## AWS EKS values 103 | aws_region: us-east-2 104 | aws_cluster_name: cns-cluster-1 105 | aws_gpu_instance_type: g4dn.2xlarge 106 | 107 | ## Google Cloud GKE Values 108 | #https://cloud.google.com/resource-manager/docs/creating-managing-projects#identifying_projects 109 | gke_project_id: 110 | #https://cloud.google.com/compute/docs/regions-zones#available 111 | gke_region: us-west1 112 | gke_node_zones: ["us-west1-b"] 113 | gke_cluster_name: gke-cluster-1 114 | 115 | ## Azure AKS Values 116 | aks_cluster_name: aks-cluster-1 117 | #https://azure.microsoft.com/en-us/explore/global-infrastructure/geographies/#geographies 118 | aks_cluster_location: "West US 2" 119 | #https://learn.microsoft.com/en-us/partner-center/marketplace/find-tenant-object-id 120 | azure_object_id: [""] -------------------------------------------------------------------------------- /playbooks/older_versions/cns_values_10.1.yaml: -------------------------------------------------------------------------------- 1 | cns_version: 10.1 2 | 3 | ## MicroK8s cluster 4 | microk8s: no 5 | ## Kubernetes Install with Kubeadm 6 | install_k8s: yes 7 | 8 | ## Components Versions 9 | # Container Runtime options are containerd, cri-o, cri-dockerd 10 | container_runtime: "containerd" 11 | containerd_version: "1.7.2" 12 | runc_version: "1.1.11" 13 | cni_plugins_version: "1.4.0" 14 | containerd_max_concurrent_downloads: "5" 15 | nvidia_container_toolkit_version: "1.13.2" 16 | crio_version: "1.27.0" 17 | cri_dockerd_version: "0.3.4" 18 | k8s_version: "1.27.2" 19 | calico_version: "3.26.1" 20 | flannel_version: "0.22.0" 21 | helm_version: "3.12.1" 22 | gpu_operator_version: "23.3.2" 23 | network_operator_version: "23.5.0" 24 | local_path_provisioner: "0.0.24" 25 | nfs_provisioner: "4.0.18" 26 |
metallb_version: "0.14.5" 27 | kserve_version: "0.13" 28 | prometheus_stack: "61.3.0" 29 | elastic_stack: "8.14.1" 30 | 31 | # GPU Operator Values 32 | enable_gpu_operator: yes 33 | confidential_computing: no 34 | gpu_driver_version: "535.54.03" 35 | use_open_kernel_module: no 36 | enable_mig: no 37 | mig_profile: all-disabled 38 | mig_strategy: single 39 | enable_gds: no 40 | # Secure Boot for Ubuntu only 41 | enable_secure_boot: no 42 | enable_vgpu: no 43 | vgpu_license_server: "" 44 | # URL of Helm repo to be added. If using NGC, get this from the fetch command in the console 45 | helm_repository: https://helm.ngc.nvidia.com/nvidia 46 | # Name of the helm chart to be deployed 47 | gpu_operator_helm_chart: nvidia/gpu-operator 48 | ## If using a private/protected registry, set the NGC API Key here. Leave blank for public registries 49 | gpu_operator_registry_password: "" 50 | ## This is most likely an NGC email 51 | gpu_operator_registry_email: "" 52 | ## This is most likely GPU Operator Driver Registry 53 | gpu_operator_driver_registry: "nvcr.io/nvidia" 54 | gpu_operator_registry_username: "$oauthtoken" 55 | 56 | # Network Operator Values 57 | ## If enable_network_operator is yes, make sure enable_rdma is set to yes as well 58 | enable_network_operator: no 59 | ## Set enable_rdma to yes for NVIDIA Certification 60 | enable_rdma: no 61 | 62 | # Proxy Configuration 63 | proxy: no 64 | http_proxy: "" 65 | https_proxy: "" 66 | 67 | # Cloud Native Stack for Developers Values 68 | ## Enable for Cloud Native Stack Developers 69 | cns_docker: no 70 | ## Enable for Cloud Native Stack Developers with TRD Driver 71 | cns_nvidia_driver: no 72 | 73 | ## Kubernetes resources 74 | k8s_apt_key: "https://pkgs.k8s.io/core:/stable:/v1.27/deb/Release.key" 75 | k8s_gpg_key: "https://pkgs.k8s.io/core:/stable:/v1.27/rpm/repodata/repomd.xml.key" 76 | k8s_apt_ring: "/etc/apt/keyrings/kubernetes-apt-keyring.gpg" 77 | k8s_registry: "registry.k8s.io" 78 | 79 | # Local Path Provisioner and NFS Provisioner as Storage options 80 | storage: no 81 | 82 | # Monitoring Stack Prometheus/Grafana with GPU Metrics and Elastic Logging stack 83 | monitoring: no 84 | 85 | # Enable Kserve on Cloud Native Stack with Istio and Cert-Manager 86 | kserve: no 87 | 88 | # Install MetalLB 89 | loadbalancer: no 90 | # Example input: loadbalancer_ip: "10.117.20.50/32"; this can be the node/host IP 91 | loadbalancer_ip: "" 92 | 93 | ## Cloud Native Stack Validation 94 | cns_validation: no 95 | 96 | # BMC Details for Confidential Computing 97 | bmc_ip: 98 | bmc_username: 99 | bmc_password: 100 | 101 | # CSP values 102 | ## AWS EKS values 103 | aws_region: us-east-2 104 | aws_cluster_name: cns-cluster-1 105 | aws_gpu_instance_type: g4dn.2xlarge 106 | 107 | ## Google Cloud GKE Values 108 | #https://cloud.google.com/resource-manager/docs/creating-managing-projects#identifying_projects 109 | gke_project_id: 110 | #https://cloud.google.com/compute/docs/regions-zones#available 111 | gke_region: us-west1 112 | gke_node_zones: ["us-west1-b"] 113 | gke_cluster_name: gke-cluster-1 114 | 115 | ## Azure AKS Values 116 | aks_cluster_name: aks-cluster-1 117 | #https://azure.microsoft.com/en-us/explore/global-infrastructure/geographies/#geographies 118 | aks_cluster_location: "West US 2" 119 | #https://learn.microsoft.com/en-us/partner-center/marketplace/find-tenant-object-id 120 | azure_object_id: [""] -------------------------------------------------------------------------------- /playbooks/older_versions/cns_values_10.2.yaml:
-------------------------------------------------------------------------------- 1 | cns_version: 10.2 2 | 3 | ## MicroK8s cluster 4 | microk8s: no 5 | ## Kubernetes Install with Kubeadm 6 | install_k8s: yes 7 | 8 | ## Components Versions 9 | # Container Runtime options are containerd, cri-o, cri-dockerd 10 | container_runtime: "containerd" 11 | containerd_version: "1.7.3" 12 | runc_version: "1.1.11" 13 | cni_plugins_version: "1.4.0" 14 | containerd_max_concurrent_downloads: "5" 15 | nvidia_container_toolkit_version: "1.13.5" 16 | crio_version: "1.27.1" 17 | cri_dockerd_version: "0.3.4" 18 | k8s_version: "1.27.4" 19 | calico_version: "3.26.1" 20 | flannel_version: "0.22.0" 21 | helm_version: "3.12.2" 22 | gpu_operator_version: "23.6.1" 23 | network_operator_version: "23.7.0" 24 | local_path_provisioner: "0.0.24" 25 | nfs_provisioner: "4.0.18" 26 | metallb_version: "0.14.5" 27 | kserve_version: "0.13" 28 | prometheus_stack: "61.3.0" 29 | elastic_stack: "8.14.1" 30 | 31 | # GPU Operator Values 32 | enable_gpu_operator: yes 33 | confidential_computing: no 34 | gpu_driver_version: "535.104.05" 35 | use_open_kernel_module: no 36 | enable_mig: no 37 | mig_profile: all-disabled 38 | mig_strategy: single 39 | enable_gds: no 40 | # Secure Boot for Ubuntu only 41 | enable_secure_boot: no 42 | enable_vgpu: no 43 | vgpu_license_server: "" 44 | # URL of Helm repo to be added. If using NGC, get this from the fetch command in the console 45 | helm_repository: https://helm.ngc.nvidia.com/nvidia 46 | # Name of the helm chart to be deployed 47 | gpu_operator_helm_chart: nvidia/gpu-operator 48 | ## If using a private/protected registry, set the NGC API Key here. Leave blank for public registries 49 | gpu_operator_registry_password: "" 50 | ## This is most likely an NGC email 51 | gpu_operator_registry_email: "" 52 | ## This is most likely GPU Operator Driver Registry 53 | gpu_operator_driver_registry: "nvcr.io/nvidia" 54 | gpu_operator_registry_username: "$oauthtoken" 55 | 56 | # Network Operator Values 57 | ## If enable_network_operator is yes, make sure enable_rdma is set to yes as well 58 | enable_network_operator: no 59 | ## Set enable_rdma to yes for NVIDIA Certification 60 | enable_rdma: no 61 | 62 | # Proxy Configuration 63 | proxy: no 64 | http_proxy: "" 65 | https_proxy: "" 66 | 67 | # Cloud Native Stack for Developers Values 68 | ## Enable for Cloud Native Stack Developers 69 | cns_docker: no 70 | ## Enable for Cloud Native Stack Developers with TRD Driver 71 | cns_nvidia_driver: no 72 | 73 | ## Kubernetes resources 74 | k8s_apt_key: "https://pkgs.k8s.io/core:/stable:/v1.27/deb/Release.key" 75 | k8s_gpg_key: "https://pkgs.k8s.io/core:/stable:/v1.27/rpm/repodata/repomd.xml.key" 76 | k8s_apt_ring: "/etc/apt/keyrings/kubernetes-apt-keyring.gpg" 77 | k8s_registry: "registry.k8s.io" 78 | 79 | # Local Path Provisioner and NFS Provisioner as Storage options 80 | storage: no 81 | 82 | # Monitoring Stack Prometheus/Grafana with GPU Metrics and Elastic Logging stack 83 | monitoring: no 84 | 85 | # Enable Kserve on Cloud Native Stack with Istio and Cert-Manager 86 | kserve: no 87 | 88 | # Install MetalLB 89 | loadbalancer: no 90 | # Example input: loadbalancer_ip: "10.117.20.50/32"; this can be the node/host IP 91 | loadbalancer_ip: "" 92 | 93 | ## Cloud Native Stack Validation 94 | cns_validation: no 95 | 96 | # BMC Details for Confidential Computing 97 | bmc_ip: 98 | bmc_username: 99 | bmc_password: 100 | 101 | # CSP values 102 | ## AWS EKS values 103 | aws_region: us-east-2 104 | aws_cluster_name: cns-cluster-1 105 | aws_gpu_instance_type:
g4dn.2xlarge 106 | 107 | ## Google Cloud GKE Values 108 | #https://cloud.google.com/resource-manager/docs/creating-managing-projects#identifying_projects 109 | gke_project_id: 110 | #https://cloud.google.com/compute/docs/regions-zones#available 111 | gke_region: us-west1 112 | gke_node_zones: ["us-west1-b"] 113 | gke_cluster_name: gke-cluster-1 114 | 115 | ## Azure AKS Values 116 | aks_cluster_name: aks-cluster-1 117 | #https://azure.microsoft.com/en-us/explore/global-infrastructure/geographies/#geographies 118 | aks_cluster_location: "West US 2" 119 | #https://learn.microsoft.com/en-us/partner-center/marketplace/find-tenant-object-id 120 | azure_object_id: [""] -------------------------------------------------------------------------------- /playbooks/older_versions/cns_values_10.3.yaml: -------------------------------------------------------------------------------- 1 | cns_version: 10.3 2 | 3 | ## MicroK8s cluster 4 | microk8s: no 5 | ## Kubernetes Install with Kubeadm 6 | install_k8s: yes 7 | 8 | ## Components Versions 9 | # Container Runtime options are containerd, cri-o, cri-dockerd 10 | container_runtime: "containerd" 11 | containerd_version: "1.7.7" 12 | runc_version: "1.1.11" 13 | cni_plugins_version: "1.4.0" 14 | containerd_max_concurrent_downloads: "5" 15 | nvidia_container_toolkit_version: "1.14.3" 16 | crio_version: "1.27.1" 17 | cri_dockerd_version: "0.3.6" 18 | k8s_version: "1.27.6" 19 | calico_version: "3.26.3" 20 | flannel_version: "0.22.3" 21 | helm_version: "3.13.1" 22 | gpu_operator_version: "23.9.1" 23 | network_operator_version: "23.10.0" 24 | local_path_provisioner: "0.0.24" 25 | nfs_provisioner: "4.0.18" 26 | metallb_version: "0.14.5" 27 | kserve_version: "0.13" 28 | prometheus_stack: "61.3.0" 29 | elastic_stack: "8.14.1" 30 | 31 | # GPU Operator Values 32 | enable_gpu_operator: yes 33 | confidential_computing: no 34 | gpu_driver_version: "535.161.08" 35 | use_open_kernel_module: no 36 | enable_mig: no 37 | mig_profile: all-disabled 38 | mig_strategy: single 39 | enable_gds: no 40 | # Secure Boot for Ubuntu only 41 | enable_secure_boot: no 42 | enable_vgpu: no 43 | vgpu_license_server: "" 44 | # URL of Helm repo to be added. If using NGC, get this from the fetch command in the console 45 | helm_repository: https://helm.ngc.nvidia.com/nvidia 46 | # Name of the helm chart to be deployed 47 | gpu_operator_helm_chart: nvidia/gpu-operator 48 | ## If using a private/protected registry, set the NGC API Key here.
Leave blank for public registries 49 | gpu_operator_registry_password: "" 50 | ## This is most likely an NGC email 51 | gpu_operator_registry_email: "" 52 | ## This is most likely GPU Operator Driver Registry 53 | gpu_operator_driver_registry: "nvcr.io/nvidia" 54 | gpu_operator_registry_username: "$oauthtoken" 55 | 56 | # Network Operator Values 57 | ## If enable_network_operator is yes, make sure enable_rdma is set to yes as well 58 | enable_network_operator: no 59 | ## Set enable_rdma to yes for NVIDIA Certification 60 | enable_rdma: no 61 | 62 | # Proxy Configuration 63 | proxy: no 64 | http_proxy: "" 65 | https_proxy: "" 66 | 67 | # Cloud Native Stack for Developers Values 68 | ## Enable for Cloud Native Stack Developers 69 | cns_docker: no 70 | ## Enable for Cloud Native Stack Developers with TRD Driver 71 | cns_nvidia_driver: no 72 | 73 | ## Kubernetes resources 74 | k8s_apt_key: "https://pkgs.k8s.io/core:/stable:/v1.27/deb/Release.key" 75 | k8s_gpg_key: "https://pkgs.k8s.io/core:/stable:/v1.27/rpm/repodata/repomd.xml.key" 76 | k8s_apt_ring: "/etc/apt/keyrings/kubernetes-apt-keyring.gpg" 77 | k8s_registry: "registry.k8s.io" 78 | 79 | # Local Path Provisioner and NFS Provisioner as Storage options 80 | storage: no 81 | 82 | # Monitoring Stack Prometheus/Grafana with GPU Metrics and Elastic Logging stack 83 | monitoring: no 84 | 85 | # Enable Kserve on Cloud Native Stack with Istio and Cert-Manager 86 | kserve: no 87 | 88 | # Install MetalLB 89 | loadbalancer: no 90 | # Example input: loadbalancer_ip: "10.117.20.50/32"; this can be the node/host IP 91 | loadbalancer_ip: "" 92 | 93 | ## Cloud Native Stack Validation 94 | cns_validation: no 95 | 96 | # BMC Details for Confidential Computing 97 | bmc_ip: 98 | bmc_username: 99 | bmc_password: 100 | 101 | # CSP values 102 | ## AWS EKS values 103 | aws_region: us-east-2 104 | aws_cluster_name: cns-cluster-1 105 | aws_gpu_instance_type: g4dn.2xlarge 106 | 107 | ## Google Cloud GKE Values 108 | #https://cloud.google.com/resource-manager/docs/creating-managing-projects#identifying_projects 109 | gke_project_id: 110 | #https://cloud.google.com/compute/docs/regions-zones#available 111 | gke_region: us-west1 112 | gke_node_zones: ["us-west1-b"] 113 | gke_cluster_name: gke-cluster-1 114 | 115 | ## Azure AKS Values 116 | aks_cluster_name: aks-cluster-1 117 | #https://azure.microsoft.com/en-us/explore/global-infrastructure/geographies/#geographies 118 | aks_cluster_location: "West US 2" 119 | #https://learn.microsoft.com/en-us/partner-center/marketplace/find-tenant-object-id 120 | azure_object_id: [""] -------------------------------------------------------------------------------- /playbooks/older_versions/cns_values_10.4.yaml: -------------------------------------------------------------------------------- 1 | cns_version: 10.4 2 | 3 | ## MicroK8s cluster 4 | microk8s: no 5 | ## Kubernetes Install with Kubeadm 6 | install_k8s: yes 7 | 8 | ## Components Versions 9 | # Container Runtime options are containerd, cri-o, cri-dockerd 10 | container_runtime: "containerd" 11 | containerd_version: "1.7.13" 12 | runc_version: "1.1.12" 13 | cni_plugins_version: "1.4.0" 14 | containerd_max_concurrent_downloads: "5" 15 | nvidia_container_toolkit_version: "1.14.6" 16 | crio_version: "1.27.4" 17 | cri_dockerd_version: "0.3.6" 18 | k8s_version: "1.27.10" 19 | calico_version: "3.27.0" 20 | flannel_version: "0.24.2" 21 | helm_version: "3.14.2" 22 | gpu_operator_version: "23.9.2" 23 | network_operator_version: "24.1.0" 24 | local_path_provisioner: "0.0.26" 25 | nfs_provisioner: "4.0.18" 26
| metallb_version: "0.14.5" 27 | kserve_version: "0.13" 28 | prometheus_stack: "61.3.0" 29 | elastic_stack: "8.14.1" 30 | 31 | # GPU Operator Values 32 | enable_gpu_operator: yes 33 | confidential_computing: no 34 | gpu_driver_version: "550.54.15" 35 | use_open_kernel_module: no 36 | enable_mig: no 37 | mig_profile: all-disabled 38 | mig_strategy: single 39 | # To use GDS, use_open_kernel_module needs to be enabled 40 | enable_gds: no 41 | # Secure Boot for Ubuntu only 42 | enable_secure_boot: no 43 | enable_vgpu: no 44 | vgpu_license_server: "" 45 | # URL of Helm repo to be added. If using NGC, get this from the fetch command in the console 46 | helm_repository: https://helm.ngc.nvidia.com/nvidia 47 | # Name of the helm chart to be deployed 48 | gpu_operator_helm_chart: nvidia/gpu-operator 49 | ## If using a private/protected registry, set the NGC API Key here. Leave blank for public registries 50 | gpu_operator_registry_password: "" 51 | ## This is most likely an NGC email 52 | gpu_operator_registry_email: "" 53 | ## This is most likely GPU Operator Driver Registry 54 | gpu_operator_driver_registry: "nvcr.io/nvidia" 55 | gpu_operator_registry_username: "$oauthtoken" 56 | 57 | # Network Operator Values 58 | ## If enable_network_operator is yes, make sure enable_rdma is set to yes as well 59 | enable_network_operator: no 60 | ## Set enable_rdma to yes for NVIDIA Certification 61 | enable_rdma: no 62 | 63 | # Proxy Configuration 64 | proxy: no 65 | http_proxy: "" 66 | https_proxy: "" 67 | 68 | # Cloud Native Stack for Developers Values 69 | ## Enable for Cloud Native Stack Developers 70 | cns_docker: no 71 | ## Enable for Cloud Native Stack Developers with TRD Driver 72 | cns_nvidia_driver: no 73 | 74 | ## Kubernetes resources 75 | k8s_apt_key: "https://pkgs.k8s.io/core:/stable:/v1.27/deb/Release.key" 76 | k8s_gpg_key: "https://pkgs.k8s.io/core:/stable:/v1.27/rpm/repodata/repomd.xml.key" 77 | k8s_apt_ring: "/etc/apt/keyrings/kubernetes-apt-keyring.gpg" 78 | k8s_registry: "registry.k8s.io" 79 | 80 | # Local Path Provisioner and NFS Provisioner as Storage options 81 | storage: no 82 | 83 | # Monitoring Stack Prometheus/Grafana with GPU Metrics and Elastic Logging stack 84 | monitoring: no 85 | 86 | # Enable Kserve on Cloud Native Stack with Istio and Cert-Manager 87 | kserve: no 88 | 89 | # Install MetalLB 90 | loadbalancer: no 91 | # Example input: loadbalancer_ip: "10.117.20.50/32"; this can be the node/host IP 92 | loadbalancer_ip: "" 93 | 94 | ## Cloud Native Stack Validation 95 | cns_validation: no 96 | 97 | # BMC Details for Confidential Computing 98 | bmc_ip: 99 | bmc_username: 100 | bmc_password: 101 | 102 | # CSP values 103 | ## AWS EKS values 104 | aws_region: us-east-2 105 | aws_cluster_name: cns-cluster-1 106 | aws_gpu_instance_type: g4dn.2xlarge 107 | 108 | ## Google Cloud GKE Values 109 | #https://cloud.google.com/resource-manager/docs/creating-managing-projects#identifying_projects 110 | gke_project_id: 111 | #https://cloud.google.com/compute/docs/regions-zones#available 112 | gke_region: us-west1 113 | gke_node_zones: ["us-west1-b"] 114 | gke_cluster_name: gke-cluster-1 115 | 116 | ## Azure AKS Values 117 | aks_cluster_name: aks-cluster-1 118 | #https://azure.microsoft.com/en-us/explore/global-infrastructure/geographies/#geographies 119 | aks_cluster_location: "West US 2" 120 | #https://learn.microsoft.com/en-us/partner-center/marketplace/find-tenant-object-id 121 | azure_object_id: [""] -------------------------------------------------------------------------------- /playbooks/older_versions/cns_values_10.5.yaml:
-------------------------------------------------------------------------------- 1 | cns_version: 10.5 2 | 3 | ## MicroK8s cluster 4 | microk8s: no 5 | ## Kubernetes Install with Kubeadm 6 | install_k8s: yes 7 | 8 | ## Components Versions 9 | # Container Runtime options are containerd, cri-o, cri-dockerd 10 | container_runtime: "containerd" 11 | containerd_version: "1.7.16" 12 | runc_version: "1.1.12" 13 | cni_plugins_version: "1.4.1" 14 | containerd_max_concurrent_downloads: "5" 15 | nvidia_container_toolkit_version: "1.15.0" 16 | crio_version: "1.27.6" 17 | cri_dockerd_version: "0.3.13" 18 | k8s_version: "1.27.12" 19 | calico_version: "3.27.3" 20 | flannel_version: "0.25.1" 21 | helm_version: "3.14.4" 22 | gpu_operator_version: "24.3.0" 23 | network_operator_version: "24.1.1" 24 | local_path_provisioner: "0.0.26" 25 | nfs_provisioner: "4.0.18" 26 | metallb_version: "0.14.5" 27 | kserve_version: "0.13" 28 | prometheus_stack: "61.3.0" 29 | elastic_stack: "8.14.1" 30 | 31 | # GPU Operator Values 32 | enable_gpu_operator: yes 33 | confidential_computing: no 34 | gpu_driver_version: "550.54.15" 35 | use_open_kernel_module: no 36 | enable_mig: no 37 | mig_profile: all-disabled 38 | mig_strategy: single 39 | # To use GDS, use_open_kernel_module needs to be enabled 40 | enable_gds: no 41 | # Secure Boot for Ubuntu only 42 | enable_secure_boot: no 43 | enable_vgpu: no 44 | vgpu_license_server: "" 45 | # URL of Helm repo to be added. If using NGC, get this from the fetch command in the console 46 | helm_repository: https://helm.ngc.nvidia.com/nvidia 47 | # Name of the helm chart to be deployed 48 | gpu_operator_helm_chart: nvidia/gpu-operator 49 | ## If using a private/protected registry, set the NGC API Key here. Leave blank for public registries 50 | gpu_operator_registry_password: "" 51 | ## This is most likely an NGC email 52 | gpu_operator_registry_email: "" 53 | ## This is most likely GPU Operator Driver Registry 54 | gpu_operator_driver_registry: "nvcr.io/nvidia" 55 | gpu_operator_registry_username: "$oauthtoken" 56 | 57 | # Network Operator Values 58 | ## If enable_network_operator is yes, make sure enable_rdma is set to yes as well 59 | enable_network_operator: no 60 | ## Set enable_rdma to yes for NVIDIA Certification 61 | enable_rdma: no 62 | 63 | # Proxy Configuration 64 | proxy: no 65 | http_proxy: "" 66 | https_proxy: "" 67 | 68 | # Cloud Native Stack for Developers Values 69 | ## Enable for Cloud Native Stack Developers 70 | cns_docker: no 71 | ## Enable for Cloud Native Stack Developers with TRD Driver 72 | cns_nvidia_driver: no 73 | 74 | ## Kubernetes resources 75 | k8s_apt_key: "https://pkgs.k8s.io/core:/stable:/v1.27/deb/Release.key" 76 | k8s_gpg_key: "https://pkgs.k8s.io/core:/stable:/v1.27/rpm/repodata/repomd.xml.key" 77 | k8s_apt_ring: "/etc/apt/keyrings/kubernetes-apt-keyring.gpg" 78 | k8s_registry: "registry.k8s.io" 79 | 80 | # Local Path Provisioner and NFS Provisioner as Storage options 81 | storage: no 82 | 83 | # Monitoring Stack Prometheus/Grafana with GPU Metrics and Elastic Logging stack 84 | monitoring: no 85 | 86 | # Enable Kserve on Cloud Native Stack with Istio and Cert-Manager 87 | kserve: no 88 | 89 | # Install MetalLB 90 | loadbalancer: no 91 | # Example input: loadbalancer_ip: "10.117.20.50/32"; this can be the node/host IP 92 | loadbalancer_ip: "" 93 | 94 | ## Cloud Native Stack Validation 95 | cns_validation: no 96 | 97 | # BMC Details for Confidential Computing 98 | bmc_ip: 99 | bmc_username: 100 | bmc_password: 101 | 102 | # CSP values 103 | ## AWS EKS values 104 | aws_region: us-east-2 105 |
aws_cluster_name: cns-cluster-1 106 | aws_gpu_instance_type: g4dn.2xlarge 107 | 108 | ## Google Cloud GKE Values 109 | #https://cloud.google.com/resource-manager/docs/creating-managing-projects#identifying_projects 110 | gke_project_id: 111 | #https://cloud.google.com/compute/docs/regions-zones#available 112 | gke_region: us-west1 113 | gke_node_zones: ["us-west1-b"] 114 | gke_cluster_name: gke-cluster-1 115 | 116 | ## Azure AKS Values 117 | aks_cluster_name: aks-cluster-1 118 | #https://azure.microsoft.com/en-us/explore/global-infrastructure/geographies/#geographies 119 | aks_cluster_location: "West US 2" 120 | #https://learn.microsoft.com/en-us/partner-center/marketplace/find-tenant-object-id 121 | azure_object_id: [""] -------------------------------------------------------------------------------- /playbooks/older_versions/cns_values_11.0.yaml: -------------------------------------------------------------------------------- 1 | cns_version: 11.0 2 | 3 | ## MicroK8s cluster 4 | microk8s: no 5 | ## Kubernetes Install with Kubeadm 6 | install_k8s: yes 7 | 8 | ## Components Versions 9 | # Container Runtime options are containerd, cri-o, cri-dockerd 10 | container_runtime: "containerd" 11 | containerd_version: "1.7.7" 12 | runc_version: "1.1.11" 13 | cni_plugins_version: "1.4.0" 14 | containerd_max_concurrent_downloads: "5" 15 | nvidia_container_toolkit_version: "1.14.3" 16 | crio_version: "1.28.1" 17 | cri_dockerd_version: "0.3.6" 18 | k8s_version: "1.28.2" 19 | calico_version: "3.26.3" 20 | flannel_version: "0.22.3" 21 | helm_version: "3.13.1" 22 | gpu_operator_version: "23.9.1" 23 | network_operator_version: "23.10.0" 24 | local_path_provisioner: "0.0.24" 25 | nfs_provisioner: "4.0.18" 26 | metallb_version: "0.14.5" 27 | kserve_version: "0.13" 28 | prometheus_stack: "61.3.0" 29 | elastic_stack: "8.14.1" 30 | 31 | # GPU Operator Values 32 | enable_gpu_operator: yes 33 | confidential_computing: no 34 | gpu_driver_version: "535.161.08" 35 | use_open_kernel_module: no 36 | enable_mig: no 37 | mig_profile: all-disabled 38 | mig_strategy: single 39 | enable_gds: no 40 | # Secure Boot for Ubuntu only 41 | enable_secure_boot: no 42 | enable_vgpu: no 43 | vgpu_license_server: "" 44 | # URL of Helm repo to be added. If using NGC, get this from the fetch command in the console 45 | helm_repository: https://helm.ngc.nvidia.com/nvidia 46 | # Name of the helm chart to be deployed 47 | gpu_operator_helm_chart: nvidia/gpu-operator 48 | ## This is most likely GPU Operator Driver Registry 49 | gpu_operator_driver_registry: "nvcr.io/nvidia" 50 | 51 | # NGC Values 52 | ## If using a private/protected registry, set the NGC API Key here.
Leave blank for public registries 53 | ngc_registry_password: "" 54 | ## This is most likely an NGC email 55 | ngc_registry_email: "" 56 | ngc_registry_username: "$oauthtoken" 57 | 58 | # Network Operator Values 59 | ## If enable_network_operator is yes, make sure enable_rdma is set to yes as well 60 | enable_network_operator: no 61 | ## Set enable_rdma to yes for NVIDIA Certification 62 | enable_rdma: no 63 | 64 | # Proxy Configuration 65 | proxy: no 66 | http_proxy: "" 67 | https_proxy: "" 68 | 69 | # Cloud Native Stack for Developers Values 70 | ## Enable for Cloud Native Stack Developers 71 | cns_docker: no 72 | ## Enable for Cloud Native Stack Developers with TRD Driver 73 | cns_nvidia_driver: no 74 | 75 | ## Kubernetes resources 76 | k8s_apt_key: "https://pkgs.k8s.io/core:/stable:/v1.28/deb/Release.key" 77 | k8s_gpg_key: "https://pkgs.k8s.io/core:/stable:/v1.28/rpm/repodata/repomd.xml.key" 78 | k8s_apt_ring: "/etc/apt/keyrings/kubernetes-apt-keyring.gpg" 79 | k8s_registry: "registry.k8s.io" 80 | 81 | # Install NVIDIA NIM Operator 82 | enable_nim_operator: no 83 | 84 | # Local Path Provisioner and NFS Provisioner as Storage options 85 | storage: no 86 | 87 | # Monitoring Stack Prometheus/Grafana with GPU Metrics and Elastic Logging stack 88 | monitoring: no 89 | 90 | # Enable Kserve on Cloud Native Stack with Istio and Cert-Manager 91 | kserve: no 92 | 93 | # Install MetalLB 94 | loadbalancer: no 95 | # Example input: loadbalancer_ip: "10.117.20.50/32"; this can be the node/host IP 96 | loadbalancer_ip: "" 97 | 98 | ## Cloud Native Stack Validation 99 | cns_validation: no 100 | 101 | # BMC Details for Confidential Computing 102 | bmc_ip: 103 | bmc_username: 104 | bmc_password: 105 | 106 | # CSP values 107 | ## AWS EKS values 108 | aws_region: us-east-2 109 | aws_cluster_name: cns-cluster-1 110 | aws_gpu_instance_type: g4dn.2xlarge 111 | 112 | ## Google Cloud GKE Values 113 | #https://cloud.google.com/resource-manager/docs/creating-managing-projects#identifying_projects 114 | gke_project_id: 115 | #https://cloud.google.com/compute/docs/regions-zones#available 116 | gke_region: us-west1 117 | gke_node_zones: ["us-west1-b"] 118 | gke_cluster_name: gke-cluster-1 119 | 120 | ## Azure AKS Values 121 | aks_cluster_name: aks-cluster-1 122 | #https://azure.microsoft.com/en-us/explore/global-infrastructure/geographies/#geographies 123 | aks_cluster_location: "West US 2" 124 | #https://learn.microsoft.com/en-us/partner-center/marketplace/find-tenant-object-id 125 | azure_object_id: [""] -------------------------------------------------------------------------------- /playbooks/older_versions/cns_values_11.1.yaml: -------------------------------------------------------------------------------- 1 | cns_version: 11.1 2 | 3 | ## MicroK8s cluster 4 | microk8s: no 5 | ## Kubernetes Install with Kubeadm 6 | install_k8s: yes 7 | 8 | ## Components Versions 9 | # Container Runtime options are containerd, cri-o, cri-dockerd 10 | container_runtime: "containerd" 11 | containerd_version: "1.7.13" 12 | runc_version: "1.1.12" 13 | cni_plugins_version: "1.4.0" 14 | containerd_max_concurrent_downloads: "5" 15 | nvidia_container_toolkit_version: "1.14.6" 16 | crio_version: "1.28.2" 17 | cri_dockerd_version: "0.3.10" 18 | k8s_version: "1.28.6" 19 | calico_version: "3.27.0" 20 | flannel_version: "0.24.2" 21 | helm_version: "3.14.2" 22 | gpu_operator_version: "23.9.2" 23 | network_operator_version: "24.1.0" 24 | local_path_provisioner: "0.0.26" 25 | nfs_provisioner: "4.0.18" 26 | metallb_version: "0.14.5" 27 | kserve_version: "0.13" 28 |
prometheus_stack: "61.3.0" 29 | elastic_stack: "8.14.1" 30 | 31 | # GPU Operator Values 32 | enable_gpu_operator: yes 33 | confidential_computing: no 34 | gpu_driver_version: "550.54.15" 35 | use_open_kernel_module: no 36 | enable_mig: no 37 | mig_profile: all-disabled 38 | mig_strategy: single 39 | # To use GDS, use_open_kernel_module needs to be enabled 40 | enable_gds: no 41 | # Secure Boot for Ubuntu only 42 | enable_secure_boot: no 43 | enable_vgpu: no 44 | vgpu_license_server: "" 45 | # URL of Helm repo to be added. If using NGC, get this from the fetch command in the console 46 | helm_repository: https://helm.ngc.nvidia.com/nvidia 47 | # Name of the helm chart to be deployed 48 | gpu_operator_helm_chart: nvidia/gpu-operator 49 | ## This is most likely GPU Operator Driver Registry 50 | gpu_operator_driver_registry: "nvcr.io/nvidia" 51 | 52 | # NGC Values 53 | ## If using a private/protected registry, set the NGC API Key here. Leave blank for public registries 54 | ngc_registry_password: "" 55 | ## This is most likely an NGC email 56 | ngc_registry_email: "" 57 | ngc_registry_username: "$oauthtoken" 58 | 59 | # Network Operator Values 60 | ## If enable_network_operator is yes, make sure enable_rdma is set to yes as well 61 | enable_network_operator: no 62 | ## Set enable_rdma to yes for NVIDIA Certification 63 | enable_rdma: no 64 | 65 | # Proxy Configuration 66 | proxy: no 67 | http_proxy: "" 68 | https_proxy: "" 69 | 70 | # Cloud Native Stack for Developers Values 71 | ## Enable for Cloud Native Stack Developers 72 | cns_docker: no 73 | ## Enable for Cloud Native Stack Developers with TRD Driver 74 | cns_nvidia_driver: no 75 | 76 | ## Kubernetes resources 77 | k8s_apt_key: "https://pkgs.k8s.io/core:/stable:/v1.28/deb/Release.key" 78 | k8s_gpg_key: "https://pkgs.k8s.io/core:/stable:/v1.28/rpm/repodata/repomd.xml.key" 79 | k8s_apt_ring: "/etc/apt/keyrings/kubernetes-apt-keyring.gpg" 80 | k8s_registry: "registry.k8s.io" 81 | 82 | # Local Path Provisioner and NFS Provisioner as Storage options 83 | storage: no 84 | 85 | # Monitoring Stack Prometheus/Grafana with GPU Metrics and Elastic Logging stack 86 | monitoring: no 87 | 88 | # Enable Kserve on Cloud Native Stack with Istio and Cert-Manager 89 | kserve: no 90 | 91 | # Install MetalLB 92 | loadbalancer: no 93 | # Example input: loadbalancer_ip: "10.117.20.50/32"; this can be the node/host IP 94 | loadbalancer_ip: "" 95 | 96 | ## Cloud Native Stack Validation 97 | cns_validation: no 98 | 99 | # BMC Details for Confidential Computing 100 | bmc_ip: 101 | bmc_username: 102 | bmc_password: 103 | 104 | # CSP values 105 | ## AWS EKS values 106 | aws_region: us-east-2 107 | aws_cluster_name: cns-cluster-1 108 | aws_gpu_instance_type: g4dn.2xlarge 109 | 110 | ## Google Cloud GKE Values 111 | #https://cloud.google.com/resource-manager/docs/creating-managing-projects#identifying_projects 112 | gke_project_id: 113 | #https://cloud.google.com/compute/docs/regions-zones#available 114 | gke_region: us-west1 115 | gke_node_zones: ["us-west1-b"] 116 | gke_cluster_name: gke-cluster-1 117 | 118 | ## Azure AKS Values 119 | aks_cluster_name: aks-cluster-1 120 | #https://azure.microsoft.com/en-us/explore/global-infrastructure/geographies/#geographies 121 | aks_cluster_location: "West US 2" 122 | #https://learn.microsoft.com/en-us/partner-center/marketplace/find-tenant-object-id 123 | azure_object_id: [""] -------------------------------------------------------------------------------- /playbooks/older_versions/cns_values_11.2.yaml:
-------------------------------------------------------------------------------- 1 | cns_version: 11.2 2 | 3 | ## MicroK8s cluster 4 | microk8s: no 5 | ## Kubernetes Install with Kubeadm 6 | install_k8s: yes 7 | 8 | ## Components Versions 9 | # Container Runtime options are containerd, cri-o, cri-dockerd 10 | container_runtime: "containerd" 11 | containerd_version: "1.7.16" 12 | runc_version: "1.1.12" 13 | cni_plugins_version: "1.4.1" 14 | containerd_max_concurrent_downloads: "5" 15 | nvidia_container_toolkit_version: "1.15.0" 16 | crio_version: "1.28.6" 17 | cri_dockerd_version: "0.3.13" 18 | k8s_version: "1.28.8" 19 | calico_version: "3.27.3" 20 | flannel_version: "0.25.1" 21 | helm_version: "3.14.4" 22 | gpu_operator_version: "24.3.0" 23 | network_operator_version: "24.1.1" 24 | local_path_provisioner: "0.0.26" 25 | nfs_provisioner: "4.0.18" 26 | metallb_version: "0.14.5" 27 | kserve_version: "0.13" 28 | prometheus_stack: "61.3.0" 29 | elastic_stack: "8.14.1" 30 | 31 | # GPU Operator Values 32 | enable_gpu_operator: yes 33 | confidential_computing: no 34 | gpu_driver_version: "550.54.15" 35 | use_open_kernel_module: no 36 | enable_mig: no 37 | mig_profile: all-disabled 38 | mig_strategy: single 39 | # To use GDS, use_open_kernel_module needs to be enabled 40 | enable_gds: no 41 | # Secure Boot for Ubuntu only 42 | enable_secure_boot: no 43 | enable_vgpu: no 44 | vgpu_license_server: "" 45 | # URL of Helm repo to be added. If using NGC, get this from the fetch command in the console 46 | helm_repository: https://helm.ngc.nvidia.com/nvidia 47 | # Name of the helm chart to be deployed 48 | gpu_operator_helm_chart: nvidia/gpu-operator 49 | ## This is most likely GPU Operator Driver Registry 50 | gpu_operator_driver_registry: "nvcr.io/nvidia" 51 | 52 | # NGC Values 53 | ## If using a private/protected registry, set the NGC API Key here.
Leave blank for public registries 54 | ngc_registry_password: "" 55 | ## This is most likely an NGC email 56 | ngc_registry_email: "" 57 | ngc_registry_username: "$oauthtoken" 58 | 59 | # Network Operator Values 60 | ## If enable_network_operator is yes, make sure enable_rdma is set to yes as well 61 | enable_network_operator: no 62 | ## Set enable_rdma to yes for NVIDIA Certification 63 | enable_rdma: no 64 | 65 | # Proxy Configuration 66 | proxy: no 67 | http_proxy: "" 68 | https_proxy: "" 69 | 70 | # Cloud Native Stack for Developers Values 71 | ## Enable for Cloud Native Stack Developers 72 | cns_docker: no 73 | ## Enable for Cloud Native Stack Developers with TRD Driver 74 | cns_nvidia_driver: no 75 | 76 | ## Kubernetes resources 77 | k8s_apt_key: "https://pkgs.k8s.io/core:/stable:/v1.28/deb/Release.key" 78 | k8s_gpg_key: "https://pkgs.k8s.io/core:/stable:/v1.28/rpm/repodata/repomd.xml.key" 79 | k8s_apt_ring: "/etc/apt/keyrings/kubernetes-apt-keyring.gpg" 80 | k8s_registry: "registry.k8s.io" 81 | 82 | # Install NVIDIA NIM Operator 83 | enable_nim_operator: no 84 | 85 | # Local Path Provisioner and NFS Provisioner as Storage options 86 | storage: no 87 | 88 | # Monitoring Stack Prometheus/Grafana with GPU Metrics and Elastic Logging stack 89 | monitoring: no 90 | 91 | # Enable Kserve on Cloud Native Stack with Istio and Cert-Manager 92 | kserve: no 93 | 94 | # Install MetalLB 95 | loadbalancer: no 96 | # Example input: loadbalancer_ip: "10.117.20.50/32"; this can be the node/host IP 97 | loadbalancer_ip: "" 98 | 99 | ## Cloud Native Stack Validation 100 | cns_validation: no 101 | 102 | # BMC Details for Confidential Computing 103 | bmc_ip: 104 | bmc_username: 105 | bmc_password: 106 | 107 | # CSP values 108 | ## AWS EKS values 109 | aws_region: us-east-2 110 | aws_cluster_name: cns-cluster-1 111 | aws_gpu_instance_type: g4dn.2xlarge 112 | 113 | ## Google Cloud GKE Values 114 | #https://cloud.google.com/resource-manager/docs/creating-managing-projects#identifying_projects 115 | gke_project_id: 116 | #https://cloud.google.com/compute/docs/regions-zones#available 117 | gke_region: us-west1 118 | gke_node_zones: ["us-west1-b"] 119 | gke_cluster_name: gke-cluster-1 120 | 121 | ## Azure AKS Values 122 | aks_cluster_name: aks-cluster-1 123 | #https://azure.microsoft.com/en-us/explore/global-infrastructure/geographies/#geographies 124 | aks_cluster_location: "West US 2" 125 | #https://learn.microsoft.com/en-us/partner-center/marketplace/find-tenant-object-id 126 | azure_object_id: [""] -------------------------------------------------------------------------------- /playbooks/older_versions/cns_values_11.3.yaml: -------------------------------------------------------------------------------- 1 | cns_version: 11.3 2 | 3 | ## MicroK8s cluster 4 | microk8s: no 5 | ## Kubernetes Install with Kubeadm 6 | install_k8s: yes 7 | 8 | ## Components Versions 9 | # Container Runtime options are containerd, cri-o, cri-dockerd 10 | container_runtime: "containerd" 11 | containerd_version: "1.7.20" 12 | runc_version: "1.1.13" 13 | cni_plugins_version: "1.5.1" 14 | containerd_max_concurrent_downloads: "5" 15 | nvidia_container_toolkit_version: "1.16.1" 16 | crio_version: "1.28.8" 17 | cri_dockerd_version: "0.3.15" 18 | k8s_version: "1.28.12" 19 | calico_version: "3.27.4" 20 | flannel_version: "0.25.5" 21 | helm_version: "3.15.3" 22 | gpu_operator_version: "24.6.1" 23 | network_operator_version: "24.4.1" 24 | local_path_provisioner: "0.0.26" 25 | nfs_provisioner: "4.0.18" 26 | metallb_version: "0.14.5" 27 | kserve_version: "0.13" 28 |
prometheus_stack: "61.3.0" 29 | elastic_stack: "8.14.1" 30 | 31 | # GPU Operator Values 32 | enable_gpu_operator: yes 33 | confidential_computing: no 34 | gpu_driver_version: "550.90.07" 35 | use_open_kernel_module: no 36 | enable_mig: no 37 | mig_profile: all-disabled 38 | mig_strategy: single 39 | # To use GDS, use_open_kernel_module needs to be enabled 40 | enable_gds: no 41 | # Secure Boot for Ubuntu only 42 | enable_secure_boot: no 43 | enable_vgpu: no 44 | vgpu_license_server: "" 45 | # URL of Helm repo to be added. If using NGC, get this from the fetch command in the console 46 | helm_repository: https://helm.ngc.nvidia.com/nvidia 47 | # Name of the helm chart to be deployed 48 | gpu_operator_helm_chart: nvidia/gpu-operator 49 | ## This is most likely GPU Operator Driver Registry 50 | gpu_operator_driver_registry: "nvcr.io/nvidia" 51 | 52 | # NGC Values 53 | ## If using a private/protected registry, set the NGC API Key here. Leave blank for public registries 54 | ngc_registry_password: "" 55 | ## This is most likely an NGC email 56 | ngc_registry_email: "" 57 | ngc_registry_username: "$oauthtoken" 58 | 59 | # Network Operator Values 60 | ## If enable_network_operator is yes, make sure enable_rdma is set to yes as well 61 | enable_network_operator: no 62 | ## Set enable_rdma to yes for NVIDIA Certification 63 | enable_rdma: no 64 | 65 | # Proxy Configuration 66 | proxy: no 67 | http_proxy: "" 68 | https_proxy: "" 69 | 70 | # Cloud Native Stack for Developers Values 71 | ## Enable for Cloud Native Stack Developers 72 | cns_docker: no 73 | ## Enable for Cloud Native Stack Developers with TRD Driver 74 | cns_nvidia_driver: no 75 | 76 | ## Kubernetes resources 77 | k8s_apt_key: "https://pkgs.k8s.io/core:/stable:/v1.28/deb/Release.key" 78 | k8s_gpg_key: "https://pkgs.k8s.io/core:/stable:/v1.28/rpm/repodata/repomd.xml.key" 79 | k8s_apt_ring: "/etc/apt/keyrings/kubernetes-apt-keyring.gpg" 80 | k8s_registry: "registry.k8s.io" 81 | 82 | # Install NVIDIA NIM Operator 83 | enable_nim_operator: no 84 | 85 | # Local Path Provisioner and NFS Provisioner as Storage options 86 | storage: no 87 | 88 | # Monitoring Stack Prometheus/Grafana with GPU Metrics and Elastic Logging stack 89 | monitoring: no 90 | 91 | # Enable Kserve on Cloud Native Stack with Istio and Cert-Manager 92 | kserve: no 93 | 94 | # Install MetalLB 95 | loadbalancer: no 96 | # Example input: loadbalancer_ip: "10.117.20.50/32"; this can be the node/host IP 97 | loadbalancer_ip: "" 98 | 99 | ## Cloud Native Stack Validation 100 | cns_validation: no 101 | 102 | # BMC Details for Confidential Computing 103 | bmc_ip: 104 | bmc_username: 105 | bmc_password: 106 | 107 | # CSP values 108 | ## AWS EKS values 109 | aws_region: us-east-2 110 | aws_cluster_name: cns-cluster-1 111 | aws_gpu_instance_type: g4dn.2xlarge 112 | 113 | ## Google Cloud GKE Values 114 | #https://cloud.google.com/resource-manager/docs/creating-managing-projects#identifying_projects 115 | gke_project_id: 116 | #https://cloud.google.com/compute/docs/regions-zones#available 117 | gke_region: us-west1 118 | gke_node_zones: ["us-west1-b"] 119 | gke_cluster_name: gke-cluster-1 120 | 121 | ## Azure AKS Values 122 | aks_cluster_name: aks-cluster-1 123 | #https://azure.microsoft.com/en-us/explore/global-infrastructure/geographies/#geographies 124 | aks_cluster_location: "West US 2" 125 | #https://learn.microsoft.com/en-us/partner-center/marketplace/find-tenant-object-id 126 | azure_object_id: [""] --------------------------------------------------------------------------------
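A note spanning the values files above: every file ties the Network Operator toggle to RDMA, per its own comments. The following is a minimal sketch of a hypothetical user-edited copy of one of these values files (it is not a file shipped in this repository), showing the two keys switched on together as those comments require:

```yaml
# Hypothetical excerpt of an edited cns_values file; per the comments in the
# files above, enabling the Network Operator requires enable_rdma to be yes too.
enable_network_operator: yes
enable_rdma: yes
# All other keys keep the defaults from the corresponding versioned file.
```
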
/playbooks/older_versions/cns_values_12.0.yaml: -------------------------------------------------------------------------------- 1 | cns_version: 12.0 2 | 3 | ## MicroK8s cluster 4 | microk8s: no 5 | ## Kubernetes Install with Kubeadm 6 | install_k8s: yes 7 | 8 | ## Components Versions 9 | # Container Runtime options are containerd, cri-o, cri-dockerd 10 | container_runtime: "containerd" 11 | containerd_version: "1.7.13" 12 | runc_version: "1.1.12" 13 | cni_plugins_version: "1.4.0" 14 | containerd_max_concurrent_downloads: "5" 15 | nvidia_container_toolkit_version: "1.16.2" 16 | crio_version: "1.29.2" 17 | cri_dockerd_version: "0.3.10" 18 | k8s_version: "1.29.2" 19 | calico_version: "3.27.0" 20 | flannel_version: "0.24.2" 21 | helm_version: "3.14.2" 22 | gpu_operator_version: "24.6.2" 23 | network_operator_version: "24.1.0" 24 | nim_operator_version: "1.0.0" 25 | local_path_provisioner: "0.0.26" 26 | nfs_provisioner: "4.0.18" 27 | metallb_version: "0.14.5" 28 | kserve_version: "0.13" 29 | prometheus_stack: "67.5.0" 30 | prometheus_adapter: "4.11.0" 31 | grafana_operator: "v5.15.1" 32 | elastic_stack: "8.14.1" 33 | lws_version: "0.4.0" 34 | 35 | # GPU Operator Values 36 | enable_gpu_operator: yes 37 | confidential_computing: no 38 | gpu_driver_version: "550.54.15" 39 | use_open_kernel_module: no 40 | enable_mig: no 41 | mig_profile: all-disabled 42 | mig_strategy: single 43 | # To use GDS, use_open_kernel_module needs to be enabled 44 | enable_gds: no 45 | # Secure Boot for Ubuntu only 46 | enable_secure_boot: no 47 | enable_vgpu: no 48 | vgpu_license_server: "" 49 | # URL of Helm repo to be added. If using NGC, get this from the fetch command in the console 50 | helm_repository: https://helm.ngc.nvidia.com/nvidia 51 | # Name of the helm chart to be deployed 52 | gpu_operator_helm_chart: nvidia/gpu-operator 53 | ## This is most likely GPU Operator Driver Registry 54 | gpu_operator_driver_registry: "nvcr.io/nvidia" 55 | 56 | # NGC Values 57 | ## If using a private/protected registry, set the NGC API Key here.
Leave blank for public registries 58 | ngc_registry_password: "" 59 | ## This is most likely an NGC email 60 | ngc_registry_email: "" 61 | ngc_registry_username: "$oauthtoken" 62 | 63 | # Network Operator Values 64 | ## If the Network Operator is yes then make sure enable_rdma as well yes 65 | enable_network_operator: no 66 | ## Enable RDMA yes for NVIDIA Certification 67 | enable_rdma: no 68 | ## Enable for MLNX-OFED Driver Deployment 69 | deploy_ofed: no 70 | 71 | # Prxoy Configuration 72 | proxy: no 73 | http_proxy: "" 74 | https_proxy: "" 75 | 76 | # Cloud Native Stack for Developers Values 77 | ## Enable for Cloud Native Stack Developers 78 | cns_docker: no 79 | ## Enable For Cloud Native Stack Developers with TRD Driver 80 | cns_nvidia_driver: no 81 | nvidia_driver_mig: no 82 | 83 | ## Kubernetes resources 84 | k8s_apt_key: "https://pkgs.k8s.io/core:/stable:/v1.29/deb/Release.key" 85 | k8s_gpg_key: "https://pkgs.k8s.io/core:/stable:/v1.29/rpm/repodata/repomd.xml.key" 86 | k8s_apt_ring: "/etc/apt/keyrings/kubernetes-apt-keyring.gpg" 87 | k8s_registry: "registry.k8s.io" 88 | 89 | # Install NVIDIA NIM Operator 90 | enable_nim_operator: no 91 | 92 | # LeaderWorkerSet https://github.com/kubernetes-sigs/lws/tree/main 93 | lws: no 94 | 95 | # Local Path Provisioner and NFS Provisoner as Storage option 96 | storage: no 97 | 98 | # Monitoring Stack Prometheus/Grafana with GPU Metrics and Elastic Logging stack 99 | monitoring: no 100 | 101 | # Enable Kserve on Cloud Native Stack with Istio and Cert-Manager 102 | kserve: no 103 | 104 | # Install MetalLB 105 | loadbalancer: no 106 | # Example input loadbalancer_ip: "10.117.20.50/32", it could be node/host IP 107 | loadbalancer_ip: "" 108 | 109 | ## Cloud Native Stack Validation 110 | cns_validation: no 111 | 112 | # BMC Details for Confidential Computing 113 | bmc_ip: 114 | bmc_username: 115 | bmc_password: 116 | 117 | # CSP values 118 | ## AWS EKS values 119 | aws_region: us-east-2 120 | aws_cluster_name: cns-cluster-1 121 | aws_gpu_instance_type: g4dn.2xlarge 122 | 123 | ## Google Cloud GKE Values 124 | #https://cloud.google.com/resource-manager/docs/creating-managing-projects#identifying_projects 125 | gke_project_id: 126 | #https://cloud.google.com/compute/docs/regions-zones#available 127 | gke_region: us-west1 128 | gke_node_zones: ["us-west1-b"] 129 | gke_cluster_name: gke-cluster-1 130 | 131 | ## Azure AKS Values 132 | aks_cluster_name: aks-cluster-1 133 | #https://azure.microsoft.com/en-us/explore/global-infrastructure/geographies/#geographies 134 | aks_cluster_location: "West US 2" 135 | #https://learn.microsoft.com/en-us/partner-center/marketplace/find-tenant-object-id 136 | azure_object_id: [""] -------------------------------------------------------------------------------- /playbooks/older_versions/cns_values_12.1.yaml: -------------------------------------------------------------------------------- 1 | cns_version: 12.1 2 | 3 | ## MicroK8s cluster 4 | microk8s: no 5 | ## Kubernetes Install with Kubeadm 6 | install_k8s: yes 7 | 8 | ## Components Versions 9 | # Container Runtime options are containerd, cri-o, cri-dockerd 10 | container_runtime: "containerd" 11 | containerd_version: "1.7.16" 12 | runc_version: "1.1.12" 13 | cni_plugins_version: "1.4.1" 14 | containerd_max_concurrent_downloads: "5" 15 | nvidia_container_toolkit_version: "1.16.2" 16 | crio_version: "1.29.4" 17 | cri_dockerd_version: "0.3.13" 18 | k8s_version: "1.29.4" 19 | calico_version: "3.27.3" 20 | flannel_version: "0.25.1" 21 | helm_version: "3.14.4" 22 | 
gpu_operator_version: "24.6.2" 23 | network_operator_version: "24.1.1" 24 | nim_operator_version: "1.0.0" 25 | local_path_provisioner: "0.0.26" 26 | nfs_provisioner: "4.0.18" 27 | metallb_version: "0.14.5" 28 | kserve_version: "0.13" 29 | prometheus_stack: "67.5.0" 30 | prometheus_adapter: "4.11.0" 31 | grafana_operator: "v5.15.1" 32 | elastic_stack: "8.14.1" 33 | lws_version: "0.4.0" 34 | 35 | # GPU Operator Values 36 | enable_gpu_operator: yes 37 | confidential_computing: no 38 | gpu_driver_version: "550.54.15" 39 | use_open_kernel_module: no 40 | enable_mig: no 41 | mig_profile: all-disabled 42 | mig_strategy: single 43 | # To use GDS, use_open_kernel_module needs to be enabled 44 | enable_gds: no 45 | #Secure Boot for only Ubuntu 46 | enable_secure_boot: no 47 | enable_vgpu: no 48 | vgpu_license_server: "" 49 | # URL of Helm repo to be added. If using NGC get this from the fetch command in the console 50 | helm_repository: https://helm.ngc.nvidia.com/nvidia 51 | # Name of the helm chart to be deployed 52 | gpu_operator_helm_chart: nvidia/gpu-operator 53 | ## This is most likely GPU Operator Driver Registry 54 | gpu_operator_driver_registry: "nvcr.io/nvidia" 55 | 56 | # NGC Values 57 | ## If using a private/protected registry. NGC API Key. Leave blank for public registries 58 | ngc_registry_password: "" 59 | ## This is most likely an NGC email 60 | ngc_registry_email: "" 61 | ngc_registry_username: "$oauthtoken" 62 | 63 | # Network Operator Values 64 | ## If the Network Operator is yes then make sure enable_rdma as well yes 65 | enable_network_operator: no 66 | ## Enable RDMA yes for NVIDIA Certification 67 | enable_rdma: no 68 | ## Enable for MLNX-OFED Driver Deployment 69 | deploy_ofed: no 70 | 71 | # Prxoy Configuration 72 | proxy: no 73 | http_proxy: "" 74 | https_proxy: "" 75 | 76 | # Cloud Native Stack for Developers Values 77 | ## Enable for Cloud Native Stack Developers 78 | cns_docker: no 79 | ## Enable For Cloud Native Stack Developers with TRD Driver 80 | cns_nvidia_driver: no 81 | nvidia_driver_mig: no 82 | 83 | ## Kubernetes resources 84 | k8s_apt_key: "https://pkgs.k8s.io/core:/stable:/v1.29/deb/Release.key" 85 | k8s_gpg_key: "https://pkgs.k8s.io/core:/stable:/v1.29/rpm/repodata/repomd.xml.key" 86 | k8s_apt_ring: "/etc/apt/keyrings/kubernetes-apt-keyring.gpg" 87 | k8s_registry: "registry.k8s.io" 88 | 89 | # Install NVIDIA NIM Operator 90 | enable_nim_operator: no 91 | 92 | # LeaderWorkerSet https://github.com/kubernetes-sigs/lws/tree/main 93 | lws: no 94 | 95 | # Local Path Provisioner and NFS Provisoner as Storage option 96 | storage: no 97 | 98 | # Monitoring Stack Prometheus/Grafana with GPU Metrics and Elastic Logging stack 99 | monitoring: no 100 | 101 | # Enable Kserve on Cloud Native Stack with Istio and Cert-Manager 102 | kserve: no 103 | 104 | # Install MetalLB 105 | loadbalancer: no 106 | # Example input loadbalancer_ip: "10.117.20.50/32", it could be node/host IP 107 | loadbalancer_ip: "" 108 | 109 | ## Cloud Native Stack Validation 110 | cns_validation: no 111 | 112 | # BMC Details for Confidential Computing 113 | bmc_ip: 114 | bmc_username: 115 | bmc_password: 116 | 117 | # CSP values 118 | ## AWS EKS values 119 | aws_region: us-east-2 120 | aws_cluster_name: cns-cluster-1 121 | aws_gpu_instance_type: g4dn.2xlarge 122 | 123 | ## Google Cloud GKE Values 124 | #https://cloud.google.com/resource-manager/docs/creating-managing-projects#identifying_projects 125 | gke_project_id: 126 | #https://cloud.google.com/compute/docs/regions-zones#available 127 | gke_region: 
us-west1 128 | gke_node_zones: ["us-west1-b"] 129 | gke_cluster_name: gke-cluster-1 130 | 131 | ## Azure AKS Values 132 | aks_cluster_name: aks-cluster-1 133 | #https://azure.microsoft.com/en-us/explore/global-infrastructure/geographies/#geographies 134 | aks_cluster_location: "West US 2" 135 | #https://learn.microsoft.com/en-us/partner-center/marketplace/find-tenant-object-id 136 | azure_object_id: [""] -------------------------------------------------------------------------------- /playbooks/older_versions/cns_values_12.2.yaml: -------------------------------------------------------------------------------- 1 | cns_version: 12.2 2 | 3 | ## MicroK8s cluster 4 | microk8s: no 5 | ## Kubernetes Install with Kubeadm 6 | install_k8s: yes 7 | 8 | ## Components Versions 9 | # Container Runtime options are containerd, cri-o, cri-dockerd 10 | container_runtime: "containerd" 11 | containerd_version: "1.7.20" 12 | runc_version: "1.1.13" 13 | cni_plugins_version: "1.5.1" 14 | containerd_max_concurrent_downloads: "5" 15 | nvidia_container_toolkit_version: "1.16.2" 16 | crio_version: "1.29.6" 17 | cri_dockerd_version: "0.3.15" 18 | k8s_version: "1.29.6" 19 | calico_version: "3.27.4" 20 | flannel_version: "0.25.5" 21 | helm_version: "3.15.3" 22 | gpu_operator_version: "24.6.2" 23 | network_operator_version: "24.4.1" 24 | nim_operator_version: "1.0.0" 25 | local_path_provisioner: "0.0.26" 26 | nfs_provisioner: "4.0.18" 27 | metallb_version: "0.14.5" 28 | kserve_version: "0.13" 29 | prometheus_stack: "67.5.0" 30 | prometheus_adapter: "4.11.0" 31 | grafana_operator: "v5.15.1" 32 | elastic_stack: "8.14.1" 33 | lws_version: "0.4.0" 34 | 35 | # GPU Operator Values 36 | enable_gpu_operator: yes 37 | confidential_computing: no 38 | gpu_driver_version: "550.90.07" 39 | use_open_kernel_module: no 40 | enable_mig: no 41 | mig_profile: all-disabled 42 | mig_strategy: single 43 | # To use GDS, use_open_kernel_module needs to be enabled 44 | enable_gds: no 45 | #Secure Boot for only Ubuntu 46 | enable_secure_boot: no 47 | enable_vgpu: no 48 | vgpu_license_server: "" 49 | # URL of Helm repo to be added. If using NGC get this from the fetch command in the console 50 | helm_repository: https://helm.ngc.nvidia.com/nvidia 51 | # Name of the helm chart to be deployed 52 | gpu_operator_helm_chart: nvidia/gpu-operator 53 | ## This is most likely GPU Operator Driver Registry 54 | gpu_operator_driver_registry: "nvcr.io/nvidia" 55 | 56 | # NGC Values 57 | ## If using a private/protected registry. NGC API Key. 
Leave blank for public registries 58 | ngc_registry_password: "" 59 | ## This is most likely an NGC email 60 | ngc_registry_email: "" 61 | ngc_registry_username: "$oauthtoken" 62 | 63 | # Network Operator Values 64 | ## If the Network Operator is yes then make sure enable_rdma as well yes 65 | enable_network_operator: no 66 | ## Enable RDMA yes for NVIDIA Certification 67 | enable_rdma: no 68 | ## Enable for MLNX-OFED Driver Deployment 69 | deploy_ofed: no 70 | 71 | # Prxoy Configuration 72 | proxy: no 73 | http_proxy: "" 74 | https_proxy: "" 75 | 76 | # Cloud Native Stack for Developers Values 77 | ## Enable for Cloud Native Stack Developers 78 | cns_docker: no 79 | ## Enable For Cloud Native Stack Developers with TRD Driver 80 | cns_nvidia_driver: no 81 | nvidia_driver_mig: no 82 | 83 | ## Kubernetes resources 84 | k8s_apt_key: "https://pkgs.k8s.io/core:/stable:/v1.29/deb/Release.key" 85 | k8s_gpg_key: "https://pkgs.k8s.io/core:/stable:/v1.29/rpm/repodata/repomd.xml.key" 86 | k8s_apt_ring: "/etc/apt/keyrings/kubernetes-apt-keyring.gpg" 87 | k8s_registry: "registry.k8s.io" 88 | 89 | # Install NVIDIA NIM Operator 90 | enable_nim_operator: no 91 | 92 | # LeaderWorkerSet https://github.com/kubernetes-sigs/lws/tree/main 93 | lws: no 94 | 95 | # Local Path Provisioner and NFS Provisoner as Storage option 96 | storage: no 97 | 98 | # Monitoring Stack Prometheus/Grafana with GPU Metrics and Elastic Logging stack 99 | monitoring: no 100 | 101 | # Enable Kserve on Cloud Native Stack with Istio and Cert-Manager 102 | kserve: no 103 | 104 | # Install MetalLB 105 | loadbalancer: no 106 | # Example input loadbalancer_ip: "10.117.20.50/32", it could be node/host IP 107 | loadbalancer_ip: "" 108 | 109 | ## Cloud Native Stack Validation 110 | cns_validation: no 111 | 112 | # BMC Details for Confidential Computing 113 | bmc_ip: 114 | bmc_username: 115 | bmc_password: 116 | 117 | # CSP values 118 | ## AWS EKS values 119 | aws_region: us-east-2 120 | aws_cluster_name: cns-cluster-1 121 | aws_gpu_instance_type: g4dn.2xlarge 122 | 123 | ## Google Cloud GKE Values 124 | #https://cloud.google.com/resource-manager/docs/creating-managing-projects#identifying_projects 125 | gke_project_id: 126 | #https://cloud.google.com/compute/docs/regions-zones#available 127 | gke_region: us-west1 128 | gke_node_zones: ["us-west1-b"] 129 | gke_cluster_name: gke-cluster-1 130 | 131 | ## Azure AKS Values 132 | aks_cluster_name: aks-cluster-1 133 | #https://azure.microsoft.com/en-us/explore/global-infrastructure/geographies/#geographies 134 | aks_cluster_location: "West US 2" 135 | #https://learn.microsoft.com/en-us/partner-center/marketplace/find-tenant-object-id 136 | azure_object_id: [""] -------------------------------------------------------------------------------- /playbooks/older_versions/cns_values_12.3.yaml: -------------------------------------------------------------------------------- 1 | cns_version: 12.3 2 | 3 | ## MicroK8s cluster 4 | microk8s: no 5 | ## Kubernetes Install with Kubeadm 6 | install_k8s: yes 7 | 8 | ## Components Versions 9 | # Container Runtime options are containerd, cri-o, cri-dockerd 10 | container_runtime: "containerd" 11 | containerd_version: "1.7.23" 12 | runc_version: "1.1.14" 13 | cni_plugins_version: "1.5.1" 14 | containerd_max_concurrent_downloads: "5" 15 | nvidia_container_toolkit_version: "1.17.4" 16 | crio_version: "1.29.10" 17 | cri_dockerd_version: "0.3.15" 18 | k8s_version: "1.29.10" 19 | calico_version: "3.28.2" 20 | flannel_version: "0.25.6" 21 | helm_version: "3.16.2" 22 | 
gpu_operator_version: "24.9.2" 23 | network_operator_version: "24.10.1" 24 | nim_operator_version: "1.0.0" 25 | local_path_provisioner: "0.0.30" 26 | nfs_provisioner: "4.0.18" 27 | metallb_version: "0.14.8" 28 | kserve_version: "0.14" 29 | prometheus_stack: "67.5.0" 30 | prometheus_adapter: "4.11.0" 31 | grafana_operator: "v5.15.1" 32 | elastic_stack: "8.15.3" 33 | lws_version: "0.4.0" 34 | 35 | # GPU Operator Values 36 | enable_gpu_operator: yes 37 | confidential_computing: no 38 | gpu_driver_version: "570.86.15" 39 | use_open_kernel_module: no 40 | enable_mig: no 41 | mig_profile: all-disabled 42 | mig_strategy: single 43 | # To use GDS, use_open_kernel_module needs to be enabled 44 | enable_gds: no 45 | #Secure Boot for only Ubuntu 46 | enable_secure_boot: no 47 | enable_vgpu: no 48 | vgpu_license_server: "" 49 | # URL of Helm repo to be added. If using NGC get this from the fetch command in the console 50 | helm_repository: https://helm.ngc.nvidia.com/nvidia 51 | # Name of the helm chart to be deployed 52 | gpu_operator_helm_chart: nvidia/gpu-operator 53 | ## This is most likely GPU Operator Driver Registry 54 | gpu_operator_driver_registry: "nvcr.io/nvidia" 55 | 56 | # NGC Values 57 | ## If using a private/protected registry. NGC API Key. Leave blank for public registries 58 | ngc_registry_password: "" 59 | ## This is most likely an NGC email 60 | ngc_registry_email: "" 61 | ngc_registry_username: "$oauthtoken" 62 | 63 | # Network Operator Values 64 | ## If the Network Operator is yes then make sure enable_rdma as well yes 65 | enable_network_operator: no 66 | ## Enable RDMA yes for NVIDIA Certification 67 | enable_rdma: no 68 | ## Enable for MLNX-OFED Driver Deployment 69 | deploy_ofed: no 70 | 71 | # Prxoy Configuration 72 | proxy: no 73 | http_proxy: "" 74 | https_proxy: "" 75 | 76 | # Cloud Native Stack for Developers Values 77 | ## Enable for Cloud Native Stack Developers 78 | cns_docker: no 79 | ## Enable For Cloud Native Stack Developers with TRD Driver 80 | cns_nvidia_driver: no 81 | nvidia_driver_mig: no 82 | 83 | ## Kubernetes resources 84 | k8s_apt_key: "https://pkgs.k8s.io/core:/stable:/v1.29/deb/Release.key" 85 | k8s_gpg_key: "https://pkgs.k8s.io/core:/stable:/v1.29/rpm/repodata/repomd.xml.key" 86 | k8s_apt_ring: "/etc/apt/keyrings/kubernetes-apt-keyring.gpg" 87 | k8s_registry: "registry.k8s.io" 88 | 89 | # Install NVIDIA NIM Operator 90 | enable_nim_operator: no 91 | 92 | # LeaderWorkerSet https://github.com/kubernetes-sigs/lws/tree/main 93 | lws: no 94 | 95 | # Local Path Provisioner and NFS Provisoner as Storage option 96 | storage: no 97 | 98 | # Monitoring Stack Prometheus/Grafana with GPU Metrics and Elastic Logging stack 99 | monitoring: no 100 | 101 | # Enable Kserve on Cloud Native Stack with Istio and Cert-Manager 102 | kserve: no 103 | 104 | # Install MetalLB 105 | loadbalancer: no 106 | # Example input loadbalancer_ip: "10.117.20.50/32", it could be node/host IP 107 | loadbalancer_ip: "" 108 | 109 | ## Cloud Native Stack Validation 110 | cns_validation: no 111 | 112 | # BMC Details for Confidential Computing 113 | bmc_ip: 114 | bmc_username: 115 | bmc_password: 116 | 117 | # CSP values 118 | ## AWS EKS values 119 | aws_region: us-east-2 120 | aws_cluster_name: cns-cluster-1 121 | aws_gpu_instance_type: g4dn.2xlarge 122 | 123 | ## Google Cloud GKE Values 124 | #https://cloud.google.com/resource-manager/docs/creating-managing-projects#identifying_projects 125 | gke_project_id: 126 | #https://cloud.google.com/compute/docs/regions-zones#available 127 | gke_region: 
us-west1 128 | gke_node_zones: ["us-west1-b"] 129 | gke_cluster_name: gke-cluster-1 130 | 131 | ## Azure AKS Values 132 | aks_cluster_name: aks-cluster-1 133 | #https://azure.microsoft.com/en-us/explore/global-infrastructure/geographies/#geographies 134 | aks_cluster_location: "West US 2" 135 | #https://learn.microsoft.com/en-us/partner-center/marketplace/find-tenant-object-id 136 | azure_object_id: [""] -------------------------------------------------------------------------------- /playbooks/older_versions/cns_values_6.4.yaml: -------------------------------------------------------------------------------- 1 | cns_version: 6.4 2 | 3 | ## MicroK8s cluster 4 | microk8s: no 5 | ## Kubernetes Install with Kubeadm 6 | install_k8s: yes 7 | 8 | ## Components Versions 9 | # Container Runtime options are containerd, cri-o, cri-dockerd 10 | container_runtime: "containerd" 11 | containerd_version: "1.7.3" 12 | runc_version: "1.1.11" 13 | cni_plugins_version: "1.4.0" 14 | containerd_max_concurrent_downloads: "5" 15 | crio_version: "1.26.4" 16 | cri_dockerd_version: "0.3.4" 17 | k8s_version: "1.26.7" 18 | calico_version: "3.26.1" 19 | flannel_version: "0.22.0" 20 | helm_version: "3.12.2" 21 | gpu_operator_version: "23.6.0" 22 | network_operator_version: "23.5.0" 23 | local_path_provisioner: "0.0.24" 24 | nfs_provisioner: "4.0.18" 25 | metallb_version: "0.14.5" 26 | kserve_version: "0.13" 27 | prometheus_stack: "61.3.0" 28 | elastic_stack: "8.14.1" 29 | 30 | # GPU Operator Values 31 | enable_gpu_operator: yes 32 | confidential_computing: no 33 | gpu_driver_version: "535.86.10" 34 | use_open_kernel_module: no 35 | enable_mig: no 36 | mig_profile: all-disabled 37 | mig_strategy: single 38 | enable_gds: no 39 | enable_secure_boot: no 40 | enable_vgpu: no 41 | vgpu_license_server: "" 42 | # URL of Helm repo to be added. If using NGC get this from the fetch command in the console 43 | helm_repository: https://helm.ngc.nvidia.com/nvidia 44 | # Name of the helm chart to be deployed 45 | gpu_operator_helm_chart: nvidia/gpu-operator 46 | ## This is most likely GPU Operator Driver Registry 47 | gpu_operator_driver_registry: "nvcr.io/nvidia" 48 | 49 | # NGC Values 50 | ## If using a private/protected registry. NGC API Key. 
Leave blank for public registries 51 | ngc_registry_password: "" 52 | ## This is most likely an NGC email 53 | ngc_registry_email: "" 54 | ngc_registry_username: "$oauthtoken" 55 | 56 | # Network Operator Values 57 | ## If the Network Operator is yes then make sure enable_rdma as well yes 58 | enable_network_operator: no 59 | ## Enable RDMA yes for NVIDIA Certification 60 | enable_rdma: no 61 | 62 | # Prxoy Configuration 63 | proxy: no 64 | http_proxy: "" 65 | https_proxy: "" 66 | 67 | # Cloud Native Stack for Developers Values 68 | ## Enable for Cloud Native Stack Developers 69 | cns_docker: no 70 | ## Enable For Cloud Native Stack Developers with TRD Driver 71 | cns_nvidia_driver: no 72 | 73 | ## Kubernetes resources 74 | k8s_apt_key: "https://pkgs.k8s.io/core:/stable:/v1.26/deb/Release.key" 75 | k8s_gpg_key: "https://pkgs.k8s.io/core:/stable:/v1.26/rpm/repodata/repomd.xml.key" 76 | k8s_apt_ring: "/etc/apt/keyrings/kubernetes-apt-keyring.gpg" 77 | k8s_registry: "registry.k8s.io" 78 | 79 | # Install NVIDIA NIM Operator 80 | enable_nim_operator: no 81 | 82 | # Local Path Provisioner and NFS Provisoner as Storage option 83 | storage: no 84 | 85 | # Monitoring Stack Prometheus/Grafana with GPU Metrics and Elastic Logging stack 86 | monitoring: no 87 | 88 | # Enable Kserve on Cloud Native Stack with Istio and Cert-Manager 89 | kserve: no 90 | 91 | # Install MetalLB 92 | loadbalancer: no 93 | # Example input loadbalancer_ip: "10.117.20.50/32", it could be node/host IP 94 | loadbalancer_ip: "" 95 | 96 | ## Cloud Native Stack Validation 97 | cns_validation: no 98 | 99 | # BMC Details for Confidential Computing 100 | bmc_ip: 101 | bmc_username: 102 | bmc_password: 103 | 104 | # CSP values 105 | ## AWS EKS values 106 | aws_region: us-east-2 107 | aws_cluster_name: cns-cluster-1 108 | aws_gpu_instance_type: g4dn.2xlarge 109 | 110 | ## Google Cloud GKE Values 111 | #https://cloud.google.com/resource-manager/docs/creating-managing-projects#identifying_projects 112 | gke_project_id: 113 | #https://cloud.google.com/compute/docs/regions-zones#available 114 | gke_region: us-west1 115 | gke_node_zones: ["us-west1-b"] 116 | gke_cluster_name: gke-cluster-1 117 | 118 | ## Azure AKS Values 119 | aks_cluster_name: aks-cluster-1 120 | #https://azure.microsoft.com/en-us/explore/global-infrastructure/geographies/#geographies 121 | aks_cluster_location: "West US 2" 122 | #https://learn.microsoft.com/en-us/partner-center/marketplace/find-tenant-object-id 123 | azure_object_id: [""] -------------------------------------------------------------------------------- /playbooks/older_versions/cns_values_8.0.yaml: -------------------------------------------------------------------------------- 1 | cns_version: 8.0 2 | 3 | ## Components Versions 4 | # Container Runtime options are containerd, cri-o, cri-dockerd 5 | container_runtime: "containerd" 6 | containerd_version: "1.6.8" 7 | crio_version: "1.25.3" 8 | cri_dockerd_version: "0.3.4" 9 | k8s_version: "1.25.2" 10 | calico_version: "3.24.1" 11 | flannel_version: "0.19.2" 12 | helm_version: "3.10.0" 13 | gpu_operator_version: "22.9.0" 14 | network_operator_version: "1.3.0" 15 | local_path_provisioner: "0.0.24" 16 | 17 | # GPU Operator Values 18 | enable_gpu_operator: yes 19 | confidential_computing: no 20 | gpu_driver_version: "520.61.07" 21 | enable_mig: no 22 | mig_profile: all-disabled 23 | mig_strategy: single 24 | enable_gds: no 25 | enable_secure_boot: no 26 | enable_vgpu: no 27 | vgpu_license_server: "" 28 | # URL of Helm repo to be added. 
If using NGC get this from the fetch command in the console 29 | helm_repository: https://helm.ngc.nvidia.com/nvidia 30 | # Name of the helm chart to be deployed 31 | gpu_operator_helm_chart: nvidia/gpu-operator 32 | ## If using a private/protected registry. NGC API Key. Leave blank for public registries 33 | gpu_operator_registry_password: "" 34 | ## This is most likely an NGC email 35 | gpu_operator_registry_email: "" 36 | ## This is most likely GPU Operator Driver Registry 37 | gpu_operator_driver_registry: "nvcr.io/nvidia" 38 | gpu_operator_registry_username: "$oauthtoken" 39 | 40 | # Network Operator Values 41 | ## If the Network Operator is yes then make sure enable_rdma as well yes 42 | enable_network_operator: no 43 | ## Enable RDMA yes for NVIDIA Certification 44 | enable_rdma: no 45 | 46 | # Prxoy Configuration 47 | proxy: no 48 | http_proxy: "" 49 | https_proxy: "" 50 | 51 | # Cloud Native Stack for Developers Values 52 | ## Enable for Cloud Native Stack Developers 53 | cns_docker: no 54 | ## Enable For Cloud Native Stack Developers with TRD Driver 55 | cns_nvidia_driver: no 56 | 57 | ## Kubernetes resources 58 | k8s_apt_key: "https://packages.cloud.google.com/apt/doc/apt-key.gpg" 59 | k8s_apt_repository: " https://apt.kubernetes.io/ kubernetes-xenial main" 60 | k8s_apt_ring: "/etc/apt/keyrings/kubernetes-archive-keyring.gpg" 61 | k8s_registry: "registry.k8s.io" 62 | 63 | # Local Path Provisioner as Storage option 64 | storage: no 65 | 66 | # Monitoring Stack Prometheus/Grafana with GPU Metrics 67 | monitoring: no 68 | 69 | ## Cloud Native Stack Validation 70 | cns_validation: no 71 | 72 | # BMC Details for Confidential Computing 73 | bmc_ip: 74 | bmc_username: 75 | bmc_password: 76 | 77 | # CSP values 78 | ## AWS EKS values 79 | aws_region: us-east-2 80 | aws_cluster_name: eks-cluster-1 81 | aws_gpu_instance_type: g4dn.2xlarge 82 | 83 | ## Google Cloud GKE Values 84 | #https://cloud.google.com/resource-manager/docs/creating-managing-projects#identifying_projects 85 | gke_project_id: 86 | #https://cloud.google.com/compute/docs/regions-zones#available 87 | gke_region: us-west1 88 | gke_node_zones: ["us-west1-b"] 89 | gke_cluster_name: gke-cluster-1 90 | 91 | ## Azure AKS Values 92 | aks_cluster_name: aks-cluster-1 93 | #https://azure.microsoft.com/en-us/explore/global-infrastructure/geographies/#geographies 94 | aks_cluster_location: "West US 2" 95 | #https://learn.microsoft.com/en-us/partner-center/marketplace/find-tenant-object-id 96 | azure_object_id: [""] -------------------------------------------------------------------------------- /playbooks/older_versions/cns_values_8.1.yaml: -------------------------------------------------------------------------------- 1 | cns_version: 8.1 2 | 3 | ## Components Versions 4 | # Container Runtime options are containerd, cri-o, cri-dockerd 5 | container_runtime: "containerd" 6 | containerd_version: "1.6.10" 7 | crio_version: "1.25.3" 8 | cri_dockerd_version: "0.3.4" 9 | k8s_version: "1.25.4" 10 | calico_version: "3.24.1" 11 | flannel_version: "0.20.0" 12 | helm_version: "3.10.2" 13 | gpu_operator_version: "22.9.1" 14 | network_operator_version: "1.4.0" 15 | local_path_provisioner: "0.0.24" 16 | 17 | # GPU Operator Values 18 | enable_gpu_operator: yes 19 | confidential_computing: no 20 | gpu_driver_version: "525.85.12" 21 | enable_mig: no 22 | mig_profile: all-disabled 23 | mig_strategy: single 24 | enable_gds: no 25 | enable_secure_boot: no 26 | enable_vgpu: no 27 | vgpu_license_server: "" 28 | # URL of Helm repo to be added. 
If using NGC get this from the fetch command in the console 29 | helm_repository: https://helm.ngc.nvidia.com/nvidia 30 | # Name of the helm chart to be deployed 31 | gpu_operator_helm_chart: nvidia/gpu-operator 32 | ## If using a private/protected registry. NGC API Key. Leave blank for public registries 33 | gpu_operator_registry_password: "" 34 | ## This is most likely an NGC email 35 | gpu_operator_registry_email: "" 36 | ## This is most likely GPU Operator Driver Registry 37 | gpu_operator_driver_registry: "nvcr.io/nvidia" 38 | gpu_operator_registry_username: "$oauthtoken" 39 | 40 | # Network Operator Values 41 | ## If the Network Operator is yes then make sure enable_rdma as well yes 42 | enable_network_operator: no 43 | ## Enable RDMA yes for NVIDIA Certification 44 | enable_rdma: no 45 | 46 | # Prxoy Configuration 47 | proxy: no 48 | http_proxy: "" 49 | https_proxy: "" 50 | 51 | # Cloud Native Stack for Developers Values 52 | ## Enable for Cloud Native Stack Developers 53 | cns_docker: no 54 | ## Enable For Cloud Native Stack Developers with TRD Driver 55 | cns_nvidia_driver: no 56 | 57 | ## Kubernetes resources 58 | k8s_apt_key: "https://packages.cloud.google.com/apt/doc/apt-key.gpg" 59 | k8s_apt_repository: " https://apt.kubernetes.io/ kubernetes-xenial main" 60 | k8s_apt_ring: "/etc/apt/keyrings/kubernetes-archive-keyring.gpg" 61 | k8s_registry: "registry.k8s.io" 62 | 63 | # Local Path Provisioner as Storage option 64 | storage: no 65 | 66 | # Monitoring Stack Prometheus/Grafana with GPU Metrics 67 | monitoring: no 68 | 69 | ## Cloud Native Stack Validation 70 | cns_validation: no 71 | 72 | # BMC Details for Confidential Computing 73 | bmc_ip: 74 | bmc_username: 75 | bmc_password: 76 | 77 | # CSP values 78 | ## AWS EKS values 79 | aws_region: us-east-2 80 | aws_cluster_name: cns-cluster-1 81 | aws_gpu_instance_type: g4dn.2xlarge 82 | 83 | ## Google Cloud GKE Values 84 | #https://cloud.google.com/resource-manager/docs/creating-managing-projects#identifying_projects 85 | gke_project_id: 86 | #https://cloud.google.com/compute/docs/regions-zones#available 87 | gke_region: us-west1 88 | gke_node_zones: ["us-west1-b"] 89 | gke_cluster_name: gke-cluster-1 90 | 91 | ## Azure AKS Values 92 | aks_cluster_name: aks-cluster-1 93 | #https://azure.microsoft.com/en-us/explore/global-infrastructure/geographies/#geographies 94 | aks_cluster_location: "West US 2" 95 | #https://learn.microsoft.com/en-us/partner-center/marketplace/find-tenant-object-id 96 | azure_object_id: [""] -------------------------------------------------------------------------------- /playbooks/older_versions/cns_values_8.2.yaml: -------------------------------------------------------------------------------- 1 | cns_version: 8.2 2 | 3 | ## Components Versions 4 | # Container Runtime options are containerd, cri-o, cri-dockerd 5 | container_runtime: "containerd" 6 | containerd_version: "1.6.16" 7 | crio_version: "1.25.3" 8 | cri_dockerd_version: "0.3.4" 9 | k8s_version: "1.25.6" 10 | calico_version: "3.25.0" 11 | flannel_version: "0.20.2" 12 | helm_version: "3.11.0" 13 | gpu_operator_version: "22.9.2" 14 | network_operator_version: "1.4.0" 15 | local_path_provisioner: "0.0.24" 16 | 17 | # GPU Operator Values 18 | enable_gpu_operator: yes 19 | confidential_computing: no 20 | gpu_driver_version: "525.85.12" 21 | enable_mig: no 22 | mig_profile: all-disabled 23 | mig_strategy: single 24 | enable_gds: no 25 | enable_secure_boot: no 26 | enable_vgpu: no 27 | vgpu_license_server: "" 28 | # URL of Helm repo to be added. 
If using NGC get this from the fetch command in the console 29 | helm_repository: https://helm.ngc.nvidia.com/nvidia 30 | # Name of the helm chart to be deployed 31 | gpu_operator_helm_chart: nvidia/gpu-operator 32 | ## If using a private/protected registry. NGC API Key. Leave blank for public registries 33 | gpu_operator_registry_password: "" 34 | ## This is most likely an NGC email 35 | gpu_operator_registry_email: "" 36 | ## This is most likely GPU Operator Driver Registry 37 | gpu_operator_driver_registry: "nvcr.io/nvidia" 38 | gpu_operator_registry_username: "$oauthtoken" 39 | 40 | # Network Operator Values 41 | ## If the Network Operator is yes then make sure enable_rdma as well yes 42 | enable_network_operator: no 43 | ## Enable RDMA yes for NVIDIA Certification 44 | enable_rdma: no 45 | 46 | # Prxoy Configuration 47 | proxy: no 48 | http_proxy: "" 49 | https_proxy: "" 50 | 51 | # Cloud Native Stack for Developers Values 52 | ## Enable for Cloud Native Stack Developers 53 | cns_docker: no 54 | ## Enable For Cloud Native Stack Developers with TRD Driver 55 | cns_nvidia_driver: no 56 | 57 | ## Kubernetes resources 58 | k8s_apt_key: "https://packages.cloud.google.com/apt/doc/apt-key.gpg" 59 | k8s_apt_repository: " https://apt.kubernetes.io/ kubernetes-xenial main" 60 | k8s_apt_ring: "/etc/apt/keyrings/kubernetes-archive-keyring.gpg" 61 | k8s_registry: "registry.k8s.io" 62 | 63 | # Local Path Provisioner as Storage option 64 | storage: no 65 | 66 | # Monitoring Stack Prometheus/Grafana with GPU Metrics 67 | monitoring: no 68 | 69 | ## Cloud Native Stack Validation 70 | cns_validation: no 71 | 72 | # BMC Details for Confidential Computing 73 | bmc_ip: 74 | bmc_username: 75 | bmc_password: 76 | 77 | # CSP values 78 | ## AWS EKS values 79 | aws_region: us-east-2 80 | aws_cluster_name: cns-cluster-1 81 | aws_gpu_instance_type: g4dn.2xlarge 82 | 83 | ## Google Cloud GKE Values 84 | #https://cloud.google.com/resource-manager/docs/creating-managing-projects#identifying_projects 85 | gke_project_id: 86 | #https://cloud.google.com/compute/docs/regions-zones#available 87 | gke_region: us-west1 88 | gke_node_zones: ["us-west1-b"] 89 | gke_cluster_name: gke-cluster-1 90 | 91 | ## Azure AKS Values 92 | aks_cluster_name: aks-cluster-1 93 | #https://azure.microsoft.com/en-us/explore/global-infrastructure/geographies/#geographies 94 | aks_cluster_location: "West US 2" 95 | #https://learn.microsoft.com/en-us/partner-center/marketplace/find-tenant-object-id 96 | azure_object_id: [""] -------------------------------------------------------------------------------- /playbooks/older_versions/cns_values_8.3.yaml: -------------------------------------------------------------------------------- 1 | cns_version: 8.3 2 | 3 | ## Components Versions 4 | # Container Runtime options are containerd, cri-o, cri-dockerd 5 | container_runtime: "containerd" 6 | containerd_version: "1.7.0" 7 | crio_version: "1.25.3" 8 | cri_dockerd_version: "0.3.4" 9 | k8s_version: "1.25.8" 10 | calico_version: "3.25.1" 11 | flannel_version: "0.21.4" 12 | helm_version: "3.11.3" 13 | gpu_operator_version: "23.3.2" 14 | network_operator_version: "23.4.0" 15 | local_path_provisioner: "0.0.24" 16 | 17 | # GPU Operator Values 18 | enable_gpu_operator: yes 19 | confidential_computing: no 20 | gpu_driver_version: "525.105.17" 21 | enable_mig: no 22 | mig_profile: all-disabled 23 | mig_strategy: single 24 | enable_gds: no 25 | enable_secure_boot: no 26 | enable_vgpu: no 27 | vgpu_license_server: "" 28 | # URL of Helm repo to be added. 
If using NGC get this from the fetch command in the console 29 | helm_repository: https://helm.ngc.nvidia.com/nvidia 30 | # Name of the helm chart to be deployed 31 | gpu_operator_helm_chart: nvidia/gpu-operator 32 | ## If using a private/protected registry. NGC API Key. Leave blank for public registries 33 | gpu_operator_registry_password: "" 34 | ## This is most likely an NGC email 35 | gpu_operator_registry_email: "" 36 | ## This is most likely GPU Operator Driver Registry 37 | gpu_operator_driver_registry: "nvcr.io/nvidia" 38 | gpu_operator_registry_username: "$oauthtoken" 39 | 40 | # Network Operator Values 41 | ## If the Network Operator is yes then make sure enable_rdma as well yes 42 | enable_network_operator: no 43 | ## Enable RDMA yes for NVIDIA Certification 44 | enable_rdma: no 45 | 46 | # Prxoy Configuration 47 | proxy: no 48 | http_proxy: "" 49 | https_proxy: "" 50 | 51 | # Cloud Native Stack for Developers Values 52 | ## Enable for Cloud Native Stack Developers 53 | cns_docker: no 54 | ## Enable For Cloud Native Stack Developers with TRD Driver 55 | cns_nvidia_driver: no 56 | 57 | ## Kubernetes resources 58 | k8s_apt_key: "https://packages.cloud.google.com/apt/doc/apt-key.gpg" 59 | k8s_apt_repository: " https://apt.kubernetes.io/ kubernetes-xenial main" 60 | k8s_apt_ring: "/etc/apt/keyrings/kubernetes-archive-keyring.gpg" 61 | k8s_registry: "registry.k8s.io" 62 | 63 | # Local Path Provisioner as Storage option 64 | storage: no 65 | 66 | # Monitoring Stack Prometheus/Grafana with GPU Metrics 67 | monitoring: no 68 | 69 | ## Cloud Native Stack Validation 70 | cns_validation: no 71 | 72 | # BMC Details for Confidential Computing 73 | bmc_ip: 74 | bmc_username: 75 | bmc_password: 76 | 77 | # CSP values 78 | ## AWS EKS values 79 | aws_region: us-east-2 80 | aws_cluster_name: cns-cluster-1 81 | aws_gpu_instance_type: g4dn.2xlarge 82 | 83 | ## Google Cloud GKE Values 84 | #https://cloud.google.com/resource-manager/docs/creating-managing-projects#identifying_projects 85 | gke_project_id: 86 | #https://cloud.google.com/compute/docs/regions-zones#available 87 | gke_region: us-west1 88 | gke_node_zones: ["us-west1-b"] 89 | gke_cluster_name: gke-cluster-1 90 | 91 | ## Azure AKS Values 92 | aks_cluster_name: aks-cluster-1 93 | #https://azure.microsoft.com/en-us/explore/global-infrastructure/geographies/#geographies 94 | aks_cluster_location: "West US 2" 95 | #https://learn.microsoft.com/en-us/partner-center/marketplace/find-tenant-object-id 96 | azure_object_id: [""] -------------------------------------------------------------------------------- /playbooks/older_versions/cns_values_8.4.yaml: -------------------------------------------------------------------------------- 1 | cns_version: 8.4 2 | 3 | ## Components Versions 4 | # Container Runtime options are containerd, cri-o, cri-dockerd 5 | container_runtime: "containerd" 6 | containerd_version: "1.7.2" 7 | crio_version: "1.25.3" 8 | cri_dockerd_version: "0.3.4" 9 | k8s_version: "1.25.10" 10 | calico_version: "3.26.1" 11 | flannel_version: "0.22.0" 12 | helm_version: "3.12.1" 13 | gpu_operator_version: "23.3.2" 14 | network_operator_version: "23.5.0" 15 | local_path_provisioner: "0.0.24" 16 | 17 | # GPU Operator Values 18 | enable_gpu_operator: yes 19 | confidential_computing: no 20 | gpu_driver_version: "535.54.03" 21 | enable_mig: no 22 | mig_profile: all-disabled 23 | mig_strategy: single 24 | enable_gds: no 25 | enable_secure_boot: no 26 | enable_vgpu: no 27 | vgpu_license_server: "" 28 | # URL of Helm repo to be added. 
If using NGC get this from the fetch command in the console 29 | helm_repository: https://helm.ngc.nvidia.com/nvidia 30 | # Name of the helm chart to be deployed 31 | gpu_operator_helm_chart: nvidia/gpu-operator 32 | ## If using a private/protected registry. NGC API Key. Leave blank for public registries 33 | gpu_operator_registry_password: "" 34 | ## This is most likely an NGC email 35 | gpu_operator_registry_email: "" 36 | ## This is most likely GPU Operator Driver Registry 37 | gpu_operator_driver_registry: "nvcr.io/nvidia" 38 | gpu_operator_registry_username: "$oauthtoken" 39 | 40 | # Network Operator Values 41 | ## If the Network Operator is yes then make sure enable_rdma as well yes 42 | enable_network_operator: no 43 | ## Enable RDMA yes for NVIDIA Certification 44 | enable_rdma: no 45 | 46 | # Prxoy Configuration 47 | proxy: no 48 | http_proxy: "" 49 | https_proxy: "" 50 | 51 | # Cloud Native Stack for Developers Values 52 | ## Enable for Cloud Native Stack Developers 53 | cns_docker: no 54 | ## Enable For Cloud Native Stack Developers with TRD Driver 55 | cns_nvidia_driver: no 56 | 57 | ## Kubernetes resources 58 | k8s_apt_key: "https://packages.cloud.google.com/apt/doc/apt-key.gpg" 59 | k8s_apt_repository: " https://apt.kubernetes.io/ kubernetes-xenial main" 60 | k8s_apt_ring: "/etc/apt/keyrings/kubernetes-archive-keyring.gpg" 61 | k8s_registry: "registry.k8s.io" 62 | 63 | # Local Path Provisioner as Storage option 64 | storage: no 65 | 66 | # Monitoring Stack Prometheus/Grafana with GPU Metrics 67 | monitoring: no 68 | 69 | ## Cloud Native Stack Validation 70 | cns_validation: no 71 | 72 | # BMC Details for Confidential Computing 73 | bmc_ip: 74 | bmc_username: 75 | bmc_password: 76 | 77 | # CSP values 78 | ## AWS EKS values 79 | aws_region: us-east-2 80 | aws_cluster_name: cns-cluster-1 81 | aws_gpu_instance_type: g4dn.2xlarge 82 | 83 | ## Google Cloud GKE Values 84 | #https://cloud.google.com/resource-manager/docs/creating-managing-projects#identifying_projects 85 | gke_project_id: 86 | #https://cloud.google.com/compute/docs/regions-zones#available 87 | gke_region: us-west1 88 | gke_node_zones: ["us-west1-b"] 89 | gke_cluster_name: gke-cluster-1 90 | 91 | ## Azure AKS Values 92 | aks_cluster_name: aks-cluster-1 93 | #https://azure.microsoft.com/en-us/explore/global-infrastructure/geographies/#geographies 94 | aks_cluster_location: "West US 2" 95 | #https://learn.microsoft.com/en-us/partner-center/marketplace/find-tenant-object-id 96 | azure_object_id: [""] -------------------------------------------------------------------------------- /playbooks/older_versions/cns_values_8.5.yaml: -------------------------------------------------------------------------------- 1 | cns_version: 8.5 2 | 3 | ## Components Versions 4 | # Container Runtime options are containerd, cri-o, cri-dockerd 5 | container_runtime: "containerd" 6 | containerd_version: "1.7.3" 7 | crio_version: "1.25.3" 8 | cri_dockerd_version: "0.3.4" 9 | k8s_version: "1.25.12" 10 | calico_version: "3.26.1" 11 | flannel_version: "0.22.0" 12 | helm_version: "3.12.2" 13 | gpu_operator_version: "23.6.1" 14 | network_operator_version: "23.7.0" 15 | local_path_provisioner: "0.0.24" 16 | 17 | # GPU Operator Values 18 | enable_gpu_operator: yes 19 | confidential_computing: no 20 | confidential_computing: no 21 | gpu_driver_version: "535.104.05" 22 | enable_mig: no 23 | mig_profile: all-disabled 24 | mig_strategy: single 25 | enable_gds: no 26 | enable_secure_boot: no 27 | enable_vgpu: no 28 | vgpu_license_server: "" 29 | # URL of 
Helm repo to be added. If using NGC get this from the fetch command in the console 30 | helm_repository: https://helm.ngc.nvidia.com/nvidia 31 | # Name of the helm chart to be deployed 32 | gpu_operator_helm_chart: nvidia/gpu-operator 33 | ## If using a private/protected registry. NGC API Key. Leave blank for public registries 34 | gpu_operator_registry_password: "" 35 | ## This is most likely an NGC email 36 | gpu_operator_registry_email: "" 37 | ## This is most likely GPU Operator Driver Registry 38 | gpu_operator_driver_registry: "nvcr.io/nvidia" 39 | gpu_operator_registry_username: "$oauthtoken" 40 | 41 | # Network Operator Values 42 | ## If the Network Operator is yes then make sure enable_rdma as well yes 43 | enable_network_operator: no 44 | ## Enable RDMA yes for NVIDIA Certification 45 | enable_rdma: no 46 | 47 | # Prxoy Configuration 48 | proxy: no 49 | http_proxy: "" 50 | https_proxy: "" 51 | 52 | # Cloud Native Stack for Developers Values 53 | ## Enable for Cloud Native Stack Developers 54 | cns_docker: no 55 | ## Enable For Cloud Native Stack Developers with TRD Driver 56 | cns_nvidia_driver: no 57 | 58 | ## Kubernetes resources 59 | k8s_apt_key: "https://packages.cloud.google.com/apt/doc/apt-key.gpg" 60 | k8s_apt_repository: " https://apt.kubernetes.io/ kubernetes-xenial main" 61 | k8s_apt_ring: "/etc/apt/keyrings/kubernetes-archive-keyring.gpg" 62 | k8s_registry: "registry.k8s.io" 63 | 64 | # Local Path Provisioner as Storage option 65 | storage: no 66 | 67 | # Monitoring Stack Prometheus/Grafana with GPU Metrics 68 | monitoring: no 69 | 70 | ## Cloud Native Stack Validation 71 | cns_validation: no 72 | 73 | # BMC Details for Confidential Computing 74 | bmc_ip: 75 | bmc_username: 76 | bmc_password: 77 | 78 | # CSP values 79 | ## AWS EKS values 80 | aws_region: us-east-2 81 | aws_cluster_name: cns-cluster-1 82 | aws_gpu_instance_type: g4dn.2xlarge 83 | 84 | ## Google Cloud GKE Values 85 | #https://cloud.google.com/resource-manager/docs/creating-managing-projects#identifying_projects 86 | gke_project_id: 87 | #https://cloud.google.com/compute/docs/regions-zones#available 88 | gke_region: us-west1 89 | gke_node_zones: ["us-west1-b"] 90 | gke_cluster_name: gke-cluster-1 91 | 92 | ## Azure AKS Values 93 | aks_cluster_name: aks-cluster-1 94 | #https://azure.microsoft.com/en-us/explore/global-infrastructure/geographies/#geographies 95 | aks_cluster_location: "West US 2" 96 | #https://learn.microsoft.com/en-us/partner-center/marketplace/find-tenant-object-id 97 | azure_object_id: [""] -------------------------------------------------------------------------------- /playbooks/older_versions/cns_values_9.0.yaml: -------------------------------------------------------------------------------- 1 | cns_version: 9.0 2 | 3 | ## Components Versions 4 | # Container Runtime options are containerd, cri-o, cri-dockerd 5 | container_runtime: "containerd" 6 | containerd_version: "1.6.16" 7 | containerd_max_concurrent_downloads: "5" 8 | crio_version: "1.26.3" 9 | cri_dockerd_version: "0.3.4" 10 | k8s_version: "1.26.1" 11 | calico_version: "3.25.0" 12 | flannel_version: "0.20.2" 13 | helm_version: "3.11.0" 14 | gpu_operator_version: "22.9.2" 15 | network_operator_version: "1.4.0" 16 | local_path_provisioner: "0.0.24" 17 | 18 | # GPU Operator Values 19 | enable_gpu_operator: yes 20 | confidential_computing: no 21 | gpu_driver_version: "525.85.12" 22 | use_open_kernel_module: no 23 | enable_mig: no 24 | mig_profile: all-disabled 25 | mig_strategy: single 26 | enable_gds: no 27 | enable_secure_boot: 
no 28 | enable_vgpu: no 29 | vgpu_license_server: "" 30 | # URL of Helm repo to be added. If using NGC get this from the fetch command in the console 31 | helm_repository: https://helm.ngc.nvidia.com/nvidia 32 | # Name of the helm chart to be deployed 33 | gpu_operator_helm_chart: nvidia/gpu-operator 34 | ## If using a private/protected registry. NGC API Key. Leave blank for public registries 35 | gpu_operator_registry_password: "" 36 | ## This is most likely an NGC email 37 | gpu_operator_registry_email: "" 38 | ## This is most likely GPU Operator Driver Registry 39 | gpu_operator_driver_registry: "nvcr.io/nvidia" 40 | gpu_operator_registry_username: "$oauthtoken" 41 | 42 | # Network Operator Values 43 | ## If the Network Operator is yes then make sure enable_rdma as well yes 44 | enable_network_operator: no 45 | ## Enable RDMA yes for NVIDIA Certification 46 | enable_rdma: no 47 | 48 | # Prxoy Configuration 49 | proxy: no 50 | http_proxy: "" 51 | https_proxy: "" 52 | 53 | # Cloud Native Stack for Developers Values 54 | ## Enable for Cloud Native Stack Developers 55 | cns_docker: no 56 | ## Enable For Cloud Native Stack Developers with TRD Driver 57 | cns_nvidia_driver: no 58 | 59 | ## Kubernetes resources 60 | k8s_apt_key: "https://pkgs.k8s.io/core:/stable:/v1.26/deb/Release.key" 61 | k8s_gpg_key: "https://pkgs.k8s.io/core:/stable:/v1.26/rpm/repodata/repomd.xml.key" 62 | k8s_apt_ring: "/etc/apt/keyrings/kubernetes-apt-keyring.gpg" 63 | k8s_registry: "registry.k8s.io" 64 | 65 | # Local Path Provisioner as Storage option 66 | storage: no 67 | 68 | # Monitoring Stack Prometheus/Grafana with GPU Metrics 69 | monitoring: no 70 | 71 | ## Cloud Native Stack Validation 72 | cns_validation: no 73 | 74 | # BMC Details for Confidential Computing 75 | bmc_ip: 76 | bmc_username: 77 | bmc_password: 78 | 79 | # CSP values 80 | ## AWS EKS values 81 | aws_region: us-east-2 82 | aws_cluster_name: cns-cluster-1 83 | aws_gpu_instance_type: g4dn.2xlarge 84 | 85 | ## Google Cloud GKE Values 86 | #https://cloud.google.com/resource-manager/docs/creating-managing-projects#identifying_projects 87 | gke_project_id: 88 | #https://cloud.google.com/compute/docs/regions-zones#available 89 | gke_region: us-west1 90 | gke_node_zones: ["us-west1-b"] 91 | gke_cluster_name: gke-cluster-1 92 | 93 | ## Azure AKS Values 94 | aks_cluster_name: aks-cluster-1 95 | #https://azure.microsoft.com/en-us/explore/global-infrastructure/geographies/#geographies 96 | aks_cluster_location: "West US 2" 97 | #https://learn.microsoft.com/en-us/partner-center/marketplace/find-tenant-object-id 98 | azure_object_id: [""] -------------------------------------------------------------------------------- /playbooks/older_versions/cns_values_9.1.yaml: -------------------------------------------------------------------------------- 1 | cns_version: 9.1 2 | 3 | ## Components Versions 4 | # Container Runtime options are containerd, cri-o, cri-dockerd 5 | container_runtime: "containerd" 6 | containerd_version: "1.7.0" 7 | containerd_max_concurrent_downloads: "5" 8 | crio_version: "1.26.3" 9 | cri_dockerd_version: "0.3.4" 10 | k8s_version: "1.26.3" 11 | calico_version: "3.25.1" 12 | flannel_version: "0.21.4" 13 | helm_version: "3.11.3" 14 | gpu_operator_version: "23.3.2" 15 | network_operator_version: "23.4.0" 16 | local_path_provisioner: "0.0.24" 17 | 18 | # GPU Operator Values 19 | enable_gpu_operator: yes 20 | confidential_computing: no 21 | gpu_driver_version: "525.105.17" 22 | use_open_kernel_module: no 23 | enable_mig: no 24 | mig_profile: 
all-disabled 25 | mig_strategy: single 26 | enable_gds: no 27 | enable_secure_boot: no 28 | enable_vgpu: no 29 | vgpu_license_server: "" 30 | # URL of Helm repo to be added. If using NGC get this from the fetch command in the console 31 | helm_repository: https://helm.ngc.nvidia.com/nvidia 32 | # Name of the helm chart to be deployed 33 | gpu_operator_helm_chart: nvidia/gpu-operator 34 | ## If using a private/protected registry. NGC API Key. Leave blank for public registries 35 | gpu_operator_registry_password: "" 36 | ## This is most likely an NGC email 37 | gpu_operator_registry_email: "" 38 | ## This is most likely GPU Operator Driver Registry 39 | gpu_operator_driver_registry: "nvcr.io/nvidia" 40 | gpu_operator_registry_username: "$oauthtoken" 41 | 42 | # Network Operator Values 43 | ## If the Network Operator is yes then make sure enable_rdma as well yes 44 | enable_network_operator: no 45 | ## Enable RDMA yes for NVIDIA Certification 46 | enable_rdma: no 47 | 48 | # Prxoy Configuration 49 | proxy: no 50 | http_proxy: "" 51 | https_proxy: "" 52 | 53 | # Cloud Native Stack for Developers Values 54 | ## Enable for Cloud Native Stack Developers 55 | cns_docker: no 56 | ## Enable For Cloud Native Stack Developers with TRD Driver 57 | cns_nvidia_driver: no 58 | 59 | ## Kubernetes resources 60 | k8s_apt_key: "https://pkgs.k8s.io/core:/stable:/v1.26/deb/Release.key" 61 | k8s_gpg_key: "https://pkgs.k8s.io/core:/stable:/v1.26/rpm/repodata/repomd.xml.key" 62 | k8s_apt_ring: "/etc/apt/keyrings/kubernetes-apt-keyring.gpg" 63 | k8s_registry: "registry.k8s.io" 64 | 65 | # Local Path Provisioner as Storage option 66 | storage: no 67 | 68 | # Monitoring Stack Prometheus/Grafana with GPU Metrics 69 | monitoring: no 70 | 71 | ## Cloud Native Stack Validation 72 | cns_validation: no 73 | 74 | # BMC Details for Confidential Computing 75 | bmc_ip: 76 | bmc_username: 77 | bmc_password: 78 | 79 | # CSP values 80 | ## AWS EKS values 81 | aws_region: us-east-2 82 | aws_cluster_name: cns-cluster-1 83 | aws_gpu_instance_type: g4dn.2xlarge 84 | 85 | ## Google Cloud GKE Values 86 | #https://cloud.google.com/resource-manager/docs/creating-managing-projects#identifying_projects 87 | gke_project_id: 88 | #https://cloud.google.com/compute/docs/regions-zones#available 89 | gke_region: us-west1 90 | gke_node_zones: ["us-west1-b"] 91 | gke_cluster_name: gke-cluster-1 92 | 93 | ## Azure AKS Values 94 | aks_cluster_name: aks-cluster-1 95 | #https://azure.microsoft.com/en-us/explore/global-infrastructure/geographies/#geographies 96 | aks_cluster_location: "West US 2" 97 | #https://learn.microsoft.com/en-us/partner-center/marketplace/find-tenant-object-id 98 | azure_object_id: [""] -------------------------------------------------------------------------------- /playbooks/older_versions/cns_values_9.2.yaml: -------------------------------------------------------------------------------- 1 | cns_version: 9.2 2 | 3 | ## Components Versions 4 | # Container Runtime options are containerd, cri-o, cri-dockerd 5 | container_runtime: "containerd" 6 | containerd_version: "1.7.2" 7 | containerd_max_concurrent_downloads: "5" 8 | crio_version: "1.26.3" 9 | cri_dockerd_version: "0.3.4" 10 | k8s_version: "1.26.5" 11 | calico_version: "3.26.1" 12 | flannel_version: "0.22.0" 13 | helm_version: "3.12.1" 14 | gpu_operator_version: "23.3.2" 15 | network_operator_version: "23.5.0" 16 | local_path_provisioner: "0.0.24" 17 | 18 | # GPU Operator Values 19 | enable_gpu_operator: yes 20 | confidential_computing: no 21 | gpu_driver_version: 
"535.54.03" 22 | use_open_kernel_module: no 23 | enable_mig: no 24 | mig_profile: all-disabled 25 | mig_strategy: single 26 | enable_gds: no 27 | enable_secure_boot: no 28 | enable_vgpu: no 29 | vgpu_license_server: "" 30 | # URL of Helm repo to be added. If using NGC get this from the fetch command in the console 31 | helm_repository: https://helm.ngc.nvidia.com/nvidia 32 | # Name of the helm chart to be deployed 33 | gpu_operator_helm_chart: nvidia/gpu-operator 34 | ## If using a private/protected registry. NGC API Key. Leave blank for public registries 35 | gpu_operator_registry_password: "" 36 | ## This is most likely an NGC email 37 | gpu_operator_registry_email: "" 38 | ## This is most likely GPU Operator Driver Registry 39 | gpu_operator_driver_registry: "nvcr.io/nvidia" 40 | gpu_operator_registry_username: "$oauthtoken" 41 | 42 | # Network Operator Values 43 | ## If the Network Operator is yes then make sure enable_rdma as well yes 44 | enable_network_operator: no 45 | ## Enable RDMA yes for NVIDIA Certification 46 | enable_rdma: no 47 | 48 | # Prxoy Configuration 49 | proxy: no 50 | http_proxy: "" 51 | https_proxy: "" 52 | 53 | # Cloud Native Stack for Developers Values 54 | ## Enable for Cloud Native Stack Developers 55 | cns_docker: no 56 | ## Enable For Cloud Native Stack Developers with TRD Driver 57 | cns_nvidia_driver: no 58 | 59 | ## Kubernetes resources 60 | k8s_apt_key: "https://pkgs.k8s.io/core:/stable:/v1.26/deb/Release.key" 61 | k8s_gpg_key: "https://pkgs.k8s.io/core:/stable:/v1.26/rpm/repodata/repomd.xml.key" 62 | k8s_apt_ring: "/etc/apt/keyrings/kubernetes-apt-keyring.gpg" 63 | k8s_registry: "registry.k8s.io" 64 | 65 | # Local Path Provisioner as Storage option 66 | storage: no 67 | 68 | # Monitoring Stack Prometheus/Grafana with GPU Metrics 69 | monitoring: no 70 | 71 | ## Cloud Native Stack Validation 72 | cns_validation: no 73 | 74 | # BMC Details for Confidential Computing 75 | bmc_ip: 76 | bmc_username: 77 | bmc_password: 78 | 79 | # CSP values 80 | ## AWS EKS values 81 | aws_region: us-east-2 82 | aws_cluster_name: cns-cluster-1 83 | aws_gpu_instance_type: g4dn.2xlarge 84 | 85 | ## Google Cloud GKE Values 86 | #https://cloud.google.com/resource-manager/docs/creating-managing-projects#identifying_projects 87 | gke_project_id: 88 | #https://cloud.google.com/compute/docs/regions-zones#available 89 | gke_region: us-west1 90 | gke_node_zones: ["us-west1-b"] 91 | gke_cluster_name: gke-cluster-1 92 | 93 | ## Azure AKS Values 94 | aks_cluster_name: aks-cluster-1 95 | #https://azure.microsoft.com/en-us/explore/global-infrastructure/geographies/#geographies 96 | aks_cluster_location: "West US 2" 97 | #https://learn.microsoft.com/en-us/partner-center/marketplace/find-tenant-object-id 98 | azure_object_id: [""] -------------------------------------------------------------------------------- /playbooks/older_versions/cns_values_9.3.yaml: -------------------------------------------------------------------------------- 1 | cns_version: 9.3 2 | 3 | microk8s: no 4 | ## Components Versions 5 | # Container Runtime options are containerd, cri-o, cri-dockerd 6 | container_runtime: "containerd" 7 | containerd_version: "1.7.3" 8 | containerd_max_concurrent_downloads: "5" 9 | crio_version: "1.26.4" 10 | cri_dockerd_version: "0.3.4" 11 | k8s_version: "1.26.7" 12 | calico_version: "3.26.1" 13 | flannel_version: "0.22.0" 14 | helm_version: "3.12.2" 15 | gpu_operator_version: "23.6.1" 16 | network_operator_version: "23.7.0" 17 | local_path_provisioner: "0.0.24" 18 | 19 | # GPU Operator 
Values 20 | enable_gpu_operator: yes 21 | confidential_computing: no 22 | gpu_driver_version: "535.104.05" 23 | use_open_kernel_module: no 24 | enable_mig: no 25 | mig_profile: all-disabled 26 | mig_strategy: single 27 | enable_gds: no 28 | enable_secure_boot: no 29 | enable_vgpu: no 30 | vgpu_license_server: "" 31 | # URL of Helm repo to be added. If using NGC get this from the fetch command in the console 32 | helm_repository: https://helm.ngc.nvidia.com/nvidia 33 | # Name of the helm chart to be deployed 34 | gpu_operator_helm_chart: nvidia/gpu-operator 35 | ## If using a private/protected registry. NGC API Key. Leave blank for public registries 36 | gpu_operator_registry_password: "" 37 | ## This is most likely an NGC email 38 | gpu_operator_registry_email: "" 39 | ## This is most likely GPU Operator Driver Registry 40 | gpu_operator_driver_registry: "nvcr.io/nvidia" 41 | gpu_operator_registry_username: "$oauthtoken" 42 | 43 | # Network Operator Values 44 | ## If the Network Operator is yes then make sure enable_rdma as well yes 45 | enable_network_operator: no 46 | ## Enable RDMA yes for NVIDIA Certification 47 | enable_rdma: no 48 | 49 | # Prxoy Configuration 50 | proxy: no 51 | http_proxy: "" 52 | https_proxy: "" 53 | 54 | # Cloud Native Stack for Developers Values 55 | ## Enable for Cloud Native Stack Developers 56 | cns_docker: no 57 | ## Enable For Cloud Native Stack Developers with TRD Driver 58 | cns_nvidia_driver: no 59 | 60 | ## Kubernetes resources 61 | k8s_apt_key: "https://pkgs.k8s.io/core:/stable:/v1.26/deb/Release.key" 62 | k8s_gpg_key: "https://pkgs.k8s.io/core:/stable:/v1.26/rpm/repodata/repomd.xml.key" 63 | k8s_apt_ring: "/etc/apt/keyrings/kubernetes-apt-keyring.gpg" 64 | k8s_registry: "registry.k8s.io" 65 | 66 | # Local Path Provisioner as Storage option 67 | storage: no 68 | 69 | # Monitoring Stack Prometheus/Grafana with GPU Metrics 70 | monitoring: no 71 | 72 | ## Cloud Native Stack Validation 73 | cns_validation: no 74 | 75 | # BMC Details for Confidential Computing 76 | bmc_ip: 77 | bmc_username: 78 | bmc_password: 79 | 80 | # CSP values 81 | ## AWS EKS values 82 | aws_region: us-east-2 83 | aws_cluster_name: cns-cluster-1 84 | aws_gpu_instance_type: g4dn.2xlarge 85 | 86 | ## Google Cloud GKE Values 87 | #https://cloud.google.com/resource-manager/docs/creating-managing-projects#identifying_projects 88 | gke_project_id: 89 | #https://cloud.google.com/compute/docs/regions-zones#available 90 | gke_region: us-west1 91 | gke_node_zones: ["us-west1-b"] 92 | gke_cluster_name: gke-cluster-1 93 | 94 | ## Azure AKS Values 95 | aks_cluster_name: aks-cluster-1 96 | #https://azure.microsoft.com/en-us/explore/global-infrastructure/geographies/#geographies 97 | aks_cluster_location: "West US 2" 98 | #https://learn.microsoft.com/en-us/partner-center/marketplace/find-tenant-object-id 99 | azure_object_id: [""] -------------------------------------------------------------------------------- /playbooks/older_versions/cns_values_9.4.yaml: -------------------------------------------------------------------------------- 1 | cns_version: 9.4 2 | 3 | ## MicroK8s cluster 4 | microk8s: no 5 | ## Kubernetes Install with Kubeadm 6 | install_k8s: yes 7 | 8 | ## Components Versions 9 | # Container Runtime options are containerd, cri-o, cri-dockerd 10 | container_runtime: "containerd" 11 | containerd_version: "1.7.7" 12 | runc_version: "1.1.11" 13 | cni_plugins_version: "1.4.0" 14 | containerd_max_concurrent_downloads: "5" 15 | crio_version: "1.26.4" 16 | cri_dockerd_version: "0.3.6" 17 | 
k8s_version: "1.26.9" 18 | calico_version: "3.26.3" 19 | flannel_version: "0.22.3" 20 | helm_version: "3.13.1" 21 | gpu_operator_version: "23.9.1" 22 | network_operator_version: "23.10.0" 23 | local_path_provisioner: "0.0.24" 24 | nfs_provisioner: "4.0.18" 25 | metallb_version: "0.14.5" 26 | kserve_version: "0.13" 27 | prometheus_stack: "61.3.0" 28 | elastic_stack: "8.14.1" 29 | 30 | # GPU Operator Values 31 | enable_gpu_operator: yes 32 | confidential_computing: no 33 | gpu_driver_version: "535.161.08" 34 | use_open_kernel_module: no 35 | enable_mig: no 36 | mig_profile: all-disabled 37 | mig_strategy: single 38 | enable_gds: no 39 | enable_secure_boot: no 40 | enable_vgpu: no 41 | vgpu_license_server: "" 42 | # URL of Helm repo to be added. If using NGC get this from the fetch command in the console 43 | helm_repository: https://helm.ngc.nvidia.com/nvidia 44 | # Name of the helm chart to be deployed 45 | gpu_operator_helm_chart: nvidia/gpu-operator 46 | ## This is most likely GPU Operator Driver Registry 47 | gpu_operator_driver_registry: "nvcr.io/nvidia" 48 | 49 | # NGC Values 50 | ## If using a private/protected registry. NGC API Key. Leave blank for public registries 51 | ngc_registry_password: "" 52 | ## This is most likely an NGC email 53 | ngc_registry_email: "" 54 | ngc_registry_username: "$oauthtoken" 55 | 56 | # Network Operator Values 57 | ## If the Network Operator is yes then make sure enable_rdma as well yes 58 | enable_network_operator: no 59 | ## Enable RDMA yes for NVIDIA Certification 60 | enable_rdma: no 61 | 62 | # Prxoy Configuration 63 | proxy: no 64 | http_proxy: "" 65 | https_proxy: "" 66 | 67 | # Cloud Native Stack for Developers Values 68 | ## Enable for Cloud Native Stack Developers 69 | cns_docker: no 70 | ## Enable For Cloud Native Stack Developers with TRD Driver 71 | cns_nvidia_driver: no 72 | 73 | ## Kubernetes resources 74 | k8s_apt_key: "https://pkgs.k8s.io/core:/stable:/v1.26/deb/Release.key" 75 | k8s_gpg_key: "https://pkgs.k8s.io/core:/stable:/v1.26/rpm/repodata/repomd.xml.key" 76 | k8s_apt_ring: "/etc/apt/keyrings/kubernetes-apt-keyring.gpg" 77 | k8s_registry: "registry.k8s.io" 78 | 79 | # Install NVIDIA NIM Operator 80 | enable_nim_operator: no 81 | 82 | # Local Path Provisioner and NFS Provisoner as Storage option 83 | storage: no 84 | 85 | # Monitoring Stack Prometheus/Grafana with GPU Metrics and Elastic Logging stack 86 | monitoring: no 87 | 88 | # Enable Kserve on Cloud Native Stack with Istio and Cert-Manager 89 | kserve: no 90 | 91 | # Install MetalLB 92 | loadbalancer: no 93 | # Example input loadbalancer_ip: "10.117.20.50/32", it could be node/host IP 94 | loadbalancer_ip: "" 95 | 96 | ## Cloud Native Stack Validation 97 | cns_validation: no 98 | 99 | # BMC Details for Confidential Computing 100 | bmc_ip: 101 | bmc_username: 102 | bmc_password: 103 | 104 | # CSP values 105 | ## AWS EKS values 106 | aws_region: us-east-2 107 | aws_cluster_name: cns-cluster-1 108 | aws_gpu_instance_type: g4dn.2xlarge 109 | 110 | ## Google Cloud GKE Values 111 | #https://cloud.google.com/resource-manager/docs/creating-managing-projects#identifying_projects 112 | gke_project_id: 113 | #https://cloud.google.com/compute/docs/regions-zones#available 114 | gke_region: us-west1 115 | gke_node_zones: ["us-west1-b"] 116 | gke_cluster_name: gke-cluster-1 117 | 118 | ## Azure AKS Values 119 | aks_cluster_name: aks-cluster-1 120 | #https://azure.microsoft.com/en-us/explore/global-infrastructure/geographies/#geographies 121 | aks_cluster_location: "West US 2" 122 | 
122 | #https://learn.microsoft.com/en-us/partner-center/marketplace/find-tenant-object-id 123 | azure_object_id: [""] -------------------------------------------------------------------------------- /playbooks/older_versions/readme.md: -------------------------------------------------------------------------------- 1 | ### Older Versions 2 | 3 | Below are older versions of NVIDIA Cloud Native Core (formerly known as the EGX Stack). 4 | 5 | ### Ubuntu Systems 6 | - [Ubuntu Server v1.1](https://github.com/NVIDIA/cloud-native-core/blob/master/playbooks/older_versions/Ubuntu_Server_v1.1.md) 7 | - [Ubuntu Server v1.2](https://github.com/NVIDIA/cloud-native-core/blob/master/playbooks/older_versions/Ubuntu_Server_v1.2.md) 8 | - [Ubuntu Server v1.3](https://github.com/NVIDIA/cloud-native-core/blob/master/playbooks/older_versions/Ubuntu_Server_v1.3.md) 9 | - [Ubuntu Server v2.0](https://github.com/NVIDIA/cloud-native-core/blob/master/playbooks/older_versions/Ubuntu_Server_v2.0.md) 10 | - [Ubuntu Server v3.0](https://github.com/NVIDIA/cloud-native-core/blob/master/playbooks/older_versions/Ubuntu_Server_v3.0.md) 11 | - [Ubuntu Server v3.1](https://github.com/NVIDIA/cloud-native-core/blob/master/playbooks/older_versions/Ubuntu_Server_v3.1.md) 12 | - [Ubuntu Server v4.0](https://github.com/NVIDIA/cloud-native-core/blob/master/playbooks/older_versions/Ubuntu_Server_v4.0.md) 13 | - [Ubuntu Server v4.1](https://github.com/NVIDIA/cloud-native-core/blob/master/playbooks/older_versions/Ubuntu_Server_v4.1.md) 14 | - [Ubuntu Server v4.2](https://github.com/NVIDIA/cloud-native-core/blob/master/playbooks/older_versions/Ubuntu_Server_v4.2.md) 15 | -------------------------------------------------------------------------------- /playbooks/templates/kubeadm-init-config.template: -------------------------------------------------------------------------------- 1 | apiVersion: kubeadm.k8s.io/v1beta3 2 | kind: InitConfiguration 3 | nodeRegistration: 4 | criSocket: "{{ cri_socket }}" 5 | localAPIEndpoint: 6 | advertiseAddress: "{{ network.stdout_lines[0] }}" 7 | --- 8 | apiVersion: kubeadm.k8s.io/v1beta3 9 | kind: ClusterConfiguration 10 | networking: 11 | podSubnet: "{{ subnet }}" 12 | kubernetesVersion: "v{{ k8s_version }}" 13 | imageRepository: "{{ k8s_registry }}" -------------------------------------------------------------------------------- /playbooks/templates/kubeadm-join.template: -------------------------------------------------------------------------------- 1 | apiVersion: kubeadm.k8s.io/v1beta3 2 | kind: JoinConfiguration 3 | nodeRegistration: 4 | criSocket: "{{ cri_socket }}" 5 | discovery: 6 | bootstrapToken: 7 | apiServerEndpoint: "{{ api_endpoint }}" 8 | token: "{{ kubeadm_token }}" 9 | caCertHashes: 10 | - "{{ ca_cert_hash }}" -------------------------------------------------------------------------------- /playbooks/templates/metal-lb.template: -------------------------------------------------------------------------------- 1 | apiVersion: metallb.io/v1beta1 2 | kind: IPAddressPool 3 | metadata: 4 | name: first-pool 5 | namespace: metallb-system 6 | spec: 7 | addresses: 8 | - {{ loadbalancer_ip }} 9 | --- 10 | apiVersion: metallb.io/v1beta1 11 | kind: L2Advertisement 12 | metadata: 13 | name: example 14 | namespace: metallb-system 15 | spec: 16 | ipAddressPools: 17 | - first-pool -------------------------------------------------------------------------------- /troubleshooting/README.md: -------------------------------------------------------------------------------- 1 | # CNS Troubleshooting 2 | 3 | A CNS
deployment may fail for various reasons. 4 | The topics below provide guidance on root-causing the issue. 5 | 6 | [DNS Issues](https://github.com/NVIDIA/cloud-native-stack/blob/master/troubleshooting/dns.md) -------------------------------------------------------------------------------- /troubleshooting/dns.md: -------------------------------------------------------------------------------- 1 | # DNS troubleshooting 2 | 3 | ## DNS resolution for pods 4 | 5 | ### Driver Container failed to access archive.ubuntu.com 6 | 7 | #### Issue: 8 | 9 | The Driver Container logs display the following error messages: 10 | ![driver container logs](driver-container-logs.png) 11 | 12 | 13 | #### Troubleshooting: 14 | 15 | Follow the steps at https://kubernetes.io/docs/tasks/administer-cluster/dns-debugging-resolution/ to troubleshoot DNS resolution for pods. 16 | 17 | To install the dnsutils pod, run the following command: 18 | ``` 19 | kubectl apply -f https://k8s.io/examples/admin/dns/dnsutils.yaml 20 | ``` 21 | 22 | In a working CNS deployment, you should see output similar to the following: 23 | 24 | ``` 25 | nvidia@ipp1-1394:~$ kubectl exec -i -t dnsutils -- nslookup archive.ubuntu.com 26 | Server: 10.96.0.10 27 | Address: 10.96.0.10#53 28 | 29 | Non-authoritative answer: 30 | Name: archive.ubuntu.com 31 | Address: 91.189.91.82 32 | Name: archive.ubuntu.com 33 | Address: 185.125.190.82 34 | Name: archive.ubuntu.com 35 | Address: 185.125.190.83 36 | Name: archive.ubuntu.com 37 | Address: 185.125.190.81 38 | Name: archive.ubuntu.com 39 | Address: 91.189.91.81 40 | Name: archive.ubuntu.com 41 | Address: 91.189.91.83 42 | Name: archive.ubuntu.com 43 | Address: 2620:2d:4002:1::103 44 | Name: archive.ubuntu.com 45 | Address: 2620:2d:4000:1::101 46 | Name: archive.ubuntu.com 47 | Address: 2620:2d:4002:1::102 48 | Name: archive.ubuntu.com 49 | Address: 2620:2d:4002:1::101 50 | Name: archive.ubuntu.com 51 | Address: 2620:2d:4000:1::103 52 | Name: archive.ubuntu.com 53 | Address: 2620:2d:4000:1::102 54 | ``` 55 | 56 | Note that the Name field must be exactly 'archive.ubuntu.com': 57 | 58 | ***Name: archive.ubuntu.com*** 59 | 60 | 61 | If you get different output, fix the root cause: check with the team in charge of the DNS server; they may have created an entry for archive.ubuntu.com, and if so, they must remove it. 62 | 63 | -------------------------------------------------------------------------------- /troubleshooting/driver-container-logs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/cloud-native-stack/ab595ce308627d8ec0e372c34e765795e52bf367/troubleshooting/driver-container-logs.png --------------------------------------------------------------------------------
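A further check that often helps with the DNS issue above (a sketch, assuming a default kubeadm-based CNS cluster where CoreDNS runs as the `coredns` Deployment in the `kube-system` namespace): inspect the CoreDNS Corefile for a stray `hosts` block or `rewrite` rule that shadows archive.ubuntu.com, and restart CoreDNS after removing it.

```
# Dump the CoreDNS Corefile; look for hosts blocks or rewrite rules
# that override resolution of archive.ubuntu.com
kubectl -n kube-system get configmap coredns -o yaml

# After editing out a bad entry, restart CoreDNS so it reloads the config
kubectl -n kube-system rollout restart deployment coredns
```

If the Corefile is clean, the override most likely lives on the upstream DNS server referenced by the nodes' /etc/resolv.conf, which is the case the dns.md guidance above addresses.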