├── .github ├── FUNDING.yml └── workflows │ ├── auto-delete-deploy-key.yml │ └── medium-spark-stats.yaml ├── README.md ├── clusters └── home-cluster │ ├── cilium │ └── cilium.yaml │ ├── clickhouse │ └── clickhouse.yaml │ ├── cloudflared │ ├── kustomization.yaml │ └── tunnel-deployment.yaml │ ├── dagster │ └── dagster.yaml │ ├── data-analytics │ ├── kustomization.yaml │ ├── rook-nfs.yaml │ └── spark-sa.yaml │ ├── devtron │ └── flux-ui.yaml │ ├── flux-system │ ├── gotk-components.yaml │ ├── gotk-sync.yaml │ └── kustomization.yaml │ ├── k8sgpt │ └── k8sgpt-operator.yaml │ ├── kube-system │ └── metrics-server.yaml │ ├── minio │ └── minio-server.yaml │ ├── monitoring │ ├── agent-buff.yaml │ ├── agent.yaml │ ├── cluster-1.yaml │ ├── cluster-2.yaml │ ├── prometheus.yaml │ ├── user.yaml │ ├── vm.yaml │ └── vmauth.yaml │ ├── mysql │ └── mysql.yaml │ ├── nats │ └── nats.yaml │ ├── nvidia-gpu │ └── gpu-operator.yaml │ ├── ollama │ └── ollama-server.yaml │ ├── prestodb │ └── presto.yaml │ └── rook-ceph-system │ ├── rook-ceph-cluster.yaml │ └── rook-operator.yaml └── scripts ├── delete_deploy_keys.py └── medium ├── Dockerfile ├── articles.json ├── driver.yaml ├── entrypoint.sh ├── executor.yaml ├── medium-articles-info.py └── medium-stats-spark-driver.py /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['https://www.buymeacoffee.com/bettercallpavan'] 2 | -------------------------------------------------------------------------------- /.github/workflows/auto-delete-deploy-key.yml: -------------------------------------------------------------------------------- 1 | name: Delete Deploy Keys 2 | 3 | on: 4 | schedule: 5 | - cron: '0 0 1 * *' 6 | 7 | jobs: 8 | delete_deploy_keys: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: Checkout repository 12 | uses: actions/checkout@v4 13 | 14 | - name: Install Pip3 15 | run: | 16 | pip install requests==2.31.0 17 | 18 | - name: Run script 19 | run: | 20 | python scripts/delete_deploy_keys.py 21 | env: 22 | GITHUB_TOKEN: ${{ secrets.PAT_TOKEN }} 23 | -------------------------------------------------------------------------------- /.github/workflows/medium-spark-stats.yaml: -------------------------------------------------------------------------------- 1 | name: Run Spark Job for Medium Stats 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | schedule: 8 | - cron: '0 1 * * *' 9 | 10 | jobs: 11 | sparkStats: 12 | runs-on: self-hosted 13 | 14 | steps: 15 | - name: Checkout repository 16 | uses: actions/checkout@v4 17 | 18 | - name: Spark Monthly and Yearly Stats 19 | run: | 20 | export KUBECONFIG="/root/.kube/config" 21 | cd scripts/medium 22 | todaysDate=$(date +"%Y-%m-%d") 23 | spark-submit \ 24 | --master k8s://https://10.0.0.119:6443 \ 25 | --deploy-mode cluster \ 26 | --name medium-stats \ 27 | --conf spark.executor.instances=3 \ 28 | --conf spark.kubernetes.authenticate.driver.serviceAccountName=spark \ 29 | --conf spark.kubernetes.namespace=data-analytics \ 30 | --conf spark.kubernetes.driver.request.cores=2 \ 31 | --conf spark.driver.memory=1g \ 32 | --conf spark.kubernetes.executor.request.cores=2 \ 33 | --conf spark.executor.memory=1g \ 34 | --packages org.apache.hadoop:hadoop-aws:3.3.4 \ 35 | --conf spark.kubernetes.pyspark.pythonVersion=3 \ 36 | --conf spark.kubernetes.container.image=greypavan/medium-manifests:medium-stats \ 37 | --conf spark.kubernetes.driver.podTemplateFile=./driver.yaml \ 38 | --conf 
spark.kubernetes.executor.podTemplateFile=./executor.yaml \ 39 | --conf spark.kubernetes.container.image.pullPolicy=Always \ 40 | https://raw.githubusercontent.com/pavan-kumar-99/medium-manifests/master/scripts/medium/medium-stats-spark-driver.py $todaysDate/ create 41 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Medium Manifests 2 | What is Medium Manifests? 3 | Medium Manifests is a GitHub repo that contains all the code for the articles written here: https://pavan1999-kumar.medium.com/. Every article has its GitHub branch listed in the article description itself, so you can clone the repo with the branch specified in the Medium article. 4 | 5 | ## Cloning the Repo 6 | 7 | The branch names for the corresponding articles can be found [here](https://pavan1999-kumar.medium.com/). 8 | 9 | ```bash 10 | git clone https://github.com/pavan-kumar-99/medium-manifests.git -b <branch-name> 11 | ``` 12 | 13 | ## Published 14 | 15 | [Kyverno](https://medium.com/gitconnected/kubernetes-policies-as-code-using-kyverno-10720df9c842) 16 | 17 | [Helm Dashboard](https://medium.com/gitconnected/introduction-to-helm-dashboard-dddf43e38cc2) 18 | 19 | [Goldilocks](https://pavan1999-kumar.medium.com/how-to-guess-the-right-size-for-your-kubernetes-pods-9c88686fec) 20 | 21 | [Multi Stage Docker Build](https://pavan1999-kumar.medium.com/how-i-reduced-the-size-of-my-docker-image-by-95-520a05439300) 22 | 23 | [jsPolicy](https://pavan1999-kumar.medium.com/policies-as-code-in-kubernetes-using-jspolicy-8d358d064bfd) 24 | 25 | [Kubeflow: MLOPS](https://medium.com/nerd-for-tech/mlops-machine-learning-pipelines-using-kubeflow-fc06508a3f0d) 26 | 27 | [Hashicorp Vault Secrets in Kubernetes with CSI Driver](https://pavan1999-kumar.medium.com/hashicvault-secrets-in-kubernetes-with-csi-driver-ec917d4a2672) 28 | 29 | [Loft ( Virtual Clusters )](https://pavan1999-kumar.medium.com/multi-tenancy-in-kubernetes-using-lofts-vcluster-dee6513a7206) 30 | 31 | [HashiCorp Vault PKI With Vault Injector](https://medium.com/nerd-for-tech/pki-certs-injection-to-k8s-pods-with-vault-agent-injector-d97482b48f3d) 32 | 33 | [Analyze Terraform costs with Infracost ( The GitOps Way )](https://pavan1999-kumar.medium.com/terraforming-the-cost-with-infracost-c28dc6c981c9) 34 | 35 | [Atlantis Pull Request Automation](https://pavan1999-kumar.medium.com/terraforming-the-gitops-way-9417cf4abf58) 36 | 37 | [Kubernetes HashiCorp Vault with Cert-Manager](https://pavan1999-kumar.medium.com/using-hashicorp-vault-as-a-certificate-issuer-in-cert-manager-9e19d7239d3d) 38 | 39 | [Cortex Metrics](https://medium.com/nerd-for-tech/deep-dive-into-cortex-part-i-c228e01f8c58) 40 | 41 | [Kubernetes Cluster Autoscaler](https://medium.com/nerd-for-tech/kubernetes-cluster-autoscaler-in-action-6172a023f542) 42 | 43 | [Grafana Loki](https://medium.com/nerd-for-tech/logging-at-scale-in-kubernetes-using-grafana-loki-3bb2eb0c0872) 44 | 45 | [Spark on EKS](https://medium.com/nerd-for-tech/running-apache-spark-on-eks-with-aws-spot-instances-f8ce91d319b9) 46 | 47 | [Kubernetes Kubeless](https://medium.com/nerd-for-tech/going-serverless-in-kubernetes-using-kubeless-8ef83b3f2f89) 48 | 49 | [Kubernetes Cert Manager](https://medium.com/nerd-for-tech/free-and-automatic-ssl-certificates-in-kubernetes-using-cert-manager-6fb65ac63d5) 50 | 51 | [Kubernetes Chaos 
Mesh](https://medium.com/nerd-for-tech/chaos-engineering-in-kubernetes-using-chaos-mesh-431c1587ef0a) 52 | 53 | [Thanos](https://medium.com/nerd-for-tech/deep-dive-into-thanos-part-i-f72ecba39f76) 54 | 55 | [Kube-Bench and Kube-Hunter](https://www.techmanyu.com/kubernetes-security-with-kube-bench-and-kube-hunter-6765bf44ebc6) 56 | 57 | [Kubernetes Network Policies](https://medium.com/nerd-for-tech/network-policies-demystified-in-kubernetes-d57fc2548043) 58 | 59 | [Kubernetes Auto Scaling](https://medium.com/nerd-for-tech/autoscaling-in-kubernetes-hpa-vpa-ab61a2177950) 60 | 61 | [GitHub Self Hosted Runner](https://www.techmanyu.com/creating-self-hosted-github-runners-in-a-kubernetes-cluster-fd05560de34a) 62 | 63 | [Kubernetes Rabbit MQ Operator](https://medium.com/nerd-for-tech/deploying-rabbitmq-on-kubernetes-using-rabbitmq-cluster-operator-ef99f7a4e417) 64 | 65 | [Kubernetes Cross Plane](https://medium.com/nerd-for-tech/introduction-to-crossplane-2f873ae0f9f3) 66 | 67 | [Kubernetes Sealed Secrets](https://faun.pub/introduction-to-bitnami-sealed-secrets-bb5ae74d9a25) 68 | 69 | [Kubernetes External DNS](https://faun.pub/introduction-to-external-dns-in-kubernetes-654aa4cf38e6) 70 | 71 | [Kubernetes Jenkins Operator](https://medium.com/swlh/introduction-to-jenkins-operator-f4cb7ebc2e0b) 72 | 73 | [GitHub Actions](https://medium.com/nerd-for-tech/creating-a-gke-cluster-with-github-actions-dd34e2de50a6) 74 | 75 | [Kubernetes KIND](https://medium.com/nerd-for-tech/create-a-kubernetes-cluster-using-kind-b364a67437b7) 76 | 77 | [Kubernetes Flux CD V1](https://medium.com/swlh/deploying-applications-in-kubernetes-using-flux-a9d171b11917) 78 | 79 | 80 | [Kubernetes Kustomize](https://faun.pub/introduction-to-kustomize-97f990dc2f44) 81 | 82 | [Kubernetes ArgoCD](https://medium.com/nerd-for-tech/deploying-applications-in-kubernetes-using-argo-cd-ab004a8cdb5e) 83 | 84 | [Kubernetes HashiCorp Vault Injector](https://faun.pub/securing-your-secrets-using-vault-k8s-in-kubernetes-part-1-de3d7378e226) 85 | 86 | 87 | ## Upcoming 88 | 89 | - [x] Kubernetes Cert Manager and Vault 90 | - [x] Atlantis ( Terraform Pull Request Automation ) 91 | - [x] Infracost 92 | - [x] HashiCorp Vault PKI With Vault Injector 93 | - [x] Loft ( Virtual Clusters ) 94 | - [x] HashiCorp Vault CSI Provider 95 | - [x] Kubeflow 96 | - [x] JSPolicy 97 | - [x] MultiStage Docker Build 98 | - [x] Goldilocks 99 | - [x] Kyverno 100 | - [ ] Grafana Mimir 101 | - [x] Helm Dashboard 102 | - [ ] Stack Storm 103 | - [ ] Google Architecture Diagram 104 | - [ ] KubeArmour 105 | - [ ] Volcano 106 | - [ ] KubeCost 107 | - [ ] KeyCloak 108 | - [ ] KubeVirt 109 | - [ ] AWS Karpenter 110 | - [ ] Hierarchical Namespaces 111 | - [ ] Prometheus Adapter 112 | - [ ] Custom Scheduler in K8s 113 | - [ ] External Secrets and HashiCorp Vault 114 | - [ ] Elastic Search Hot-Warm-Cold Architecture using Elastic Operator 115 | - [ ] TelePort ( Go TelePort ) 116 | - [ ] LongHorn.io 117 | - [ ] Backstage.io 118 | - [ ] Env0 119 | - [ ] kURL 120 | - [ ] Netflix Console Me 121 | - [ ] Cosign Sigstore 122 | - [ ] Cloud Custodian 123 | - [ ] Keptn 124 | - [ ] FluxCD V2 125 | - [ ] Kube Resource Report 126 | - [ ] Forecastle 127 | - [ ] Capsule 128 | - [ ] Grafana OnCall 129 | - [ ] ElasticSearch Curator 130 | - [ ] CAST AI 131 | - [ ] Devtron 132 | - [ ] Forsetti 133 | - [ ] Jit.io 134 | - [ ] Numaflow 135 | - [ ] Weavework TF Controller 136 | - [ ] Kaniaster 137 | - [ ] TestKube 138 | - [ ] Pritunl 139 | - [ ] AirByte 140 | 141 | ## Contributing 142 | Pull requests are 
welcome. For major changes, please open an issue first to discuss what you would like to change. 143 | 144 | Please make sure to update tests as appropriate. 145 | 146 | ## Sponsorship 147 | 148 | Want your product showcased and presented to a wider audience, or want to sponsor any of my upcoming articles? Feel free to reach out at pavan1999.kumar@gmail.com. 149 | -------------------------------------------------------------------------------- /clusters/home-cluster/cilium/cilium.yaml: -------------------------------------------------------------------------------- 1 | # apiVersion: source.toolkit.fluxcd.io/v1 2 | # kind: HelmRepository 3 | # metadata: 4 | # name: cilium 5 | # namespace: flux-system 6 | # spec: 7 | # interval: 1m 8 | # url: https://helm.cilium.io/ 9 | # --- 10 | # apiVersion: helm.toolkit.fluxcd.io/v2 11 | # kind: HelmRelease 12 | # metadata: 13 | # name: cilium 14 | # namespace: cilium 15 | # spec: 16 | # interval: 10m 17 | # chart: 18 | # spec: 19 | # chart: cilium 20 | # version: '*' 21 | # sourceRef: 22 | # kind: HelmRepository 23 | # name: cilium 24 | # namespace: flux-system 25 | # interval: 1m 26 | -------------------------------------------------------------------------------- /clusters/home-cluster/clickhouse/clickhouse.yaml: -------------------------------------------------------------------------------- 1 | # apiVersion: source.toolkit.fluxcd.io/v1 2 | # kind: HelmRepository 3 | # metadata: 4 | # name: clickhouse-helm 5 | # namespace: flux-system 6 | # spec: 7 | # interval: 1m 8 | # url: https://charts.bitnami.com/bitnami 9 | # --- 10 | # apiVersion: helm.toolkit.fluxcd.io/v2 11 | # kind: HelmRelease 12 | # metadata: 13 | # name: clickhouse-server 14 | # namespace: clickhouse 15 | # spec: 16 | # interval: 10m 17 | # chart: 18 | # spec: 19 | # chart: clickhouse 20 | # version: '6.2.6' 21 | # sourceRef: 22 | # kind: HelmRepository 23 | # name: clickhouse-helm 24 | # namespace: flux-system 25 | # interval: 1m 26 | # values: 27 | # shards: 1 28 | # keeper: 29 | # enabled: true 30 | # replicaCount: 2 31 | # resources: 32 | # requests: 33 | # cpu: 1 34 | # memory: 512Mi 35 | # limits: 36 | # cpu: 1 37 | # memory: 1024Mi 38 | # persistence: 39 | # enabled: true 40 | # storageClass: "ceph-block-ssd" 41 | # size: 100Gi 42 | # zookeeper: 43 | # enabled: false 44 | -------------------------------------------------------------------------------- /clusters/home-cluster/cloudflared/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | namespace: cloudflared 4 | resources: 5 | - tunnel-deployment.yaml 6 | -------------------------------------------------------------------------------- /clusters/home-cluster/cloudflared/tunnel-deployment.yaml: -------------------------------------------------------------------------------- 1 | # apiVersion: apps/v1 2 | # kind: Deployment 3 | # metadata: 4 | # labels: 5 | # app: cloudflared 6 | # name: cloudflared-deployment 7 | # namespace: cloudflared 8 | # spec: 9 | # replicas: 2 10 | # selector: 11 | # matchLabels: 12 | # pod: cloudflared 13 | # template: 14 | # metadata: 15 | # labels: 16 | # pod: cloudflared 17 | # spec: 18 | # containers: 19 | # - command: 20 | # - cloudflared 21 | # - tunnel 22 | # - --metrics 23 | # - 0.0.0.0:2000 24 | # - run 25 | # args: 26 | # - --token 27 | # - ${TOKEN} 28 | # image: cloudflare/cloudflared:latest 29 | # name: cloudflared 30 | # env: 31 | # - name: TOKEN 32 | # 
valueFrom: 33 | # secretKeyRef: 34 | # name: cloudflared-token 35 | # key: token 36 | # livenessProbe: 37 | # httpGet: 38 | # path: /ready 39 | # port: 2000 40 | # failureThreshold: 1 41 | # initialDelaySeconds: 10 42 | # periodSeconds: 10 43 | # readinessProbe: 44 | # httpGet: 45 | # path: /ready 46 | # port: 2000 47 | # failureThreshold: 3 48 | # initialDelaySeconds: 10 49 | # periodSeconds: 10 50 | -------------------------------------------------------------------------------- /clusters/home-cluster/dagster/dagster.yaml: -------------------------------------------------------------------------------- 1 | # apiVersion: source.toolkit.fluxcd.io/v1 2 | # kind: HelmRepository 3 | # metadata: 4 | # name: dagster-helm 5 | # namespace: flux-system 6 | # spec: 7 | # interval: 1m 8 | # url: https://dagster-io.github.io/helm 9 | # --- 10 | # apiVersion: helm.toolkit.fluxcd.io/v2 11 | # kind: HelmRelease 12 | # metadata: 13 | # name: dagster 14 | # namespace: dagster 15 | # spec: 16 | # interval: 10m 17 | # chart: 18 | # spec: 19 | # chart: dagster 20 | # version: '1.7.12' 21 | # sourceRef: 22 | # kind: HelmRepository 23 | # name: dagster-helm 24 | # namespace: flux-system 25 | # interval: 1m 26 | # values: 27 | # postgresql: 28 | # enabled: false 29 | # postgresqlHost: "ep-sweet-firefly-a5if2gk1.us-east-2.aws.neon.tech" 30 | # postgresqlUsername: "dagster_owner" 31 | # postgresqlPassword: "vU1cOqHYTes6" 32 | # postgresqlDatabase: "dagster" 33 | # dagsterWebserver: 34 | # enableReadOnly: true 35 | # runLauncher: 36 | # type: K8sRunLauncher 37 | -------------------------------------------------------------------------------- /clusters/home-cluster/data-analytics/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | namespace: data-analytics 4 | resources: 5 | - rook-nfs.yaml 6 | - spark-sa.yaml 7 | -------------------------------------------------------------------------------- /clusters/home-cluster/data-analytics/rook-nfs.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: PersistentVolumeClaim 4 | metadata: 5 | name: stats-pvc 6 | spec: 7 | accessModes: 8 | - ReadWriteMany 9 | resources: 10 | requests: 11 | storage: 10Gi 12 | storageClassName: ceph-filesystem 13 | -------------------------------------------------------------------------------- /clusters/home-cluster/data-analytics/spark-sa.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: spark 5 | namespace: default 6 | --- 7 | apiVersion: rbac.authorization.k8s.io/v1 8 | kind: Role 9 | metadata: 10 | namespace: default 11 | name: spark-role 12 | rules: 13 | - apiGroups: [""] 14 | resources: ["pods"] 15 | verbs: ["*"] 16 | - apiGroups: [""] 17 | resources: ["services","configmaps"] 18 | verbs: ["*"] 19 | --- 20 | apiVersion: rbac.authorization.k8s.io/v1 21 | kind: RoleBinding 22 | metadata: 23 | name: spark-role-binding 24 | namespace: default 25 | subjects: 26 | - kind: ServiceAccount 27 | name: spark 28 | namespace: default 29 | roleRef: 30 | kind: Role 31 | name: spark-role 32 | apiGroup: rbac.authorization.k8s.io 33 | -------------------------------------------------------------------------------- /clusters/home-cluster/devtron/flux-ui.yaml: -------------------------------------------------------------------------------- 1 | # apiVersion: 
source.toolkit.fluxcd.io/v1 2 | # kind: HelmRepository 3 | # metadata: 4 | # name: devtron 5 | # namespace: flux-system 6 | # spec: 7 | # interval: 1m 8 | # url: https://helm.devtron.ai 9 | # --- 10 | # apiVersion: helm.toolkit.fluxcd.io/v2 11 | # kind: HelmRelease 12 | # metadata: 13 | # name: devtron 14 | # namespace: devtroncd 15 | # spec: 16 | # interval: 10m 17 | # install: 18 | # createNamespace: true 19 | # chart: 20 | # spec: 21 | # chart: devtron-operator 22 | # version: "*" 23 | # sourceRef: 24 | # kind: HelmRepository 25 | # name: devtron 26 | # namespace: flux-system 27 | # interval: 1m 28 | # values: 29 | # installer: 30 | # modules: [cicd] 31 | -------------------------------------------------------------------------------- /clusters/home-cluster/flux-system/gotk-sync.yaml: -------------------------------------------------------------------------------- 1 | # This manifest was generated by flux. DO NOT EDIT. 2 | --- 3 | apiVersion: source.toolkit.fluxcd.io/v1 4 | kind: GitRepository 5 | metadata: 6 | name: flux-system 7 | namespace: flux-system 8 | spec: 9 | interval: 1m0s 10 | ref: 11 | branch: master 12 | secretRef: 13 | name: flux-system 14 | url: ssh://git@github.com/pavan-kumar-99/medium-manifests 15 | --- 16 | apiVersion: kustomize.toolkit.fluxcd.io/v1 17 | kind: Kustomization 18 | metadata: 19 | name: flux-system 20 | namespace: flux-system 21 | spec: 22 | interval: 10m0s 23 | path: ./clusters/home-cluster 24 | prune: true 25 | sourceRef: 26 | kind: GitRepository 27 | name: flux-system 28 | -------------------------------------------------------------------------------- /clusters/home-cluster/flux-system/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | resources: 4 | - gotk-components.yaml 5 | - gotk-sync.yaml 6 | -------------------------------------------------------------------------------- /clusters/home-cluster/k8sgpt/k8sgpt-operator.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: source.toolkit.fluxcd.io/v1 2 | kind: HelmRepository 3 | metadata: 4 | name: k8sgpt 5 | namespace: flux-system 6 | spec: 7 | interval: 1m 8 | url: https://charts.k8sgpt.ai/ 9 | --- 10 | apiVersion: helm.toolkit.fluxcd.io/v2 11 | kind: HelmRelease 12 | metadata: 13 | name: k8sgpt 14 | namespace: k8sgpt-operator-system 15 | spec: 16 | interval: 10m 17 | install: 18 | createNamespace: true 19 | chart: 20 | spec: 21 | chart: k8sgpt-operator 22 | version: "*" 23 | sourceRef: 24 | kind: HelmRepository 25 | name: k8sgpt 26 | namespace: flux-system 27 | interval: 1m 28 | values: 29 | config: 30 | cluster: 31 | enabled: true 32 | jetstream: 33 | enabled: true 34 | -------------------------------------------------------------------------------- /clusters/home-cluster/kube-system/metrics-server.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: source.toolkit.fluxcd.io/v1 2 | kind: HelmRepository 3 | metadata: 4 | name: metrics-server 5 | namespace: flux-system 6 | spec: 7 | interval: 1m 8 | url: https://kubernetes-sigs.github.io/metrics-server/ 9 | --- 10 | apiVersion: helm.toolkit.fluxcd.io/v2 11 | kind: HelmRelease 12 | metadata: 13 | name: metrics-server 14 | namespace: kube-system 15 | spec: 16 | interval: 10m 17 | chart: 18 | spec: 19 | chart: metrics-server 20 | version: '3.12.1' 21 | sourceRef: 22 | kind: HelmRepository 23 | name: metrics-server 24 | 
namespace: flux-system 25 | interval: 1m 26 | values: 27 | defaultArgs: 28 | - --cert-dir=/tmp 29 | - --kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname 30 | - --kubelet-use-node-status-port 31 | - --metric-resolution=15s 32 | - --kubelet-insecure-tls 33 | -------------------------------------------------------------------------------- /clusters/home-cluster/minio/minio-server.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: source.toolkit.fluxcd.io/v1 2 | kind: HelmRepository 3 | metadata: 4 | name: minio-helm 5 | namespace: flux-system 6 | spec: 7 | interval: 1m 8 | url: https://charts.bitnami.com/bitnami 9 | --- 10 | apiVersion: helm.toolkit.fluxcd.io/v2 11 | kind: HelmRelease 12 | metadata: 13 | name: minio-server 14 | namespace: minio 15 | spec: 16 | interval: 10m 17 | chart: 18 | spec: 19 | chart: minio 20 | version: '14.6.7' 21 | sourceRef: 22 | kind: HelmRepository 23 | name: minio-helm 24 | namespace: flux-system 25 | interval: 1m 26 | values: 27 | image: 28 | repository: bitnami/minio 29 | tag: 2024.6.11 30 | mode: standalone 31 | replicaCount: 1 32 | persistence: 33 | enabled: true 34 | size: 100Gi 35 | provisioning: 36 | enabled: true 37 | buckets: 38 | - name: medium-stats 39 | lifecycle: 40 | - id: DeleteAfter1Day 41 | expiry: 42 | days: 1 43 | - name: iceberg-table 44 | - name: medium-tags 45 | -------------------------------------------------------------------------------- /clusters/home-cluster/monitoring/agent-buff.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: operator.victoriametrics.com/v1beta1 2 | kind: VMAgent 3 | metadata: 4 | name: vmagent-buffer 5 | namespace: monitoring 6 | spec: 7 | selectAllByDefault: false 8 | remoteWrite: 9 | - url: "http://vminsert-cluster-1.monitoring.svc:8480/insert/0/prometheus/api/v1/write" 10 | - url: "http://vminsert-cluster-2.monitoring.svc:8480/insert/0/prometheus/api/v1/write" 11 | vmAgentExternalLabelName: vmagent_ha 12 | ingestOnlyMode: true 13 | replicaCount: 2 14 | statefulMode: true 15 | statefulStorage: 16 | volumeClaimTemplate: 17 | spec: 18 | resources: 19 | requests: 20 | storage: 2Gi 21 | -------------------------------------------------------------------------------- /clusters/home-cluster/monitoring/agent.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: operator.victoriametrics.com/v1beta1 2 | kind: VMAgent 3 | metadata: 4 | name: k8s-vm-agent 5 | namespace: monitoring 6 | spec: 7 | selectAllByDefault: true 8 | remoteWrite: 9 | # - url: "http://vminsert-cluster-1.monitoring.svc:8480/insert/0/prometheus/api/v1/write" 10 | # - url: "http://vminsert-cluster-2.monitoring.svc:8480/insert/0/prometheus/api/v1/write" 11 | - url: "http://vmagent-vmagent-buffer.monitoring.svc:8429/api/v1/write" 12 | -------------------------------------------------------------------------------- /clusters/home-cluster/monitoring/cluster-1.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: operator.victoriametrics.com/v1beta1 2 | kind: VMCluster 3 | metadata: 4 | name: cluster-1 5 | namespace: monitoring 6 | spec: 7 | paused: true 8 | retentionPeriod: "1" 9 | replicationFactor: 1 10 | vmstorage: 11 | replicaCount: 2 12 | storageDataPath: "/vm-data" 13 | storage: 14 | volumeClaimTemplate: 15 | spec: 16 | resources: 17 | requests: 18 | storage: 10Gi 19 | vmselect: 20 | replicaCount: 1 21 | cacheMountPath: "/select-cache" 22 
| storage: 23 | volumeClaimTemplate: 24 | spec: 25 | resources: 26 | requests: 27 | storage: 2Gi 28 | vminsert: 29 | replicaCount: 2 30 | resources: 31 | limits: 32 | cpu: "1" 33 | memory: "500Mi" 34 | -------------------------------------------------------------------------------- /clusters/home-cluster/monitoring/cluster-2.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: operator.victoriametrics.com/v1beta1 2 | kind: VMCluster 3 | metadata: 4 | name: cluster-2 5 | namespace: monitoring 6 | spec: 7 | paused: true 8 | retentionPeriod: "1" 9 | replicationFactor: 1 10 | vmstorage: 11 | replicaCount: 2 12 | storageDataPath: "/vm-data" 13 | storage: 14 | volumeClaimTemplate: 15 | spec: 16 | resources: 17 | requests: 18 | storage: 10Gi 19 | vmselect: 20 | replicaCount: 1 21 | cacheMountPath: "/select-cache" 22 | storage: 23 | volumeClaimTemplate: 24 | spec: 25 | resources: 26 | requests: 27 | storage: 2Gi 28 | vminsert: 29 | replicaCount: 2 30 | resources: 31 | limits: 32 | cpu: "1" 33 | memory: "500Mi" 34 | -------------------------------------------------------------------------------- /clusters/home-cluster/monitoring/prometheus.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: source.toolkit.fluxcd.io/v1 2 | kind: HelmRepository 3 | metadata: 4 | name: prometheus 5 | namespace: flux-system 6 | spec: 7 | interval: 1m 8 | url: https://prometheus-community.github.io/helm-charts 9 | --- 10 | apiVersion: helm.toolkit.fluxcd.io/v2 11 | kind: HelmRelease 12 | metadata: 13 | name: prom-operator 14 | namespace: monitoring 15 | spec: 16 | interval: 10m 17 | install: 18 | createNamespace: true 19 | chart: 20 | spec: 21 | chart: kube-prometheus-stack 22 | version: '60.0.1' 23 | sourceRef: 24 | kind: HelmRepository 25 | name: prometheus 26 | namespace: flux-system 27 | interval: 1m 28 | -------------------------------------------------------------------------------- /clusters/home-cluster/monitoring/user.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: operator.victoriametrics.com/v1beta1 2 | kind: VMUser 3 | metadata: 4 | name: demo 5 | namespace: monitoring 6 | spec: 7 | name: demo 8 | username: demo 9 | generatePassword: true 10 | targetRefs: 11 | # vmui + vmselect 12 | - crd: 13 | kind: VMCluster/vmselect 14 | name: cluster-1 15 | namespace: monitoring 16 | target_path_suffix: "/select/0" 17 | paths: 18 | - "/vmui" 19 | - "/vmui/.*" 20 | - "/prometheus/api/v1/query" 21 | - "/prometheus/api/v1/query_range" 22 | - "/prometheus/api/v1/series" 23 | - "/prometheus/api/v1/status/.*" 24 | - "/prometheus/api/v1/label/" 25 | - "/prometheus/api/v1/label/[^/]+/values" 26 | -------------------------------------------------------------------------------- /clusters/home-cluster/monitoring/vm.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: source.toolkit.fluxcd.io/v1 2 | kind: HelmRepository 3 | metadata: 4 | name: vm 5 | namespace: flux-system 6 | spec: 7 | interval: 1m 8 | url: https://victoriametrics.github.io/helm-charts 9 | --- 10 | apiVersion: helm.toolkit.fluxcd.io/v2 11 | kind: HelmRelease 12 | metadata: 13 | name: vm-operator 14 | namespace: vm-operator 15 | spec: 16 | interval: 10m 17 | install: 18 | createNamespace: true 19 | chart: 20 | spec: 21 | chart: victoria-metrics-operator 22 | version: '0.39.1' 23 | sourceRef: 24 | kind: HelmRepository 25 | name: vm 26 | namespace: flux-system 27 | 
interval: 1m 28 | values: 29 | crds: 30 | enabled: false 31 | operator: 32 | disable_prometheus_converter: true 33 | -------------------------------------------------------------------------------- /clusters/home-cluster/monitoring/vmauth.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: operator.victoriametrics.com/v1beta1 2 | kind: VMAuth 3 | metadata: 4 | name: k8svmauth 5 | namespace: monitoring 6 | spec: 7 | selectAllByDefault: true 8 | -------------------------------------------------------------------------------- /clusters/home-cluster/mysql/mysql.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: source.toolkit.fluxcd.io/v1beta2 2 | kind: OCIRepository 3 | metadata: 4 | name: mysql 5 | namespace: flux-system 6 | spec: 7 | interval: 10m 8 | url: oci://registry-1.docker.io/bitnamicharts/mysql 9 | ref: 10 | semver: ">8.4.3" 11 | --- 12 | apiVersion: helm.toolkit.fluxcd.io/v2 13 | kind: HelmRelease 14 | metadata: 15 | name: mysql 16 | namespace: mysql 17 | spec: 18 | interval: 10m 19 | releaseName: mysql 20 | chartRef: 21 | kind: OCIRepository 22 | name: mysql 23 | namespace: flux-system 24 | -------------------------------------------------------------------------------- /clusters/home-cluster/nats/nats.yaml: -------------------------------------------------------------------------------- 1 | # apiVersion: source.toolkit.fluxcd.io/v1 2 | # kind: HelmRepository 3 | # metadata: 4 | # name: nats 5 | # namespace: flux-system 6 | # spec: 7 | # interval: 1m 8 | # url: https://nats-io.github.io/k8s/helm/charts 9 | # --- 10 | # apiVersion: helm.toolkit.fluxcd.io/v2 11 | # kind: HelmRelease 12 | # metadata: 13 | # name: nats 14 | # namespace: nats 15 | # spec: 16 | # interval: 10m 17 | # install: 18 | # createNamespace: true 19 | # chart: 20 | # spec: 21 | # chart: nats 22 | # version: "*" 23 | # sourceRef: 24 | # kind: HelmRepository 25 | # name: nats 26 | # namespace: flux-system 27 | # interval: 1m 28 | # values: 29 | # config: 30 | # cluster: 31 | # enabled: true 32 | # jetstream: 33 | # enabled: true 34 | -------------------------------------------------------------------------------- /clusters/home-cluster/nvidia-gpu/gpu-operator.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: source.toolkit.fluxcd.io/v1 2 | kind: HelmRepository 3 | metadata: 4 | name: nvdia-gpu-operator 5 | namespace: flux-system 6 | spec: 7 | interval: 1m 8 | url: https://helm.ngc.nvidia.com/nvidia/ 9 | --- 10 | apiVersion: helm.toolkit.fluxcd.io/v2 11 | kind: HelmRelease 12 | metadata: 13 | name: nvdia-gpu-operator 14 | namespace: nvidia 15 | spec: 16 | interval: 10m 17 | chart: 18 | spec: 19 | chart: gpu-operator 20 | version: 'v24.3.0' 21 | sourceRef: 22 | kind: HelmRepository 23 | name: nvdia-gpu-operator 24 | namespace: flux-system 25 | interval: 1m 26 | values: 27 | driver: 28 | enabled: true 29 | version: "550.90.07" 30 | -------------------------------------------------------------------------------- /clusters/home-cluster/ollama/ollama-server.yaml: -------------------------------------------------------------------------------- 1 | # apiVersion: source.toolkit.fluxcd.io/v1 2 | # kind: HelmRepository 3 | # metadata: 4 | # name: ollama-helm 5 | # namespace: flux-system 6 | # spec: 7 | # interval: 1m 8 | # url: https://helm.openwebui.com/ 9 | # --- 10 | # apiVersion: helm.toolkit.fluxcd.io/v2 11 | # kind: HelmRelease 12 | # metadata: 13 | # name: 
ollama-server 14 | # namespace: ollama 15 | # spec: 16 | # interval: 10m 17 | # chart: 18 | # spec: 19 | # chart: open-webui 20 | # version: '2.1.0' 21 | # sourceRef: 22 | # kind: HelmRepository 23 | # name: ollama-helm 24 | # namespace: flux-system 25 | # interval: 1m 26 | # values: 27 | # ollamaUrls: 28 | # - 'http://ollama-server:11434' 29 | # resources: 30 | # requests: 31 | # cpu: "500m" 32 | # memory: "1Gi" 33 | # limits: 34 | # cpu: "1000m" 35 | # memory: "4Gi" 36 | # ollama: 37 | # # -- Automatically install Ollama Helm chart from https://otwld.github.io/ollama-helm/. Use [Helm Values](https://github.com/otwld/ollama-helm/#helm-values) to configure 38 | # enabled: true 39 | # image: 40 | # tag: "0.1.42" 41 | # resources: 42 | # requests: 43 | # cpu: "4000m" 44 | # memory: "4Gi" 45 | # limits: 46 | # cpu: "5000m" 47 | # memory: "34Gi" 48 | # replicaCount: 1 49 | # ollama: 50 | # gpu: 51 | # enabled: true 52 | # type: 'nvidia' 53 | # number: 1 54 | # models: 55 | # - codellama:13b 56 | # autoscaling: 57 | # enabled: false 58 | # minReplicas: 3 59 | # maxReplicas: 4 60 | # targetCPUUtilizationPercentage: 50 61 | # persistentVolume: 62 | # enabled: true 63 | # accessModes: 64 | # - ReadWriteOnce 65 | # size: 50Gi 66 | # storageClass: "ceph-block-ssd" 67 | -------------------------------------------------------------------------------- /clusters/home-cluster/prestodb/presto.yaml: -------------------------------------------------------------------------------- 1 | # apiVersion: source.toolkit.fluxcd.io/v1 2 | # kind: HelmRepository 3 | # metadata: 4 | # name: presto 5 | # namespace: flux-system 6 | # spec: 7 | # interval: 1m 8 | # url: https://prestodb.github.io/presto-helm-charts 9 | # --- 10 | # apiVersion: helm.toolkit.fluxcd.io/v2 11 | # kind: HelmRelease 12 | # metadata: 13 | # name: presto 14 | # namespace: presto 15 | # spec: 16 | # interval: 10m 17 | # install: 18 | # createNamespace: true 19 | # chart: 20 | # spec: 21 | # chart: presto 22 | # version: "*" 23 | # sourceRef: 24 | # kind: HelmRepository 25 | # name: presto 26 | # namespace: flux-system 27 | # interval: 1m 28 | -------------------------------------------------------------------------------- /clusters/home-cluster/rook-ceph-system/rook-ceph-cluster.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: helm.toolkit.fluxcd.io/v2 2 | kind: HelmRelease 3 | metadata: 4 | name: rook-ceph-cluster 5 | namespace: rook-system 6 | spec: 7 | interval: 10m 8 | install: 9 | createNamespace: true 10 | chart: 11 | spec: 12 | chart: rook-ceph-cluster 13 | version: 'v1.14.5' 14 | sourceRef: 15 | kind: HelmRepository 16 | name: rook 17 | namespace: flux-system 18 | interval: 1m 19 | values: 20 | # Default values for a single rook-ceph cluster 21 | # This is a YAML-formatted file. 22 | # Declare variables to be passed into your templates. 23 | 24 | # -- Namespace of the main rook operator 25 | operatorNamespace: rook-system 26 | 27 | # -- The metadata.name of the CephCluster CR 28 | # @default -- The same as the namespace 29 | clusterName: 30 | 31 | # -- Optional override of the target kubernetes version 32 | kubeVersion: 33 | 34 | # -- Cluster ceph.conf override 35 | configOverride: | 36 | [global] 37 | osd_pool_default_size = 2 38 | osd_pool_default_min_size = 2 39 | 40 | # Installs a debugging toolbox deployment 41 | toolbox: 42 | # -- Enable Ceph debugging pod deployment. 
See [toolbox](../Troubleshooting/ceph-toolbox.md) 43 | enabled: true 44 | # -- Toolbox image, defaults to the image used by the Ceph cluster 45 | image: #quay.io/ceph/ceph:v18.2.2 46 | # -- Toolbox tolerations 47 | tolerations: [] 48 | # -- Toolbox affinity 49 | affinity: {} 50 | # -- Toolbox container security context 51 | containerSecurityContext: 52 | runAsNonRoot: true 53 | runAsUser: 2016 54 | runAsGroup: 2016 55 | capabilities: 56 | drop: ["ALL"] 57 | # -- Toolbox resources 58 | resources: 59 | limits: 60 | memory: "1Gi" 61 | requests: 62 | cpu: "100m" 63 | memory: "128Mi" 64 | # -- Set the priority class for the toolbox if desired 65 | priorityClassName: 66 | 67 | monitoring: 68 | # -- Enable Prometheus integration, will also create necessary RBAC rules to allow Operator to create ServiceMonitors. 69 | # Monitoring requires Prometheus to be pre-installed 70 | enabled: false 71 | # -- Whether to create the Prometheus rules for Ceph alerts 72 | createPrometheusRules: true 73 | # -- The namespace in which to create the prometheus rules, if different from the rook cluster namespace. 74 | # If you have multiple rook-ceph clusters in the same k8s cluster, choose the same namespace (ideally, namespace with prometheus 75 | # deployed) to set rulesNamespaceOverride for all the clusters. Otherwise, you will get duplicate alerts with multiple alert definitions. 76 | rulesNamespaceOverride: 77 | # Monitoring settings for external clusters: 78 | # externalMgrEndpoints: 79 | # externalMgrPrometheusPort: 80 | # Scrape interval for prometheus 81 | # interval: 10s 82 | # allow adding custom labels and annotations to the prometheus rule 83 | prometheusRule: 84 | # -- Labels applied to PrometheusRule 85 | labels: 86 | release: prom-operator 87 | # -- Annotations applied to PrometheusRule 88 | annotations: {} 89 | 90 | # -- Create & use PSP resources. Set this to the same value as the rook-ceph chart. 91 | pspEnable: false 92 | 93 | # imagePullSecrets option allow to pull docker images from private docker registry. Option will be passed to all service accounts. 94 | # imagePullSecrets: 95 | # - name: my-registry-secret 96 | 97 | # All values below are taken from the CephCluster CRD 98 | # -- Cluster configuration. 99 | # @default -- See [below](#ceph-cluster-spec) 100 | cephClusterSpec: 101 | # This cluster spec example is for a converged cluster where all the Ceph daemons are running locally, 102 | # as in the host-based example (cluster.yaml). For a different configuration such as a 103 | # PVC-based cluster (cluster-on-pvc.yaml), external cluster (cluster-external.yaml), 104 | # or stretch cluster (cluster-stretched.yaml), replace this entire `cephClusterSpec` 105 | # with the specs from those examples. 106 | 107 | # For more details, check https://rook.io/docs/rook/v1.10/CRDs/Cluster/ceph-cluster-crd/ 108 | cephVersion: 109 | # The container image used to launch the Ceph daemon pods (mon, mgr, osd, mds, rgw). 110 | # v17 is Quincy, v18 is Reef. 111 | # RECOMMENDATION: In production, use a specific version tag instead of the general v18 flag, which pulls the latest release and could result in different 112 | # versions running within the cluster. See tags available at https://hub.docker.com/r/ceph/ceph/tags/. 
113 | # If you want to be more precise, you can always use a timestamp tag such as quay.io/ceph/ceph:v18.2.2-20240311 114 | # This tag might not contain a new Ceph version, just security fixes from the underlying operating system, which will reduce vulnerabilities 115 | image: quay.io/ceph/ceph:v18.2.2 116 | # Whether to allow unsupported versions of Ceph. Currently `quincy`, and `reef` are supported. 117 | # Future versions such as `squid` (v19) would require this to be set to `true`. 118 | # Do not set to true in production. 119 | allowUnsupported: false 120 | 121 | # The path on the host where configuration files will be persisted. Must be specified. 122 | # Important: if you reinstall the cluster, make sure you delete this directory from each host or else the mons will fail to start on the new cluster. 123 | # In Minikube, the '/data' directory is configured to persist across reboots. Use "/data/rook" in Minikube environment. 124 | dataDirHostPath: /var/lib/rook 125 | 126 | # Whether or not upgrade should continue even if a check fails 127 | # This means Ceph's status could be degraded and we don't recommend upgrading but you might decide otherwise 128 | # Use at your OWN risk 129 | # To understand Rook's upgrade process of Ceph, read https://rook.io/docs/rook/v1.10/Upgrade/ceph-upgrade/ 130 | skipUpgradeChecks: false 131 | 132 | # Whether or not continue if PGs are not clean during an upgrade 133 | continueUpgradeAfterChecksEvenIfNotHealthy: false 134 | 135 | # WaitTimeoutForHealthyOSDInMinutes defines the time (in minutes) the operator would wait before an OSD can be stopped for upgrade or restart. 136 | # If the timeout exceeds and OSD is not ok to stop, then the operator would skip upgrade for the current OSD and proceed with the next one 137 | # if `continueUpgradeAfterChecksEvenIfNotHealthy` is `false`. If `continueUpgradeAfterChecksEvenIfNotHealthy` is `true`, then operator would 138 | # continue with the upgrade of an OSD even if its not ok to stop after the timeout. This timeout won't be applied if `skipUpgradeChecks` is `true`. 139 | # The default wait timeout is 10 minutes. 140 | waitTimeoutForHealthyOSDInMinutes: 10 141 | 142 | # Whether or not requires PGs are clean before an OSD upgrade. If set to `true` OSD upgrade process won't start until PGs are healthy. 143 | # This configuration will be ignored if `skipUpgradeChecks` is `true`. 144 | # Default is false. 145 | upgradeOSDRequiresHealthyPGs: false 146 | 147 | mon: 148 | # Set the number of mons to be started. Generally recommended to be 3. 149 | # For highest availability, an odd number of mons should be specified. 150 | count: 2 151 | # The mons should be on unique nodes. For production, at least 3 nodes are recommended for this reason. 152 | # Mons should only be allowed on the same node for test environments where data loss is acceptable. 153 | allowMultiplePerNode: false 154 | 155 | mgr: 156 | # When higher availability of the mgr is needed, increase the count to 2. 157 | # In that case, one mgr will be active and one in standby. When Ceph updates which 158 | # mgr is active, Rook will update the mgr services to match the active mgr. 159 | count: 2 160 | allowMultiplePerNode: false 161 | modules: 162 | # List of modules to optionally enable or disable. 163 | # Note the "dashboard" and "monitoring" modules are already configured by other settings in the cluster CR. 
164 | # - name: rook 165 | # enabled: true 166 | 167 | # enable the ceph dashboard for viewing cluster status 168 | dashboard: 169 | enabled: true 170 | # serve the dashboard under a subpath (useful when you are accessing the dashboard via a reverse proxy) 171 | # urlPrefix: /ceph-dashboard 172 | # serve the dashboard at the given port. 173 | # port: 8443 174 | # Serve the dashboard using SSL (if using ingress to expose the dashboard and `ssl: true` you need to set 175 | # the corresponding "backend protocol" annotation(s) for your ingress controller of choice) 176 | ssl: false 177 | 178 | # Network configuration, see: https://github.com/rook/rook/blob/master/Documentation/CRDs/Cluster/ceph-cluster-crd.md#network-configuration-settings 179 | network: 180 | connections: 181 | # Whether to encrypt the data in transit across the wire to prevent eavesdropping the data on the network. 182 | # The default is false. When encryption is enabled, all communication between clients and Ceph daemons, or between Ceph daemons will be encrypted. 183 | # When encryption is not enabled, clients still establish a strong initial authentication and data integrity is still validated with a crc check. 184 | # IMPORTANT: Encryption requires the 5.11 kernel for the latest nbd and cephfs drivers. Alternatively for testing only, 185 | # you can set the "mounter: rbd-nbd" in the rbd storage class, or "mounter: fuse" in the cephfs storage class. 186 | # The nbd and fuse drivers are *not* recommended in production since restarting the csi driver pod will disconnect the volumes. 187 | encryption: 188 | enabled: false 189 | # Whether to compress the data in transit across the wire. The default is false. 190 | # Requires Ceph Quincy (v17) or newer. Also see the kernel requirements above for encryption. 191 | compression: 192 | enabled: false 193 | # Whether to require communication over msgr2. If true, the msgr v1 port (6789) will be disabled 194 | # and clients will be required to connect to the Ceph cluster with the v2 port (3300). 195 | # Requires a kernel that supports msgr v2 (kernel 5.11 or CentOS 8.4 or newer). 196 | requireMsgr2: false 197 | # # enable host networking 198 | # provider: host 199 | # # EXPERIMENTAL: enable the Multus network provider 200 | # provider: multus 201 | # selectors: 202 | # # The selector keys are required to be `public` and `cluster`. 203 | # # Based on the configuration, the operator will do the following: 204 | # # 1. if only the `public` selector key is specified both public_network and cluster_network Ceph settings will listen on that interface 205 | # # 2. if both `public` and `cluster` selector keys are specified the first one will point to 'public_network' flag and the second one to 'cluster_network' 206 | # # 207 | # # In order to work, each selector value must match a NetworkAttachmentDefinition object in Multus 208 | # # 209 | # # public: public-conf --> NetworkAttachmentDefinition object name in Multus 210 | # # cluster: cluster-conf --> NetworkAttachmentDefinition object name in Multus 211 | # # Provide internet protocol version. IPv6, IPv4 or empty string are valid options. Empty string would mean IPv4 212 | # ipFamily: "IPv6" 213 | # # Ceph daemons to listen on both IPv4 and Ipv6 networks 214 | # dualStack: false 215 | 216 | # enable the crash collector for ceph daemon crash collection 217 | crashCollector: 218 | disable: false 219 | # Uncomment daysToRetain to prune ceph crash entries older than the 220 | # specified number of days. 
221 | # daysToRetain: 30 222 | 223 | # enable log collector, daemons will log on files and rotate 224 | logCollector: 225 | enabled: true 226 | periodicity: daily # one of: hourly, daily, weekly, monthly 227 | maxLogSize: 500M # SUFFIX may be 'M' or 'G'. Must be at least 1M. 228 | 229 | # automate [data cleanup process](https://github.com/rook/rook/blob/master/Documentation/Storage-Configuration/ceph-teardown.md#delete-the-data-on-hosts) in cluster destruction. 230 | cleanupPolicy: 231 | # Since cluster cleanup is destructive to data, confirmation is required. 232 | # To destroy all Rook data on hosts during uninstall, confirmation must be set to "yes-really-destroy-data". 233 | # This value should only be set when the cluster is about to be deleted. After the confirmation is set, 234 | # Rook will immediately stop configuring the cluster and only wait for the delete command. 235 | # If the empty string is set, Rook will not destroy any data on hosts during uninstall. 236 | confirmation: "" 237 | # sanitizeDisks represents settings for sanitizing OSD disks on cluster deletion 238 | sanitizeDisks: 239 | # method indicates if the entire disk should be sanitized or simply ceph's metadata 240 | # in both case, re-install is possible 241 | # possible choices are 'complete' or 'quick' (default) 242 | method: quick 243 | # dataSource indicate where to get random bytes from to write on the disk 244 | # possible choices are 'zero' (default) or 'random' 245 | # using random sources will consume entropy from the system and will take much more time then the zero source 246 | dataSource: zero 247 | # iteration overwrite N times instead of the default (1) 248 | # takes an integer value 249 | iteration: 1 250 | # allowUninstallWithVolumes defines how the uninstall should be performed 251 | # If set to true, cephCluster deletion does not wait for the PVs to be deleted. 252 | allowUninstallWithVolumes: false 253 | 254 | # To control where various services will be scheduled by kubernetes, use the placement configuration sections below. 255 | # The example under 'all' would have all services scheduled on kubernetes nodes labeled with 'role=storage-node' and 256 | # tolerate taints with a key of 'storage-node'. 257 | # placement: 258 | # all: 259 | # nodeAffinity: 260 | # requiredDuringSchedulingIgnoredDuringExecution: 261 | # nodeSelectorTerms: 262 | # - matchExpressions: 263 | # - key: role 264 | # operator: In 265 | # values: 266 | # - storage-node 267 | # podAffinity: 268 | # podAntiAffinity: 269 | # topologySpreadConstraints: 270 | # tolerations: 271 | # - key: storage-node 272 | # operator: Exists 273 | # # The above placement information can also be specified for mon, osd, and mgr components 274 | # mon: 275 | # # Monitor deployments may contain an anti-affinity rule for avoiding monitor 276 | # # collocation on the same node. This is a required rule when host network is used 277 | # # or when AllowMultiplePerNode is false. Otherwise this anti-affinity rule is a 278 | # # preferred rule with weight: 50. 279 | # osd: 280 | # mgr: 281 | # cleanup: 282 | 283 | # annotations: 284 | # all: 285 | # mon: 286 | # osd: 287 | # cleanup: 288 | # prepareosd: 289 | # # If no mgr annotations are set, prometheus scrape annotations will be set by default. 290 | # mgr: 291 | # dashboard: 292 | 293 | # labels: 294 | # all: 295 | # mon: 296 | # osd: 297 | # cleanup: 298 | # mgr: 299 | # prepareosd: 300 | # # monitoring is a list of key-value pairs. It is injected into all the monitoring resources created by operator. 
301 | # # These labels can be passed as LabelSelector to Prometheus 302 | # monitoring: 303 | # dashboard: 304 | 305 | resources: 306 | mgr: 307 | limits: 308 | memory: "1Gi" 309 | requests: 310 | cpu: "100m" 311 | memory: "512Mi" 312 | mon: 313 | limits: 314 | memory: "2Gi" 315 | requests: 316 | cpu: "100m" 317 | memory: "512Mi" 318 | osd: 319 | limits: 320 | memory: "4Gi" 321 | requests: 322 | cpu: "100m" 323 | memory: "512Mi" 324 | prepareosd: 325 | # limits: It is not recommended to set limits on the OSD prepare job 326 | # since it's a one-time burst for memory that must be allowed to 327 | # complete without an OOM kill. Note however that if a k8s 328 | # limitRange guardrail is defined external to Rook, the lack of 329 | # a limit here may result in a sync failure, in which case a 330 | # limit should be added. 1200Mi may suffice for up to 15Ti 331 | # OSDs ; for larger devices 2Gi may be required. 332 | # cf. https://github.com/rook/rook/pull/11103 333 | requests: 334 | cpu: "500m" 335 | memory: "50Mi" 336 | mgr-sidecar: 337 | limits: 338 | memory: "100Mi" 339 | requests: 340 | cpu: "100m" 341 | memory: "40Mi" 342 | crashcollector: 343 | limits: 344 | memory: "60Mi" 345 | requests: 346 | cpu: "100m" 347 | memory: "60Mi" 348 | logcollector: 349 | limits: 350 | memory: "1Gi" 351 | requests: 352 | cpu: "100m" 353 | memory: "100Mi" 354 | cleanup: 355 | limits: 356 | memory: "1Gi" 357 | requests: 358 | cpu: "500m" 359 | memory: "100Mi" 360 | exporter: 361 | limits: 362 | memory: "128Mi" 363 | requests: 364 | cpu: "50m" 365 | memory: "50Mi" 366 | 367 | # The option to automatically remove OSDs that are out and are safe to destroy. 368 | removeOSDsIfOutAndSafeToRemove: false 369 | 370 | # priority classes to apply to ceph resources 371 | priorityClassNames: 372 | mon: system-node-critical 373 | osd: system-node-critical 374 | mgr: system-cluster-critical 375 | 376 | storage: # cluster level storage configuration and selection 377 | useAllNodes: true 378 | useAllDevices: true 379 | # deviceFilter: 380 | # config: 381 | # crushRoot: "custom-root" # specify a non-default root label for the CRUSH map 382 | # metadataDevice: "md0" # specify a non-rotational storage so ceph-volume will use it as block db device of bluestore. 383 | # databaseSizeMB: "1024" # uncomment if the disks are smaller than 100 GB 384 | # osdsPerDevice: "1" # this value can be overridden at the node or device level 385 | # encryptedDevice: "true" # the default value for this option is "false" 386 | # # Individual nodes and their config can be specified as well, but 'useAllNodes' above must be set to false. Then, only the named 387 | # # nodes below will be used as storage resources. Each node's 'name' field should match their 'kubernetes.io/hostname' label. 388 | # nodes: 389 | # - name: "172.17.4.201" 390 | # devices: # specific devices to use for storage can be specified for each node 391 | # - name: "sdb" 392 | # - name: "nvme01" # multiple osds can be created on high performance devices 393 | # config: 394 | # osdsPerDevice: "5" 395 | # - name: "/dev/disk/by-id/ata-ST4000DM004-XXXX" # devices can be specified using full udev paths 396 | # config: # configuration can be specified at the node level which overrides the cluster level config 397 | # - name: "172.17.4.301" 398 | # deviceFilter: "^sd." 399 | 400 | # The section for configuring management of daemon disruptions during upgrade or fencing. 
401 | disruptionManagement: 402 | # If true, the operator will create and manage PodDisruptionBudgets for OSD, Mon, RGW, and MDS daemons. OSD PDBs are managed dynamically 403 | # via the strategy outlined in the [design](https://github.com/rook/rook/blob/master/design/ceph/ceph-managed-disruptionbudgets.md). The operator will 404 | # block eviction of OSDs by default and unblock them safely when drains are detected. 405 | managePodBudgets: true 406 | # A duration in minutes that determines how long an entire failureDomain like `region/zone/host` will be held in `noout` (in addition to the 407 | # default DOWN/OUT interval) when it is draining. This is only relevant when `managePodBudgets` is `true`. The default value is `30` minutes. 408 | osdMaintenanceTimeout: 30 409 | # A duration in minutes that the operator will wait for the placement groups to become healthy (active+clean) after a drain was completed and OSDs came back up. 410 | # Operator will continue with the next drain if the timeout exceeds. It only works if `managePodBudgets` is `true`. 411 | # No values or 0 means that the operator will wait until the placement groups are healthy before unblocking the next drain. 412 | pgHealthCheckTimeout: 0 413 | 414 | # Configure the healthcheck and liveness probes for ceph pods. 415 | # Valid values for daemons are 'mon', 'osd', 'status' 416 | healthCheck: 417 | daemonHealth: 418 | mon: 419 | disabled: false 420 | interval: 45s 421 | osd: 422 | disabled: false 423 | interval: 60s 424 | status: 425 | disabled: false 426 | interval: 60s 427 | # Change pod liveness probe, it works for all mon, mgr, and osd pods. 428 | livenessProbe: 429 | mon: 430 | disabled: false 431 | mgr: 432 | disabled: false 433 | osd: 434 | disabled: false 435 | 436 | ingress: 437 | # -- Enable an ingress for the ceph-dashboard 438 | dashboard: 439 | {} 440 | # annotations: 441 | # external-dns.alpha.kubernetes.io/hostname: dashboard.example.com 442 | # nginx.ingress.kubernetes.io/rewrite-target: /ceph-dashboard/$2 443 | # If the dashboard has ssl: true the following will make sure the NGINX Ingress controller can expose the dashboard correctly 444 | # nginx.ingress.kubernetes.io/backend-protocol: "HTTPS" 445 | # nginx.ingress.kubernetes.io/server-snippet: | 446 | # proxy_ssl_verify off; 447 | # host: 448 | # name: dashboard.example.com 449 | # path: "/ceph-dashboard(/|$)(.*)" 450 | # tls: 451 | # - hosts: 452 | # - dashboard.example.com 453 | # secretName: testsecret-tls 454 | ## Note: Only one of ingress class annotation or the `ingressClassName:` can be used at a time 455 | ## to set the ingress class 456 | # ingressClassName: nginx 457 | 458 | # -- A list of CephBlockPool configurations to deploy 459 | # @default -- See [below](#ceph-block-pools) 460 | cephBlockPools: 461 | - name: ceph-blockpool 462 | # see https://github.com/rook/rook/blob/master/Documentation/CRDs/Block-Storage/ceph-block-pool-crd.md#spec for available configuration 463 | spec: 464 | deviceClass: hdd 465 | failureDomain: host 466 | replicated: 467 | size: 2 468 | # Enables collecting RBD per-image IO statistics by enabling dynamic OSD performance counters. Defaults to false. 
469 | # For reference: https://docs.ceph.com/docs/latest/mgr/prometheus/#rbd-io-statistics 470 | # enableRBDStats: true 471 | storageClass: 472 | enabled: true 473 | name: ceph-block 474 | isDefault: true 475 | reclaimPolicy: Delete 476 | allowVolumeExpansion: true 477 | volumeBindingMode: "Immediate" 478 | mountOptions: [] 479 | # see https://kubernetes.io/docs/concepts/storage/storage-classes/#allowed-topologies 480 | allowedTopologies: [] 481 | # - matchLabelExpressions: 482 | # - key: rook-ceph-role 483 | # values: 484 | # - storage-node 485 | # see https://github.com/rook/rook/blob/master/Documentation/Storage-Configuration/Block-Storage-RBD/block-storage.md#provision-storage for available configuration 486 | parameters: 487 | # (optional) mapOptions is a comma-separated list of map options. 488 | # For krbd options refer 489 | # https://docs.ceph.com/docs/latest/man/8/rbd/#kernel-rbd-krbd-options 490 | # For nbd options refer 491 | # https://docs.ceph.com/docs/latest/man/8/rbd-nbd/#options 492 | # mapOptions: lock_on_read,queue_depth=1024 493 | 494 | # (optional) unmapOptions is a comma-separated list of unmap options. 495 | # For krbd options refer 496 | # https://docs.ceph.com/docs/latest/man/8/rbd/#kernel-rbd-krbd-options 497 | # For nbd options refer 498 | # https://docs.ceph.com/docs/latest/man/8/rbd-nbd/#options 499 | # unmapOptions: force 500 | 501 | # RBD image format. Defaults to "2". 502 | imageFormat: "2" 503 | 504 | # RBD image features, equivalent to OR'd bitfield value: 63 505 | # Available for imageFormat: "2". Older releases of CSI RBD 506 | # support only the `layering` feature. The Linux kernel (KRBD) supports the 507 | # full feature complement as of 5.4 508 | imageFeatures: layering 509 | 510 | # These secrets contain Ceph admin credentials. 511 | csi.storage.k8s.io/provisioner-secret-name: rook-csi-rbd-provisioner 512 | csi.storage.k8s.io/provisioner-secret-namespace: "{{ .Release.Namespace }}" 513 | csi.storage.k8s.io/controller-expand-secret-name: rook-csi-rbd-provisioner 514 | csi.storage.k8s.io/controller-expand-secret-namespace: "{{ .Release.Namespace }}" 515 | csi.storage.k8s.io/node-stage-secret-name: rook-csi-rbd-node 516 | csi.storage.k8s.io/node-stage-secret-namespace: "{{ .Release.Namespace }}" 517 | # Specify the filesystem type of the volume. If not specified, csi-provisioner 518 | # will set default as `ext4`. Note that `xfs` is not recommended due to potential deadlock 519 | # in hyperconverged settings where the volume is mounted on the same node as the osds. 520 | csi.storage.k8s.io/fstype: ext4 521 | - name: ceph-blockpool-ssd 522 | # see https://github.com/rook/rook/blob/master/Documentation/CRDs/Block-Storage/ceph-block-pool-crd.md#spec for available configuration 523 | spec: 524 | deviceClass: ssd 525 | failureDomain: host 526 | replicated: 527 | size: 2 528 | # Enables collecting RBD per-image IO statistics by enabling dynamic OSD performance counters. Defaults to false. 
529 | # For reference: https://docs.ceph.com/docs/latest/mgr/prometheus/#rbd-io-statistics 530 | # enableRBDStats: true 531 | storageClass: 532 | enabled: true 533 | name: ceph-block-ssd 534 | isDefault: false 535 | reclaimPolicy: Delete 536 | allowVolumeExpansion: true 537 | volumeBindingMode: "Immediate" 538 | mountOptions: [] 539 | # see https://kubernetes.io/docs/concepts/storage/storage-classes/#allowed-topologies 540 | allowedTopologies: [] 541 | # - matchLabelExpressions: 542 | # - key: rook-ceph-role 543 | # values: 544 | # - storage-node 545 | # see https://github.com/rook/rook/blob/master/Documentation/Storage-Configuration/Block-Storage-RBD/block-storage.md#provision-storage for available configuration 546 | parameters: 547 | # (optional) mapOptions is a comma-separated list of map options. 548 | # For krbd options refer 549 | # https://docs.ceph.com/docs/latest/man/8/rbd/#kernel-rbd-krbd-options 550 | # For nbd options refer 551 | # https://docs.ceph.com/docs/latest/man/8/rbd-nbd/#options 552 | # mapOptions: lock_on_read,queue_depth=1024 553 | 554 | # (optional) unmapOptions is a comma-separated list of unmap options. 555 | # For krbd options refer 556 | # https://docs.ceph.com/docs/latest/man/8/rbd/#kernel-rbd-krbd-options 557 | # For nbd options refer 558 | # https://docs.ceph.com/docs/latest/man/8/rbd-nbd/#options 559 | # unmapOptions: force 560 | 561 | # RBD image format. Defaults to "2". 562 | imageFormat: "2" 563 | 564 | # RBD image features, equivalent to OR'd bitfield value: 63 565 | # Available for imageFormat: "2". Older releases of CSI RBD 566 | # support only the `layering` feature. The Linux kernel (KRBD) supports the 567 | # full feature complement as of 5.4 568 | imageFeatures: layering 569 | 570 | # These secrets contain Ceph admin credentials. 571 | csi.storage.k8s.io/provisioner-secret-name: rook-csi-rbd-provisioner 572 | csi.storage.k8s.io/provisioner-secret-namespace: "{{ .Release.Namespace }}" 573 | csi.storage.k8s.io/controller-expand-secret-name: rook-csi-rbd-provisioner 574 | csi.storage.k8s.io/controller-expand-secret-namespace: "{{ .Release.Namespace }}" 575 | csi.storage.k8s.io/node-stage-secret-name: rook-csi-rbd-node 576 | csi.storage.k8s.io/node-stage-secret-namespace: "{{ .Release.Namespace }}" 577 | # Specify the filesystem type of the volume. If not specified, csi-provisioner 578 | # will set default as `ext4`. Note that `xfs` is not recommended due to potential deadlock 579 | # in hyperconverged settings where the volume is mounted on the same node as the osds. 
580 | csi.storage.k8s.io/fstype: ext4 581 | 582 | # -- A list of CephFileSystem configurations to deploy 583 | # @default -- See [below](#ceph-file-systems) 584 | cephFileSystems: 585 | - name: ceph-filesystem 586 | # see https://github.com/rook/rook/blob/master/Documentation/CRDs/Shared-Filesystem/ceph-filesystem-crd.md#filesystem-settings for available configuration 587 | spec: 588 | metadataPool: 589 | replicated: 590 | size: 2 591 | dataPools: 592 | - failureDomain: host 593 | replicated: 594 | size: 2 595 | # Optional and highly recommended, 'data0' by default, see https://github.com/rook/rook/blob/master/Documentation/CRDs/Shared-Filesystem/ceph-filesystem-crd.md#pools 596 | name: data0 597 | metadataServer: 598 | activeCount: 1 599 | activeStandby: true 600 | resources: 601 | limits: 602 | memory: "4Gi" 603 | requests: 604 | cpu: "100m" 605 | memory: "512Mi" 606 | priorityClassName: system-cluster-critical 607 | storageClass: 608 | enabled: true 609 | isDefault: false 610 | name: ceph-filesystem 611 | # (Optional) specify a data pool to use, must be the name of one of the data pools above, 'data0' by default 612 | pool: data0 613 | reclaimPolicy: Delete 614 | allowVolumeExpansion: true 615 | volumeBindingMode: "Immediate" 616 | mountOptions: [] 617 | # see https://github.com/rook/rook/blob/master/Documentation/Storage-Configuration/Shared-Filesystem-CephFS/filesystem-storage.md#provision-storage for available configuration 618 | parameters: 619 | # The secrets contain Ceph admin credentials. 620 | csi.storage.k8s.io/provisioner-secret-name: rook-csi-cephfs-provisioner 621 | csi.storage.k8s.io/provisioner-secret-namespace: "{{ .Release.Namespace }}" 622 | csi.storage.k8s.io/controller-expand-secret-name: rook-csi-cephfs-provisioner 623 | csi.storage.k8s.io/controller-expand-secret-namespace: "{{ .Release.Namespace }}" 624 | csi.storage.k8s.io/node-stage-secret-name: rook-csi-cephfs-node 625 | csi.storage.k8s.io/node-stage-secret-namespace: "{{ .Release.Namespace }}" 626 | # Specify the filesystem type of the volume. If not specified, csi-provisioner 627 | # will set default as `ext4`. Note that `xfs` is not recommended due to potential deadlock 628 | # in hyperconverged settings where the volume is mounted on the same node as the osds. 
629 | csi.storage.k8s.io/fstype: ext4 630 | 631 | # -- Settings for the filesystem snapshot class 632 | # @default -- See [CephFS Snapshots](../Storage-Configuration/Ceph-CSI/ceph-csi-snapshot.md#cephfs-snapshots) 633 | cephFileSystemVolumeSnapshotClass: 634 | enabled: false 635 | name: ceph-filesystem 636 | isDefault: true 637 | deletionPolicy: Delete 638 | annotations: {} 639 | labels: {} 640 | # see https://rook.io/docs/rook/v1.10/Storage-Configuration/Ceph-CSI/ceph-csi-snapshot/#cephfs-snapshots for available configuration 641 | parameters: {} 642 | 643 | # -- Settings for the block pool snapshot class 644 | # @default -- See [RBD Snapshots](../Storage-Configuration/Ceph-CSI/ceph-csi-snapshot.md#rbd-snapshots) 645 | cephBlockPoolsVolumeSnapshotClass: 646 | enabled: false 647 | name: ceph-block 648 | isDefault: false 649 | deletionPolicy: Delete 650 | annotations: {} 651 | labels: {} 652 | # see https://rook.io/docs/rook/v1.10/Storage-Configuration/Ceph-CSI/ceph-csi-snapshot/#rbd-snapshots for available configuration 653 | parameters: {} 654 | 655 | # -- A list of CephObjectStore configurations to deploy 656 | # @default -- See [below](#ceph-object-stores) 657 | cephObjectStores: [] 658 | # - name: ceph-objectstore 659 | # # see https://github.com/rook/rook/blob/master/Documentation/CRDs/Object-Storage/ceph-object-store-crd.md#object-store-settings for available configuration 660 | # spec: 661 | # metadataPool: 662 | # failureDomain: host 663 | # replicated: 664 | # size: 2 665 | # dataPool: 666 | # failureDomain: host 667 | # replicated: 668 | # size: 2 669 | # preservePoolsOnDelete: true 670 | # gateway: 671 | # port: 80 672 | # resources: 673 | # limits: 674 | # memory: "2Gi" 675 | # requests: 676 | # cpu: "100m" 677 | # memory: "512Mi" 678 | # # securePort: 443 679 | # # sslCertificateRef: 680 | # instances: 1 681 | # priorityClassName: system-cluster-critical 682 | # storageClass: 683 | # enabled: true 684 | # name: ceph-bucket 685 | # reclaimPolicy: Delete 686 | # volumeBindingMode: "Immediate" 687 | # # see https://github.com/rook/rook/blob/master/Documentation/Storage-Configuration/Object-Storage-RGW/ceph-object-bucket-claim.md#storageclass for available configuration 688 | # parameters: 689 | # # note: objectStoreNamespace and objectStoreName are configured by the chart 690 | # region: us-east-1 691 | # ingress: 692 | # # Enable an ingress for the ceph-objectstore 693 | # enabled: false 694 | # annotations: {} 695 | # host: 696 | # name: objectstore.example.com 697 | # path: / 698 | # tls: 699 | # - hosts: 700 | # - objectstore.example.com 701 | # secretName: ceph-objectstore-tls 702 | # ingressClassName: nginx 703 | ## cephECBlockPools are disabled by default, please remove the comments and set desired values to enable it 704 | ## For erasure coded a replicated metadata pool is required. 705 | ## https://rook.io/docs/rook/latest/CRDs/Shared-Filesystem/ceph-filesystem-crd/#erasure-coded 706 | #cephECBlockPools: 707 | # - name: ec-pool 708 | # spec: 709 | # metadataPool: 710 | # replicated: 711 | # size: 2 712 | # dataPool: 713 | # failureDomain: osd 714 | # erasureCoded: 715 | # dataChunks: 2 716 | # codingChunks: 1 717 | # deviceClass: hdd 718 | # 719 | # parameters: 720 | # # clusterID is the namespace where the rook cluster is running 721 | # # If you change this namespace, also change the namespace below where the secret namespaces are defined 722 | # clusterID: rook-ceph # namespace:cluster 723 | # # (optional) mapOptions is a comma-separated list of map options. 
724 | # # For krbd options refer 725 | # # https://docs.ceph.com/docs/latest/man/8/rbd/#kernel-rbd-krbd-options 726 | # # For nbd options refer 727 | # # https://docs.ceph.com/docs/latest/man/8/rbd-nbd/#options 728 | # # mapOptions: lock_on_read,queue_depth=1024 729 | # 730 | # # (optional) unmapOptions is a comma-separated list of unmap options. 731 | # # For krbd options refer 732 | # # https://docs.ceph.com/docs/latest/man/8/rbd/#kernel-rbd-krbd-options 733 | # # For nbd options refer 734 | # # https://docs.ceph.com/docs/latest/man/8/rbd-nbd/#options 735 | # # unmapOptions: force 736 | # 737 | # # RBD image format. Defaults to "2". 738 | # imageFormat: "2" 739 | # 740 | # # RBD image features, equivalent to OR'd bitfield value: 63 741 | # # Available for imageFormat: "2". Older releases of CSI RBD 742 | # # support only the `layering` feature. The Linux kernel (KRBD) supports the 743 | # # full feature complement as of 5.4 744 | # # imageFeatures: layering,fast-diff,object-map,deep-flatten,exclusive-lock 745 | # imageFeatures: layering 746 | # 747 | # storageClass: 748 | # provisioner: rook-ceph.rbd.csi.ceph.com # csi-provisioner-name 749 | # enabled: true 750 | # name: rook-ceph-block 751 | # isDefault: false 752 | # allowVolumeExpansion: true 753 | # reclaimPolicy: Retain 754 | 755 | # -- CSI driver name prefix for cephfs, rbd and nfs. 756 | # @default -- `namespace name where rook-ceph operator is deployed` 757 | csiDriverNamePrefix: 758 | -------------------------------------------------------------------------------- /clusters/home-cluster/rook-ceph-system/rook-operator.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: source.toolkit.fluxcd.io/v1 2 | kind: HelmRepository 3 | metadata: 4 | name: rook 5 | namespace: flux-system 6 | spec: 7 | interval: 1m 8 | url: https://charts.rook.io/release 9 | --- 10 | apiVersion: helm.toolkit.fluxcd.io/v2 11 | kind: HelmRelease 12 | metadata: 13 | name: rook-ceph-operator 14 | namespace: rook-system 15 | spec: 16 | interval: 10m 17 | install: 18 | createNamespace: true 19 | chart: 20 | spec: 21 | chart: rook-ceph 22 | version: 'v1.14.5' 23 | sourceRef: 24 | kind: HelmRepository 25 | name: rook 26 | namespace: flux-system 27 | interval: 1m 28 | -------------------------------------------------------------------------------- /scripts/delete_deploy_keys.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import logging 3 | import os 4 | 5 | # Configure logging 6 | logging.basicConfig(level=logging.DEBUG) 7 | logger = logging.getLogger(__name__) 8 | 9 | def delete_deploy_keys(repo_name, owner, token): 10 | # Get all deploy keys for the repository 11 | deploy_keys_url = f'https://api.github.com/repos/{owner}/{repo_name}/keys' 12 | response = requests.get(deploy_keys_url, headers={'Authorization': f'Bearer {token}'}) 13 | 14 | if response.status_code == 200: 15 | deploy_keys = response.json() 16 | for key in deploy_keys: 17 | key_id = key['id'] 18 | # Attempt to delete each deploy key 19 | delete_key_url = f'https://api.github.com/repos/{owner}/{repo_name}/keys/{key_id}' 20 | delete_response = requests.delete(delete_key_url, headers={'Authorization': f'Bearer {token}'}) 21 | if delete_response.status_code == 204: 22 | logger.info(f"Deploy key {key_id} deleted successfully.") 23 | else: 24 | logger.error(f"Failed to delete deploy key {key_id}. 
Status code: {delete_response.status_code}, Response: {delete_response.text}") 25 | else: 26 | logger.error(f"Failed to retrieve deploy keys for repository {repo_name}. Status code: {response.status_code}, Response: {response.text}") 27 | 28 | def get_all_repositories(username, token): 29 | repositories_url = f'https://api.github.com/users/{username}/repos' 30 | response = requests.get(repositories_url, headers={'Authorization': f'Bearer {token}'}) 31 | 32 | if response.status_code == 200: 33 | repositories = response.json() 34 | for repo in repositories: 35 | repo_name = repo['name'] 36 | owner = repo['owner']['login'] 37 | delete_deploy_keys(repo_name, owner, token) 38 | else: 39 | logger.error(f"Failed to retrieve repositories for user {username}. Status code: {response.status_code}, Response: {response.text}") 40 | 41 | def main(): 42 | username = 'pavan-kumar-99' 43 | token = os.getenv('GITHUB_TOKEN') 44 | 45 | get_all_repositories(username, token) 46 | 47 | if __name__ == '__main__': 48 | main() 49 | -------------------------------------------------------------------------------- /scripts/medium/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM eclipse-temurin:11-jre-focal 2 | 3 | # SCALA = 2.12.15 4 | # PY - 3.9.17 5 | # openjdk 17.0.8 2023-07-18 LTS 6 | 7 | ARG spark_uid=185 8 | 9 | RUN groupadd --system --gid=${spark_uid} spark && \ 10 | useradd --system --uid=${spark_uid} --gid=spark spark 11 | 12 | RUN set -ex && \ 13 | apt-get update && \ 14 | ln -s /lib /lib64 && \ 15 | apt install -y gnupg2 wget bash tini libc6 libpam-modules krb5-user libnss3 procps net-tools gosu && \ 16 | apt install -y python3 python3-pip && \ 17 | mkdir -p /opt/spark && \ 18 | mkdir /opt/spark/python && \ 19 | mkdir -p /opt/spark/examples && \ 20 | mkdir -p /opt/spark/work-dir && \ 21 | touch /opt/spark/RELEASE && \ 22 | chown -R spark:spark /opt/spark && \ 23 | rm /bin/sh && \ 24 | ln -sv /bin/bash /bin/sh && \ 25 | echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su && \ 26 | chgrp root /etc/passwd && chmod ug+rw /etc/passwd && \ 27 | rm -rf /var/cache/apt/* && \ 28 | rm -rf /var/lib/apt/lists/* 29 | 30 | # Install Apache Spark 31 | # https://downloads.apache.org/spark/KEYS 32 | ENV SPARK_TGZ_URL=https://archive.apache.org/dist/spark/spark-3.3.2/spark-3.3.2-bin-hadoop3.tgz \ 33 | SPARK_TGZ_ASC_URL=https://archive.apache.org/dist/spark/spark-3.3.2/spark-3.3.2-bin-hadoop3.tgz.asc \ 34 | GPG_KEY=C56349D886F2B01F8CAE794C653C2301FEA493EE 35 | 36 | RUN set -ex; \ 37 | export SPARK_TMP="$(mktemp -d)"; \ 38 | cd $SPARK_TMP; \ 39 | wget -nv -O spark.tgz "$SPARK_TGZ_URL"; \ 40 | wget -nv -O spark.tgz.asc "$SPARK_TGZ_ASC_URL"; \ 41 | export GNUPGHOME="$(mktemp -d)"; \ 42 | gpg --keyserver hkps://keys.openpgp.org --recv-key "$GPG_KEY" || \ 43 | gpg --keyserver hkps://keyserver.ubuntu.com --recv-keys "$GPG_KEY"; \ 44 | gpg --batch --verify spark.tgz.asc spark.tgz; \ 45 | gpgconf --kill all; \ 46 | rm -rf "$GNUPGHOME" spark.tgz.asc; \ 47 | \ 48 | tar -xf spark.tgz --strip-components=1; \ 49 | chown -R spark:spark .; \ 50 | mv jars /opt/spark/; \ 51 | mv bin /opt/spark/; \ 52 | mv sbin /opt/spark/; \ 53 | mv kubernetes/dockerfiles/spark/decom.sh /opt/; \ 54 | mv examples /opt/spark/; \ 55 | mv kubernetes/tests /opt/spark/; \ 56 | mv data /opt/spark/; \ 57 | mv python/pyspark /opt/spark/python/pyspark/; \ 58 | mv python/lib /opt/spark/python/lib/; \ 59 | cd ..; \ 60 | rm -rf "$SPARK_TMP"; 61 | 62 | COPY entrypoint.sh /opt/ 63 | 64 | COPY articles.json /opt/ 65 
| 66 | ENV SPARK_HOME /opt/spark 67 | 68 | RUN curl https://repo1.maven.org/maven2/org/mongodb/spark/mongo-spark-connector_2.12/10.3.0/mongo-spark-connector_2.12-10.3.0.jar --output /opt/spark/jars/mongo-spark-connector_2.12-10.3.0.jar 69 | 70 | RUN curl https://repo1.maven.org/maven2/org/mongodb/mongodb-driver-sync/4.8.1/mongodb-driver-sync-4.8.1.jar --output /opt/spark/jars/mongodb-driver-sync-4.8.1.jar 71 | 72 | RUN curl https://repo1.maven.org/maven2/org/mongodb/bson/4.8.1/bson-4.8.1.jar --output /opt/spark/jars/bson-4.8.1.jar 73 | 74 | RUN curl https://repo1.maven.org/maven2/org/mongodb/mongodb-driver-core/4.8.1/mongodb-driver-core-4.8.1.jar --output /opt/spark/jars/mongodb-driver-core-4.8.1.jar 75 | 76 | RUN curl https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-spark-runtime-3.3_2.12/1.5.2/iceberg-spark-runtime-3.3_2.12-1.5.2.jar --output /opt/spark/jars/iceberg-spark-runtime-3.3_2.12-1.5.2.jar 77 | 78 | RUN curl https://repo1.maven.org/maven2/org/mongodb/bson-record-codec/4.8.1/bson-record-codec-4.8.1.jar --output /opt/spark/jars/bson-record-codec-4.8.1.jar 79 | 80 | RUN curl https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-aws-bundle/1.4.3/iceberg-aws-bundle-1.4.3.jar --output /opt/spark/jars/iceberg-aws-bundle-1.4.3.jar 81 | 82 | RUN chown -R spark:spark /opt/spark/jars 83 | 84 | RUN pip install requests boto3 botocore 85 | 86 | WORKDIR /opt/spark/work-dir 87 | RUN chmod g+w /opt/spark/work-dir 88 | RUN chmod a+x /opt/decom.sh 89 | RUN chmod a+x /opt/entrypoint.sh 90 | 91 | ENTRYPOINT [ "/opt/entrypoint.sh" ] 92 | -------------------------------------------------------------------------------- /scripts/medium/articles.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "article_id": "b09e698f88c9", 4 | "title": "Key Open-Source Tools Unveiled at KubeHuddle Toronto", 5 | "url": "https://medium.com/@pavan1999-kumar/key-open-source-tools-unveiled-at-kubehuddle-toronto-b09e698f88c9" 6 | }, 7 | { 8 | "article_id": "dc3527552452", 9 | "title": "Introduction to Apache Iceberg | PySpark", 10 | "url": "https://pavan1999-kumar.medium.com/introduction-to-apache-iceberg-dc3527552452" 11 | }, 12 | { 13 | "article_id": "feaf522d345a", 14 | "title": "Introduction to OpenCost", 15 | "url": "https://levelup.gitconnected.com/introduction-to-opencost-feaf522d345a" 16 | }, 17 | { 18 | "article_id": "10720df9c842", 19 | "title": "Kubernetes Policies as code using Kyverno", 20 | "url": "https://levelup.gitconnected.com/kubernetes-policies-as-code-using-kyverno-10720df9c842" 21 | }, 22 | { 23 | "article_id": "dddf43e38cc2", 24 | "title": "Introduction to Helm Dashboard", 25 | "url": "https://levelup.gitconnected.com/introduction-to-helm-dashboard-dddf43e38cc2" 26 | }, 27 | { 28 | "article_id": "9c88686fec", 29 | "title": "How to guess the right size for your Kubernetes Pods?", 30 | "url": "https://levelup.gitconnected.com/how-to-guess-the-right-size-for-your-kubernetes-pods-9c88686fec" 31 | }, 32 | { 33 | "article_id": "520a05439300", 34 | "title": "How I reduced the size of my Docker Image by 95%", 35 | "url": "https://levelup.gitconnected.com/how-i-reduced-the-size-of-my-docker-image-by-95-520a05439300" 36 | }, 37 | { 38 | "article_id": "8d358d064bfd", 39 | "title": "Policies as Code in Kubernetes using jsPolicy", 40 | "url": "https://medium.com/nerd-for-tech/policies-as-code-in-kubernetes-using-jspolicy-8d358d064bfd" 41 | }, 42 | { 43 | "article_id": "fc06508a3f0d", 44 | "title": "MlOps: Machine learning Pipelines using kubeflow", 
45 | "url": "https://medium.com/nerd-for-tech/mlops-machine-learning-pipelines-using-kubeflow-fc06508a3f0d" 46 | }, 47 | { 48 | "article_id": "ec917d4a2672", 49 | "title": "HashicVault Secrets in Kubernetes with CSI Driver", 50 | "url": "https://pavan1999-kumar.medium.com/hashicvault-secrets-in-kubernetes-with-csi-driver-ec917d4a2672" 51 | }, 52 | { 53 | "article_id": "dee6513a7206", 54 | "title": "Multi-Tenancy in Kubernetes using Loft\u2019s Vcluster", 55 | "url": "https://medium.com/nerd-for-tech/multi-tenancy-in-kubernetes-using-lofts-vcluster-dee6513a7206" 56 | }, 57 | { 58 | "article_id": "d97482b48f3d", 59 | "title": "PKI Certs Injection to K8s Pods with Vault Agent Injector", 60 | "url": "https://medium.com/nerd-for-tech/pki-certs-injection-to-k8s-pods-with-vault-agent-injector-d97482b48f3d" 61 | }, 62 | { 63 | "article_id": "c28dc6c981c9", 64 | "title": "Analyze Terraform costs with Infracost ( The GitOps Way )", 65 | "url": "https://medium.com/nerd-for-tech/terraforming-the-cost-with-infracost-c28dc6c981c9" 66 | }, 67 | { 68 | "article_id": "9417cf4abf58", 69 | "title": "Terraforming the GitOps Way\u00a0!!!", 70 | "url": "https://medium.com/nerd-for-tech/terraforming-the-gitops-way-9417cf4abf58" 71 | }, 72 | { 73 | "article_id": "9e19d7239d3d", 74 | "title": "Using Hashicorp Vault as a Certificate issuer in Cert Manager", 75 | "url": "https://medium.com/nerd-for-tech/using-hashicorp-vault-as-a-certificate-issuer-in-cert-manager-9e19d7239d3d" 76 | }, 77 | { 78 | "article_id": "666f74cb781a", 79 | "title": "Deep Dive into Cortex Metrics Part II", 80 | "url": "https://medium.com/nerd-for-tech/deep-dive-into-cortex-metrics-part-ii-666f74cb781a" 81 | }, 82 | { 83 | "article_id": "c228e01f8c58", 84 | "title": "Deep Dive into Cortex Metrics \u2014 Part I", 85 | "url": "https://medium.com/nerd-for-tech/deep-dive-into-cortex-part-i-c228e01f8c58" 86 | }, 87 | { 88 | "article_id": "6172a023f542", 89 | "title": "Kubernetes Cluster Autoscaler in Action", 90 | "url": "https://medium.com/nerd-for-tech/kubernetes-cluster-autoscaler-in-action-6172a023f542" 91 | }, 92 | { 93 | "article_id": "3bb2eb0c0872", 94 | "title": "Logging at Scale in Kubernetes using Grafana Loki", 95 | "url": "https://medium.com/nerd-for-tech/logging-at-scale-in-kubernetes-using-grafana-loki-3bb2eb0c0872" 96 | }, 97 | { 98 | "article_id": "f8ce91d319b9", 99 | "title": "Running Apache Spark on EKS with AWS Spot Instances", 100 | "url": "https://medium.com/nerd-for-tech/running-apache-spark-on-eks-with-aws-spot-instances-f8ce91d319b9" 101 | }, 102 | { 103 | "article_id": "8ef83b3f2f89", 104 | "title": "Going Serverless in Kubernetes using Kubeless", 105 | "url": "https://medium.com/nerd-for-tech/going-serverless-in-kubernetes-using-kubeless-8ef83b3f2f89" 106 | }, 107 | { 108 | "article_id": "6fb65ac63d5", 109 | "title": "Free and Automatic SSL Certificates in Kubernetes using Cert Manager", 110 | "url": "https://medium.com/nerd-for-tech/free-and-automatic-ssl-certificates-in-kubernetes-using-cert-manager-6fb65ac63d5" 111 | }, 112 | { 113 | "article_id": "431c1587ef0a", 114 | "title": "Chaos Engineering in Kubernetes using Chaos Mesh", 115 | "url": "https://medium.com/nerd-for-tech/chaos-engineering-in-kubernetes-using-chaos-mesh-431c1587ef0a" 116 | }, 117 | { 118 | "article_id": "f72ecba39f76", 119 | "title": "Deep Dive into Thanos-Part I", 120 | "url": "https://medium.com/nerd-for-tech/deep-dive-into-thanos-part-i-f72ecba39f76" 121 | }, 122 | { 123 | "article_id": "8f48b8bba132", 124 | "title": "Deep Dive into Thanos-Part 
II", 125 | "url": "https://medium.com/nerd-for-tech/deep-dive-into-thanos-part-ii-8f48b8bba132" 126 | }, 127 | { 128 | "article_id": "6765bf44ebc6", 129 | "title": "Kubernetes Security with Kube-bench and Kube-hunter", 130 | "url": "https://www.techmanyu.com/kubernetes-security-with-kube-bench-and-kube-hunter-6765bf44ebc6" 131 | }, 132 | { 133 | "article_id": "d57fc2548043", 134 | "title": "Network Policies demystified in Kubernetes", 135 | "url": "https://medium.com/nerd-for-tech/network-policies-demystified-in-kubernetes-d57fc2548043" 136 | }, 137 | { 138 | "article_id": "ab61a2177950", 139 | "title": "AutoScaling in Kubernetes ( HPA / VPA )", 140 | "url": "https://medium.com/nerd-for-tech/autoscaling-in-kubernetes-hpa-vpa-ab61a2177950" 141 | }, 142 | { 143 | "article_id": "fd05560de34a", 144 | "title": "Creating Self Hosted GitHub runners in a Kubernetes Cluster", 145 | "url": "https://www.techmanyu.com/creating-self-hosted-github-runners-in-a-kubernetes-cluster-fd05560de34a" 146 | }, 147 | { 148 | "article_id": "ef99f7a4e417", 149 | "title": "Deploying RabbitMQ on Kubernetes using RabbitMQ Cluster Operator", 150 | "url": "https://medium.com/nerd-for-tech/deploying-rabbitmq-on-kubernetes-using-rabbitmq-cluster-operator-ef99f7a4e417" 151 | }, 152 | { 153 | "article_id": "2f873ae0f9f3", 154 | "title": "Introduction to Crossplane", 155 | "url": "https://medium.com/nerd-for-tech/introduction-to-crossplane-2f873ae0f9f3" 156 | }, 157 | { 158 | "article_id": "bb5ae74d9a25", 159 | "title": "Introduction to Bitnami Sealed Secrets", 160 | "url": "https://faun.pub/introduction-to-bitnami-sealed-secrets-bb5ae74d9a25" 161 | }, 162 | { 163 | "article_id": "654aa4cf38e6", 164 | "title": "Introduction to External DNS in Kubernetes", 165 | "url": "https://faun.pub/introduction-to-external-dns-in-kubernetes-654aa4cf38e6" 166 | }, 167 | { 168 | "article_id": "f4cb7ebc2e0b", 169 | "title": "Introduction to Jenkins Operator", 170 | "url": "https://medium.com/swlh/introduction-to-jenkins-operator-f4cb7ebc2e0b" 171 | }, 172 | { 173 | "article_id": "dd34e2de50a6", 174 | "title": "Creating a GKE Cluster with GitHub Actions", 175 | "url": "https://medium.com/nerd-for-tech/creating-a-gke-cluster-with-github-actions-dd34e2de50a6" 176 | }, 177 | { 178 | "article_id": "b364a67437b7", 179 | "title": "Create a Kubernetes Cluster using Kind", 180 | "url": "https://medium.com/nerd-for-tech/create-a-kubernetes-cluster-using-kind-b364a67437b7" 181 | }, 182 | { 183 | "article_id": "a9d171b11917", 184 | "title": "Deploying Applications in Kubernetes Using Flux", 185 | "url": "https://medium.com/swlh/deploying-applications-in-kubernetes-using-flux-a9d171b11917" 186 | }, 187 | { 188 | "article_id": "97f990dc2f44", 189 | "title": "Introduction to Kustomize", 190 | "url": "https://faun.pub/introduction-to-kustomize-97f990dc2f44" 191 | }, 192 | { 193 | "article_id": "ab004a8cdb5e", 194 | "title": "Deploying applications in Kubernetes using Argo CD", 195 | "url": "https://medium.com/nerd-for-tech/deploying-applications-in-kubernetes-using-argo-cd-ab004a8cdb5e" 196 | }, 197 | { 198 | "article_id": "bb9ab466a1a0", 199 | "title": "Ingress Gateway in Istio", 200 | "url": "https://medium.com/nerd-for-tech/ingress-gateway-in-istio-bb9ab466a1a0" 201 | }, 202 | { 203 | "article_id": "970f0666b867", 204 | "title": "MTLS in Istio", 205 | "url": "https://medium.com/nerd-for-tech/mtls-in-istio-970f0666b867" 206 | }, 207 | { 208 | "article_id": "36f8c4d32fe8", 209 | "title": "Mirroring of Live Traffic in Kubernetes using Istio Traffic 
Mirroring", 210 | "url": "https://medium.com/nerd-for-tech/mirroring-of-live-traffic-in-kubernetes-using-istio-traffic-mirroring-36f8c4d32fe8" 211 | }, 212 | { 213 | "article_id": "5d9bdb495032", 214 | "title": "Weighted routing in Kubernetes using Istio", 215 | "url": "https://www.techmanyu.com/weighted-routing-in-kubernetes-using-istio-5d9bdb495032" 216 | }, 217 | { 218 | "article_id": "1557db1cd62d", 219 | "title": "How to Install Istio using istioctl", 220 | "url": "https://www.techmanyu.com/how-to-install-istio-using-istioctl-1557db1cd62d" 221 | }, 222 | { 223 | "article_id": "2bc68d2ffdac", 224 | "title": "Introduction to Istio Service Mesh", 225 | "url": "https://www.techmanyu.com/introduction-to-istio-service-mesh-2bc68d2ffdac" 226 | }, 227 | { 228 | "article_id": "7640d34ad102", 229 | "title": "Securing your secrets using vault in Kubernetes\u200a\u2014\u200aPart 2", 230 | "url": "https://faun.pub/securing-your-secrets-using-vault-in-kubernetes-part-2-7640d34ad102" 231 | }, 232 | { 233 | "article_id": "de3d7378e226", 234 | "title": "Securing your secrets using vault-k8s in Kubernetes\u200a\u2014\u200aPart 1", 235 | "url": "https://faun.pub/securing-your-secrets-using-vault-k8s-in-kubernetes-part-1-de3d7378e226" 236 | } 237 | ] 238 | -------------------------------------------------------------------------------- /scripts/medium/driver.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: spark-pod 5 | spec: 6 | containers: 7 | - name: spark-driver 8 | image: greypavan/medium-manifests:medium-stats 9 | envFrom: 10 | - secretRef: 11 | name: medium-creds 12 | volumeMounts: 13 | - name: stats-pvc 14 | mountPath: /usr/stats 15 | volumes: 16 | - name: stats-pvc 17 | persistentVolumeClaim: 18 | claimName: stats-pvc 19 | -------------------------------------------------------------------------------- /scripts/medium/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Check whether there is a passwd entry for the container UID 3 | myuid=$(id -u) 4 | mygid=$(id -g) 5 | # turn off -e for getent because it will return error code in anonymous uid case 6 | set +e 7 | uidentry=$(getent passwd $myuid) 8 | set -e 9 | 10 | # If there is no passwd entry for the container UID, attempt to create one 11 | if [ -z "$uidentry" ] ; then 12 | if [ -w /etc/passwd ] ; then 13 | echo "$myuid:x:$myuid:$mygid:${SPARK_USER_NAME:-anonymous uid}:$SPARK_HOME:/bin/false" >> /etc/passwd 14 | else 15 | echo "Container ENTRYPOINT failed to add passwd entry for anonymous UID" 16 | fi 17 | fi 18 | 19 | if [ -z "$JAVA_HOME" ]; then 20 | JAVA_HOME=$(java -XshowSettings:properties -version 2>&1 > /dev/null | grep 'java.home' | awk '{print $3}') 21 | fi 22 | 23 | SPARK_CLASSPATH="$SPARK_CLASSPATH:${SPARK_HOME}/jars/*" 24 | env | grep SPARK_JAVA_OPT_ | sort -t_ -k4 -n | sed 's/[^=]*=\(.*\)/\1/g' > /tmp/java_opts.txt 25 | readarray -t SPARK_EXECUTOR_JAVA_OPTS < /tmp/java_opts.txt 26 | 27 | if [ -n "$SPARK_EXTRA_CLASSPATH" ]; then 28 | SPARK_CLASSPATH="$SPARK_CLASSPATH:$SPARK_EXTRA_CLASSPATH" 29 | fi 30 | 31 | if ! [ -z ${PYSPARK_PYTHON+x} ]; then 32 | export PYSPARK_PYTHON 33 | fi 34 | if ! [ -z ${PYSPARK_DRIVER_PYTHON+x} ]; then 35 | export PYSPARK_DRIVER_PYTHON 36 | fi 37 | 38 | # If HADOOP_HOME is set and SPARK_DIST_CLASSPATH is not set, set it here so Hadoop jars are available to the executor. 
39 | # It does not set SPARK_DIST_CLASSPATH if already set, to avoid overriding customizations of this value from elsewhere e.g. Docker/K8s. 40 | if [ -n "${HADOOP_HOME}" ] && [ -z "${SPARK_DIST_CLASSPATH}" ]; then 41 | export SPARK_DIST_CLASSPATH="$($HADOOP_HOME/bin/hadoop classpath)" 42 | fi 43 | 44 | if ! [ -z ${HADOOP_CONF_DIR+x} ]; then 45 | SPARK_CLASSPATH="$HADOOP_CONF_DIR:$SPARK_CLASSPATH"; 46 | fi 47 | 48 | if ! [ -z ${SPARK_CONF_DIR+x} ]; then 49 | SPARK_CLASSPATH="$SPARK_CONF_DIR:$SPARK_CLASSPATH"; 50 | elif ! [ -z ${SPARK_HOME+x} ]; then 51 | SPARK_CLASSPATH="$SPARK_HOME/conf:$SPARK_CLASSPATH"; 52 | fi 53 | 54 | case "$1" in 55 | driver) 56 | shift 1 57 | CMD=( 58 | "$SPARK_HOME/bin/spark-submit" 59 | --conf "spark.driver.bindAddress=$SPARK_DRIVER_BIND_ADDRESS" 60 | --deploy-mode client 61 | "$@" 62 | ) 63 | ;; 64 | executor) 65 | shift 1 66 | CMD=( 67 | ${JAVA_HOME}/bin/java 68 | "${SPARK_EXECUTOR_JAVA_OPTS[@]}" 69 | -Xms$SPARK_EXECUTOR_MEMORY 70 | -Xmx$SPARK_EXECUTOR_MEMORY 71 | -cp "$SPARK_CLASSPATH:$SPARK_DIST_CLASSPATH" 72 | org.apache.spark.scheduler.cluster.k8s.KubernetesExecutorBackend 73 | --driver-url $SPARK_DRIVER_URL 74 | --executor-id $SPARK_EXECUTOR_ID 75 | --cores $SPARK_EXECUTOR_CORES 76 | --app-id $SPARK_APPLICATION_ID 77 | --hostname $SPARK_EXECUTOR_POD_IP 78 | --resourceProfileId $SPARK_RESOURCE_PROFILE_ID 79 | --podName $SPARK_EXECUTOR_POD_NAME 80 | ) 81 | ;; 82 | 83 | *) 84 | # Non-spark-on-k8s command provided, proceeding in pass-through mode... 85 | CMD=("$@") 86 | ;; 87 | esac 88 | 89 | # Switch to spark if no USER specified (root by default) otherwise use USER directly 90 | 91 | # Execute the container CMD under tini for better hygiene 92 | /usr/bin/tini -s -- "${CMD[@]}" 93 | -------------------------------------------------------------------------------- /scripts/medium/executor.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: spark-executor 5 | spec: 6 | containers: 7 | - name: spark-executor 8 | image: greypavan/medium-manifests:medium-stats 9 | envFrom: 10 | - secretRef: 11 | name: medium-creds 12 | volumeMounts: 13 | - name: stats-pvc 14 | mountPath: /usr/stats 15 | volumes: 16 | - name: stats-pvc 17 | persistentVolumeClaim: 18 | claimName: stats-pvc 19 | -------------------------------------------------------------------------------- /scripts/medium/medium-articles-info.py: -------------------------------------------------------------------------------- 1 | import os 2 | import requests 3 | import json 4 | from medium_api import Medium 5 | 6 | api_key = os.getenv('API_KEY') 7 | 8 | medium = Medium(api_key) 9 | 10 | user = medium.user(username="pavan1999-kumar") 11 | 12 | user.fetch_articles() 13 | 14 | articles_data = [] 15 | 16 | for article in user.articles: 17 | article_data = { 18 | "article_id": article.article_id, 19 | "title": article.title, 20 | "url": article.url 21 | } 22 | articles_data.append(article_data) 23 | 24 | json_data = json.dumps(articles_data, indent=4) 25 | 26 | print(json_data) 27 | -------------------------------------------------------------------------------- /scripts/medium/medium-stats-spark-driver.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import boto3 3 | import os 4 | import botocore 5 | from pyspark import SparkConf 6 | from pyspark.sql import SparkSession 7 | from pyspark.sql.functions import * 8 | from pyspark.sql.types import * 9 | 10 | 11 | ## Define a 
function to download files from S3 12 | def download_from_s3(bucket_name, key, local_path): 13 | """ 14 | Download a file from Amazon S3 to the local file system. 15 | 16 | Args: 17 | - bucket_name: The name of the S3 bucket. 18 | - key: The key (path) of the file in the S3 bucket. 19 | - local_path: The local path where the file will be downloaded. 20 | """ 21 | s3 = boto3.resource("s3",endpoint_url="http://minio-server.minio:9000") 22 | try: 23 | bucket = s3.Bucket(bucket_name) 24 | for obj in bucket.objects.filter(Prefix=key): 25 | target = ( 26 | obj.key 27 | if local_path is None 28 | else os.path.join(local_path, os.path.relpath(obj.key, key)) 29 | ) 30 | if not os.path.exists(os.path.dirname(target)): 31 | os.makedirs(os.path.dirname(target)) 32 | if obj.key[-1] == "/": 33 | continue 34 | bucket.download_file(obj.key, target) 35 | print("Object Downloaded", obj.key) 36 | except botocore.exceptions.ClientError as e: 37 | if e.response["Error"]["Code"] == "404": 38 | print(f"The object does not exist: s3://{bucket_name}/{key}") 39 | else: 40 | raise 41 | 42 | 43 | def process_data(bucket_name, bucket_prefix, local_path): 44 | """ 45 | Process data from JSON files and return the joined DataFrame. 46 | 47 | Args: 48 | - bucket_name: Name of the S3 bucket. 49 | - bucket_prefix: Key (path) of the file in the S3 bucket. 50 | - local_path: Local path where the file will be downloaded. 51 | 52 | Returns: 53 | - Joined DataFrame. 54 | - Spark Session. 55 | """ 56 | catalog_name = "medium_stats" 57 | iceberg_bucket_name = bucket_name 58 | iceberg_bucket_prefix = bucket_prefix 59 | warehouse_path = f"s3a://{iceberg_bucket_name}/{iceberg_bucket_prefix}" 60 | print("Warehouse path is",warehouse_path) 61 | mongodb_uri = os.getenv("MONGO_URI") 62 | access_key = os.getenv("AWS_ACCESS_KEY_ID") 63 | secret_key = os.getenv("AWS_SECRET_ACCESS_KEY") 64 | 65 | if not mongodb_uri: 66 | raise ValueError("MONGO_URI environment variable is not set") 67 | 68 | # Initialize Spark session 69 | spark = ( 70 | SparkSession.builder.appName("Medium Stats Pavan") 71 | .config( 72 | "spark.jars", 73 | "/opt/spark/jars/mongo-spark-connector_2.12-10.3.0.jar,/opt/spark/jars/iceberg-spark-runtime-3.3_2.12-1.5.2.jar,/opt/spark/jars/iceberg-aws-bundle-1.4.3.jar,/opt/spark/jars/mongodb-driver-sync-4.8.1.jar,/opt/spark/jars/bson-4.8.1.jar,/opt/spark/jars/mongodb-driver-core-4.8.1.jar,/opt/spark/jars/bson-record-codec-4.8.1.jar", 74 | ) 75 | .config( 76 | f"spark.sql.catalog.{catalog_name}", "org.apache.iceberg.spark.SparkCatalog" 77 | ) 78 | .config(f"spark.sql.catalog.{catalog_name}.warehouse", f"{warehouse_path}") 79 | .config( 80 | f"spark.sql.catalog.{catalog_name}.type", 81 | "hadoop", 82 | ) 83 | .config(f"spark.sql.defaultCatalog", f"{catalog_name}") 84 | .config( 85 | "spark.sql.extensions", 86 | "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions", 87 | ) 88 | .config( 89 | "spark.mongodb.write.connection.uri", 90 | f"{mongodb_uri}", 91 | ) 92 | .config( 93 | "spark.hadoop.fs.s3a.endpoint", 94 | "http://10.100.44.242:9000/", 95 | ) 96 | .config( 97 | "spark.sql.catalogImplementation", 98 | "in-memory", 99 | ) 100 | .config( 101 | "spark.hadoop.fs.s3a.access.key", 102 | f"{access_key}", 103 | ) 104 | .config( 105 | "spark.hadoop.fs.s3a.secret.key", 106 | f"{secret_key}", 107 | ) 108 | .getOrCreate() 109 | ) 110 | 111 | spark.sparkContext.setLogLevel("OFF") 112 | 113 | statsSchema = StructType( 114 | [ 115 | StructField("dayStartsAt", LongType(), True), 116 | StructField("Date", LongType(), True), 117 | 
StructField("readersThatClappedCount", LongType(), True), 118 | StructField("readersThatReadCount", LongType(), True), 119 | StructField("readersThatRepliedCount", LongType(), True), 120 | StructField("readersThatViewedCount", LongType(), True), 121 | StructField("article_id", StringType(), True), 122 | ] 123 | ) 124 | 125 | articlesSchema = StructType( 126 | [ 127 | StructField("article_id", StringType(), True), 128 | StructField("title", StringType(), True), 129 | StructField("url", StringType(), True), 130 | ] 131 | ) 132 | 133 | # Read JSON files into DataFrames 134 | script_dir = os.path.dirname(os.path.abspath(__file__)) 135 | file_path = os.path.join(script_dir, "articles.json") 136 | df = ( 137 | spark.read.option("multiLine", "true") 138 | .option("mode", "PERMISSIVE") 139 | .schema(statsSchema) 140 | .json(local_path) 141 | ) 142 | 143 | df_posts = ( 144 | spark.read.option("multiLine", "true") 145 | .option("mode", "PERMISSIVE") 146 | .schema(articlesSchema) 147 | .json("/opt/articles.json") 148 | ) 149 | 150 | # Data transformations 151 | df = df.dropDuplicates() # Drop duplicate rows 152 | df_posts = df_posts.dropDuplicates() 153 | df = df.withColumn( 154 | "Date", 155 | to_date(from_unixtime(floor(col("dayStartsAt") / 1000), "yyyy-MM-dd")), 156 | ) 157 | columns_to_drop = [ 158 | "__typename", 159 | "membershipType", 160 | "dayStartsAt", 161 | "readersThatInitiallyFollowedAuthorFromThisPostCount", 162 | "readersThatHighlightedCount", 163 | ] 164 | df = df.drop(*columns_to_drop) 165 | 166 | # Join operation 167 | df_join = ( 168 | df.join(df_posts, df["article_id"] == df_posts["article_id"], "inner") 169 | .drop(df.article_id) 170 | .withColumnRenamed("readersThatViewedCount", "Viewers") 171 | ) 172 | 173 | return df_join, spark 174 | 175 | 176 | def compute_yearly_statistics(spark, table_name): 177 | """ 178 | Compute yearly statistics using Apache Spark. 179 | 180 | Args: 181 | - spark: The SparkSession object. 182 | - df: The DataFrame containing the data for computation. 183 | """ 184 | 185 | df = spark.sql( 186 | f"""select title AS Title , 187 | Sum(Viewers) AS Total_Viewers, 188 | FIRST(url) AS URL 189 | from {table_name} GROUP BY title ORDER BY Total_Viewers DESC;""" 190 | ) 191 | df.show(100, truncate=False) 192 | return df 193 | 194 | 195 | if __name__ == "__main__": 196 | parser = argparse.ArgumentParser( 197 | description="Process files from S3 and perform data transformations." 
198 | ) 199 | parser.add_argument("key", type=str, help="Key (path) of the file in the S3 bucket") 200 | parser.add_argument( 201 | "ingest_mode", 202 | type=str, 203 | choices=["append", "create"], 204 | help="Ingestion mode: append or create", 205 | ) 206 | args = parser.parse_args() 207 | 208 | catalog_name = "medium_stats" 209 | db_name = "mediumstats" 210 | table_name = "articles" 211 | local_path = "/usr/stats/" + args.key 212 | bucket_name = "medium-stats" 213 | iceberg_bucket_name = "iceberg-table" 214 | iceberg_bucket_prefix = "warehouse/" 215 | temp_table_name = "mediumstatstemp" 216 | 217 | download_from_s3(bucket_name, args.key, local_path) 218 | 219 | df, spark = process_data(iceberg_bucket_name, iceberg_bucket_prefix, local_path) 220 | 221 | df.cache() 222 | 223 | if args.ingest_mode == "append": 224 | df.createOrReplaceTempView(f"""{temp_table_name}""") 225 | print("Merging Data to Iceberg") 226 | spark.sql( 227 | f""" 228 | MERGE INTO {catalog_name}.{db_name}.{table_name} a 229 | USING {temp_table_name} b 230 | on a.Date = b.Date 231 | WHEN NOT MATCHED THEN INSERT *; 232 | """ 233 | ) 234 | 235 | df_yearly = compute_yearly_statistics(spark, temp_table_name) 236 | print("Writing yearly stats to mongo") 237 | df_yearly.write.format("mongodb").mode("overwrite").option( 238 | "database", "medium" 239 | ).option("collection", "yearlyStats").save() 240 | 241 | elif args.ingest_mode == "create": 242 | spark.sql( 243 | f""" 244 | CREATE TABLE IF NOT EXISTS {catalog_name}.{db_name}.{table_name} ( 245 | Date date, 246 | readersThatClappedCount long, 247 | readersThatReadCount int, 248 | readersThatRepliedCount int, 249 | Viewers int, 250 | article_id string, 251 | title string, 252 | url string 253 | ) using iceberg 254 | PARTITIONED BY (year(Date),title); 255 | """ 256 | ) 257 | # df.writeTo(f"{catalog_name}.{db_name}.{table_name}").overwritePartitions() 258 | df.writeTo(f"{catalog_name}.{db_name}.{table_name}").append()  # append() is required: writeTo() alone only builds a DataFrameWriterV2 and writes nothing 259 | print("Data written to Iceberg table", table_name) 260 | print("Writing all stats to mongo") 261 | df.write.format("mongodb").mode("overwrite").option( 262 | "database", "medium" 263 | ).option("collection", "allStats").save() 264 | --------------------------------------------------------------------------------